[파이썬] 딥러닝을 이용한 언어 번역

04 Sep 2023

python

언어 번역은 기술의 발전과 함께 오랫동안 연구되어온 분야입니다. 그리고 이제 딥러닝을 이용한 언어 번역은 많은 관심을 받고 있습니다. 딥러닝은 언어 번역에서 탁월한 성능을 보여주는데, 이는 대량의 데이터를 기반으로 언어의 의미를 학습하고 새로운 문장을 생성하는 능력을 갖추고 있기 때문입니다.

언어 번역을 위한 딥러닝 모델

언어 번역을 위해 사용되는 딥러닝 모델 중 가장 대표적인 것은 인코더-디코더 (Encoder-Decoder) 구조입니다. 인코더는 입력 문장을 임베딩하고, 디코더는 임베딩된 입력을 토대로 번역된 문장을 생성합니다.

다음은 간단한 예제 코드입니다. 이 예제는 영어를 한국어로 번역하는 딥러닝 모델을 구현하는 과정을 보여줍니다. 이를 위해 Keras 라이브러리를 사용하였습니다.

import numpy as np
from keras.models import Model
from keras.layers import Input, LSTM, Dense

# 입력 시퀀스와 출력 시퀀스의 최대 길이
input_seq_len = 20
output_seq_len = 22

# 학습 데이터 준비
input_texts = ['Hello', 'How are you?', 'Goodbye']
output_texts = ['안녕', '어떻게 지내?', '안녕히 가세요']

# 입력 문장과 출력 문장의 길이를 패딩
input_data = np.zeros((len(input_texts), input_seq_len), dtype='float32')
output_data = np.zeros((len(output_texts), output_seq_len), dtype='float32')

for i, (input_text, output_text) in enumerate(zip(input_texts, output_texts)):
    for t, char in enumerate(input_text):
        input_data[i, t] = ord(char)   # 문자를 아스키 코드로 변환하여 입력 데이터로 사용
    for t, char in enumerate(output_text):
        output_data[i, t] = ord(char)  # 문자를 아스키 코드로 변환하여 출력 데이터로 사용

# Encoder-Decoder 모델 구현
encoder_input = Input(shape=(None,))
encoder = LSTM(128, return_state=True)
encoder_output, state_h, state_c = encoder(encoder_input)
encoder_states = [state_h, state_c]

decoder_input = Input(shape=(None,))
decoder = LSTM(128, return_sequences=True, return_state=True)
decoder_output, _, _ = decoder(decoder_input, initial_state=encoder_states)
decoder_dense = Dense(output_seq_len, activation='softmax')
decoder_output = decoder_dense(decoder_output)

model = Model([encoder_input, decoder_input], decoder_output)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')

# 모델 학습
model.fit([input_data, output_data[:, :-1]], output_data[:, 1:],
          batch_size=1,
          epochs=100)

# 번역 문장 생성
encoder_model = Model(encoder_input, encoder_states)

decoder_state_input_h = Input(shape=(128,))
decoder_state_input_c = Input(shape=(128,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_outputs, state_h, state_c = decoder(decoder_input, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)

decoder_model = Model([decoder_input] + decoder_states_inputs, [decoder_outputs] + decoder_states)

def translate(input_sentence):
    # 입력 문장을 인코딩
    states_value = encoder_model.predict(input_sentence)

    # 번역 문장 초기화
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = ord('\t')  # 출력 문장의 시작 토큰

    stop_condition = False
    translated_sentence = ''

    # 디코더를 사용하여 번역 문장을 생성
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)

        # 예측 결과 중 가장 높은 확률을 가진 토큰을 가져옴
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = chr(sampled_token_index)

        translated_sentence += sampled_char

        # 출력 문장의 끝인지 확인
        if sampled_char == '\n' or len(translated_sentence) > output_seq_len:
            stop_condition = True

        # 번역된 토큰을 다음 입력으로 사용
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index

        # 이전 상태를 다음 디코더 예측에 사용
        states_value = [h, c]

    return translated_sentence

# 예제 문장 번역
input_sentence = np.zeros((1, input_seq_len))
for t, char in enumerate('Hello'):
    input_sentence[0, t] = ord(char)

translation = translate(input_sentence)
print(translation)  # 출력: '안녕하세요\n'

이 예제는 단순히 영어 단어를 한국어로 번역하기 위한 간단한 모델입니다. 실제로 사용되는 딥러닝 기반 언어 번역 모델은 훨씬 더 복잡하고 정교한 구조를 갖추고 있습니다.