Spaces:

ginigen
/

Sign-language

Building

App Files Files Community

ginipick committed on Jan 27

Commit

d9bcf7b

verified ·

1 Parent(s): 96f121e

Update src/main.py

Browse files

Files changed (1) hide show

src/main.py +9 -60

src/main.py CHANGED Viewed

@@ -19,9 +19,7 @@ dataset, list_2000_tokens = dg.load_data()
 def clean_quotes(text):
     """따옴표 정리 함수"""
-    # 연속된 따옴표 제거
     text = re.sub(r"'+", "'", text)
-    # 불필요한 공백 제거
     text = re.sub(r'\s+', ' ', text).strip()
     return text
@@ -31,39 +29,18 @@ def is_korean(text):
 def is_english(text):
     """텍스트가 영어인지 확인하는 함수"""
-    # 따옴표와 공백을 제외한 나머지 텍스트 확인
     text_without_quotes = re.sub(r"'[^']*'|\s", "", text)
-    # 영어 알파벳과 기본 문장부호만 포함되어 있는지 확인
     return bool(re.match(r'^[A-Za-z.,!?-]*$', text_without_quotes))
 def normalize_quotes(text):
     """따옴표 형식을 정규화하는 함수"""
-    # 연속된 따옴표 제거
     text = re.sub(r"'+", "'", text)
-    # 불필요한 공백 제거
     text = re.sub(r'\s+', ' ', text).strip()
-    # 이미 따옴표로 묶인 단어가 있는지 확인
-    existing_quotes = re.findall(r"'([^']*)'", text)
-    if existing_quotes:
-        return text  # 이미 따옴표가 있으면 그대로 반환
-    # 대문자로 된 단어 찾기 (예: JOHN)
-    uppercase_words = re.findall(r'\b[A-Z]+\b', text)
-    if uppercase_words:
-        # 대문자 단어에 따옴표 추가
-        for word in uppercase_words:
-            text = text.replace(word, f"'{word}'")
         return text
-    # 위의 조건에 해당하지 않는 경우 첫 단어에 따옴표 추가
-    words = text.split()
-    if words:
-        # 모든 따옴표 제거 후 첫 단어에만 따옴표 추가
-        first_word = words[0].replace("'", "")
-        words[0] = f"'{first_word}'"
-        words[1:] = [w.replace("'", "") for w in words[1:]]
-        return ' '.join(words)
     return text
 def find_quoted_words(text):
@@ -77,21 +54,19 @@ def spell_out_word(word):
 def translate_korean_text(text):
     """한글 전용 번역 함수"""
     try:
-        # 1. 따옴표로 묶인 단어 찾기
         quoted_match = re.search(r"'([^']*)'", text)
         if not quoted_match:
             return text
         quoted_word = quoted_match.group(1)
-        # 2. 전체 문장을 먼저 번역
         url = "https://translate.googleapis.com/translate_a/single"
         params = {
             "client": "gtx",
             "sl": "ko",
             "tl": "en",
             "dt": "t",
-            "q": text.replace(f"'{quoted_word}'", "XXXXX")  # 특수한 마커로 대체
         }
         response = requests.get(url, params=params)
         if response.status_code != 200:
@@ -99,11 +74,9 @@ def translate_korean_text(text):
         translated_text = ' '.join(item[0] for item in response.json()[0] if item[0])
-        # 3. 따옴표 안의 단어 처리
-        # 영어 단어이거나 대문자로 된 단어는 그대로 유지
-        if re.match(r'^[A-Z]+$', quoted_word) or re.match(r'^[A-Za-z]+$', quoted_word):
             proper_noun = quoted_word.upper()
-        else:  # 한글 단어인 경우
             params["q"] = quoted_word
             response = requests.get(url, params=params)
             if response.status_code == 200:
@@ -111,14 +84,8 @@ def translate_korean_text(text):
             else:
                 proper_noun = quoted_word.upper()
-        # 4. 최종 문장 조합
-        # XXXXX를 따옴표로 묶은 proper_noun으로 대체
         final_text = translated_text.replace("XXXXX", f"'{proper_noun}'")
-        # 불필요한 대문자 변환 방지
         final_text = re.sub(r'\bNAME\b', 'name', final_text)
-        # 마침표 처리
         final_text = final_text.replace(" .", ".")
         return final_text
@@ -130,19 +97,15 @@ def translate_korean_text(text):
 def translate_korean_to_english(text):
     """전체 텍스트 번역 함수"""
     try:
-        # 입력 텍스트 정규화
         text = normalize_quotes(text)
-        # 영어 입력 확인
         if is_english(text):
-            # 기존 영어 처리 방식 유지
             quoted_match = re.search(r"'([^']*)'", text)
             if quoted_match:
                 quoted_word = quoted_match.group(1).upper()
                 text = re.sub(r"'[^']*'", f"'{quoted_word}'", text, 1)
             return text
-        # 한글 입력인 경우 새로운 함수로 처리
         if is_korean(text):
             return translate_korean_text(text)
@@ -151,8 +114,6 @@ def translate_korean_to_english(text):
         print(f"Translation error: {e}")
         return text
 @app.route('/')
 def index():
     return render_template('index.html', title=app.config['TITLE'])
@@ -165,42 +126,30 @@ def result():
             return render_template('error.html', error="Please enter text to translate")
         try:
-            # 입력 텍스트 정규화
             input_text = normalize_quotes(input_text)
-            # 번역 수행
             english_text = translate_korean_to_english(input_text)
             if not english_text:
                 raise Exception("Translation failed")
-            # 따옴표로 묶인 단어 추출 (첫 번째 단어만)
-            quoted_words = re.findall(r"'([^']*)'", english_text)
-            first_quoted_word = quoted_words[0] if quoted_words else None
-            # ASL 변환을 위해 따옴표 제거
             clean_english = re.sub(r"'([^']*)'", r"\1", english_text)
             eng_to_asl_translator = NlpSpacyBaseTranslator(sentence=clean_english)
             generated_gloss = eng_to_asl_translator.translate_to_gloss()
-            # 단어 처리
             processed_gloss = []
             words = generated_gloss.split()
             for word in words:
                 word_upper = word.upper()
-                if first_quoted_word and word_upper == first_quoted_word.upper():
-                    # 고유명사인 경우 철자를 하나씩 분리
                     spelled_word = spell_out_word(word)
                     processed_gloss.extend(['FINGERSPELL-START'] + spelled_word.split() + ['FINGERSPELL-END'])
                 else:
-                    # 일반 단어는 기존 방식대로 처리
-                    word_lower = word.lower()
-                    if word_lower.isalnum():
-                        processed_gloss.append(word_lower)
             gloss_sentence_before_synonym = " ".join(processed_gloss)
-            # 고유명사가 아닌 단어들만 동의어 처리
             final_gloss = []
             i = 0
             while i < len(processed_gloss):
@@ -304,4 +253,4 @@ def download_video(gloss_sentence):
         return f"Error downloading video: {str(e)}", 500
 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860, debug=True)

 def clean_quotes(text):
     """Collapse repeated apostrophes and whitespace in *text*.

     Every run of one or more ``'`` characters is reduced to a single ``'``,
     every run of whitespace is reduced to a single space, and leading and
     trailing whitespace is stripped from the result.
     """
     single_quoted = re.sub(r"'+", "'", text)
     return re.sub(r'\s+', ' ', single_quoted).strip()
 def is_english(text):
     """Return True when *text* looks like English-only input.

     Single-quoted spans and all whitespace are removed first; whatever
     remains must consist solely of ASCII letters and the punctuation
     characters ``. , ! ? -``. An empty remainder also counts as English.
     """
     remainder = re.sub(r"'[^']*'|\s", "", text)
     if re.match(r'^[A-Za-z.,!?-]*$', remainder) is None:
         return False
     return True
 def normalize_quotes(text):
     """따옴표 형식을 정규화하는 함수"""
     text = re.sub(r"'+", "'", text)
     text = re.sub(r'\s+', ' ', text).strip()
+    # 이미 따옴표로 묶인 단어가 있으면 그대로 반환
+    if re.search(r"'[^']*'", text):
         return text
     return text
 def find_quoted_words(text):
 def translate_korean_text(text):
     """한글 전용 번역 함수"""
     try:
         quoted_match = re.search(r"'([^']*)'", text)
         if not quoted_match:
             return text
         quoted_word = quoted_match.group(1)
         url = "https://translate.googleapis.com/translate_a/single"
         params = {
             "client": "gtx",
             "sl": "ko",
             "tl": "en",
             "dt": "t",
+            "q": text.replace(f"'{quoted_word}'", "XXXXX")
         }
         response = requests.get(url, params=params)
         if response.status_code != 200:
         translated_text = ' '.join(item[0] for item in response.json()[0] if item[0])
+        if re.match(r'^[A-Za-z]+$', quoted_word):
             proper_noun = quoted_word.upper()
+        else:
             params["q"] = quoted_word
             response = requests.get(url, params=params)
             if response.status_code == 200:
             else:
                 proper_noun = quoted_word.upper()
         final_text = translated_text.replace("XXXXX", f"'{proper_noun}'")
         final_text = re.sub(r'\bNAME\b', 'name', final_text)
         final_text = final_text.replace(" .", ".")
         return final_text
 def translate_korean_to_english(text):
     """전체 텍스트 번역 함수"""
     try:
         text = normalize_quotes(text)
         if is_english(text):
             quoted_match = re.search(r"'([^']*)'", text)
             if quoted_match:
                 quoted_word = quoted_match.group(1).upper()
                 text = re.sub(r"'[^']*'", f"'{quoted_word}'", text, 1)
             return text
         if is_korean(text):
             return translate_korean_text(text)
         print(f"Translation error: {e}")
         return text
 @app.route('/')
 def index():
     """Render the landing page, passing the configured title to the template."""
     page_title = app.config['TITLE']
     return render_template('index.html', title=page_title)
             return render_template('error.html', error="Please enter text to translate")
         try:
             input_text = normalize_quotes(input_text)
             english_text = translate_korean_to_english(input_text)
             if not english_text:
                 raise Exception("Translation failed")
+            quoted_words = find_quoted_words(english_text)
             clean_english = re.sub(r"'([^']*)'", r"\1", english_text)
             eng_to_asl_translator = NlpSpacyBaseTranslator(sentence=clean_english)
             generated_gloss = eng_to_asl_translator.translate_to_gloss()
             processed_gloss = []
             words = generated_gloss.split()
             for word in words:
                 word_upper = word.upper()
+                if quoted_words and word_upper in [w.upper() for w in quoted_words]:
                     spelled_word = spell_out_word(word)
                     processed_gloss.extend(['FINGERSPELL-START'] + spelled_word.split() + ['FINGERSPELL-END'])
                 else:
+                    processed_gloss.append(word.lower())
             gloss_sentence_before_synonym = " ".join(processed_gloss)
             final_gloss = []
             i = 0
             while i < len(processed_gloss):
         return f"Error downloading video: {str(e)}", 500
 if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860, debug=True)