"""Gradio web UI for the translation agent.

The UI runs a three-stage pipeline (initial translation, reflection,
improved translation) and streams each stage to the page as soon as it is
available. The final translation is rendered as a highlighted diff against
the initial translation.
"""
import re
from difflib import Differ

import gradio as gr

# Provides (via star import) the helpers used below: ic, MAX_TOKENS_PER_CHUNK,
# num_tokens_in_string, calculate_chunk_size, RecursiveCharacterTextSplitter,
# and the one_chunk_* / multichunk_* translation functions.
from src.translation_agent.utils import *

# Display name (shown in the dropdowns) -> English language name.
# NOTE(review): only the keys are read in this file; the dropdown label itself
# is what gets passed to the translation helpers.
LANGUAGES = {
    'English': 'English',
    'Español': 'Spanish',
    'Français': 'French',
    'Deutsch': 'German',
    'Italiano': 'Italian',
    'Português': 'Portuguese',
    'Русский': 'Russian',
    '中文': 'Chinese',
    '日本語': 'Japanese',
    '한국어': 'Korean',
    'العربية': 'Arabic',
    'हिन्दी': 'Hindi',
}

# Languages written without spaces between words. A whitespace-based word diff
# would treat a whole sentence as one token, so these are diffed per character.
# Both the dropdown label and the English name are accepted.
_CHAR_LEVEL_LANGS = frozenset({'中文', 'Chinese', '日本語', 'Japanese'})

# Splits text into alternating runs of non-whitespace and whitespace so that
# joining the tokens reproduces the original text exactly.
_TOKEN_RE = re.compile(r'\S+|\s+')


def diff_texts(text1, text2, lang):
    """Return ``text2`` as highlight segments marking what differs from ``text1``.

    Args:
        text1: The baseline text (the initial translation).
        text2: The revised text (the improved translation).
        lang: Target language; selects character-level diffing for languages
            listed in ``_CHAR_LEVEL_LANGS`` and word-level diffing otherwise.

    Returns:
        A list of ``(segment, tag)`` tuples covering ``text2`` in order, where
        ``tag`` is ``"+"`` for segments added relative to ``text1`` and
        ``None`` for unchanged segments. Segments that exist only in ``text1``
        are omitted.
    """
    d = Differ()
    ic(lang)

    if lang in _CHAR_LEVEL_LANGS:
        # Comparing the strings directly makes Differ work per character.
        seq1, seq2 = text1, text2
    else:
        seq1 = _TOKEN_RE.findall(text1)
        seq2 = _TOKEN_RE.findall(text2)

    # Differ prefixes every item with a two-character code ("+ ", "- ", "  ",
    # "? "); keep only additions and unchanged items and strip the prefix.
    return [
        (token[2:], token[0] if token[0] != " " else None)
        for token in d.compare(seq1, seq2)
        if token[0] in ("+", " ")
    ]


def _translate_single_chunk(source_lang, target_lang, source_text, country):
    """Run the three-stage pipeline on text that fits in one chunk, yielding after each stage."""
    ic("Translating text as single chunk")

    translation_1 = one_chunk_initial_translation(
        source_lang, target_lang, source_text
    )
    yield translation_1, None, None

    reflection = one_chunk_reflect_on_translation(
        source_lang, target_lang, source_text, translation_1, country
    )
    yield translation_1, reflection, None

    translation_2 = one_chunk_improve_translation(
        source_lang, target_lang, source_text, translation_1, reflection
    )
    translation_diff = diff_texts(translation_1, translation_2, target_lang)
    yield translation_1, reflection, translation_diff


def _translate_multi_chunk(
    source_lang, target_lang, source_text, country, num_tokens_in_text, max_tokens
):
    """Split the text into chunks and run the three-stage pipeline, yielding after each stage."""
    ic("Translating text as multiple chunks")

    token_size = calculate_chunk_size(
        token_count=num_tokens_in_text, token_limit=max_tokens
    )
    ic(token_size)

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        model_name="gpt-4",
        chunk_size=token_size,
        chunk_overlap=0,
    )
    source_text_chunks = text_splitter.split_text(source_text)

    translation_1_chunks = multichunk_initial_translation(
        source_lang, target_lang, source_text_chunks
    )
    ic(translation_1_chunks)
    translation_1 = "".join(translation_1_chunks)
    yield translation_1, None, None

    reflection_chunks = multichunk_reflect_on_translation(
        source_lang,
        target_lang,
        source_text_chunks,
        translation_1_chunks,
        country,
    )
    ic(reflection_chunks)
    reflection = "".join(reflection_chunks)
    yield translation_1, reflection, None

    translation_2_chunks = multichunk_improve_translation(
        source_lang,
        target_lang,
        source_text_chunks,
        translation_1_chunks,
        reflection_chunks,
    )
    ic(translation_2_chunks)
    translation_2 = "".join(translation_2_chunks)
    translation_diff = diff_texts(translation_1, translation_2, target_lang)
    yield translation_1, reflection, translation_diff


def translate_text(source_lang, target_lang, source_text, country, max_tokens=MAX_TOKENS_PER_CHUNK):
    """Translate ``source_text`` and stream intermediate results.

    This is a generator so the UI can update after every stage instead of
    waiting for the whole pipeline.

    Args:
        source_lang: Language of ``source_text`` (dropdown label).
        target_lang: Language to translate into (dropdown label).
        source_text: The text to translate.
        country: Country passed to the reflection step.
        max_tokens: Texts with fewer tokens than this are translated as a
            single chunk; longer texts are split into chunks first.

    Yields:
        ``(translation_1, reflection, translation_diff)`` tuples. Entries for
        stages that have not finished yet are ``None``; ``translation_diff``
        is the output of ``diff_texts`` for the initial vs. improved
        translation.
    """
    # Nothing to translate: clear the outputs rather than calling the model
    # three times on empty input.
    if not source_text or not source_text.strip():
        yield "", "", None
        return

    num_tokens_in_text = num_tokens_in_string(source_text)
    ic(num_tokens_in_text)

    if num_tokens_in_text < max_tokens:
        yield from _translate_single_chunk(
            source_lang, target_lang, source_text, country
        )
    else:
        yield from _translate_multi_chunk(
            source_lang,
            target_lang,
            source_text,
            country,
            num_tokens_in_text,
            max_tokens,
        )


def update_ui(translation_1, reflection, translation_diff):
    """Wrap the three output values in ``gr.update`` objects, in the same order."""
    return (
        gr.update(value=translation_1),
        gr.update(value=reflection),
        gr.update(value=translation_diff),
    )


with gr.Blocks() as demo:
    gr.Markdown("# Andrew Ng's Translation Agent ")

    with gr.Row():
        source_lang = gr.Dropdown(
            choices=list(LANGUAGES.keys()),
            value='English',
            label="Source Language",
        )
        target_lang = gr.Dropdown(
            choices=list(LANGUAGES.keys()),
            value='中文',
            label="Target Language",
        )
        country = gr.Textbox(label="Country (for target language)")

    source_text = gr.Textbox(label="Source Text", lines=5, show_copy_button=True)
    btn = gr.Button("Translate")

    with gr.Row():
        translation_1 = gr.Textbox(label="Initial Translation", lines=3)
        reflection = gr.Textbox(label="Reflection", lines=3)

    # Segments tagged "+" by diff_texts are highlighted in red.
    translation_diff = gr.HighlightedText(
        label="Final Translation",
        combine_adjacent=True,
        show_legend=True,
        color_map={"+": "red"},
    )

    btn.click(
        translate_text,
        inputs=[source_lang, target_lang, source_text, country],
        outputs=[translation_1, reflection, translation_diff],
        queue=True,
    )
    # NOTE(review): this second handler feeds the output components' current
    # values straight back into themselves; it looks redundant with the
    # streaming handler above -- confirm before removing.
    btn.click(
        update_ui,
        inputs=[translation_1, reflection, translation_diff],
        outputs=[translation_1, reflection, translation_diff],
        queue=True,
    )

demo.launch()