vilarin committed
Commit 783d533
1 Parent(s): b5a3249

Upload 12 files
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (136 Bytes).
 
app/webui/README.md CHANGED
@@ -1,7 +1,81 @@
-# Tranlsation-Agent-Webui
-
-## Usage
-
-git clone https://github.com/andrewyng/translation-agent.git
-cd translation-agent\app\webui
-pip install -
+## Translation Agent WebUI
+
+This repository contains a Gradio web UI for a translation agent that uses various language models for translation.
+
+**Features:**
+
+- **Tokenized Text:** Displays the translated text with tokenization, highlighting the differences between the original and translated words.
+- **Document Upload:** Supports uploading documents in various formats (PDF, TXT, DOC, etc.) for translation.
+- **Multiple API Support:** Integrates with popular language model providers such as:
+  - Groq
+  - OpenAI
+  - Cohere
+  - Ollama
+  - Together AI
+  - Hugging Face Inference API
+  - ...
+
+  Any LLM supported by Llama Index can be used, so the list is easily extended.
+
+**Getting Started**
+
+1. **Install Dependencies:**
+
+   **Linux (using a Python venv):**
+   ```bash
+   git clone https://github.com/andrewyng/translation-agent.git
+   cd translation-agent
+   python -m venv web_ui
+   source web_ui/bin/activate
+   pip install -r app/webui/requirements.txt
+   ```
+
+   **Windows:**
+   ```bash
+   git clone https://github.com/andrewyng/translation-agent.git
+   cd translation-agent
+   python -m venv web_ui
+   .\web_ui\Scripts\activate
+   pip install -r app/webui/requirements.txt
+   ```
+
+2. **Set API Keys:**
+   - Rename `.env.sample` to `.env` and add your API keys for each service:
+
+     ```
+     OPENAI_API_KEY="sk-xxxxx" # Keep this field
+     GROQ_API_KEY="xxxxx"
+     COHERE_API_KEY="xxxxx"
+     TOGETHER_API_KEY="xxxxx"
+     HF_TOKEN="xxxxx"
+     ```
+   - Alternatively, you can set the API key directly in the web UI.
+
+3. **Run the Web UI:**
+   ```bash
+   python -m app.webui.app
+   ```
+
+4. **Access the Web UI:**
+   Open your web browser and navigate to `http://127.0.0.1:7860/`.
+
+**Usage:**
+
+1. Select your desired translation API from the Endpoint dropdown menu.
+2. If you use the Hugging Face API, enter your `HF_TOKEN` in the `api_key` textbox.
+3. Enter the source text or upload a document file.
+4. Submit; the UI displays the translated text with tokenization and highlights the differences.
+
+**Customization:**
+
+- **Add New LLMs:** Modify the `patch.py` file to integrate additional LLMs (see the sketch after this diff).
+
+**Contributing:**
+
+Contributions are welcome! Feel free to open issues or submit pull requests.
+
+**License:**
+
+This project is licensed under the MIT License.
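
The **Add New LLMs** note above maps directly onto the provider branches visible in the `app/webui/patch.py` diff further down. As a hedged illustration of what a new branch could look like: `MistralAI`, `MISTRAL_API_KEY`, and the `llama-index-llms-mistralai` package are assumptions for illustration only, not part of this commit.

```python
# Hypothetical provider for model_load() in app/webui/patch.py.
# MistralAI / MISTRAL_API_KEY / llama-index-llms-mistralai are
# illustrative assumptions, not part of this commit.
import os

from llama_index.core import Settings
from llama_index.llms.mistralai import MistralAI


def load_mistral(model: str, api_key: str = "") -> None:
    # Same pattern as the existing branches: prefer the key typed into
    # the UI, fall back to the environment variable, then register the
    # LLM globally so the llama-index completion calls pick it up.
    Settings.llm = MistralAI(
        model=model,
        api_key=api_key if api_key else os.getenv("MISTRAL_API_KEY"),
    )
```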
 
 
 
 
app/webui/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (142 Bytes).
 
app/webui/__pycache__/app.cpython-310.pyc ADDED
Binary file (4.22 kB).
 
app/webui/__pycache__/patch.cpython-310.pyc ADDED
Binary file (3.37 kB).
 
app/webui/__pycache__/process.cpython-310.pyc ADDED
Binary file (2.33 kB).
 
app/webui/app.py CHANGED
@@ -7,7 +7,7 @@ sys.path.insert(0, project_root)
 
 import re
 import gradio as gr
-from app.webui.process import model_load, lang_detector, diff_texts, translator
+from app.webui.process import model_load, diff_texts, translator
 from llama_index.core import SimpleDirectoryReader
 
 def huanik(
@@ -63,8 +63,8 @@ def update_model(endpoint):
     return gr.update(value=endpoint_model_map[endpoint])
 
 def read_doc(file):
-    docs = SimpleDirectoryReader(input_files=file).load_data()
-    return docs
+    docs = SimpleDirectoryReader(input_files=[file]).load_data()
+    return docs[0].text
 
 TITLE = """
 <h1><a href="https://github.com/andrewyng/translation-agent">Translation-Agent</a> webUI</h1>
@@ -82,7 +82,7 @@ CSS = """
 }
 """
 
-with gr.Blocks(theme="soft", css=CSS) as demo:
+with gr.Blocks(theme="soft", css=CSS, fill_height=True) as demo:
     gr.Markdown(TITLE)
     with gr.Row():
         with gr.Column(scale=1):
@@ -94,7 +94,7 @@ with gr.Blocks(theme="soft", css=CSS) as demo:
             model = gr.Textbox(label="Model", value="gpt-4o", )
             api_key = gr.Textbox(label="API_KEY", type="password", )
             source_lang = gr.Textbox(
-                label="Source Lang(Auto-Detect)",
+                label="Source Lang",
                 value="English",
             )
             target_lang = gr.Textbox(
@@ -130,14 +130,14 @@ with gr.Blocks(theme="soft", css=CSS) as demo:
                 value="How we live is so different from how we ought to live that he who studies "+\
                     "what ought to be done rather than what is done will learn the way to his downfall "+\
                     "rather than to his preservation.",
-                lines=5,
+                lines=10,
             )
             with gr.Tab("Final"):
-                output_final = gr.Textbox(label="FInal Translation", lines=3, show_copy_button=True)
+                output_final = gr.Textbox(label="Final Translation", lines=10, show_copy_button=True)
             with gr.Tab("Initial"):
-                output_init = gr.Textbox(label="Init Translation", lines=3, show_copy_button=True)
+                output_init = gr.Textbox(label="Init Translation", lines=10, show_copy_button=True)
             with gr.Tab("Reflection"):
-                output_reflect = gr.Textbox(label="Reflection", lines=3, show_copy_button=True)
+                output_reflect = gr.Textbox(label="Reflection", lines=10, show_copy_button=True)
             with gr.Tab("Diff"):
                 output_diff = gr.HighlightedText(visible = False)
         with gr.Row():
@@ -146,7 +146,6 @@ with gr.Blocks(theme="soft", css=CSS) as demo:
     clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
 
     endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
-    source_text.change(lang_detector, source_text, source_lang)
    submit.click(fn=huanik, inputs=[endpoint, model, api_key, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output], outputs=[output_init, output_reflect, output_final, output_diff])
    upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
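
The `read_doc` change above is the functional core of the document-upload fix: `SimpleDirectoryReader` expects `input_files` to be a *list* of paths and returns a list of `Document` objects, so the old code both passed a bad argument and handed Gradio a list instead of a string. A standalone sketch of the corrected behavior (the sample file name is illustrative):

```python
# Standalone sketch of the corrected read_doc() above.
# Requires llama-index; "sample.txt" is an illustrative path.
from llama_index.core import SimpleDirectoryReader


def read_doc(file: str) -> str:
    # input_files must be a list; load_data() returns one Document per
    # file (or per page for PDFs), so docs[0].text covers only the first.
    docs = SimpleDirectoryReader(input_files=[file]).load_data()
    return docs[0].text


if __name__ == "__main__":
    print(read_doc("sample.txt")[:200])
```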
 
app/webui/patch.py CHANGED
@@ -1,9 +1,7 @@
 # a monkey patch to use llama-index completion
 import os
-from typing import Union, Callable
-from functools import wraps
-from src.translation_agent.utils import *
-
+from typing import Union
+import src.translation_agent.utils as utils
 
 from llama_index.llms.groq import Groq
 from llama_index.llms.cohere import Cohere
@@ -28,12 +26,12 @@ def model_load(
     if endpoint == "Groq":
         llm = Groq(
             model=model,
-            api_key=api_key,
+            api_key=api_key if api_key else os.getenv("GROQ_API_KEY"),
         )
     elif endpoint == "Cohere":
         llm = Cohere(
             model=model,
-            api_key=api_key,
+            api_key=api_key if api_key else os.getenv("COHERE_API_KEY"),
         )
     elif endpoint == "OpenAI":
         llm = OpenAI(
@@ -43,16 +41,16 @@ def model_load(
     elif endpoint == "TogetherAI":
         llm = TogetherLLM(
             model=model,
-            api_key=api_key,
+            api_key=api_key if api_key else os.getenv("TOGETHER_API_KEY"),
         )
-    elif endpoint == "ollama":
+    elif endpoint == "Ollama":
         llm = Ollama(
             model=model,
             request_timeout=120.0)
     elif endpoint == "Huggingface":
         llm = HuggingFaceInferenceAPI(
             model_name=model,
-            token=api_key,
+            token=api_key if api_key else os.getenv("HF_TOKEN"),
             task="text-generation",
         )
     Settings.llm = llm
@@ -63,10 +61,7 @@ def model_load(
     Settings.num_output = num_output
 
 
-
-def completion_wrapper(func: Callable) -> Callable:
-    @wraps(func)
-    def wrapper(
+def get_completion(
     prompt: str,
     system_message: str = "You are a helpful assistant.",
     temperature: float = 0.3,
@@ -126,7 +121,15 @@ def completion_wrapper(func: Callable) -> Callable:
         )
         return response.message.content
 
-    return wrapper
+utils.get_completion = get_completion
 
-openai_completion = get_completion
-get_completion = completion_wrapper(openai_completion)
+one_chunk_initial_translation = utils.one_chunk_initial_translation
+one_chunk_reflect_on_translation = utils.one_chunk_reflect_on_translation
+one_chunk_improve_translation = utils.one_chunk_improve_translation
+one_chunk_translate_text = utils.one_chunk_translate_text
+num_tokens_in_string = utils.num_tokens_in_string
+multichunk_initial_translation = utils.multichunk_initial_translation
+multichunk_reflect_on_translation = utils.multichunk_reflect_on_translation
+multichunk_improve_translation = utils.multichunk_improve_translation
+multichunk_translation = utils.multichunk_translation
+calculate_chunk_size = utils.calculate_chunk_size
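
This patch.py change swaps strategies: instead of decorating `get_completion` with a wrapper, it rebinds `utils.get_completion` directly and then re-exports the translation helpers. The rebind works because those helpers resolve `get_completion` from their defining module's globals at call time. A minimal, self-contained demo of that pattern (the stand-in module and strings are illustrative, not from the repo):

```python
# Self-contained demo of the monkey-patch pattern used in patch.py;
# the stand-in module below mimics src.translation_agent.utils.
import types

utils = types.ModuleType("utils")
exec(
    "def get_completion(prompt):\n"
    "    return 'openai:' + prompt\n"
    "\n"
    "def one_chunk_initial_translation(text):\n"
    "    # Resolves get_completion from this module's globals at call time.\n"
    "    return get_completion(text)\n",
    utils.__dict__,
)


def get_completion(prompt):
    # The llama-index-backed replacement in patch.py plays this role.
    return "llama-index:" + prompt


utils.get_completion = get_completion  # rebind inside the module
one_chunk_initial_translation = utils.one_chunk_initial_translation  # re-export

# The helper now routes through the replacement:
assert one_chunk_initial_translation("hi") == "llama-index:hi"
```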
app/webui/process.py CHANGED
@@ -1,34 +1,26 @@
-from polyglot.detect import Detector
-from polyglot.text import Text
+import re
+import nltk
 from difflib import Differ
 from icecream import ic
-from app.webui.patch import *
+from app.webui.patch import model_load, num_tokens_in_string, one_chunk_initial_translation, one_chunk_reflect_on_translation, one_chunk_improve_translation
+from app.webui.patch import calculate_chunk_size, multichunk_initial_translation, multichunk_reflect_on_translation, multichunk_improve_translation
+
 from llama_index.core.node_parser import SentenceSplitter
 
-def lang_detector(text):
-    min_chars = 5
-    if len(text) < min_chars:
-        return "Input text too short"
-    try:
-        detector = Detector(text).language
-        lang_info = str(detector)
-        code = re.search(r"name: (\w+)", lang_info).group(1)
-        return code
-    except Exception as e:
-        return f"ERROR:{str(e)}"
 
-def tokenize(text):
-    # Use polyglot to tokenize the text
-    polyglot_text = Text(text)
-    words = polyglot_text.words
+nltk.download('punkt', quiet=True)
+
+def tokenize(text):
+    # Use nltk to tokenize the text
+    words = nltk.word_tokenize(text)
     # Check if the text contains spaces
     if ' ' in text:
         # Create a list of words and spaces
         tokens = []
         for word in words:
             tokens.append(word)
-            tokens.append(' ') # Add space after each word
+            if not word.startswith("'") and not word.endswith("'"): # Avoid adding space after punctuation
+                tokens.append(' ') # Add space after each word
         return tokens[:-1] # Remove the last space
     else:
         return words
@@ -62,7 +54,7 @@ def translator(
     target_lang,
     source_text,
     country,
-    max_tokens=MAX_TOKENS_PER_CHUNK
+    max_tokens=1000,
 ):
     """Translate the source_text from source_lang to target_lang."""
     num_tokens_in_text = num_tokens_in_string(source_text)
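
process.py now tokenizes with NLTK instead of polyglot, and the resulting token stream (words interleaved with spaces) is what the diff view highlights. `diff_texts` itself is outside these hunks, so the sketch below is only an approximation built on the `difflib.Differ` import shown above; its `-`/`+`/`  ` markers map naturally onto `gr.HighlightedText` (token, category) pairs:

```python
# Hedged sketch of the tokenize/diff pipeline; diff_texts is not shown
# in this commit's hunks, so its body here is an approximation.
from difflib import Differ

import nltk

nltk.download('punkt', quiet=True)


def tokenize(text):
    # Same logic as the committed function: interleave spaces between
    # word tokens, except around contraction fragments such as "'s".
    words = nltk.word_tokenize(text)
    if ' ' not in text:
        return words
    tokens = []
    for word in words:
        tokens.append(word)
        if not word.startswith("'") and not word.endswith("'"):
            tokens.append(' ')
    if tokens and tokens[-1] == ' ':
        tokens.pop()  # drop only a trailing space (hardened vs. tokens[:-1])
    return tokens


def diff_texts(text1, text2):
    # Differ prefixes each token with '- ', '+ ', or '  '; HighlightedText
    # accepts (token, category) pairs, with None meaning "no highlight".
    d = Differ()
    return [
        (token[2:], token[0] if token[0] != ' ' else None)
        for token in d.compare(tokenize(text1), tokenize(text2))
        if not token.startswith('?')  # skip Differ's intraline hint rows
    ]


print(diff_texts("How we live is different", "How we act is so different"))
```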
app/webui/requirements.txt ADDED
@@ -0,0 +1,12 @@
+llama-index
+llama-index-llms-groq
+llama-index-llms-openai
+llama-index-llms-cohere
+llama-index-llms-together
+llama-index-llms-ollama
+llama-index-llms-huggingface-api
+tiktoken
+icecream
+nltk
+langchain-text-splitters
+gradio