vilarin committed
Commit 783d533
1 Parent(s): b5a3249

Upload 12 files
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (136 Bytes).
 
app/webui/README.md CHANGED
@@ -1,7 +1,81 @@
-# Tranlsation-Agent-Webui
-
-## Usage
-
-git clone https://github.com/andrewyng/translation-agent.git
-cd translation-agent\app\webui
-pip install -
+## Translation Agent WebUI
+
+This repository contains a Gradio web UI for a translation agent that uses various language models for translation.
+
+**Features:**
+
+- **Tokenized Text:** Displays the translated text with tokenization, highlighting the differences between the original and translated words.
+- **Document Upload:** Supports uploading documents in various formats (PDF, TXT, DOC, etc.) for translation.
+- **Multiple API Support:** Integrates with popular language model providers such as:
+  - Groq
+  - OpenAI
+  - Cohere
+  - Ollama
+  - Together AI
+  - Hugging Face Inference API
+  - ...
+
+  Any LLM supported by Llama Index can be used, so the list is easily extended.
+
+**Getting Started**
+
+1. **Install Dependencies:**
+
+   **Linux (using a Python venv):**
+   ```bash
+   git clone https://github.com/andrewyng/translation-agent.git
+   cd translation-agent
+   python -m venv web_ui
+   source web_ui/bin/activate
+   pip install -r app/webui/requirements.txt
+   ```
+
+   **Windows:**
+   ```bash
+   git clone https://github.com/andrewyng/translation-agent.git
+   cd translation-agent
+   python -m venv web_ui
+   .\web_ui\Scripts\activate
+   pip install -r app/webui/requirements.txt
+   ```
+
+2. **Set API Keys:**
+   - Rename `.env.sample` to `.env` and add your API keys for each service:
+
+     ```
+     OPENAI_API_KEY="sk-xxxxx" # Keep this field
+     GROQ_API_KEY="xxxxx"
+     COHERE_API_KEY="xxxxx"
+     TOGETHER_API_KEY="xxxxx"
+     HF_TOKEN="xxxxx"
+     ```
+   - Alternatively, you can set the API key directly in the web UI.
+
+3. **Run the Web UI:**
+   ```bash
+   python -m app.webui.app
+   ```
+
+4. **Access the Web UI:**
+   Open your web browser and navigate to `http://127.0.0.1:7860/`.
+
+**Usage:**
+
+1. Select your desired translation API from the Endpoint dropdown menu.
+2. If you use the Hugging Face API, enter your `HF_TOKEN` in the `api_key` textbox.
+3. Enter the source text or upload a document file.
+4. Submit; the UI displays the translated text with tokenization and highlights the differences.
+
+**Customization:**
+
+- **Add New LLMs:** Modify the `patch.py` file to integrate additional LLMs (see the sketch after this diff).
+
+**Contributing:**
+
+Contributions are welcome! Feel free to open issues or submit pull requests.
+
+**License:**
+
+This project is licensed under the MIT License.
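
The **Add New LLMs** note above maps directly onto the provider branches visible in the `app/webui/patch.py` diff further down. As a hedged illustration of what a new branch could look like: `MistralAI`, `MISTRAL_API_KEY`, and the `llama-index-llms-mistralai` package are assumptions for illustration only, not part of this commit.

```python
# Hypothetical provider for model_load() in app/webui/patch.py.
# MistralAI / MISTRAL_API_KEY / llama-index-llms-mistralai are
# illustrative assumptions, not part of this commit.
import os

from llama_index.core import Settings
from llama_index.llms.mistralai import MistralAI


def load_mistral(model: str, api_key: str = "") -> None:
    # Same pattern as the existing branches: prefer the key typed into
    # the UI, fall back to the environment variable, then register the
    # LLM globally so the llama-index completion calls pick it up.
    Settings.llm = MistralAI(
        model=model,
        api_key=api_key if api_key else os.getenv("MISTRAL_API_KEY"),
    )
```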
 
 
 
 
app/webui/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (142 Bytes).
 
app/webui/__pycache__/app.cpython-310.pyc ADDED
Binary file (4.22 kB).
 
app/webui/__pycache__/patch.cpython-310.pyc ADDED
Binary file (3.37 kB).
 
app/webui/__pycache__/process.cpython-310.pyc ADDED
Binary file (2.33 kB).
 
app/webui/app.py CHANGED
@@ -7,7 +7,7 @@ sys.path.insert(0, project_root)
 
 import re
 import gradio as gr
-from app.webui.process import model_load, lang_detector, diff_texts, translator
+from app.webui.process import model_load, diff_texts, translator
 from llama_index.core import SimpleDirectoryReader
 
 def huanik(
@@ -63,8 +63,8 @@ def update_model(endpoint):
     return gr.update(value=endpoint_model_map[endpoint])
 
 def read_doc(file):
-    docs = SimpleDirectoryReader(input_files=file).load_data()
-    return docs
+    docs = SimpleDirectoryReader(input_files=[file]).load_data()
+    return docs[0].text
 
 TITLE = """
 <h1><a href="https://github.com/andrewyng/translation-agent">Translation-Agent</a> webUI</h1>
@@ -82,7 +82,7 @@ CSS = """
 }
 """
 
-with gr.Blocks(theme="soft", css=CSS) as demo:
+with gr.Blocks(theme="soft", css=CSS, fill_height=True) as demo:
     gr.Markdown(TITLE)
     with gr.Row():
         with gr.Column(scale=1):
@@ -94,7 +94,7 @@ with gr.Blocks(theme="soft", css=CSS) as demo:
             model = gr.Textbox(label="Model", value="gpt-4o", )
             api_key = gr.Textbox(label="API_KEY", type="password", )
             source_lang = gr.Textbox(
-                label="Source Lang(Auto-Detect)",
+                label="Source Lang",
                 value="English",
             )
             target_lang = gr.Textbox(
@@ -130,14 +130,14 @@ with gr.Blocks(theme="soft", css=CSS) as demo:
                 value="How we live is so different from how we ought to live that he who studies "+\
                     "what ought to be done rather than what is done will learn the way to his downfall "+\
                     "rather than to his preservation.",
-                lines=5,
+                lines=10,
             )
             with gr.Tab("Final"):
-                output_final = gr.Textbox(label="FInal Translation", lines=3, show_copy_button=True)
+                output_final = gr.Textbox(label="Final Translation", lines=10, show_copy_button=True)
             with gr.Tab("Initial"):
-                output_init = gr.Textbox(label="Init Translation", lines=3, show_copy_button=True)
+                output_init = gr.Textbox(label="Init Translation", lines=10, show_copy_button=True)
             with gr.Tab("Reflection"):
-                output_reflect = gr.Textbox(label="Reflection", lines=3, show_copy_button=True)
+                output_reflect = gr.Textbox(label="Reflection", lines=10, show_copy_button=True)
             with gr.Tab("Diff"):
                 output_diff = gr.HighlightedText(visible = False)
         with gr.Row():
@@ -146,7 +146,6 @@ with gr.Blocks(theme="soft", css=CSS) as demo:
     clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
 
     endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
-    source_text.change(lang_detector, source_text, source_lang)
    submit.click(fn=huanik, inputs=[endpoint, model, api_key, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output], outputs=[output_init, output_reflect, output_final, output_diff])
    upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
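
The `read_doc` change above is the functional core of the document-upload fix: `SimpleDirectoryReader` expects `input_files` to be a *list* of paths and returns a list of `Document` objects, so the old code both passed a bad argument and handed Gradio a list instead of a string. A standalone sketch of the corrected behavior (the sample file name is illustrative):

```python
# Standalone sketch of the corrected read_doc() above.
# Requires llama-index; "sample.txt" is an illustrative path.
from llama_index.core import SimpleDirectoryReader


def read_doc(file: str) -> str:
    # input_files must be a list; load_data() returns one Document per
    # file (or per page for PDFs), so docs[0].text covers only the first.
    docs = SimpleDirectoryReader(input_files=[file]).load_data()
    return docs[0].text


if __name__ == "__main__":
    print(read_doc("sample.txt")[:200])
```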
 
app/webui/patch.py CHANGED
@@ -1,9 +1,7 @@
 # a monkey patch to use llama-index completion
 import os
-from typing import Union, Callable
-from functools import wraps
-from src.translation_agent.utils import *
-
+from typing import Union
+import src.translation_agent.utils as utils
 
 from llama_index.llms.groq import Groq
 from llama_index.llms.cohere import Cohere
@@ -28,12 +26,12 @@ def model_load(
     if endpoint == "Groq":
         llm = Groq(
             model=model,
-            api_key=api_key,
+            api_key=api_key if api_key else os.getenv("GROQ_API_KEY"),
         )
     elif endpoint == "Cohere":
         llm = Cohere(
             model=model,
-            api_key=api_key,
+            api_key=api_key if api_key else os.getenv("COHERE_API_KEY"),
         )
     elif endpoint == "OpenAI":
         llm = OpenAI(
@@ -43,16 +41,16 @@ def model_load(
     elif endpoint == "TogetherAI":
         llm = TogetherLLM(
             model=model,
-            api_key=api_key,
+            api_key=api_key if api_key else os.getenv("TOGETHER_API_KEY"),
         )
-    elif endpoint == "ollama":
+    elif endpoint == "Ollama":
         llm = Ollama(
             model=model,
             request_timeout=120.0)
     elif endpoint == "Huggingface":
         llm = HuggingFaceInferenceAPI(
             model_name=model,
-            token=api_key,
+            token=api_key if api_key else os.getenv("HF_TOKEN"),
             task="text-generation",
         )
     Settings.llm = llm
@@ -63,10 +61,7 @@ def model_load(
     Settings.num_output = num_output
 
 
-
-def completion_wrapper(func: Callable) -> Callable:
-    @wraps(func)
-    def wrapper(
+def get_completion(
     prompt: str,
     system_message: str = "You are a helpful assistant.",
     temperature: float = 0.3,
@@ -126,7 +121,15 @@ def completion_wrapper(func: Callable) -> Callable:
         )
         return response.message.content
 
-    return wrapper
+utils.get_completion = get_completion
 
-openai_completion = get_completion
-get_completion = completion_wrapper(openai_completion)
+one_chunk_initial_translation = utils.one_chunk_initial_translation
+one_chunk_reflect_on_translation = utils.one_chunk_reflect_on_translation
+one_chunk_improve_translation = utils.one_chunk_improve_translation
+one_chunk_translate_text = utils.one_chunk_translate_text
+num_tokens_in_string = utils.num_tokens_in_string
+multichunk_initial_translation = utils.multichunk_initial_translation
+multichunk_reflect_on_translation = utils.multichunk_reflect_on_translation
+multichunk_improve_translation = utils.multichunk_improve_translation
+multichunk_translation = utils.multichunk_translation
+calculate_chunk_size = utils.calculate_chunk_size
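
This patch.py change swaps strategies: instead of decorating `get_completion` with a wrapper, it rebinds `utils.get_completion` directly and then re-exports the translation helpers. The rebind works because those helpers resolve `get_completion` from their defining module's globals at call time. A minimal, self-contained demo of that pattern (the stand-in module and strings are illustrative, not from the repo):

```python
# Self-contained demo of the monkey-patch pattern used in patch.py;
# the stand-in module below mimics src.translation_agent.utils.
import types

utils = types.ModuleType("utils")
exec(
    "def get_completion(prompt):\n"
    "    return 'openai:' + prompt\n"
    "\n"
    "def one_chunk_initial_translation(text):\n"
    "    # Resolves get_completion from this module's globals at call time.\n"
    "    return get_completion(text)\n",
    utils.__dict__,
)


def get_completion(prompt):
    # The llama-index-backed replacement in patch.py plays this role.
    return "llama-index:" + prompt


utils.get_completion = get_completion  # rebind inside the module
one_chunk_initial_translation = utils.one_chunk_initial_translation  # re-export

# The helper now routes through the replacement:
assert one_chunk_initial_translation("hi") == "llama-index:hi"
```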
app/webui/process.py CHANGED
@@ -1,34 +1,26 @@
-from polyglot.detect import Detector
-from polyglot.text import Text
+import re
+import nltk
 from difflib import Differ
 from icecream import ic
-from app.webui.patch import *
+from app.webui.patch import model_load, num_tokens_in_string, one_chunk_initial_translation, one_chunk_reflect_on_translation, one_chunk_improve_translation
+from app.webui.patch import calculate_chunk_size, multichunk_initial_translation, multichunk_reflect_on_translation, multichunk_improve_translation
+
 from llama_index.core.node_parser import SentenceSplitter
 
-def lang_detector(text):
-    min_chars = 5
-    if len(text) < min_chars:
-        return "Input text too short"
-    try:
-        detector = Detector(text).language
-        lang_info = str(detector)
-        code = re.search(r"name: (\w+)", lang_info).group(1)
-        return code
-    except Exception as e:
-        return f"ERROR:{str(e)}"
 
-def tokenize(text):
-    # Use polyglot to tokenize the text
-    polyglot_text = Text(text)
-    words = polyglot_text.words
+nltk.download('punkt', quiet=True)
+
+def tokenize(text):
+    # Use nltk to tokenize the text
+    words = nltk.word_tokenize(text)
     # Check if the text contains spaces
     if ' ' in text:
         # Create a list of words and spaces
         tokens = []
         for word in words:
             tokens.append(word)
-            tokens.append(' ') # Add space after each word
+            if not word.startswith("'") and not word.endswith("'"): # Avoid adding space after punctuation
+                tokens.append(' ') # Add space after each word
         return tokens[:-1] # Remove the last space
     else:
         return words
@@ -62,7 +54,7 @@ def translator(
     target_lang,
     source_text,
     country,
-    max_tokens=MAX_TOKENS_PER_CHUNK
+    max_tokens=1000,
 ):
     """Translate the source_text from source_lang to target_lang."""
     num_tokens_in_text = num_tokens_in_string(source_text)
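
process.py now tokenizes with NLTK instead of polyglot, and the resulting token stream (words interleaved with spaces) is what the diff view highlights. `diff_texts` itself is outside these hunks, so the sketch below is only an approximation built on the `difflib.Differ` import shown above; its `-`/`+`/`  ` markers map naturally onto `gr.HighlightedText` (token, category) pairs:

```python
# Hedged sketch of the tokenize/diff pipeline; diff_texts is not shown
# in this commit's hunks, so its body here is an approximation.
from difflib import Differ

import nltk

nltk.download('punkt', quiet=True)


def tokenize(text):
    # Same logic as the committed function: interleave spaces between
    # word tokens, except around contraction fragments such as "'s".
    words = nltk.word_tokenize(text)
    if ' ' not in text:
        return words
    tokens = []
    for word in words:
        tokens.append(word)
        if not word.startswith("'") and not word.endswith("'"):
            tokens.append(' ')
    if tokens and tokens[-1] == ' ':
        tokens.pop()  # drop only a trailing space (hardened vs. tokens[:-1])
    return tokens


def diff_texts(text1, text2):
    # Differ prefixes each token with '- ', '+ ', or '  '; HighlightedText
    # accepts (token, category) pairs, with None meaning "no highlight".
    d = Differ()
    return [
        (token[2:], token[0] if token[0] != ' ' else None)
        for token in d.compare(tokenize(text1), tokenize(text2))
        if not token.startswith('?')  # skip Differ's intraline hint rows
    ]


print(diff_texts("How we live is different", "How we act is so different"))
```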
app/webui/requirements.txt ADDED
@@ -0,0 +1,12 @@
+llama-index
+llama-index-llms-groq
+llama-index-llms-openai
+llama-index-llms-cohere
+llama-index-llms-together
+llama-index-llms-ollama
+llama-index-llms-huggingface-api
+tiktoken
+icecream
+nltk
+langchain-text-splitters
+gradio