tonic committed on
Commit
5701b30
1 Parent(s): e115e9b

improve the interface, add parsing for longest phrases, language code quick fix for surya, add translation with aya

Browse files
Files changed (2) hide show
  1. app.py +93 -26
  2. languages.json +0 -0
app.py CHANGED
@@ -5,7 +5,7 @@ from surya.ocr import run_ocr
5
  from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
6
  from surya.model.recognition.model import load_model as load_rec_model
7
  from surya.model.recognition.processor import load_processor as load_rec_processor
8
- # from lang_list import TEXT_SOURCE_LANGUAGE_NAMES
9
  from gradio_client import Client
10
  from dotenv import load_dotenv
11
  import requests
@@ -27,7 +27,7 @@ choices = df["name"].to_list()
27
  inputlanguage = ""
28
  producetext = "\n\nProduce a complete expositional blog post in {target_language} based on the above :"
29
  formatinputstring = "\n\nthe above text is a learning aid. you must use rich text format to rewrite the above and add 1 . a red color tags for nouns 2. a blue color tag for verbs 3. a green color tag for adjectives and adverbs:"
30
-
31
  # Regular expression patterns for each color
32
  patterns = {
33
  "red": r'<span style="color: red;">(.*?)</span>',
@@ -41,6 +41,35 @@ matches = {
41
  "blue": [],
42
  "green": [],
43
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  class TaggedPhraseExtractor:
45
  def __init__(self, text=''):
46
  self.text = text
@@ -55,24 +84,32 @@ class TaggedPhraseExtractor:
55
  self.patterns[color] = pattern
56
 
57
  def extract_phrases(self):
58
- """Extract phrases for all colors and patterns added."""
59
- matches = {color: re.findall(pattern, self.text) for color, pattern in self.patterns.items()}
60
- return matches
 
 
 
 
 
 
 
 
 
61
 
62
  def print_phrases(self):
63
- """Extract phrases and print them."""
64
  matches = self.extract_phrases()
65
- for color, phrases in matches.items():
66
  print(f"Phrases with color {color}:")
67
- for phrase in phrases:
 
 
 
68
  print(f"- {phrase}")
69
- print()
70
 
71
- co = cohere.Client(COHERE_API_KEY)
72
- audio_client = Client(SEAMLESSM4T)
73
- client = Client(SEAMLESSM4T)
74
-
75
- def process_audio_to_text(audio_path, inputlanguage="English"):
76
  """
77
  Convert audio input to text using the Gradio client.
78
  """
@@ -80,7 +117,7 @@ def process_audio_to_text(audio_path, inputlanguage="English"):
80
  result = audio_client.predict(
81
  audio_path,
82
  inputlanguage,
83
- inputlanguage,
84
  api_name="/s2tt"
85
  )
86
  print("Audio Result: ", result)
@@ -100,8 +137,8 @@ def process_text_to_audio(text, translatefrom="English", translateto="English"):
100
  return result[0]
101
 
102
  class OCRProcessor:
103
- def __init__(self, langs=["en"]): #add input language code
104
- self.langs = langs
105
  self.det_processor, self.det_model = load_det_processor(), load_det_model()
106
  self.rec_model, self.rec_processor = load_rec_model(), load_rec_processor()
107
 
@@ -109,18 +146,19 @@ class OCRProcessor:
109
  """
110
  Process a PIL image and return the OCR text.
111
  """
112
- predictions = run_ocr([image], [self.langs], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
113
- return predictions[0] # Assuming the first item in predictions contains the desired text
114
 
115
  def process_pdf(self, pdf_path):
116
  """
117
  Process a PDF file and return the OCR text.
118
  """
119
- predictions = run_ocr([pdf_path], [self.langs], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
120
- return predictions[0] # Assuming the first item in predictions contains the desired text
121
 
122
  def process_input(image=None, file=None, audio=None, text="", translateto = "English", translatefrom = "English" ):
123
- ocr_processor = OCRProcessor()
 
124
  final_text = text
125
  if image is not None:
126
  ocr_prediction = ocr_processor.process_image(image)
@@ -171,7 +209,20 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
171
 
172
  audio_output = process_text_to_audio(processed_text, translateto, translateto)
173
 
174
- return processed_text, audio_output
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
  def main():
177
  with gr.Blocks() as demo:
@@ -193,12 +244,28 @@ def main():
193
  process_button = gr.Button("🌟AyaTonic")
194
 
195
  processed_text_output = RichTextbox(label="Processed Text")
196
- audio_output = gr.Audio(label="Audio Output")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
  process_button.click(
199
- fn=process_input,
200
  inputs=[image_input, file_input, audio_input, text_input, input_language, target_language],
201
- outputs=[processed_text_output, audio_output]
202
  )
203
 
204
  if __name__ == "__main__":
 
5
  from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
6
  from surya.model.recognition.model import load_model as load_rec_model
7
  from surya.model.recognition.processor import load_processor as load_rec_processor
8
+ from lang_list import TEXT_SOURCE_LANGUAGE_NAMES , LANGUAGE_NAME_TO_CODE , text_source_language_codes
9
  from gradio_client import Client
10
  from dotenv import load_dotenv
11
  import requests
 
27
  inputlanguage = ""
28
  producetext = "\n\nProduce a complete expositional blog post in {target_language} based on the above :"
29
  formatinputstring = "\n\nthe above text is a learning aid. you must use rich text format to rewrite the above and add 1 . a red color tags for nouns 2. a blue color tag for verbs 3. a green color tag for adjectives and adverbs:"
30
+ translatetextinst = "\n\nthe above text is a learning aid. you must use markdown format to translate the above into {inputlanguage} :'"
31
  # Regular expression patterns for each color
32
  patterns = {
33
  "red": r'<span style="color: red;">(.*?)</span>',
 
41
  "blue": [],
42
  "green": [],
43
  }
44
+
45
+ co = cohere.Client(COHERE_API_KEY)
46
+ audio_client = Client(SEAMLESSM4T)
47
+
48
def get_language_code(language_name):
    """
    Look up *language_name* in the languages table and return the first two
    letters of its code (Surya expects two-letter codes).
    """
    row = df[df['name'] == language_name]
    full_code = row['code'].iloc[0]
    return full_code[:2]
54
+
55
def translate_text(text, translatefrom="English", translateto="English", instructions=translatetextinst):
    """
    Translate *text* with the Cohere Aya model.

    Parameters:
        text (str): The text to translate.
        translatefrom (str): Language the text is currently in.  Accepted so
            callers that pass languages positionally
            (``translate_text(phrase, translateto, translatefrom)``) no longer
            raise TypeError; the prompt template only names the destination.
        translateto (str): Language to translate into; substituted for the
            template's ``{inputlanguage}`` placeholder.
        instructions (str): Prompt template appended to *text*.

    Returns:
        str: The model's generated translation.
    """
    # The original concatenated the raw template, leaving the literal
    # "{inputlanguage}" marker in the prompt; fill it in here.  NOTE(review):
    # a custom `instructions` string containing other {braces} would raise
    # KeyError in .format() — confirm callers only pass the default template.
    prompt = f"{text}{instructions.format(inputlanguage=translateto)}"
    response = co.generate(
        model='c4ai-aya',
        prompt=prompt,
        max_tokens=2986,
        temperature=0.6,
        k=0,
        stop_sequences=[],
        return_likelihoods='NONE'
    )
    return response.generations[0].text
72
+
73
  class TaggedPhraseExtractor:
74
  def __init__(self, text=''):
75
  self.text = text
 
84
  self.patterns[color] = pattern
85
 
86
  def extract_phrases(self):
87
+ """Extract phrases for all colors and patterns added, including the three longest phrases."""
88
+ matches = {}
89
+ three_matches = {}
90
+ for color, pattern in self.patterns.items():
91
+ found_phrases = re.findall(pattern, self.text)
92
+ sorted_phrases = sorted(found_phrases, key=len, reverse=True)
93
+ matches[color] = {
94
+ 'all_phrases': found_phrases,
95
+ 'top_three_longest': sorted_phrases[:3]
96
+ }
97
+ three_matches = sorted_phrases[:3]
98
+ return matches , three_matches
99
 
100
  def print_phrases(self):
101
+ """Extract phrases and print them, including the three longest phrases."""
102
  matches = self.extract_phrases()
103
+ for color, data in matches.items():
104
  print(f"Phrases with color {color}:")
105
+ for phrase in data['all_phrases']:
106
+ print(f"- {phrase}")
107
+ print(f"\nThree longest phrases for color {color}:")
108
+ for phrase in data['top_three_longest']:
109
  print(f"- {phrase}")
110
+ print()
111
 
112
+ def process_audio_to_text(audio_path, inputlanguage="English", outputlanguage="English"):
 
 
 
 
113
  """
114
  Convert audio input to text using the Gradio client.
115
  """
 
117
  result = audio_client.predict(
118
  audio_path,
119
  inputlanguage,
120
+ outputlanguage,
121
  api_name="/s2tt"
122
  )
123
  print("Audio Result: ", result)
 
137
  return result[0]
138
 
139
  class OCRProcessor:
140
+ def __init__(self, lang_code=["en"]):
141
+ self.lang_code = lang_code
142
  self.det_processor, self.det_model = load_det_processor(), load_det_model()
143
  self.rec_model, self.rec_processor = load_rec_model(), load_rec_processor()
144
 
 
146
  """
147
  Process a PIL image and return the OCR text.
148
  """
149
+ predictions = run_ocr([image], [self.lang_code], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
150
+ return predictions[0]
151
 
152
  def process_pdf(self, pdf_path):
153
  """
154
  Process a PDF file and return the OCR text.
155
  """
156
+ predictions = run_ocr([pdf_path], [self.lang_code], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
157
+ return predictions[0]
158
 
159
  def process_input(image=None, file=None, audio=None, text="", translateto = "English", translatefrom = "English" ):
160
+ lang_code = get_language_code(translatefrom)
161
+ ocr_processor = OCRProcessor(lang_code)
162
  final_text = text
163
  if image is not None:
164
  ocr_prediction = ocr_processor.process_image(image)
 
209
 
210
  audio_output = process_text_to_audio(processed_text, translateto, translateto)
211
 
212
+ extractor = TaggedPhraseExtractor(processed_text)
213
+ longest_phrases = extractor.get_longest_phrases()
214
+
215
+ # Translate the longest phrases back into the native language
216
+ translated_phrases = [translate_text(phrase, translateto, translatefrom) for phrase in longest_phrases]
217
+
218
+ # Convert the original and translated phrases to audio
219
+ audio_samples = {
220
+ "target_language": [text_to_audio(phrase, translateto) for phrase in longest_phrases],
221
+ "native_language": [text_to_audio(phrase, translatefrom) for phrase in translated_phrases]
222
+ }
223
+
224
+ return audio_output, processed_text, audio_samples, longest_phrases, translated_phrases
225
+
226
 
227
  def main():
228
  with gr.Blocks() as demo:
 
244
  process_button = gr.Button("🌟AyaTonic")
245
 
246
  processed_text_output = RichTextbox(label="Processed Text")
247
+ longest_phrases_1 = gr.Textbox(label="Focus")
248
+ translated_phrases_output_1 = gr.Textbox(label="Translated Phrases")
249
+ audio_output_native_phrase_1 = gr.Audio(label="Audio Output (Native Language)")
250
+ audio_output_target_phrase_1 = gr.Audio(label="Audio Output (Target Language)")
251
+ longest_phrases_2 = gr.Textbox(label="Focus")
252
+ translated_phrases_output_2 = gr.Textbox(label="Translated Phrases")
253
+ audio_output_native_phrase_2 = gr.Audio(label="Audio Output (Native Language)")
254
+ audio_output_target_phrase_2 = gr.Audio(label="Audio Output (Target Language)")
255
+ longest_phrases_3 = gr.Textbox(label="Focus")
256
+ translated_phrases_output_3 = gr.Textbox(label="Translated Phrases")
257
+ audio_output_native_phrase_3 = gr.Audio(label="Audio Output (Native Language)")
258
+ audio_output_target_phrase_3 = gr.Audio(label="Audio Output (Target Language)")
259
+
260
+ def update_outputs(image, file, audio, text, input_language, target_language):
261
+ processed_text, audio_samples, longest_phrases, translated_phrases = process_input(
262
+ image, file, audio, text, input_language, target_language)
263
+ return processed_text, audio_samples['native_language'], audio_samples['target_language'], "\n".join(longest_phrases), "\n".join(translated_phrases) # Fix this
264
 
265
  process_button.click(
266
+ fn=update_outputs,
267
  inputs=[image_input, file_input, audio_input, text_input, input_language, target_language],
268
+ outputs=[processed_text_output, audio_output_native_phrase_1, audio_output_target_phrase_1, longest_phrases_1, translated_phrases_output_1, audio_output_native_phrase_2, audio_output_target_phrase_2, longest_phrases_2, translated_phrases_output_2, audio_output_native_phrase_3, audio_output_target_phrase_3, longest_phrases_3, translated_phrases_output_3] #add education output
269
  )
270
 
271
  if __name__ == "__main__":
languages.json DELETED
File without changes