.env CHANGED
@@ -1,2 +1,2 @@
1
  CO_API_KEY=KQBPf0H0ENZESIC5nuUJ4i4jjg34xMPAkYK7s31W
2
- SEAMLESSM4T=facebook/seamless-m4t-v2-large
 
1
  CO_API_KEY=KQBPf0H0ENZESIC5nuUJ4i4jjg34xMPAkYK7s31W
2
+ SEAMLESSM4T=https://facebook-seamless-m4t-v2-large.hf.space/--replicas/v4gsf/
__pycache__/lang_list.cpython-311.pyc DELETED
Binary file (5.61 kB)
 
app.py CHANGED
@@ -5,20 +5,15 @@ from surya.ocr import run_ocr
5
  from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
6
  from surya.model.recognition.model import load_model as load_rec_model
7
  from surya.model.recognition.processor import load_processor as load_rec_processor
8
- from lang_list import TEXT_SOURCE_LANGUAGE_NAMES , LANGUAGE_NAME_TO_CODE , text_source_language_codes
9
  from gradio_client import Client
10
  from dotenv import load_dotenv
11
  import requests
12
- from io import BytesIO
13
  import cohere
14
  import os
15
  import re
16
  import pandas as pd
17
- import pydub
18
- from pydub import AudioSegment
19
- from pydub.utils import make_chunks
20
- from pathlib import Path
21
- import hashlib
22
 
23
 
24
  title = "# Welcome to AyaTonic"
@@ -27,12 +22,14 @@ description = "Learn a New Language With Aya"
27
  load_dotenv()
28
  COHERE_API_KEY = os.getenv('CO_API_KEY')
29
  SEAMLESSM4T = os.getenv('SEAMLESSM4T')
 
30
  df = pd.read_csv("lang_list.csv")
31
- choices = df["name"].to_list()
32
  inputlanguage = ""
33
  producetext = "\n\nProduce a complete expositional blog post in {target_language} based on the above :"
34
- formatinputstring = """\n\nthe above text is a learning aid. you must use rich text format to rewrite the above and add 1 . a red color tags for nouns 2. a blue color tag for verbs 3. a green color tag for adjectives and adverbs: Example: <span style="color: red;">(.?)</span>. Don't change other format of span tag other than color and the (.?). """
35
- translatetextinst = "\n\nthe above text is a learning aid. you must use markdown format to translate the above into {inputlanguage} :'"
 
36
  patterns = {
37
  "red": r'<span style="color: red;">(.*?)</span>',
38
  "blue": r'<span style="color: blue;">(.*?)</span>',
@@ -45,70 +42,10 @@ matches = {
45
  "blue": [],
46
  "green": [],
47
  }
48
-
49
- co = cohere.Client(COHERE_API_KEY)
50
- audio_client = Client(SEAMLESSM4T)
51
-
52
- def get_language_code(language_name):
53
- """
54
- Extracts the first two letters of the language code based on the language name.
55
- """
56
- try:
57
- code = df.loc[df['name'].str.lower() == language_name.lower(), 'code'].values[0]
58
- return code
59
- except IndexError:
60
- print(f"Language name '{language_name}' not found.")
61
- return None
62
-
63
- def translate_text(text, inputlanguage, target_language):
64
- """
65
- Translates text.
66
- """
67
- # Ensure you format the instruction string within the function body
68
- instructions = translatetextinst.format(inputlanguage=inputlanguage)
69
- producetext_formatted = producetext.format(target_language=target_language)
70
- prompt = f"{text}{producetext_formatted}\n{instructions}"
71
- response = co.generate(
72
- model='c4ai-aya',
73
- prompt=prompt,
74
- max_tokens=2986,
75
- temperature=0.6,
76
- k=0,
77
- stop_sequences=[],
78
- return_likelihoods='NONE'
79
- )
80
- return response.generations[0].text
81
-
82
- class LongAudioProcessor:
83
- def __init__(self, audio_client, api_key=None):
84
- self.client = audio_client
85
- self.process_audio_to_text = process_audio_to_text
86
- self.api_key = api_key
87
-
88
- def process_long_audio(self, audio_path, inputlanguage, outputlanguage, chunk_length_ms=20000):
89
- """
90
- Process audio files longer than 29 seconds by chunking them into smaller segments.
91
- """
92
- audio = AudioSegment.from_file(audio_path)
93
- chunks = make_chunks(audio, chunk_length_ms)
94
- full_text = ""
95
- for i, chunk in enumerate(chunks):
96
- chunk_name = f"chunk{i}.wav"
97
- with open(chunk_name, 'wb') as file:
98
- chunk.export(file, format="wav")
99
- try:
100
- result = self.process_audio_to_text(chunk_name, inputlanguage=inputlanguage, outputlanguage=outputlanguage)
101
- full_text += " " + result.strip()
102
- except Exception as e:
103
- print(f"Error processing {chunk_name}: {e}")
104
- finally:
105
- if os.path.exists(chunk_name):
106
- os.remove(chunk_name)
107
- return full_text.strip()
108
  class TaggedPhraseExtractor:
109
  def __init__(self, text=''):
110
  self.text = text
111
- self.patterns = patterns
112
 
113
  def set_text(self, text):
114
  """Set the text to search within."""
@@ -119,142 +56,73 @@ class TaggedPhraseExtractor:
119
  self.patterns[color] = pattern
120
 
121
  def extract_phrases(self):
122
- """Extract phrases for all colors and patterns added, including the three longest phrases."""
123
- matches = {}
124
- for color, pattern in self.patterns.items():
125
- found_phrases = re.findall(pattern, self.text)
126
- sorted_phrases = sorted(found_phrases, key=len, reverse=True)
127
- matches[color] = sorted_phrases[:3]
128
  return matches
129
 
130
  def print_phrases(self):
131
- """Extract phrases and print them, including the three longest phrases."""
132
  matches = self.extract_phrases()
133
- for color, data in matches.items():
134
  print(f"Phrases with color {color}:")
135
- for phrase in data['all_phrases']:
136
- print(f"- {phrase}")
137
- print(f"\nThree longest phrases for color {color}:")
138
- for phrase in data['top_three_longest']:
139
  print(f"- {phrase}")
140
- print()
141
 
142
- def process_audio_to_text(audio_path, inputlanguage="English", outputlanguage="English"):
 
 
 
143
  """
144
  Convert audio input to text using the Gradio client.
145
  """
146
- audio_client = Client(SEAMLESSM4T)
147
  result = audio_client.predict(
148
  audio_path,
149
  inputlanguage,
150
- outputlanguage,
151
  api_name="/s2tt"
152
  )
153
  print("Audio Result: ", result)
154
- return result[0]
155
-
156
-
157
-
158
- def process_text_to_audio(text, translatefrom="English", translateto="English"):
159
- """
160
- Convert text input to audio using the Gradio client and return a URL to the generated audio.
161
- """
162
- try:
163
- # Assuming audio_client.predict is correctly set up and returns a tuple (local_file_path, translated_text)
164
- result = audio_client.predict(
165
- text,
166
- translatefrom,
167
- translateto,
168
- api_name="/t2st"
169
- )
170
-
171
- if not isinstance(result, tuple) or len(result) < 2:
172
- raise ValueError("Unexpected result format from audio_client.predict")
173
-
174
-
175
- # Print or log the raw API response for inspection
176
- print("Raw API Response:", result)
177
-
178
- # Initialize variables
179
- audio_file_path = ""
180
 
181
- # Process the result
182
- if result:
183
- for item in result:
184
- if isinstance(item, str):
185
- # Check if the item is a URL pointing to an audio file or a base64 encoded string
186
- if any(ext in item.lower() for ext in ['.mp3', '.wav', '.ogg']) or is_base64(item):
187
- audio_file_path = item
188
- break
189
-
190
- if not audio_file_path:
191
- raise ValueError("No audio file path found in the response")
192
-
193
- # If the response is a direct file path or a base64 string, handle accordingly
194
- # For simplicity, we're returning the URL or base64 string directly
195
- return audio_file_path
196
-
197
- except Exception as e:
198
- print(f"Error processing text to audio: {e}")
199
- return ""
200
-
201
-
202
- def save_audio_data_to_file(audio_data, directory="audio_files", filename="output_audio.wav"):
203
- """
204
- Save audio data to a file and return the file path.
205
- """
206
- os.makedirs(directory, exist_ok=True)
207
- file_path = os.path.join(directory, filename)
208
- with open(file_path, 'wb') as file:
209
- file.write(audio_data)
210
- return file_path
211
-
212
- # Ensure the function that reads the audio file checks if the path is a file
213
- def read_audio_file(file_path):
214
- """
215
- Read and return the audio file content if the path is a file.
216
  """
217
- if os.path.isfile(file_path):
218
- with open(file_path, 'rb') as file:
219
- return file.read()
220
- else:
221
- raise ValueError(f"Expected a file path, got a directory: {file_path}")
222
-
223
-
224
- def initialize_ocr_models():
225
  """
226
- Load the detection and recognition models along with their processors.
227
- """
228
- det_processor, det_model = load_det_processor(), load_det_model()
229
- rec_model, rec_processor = load_rec_model(), load_rec_processor()
230
- return det_processor, det_model, rec_model, rec_processor
 
 
231
 
232
  class OCRProcessor:
233
- def __init__(self, lang_code=["en"]):
234
- self.lang_code = lang_code
235
- self.det_processor, self.det_model, self.rec_model, self.rec_processor = initialize_ocr_models()
 
236
 
237
  def process_image(self, image):
238
  """
239
  Process a PIL image and return the OCR text.
240
  """
241
- predictions = run_ocr([image], [self.lang_code], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
242
- return predictions[0]
243
 
244
  def process_pdf(self, pdf_path):
245
  """
246
  Process a PDF file and return the OCR text.
247
  """
248
- predictions = run_ocr([pdf_path], [self.lang_code], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
249
- return predictions[0]
250
 
251
- def process_input(image=None, file=None, audio=None, text="", translateto = "English", translatefrom = "English" ):
252
- lang_code = get_language_code(translatefrom)
253
- ocr_processor = OCRProcessor(lang_code)
254
  final_text = text
255
- print("Image :", image)
256
  if image is not None:
257
  ocr_prediction = ocr_processor.process_image(image)
 
258
  for idx in range(len((list(ocr_prediction)[0][1]))):
259
  final_text += " "
260
  final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
@@ -262,11 +130,13 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
262
  if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
263
  pil_image = Image.open(file)
264
  ocr_prediction = ocr_processor.process_image(pil_image)
 
265
  for idx in range(len((list(ocr_prediction)[0][1]))):
266
  final_text += " "
267
  final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
268
  elif file.name.lower().endswith('.pdf'):
269
  ocr_prediction = ocr_processor.process_pdf(file.name)
 
270
  for idx in range(len((list(ocr_prediction)[0][1]))):
271
  final_text += " "
272
  final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
@@ -274,11 +144,10 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
274
  final_text += "\nUnsupported file type."
275
  print("OCR Text: ", final_text)
276
  if audio is not None:
277
- long_audio_processor = LongAudioProcessor(audio_client)
278
- audio_text = long_audio_processor.process_long_audio(audio, inputlanguage=translatefrom, outputlanguage=translateto)
279
  final_text += "\n" + audio_text
280
 
281
- final_text_with_producetext = final_text + producetext.format(target_language=translateto)
282
 
283
  response = co.generate(
284
  model='c4ai-aya',
@@ -298,91 +167,96 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
298
  )
299
  processed_text = response.generations[0].text
300
 
301
- audio_output = process_text_to_audio(processed_text, translateto, translateto)
302
- extractor = TaggedPhraseExtractor(final_text)
303
- matches = extractor.extract_phrases()
304
-
305
- top_phrases = []
306
- for color, phrases in matches.items():
307
- top_phrases.extend(phrases)
308
-
309
- while len(top_phrases) < 3:
310
- top_phrases.append("")
311
-
312
- audio_outputs = []
313
- translations = []
314
- for phrase in top_phrases:
315
- if phrase:
316
- translated_phrase = translate_text(phrase, translatefrom=translatefrom, translateto=translateto)
317
- translations.append(translated_phrase)
318
- target_audio = process_text_to_audio(phrase, translatefrom=translateto, translateto=translateto)
319
- native_audio = process_text_to_audio(translated_phrase, translatefrom=translatefrom, translateto=translatefrom)
320
- audio_outputs.append((target_audio, native_audio))
321
- else:
322
- translations.append("")
323
- audio_outputs.append(("", ""))
324
-
325
- return final_text, audio_output, top_phrases, translations, audio_outputs
326
-
327
-
328
-
329
- inputs = [
330
-
331
- gr.Dropdown(choices=choices, label="Your Native Language"),
332
- gr.Dropdown(choices=choices, label="Language To Learn"),
333
- gr.Audio(sources="microphone", type="filepath", label="Mic Input"),
334
- gr.Image(type="pil", label="Camera Input"),
335
- gr.Textbox(lines=2, label="Text Input"),
336
- gr.File(label="File Upload")
337
- ]
338
-
339
- outputs = [
340
- RichTextbox(label="Processed Text"),
341
- gr.Audio(label="Audio"),
342
- gr.Textbox(label="Focus 1"),
343
- gr.Textbox(label="Translated Phrases 1"),
344
- gr.Audio(label="Audio Output (Native Language) 1"),
345
- gr.Audio(label="Audio Output (Target Language) 1"),
346
- gr.Textbox(label="Focus 2"),
347
- gr.Textbox(label="Translated Phrases 2"),
348
- gr.Audio(label="Audio Output (Native Language) 2"),
349
- gr.Audio(label="Audio Output (Target Language) 2"),
350
- gr.Textbox(label="Focus 3"),
351
- gr.Textbox(label="Translated Phrases 3"),
352
- gr.Audio(label="Audio Output (Native Language) 3"),
353
- gr.Audio(label="Audio Output (Target Language) 3")
354
- ]
355
-
356
-
357
- def update_outputs(inputlanguage, target_language, audio, image, text, file):
358
- processed_text, audio_output_path, top_phrases, translations, audio_outputs = process_input(
359
- image=image, file=file, audio=audio, text=text,
360
- translateto=target_language, translatefrom=inputlanguage
361
- )
362
-
363
- output_tuple = (
364
- processed_text, # RichTextbox content
365
- audio_output_path, # Main audio output
366
- top_phrases[0] if len(top_phrases) > 0 else "", # Focus 1
367
- translations[0] if len(translations) > 0 else "", # Translated Phrases 1
368
- audio_outputs[0][0] if len(audio_outputs) > 0 else "", # Audio Output (Native Language) 1
369
- audio_outputs[0][1] if len(audio_outputs) > 0 else "", # Audio Output (Target Language) 1
370
- top_phrases[1] if len(top_phrases) > 1 else "", # Focus 2
371
- translations[1] if len(translations) > 1 else "", # Translated Phrases 2
372
- audio_outputs[1][0] if len(audio_outputs) > 1 else "", # Audio Output (Native Language) 2
373
- audio_outputs[1][1] if len(audio_outputs) > 1 else "", # Audio Output (Target Language) 2
374
- top_phrases[2] if len(top_phrases) > 2 else "", # Focus 3
375
- translations[2] if len(translations) > 2 else "", # Translated Phrases 3
376
- audio_outputs[2][0] if len(audio_outputs) > 2 else "", # Audio Output (Native Language) 3
377
- audio_outputs[2][1] if len(audio_outputs) > 2 else "" # Audio Output (Target Language) 3
378
- )
379
-
380
- return output_tuple
381
-
382
- def interface_func(inputlanguage, target_language, audio, image, text, file):
383
- return update_outputs(inputlanguage, target_language, audio, image, text, file)
384
-
385
- iface = gr.Interface(fn=interface_func, inputs=inputs, outputs=outputs, title=title, description=description)
386
 
387
  if __name__ == "__main__":
388
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
6
  from surya.model.recognition.model import load_model as load_rec_model
7
  from surya.model.recognition.processor import load_processor as load_rec_processor
8
+ from lang_list import LANGUAGE_NAME_TO_CODE, TEXT_SOURCE_LANGUAGE_NAMES, S2ST_TARGET_LANGUAGE_NAMES
9
  from gradio_client import Client
10
  from dotenv import load_dotenv
11
  import requests
12
+ from io import BytesIO
13
  import cohere
14
  import os
15
  import re
16
  import pandas as pd
 
 
 
 
 
17
 
18
 
19
  title = "# Welcome to AyaTonic"
 
22
  load_dotenv()
23
  COHERE_API_KEY = os.getenv('CO_API_KEY')
24
  SEAMLESSM4T = os.getenv('SEAMLESSM4T')
25
+
26
  df = pd.read_csv("lang_list.csv")
27
+
28
  inputlanguage = ""
29
  producetext = "\n\nProduce a complete expositional blog post in {target_language} based on the above :"
30
+ formatinputstring = "\n\nthe above text is a learning aid. you must use rich text format to rewrite the above and add 1 . a red color tags for nouns 2. a blue color tag for verbs 3. a green color tag for adjectives and adverbs:"
31
+
32
+ # Regular expression patterns for each color
33
  patterns = {
34
  "red": r'<span style="color: red;">(.*?)</span>',
35
  "blue": r'<span style="color: blue;">(.*?)</span>',
 
42
  "blue": [],
43
  "green": [],
44
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  class TaggedPhraseExtractor:
46
  def __init__(self, text=''):
47
  self.text = text
48
+ self.patterns = {}
49
 
50
  def set_text(self, text):
51
  """Set the text to search within."""
 
56
  self.patterns[color] = pattern
57
 
58
  def extract_phrases(self):
59
+ """Extract phrases for all colors and patterns added."""
60
+ matches = {color: re.findall(pattern, self.text) for color, pattern in self.patterns.items()}
 
 
 
 
61
  return matches
62
 
63
  def print_phrases(self):
64
+ """Extract phrases and print them."""
65
  matches = self.extract_phrases()
66
+ for color, phrases in matches.items():
67
  print(f"Phrases with color {color}:")
68
+ for phrase in phrases:
 
 
 
69
  print(f"- {phrase}")
70
+ print()
71
 
72
+ co = cohere.Client(COHERE_API_KEY)
73
+ audio_client = Client(SEAMLESSM4T)
74
+
75
+ def process_audio_to_text(audio_path, inputlanguage="English"):
76
  """
77
  Convert audio input to text using the Gradio client.
78
  """
 
79
  result = audio_client.predict(
80
  audio_path,
81
  inputlanguage,
82
+ inputlanguage,
83
  api_name="/s2tt"
84
  )
85
  print("Audio Result: ", result)
86
+ return result['text'] # Adjust based on the actual response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
+ def process_text_to_audio(text, target_language="English"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  """
90
+ Convert text input to audio using the Gradio client.
 
 
 
 
 
 
 
91
  """
92
+ result = audio_client.predict(
93
+ text,
94
+ target_language,
95
+ target_language, # could be varied to produce learning content
96
+ api_name="/t2st"
97
+ )
98
+ return result['audio'] # Adjust based on the actual response
99
 
100
  class OCRProcessor:
101
+ def __init__(self, langs=["en"]):
102
+ self.langs = langs
103
+ self.det_processor, self.det_model = load_det_processor(), load_det_model()
104
+ self.rec_model, self.rec_processor = load_rec_model(), load_rec_processor()
105
 
106
  def process_image(self, image):
107
  """
108
  Process a PIL image and return the OCR text.
109
  """
110
+ predictions = run_ocr([image], [self.langs], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
111
+ return predictions[0] # Assuming the first item in predictions contains the desired text
112
 
113
  def process_pdf(self, pdf_path):
114
  """
115
  Process a PDF file and return the OCR text.
116
  """
117
+ predictions = run_ocr([pdf_path], [self.langs], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
118
+ return predictions[0] # Assuming the first item in predictions contains the desired text
119
 
120
+ def process_input(image=None, file=None, audio=None, text=""):
121
+ ocr_processor = OCRProcessor()
 
122
  final_text = text
 
123
  if image is not None:
124
  ocr_prediction = ocr_processor.process_image(image)
125
+ # getting text from the OCR object
126
  for idx in range(len((list(ocr_prediction)[0][1]))):
127
  final_text += " "
128
  final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
 
130
  if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
131
  pil_image = Image.open(file)
132
  ocr_prediction = ocr_processor.process_image(pil_image)
133
+ # getting text from the OCR object
134
  for idx in range(len((list(ocr_prediction)[0][1]))):
135
  final_text += " "
136
  final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
137
  elif file.name.lower().endswith('.pdf'):
138
  ocr_prediction = ocr_processor.process_pdf(file.name)
139
+ # getting text from the OCR object
140
  for idx in range(len((list(ocr_prediction)[0][1]))):
141
  final_text += " "
142
  final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
 
144
  final_text += "\nUnsupported file type."
145
  print("OCR Text: ", final_text)
146
  if audio is not None:
147
+ audio_text = process_audio_to_text(audio)
 
148
  final_text += "\n" + audio_text
149
 
150
+ final_text_with_producetext = final_text + producetext
151
 
152
  response = co.generate(
153
  model='c4ai-aya',
 
167
  )
168
  processed_text = response.generations[0].text
169
 
170
+ audio_output = process_text_to_audio(processed_text)
171
+
172
+ return processed_text, audio_output
173
+ # Define Gradio interface
174
+ iface = gr.Interface(
175
+ fn=process_input,
176
+ inputs=[
177
+ gr.Image(type="pil", label="Camera Input"),
178
+ gr.File(label="File Upload"),
179
+ gr.Audio(sources="microphone", type="filepath", label="Mic Input"),
180
+ gr.Textbox(lines=2, label="Text Input"),
181
+ # gr.Dropdown(choices=TEXT_SOURCE_LANGUAGE_NAMES, label="Input Language"),
182
+ # gr.Dropdown(choices=TEXT_SOURCE_LANGUAGE_NAMES, label="Target Language")
183
+ gr.Dropdown(choices=df["name"].to_list(), label="Input Language"),
184
+ gr.Dropdown(choices=df["name"].to_list(), label="Target Language")
185
+ ],
186
+ outputs=[
187
+ RichTextbox(label="Processed Text"),
188
+ gr.Audio(label="Audio Output")
189
+ ],
190
+ title=title,
191
+ description=description
192
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  if __name__ == "__main__":
195
+ iface.launch()
196
+
197
+
198
+ # co = cohere.Client('yhA228YGeZSl1ctten8LQxw2dky2nngHetXFjV2Q') # This is your trial API key
199
+ # response = co.generate(
200
+ # model='c4ai-aya',
201
+ # prompt='एक यांत्रिक घड़ी दिन के समय को प्रदान करने के लिए एक गैर-इलेक्ट्रॉनिक तंत्र का उपयोग करती है। एक मुख्य स्प्रिंग का उपयोग यांत्रिक तंत्र को ऊर्जा संग्रहीत करने के लिए किया जाता है। एक यांत्रिक घड़ी में दांतों का एक कुंडल होता है जो धीरे-धीरे मुख्य स्प्रिंग से संचालित होता है। दांतों के कुंडल को एक यांत्रिक तंत्र में स्थानांतरित करने के लिए पहियों की एक श्रृंखला का उपयोग किया जाता है जो हाथों को घड़ी के चेहरे पर दाईं ओर ले जाता है। घड़ी के तंत्र को स्थिर करने और यह सुनिश्चित करने के लिए कि हाथ सही दिशा में घूमते हैं, एक कंपन का उपयोग किया जाता है। ',
202
+ # max_tokens=3674,
203
+ # temperature=0.9,
204
+ # k=0,
205
+ # stop_sequences=[],
206
+ # return_likelihoods='NONE')
207
+ # print('Prediction: {}'.format(response.generations[0].text))
208
+
209
+ # client = Client("https://facebook-seamless-m4t-v2-large.hf.space/--replicas/nq5nn/")
210
+ # result = client.predict(
211
+ # https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav, # filepath in 'Input speech' Audio component
212
+ # Afrikaans, # Literal[Afrikaans, Amharic, Armenian, Assamese, Basque, Belarusian, Bengali, Bosnian, Bulgarian, Burmese, Cantonese, Catalan, Cebuano, Central Kurdish, Croatian, Czech, Danish, Dutch, Egyptian Arabic, English, Estonian, Finnish, French, Galician, Ganda, Georgian, German, Greek, Gujarati, Halh Mongolian, Hebrew, Hindi, Hungarian, Icelandic, Igbo, Indonesian, Irish, Italian, Japanese, Javanese, Kannada, Kazakh, Khmer, Korean, Kyrgyz, Lao, Lithuanian, Luo, Macedonian, Maithili, Malayalam, Maltese, Mandarin Chinese, Marathi, Meitei, Modern Standard Arabic, Moroccan Arabic, Nepali, North Azerbaijani, Northern Uzbek, Norwegian Bokmål, Norwegian Nynorsk, Nyanja, Odia, Polish, Portuguese, Punjabi, Romanian, Russian, Serbian, Shona, Sindhi, Slovak, Slovenian, Somali, Southern Pashto, Spanish, Standard Latvian, Standard Malay, Swahili, Swedish, Tagalog, Tajik, Tamil, Telugu, Thai, Turkish, Ukrainian, Urdu, Vietnamese, Welsh, West Central Oromo, Western Persian, Yoruba, Zulu] in 'Source language' Dropdown component
213
+ # Bengali, # Literal[Bengali, Catalan, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Hindi, Indonesian, Italian, Japanese, Korean, Maltese, Mandarin Chinese, Modern Standard Arabic, Northern Uzbek, Polish, Portuguese, Romanian, Russian, Slovak, Spanish, Swahili, Swedish, Tagalog, Telugu, Thai, Turkish, Ukrainian, Urdu, Vietnamese, Welsh, Western Persian] in 'Target language' Dropdown component
214
+ # api_name="/s2st"
215
+ # )
216
+ # print(result)
217
+
218
+ # co = cohere.Client('yhA228YGeZSl1ctten8LQxw2dky2nngHetXFjV2Q')
219
+ # response = co.generate(
220
+ # model='command-nightly',
221
+ # prompt='Les mécanismes de montres mécaniques\n\nLes mécanismes de montres mécaniques sont des mécanismes qui indiquent la journée, mais pas l\'électronique. Elles utilisent un ressort principal pour stocker l\'énergie nécessaire au fonctionnement des mécanismes. Un train d\'engrenages est utilisé pour transférer l\'énergie du ressort principal à un ensemble de roues qui font tourner les aiguilles dans le sens horaire sur le cadran de la montre.\n\nLes mécanismes de montres mécaniques sontdakshineswar omkarnathji, qui sont des lieux de culte qui sont construits dans le temple. Les engrenages sont des roues qui sont utilisées pour transférer l\'énergie du ressort principal à un ensemble de roues qui font tourner les aiguilles dans le sens horaire sur le cadran de la montre.\n\nLe ressort principal est un ressort qui est utilisé pour stocker l\'énergie nécessaire au fonctionnement des mécanismes de la montre. Le ressort principal est un ressort qui est utilisé pour stocker l\'énergie nécessaire au fonctionnement des mécanismes de la montre, et il est utilisé pour transférer l\'énergie aux engrenages, qui sont des roues qui sont utilisées pour faire tourner les aiguilles dans le sens horaire sur le cadran de la montre.\n\nLes engrenages sont des roues qui sont utilisées pour faire tourner les aiguilles dans le sens horaire sur le cadran de la montre, et elles sont utilisées pour transférer l\'énergie du ressort principal aux roues qui font tourner les aiguilles dans le sens horaire sur le cadran de la montre.\n\nLes mécanismes de montres mécaniques sont des mécanismes qui indiquent la journée, et elles sont utilisées pour transférer l\'énergie du ressort principal à un ensemble de roues qui font tourner les aiguilles dans le sens horaire sur le cadran de la montre.\n\nLes mécanismes de montres mécaniques sont des mécanismes qui indiquent la journée, et elles sont utilisées pour transférer l\'énergie du ressort principal à un ensemble de roues qui font tourner les 
aiguilles dans le sens horaire sur le cadran de la montre, et elles sont utilisées pour stabiliser le mécanisme de la montre, et pour s\'assurer que les aiguilles tournent dans le bon sens.\n\nthe above text is a learning aid. you must use rich text format to rewrite the above and add 1 . a red color tags for nouns 2. a blue color tag for verbs 3. a green color tag for adjectives and adverbs:',
222
+ # max_tokens=7294,
223
+ # temperature=0.6,
224
+ # k=0,
225
+ # stop_sequences=[],
226
+ # return_likelihoods='NONE')
227
+ # print('Prediction: {}'.format(response.generations[0].text))
228
+ # example = RichTextbox().example_inputs()
229
+
230
+
231
+
232
+ # iface = gr.Interface(
233
+ # fn=process_input,
234
+ # inputs=[
235
+ # gr.Image(type="pil", label="Camera Input"),
236
+ # gr.File(label="File Upload"),
237
+ # gr.Audio(sources="microphone", type="filepath", label="Mic Input"),
238
+ # gr.Textbox(lines=2, label="Text Input"),
239
+ # gr.Dropdown(choices=TEXT_SOURCE_LANGUAGE_NAMES, label="Input Language"),
240
+ # gr.Dropdown(choices=TEXT_SOURCE_LANGUAGE_NAMES, label="Target Language")
241
+ # ],
242
+ # outputs=[
243
+ # gr.RichTextbox(label="Processed Text"),
244
+ # gr.Audio(label="Audio Output")
245
+ # ],
246
+ # title="OCR and Speech Processing App",
247
+ # description="This app processes images, PDFs, and audio inputs to generate text and audio outputs."
248
+ # )
249
+
250
+ # if __name__ == "__main__":
251
+ # # iface.launch()
252
+
253
+ # demo = gr.Interface(
254
+ # lambda x:x,
255
+ # RichTextbox(), # interactive version of your component
256
+ # RichTextbox(), # static version of your component
257
+ # examples=[[example]], # uncomment this line to view the "example version" of your component
258
+ # )
259
+
260
+
261
+ # if __name__ == "__main__":
262
+ # demo.launch()
ayatonic.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ CO_API_KEY=KQBPf0H0ENZESIC5nuUJ4i4jjg34xMPAkYK7s31W
2
+ SEAMLESSM4T=https://facebook-seamless-m4t-v2-large.hf.space/--replicas/v4gsf/
audio_files/audio_3505178120260920029.wav → languages.json RENAMED
File without changes
requirements.txt CHANGED
@@ -1,11 +1,10 @@
1
  gradio
2
  gradio_rich_textbox
3
  gradio-client
 
 
 
4
  torchvision
5
  torch
6
  python-dotenv
7
- pandas
8
- pydub
9
- cohere
10
- surya-ocr
11
- pillow
 
1
  gradio
2
  gradio_rich_textbox
3
  gradio-client
4
+ cohere
5
+ surya-ocr
6
+ pillow
7
  torchvision
8
  torch
9
  python-dotenv
10
+ pandas
 
 
 
 
script.py DELETED
@@ -1,10 +0,0 @@
1
- from gradio_client import Client
2
-
3
- client = Client("https://facebook-seamless-m4t-v2-large.hf.space/--replicas/v4gsf/")
4
- result = client.predict(
5
- "Hello my name is tonic!", # str in 'Input text' Textbox component
6
- "English", # Literal[Afrikaans, Amharic, Armenian, Assamese, Basque, Belarusian, Bengali, Bosnian, Bulgarian, Burmese, Cantonese, Catalan, Cebuano, Central Kurdish, Croatian, Czech, Danish, Dutch, Egyptian Arabic, English, Estonian, Finnish, French, Galician, Ganda, Georgian, German, Greek, Gujarati, Halh Mongolian, Hebrew, Hindi, Hungarian, Icelandic, Igbo, Indonesian, Irish, Italian, Japanese, Javanese, Kannada, Kazakh, Khmer, Korean, Kyrgyz, Lao, Lithuanian, Luo, Macedonian, Maithili, Malayalam, Maltese, Mandarin Chinese, Marathi, Meitei, Modern Standard Arabic, Moroccan Arabic, Nepali, North Azerbaijani, Northern Uzbek, Norwegian Bokmål, Norwegian Nynorsk, Nyanja, Odia, Polish, Portuguese, Punjabi, Romanian, Russian, Serbian, Shona, Sindhi, Slovak, Slovenian, Somali, Southern Pashto, Spanish, Standard Latvian, Standard Malay, Swahili, Swedish, Tagalog, Tajik, Tamil, Telugu, Thai, Turkish, Ukrainian, Urdu, Vietnamese, Welsh, West Central Oromo, Western Persian, Yoruba, Zulu] in 'Source language' Dropdown component
7
- "French", # Literal[Bengali, Catalan, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Hindi, Indonesian, Italian, Japanese, Korean, Maltese, Mandarin Chinese, Modern Standard Arabic, Northern Uzbek, Polish, Portuguese, Romanian, Russian, Slovak, Spanish, Swahili, Swedish, Tagalog, Telugu, Thai, Turkish, Ukrainian, Urdu, Vietnamese, Welsh, Western Persian] in 'Target language' Dropdown component
8
- api_name="/t2st"
9
- )
10
- print(result)