Spaces:

MultiTransformer
/

AyaTonic

Runtime error

@@ -5,20 +5,15 @@ from surya.ocr import run_ocr
 from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
 from surya.model.recognition.model import load_model as load_rec_model
 from surya.model.recognition.processor import load_processor as load_rec_processor
-from lang_list import TEXT_SOURCE_LANGUAGE_NAMES , LANGUAGE_NAME_TO_CODE , text_source_language_codes
 from gradio_client import Client
 from dotenv import load_dotenv
 import requests
-from io import BytesIO
 import cohere
 import os
 import re
 import pandas as pd
-import pydub
-from pydub import AudioSegment
-from pydub.utils import make_chunks
-from pathlib import Path
-import hashlib
 title = "# Welcome to AyaTonic"
@@ -27,12 +22,14 @@ description = "Learn a New Language With Aya"
 load_dotenv()
 COHERE_API_KEY = os.getenv('CO_API_KEY')
 SEAMLESSM4T = os.getenv('SEAMLESSM4T')
 df = pd.read_csv("lang_list.csv")
-choices = df["name"].to_list()
 inputlanguage = ""
 producetext =  "\n\nProduce a complete expositional blog post in {target_language} based on the above :"
-formatinputstring = """\n\nthe above text is a learning aid. you must use rich text format to rewrite the above and add 1 . a red color tags for nouns 2. a blue color tag for verbs 3. a green color tag for adjectives and adverbs: Example: <span style="color: red;">(.?)</span>. Don't change other format of span tag other than color and the (.?). """
-translatetextinst = "\n\nthe above text is a learning aid. you must use markdown format to translate the above into {inputlanguage} :'"
 patterns = {
     "red": r'<span style="color: red;">(.*?)</span>',
     "blue": r'<span style="color: blue;">(.*?)</span>',
@@ -45,70 +42,10 @@ matches = {
     "blue": [],
     "green": [],
 }
-co = cohere.Client(COHERE_API_KEY)
-audio_client = Client(SEAMLESSM4T)
-def get_language_code(language_name):
-    """
-    Extracts the first two letters of the language code based on the language name.
-    """
-    try:
-        code = df.loc[df['name'].str.lower() == language_name.lower(), 'code'].values[0]
-        return code
-    except IndexError:
-        print(f"Language name '{language_name}' not found.")
-        return None
-def translate_text(text, inputlanguage, target_language):
-    """
-    Translates text.
-    """
-    # Ensure you format the instruction string within the function body
-    instructions = translatetextinst.format(inputlanguage=inputlanguage)
-    producetext_formatted = producetext.format(target_language=target_language)
-    prompt = f"{text}{producetext_formatted}\n{instructions}"
-    response = co.generate(
-        model='c4ai-aya',
-        prompt=prompt,
-        max_tokens=2986,
-        temperature=0.6,
-        k=0,
-        stop_sequences=[],
-        return_likelihoods='NONE'
-    )
-    return response.generations[0].text
-class LongAudioProcessor:
-    def __init__(self, audio_client, api_key=None):
-        self.client = audio_client
-        self.process_audio_to_text = process_audio_to_text
-        self.api_key = api_key
-    def process_long_audio(self, audio_path, inputlanguage, outputlanguage, chunk_length_ms=20000):
-        """
-        Process audio files longer than 29 seconds by chunking them into smaller segments.
-        """
-        audio = AudioSegment.from_file(audio_path)
-        chunks = make_chunks(audio, chunk_length_ms)
-        full_text = ""
-        for i, chunk in enumerate(chunks):
-            chunk_name = f"chunk{i}.wav"
-            with open(chunk_name, 'wb') as file:
-                chunk.export(file, format="wav")
-            try:
-                result = self.process_audio_to_text(chunk_name, inputlanguage=inputlanguage, outputlanguage=outputlanguage)
-                full_text += " " + result.strip()
-            except Exception as e:
-                print(f"Error processing {chunk_name}: {e}")
-            finally:
-                if os.path.exists(chunk_name):
-                    os.remove(chunk_name)
-        return full_text.strip()
 class TaggedPhraseExtractor:
     def __init__(self, text=''):
         self.text = text
-        self.patterns = patterns
     def set_text(self, text):
         """Set the text to search within."""
@@ -119,142 +56,73 @@ class TaggedPhraseExtractor:
         self.patterns[color] = pattern
     def extract_phrases(self):
-        """Extract phrases for all colors and patterns added, including the three longest phrases."""
-        matches = {}
-        for color, pattern in self.patterns.items():
-            found_phrases = re.findall(pattern, self.text)
-            sorted_phrases = sorted(found_phrases, key=len, reverse=True)
-            matches[color] = sorted_phrases[:3]
         return matches
     def print_phrases(self):
-        """Extract phrases and print them, including the three longest phrases."""
         matches = self.extract_phrases()
-        for color, data in matches.items():
             print(f"Phrases with color {color}:")
-            for phrase in data['all_phrases']:
-                print(f"- {phrase}")
-            print(f"\nThree longest phrases for color {color}:")
-            for phrase in data['top_three_longest']:
                 print(f"- {phrase}")
-            print()
-def process_audio_to_text(audio_path, inputlanguage="English", outputlanguage="English"):
     """
     Convert audio input to text using the Gradio client.
     """
-    audio_client = Client(SEAMLESSM4T)
     result = audio_client.predict(
         audio_path,
         inputlanguage,
-        outputlanguage,
         api_name="/s2tt"
     )
     print("Audio Result: ", result)
-    return result[0]
-def process_text_to_audio(text, translatefrom="English", translateto="English"):
-    """
-    Convert text input to audio using the Gradio client and return a URL to the generated audio.
-    """
-    try:
-        # Assuming audio_client.predict is correctly set up and returns a tuple (local_file_path, translated_text)
-        result = audio_client.predict(
-            text,
-            translatefrom,
-            translateto,
-            api_name="/t2st"
-        )
-        if not isinstance(result, tuple) or len(result) < 2:
-            raise ValueError("Unexpected result format from audio_client.predict")
-        # Print or log the raw API response for inspection
-        print("Raw API Response:", result)
-        # Initialize variables
-        audio_file_path = ""
-        # Process the result
-        if result:
-            for item in result:
-                if isinstance(item, str):
-                    # Check if the item is a URL pointing to an audio file or a base64 encoded string
-                    if any(ext in item.lower() for ext in ['.mp3', '.wav', '.ogg']) or is_base64(item):
-                        audio_file_path = item
-                        break
-        if not audio_file_path:
-            raise ValueError("No audio file path found in the response")
-        # If the response is a direct file path or a base64 string, handle accordingly
-        # For simplicity, we're returning the URL or base64 string directly
-        return audio_file_path
-    except Exception as e:
-        print(f"Error processing text to audio: {e}")
-        return ""
-def save_audio_data_to_file(audio_data, directory="audio_files", filename="output_audio.wav"):
-    """
-    Save audio data to a file and return the file path.
-    """
-    os.makedirs(directory, exist_ok=True)
-    file_path = os.path.join(directory, filename)
-    with open(file_path, 'wb') as file:
-        file.write(audio_data)
-    return file_path
-# Ensure the function that reads the audio file checks if the path is a file
-def read_audio_file(file_path):
-    """
-    Read and return the audio file content if the path is a file.
     """
-    if os.path.isfile(file_path):
-        with open(file_path, 'rb') as file:
-            return file.read()
-    else:
-        raise ValueError(f"Expected a file path, got a directory: {file_path}")
-def initialize_ocr_models():
     """
-    Load the detection and recognition models along with their processors.
-    """
-    det_processor, det_model = load_det_processor(), load_det_model()
-    rec_model, rec_processor = load_rec_model(), load_rec_processor()
-    return det_processor, det_model, rec_model, rec_processor
 class OCRProcessor:
-    def __init__(self, lang_code=["en"]):
-        self.lang_code = lang_code
-        self.det_processor, self.det_model, self.rec_model, self.rec_processor = initialize_ocr_models()
     def process_image(self, image):
         """
         Process a PIL image and return the OCR text.
         """
-        predictions = run_ocr([image], [self.lang_code], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
-        return predictions[0]
     def process_pdf(self, pdf_path):
         """
         Process a PDF file and return the OCR text.
         """
-        predictions = run_ocr([pdf_path], [self.lang_code], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
-        return predictions[0]
-def process_input(image=None, file=None, audio=None, text="", translateto = "English", translatefrom = "English" ):
-    lang_code = get_language_code(translatefrom)
-    ocr_processor = OCRProcessor(lang_code)
     final_text = text
-    print("Image :", image)
     if image is not None:
         ocr_prediction = ocr_processor.process_image(image)
         for idx in range(len((list(ocr_prediction)[0][1]))):
             final_text += " "
             final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
@@ -262,11 +130,13 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
         if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
             pil_image = Image.open(file)
             ocr_prediction = ocr_processor.process_image(pil_image)
             for idx in range(len((list(ocr_prediction)[0][1]))):
                 final_text += " "
                 final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
         elif file.name.lower().endswith('.pdf'):
             ocr_prediction = ocr_processor.process_pdf(file.name)
             for idx in range(len((list(ocr_prediction)[0][1]))):
                 final_text += " "
                 final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
@@ -274,11 +144,10 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
             final_text += "\nUnsupported file type."
     print("OCR Text: ", final_text)
     if audio is not None:
-        long_audio_processor = LongAudioProcessor(audio_client)
-        audio_text = long_audio_processor.process_long_audio(audio, inputlanguage=translatefrom, outputlanguage=translateto)
         final_text += "\n" + audio_text
-    final_text_with_producetext = final_text + producetext.format(target_language=translateto)
     response = co.generate(
         model='c4ai-aya',
@@ -298,91 +167,96 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
     )
     processed_text = response.generations[0].text
-    audio_output = process_text_to_audio(processed_text, translateto, translateto)
-    extractor = TaggedPhraseExtractor(final_text)
-    matches = extractor.extract_phrases()
-    top_phrases = []
-    for color, phrases in matches.items():
-        top_phrases.extend(phrases)
-    while len(top_phrases) < 3:
-        top_phrases.append("")
-    audio_outputs = []
-    translations = []
-    for phrase in top_phrases:
-        if phrase:
-            translated_phrase = translate_text(phrase, translatefrom=translatefrom, translateto=translateto)
-            translations.append(translated_phrase)
-            target_audio = process_text_to_audio(phrase, translatefrom=translateto, translateto=translateto)
-            native_audio = process_text_to_audio(translated_phrase, translatefrom=translatefrom, translateto=translatefrom)
-            audio_outputs.append((target_audio, native_audio))
-        else:
-            translations.append("")
-            audio_outputs.append(("", ""))
-    return final_text, audio_output, top_phrases, translations, audio_outputs
-inputs = [
-    gr.Dropdown(choices=choices, label="Your Native Language"),
-    gr.Dropdown(choices=choices, label="Language To Learn"),
-    gr.Audio(sources="microphone", type="filepath", label="Mic Input"),
-    gr.Image(type="pil", label="Camera Input"),
-    gr.Textbox(lines=2, label="Text Input"),
-    gr.File(label="File Upload")
-]
-outputs = [
-    RichTextbox(label="Processed Text"),
-    gr.Audio(label="Audio"),
-    gr.Textbox(label="Focus 1"),
-    gr.Textbox(label="Translated Phrases 1"),
-    gr.Audio(label="Audio Output (Native Language) 1"),
-    gr.Audio(label="Audio Output (Target Language) 1"),
-    gr.Textbox(label="Focus 2"),
-    gr.Textbox(label="Translated Phrases 2"),
-    gr.Audio(label="Audio Output (Native Language) 2"),
-    gr.Audio(label="Audio Output (Target Language) 2"),
-    gr.Textbox(label="Focus 3"),
-    gr.Textbox(label="Translated Phrases 3"),
-    gr.Audio(label="Audio Output (Native Language) 3"),
-    gr.Audio(label="Audio Output (Target Language) 3")
-]
-def update_outputs(inputlanguage, target_language, audio, image, text, file):
-    processed_text, audio_output_path, top_phrases, translations, audio_outputs = process_input(
-        image=image, file=file, audio=audio, text=text,
-        translateto=target_language, translatefrom=inputlanguage
-    )
-    output_tuple = (
-        processed_text,  # RichTextbox content
-        audio_output_path,  # Main audio output
-        top_phrases[0] if len(top_phrases) > 0 else "",  # Focus 1
-        translations[0] if len(translations) > 0 else "",  # Translated Phrases 1
-        audio_outputs[0][0] if len(audio_outputs) > 0 else "",  # Audio Output (Native Language) 1
-        audio_outputs[0][1] if len(audio_outputs) > 0 else "",  # Audio Output (Target Language) 1
-        top_phrases[1] if len(top_phrases) > 1 else "",  # Focus 2
-        translations[1] if len(translations) > 1 else "",  # Translated Phrases 2
-        audio_outputs[1][0] if len(audio_outputs) > 1 else "",  # Audio Output (Native Language) 2
-        audio_outputs[1][1] if len(audio_outputs) > 1 else "",  # Audio Output (Target Language) 2
-        top_phrases[2] if len(top_phrases) > 2 else "",  # Focus 3
-        translations[2] if len(translations) > 2 else "",  # Translated Phrases 3
-        audio_outputs[2][0] if len(audio_outputs) > 2 else "",  # Audio Output (Native Language) 3
-        audio_outputs[2][1] if len(audio_outputs) > 2 else ""   # Audio Output (Target Language) 3
-    )
-    return output_tuple
-def interface_func(inputlanguage, target_language, audio, image, text, file):
-    return update_outputs(inputlanguage, target_language, audio, image, text, file)
-iface = gr.Interface(fn=interface_func, inputs=inputs, outputs=outputs, title=title, description=description)
 if __name__ == "__main__":
-    iface.launch()

 from surya.model.detection.segformer import load_model as load_det_model, load_processor as load_det_processor
 from surya.model.recognition.model import load_model as load_rec_model
 from surya.model.recognition.processor import load_processor as load_rec_processor
+from lang_list import LANGUAGE_NAME_TO_CODE, TEXT_SOURCE_LANGUAGE_NAMES, S2ST_TARGET_LANGUAGE_NAMES
 from gradio_client import Client
 from dotenv import load_dotenv
 import requests
+from io import BytesIO
 import cohere
 import os
 import re
 import pandas as pd
 title = "# Welcome to AyaTonic"
 load_dotenv()
 COHERE_API_KEY = os.getenv('CO_API_KEY')
 SEAMLESSM4T = os.getenv('SEAMLESSM4T')
 df = pd.read_csv("lang_list.csv")
 inputlanguage = ""
 producetext =  "\n\nProduce a complete expositional blog post in {target_language} based on the above :"
+formatinputstring = "\n\nthe above text is a learning aid. you must use rich text format to rewrite the above and add 1 . a red color tags for nouns 2. a blue color tag for verbs 3. a green color tag for adjectives and adverbs:"
+# Regular expression patterns for each color
 patterns = {
     "red": r'<span style="color: red;">(.*?)</span>',
     "blue": r'<span style="color: blue;">(.*?)</span>',
     "blue": [],
     "green": [],
 }
 class TaggedPhraseExtractor:
     def __init__(self, text=''):
         self.text = text
+        self.patterns = {}
     def set_text(self, text):
         """Set the text to search within."""
         self.patterns[color] = pattern
     def extract_phrases(self):
+        """Extract phrases for all colors and patterns added."""
+        matches = {color: re.findall(pattern, self.text) for color, pattern in self.patterns.items()}
         return matches
     def print_phrases(self):
+        """Extract phrases and print them."""
         matches = self.extract_phrases()
+        for color, phrases in matches.items():
             print(f"Phrases with color {color}:")
+            for phrase in phrases:
                 print(f"- {phrase}")
+            print()
+co = cohere.Client(COHERE_API_KEY)
+audio_client = Client(SEAMLESSM4T)
+def process_audio_to_text(audio_path, inputlanguage="English"):
     """
     Convert audio input to text using the Gradio client.
     """
     result = audio_client.predict(
         audio_path,
         inputlanguage,
+        inputlanguage,
         api_name="/s2tt"
     )
     print("Audio Result: ", result)
+    return result['text']  # Adjust based on the actual response
+def process_text_to_audio(text, target_language="English"):
     """
+    Convert text input to audio using the Gradio client.
     """
+    result = audio_client.predict(
+        text,
+        target_language,
+        target_language,  # could be varied to produce learning content
+        api_name="/t2st"
+    )
+    return result['audio']  # Adjust based on the actual response
 class OCRProcessor:
+    def __init__(self, langs=["en"]):
+        self.langs = langs
+        self.det_processor, self.det_model = load_det_processor(), load_det_model()
+        self.rec_model, self.rec_processor = load_rec_model(), load_rec_processor()
     def process_image(self, image):
         """
         Process a PIL image and return the OCR text.
         """
+        predictions = run_ocr([image], [self.langs], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
+        return predictions[0]  # Assuming the first item in predictions contains the desired text
     def process_pdf(self, pdf_path):
         """
         Process a PDF file and return the OCR text.
         """
+        predictions = run_ocr([pdf_path], [self.langs], self.det_model, self.det_processor, self.rec_model, self.rec_processor)
+        return predictions[0]  # Assuming the first item in predictions contains the desired text
+def process_input(image=None, file=None, audio=None, text=""):
+    ocr_processor = OCRProcessor()
     final_text = text
     if image is not None:
         ocr_prediction = ocr_processor.process_image(image)
+        # getting text from the OCR object
         for idx in range(len((list(ocr_prediction)[0][1]))):
             final_text += " "
             final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
         if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
             pil_image = Image.open(file)
             ocr_prediction = ocr_processor.process_image(pil_image)
+            # getting text from the OCR object
             for idx in range(len((list(ocr_prediction)[0][1]))):
                 final_text += " "
                 final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
         elif file.name.lower().endswith('.pdf'):
             ocr_prediction = ocr_processor.process_pdf(file.name)
+            # getting text from the OCR object
             for idx in range(len((list(ocr_prediction)[0][1]))):
                 final_text += " "
                 final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
             final_text += "\nUnsupported file type."
     print("OCR Text: ", final_text)
     if audio is not None:
+        audio_text = process_audio_to_text(audio)
         final_text += "\n" + audio_text
+    final_text_with_producetext = final_text + producetext
     response = co.generate(
         model='c4ai-aya',
     )
     processed_text = response.generations[0].text
+    audio_output = process_text_to_audio(processed_text)
+    return processed_text, audio_output
+# Define Gradio interface
+iface = gr.Interface(
+    fn=process_input,
+    inputs=[
+        gr.Image(type="pil", label="Camera Input"),
+        gr.File(label="File Upload"),
+        gr.Audio(sources="microphone", type="filepath", label="Mic Input"),
+        gr.Textbox(lines=2, label="Text Input"),
+        # gr.Dropdown(choices=TEXT_SOURCE_LANGUAGE_NAMES, label="Input Language"),
+        # gr.Dropdown(choices=TEXT_SOURCE_LANGUAGE_NAMES, label="Target Language")
+        gr.Dropdown(choices=df["name"].to_list(), label="Input Language"),
+        gr.Dropdown(choices=df["name"].to_list(), label="Target Language")
+    ],
+    outputs=[
+        RichTextbox(label="Processed Text"),
+        gr.Audio(label="Audio Output")
+    ],
+    title=title,
+    description=description
+)
 if __name__ == "__main__":
+    iface.launch()
+# co = cohere.Client('yhA228YGeZSl1ctten8LQxw2dky2nngHetXFjV2Q') # This is your trial API key
+# response = co.generate(
+#   model='c4ai-aya',
+#   prompt='एक यांत्रिक घड़ी दिन के समय को प्रदान करने के लिए एक गैर-इलेक्ट्रॉनिक तंत्र का उपयोग करती है। एक मुख्य स्प्रिंग का उपयोग यांत्रिक तंत्र को ऊर्जा संग्रहीत करने के लिए किया जाता है। एक यांत्रिक घड़ी में दांतों का एक कुंडल होता है जो धीरे-धीरे मुख्य स्प्रिंग से संचालित होता है। दांतों के कुंडल को एक यांत्रिक तंत्र में स्थानांतरित करने के लिए पहियों की एक श्रृंखला का उपयोग किया जाता है जो हाथों को घड़ी के चेहरे पर दाईं ओर ले जाता है। घड़ी के तंत्र को स्थिर करने और यह सुनिश्चित करने के लिए कि हाथ सही दिशा में घूमते हैं, एक कंपन का उपयोग किया जाता है। ',
+#   max_tokens=3674,
+#   temperature=0.9,
+#   k=0,
+#   stop_sequences=[],
+#   return_likelihoods='NONE')
+# print('Prediction: {}'.format(response.generations[0].text))
+# client = Client("https://facebook-seamless-m4t-v2-large.hf.space/--replicas/nq5nn/")
+# result = client.predict(
+# 		https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav,	# filepath  in 'Input speech' Audio component
+# 		Afrikaans,	# Literal[Afrikaans, Amharic, Armenian, Assamese, Basque, Belarusian, Bengali, Bosnian, Bulgarian, Burmese, Cantonese, Catalan, Cebuano, Central Kurdish, Croatian, Czech, Danish, Dutch, Egyptian Arabic, English, Estonian, Finnish, French, Galician, Ganda, Georgian, German, Greek, Gujarati, Halh Mongolian, Hebrew, Hindi, Hungarian, Icelandic, Igbo, Indonesian, Irish, Italian, Japanese, Javanese, Kannada, Kazakh, Khmer, Korean, Kyrgyz, Lao, Lithuanian, Luo, Macedonian, Maithili, Malayalam, Maltese, Mandarin Chinese, Marathi, Meitei, Modern Standard Arabic, Moroccan Arabic, Nepali, North Azerbaijani, Northern Uzbek, Norwegian Bokmål, Norwegian Nynorsk, Nyanja, Odia, Polish, Portuguese, Punjabi, Romanian, Russian, Serbian, Shona, Sindhi, Slovak, Slovenian, Somali, Southern Pashto, Spanish, Standard Latvian, Standard Malay, Swahili, Swedish, Tagalog, Tajik, Tamil, Telugu, Thai, Turkish, Ukrainian, Urdu, Vietnamese, Welsh, West Central Oromo, Western Persian, Yoruba, Zulu]  in 'Source language' Dropdown component
+# 		Bengali,	# Literal[Bengali, Catalan, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Hindi, Indonesian, Italian, Japanese, Korean, Maltese, Mandarin Chinese, Modern Standard Arabic, Northern Uzbek, Polish, Portuguese, Romanian, Russian, Slovak, Spanish, Swahili, Swedish, Tagalog, Telugu, Thai, Turkish, Ukrainian, Urdu, Vietnamese, Welsh, Western Persian]  in 'Target language' Dropdown component
+# 							api_name="/s2st"
+# )
+# print(result)
+# co = cohere.Client('yhA228YGeZSl1ctten8LQxw2dky2nngHetXFjV2Q')
+# response = co.generate(
+#   model='command-nightly',
+#   prompt='Les mécanismes de montres mécaniques\n\nLes mécanismes de montres mécaniques sont des mécanismes qui indiquent la journée, mais pas l\'électronique. Elles utilisent un ressort principal pour stocker l\'énergie nécessaire au fonctionnement des mécanismes. Un train d\'engrenages est utilisé pour transférer l\'énergie du ressort principal à un ensemble de roues qui font tourner les aiguilles dans le sens horaire sur le cadran de la montre.\n\nLes mécanismes de montres mécaniques sontdakshineswar omkarnathji, qui sont des lieux de culte qui sont construits dans le temple. Les engrenages sont des roues qui sont utilisées pour transférer l\'énergie du ressort principal à un ensemble de roues qui font tourner les aiguilles dans le sens horaire sur le cadran de la montre.\n\nLe ressort principal est un ressort qui est utilisé pour stocker l\'énergie nécessaire au fonctionnement des mécanismes de la montre. Le ressort principal est un ressort qui est utilisé pour stocker l\'énergie nécessaire au fonctionnement des mécanismes de la montre, et il est utilisé pour transférer l\'énergie aux engrenages, qui sont des roues qui sont utilisées pour faire tourner les aiguilles dans le sens horaire sur le cadran de la montre.\n\nLes engrenages sont des roues qui sont utilisées pour faire tourner les aiguilles dans le sens horaire sur le cadran de la montre, et elles sont utilisées pour transférer l\'énergie du ressort principal aux roues qui font tourner les aiguilles dans le sens horaire sur le cadran de la montre.\n\nLes mécanismes de montres mécaniques sont des mécanismes qui indiquent la journée, et elles sont utilisées pour transférer l\'énergie du ressort principal à un ensemble de roues qui font tourner les aiguilles dans le sens horaire sur le cadran de la montre.\n\nLes mécanismes de montres mécaniques sont des mécanismes qui indiquent la journée, et elles sont utilisées pour transférer l\'énergie du ressort principal à un ensemble de roues qui font tourner les 
aiguilles dans le sens horaire sur le cadran de la montre, et elles sont utilisées pour stabiliser le mécanisme de la montre, et pour s\'assurer que les aiguilles tournent dans le bon sens.\n\nthe above text is a learning aid. you must use rich text format to rewrite the above and add 1 . a red color tags for nouns 2. a blue color tag for verbs 3. a green color tag for adjectives and adverbs:',
+#   max_tokens=7294,
+#   temperature=0.6,
+#   k=0,
+#   stop_sequences=[],
+#   return_likelihoods='NONE')
+# print('Prediction: {}'.format(response.generations[0].text))
+# example = RichTextbox().example_inputs()
+# iface = gr.Interface(
+#     fn=process_input,
+#     inputs=[
+#         gr.Image(type="pil", label="Camera Input"),
+#         gr.File(label="File Upload"),
+#         gr.Audio(sources="microphone", type="filepath", label="Mic Input"),
+#         gr.Textbox(lines=2, label="Text Input"),
+#         gr.Dropdown(choices=TEXT_SOURCE_LANGUAGE_NAMES, label="Input Language"),
+#         gr.Dropdown(choices=TEXT_SOURCE_LANGUAGE_NAMES, label="Target Language")
+#     ],
+#     outputs=[
+#         gr.RichTextbox(label="Processed Text"),
+#         gr.Audio(label="Audio Output")
+#     ],
+#     title="OCR and Speech Processing App",
+#     description="This app processes images, PDFs, and audio inputs to generate text and audio outputs."
+# )
+# if __name__ == "__main__":
+# #     iface.launch()
+# demo = gr.Interface(
+#     lambda x:x,
+#     RichTextbox(),  # interactive version of your component
+#     RichTextbox(),  # static version of your component
+#     examples=[[example]],  # uncomment this line to view the "example version" of your component
+# )
+# if __name__ == "__main__":
+#     demo.launch()

ayatonic.env ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ CO_API_KEY=KQBPf0H0ENZESIC5nuUJ4i4jjg34xMPAkYK7s31W
2	+ SEAMLESSM4T=https://facebook-seamless-m4t-v2-large.hf.space/--replicas/v4gsf/

audio_files/audio_3505178120260920029.wav → languages.json RENAMED Viewed

File without changes

requirements.txt CHANGED Viewed

@@ -1,11 +1,10 @@
 gradio
 gradio_rich_textbox
 gradio-client
 torchvision
 torch
 python-dotenv
-pandas
-pydub
-cohere
-surya-ocr
-pillow

 gradio
 gradio_rich_textbox
 gradio-client
+cohere
+surya-ocr
+pillow
 torchvision
 torch
 python-dotenv
+pandas

script.py DELETED Viewed

@@ -1,10 +0,0 @@
-from gradio_client import Client
-client = Client("https://facebook-seamless-m4t-v2-large.hf.space/--replicas/v4gsf/")
-result = client.predict(
-		"Hello my name is tonic!",	# str  in 'Input text' Textbox component
-		"English",	# Literal[Afrikaans, Amharic, Armenian, Assamese, Basque, Belarusian, Bengali, Bosnian, Bulgarian, Burmese, Cantonese, Catalan, Cebuano, Central Kurdish, Croatian, Czech, Danish, Dutch, Egyptian Arabic, English, Estonian, Finnish, French, Galician, Ganda, Georgian, German, Greek, Gujarati, Halh Mongolian, Hebrew, Hindi, Hungarian, Icelandic, Igbo, Indonesian, Irish, Italian, Japanese, Javanese, Kannada, Kazakh, Khmer, Korean, Kyrgyz, Lao, Lithuanian, Luo, Macedonian, Maithili, Malayalam, Maltese, Mandarin Chinese, Marathi, Meitei, Modern Standard Arabic, Moroccan Arabic, Nepali, North Azerbaijani, Northern Uzbek, Norwegian Bokmål, Norwegian Nynorsk, Nyanja, Odia, Polish, Portuguese, Punjabi, Romanian, Russian, Serbian, Shona, Sindhi, Slovak, Slovenian, Somali, Southern Pashto, Spanish, Standard Latvian, Standard Malay, Swahili, Swedish, Tagalog, Tajik, Tamil, Telugu, Thai, Turkish, Ukrainian, Urdu, Vietnamese, Welsh, West Central Oromo, Western Persian, Yoruba, Zulu]  in 'Source language' Dropdown component
-		"French",	# Literal[Bengali, Catalan, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Hindi, Indonesian, Italian, Japanese, Korean, Maltese, Mandarin Chinese, Modern Standard Arabic, Northern Uzbek, Polish, Portuguese, Romanian, Russian, Slovak, Spanish, Swahili, Swedish, Tagalog, Telugu, Thai, Turkish, Ukrainian, Urdu, Vietnamese, Welsh, Western Persian]  in 'Target language' Dropdown component
-		api_name="/t2st"
-)
-print(result)