Spaces:
Runtime error
Runtime error
tonic
committed on
Commit
•
971bee9
1
Parent(s):
5701b30
adding long audio parsing
Browse files- app.py +30 -2
- requirements.txt +2 -1
app.py
CHANGED
@@ -14,7 +14,9 @@ import cohere
|
|
14 |
import os
|
15 |
import re
|
16 |
import pandas as pd
|
17 |
-
|
|
|
|
|
18 |
|
19 |
title = "# Welcome to AyaTonic"
|
20 |
description = "Learn a New Language With Aya"
|
@@ -70,6 +72,31 @@ def translate_text(text, instructions=translatetextinst):
|
|
70 |
)
|
71 |
return response.generations[0].text
|
72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
class TaggedPhraseExtractor:
|
74 |
def __init__(self, text=''):
|
75 |
self.text = text
|
@@ -184,7 +211,8 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
|
|
184 |
final_text += "\nUnsupported file type."
|
185 |
print("OCR Text: ", final_text)
|
186 |
if audio is not None:
|
187 |
-
|
|
|
188 |
final_text += "\n" + audio_text
|
189 |
|
190 |
final_text_with_producetext = final_text + producetext
|
|
|
14 |
import os
|
15 |
import re
|
16 |
import pandas as pd
|
17 |
+
import pydub
|
18 |
+
from pydub import AudioSegment
|
19 |
+
from pydub.utils import make_chunks
|
20 |
|
21 |
title = "# Welcome to AyaTonic"
|
22 |
description = "Learn a New Language With Aya"
|
|
|
72 |
)
|
73 |
return response.generations[0].text
|
74 |
|
75 |
+
class LongAudioProcessor:
    """Transcribe a long audio file by splitting it into short chunks.

    Each chunk is exported to a temporary WAV file and handed to
    ``self.process_audio_to_text``; the per-chunk transcripts are joined
    with single spaces.

    NOTE(review): ``process_audio_to_text`` is not defined on this class in
    the code shown here. It must be supplied (added as a method, or by a
    subclass); otherwise every chunk fails with ``AttributeError``, which is
    reported and skipped, and the returned transcript is empty.
    """

    def __init__(self, audio_client, api_key=None):
        """Keep the speech client and the optional API key for later use.

        Args:
            audio_client: Client object used for speech-to-text calls.
            api_key: Optional API key; stored as-is.
        """
        self.client = audio_client
        self.api_key = api_key

    def process_long_audio(self, audio_path, chunk_length_ms=20000,
                           **transcribe_kwargs):
        """Split ``audio_path`` into chunks and transcribe them in order.

        Args:
            audio_path: Path of the audio file to load with
                ``AudioSegment.from_file``.
            chunk_length_ms: Length of each chunk in milliseconds
                (20 seconds by default).
            **transcribe_kwargs: Extra keyword arguments forwarded unchanged
                to ``self.process_audio_to_text`` for every chunk. This lets
                callers pass options such as ``inputlanguage`` and
                ``outputlanguage`` without this method raising ``TypeError``.
                NOTE(review): confirm ``process_audio_to_text`` accepts the
                keywords the caller passes.

        Returns:
            The non-empty chunk transcripts joined with single spaces. A
            chunk whose transcription raises is reported and skipped.
        """
        # Local import so this block does not depend on a new file-level import.
        import tempfile

        audio = AudioSegment.from_file(audio_path)
        chunks = make_chunks(audio, chunk_length_ms)
        transcripts = []

        # A private temporary directory keeps concurrent requests from
        # overwriting each other's "chunkN.wav" files, and guarantees cleanup
        # even if exporting a chunk fails.
        with tempfile.TemporaryDirectory(prefix="audio_chunks_") as chunk_dir:
            for i, chunk in enumerate(chunks):
                chunk_name = f"chunk{i}.wav"
                chunk_path = os.path.join(chunk_dir, chunk_name)

                # Export through an explicitly managed handle so the file is
                # closed before it is read and removed.
                with open(chunk_path, "wb") as file:
                    chunk.export(file, format="wav")

                try:
                    result = self.process_audio_to_text(
                        chunk_path, **transcribe_kwargs
                    )
                    text = result.strip()
                    if text:
                        transcripts.append(text)
                except Exception as e:
                    # Best effort: one bad chunk must not discard the rest.
                    print(f"Error processing {chunk_name}: {e}")
                finally:
                    # Remove each chunk right away to bound disk usage on
                    # very long recordings.
                    if os.path.exists(chunk_path):
                        os.remove(chunk_path)

        return " ".join(transcripts)
|
100 |
class TaggedPhraseExtractor:
|
101 |
def __init__(self, text=''):
|
102 |
self.text = text
|
|
|
211 |
final_text += "\nUnsupported file type."
|
212 |
print("OCR Text: ", final_text)
|
213 |
if audio is not None:
|
214 |
+
long_audio_processor = LongAudioProcessor(audio_client)
|
215 |
+
audio_text = long_audio_processor.process_long_audio(audio, inputlanguage=translatefrom, outputlanguage=translateto)
|
216 |
final_text += "\n" + audio_text
|
217 |
|
218 |
final_text_with_producetext = final_text + producetext
|
requirements.txt
CHANGED
@@ -7,4 +7,5 @@ pillow
|
|
7 |
torchvision
|
8 |
torch
|
9 |
python-dotenv
|
10 |
-
pandas
|
|
|
|
7 |
torchvision
|
8 |
torch
|
9 |
python-dotenv
|
10 |
+
pandas
|
11 |
+
pydub
|