Spaces:
Runtime error
Runtime error
Added new transcript feature
Browse files- .gitignore +3 -1
- __pycache__/transcription_service.cpython-311.pyc +0 -0
- app.py +42 -127
- css.txt +15 -0
- languages_info.json +128 -0
- requirements.txt +40 -6
- transcription_service.py +69 -0
.gitignore
CHANGED
@@ -1 +1,3 @@
|
|
1 |
-
rt/
|
|
|
|
|
|
1 |
+
rt/
|
2 |
+
checkopenai.py
|
3 |
+
msq-ai-78bdccb055f4.json
|
__pycache__/transcription_service.cpython-311.pyc
ADDED
Binary file (1.56 kB). View file
|
|
app.py
CHANGED
@@ -1,113 +1,40 @@
|
|
1 |
import gradio as gr
|
2 |
-
import
|
3 |
import threading as th
|
4 |
import os
|
|
|
|
|
5 |
|
6 |
|
7 |
|
8 |
def translateoutput(text,language):
|
9 |
-
|
|
|
|
|
|
|
|
|
10 |
model="gpt-3.5-turbo",
|
11 |
messages=[
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
|
|
17 |
|
18 |
|
19 |
# Initialize a global variable to hold previous output
|
20 |
-
language_info=
|
21 |
-
'Afrikaans': 'af',
|
22 |
-
'English': 'en',
|
23 |
-
'Arabic': 'ar',
|
24 |
-
'Armenian': 'hy',
|
25 |
-
'Azerbaijani': 'az',
|
26 |
-
'Belarusian': 'be',
|
27 |
-
'Bosnian': 'bs',
|
28 |
-
'Bulgarian': 'bg',
|
29 |
-
'Catalan': 'ca',
|
30 |
-
'Chinese': 'zh',
|
31 |
-
'Croatian': 'hr',
|
32 |
-
'Czech': 'cs',
|
33 |
-
'Danish': 'da',
|
34 |
-
'Dutch': 'nl',
|
35 |
-
'English': 'en',
|
36 |
-
'Estonian': 'et',
|
37 |
-
'Finnish': 'fi',
|
38 |
-
'French': 'fr',
|
39 |
-
'Galician': 'gl',
|
40 |
-
'German': 'de',
|
41 |
-
'Greek': 'el',
|
42 |
-
'Hebrew': 'he',
|
43 |
-
'Hindi': 'hi',
|
44 |
-
'Hungarian': 'hu',
|
45 |
-
'Icelandic': 'is',
|
46 |
-
'Indonesian': 'id',
|
47 |
-
'Italian': 'it',
|
48 |
-
'Japanese': 'ja',
|
49 |
-
'Kannada': 'kn',
|
50 |
-
'Kazakh': 'kk',
|
51 |
-
'Korean': 'ko',
|
52 |
-
'Latvian': 'lv',
|
53 |
-
'Lithuanian': 'lt',
|
54 |
-
'Macedonian': 'mk',
|
55 |
-
'Malay': 'ms',
|
56 |
-
'Marathi': 'mr',
|
57 |
-
'Maori': 'mi',
|
58 |
-
'Nepali': 'ne',
|
59 |
-
'Norwegian': 'no',
|
60 |
-
'Persian': 'fa',
|
61 |
-
'Polish': 'pl',
|
62 |
-
'Portuguese': 'pt',
|
63 |
-
'Romanian': 'ro',
|
64 |
-
'Russian': 'ru',
|
65 |
-
'Serbian': 'sr',
|
66 |
-
'Slovak': 'sk',
|
67 |
-
'Slovenian': 'sl',
|
68 |
-
'Spanish': 'es',
|
69 |
-
'Swahili': 'sw',
|
70 |
-
'Swedish': 'sv',
|
71 |
-
'Tagalog': 'tl',
|
72 |
-
'Tamil': 'ta',
|
73 |
-
'Thai': 'th',
|
74 |
-
'Turkish': 'tr',
|
75 |
-
'Ukrainian': 'uk',
|
76 |
-
'Urdu': 'ur',
|
77 |
-
'Vietnamese': 'vi',
|
78 |
-
'Welsh': 'cy',
|
79 |
-
'Other': 'Other'
|
80 |
-
}
|
81 |
-
|
82 |
-
|
83 |
|
84 |
-
|
85 |
-
message=""
|
86 |
-
|
87 |
-
with open(audio_file, 'rb') as f:
|
88 |
-
result = openai.Audio.translate("whisper-1", f)
|
89 |
-
text=result.text
|
90 |
-
|
91 |
-
if lan=="English" or lan=="Other" or text=="":
|
92 |
-
message=text
|
93 |
-
else:
|
94 |
-
|
95 |
-
text=translateoutput(text,lan)
|
96 |
-
message=text
|
97 |
-
th.current_thread().return_value=message
|
98 |
|
99 |
|
100 |
|
101 |
def transcription(audio_file,input_lang):
|
102 |
global language_info
|
103 |
|
104 |
-
|
105 |
-
|
106 |
-
result = openai.Audio.transcribe("whisper-1", f)
|
107 |
-
th.current_thread().return_value=result.text
|
108 |
-
else:
|
109 |
-
result = openai.Audio.transcribe("whisper-1", f,language=language_info[input_lang])
|
110 |
-
th.current_thread().return_value=result.text
|
111 |
|
112 |
|
113 |
|
@@ -115,16 +42,13 @@ def transcription(audio_file,input_lang):
|
|
115 |
|
116 |
def func(audio_file,input_lang,lan,state="",state1=""):
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
transcribe_text=t2.return_value
|
126 |
-
state+=transcribe_text+" "
|
127 |
-
state1+=translation_text+" "
|
128 |
state=state.replace(".","\n")
|
129 |
state1=state1.replace(".","\n")
|
130 |
|
@@ -135,19 +59,17 @@ def func(audio_file,input_lang,lan,state="",state1=""):
|
|
135 |
|
136 |
|
137 |
def gpt_api(text,language):
|
138 |
-
if text=="":
|
139 |
return ""
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
model="gpt-3.5-turbo",
|
144 |
messages=[
|
145 |
-
|
146 |
-
|
147 |
-
]
|
148 |
-
|
149 |
-
|
150 |
-
message=completion.choices[0]['message']['content']
|
151 |
th.current_thread().return_value=message
|
152 |
|
153 |
|
@@ -170,21 +92,12 @@ def clear_output_data():
|
|
170 |
|
171 |
return "","","",""
|
172 |
|
|
|
|
|
|
|
|
|
173 |
|
174 |
-
css=
|
175 |
-
#clear:hover {background-color: #ff0000;transition: 0.5s;}
|
176 |
-
#summery {background-color: ##919cbf;border-radius:5%;}
|
177 |
-
#summery:hover {background-color:#2dcc9a ;transition: 0.5s;}
|
178 |
-
# div {background-image:url("https://images.unsplash.com/photo-1506259091721-347e791bab0f?auto=format&fit=crop&q=80&w=1470&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D");
|
179 |
-
# background-size: cover;
|
180 |
-
# background-position: center;
|
181 |
-
# background-repeat: no-repeat;
|
182 |
-
# background-attachment: fixed;
|
183 |
-
# color=white;!imoportant;
|
184 |
-
}
|
185 |
-
'''
|
186 |
-
|
187 |
-
with gr.Blocks(theme=gr.themes.Soft(),css=css) as app:
|
188 |
|
189 |
gr.Markdown("## Mufin Real-Time Audio Transcription And Translation",elem_id="heading")
|
190 |
gr.Markdown("### say any language we are here to translate it for team!!",elem_classes="heading")
|
@@ -192,8 +105,9 @@ with gr.Blocks(theme=gr.themes.Soft(),css=css) as app:
|
|
192 |
with gr.Row():
|
193 |
mic = gr.Audio(sources="microphone",streaming=True,type='filepath',label='Speak')
|
194 |
|
195 |
-
input_lan=gr.Dropdown(choices=language_info.keys(),label="Choose Input Language please",value="English",interactive=True)
|
196 |
-
|
|
|
197 |
summery=gr.Button(value="Summery",variant="secondary",size="small",elem_id="summery")
|
198 |
clear_output = gr.ClearButton(value="Clear Output",variant="stop",size="small",elem_id="clear")
|
199 |
with gr.Row():
|
@@ -211,6 +125,7 @@ with gr.Blocks(theme=gr.themes.Soft(),css=css) as app:
|
|
211 |
# pass
|
212 |
summery.click(make_summery,[text,text1,input_lan,lan],[sumer_ts,sumer_tr],cancels=[st],queue=False)
|
213 |
clear_output.click(clear_output_data,[],[text,text1,sumer_tr,sumer_ts],cancels=[st],queue=False)
|
|
|
214 |
# gr.update(visible=True)
|
215 |
|
216 |
app.queue()
|
|
|
1 |
import gradio as gr
|
2 |
+
from openai import OpenAI
|
3 |
import threading as th
|
4 |
import os
|
5 |
+
import json
|
6 |
+
from transcription_service import transcribe_speech_local
|
7 |
|
8 |
|
9 |
|
10 |
def translateoutput(text,language):
    """Translate ``text`` into ``language`` using the OpenAI chat completions API.

    Args:
        text: Text to translate. ``None`` or an empty string short-circuits
            and no API request is made.
        language: Name of the target language; it is interpolated verbatim
            into the system prompt.

    Returns:
        The translated text, or ``""`` when there is nothing to translate or
        the model returns no content.
    """
    if not text:
        return ""

    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            # The prompt does not claim a source language: the text handed to
            # this function comes from speech transcription in whatever input
            # language the user selected, which is not necessarily English.
            {"role": "system", "content": f"You will be provided with a sentence, and your task is to translate it into {language}."},
            {"role": "user", "content": text},
        ],
    )
    # ``content`` is optional in the API response; normalise a missing value to
    # "" so callers that wrap the result in str() never display "None".
    return response.choices[0].message.content or ""
|
24 |
|
25 |
|
26 |
# Initialize a global variable to hold previous output
|
27 |
+
language_info=json.load(open("languages_info.json","r"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
|
32 |
|
33 |
def transcription(audio_file,input_lang):
    """Transcribe ``audio_file`` spoken in the language labelled ``input_lang``.

    ``input_lang`` is a key of the module-level ``language_info`` mapping; the
    value stored under it is passed to ``transcribe_speech_local`` as the
    language argument. Raises ``KeyError`` if the label is not in the mapping.
    """
    language_code = language_info[input_lang]
    return transcribe_speech_local(audio_file, language_code)
|
37 |
+
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
|
40 |
|
|
|
42 |
|
43 |
def func(audio_file,input_lang,lan,state="",state1=""):
|
44 |
|
45 |
+
text_transcription=transcription(audio_file,input_lang)
|
46 |
+
text_translation=translateoutput(text_transcription,lan)
|
47 |
+
if text_transcription is None:
|
48 |
+
text_transcription=""
|
49 |
+
|
50 |
+
state+=str(text_transcription)+" "
|
51 |
+
state1+=str(text_translation)+" "
|
|
|
|
|
|
|
52 |
state=state.replace(".","\n")
|
53 |
state1=state1.replace(".","\n")
|
54 |
|
|
|
59 |
|
60 |
|
61 |
def gpt_api(text,language):
    """Summarise ``text`` in ``language`` using the OpenAI chat completions API.

    The result is delivered in two ways so that both direct callers and
    callers running this function as a thread target can read it: it is
    stored on the current thread object as ``return_value`` and it is also
    returned.

    Args:
        text: Text to summarise. ``None`` or an empty string short-circuits
            and no API request is made.
        language: Language name interpolated verbatim into the system prompt.

    Returns:
        The summary text, or ``""`` when there is nothing to summarise or the
        model returns no content.
    """
    if not text:
        message = ""
    else:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": f"You will be provided with a text data, and your task is to make very concise summery it into {language}."},
                {"role": "user", "content": text},
            ],
        )
        # ``content`` is optional in the API response; fall back to "".
        message = response.choices[0].message.content or ""

    # Always publish the result on the thread, including for empty input, so
    # a caller reading ``thread.return_value`` never hits a missing attribute.
    th.current_thread().return_value = message
    return message
|
74 |
|
75 |
|
|
|
92 |
|
93 |
return "","","",""
|
94 |
|
95 |
+
def switch(input_lan,lan):
    """Return the two language selections in swapped order.

    Given ``(input_lan, lan)`` the result is the tuple ``(lan, input_lan)``.
    """
    swapped = (lan, input_lan)
    return swapped
|
97 |
+
|
98 |
+
|
99 |
|
100 |
+
with gr.Blocks(theme=gr.themes.Soft(),css="css.txt") as app:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
|
102 |
gr.Markdown("## Mufin Real-Time Audio Transcription And Translation",elem_id="heading")
|
103 |
gr.Markdown("### say any language we are here to translate it for team!!",elem_classes="heading")
|
|
|
105 |
with gr.Row():
|
106 |
mic = gr.Audio(sources="microphone",streaming=True,type='filepath',label='Speak')
|
107 |
|
108 |
+
input_lan=gr.Dropdown(choices=language_info.keys(),label="Choose Input Language please",value="English (United Kingdom)",interactive=True)
|
109 |
+
switchbutton=gr.Button(value='\u21C6',variant="secondary",elem_id="switchbtn")
|
110 |
+
lan=gr.Dropdown(choices=language_info.keys(),label="Choose a language for translation",value="Korean (South Korea)",interactive=True)
|
111 |
summery=gr.Button(value="Summery",variant="secondary",size="small",elem_id="summery")
|
112 |
clear_output = gr.ClearButton(value="Clear Output",variant="stop",size="small",elem_id="clear")
|
113 |
with gr.Row():
|
|
|
125 |
# pass
|
126 |
summery.click(make_summery,[text,text1,input_lan,lan],[sumer_ts,sumer_tr],cancels=[st],queue=False)
|
127 |
clear_output.click(clear_output_data,[],[text,text1,sumer_tr,sumer_ts],cancels=[st],queue=False)
|
128 |
+
switchbutton.click(switch,[input_lan,lan],[input_lan,lan],cancels=[st],queue=False)
|
129 |
# gr.update(visible=True)
|
130 |
|
131 |
app.queue()
|
css.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#clear {background-color: #919cbf;border-radius:5%;}
|
2 |
+
#clear:hover {color:white;background-color: #ff0000;transition: 0.5s;}
|
3 |
+
#summery {background-color: #919cbf;border-radius:5%;}
|
4 |
+
#summery:hover {background-color:#2980b9;transition: 0.5s;color:#fff}
|
5 |
+
|
6 |
+
#switchbtn {
|
7 |
+
margin: 0.6em 0em 0.55em 0;
|
8 |
+
max-width: 2.5em;
|
9 |
+
min-width: 2.5em !important;
|
10 |
+
height: 2.4em;
|
11 |
+
}
|
12 |
+
#switchbtn:hover{
|
13 |
+
background-color:#2980b9;transition: 0.5s;color:#fff;
|
14 |
+
}
|
15 |
+
|
languages_info.json
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"Afrikaans (South Africa)": "af-ZA",
|
3 |
+
"Albanian (Albania)": "sq-AL",
|
4 |
+
"Arabic (Algeria)": "ar-DZ",
|
5 |
+
"Arabic (Bahrain)": "ar-BH",
|
6 |
+
"Arabic (Egypt)": "ar-EG",
|
7 |
+
"Arabic (Iraq)": "ar-IQ",
|
8 |
+
"Arabic (Jordan)": "ar-JO",
|
9 |
+
"Arabic (Kuwait)": "ar-KW",
|
10 |
+
"Arabic (Lebanon)": "ar-LB",
|
11 |
+
"Arabic (Morocco)": "ar-MA",
|
12 |
+
"Arabic (Oman)": "ar-OM",
|
13 |
+
"Arabic (Qatar)": "ar-QA",
|
14 |
+
"Arabic (Saudi Arabia)": "ar-SA",
|
15 |
+
"Arabic (Sudan)": "ar-SD",
|
16 |
+
"Arabic (Syria)": "ar-SY",
|
17 |
+
"Arabic (Tunisia)": "ar-TN",
|
18 |
+
"Arabic (United Arab Emirates)": "ar-AE",
|
19 |
+
"Arabic (Yemen)": "ar-YE",
|
20 |
+
"Armenian (Armenia)": "hy-AM",
|
21 |
+
"Bengali (Bangladesh)": "bn-BD",
|
22 |
+
"Bengali (India)": "bn-IN",
|
23 |
+
"Bosnian (Bosnia and Herzegovina)": "bs-BA",
|
24 |
+
"Catalan (Spain)": "ca-ES",
|
25 |
+
"Croatian (Croatia)": "hr-HR",
|
26 |
+
"Czech (Czech Republic)": "cs-CZ",
|
27 |
+
"Danish (Denmark)": "da-DK",
|
28 |
+
"Dutch (Belgium)": "nl-BE",
|
29 |
+
"Dutch (Netherlands)": "nl-NL",
|
30 |
+
"English (Australia)": "en-AU",
|
31 |
+
"English (Canada)": "en-CA",
|
32 |
+
"English (Ghana)": "en-GH",
|
33 |
+
"English (India)": "en-IN",
|
34 |
+
"English (Ireland)": "en-IE",
|
35 |
+
"English (Kenya)": "en-KE",
|
36 |
+
"English (New Zealand)": "en-NZ",
|
37 |
+
"English (Nigeria)": "en-NG",
|
38 |
+
"English (Philippines)": "en-PH",
|
39 |
+
"English (Singapore)": "en-SG",
|
40 |
+
"English (South Africa)": "en-ZA",
|
41 |
+
"English (Tanzania)": "en-TZ",
|
42 |
+
"English (United Kingdom)": "en-GB",
|
43 |
+
"English (United States)": "en-US",
|
44 |
+
"Estonian (Estonia)": "et-EE",
|
45 |
+
"Filipino (Philippines)": "fil-PH",
|
46 |
+
"Finnish (Finland)": "fi-FI",
|
47 |
+
"French (Belgium)": "fr-BE",
|
48 |
+
"French (Canada)": "fr-CA",
|
49 |
+
"French (France)": "fr-FR",
|
50 |
+
"French (Switzerland)": "fr-CH",
|
51 |
+
"Galician (Spain)": "gl-ES",
|
52 |
+
"Georgian (Georgia)": "ka-GE",
|
53 |
+
"German (Austria)": "de-AT",
|
54 |
+
"German (Germany)": "de-DE",
|
55 |
+
"German (Switzerland)": "de-CH",
|
56 |
+
"Greek (Greece)": "el-GR",
|
57 |
+
"Gujarati (India)": "gu-IN",
|
58 |
+
"Hebrew (Israel)": "iw-IL",
|
59 |
+
"Hindi (India)": "hi-IN",
|
60 |
+
"Hungarian (Hungary)": "hu-HU",
|
61 |
+
"Icelandic (Iceland)": "is-IS",
|
62 |
+
"Indonesian (Indonesia)": "id-ID",
|
63 |
+
"Italian (Italy)": "it-IT",
|
64 |
+
"Italian (Switzerland)": "it-CH",
|
65 |
+
"Japanese (Japan)": "ja-JP",
|
66 |
+
"Javanese (Indonesia)": "jv-ID",
|
67 |
+
"Kannada (India)": "kn-IN",
|
68 |
+
"Khmer (Cambodia)": "km-KH",
|
69 |
+
"Korean (South Korea)": "ko-KR",
|
70 |
+
"Lao (Laos)": "lo-LA",
|
71 |
+
"Latvian (Latvia)": "lv-LV",
|
72 |
+
"Lithuanian (Lithuania)": "lt-LT",
|
73 |
+
"Macedonian (North Macedonia)": "mk-MK",
|
74 |
+
"Malay (Malaysia)": "ms-MY",
|
75 |
+
"Malayalam (India)": "ml-IN",
|
76 |
+
"Marathi (India)": "mr-IN",
|
77 |
+
"Burmese (Myanmar [Burma])": "my-MM",
|
78 |
+
"Nepali (Nepal)": "ne-NP",
|
79 |
+
"Norwegian (Norway)": "no-NO",
|
80 |
+
"Persian (Iran)": "fa-IR",
|
81 |
+
"Polish (Poland)": "pl-PL",
|
82 |
+
"Portuguese (Brazil)": "pt-BR",
|
83 |
+
"Portuguese (Portugal)": "pt-PT",
|
84 |
+
"Punjabi (India)": "pa-IN",
|
85 |
+
"Romanian (Romania)": "ro-RO",
|
86 |
+
"Russian (Russia)": "ru-RU",
|
87 |
+
"Serbian (Serbia)": "sr-RS",
|
88 |
+
"Sinhala (Sri Lanka)": "si-LK",
|
89 |
+
"Slovak (Slovakia)": "sk-SK",
|
90 |
+
"Slovenian (Slovenia)": "sl-SI",
|
91 |
+
"Spanish (Argentina)": "es-AR",
|
92 |
+
"Spanish (Bolivia)": "es-BO",
|
93 |
+
"Spanish (Chile)": "es-CL",
|
94 |
+
"Spanish (Colombia)": "es-CO",
|
95 |
+
"Spanish (Costa Rica)": "es-CR",
|
96 |
+
"Spanish (Dominican Republic)": "es-DO",
|
97 |
+
"Spanish (Ecuador)": "es-EC",
|
98 |
+
"Spanish (El Salvador)": "es-SV",
|
99 |
+
"Spanish (Guatemala)": "es-GT",
|
100 |
+
"Spanish (Honduras)": "es-HN",
|
101 |
+
"Spanish (Mexico)": "es-MX",
|
102 |
+
"Spanish (Nicaragua)": "es-NI",
|
103 |
+
"Spanish (Panama)": "es-PA",
|
104 |
+
"Spanish (Paraguay)": "es-PY",
|
105 |
+
"Spanish (Peru)": "es-PE",
|
106 |
+
"Spanish (Puerto Rico)": "es-PR",
|
107 |
+
"Spanish (Spain)": "es-ES",
|
108 |
+
"Spanish (United States)": "es-US",
|
109 |
+
"Spanish (Uruguay)": "es-UY",
|
110 |
+
"Spanish (Venezuela)": "es-VE",
|
111 |
+
"Sundanese (Indonesia)": "su-ID",
|
112 |
+
"Swahili (Kenya)": "sw-KE",
|
113 |
+
"Swahili (Tanzania)": "sw-TZ",
|
114 |
+
"Swedish (Sweden)": "sv-SE",
|
115 |
+
"Tamil (India)": "ta-IN",
|
116 |
+
"Tamil (Malaysia)": "ta-MY",
|
117 |
+
"Tamil (Singapore)": "ta-SG",
|
118 |
+
"Tamil (Sri Lanka)": "ta-LK",
|
119 |
+
"Telugu (India)": "te-IN",
|
120 |
+
"Thai (Thailand)": "th-TH",
|
121 |
+
"Turkish (Turkey)": "tr-TR",
|
122 |
+
"Ukrainian (Ukraine)": "uk-UA",
|
123 |
+
"Urdu (Pakistan)": "ur-PK",
|
124 |
+
"Urdu (India)": "ur-IN",
|
125 |
+
"Uzbek (Uzbekistan)": "uz-UZ",
|
126 |
+
"Vietnamese (Vietnam)": "vi-VN",
|
127 |
+
"Zulu (South Africa)": "zu-ZA"
|
128 |
+
}
|
requirements.txt
CHANGED
@@ -6,12 +6,16 @@ annotated-types==0.6.0
|
|
6 |
anyio==3.7.1
|
7 |
async-timeout==4.0.3
|
8 |
attrs==23.1.0
|
|
|
|
|
9 |
certifi==2023.7.22
|
|
|
10 |
charset-normalizer==3.3.2
|
11 |
click==8.1.7
|
12 |
colorama==0.4.6
|
13 |
contourpy==1.2.0
|
14 |
cycler==0.12.1
|
|
|
15 |
distro==1.8.0
|
16 |
fastapi==0.104.1
|
17 |
ffmpy==0.3.1
|
@@ -19,29 +23,54 @@ filelock==3.13.1
|
|
19 |
fonttools==4.44.0
|
20 |
frozenlist==1.4.0
|
21 |
fsspec==2023.10.0
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
gradio_client==0.7.0
|
|
|
|
|
24 |
h11==0.14.0
|
25 |
httpcore==1.0.1
|
|
|
26 |
httpx==0.25.1
|
27 |
huggingface-hub==0.18.0
|
28 |
idna==3.4
|
29 |
importlib-resources==6.1.1
|
30 |
Jinja2==3.1.2
|
|
|
31 |
jsonschema==4.19.2
|
32 |
jsonschema-specifications==2023.7.1
|
33 |
kiwisolver==1.4.5
|
|
|
|
|
|
|
34 |
markdown-it-py==3.0.0
|
35 |
MarkupSafe==2.1.3
|
36 |
matplotlib==3.8.1
|
37 |
mdurl==0.1.2
|
|
|
|
|
38 |
multidict==6.0.4
|
|
|
|
|
|
|
39 |
numpy==1.26.1
|
40 |
-
openai==
|
41 |
orjson==3.9.10
|
42 |
packaging==23.2
|
43 |
pandas==2.1.2
|
44 |
Pillow==10.1.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
pydantic==2.4.2
|
46 |
pydantic_core==2.10.1
|
47 |
pydub==0.25.1
|
@@ -55,22 +84,27 @@ referencing==0.30.2
|
|
55 |
requests==2.31.0
|
56 |
rich==13.6.0
|
57 |
rpds-py==0.12.0
|
|
|
|
|
|
|
58 |
semantic-version==2.10.0
|
59 |
shellingham==1.5.4
|
60 |
six==1.16.0
|
61 |
sniffio==1.3.0
|
|
|
|
|
62 |
starlette==0.27.0
|
|
|
|
|
63 |
tomlkit==0.12.0
|
64 |
toolz==0.12.0
|
|
|
65 |
tqdm==4.66.1
|
66 |
typer==0.9.0
|
67 |
typing_extensions==4.8.0
|
68 |
tzdata==2023.3
|
|
|
69 |
urllib3==2.0.7
|
70 |
uvicorn==0.24.0.post1
|
71 |
websockets==11.0.3
|
72 |
yarl==1.9.2
|
73 |
-
noisereduce==3.0.0
|
74 |
-
librosa==0.10.1
|
75 |
-
pydub==0.25.1
|
76 |
-
torch==2.1.1
|
|
|
6 |
anyio==3.7.1
|
7 |
async-timeout==4.0.3
|
8 |
attrs==23.1.0
|
9 |
+
audioread==3.0.1
|
10 |
+
cachetools==5.3.2
|
11 |
certifi==2023.7.22
|
12 |
+
cffi==1.16.0
|
13 |
charset-normalizer==3.3.2
|
14 |
click==8.1.7
|
15 |
colorama==0.4.6
|
16 |
contourpy==1.2.0
|
17 |
cycler==0.12.1
|
18 |
+
decorator==5.1.1
|
19 |
distro==1.8.0
|
20 |
fastapi==0.104.1
|
21 |
ffmpy==0.3.1
|
|
|
23 |
fonttools==4.44.0
|
24 |
frozenlist==1.4.0
|
25 |
fsspec==2023.10.0
|
26 |
+
google-api-core==2.14.0
|
27 |
+
google-api-python-client==2.109.0
|
28 |
+
google-auth==2.23.4
|
29 |
+
google-auth-httplib2==0.1.1
|
30 |
+
google-cloud-speech==2.22.0
|
31 |
+
googleapis-common-protos==1.61.0
|
32 |
+
gradio==4.7.1
|
33 |
gradio_client==0.7.0
|
34 |
+
grpcio==1.59.3
|
35 |
+
grpcio-status==1.59.3
|
36 |
h11==0.14.0
|
37 |
httpcore==1.0.1
|
38 |
+
httplib2==0.22.0
|
39 |
httpx==0.25.1
|
40 |
huggingface-hub==0.18.0
|
41 |
idna==3.4
|
42 |
importlib-resources==6.1.1
|
43 |
Jinja2==3.1.2
|
44 |
+
joblib==1.3.2
|
45 |
jsonschema==4.19.2
|
46 |
jsonschema-specifications==2023.7.1
|
47 |
kiwisolver==1.4.5
|
48 |
+
lazy_loader==0.3
|
49 |
+
librosa==0.10.1
|
50 |
+
llvmlite==0.41.1
|
51 |
markdown-it-py==3.0.0
|
52 |
MarkupSafe==2.1.3
|
53 |
matplotlib==3.8.1
|
54 |
mdurl==0.1.2
|
55 |
+
mpmath==1.3.0
|
56 |
+
msgpack==1.0.7
|
57 |
multidict==6.0.4
|
58 |
+
networkx==3.2.1
|
59 |
+
noisereduce==3.0.0
|
60 |
+
numba==0.58.1
|
61 |
numpy==1.26.1
|
62 |
+
openai==1.3.6
|
63 |
orjson==3.9.10
|
64 |
packaging==23.2
|
65 |
pandas==2.1.2
|
66 |
Pillow==10.1.0
|
67 |
+
platformdirs==4.0.0
|
68 |
+
pooch==1.8.0
|
69 |
+
proto-plus==1.22.3
|
70 |
+
protobuf==4.25.1
|
71 |
+
pyasn1==0.5.1
|
72 |
+
pyasn1-modules==0.3.0
|
73 |
+
pycparser==2.21
|
74 |
pydantic==2.4.2
|
75 |
pydantic_core==2.10.1
|
76 |
pydub==0.25.1
|
|
|
84 |
requests==2.31.0
|
85 |
rich==13.6.0
|
86 |
rpds-py==0.12.0
|
87 |
+
rsa==4.9
|
88 |
+
scikit-learn==1.3.2
|
89 |
+
scipy==1.11.4
|
90 |
semantic-version==2.10.0
|
91 |
shellingham==1.5.4
|
92 |
six==1.16.0
|
93 |
sniffio==1.3.0
|
94 |
+
soundfile==0.12.1
|
95 |
+
soxr==0.3.7
|
96 |
starlette==0.27.0
|
97 |
+
sympy==1.12
|
98 |
+
threadpoolctl==3.2.0
|
99 |
tomlkit==0.12.0
|
100 |
toolz==0.12.0
|
101 |
+
torch==2.1.1
|
102 |
tqdm==4.66.1
|
103 |
typer==0.9.0
|
104 |
typing_extensions==4.8.0
|
105 |
tzdata==2023.3
|
106 |
+
uritemplate==4.1.1
|
107 |
urllib3==2.0.7
|
108 |
uvicorn==0.24.0.post1
|
109 |
websockets==11.0.3
|
110 |
yarl==1.9.2
|
|
|
|
|
|
|
|
transcription_service.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# from google.cloud import speech
|
2 |
+
# import json
|
3 |
+
# import os
|
4 |
+
# os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'msq-ai-78bdccb055f4.json'
|
5 |
+
|
6 |
+
# def speech_to_text(
|
7 |
+
# config: speech.RecognitionConfig,
|
8 |
+
# audio: speech.RecognitionAudio,
|
9 |
+
# ) -> speech.RecognizeResponse:
|
10 |
+
# client = speech.SpeechClient()
|
11 |
+
|
12 |
+
# # Synchronous speech recognition request
|
13 |
+
# response = client.recognize(config=config, audio=audio)
|
14 |
+
|
15 |
+
# return response
|
16 |
+
|
17 |
+
|
18 |
+
# def print_response(response: speech.RecognizeResponse):
|
19 |
+
# for result in response.results:
|
20 |
+
# return print_result(result)
|
21 |
+
|
22 |
+
|
23 |
+
# def print_result(result: speech.SpeechRecognitionResult):
|
24 |
+
# best_alternative = result.alternatives[0]
|
25 |
+
# # print("-" * 80)
|
26 |
+
# # print(f"language_code: {result.language_code}")
|
27 |
+
# return best_alternative.transcript
|
28 |
+
# # print(f"confidence: {best_alternative.confidence:.0%}")
|
29 |
+
|
30 |
+
|
31 |
+
# def main(audio,language):
|
32 |
+
# config = speech.RecognitionConfig(
|
33 |
+
# language_code=language,
|
34 |
+
# )
|
35 |
+
# audio = speech.RecognitionAudio(
|
36 |
+
# uri=audio,
|
37 |
+
# )
|
38 |
+
# response=speech_to_text(config,audio)
|
39 |
+
# return print_response(response)
|
40 |
+
|
41 |
+
|
42 |
+
import os
|
43 |
+
from google.cloud import speech
|
44 |
+
|
45 |
+
|
46 |
+
def transcribe_speech_local(wav_file,language):
    """Transcribe a local audio file using Google's Speech-to-Text API.

    Args:
        wav_file: Path of the audio file; it is read fully into memory and
            sent inline with the request.
        language: Language code passed to the API as ``language_code``.

    Returns:
        The transcript of the whole recording, or ``None`` when the API
        returns no usable result.
    """
    client = speech.SpeechClient()

    with open(wav_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    # Encoding and sample rate are intentionally left unset here.
    # NOTE(review): this presumably relies on the API reading them from the
    # file header - confirm for the audio formats actually sent.
    config = speech.RecognitionConfig(
        language_code=language,
        enable_automatic_punctuation=True,
    )

    response = client.recognize(config=config, audio=audio)

    # Each result covers one consecutive portion of the audio, so every
    # result's best alternative is collected; returning from inside the loop
    # would keep only the first portion. Results without alternatives are
    # skipped rather than raising IndexError.
    pieces = [
        result.alternatives[0].transcript.strip()
        for result in response.results
        if result.alternatives
    ]
    if not pieces:
        return None
    return " ".join(pieces)
|
67 |
+
|
68 |
+
# Example usage
|
69 |
+
|