MufinApps committed on
Commit
81ce2b1
1 Parent(s): 61c69c6

Added new transcript feature

Browse files
.gitignore CHANGED
@@ -1 +1,3 @@
1
- rt/
 
 
 
1
+ rt/
2
+ checkopenai.py
3
+ msq-ai-78bdccb055f4.json
__pycache__/transcription_service.cpython-311.pyc ADDED
Binary file (1.56 kB). View file
 
app.py CHANGED
@@ -1,113 +1,40 @@
1
  import gradio as gr
2
- import openai
3
  import threading as th
4
  import os
 
 
5
 
6
 
7
 
8
  def translateoutput(text,language):
9
- completion = openai.ChatCompletion.create(
 
 
 
 
10
  model="gpt-3.5-turbo",
11
  messages=[
12
- {"role": "system", "content": f"You will be provided with a sentence in English, and your task is to translate it into {language}."},
13
- {"role": "user", "content":text}
14
- ]
15
- )
16
- return completion.choices[0]['message']['content']
 
17
 
18
 
19
  # Initialize a global variable to hold previous output
20
- language_info={
21
- 'Afrikaans': 'af',
22
- 'English': 'en',
23
- 'Arabic': 'ar',
24
- 'Armenian': 'hy',
25
- 'Azerbaijani': 'az',
26
- 'Belarusian': 'be',
27
- 'Bosnian': 'bs',
28
- 'Bulgarian': 'bg',
29
- 'Catalan': 'ca',
30
- 'Chinese': 'zh',
31
- 'Croatian': 'hr',
32
- 'Czech': 'cs',
33
- 'Danish': 'da',
34
- 'Dutch': 'nl',
35
- 'English': 'en',
36
- 'Estonian': 'et',
37
- 'Finnish': 'fi',
38
- 'French': 'fr',
39
- 'Galician': 'gl',
40
- 'German': 'de',
41
- 'Greek': 'el',
42
- 'Hebrew': 'he',
43
- 'Hindi': 'hi',
44
- 'Hungarian': 'hu',
45
- 'Icelandic': 'is',
46
- 'Indonesian': 'id',
47
- 'Italian': 'it',
48
- 'Japanese': 'ja',
49
- 'Kannada': 'kn',
50
- 'Kazakh': 'kk',
51
- 'Korean': 'ko',
52
- 'Latvian': 'lv',
53
- 'Lithuanian': 'lt',
54
- 'Macedonian': 'mk',
55
- 'Malay': 'ms',
56
- 'Marathi': 'mr',
57
- 'Maori': 'mi',
58
- 'Nepali': 'ne',
59
- 'Norwegian': 'no',
60
- 'Persian': 'fa',
61
- 'Polish': 'pl',
62
- 'Portuguese': 'pt',
63
- 'Romanian': 'ro',
64
- 'Russian': 'ru',
65
- 'Serbian': 'sr',
66
- 'Slovak': 'sk',
67
- 'Slovenian': 'sl',
68
- 'Spanish': 'es',
69
- 'Swahili': 'sw',
70
- 'Swedish': 'sv',
71
- 'Tagalog': 'tl',
72
- 'Tamil': 'ta',
73
- 'Thai': 'th',
74
- 'Turkish': 'tr',
75
- 'Ukrainian': 'uk',
76
- 'Urdu': 'ur',
77
- 'Vietnamese': 'vi',
78
- 'Welsh': 'cy',
79
- 'Other': 'Other'
80
- }
81
-
82
-
83
 
84
- def translate(audio_file,lan):
85
- message=""
86
-
87
- with open(audio_file, 'rb') as f:
88
- result = openai.Audio.translate("whisper-1", f)
89
- text=result.text
90
-
91
- if lan=="English" or lan=="Other" or text=="":
92
- message=text
93
- else:
94
-
95
- text=translateoutput(text,lan)
96
- message=text
97
- th.current_thread().return_value=message
98
 
99
 
100
 
101
  def transcription(audio_file,input_lang):
102
  global language_info
103
 
104
- with open(audio_file, 'rb') as f:
105
- if input_lang=="Other":
106
- result = openai.Audio.transcribe("whisper-1", f)
107
- th.current_thread().return_value=result.text
108
- else:
109
- result = openai.Audio.transcribe("whisper-1", f,language=language_info[input_lang])
110
- th.current_thread().return_value=result.text
111
 
112
 
113
 
@@ -115,16 +42,13 @@ def transcription(audio_file,input_lang):
115
 
116
  def func(audio_file,input_lang,lan,state="",state1=""):
117
 
118
- t1 = th.Thread(target=translate, args=(audio_file,lan,))
119
- t2 = th.Thread(target=transcription, args=(audio_file,input_lang,))
120
- t1.start()
121
- t2.start()
122
- t1.join()
123
- t2.join()
124
- translation_text=t1.return_value
125
- transcribe_text=t2.return_value
126
- state+=transcribe_text+" "
127
- state1+=translation_text+" "
128
  state=state.replace(".","\n")
129
  state1=state1.replace(".","\n")
130
 
@@ -135,19 +59,17 @@ def func(audio_file,input_lang,lan,state="",state1=""):
135
 
136
 
137
  def gpt_api(text,language):
138
- if text=="":
139
  return ""
140
- if len(text)>2000:
141
- text=text[-2000:]
142
- completion = openai.ChatCompletion.create(
143
  model="gpt-3.5-turbo",
144
  messages=[
145
- {"role": "system", "content": f"your task is to make a concise summery and useful summery from the given text in {language}."},
146
- {"role": "user", "content":text}
147
- ]
148
- )
149
-
150
- message=completion.choices[0]['message']['content']
151
  th.current_thread().return_value=message
152
 
153
 
@@ -170,21 +92,12 @@ def clear_output_data():
170
 
171
  return "","","",""
172
 
 
 
 
 
173
 
174
- css='''#clear {background-color: ##919cbf;border-radius:5%;}
175
- #clear:hover {background-color: #ff0000;transition: 0.5s;}
176
- #summery {background-color: ##919cbf;border-radius:5%;}
177
- #summery:hover {background-color:#2dcc9a ;transition: 0.5s;}
178
- # div {background-image:url("https://images.unsplash.com/photo-1506259091721-347e791bab0f?auto=format&fit=crop&q=80&w=1470&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D");
179
- # background-size: cover;
180
- # background-position: center;
181
- # background-repeat: no-repeat;
182
- # background-attachment: fixed;
183
- # color=white;!imoportant;
184
- }
185
- '''
186
-
187
- with gr.Blocks(theme=gr.themes.Soft(),css=css) as app:
188
 
189
  gr.Markdown("## Mufin Real-Time Audio Transcription And Translation",elem_id="heading")
190
  gr.Markdown("### say any language we are here to translate it for team!!",elem_classes="heading")
@@ -192,8 +105,9 @@ with gr.Blocks(theme=gr.themes.Soft(),css=css) as app:
192
  with gr.Row():
193
  mic = gr.Audio(sources="microphone",streaming=True,type='filepath',label='Speak')
194
 
195
- input_lan=gr.Dropdown(choices=language_info.keys(),label="Choose Input Language please",value="English",interactive=True)
196
- lan=gr.Dropdown(choices=language_info.keys(),label="Choose a language for translation",value="Korean",interactive=True)
 
197
  summery=gr.Button(value="Summery",variant="secondary",size="small",elem_id="summery")
198
  clear_output = gr.ClearButton(value="Clear Output",variant="stop",size="small",elem_id="clear")
199
  with gr.Row():
@@ -211,6 +125,7 @@ with gr.Blocks(theme=gr.themes.Soft(),css=css) as app:
211
  # pass
212
  summery.click(make_summery,[text,text1,input_lan,lan],[sumer_ts,sumer_tr],cancels=[st],queue=False)
213
  clear_output.click(clear_output_data,[],[text,text1,sumer_tr,sumer_ts],cancels=[st],queue=False)
 
214
  # gr.update(visible=True)
215
 
216
  app.queue()
 
1
  import gradio as gr
2
+ from openai import OpenAI
3
  import threading as th
4
  import os
5
+ import json
6
+ from transcription_service import transcribe_speech_local
7
 
8
 
9
 
10
  def translateoutput(text,language):
11
+ if text=="" or text is None:
12
+ return ""
13
+ client = OpenAI()
14
+
15
+ response = client.chat.completions.create(
16
  model="gpt-3.5-turbo",
17
  messages=[
18
+ {"role": "system", "content": f"You will be provided with a sentence in English, and your task is to translate it into {language}."},
19
+ {"role": "user", "content": text},
20
+
21
+ ]
22
+ )
23
+ return response.choices[0].message.content
24
 
25
 
26
  # Display-name -> language-code mapping used by the language dropdowns and the transcriber
27
# Use a context manager so the file handle is closed deterministically, and
# pin the encoding so loading does not depend on the platform default.
with open("languages_info.json", "r", encoding="utf-8") as languages_file:
    language_info = json.load(languages_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
 
32
 
33
  def transcription(audio_file,input_lang):
34
  global language_info
35
 
36
+ return transcribe_speech_local(audio_file,language_info[input_lang])
37
+
 
 
 
 
 
38
 
39
 
40
 
 
42
 
43
  def func(audio_file,input_lang,lan,state="",state1=""):
44
 
45
+ text_transcription=transcription(audio_file,input_lang)
46
+ text_translation=translateoutput(text_transcription,lan)
47
+ if text_transcription is None:
48
+ text_transcription=""
49
+
50
+ state+=str(text_transcription)+" "
51
+ state1+=str(text_translation)+" "
 
 
 
52
  state=state.replace(".","\n")
53
  state1=state1.replace(".","\n")
54
 
 
59
 
60
 
61
  def gpt_api(text,language):
62
+ if text=="" or text is None:
63
  return ""
64
+ client = OpenAI()
65
+
66
+ response = client.chat.completions.create(
67
  model="gpt-3.5-turbo",
68
  messages=[
69
+ {"role": "system", "content": f"You will be provided with a text data, and your task is to make very concise summery it into {language}."},
70
+ {"role": "user", "content": text},
71
+ ])
72
+ message=response.choices[0].message.content
 
 
73
  th.current_thread().return_value=message
74
 
75
 
 
92
 
93
  return "","","",""
94
 
95
def switch(input_lan,lan):
    """Swap the two language selections.

    Returns:
        A 2-tuple with the arguments in reversed order: ``(lan, input_lan)``.
    """
    swapped = (lan, input_lan)
    return swapped
97
+
98
+
99
 
100
+ with gr.Blocks(theme=gr.themes.Soft(),css="css.txt") as app:
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  gr.Markdown("## Mufin Real-Time Audio Transcription And Translation",elem_id="heading")
103
  gr.Markdown("### say any language we are here to translate it for team!!",elem_classes="heading")
 
105
  with gr.Row():
106
  mic = gr.Audio(sources="microphone",streaming=True,type='filepath',label='Speak')
107
 
108
+ input_lan=gr.Dropdown(choices=language_info.keys(),label="Choose Input Language please",value="English (United Kingdom)",interactive=True)
109
+ switchbutton=gr.Button(value='\u21C6',variant="secondary",elem_id="switchbtn")
110
+ lan=gr.Dropdown(choices=language_info.keys(),label="Choose a language for translation",value="Korean (South Korea)",interactive=True)
111
  summery=gr.Button(value="Summery",variant="secondary",size="small",elem_id="summery")
112
  clear_output = gr.ClearButton(value="Clear Output",variant="stop",size="small",elem_id="clear")
113
  with gr.Row():
 
125
  # pass
126
  summery.click(make_summery,[text,text1,input_lan,lan],[sumer_ts,sumer_tr],cancels=[st],queue=False)
127
  clear_output.click(clear_output_data,[],[text,text1,sumer_tr,sumer_ts],cancels=[st],queue=False)
128
+ switchbutton.click(switch,[input_lan,lan],[input_lan,lan],cancels=[st],queue=False)
129
  # gr.update(visible=True)
130
 
131
  app.queue()
css.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Action buttons: resting colour plus a highlighted hover state.
   The resting colour was written "##919cbf", which is not a valid hex
   colour, so the declaration was ignored. */
#clear {background-color: #919cbf;border-radius:5%;}
#clear:hover {color:white;background-color: #ff0000;transition: 0.5s;}
#summery {background-color: #919cbf;border-radius:5%;}
#summery:hover {background-color:#2980b9;transition: 0.5s;color:#fff}

/* Compact, fixed-width button placed between the two language dropdowns. */
#switchbtn {
    margin: 0.6em 0em 0.55em 0;
    max-width: 2.5em;
    min-width: 2.5em !important;
    height: 2.4em;
}
#switchbtn:hover{
    background-color:#2980b9;transition: 0.5s;color:#fff;
}
15
+
languages_info.json ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Afrikaans (South Africa)": "af-ZA",
3
+ "Albanian (Albania)": "sq-AL",
4
+ "Arabic (Algeria)": "ar-DZ",
5
+ "Arabic (Bahrain)": "ar-BH",
6
+ "Arabic (Egypt)": "ar-EG",
7
+ "Arabic (Iraq)": "ar-IQ",
8
+ "Arabic (Jordan)": "ar-JO",
9
+ "Arabic (Kuwait)": "ar-KW",
10
+ "Arabic (Lebanon)": "ar-LB",
11
+ "Arabic (Morocco)": "ar-MA",
12
+ "Arabic (Oman)": "ar-OM",
13
+ "Arabic (Qatar)": "ar-QA",
14
+ "Arabic (Saudi Arabia)": "ar-SA",
15
+ "Arabic (Sudan)": "ar-SD",
16
+ "Arabic (Syria)": "ar-SY",
17
+ "Arabic (Tunisia)": "ar-TN",
18
+ "Arabic (United Arab Emirates)": "ar-AE",
19
+ "Arabic (Yemen)": "ar-YE",
20
+ "Armenian (Armenia)": "hy-AM",
21
+ "Bengali (Bangladesh)": "bn-BD",
22
+ "Bengali (India)": "bn-IN",
23
+ "Bosnian (Bosnia and Herzegovina)": "bs-BA",
24
+ "Catalan (Spain)": "ca-ES",
25
+ "Croatian (Croatia)": "hr-HR",
26
+ "Czech (Czech Republic)": "cs-CZ",
27
+ "Danish (Denmark)": "da-DK",
28
+ "Dutch (Belgium)": "nl-BE",
29
+ "Dutch (Netherlands)": "nl-NL",
30
+ "English (Australia)": "en-AU",
31
+ "English (Canada)": "en-CA",
32
+ "English (Ghana)": "en-GH",
33
+ "English (India)": "en-IN",
34
+ "English (Ireland)": "en-IE",
35
+ "English (Kenya)": "en-KE",
36
+ "English (New Zealand)": "en-NZ",
37
+ "English (Nigeria)": "en-NG",
38
+ "English (Philippines)": "en-PH",
39
+ "English (Singapore)": "en-SG",
40
+ "English (South Africa)": "en-ZA",
41
+ "English (Tanzania)": "en-TZ",
42
+ "English (United Kingdom)": "en-GB",
43
+ "English (United States)": "en-US",
44
+ "Estonian (Estonia)": "et-EE",
45
+ "Filipino (Philippines)": "fil-PH",
46
+ "Finnish (Finland)": "fi-FI",
47
+ "French (Belgium)": "fr-BE",
48
+ "French (Canada)": "fr-CA",
49
+ "French (France)": "fr-FR",
50
+ "French (Switzerland)": "fr-CH",
51
+ "Galician (Spain)": "gl-ES",
52
+ "Georgian (Georgia)": "ka-GE",
53
+ "German (Austria)": "de-AT",
54
+ "German (Germany)": "de-DE",
55
+ "German (Switzerland)": "de-CH",
56
+ "Greek (Greece)": "el-GR",
57
+ "Gujarati (India)": "gu-IN",
58
+ "Hebrew (Israel)": "iw-IL",
59
+ "Hindi (India)": "hi-IN",
60
+ "Hungarian (Hungary)": "hu-HU",
61
+ "Icelandic (Iceland)": "is-IS",
62
+ "Indonesian (Indonesia)": "id-ID",
63
+ "Italian (Italy)": "it-IT",
64
+ "Italian (Switzerland)": "it-CH",
65
+ "Japanese (Japan)": "ja-JP",
66
+ "Javanese (Indonesia)": "jv-ID",
67
+ "Kannada (India)": "kn-IN",
68
+ "Khmer (Cambodia)": "km-KH",
69
+ "Korean (South Korea)": "ko-KR",
70
+ "Lao (Laos)": "lo-LA",
71
+ "Latvian (Latvia)": "lv-LV",
72
+ "Lithuanian (Lithuania)": "lt-LT",
73
+ "Macedonian (North Macedonia)": "mk-MK",
74
+ "Malay (Malaysia)": "ms-MY",
75
+ "Malayalam (India)": "ml-IN",
76
+ "Marathi (India)": "mr-IN",
77
+ "Burmese (Myanmar [Burma])": "my-MM",
78
+ "Nepali (Nepal)": "ne-NP",
79
+ "Norwegian (Norway)": "no-NO",
80
+ "Persian (Iran)": "fa-IR",
81
+ "Polish (Poland)": "pl-PL",
82
+ "Portuguese (Brazil)": "pt-BR",
83
+ "Portuguese (Portugal)": "pt-PT",
84
+ "Punjabi (India)": "pa-IN",
85
+ "Romanian (Romania)": "ro-RO",
86
+ "Russian (Russia)": "ru-RU",
87
+ "Serbian (Serbia)": "sr-RS",
88
+ "Sinhala (Sri Lanka)": "si-LK",
89
+ "Slovak (Slovakia)": "sk-SK",
90
+ "Slovenian (Slovenia)": "sl-SI",
91
+ "Spanish (Argentina)": "es-AR",
92
+ "Spanish (Bolivia)": "es-BO",
93
+ "Spanish (Chile)": "es-CL",
94
+ "Spanish (Colombia)": "es-CO",
95
+ "Spanish (Costa Rica)": "es-CR",
96
+ "Spanish (Dominican Republic)": "es-DO",
97
+ "Spanish (Ecuador)": "es-EC",
98
+ "Spanish (El Salvador)": "es-SV",
99
+ "Spanish (Guatemala)": "es-GT",
100
+ "Spanish (Honduras)": "es-HN",
101
+ "Spanish (Mexico)": "es-MX",
102
+ "Spanish (Nicaragua)": "es-NI",
103
+ "Spanish (Panama)": "es-PA",
104
+ "Spanish (Paraguay)": "es-PY",
105
+ "Spanish (Peru)": "es-PE",
106
+ "Spanish (Puerto Rico)": "es-PR",
107
+ "Spanish (Spain)": "es-ES",
108
+ "Spanish (United States)": "es-US",
109
+ "Spanish (Uruguay)": "es-UY",
110
+ "Spanish (Venezuela)": "es-VE",
111
+ "Sundanese (Indonesia)": "su-ID",
112
+ "Swahili (Kenya)": "sw-KE",
113
+ "Swahili (Tanzania)": "sw-TZ",
114
+ "Swedish (Sweden)": "sv-SE",
115
+ "Tamil (India)": "ta-IN",
116
+ "Tamil (Malaysia)": "ta-MY",
117
+ "Tamil (Singapore)": "ta-SG",
118
+ "Tamil (Sri Lanka)": "ta-LK",
119
+ "Telugu (India)": "te-IN",
120
+ "Thai (Thailand)": "th-TH",
121
+ "Turkish (Turkey)": "tr-TR",
122
+ "Ukrainian (Ukraine)": "uk-UA",
123
+ "Urdu (Pakistan)": "ur-PK",
124
+ "Urdu (India)": "ur-IN",
125
+ "Uzbek (Uzbekistan)": "uz-UZ",
126
+ "Vietnamese (Vietnam)": "vi-VN",
127
+ "Zulu (South Africa)": "zu-ZA"
128
+ }
requirements.txt CHANGED
@@ -6,12 +6,16 @@ annotated-types==0.6.0
6
  anyio==3.7.1
7
  async-timeout==4.0.3
8
  attrs==23.1.0
 
 
9
  certifi==2023.7.22
 
10
  charset-normalizer==3.3.2
11
  click==8.1.7
12
  colorama==0.4.6
13
  contourpy==1.2.0
14
  cycler==0.12.1
 
15
  distro==1.8.0
16
  fastapi==0.104.1
17
  ffmpy==0.3.1
@@ -19,29 +23,54 @@ filelock==3.13.1
19
  fonttools==4.44.0
20
  frozenlist==1.4.0
21
  fsspec==2023.10.0
22
- gradio==4.1.2
 
 
 
 
 
 
23
  gradio_client==0.7.0
 
 
24
  h11==0.14.0
25
  httpcore==1.0.1
 
26
  httpx==0.25.1
27
  huggingface-hub==0.18.0
28
  idna==3.4
29
  importlib-resources==6.1.1
30
  Jinja2==3.1.2
 
31
  jsonschema==4.19.2
32
  jsonschema-specifications==2023.7.1
33
  kiwisolver==1.4.5
 
 
 
34
  markdown-it-py==3.0.0
35
  MarkupSafe==2.1.3
36
  matplotlib==3.8.1
37
  mdurl==0.1.2
 
 
38
  multidict==6.0.4
 
 
 
39
  numpy==1.26.1
40
- openai==0.28.1
41
  orjson==3.9.10
42
  packaging==23.2
43
  pandas==2.1.2
44
  Pillow==10.1.0
 
 
 
 
 
 
 
45
  pydantic==2.4.2
46
  pydantic_core==2.10.1
47
  pydub==0.25.1
@@ -55,22 +84,27 @@ referencing==0.30.2
55
  requests==2.31.0
56
  rich==13.6.0
57
  rpds-py==0.12.0
 
 
 
58
  semantic-version==2.10.0
59
  shellingham==1.5.4
60
  six==1.16.0
61
  sniffio==1.3.0
 
 
62
  starlette==0.27.0
 
 
63
  tomlkit==0.12.0
64
  toolz==0.12.0
 
65
  tqdm==4.66.1
66
  typer==0.9.0
67
  typing_extensions==4.8.0
68
  tzdata==2023.3
 
69
  urllib3==2.0.7
70
  uvicorn==0.24.0.post1
71
  websockets==11.0.3
72
  yarl==1.9.2
73
- noisereduce==3.0.0
74
- librosa==0.10.1
75
- pydub==0.25.1
76
- torch==2.1.1
 
6
  anyio==3.7.1
7
  async-timeout==4.0.3
8
  attrs==23.1.0
9
+ audioread==3.0.1
10
+ cachetools==5.3.2
11
  certifi==2023.7.22
12
+ cffi==1.16.0
13
  charset-normalizer==3.3.2
14
  click==8.1.7
15
  colorama==0.4.6
16
  contourpy==1.2.0
17
  cycler==0.12.1
18
+ decorator==5.1.1
19
  distro==1.8.0
20
  fastapi==0.104.1
21
  ffmpy==0.3.1
 
23
  fonttools==4.44.0
24
  frozenlist==1.4.0
25
  fsspec==2023.10.0
26
+ google-api-core==2.14.0
27
+ google-api-python-client==2.109.0
28
+ google-auth==2.23.4
29
+ google-auth-httplib2==0.1.1
30
+ google-cloud-speech==2.22.0
31
+ googleapis-common-protos==1.61.0
32
+ gradio==4.7.1
33
  gradio_client==0.7.0
34
+ grpcio==1.59.3
35
+ grpcio-status==1.59.3
36
  h11==0.14.0
37
  httpcore==1.0.1
38
+ httplib2==0.22.0
39
  httpx==0.25.1
40
  huggingface-hub==0.18.0
41
  idna==3.4
42
  importlib-resources==6.1.1
43
  Jinja2==3.1.2
44
+ joblib==1.3.2
45
  jsonschema==4.19.2
46
  jsonschema-specifications==2023.7.1
47
  kiwisolver==1.4.5
48
+ lazy_loader==0.3
49
+ librosa==0.10.1
50
+ llvmlite==0.41.1
51
  markdown-it-py==3.0.0
52
  MarkupSafe==2.1.3
53
  matplotlib==3.8.1
54
  mdurl==0.1.2
55
+ mpmath==1.3.0
56
+ msgpack==1.0.7
57
  multidict==6.0.4
58
+ networkx==3.2.1
59
+ noisereduce==3.0.0
60
+ numba==0.58.1
61
  numpy==1.26.1
62
+ openai==1.3.6
63
  orjson==3.9.10
64
  packaging==23.2
65
  pandas==2.1.2
66
  Pillow==10.1.0
67
+ platformdirs==4.0.0
68
+ pooch==1.8.0
69
+ proto-plus==1.22.3
70
+ protobuf==4.25.1
71
+ pyasn1==0.5.1
72
+ pyasn1-modules==0.3.0
73
+ pycparser==2.21
74
  pydantic==2.4.2
75
  pydantic_core==2.10.1
76
  pydub==0.25.1
 
84
  requests==2.31.0
85
  rich==13.6.0
86
  rpds-py==0.12.0
87
+ rsa==4.9
88
+ scikit-learn==1.3.2
89
+ scipy==1.11.4
90
  semantic-version==2.10.0
91
  shellingham==1.5.4
92
  six==1.16.0
93
  sniffio==1.3.0
94
+ soundfile==0.12.1
95
+ soxr==0.3.7
96
  starlette==0.27.0
97
+ sympy==1.12
98
+ threadpoolctl==3.2.0
99
  tomlkit==0.12.0
100
  toolz==0.12.0
101
+ torch==2.1.1
102
  tqdm==4.66.1
103
  typer==0.9.0
104
  typing_extensions==4.8.0
105
  tzdata==2023.3
106
+ uritemplate==4.1.1
107
  urllib3==2.0.7
108
  uvicorn==0.24.0.post1
109
  websockets==11.0.3
110
  yarl==1.9.2
 
 
 
 
transcription_service.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from google.cloud import speech
2
+ # import json
3
+ # import os
4
+ # os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'msq-ai-78bdccb055f4.json'
5
+
6
+ # def speech_to_text(
7
+ # config: speech.RecognitionConfig,
8
+ # audio: speech.RecognitionAudio,
9
+ # ) -> speech.RecognizeResponse:
10
+ # client = speech.SpeechClient()
11
+
12
+ # # Synchronous speech recognition request
13
+ # response = client.recognize(config=config, audio=audio)
14
+
15
+ # return response
16
+
17
+
18
+ # def print_response(response: speech.RecognizeResponse):
19
+ # for result in response.results:
20
+ # return print_result(result)
21
+
22
+
23
+ # def print_result(result: speech.SpeechRecognitionResult):
24
+ # best_alternative = result.alternatives[0]
25
+ # # print("-" * 80)
26
+ # # print(f"language_code: {result.language_code}")
27
+ # return best_alternative.transcript
28
+ # # print(f"confidence: {best_alternative.confidence:.0%}")
29
+
30
+
31
+ # def main(audio,language):
32
+ # config = speech.RecognitionConfig(
33
+ # language_code=language,
34
+ # )
35
+ # audio = speech.RecognitionAudio(
36
+ # uri=audio,
37
+ # )
38
+ # response=speech_to_text(config,audio)
39
+ # return print_response(response)
40
+
41
+
42
+ import os
43
+ from google.cloud import speech
44
+
45
+
46
def transcribe_speech_local(wav_file,language):
    """Transcribe a local audio file with Google's Speech-to-Text API.

    Args:
        wav_file: Path of the audio file. It is read in binary mode and
            sent inline with the request.
        language: Language code passed to the recogniser as
            ``language_code``.

    Returns:
        The top alternative of every result in the response, joined with
        single spaces, or ``""`` when the response has no results. The
        previous version returned only the first result and ``None`` when
        there were none.
    """
    client = speech.SpeechClient()

    with open(wav_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    # Encoding and sample rate are left unset, as in the original, which
    # kept them commented out.
    config = speech.RecognitionConfig(
        language_code=language,
        enable_automatic_punctuation=True,
    )

    response = client.recognize(config=config, audio=audio)

    # A response can hold several results; returning from inside the loop
    # silently dropped everything after the first one.
    pieces = [
        result.alternatives[0].transcript.strip()
        for result in response.results
        if result.alternatives
    ]
    return " ".join(piece for piece in pieces if piece)
67
+
68
+ # Example usage
69
+