Spaces:

nimool
/

gbn2

Sleeping

App Files Files Community

nimool committed on Oct 10, 2023

Commit

7ba7f9e

•

1 Parent(s): 6daeff1

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -15

app.py CHANGED Viewed

@@ -4,10 +4,14 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import gradio as gr
 import sox
 import subprocess
-from google_spell_checker import GoogleSpellChecker
-spell_checker = GoogleSpellChecker(lang="fa")
 def read_file_and_process(wav_file):
     filename = wav_file.split('.')[0]
@@ -34,22 +38,33 @@ def parse_transcription(logits):
     return transcription
-def corrector(sentence):
-  check_spell = spell_checker.check(sentence)
-  if check_spell[0] is False:
-    corrected = check_spell[1]
-    return corrected
-  else:
-    return sentence
 def parse(wav_file):
     input_values = read_file_and_process(wav_file)
     with torch.no_grad():
         logits = model(**input_values).logits
-    sentence = parse_transcription(logits)
-    corrected_sent = corrector(sentence)
-    return corrected_sent
 # def parse(wav_file):
 #     check_spell = ''
@@ -84,7 +99,7 @@ txtbox = gr.Textbox(
 title = "Speech-to-Text (persian)"
 description = "، توجه داشته باشید که هرچه گفتار شما شمرده تر باشد خروجی با کیفیت تری دارید.روی دکمه ضبط صدا کلیک کنید و سپس دسترسی مرورگر خود را به میکروفون دستگاه بدهید، سپس شروع به صحبت کنید و برای اتمام ضبط دوباره روی دکمه کلیک کنید"
-article = "<p style='text-align: center'><a href='https://github.com/nimaprgrmr'>Large-Scale Self- and Semi-Supervised Learning for Speech Translation</a></p>"
 demo = gr.Interface(fn=parse, inputs = input_,  outputs=txtbox, title=title, description=description, article = article,
              streaming=True, interactive=True,

 import gradio as gr
 import sox
 import subprocess
+# from google_spell_checker import GoogleSpellChecker
+import openai
+# Set your OpenAI API key
+api_key = "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"
+# spell_checker = GoogleSpellChecker(lang="fa")
 def read_file_and_process(wav_file):
     filename = wav_file.split('.')[0]
     return transcription
+# def corrector(sentence):
+#   check_spell = spell_checker.check(sentence)
+#   if check_spell[1] is None:
+#     return sentence
+#   else:
+#     return check_spell[1]
+def correct_text_with_gpt(text):
+    openai.api_key = api_key
+    response = openai.Completion.create(
+        engine="text-davinci-003",
+        prompt=f"Please correct the following text: '{text}'\n\nCorrected text:",
+        max_tokens=1000,
+        temperature=0.5,            # Temperature controls the randomness of the model's output. A higher value like 1.0 makes the output more random, while a lower value like 0.2 makes it more deterministic and focused.
+        top_p=1.0,                  # This parameter controls the diversity of the output. It sets a threshold for the cumulative probability of words to keep. Smaller values like 0.2 will result in more focused responses, while larger values like 0.8 will allow for more diversity.
+        frequency_penalty=0.2,     # encourages the use of less common words
+        presence_penalty=0.5,       # discourages the use of common words.
+    )
+    return response.choices[0].text.strip()
 def parse(wav_file):
+    # Transcribe the audio referenced by `wav_file` and return the text after
+    # it has been passed through `correct_text_with_gpt`.
     input_values = read_file_and_process(wav_file)
+    # Inference only: run the forward pass without tracking gradients.
     with torch.no_grad():
         logits = model(**input_values).logits
+    # Decode the logits into a transcription, then send it to the corrector.
     return correct_text_with_gpt(parse_transcription(logits))
 # def parse(wav_file):
 #     check_spell = ''
 title = "Speech-to-Text (persian)"
 description = "، توجه داشته باشید که هرچه گفتار شما شمرده تر باشد خروجی با کیفیت تری دارید.روی دکمه ضبط صدا کلیک کنید و سپس دسترسی مرورگر خود را به میکروفون دستگاه بدهید، سپس شروع به صحبت کنید و برای اتمام ضبط دوباره روی دکمه کلیک کنید"
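+# NOTE(review): `article` is still passed to gr.Interface(...) below; with this
+# assignment commented out that call raises NameError unless `article` is
+# defined elsewhere in the file -- confirm.
+# article = "<p style='text-align: center'><a href='https://github.com/nimaprgrmr'>Large-Scale Self- and Semi-Supervised Learning for Speech Translation</a></p>"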
 demo = gr.Interface(fn=parse, inputs = input_,  outputs=txtbox, title=title, description=description, article = article,
              streaming=True, interactive=True,