nimool committed on
Commit
7ba7f9e
1 Parent(s): 6daeff1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -15
app.py CHANGED
@@ -4,10 +4,14 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
  import gradio as gr
5
  import sox
6
  import subprocess
7
- from google_spell_checker import GoogleSpellChecker
 
8
 
 
 
9
 
10
- spell_checker = GoogleSpellChecker(lang="fa")
 
11
 
12
  def read_file_and_process(wav_file):
13
  filename = wav_file.split('.')[0]
@@ -34,22 +38,33 @@ def parse_transcription(logits):
34
  return transcription
35
 
36
 
37
- def corrector(sentence):
38
- check_spell = spell_checker.check(sentence)
39
- if check_spell[0] is False:
40
- corrected = check_spell[1]
41
- return corrected
42
- else:
43
- return sentence
44
-
 
 
 
 
 
 
 
 
 
 
 
 
45
  def parse(wav_file):
46
  input_values = read_file_and_process(wav_file)
47
  with torch.no_grad():
48
  logits = model(**input_values).logits
49
- sentence = parse_transcription(logits)
50
- corrected_sent = corrector(sentence)
51
- return corrected_sent
52
-
53
 
54
  # def parse(wav_file):
55
  # check_spell = ''
@@ -84,7 +99,7 @@ txtbox = gr.Textbox(
84
 
85
  title = "Speech-to-Text (persian)"
86
  description = "، توجه داشته باشید که هرچه گفتار شما شمرده تر باشد خروجی با کیفیت تری دارید.روی دکمه ضبط صدا کلیک کنید و سپس دسترسی مرورگر خود را به میکروفون دستگاه بدهید، سپس شروع به صحبت کنید و برای اتمام ضبط دوباره روی دکمه کلیک کنید"
87
- article = "<p style='text-align: center'><a href='https://github.com/nimaprgrmr'>Large-Scale Self- and Semi-Supervised Learning for Speech Translation</a></p>"
88
 
89
  demo = gr.Interface(fn=parse, inputs = input_, outputs=txtbox, title=title, description=description, article = article,
90
  streaming=True, interactive=True,
 
4
  import gradio as gr
5
  import sox
6
  import subprocess
7
+ # from google_spell_checker import GoogleSpellChecker
8
+ import openai
9
 
10
# OpenAI API key, read from the OPENAI_API_KEY environment variable
# (e.g. a Space/repository secret) so the secret never lives in source control.
# SECURITY: the key that used to be hardcoded on this line was committed to a
# public repository and must be treated as compromised: revoke and rotate it.
import os

api_key = os.environ.get("OPENAI_API_KEY")
12
 
13
+
14
+ # spell_checker = GoogleSpellChecker(lang="fa")
15
 
16
  def read_file_and_process(wav_file):
17
  filename = wav_file.split('.')[0]
 
38
  return transcription
39
 
40
 
41
+ # def corrector(sentence):
42
+ # check_spell = spell_checker.check(sentence)
43
+ # if check_spell[1] is None:
44
+ # return sentence
45
+ # else:
46
+ # return check_spell[1]
47
def correct_text_with_gpt(text):
    """Ask an OpenAI completion model to correct *text* and return the result.

    Args:
        text: Transcription to be corrected.

    Returns:
        The text of the first completion choice with surrounding whitespace
        stripped, or an empty string when *text* is empty or whitespace-only
        (no API request is made in that case).

    Note:
        This relies on the legacy ``openai.Completion`` interface and the
        ``text-davinci-003`` engine.
        TODO(review): confirm both are still available in the deployed
        environment.
    """
    # Nothing to correct: skip the paid request, which would otherwise be
    # prompted with an empty quote and could return invented text.
    if not text or not text.strip():
        return ""

    openai.api_key = api_key
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=f"Please correct the following text: '{text}'\n\nCorrected text:",
        max_tokens=1000,
        # Sampling randomness: lower values give more deterministic output.
        temperature=0.5,
        # Nucleus-sampling threshold; 1.0 keeps the full token distribution.
        top_p=1.0,
        # Penalizes tokens in proportion to how often they already appeared.
        frequency_penalty=0.2,
        # Penalizes tokens that have already appeared at all.
        presence_penalty=0.5,
    )
    return response.choices[0].text.strip()
59
+
60
+
61
  def parse(wav_file):
62
  input_values = read_file_and_process(wav_file)
63
  with torch.no_grad():
64
  logits = model(**input_values).logits
65
+ return correct_text_with_gpt(parse_transcription(logits))
66
+
67
+
 
68
 
69
  # def parse(wav_file):
70
  # check_spell = ''
 
99
 
100
  title = "Speech-to-Text (persian)"
101
  description = "، توجه داشته باشید که هرچه گفتار شما شمرده تر باشد خروجی با کیفیت تری دارید.روی دکمه ضبط صدا کلیک کنید و سپس دسترسی مرورگر خود را به میکروفون دستگاه بدهید، سپس شروع به صحبت کنید و برای اتمام ضبط دوباره روی دکمه کلیک کنید"
102
# The article footer is disabled, but `article` is still passed to
# gr.Interface below, so the name must remain defined to avoid a NameError
# at startup. None means no article is rendered.
article = None
103
 
104
  demo = gr.Interface(fn=parse, inputs = input_, outputs=txtbox, title=title, description=description, article = article,
105
  streaming=True, interactive=True,