ysharma HF staff committed on
Commit
511d264
1 Parent(s): 71843eb
Files changed (1) hide show
  1. app.py +32 -7
app.py CHANGED
@@ -7,12 +7,23 @@ import requests
7
  import tempfile
8
  from neon_tts_plugin_coqui import CoquiTTS
9
  from datasets import load_dataset
10
- #import whisper
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- dataset = load_dataset("ysharma/short_jokes")
13
 
14
  # Language common in both the multilingual models - English, Chinese, Spanish, and French etc
15
- # /model 1: Whisper: Speech-to-text
16
  model = whisper.load_model("base")
17
  #model_med = whisper.load_model("medium")
18
  # Languages covered in Whisper - (exhaustive list) :
@@ -48,11 +59,25 @@ print(f"Languages for Coqui are: {LANGUAGES}")
48
  # nl - dutch, fi - finnish, sl - slovenian, lv - latvian, ga - ??
49
 
50
 
 
51
  # Driver function
52
  def driver_fun(audio) :
53
  translation, lang = whisper_stt(audio) # older : transcribe, translation, lang
54
  #text1 = model.transcribe(audio)["text"]
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  #if translation
57
  #For now only taking in English text for Bloom prompting as inference model is not high spec
58
  #text_generated = lang_model_response(transcribe, lang)
@@ -61,8 +86,8 @@ def driver_fun(audio) :
61
  #if lang in ['es', 'fr']:
62
  # speech = tts(transcribe, lang)
63
  #else:
64
- speech = tts(translation, 'en') #'en')
65
- return translation, speech #transcribe,
66
 
67
 
68
  # Whisper - speech-to-text
@@ -117,9 +142,9 @@ with demo:
117
  #out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
118
  with gr.Column():
119
  out_audio = gr.Audio(label='Audio response form CoquiTTS')
120
- #out_generated_text = gr.Textbox(label= 'AI response to your query in your preferred language using Bloom! ')
121
  #out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
122
 
123
- b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_audio]) #out_translation_en, out_generated_text,out_generated_text_en,
124
 
125
  demo.launch(enable_queue=True, debug=True)
 
7
  import tempfile
8
  from neon_tts_plugin_coqui import CoquiTTS
9
  from datasets import load_dataset
10
+ import random
11
+
12
+ dataset = load_dataset("ysharma/short_jokes", split="train")
13
+
14
+ # Model 2: Sentence Transformer
15
+ API_URL = "https://api-inference.huggingface.co/models/sentence-transformers/msmarco-distilbert-base-tas-b"
16
+ HF_TOKEN = os.environ["HF_TOKEN"]
17
+ headers = {"Authorization": f"Bearer {HF_TOKEN}"}
18
+
19
def query(payload):
    """POST `payload` to the HF Inference API endpoint and return the decoded JSON.

    Args:
        payload: JSON-serializable dict in the Inference API request format
            (e.g. {"inputs": {...}}).

    Returns:
        The parsed JSON response — for the sentence-similarity task this is a
        list of float scores, but on model-load/errors the API returns a dict.

    Raises:
        requests.Timeout: if the API does not respond within the timeout.
            (Fix: the original call had no timeout and could hang forever.)
    """
    # `API_URL` and `headers` (with the bearer token) are module-level constants.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    return response.json()
22
+
23
 
 
24
 
25
  # Language common in both the multilingual models - English, Chinese, Spanish, and French etc
26
+ # Model 1: Whisper: Speech-to-text
27
  model = whisper.load_model("base")
28
  #model_med = whisper.load_model("medium")
29
  # Languages covered in Whisper - (exhaustive list) :
 
59
  # nl - dutch, fi - finnish, sl - slovenian, lv - latvian, ga - ??
60
 
61
 
62
+
63
# Driver function: speech -> transcript -> best-matching joke -> spoken joke
def driver_fun(audio):
    """Transcribe `audio`, pick the joke most similar to a probe sentence, and speak it.

    Args:
        audio: audio input as delivered by the Gradio Audio component.

    Returns:
        tuple: (translation, joke, speech) — the English translation of the
        audio, the selected joke text, and the TTS audio for that joke.
    """
    translation, lang = whisper_stt(audio)  # older : transcribe, translation, lang
    # text1 = model.transcribe(audio)["text"]

    # Pick a random 5000-joke window from the ~231k-row dataset, clamped so the
    # slice stays inside [0, 231657).
    random_val = random.randrange(0, 231657)
    if random_val < 226657:
        lower_limit = random_val
        upper_limit = random_val + 5000
    else:
        lower_limit = random_val - 5000
        upper_limit = random_val
    print(f"lower_limit : upper_limit = {lower_limit} : {upper_limit}")
    dataset_subset = dataset['Joke'][lower_limit : upper_limit]
    # NOTE(review): the source sentence is hard-coded to the API example text
    # "That is a happy person"; it presumably should be `translation` so the
    # joke matches what the user said — confirm intent before changing.
    data = query({"inputs": {"source_sentence": "That is a happy person", "sentences": dataset_subset}})
    # TODO(review): on model-load/errors the API returns a dict, not a score
    # list, and max(data) would then fail — consider guarding.
    max_match_score = max(data)
    indx_score = data.index(max_match_score)
    # BUG FIX: original did `max_match_score[indx_score]`, subscripting a float
    # (TypeError). The joke lives in the candidate list, not in the score.
    joke = dataset_subset[indx_score]
    # For now only English TTS, as the inference model is not high spec.
    speech = tts(joke, 'en')
    return translation, joke, speech  # transcribe,
91
 
92
 
93
  # Whisper - speech-to-text
 
142
  #out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
143
  with gr.Column():
144
  out_audio = gr.Audio(label='Audio response form CoquiTTS')
145
+ out_generated_joke = gr.Textbox(label= 'Joke returned! ')
146
  #out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
147
 
148
+ b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_generated_joke, out_audio]) #out_translation_en, out_generated_text,out_generated_text_en,
149
 
150
  demo.launch(enable_queue=True, debug=True)