Xingde Jiang committed on
Commit
877c60c
1 Parent(s): 63e488a

add text, audio, video capability

Browse files
Files changed (7) hide show
  1. app.py +868 -61
  2. audios/tempfile.mp3 +0 -0
  3. azure_utils.py +155 -0
  4. images/humancare.jpg +0 -0
  5. polly_utils.py +635 -0
  6. requirements.txt +10 -1
  7. videos/humancare.mp4 +0 -0
app.py CHANGED
@@ -14,6 +14,28 @@ Original file is located at
14
  # !pip install datasets
15
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  import gradio as gr
18
  from sentence_transformers import SentenceTransformer, CrossEncoder, util
19
  from torch import tensor as torch_tensor
@@ -22,9 +44,9 @@ from datasets import load_dataset
22
  """# import models"""
23
 
24
  bi_encoder = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')
25
- bi_encoder.max_seq_length = 256 #Truncate long passages to 256 tokens
26
 
27
- #The bi-encoder will retrieve top_k documents. We use a cross-encoder, to re-rank the results list to improve the quality
28
  cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
29
 
30
  """# import datasets"""
@@ -35,75 +57,860 @@ dataset_embed = load_dataset("gfhayworth/wiki_mini_embed", split='train')
35
  dataset_embed_pd = dataset_embed.to_pandas()
36
  mycorpus_embeddings = torch_tensor(dataset_embed_pd.values)
37
 
38
- def search(query, top_k=20, top_n = 1):
39
- question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
40
- question_embedding = question_embedding #.cuda()
41
- hits = util.semantic_search(question_embedding, mycorpus_embeddings, top_k=top_k)
42
- hits = hits[0] # Get the hits for the first query
43
 
44
- ##### Re-Ranking #####
45
- cross_inp = [[query, mypassages[hit['corpus_id']]] for hit in hits]
46
- cross_scores = cross_encoder.predict(cross_inp)
 
 
 
47
 
48
- # Sort results by the cross-encoder scores
49
- for idx in range(len(cross_scores)):
50
- hits[idx]['cross-score'] = cross_scores[idx]
 
 
 
 
 
 
 
 
 
 
51
 
52
- hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
53
- predictions = hits[:top_n]
54
- return predictions
55
- # for hit in hits[0:3]:
56
- # print("\t{:.3f}\t{}".format(hit['cross-score'], mypassages[hit['corpus_id']].replace("\n", " ")))
57
 
58
  def get_text(qry):
59
- predictions = search(qry)
60
- prediction_text = []
61
- for hit in predictions:
62
- prediction_text.append("{}".format(mypassages[hit['corpus_id']]))
63
- return prediction_text
64
 
65
- # def prt_rslt(qry):
66
- # rslt = get_text(qry)
67
- # for r in rslt:
68
- # print(r)
 
69
 
70
  # prt_rslt("who is the best rapper in the world?")
71
 
72
- """# chat example"""
73
-
74
- def chat(message, history):
75
- history = history or []
76
- message = message.lower()
77
-
78
- responses = get_text(message)
79
- for response in responses:
80
- history.append((message, response))
81
- return history, history
82
-
83
- css=".gradio-container {background-color: lightgray}"
84
-
85
- with gr.Blocks(css=css) as demo:
86
- history_state = gr.State()
87
- gr.Markdown('# WikiBot')
88
- title='Wikipedia Chatbot'
89
- description='chatbot with search on Wikipedia'
90
- with gr.Row():
91
- chatbot = gr.Chatbot()
92
- with gr.Row():
93
- message = gr.Textbox(label='Input your question here:',
94
- placeholder='How many countries are in Europe?',
95
- lines=1)
96
- submit = gr.Button(value='Send',
97
- variant='secondary').style(full_width=False)
98
- submit.click(chat,
99
- inputs=[message, history_state],
100
- outputs=[chatbot, history_state])
101
- gr.Examples(
102
- examples=["How many countries are in Europe?",
103
- "Was Roman Emperor Constantine I a Christian?",
104
- "Who is the best rapper in the world?"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  inputs=message
106
  )
107
 
108
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
 
 
14
  # !pip install datasets
15
 
16
 
17
+ from azure_utils import AzureVoiceData
18
+ from polly_utils import PollyVoiceData, NEURAL_ENGINE
19
+ from langchain.prompts import PromptTemplate
20
+ from openai.error import AuthenticationError, InvalidRequestError, RateLimitError
21
+ import re
22
+ import sys
23
+ from io import StringIO
24
+ from threading import Lock
25
+ from langchain.llms import OpenAI
26
+ from langchain.chains.conversation.memory import ConversationBufferMemory
27
+ from langchain.agents import tool, load_tools, initialize_agent
28
+ from langchain import ConversationChain, LLMChain
29
+ import whisper
30
+ import warnings
31
+ import boto3
32
+ import datetime
33
+ from typing import Optional, Tuple
34
+ from contextlib import closing
35
+ # Console to variable
36
+ import io
37
+ import requests
38
+ import os
39
  import gradio as gr
40
  from sentence_transformers import SentenceTransformer, CrossEncoder, util
41
  from torch import tensor as torch_tensor
 
44
  """# import models"""
45
 
46
  bi_encoder = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')
47
+ bi_encoder.max_seq_length = 256 # Truncate long passages to 256 tokens
48
 
49
+ # The bi-encoder will retrieve top_k documents. We use a cross-encoder, to re-rank the results list to improve the quality
50
  cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
51
 
52
  """# import datasets"""
 
57
  dataset_embed_pd = dataset_embed.to_pandas()
58
  mycorpus_embeddings = torch_tensor(dataset_embed_pd.values)
59
 
 
 
 
 
 
60
 
61
def search(query, top_k=20, top_n=1):
    """Retrieve candidate passages for ``query`` and re-rank them.

    The bi-encoder embeds the query, ``util.semantic_search`` returns up to
    ``top_k`` hits against ``mycorpus_embeddings``, and the cross-encoder then
    scores each (query, passage) pair.  Every hit dict gains a
    ``'cross-score'`` entry and the ``top_n`` best-scoring hits are returned.
    """
    query_embedding = bi_encoder.encode(query, convert_to_tensor=True)
    # A single query is submitted, so only the first result list is relevant.
    candidates = util.semantic_search(
        query_embedding, mycorpus_embeddings, top_k=top_k)[0]

    ##### Re-Ranking #####
    pairs = [[query, mypassages[candidate['corpus_id']]]
             for candidate in candidates]
    scores = cross_encoder.predict(pairs)

    # Attach the cross-encoder score to its hit, then order best-first.
    for candidate, score in zip(candidates, scores):
        candidate['cross-score'] = score

    ranked = sorted(candidates, key=lambda hit: hit['cross-score'],
                    reverse=True)
    return ranked[:top_n]
79
+ # for hit in hits[0:3]:
80
+ # print("\t{:.3f}\t{}".format(hit['cross-score'], mypassages[hit['corpus_id']].replace("\n", " ")))
81
 
 
 
 
 
 
82
 
83
def get_text(qry):
    """Return the passage text of every hit that ``search`` yields for ``qry``."""
    return ["{}".format(mypassages[hit['corpus_id']]) for hit in search(qry)]
89
 
90
+
91
@tool("mysearch", return_direct=True)
def prt_rslt(query: str) -> str:
    """Look up the query in the local passage corpus and return the best matching passage text."""
    # The original body read the undefined name `qry`; the parameter is `query`.
    # The docstring above is also what the @tool decorator uses as the tool
    # description shown to the agent, so it must be present.
    return '\n'.join(get_text(query))
95
 
96
  # prt_rslt("who is the best rapper in the world?")
97
 
98
+
99
+ # """# chat example"""
100
+ # def chat(message, history):
101
+ # history = history or []
102
+ # message = message.lower()
103
+
104
+ # responses = get_text(message)
105
+ # for response in responses:
106
+ # history.append((message, response))
107
+ # return history, history
108
+
109
+
110
+ # with gr.Blocks(css=CSS) as demo:
111
+ # history_state = gr.State()
112
+ # gr.Markdown('# WikiBot')
113
+ # title = 'Wikipedia Chatbot'
114
+ # description = 'chatbot with search on Wikipedia'
115
+ # with gr.Row():
116
+ # chatbot = gr.Chatbot()
117
+ # with gr.Row():
118
+ # message = gr.Textbox(label='Input your question here:',
119
+ # placeholder='How many countries are in Europe?',
120
+ # lines=1)
121
+ # submit = gr.Button(value='Send',
122
+ # variant='secondary').style(full_width=False)
123
+ # submit.click(chat,
124
+ # inputs=[message, history_state],
125
+ # outputs=[chatbot, history_state])
126
+ # gr.Examples(
127
+ # examples=["How many countries are in Europe?",
128
+ # "Was Roman Emperor Constantine I a Christian?",
129
+ # "Who is the best rapper in the world?"],
130
+ # inputs=message
131
+ # )
132
+
133
+ # demo.launch()
134
+
135
+
136
# API credentials for optional LangChain tools; both must be set in the
# environment or the module fails at import time with KeyError.
news_api_key = os.environ["NEWS_API_KEY"]
tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]

# Tool names offered in the settings UI, and the subset enabled at start-up.
TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'pal-math', 'pal-colored-objects', 'news-api', 'tmdb-api',
              'open-meteo-api']  # 'google-search'
TOOLS_DEFAULT_LIST = ['mysearch', 'serpapi', 'pal-math']
BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. It is not necessary to hit a button or key after pasting it."
MAX_TOKENS = 512
TEMPERATURE = 0

# Talking-head video settings
LOOPING_TALKING_HEAD = "videos/humancare.mp4"
TALKING_HEAD_WIDTH = "192"
MAX_TALKING_HEAD_TEXT_LENGTH = 155

# Pertains to Express-inator functionality
NUM_WORDS_DEFAULT = 0
MAX_WORDS = 400
FORMALITY_DEFAULT = "N/A"
TEMPERATURE_DEFAULT = 0.5
EMOTION_DEFAULT = "N/A"
LANG_LEVEL_DEFAULT = "N/A"
TRANSLATE_TO_DEFAULT = "N/A"
LITERARY_STYLE_DEFAULT = "N/A"
PROMPT_TEMPLATE = PromptTemplate(
    input_variables=["original_words", "num_words", "formality",
                     "emotions", "lang_level", "translate_to", "literary_style"],
    template="Restate {num_words}{formality}{emotions}{lang_level}{translate_to}{literary_style}the following: \n{original_words}\n",
)

# Text-to-speech voice catalogues
POLLY_VOICE_DATA = PollyVoiceData()
AZURE_VOICE_DATA = AzureVoiceData()
VOICE_GENDER = 'Female'  # "Male"

# Pertains to WHISPER functionality
WHISPER_DETECT_LANG = "Detect language"


# Whisper speech-to-text model, loaded once at import time.
warnings.filterwarnings("ignore")
WHISPER_MODEL = whisper.load_model("tiny")
print("WHISPER_MODEL", WHISPER_MODEL)


# gradio settings
# css
CSS = ".gradio-container {background-color: lightgray}"

# placeholder for chat text input
# (the original had a trailing comma here, which made this a 1-tuple
# instead of the string that gr.Textbox(placeholder=...) expects)
PLACEHOLDER = "What is my plan benefit?"

# example questions
EXAMPLES = ["How many people live in Canada?",
            "What is 2 to the 30th power?",
            "If x+y=10 and x-y=4, what are x and y?",
            "How much did it rain in SF today?",
            "Get me information about the movie 'Avatar'",
            "What are the top tech headlines in the US?",
            "On the desk, you see two blue booklets, two purple booklets, and two yellow pairs of sunglasses - "
            "if I remove all the pairs of sunglasses from the desk, how many purple items remain on it?"]
AUTHORS = """
<p>This application, developed by Greg Hayworth, Srikanth Tangelloju, Lincoln Snyder, Michal Piekarczyk, and Xingde Jiang,
demonstrates a conversational agent implemented with OpenAI GPT-3.5 and LangChain.
When necessary, it leverages tools for complex math, searching the internet, and accessing news and weather.
Uses talking heads from <a href='https://exh.ai/'>Ex-Human</a>.
For faster inference without waiting in queue, you may duplicate the space.
</p>"""
203
+ # UNCOMMENT TO USE WHISPER
204
+
205
+
206
def transcribe(aud_inp, whisper_lang):
    """Transcribe the audio file at ``aud_inp`` with the Whisper model.

    Returns an empty string when no audio is supplied or when decoding yields
    no text.  When ``whisper_lang`` is anything other than the
    "Detect language" sentinel it is mapped to a Whisper language code and
    passed to the decoder; otherwise default decoding options are used.
    """
    if aud_inp is None:
        return ""

    samples = whisper.pad_or_trim(whisper.load_audio(aud_inp))
    mel = whisper.log_mel_spectrogram(samples).to(WHISPER_MODEL.device)
    # The detection result is not consumed below; the call is kept as in the
    # original implementation.
    WHISPER_MODEL.detect_language(mel)

    if whisper_lang == WHISPER_DETECT_LANG:
        options = whisper.DecodingOptions()
    else:
        lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
        options = whisper.DecodingOptions(language=lang_code)

    result = whisper.decode(WHISPER_MODEL, mel, options)
    print("result.text", result.text)
    return result.text if result and result.text else ""
224
+
225
+
226
+ # Pertains to Express-inator functionality
227
def transform_text(desc, express_chain, num_words, formality,
                   anticipation_level, joy_level, trust_level,
                   fear_level, surprise_level, sadness_level, disgust_level, anger_level,
                   lang_level, translate_to, literary_style):
    """Optionally restate ``desc`` according to the Express-inator settings.

    Each setting contributes a phrase ("using up to N words, ", "in a formal
    manner, ", ...) to a "Restate ... the following" prompt.  When at least
    one phrase is non-empty and ``express_chain`` is available, the chain is
    run and its stripped output is used; otherwise ``desc`` is passed through.
    Every newline in the result is doubled before it is returned.
    """
    num_words_prompt = ""
    if num_words and int(num_words) != 0:
        num_words_prompt = "using up to " + str(num_words) + " words, "

    # Normalise the selector values to lower case before comparing to "n/a".
    formality = formality.lower()
    emotion_levels = [
        level.lower()
        for level in (anticipation_level, joy_level, trust_level, fear_level,
                      surprise_level, sadness_level, disgust_level, anger_level)
    ]

    formality_str = "" if formality == "n/a" else "in a " + formality + " manner, "

    # Keep only the emotions the user actually selected.
    emotions = [level for level in emotion_levels if level != "n/a"]
    if not emotions:
        emotions_str = ""
    elif len(emotions) == 1:
        emotions_str = "with emotion of " + emotions[0] + ", "
    else:
        emotions_str = "with emotions of " + \
            ", ".join(emotions[:-1]) + " and " + emotions[-1] + ", "

    # The language level is stated on its own only when no translation is
    # requested; otherwise it is folded into the translation phrase below.
    lang_level_str = ""
    if lang_level != LANG_LEVEL_DEFAULT and translate_to == TRANSLATE_TO_DEFAULT:
        lang_level_str = "at a " + lang_level + " level, "

    translate_to_str = ""
    if translate_to != TRANSLATE_TO_DEFAULT:
        level_prefix = "" if lang_level == TRANSLATE_TO_DEFAULT else lang_level + " level "
        translate_to_str = "translated to " + level_prefix + translate_to + ", "

    style_phrases = {
        "Prose": "as prose, ",
        "Summary": "as a summary, ",
        "Outline": "as an outline numbers and lower case letters, ",
        "Bullets": "as bullet points using bullets, ",
        "Poetry": "as a poem, ",
        "Haiku": "as a haiku, ",
        "Limerick": "as a limerick, ",
        "Joke": "as a very funny joke with a setup and punchline, ",
        "Knock-knock": "as a very funny knock-knock joke, ",
    }
    literary_style_str = ""
    if literary_style != LITERARY_STYLE_DEFAULT:
        literary_style_str = style_phrases.get(literary_style, "")

    chain_inputs = {'original_words': desc, 'num_words': num_words_prompt, 'formality': formality_str,
                    'emotions': emotions_str, 'lang_level': lang_level_str, 'translate_to': translate_to_str,
                    'literary_style': literary_style_str}
    formatted_prompt = PROMPT_TEMPLATE.format(**chain_inputs)

    trans_instr = "".join([num_words_prompt, formality_str, emotions_str,
                           lang_level_str, translate_to_str, literary_style_str])
    if express_chain and trans_instr.strip():
        generated_text = express_chain.run(chain_inputs).strip()
    else:
        print("Not transforming text")
        generated_text = desc

    # Double every newline so paragraphs stay separated when rendered.
    generated_text = generated_text.replace("\n", "\n\n")

    prompt_plus_generated = "GPT prompt: " + formatted_prompt + "\n\n" + generated_text

    timestamp = datetime.datetime.now() - datetime.timedelta(hours=5)
    print("\n==== date/time: " + str(timestamp) + " ====")
    print("prompt_plus_generated: " + prompt_plus_generated)

    return generated_text
341
+
342
+
343
def load_chain(tools_list, llm):
    """Build the conversational agent and the Express-inator chain.

    Args:
        tools_list: names of the tools to enable.  Names of tools defined in
            this file with ``@tool`` (currently ``"mysearch"``) are resolved
            locally; all other names are handed to LangChain's ``load_tools``.
        llm: the language model, or a falsy value when no API key is set.

    Returns:
        ``(chain, express_chain)``; both are ``None`` when ``llm`` is falsy.
    """
    if not llm:
        return None, None

    print("\ntools_list", tools_list)

    # load_tools() is expected to reject names it does not know, so the
    # custom tool must be appended as an object rather than passed by name
    # (TOOLS_DEFAULT_LIST includes 'mysearch').
    custom_tools = {"mysearch": prt_rslt}
    builtin_names = [name for name in tools_list if name not in custom_tools]
    tools = load_tools(builtin_names, llm=llm, news_api_key=news_api_key,
                       tmdb_bearer_token=tmdb_bearer_token)
    tools.extend(custom_tools[name]
                 for name in tools_list if name in custom_tools)

    memory = ConversationBufferMemory(memory_key="chat_history")

    chain = initialize_agent(
        tools, llm, agent="conversational-react-description", verbose=True, memory=memory)
    express_chain = LLMChain(llm=llm, prompt=PROMPT_TEMPLATE, verbose=True)

    return chain, express_chain
359
+
360
+
361
def set_openai_api_key(api_key):
    """Set the api key and return chain.

    A key is accepted only if it starts with ``"sk-"`` and is longer than 50
    characters.  The key is placed in ``OPENAI_API_KEY`` just long enough to
    construct the LLM and chains, and is always blanked again afterwards,
    even when construction fails, so it does not linger in the process
    environment.

    Returns:
        ``(chain, express_chain, llm)``, or ``(None, None, None)`` when the
        key is missing or does not look valid.
    """
    if not (api_key and api_key.startswith("sk-") and len(api_key) > 50):
        return None, None, None

    os.environ["OPENAI_API_KEY"] = api_key
    try:
        llm = OpenAI(temperature=TEMPERATURE, max_tokens=MAX_TOKENS)
        chain, express_chain = load_chain(TOOLS_DEFAULT_LIST, llm)
    finally:
        os.environ["OPENAI_API_KEY"] = ""
    return chain, express_chain, llm
372
+
373
+
374
def _run_chain_safely(chain, inp):
    """Run ``chain`` on ``inp`` and return ``(output, error_msg)``.

    On success ``error_msg`` is ``None``; on failure ``output`` is ``""`` and
    ``error_msg`` holds the user-facing description of the exception.
    """
    try:
        return chain.run(input=inp), None
    except AuthenticationError:
        return "", AUTH_ERR_MSG
    except RateLimitError as rle:
        return "", "\n\nRateLimitError: " + str(rle)
    except ValueError as ve:
        return "", "\n\nValueError: " + str(ve)
    except InvalidRequestError as ire:
        return "", "\n\nInvalidRequestError: " + str(ire)
    except Exception as e:
        return "", "\n\n" + BUG_FOUND_MSG + ":\n\n" + str(e)


def run_chain(chain, inp, capture_hidden_text):
    """Run the agent chain, optionally capturing what it prints.

    Args:
        chain: object exposing ``run(input=...)``.
        inp: the user's message.
        capture_hidden_text: when true, stdout is redirected while the chain
            runs and the cleaned-up text is returned as ``hidden_text``.

    Returns:
        ``(output, hidden_text)``.  Without capture, ``hidden_text`` is
        ``None`` and any error message is returned as ``output``.  With
        capture, an error leaves ``output`` empty and the message is appended
        to ``hidden_text``.
    """
    if not capture_hidden_text:
        output, error_msg = _run_chain_safely(chain, inp)
        return (output if error_msg is None else error_msg), None

    saved_stdout = sys.stdout
    captured = StringIO()
    sys.stdout = captured
    try:
        output, error_msg = _run_chain_safely(chain, inp)
    finally:
        # Always put the real stdout back, even if something that is not an
        # Exception subclass (e.g. KeyboardInterrupt) escapes the chain.
        sys.stdout = saved_stdout
    hidden_text = captured.getvalue()

    # remove escape characters from hidden_text
    hidden_text = re.sub(r'\x1b[^m]*m', '', hidden_text)

    # remove "Entering new AgentExecutor chain..." from hidden_text
    hidden_text = re.sub(
        r"Entering new AgentExecutor chain...\n", "", hidden_text)

    # remove "Finished chain." from hidden_text
    hidden_text = re.sub(r"Finished chain.", "", hidden_text)

    # Start each "Thought:" "Action:" "Observation:" "Input:" and "AI:" on a
    # fresh paragraph.
    for label in ("Thought:", "Action:", "Observation:", "Input:", "AI:"):
        hidden_text = re.sub(label, "\n\n" + label, hidden_text)

    if error_msg:
        hidden_text += error_msg

    print("hidden_text: ", hidden_text)
    return output, hidden_text
435
+
436
+
437
class ChatWrapper:
    """Callable chat handler; a lock ensures one request is processed at a time."""

    def __init__(self):
        self.lock = Lock()

    def __call__(
        self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain: Optional[ConversationChain],
        trace_chain: bool, speak_text: bool, talking_head: bool, monologue: bool, express_chain: Optional[LLMChain],
        num_words, formality, anticipation_level, joy_level, trust_level,
        fear_level, surprise_level, sadness_level, disgust_level, anger_level,
        lang_level, translate_to, literary_style
    ):
        """Execute the chat functionality.

        Runs the agent (or echoes the input in monologue mode), applies the
        Express-inator transformation, appends the exchange to ``history`` and
        optionally produces speech audio and/or a talking-head video.

        Returns:
            ``(history, history, html_video, temp_file, html_audio,
            temp_aud_file, "")``; the trailing empty string clears the
            message box.
        """
        # `with` releases the lock on every exit path, including exceptions.
        with self.lock:
            print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
            print("inp: " + inp)
            print("trace_chain: ", trace_chain)
            print("speak_text: ", speak_text)
            print("talking_head: ", talking_head)
            print("monologue: ", monologue)
            history = history or []
            # If chain is None, that is because no API key was provided.
            output = "Please paste your OpenAI key from openai.com to use this application. It is not necessary to hit a button or " \
                     "key after pasting it."
            hidden_text = output

            if chain and chain != "":
                # Set OpenAI key
                import openai
                openai.api_key = api_key
                if not monologue:
                    output, hidden_text = run_chain(
                        chain, inp, capture_hidden_text=trace_chain)
                else:
                    output, hidden_text = inp, None

            output = transform_text(output, express_chain, num_words, formality, anticipation_level, joy_level,
                                    trust_level,
                                    fear_level, surprise_level, sadness_level, disgust_level, anger_level,
                                    lang_level, translate_to, literary_style)

            text_to_display = output
            # hidden_text is None in monologue mode; concatenating it would
            # raise TypeError, so the trace is only prepended when present.
            if trace_chain and hidden_text is not None:
                text_to_display = hidden_text + "\n\n" + output
            history.append((inp, text_to_display))

            html_video, temp_file, html_audio, temp_aud_file = None, None, None, None
            if speak_text:
                if talking_head:
                    if len(output) <= MAX_TALKING_HEAD_TEXT_LENGTH:
                        html_video, temp_file = do_html_video_speak(
                            output, translate_to)
                    else:
                        # Too long for a lip-synced clip: show the looping
                        # head and play separate audio instead.
                        temp_file = LOOPING_TALKING_HEAD
                        html_video = create_html_video(
                            temp_file, TALKING_HEAD_WIDTH)
                        html_audio, temp_aud_file = do_html_audio_speak(
                            output, translate_to)
                else:
                    html_audio, temp_aud_file = do_html_audio_speak(
                        output, translate_to)
            elif talking_head:
                temp_file = LOOPING_TALKING_HEAD
                html_video = create_html_video(
                    temp_file, TALKING_HEAD_WIDTH)

        return history, history, html_video, temp_file, html_audio, temp_aud_file, ""
515
+
516
+
517
+ chat = ChatWrapper()
518
+
519
+
520
def do_html_audio_speak(words_to_speak, polly_language):
    """Synthesize ``words_to_speak`` with Amazon Polly and save it as an MP3.

    The voice is looked up from ``POLLY_VOICE_DATA`` for ``polly_language``
    and ``VOICE_GENDER``; when no voice is found the English "Joanna" neural
    voice is used.

    Returns:
        ``(html_audio, "audios/tempfile.mp3")`` on success, or
        ``(None, None)`` when the response has no audio stream or the file
        cannot be written.
    """
    session = boto3.Session(
        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
        region_name=os.environ["AWS_DEFAULT_REGION"]
    )
    polly_client = session.client('polly')

    voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(
        polly_language, VOICE_GENDER)
    if not voice_id:
        # Fallback voice ("Matthew" would be the male alternative).
        voice_id, language_code, engine = "Joanna", "en-US", NEURAL_ENGINE

    response = polly_client.synthesize_speech(
        Text=words_to_speak,
        OutputFormat='mp3',
        VoiceId=voice_id,
        LanguageCode=language_code,
        Engine=engine
    )

    if "AudioStream" not in response:
        # The response didn't contain audio data, exit gracefully
        print("Could not stream audio")
        return None, None

    # Persist the audio stream returned by Amazon Polly to a local file.
    with closing(response["AudioStream"]) as stream:
        try:
            with open('audios/tempfile.mp3', 'wb') as f:
                f.write(stream.read())
            temp_aud_file = gr.File("audios/tempfile.mp3")
            temp_aud_file_url = "/file=" + temp_aud_file.value['name']
            html_audio = f'<audio autoplay><source src={temp_aud_file_url} type="audio/mp3"></audio>'
        except IOError as error:
            # Could not write to file, exit gracefully
            print(error)
            return None, None

    return html_audio, "audios/tempfile.mp3"
565
+
566
+
567
def create_html_video(file_name, width):
    """Return an HTML ``<video>`` tag (square, autoplaying, muted, looping).

    NOTE(review): ``file_name`` is not used; the source URL is always taken
    from the module-level ``tmp_file`` component, so that component must
    exist before this function is called.
    """
    source_url = "/file=" + tmp_file.value['name']
    return f'<video width={width} height={width} autoplay muted loop><source src={source_url} type="video/mp4" poster="humancare.jpg"></video>'
571
+
572
+
573
def do_html_video_speak(words_to_speak, azure_language):
    """Request a lip-synced talking-head clip for ``words_to_speak``.

    Posts the text and the Azure voice for ``azure_language`` to the Ex-Human
    lipsync endpoint and, when the request succeeds, saves the returned
    video to ``videos/tempfile.mp4``.

    Returns:
        ``(html_video, "videos/tempfile.mp4")``.  ``html_video`` is a
        ``<video>`` tag on success and ``'<pre>no video</pre>'`` when the
        service responds with an error status or an empty body.
    """
    azure_voice = AZURE_VOICE_DATA.get_voice(azure_language, VOICE_GENDER)
    if not azure_voice:
        azure_voice = "en-US-ChristopherNeural"

    headers = {"Authorization": f"Bearer {os.environ['EXHUMAN_API_KEY']}"}
    body = {
        'bot_name': 'humancare',
        'bot_response': words_to_speak,
        'azure_voice': azure_voice,
        'azure_style': 'friendly',
        'animation_pipeline': 'high_speed',
    }
    api_endpoint = "https://api.exh.ai/animations/v1/generate_lipsync"
    res = requests.post(api_endpoint, json=body, headers=headers)
    print("res.status_code: ", res.status_code)

    html_video = '<pre>no video</pre>'
    # res.content is always bytes, so the original isinstance() test could
    # never fail and an HTTP error body would be written out as an .mp4.
    # Only treat the payload as video when the request actually succeeded.
    if res.ok and res.content:
        print("len(res.content)): ", len(res.content))

        with open('videos/tempfile.mp4', 'wb') as f:
            f.write(res.content)
        temp_file = gr.File("videos/tempfile.mp4")
        temp_file_url = "/file=" + temp_file.value['name']
        html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="humancare.jpg"></video>'
    else:
        print('video url unknown')
    return html_video, "videos/tempfile.mp4"
603
+
604
+
605
def update_selected_tools(widget, state, llm):
    """Rebuild the chains after the tool selection changes.

    A non-empty ``widget`` selection replaces ``state``; an empty selection
    keeps the previous ``state``.  Every path returns the four values the
    Gradio event lists as outputs.

    Returns:
        ``(state, llm, chain, express_chain)``.
    """
    if widget:
        state = widget
    chain, express_chain = load_chain(state, llm)
    return state, llm, chain, express_chain
610
+
611
+
612
def update_talking_head(widget, state):
    """Return the new talking-head state and the HTML to show for it.

    A truthy ``widget`` yields ``(widget, <looping video tag>)``; otherwise
    the state is reset to ``None`` and an empty ``<pre>`` placeholder is
    returned.
    """
    if not widget:
        # return state, create_html_video(LOOPING_TALKING_HEAD, "32")
        return None, "<pre></pre>"

    return widget, create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
622
+
623
+
624
def update_foo(widget, state):
    """Return the value that the paired ``gr.State`` should now hold.

    The state simply mirrors the widget.  Falsy widget values (an unchecked
    checkbox, ``0``) are propagated as themselves, so turning an option off
    always takes effect.  ``state`` is accepted for interface compatibility
    with the registered event inputs.
    """
    return widget
628
+
629
+
630
+ with gr.Blocks(css=CSS) as block:
631
+ llm_state = gr.State()
632
+ history_state = gr.State()
633
+ chain_state = gr.State()
634
+ express_chain_state = gr.State()
635
+ tools_list_state = gr.State(TOOLS_DEFAULT_LIST)
636
+ trace_chain_state = gr.State(False)
637
+ speak_text_state = gr.State(False)
638
+ talking_head_state = gr.State(True)
639
+ # Takes the input and repeats it back to the user, optionally transforming it.
640
+ monologue_state = gr.State(False)
641
+
642
+ # Pertains to Express-inator functionality
643
+ num_words_state = gr.State(NUM_WORDS_DEFAULT)
644
+ formality_state = gr.State(FORMALITY_DEFAULT)
645
+ anticipation_level_state = gr.State(EMOTION_DEFAULT)
646
+ joy_level_state = gr.State(EMOTION_DEFAULT)
647
+ trust_level_state = gr.State(EMOTION_DEFAULT)
648
+ fear_level_state = gr.State(EMOTION_DEFAULT)
649
+ surprise_level_state = gr.State(EMOTION_DEFAULT)
650
+ sadness_level_state = gr.State(EMOTION_DEFAULT)
651
+ disgust_level_state = gr.State(EMOTION_DEFAULT)
652
+ anger_level_state = gr.State(EMOTION_DEFAULT)
653
+ lang_level_state = gr.State(LANG_LEVEL_DEFAULT)
654
+ translate_to_state = gr.State(TRANSLATE_TO_DEFAULT)
655
+ literary_style_state = gr.State(LITERARY_STYLE_DEFAULT)
656
+
657
+ # Pertains to WHISPER functionality
658
+ whisper_lang_state = gr.State(WHISPER_DETECT_LANG)
659
+
660
+ with gr.Tab("Chat"):
661
+ with gr.Row():
662
+ # with gr.Column():
663
+ # gr.HTML(
664
+ # """<b><center>GPT + WolframAlpha + Whisper</center></b>
665
+ # <p><center>New feature in <b>Translate to</b>: Choose <b>Language level</b> (e.g. for conversation practice or explain like I'm five)</center></p>""")
666
+
667
+ openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...)",
668
+ show_label=False, lines=1, type='password')
669
+
670
+ with gr.Row():
671
+ with gr.Column(scale=1, min_width=TALKING_HEAD_WIDTH, visible=True):
672
+ speak_text_cb = gr.Checkbox(label="Enable speech", value=False)
673
+ speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
674
+ outputs=[speak_text_state])
675
+
676
+ my_file = gr.File(label="Upload a file",
677
+ type="file", visible=False)
678
+ tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
679
+ # tmp_file_url = "/file=" + tmp_file.value['name']
680
+ htm_video = create_html_video(
681
+ LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
682
+ video_html = gr.HTML(htm_video)
683
+
684
+ # my_aud_file = gr.File(label="Audio file", type="file", visible=True)
685
+ tmp_aud_file = gr.File("audios/tempfile.mp3", visible=False)
686
+ tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
687
+ htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
688
+ audio_html = gr.HTML(htm_audio)
689
+
690
+ with gr.Column(scale=7):
691
+ chatbot = gr.Chatbot()
692
+
693
+ with gr.Row():
694
+ message = gr.Textbox(label="What's on your mind??",
695
+ placeholder=PLACEHOLDER,
696
+ lines=1)
697
+ submit = gr.Button(value="Send", variant="secondary").style(
698
+ full_width=False)
699
+
700
+ # UNCOMMENT TO USE WHISPER
701
+ with gr.Row():
702
+ audio_comp = gr.Microphone(source="microphone", type="filepath", label="Just say it!",
703
+ interactive=True, streaming=False)
704
+ audio_comp.change(transcribe, inputs=[
705
+ audio_comp, whisper_lang_state], outputs=[message])
706
+
707
+ gr.Examples(
708
+ examples=EXAMPLES,
709
  inputs=message
710
  )
711
 
712
+ with gr.Tab("Settings"):
713
+ tools_cb_group = gr.CheckboxGroup(label="Tools:", choices=TOOLS_LIST,
714
+ value=TOOLS_DEFAULT_LIST)
715
+ tools_cb_group.change(update_selected_tools,
716
+ inputs=[tools_cb_group,
717
+ tools_list_state, llm_state],
718
+ outputs=[tools_list_state, llm_state, chain_state, express_chain_state])
719
+
720
+ trace_chain_cb = gr.Checkbox(
721
+ label="Show reasoning chain in chat bubble", value=False)
722
+ trace_chain_cb.change(update_foo, inputs=[trace_chain_cb, trace_chain_state],
723
+ outputs=[trace_chain_state])
724
+
725
+ # speak_text_cb = gr.Checkbox(label="Speak text from agent", value=False)
726
+ # speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
727
+ # outputs=[speak_text_state])
728
+
729
+ talking_head_cb = gr.Checkbox(label="Show talking head", value=True)
730
+ talking_head_cb.change(update_talking_head, inputs=[talking_head_cb, talking_head_state],
731
+ outputs=[talking_head_state, video_html])
732
+
733
+ monologue_cb = gr.Checkbox(label="Babel fish mode (translate/restate what you enter, no conversational agent)",
734
+ value=False)
735
+ monologue_cb.change(update_foo, inputs=[monologue_cb, monologue_state],
736
+ outputs=[monologue_state])
737
+
738
+ with gr.Tab("Whisper STT"):
739
+ whisper_lang_radio = gr.Radio(label="Whisper speech-to-text language:", choices=[
740
+ WHISPER_DETECT_LANG, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
741
+ "Danish", "Dutch", "English (Australian)", "English (British)", "English (Indian)", "English (New Zealand)",
742
+ "English (South African)", "English (US)", "English (Welsh)", "Finnish", "French", "French (Canadian)",
743
+ "German", "German (Austrian)", "Georgian", "Hindi", "Icelandic", "Indonesian", "Italian", "Japanese",
744
+ "Korean", "Norwegian", "Polish",
745
+ "Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
746
+ "Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh"],
747
+ value=WHISPER_DETECT_LANG)
748
+
749
+ whisper_lang_radio.change(update_foo,
750
+ inputs=[whisper_lang_radio,
751
+ whisper_lang_state],
752
+ outputs=[whisper_lang_state])
753
+
754
+ with gr.Tab("Translate to"):
755
+ lang_level_radio = gr.Radio(label="Language level:", choices=[
756
+ LANG_LEVEL_DEFAULT, "1st grade", "2nd grade", "3rd grade", "4th grade", "5th grade", "6th grade",
757
+ "7th grade", "8th grade", "9th grade", "10th grade", "11th grade", "12th grade", "University"],
758
+ value=LANG_LEVEL_DEFAULT)
759
+ lang_level_radio.change(update_foo, inputs=[lang_level_radio, lang_level_state],
760
+ outputs=[lang_level_state])
761
+
762
+ translate_to_radio = gr.Radio(label="Language:", choices=[
763
+ TRANSLATE_TO_DEFAULT, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
764
+ "Danish", "Dutch", "English (Australian)", "English (British)", "English (Indian)", "English (New Zealand)",
765
+ "English (South African)", "English (US)", "English (Welsh)", "Finnish", "French", "French (Canadian)",
766
+ "German", "German (Austrian)", "Georgian", "Hindi", "Icelandic", "Indonesian", "Italian", "Japanese",
767
+ "Korean", "Norwegian", "Polish",
768
+ "Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
769
+ "Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh",
770
+ "emojis", "Gen Z slang", "how the stereotypical Karen would say it", "Klingon",
771
+ "Pirate", "Strange Planet expospeak technical talk", "Yoda"],
772
+ value=TRANSLATE_TO_DEFAULT)
773
+
774
+ translate_to_radio.change(update_foo,
775
+ inputs=[translate_to_radio,
776
+ translate_to_state],
777
+ outputs=[translate_to_state])
778
+
779
+ with gr.Tab("Formality"):
780
+ formality_radio = gr.Radio(label="Formality:",
781
+ choices=[FORMALITY_DEFAULT,
782
+ "Casual", "Polite", "Honorific"],
783
+ value=FORMALITY_DEFAULT)
784
+ formality_radio.change(update_foo,
785
+ inputs=[formality_radio, formality_state],
786
+ outputs=[formality_state])
787
+
788
+ with gr.Tab("Lit style"):
789
+ literary_style_radio = gr.Radio(label="Literary style:", choices=[
790
+ LITERARY_STYLE_DEFAULT, "Prose", "Summary", "Outline", "Bullets", "Poetry", "Haiku", "Limerick", "Joke",
791
+ "Knock-knock"],
792
+ value=LITERARY_STYLE_DEFAULT)
793
+
794
+ literary_style_radio.change(update_foo,
795
+ inputs=[literary_style_radio,
796
+ literary_style_state],
797
+ outputs=[literary_style_state])
798
+
799
+ with gr.Tab("Emotions"):
800
+ anticipation_level_radio = gr.Radio(label="Anticipation level:",
801
+ choices=[
802
+ EMOTION_DEFAULT, "Interest", "Anticipation", "Vigilance"],
803
+ value=EMOTION_DEFAULT)
804
+ anticipation_level_radio.change(update_foo,
805
+ inputs=[anticipation_level_radio,
806
+ anticipation_level_state],
807
+ outputs=[anticipation_level_state])
808
+
809
+ joy_level_radio = gr.Radio(label="Joy level:",
810
+ choices=[EMOTION_DEFAULT,
811
+ "Serenity", "Joy", "Ecstasy"],
812
+ value=EMOTION_DEFAULT)
813
+ joy_level_radio.change(update_foo,
814
+ inputs=[joy_level_radio, joy_level_state],
815
+ outputs=[joy_level_state])
816
+
817
+ trust_level_radio = gr.Radio(label="Trust level:",
818
+ choices=[
819
+ EMOTION_DEFAULT, "Acceptance", "Trust", "Admiration"],
820
+ value=EMOTION_DEFAULT)
821
+ trust_level_radio.change(update_foo,
822
+ inputs=[trust_level_radio, trust_level_state],
823
+ outputs=[trust_level_state])
824
+
825
+ fear_level_radio = gr.Radio(label="Fear level:",
826
+ choices=[EMOTION_DEFAULT,
827
+ "Apprehension", "Fear", "Terror"],
828
+ value=EMOTION_DEFAULT)
829
+ fear_level_radio.change(update_foo,
830
+ inputs=[fear_level_radio, fear_level_state],
831
+ outputs=[fear_level_state])
832
+
833
+ surprise_level_radio = gr.Radio(label="Surprise level:",
834
+ choices=[
835
+ EMOTION_DEFAULT, "Distraction", "Surprise", "Amazement"],
836
+ value=EMOTION_DEFAULT)
837
+ surprise_level_radio.change(update_foo,
838
+ inputs=[surprise_level_radio,
839
+ surprise_level_state],
840
+ outputs=[surprise_level_state])
841
+
842
+ sadness_level_radio = gr.Radio(label="Sadness level:",
843
+ choices=[
844
+ EMOTION_DEFAULT, "Pensiveness", "Sadness", "Grief"],
845
+ value=EMOTION_DEFAULT)
846
+ sadness_level_radio.change(update_foo,
847
+ inputs=[sadness_level_radio,
848
+ sadness_level_state],
849
+ outputs=[sadness_level_state])
850
+
851
+ disgust_level_radio = gr.Radio(label="Disgust level:",
852
+ choices=[EMOTION_DEFAULT,
853
+ "Boredom", "Disgust", "Loathing"],
854
+ value=EMOTION_DEFAULT)
855
+ disgust_level_radio.change(update_foo,
856
+ inputs=[disgust_level_radio,
857
+ disgust_level_state],
858
+ outputs=[disgust_level_state])
859
+
860
+ anger_level_radio = gr.Radio(label="Anger level:",
861
+ choices=[EMOTION_DEFAULT,
862
+ "Annoyance", "Anger", "Rage"],
863
+ value=EMOTION_DEFAULT)
864
+ anger_level_radio.change(update_foo,
865
+ inputs=[anger_level_radio, anger_level_state],
866
+ outputs=[anger_level_state])
867
+
868
+ with gr.Tab("Max words"):
869
+ num_words_slider = gr.Slider(label="Max number of words to generate (0 for don't care)",
870
+ value=NUM_WORDS_DEFAULT, minimum=0, maximum=MAX_WORDS, step=10)
871
+ num_words_slider.change(update_foo,
872
+ inputs=[num_words_slider, num_words_state],
873
+ outputs=[num_words_state])
874
+
875
+ gr.HTML(AUTHORS)
876
+
877
+ # gr.HTML("""
878
+ # <form action="https://www.paypal.com/donate" method="post" target="_blank">
879
+ # <input type="hidden" name="business" value="AK8BVNALBXSPQ" />
880
+ # <input type="hidden" name="no_recurring" value="0" />
881
+ # <input type="hidden" name="item_name" value="Please consider helping to defray the cost of APIs such as SerpAPI and WolframAlpha that this app uses." />
882
+ # <input type="hidden" name="currency_code" value="USD" />
883
+ # <input type="image" src="https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif" border="0" name="submit" title="PayPal - The safer, easier way to pay online!" alt="Donate with PayPal button" />
884
+ # <img alt="" border="0" src="https://www.paypal.com/en_US/i/scr/pixel.gif" width="1" height="1" />
885
+ # </form>
886
+ # """)
887
+
888
+ gr.HTML("""<center>
889
+ <a href="https://huggingface.co/spaces/gfhayworth/hack_qa?duplicate=true">
890
+ <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
891
+ Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a>
892
+ </center>""")
893
+
894
+ message.submit(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
895
+ speak_text_state, talking_head_state, monologue_state,
896
+ express_chain_state, num_words_state, formality_state,
897
+ anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
898
+ surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
899
+ lang_level_state, translate_to_state, literary_style_state],
900
+ outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
901
+ # outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
902
+
903
+ submit.click(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
904
+ speak_text_state, talking_head_state, monologue_state,
905
+ express_chain_state, num_words_state, formality_state,
906
+ anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
907
+ surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
908
+ lang_level_state, translate_to_state, literary_style_state],
909
+ outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
910
+ # outputs=[chatbot, history_state, audio_html, tmp_aud_file, message])
911
+
912
+ openai_api_key_textbox.change(set_openai_api_key,
913
+ inputs=[openai_api_key_textbox],
914
+ outputs=[chain_state, express_chain_state, llm_state])
915
 
916
+ block.launch(debug=True)
audios/tempfile.mp3 ADDED
Binary file (785 kB). View file
 
azure_utils.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This class stores Azure voice data. Specifically, the class stores several records containing
2
+ # language, azure_voice and gender. The class also has a method to return the azure_voice name
3
+ # given a language and gender (or None when no record matches).
4
+
5
+ NEURAL_ENGINE = "neural"
6
+ STANDARD_ENGINE = "standard"
7
+
8
+
9
class AzureVoiceData:
    """Lookup table of Azure text-to-speech voice names by language and gender.

    ``voice_data`` is a list of dicts, each with the keys ``'language'``,
    ``'azure_voice'`` and ``'gender'`` (in that order). Every record in the
    table is currently ``'Male'``, so any other gender yields no match.
    """

    def __init__(self):
        """Build the per-instance ``voice_data`` list from the static table."""
        columns = ('language', 'azure_voice', 'gender')
        # The voice name is handed back verbatim by get_voice(), so each one
        # must be the bare identifier with no stray quoting or whitespace.
        # NOTE(review): 'English (Welsh)' and 'Welsh' map to the same cy-GB
        # voice -- confirm that this is intended.
        rows = (
            ('Arabic', 'ar-EG-ShakirNeural', 'Male'),
            ('Arabic (Gulf)', 'ar-KW-FahedNeural', 'Male'),
            ('Catalan', 'ca-ES-EnricNeural', 'Male'),
            ('Chinese (Cantonese)', 'yue-CN-YunSongNeural', 'Male'),
            ('Chinese (Mandarin)', 'zh-CN-YunxiNeural', 'Male'),
            ('Danish', 'da-DK-JeppeNeural', 'Male'),
            ('Dutch', 'nl-NL-MaartenNeural', 'Male'),
            ('English (Australian)', 'en-AU-KenNeural', 'Male'),
            ('English (British)', 'en-GB-RyanNeural', 'Male'),
            ('English (Indian)', 'en-IN-PrabhatNeural', 'Male'),
            ('English (New Zealand)', 'en-NZ-MitchellNeural', 'Male'),
            ('English (South African)', 'en-ZA-LukeNeural', 'Male'),
            ('English (US)', 'en-US-ChristopherNeural', 'Male'),
            ('English (Welsh)', 'cy-GB-AledNeural', 'Male'),
            ('Finnish', 'fi-FI-HarriNeural', 'Male'),
            ('French', 'fr-FR-HenriNeural', 'Male'),
            ('French (Canadian)', 'fr-CA-AntoineNeural', 'Male'),
            ('German', 'de-DE-KlausNeural', 'Male'),
            ('German (Austrian)', 'de-AT-JonasNeural', 'Male'),
            ('Hindi', 'hi-IN-MadhurNeural', 'Male'),
            ('Icelandic', 'is-IS-GunnarNeural', 'Male'),
            ('Italian', 'it-IT-GianniNeural', 'Male'),
            ('Japanese', 'ja-JP-KeitaNeural', 'Male'),
            ('Korean', 'ko-KR-GookMinNeural', 'Male'),
            ('Norwegian', 'nb-NO-FinnNeural', 'Male'),
            ('Polish', 'pl-PL-MarekNeural', 'Male'),
            ('Portuguese (Brazilian)', 'pt-BR-NicolauNeural', 'Male'),
            ('Portuguese (European)', 'pt-PT-DuarteNeural', 'Male'),
            ('Romanian', 'ro-RO-EmilNeural', 'Male'),
            ('Russian', 'ru-RU-DmitryNeural', 'Male'),
            ('Spanish (European)', 'es-ES-TeoNeural', 'Male'),
            ('Spanish (Mexican)', 'es-MX-LibertoNeural', 'Male'),
            # Fixed: this name previously ended with a stray '"' character.
            ('Spanish (US)', 'es-US-AlonsoNeural', 'Male'),
            ('Swedish', 'sv-SE-MattiasNeural', 'Male'),
            ('Turkish', 'tr-TR-AhmetNeural', 'Male'),
            ('Welsh', 'cy-GB-AledNeural', 'Male'),
        )
        self.voice_data = [dict(zip(columns, row)) for row in rows]

    def get_voice(self, language, gender):
        """Return the Azure voice name for ``language`` and ``gender``.

        Args:
            language: Display name of the language, e.g. ``'English (US)'``.
            gender: Gender label to match exactly, e.g. ``'Male'``.

        Returns:
            The ``'azure_voice'`` value of the first record whose language and
            gender both match, or ``None`` when no record matches.
        """
        for voice in self.voice_data:
            if voice['language'] == language and voice['gender'] == gender:
                return voice['azure_voice']
        return None
127
+
128
+
129
# Run from the command-line
if __name__ == '__main__':
    azure_voice_data = AzureVoiceData()

    # Smoke test: look up a fixed set of (language, gender) pairs and print
    # whatever the table returns for each (None when there is no match).
    for language, gender in (
        ('English (US)', 'Male'),
        ('English (US)', 'Female'),
        ('French', 'Female'),
        ('French', 'Male'),
        ('Japanese', 'Female'),
        ('Japanese', 'Male'),
        ('Hindi', 'Female'),
        ('Hindi', 'Male'),
    ):
        azure_voice = azure_voice_data.get_voice(language, gender)
        print(language, gender, azure_voice)
images/humancare.jpg ADDED
polly_utils.py ADDED
@@ -0,0 +1,635 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This class stores Polly voice data. Specifically, the class stores several records containing
2
+ # language, lang_code, whisper_lang_code, voice_id, gender and the supported engines (neural/standard).
3
+ # It has methods to return the voice_id, lang_code and engine for a language and gender, and the Whisper language code for a language.
4
+
5
NEURAL_ENGINE = "neural"
STANDARD_ENGINE = "standard"


class PollyVoiceData:
    """Table of Amazon Polly voices with lookups by language and gender.

    ``voice_data`` is a list of dicts, each with the keys ``'language'``,
    ``'lang_code'``, ``'whisper_lang_code'``, ``'voice_id'``, ``'gender'``,
    ``'neural'`` and ``'standard'``; the last two hold ``'Yes'`` or ``'No'``.
    """

    def __init__(self):
        """Build the per-instance ``voice_data`` list from the static table."""
        columns = ('language', 'lang_code', 'whisper_lang_code', 'voice_id',
                   'gender', 'neural', 'standard')
        # One row per voice, in lookup-priority order: the lookups below
        # return the first matching row, so the ordering is significant.
        rows = (
            ('Arabic', 'arb', 'ar', 'Zeina', 'Female', 'No', 'Yes'),
            ('Arabic (Gulf)', 'ar-AE', 'ar', 'Hala', 'Female', 'Yes', 'No'),
            ('Catalan', 'ca-ES', 'ca', 'Arlet', 'Female', 'Yes', 'No'),
            ('Chinese (Cantonese)', 'yue-CN', 'zh', 'Hiujin', 'Female', 'Yes', 'No'),
            ('Chinese (Mandarin)', 'cmn-CN', 'zh', 'Zhiyu', 'Female', 'Yes', 'No'),
            ('Danish', 'da-DK', 'da', 'Naja', 'Female', 'No', 'Yes'),
            ('Danish', 'da-DK', 'da', 'Mads', 'Male', 'No', 'Yes'),
            ('Dutch', 'nl-NL', 'nl', 'Laura', 'Female', 'Yes', 'No'),
            ('Dutch', 'nl-NL', 'nl', 'Lotte', 'Female', 'No', 'Yes'),
            ('Dutch', 'nl-NL', 'nl', 'Ruben', 'Male', 'No', 'Yes'),
            ('English (Australian)', 'en-AU', 'en', 'Nicole', 'Female', 'No', 'Yes'),
            ('English (Australian)', 'en-AU', 'en', 'Olivia', 'Female', 'Yes', 'No'),
            ('English (Australian)', 'en-AU', 'en', 'Russell', 'Male', 'No', 'Yes'),
            ('English (British)', 'en-GB', 'en', 'Amy', 'Female', 'Yes', 'Yes'),
            ('English (British)', 'en-GB', 'en', 'Emma', 'Female', 'Yes', 'Yes'),
            ('English (British)', 'en-GB', 'en', 'Brian', 'Male', 'Yes', 'Yes'),
            ('English (British)', 'en-GB', 'en', 'Arthur', 'Male', 'Yes', 'No'),
            ('English (Indian)', 'en-IN', 'en', 'Aditi', 'Female', 'No', 'Yes'),
            ('English (Indian)', 'en-IN', 'en', 'Raveena', 'Female', 'No', 'Yes'),
            ('English (Indian)', 'en-IN', 'en', 'Kajal', 'Female', 'Yes', 'No'),
            ('English (New Zealand)', 'en-NZ', 'en', 'Aria', 'Female', 'Yes', 'No'),
            ('English (South African)', 'en-ZA', 'en', 'Ayanda', 'Female', 'Yes', 'No'),
            ('English (US)', 'en-US', 'en', 'Ivy', 'Female (child)', 'Yes', 'Yes'),
            ('English (US)', 'en-US', 'en', 'Joanna', 'Female', 'Yes', 'Yes'),
            ('English (US)', 'en-US', 'en', 'Kendra', 'Female', 'Yes', 'Yes'),
            ('English (US)', 'en-US', 'en', 'Kimberly', 'Female', 'Yes', 'Yes'),
            ('English (US)', 'en-US', 'en', 'Salli', 'Female', 'Yes', 'Yes'),
            ('English (US)', 'en-US', 'en', 'Joey', 'Male', 'Yes', 'Yes'),
            ('English (US)', 'en-US', 'en', 'Justin', 'Male (child)', 'Yes', 'Yes'),
            ('English (US)', 'en-US', 'en', 'Kevin', 'Male (child)', 'Yes', 'No'),
            ('English (US)', 'en-US', 'en', 'Matthew', 'Male', 'Yes', 'Yes'),
            ('English (Welsh)', 'en-GB-WLS', 'en', 'Geraint', 'Male', 'No', 'Yes'),
            ('Finnish', 'fi-FI', 'fi', 'Suvi', 'Female', 'Yes', 'No'),
            ('French', 'fr-FR', 'fr', 'Celine', 'Female', 'No', 'Yes'),
            ('French', 'fr-FR', 'fr', 'Lea', 'Female', 'Yes', 'Yes'),
            ('French', 'fr-FR', 'fr', 'Mathieu', 'Male', 'No', 'Yes'),
            ('French (Canadian)', 'fr-CA', 'fr', 'Chantal', 'Female', 'No', 'Yes'),
            ('French (Canadian)', 'fr-CA', 'fr', 'Gabrielle', 'Female', 'Yes', 'No'),
            ('French (Canadian)', 'fr-CA', 'fr', 'Liam', 'Male', 'Yes', 'No'),
            ('German', 'de-DE', 'de', 'Marlene', 'Female', 'No', 'Yes'),
            ('German', 'de-DE', 'de', 'Vicki', 'Female', 'Yes', 'Yes'),
            ('German', 'de-DE', 'de', 'Hans', 'Male', 'No', 'Yes'),
            ('German', 'de-DE', 'de', 'Daniel', 'Male', 'Yes', 'No'),
            ('German (Austrian)', 'de-AT', 'de', 'Hannah', 'Female', 'Yes', 'No'),
            ('Hindi', 'hi-IN', 'hi', 'Aditi', 'Female', 'No', 'Yes'),
            ('Hindi', 'hi-IN', 'hi', 'Kajal', 'Female', 'Yes', 'No'),
            ('Icelandic', 'is-IS', 'is', 'Dora', 'Female', 'No', 'Yes'),
            ('Icelandic', 'is-IS', 'is', 'Karl', 'Male', 'No', 'Yes'),
            ('Italian', 'it-IT', 'it', 'Carla', 'Female', 'No', 'Yes'),
            ('Italian', 'it-IT', 'it', 'Bianca', 'Female', 'Yes', 'Yes'),
            ('Japanese', 'ja-JP', 'ja', 'Mizuki', 'Female', 'No', 'Yes'),
            ('Japanese', 'ja-JP', 'ja', 'Takumi', 'Male', 'Yes', 'Yes'),
            ('Korean', 'ko-KR', 'ko', 'Seoyeon', 'Female', 'Yes', 'Yes'),
            ('Norwegian', 'nb-NO', 'no', 'Liv', 'Female', 'No', 'Yes'),
            ('Norwegian', 'nb-NO', 'no', 'Ida', 'Female', 'Yes', 'No'),
            ('Polish', 'pl-PL', 'pl', 'Ewa', 'Female', 'No', 'Yes'),
            ('Polish', 'pl-PL', 'pl', 'Maja', 'Female', 'No', 'Yes'),
            ('Polish', 'pl-PL', 'pl', 'Jacek', 'Male', 'No', 'Yes'),
            ('Polish', 'pl-PL', 'pl', 'Jan', 'Male', 'No', 'Yes'),
            ('Polish', 'pl-PL', 'pl', 'Ola', 'Female', 'Yes', 'No'),
            ('Portuguese (Brazilian)', 'pt-BR', 'pt', 'Camila', 'Female', 'Yes', 'Yes'),
            ('Portuguese (Brazilian)', 'pt-BR', 'pt', 'Vitoria', 'Female', 'Yes', 'Yes'),
            ('Portuguese (Brazilian)', 'pt-BR', 'pt', 'Ricardo', 'Male', 'No', 'Yes'),
            ('Portuguese (European)', 'pt-PT', 'pt', 'Ines', 'Female', 'Yes', 'Yes'),
            ('Portuguese (European)', 'pt-PT', 'pt', 'Cristiano', 'Male', 'No', 'Yes'),
            ('Romanian', 'ro-RO', 'ro', 'Carmen', 'Female', 'No', 'Yes'),
            ('Russian', 'ru-RU', 'ru', 'Tatyana', 'Female', 'No', 'Yes'),
            ('Russian', 'ru-RU', 'ru', 'Maxim', 'Male', 'No', 'Yes'),
            ('Spanish (European)', 'es-ES', 'es', 'Conchita', 'Female', 'No', 'Yes'),
            ('Spanish (European)', 'es-ES', 'es', 'Lucia', 'Female', 'Yes', 'Yes'),
            ('Spanish (European)', 'es-ES', 'es', 'Enrique', 'Male', 'No', 'Yes'),
            ('Spanish (Mexican)', 'es-MX', 'es', 'Mia', 'Female', 'Yes', 'Yes'),
            ('Spanish (US)', 'es-US', 'es', 'Lupe', 'Female', 'Yes', 'Yes'),
            ('Spanish (US)', 'es-US', 'es', 'Penelope', 'Female', 'No', 'Yes'),
            ('Spanish (US)', 'es-US', 'es', 'Miguel', 'Male', 'No', 'Yes'),
            ('Spanish (US)', 'es-US', 'es', 'Pedro', 'Male', 'Yes', 'No'),
            ('Swedish', 'sv-SE', 'sv', 'Astrid', 'Female', 'No', 'Yes'),
            ('Swedish', 'sv-SE', 'sv', 'Elin', 'Female', 'Yes', 'No'),
            ('Turkish', 'tr-TR', 'tr', 'Filiz', 'Female', 'No', 'Yes'),
            ('Welsh', 'cy-GB', 'cy', 'Gwyneth', 'Female', 'No', 'Yes'),
        )
        self.voice_data = [dict(zip(columns, row)) for row in rows]

    def get_voice(self, language, gender):
        """Return ``(voice_id, lang_code, engine)`` for a language and gender.

        The whole table is searched for a neural-capable voice first; only if
        none matches is it searched again for a standard-capable voice.

        Args:
            language: Display name of the language, e.g. ``'English (US)'``.
            gender: Gender label to match exactly, e.g. ``'Female'``.

        Returns:
            A 3-tuple of the first matching voice's id, its language code and
            the engine constant, or ``(None, None, None)`` when nothing matches.
        """
        wanted = (language, gender)
        for capability in ('neural', 'standard'):
            for entry in self.voice_data:
                if (entry['language'], entry['gender']) != wanted:
                    continue
                if entry[capability] == 'Yes':
                    engine = NEURAL_ENGINE if capability == 'neural' else STANDARD_ENGINE
                    return entry['voice_id'], entry['lang_code'], engine
        return None, None, None

    def get_whisper_lang_code(self, language):
        """Return the Whisper language code for ``language``.

        Uses the first record whose ``'language'`` matches and falls back to
        ``"en"`` when the language is not in the table.
        """
        codes = (entry['whisper_lang_code']
                 for entry in self.voice_data
                 if entry['language'] == language)
        return next(codes, "en")
590
+
591
+
592
# Run from the command-line
if __name__ == '__main__':
    polly_voice_data = PollyVoiceData()

    # Smoke test the voice lookup for a fixed set of (language, gender) pairs.
    for language, gender in (
        ('English (US)', 'Male'),
        ('English (US)', 'Female'),
        ('French', 'Female'),
        ('French', 'Male'),
        ('Japanese', 'Female'),
        ('Japanese', 'Male'),
        ('Hindi', 'Female'),
        ('Hindi', 'Male'),
    ):
        voice_id, language_code, engine = polly_voice_data.get_voice(language, gender)
        print(language, gender, voice_id, language_code, engine)

    # Smoke test the Whisper code lookup; 'Foo' exercises the "en" fallback.
    for language in ('English (US)', 'Chinese (Mandarin)', 'Norwegian', 'Dutch', 'Foo'):
        whisper_lang_code = polly_voice_data.get_whisper_lang_code(language)
        print(language + ' whisper_lang_code:', whisper_lang_code)
634
+
635
+
requirements.txt CHANGED
@@ -1,2 +1,11 @@
1
  sentence-transformers
2
- datasets
 
 
 
 
 
 
 
 
 
 
1
  sentence-transformers
2
+ datasets
3
+ openai==0.26.1
4
+ gradio==3.16.2
5
+ google-search-results
6
+ google-api-python-client==2.72.0
7
+ wolframalpha
8
+ langchain==0.0.63
9
+ requests==2.28.2
10
+ git+https://github.com/openai/whisper.git
11
+ boto3
videos/humancare.mp4 ADDED
Binary file (235 kB). View file