SnJForever committed on
Commit b61c12e
1 Parent(s): 2e5153f

update the app

Files changed (2)
  1. app-lang.py +1008 -0
  2. app.py +212 -862
app-lang.py ADDED
@@ -0,0 +1,1008 @@
+ import io
+ import os
+ import ssl
+ from contextlib import closing
+ from typing import Optional, Tuple
+ import datetime
+
+ import boto3
+ import gradio as gr
+ import requests
+
+ # UNCOMMENT TO USE WHISPER
+ import warnings
+ import whisper
+
+ from langchain import ConversationChain, LLMChain
+
+ from langchain.agents import load_tools, initialize_agent
+ from langchain.chains.conversation.memory import ConversationBufferMemory
+ from langchain.llms import OpenAI, OpenAIChat
+ from threading import Lock
+
+ # Console to variable
+ from io import StringIO
+ import sys
+ import re
+
+ from openai.error import AuthenticationError, InvalidRequestError, RateLimitError
+
+ # Pertains to Express-inator functionality
+ from langchain.prompts import PromptTemplate
+
+ from polly_utils import PollyVoiceData, NEURAL_ENGINE
+ from azure_utils import AzureVoiceData
+
+ # Pertains to question answering functionality
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.vectorstores.faiss import FAISS
+ from langchain.docstore.document import Document
+ from langchain.chains.question_answering import load_qa_chain
+ import azure.cognitiveservices.speech as speechsdk
+ import base64
+
+
+ news_api_key = os.environ["NEWS_API_KEY"]
+
+ tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
+
+ TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'pal-math',
+               'pal-colored-objects']  # 'google-search','news-api','tmdb-api','open-meteo-api'
+ TOOLS_DEFAULT_LIST = ['serpapi']
+ BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
+ # AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. It is not necessary to hit a button or key after pasting it."
+ AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. "
+ MAX_TOKENS = 512
+
+ LOOPING_TALKING_HEAD = "videos/Michelle.mp4"
+ TALKING_HEAD_WIDTH = "192"
+ MAX_TALKING_HEAD_TEXT_LENGTH = 100
+
+ # Pertains to Express-inator functionality
+ NUM_WORDS_DEFAULT = 0
+ MAX_WORDS = 400
+ FORMALITY_DEFAULT = "N/A"
+ TEMPERATURE_DEFAULT = 0.5
+ EMOTION_DEFAULT = "N/A"
+ LANG_LEVEL_DEFAULT = "University"
+ TRANSLATE_TO_DEFAULT = "Chinese (Mandarin)"
+ LITERARY_STYLE_DEFAULT = "N/A"
+ PROMPT_TEMPLATE = PromptTemplate(
+     input_variables=["original_words", "num_words", "formality", "emotions", "lang_level", "translate_to",
+                      "literary_style"],
+     template="Restate {num_words}{formality}{emotions}{lang_level}{translate_to}{literary_style}the following: \n{original_words}\n",
+ )
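+
+ # Illustrative sketch of how the template slots assemble: with
+ # num_words="using up to 10 words, " and every other slot an empty string,
+ # PROMPT_TEMPLATE.format(...) produces
+ #     "Restate using up to 10 words, the following: \nHello there\n"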
+
+ FORCE_TRANSLATE_DEFAULT = True
+ USE_GPT4_DEFAULT = False
+
+ POLLY_VOICE_DATA = PollyVoiceData()
+ AZURE_VOICE_DATA = AzureVoiceData()
+
+ # Pertains to WHISPER functionality
+ WHISPER_DETECT_LANG = "Chinese (Mandarin)"
+
+ # UNCOMMENT TO USE WHISPER
+ warnings.filterwarnings("ignore")
+ WHISPER_MODEL = whisper.load_model("tiny")
+ print("WHISPER_MODEL", WHISPER_MODEL)
+
+
+ # UNCOMMENT TO USE WHISPER
+ def transcribe(aud_inp, whisper_lang):
+     if aud_inp is None:
+         return ""
+     aud = whisper.load_audio(aud_inp)
+     aud = whisper.pad_or_trim(aud)
+     mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)
+     _, probs = WHISPER_MODEL.detect_language(mel)
+     options = whisper.DecodingOptions()
+     if whisper_lang != WHISPER_DETECT_LANG:
+         whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
+         options = whisper.DecodingOptions(language=whisper_lang_code)
+     result = whisper.decode(WHISPER_MODEL, mel, options)
+     print("result.text", result.text)
+     result_text = ""
+     if result and result.text:
+         result_text = result.text
+     return result_text
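+
+ # Usage sketch (assumes a local recording "clip.wav" exists): passing
+ # WHISPER_DETECT_LANG lets Whisper auto-detect the spoken language.
+ #     text = transcribe("clip.wav", WHISPER_DETECT_LANG)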
+
+
+ # Temporarily address Wolfram Alpha SSL certificate issue
+ ssl._create_default_https_context = ssl._create_unverified_context
+
+
+ # TEMPORARY FOR TESTING
+ def transcribe_dummy(aud_inp_tb, whisper_lang):
+     if aud_inp_tb is None:
+         return ""
+     # aud = whisper.load_audio(aud_inp)
+     # aud = whisper.pad_or_trim(aud)
+     # mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)
+     # _, probs = WHISPER_MODEL.detect_language(mel)
+     # options = whisper.DecodingOptions()
+     # options = whisper.DecodingOptions(language="ja")
+     # result = whisper.decode(WHISPER_MODEL, mel, options)
+     result_text = "Whisper will detect language"
+     if whisper_lang != WHISPER_DETECT_LANG:
+         whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
+         result_text = f"Whisper will use lang code: {whisper_lang_code}"
+     print("result_text", result_text)
+     return aud_inp_tb
+
+
+ # Pertains to Express-inator functionality
+ def transform_text(desc, express_chain, num_words, formality,
+                    anticipation_level, joy_level, trust_level,
+                    fear_level, surprise_level, sadness_level, disgust_level, anger_level,
+                    lang_level, translate_to, literary_style, force_translate):
+     num_words_prompt = ""
+     if num_words and int(num_words) != 0:
+         num_words_prompt = "using up to " + str(num_words) + " words, "
+
+     # Change some arguments to lower case
+     formality = formality.lower()
+     anticipation_level = anticipation_level.lower()
+     joy_level = joy_level.lower()
+     trust_level = trust_level.lower()
+     fear_level = fear_level.lower()
+     surprise_level = surprise_level.lower()
+     sadness_level = sadness_level.lower()
+     disgust_level = disgust_level.lower()
+     anger_level = anger_level.lower()
+
+     formality_str = ""
+     if formality != "n/a":
+         formality_str = "in a " + formality + " manner, "
+
+     # put all emotions into a list
+     emotions = []
+     if anticipation_level != "n/a":
+         emotions.append(anticipation_level)
+     if joy_level != "n/a":
+         emotions.append(joy_level)
+     if trust_level != "n/a":
+         emotions.append(trust_level)
+     if fear_level != "n/a":
+         emotions.append(fear_level)
+     if surprise_level != "n/a":
+         emotions.append(surprise_level)
+     if sadness_level != "n/a":
+         emotions.append(sadness_level)
+     if disgust_level != "n/a":
+         emotions.append(disgust_level)
+     if anger_level != "n/a":
+         emotions.append(anger_level)
+
+     emotions_str = ""
+     if len(emotions) > 0:
+         if len(emotions) == 1:
+             emotions_str = "with emotion of " + emotions[0] + ", "
+         else:
+             emotions_str = "with emotions of " + ", ".join(emotions[:-1]) + " and " + emotions[-1] + ", "
+
+     lang_level_str = ""
+     if lang_level != LANG_LEVEL_DEFAULT:
+         lang_level_str = "at a level that a person in " + lang_level + " can easily comprehend, " if translate_to == TRANSLATE_TO_DEFAULT else ""
+
+     translate_to_str = ""
+     if translate_to != TRANSLATE_TO_DEFAULT and (force_translate or lang_level != LANG_LEVEL_DEFAULT):
+         translate_to_str = "translated to " + translate_to + (
+             "" if lang_level == LANG_LEVEL_DEFAULT else " at a level that a person in " + lang_level + " can easily comprehend") + ", "
+
+     literary_style_str = ""
+     if literary_style != LITERARY_STYLE_DEFAULT:
+         if literary_style == "Prose":
+             literary_style_str = "as prose, "
+         elif literary_style == "Story":
+             literary_style_str = "as a story, "
+         elif literary_style == "Summary":
+             literary_style_str = "as a summary, "
+         elif literary_style == "Outline":
+             literary_style_str = "as an outline with numbers and lower case letters, "
+         elif literary_style == "Bullets":
+             literary_style_str = "as bullet points using bullets, "
+         elif literary_style == "Poetry":
+             literary_style_str = "as a poem, "
+         elif literary_style == "Haiku":
+             literary_style_str = "as a haiku, "
+         elif literary_style == "Limerick":
+             literary_style_str = "as a limerick, "
+         elif literary_style == "Rap":
+             literary_style_str = "as a rap, "
+         elif literary_style == "Joke":
+             literary_style_str = "as a very funny joke with a setup and punchline, "
+         elif literary_style == "Knock-knock":
+             literary_style_str = "as a very funny knock-knock joke, "
+         elif literary_style == "FAQ":
+             literary_style_str = "as a FAQ with several questions and answers, "
+
+     formatted_prompt = PROMPT_TEMPLATE.format(
+         original_words=desc,
+         num_words=num_words_prompt,
+         formality=formality_str,
+         emotions=emotions_str,
+         lang_level=lang_level_str,
+         translate_to=translate_to_str,
+         literary_style=literary_style_str
+     )
+
+     trans_instr = num_words_prompt + formality_str + emotions_str + lang_level_str + translate_to_str + literary_style_str
+     if express_chain and len(trans_instr.strip()) > 0:
+         generated_text = express_chain.run(
+             {'original_words': desc, 'num_words': num_words_prompt, 'formality': formality_str,
+              'emotions': emotions_str, 'lang_level': lang_level_str, 'translate_to': translate_to_str,
+              'literary_style': literary_style_str}).strip()
+     else:
+         print("Not transforming text")
+         generated_text = desc
+
+     # double up newlines in generated_text so each one renders as a paragraph break
+     generated_text = generated_text.replace("\n", "\n\n")
+
+     prompt_plus_generated = "GPT prompt: " + formatted_prompt + "\n\n" + generated_text
+
+     print("\n==== date/time: " + str(datetime.datetime.now() - datetime.timedelta(hours=5)) + " ====")
+     print("prompt_plus_generated: " + prompt_plus_generated)
+
+     return generated_text
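+
+ # Worked example (illustrative settings): formality="Casual" and
+ # literary_style="Haiku", with everything else at its default, assemble the
+ # instruction "Restate in a casual manner, as a haiku, the following: ..."
+ # before express_chain runs.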
+
+
+ def load_chain(tools_list, llm):
+     chain = None
+     express_chain = None
+     memory = None
+     if llm:
+         print("\ntools_list", tools_list)
+         tool_names = tools_list
+         tools = load_tools(tool_names, llm=llm, news_api_key=news_api_key, tmdb_bearer_token=tmdb_bearer_token)
+
+         memory = ConversationBufferMemory(memory_key="chat_history")
+
+         chain = initialize_agent(tools, llm, agent="conversational-react-description", verbose=True, memory=memory)
+         express_chain = LLMChain(llm=llm, prompt=PROMPT_TEMPLATE, verbose=True)
+     return chain, express_chain, memory
+
+
+ def set_openai_api_key(api_key, use_gpt4):
+     """Set the api key and return chain.
+     If no api_key, then None is returned.
+     """
+     if api_key and api_key.startswith("sk-") and len(api_key) > 50:
+         os.environ["OPENAI_API_KEY"] = api_key
+         print("\n\n ++++++++++++++ Setting OpenAI API key ++++++++++++++ \n\n")
+         print(str(datetime.datetime.now()) + ": Before OpenAI, OPENAI_API_KEY length: " + str(
+             len(os.environ["OPENAI_API_KEY"])))
+
+         if use_gpt4:
+             llm = OpenAIChat(temperature=0, max_tokens=MAX_TOKENS, model_name="gpt-4")
+             print("Trying to use llm OpenAIChat with gpt-4")
+         else:
+             print("Trying to use llm OpenAI with text-davinci-003")
+             llm = OpenAI(temperature=0, max_tokens=MAX_TOKENS, model_name="text-davinci-003")
+
+         print(str(datetime.datetime.now()) + ": After OpenAI, OPENAI_API_KEY length: " + str(
+             len(os.environ["OPENAI_API_KEY"])))
+         chain, express_chain, memory = load_chain(TOOLS_DEFAULT_LIST, llm)
+
+         # Pertains to question answering functionality
+         embeddings = OpenAIEmbeddings()
+
+         if use_gpt4:
+             qa_chain = load_qa_chain(OpenAIChat(temperature=0, model_name="gpt-4"), chain_type="stuff")
+             print("Trying to use qa_chain OpenAIChat with gpt-4")
+         else:
+             print("Trying to use qa_chain OpenAI with text-davinci-003")
+             # Wrap the LLM in a QA chain; a bare LLM would break the later
+             # qa_chain.run(input_documents=..., question=...) call.
+             qa_chain = load_qa_chain(OpenAI(temperature=0, max_tokens=MAX_TOKENS, model_name="text-davinci-003"),
+                                      chain_type="stuff")
+
+         print(str(datetime.datetime.now()) + ": After load_chain, OPENAI_API_KEY length: " + str(
+             len(os.environ["OPENAI_API_KEY"])))
+         os.environ["OPENAI_API_KEY"] = ""
+         return chain, express_chain, llm, embeddings, qa_chain, memory, use_gpt4
+     return None, None, None, None, None, None, None
+
+
+ def run_chain(chain, inp, capture_hidden_text):
+     output = ""
+     hidden_text = None
+     if capture_hidden_text:
+         error_msg = None
+         tmp = sys.stdout
+         hidden_text_io = StringIO()
+         sys.stdout = hidden_text_io
+
+         try:
+             output = chain.run(input=inp)
+         except AuthenticationError as ae:
+             error_msg = AUTH_ERR_MSG + str(datetime.datetime.now()) + ". " + str(ae)
+             print("error_msg", error_msg)
+         except RateLimitError as rle:
+             error_msg = "\n\nRateLimitError: " + str(rle)
+         except ValueError as ve:
+             error_msg = "\n\nValueError: " + str(ve)
+         except InvalidRequestError as ire:
+             error_msg = "\n\nInvalidRequestError: " + str(ire)
+         except Exception as e:
+             error_msg = "\n\n" + BUG_FOUND_MSG + ":\n\n" + str(e)
+
+         sys.stdout = tmp
+         hidden_text = hidden_text_io.getvalue()
+
+         # remove ANSI escape sequences from hidden_text
+         hidden_text = re.sub(r'\x1b[^m]*m', '', hidden_text)
+
+         # remove "Entering new AgentExecutor chain..." from hidden_text
+         hidden_text = re.sub(r"Entering new AgentExecutor chain\.\.\.\n", "", hidden_text)
+
+         # remove "Finished chain." from hidden_text
+         hidden_text = re.sub(r"Finished chain\.", "", hidden_text)
+
+         # Add newline after "Thought:" "Action:" "Observation:" "Input:" and "AI:"
+         hidden_text = re.sub(r"Thought:", "\n\nThought:", hidden_text)
+         hidden_text = re.sub(r"Action:", "\n\nAction:", hidden_text)
+         hidden_text = re.sub(r"Observation:", "\n\nObservation:", hidden_text)
+         hidden_text = re.sub(r"Input:", "\n\nInput:", hidden_text)
+         hidden_text = re.sub(r"AI:", "\n\nAI:", hidden_text)
+
+         if error_msg:
+             hidden_text += error_msg
+
+         print("hidden_text: ", hidden_text)
+     else:
+         try:
+             output = chain.run(input=inp)
+         except AuthenticationError as ae:
+             output = AUTH_ERR_MSG + str(datetime.datetime.now()) + ". " + str(ae)
+             print("output", output)
+         except RateLimitError as rle:
+             output = "\n\nRateLimitError: " + str(rle)
+         except ValueError as ve:
+             output = "\n\nValueError: " + str(ve)
+         except InvalidRequestError as ire:
+             output = "\n\nInvalidRequestError: " + str(ire)
+         except Exception as e:
+             output = "\n\n" + BUG_FOUND_MSG + ":\n\n" + str(e)
+
+     return output, hidden_text
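+
+ # Usage sketch: with capture_hidden_text=True, the agent's intermediate
+ # Thought/Action/Observation trace is captured from stdout alongside the answer.
+ #     output, trace = run_chain(chain, "What is 2 to the 30th power?", capture_hidden_text=True)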
+
+
+ def reset_memory(history, memory):
+     # memory.clear()
+     history = []
+     return history, history, memory
+
+
+ class ChatWrapper:
+
+     def __init__(self):
+         self.lock = Lock()
+
+     def __call__(
+             self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain: Optional[ConversationChain],
+             trace_chain: bool, speak_text: bool, talking_head: bool, monologue: bool, express_chain: Optional[LLMChain],
+             num_words, formality, anticipation_level, joy_level, trust_level,
+             fear_level, surprise_level, sadness_level, disgust_level, anger_level,
+             lang_level, translate_to, literary_style, qa_chain, docsearch, use_embeddings, force_translate
+     ):
+         """Execute the chat functionality."""
+         self.lock.acquire()
+         try:
+             print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
+             print("inp: " + inp)
+             print("trace_chain: ", trace_chain)
+             print("speak_text: ", speak_text)
+             print("talking_head: ", talking_head)
+             print("monologue: ", monologue)
+             history = history or []
+             # If chain is None, that is because no API key was provided.
+             output = "Please paste your OpenAI key from openai.com to use this app. " + str(datetime.datetime.now())
+             hidden_text = output
+
+             if chain:
+                 # Set OpenAI key
+                 import openai
+                 openai.api_key = api_key
+                 if not monologue:
+                     if use_embeddings:
+                         if inp and inp.strip() != "":
+                             if docsearch:
+                                 docs = docsearch.similarity_search(inp)
+                                 output = str(qa_chain.run(input_documents=docs, question=inp))
+                             else:
+                                 output, hidden_text = "Please supply some text in the Embeddings tab.", None
+                         else:
+                             output, hidden_text = "What's on your mind?", None
+                     else:
+                         output, hidden_text = run_chain(chain, inp, capture_hidden_text=trace_chain)
+                 else:
+                     output, hidden_text = inp, None
+
+             output = transform_text(output, express_chain, num_words, formality, anticipation_level, joy_level,
+                                     trust_level,
+                                     fear_level, surprise_level, sadness_level, disgust_level, anger_level,
+                                     lang_level, translate_to, literary_style, force_translate)
+
+             text_to_display = output
+             if trace_chain:
+                 text_to_display = hidden_text + "\n\n" + output
+             history.append((inp, text_to_display))
+
+             html_video, temp_file, html_audio, temp_aud_file = None, None, None, None
+             if speak_text:
+                 if talking_head:
+                     if len(output) <= MAX_TALKING_HEAD_TEXT_LENGTH:
+                         # html_video, temp_file = do_html_video_speak(output, translate_to)
+                         html_audio, temp_aud_file = do_html_audio_speak_azure(output, translate_to)
+                         html_video, temp_file = do_html_video_speak_sad_talker(temp_aud_file, translate_to)
+                     else:
+                         temp_file = LOOPING_TALKING_HEAD
+                         html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
+                         html_audio, temp_aud_file = do_html_audio_speak_azure(output, translate_to)
+                 else:
+                     temp_file = LOOPING_TALKING_HEAD
+                     html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
+                     html_audio, temp_aud_file = do_html_audio_speak_azure(output, translate_to)
+             else:
+                 if talking_head:
+                     temp_file = LOOPING_TALKING_HEAD
+                     html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
+                 else:
+                     # html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
+                     # html_video = create_html_video(temp_file, "128")
+                     pass
+
+         except Exception as e:
+             raise e
+         finally:
+             self.lock.release()
+         return history, history, html_video, temp_file, html_audio, temp_aud_file, ""
+         # return history, history, html_audio, temp_aud_file, ""
+
+
+ chat = ChatWrapper()
+
+ def do_html_audio_speak_azure(words_to_speak, azure_language):
+
+     html_audio = '<pre>no audio</pre>'
+
+     speech_key = os.environ["SPEECH_KEY"]
+     service_region = os.environ["SERVICE_REGION"]
+
+     speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
+     # Note: the voice setting will not overwrite the voice element in input SSML.
+     speech_config.speech_synthesis_voice_name = "zh-CN-XiaoxiaoNeural"
+
+     # Set the output audio file path and name
+     audio_config = speechsdk.audio.AudioOutputConfig(filename="audios/tempfile.mp3")
+
+     text = words_to_speak
+
+     # Synthesize to the audio file configured above.
+     speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
+
+     result = speech_synthesizer.speak_text_async(text).get()
+     # Check result
+     if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
+         print("Speech synthesized for text [{}]".format(text))
+         try:
+             temp_aud_file = gr.File("audios/tempfile.mp3")
+             temp_aud_file_url = "/file=" + temp_aud_file.value['name']
+             html_audio = f'<audio autoplay><source src={temp_aud_file_url} type="audio/mp3"></audio>'
+         except IOError as error:
+             # Could not write to file, exit gracefully
+             print(error)
+             return None, None
+     elif result.reason == speechsdk.ResultReason.Canceled:
+         cancellation_details = result.cancellation_details
+         print("Speech synthesis canceled: {}".format(cancellation_details.reason))
+         if cancellation_details.reason == speechsdk.CancellationReason.Error:
+             print("Error details: {}".format(cancellation_details.error_details))
+         # The response didn't contain audio data, exit gracefully
+         print("Could not stream audio")
+         return None, None
+
+     return html_audio, "audios/tempfile.mp3"
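+
+ # Usage sketch (assumes SPEECH_KEY and SERVICE_REGION are set and audios/ exists):
+ #     html_audio, mp3_path = do_html_audio_speak_azure("How are you?", "Chinese (Mandarin)")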
+
+
+ def do_html_audio_speak(words_to_speak, polly_language):
+     polly_client = boto3.Session(
+         aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+         aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+         region_name=os.environ["AWS_DEFAULT_REGION"]
+     ).client('polly')
+
+     # voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Female")
+     voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Male")
+     if not voice_id:
+         # voice_id = "Joanna"
+         voice_id = "Matthew"
+         language_code = "en-US"
+         engine = NEURAL_ENGINE
+     response = polly_client.synthesize_speech(
+         Text=words_to_speak,
+         OutputFormat='mp3',
+         VoiceId=voice_id,
+         LanguageCode=language_code,
+         Engine=engine
+     )
+
+     html_audio = '<pre>no audio</pre>'
+
+     # Save the audio stream returned by Amazon Polly on Lambda's temp directory
+     if "AudioStream" in response:
+         with closing(response["AudioStream"]) as stream:
+             # output = os.path.join("/tmp/", "speech.mp3")
+
+             try:
+                 with open('audios/tempfile.mp3', 'wb') as f:
+                     f.write(stream.read())
+                 temp_aud_file = gr.File("audios/tempfile.mp3")
+                 temp_aud_file_url = "/file=" + temp_aud_file.value['name']
+                 html_audio = f'<audio autoplay><source src={temp_aud_file_url} type="audio/mp3"></audio>'
+             except IOError as error:
+                 # Could not write to file, exit gracefully
+                 print(error)
+                 return None, None
+     else:
+         # The response didn't contain audio data, exit gracefully
+         print("Could not stream audio")
+         return None, None
+
+     return html_audio, "audios/tempfile.mp3"
+
+
+ def create_html_video(file_name, width):
+     # NOTE: the URL is built from the module-level tmp_file (a gr.File defined in
+     # the Blocks below); the file_name argument is currently unused.
+     temp_file_url = "/file=" + tmp_file.value['name']
+     html_video = f'<video width={width} height={width} autoplay muted loop><source src={temp_file_url} type="video/mp4" poster="Michelle.png"></video>'
+     return html_video
+
+ def ToBase64(file):
+     with open(file, 'rb') as fileObj:
+         image_data = fileObj.read()
+         base64_data = base64.b64encode(image_data)
+         return base64_data.decode()
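+
+ # Example: ToBase64 builds the data-URL payloads that the SadTalker request
+ # below sends, e.g. "data:image/png;base64," + ToBase64("images/Michelle.png")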
+
+
+ def do_html_video_speak_sad_talker(temp_aud_file, azure_language):
+
+     GRADIO_URL = os.environ["GRADIO_URL"]
+
+     img_data = ToBase64("images/Michelle.png")
+     audio_data = ToBase64(temp_aud_file)
+
+     response = requests.post(GRADIO_URL + "/run/sad_talker", json={
+         "data": [
+             "data:image/png;base64," + img_data,
+             {"name": "audio.wav", "data": "data:audio/wav;base64," + audio_data},
+             "crop",
+             False,
+             False,
+         ]
+     }, timeout=3000)
+     print(response.text)
+     res = response.json()
+
+     data = res["data"]
+     print(data)
+     video_url = GRADIO_URL + "/file=" + data[0][0]['name']
+     print(video_url)
+
+     html_video = '<pre>no video</pre>'
+
+     # with open('videos/tempfile.mp4', 'wb') as f:
+     #     f.write(response_stream.read())
+     # temp_file = gr.File("videos/tempfile.mp4")
+     # temp_file_url = "/file=" + temp_file.value['name']
+     temp_file_url = video_url
+     html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="Michelle.png"></video>'
+
+     return html_video, "videos/tempfile.mp4"
+
+
+ def do_html_video_speak(words_to_speak, azure_language):
+     azure_voice = AZURE_VOICE_DATA.get_voice(azure_language, "Male")
+     if not azure_voice:
+         azure_voice = "en-US-ChristopherNeural"
+
+     headers = {"Authorization": f"Bearer {os.environ['EXHUMAN_API_KEY']}"}
+     body = {
+         'bot_name': 'Michelle',
+         'bot_response': words_to_speak,
+         'azure_voice': azure_voice,
+         'azure_style': 'friendly',
+         'animation_pipeline': 'high_speed',
+     }
+     api_endpoint = "https://api.exh.ai/animations/v1/generate_lipsync"
+     res = requests.post(api_endpoint, json=body, headers=headers)
+     print("res.status_code: ", res.status_code)
+
+     html_video = '<pre>no video</pre>'
+     if isinstance(res.content, bytes):
+         response_stream = io.BytesIO(res.content)
+         print("len(res.content)): ", len(res.content))
+
+         with open('videos/tempfile.mp4', 'wb') as f:
+             f.write(response_stream.read())
+         temp_file = gr.File("videos/tempfile.mp4")
+         temp_file_url = "/file=" + temp_file.value['name']
+         html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="Michelle.png"></video>'
+     else:
+         print('video url unknown')
+     return html_video, "videos/tempfile.mp4"
+
+
+ def update_selected_tools(widget, state, llm):
+     if widget:
+         state = widget
+         chain, express_chain, memory = load_chain(state, llm)
+         return state, llm, chain, express_chain
+
+
+ def update_talking_head(widget, state):
+     if widget:
+         state = widget
+
+         video_html_talking_head = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
+         return state, video_html_talking_head
+     else:
+         # return state, create_html_video(LOOPING_TALKING_HEAD, "32")
+         return None, "<pre></pre>"
+
+
+ def update_foo(widget, state):
+     if widget:
+         state = widget
+     return state
+
+
+ # Pertains to question answering functionality
+ def update_embeddings(embeddings_text, embeddings, qa_chain):
+     if embeddings_text:
+         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+         texts = text_splitter.split_text(embeddings_text)
+
+         docsearch = FAISS.from_texts(texts, embeddings)
+         print("Embeddings updated")
+         return docsearch
+
+
+ # Pertains to question answering functionality
+ def update_use_embeddings(widget, state):
+     if widget:
+         state = widget
+     return state
+
+
+ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
+     llm_state = gr.State()
+     history_state = gr.State()
+     chain_state = gr.State()
+     express_chain_state = gr.State()
+     tools_list_state = gr.State(TOOLS_DEFAULT_LIST)
+     trace_chain_state = gr.State(False)
+     speak_text_state = gr.State(True)
+     talking_head_state = gr.State(True)
+     monologue_state = gr.State(False)  # Takes the input and repeats it back to the user, optionally transforming it.
+     force_translate_state = gr.State(FORCE_TRANSLATE_DEFAULT)  #
+     memory_state = gr.State()
+
+     # Pertains to Express-inator functionality
+     num_words_state = gr.State(NUM_WORDS_DEFAULT)
+     formality_state = gr.State(FORMALITY_DEFAULT)
+     anticipation_level_state = gr.State(EMOTION_DEFAULT)
+     joy_level_state = gr.State(EMOTION_DEFAULT)
+     trust_level_state = gr.State(EMOTION_DEFAULT)
+     fear_level_state = gr.State(EMOTION_DEFAULT)
+     surprise_level_state = gr.State(EMOTION_DEFAULT)
+     sadness_level_state = gr.State(EMOTION_DEFAULT)
+     disgust_level_state = gr.State(EMOTION_DEFAULT)
+     anger_level_state = gr.State(EMOTION_DEFAULT)
+     lang_level_state = gr.State(LANG_LEVEL_DEFAULT)
+     translate_to_state = gr.State(TRANSLATE_TO_DEFAULT)
+     literary_style_state = gr.State(LITERARY_STYLE_DEFAULT)
+
+     # Pertains to WHISPER functionality
+     whisper_lang_state = gr.State(WHISPER_DETECT_LANG)
+
+     # Pertains to question answering functionality
+     embeddings_state = gr.State()
+     qa_chain_state = gr.State()
+     docsearch_state = gr.State()
+     use_embeddings_state = gr.State(False)
+
+     use_gpt4_state = gr.State(USE_GPT4_DEFAULT)
+
+     with gr.Tab("Chat"):
+         with gr.Row():
+             with gr.Column():
+                 gr.HTML(
+                     """<b><center>GPT + CHAT</center></b>
+                     <p><center>Hit Enter after pasting your OpenAI API key.</center></p>
+
+                     """)
+
+                 openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...) and hit Enter",
+                                                     show_label=False, lines=1, type='password')
+
+         with gr.Row():
+             with gr.Column(scale=1, min_width=TALKING_HEAD_WIDTH, visible=True):
+                 # speak_text_cb = gr.Checkbox(label="Enable speech", value=False)
+                 # speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
+                 #                      outputs=[speak_text_state])
+
+                 my_file = gr.File(label="Upload a file", type="file", visible=False)
+                 tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
+                 # tmp_file_url = "/file=" + tmp_file.value['name']
+                 htm_video = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
+                 video_html = gr.HTML(htm_video)
+
+                 # my_aud_file = gr.File(label="Audio file", type="file", visible=True)
+                 tmp_aud_file = gr.File("audios/tempfile.mp3", visible=False)
+                 tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
+                 htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
+                 audio_html = gr.HTML(htm_audio)
+
+             with gr.Column(scale=7):
+                 chatbot = gr.Chatbot()
+
+         with gr.Row():
+             message = gr.Textbox(label="What's on your mind??",
+                                  placeholder="What's the answer to life, the universe, and everything?",
+                                  lines=1)
+             submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
+
+         # UNCOMMENT TO USE WHISPER
+         with gr.Row():
+             audio_comp = gr.Microphone(source="microphone", type="filepath", label="Just say it!",
+                                        interactive=True, streaming=False)
+             audio_comp.change(transcribe, inputs=[audio_comp, whisper_lang_state], outputs=[message])
+
+         # TEMPORARY FOR TESTING
+         # with gr.Row():
+         #     audio_comp_tb = gr.Textbox(label="Just say it!", lines=1)
+         #     audio_comp_tb.submit(transcribe_dummy, inputs=[audio_comp_tb, whisper_lang_state], outputs=[message])
+
+         gr.Examples(
+             examples=["How many people live in Canada?",
+                       "What is 2 to the 30th power?",
+                       "If x+y=10 and x-y=4, what are x and y?",
+                       "How much did it rain in SF today?",
+                       "Get me information about the movie 'Avatar'",
+                       "What are the top tech headlines in the US?",
+                       "On the desk, you see two blue booklets, two purple booklets, and two yellow pairs of sunglasses - "
+                       "if I remove all the pairs of sunglasses from the desk, how many purple items remain on it?"],
+             inputs=message
+         )
+
+     # with gr.Tab("Settings"):
+     #     tools_cb_group = gr.CheckboxGroup(label="Tools:", choices=TOOLS_LIST,
+     #                                       value=TOOLS_DEFAULT_LIST)
+     #     tools_cb_group.change(update_selected_tools,
+     #                           inputs=[tools_cb_group, tools_list_state, llm_state],
+     #                           outputs=[tools_list_state, llm_state, chain_state, express_chain_state])
+
+     #     trace_chain_cb = gr.Checkbox(label="Show reasoning chain in chat bubble", value=False)
+     #     trace_chain_cb.change(update_foo, inputs=[trace_chain_cb, trace_chain_state],
+     #                           outputs=[trace_chain_state])
+
+     #     force_translate_cb = gr.Checkbox(label="Force translation to selected Output Language",
+     #                                      value=FORCE_TRANSLATE_DEFAULT)
+     #     force_translate_cb.change(update_foo, inputs=[force_translate_cb, force_translate_state],
+     #                               outputs=[force_translate_state])
+
+     #     # speak_text_cb = gr.Checkbox(label="Speak text from agent", value=False)
+     #     # speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
+     #     #                      outputs=[speak_text_state])
+
+     #     talking_head_cb = gr.Checkbox(label="Show talking head", value=True)
+     #     talking_head_cb.change(update_talking_head, inputs=[talking_head_cb, talking_head_state],
+     #                            outputs=[talking_head_state, video_html])
+
+     #     monologue_cb = gr.Checkbox(label="Babel fish mode (translate/restate what you enter, no conversational agent)",
+     #                                value=False)
+     #     monologue_cb.change(update_foo, inputs=[monologue_cb, monologue_state],
+     #                         outputs=[monologue_state])
+
+     #     use_gpt4_cb = gr.Checkbox(label="Use GPT-4 (experimental) if your OpenAI API has access to it",
+     #                               value=USE_GPT4_DEFAULT)
+     #     use_gpt4_cb.change(set_openai_api_key,
+     #                        inputs=[openai_api_key_textbox, use_gpt4_cb],
+     #                        outputs=[chain_state, express_chain_state, llm_state, embeddings_state,
+     #                                 qa_chain_state, memory_state, use_gpt4_state])
+
+     #     reset_btn = gr.Button(value="Reset chat", variant="secondary").style(full_width=False)
+     #     reset_btn.click(reset_memory, inputs=[history_state, memory_state],
+     #                     outputs=[chatbot, history_state, memory_state])
+
+     # with gr.Tab("Whisper STT"):
+     #     whisper_lang_radio = gr.Radio(label="Whisper speech-to-text language:", choices=[
+     #         WHISPER_DETECT_LANG, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
+     #         "Danish", "Dutch", "English (Australian)", "English (British)", "English (Indian)", "English (New Zealand)",
+     #         "English (South African)", "English (US)", "English (Welsh)", "Finnish", "French", "French (Canadian)",
+     #         "German", "German (Austrian)", "Georgian", "Hindi", "Icelandic", "Indonesian", "Italian", "Japanese",
+     #         "Korean", "Norwegian", "Polish",
+     #         "Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
+     #         "Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh"],
+     #         value=WHISPER_DETECT_LANG)
+
+     #     whisper_lang_radio.change(update_foo,
+     #                               inputs=[whisper_lang_radio, whisper_lang_state],
+     #                               outputs=[whisper_lang_state])
+
+     # with gr.Tab("Output Language"):
+     #     lang_level_radio = gr.Radio(label="Language level:", choices=[
+     #         LANG_LEVEL_DEFAULT, "1st grade", "2nd grade", "3rd grade", "4th grade", "5th grade", "6th grade",
+     #         "7th grade", "8th grade", "9th grade", "10th grade", "11th grade", "12th grade", "University"],
+     #         value=LANG_LEVEL_DEFAULT)
+     #     lang_level_radio.change(update_foo, inputs=[lang_level_radio, lang_level_state],
+     #                             outputs=[lang_level_state])
+
+     #     translate_to_radio = gr.Radio(label="Language:", choices=[
+     #         TRANSLATE_TO_DEFAULT, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
+     #         "Danish", "Dutch", "English (Australian)", "English (British)", "English (Indian)", "English (New Zealand)",
+     #         "English (South African)", "English (US)", "English (Welsh)", "Finnish", "French", "French (Canadian)",
+     #         "German", "German (Austrian)", "Georgian", "Hindi", "Icelandic", "Indonesian", "Italian", "Japanese",
+     #         "Korean", "Norwegian", "Polish",
+     #         "Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
+     #         "Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh",
+     #         "emojis", "Gen Z slang", "how the stereotypical Karen would say it", "Klingon", "Neanderthal",
+     #         "Pirate", "Strange Planet expospeak technical talk", "Yoda"],
+     #         value=TRANSLATE_TO_DEFAULT)
+
+     #     translate_to_radio.change(update_foo,
+     #                               inputs=[translate_to_radio, translate_to_state],
+     #                               outputs=[translate_to_state])
+
+     # with gr.Tab("Formality"):
+     #     formality_radio = gr.Radio(label="Formality:",
+     #                                choices=[FORMALITY_DEFAULT, "Casual", "Polite", "Honorific"],
+     #                                value=FORMALITY_DEFAULT)
+     #     formality_radio.change(update_foo,
+     #                            inputs=[formality_radio, formality_state],
+     #                            outputs=[formality_state])
+
+     # with gr.Tab("Lit Style"):
+     #     literary_style_radio = gr.Radio(label="Literary style:", choices=[
+     #         LITERARY_STYLE_DEFAULT, "Prose", "Story", "Summary", "Outline", "Bullets", "Poetry", "Haiku", "Limerick",
+     #         "Rap",
+     #         "Joke", "Knock-knock", "FAQ"],
+     #         value=LITERARY_STYLE_DEFAULT)
+
+     #     literary_style_radio.change(update_foo,
+     #                                 inputs=[literary_style_radio, literary_style_state],
+     #                                 outputs=[literary_style_state])
+
+     # with gr.Tab("Emotions"):
+     #     anticipation_level_radio = gr.Radio(label="Anticipation level:",
+     #                                         choices=[EMOTION_DEFAULT, "Interest", "Anticipation", "Vigilance"],
+     #                                         value=EMOTION_DEFAULT)
+     #     anticipation_level_radio.change(update_foo,
+     #                                     inputs=[anticipation_level_radio, anticipation_level_state],
+     #                                     outputs=[anticipation_level_state])
+
+     #     joy_level_radio = gr.Radio(label="Joy level:",
+     #                                choices=[EMOTION_DEFAULT, "Serenity", "Joy", "Ecstasy"],
+     #                                value=EMOTION_DEFAULT)
+     #     joy_level_radio.change(update_foo,
+     #                            inputs=[joy_level_radio, joy_level_state],
+     #                            outputs=[joy_level_state])
+
+     #     trust_level_radio = gr.Radio(label="Trust level:",
+     #                                  choices=[EMOTION_DEFAULT, "Acceptance", "Trust", "Admiration"],
+     #                                  value=EMOTION_DEFAULT)
+     #     trust_level_radio.change(update_foo,
+     #                              inputs=[trust_level_radio, trust_level_state],
+     #                              outputs=[trust_level_state])
+
+     #     fear_level_radio = gr.Radio(label="Fear level:",
+     #                                 choices=[EMOTION_DEFAULT, "Apprehension", "Fear", "Terror"],
+     #                                 value=EMOTION_DEFAULT)
+     #     fear_level_radio.change(update_foo,
+     #                             inputs=[fear_level_radio, fear_level_state],
+     #                             outputs=[fear_level_state])
+
+     #     surprise_level_radio = gr.Radio(label="Surprise level:",
+     #                                     choices=[EMOTION_DEFAULT, "Distraction", "Surprise", "Amazement"],
+     #                                     value=EMOTION_DEFAULT)
+     #     surprise_level_radio.change(update_foo,
+     #                                 inputs=[surprise_level_radio, surprise_level_state],
+     #                                 outputs=[surprise_level_state])
+
+     #     sadness_level_radio = gr.Radio(label="Sadness level:",
+     #                                    choices=[EMOTION_DEFAULT, "Pensiveness", "Sadness", "Grief"],
+     #                                    value=EMOTION_DEFAULT)
+     #     sadness_level_radio.change(update_foo,
+     #                                inputs=[sadness_level_radio, sadness_level_state],
+     #                                outputs=[sadness_level_state])
+
+     #     disgust_level_radio = gr.Radio(label="Disgust level:",
+     #                                    choices=[EMOTION_DEFAULT, "Boredom", "Disgust", "Loathing"],
+     #                                    value=EMOTION_DEFAULT)
+     #     disgust_level_radio.change(update_foo,
+     #                                inputs=[disgust_level_radio, disgust_level_state],
+     #                                outputs=[disgust_level_state])
+
+     #     anger_level_radio = gr.Radio(label="Anger level:",
+     #                                  choices=[EMOTION_DEFAULT, "Annoyance", "Anger", "Rage"],
+     #                                  value=EMOTION_DEFAULT)
+     #     anger_level_radio.change(update_foo,
+     #                              inputs=[anger_level_radio, anger_level_state],
+     #                              outputs=[anger_level_state])
+
+     # with gr.Tab("Max Words"):
+     #     num_words_slider = gr.Slider(label="Max number of words to generate (0 for don't care)",
+     #                                  value=NUM_WORDS_DEFAULT, minimum=0, maximum=MAX_WORDS, step=10)
+     #     num_words_slider.change(update_foo,
+     #                             inputs=[num_words_slider, num_words_state],
+     #                             outputs=[num_words_state])
+
+     # with gr.Tab("Embeddings"):
+     #     embeddings_text_box = gr.Textbox(label="Enter text for embeddings and hit Create:",
+     #                                      lines=20)
+
+     #     with gr.Row():
+     #         use_embeddings_cb = gr.Checkbox(label="Use embeddings", value=False)
+     #         use_embeddings_cb.change(update_use_embeddings, inputs=[use_embeddings_cb, use_embeddings_state],
+     #                                  outputs=[use_embeddings_state])
+
+     #         embeddings_text_submit = gr.Button(value="Create", variant="secondary").style(full_width=False)
+     #         embeddings_text_submit.click(update_embeddings,
+     #                                      inputs=[embeddings_text_box, embeddings_state, qa_chain_state],
+     #                                      outputs=[docsearch_state])
+
+     # gr.HTML("""
+     #     <p>This application, developed by <a href='https://www.linkedin.com/in/javafxpert/'>James L. Weaver</a>,
+     #     demonstrates a conversational agent implemented with OpenAI GPT-3.5 and LangChain.
+     #     When necessary, it leverages tools for complex math, searching the internet, and accessing news and weather.
+     #     Uses talking heads from <a href='https://exh.ai/'>Ex-Human</a>.
+     #     For faster inference without waiting in queue, you may duplicate the space.
+     #     </p>""")
+
+     # gr.HTML("""
+     #     <form action="https://www.paypal.com/donate" method="post" target="_blank">
+     #         <input type="hidden" name="business" value="AK8BVNALBXSPQ" />
+     #         <input type="hidden" name="no_recurring" value="0" />
+     #         <input type="hidden" name="item_name" value="Please consider helping to defray the cost of APIs such as SerpAPI and WolframAlpha that this app uses." />
+     #         <input type="hidden" name="currency_code" value="USD" />
+     #         <input type="image" src="https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif" border="0" name="submit" title="PayPal - The safer, easier way to pay online!" alt="Donate with PayPal button" />
+     #         <img alt="" border="0" src="https://www.paypal.com/en_US/i/scr/pixel.gif" width="1" height="1" />
+     #     </form>
+     # """)
+
+     # gr.HTML("""<center>
+     #     <a href="https://huggingface.co/spaces/JavaFXpert/Chat-GPT-LangChain?duplicate=true">
+     #         <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
+     #     Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a>
+     # </center>""")
+
+     message.submit(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
+                                  speak_text_state, talking_head_state, monologue_state,
+                                  express_chain_state, num_words_state, formality_state,
+                                  anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
+                                  surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
+                                  lang_level_state, translate_to_state, literary_style_state,
+                                  qa_chain_state, docsearch_state, use_embeddings_state,
+                                  force_translate_state],
+                    outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
+
+     submit.click(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
+                                speak_text_state, talking_head_state, monologue_state,
+                                express_chain_state, num_words_state, formality_state,
+                                anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
+                                surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
+                                lang_level_state, translate_to_state, literary_style_state,
+                                qa_chain_state, docsearch_state, use_embeddings_state,
+                                force_translate_state],
+                  outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
+
+     openai_api_key_textbox.change(set_openai_api_key,
+                                   inputs=[openai_api_key_textbox, use_gpt4_state],
+                                   outputs=[chain_state, express_chain_state, llm_state, embeddings_state,
+                                            qa_chain_state, memory_state, use_gpt4_state])
+     openai_api_key_textbox.submit(set_openai_api_key,
+                                   inputs=[openai_api_key_textbox, use_gpt4_state],
+                                   outputs=[chain_state, express_chain_state, llm_state, embeddings_state,
+                                            qa_chain_state, memory_state, use_gpt4_state])
+
+ block.launch(debug=True)
app.py CHANGED
@@ -1,93 +1,63 @@
- import io
- import os
- import ssl
- from contextlib import closing
- from typing import Optional, Tuple
- import datetime
-
- import boto3
  import gradio as gr
  import requests
-
- # UNCOMMENT TO USE WHISPER
- import warnings
  import whisper
 
- from langchain import ConversationChain, LLMChain
-
- from langchain.agents import load_tools, initialize_agent
- from langchain.chains.conversation.memory import ConversationBufferMemory
- from langchain.llms import OpenAI, OpenAIChat
- from threading import Lock
-
- # Console to variable
- from io import StringIO
- import sys
- import re
-
- from openai.error import AuthenticationError, InvalidRequestError, RateLimitError
-
- # Pertains to Express-inator functionality
- from langchain.prompts import PromptTemplate
 
  from polly_utils import PollyVoiceData, NEURAL_ENGINE
  from azure_utils import AzureVoiceData
 
- # Pertains to question answering functionality
- from langchain.embeddings.openai import OpenAIEmbeddings
- from langchain.text_splitter import CharacterTextSplitter
- from langchain.vectorstores.faiss import FAISS
- from langchain.docstore.document import Document
- from langchain.chains.question_answering import load_qa_chain
- import azure.cognitiveservices.speech as speechsdk
- import base64
-
-
- news_api_key = os.environ["NEWS_API_KEY"]
 
- tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
 
- TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'pal-math',
-               'pal-colored-objects']  # 'google-search','news-api','tmdb-api','open-meteo-api'
- TOOLS_DEFAULT_LIST = ['serpapi']
- BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
- # AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. It is not necessary to hit a button or key after pasting it."
- AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. "
- MAX_TOKENS = 512
 
  LOOPING_TALKING_HEAD = "videos/Michelle.mp4"
  TALKING_HEAD_WIDTH = "192"
  MAX_TALKING_HEAD_TEXT_LENGTH = 100
 
- # Pertains to Express-inator functionality
- NUM_WORDS_DEFAULT = 0
- MAX_WORDS = 400
- FORMALITY_DEFAULT = "N/A"
- TEMPERATURE_DEFAULT = 0.5
- EMOTION_DEFAULT = "N/A"
- LANG_LEVEL_DEFAULT = "University"
- TRANSLATE_TO_DEFAULT = "Chinese (Mandarin)"
- LITERARY_STYLE_DEFAULT = "N/A"
- PROMPT_TEMPLATE = PromptTemplate(
-     input_variables=["original_words", "num_words", "formality", "emotions", "lang_level", "translate_to",
-                      "literary_style"],
-     template="Restate {num_words}{formality}{emotions}{lang_level}{translate_to}{literary_style}the following: \n{original_words}\n",
- )
-
- FORCE_TRANSLATE_DEFAULT = True
- USE_GPT4_DEFAULT = False
 
- POLLY_VOICE_DATA = PollyVoiceData()
- AZURE_VOICE_DATA = AzureVoiceData()
 
- # Pertains to WHISPER functionality
- WHISPER_DETECT_LANG = "Chinese (Mandarin)"
 
- # UNCOMMENT TO USE WHISPER
- warnings.filterwarnings("ignore")
- WHISPER_MODEL = whisper.load_model("tiny")
- print("WHISPER_MODEL", WHISPER_MODEL)
 
 
  # UNCOMMENT TO USE WHISPER
  def transcribe(aud_inp, whisper_lang):
@@ -108,361 +78,19 @@ def transcribe(aud_inp, whisper_lang):
          result_text = result.text
      return result_text
 
 
- # Temporarily address Wolfram Alpha SSL certificate issue
- ssl._create_default_https_context = ssl._create_unverified_context
-
-
- # TEMPORARY FOR TESTING
- def transcribe_dummy(aud_inp_tb, whisper_lang):
-     if aud_inp_tb is None:
-         return ""
-     # aud = whisper.load_audio(aud_inp)
-     # aud = whisper.pad_or_trim(aud)
-     # mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)
-     # _, probs = WHISPER_MODEL.detect_language(mel)
-     # options = whisper.DecodingOptions()
-     # options = whisper.DecodingOptions(language="ja")
-     # result = whisper.decode(WHISPER_MODEL, mel, options)
-     result_text = "Whisper will detect language"
-     if whisper_lang != WHISPER_DETECT_LANG:
-         whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
-         result_text = f"Whisper will use lang code: {whisper_lang_code}"
-     print("result_text", result_text)
-     return aud_inp_tb
-
-
- # Pertains to Express-inator functionality
- def transform_text(desc, express_chain, num_words, formality,
-                    anticipation_level, joy_level, trust_level,
-                    fear_level, surprise_level, sadness_level, disgust_level, anger_level,
-                    lang_level, translate_to, literary_style, force_translate):
-     num_words_prompt = ""
-     if num_words and int(num_words) != 0:
-         num_words_prompt = "using up to " + str(num_words) + " words, "
-
-     # Change some arguments to lower case
-     formality = formality.lower()
-     anticipation_level = anticipation_level.lower()
-     joy_level = joy_level.lower()
-     trust_level = trust_level.lower()
-     fear_level = fear_level.lower()
-     surprise_level = surprise_level.lower()
-     sadness_level = sadness_level.lower()
-     disgust_level = disgust_level.lower()
-     anger_level = anger_level.lower()
-
-     formality_str = ""
-     if formality != "n/a":
-         formality_str = "in a " + formality + " manner, "
-
-     # put all emotions into a list
-     emotions = []
-     if anticipation_level != "n/a":
-         emotions.append(anticipation_level)
-     if joy_level != "n/a":
-         emotions.append(joy_level)
-     if trust_level != "n/a":
-         emotions.append(trust_level)
-     if fear_level != "n/a":
-         emotions.append(fear_level)
-     if surprise_level != "n/a":
-         emotions.append(surprise_level)
-     if sadness_level != "n/a":
-         emotions.append(sadness_level)
-     if disgust_level != "n/a":
-         emotions.append(disgust_level)
-     if anger_level != "n/a":
-         emotions.append(anger_level)
-
-     emotions_str = ""
-     if len(emotions) > 0:
-         if len(emotions) == 1:
-             emotions_str = "with emotion of " + emotions[0] + ", "
-         else:
-             emotions_str = "with emotions of " + ", ".join(emotions[:-1]) + " and " + emotions[-1] + ", "
-
-     lang_level_str = ""
-     if lang_level != LANG_LEVEL_DEFAULT:
-         lang_level_str = "at a level that a person in " + lang_level + " can easily comprehend, " if translate_to == TRANSLATE_TO_DEFAULT else ""
-
-     translate_to_str = ""
-     if translate_to != TRANSLATE_TO_DEFAULT and (force_translate or lang_level != LANG_LEVEL_DEFAULT):
-         translate_to_str = "translated to " + translate_to + (
-             "" if lang_level == LANG_LEVEL_DEFAULT else " at a level that a person in " + lang_level + " can easily comprehend") + ", "
-
-     literary_style_str = ""
-     if literary_style != LITERARY_STYLE_DEFAULT:
-         if literary_style == "Prose":
-             literary_style_str = "as prose, "
-         if literary_style == "Story":
-             literary_style_str = "as a story, "
-         elif literary_style == "Summary":
-             literary_style_str = "as a summary, "
-         elif literary_style == "Outline":
-             literary_style_str = "as an outline numbers and lower case letters, "
-         elif literary_style == "Bullets":
-             literary_style_str = "as bullet points using bullets, "
-         elif literary_style == "Poetry":
-             literary_style_str = "as a poem, "
-         elif literary_style == "Haiku":
-             literary_style_str = "as a haiku, "
-         elif literary_style == "Limerick":
-             literary_style_str = "as a limerick, "
-         elif literary_style == "Rap":
-             literary_style_str = "as a rap, "
-         elif literary_style == "Joke":
-             literary_style_str = "as a very funny joke with a setup and punchline, "
-         elif literary_style == "Knock-knock":
-             literary_style_str = "as a very funny knock-knock joke, "
-         elif literary_style == "FAQ":
-             literary_style_str = "as a FAQ with several questions and answers, "
-
-     formatted_prompt = PROMPT_TEMPLATE.format(
-         original_words=desc,
-         num_words=num_words_prompt,
-         formality=formality_str,
-         emotions=emotions_str,
-         lang_level=lang_level_str,
-         translate_to=translate_to_str,
-         literary_style=literary_style_str
-     )
-
-     trans_instr = num_words_prompt + formality_str + emotions_str + lang_level_str + translate_to_str + literary_style_str
-     if express_chain and len(trans_instr.strip()) > 0:
-         generated_text = express_chain.run(
-             {'original_words': desc, 'num_words': num_words_prompt, 'formality': formality_str,
-              'emotions': emotions_str, 'lang_level': lang_level_str, 'translate_to': translate_to_str,
-              'literary_style': literary_style_str}).strip()
-     else:
-         print("Not transforming text")
-         generated_text = desc
-
-     # replace all newlines with <br> in generated_text
-     generated_text = generated_text.replace("\n", "\n\n")
-
-     prompt_plus_generated = "GPT prompt: " + formatted_prompt + "\n\n" + generated_text
-
-     print("\n==== date/time: " + str(datetime.datetime.now() - datetime.timedelta(hours=5)) + " ====")
-     print("prompt_plus_generated: " + prompt_plus_generated)
-
-     return generated_text
-
-
- def load_chain(tools_list, llm):
-     chain = None
-     express_chain = None
-     memory = None
-     if llm:
-         print("\ntools_list", tools_list)
-         tool_names = tools_list
-         tools = load_tools(tool_names, llm=llm, news_api_key=news_api_key, tmdb_bearer_token=tmdb_bearer_token)
-
-         memory = ConversationBufferMemory(memory_key="chat_history")
-
-         chain = initialize_agent(tools, llm, agent="conversational-react-description", verbose=True, memory=memory)
-         express_chain = LLMChain(llm=llm, prompt=PROMPT_TEMPLATE, verbose=True)
-     return chain, express_chain, memory
-
-
- def set_openai_api_key(api_key, use_gpt4):
-     """Set the api key and return chain.
-     If no api_key, then None is returned.
-     """
-     if api_key and api_key.startswith("sk-") and len(api_key) > 50:
-         os.environ["OPENAI_API_KEY"] = api_key
-         print("\n\n ++++++++++++++ Setting OpenAI API key ++++++++++++++ \n\n")
-         print(str(datetime.datetime.now()) + ": Before OpenAI, OPENAI_API_KEY length: " + str(
-             len(os.environ["OPENAI_API_KEY"])))
-
-         if use_gpt4:
-             llm = OpenAIChat(temperature=0, max_tokens=MAX_TOKENS, model_name="gpt-4")
-             print("Trying to use llm OpenAIChat with gpt-4")
-         else:
-             print("Trying to use llm OpenAI with text-davinci-003")
-             llm = OpenAI(temperature=0, max_tokens=MAX_TOKENS, model_name="text-davinci-003")
-
-         print(str(datetime.datetime.now()) + ": After OpenAI, OPENAI_API_KEY length: " + str(
-             len(os.environ["OPENAI_API_KEY"])))
-         chain, express_chain, memory = load_chain(TOOLS_DEFAULT_LIST, llm)
-
-         # Pertains to question answering functionality
-         embeddings = OpenAIEmbeddings()
-
-         if use_gpt4:
-             qa_chain = load_qa_chain(OpenAIChat(temperature=0, model_name="gpt-4"), chain_type="stuff")
-             print("Trying to use qa_chain OpenAIChat with gpt-4")
-         else:
-             print("Trying to use qa_chain OpenAI with text-davinci-003")
-             qa_chain = OpenAI(temperature=0, max_tokens=MAX_TOKENS, model_name="text-davinci-003")
-
-         print(str(datetime.datetime.now()) + ": After load_chain, OPENAI_API_KEY length: " + str(
-             len(os.environ["OPENAI_API_KEY"])))
-         os.environ["OPENAI_API_KEY"] = ""
-         return chain, express_chain, llm, embeddings, qa_chain, memory, use_gpt4
-     return None, None, None, None, None, None, None
-
-
- def run_chain(chain, inp, capture_hidden_text):
-     output = ""
-     hidden_text = None
-     if capture_hidden_text:
-         error_msg = None
-         tmp = sys.stdout
-         hidden_text_io = StringIO()
-         sys.stdout = hidden_text_io
-
-         try:
-             output = chain.run(input=inp)
-         except AuthenticationError as ae:
-             error_msg = AUTH_ERR_MSG + str(datetime.datetime.now()) + ". " + str(ae)
-             print("error_msg", error_msg)
-         except RateLimitError as rle:
-             error_msg = "\n\nRateLimitError: " + str(rle)
-         except ValueError as ve:
-             error_msg = "\n\nValueError: " + str(ve)
-         except InvalidRequestError as ire:
-             error_msg = "\n\nInvalidRequestError: " + str(ire)
-         except Exception as e:
-             error_msg = "\n\n" + BUG_FOUND_MSG + ":\n\n" + str(e)
-
-         sys.stdout = tmp
-         hidden_text = hidden_text_io.getvalue()
-
-         # remove escape characters from hidden_text
-         hidden_text = re.sub(r'\x1b[^m]*m', '', hidden_text)
-
-         # remove "Entering new AgentExecutor chain..." from hidden_text
-         hidden_text = re.sub(r"Entering new AgentExecutor chain...\n", "", hidden_text)
-
-         # remove "Finished chain." from hidden_text
-         hidden_text = re.sub(r"Finished chain.", "", hidden_text)
-
-         # Add newline after "Thought:" "Action:" "Observation:" "Input:" and "AI:"
-         hidden_text = re.sub(r"Thought:", "\n\nThought:", hidden_text)
-         hidden_text = re.sub(r"Action:", "\n\nAction:", hidden_text)
-         hidden_text = re.sub(r"Observation:", "\n\nObservation:", hidden_text)
-         hidden_text = re.sub(r"Input:", "\n\nInput:", hidden_text)
-         hidden_text = re.sub(r"AI:", "\n\nAI:", hidden_text)
-
-         if error_msg:
-             hidden_text += error_msg
-
-         print("hidden_text: ", hidden_text)
-     else:
-         try:
-             output = chain.run(input=inp)
-         except AuthenticationError as ae:
-             output = AUTH_ERR_MSG + str(datetime.datetime.now()) + ". " + str(ae)
-             print("output", output)
-         except RateLimitError as rle:
-             output = "\n\nRateLimitError: " + str(rle)
-         except ValueError as ve:
-             output = "\n\nValueError: " + str(ve)
-         except InvalidRequestError as ire:
-             output = "\n\nInvalidRequestError: " + str(ire)
-         except Exception as e:
-             output = "\n\n" + BUG_FOUND_MSG + ":\n\n" + str(e)
-
-     return output, hidden_text
-
-
- def reset_memory(history, memory):
-     # memory.clear()
-     history = []
-     return history, history, memory
-
-
- class ChatWrapper:
-
-     def __init__(self):
-         self.lock = Lock()
-
-     def __call__(
-             self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain: Optional[ConversationChain],
-             trace_chain: bool, speak_text: bool, talking_head: bool, monologue: bool, express_chain: Optional[LLMChain],
-             num_words, formality, anticipation_level, joy_level, trust_level,
385
- fear_level, surprise_level, sadness_level, disgust_level, anger_level,
386
- lang_level, translate_to, literary_style, qa_chain, docsearch, use_embeddings, force_translate
387
- ):
388
- """Execute the chat functionality."""
389
- self.lock.acquire()
390
- try:
391
- print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
392
- print("inp: " + inp)
393
- print("trace_chain: ", trace_chain)
394
- print("speak_text: ", speak_text)
395
- print("talking_head: ", talking_head)
396
- print("monologue: ", monologue)
397
- history = history or []
398
- # If chain is None, that is because no API key was provided.
399
- output = "Please paste your OpenAI key from openai.com to use this app. " + str(datetime.datetime.now())
400
- hidden_text = output
401
-
402
- if chain:
403
- # Set OpenAI key
404
- import openai
405
- openai.api_key = api_key
406
- if not monologue:
407
- if use_embeddings:
408
- if inp and inp.strip() != "":
409
- if docsearch:
410
- docs = docsearch.similarity_search(inp)
411
- output = str(qa_chain.run(input_documents=docs, question=inp))
412
- else:
413
- output, hidden_text = "Please supply some text in the the Embeddings tab.", None
414
- else:
415
- output, hidden_text = "What's on your mind?", None
416
- else:
417
- output, hidden_text = run_chain(chain, inp, capture_hidden_text=trace_chain)
418
- else:
419
- output, hidden_text = inp, None
420
-
421
- output = transform_text(output, express_chain, num_words, formality, anticipation_level, joy_level,
422
- trust_level,
423
- fear_level, surprise_level, sadness_level, disgust_level, anger_level,
424
- lang_level, translate_to, literary_style, force_translate)
425
-
426
- text_to_display = output
427
- if trace_chain:
428
- text_to_display = hidden_text + "\n\n" + output
429
- history.append((inp, text_to_display))
430
-
431
- html_video, temp_file, html_audio, temp_aud_file = None, None, None, None
432
- if speak_text:
433
- if talking_head:
434
- if len(output) <= MAX_TALKING_HEAD_TEXT_LENGTH:
435
- # html_video, temp_file = do_html_video_speak(output, translate_to)
436
- html_audio, temp_aud_file = do_html_audio_speak_azure(output, translate_to)
437
- html_video, temp_file = do_html_video_speak_sad_talker(temp_aud_file, translate_to)
438
- else:
439
- temp_file = LOOPING_TALKING_HEAD
440
- html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
441
- html_audio, temp_aud_file = do_html_audio_speak_azure(output, translate_to)
442
- else:
443
- temp_file = LOOPING_TALKING_HEAD
444
- html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
445
- html_audio, temp_aud_file = do_html_audio_speak_azure(output, translate_to)
446
- else:
447
- if talking_head:
448
- temp_file = LOOPING_TALKING_HEAD
449
- html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
450
- else:
451
- # html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
452
- # html_video = create_html_video(temp_file, "128")
453
- pass
454
-
455
- except Exception as e:
456
- raise e
457
- finally:
458
- self.lock.release()
459
- return history, history, html_video, temp_file, html_audio, temp_aud_file, ""
460
- # return history, history, html_audio, temp_aud_file, ""
-
- chat = ChatWrapper()
-
- def do_html_audio_speak_azure(words_to_speak, axure_language):

      html_audio = '<pre>no audio</pre>'

@@ -505,66 +133,7 @@ def do_html_audio_speak_azure(words_to_speak, axure_language):
      return html_audio, "audios/tempfile.mp3"


- def do_html_audio_speak(words_to_speak, polly_language):
-     polly_client = boto3.Session(
-         aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
-         aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
-         region_name=os.environ["AWS_DEFAULT_REGION"]
-     ).client('polly')
-
-     # voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Female")
-     voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Male")
-     if not voice_id:
-         # voice_id = "Joanna"
-         voice_id = "Matthew"
-         language_code = "en-US"
-         engine = NEURAL_ENGINE
-     response = polly_client.synthesize_speech(
-         Text=words_to_speak,
-         OutputFormat='mp3',
-         VoiceId=voice_id,
-         LanguageCode=language_code,
-         Engine=engine
-     )
-
-     html_audio = '<pre>no audio</pre>'
-
-     # Save the audio stream returned by Amazon Polly on Lambda's temp directory
-     if "AudioStream" in response:
-         with closing(response["AudioStream"]) as stream:
-             # output = os.path.join("/tmp/", "speech.mp3")
-
-             try:
-                 with open('audios/tempfile.mp3', 'wb') as f:
-                     f.write(stream.read())
-                 temp_aud_file = gr.File("audios/tempfile.mp3")
-                 temp_aud_file_url = "/file=" + temp_aud_file.value['name']
-                 html_audio = f'<audio autoplay><source src={temp_aud_file_url} type="audio/mp3"></audio>'
-             except IOError as error:
-                 # Could not write to file, exit gracefully
-                 print(error)
-                 return None, None
-     else:
-         # The response didn't contain audio data, exit gracefully
-         print("Could not stream audio")
-         return None, None
-
-     return html_audio, "audios/tempfile.mp3"
-
-
- def create_html_video(file_name, width):
-     temp_file_url = "/file=" + tmp_file.value['name']
-     html_video = f'<video width={width} height={width} autoplay muted loop><source src={temp_file_url} type="video/mp4" poster="Michelle.png"></video>'
-     return html_video
-
- def ToBase64(file):
-     with open(file, 'rb') as fileObj:
-         image_data = fileObj.read()
-         base64_data = base64.b64encode(image_data)
-         return base64_data.decode()
-
-
- def do_html_video_speak_sad_talker(temp_aud_file, azure_language):

      GRADIO_URL = os.environ["GRADIO_URL"]

@@ -600,130 +169,130 @@ def do_html_video_speak_sad_talker(temp_aud_file, azure_language):
      return html_video, "videos/tempfile.mp4"


- def do_html_video_speak(words_to_speak, azure_language):
-     azure_voice = AZURE_VOICE_DATA.get_voice(azure_language, "Male")
-     if not azure_voice:
-         azure_voice = "en-US-ChristopherNeural"
-
-     headers = {"Authorization": f"Bearer {os.environ['EXHUMAN_API_KEY']}"}
-     body = {
-         'bot_name': 'Michelle',
-         'bot_response': words_to_speak,
-         'azure_voice': azure_voice,
-         'azure_style': 'friendly',
-         'animation_pipeline': 'high_speed',
-     }
-     api_endpoint = "https://api.exh.ai/animations/v1/generate_lipsync"
-     res = requests.post(api_endpoint, json=body, headers=headers)
-     print("res.status_code: ", res.status_code)
-
-     html_video = '<pre>no video</pre>'
-     if isinstance(res.content, bytes):
-         response_stream = io.BytesIO(res.content)
-         print("len(res.content)): ", len(res.content))
-
-         with open('videos/tempfile.mp4', 'wb') as f:
-             f.write(response_stream.read())
-         temp_file = gr.File("videos/tempfile.mp4")
-         temp_file_url = "/file=" + temp_file.value['name']
-         html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="Michelle.png"></video>'
-     else:
-         print('video url unknown')
-     return html_video, "videos/tempfile.mp4"

- def update_selected_tools(widget, state, llm):
-     if widget:
-         state = widget
-         chain, express_chain, memory = load_chain(state, llm)
-     return state, llm, chain, express_chain
-
-
- def update_talking_head(widget, state):
-     if widget:
-         state = widget
-
-         video_html_talking_head = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
-         return state, video_html_talking_head
-     else:
-         # return state, create_html_video(LOOPING_TALKING_HEAD, "32")
-         return None, "<pre></pre>"
-
-
- def update_foo(widget, state):
-     if widget:
-         state = widget
-     return state
-
-
- # Pertains to question answering functionality
- def update_embeddings(embeddings_text, embeddings, qa_chain):
-     if embeddings_text:
-         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
-         texts = text_splitter.split_text(embeddings_text)
-
-         docsearch = FAISS.from_texts(texts, embeddings)
-         print("Embeddings updated")
-         return docsearch
-
-
- # Pertains to question answering functionality
- def update_use_embeddings(widget, state):
-     if widget:
-         state = widget
-     return state
-
-
- with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
-     llm_state = gr.State()
-     history_state = gr.State()
-     chain_state = gr.State()
-     express_chain_state = gr.State()
-     tools_list_state = gr.State(TOOLS_DEFAULT_LIST)
-     trace_chain_state = gr.State(False)
-     speak_text_state = gr.State(True)
-     talking_head_state = gr.State(True)
-     monologue_state = gr.State(False)  # Takes the input and repeats it back to the user, optionally transforming it.
-     force_translate_state = gr.State(FORCE_TRANSLATE_DEFAULT)  #
-     memory_state = gr.State()
-
-     # Pertains to Express-inator functionality
-     num_words_state = gr.State(NUM_WORDS_DEFAULT)
-     formality_state = gr.State(FORMALITY_DEFAULT)
-     anticipation_level_state = gr.State(EMOTION_DEFAULT)
-     joy_level_state = gr.State(EMOTION_DEFAULT)
-     trust_level_state = gr.State(EMOTION_DEFAULT)
-     fear_level_state = gr.State(EMOTION_DEFAULT)
-     surprise_level_state = gr.State(EMOTION_DEFAULT)
-     sadness_level_state = gr.State(EMOTION_DEFAULT)
-     disgust_level_state = gr.State(EMOTION_DEFAULT)
-     anger_level_state = gr.State(EMOTION_DEFAULT)
-     lang_level_state = gr.State(LANG_LEVEL_DEFAULT)
-     translate_to_state = gr.State(TRANSLATE_TO_DEFAULT)
-     literary_style_state = gr.State(LITERARY_STYLE_DEFAULT)
-
-     # Pertains to WHISPER functionality
-     whisper_lang_state = gr.State(WHISPER_DETECT_LANG)
-
-     # Pertains to question answering functionality
-     embeddings_state = gr.State()
-     qa_chain_state = gr.State()
-     docsearch_state = gr.State()
-     use_embeddings_state = gr.State(False)
-
-     use_gpt4_state = gr.State(USE_GPT4_DEFAULT)
-
-     with gr.Tab("Chat"):
-         with gr.Row():
-             with gr.Column():
-                 gr.HTML(
-                     """<b><center>GPT + CHAT</center></b>
-                     <p><center>Hit Enter after pasting your OpenAI API key.</center></p>
-
-                     """)
-
-                 openai_api_key_textbox = gr.Textbox(placeholder="Paste your OpenAI API key (sk-...) and hit Enter",
-                                                     show_label=False, lines=1, type='password')

          with gr.Row():
              with gr.Column(scale=1, min_width=TALKING_HEAD_WIDTH, visible=True):
@@ -742,267 +311,48 @@ with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
                  tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
                  htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
                  audio_html = gr.HTML(htm_audio)

-             with gr.Column(scale=7):
-                 chatbot = gr.Chatbot()

-         with gr.Row():
-             message = gr.Textbox(label="What's on your mind??",
-                                  placeholder="What's the answer to life, the universe, and everything?",
-                                  lines=1)
-             submit = gr.Button(value="Send", variant="secondary").style(full_width=False)

-         # UNCOMMENT TO USE WHISPER
-         with gr.Row():
-             audio_comp = gr.Microphone(source="microphone", type="filepath", label="Just say it!",
-                                        interactive=True, streaming=False)
-             audio_comp.change(transcribe, inputs=[audio_comp, whisper_lang_state], outputs=[message])
-
-         # TEMPORARY FOR TESTING
-         # with gr.Row():
-         #     audio_comp_tb = gr.Textbox(label="Just say it!", lines=1)
-         #     audio_comp_tb.submit(transcribe_dummy, inputs=[audio_comp_tb, whisper_lang_state], outputs=[message])
-
-         gr.Examples(
-             examples=["How many people live in Canada?",
-                       "What is 2 to the 30th power?",
-                       "If x+y=10 and x-y=4, what are x and y?",
-                       "How much did it rain in SF today?",
-                       "Get me information about the movie 'Avatar'",
-                       "What are the top tech headlines in the US?",
-                       "On the desk, you see two blue booklets, two purple booklets, and two yellow pairs of sunglasses - "
-                       "if I remove all the pairs of sunglasses from the desk, how many purple items remain on it?"],
-             inputs=message
-         )
-
-     # with gr.Tab("Settings"):
-     #     tools_cb_group = gr.CheckboxGroup(label="Tools:", choices=TOOLS_LIST,
-     #                                       value=TOOLS_DEFAULT_LIST)
-     #     tools_cb_group.change(update_selected_tools,
-     #                           inputs=[tools_cb_group, tools_list_state, llm_state],
-     #                           outputs=[tools_list_state, llm_state, chain_state, express_chain_state])
-
-     #     trace_chain_cb = gr.Checkbox(label="Show reasoning chain in chat bubble", value=False)
-     #     trace_chain_cb.change(update_foo, inputs=[trace_chain_cb, trace_chain_state],
-     #                           outputs=[trace_chain_state])
-
-     #     force_translate_cb = gr.Checkbox(label="Force translation to selected Output Language",
-     #                                      value=FORCE_TRANSLATE_DEFAULT)
-     #     force_translate_cb.change(update_foo, inputs=[force_translate_cb, force_translate_state],
-     #                               outputs=[force_translate_state])
-
-     #     # speak_text_cb = gr.Checkbox(label="Speak text from agent", value=False)
-     #     # speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
-     #     #                      outputs=[speak_text_state])
-
-     #     talking_head_cb = gr.Checkbox(label="Show talking head", value=True)
-     #     talking_head_cb.change(update_talking_head, inputs=[talking_head_cb, talking_head_state],
-     #                            outputs=[talking_head_state, video_html])
-
-     #     monologue_cb = gr.Checkbox(label="Babel fish mode (translate/restate what you enter, no conversational agent)",
-     #                                value=False)
-     #     monologue_cb.change(update_foo, inputs=[monologue_cb, monologue_state],
-     #                         outputs=[monologue_state])
-
-     #     use_gpt4_cb = gr.Checkbox(label="Use GPT-4 (experimental) if your OpenAI API has access to it",
-     #                               value=USE_GPT4_DEFAULT)
-     #     use_gpt4_cb.change(set_openai_api_key,
-     #                        inputs=[openai_api_key_textbox, use_gpt4_cb],
-     #                        outputs=[chain_state, express_chain_state, llm_state, embeddings_state,
-     #                                 qa_chain_state, memory_state, use_gpt4_state])
-
-     #     reset_btn = gr.Button(value="Reset chat", variant="secondary").style(full_width=False)
-     #     reset_btn.click(reset_memory, inputs=[history_state, memory_state],
-     #                     outputs=[chatbot, history_state, memory_state])
-
-     # with gr.Tab("Whisper STT"):
-     #     whisper_lang_radio = gr.Radio(label="Whisper speech-to-text language:", choices=[
-     #         WHISPER_DETECT_LANG, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
-     #         "Danish", "Dutch", "English (Australian)", "English (British)", "English (Indian)", "English (New Zealand)",
-     #         "English (South African)", "English (US)", "English (Welsh)", "Finnish", "French", "French (Canadian)",
-     #         "German", "German (Austrian)", "Georgian", "Hindi", "Icelandic", "Indonesian", "Italian", "Japanese",
-     #         "Korean", "Norwegian", "Polish",
-     #         "Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
-     #         "Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh"],
-     #         value=WHISPER_DETECT_LANG)
-
-     #     whisper_lang_radio.change(update_foo,
-     #                               inputs=[whisper_lang_radio, whisper_lang_state],
-     #                               outputs=[whisper_lang_state])
-
-     # with gr.Tab("Output Language"):
-     #     lang_level_radio = gr.Radio(label="Language level:", choices=[
-     #         LANG_LEVEL_DEFAULT, "1st grade", "2nd grade", "3rd grade", "4th grade", "5th grade", "6th grade",
-     #         "7th grade", "8th grade", "9th grade", "10th grade", "11th grade", "12th grade", "University"],
-     #         value=LANG_LEVEL_DEFAULT)
-     #     lang_level_radio.change(update_foo, inputs=[lang_level_radio, lang_level_state],
-     #                             outputs=[lang_level_state])
-
-     #     translate_to_radio = gr.Radio(label="Language:", choices=[
-     #         TRANSLATE_TO_DEFAULT, "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
-     #         "Danish", "Dutch", "English (Australian)", "English (British)", "English (Indian)", "English (New Zealand)",
-     #         "English (South African)", "English (US)", "English (Welsh)", "Finnish", "French", "French (Canadian)",
-     #         "German", "German (Austrian)", "Georgian", "Hindi", "Icelandic", "Indonesian", "Italian", "Japanese",
-     #         "Korean", "Norwegian", "Polish",
-     #         "Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
-     #         "Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh",
-     #         "emojis", "Gen Z slang", "how the stereotypical Karen would say it", "Klingon", "Neanderthal",
-     #         "Pirate", "Strange Planet expospeak technical talk", "Yoda"],
-     #         value=TRANSLATE_TO_DEFAULT)
-
-     #     translate_to_radio.change(update_foo,
-     #                               inputs=[translate_to_radio, translate_to_state],
-     #                               outputs=[translate_to_state])
-
-     # with gr.Tab("Formality"):
-     #     formality_radio = gr.Radio(label="Formality:",
-     #                                choices=[FORMALITY_DEFAULT, "Casual", "Polite", "Honorific"],
-     #                                value=FORMALITY_DEFAULT)
-     #     formality_radio.change(update_foo,
-     #                            inputs=[formality_radio, formality_state],
-     #                            outputs=[formality_state])
-
-     # with gr.Tab("Lit Style"):
-     #     literary_style_radio = gr.Radio(label="Literary style:", choices=[
-     #         LITERARY_STYLE_DEFAULT, "Prose", "Story", "Summary", "Outline", "Bullets", "Poetry", "Haiku", "Limerick",
-     #         "Rap",
-     #         "Joke", "Knock-knock", "FAQ"],
-     #         value=LITERARY_STYLE_DEFAULT)
-
-     #     literary_style_radio.change(update_foo,
-     #                                 inputs=[literary_style_radio, literary_style_state],
-     #                                 outputs=[literary_style_state])
-
-     # with gr.Tab("Emotions"):
-     #     anticipation_level_radio = gr.Radio(label="Anticipation level:",
-     #                                         choices=[EMOTION_DEFAULT, "Interest", "Anticipation", "Vigilance"],
-     #                                         value=EMOTION_DEFAULT)
-     #     anticipation_level_radio.change(update_foo,
-     #                                     inputs=[anticipation_level_radio, anticipation_level_state],
-     #                                     outputs=[anticipation_level_state])
-
-     #     joy_level_radio = gr.Radio(label="Joy level:",
-     #                                choices=[EMOTION_DEFAULT, "Serenity", "Joy", "Ecstasy"],
-     #                                value=EMOTION_DEFAULT)
-     #     joy_level_radio.change(update_foo,
-     #                            inputs=[joy_level_radio, joy_level_state],
-     #                            outputs=[joy_level_state])
-
-     #     trust_level_radio = gr.Radio(label="Trust level:",
-     #                                  choices=[EMOTION_DEFAULT, "Acceptance", "Trust", "Admiration"],
-     #                                  value=EMOTION_DEFAULT)
-     #     trust_level_radio.change(update_foo,
-     #                              inputs=[trust_level_radio, trust_level_state],
-     #                              outputs=[trust_level_state])
-
-     #     fear_level_radio = gr.Radio(label="Fear level:",
-     #                                 choices=[EMOTION_DEFAULT, "Apprehension", "Fear", "Terror"],
-     #                                 value=EMOTION_DEFAULT)
-     #     fear_level_radio.change(update_foo,
-     #                             inputs=[fear_level_radio, fear_level_state],
-     #                             outputs=[fear_level_state])
-
-     #     surprise_level_radio = gr.Radio(label="Surprise level:",
-     #                                     choices=[EMOTION_DEFAULT, "Distraction", "Surprise", "Amazement"],
-     #                                     value=EMOTION_DEFAULT)
-     #     surprise_level_radio.change(update_foo,
-     #                                 inputs=[surprise_level_radio, surprise_level_state],
-     #                                 outputs=[surprise_level_state])
-
-     #     sadness_level_radio = gr.Radio(label="Sadness level:",
-     #                                    choices=[EMOTION_DEFAULT, "Pensiveness", "Sadness", "Grief"],
-     #                                    value=EMOTION_DEFAULT)
-     #     sadness_level_radio.change(update_foo,
-     #                                inputs=[sadness_level_radio, sadness_level_state],
-     #                                outputs=[sadness_level_state])
-
-     #     disgust_level_radio = gr.Radio(label="Disgust level:",
-     #                                    choices=[EMOTION_DEFAULT, "Boredom", "Disgust", "Loathing"],
-     #                                    value=EMOTION_DEFAULT)
-     #     disgust_level_radio.change(update_foo,
-     #                                inputs=[disgust_level_radio, disgust_level_state],
-     #                                outputs=[disgust_level_state])
-
-     #     anger_level_radio = gr.Radio(label="Anger level:",
-     #                                  choices=[EMOTION_DEFAULT, "Annoyance", "Anger", "Rage"],
-     #                                  value=EMOTION_DEFAULT)
-     #     anger_level_radio.change(update_foo,
-     #                              inputs=[anger_level_radio, anger_level_state],
-     #                              outputs=[anger_level_state])
-
-     # with gr.Tab("Max Words"):
-     #     num_words_slider = gr.Slider(label="Max number of words to generate (0 for don't care)",
-     #                                  value=NUM_WORDS_DEFAULT, minimum=0, maximum=MAX_WORDS, step=10)
-     #     num_words_slider.change(update_foo,
-     #                             inputs=[num_words_slider, num_words_state],
-     #                             outputs=[num_words_state])
-
-     # with gr.Tab("Embeddings"):
-     #     embeddings_text_box = gr.Textbox(label="Enter text for embeddings and hit Create:",
-     #                                      lines=20)
-
-     #     with gr.Row():
-     #         use_embeddings_cb = gr.Checkbox(label="Use embeddings", value=False)
-     #         use_embeddings_cb.change(update_use_embeddings, inputs=[use_embeddings_cb, use_embeddings_state],
-     #                                  outputs=[use_embeddings_state])
-
-     #         embeddings_text_submit = gr.Button(value="Create", variant="secondary").style(full_width=False)
-     #         embeddings_text_submit.click(update_embeddings,
-     #                                      inputs=[embeddings_text_box, embeddings_state, qa_chain_state],
-     #                                      outputs=[docsearch_state])
-
-     # gr.HTML("""
-     # <p>This application, developed by <a href='https://www.linkedin.com/in/javafxpert/'>James L. Weaver</a>,
-     # demonstrates a conversational agent implemented with OpenAI GPT-3.5 and LangChain.
-     # When necessary, it leverages tools for complex math, searching the internet, and accessing news and weather.
-     # Uses talking heads from <a href='https://exh.ai/'>Ex-Human</a>.
-     # For faster inference without waiting in queue, you may duplicate the space.
-     # </p>""")
-
-     # gr.HTML("""
-     # <form action="https://www.paypal.com/donate" method="post" target="_blank">
-     # <input type="hidden" name="business" value="AK8BVNALBXSPQ" />
-     # <input type="hidden" name="no_recurring" value="0" />
-     # <input type="hidden" name="item_name" value="Please consider helping to defray the cost of APIs such as SerpAPI and WolframAlpha that this app uses." />
-     # <input type="hidden" name="currency_code" value="USD" />
-     # <input type="image" src="https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif" border="0" name="submit" title="PayPal - The safer, easier way to pay online!" alt="Donate with PayPal button" />
-     # <img alt="" border="0" src="https://www.paypal.com/en_US/i/scr/pixel.gif" width="1" height="1" />
-     # </form>
-     # """)
-
-     # gr.HTML("""<center>
-     # <a href="https://huggingface.co/spaces/JavaFXpert/Chat-GPT-LangChain?duplicate=true">
-     # <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
-     # Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a>
-     # </center>""")
-
-     message.submit(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
-                                  speak_text_state, talking_head_state, monologue_state,
-                                  express_chain_state, num_words_state, formality_state,
-                                  anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
-                                  surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
-                                  lang_level_state, translate_to_state, literary_style_state,
-                                  qa_chain_state, docsearch_state, use_embeddings_state,
-                                  force_translate_state],
-                    outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
-
-     submit.click(chat, inputs=[openai_api_key_textbox, message, history_state, chain_state, trace_chain_state,
-                                speak_text_state, talking_head_state, monologue_state,
-                                express_chain_state, num_words_state, formality_state,
-                                anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
-                                surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
-                                lang_level_state, translate_to_state, literary_style_state,
-                                qa_chain_state, docsearch_state, use_embeddings_state,
-                                force_translate_state],
-                  outputs=[chatbot, history_state, video_html, my_file, audio_html, tmp_aud_file, message])
-
-     openai_api_key_textbox.change(set_openai_api_key,
-                                   inputs=[openai_api_key_textbox, use_gpt4_state],
-                                   outputs=[chain_state, express_chain_state, llm_state, embeddings_state,
-                                            qa_chain_state, memory_state, use_gpt4_state])
-     openai_api_key_textbox.submit(set_openai_api_key,
-                                   inputs=[openai_api_key_textbox, use_gpt4_state],
-                                   outputs=[chain_state, express_chain_state, llm_state, embeddings_state,
-                                            qa_chain_state, memory_state, use_gpt4_state])
-
- block.launch(debug=True)

  import gradio as gr
+ import openai
+ import os  # fix: os.environ is used below (GRADIO_URL) but os was never imported in this file
  import requests
+ import csv
+ import uuid

  import whisper
+ import azure.cognitiveservices.speech as speechsdk
+ import base64

  from polly_utils import PollyVoiceData, NEURAL_ENGINE
  from azure_utils import AzureVoiceData

+ POLLY_VOICE_DATA = PollyVoiceData()
+ AZURE_VOICE_DATA = AzureVoiceData()

+ WHISPER_DETECT_LANG = "Chinese (Mandarin)"
+ WHISPER_MODEL = whisper.load_model("tiny")
+ print("WHISPER_MODEL", WHISPER_MODEL)

  LOOPING_TALKING_HEAD = "videos/Michelle.mp4"
  TALKING_HEAD_WIDTH = "192"
  MAX_TALKING_HEAD_TEXT_LENGTH = 100

+ prompt_templates = {"Default ChatGPT": ""}

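+ # Per-session state: a running token total plus the raw chat history as
+ # OpenAI-style {"role": ..., "content": ...} message dicts.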
+ def get_empty_state():
+     return {"total_tokens": 0, "messages": []}

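+ # Downloads the community prompt list from f/awesome-chatgpt-prompts at startup
+ # and fills the template dropdown; on a network error the dropdown keeps only
+ # the built-in "Default ChatGPT" entry.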
+ def download_prompt_templates():
+     url = "https://raw.githubusercontent.com/f/awesome-chatgpt-prompts/main/prompts.csv"
+     try:
+         response = requests.get(url)
+         reader = csv.reader(response.text.splitlines())
+         next(reader)  # skip the header row
+         for row in reader:
+             if len(row) >= 2:
+                 act = row[0].strip('"')
+                 prompt = row[1].strip('"')
+                 prompt_templates[act] = prompt
+
+     except requests.exceptions.RequestException as e:
+         print(f"An error occurred while downloading prompt templates: {e}")
+         return
+
+     choices = list(prompt_templates.keys())
+     choices = choices[:1] + sorted(choices[1:])
+     return gr.update(value=choices[0], choices=choices)
+
+ def on_token_change(user_token):
+     openai.api_key = user_token

+ def on_type_change(type):
+     print(type)
+
+ def on_prompt_template_change(prompt_template):
+     if not isinstance(prompt_template, str): return
+     return prompt_templates[prompt_template]

  # UNCOMMENT TO USE WHISPER
  def transcribe(aud_inp, whisper_lang):

      result_text = result.text
      return result_text

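+ # Note: like the old app.py version it replaces, this helper reads the global
+ # `tmp_file` gr.File component rather than its `file_name` argument.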
+ def create_html_video(file_name, width):
+     temp_file_url = "/file=" + tmp_file.value['name']
+     html_video = f'<video width={width} height={width} autoplay muted loop><source src={temp_file_url} type="video/mp4" poster="Michelle.png"></video>'
+     return html_video

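+ # Reads a file from disk and returns it base64-encoded (presumably for passing
+ # the avatar image to the lip-sync service).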
+ def ToBase64(file):
+     with open(file, 'rb') as fileObj:
+         image_data = fileObj.read()
+         base64_data = base64.b64encode(image_data)
+         return base64_data.decode()

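+ # Synthesizes `words_to_speak` with Azure Cognitive Services TTS (the body is
+ # collapsed in this diff) and returns an HTML audio snippet plus the path of
+ # the MP3 written to audios/tempfile.mp3.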
+ def do_html_audio_speak_azure(words_to_speak):

      html_audio = '<pre>no audio</pre>'

      return html_audio, "audios/tempfile.mp3"

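+ # Sends the synthesized audio to a SadTalker lip-sync Space, reached through
+ # the GRADIO_URL environment variable (the request code is collapsed in this
+ # diff), and returns an HTML video snippet plus videos/tempfile.mp4.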
+ def do_html_video_speak_sad_talker(temp_aud_file):

      GRADIO_URL = os.environ["GRADIO_URL"]

      return html_video, "videos/tempfile.mp4"

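+ # Main submit handler: validates the type and API key, routes the prompt to
+ # chat completion or image generation, updates the session state, and returns
+ # the chatbot pairs together with the talking-head video/audio outputs.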
+ def submit_message(type_select, user_token, prompt, prompt_template, temperature, max_tokens, context_length, state):
+     print(type_select)
+     history = state['messages']

+     if not prompt:
+         # fix: pad with None for the four media outputs so the return arity matches the event wiring below
+         return gr.update(value=''), [(history[i]['content'], history[i+1]['content']) for i in range(0, len(history)-1, 2)], f"Total tokens used: {state['total_tokens']}", state, None, None, None, None
+
+     prompt_template = prompt_templates[prompt_template]
+
+     system_prompt = []
+     if prompt_template:
+         system_prompt = [{ "role": "system", "content": prompt_template }]
+
+     prompt_msg = { "role": "user", "content": prompt }
+
+     if not type_select:
+         history.append(prompt_msg)
+         history.append({
+             "role": "system",
+             "content": "Error: Type is not set."
+         })
+         return '', [(history[i]['content'], history[i+1]['content']) for i in range(0, len(history)-1, 2)], f"Total tokens used: 0", state, None, None, None, None
+
+     if not user_token:
+         history.append(prompt_msg)
+         history.append({
+             "role": "system",
+             "content": "Error: OpenAI API Key is not set."
+         })
+         return '', [(history[i]['content'], history[i+1]['content']) for i in range(0, len(history)-1, 2)], f"Total tokens used: 0", state, None, None, None, None
+
+     html_video, temp_file, html_audio, temp_aud_file = None, None, None, None
+     try:
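+         # TEXT: call gpt-3.5-turbo with the optional system prompt plus the last
+         # `context_length` exchanges (image turns are filtered out first), then
+         # voice the answer when it is short enough for the talking head.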
+         if type_select == 'TEXT':
+             text_history = [x for x in history if x['role'] != 'image']
+             print(text_history)
+             completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=system_prompt + text_history[-context_length*2:] + [prompt_msg], temperature=temperature, max_tokens=max_tokens)
+             print(prompt_msg, completion.choices[0].message.to_dict())
+             history.append(prompt_msg)
+             history.append(completion.choices[0].message.to_dict())
+
+             state['total_tokens'] += completion['usage']['total_tokens']
+             answer = completion.choices[0].message.to_dict()["content"]
+             if len(answer) <= MAX_TALKING_HEAD_TEXT_LENGTH:
+                 # html_video, temp_file = do_html_video_speak(output, translate_to)
+                 html_audio, temp_aud_file = do_html_audio_speak_azure(answer)
+                 html_video, temp_file = do_html_video_speak_sad_talker(temp_aud_file)
+             else:
+                 temp_file = LOOPING_TALKING_HEAD
+                 html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
+                 html_audio, temp_aud_file = do_html_audio_speak_azure(answer)
+
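+         # IMAGE: generate a 512x512 picture via the OpenAI Image API and record
+         # the prompt/URL pair under the sentinel role "image"; the API reports
+         # no token usage for image requests.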
+         elif type_select == 'IMAGE':
+             response = openai.Image.create(
+                 prompt=prompt,
+                 n=1,
+                 size="512x512"
+             )
+             print("image result ", response)
+             image_url = response['data'][0]['url']
+
+             history.append({ "role": "image", "content": prompt })
+             history.append({ "role": "image", "content": image_url })
+
+             state['total_tokens'] += 0
+
+             temp_file = LOOPING_TALKING_HEAD
+             html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
+
+     except Exception as e:
+         history.append(prompt_msg)
+         history.append({
+             "role": "system",
+             "content": f"Error: {e}"
+         })
+
+     total_tokens_used_msg = f"Total tokens used: {state['total_tokens']}"
+
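+     # Rebuild the (user, assistant) pairs for gr.Chatbot. Generated image URLs
+     # are first downloaded to local PNG files under /home/user/app/ (the
+     # Space's app directory) so the chatbot can render them as images.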
+     chat_messages = [(history[i]['content'], history[i+1]['content']) for i in range(0, len(history)-1, 2)]
+     print(1, chat_messages)
+     chat_messages = []
+     for i in range(0, len(history)-1, 2):
+         print(history[i])
+         if history[i]['role'] == 'image':
+             picture_name = str(uuid.uuid1()) + '.png'
+             response = requests.get(history[i+1]['content'])  # fix: variable was misspelled "reponse"
+             with open('/home/user/app/' + picture_name, 'wb') as f:
+                 f.write(response.content)
+
+             image_his = {'name': '/home/user/app/' + picture_name, 'mime_type': 'image/png', 'alt_text': None, 'data': None, 'is_file': True}
+
+             chat_messages.append((history[i]['content'], image_his))
+
+         else:
+             chat_messages.append((history[i]['content'], history[i+1]['content']))
+     print(2, chat_messages)
+     return '', chat_messages, total_tokens_used_msg, state, html_video, temp_file, html_audio, temp_aud_file
+
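+ # Resets the input box, chatbot, token counter, and per-session state.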
+ def clear_conversation():
+     return gr.update(value=None, visible=True), None, "", get_empty_state()


+ css = """
+ #col-container {max-width: 80%; margin-left: auto; margin-right: auto;}
+ #chatbox {min-height: 400px;}
+ #header {text-align: center;}
+ #prompt_template_preview {padding: 1em; border-width: 1px; border-style: solid; border-color: #e0e0e0; border-radius: 4px;}
+ #total_tokens_str {text-align: right; font-size: 0.8em; color: #666;}
+ #label {font-size: 0.8em; padding: 0.5em; margin: 0;}
+ .message { font-size: 1.2em; }
+ """
+
+ with gr.Blocks(css=css) as demo:
+
+     state = gr.State(get_empty_state())
+     whisper_lang_state = gr.State(WHISPER_DETECT_LANG)  # fix: event inputs must be Gradio components, not raw strings
+
+     with gr.Column(elem_id="col-container"):
+         gr.Markdown("""## OpenAI ChatGPT chat
+                     Using the official API (gpt-3.5-turbo model)
+                     """,
+                     elem_id="header")

          with gr.Row():
              with gr.Column(scale=1, min_width=TALKING_HEAD_WIDTH, visible=True):

                  tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
                  htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
                  audio_html = gr.HTML(htm_audio)
+             with gr.Column(scale=6):
+                 chatbot = gr.Chatbot(elem_id="chatbox")
+                 with gr.Row():
+                     with gr.Column(scale=2, min_width=0):
+                         type_select = gr.Dropdown(show_label=False, choices=["TEXT", "IMAGE"], value="TEXT", interactive=True)
+                     with gr.Column(scale=8):
+                         input_message = gr.Textbox(show_label=False, placeholder="Enter text and press enter", visible=True).style(container=False)
+                 btn_submit = gr.Button("Submit")
+                 total_tokens_str = gr.Markdown(elem_id="total_tokens_str")
+                 btn_clear_conversation = gr.Button("🔃 Start New Conversation")
+             with gr.Column(scale=3):
+                 gr.Markdown("Enter your OpenAI API Key. You can get one [here](https://platform.openai.com/account/api-keys).", elem_id="label")
+                 user_token = gr.Textbox(value='', placeholder="OpenAI API Key", type="password", show_label=False)
+                 prompt_template = gr.Dropdown(label="Set a custom instruction for the chatbot:", choices=list(prompt_templates.keys()))
+                 prompt_template_preview = gr.Markdown(elem_id="prompt_template_preview")
+                 with gr.Accordion("Advanced parameters", open=False):
+                     temperature = gr.Slider(minimum=0, maximum=2.0, value=0.7, step=0.1, label="Temperature", info="Higher = more creative/chaotic")
+                     max_tokens = gr.Slider(minimum=100, maximum=4096, value=1000, step=1, label="Max tokens per response")
+                     context_length = gr.Slider(minimum=1, maximum=10, value=2, step=1, label="Context length", info="Number of previous messages to send to the chatbot. Be careful with high values, it can blow up the token budget quickly.")
+         with gr.Row():
+             audio_comp = gr.Microphone(source="microphone", type="filepath", label="Just say it!",
+                                        interactive=True, streaming=False)
+             audio_comp.change(transcribe, inputs=[audio_comp, whisper_lang_state], outputs=[input_message])  # fix: pass the language through whisper_lang_state instead of a raw string

+     # gr.HTML('''<br><br><br><center>You can duplicate this Space to skip the queue:<a href="https://huggingface.co/spaces/anzorq/chatgpt-demo?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a><br>
+     # <p><img src="https://visitor-badge.glitch.me/badge?page_id=anzorq.chatgpt_api_demo_hf" alt="visitors"></p></center>''')

+     type_select.change(on_type_change, inputs=[type_select], outputs=[])
+
+     btn_submit.click(submit_message, [type_select, user_token, input_message, prompt_template, temperature, max_tokens, context_length, state], [input_message, chatbot, total_tokens_str, state, video_html, my_file, audio_html, tmp_aud_file])
+     input_message.submit(submit_message, [type_select, user_token, input_message, prompt_template, temperature, max_tokens, context_length, state], [input_message, chatbot, total_tokens_str, state, video_html, my_file, audio_html, tmp_aud_file])
+
+     btn_clear_conversation.click(clear_conversation, [], [input_message, chatbot, total_tokens_str, state])
+     prompt_template.change(on_prompt_template_change, inputs=[prompt_template], outputs=[prompt_template_preview])
+     user_token.change(on_token_change, inputs=[user_token], outputs=[])
+
+     demo.load(download_prompt_templates, inputs=None, outputs=[prompt_template], queue=False)  # fix: "queur" was a typo for "queue"
+
+ demo.queue(concurrency_count=10)
+ demo.launch(
+     # auth=("admin", "IBTGeE3NrPsrViDI"),
+     height='800px')