File size: 31,894 Bytes
e37f8aa 65056d9 89df8be 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 8eb6288 e37f8aa 8eb6288 e37f8aa 8eb6288 e37f8aa 65056d9 8eb6288 65056d9 e37f8aa 65056d9 8eb6288 65056d9 e37f8aa 8eb6288 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 2ea9f6d e37f8aa 8eb6288 e37f8aa 65056d9 e37f8aa 65056d9 e37f8aa 65056d9 8eb6288 65056d9 8eb6288 65056d9 e37f8aa 65056d9 8eb6288 65056d9 e37f8aa 8eb6288 65056d9 8eb6288 e37f8aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 |
import os
import datetime
import glob
import shutil
import requests
import io
import sys
import re
import boto3
from os import listdir
from os.path import isfile, join
import gradio
from sqlitedict import SqliteDict
import gradio as gr
from langchain import PromptTemplate
from langchain.agents import Tool
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.chains import LLMMathChain
from langchain import SerpAPIWrapper
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.summarize import load_summarize_chain
from langchain.llms import AzureOpenAI
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ChatMessageHistory
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import UnstructuredFileLoader
import clickhouse_connect
from pathlib import Path
from langchain.document_loaders import YoutubeLoader
from azure_utils import AzureVoiceData
from polly_utils import PollyVoiceData, NEURAL_ENGINE
from contextlib import closing
from langchain.agents import create_pandas_dataframe_agent
import pandas as pd
# Environment configuration read by the Azure OpenAI / SerpAPI clients.
# NOTE(review): the two API keys below are hard-coded in source. They should be
# treated as leaked, rotated, and loaded from the deployment environment or a
# secret store instead of being committed.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
os.environ["OPENAI_API_BASE"] = "https://civet-project-001.openai.azure.com/"
os.environ["OPENAI_API_KEY"] = "0e3e5b666818488fa1b5cb4e4238ffa7"
os.environ["SERPAPI_API_KEY"] = "a5b67b8805b4e12b0ae147c9c6b2a7dbf3ab84fca5f24e531b6963b1f7fc1ff7"
# Default Azure deployment / model used by the chat and retrieval helpers.
global_deployment_id = "CivetGPT"
global_model_name = "gpt-35-turbo"
# Chroma / storage settings
# chroma_api_impl is only referenced inside the disabled helper string below.
chroma_api_impl = "HH_Azure_Openai"
root_file_path = "./data/" # actually the location where data is stored
hr_source_path = "hr_source"          # sub-directory for uploaded HR documents
ks_source_path = "ks_source"          # sub-directory used by kh_update_km / clear_tmp_collection
believe_source_path = 'be_source'     # sub-directory for uploaded CSV files
sqlite_name = "cache.sqlite3"         # SqliteDict file holding the uploaded-file list
sqlite_key="stored_files"             # key under which that list is stored
persist_db = "persist_db"             # Chroma persist directory (relative to root_file_path)
hr_collection_name = "hr_db"
chroma_db_impl="localdb+langchain"
tmp_collection="tmp_collection"
# NOTE(review): `davinci` is not referenced anywhere in the visible source.
davinci = "text-davinci-003"
# Global user-facing text
inputText = "問題(按q 或Ctrl + c跳出): "
refuse_string="服務被拒. 內容可能涉及敏感字詞,政治,煽動他人或是其他不當言詞, 請改以其他內容嚐試"
# Talking-head video settings
LOOPING_TALKING_HEAD = "./data/videos/Masahiro.mp4"
TALKING_HEAD_WIDTH = "192"
# NOTE(review): these two objects are instantiated at import time but are not
# used by any function in the visible source.
AZURE_VOICE_DATA = AzureVoiceData()
POLLY_VOICE_DATA = PollyVoiceData()
def save_sqlite(key, value):
    """Prepend ``value`` to the list stored under ``key`` in the SQLite cache.

    Args:
        key: Cache key inside the ``sqlite_name`` SqliteDict file.
        value: List of new entries; it is concatenated in front of whatever
            list is already stored under ``key``.

    Errors are reported on stdout and swallowed (best-effort persistence).
    """
    try:
        with SqliteDict(sqlite_name) as mydict:
            # Fall back to an empty list when the key has never been written;
            # a plain mydict[key] raised KeyError here, so the very first save
            # was reported as an error and nothing was stored.
            old_value = mydict.get(key, [])
            mydict[key] = value + old_value
            mydict.commit()  # commit() is required to flush the data to disk
    except Exception as ex:
        print("Error during storing data (Possibly unsupported):", ex)
def load_sqlite(key):
    """Return the value cached under ``key``, or ``None`` if it cannot be read.

    Any failure (including a missing key) is printed and results in ``None``.
    """
    try:
        with SqliteDict(sqlite_name) as store:
            # Read-only access: no commit() needed.
            return store[key]
    except Exception as ex:
        print("Error during loading data:", ex)
    return None
def delete_sql(key):
    """Reset the cached entry for ``key`` to an empty list.

    The key itself is kept; failures are printed and swallowed.
    """
    try:
        with SqliteDict(sqlite_name) as store:
            store[key] = list()
            # Without commit() the change is not flushed to the database file.
            store.commit()
    except Exception as ex:
        print("Error during storing data (Possibly unsupported):", ex)
def ai_answer(answer):
    """Print ``answer`` to stdout in green, prefixed with the AI label."""
    green_on = '\033[32m'
    colour_off = '\033[0m'
    # str.join keeps the original requirement that ``answer`` is a str.
    print(''.join(['AI 回答: ', green_on, answer, colour_off]))
def get_openaiembeddings():
    """Create the OpenAIEmbeddings client used for every Chroma collection."""
    embedding_options = {
        "deployment": "CivetGPT_embedding",
        "model": "text-embedding-ada-002",
        # embed_batch_size=1
        "chunk_size": 1,
    }
    return OpenAIEmbeddings(**embedding_options)
# Disabled legacy helper, kept as a bare string literal so it is never executed.
# It referenced a remote Chroma server (chroma_db_ip / chroma_db_port are not
# defined in the visible source). A live get_chroma_client(collection_name)
# is defined later in this file.
"""
def get_chroma_client():
chroma_client = chromadb.Client(Settings(chroma_api_impl=chroma_api_impl,
chroma_server_host=chroma_db_ip,
chroma_server_http_port=chroma_db_port
))
return chroma_client
"""
def multidocs_loader(files_path, file_ext):
    """Load every ``*.<file_ext>`` file under ``files_path`` and split it into chunks.

    Returns the documents produced by a CharacterTextSplitter
    (chunk_size=1000, chunk_overlap=10).
    """
    glob_pattern = "".join(("*.", file_ext))
    loaded = DirectoryLoader(files_path, glob=glob_pattern, show_progress=True).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    return splitter.split_documents(loaded)
def unstructure_file_loader(filename_path):
    """Load a single file with UnstructuredFileLoader and split it into chunks.

    Returns the documents produced by a CharacterTextSplitter
    (chunk_size=1000, chunk_overlap=10).
    """
    loaded = UnstructuredFileLoader(filename_path).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    return splitter.split_documents(loaded)
def add_documents_into_cromadb(db_name, file_path, collection_name):
    """Embed every file under ``file_path`` into the given Chroma collection.

    ``db_name`` is accepted for interface compatibility but is not used; the
    persist directory is always ``root_file_path + persist_db``.
    """
    store = Chroma.from_documents(
        multidocs_loader(file_path, "*"),
        get_openaiembeddings(),
        collection_name=collection_name,
        persist_directory=root_file_path + persist_db,
        chroma_db_impl=chroma_db_impl,
    )
    store.persist()
    print('adding documents done!')
def initial_croma_db(db_name, files_path, file_ext, collection_name):
    """Build and persist a Chroma collection from ``*.<file_ext>`` files in ``files_path``.

    ``db_name`` is accepted for interface compatibility but is not used; the
    persist directory is always ``root_file_path + persist_db``.
    """
    store = Chroma.from_documents(
        multidocs_loader(files_path, file_ext),
        get_openaiembeddings(),
        collection_name=collection_name,
        persist_directory=root_file_path + persist_db,
        chroma_db_impl=chroma_db_impl,
    )
    store.persist()
    print('vectorstore done!')
def add_files_to_collection(input_file_path, collection_name):
    """Embed the files of a data sub-directory into ``collection_name``.

    ``input_file_path`` is relative to ``root_file_path``.
    """
    source_dir = root_file_path + input_file_path
    add_documents_into_cromadb(persist_db, source_dir, collection_name)
def get_prompt_summary_string():
    """Return the summarisation prompt template (expects a ``{text}`` variable)."""
    summary_template = """使用中文替下面內容做個精簡摘要:
{text}
精簡摘要:"""
    return summary_template
def get_prompt_template_string():
    """Return the HR question-condensing prompt template.

    The text starts with a persona line containing today's date and keeps the
    ``{chat_history}`` and ``{question}`` placeholders for PromptTemplate.
    """
    current = datetime.date.today()
    # Build the date manually: strftime() with non-ASCII characters in the
    # format string depends on the platform locale.
    today = f"{current.year}年{current.month:02d}月{current.day:02d}日"
    # The header is already interpolated here; the former extra .format()
    # call on the f-string result was a no-op and has been dropped.
    header = f"我是鴻海(等同Foxconn)的員工, 你是一個鴻海的人資專家. 今天是{today}"
    template_string = header + """
請根據歷史對話,針對這次的問題, 形成獨立問題. 請優先從提供的文件中尋找答案, 你被允許回答不知道, 但回答不知道時需要給中央人資的客服聯絡窗口資訊.
不論什麼問題, 都以中文回答
歷史對話: {chat_history}
這次的問題: {question}
人資專家:
"""
    return template_string
def get_default_template_prompt():
    """Return the PromptTemplate that asks for a Chinese explanation of ``{concept}``."""
    return PromptTemplate(
        input_variables=["concept"],
        template="你是個知識廣泛的超級助手, 以下所有問題請用中文回答, 並請在500個中文字以內來解釋 {concept} 概念",
    )
def fine_tuning_model_chat(my_deployment_id, my_model_name):
    """Run an interactive console loop against an AzureOpenAI completion model.

    Each line typed by the user is inserted into the default concept prompt;
    typing ``q`` ends the loop.
    """
    concept_prompt = get_default_template_prompt()
    completion_llm = AzureOpenAI(model_name=my_model_name, deployment_name=my_deployment_id)
    # iter(callable, sentinel) keeps prompting until the user enters 'q'.
    for user_text in iter(lambda: input(inputText), 'q'):
        ai_answer(completion_llm(concept_prompt.format(concept=user_text)))
def chat_conversation():
    """Run an interactive console chat with the default Azure chat deployment.

    User messages are accumulated in a ChatMessageHistory that is sent to the
    model on every turn; typing ``q`` ends the loop.
    """
    print("resource: " + global_deployment_id + " / " + global_model_name)
    chat_model = AzureChatOpenAI(
        deployment_name=global_deployment_id,
        model_name=global_model_name,
    )
    conversation = ChatMessageHistory()
    conversation.add_ai_message("你是一個超級助理, 以下問題都用中文回答")
    for user_text in iter(lambda: input(inputText), 'q'):
        conversation.add_user_message(user_text)
        reply = chat_model(conversation.messages)
        ai_answer(reply.content)
def local_vector_search(question_str,chat_history, collection_name = hr_collection_name):
    """Answer ``question_str`` with a retrieval chain over a persisted Chroma collection.

    Args:
        question_str: The user's question.
        chat_history: Conversation so far; in this function it is only passed
            to the ``prompt.format`` call whose result is discarded.
        collection_name: Chroma collection to query (defaults to the HR collection).

    Returns:
        The ``"answer"`` entry of the ConversationalRetrievalChain result.
    """
    embedding = get_openaiembeddings()
    vectorstore = Chroma( embedding_function=embedding,
                         collection_name=collection_name,
                         persist_directory=root_file_path+persist_db,
                         )
    # A fresh memory object is created on every call.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, ai_prefix = "AI超級助理")
    llm = AzureOpenAI(
        deployment_name = global_deployment_id,
        model_name= global_model_name,
        temperature = 0.0)
    chat_llm = AzureChatOpenAI(
        deployment_name = global_deployment_id,
        model_name= global_model_name,
        temperature = 0.2)
    prompt = PromptTemplate(
        template=get_prompt_template_string(),
        input_variables=["question","chat_history"]
    )
    # NOTE(review): the formatted string is not assigned or used.
    prompt.format(question=question_str,chat_history=chat_history)
    km_chain = ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
        condense_question_prompt=prompt,
    )
    # NOTE(review): everything from here down to the agent is built on each
    # call, but only km_chain is invoked below (agent.run is commented out).
    km_tool = Tool(
        name='Knowledge Base',
        func=km_chain.run,
        description='一個非常有用的工具, 當要查詢任何公司政策以及鴻海相關資料都使用這個工具'
    )
    math_math = LLMMathChain(llm=llm,verbose=True)
    math_tool = Tool(
        name='Calculator',
        func=math_math.run,
        description='Useful for when you need to answer questions about math.'
    )
    search = SerpAPIWrapper()
    search_tool = Tool(
        name="Search",
        func=search.run,
        description="當你需要回答一般問題時,非常有用; 不可以用來回答任何跟鴻海有關的問題.",
    )
    tools=[math_tool,km_tool, search_tool]
    agent=initialize_agent(
        agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
        tools=tools,
        llm=chat_llm,
        verbose=True,
        memory=memory,
        max_iterations=30,
    )
    result=km_chain(question_str)
    #result=agent.run(question_str)
    return result["answer"]
def make_markdown_table(array):
    """Render ``array`` as text with one entry per line.

    Every entry is followed by a space and a newline; an empty iterable
    yields an empty string.
    """
    return "".join(f"{entry} \n" for entry in array)
def get_hr_files():
    """Return the stored HR file names as newline-separated text.

    Returns ``None`` when nothing could be loaded from the SQLite cache.
    """
    stored_names = load_sqlite(sqlite_key)
    return None if stored_names is None else make_markdown_table(stored_names)
def get_be_csv_files():
    """Return the paths of all uploaded CSV files as newline-separated text."""
    csv_pattern = root_file_path+believe_source_path+"/*.csv"
    matching_paths = glob.glob(csv_pattern)
    return make_markdown_table(matching_paths)
def update_hr_km(files):
    """Copy uploaded HR files into the HR source directory and index them.

    Args:
        files: Uploaded file objects; only their ``.name`` path is used.

    Returns:
        The refreshed list of stored HR file names (see get_hr_files).
    """
    uploaded_paths = [item.name for item in files]
    target_dir = root_file_path + hr_source_path
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    for source_path in uploaded_paths:
        shutil.copy(source_path, target_dir)
    add_files_to_collection(hr_source_path, hr_collection_name)
    # Only base names are recorded in the SQLite cache.
    base_names = [Path(source_path).name for source_path in uploaded_paths]
    save_sqlite(sqlite_key, base_names)
    return get_hr_files()
def update_be_csv_km(files):
    """Copy uploaded CSV files into the statistics source directory.

    Args:
        files: Uploaded file objects (only ``.name`` is used), or ``None`` /
            an empty list when nothing was uploaded.

    Returns:
        Newline-separated base names of the copied files, or ``''`` when
        there was nothing to copy.
    """
    # Guard first: the original checked for None only after iterating
    # ``files``, so a None upload crashed before the check was reached.
    if not files:
        return ''
    file_paths = [file.name for file in files]
    dest_file_path = root_file_path + believe_source_path
    if not os.path.exists(dest_file_path):
        os.makedirs(dest_file_path)
    for file_path in file_paths:
        shutil.copy(file_path, dest_file_path)
    # List file names rather than the raw upload objects, whose string form
    # is an object repr and not a usable name.
    return make_markdown_table([Path(file_path).name for file_path in file_paths])
def clear_all_collection(collection_name):
    """Placeholder: currently does nothing and returns ``None``."""
    return None
def all_files_under_diretory(path):
    """Delete every regular file directly inside ``path``.

    Sub-directories are left in place. As with the original glob pattern,
    names starting with a dot are not matched by ``*`` and are kept.

    Args:
        path: Directory whose files should be removed.
    """
    # os.path.join supplies the right separator on every platform; the former
    # path + '\*' only worked on Windows and used an invalid escape sequence.
    for entry in glob.glob(os.path.join(path, '*')):
        # os.remove() raises on directories, so skip anything that is not a file.
        if os.path.isfile(entry):
            os.remove(entry)
def clear_hr_datas():
    """Drop the HR collection, delete the uploaded HR files and reset the cache.

    Returns:
        The (now empty) stored-file listing from get_hr_files.
    """
    # Remove the HR collection from Chroma.
    get_chroma_client(hr_collection_name).delete_collection(name=hr_collection_name)
    print("Collection removed completely!")
    # Remove the source files and the cached file list.
    hr_dir = root_file_path + hr_source_path
    all_files_under_diretory(hr_dir)
    delete_sql(sqlite_key)
    return get_hr_files()
def clear_be_csv_datas():
    """Delete every file in the statistics (CSV) source directory."""
    csv_dir = root_file_path + believe_source_path
    all_files_under_diretory(csv_dir)
def num_of_collection(collection_name):
    """Return a user-facing message with the item count of ``collection_name``."""
    collection = get_chroma_client(collection_name).get_collection(collection_name)
    item_count = collection.count()
    return "目前知識卷裡有{}卷項目".format(item_count)
def clear_tmp_collection():
    """Drop the temporary collection and delete its source files.

    Returns:
        The item-count message for the temporary collection afterwards.
    """
    get_chroma_client(tmp_collection).delete_collection(name=tmp_collection)
    ks_dir = root_file_path + ks_source_path
    all_files_under_diretory(ks_dir)
    return num_of_collection(tmp_collection)
def content_summary(split_documents):
    """Summarise already-split documents with a map-reduce summarisation chain.

    Args:
        split_documents: Documents to summarise.

    Returns:
        The chain output dict; if the chain raises, the exception is printed
        and ``{'output_text': refuse_string}`` is returned instead.
    """
    chat_model = AzureChatOpenAI(
        deployment_name=global_deployment_id,
        model_name=global_model_name,
        temperature=0.2,
    )
    # The same prompt is used for both the map and the combine step.
    summary_prompt = PromptTemplate(
        template=get_prompt_summary_string(),
        input_variables=["text"],
    )
    summarize_chain = load_summarize_chain(
        llm=chat_model,
        chain_type="map_reduce",
        verbose=True,
        map_prompt=summary_prompt,
        combine_prompt=summary_prompt,
    )
    try:
        return summarize_chain({"input_documents": split_documents}, return_only_outputs=True)
    except Exception as e:
        print(e)
        return {'output_text':refuse_string}
def pdf_summary(file_name):
    """Load ``file_name``, split it into chunks and return its summary dict.

    Chunks are produced by a RecursiveCharacterTextSplitter
    (chunk_size=1000, chunk_overlap=20) and passed to content_summary.
    """
    print("file_name: "+file_name)
    loaded = UnstructuredFileLoader(file_name).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    chunks = splitter.split_documents(loaded)
    return content_summary(chunks)
def youtube_summary(youtube_url):
    """Return the summary text for the video at ``youtube_url``.

    The content is loaded with YoutubeLoader, split into chunks
    (chunk_size=1000, chunk_overlap=10) and passed to content_summary.
    """
    transcript_loader = YoutubeLoader.from_youtube_url(
        youtube_url,
        add_video_info=True,
        language=['en','zh-TW'],
        translation='zh-TW',
    )
    loaded = transcript_loader.load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunks = splitter.split_documents(loaded)
    return content_summary(chunks)['output_text']
def summary_large_file(files):
    """Summarise the first uploaded file and return the summary text.

    Only the first entry of ``files`` is processed.
    """
    uploaded_paths = [item.name for item in files]
    first_path = uploaded_paths[0]
    print(first_path)
    return pdf_summary(first_path)["output_text"]
def upload_large_file(files):
    """Return the base name (without extension) of the first uploaded file."""
    uploaded_paths = [item.name for item in files]
    first_path = uploaded_paths[0]
    return Path(first_path).stem
def set_allow_lightweight_delete():
    """Send the lightweight-delete SET command to the local ClickHouse server.

    Connects to 127.0.0.1:8123 and prints the command result.
    """
    clickhouse = clickhouse_connect.get_client(host='127.0.0.1',port=8123)
    #command = "show databases;"
    statement = "SET allow_experimental_lightweight_delete = true;"
    print(clickhouse.command(statement))
def get_chroma_client(collection_name):
    """Return the client object behind the persisted Chroma store.

    A Chroma vector store is opened for ``collection_name`` and its private
    ``_client`` attribute is returned.
    """
    return Chroma(
        embedding_function=get_openaiembeddings(),
        collection_name=collection_name,
        persist_directory=root_file_path + persist_db,
    )._client
def create_db():
    """Build the HR collection from the PDF files in the HR source directory."""
    initial_croma_db(
        persist_db,
        root_file_path + hr_source_path,
        "pdf",
        hr_collection_name,
    )
def generate_iframe_for_youtube(youtube_link):
    """Return an ``<iframe>`` HTML snippet embedding the given YouTube video.

    Args:
        youtube_link: A YouTube URL typed by the user (``watch?v=``,
            ``youtu.be/``, ``embed/``, ``shorts/`` or ``live/`` form). May be
            empty or ``None``.

    Returns:
        The iframe HTML. When no video id can be found, the iframe points at
        the bare ``https://www.youtube.com/embed/`` URL.
    """
    # Extract only the video id. The former substitution kept everything after
    # the host, which turned ".../embed/ID" into ".../embed/embed/ID" and
    # "watch?v=ID&t=10s" into ".../embed/ID&t=10s".
    id_pattern = (
        r"(?:youtube\.com/(?:watch\?(?:[^#\s]*&)?v=|embed/|shorts/|live/)|youtu\.be/)"
        r"([A-Za-z0-9_-]+)"
    )
    found = re.search(id_pattern, youtube_link or "")
    # The id is restricted to [A-Za-z0-9_-], so user-supplied text can no
    # longer inject markup into the HTML built below.
    video_id = found.group(1) if found else ""
    _url = f"https://www.youtube.com/embed/{video_id}"
    embed_html = f'<iframe width="650" height="365" src="{_url}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
    print(embed_html)
    return embed_html
def create_html_video(file_name, width, temp_file_url):
    """Return a looping, muted, autoplaying square ``<video>`` tag.

    ``file_name`` is accepted but not used; ``width`` is applied to both the
    width and the height attribute, and ``temp_file_url`` becomes the source.
    """
    video_template = (
        '<video width={w} height={w} autoplay muted loop>'
        '<source src={src} type="video/mp4" poster="Masahiro.png"></video>'
    )
    return video_template.format(w=width, src=temp_file_url)
def do_html_audio_speak(words_to_speak):
    """Synthesize ``words_to_speak`` with Amazon Polly and save it as an MP3.

    Args:
        words_to_speak: Text to convert to speech.

    Returns:
        ``(html_audio, "./data/audios/tempfile.mp3")`` on success, where
        ``html_audio`` is an autoplaying ``<audio>`` tag; ``(None, None)``
        when the response has no audio stream or the file cannot be written.
    """
    # NOTE(review): AWS credentials are hard-coded in source; they should be
    # rotated and supplied through the environment / boto3 credential chain.
    polly_client = boto3.Session(
        aws_access_key_id="AKIAV7Q7AAGW54RBR6FZ",
        aws_secret_access_key="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",
        region_name='us-west-2'
    ).client('polly')
    language_code="cmn-CN"
    engine = NEURAL_ENGINE
    voice_id = "Zhiyu"
    print("voice_id: "+voice_id+"\nlanguage_code="+language_code)
    response = polly_client.synthesize_speech(
        Text=words_to_speak,
        OutputFormat='mp3',
        VoiceId=voice_id,
        LanguageCode=language_code,
        Engine=engine
    )
    html_audio = '<pre>no audio</pre>'
    # Save the audio stream returned by Amazon Polly to ./data/audios/tempfile.mp3
    if "AudioStream" in response:
        with closing(response["AudioStream"]) as stream:
            try:
                with open('./data/audios/tempfile.mp3', 'wb') as f:
                    f.write(stream.read())
                # A gr.File component is created only to read back the file
                # name used in the "/file=" URL below.
                temp_aud_file = gr.File("./data/audios/tempfile.mp3")
                temp_aud_file_url = "/file=" + temp_aud_file.value['name']
                html_audio = f'<audio autoplay><source src={temp_aud_file_url} type="audio/mp3"></audio>'
            except IOError as error:
                # Could not write to file, exit gracefully
                print(error)
                return None, None
    else:
        # The response didn't contain audio data, exit gracefully
        print("Could not stream audio")
        return None, None
    return html_audio, "./data/audios/tempfile.mp3"
def do_html_video_speak():
    """Generate a lip-synced video from the last synthesized audio file.

    Uploads ``./data/audios/tempfile.mp3`` to the exh.ai lipsync endpoint and
    writes the response body to ``./data/videos/tempfile.mp4``.

    Returns:
        ``(res, html_video, "./data/videos/tempfile.mp4")`` where ``res`` is
        the ``requests`` response (callers check ``res.status_code``) and
        ``html_video`` is a ``<video>`` tag pointing at the written file.
    """
    # NOTE(review): the bearer token is hard-coded in source; it should be
    # rotated and read from the environment.
    key = "eyJhbGciOiJIUzUxMiJ9.eyJ1c2VybmFtZSI6ImNhdHNreXR3QGdtYWlsLmNvbSJ9.OypOUZF-xv4-b8i9F4_aaMQiJpxv0mXRT5kyuJwTMXVd4awV-O-Obntp--AqGghNNowzQ9oG7zArSnQjz2vQgg"
    url = "https://api.exh.ai/animations/v2/generate_lipsync_from_audio"
    audio_path = "./data/audios/tempfile.mp3"
    payload = {
        "animation_pipeline": "high_quality",
        "idle_url": "https://ugc-idle.s3-us-west-2.amazonaws.com/5fd9ba1b1607b39a4d559300c1e35bee.mp4"
    }
    headers = {
        "accept": "application/json",
        "authorization": f"Bearer {key}"
    }
    # Open the audio inside a context manager so the handle is closed after
    # the upload; it was previously opened inline and never closed.
    with open(audio_path, "rb") as audio_file:
        files = {"audio_file": (audio_path, audio_file, "audio/mpeg")}
        res = requests.post(url, data=payload, files=files, headers=headers)
    print("res.status_code: ", res.status_code)
    html_video = '<pre>no video</pre>'
    # NOTE(review): res.content is written regardless of the status code, so
    # a failed request stores the error body in tempfile.mp4; callers must
    # check res.status_code before using html_video.
    if isinstance(res.content, bytes):
        print("len(res.content)): ", len(res.content))
        with open('./data/videos/tempfile.mp4', 'wb') as f:
            f.write(res.content)
        temp_file = gr.File("./data/videos/tempfile.mp4")
        temp_file_url = "/file=" + temp_file.value['name']
        html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="Masahiro.png"></video>'
    else:
        print('video url unknown')
    return res, html_video, "./data/videos/tempfile.mp4"
def kh_update_km(files):
    """Copy uploaded files into the ks source directory and index them.

    Args:
        files: Uploaded file objects; only their ``.name`` path is used.

    Returns:
        The item-count message for the temporary collection.
    """
    uploaded_paths = [item.name for item in files]
    target_dir = root_file_path + ks_source_path
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    for source_path in uploaded_paths:
        shutil.copy(source_path, target_dir)
    add_files_to_collection(ks_source_path, tmp_collection)
    return num_of_collection(tmp_collection)
class Logger:
    """Stream object that duplicates everything written to it.

    Output goes both to the ``sys.stdout`` captured at construction time
    (``terminal``) and to a UTF-8 log file (``log``) opened in write mode.
    """

    def __init__(self, filename):
        self.terminal = sys.stdout
        self.log = open(filename, "w", encoding='UTF-8')

    def write(self, message):
        """Write ``message`` to the terminal first, then to the log file."""
        for sink in (self.terminal, self.log):
            sink.write(message)

    def flush(self):
        """Flush the terminal first, then the log file."""
        for sink in (self.terminal, self.log):
            sink.flush()

    def isatty(self):
        """Always report that this stream is not a TTY."""
        return False
def read_logs():
    """Return the contents of ``output.log`` with ANSI escape sequences removed.

    ``sys.stdout`` is flushed first so pending output reaches the log file.
    """
    sys.stdout.flush()
    with open("output.log", "r", encoding='UTF-8') as log_file:
        raw_text = log_file.read()
    return re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', raw_text)
def pandas_analysis(prompt_str, message, chat_history):
    """Answer ``message`` about the uploaded CSV data with a pandas agent.

    Args:
        prompt_str: Prompt template containing ``{message}`` and optionally
            ``{chat_history}``.
        message: The user's question.
        chat_history: List of ``(question, answer)`` tuples; the new exchange
            is appended in place.

    Returns:
        ``('', chat_history)`` — an empty string to clear the input box and
        the updated history.
    """
    dir_path = f"{root_file_path}{believe_source_path}/*.csv"
    csv_paths = glob.glob(dir_path)
    if not csv_paths:
        # pd.concat() raises ValueError on an empty sequence; tell the user
        # that no data has been uploaded instead of crashing the handler.
        chat_history.append((message, "沒有任何檔案存在, 請先上傳csv 資料"))
        return '', chat_history
    # NOTE(review): encoding='unicode_escape' is kept from the original —
    # confirm it matches the encoding of the uploaded files.
    df = pd.concat(
        (pd.read_csv(csv_path, encoding='unicode_escape') for csv_path in csv_paths),
        ignore_index=True,
    )
    local_deploy_id = "text-davinci-003"
    local_model_name = "text-davinci-003"
    llm = AzureOpenAI(
        deployment_name=local_deploy_id,
        model_name=local_model_name,
        max_tokens=2000,
        temperature=0,
    )
    be_agent = create_pandas_dataframe_agent(
        llm,
        df,
        prefix="Remove any ` from the Action Input",
        max_iterations=30,
        return_intermediate_steps=False,
        max_execution_time=60,
        handle_parsing_errors="Check your output and make sure it conforms!",
        verbose=True)
    new_str = prompt_str.format(message=message, chat_history=chat_history)
    print(new_str)
    answer = be_agent.run(new_str)
    chat_history.append((message, answer))
    return '', chat_history
def lunch_style(demo, logs=gr.Text()):
    """Redirect stdout to a log file and launch the Gradio app.

    Command-line forms:
        ``app.py``                   launch with Gradio defaults, no auth
        ``app.py server``            launch on the built-in address, with auth
        ``app.py server <ip> <port>`` launch on the given address, with auth

    Args:
        demo: The ``gr.Blocks`` app to launch.
        logs: Component that is refreshed every second with the log contents.
            NOTE(review): the default is created once, at definition time.
    """
    sys.stdout = Logger("output.log")
    demo.load(read_logs, None, logs, every=1)
    allowed_paths = [root_file_path, root_file_path+hr_source_path]
    usage = "syntax: python <your_app>.py [server {ip_address, port}] "
    argc = len(sys.argv)
    if argc == 1:
        print("running server as default value")
        demo.launch(allowed_paths=allowed_paths)
        return
    if argc == 2 and sys.argv[1] == "server":
        local_ip = "10.40.23.232"
        local_port = 7788
    elif argc == 4:
        local_ip = sys.argv[2]
        try:
            # argv values are strings; the port was previously passed through
            # unconverted although launch() expects an integer.
            local_port = int(sys.argv[3])
        except ValueError:
            print(usage)
            return
    else:
        print(usage)
        return
    print(f"running server on http://{local_ip}:{local_port}")
    # NOTE(review): the login credentials are hard-coded in source.
    demo.launch(allowed_paths=allowed_paths,auth=("Foxconn", "Foxconn123!"),server_name=local_ip, server_port=local_port)
def gradio_run():
    """Build the Gradio Blocks UI (header plus four tabs) and launch it.

    Tabs: long-document summary, HR assistant chat, YouTube summary and CSV
    statistics assistant. A console textbox at the bottom is passed to
    lunch_style, which feeds it from the log file.
    """
    print("User Login")
    with gr.Blocks(theme='bethecloud/storj_theme') as demo:
        with gr.Row():
            gr.Markdown("# HH Azure Openai Demo")
        #Header section
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("""
### 這是一個基於各場景製造的Azure Openai Demo, 目前預計會包含場景有:
- 超長文本的摘要 ☑
- HR 智能客服小幫手 ☑
- 上傳過去歷史資料, 預測未來發展
- 上傳初步構想後, AI生成方案
- 網路上搜尋各式資料(包含google, wikipedia, youtube) 等, 綜合分析給結論
### 基礎的技術架構:
* 給予資料, 持續累加
* 存入vector(向量化) database, 依不同的collection 存放
* 問題以相似度(Similarity search), 結果再丟給gpt 做綜合回應
### 已知bug:
* N/A
如有任何Bug 歡迎隨時回饋
""")
            with gr.Column(scale=1):
                gr.Image(type="pil", value=root_file_path+"vector.png", label="技術概念圖")
                gr.Markdown("""
> 中央資訊 Change Liao(廖晨志)
> teams/email: change.cc.liao@foxconn.com
> 分機: 5010108
""")
        with gr.Row():
            gr.Markdown("""
------
## Playground
請切換下方Tab 鍵試驗各項功能
""")
        #First PoC Section
        with gr.Tab("文本摘要"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(f"""
## 第一項實驗: 超長文本摘要
請上傳任何文檔(.pdf, .doc, .csv, text 格式),上傳完成後稍等一會, AI 會在右側TextField 提供文本摘要
* 使用方式:
* 請在右邊按下 `請上傳超長文本(可接受text, pdf, doc, csv 格式)` 上傳你的文本
* AI 會開始解析內容, 檔案愈大解析愈久
* 上傳完後可以按同個按鍵, 再次上傳
* 後續會支援video 以及 audio格式
""")
                with gr.Column(scale=1):
                    gr.Markdown("1.")
                    file_name_field = gr.Textbox(max_lines=1, label="上傳檔案",placeholder="目前沒有上傳檔案")
                    upload_button = gr.UploadButton("請上傳超長文本(可接受text, pdf, doc, csv 格式)",
                                                    file_types=["text", ".pdf", ".doc", ".csv"], file_count="multiple")
                    gr.Markdown("2.")
                    summary_text = gr.Textbox()
                    summary_text.label = "AI 摘要:"
                    # NOTE(review): this rebinds the component's `change`
                    # attribute to False — confirm the intent.
                    summary_text.change = False
                    summary_text.lines = 12
                    # Show the file name first, then fill in the summary.
                    upload_button.upload(upload_large_file, upload_button, file_name_field).then(summary_large_file,upload_button,summary_text)
        #2nd Hr Section
        with gr.Tab("HR 客服助手"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(
                        """
## 第二項實驗: HR 資料庫智能客服助手 AI 試驗
"""
                    )
                    gr.Markdown("""
### 使用方法
* 測試人員可在下方加入任何HR 相關資料, 亦可全部刪除後上傳.
* 系統會將資料向量化後,納入右方人資客服機器人資料庫
* 測試人員可在右方與客服機器人對話
(溫馨提醒: 儘可能所有檔案全部清掉, 再一次上傳所有想納入的檔案;且次數不要太多,以節省經費)
""")
                    file_list=gr.Textbox(get_hr_files, label="已存在知識庫的檔案(text,pdf,doc,csv)", placeholder="沒有任何檔案存在", max_lines=16, lines=16)
                    with gr.Row():
                        with gr.Column(scale=1):
                            upload_button = gr.UploadButton("上傳HR知識庫檔案",
                                                            file_types=["text", ".pdf", ".doc", ".csv"], file_count="multiple")
                            upload_button.upload(update_hr_km, inputs=upload_button, outputs=file_list)
                        with gr.Column(scale=1):
                            cleanDataBtn = gr.Button(value="刪除所有知識以及檔案")
                            cleanDataBtn.click(clear_hr_datas,outputs=file_list)
                with gr.Column(scale=1):
                    with gr.Row():
                        with gr.Column():
                            # Hidden gr.File components are used only to obtain
                            # the names that go into the "/file=" URLs.
                            tmp_file = gr.File(LOOPING_TALKING_HEAD, visible=False)
                            tmp_file_url = "/file=" + tmp_file.value['name']
                            htm_video = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH, tmp_file_url)
                            video_html = gr.HTML(htm_video)
                            # my_aud_file = gr.File(label="Audio file", type="file", visible=True)
                            tmp_aud_file = gr.File("./data/audios/tempfile.mp3", visible=False)
                            tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
                            htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
                            audio_html = gr.HTML(htm_audio, visible=False)
                        with gr.Column():
                            isAudio = gr.Checkbox(label="是否要有語音", info="要開啟語音嗎?查詢時間會增長")
                    with gr.Row():
                        chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=600)
                    with gr.Row():
                        with gr.Column(scale=5):
                            msg = gr.Textbox(
                                show_label=False,
                                placeholder="輸入你的問題",
                            )
                        with gr.Column(scale=1):
                            clear = gr.Button("清除")
                    def respond(message, chat_history):
                        # Chat handler: returns (cleared input, history, video HTML, audio HTML).
                        vector_search_message = local_vector_search(message, chat_history)
                        chat_history.append((message, vector_search_message))
                        print("vector_search:"+vector_search_message)
                        # NOTE(review): isAudio is not listed in msg.submit's
                        # inputs; this reads the component's `.value`
                        # attribute, which may not reflect the current
                        # checkbox state in the browser — confirm.
                        if isAudio.value is False:
                            print("isAudio is False")
                            return '', chat_history, htm_video, ''
                        else:
                            print("isAudio is True")
                            html_audio, audio_file_path = do_html_audio_speak(vector_search_message)
                            res, new_html_video, video_file_path = do_html_video_speak()
                            # Use the generated video on success; otherwise the
                            # looping video plus the audio tag.
                            if res.status_code == 200:
                                return '', chat_history, new_html_video, ''
                            else:
                                return '', chat_history, htm_video, html_audio
                    msg.submit(respond, [msg, chatbot], [msg, chatbot, video_html, audio_html], queue=True)
                    clear.click(lambda: None, None, chatbot, queue=False)
        #3rd youtube
        with gr.Tab("Youtube 影片摘要"):
            with gr.Row():
                with gr.Column(scale=1):
                    youtube_gr = gr.HTML(generate_iframe_for_youtube("https://www.youtube.com/embed/"))
                    youtube_link=gr.Textbox(interactive=True, label="在此貼上Youtube link:", placeholder="e.g. https://www.youtube.com/watch?v=xxxxxxxxx")
                    # Refresh the embedded player whenever the link text changes.
                    youtube_link.change(generate_iframe_for_youtube,youtube_link,youtube_gr)
                    youtube_analysis_btn=gr.Button("送出解析")
                with gr.Column(scale=1):
                    youtube_summary_textbox=gr.Textbox(interactive=False, label="AI 解析", lines=20)
                    youtube_analysis_btn.click(youtube_summary,youtube_link,youtube_summary_textbox)
        # 4th tab: statistics assistant over uploaded CSV files
        with gr.Tab("統計助手"):
            with gr.Row():
                gr.Markdown("""
### 使用方式
已經讀取所有提供的csv 資料, 可以詢問資料任何問題(Talk to data)
建議先詢問欄位後, 後續再構思其他問題
""")
            with gr.Row():
                with gr.Column():
                    tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=700)
                    with gr.Row():
                        with gr.Column(scale=5):
                            tmp_msg = gr.Textbox(show_label=False,placeholder="輸入你的問題",)
                        with gr.Column(scale=1):
                            tmp_clear = gr.Button("清除對話")
                with gr.Column():
                    # Editable prompt; pandas_analysis fills in {message} and {chat_history}.
                    prompt_textbox = gr.Textbox("""
你是一位資料科學家,提供給你的資料是2023年每一週的click 次數.
有下列定義:
1. 欄位 23W01 代表2023年的第一個星期; 23W02 代表2023年的第二個星期
請以中文回答我下面的問題:{message}
""", lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數",interactive=True, max_lines=10)
                    be_csv_file_list=gr.Textbox(get_be_csv_files, label="CSV Files", placeholder="沒有任何檔案存在", max_lines=10, lines=10)
                    upload_button = gr.UploadButton("上傳統計資料(.csv格式)",file_types=[".csv"],file_count="multiple")
                    # The chained .then() overwrites the list with the directory listing.
                    upload_button.upload(update_be_csv_km, inputs=upload_button, outputs=be_csv_file_list).then(get_be_csv_files,outputs=be_csv_file_list)
                    cleanDataBtn = gr.Button(value="刪除所有csv 資料")
                    cleanDataBtn.click(clear_be_csv_datas, outputs=be_csv_file_list).then(get_be_csv_files, outputs=be_csv_file_list)
                    tmp_msg.submit(pandas_analysis, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot],queue=True)
                    tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
            with gr.Row():
                gr.Examples([
                    '你有什麼欄位?'
                ], label="訊息範例", inputs=tmp_msg)
        with gr.Row():
            console = gr.Textbox(lines=11, label="Console", max_lines=11)
    demo.queue(concurrency_count=10)
    lunch_style(demo,console)
# Build and launch the UI as soon as this module is executed.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module also starts the server.
gradio_run()
|