add requirements.txt & Langchain_demo.ipynb
Commit 89df8be · 1 Parent(s): 25be1b2
Change Liao committed
Browse files
- Langchain_demo.ipynb +0 -0
- app.py +181 -41
- cache.sqlite3 +0 -0
- data/audios/tempfile.mp3 +0 -0
- data/ks_source/.gitattributes +0 -1
- data/videos/tempfile.mp4 +0 -0
- requirements.txt +0 -205
Langchain_demo.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
app.py
CHANGED
@@ -15,23 +15,35 @@ from sqlitedict import SqliteDict
 
 import gradio as gr
 
+from langchain import PromptTemplate
+from langchain.agents import Tool
+from langchain.agents import load_tools
+from langchain.agents import initialize_agent
+
+from langchain.agents import AgentType
+
+from langchain.chains import LLMMathChain
+from langchain import SerpAPIWrapper
+from langchain.chains import ConversationalRetrievalChain
+
+from langchain.chains.summarize import load_summarize_chain
+
 from langchain.llms import AzureOpenAI
 from langchain.chat_models import AzureChatOpenAI
 
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.chains import ConversationalRetrievalChain
 
 from langchain.memory import ChatMessageHistory
-from langchain import
+from langchain.memory import ConversationBufferMemory
+
 from langchain.vectorstores import Chroma
 
 from langchain.text_splitter import CharacterTextSplitter
-from langchain.
-from langchain.document_loaders import DirectoryLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 
+from langchain.document_loaders import DirectoryLoader
 from langchain.document_loaders import UnstructuredFileLoader
-
-from langchain.chains.summarize import load_summarize_chain
+
 
 import clickhouse_connect
 from pathlib import Path
@@ -41,12 +53,16 @@ from langchain.document_loaders import YoutubeLoader
 from azure_utils import AzureVoiceData
 from polly_utils import PollyVoiceData, NEURAL_ENGINE
 from contextlib import closing
+from langchain.agents import create_pandas_dataframe_agent
+import pandas as pd
 
 #os env
 os.environ["OPENAI_API_TYPE"] = "azure"
 os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
 os.environ["OPENAI_API_BASE"] = "https://civet-project-001.openai.azure.com/"
 os.environ["OPENAI_API_KEY"] = "0e3e5b666818488fa1b5cb4e4238ffa7"
+os.environ["SERPAPI_API_KEY"] = "a5b67b8805b4e12b0ae147c9c6b2a7dbf3ab84fca5f24e531b6963b1f7fc1ff7"
+
 global_deployment_id = "CivetGPT"
 global_model_name = "gpt-35-turbo"
 
@@ -56,6 +72,7 @@ chroma_api_impl = "HH_Azure_Openai"
 root_file_path = "./data/" #其實是data 存放的位置
 hr_source_path = "hr_source"
 ks_source_path = "ks_source"
+believe_source_path = 'be_source'
 
 sqlite_name = "cache.sqlite3"
 sqlite_key="stored_files"
@@ -63,6 +80,7 @@ persist_db = "persist_db"
 hr_collection_name = "hr_db"
 chroma_db_impl="localdb+langchain"
 tmp_collection="tmp_collection"
+davinci = "text-davinci-003"
 
 #global text setting
 inputText = "問題(按q 或Ctrl + c跳出): "
@@ -183,13 +201,13 @@ def get_prompt_summary_string():
 
 def get_prompt_template_string():
     today = datetime.date.today().strftime("%Y年%m月%d日")
-    template_string = f"
-    請根據歷史對話,針對這次的問題,
+    template_string = f"我是鴻海(等同Foxconn)的員工, 你是一個鴻海的人資專家. 今天是{today}".format(today=today)+"""
+    請根據歷史對話,針對這次的問題, 形成獨立問題. 請優先從提供的文件中尋找答案, 你被允許回答不知道, 但回答不知道時需要給中央人資的客服聯絡窗口資訊.
     不論什麼問題, 都以中文回答
 
     歷史對話: {chat_history}
     這次的問題: {question}
-
+    人資專家:
     """
     return template_string
 
@@ -238,23 +256,58 @@ def local_vector_search(question_str,chat_history, collection_name = hr_collecti
 
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, ai_prefix = "AI超級助理")
 
-    llm =
-
-
-
+    llm = AzureOpenAI(
+        deployment_name = global_deployment_id,
+        model_name= global_model_name,
+        temperature = 0.0)
+
+    chat_llm = AzureChatOpenAI(
+        deployment_name = global_deployment_id,
+        model_name= global_model_name,
+        temperature = 0.2)
 
     prompt = PromptTemplate(
         template=get_prompt_template_string(),
        input_variables=["question","chat_history"]
     )
     prompt.format(question=question_str,chat_history=chat_history)
-
-    llm=
+    km_chain = ConversationalRetrievalChain.from_llm(
+        llm=chat_llm,
         retriever=vectorstore.as_retriever(),
         memory=memory,
         condense_question_prompt=prompt,
     )
-
+    km_tool = Tool(
+        name='Knowledge Base',
+        func=km_chain.run,
+        description='一個非常有用的工具, 當要查詢任何公司政策以及鴻海相關資料都使用這個工具'
+    )
+
+    math_math = LLMMathChain(llm=llm,verbose=True)
+    math_tool = Tool(
+        name='Calculator',
+        func=math_math.run,
+        description='Useful for when you need to answer questions about math.'
+    )
+
+    search = SerpAPIWrapper()
+    search_tool = Tool(
+        name="Search",
+        func=search.run,
+        description="當你需要回答一般問題時,非常有用; 不可以用來回答任何跟鴻海有關的問題.",
+    )
+    tools=[math_tool,km_tool, search_tool]
+    agent=initialize_agent(
+        agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
+        tools=tools,
+        llm=chat_llm,
+        verbose=True,
+        memory=memory,
+        max_iterations=30,
+    )
+    result=km_chain(question_str)
+
+    #result=agent.run(question_str)
     return result["answer"]
 
 def make_markdown_table(array):
@@ -636,18 +689,8 @@ def gradio_run():
             tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
             htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
            audio_html = gr.HTML(htm_audio, visible=False)
-            def respond(message, chat_history):
-                vector_search_message = local_vector_search(message, chat_history)
-                chat_history.append((message, vector_search_message))
-
-                html_audio, audio_file_path = do_html_audio_speak(vector_search_message)
-                res, new_html_video, video_file_path = do_html_video_speak()
-
-                if res.status_code == 200:
-                    return '', chat_history, new_html_video, ''
-                else:
-                    return '', chat_history, htm_video, html_audio
             with gr.Column():
+                isAudio = gr.Checkbox(label="是否要有語音", info="要開啟語音嗎?查詢時間會增長")
                 gr.Markdown("""
                 ### AI 虛擬客服:
                 * 這是一個實驗性質的AI 客服
@@ -655,6 +698,7 @@ def gradio_run():
                 * 想要放誰的頭像都可以, 要放董事長也可以.
                 * 訂閱制(有效時間 6/13~7/13)
                 """)
+
             with gr.Row():
                 chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)
             with gr.Row():
@@ -665,8 +709,26 @@ def gradio_run():
                 )
                 with gr.Column(scale=1):
                     clear = gr.Button("清除")
-
-
+
+            def respond(message, chat_history):
+                vector_search_message = local_vector_search(message, chat_history)
+                chat_history.append((message, vector_search_message))
+                print("vector_search:"+vector_search_message)
+                if isAudio.value is False:
+                    print("isAudio is False")
+                    return '', chat_history, htm_video, ''
+                else:
+                    print("isAudio is True")
+                    html_audio, audio_file_path = do_html_audio_speak(vector_search_message)
+                    res, new_html_video, video_file_path = do_html_video_speak()
+
+                    if res.status_code == 200:
+                        return '', chat_history, new_html_video, ''
+                    else:
+                        return '', chat_history, htm_video, html_audio
+
+            msg.submit(respond, [msg, chatbot], [msg, chatbot, video_html, audio_html], queue=True)
+            clear.click(lambda: None, None, chatbot, queue=False)
         #3rd youtube
         with gr.Tab("Youtube 影片摘要"):
             with gr.Row():
@@ -678,26 +740,25 @@ def gradio_run():
                 with gr.Column(scale=1):
                     youtube_summary_textbox=gr.Textbox(interactive=False, label="AI 解析", lines=20)
                     youtube_analysis_btn.click(youtube_summary,youtube_link,youtube_summary_textbox)
+        """
         with gr.Tab("高雄市政府票証"):
-
-            import pandas as pd
+
             mypath = root_file_path+ks_source_path
             onlyfiles = os.listdir(mypath)
             df = pd.concat((pd.read_csv(os.path.join(mypath, filename)) for filename in onlyfiles))
             with gr.Row():
-                gr.Markdown(
+                gr.Markdown('
                 ### 使用方式
                 這是一個使用高雄公車票證資料, 運用AI協助決策的工具.
                 如果有出現error, 請重新刷新頁面. 有error 就代表運算到最後token 數量超出azure openai 上限了, 這部份還在想辦法調整中.
-
+                ')
                 invField = gr.Textbox(visible=False)
                 gr.Examples(onlyfiles, label="資料庫檔案", inputs=invField, examples_per_page=4)
             with gr.Row():
                 with gr.Column():
-
-
-
-                    model_name=davinci,
+                    llm = AzureChatOpenAI(
+                        deployment_name=global_deployment_id,
+                        model_name=global_model_name,
                         max_tokens=2000,
                         temperature=0,
                     )
@@ -709,7 +770,6 @@ def gradio_run():
                         return_intermediate_steps=False,
                         verbose=True
                     )
-
                    def tmp_respond(prompt_str,message, chat_history):
                         try:
                             new_str=prompt_str.format(message=message, chat_history=chat_history)
@@ -734,14 +794,14 @@ def gradio_run():
                 with gr.Column(scale=1):
                     tmp_clear = gr.Button("清除對話")
                 with gr.Column():
-                    prompt_textbox=gr.Textbox(
+                    prompt_textbox=gr.Textbox('
                     你是一位專業的資料科學家,有下列定義:
                     1.每個票卡序號代表一名乘客
                     2.原始票價視為花費或是消費
                     3.轉乘次數: 一名乘客在同一天有任意兩筆紀錄,其中一筆出下車站的資料等於另一筆進上車站的資料,其出下車站代表的車站的轉乘次數就要增加1.
                     歷史訊息是 {chat_history}
                     請以中文回答我下面的問題:{message}
-
+                    ', lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數", interactive=True, max_lines=10)
                     console=gr.Textbox(lines=11, label="Console",max_lines=11)
                     tmp_msg.submit(tmp_respond, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot], queue=True)
                     tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
@@ -751,8 +811,88 @@ def gradio_run():
                     '0001站轉乘旅客所佔比例',
                     '高雄捷運的2022年7月份運輸量與2022年6月份相比, 增減如何?',
                     '請給我2022年6月至2022年7月之間, 轉乘數量最高排名前五名的車站?',
-                    '0001站 在2022年9月份轉乘數量是未知. 請依2022年7月份到2022年8月份的趨勢, 請以月份做為時間單位, 做出一個數學模型. 用此數學模型來預測 0001站 在2022年9月份的轉乘數量會多少, 增減如何?'
+                    '0001站 在2022年9月份轉乘數量是未知. 請依2022年7月份到2022年8月份的趨勢, 請以月份做為時間單位, 做出一個數學模型. 用此數學模型來預測 0001站 在2022年9月份的轉乘數量會多少, 增減如何?',
+                    '請以2022年6月~2022年8月0001車站轉乘資料為主,以月份以及轉乘量為變數,做一個迴歸模型,預測2022年9月份的轉乘量增加多少?'
                     ], label="訊息範例",inputs=tmp_msg)
+        """
+        with gr.Tab("相信人員統計助手"):
+            #model_symbol="text-davinci-003"
+            #model_symbol="Text-Curie"
+            mypath = root_file_path + believe_source_path
+            onlyfiles = os.listdir(mypath)
+            df = pd.concat((pd.read_csv(os.path.join(mypath, filename)) for filename in onlyfiles))
+
+            with gr.Row():
+                gr.Markdown("""
+                ### 使用方式
+                資料裡有 `相信` 的active user 資料,
+                右方己經有先算出平均每個問題花費多少, 隨意詢問算法AI 即可算出多少費用.
+                若要改費用, 請在右方prompt 更改數字
+                """)
+                invField = gr.Textbox(visible=False)
+                gr.Examples(onlyfiles, label="資料庫檔案", inputs=invField, examples_per_page=4)
+            with gr.Row():
+                with gr.Column():
+                    llm = AzureOpenAI(
+                        deployment_name=global_deployment_id,
+                        model_name=global_model_name,
+                        max_tokens=2000,
+                        temperature=0,
+                    )
+                    be_agent = create_pandas_dataframe_agent(
+                        llm,
+                        df,
+                        max_iterations=30,
+                        return_intermediate_steps=False,
+                        max_execution_time=60,
+                        handle_parsing_errors="Check your output and make sure it conforms!",
+                        verbose=True)
+                    def tmp_respond(prompt_str, message, chat_history):
+                        new_str = prompt_str.format(message=message, chat_history=chat_history)
+                        answer = be_agent.run(new_str)
+                        chat_history.append((message, answer))
+                        """
+                        try:
+                            new_str = prompt_str.format(message=message, chat_history=chat_history)
+                            answer = be_agent.run(new_str)
+                            chat_history.append((message, answer))
+                        except Exception as e:
+                            response = str(e)
+                            print(f"Got error!{response}")
+                            if not response.startswith("Could not parse LLM output: `"):
+                                raise e
+                            answer = response.removeprefix("Could not parse LLM output: `").removesuffix("`")
+                            print("answer:"+answer)
+                            chat_history.append((message, answer))
+                        """
+                        return '', chat_history
+
+                    tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=500)
+                    with gr.Row():
+                        with gr.Column(scale=5):
+                            tmp_msg = gr.Textbox(show_label=False,placeholder="輸入你的問題",)
+                        with gr.Column(scale=1):
+                            tmp_clear = gr.Button("清除對話")
+                with gr.Column():
+                    prompt_textbox = gr.Textbox("""
+                    你是一位專業資料科學家,提供給你的是研究列表.
+                    有下列定義:
+
+                    1.Title是研究報告的標題
+
+                    請以中文回答我下面的問題:{message}
+                    """, lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數",interactive=True, max_lines=10)
+                    console = gr.Textbox(lines=11, label="Console", max_lines=11)
+                    tmp_msg.submit(tmp_respond, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot],queue=True)
+                    tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
+                    with gr.Row():
+                        gr.Examples([
+                            '你有什麼欄位?',
+                            '資料裡有屬於台灣(TW)的員工有多少位?',
+                            '全台灣的員工, 每人每天問五個問題, 1個月花費多少錢?',
+                            '如果龍華廠區的員工每人每天問3個問題,台灣員工每人每天問7個問題, 請問這樣一個月多少錢?'
+                        ], label="訊息範例", inputs=tmp_msg)
+
     demo.queue(concurrency_count=10)
     lunch_style(demo,console)
 
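The heart of the app.py change is the agent wiring added to local_vector_search(): the HR retrieval chain is wrapped as a Tool next to an LLMMathChain calculator and a SerpAPI web search, and handed to initialize_agent, while the value actually returned still comes from the plain km_chain call (the agent.run call is left commented out). What follows is a minimal, self-contained sketch of that pattern, assuming the langchain==0.0.200 / openai==0.27.8 pins from the deleted requirements.txt; the endpoint, keys, deployment name, Chroma persist directory, and prompt wording are placeholders, not the repository's real values.

import os

from langchain import PromptTemplate, SerpAPIWrapper
from langchain.agents import AgentType, Tool, initialize_agent
from langchain.chains import ConversationalRetrievalChain, LLMMathChain
from langchain.chat_models import AzureChatOpenAI
from langchain.llms import AzureOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import Chroma

# Placeholder Azure OpenAI / SerpAPI credentials -- substitute real values.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
os.environ["OPENAI_API_BASE"] = "https://<your-resource>.openai.azure.com/"
os.environ["OPENAI_API_KEY"] = "<azure-openai-key>"
os.environ["SERPAPI_API_KEY"] = "<serpapi-key>"

# Completion-style LLM for the math chain, chat LLM for retrieval and the agent.
llm = AzureOpenAI(deployment_name="<deployment>", model_name="gpt-35-turbo", temperature=0.0)
chat_llm = AzureChatOpenAI(deployment_name="<deployment>", model_name="gpt-35-turbo", temperature=0.2)

# A previously persisted Chroma collection acts as the knowledge base.
vectorstore = Chroma(
    collection_name="hr_db",
    embedding_function=OpenAIEmbeddings(chunk_size=1),
    persist_directory="./data/persist_db",
)

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
condense_prompt = PromptTemplate(
    template="Chat history: {chat_history}\nFollow-up question: {question}\nStandalone question:",
    input_variables=["question", "chat_history"],
)
km_chain = ConversationalRetrievalChain.from_llm(
    llm=chat_llm,
    retriever=vectorstore.as_retriever(),
    memory=memory,
    condense_question_prompt=condense_prompt,
)

tools = [
    Tool(name="Knowledge Base", func=km_chain.run,
         description="Company policy and internal documents."),
    Tool(name="Calculator", func=LLMMathChain(llm=llm, verbose=True).run,
         description="Useful for math questions."),
    Tool(name="Search", func=SerpAPIWrapper().run,
         description="General questions that are not about the company."),
]
agent = initialize_agent(
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    tools=tools,
    llm=chat_llm,
    memory=memory,
    verbose=True,
    max_iterations=30,
)

# The commit answers with the retrieval chain directly; the agent path stays disabled.
print(km_chain({"question": "How many days of annual leave do I get?"})["answer"])
# print(agent.run("How many days of annual leave do I get?"))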
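The new 相信人員統計助手 tab takes a different route: it concatenates every CSV under be_source into one DataFrame and lets create_pandas_dataframe_agent generate and run pandas code to answer questions about it. A condensed sketch under the same assumptions (the directory and deployment name are placeholders, the Azure environment variables from the sketch above are assumed to be set, and the question is one of the tab's built-in examples); the commit additionally passes max_execution_time and handle_parsing_errors to the agent.

import os

import pandas as pd
from langchain.agents import create_pandas_dataframe_agent
from langchain.llms import AzureOpenAI

# Placeholder directory of CSV files (app.py reads root_file_path + 'be_source').
csv_dir = "./data/be_source"
df = pd.concat(pd.read_csv(os.path.join(csv_dir, name)) for name in os.listdir(csv_dir))

llm = AzureOpenAI(deployment_name="<deployment>", model_name="gpt-35-turbo",
                  max_tokens=2000, temperature=0)

# The agent plans with the LLM and executes pandas expressions against df.
be_agent = create_pandas_dataframe_agent(llm, df, verbose=True, max_iterations=30)

print(be_agent.run('資料裡有屬於台灣(TW)的員工有多少位?'))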
cache.sqlite3
CHANGED
Binary files a/cache.sqlite3 and b/cache.sqlite3 differ
data/audios/tempfile.mp3
CHANGED
Binary files a/data/audios/tempfile.mp3 and b/data/audios/tempfile.mp3 differ
data/ks_source/.gitattributes
DELETED
@@ -1 +0,0 @@
-*.csv filter=lfs diff=lfs merge=lfs -text
data/videos/tempfile.mp4
CHANGED
Binary files a/data/videos/tempfile.mp4 and b/data/videos/tempfile.mp4 differ
requirements.txt
DELETED
@@ -1,205 +0,0 @@
-aiofiles==23.1.0
-aiohttp==3.8.4
-aiosignal==1.3.1
-altair==5.0.1
-anyio==3.7.0
-argilla==1.9.0
-argon2-cffi==21.3.0
-argon2-cffi-bindings==21.2.0
-arrow==1.2.3
-asttokens==2.2.1
-async-timeout==4.0.2
-attrs==23.1.0
-backcall==0.2.0
-backoff==2.2.1
-beautifulsoup4==4.12.2
-bleach==6.0.0
-boto3==1.26.152
-botocore==1.29.152
-bs4==0.0.1
-certifi==2023.5.7
-cffi==1.15.1
-chardet==5.1.0
-charset-normalizer==3.1.0
-chromadb==0.3.26
-click==8.1.3
-clickhouse-connect==0.6.2
-colorama==0.4.6
-coloredlogs==15.0.1
-comm==0.1.3
-commonmark==0.9.1
-contourpy==1.0.7
-cryptography==41.0.1
-cycler==0.11.0
-dataclasses-json==0.5.7
-debugpy==1.6.7
-decorator==5.1.1
-defusedxml==0.7.1
-Deprecated==1.2.14
-distlib==0.3.6
-duckdb==0.8.0
-et-xmlfile==1.1.0
-exceptiongroup==1.1.1
-executing==1.2.0
-fastapi==0.96.1
-fastjsonschema==2.17.1
-ffmpy==0.3.0
-filelock==3.12.0
-flatbuffers==23.5.26
-fonttools==4.39.4
-fqdn==1.5.1
-frozenlist==1.3.3
-fsspec==2023.6.0
-gradio==3.34.0
-gradio_client==0.2.6
-greenlet==2.0.2
-h11==0.14.0
-hnswlib==0.7.0
-httpcore==0.16.3
-httptools==0.5.0
-httpx==0.23.3
-huggingface-hub==0.15.1
-humanfriendly==10.0
-idna==3.4
-ipykernel==6.23.2
-ipython==8.14.0
-ipython-genutils==0.2.0
-isoduration==20.11.0
-jedi==0.18.2
-Jinja2==3.1.2
-jmespath==1.0.1
-joblib==1.2.0
-jsonpointer==2.3
-jsonschema==4.17.3
-jupyter-events==0.6.3
-jupyter_client==8.2.0
-jupyter_core==5.3.1
-jupyter_server==2.6.0
-jupyter_server_terminals==0.4.4
-jupyterlab-pygments==0.2.2
-kiwisolver==1.4.4
-langchain==0.0.200
-langchainplus-sdk==0.0.10
-linkify-it-py==2.0.2
-lxml==4.9.2
-lz4==4.3.2
-Markdown==3.4.3
-markdown-it-py==2.2.0
-MarkupSafe==2.1.3
-marshmallow==3.19.0
-marshmallow-enum==1.5.1
-matplotlib==3.7.1
-matplotlib-inline==0.1.6
-mdit-py-plugins==0.3.3
-mdurl==0.1.2
-mistune==2.0.5
-monotonic==1.6
-mpmath==1.3.0
-msg-parser==1.2.0
-multidict==6.0.4
-mypy-extensions==1.0.0
-nbclassic==1.0.0
-nbclient==0.8.0
-nbconvert==7.5.0
-nbformat==5.9.0
-nest-asyncio==1.5.6
-nltk==3.8.1
-notebook_shim==0.2.3
-numexpr==2.8.4
-numpy==1.23.5
-olefile==0.46
-onnxruntime==1.15.0
-openai==0.27.8
-openapi-schema-pydantic==1.2.4
-openpyxl==3.1.2
-orjson==3.9.1
-overrides==7.3.1
-packaging==23.1
-pandas==1.5.3
-pandocfilters==1.5.0
-parso==0.8.3
-pdf2image==1.16.3
-pdfminer.six==20221105
-pickleshare==0.7.5
-Pillow==9.5.0
-pip-search==0.0.12
-platformdirs==3.5.1
-posthog==3.0.1
-prometheus-client==0.17.0
-prompt-toolkit==3.0.38
-protobuf==4.23.2
-psutil==5.9.5
-pulsar-client==3.2.0
-pure-eval==0.2.2
-pycparser==2.21
-pydantic==1.10.9
-pydub==0.25.1
-Pygments==2.15.1
-pypandoc==1.11
-pyparsing==3.0.9
-pyreadline3==3.4.1
-pyrsistent==0.19.3
-python-dateutil==2.8.2
-python-docx==0.8.11
-python-dotenv==1.0.0
-python-json-logger==2.0.7
-python-magic==0.4.27
-python-multipart==0.0.6
-python-pptx==0.6.21
-pytube==15.0.0
-pytz==2023.3
-PyYAML==6.0
-pyzmq==25.1.0
-regex==2023.6.3
-requests==2.31.0
-rfc3339-validator==0.1.4
-rfc3986==1.5.0
-rfc3986-validator==0.1.1
-rich==13.0.1
-s3transfer==0.6.1
-scikit-learn==1.2.2
-scipy==1.10.1
-semantic-version==2.10.0
-Send2Trash==1.8.2
-six==1.16.0
-sklearn==0.0.post5
-sniffio==1.3.0
-soupsieve==2.4.1
-SQLAlchemy==2.0.16
-sqlitedict==2.1.0
-stack-data==0.6.2
-starlette==0.27.0
-sympy==1.12
-tabulate==0.9.0
-tenacity==8.2.2
-terminado==0.17.1
-threadpoolctl==3.1.0
-tiktoken==0.4.0
-tinycss2==1.2.1
-tokenizers==0.13.3
-toolz==0.12.0
-tornado==6.3.2
-tqdm==4.65.0
-traitlets==5.9.0
-typer==0.9.0
-typing-inspect==0.9.0
-typing_extensions==4.6.3
-tzdata==2023.3
-uc-micro-py==1.0.2
-unstructured==0.7.3
-uri-template==1.2.0
-urllib3
-uvicorn==0.22.0
-virtualenv==20.23.0
-watchfiles==0.19.0
-wcwidth==0.2.6
-webcolors==1.13
-webencodings==0.5.1
-websocket-client==1.5.3
-websockets==11.0.3
-wrapt==1.14.1
-xlrd==2.0.1
-XlsxWriter==3.1.2
-yarl==1.9.2
-youtube-transcript-api==0.6.0
-zstandard==0.21.0