"""Gradio app: upload a CSV/Excel sheet, summarise it and chat about it via OpenAI."""

import json
import os
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr
import pandas as pd
from openai import OpenAI

OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
client = OpenAI(api_key=OPEN_AI_KEY)

# Model settings shared by every request in this file.
MODEL_NAME = "gpt-4-1106-preview"
MAX_TOKENS = 4000  # deliberately generous; adjust as needed

# Number of suggested-question buttons in the UI.
QUESTION_SLOTS = 3


def _log_block(title: str, value: Any) -> None:
    """Print *value* framed by ``=====title=====`` banners (debug output)."""
    banner = f"====={title}====="
    print(banner)
    print(value)
    print(banner)


def _chat_completion(messages: List[Dict[str, str]], **extra_params: Any) -> Any:
    """Log *messages* and send them to the chat-completions endpoint.

    ``extra_params`` are merged into the request payload (for example
    ``response_format``). The raw API response object is returned.
    """
    _log_block("messages", messages)
    request_payload = {
        "model": MODEL_NAME,
        "messages": messages,
        "max_tokens": MAX_TOKENS,
        **extra_params,
    }
    return client.chat.completions.create(**request_payload)


def _response_text(response: Any) -> str:
    """Return the first choice's message content, stripped; '' if it is None."""
    return (response.choices[0].message.content or "").strip()


def process_file(file) -> Tuple[str, str, str, str, str]:
    """Load the uploaded sheet and derive everything the UI shows for it.

    Args:
        file: The value delivered by the ``gr.File`` component: an object
            with a ``.name`` path attribute, a plain path string, or ``None``
            when the upload has been cleared.

    Returns:
        ``(question_1, question_2, question_3, summary, table_text)``.
        Missing questions are returned as empty strings; all five values are
        empty strings when no file is present.
    """
    # The change event also fires when the upload is cleared; there is
    # nothing to read in that case, so just blank every output.
    if file is None:
        return "", "", "", "", ""

    # Resolve a filesystem path whether we were given a wrapper or a string.
    path = getattr(file, "name", file)

    # Read the file (CSV by extension, otherwise treat it as Excel).
    if str(path).lower().endswith(".csv"):
        df = pd.read_csv(path)
    else:
        df = pd.read_excel(path)

    df_string = df.to_string()

    # Generate suggested questions and a summary from the uploaded content.
    questions = generate_questions(df_string)
    df_summarise = generate_df_summarise(df_string)

    # Pad so that each of the three buttons always receives a value.
    padded = (list(questions) + [""] * QUESTION_SLOTS)[:QUESTION_SLOTS]

    # Button texts, the summary, and the DataFrame rendered as a string.
    return padded[0], padded[1], padded[2], df_summarise, df_string


def generate_df_summarise(df_string: str) -> str:
    """Ask the model to describe the table given as text in *df_string*.

    Returns the model's answer with surrounding whitespace removed.
    """
    sys_content = "你是一個資料分析師,服務對象為老師,請精讀資料,使用 zh-TW"
    user_content = f"請根據 {df_string},大概描述這張表的欄位敘述、資料樣態與資料分析,告訴老師這張表的意義,以及可能的結論與對應方式"
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content},
    ]

    response = _chat_completion(messages)
    df_summarise = _response_text(response)

    _log_block("df_summarise", df_summarise)
    return df_summarise


def generate_questions(df_string: str) -> List[str]:
    """Ask the model for suggested questions about the table in *df_string*.

    The model is instructed to answer with a JSON object holding a
    ``questions`` list. An empty list is returned when the reply cannot be
    parsed or does not contain such a list.
    """
    sys_content = "你是一個資料分析師,user為老師,請精讀資料,並用既有資料為本質猜測用戶可能會問的問題,使用 zh-TW"
    user_content = f"請根據 {df_string} 生成三個問題,並用 JSON 格式返回 questions:[q1, q2, q3]"
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content},
    ]
    response_format = {"type": "json_object"}

    response = _chat_completion(messages, response_format=response_format)

    # The reply can still be unusable (e.g. cut off at the token limit or
    # using a different key), so parse defensively instead of crashing the UI.
    try:
        parsed = json.loads(_response_text(response))
    except json.JSONDecodeError:
        parsed = {}
    questions = parsed.get("questions", []) if isinstance(parsed, dict) else []
    if not isinstance(questions, list):
        questions = []
    questions = [str(question) for question in questions]

    _log_block("json_response", questions)
    return questions


def send_question(question, df_string_output, chat_history):
    """Forward a suggested question to :func:`respond` (question-button hook)."""
    return respond(question, df_string_output, chat_history)


def respond(
    user_message: str,
    df_string_output: str,
    chat_history: Optional[List[Tuple[str, str]]],
) -> Tuple[str, Optional[List[Tuple[str, str]]]]:
    """Answer *user_message* about the table text and extend the chat history.

    Args:
        user_message: The question typed by the user or taken from a button.
        df_string_output: The table rendered as text; embedded in the system
            prompt.
        chat_history: Existing ``(user, assistant)`` pairs, or ``None``.

    Returns:
        ``("", history)`` — the empty string clears the input box and
        ``history`` is the chat history including the new exchange.
    """
    print("=== 變數:user_message ===")
    print(user_message)
    print("=== 變數:chat_history ===")
    print(chat_history)

    # Nothing to ask (blank textbox or an unpopulated question button):
    # skip the API call and leave the conversation untouched.
    if not user_message or not user_message.strip():
        return "", chat_history

    # NOTE: earlier turns are not sent to the model; each question is
    # answered from the table text and the current message only.
    sys_content = f"你是一個資料分析師,請用 {df_string_output} 為資料進行對話,使用 zh-TW"
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_message},
    ]

    response = _chat_completion(messages)
    print(response)
    response_text = _response_text(response)

    # Build a new history list rather than mutating the caller's object.
    updated_history = list(chat_history or [])
    updated_history.append((user_message, response_text))

    # Return an empty string to clear the input box, plus the chat history.
    return "", updated_history


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            file_upload = gr.File(label="Upload your file")
            chatbot = gr.Chatbot()
            msg = gr.Textbox(label="Message")
            send_button = gr.Button("Send")

        with gr.Column():
            with gr.Tab("資料本文"):
                df_string_output = gr.Textbox()
            with gr.Tab("資料摘要"):
                gr.Markdown("## 這是一張什麼表?")
                # NOTE(review): this label repeats the first tab's title;
                # confirm whether that is intended.
                df_summarise = gr.Textbox(
                    container=True,
                    show_copy_button=True,
                    label="資料本文",
                    lines=40,
                )
            with gr.Tab("常用問題"):
                gr.Markdown("## 常用問題")
                btn_1 = gr.Button()
                btn_2 = gr.Button()
                btn_3 = gr.Button()

    send_button.click(
        respond,
        inputs=[msg, df_string_output, chatbot],
        outputs=[msg, chatbot],
    )

    # Wire the question buttons: each sends its own text as the message.
    for question_button in (btn_1, btn_2, btn_3):
        question_button.click(
            respond,
            inputs=[question_button, df_string_output, chatbot],
            outputs=[msg, chatbot],
        )

    # A new (or cleared) upload refreshes the buttons, summary and table text.
    file_upload.change(
        process_file,
        inputs=file_upload,
        outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output],
    )

demo.launch()