youngtsai committed on
Commit
c567be4
1 Parent(s): e8ac9fc

add yt, web

Browse files
Files changed (2) hide show
  1. app.py +38 -2
  2. requirements.txt +4 -1
app.py CHANGED
@@ -1,5 +1,8 @@
1
  import gradio as gr
2
  import pandas as pd
 
 
 
3
  import os
4
  from openai import OpenAI
5
  import json
@@ -11,8 +14,14 @@ def process_file(file):
11
  # 读取文件
12
  if file.name.endswith('.csv'):
13
  df = pd.read_csv(file)
14
- else:
 
15
  df = pd.read_excel(file)
 
 
 
 
 
16
 
17
  df_string = df.to_string()
18
  # 宜蘭:移除@XX@符号 to |
@@ -29,6 +38,26 @@ def process_file(file):
29
  df_summarise, \
30
  df_string
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def generate_df_summarise(df_string):
33
  # 使用 OpenAI 生成基于上传数据的问题
34
  sys_content = "你是一個資料分析師,服務對象為老師,請精讀資料,使用 zh-TW"
@@ -131,7 +160,9 @@ def respond(user_message, df_string_output, chat_history):
131
  with gr.Blocks() as demo:
132
  with gr.Row():
133
  with gr.Column():
134
- file_upload = gr.File(label="Upload your file")
 
 
135
  chatbot = gr.Chatbot()
136
  msg = gr.Textbox(label="Message")
137
  send_button = gr.Button("Send")
@@ -163,6 +194,11 @@ with gr.Blocks() as demo:
163
  # file_upload.change(process_file, inputs=file_upload, outputs=df_string_output)
164
  file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
165
 
 
 
 
 
 
166
 
167
 
168
 
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ from docx import Document
6
  import os
7
  from openai import OpenAI
8
  import json
 
14
  # 读取文件
15
  if file.name.endswith('.csv'):
16
  df = pd.read_csv(file)
17
+ text = df_to_text(df)
18
+ elif file.name.endswith('.xlsx'):
19
  df = pd.read_excel(file)
20
+ text = df_to_text(df)
21
+ elif file.name.endswith('.docx'):
22
+ text = docx_to_text(file)
23
+ else:
24
+ raise ValueError("Unsupported file type")
25
 
26
  df_string = df.to_string()
27
  # 宜蘭:移除@XX@符号 to |
 
38
  df_summarise, \
39
  df_string
40
 
41
+ def df_to_text(df):
42
+ # 将 DataFrame 转换为纯文本
43
+ return df.to_string()
44
+
45
+ def docx_to_text(file):
46
+ # 将 Word 文档转换为纯文本
47
+ doc = Document(file)
48
+ return "\n".join([para.text for para in doc.paragraphs])
49
+
50
+ def process_youtube_link(link):
51
+ # 从 YouTube 链接中提取视频 ID
52
+ return link.split("=")[-1]
53
+
54
+ def process_web_link(link):
55
+ # 抓取和解析网页内容
56
+ response = requests.get(link)
57
+ soup = BeautifulSoup(response.content, 'html.parser')
58
+ return soup.get_text()
59
+
60
+
61
  def generate_df_summarise(df_string):
62
  # 使用 OpenAI 生成基于上传数据的问题
63
  sys_content = "你是一個資料分析師,服務對象為老師,請精讀資料,使用 zh-TW"
 
160
  with gr.Blocks() as demo:
161
  with gr.Row():
162
  with gr.Column():
163
+ file_upload = gr.File(label="Upload your CSV or Word file")
164
+ youtube_link = gr.Textbox(label="Enter YouTube Link")
165
+ web_link = gr.Textbox(label="Enter Web Page Link")
166
  chatbot = gr.Chatbot()
167
  msg = gr.Textbox(label="Message")
168
  send_button = gr.Button("Send")
 
194
  # file_upload.change(process_file, inputs=file_upload, outputs=df_string_output)
195
  file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
196
 
197
+ # 当输入 YouTube 链接时触发
198
+ youtube_link.change(process_youtube_link, inputs=youtube_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
199
+
200
+ # 当输入网页链接时触发
201
+ web_link.change(process_web_link, inputs=web_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
202
 
203
 
204
 
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
  gradio
2
  pandas
3
- openai >= 1.0.0
 
 
 
 
1
  gradio
2
  pandas
3
+ openai>=1.0.0
4
+ requests
5
+ beautifulsoup4
6
+ python-docx