Marathon23
committed on
Commit
•
dd40b1e
1
Parent(s):
3ea2e54
Update app.py
Browse files
app.py
CHANGED
@@ -9,27 +9,20 @@ from langchain_community.document_loaders import PyMuPDFLoader, PyPDFLoader
|
|
9 |
from langchain.vectorstores import Chroma
|
10 |
from langchain_community.embeddings import OpenAIEmbeddings
|
11 |
from langchain_community.chat_models import ChatOpenAI
|
12 |
-
import shutil
|
13 |
import logging
|
14 |
|
15 |
-
#
|
16 |
logging.basicConfig(level=logging.INFO)
|
17 |
logger = logging.getLogger(__name__)
|
18 |
|
19 |
-
#
|
20 |
-
api_key = os.getenv("OPENAI_API_KEY")
|
21 |
-
if not api_key:
|
22 |
-
raise ValueError("未能獲取 OPENAI_API_KEY。請在 Hugging Face Spaces 的 Secrets 中設置它。")
|
23 |
-
openai.api_key = api_key
|
24 |
-
logger.info("OpenAI API 密鑰已設置。")
|
25 |
-
|
26 |
-
# 確保向量資料庫目錄存在且有寫入權限
|
27 |
VECTORDB_DIR = os.path.abspath("./data")
|
28 |
os.makedirs(VECTORDB_DIR, exist_ok=True)
|
29 |
-
os.chmod(VECTORDB_DIR, 0o755)
|
30 |
logger.info(f"VECTORDB_DIR set to: {VECTORDB_DIR}")
|
31 |
|
32 |
-
#
|
33 |
def test_pdf_loader(file_path, loader_type='PyMuPDFLoader'):
|
34 |
logger.info(f"Testing PDF loader ({loader_type}) with file: {file_path}")
|
35 |
try:
|
@@ -49,8 +42,10 @@ def test_pdf_loader(file_path, loader_type='PyMuPDFLoader'):
|
|
49 |
except Exception as e:
|
50 |
logger.error(f"Error loading {file_path} with {loader_type}: {e}")
|
51 |
|
52 |
-
#
|
53 |
-
def load_and_process_documents(file_paths, loader_type='PyMuPDFLoader'):
|
|
|
|
|
54 |
documents = []
|
55 |
logger.info("開始載入上傳的 PDF 文件。")
|
56 |
|
@@ -70,7 +65,6 @@ def load_and_process_documents(file_paths, loader_type='PyMuPDFLoader'):
|
|
70 |
loaded_docs = loader.load()
|
71 |
if loaded_docs:
|
72 |
logger.info(f"載入 {file_path} 成功,包含 {len(loaded_docs)} 個文檔。")
|
73 |
-
# 打印第一個文檔的部分內容以確認
|
74 |
logger.info(f"第一個文檔內容: {loaded_docs[0].page_content[:500]}")
|
75 |
documents.extend(loaded_docs)
|
76 |
else:
|
@@ -93,7 +87,7 @@ def load_and_process_documents(file_paths, loader_type='PyMuPDFLoader'):
|
|
93 |
|
94 |
# 初始化向量資料庫
|
95 |
try:
|
96 |
-
embeddings = OpenAIEmbeddings(openai_api_key=api_key)
|
97 |
logger.info("初始化 OpenAIEmbeddings 成功。")
|
98 |
except Exception as e:
|
99 |
raise ValueError(f"初始化 OpenAIEmbeddings 時出現錯誤: {e}")
|
@@ -110,8 +104,8 @@ def load_and_process_documents(file_paths, loader_type='PyMuPDFLoader'):
|
|
110 |
|
111 |
return vectordb
|
112 |
|
113 |
-
#
|
114 |
-
def handle_query(user_message, chat_history, vectordb):
|
115 |
try:
|
116 |
if not user_message:
|
117 |
return chat_history
|
@@ -144,11 +138,32 @@ def handle_query(user_message, chat_history, vectordb):
|
|
144 |
logger.error(f"Error in handle_query: {e}")
|
145 |
return chat_history + [("系統", f"出現錯誤: {str(e)}")]
|
146 |
|
147 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
def process_files(files, state):
|
149 |
logger.info("process_files called")
|
150 |
if files:
|
151 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
logger.info(f"Received {len(files)} files")
|
153 |
saved_file_paths = []
|
154 |
for idx, file_data in enumerate(files):
|
@@ -182,7 +197,7 @@ def process_files(files, state):
|
|
182 |
# 列出文件大小
|
183 |
file_sizes = {file: os.path.getsize(os.path.join(VECTORDB_DIR, file)) for file in saved_files}
|
184 |
logger.info(f"File sizes in VECTORDB_DIR: {file_sizes}")
|
185 |
-
vectordb = load_and_process_documents(saved_file_paths, loader_type='PyMuPDFLoader')
|
186 |
state['vectordb'] = vectordb
|
187 |
return "PDF 文件已成功上傳並處理。您現在可以開始提問。", state
|
188 |
except Exception as e:
|
@@ -191,13 +206,17 @@ def process_files(files, state):
|
|
191 |
else:
|
192 |
return "請上傳至少一個 PDF 文件。", state
|
193 |
|
|
|
194 |
def chat_interface(user_message, chat_history, state):
|
195 |
vectordb = state.get('vectordb', None)
|
|
|
196 |
if not vectordb:
|
197 |
return chat_history, state, "請先上傳 PDF 文件以進行處理。"
|
|
|
|
|
198 |
|
199 |
# 處理查詢
|
200 |
-
updated_history = handle_query(user_message, chat_history, vectordb)
|
201 |
return updated_history, state, ""
|
202 |
|
203 |
# 設計 Gradio 介面
|
@@ -205,7 +224,19 @@ with gr.Blocks() as demo:
|
|
205 |
gr.Markdown("<h1 style='text-align: center;'>MBTI 與經典調酒 AI 助理</h1>")
|
206 |
|
207 |
# 定義共享的 state
|
208 |
-
state = gr.State({"vectordb": None})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
|
210 |
with gr.Tab("上傳 PDF 文件"):
|
211 |
with gr.Row():
|
@@ -243,6 +274,13 @@ with gr.Blocks() as demo:
|
|
243 |
outputs=[chatbot, state, txt]
|
244 |
)
|
245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
246 |
# 綁定上傳按鈕
|
247 |
upload_btn.click(
|
248 |
process_files,
|
|
|
9 |
from langchain.vectorstores import Chroma
|
10 |
from langchain_community.embeddings import OpenAIEmbeddings
|
11 |
from langchain_community.chat_models import ChatOpenAI
|
12 |
+
import shutil
|
13 |
import logging
|
14 |
|
15 |
+
# 設置日誌
|
16 |
logging.basicConfig(level=logging.INFO)
|
17 |
logger = logging.getLogger(__name__)
|
18 |
|
19 |
+
# 向量資料庫目錄
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
VECTORDB_DIR = os.path.abspath("./data")
|
21 |
os.makedirs(VECTORDB_DIR, exist_ok=True)
|
22 |
+
os.chmod(VECTORDB_DIR, 0o755)
|
23 |
logger.info(f"VECTORDB_DIR set to: {VECTORDB_DIR}")
|
24 |
|
25 |
+
# 測試 PDF 加載器
|
26 |
def test_pdf_loader(file_path, loader_type='PyMuPDFLoader'):
|
27 |
logger.info(f"Testing PDF loader ({loader_type}) with file: {file_path}")
|
28 |
try:
|
|
|
42 |
except Exception as e:
|
43 |
logger.error(f"Error loading {file_path} with {loader_type}: {e}")
|
44 |
|
45 |
+
# 載入和處理 PDF 文件
|
46 |
+
def load_and_process_documents(file_paths, loader_type='PyMuPDFLoader', api_key=None):
|
47 |
+
if not api_key:
|
48 |
+
raise ValueError("未提供 OpenAI API 密鑰。")
|
49 |
documents = []
|
50 |
logger.info("開始載入上傳的 PDF 文件。")
|
51 |
|
|
|
65 |
loaded_docs = loader.load()
|
66 |
if loaded_docs:
|
67 |
logger.info(f"載入 {file_path} 成功,包含 {len(loaded_docs)} 個文檔。")
|
|
|
68 |
logger.info(f"第一個文檔內容: {loaded_docs[0].page_content[:500]}")
|
69 |
documents.extend(loaded_docs)
|
70 |
else:
|
|
|
87 |
|
88 |
# 初始化向量資料庫
|
89 |
try:
|
90 |
+
embeddings = OpenAIEmbeddings(openai_api_key=api_key)
|
91 |
logger.info("初始化 OpenAIEmbeddings 成功。")
|
92 |
except Exception as e:
|
93 |
raise ValueError(f"初始化 OpenAIEmbeddings 時出現錯誤: {e}")
|
|
|
104 |
|
105 |
return vectordb
|
106 |
|
107 |
+
# 聊天處理函數
|
108 |
+
def handle_query(user_message, chat_history, vectordb, api_key):
|
109 |
try:
|
110 |
if not user_message:
|
111 |
return chat_history
|
|
|
138 |
logger.error(f"Error in handle_query: {e}")
|
139 |
return chat_history + [("系統", f"出現錯誤: {str(e)}")]
|
140 |
|
141 |
+
# 保存 API 密鑰的函數
|
142 |
+
def save_api_key(api_key, state):
    """Validate a user-supplied OpenAI API key and store it in ``state``.

    Args:
        api_key: Text from the API-key textbox. ``None`` and surrounding
            whitespace (common when pasting) are tolerated.
        state: Dict passed in by the Gradio click handler; on success its
            ``'api_key'`` entry is set to the cleaned key.

    Returns:
        A ``(status_message, state)`` tuple. ``state`` is returned unchanged
        when the key is rejected.
    """
    api_key = (api_key or "").strip()
    if not api_key.startswith("sk-"):
        return "請輸入有效的 OpenAI API 密鑰。", state

    # Validate with a cheap "list models" request, passing the key explicitly
    # so a rejected key never ends up installed as the process-wide default.
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: the legacy Engine/Model resources were removed,
            # so the check must go through a client instance.
            openai.OpenAI(api_key=api_key).models.list()
        else:
            # openai<1.0: legacy resource API accepts a per-call api_key.
            openai.Model.list(api_key=api_key)
    except Exception as e:
        logger.error(f"Invalid OpenAI API key: {e}")
        return "無效的 OpenAI API 密鑰。請重新輸入。", state

    # NOTE(review): the module-level default is still set for compatibility
    # with code that may read it; it is shared by every session in this
    # process — confirm whether anything relies on it and drop it if not.
    openai.api_key = api_key
    state['api_key'] = api_key
    logger.info("使用者已保存自己的 OpenAI API 密鑰。")
    return "API 密鑰已成功保存。您現在可以上傳 PDF 文件並開始提問。", state
|
155 |
+
|
156 |
+
# 處理文件上傳的函數
|
157 |
def process_files(files, state):
|
158 |
logger.info("process_files called")
|
159 |
if files:
|
160 |
try:
|
161 |
+
# 檢查是否已保存 API 密鑰
|
162 |
+
api_key = state.get('api_key', None)
|
163 |
+
if not api_key:
|
164 |
+
logger.error("使用者未提供 OpenAI API 密鑰。")
|
165 |
+
return "請先在「設定 API 密鑰」標籤中輸入並保存您的 OpenAI API 密鑰。", state
|
166 |
+
|
167 |
logger.info(f"Received {len(files)} files")
|
168 |
saved_file_paths = []
|
169 |
for idx, file_data in enumerate(files):
|
|
|
197 |
# 列出文件大小
|
198 |
file_sizes = {file: os.path.getsize(os.path.join(VECTORDB_DIR, file)) for file in saved_files}
|
199 |
logger.info(f"File sizes in VECTORDB_DIR: {file_sizes}")
|
200 |
+
vectordb = load_and_process_documents(saved_file_paths, loader_type='PyMuPDFLoader', api_key=api_key)
|
201 |
state['vectordb'] = vectordb
|
202 |
return "PDF 文件已成功上傳並處理。您現在可以開始提問。", state
|
203 |
except Exception as e:
|
|
|
206 |
else:
|
207 |
return "請上傳至少一個 PDF 文件。", state
|
208 |
|
209 |
+
# 聊天介面處理函數
|
210 |
def chat_interface(user_message, chat_history, state):
    """Answer a chat message once both a vector store and an API key exist.

    Args:
        user_message: The text the user submitted.
        chat_history: Existing chat history; returned unchanged when a
            prerequisite is missing.
        state: Dict holding ``'vectordb'`` and ``'api_key'`` entries.

    Returns:
        A ``(chat_history, state, message)`` tuple. ``message`` is a prompt
        telling the user which prerequisite is missing, or ``""`` after the
        query has been handled.
    """
    vectordb = state.get('vectordb')
    api_key = state.get('api_key')

    # Compare against None explicitly: a store object that defines __len__
    # would otherwise be treated as "not uploaded" whenever it is empty.
    if vectordb is None:
        return chat_history, state, "請先上傳 PDF 文件以進行處理。"
    if not api_key:
        return chat_history, state, "請先在「設定 API 密鑰」標籤中輸入並保存您的 OpenAI API 密鑰。"

    # Both prerequisites are present; delegate the actual query handling.
    updated_history = handle_query(user_message, chat_history, vectordb, api_key)
    return updated_history, state, ""
|
221 |
|
222 |
# 設計 Gradio 介面
|
|
|
224 |
gr.Markdown("<h1 style='text-align: center;'>MBTI 與經典調酒 AI 助理</h1>")
|
225 |
|
226 |
# 定義共享的 state
|
227 |
+
state = gr.State({"vectordb": None, "api_key": None})
|
228 |
+
|
229 |
+
with gr.Tab("設定 API 密鑰"):
|
230 |
+
with gr.Row():
|
231 |
+
with gr.Column(scale=1):
|
232 |
+
api_key_input = gr.Textbox(
|
233 |
+
label="輸入您的 OpenAI API 密鑰",
|
234 |
+
placeholder="sk-...",
|
235 |
+
type="password",
|
236 |
+
interactive=True
|
237 |
+
)
|
238 |
+
save_api_key_btn = gr.Button("保存 API 密鑰")
|
239 |
+
api_key_status = gr.Textbox(label="狀態", interactive=False)
|
240 |
|
241 |
with gr.Tab("上傳 PDF 文件"):
|
242 |
with gr.Row():
|
|
|
274 |
outputs=[chatbot, state, txt]
|
275 |
)
|
276 |
|
277 |
+
# 綁定保存 API 密鑰按鈕
|
278 |
+
save_api_key_btn.click(
|
279 |
+
save_api_key,
|
280 |
+
inputs=[api_key_input, state],
|
281 |
+
outputs=[api_key_status, state]
|
282 |
+
)
|
283 |
+
|
284 |
# 綁定上傳按鈕
|
285 |
upload_btn.click(
|
286 |
process_files,
|