CHUNYU0505 committed on
Commit
299f87b
·
verified ·
1 Parent(s): d0ba755

更新套件

Browse files
Files changed (1) hide show
  1. app.py +12 -10
app.py CHANGED
@@ -6,11 +6,10 @@ import os, glob, requests
6
  from langchain.docstore.document import Document
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain.chains import RetrievalQA
9
- from langchain_huggingface import HuggingFaceEmbeddings
10
  from docx import Document as DocxDocument
11
  import gradio as gr
12
  from langchain_community.vectorstores import FAISS
13
- from langchain_community.llms import HuggingFaceHub
14
 
15
  # -------------------------------
16
  # 2. 環境變數與資料路徑
@@ -21,7 +20,9 @@ os.makedirs(DB_PATH, exist_ok=True)
21
 
22
  HF_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
23
  if not HF_TOKEN:
24
- raise ValueError("請在 Hugging Face Space 的 Settings → Repository secrets 設定 HUGGINGFACEHUB_API_TOKEN")
 
 
25
 
26
  # -------------------------------
27
  # 3. 建立或載入向量資料庫
@@ -38,7 +39,9 @@ else:
38
  docs = []
39
  for filepath in txt_files:
40
  with open(filepath, "r", encoding="utf-8") as f:
41
- docs.append(Document(page_content=f.read(), metadata={"source": os.path.basename(filepath)}))
 
 
42
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
43
  split_docs = splitter.split_documents(docs)
44
  db = FAISS.from_documents(split_docs, embeddings_model)
@@ -47,12 +50,13 @@ else:
47
  retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
48
 
49
  # -------------------------------
50
- # 4. LLM 設定(Hugging Face Hub
51
  # -------------------------------
52
- llm = HuggingFaceHub(
53
  repo_id="google/flan-t5-large",
 
 
54
  model_kwargs={"temperature": 0.7, "max_new_tokens": 512},
55
- huggingfacehub_api_token=HF_TOKEN
56
  )
57
 
58
  qa_chain = RetrievalQA.from_chain_type(
@@ -102,7 +106,6 @@ def generate_article_with_rate(query, segments=5):
102
  doc.save(docx_file)
103
  full_text = "\n\n".join(all_text)
104
 
105
- # 取得 API 剩餘次數
106
  rate_info = get_hf_rate_limit()
107
  return f"{rate_info}\n\n{full_text}", docx_file
108
 
@@ -120,9 +123,8 @@ iface = gr.Interface(
120
  gr.File(label="下載 DOCX")
121
  ],
122
  title="佛教經論 RAG 系統 (HF API)",
123
- description="使用 Hugging Face Hub LLM + FAISS RAG,生成文章並提示 API 剩餘額度。"
124
  )
125
 
126
  if __name__ == "__main__":
127
  iface.launch()
128
-
 
6
  from langchain.docstore.document import Document
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain.chains import RetrievalQA
9
+ from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
10
  from docx import Document as DocxDocument
11
  import gradio as gr
12
  from langchain_community.vectorstores import FAISS
 
13
 
14
  # -------------------------------
15
  # 2. 環境變數與資料路徑
 
20
 
21
  HF_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
22
  if not HF_TOKEN:
23
+ raise ValueError(
24
+ "請在 Hugging Face Space 的 Settings → Repository secrets 設定 HUGGINGFACEHUB_API_TOKEN"
25
+ )
26
 
27
  # -------------------------------
28
  # 3. 建立或載入向量資料庫
 
39
  docs = []
40
  for filepath in txt_files:
41
  with open(filepath, "r", encoding="utf-8") as f:
42
+ docs.append(
43
+ Document(page_content=f.read(), metadata={"source": os.path.basename(filepath)})
44
+ )
45
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
46
  split_docs = splitter.split_documents(docs)
47
  db = FAISS.from_documents(split_docs, embeddings_model)
 
50
  retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})
51
 
52
  # -------------------------------
53
+ # 4. LLM 設定(Hugging Face Endpoint)
54
  # -------------------------------
55
+ llm = HuggingFaceEndpoint(
56
  repo_id="google/flan-t5-large",
57
+ task="text2text-generation", # 明確指定 task
58
+ huggingfacehub_api_token=HF_TOKEN,
59
  model_kwargs={"temperature": 0.7, "max_new_tokens": 512},
 
60
  )
61
 
62
  qa_chain = RetrievalQA.from_chain_type(
 
106
  doc.save(docx_file)
107
  full_text = "\n\n".join(all_text)
108
 
 
109
  rate_info = get_hf_rate_limit()
110
  return f"{rate_info}\n\n{full_text}", docx_file
111
 
 
123
  gr.File(label="下載 DOCX")
124
  ],
125
  title="佛教經論 RAG 系統 (HF API)",
126
+ description="使用 Hugging Face Endpoint LLM + FAISS RAG,生成文章並提示 API 剩餘額度。"
127
  )
128
 
129
  if __name__ == "__main__":
130
  iface.launch()