# PDF question-answering demo: Gradio UI + LangChain retrieval chain over Gemini.
import os
import tempfile

import gradio as gr
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
# API key for Google Generative AI. Prefer the GOOGLE_API_KEY environment
# variable over editing source; the placeholder is kept only as a fallback
# so existing setups that replaced the literal keep working.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "YOUR_GOOGLE_API_KEY")
def process_pdf_and_question(pdf_file, question):
    """Answer *question* from an uploaded PDF via a Gemini retrieval chain.

    Args:
        pdf_file: Raw bytes of the uploaded PDF (Gradio file payload).
        question: The user's question as a string.

    Returns:
        The chain's answer string (the ``"answer"`` key of the chain output).
    """
    # Build the chat model and the embedding model with the shared API key.
    llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
    # Persist the upload to a unique temp file so PyPDFLoader can read it.
    # tempfile.mkstemp avoids the race of a fixed "temp_handbook.pdf" path
    # when two Gradio requests run concurrently.
    fd, temp_pdf_path = tempfile.mkstemp(suffix=".pdf")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(pdf_file)
        # Load the PDF and split on "." into ~500-char chunks, 50-char overlap.
        loader = PyPDFLoader(temp_pdf_path)
        text_splitter = CharacterTextSplitter(
            separator=".",
            chunk_size=500,
            chunk_overlap=50,
            length_function=len,
            is_separator_regex=False,
        )
        pages = loader.load_and_split(text_splitter)
        # Embed the chunks into an (in-memory) Chroma store and expose it as
        # a retriever returning the 10 nearest chunks per query.
        vectordb = Chroma.from_documents(pages, embeddings)
        retriever = vectordb.as_retriever(search_kwargs={"k": 10})
        # Stuff the retrieved chunks into the prompt and run the chain.
        template = """You are a helpful AI assistant. Answer based on the context provided.
context: {context}
input: {input}
answer:"""
        prompt = PromptTemplate.from_template(template)
        combine_docs_chain = create_stuff_documents_chain(llm, prompt)
        retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)
        response = retrieval_chain.invoke({"input": question})
        return response["answer"]
    finally:
        # Always remove the temp file — the original only cleaned up on the
        # success path and leaked the file whenever the chain raised.
        if os.path.exists(temp_pdf_path):
            os.remove(temp_pdf_path)
# Gradio UI: a PDF upload plus a free-text question box; the RAG function's
# answer is shown in a single read-only textbox.
pdf_upload = gr.File(label="上傳PDF手冊")
question_box = gr.Textbox(label="輸入您的問題")
answer_box = gr.Textbox(label="回答")

iface = gr.Interface(
    fn=process_pdf_and_question,
    inputs=[pdf_upload, question_box],
    outputs=answer_box,
    title="PDF問答系統",
    description="上傳PDF手冊並提出問題,AI將根據手冊內容回答您的問題。",
)
# Launch the Gradio server only when run as a script (the original line
# ended in a stray "|" extraction artifact, which is a syntax error).
if __name__ == "__main__":
    iface.launch()