import gradio as gr
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_community.document_loaders import PyPDFLoader

# Set your API key (read from the environment if available; the literal
# below is only a placeholder)
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "YOUR_GOOGLE_API_KEY")


def process_pdf_and_question(pdf_file, question):
    # Load the models with the API key
    llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
    
    # Save the uploaded PDF bytes to a temporary file (gr.File below is
    # configured with type="binary", so pdf_file arrives as raw bytes)
    temp_pdf_path = "temp_handbook.pdf"
    with open(temp_pdf_path, "wb") as f:
        f.write(pdf_file)
    
    # Load the PDF and create chunks
    loader = PyPDFLoader(temp_pdf_path)
    text_splitter = CharacterTextSplitter(
        separator=".",
        chunk_size=500,
        chunk_overlap=50,
        length_function=len,
        is_separator_regex=False,
    )
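    # chunk_size and chunk_overlap are measured in characters here
    # (length_function=len); 500/50 is a starting point worth tuning
    # for your documents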
    pages = loader.load_and_split(text_splitter)
    
    # Turn the chunks into embeddings and store them in Chroma
    vectordb = Chroma.from_documents(pages, embeddings)
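    # Note: the vector store is rebuilt on every question. For repeated
    # queries against the same PDF, Chroma.from_documents(..., persist_directory="db")
    # could persist the index between calls ("db" is a placeholder path).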
    
    # Configure Chroma as a retriever with top_k=10
    retriever = vectordb.as_retriever(search_kwargs={"k": 10})
    
    # Create the retrieval chain
    template = """You are a helpful AI assistant. Answer based on the context provided.
    context: {context}
    input: {input}
    answer:"""
    prompt = PromptTemplate.from_template(template)
    combine_docs_chain = create_stuff_documents_chain(llm, prompt)
    retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)
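    # create_retrieval_chain feeds the retrieved documents into {context}
    # and the user question into {input} of the prompt above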
    
    # Invoke the retrieval chain
    response = retrieval_chain.invoke({"input": question})
    
    # Clean up the temporary PDF file
    os.remove(temp_pdf_path)
    
    return response["answer"]
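
# Example (hypothetical, for quick testing without the UI): call the handler
# directly with raw PDF bytes; "handbook.pdf" and the question are placeholders.
#
#     with open("handbook.pdf", "rb") as f:
#         print(process_pdf_and_question(f.read(), "What is the return policy?"))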

# Define the Gradio interface
iface = gr.Interface(
    fn=process_pdf_and_question,
    inputs=[
        gr.File(label="Upload PDF manual", type="binary"),  # pass raw bytes to the handler
        gr.Textbox(label="Enter your question")
    ],
    outputs=gr.Textbox(label="Answer"),
    title="PDF Q&A System",
    description="Upload a PDF manual and ask a question; the AI will answer based on the manual's contents."
)

# Launch the interface
iface.launch()
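
# Gradio serves on http://127.0.0.1:7860 by default; passing share=True to
# iface.launch() would create a temporary public link if needed.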