Spaces:

Rooobert
/

Nvidia_RAG_pdf_R

Sleeping

File size: 4,661 Bytes

1ffdd41
 
 
 
 
adcdd13
 
 
 
1ffdd41
adcdd13
666813f
1ffdd41
 
adcdd13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ffdd41
 
 
adcdd13
 
 
 
 
1ffdd41
 
 
 
 
 
adcdd13
 
 
1ffdd41
adcdd13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ffdd41
adcdd13
1ffdd41
adcdd13
1ffdd41
adcdd13
1ffdd41
 
adcdd13
1ffdd41
 
adcdd13
 
1ffdd41
adcdd13

import os
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain_community.llms import HuggingFacePipeline
from langchain.chains.question_answering import load_qa_chain
from langchain_core.prompts import PromptTemplate
from transformers import pipeline

# 載入 Mistral 模型
model_path = "nvidia/Mistral-NeMo-Minitron-8B-instruct"
device = 'cuda' if torch.cuda.is_available() else 'cpu'
dtype = torch.bfloat16
print(f"使用設備: {device}")

# 初始化 tokenizer
mistral_tokenizer = AutoTokenizer.from_pretrained(model_path)

# 初始化模型
mistral_model = AutoModelForCausalLM.from_pretrained(
    model_path, 
    torch_dtype=dtype, 
    device_map=device,
    low_cpu_mem_usage=True
)

# 創建 pipeline
text_generation_pipeline = pipeline(
    "text-generation",
    model=mistral_model,
    tokenizer=mistral_tokenizer,
    max_length=512,
    temperature=0.3,
    top_p=0.95,
    device_map=device
)

# 為 pipeline 創建 LangChain 包裝器
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

def initialize(file_path, question):
    try:
        prompt_template = """根據提供的上下文盡可能準確地回答問題。如果上下文中沒有包含答案，請說「上下文中沒有提供答案」\n\n
                          上下文: \n {context}?\n
                          問題: \n {question} \n
                          回答:
                        """
        prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
        
        if os.path.exists(file_path):
            pdf_loader = PyPDFLoader(file_path)
            pages = pdf_loader.load_and_split()
            
            # 限制上下文以避免超出令牌限制
            max_pages = 5  # 根據模型容量和文檔長度調整
            context = "\n".join(str(page.page_content) for page in pages[:max_pages])
            
            try:
                # 使用 Mistral 創建問答鏈
                stuff_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
                
                # 使用有限的頁面獲取答案
                stuff_answer = stuff_chain(
                    {"input_documents": pages[:max_pages], "question": question, "context": context}, 
                    return_only_outputs=True
                )
                
                main_answer = stuff_answer['output_text']
                
                # 生成後續問題
                follow_up_prompt = f"根據這個回答: {main_answer}\n生成一個相關的後續問題:"
                follow_up_inputs = mistral_tokenizer.encode(follow_up_prompt, return_tensors='pt').to(device)
                
                with torch.no_grad():
                    follow_up_outputs = mistral_model.generate(
                        follow_up_inputs, 
                        max_length=256,
                        temperature=0.7,
                        top_p=0.9,
                        do_sample=True
                    )
                
                follow_up = mistral_tokenizer.decode(follow_up_outputs[0], skip_special_tokens=True)
                
                # 提取問題
                if "後續問題:" in follow_up.lower():
                    follow_up = follow_up.split("後續問題:", 1)[1].strip()
                
                combined_output = f"回答: {main_answer}\n\n可能的後續問題: {follow_up}"
                return combined_output
                
            except Exception as e:
                if "exceeds the maximum token count" in str(e):
                    return "錯誤: 文檔太大無法處理。請嘗試使用較小的文檔。"
                else:
                    raise e
        else:
            return "錯誤: 無法處理文檔。請確保PDF文件存在且有效。"
    except Exception as e:
        return f"發生錯誤: {str(e)}"

# 定義 Gradio 界面
def pdf_qa(file, question):
    if file is None:
        return "請先上傳PDF文件。"
    return initialize(file.name, question)

# 創建 Gradio 界面
demo = gr.Interface(
    fn=pdf_qa,
    inputs=[
        gr.File(label="上傳PDF文件", file_types=[".pdf"]),
        gr.Textbox(label="詢問文檔內容", placeholder="這個文檔主要講了什麼？")
    ],
    outputs=gr.Textbox(label="Mistral 回答"),
    title="基於Mistral的PDF問答系統",
    description="上傳PDF文件並提出問題，Mistral模型將分析內容並提供回答和可能的後續問題。"
)

if __name__ == "__main__":
    demo.launch()