import os

import gradio as gr
import torch
from langchain.chains.question_answering import load_qa_chain
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.llms import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load the Mistral model
model_path = "nvidia/Mistral-NeMo-Minitron-8B-instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16
print(f"Using device: {device}")

# Initialize the tokenizer
mistral_tokenizer = AutoTokenizer.from_pretrained(model_path)

# Initialize the model
mistral_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=dtype,
    device_map=device,
    low_cpu_mem_usage=True,
)

# Create the text-generation pipeline.
# max_new_tokens bounds only the generated text (max_length would also count the
# long stuffed prompt), and return_full_text=False keeps the prompt out of the answer.
text_generation_pipeline = pipeline(
    "text-generation",
    model=mistral_model,
    tokenizer=mistral_tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.3,
    top_p=0.95,
    return_full_text=False,
)

# Wrap the pipeline so LangChain can use it as an LLM
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)


def initialize(file_path, question):
    """Answer a question about the PDF at file_path and suggest a follow-up question."""
    try:
        prompt_template = """Answer the question as accurately as possible using the provided context. If the context does not contain the answer, say "The answer is not provided in the context".

Context:
{context}

Question:
{question}

Answer:
"""
        prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

        if os.path.exists(file_path):
            pdf_loader = PyPDFLoader(file_path)
            pages = pdf_loader.load_and_split()

            # Limit the context to avoid exceeding the token limit
            max_pages = 5  # adjust to the model's context window and the document length

            try:
                # Build a "stuff" question-answering chain around Mistral;
                # the chain fills {context} from the supplied documents.
                stuff_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)

                # Get the answer using only the limited set of pages
                stuff_answer = stuff_chain.invoke(
                    {"input_documents": pages[:max_pages], "question": question}
                )
                main_answer = stuff_answer["output_text"]

                # Generate a follow-up question
                follow_up_prompt = (
                    f"Based on this answer: {main_answer}\n"
                    "Generate one relevant follow-up question:"
                )
                follow_up_inputs = mistral_tokenizer.encode(
                    follow_up_prompt, return_tensors="pt"
                ).to(device)
                with torch.no_grad():
                    follow_up_outputs = mistral_model.generate(
                        follow_up_inputs,
                        max_new_tokens=64,
                        temperature=0.7,
                        top_p=0.9,
                        do_sample=True,
                        pad_token_id=mistral_tokenizer.eos_token_id,
                    )
                # Decode only the newly generated tokens, not the echoed prompt
                follow_up = mistral_tokenizer.decode(
                    follow_up_outputs[0][follow_up_inputs.shape[1]:],
                    skip_special_tokens=True,
                ).strip()
                # Strip a leading label if the model repeats it
                if follow_up.lower().startswith("follow-up question:"):
                    follow_up = follow_up[len("follow-up question:"):].strip()

                return f"Answer: {main_answer}\n\nPossible follow-up question: {follow_up}"
            except Exception as e:
                if "exceeds the maximum token count" in str(e):
                    return "Error: The document is too large to process. Please try a smaller document."
                raise
        else:
            return "Error: Unable to process the document. Please make sure the PDF file exists and is valid."
    except Exception as e:
        return f"An error occurred: {str(e)}"


# Gradio callback: validate the upload, then run the QA function
def pdf_qa(file, question):
    if file is None:
        return "Please upload a PDF file first."
    return initialize(file.name, question)


# Build the Gradio interface
demo = gr.Interface(
    fn=pdf_qa,
    inputs=[
        gr.File(label="Upload a PDF file", file_types=[".pdf"]),
        gr.Textbox(label="Ask about the document", placeholder="What is this document mainly about?"),
    ],
    outputs=gr.Textbox(label="Mistral answer"),
    title="Mistral-based PDF Question Answering",
    description="Upload a PDF file and ask a question; the Mistral model will analyze the content and return an answer plus a possible follow-up question.",
)

if __name__ == "__main__":
    demo.launch()