import gradio as gr
import os

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_community.embeddings import HuggingFaceEmbeddings 
from langchain_community.llms import HuggingFacePipeline
from langchain.memory import ConversationBufferMemory

import chromadb

from transformers import AutoTokenizer, pipeline
import torch
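
# Likely runtime dependencies (an assumption; the source pins no versions):
#   pip install gradio langchain langchain-community chromadb pypdf \
#       transformers torch sentence-transformers accelerate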

# Fully open, free-to-use models
list_llm = [
    "google/flan-t5-xxl",          # text-to-text (seq2seq) model
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # lightweight chat model
    "microsoft/phi-2",             # small model tuned for reasoning
    "facebook/opt-1.3b",           # general text-generation model
    "EleutherAI/gpt-neo-1.3B",     # open-source GPT-3-style model
    "bigscience/bloom-1b7",        # multilingual model
    "RWKV/rwkv-4-169m-pile",       # RAM-efficient RNN-based model
    "gpt2-medium",                 # classic GPT-2 model
    "databricks/dolly-v2-3b",      # instruction-tuned model
    "mosaicml/mpt-7b-instruct"     # instruction-tuned model
]

list_llm_simple = [os.path.basename(llm) for llm in list_llm]
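# e.g. "TinyLlama/TinyLlama-1.1B-Chat-v1.0" -> "TinyLlama-1.1B-Chat-v1.0"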

# Load PDF documents and split them into overlapping chunks
def load_doc(list_file_path, chunk_size, chunk_overlap):
    loaders = [PyPDFLoader(x) for x in list_file_path]
    pages = []
    for loader in loaders:
        pages.extend(loader.load())
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, 
        chunk_overlap=chunk_overlap
    )
    return text_splitter.split_documents(pages)
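
# A quick sanity check (hypothetical path, not shipped with this app):
#   chunks = load_doc(["manual.pdf"], chunk_size=500, chunk_overlap=50)
#   print(len(chunks), chunks[0].page_content[:80])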

# Build an in-memory Chroma vector database from the document chunks
def create_db(splits, collection_name):
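    # HuggingFaceEmbeddings defaults to "sentence-transformers/all-MiniLM-L6-v2";
    # EphemeralClient keeps the collection in memory for the session only.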
    embedding = HuggingFaceEmbeddings()
    return Chroma.from_documents(
        documents=splits,
        embedding=embedding,
        client=chromadb.EphemeralClient(),
        collection_name=collection_name
    )

# Initialize the LLM and assemble the conversational retrieval chain
def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
    progress(0.1, desc="Loading tokenizer...")
    
    tokenizer = AutoTokenizer.from_pretrained(llm_model)
    
    progress(0.4, desc="Initializing pipeline...")
    # flan-t5 is a seq2seq model and needs the "text2text-generation" task;
    # the other models in list_llm are causal LMs.
    task = "text2text-generation" if "flan-t5" in llm_model else "text-generation"
    pipeline_obj = pipeline(
        task,
        model=llm_model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=max_tokens,
        do_sample=True,
        top_k=top_k,
        temperature=temperature
    )
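    # device_map="auto" relies on the `accelerate` package; on CPU-only hosts
    # torch.float32 is likely safer than bfloat16.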
    
    llm = HuggingFacePipeline(pipeline=pipeline_obj)
    
    progress(0.7, desc="Setting up memory...")
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",  # required: the chain returns both answer and sources
        return_messages=True
    )
    
    progress(0.8, desc="Building chain...")
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_db.as_retriever(),
        memory=memory,
        chain_type="stuff",
        return_source_documents=True
    )
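
# For reference, the end-to-end flow the UI performs (path and model choice
# are illustrative assumptions):
#   db = create_db(load_doc(["manual.pdf"], 500, 50), "docs")
#   qa = initialize_llmchain("TinyLlama/TinyLlama-1.1B-Chat-v1.0", 0.7, 512, 3, db)
#   print(qa({"question": "What does the introduction cover?"})["answer"])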

# Gradio interface
def demo():
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        vector_db = gr.State()
        qa_chain = gr.State()
        
        gr.Markdown("## 🤖 PDF Chatbot with Free Models")
        
        with gr.Tab("📤 Upload PDF"):
            pdf_input = gr.Files(label="Select your PDFs", file_types=[".pdf"])
            
        with gr.Tab("⚙️ Processamento"):
            chunk_size = gr.Slider(100, 1000, value=500, label="Tamanho dos Chunks")
            chunk_overlap = gr.Slider(0, 200, value=50, label="Sobreposição")
            process_btn = gr.Button("Processar PDFs")
            
        with gr.Tab("🧠 Modelo"):
            model_selector = gr.Dropdown(list_llm_simple, label="Select Model", value=list_llm_simple[0])
            temperature = gr.Slider(0, 1, value=0.7, label="Temperature (creativity)")
            load_model_btn = gr.Button("Load Model")
            
        with gr.Tab("💬 Chat"):
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Your message")
            clear_btn = gr.ClearButton([msg, chatbot])
            
        # Event wiring
        process_btn.click(
            lambda files, cs, co: create_db(load_doc([f.name for f in files], cs, co), "docs"),
            inputs=[pdf_input, chunk_size, chunk_overlap],
            outputs=vector_db
        )
        
        load_model_btn.click(
            # Pass the State as an input: vector_db.value read here would be the
            # initial (None) value, not the current session's database.
            lambda model, temp, db: initialize_llmchain(
                list_llm[list_llm_simple.index(model)], temp, 512, 3, db
            ),
            inputs=[model_selector, temperature, vector_db],
            outputs=qa_chain
        )
        
        def respond(message, chat_history, chain):
            # The chain's ConversationBufferMemory tracks chat_history itself,
            # so only the new question is passed in.
            result = chain({"question": message})
            response = result["answer"]
            sources = "\n".join([f"📄 Page {doc.metadata['page']+1}: {doc.page_content[:50]}..."
                                 for doc in result["source_documents"][:2]])
            chat_history.append((message, f"{response}\n\n🔍 Sources:\n{sources}"))
            return "", chat_history
        
        msg.submit(respond, [msg, chatbot, qa_chain], [msg, chatbot])
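
    # Note: gr.Progress needs the request queue enabled; on older Gradio
    # versions demo.queue().launch() may be required (Gradio 4+ queues by default).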
        
    demo.launch()

if __name__ == "__main__":
    demo()