luansouza4444 committed on
Commit
79466e6
·
verified ·
1 Parent(s): fdd6dc7

Upload 4 files

Browse files
Files changed (5) hide show
  1. .envor.txt +1 -0
  2. .gitattributes +1 -0
  3. LegisMiner.pdf +3 -0
  4. app.py.py +196 -0
  5. requirements.txt +7 -0
.envor.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ROUTER_API_KEY=<REDACTED: secret must never be committed; rotate this key and provide it through the environment or the host's secret store>
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ LegisMiner.pdf filter=lfs diff=lfs merge=lfs -text
LegisMiner.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6702876fa520e29805546201280dfba74e4b6cf7e86c8d6dc03c3f485e7293ec
3
+ size 51675358
app.py.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1PhcQoTZvxdPQe6E1HMx_Nl4Zs_tY7J_y
8
+ """
9
+
10
+
11
+
12
+ import gradio as gr
13
+ import os
14
+ from dotenv import load_dotenv
15
+ from langchain_community.chat_models import ChatOpenAI
16
+ from langchain_community.document_loaders import PyPDFLoader
17
+ from langchain_community.embeddings import HuggingFaceEmbeddings
18
+ from langchain_community.vectorstores import FAISS
19
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
20
+ from langchain.chains import RetrievalQA
21
+ import tempfile
22
+ import numpy as np
23
+ from sklearn.metrics.pairwise import cosine_similarity
24
+
25
# ✅ Load environment variables (python-dotenv reads a local .env file when present)
load_dotenv()
OPENROUTER_API_KEY = os.environ.get("ROUTER_API_KEY")

# Fail fast at import time: both chat-model constructors below need this key.
if not OPENROUTER_API_KEY:
    raise ValueError("❌ A variável de ambiente ROUTER_API_KEY não está definida. Verifique o arquivo .env.")

# Initialisation: one shared embedding model, plus the module-level state
# (active QA chain and the name of the PDF it was built from).
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
qa_chain = processed_file = None
36
+
37
# 🔁 Automatically load the bundled LegisMiner.pdf at startup
def load_default_pdf():
    """Build the module-level QA chain from the bundled ``LegisMiner.pdf``.

    The PDF is loaded with ``PyPDFLoader``, split into 1000-character chunks
    with a 200-character overlap, indexed in a FAISS vector store using the
    module-level ``embeddings``, and wrapped in a ``RetrievalQA`` chain that
    returns its source documents.

    Side effects:
        On success, sets the globals ``qa_chain`` and ``processed_file``.
        On any failure the exception is caught and reported with ``print``;
        the globals are left as they were.

    Returns:
        None.
    """
    global qa_chain, processed_file

    # The repository ships the file as "LegisMiner.pdf"; the previous code
    # opened "legisMiner.pdf", which is not found on case-sensitive
    # filesystems. The legacy spelling is still accepted when only that file
    # exists, so existing local setups keep working.
    candidates = ("LegisMiner.pdf", "legisMiner.pdf")
    pdf_path = next((p for p in candidates if os.path.exists(p)), candidates[0])

    try:
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        docs = text_splitter.split_documents(documents)

        vectorstore = FAISS.from_documents(docs, embeddings)

        # OpenRouter exposes an OpenAI-compatible endpoint, hence ChatOpenAI
        # with a custom base URL.
        llm = ChatOpenAI(
            openai_api_key=OPENROUTER_API_KEY,
            openai_api_base="https://openrouter.ai/api/v1",
            model="deepseek/deepseek-r1-0528:free",
            temperature=0.7
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=vectorstore.as_retriever(),
            return_source_documents=True
        )

        processed_file = "LegisMiner.pdf"
        print("✅ LegisMiner.pdf carregado automaticamente.")
    except Exception as e:
        # Best effort: the app must still start so a PDF can be uploaded manually.
        print(f"❌ Erro ao carregar LegisMiner.pdf automaticamente: {e}")
66
+
67
def calculate_rag_metrics(query, response, source_docs):
    """Compute embedding-similarity metrics for one question/answer exchange.

    Args:
        query: The user's question text.
        response: The answer text produced by the chain.
        source_docs: Retrieved documents; each must expose ``page_content``.

    Returns:
        A dict with ``query_response_similarity``,
        ``avg_response_source_similarity``, ``max_response_source_similarity``
        and ``num_source_documents``. If anything raises, the dict holds
        whatever was computed so far plus an ``error`` key with the message.
    """
    results = {}
    try:
        question_vec = embeddings.embed_query(query)
        answer_vec = embeddings.embed_query(response)
        results["query_response_similarity"] = cosine_similarity(
            [question_vec], [answer_vec]
        )[0][0]

        # Only the first 1000 characters of each source are embedded.
        scores = [
            cosine_similarity(
                [answer_vec], [embeddings.embed_query(source.page_content[:1000])]
            )[0][0]
            for source in source_docs
        ]

        if scores:
            results["avg_response_source_similarity"] = np.mean(scores)
            results["max_response_source_similarity"] = max(scores)
        else:
            # No retrieved documents: report zero instead of failing on empty input.
            results["avg_response_source_similarity"] = 0
            results["max_response_source_similarity"] = 0
        results["num_source_documents"] = len(source_docs)

    except Exception as exc:
        results["error"] = str(exc)

    return results
90
+
91
def process_pdf(file):
    """Index an uploaded PDF and make it the active document for questions.

    The bytes are written to a temporary ``.pdf`` file, loaded with
    ``PyPDFLoader``, split into 1000-character chunks with a 200-character
    overlap, indexed in FAISS with the module-level ``embeddings``, and
    wrapped in a ``RetrievalQA`` chain that returns its source documents.

    Args:
        file: Raw PDF bytes (the UI's file input is declared with
            ``type="binary"``). ``None`` or empty bytes mean nothing was
            uploaded.

    Returns:
        A status string for the UI: a success message, a warning when no
        file was supplied, or an error message. Exceptions are never raised.

    Side effects:
        On success, replaces the globals ``qa_chain`` and ``processed_file``.
    """
    global qa_chain, processed_file

    # Clicking the button without an upload yields None; previously this
    # crashed in tmp.write() outside the try block.
    if not file:
        return "⚠️ Por favor, envie um PDF primeiro"

    pdf_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            # Record the path before writing so cleanup still runs if write fails.
            pdf_path = tmp.name
            tmp.write(file)

        loader = PyPDFLoader(pdf_path)
        documents = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        docs = text_splitter.split_documents(documents)

        vectorstore = FAISS.from_documents(docs, embeddings)

        llm = ChatOpenAI(
            openai_api_key=OPENROUTER_API_KEY,
            openai_api_base="https://openrouter.ai/api/v1",
            model="deepseek/deepseek-r1-0528-qwen3-8b:free",
            temperature=0.7
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=vectorstore.as_retriever(),
            return_source_documents=True
        )

        processed_file = os.path.basename(pdf_path)
        return f"✅ PDF processado com sucesso: {processed_file}"

    except Exception as e:
        return f"❌ Erro ao processar PDF: {str(e)}"

    finally:
        # The documents are fully loaded into memory by this point, so the
        # temp file (created with delete=False) is removed here; otherwise
        # every upload leaks one file on disk.
        if pdf_path is not None:
            try:
                os.remove(pdf_path)
            except OSError:
                pass
125
+
126
def ask_question(question):
    """Answer a question with the active QA chain.

    Args:
        question: The user's question text.

    Returns:
        A 3-tuple of strings ``(answer, sources, metrics_text)``, one per UI
        textbox. On a warning or error the first element carries the message
        and the other two are empty strings (previously the third was an
        empty dict, inconsistent with the success path).
    """
    if qa_chain is None:
        return "⚠️ Por favor, carregue um PDF primeiro", "", ""

    # Avoid spending an LLM call on an empty prompt.
    if not question or not question.strip():
        return "⚠️ Por favor, digite uma pergunta", "", ""

    try:
        resposta = qa_chain.invoke({"query": question})
        answer = resposta['result']
        source_docs = resposta['source_documents']

        # Show at most the first 500 characters of each retrieved chunk.
        sources = "\n\n".join(
            f"📄 Fonte {i+1}:\n{doc.page_content[:500]}..."
            for i, doc in enumerate(source_docs)
        )

        metrics = calculate_rag_metrics(question, answer, source_docs)

        metric_lines = []
        for key, value in metrics.items():
            if key == "error":
                continue
            label = key.replace('_', ' ').title()
            if isinstance(value, float):
                metric_lines.append(f"{label}: {value:.2f}")
            else:
                metric_lines.append(f"{label}: {value}")
        # Surface a metrics failure instead of silently showing fewer lines.
        if "error" in metrics:
            metric_lines.append(f"⚠️ Erro ao calcular métricas: {metrics['error']}")
        metrics_text = "\n".join(metric_lines)

        return answer, sources, metrics_text

    except Exception as e:
        return f"❌ Erro ao processar pergunta: {str(e)}", "", ""
155
+
156
+ # Gradio interface: PDF upload/processing controls, a question box, and text outputs for the answer, the source excerpts and the RAG metrics
157
+ with gr.Blocks(title="Chat com PDF usando OpenRouter", theme=gr.themes.Soft()) as demo:
158
+ gr.Markdown("# 🧠 Artificial Intelligence Applied to Regulatory Standard Processing in Mining\n### 💡 Development of a Decision Support Tool")
159
+
160
+ with gr.Row():
161
+ with gr.Column(scale=1):
162
+ file_input = gr.File(label="📤 Envie um PDF", type="binary")
163
+ process_btn = gr.Button("Processar PDF", variant="primary")
164
+ status_output = gr.Textbox(label="Status")
165
+
166
+ with gr.Column(scale=2):
167
+ question_input = gr.Textbox(label="Faça uma pergunta sobre Normas da Mineração", lines=3)
168
+ ask_btn = gr.Button("Enviar Pergunta", variant="primary")
169
+ answer_output = gr.Textbox(label="✅ Resposta", interactive=False)
170
+
171
+ with gr.Accordion("📄 Fontes usadas", open=False):
172
+ sources_output = gr.Textbox(label="Trechos relevantes", lines=10)
173
+
174
+ with gr.Accordion("📊 Métricas RAG", open=False):
175
+ metrics_output = gr.Textbox(label="Métricas", lines=4)
176
+
177
+
178
+
179
+ process_btn.click(
180
+ fn=process_pdf,
181
+ inputs=file_input,
182
+ outputs=status_output
183
+ )
184
+
185
+ ask_btn.click(
186
+ fn=ask_question,
187
+ inputs=question_input,
188
+ outputs=[answer_output, sources_output, metrics_output]
189
+ )
190
+
191
+ # 🔁 Load the fixed default PDF at startup so questions work before any upload
192
+ load_default_pdf()
193
+
194
+ # Enable a public share link only when the COLAB_JUPYTER_TRANSPORT environment variable is set (presumably meaning the app runs inside Colab); otherwise launch without sharing
195
+ share = True if 'COLAB_JUPYTER_TRANSPORT' in os.environ else False
196
+ demo.launch(share=share, debug=False)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ python-dotenv
3
+ langchain
4
+ faiss-cpu
5
+ sentence-transformers
6
+ scikit-learn
7
+ openai