|
|
|
|
|
import os |
|
import openai |
|
from llama_index import SimpleDirectoryReader, ServiceContext, VectorStoreIndex |
|
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, GPTVectorStoreIndex, set_global_service_context |
|
from llama_index.callbacks import CallbackManager, TokenCountingHandler |
|
import tiktoken |
|
|
|
from llama_index.vector_stores import ChromaVectorStore |
|
from llama_index.storage.storage_context import StorageContext |
|
from IPython.display import Markdown, display |
|
import chromadb |
|
from chromadb.utils import embedding_functions |
|
from langchain.chat_models import ChatOpenAI |
|
from llama_index.tools import QueryEngineTool, ToolMetadata |
|
from langchain.chat_models import ChatAnthropic, ChatOpenAI |
|
from llama_index.indices.postprocessor import MetadataReplacementPostProcessor, SentenceTransformerRerank |
|
from llama_index.postprocessor import RankGPTRerank |
|
|
|
|
|
# Configure OpenAI credentials from the environment.
# The key is looked up once; if it is missing we fail fast with a clear
# message instead of the opaque TypeError raised when assigning None
# into os.environ (str values only).
_openai_key = os.environ.get('openai_key')
if _openai_key is None:
    raise RuntimeError("Environment variable 'openai_key' is not set")
openai.api_key = _openai_key
os.environ["OPENAI_API_KEY"] = _openai_key
|
|
|
|
|
|
|
|
|
|
|
|
|
# Open (or create) the persistent Chroma store on disk and the document
# collection holding the already-embedded corpus.
db2 = chromadb.PersistentClient(path="./db_sentence")

chroma_collection = db2.get_or_create_collection("coleccion-pharmawise")

# LLM used for answer synthesis; low temperature for more factual output.
llm = ChatOpenAI(model="gpt-4-1106-preview", temperature=0.1)

# Wrap the Chroma collection for llama_index and build an index view over
# the existing vectors — no documents are (re-)ingested here.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

storage_context = StorageContext.from_defaults(vector_store=vector_store)  # NOTE(review): not used downstream in this chunk

service_context = ServiceContext.from_defaults(llm=llm)

vector_index = VectorStoreIndex.from_vector_store(vector_store, service_context=service_context)
|
|
|
|
|
""" |
|
rerank = SentenceTransformerRerank( |
|
top_n=5, model="BAAI/bge-reranker-base" |
|
) |
|
""" |
|
|
|
from llama_index.postprocessor import SentenceTransformerRerank |
|
|
|
rerank = SentenceTransformerRerank( |
|
model="cross-encoder/ms-marco-MiniLM-L-2-v2", top_n=5 |
|
) |
|
|
|
|
|
# Retrieval: fetch the top-20 nodes by embedding similarity, let the
# cross-encoder reranker narrow them to 5, and stream tokens to the caller.
query_engine = vector_index.as_query_engine(similarity_top_k=20, streaming=True, node_postprocessors=[rerank])

# System-style prefix (Spanish) prepended to every user question; instructs
# the model to answer as a pharma/GMP/DIGEMID expert using only the context.
prompt = """Responder en español como un experto en industria farmacéutica, buenas prácticas de manufactura, y regulaciones de DIGEMID, dando una respuesta detallada y basada solamente en los datos disponibles en el contexto.

La pregunta a responder es la siguiente: """
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr |
|
from gradio import components |
|
import textwrap |
|
|
|
def extraer_informacion_metadata(respuesta, max_results=10):
    """Build human-readable provenance strings for a query response.

    Parameters
    ----------
    respuesta : object
        A llama_index response-like object exposing ``source_nodes``, where
        each entry has ``score``, ``node.metadata`` and ``node.id_``.
    max_results : int
        Maximum number of entries to return (default 10).

    Returns
    -------
    list[str]
        One Spanish-language line per source node: page, file name,
        relevance score (2 decimals) and node id.
    """
    # Slice first so we only format the nodes we will actually report,
    # instead of formatting everything and discarding the tail.
    nodes = respuesta.source_nodes[:max_results]
    return [
        f"Página {node.node.metadata.get('page_label', '')} del archivo "
        f"{node.node.metadata.get('file_name', '')} "
        f"(Relevance: {node.score:.2f} - Id: {node.node.id_})"
        for node in nodes
    ]
|
|
|
|
|
def responder(pregunta):
    """Answer *pregunta* as a token stream, then append source citations.

    Yields the progressively growing answer text so a streaming UI (Gradio)
    can render it incrementally; the final yield includes a bulleted list of
    the pages/files the answer was drawn from.
    """
    respuesta = query_engine.query(prompt + pregunta)

    # Stream LLM tokens to the caller as they arrive.
    acumulado = ""
    for fragmento in respuesta.response_gen:
        acumulado += fragmento
        yield acumulado

    # Collect provenance for the citation footer.
    metadata_info = extraer_informacion_metadata(respuesta, max_results=10)
    print(respuesta.source_nodes)  # debug output, retained intentionally

    if metadata_info:
        listado = "\n".join("- " + info for info in metadata_info)
        yield acumulado + "\n\n" + listado
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio UI: one text input (question) -> one streamed text output (answer).
# The description string previously read "...las paginas y documentos
# utilizados generar tu responder", which is ungrammatical Spanish; fixed
# to "...las páginas y documentos utilizados para generar tu respuesta".
iface = gr.Interface(
    fn=responder,
    inputs=gr.Textbox(lines=2, placeholder='Escribe tu pregunta aquí...', label="Preguntas"),
    outputs=gr.Textbox(label="Su Respuesta"),
    theme='sudeepshouche/minimalist',
    title='PharmaWise GMP Perú (g4t rr5 k20 t01)',
    description='Realiza preguntas a tus datos y obtén al final del texto las páginas y documentos utilizados para generar tu respuesta.',
)

iface.launch(debug=False, inline=False)