import asyncio
# Create and install a fresh event loop before llama-index is imported
# (presumably needed because the hosting environment imports this module in
# a thread without a running loop — TODO confirm).
asyncio.set_event_loop(asyncio.new_event_loop())
from llama_index.core import (
VectorStoreIndex,
ServiceContext,
SimpleDirectoryReader,
load_index_from_storage,
)
from llama_index.core.storage import StorageContext
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.prompts import PromptTemplate
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.query_pipeline import InputComponent
from llama_index.core.indices.knowledge_graph import KGTableRetriever
from llama_index.legacy.vector_stores.faiss import FaissVectorStore
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
import openai
import os
from github import Github
from datetime import datetime
import gradio as gr
# OpenAI: key is read from the environment and exposed both ways.
openai.api_key = os.environ.get('openai_key')
os.environ["OPENAI_API_KEY"] = os.environ.get('openai_key')
# Github:
# SECURITY(review): exec() of environment-supplied source — anyone able to
# set these variables can run arbitrary code in this process. The exec'd
# snippets appear to define github_token/repo_name and the RAG objects used
# below (query_engine, kg_data, nx, plt, BytesIO, Image) — TODO confirm.
exec(os.environ.get('logs_context'))
# Context:
exec(os.environ.get('context'))
project_name = "DEV PharmaWise Data Integrity 4.5"
def draw_graph():
    """Render the knowledge-graph triples in ``kg_data`` as a PNG image.

    Reads the module-level ``kg_data`` — presumably an iterable of
    ``(source, relation, target)`` triples populated by the exec'd context
    (TODO confirm) — and returns a PIL ``Image`` of the drawn graph.
    Relies on ``nx`` (networkx), ``plt`` (matplotlib), ``BytesIO`` and
    ``Image`` being injected into the module namespace by the exec'd
    context.
    """
    global kg_data
    G = nx.DiGraph()
    for source, relation, target in kg_data:
        G.add_edge(source, target, label=relation)
    # spring_layout spreads the nodes out for readability.
    pos = nx.spring_layout(G)
    plt.figure(figsize=(12, 8))
    nx.draw(G, pos, with_labels=True, node_color='skyblue', node_size=400,
            edge_color='k', linewidths=1, font_size=8, font_weight='bold')
    # Label each edge with its relation (dict comprehension instead of the
    # original manual accumulation loop).
    edge_labels = {
        (source, target): data['label']
        for source, target, data in G.edges(data=True)
        if 'label' in data
    }
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=7, font_weight='normal')
    plt.title("Graph")
    plt.axis('off')
    # Serialize the current figure to an in-memory PNG, then close it so
    # figures do not accumulate across repeated button clicks.
    buf = BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    plt.close()
    return Image.open(buf)
def extraer_informacion_metadata(respuesta, max_results=10):
    """Extract human-readable source references from a query response.

    Parameters
    ----------
    respuesta : object exposing ``source_nodes`` (llama-index response);
        each node provides ``node.metadata``, ``node.id_`` and a relevance
        ``score``.
    max_results : int
        Maximum number of entries to return (default 10).

    Returns
    -------
    list[str]
        One formatted line per source node, at most ``max_results``.
    """
    source_nodes = respuesta.source_nodes
    # Slice BEFORE formatting so we never build strings that get discarded
    # (the original formatted every node, then truncated).
    return [
        f"Página {node.node.metadata.get('page_label', '')} del archivo "
        f"{node.node.metadata.get('file_name', '')} "
        f"(Relevance: {node.score:.2f} - Id: {node.node.id_})"
        for node in source_nodes[:max_results]
    ]
# Función para hacer commit
def commit_to_github(message, response):
    """Append a question/answer pair to a monthly log file on GitHub.

    Relies on module-level ``github_token`` and ``repo_name`` (presumably
    injected by the exec'd logs context — TODO confirm). No-op when no
    token is configured.
    """
    if not github_token:
        return
    g = Github(github_token)
    repo = g.get_repo(repo_name)
    current_date = datetime.now().strftime("%Y-%m")
    # One log file per project per month: <project>/<YYYY-MM>-<project>.txt
    file_name = f"{project_name}/{current_date}-{project_name}.txt"
    commit_message = f"Actualización de {current_date}"
    # Log entry: timestamp, question, answer, separator.
    content = f"({datetime.now().strftime('%d/%m/%Y %H:%M')})\nPregunta: {message}\nRespuesta: {response}\n----------\n"
    try:
        # Append to the existing monthly file if it is already there.
        existing_file = repo.get_contents(file_name)
        existing_content = existing_file.decoded_content.decode('utf-8')
        new_content = f"{existing_content}{content}"
        repo.update_file(file_name, commit_message, new_content, existing_file.sha, branch="main")
    except Exception:
        # File not found (e.g. first entry of a new month): create it.
        # NOTE(review): the original bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit) was narrowed to Exception; ideally
        # catch github.UnknownObjectException only.
        repo.create_file(file_name, commit_message, content, branch="main")
##### Logs start ##########
import pandas as pd
from datasets import load_dataset, Dataset, DatasetDict
from huggingface_hub import login, HfApi, file_exists, hf_hub_download
# HuggingFace Token: read from the environment for dataset-log uploads.
HF_TOKEN = os.environ.get('hf')
# NOTE(review): this silently overwrites the project_name defined earlier
# ("DEV PharmaWise Data Integrity 4.5"); both the GitHub commit helper and
# the HF logging below will use THIS value from here on — confirm intended.
project_name = "gmpcolombia_chat"
def save_to_dataset(user_message, response_text):
    """Append one chat interaction to the monthly CSV log on the HF Hub.

    Downloads the current month's log from the dataset repo (if present),
    appends a row (timestamp, question, answer, empty moderation flag) and
    uploads the CSV back to ``<project_name>/logs_<YYYY-MM>.csv``.
    """
    global project_name
    current_month = datetime.now().strftime('%Y-%m')
    filename = f"logs_{current_month}.csv"
    repo_id = "PharmaWise-Urufarma/Logs"
    # BUG FIX: the remote path previously used the literal "(unknown)"
    # instead of the computed monthly filename, leaving `filename` partly
    # dead and writing every month to the same remote file.
    remote_path = f"{project_name}/{filename}"
    if file_exists(repo_id=repo_id, filename=remote_path, repo_type="dataset", token=HF_TOKEN):
        local_filepath = hf_hub_download(
            repo_id=repo_id,
            filename=remote_path,
            repo_type="dataset",
            token=HF_TOKEN
        )
        df = pd.read_csv(local_filepath)
    else:
        # First interaction of the month: start a fresh log.
        df = pd.DataFrame(columns=["timestamp", "user_message", "response_text", "flag"])
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    new_data = pd.DataFrame([{
        "timestamp": timestamp,
        "user_message": user_message,
        "response_text": response_text,
        "flag": ""
    }])
    df = pd.concat([df, new_data], ignore_index=True)
    # Write locally, then push the whole file to the dataset repo.
    df.to_csv(filename, index=False)
    api = HfApi()
    api.upload_file(
        path_or_fileobj=filename,
        path_in_repo=remote_path,
        repo_id=repo_id,
        token=HF_TOKEN,
        repo_type="dataset"
    )
def normalize_text(text):
    """Normalize *text* for comparison: trim surrounding whitespace, lowercase."""
    trimmed = text.strip()
    return trimmed.lower()
def print_like_dislike(x: gr.LikeData):
    """Persist a like/dislike vote into the monthly CSV log on the HF Hub.

    Finds the most recent logged row whose response text contains the
    voted message, stores ``x.liked`` in its ``flag`` column, and
    re-uploads the log file.
    """
    # BUG FIX: guard first — the original printed x.value/x.liked BEFORE
    # the `x is None` check, which would raise AttributeError on None.
    if x is None:
        print("x is None.")
        return
    print(f"Value: {x.value}")
    print(f"Liked: {x.liked}")
    text_value = x.value if isinstance(x.value, str) else x.value.get('value', '')
    current_month = datetime.now().strftime('%Y-%m')
    filename = f"logs_{current_month}.csv"
    repo_id = "PharmaWise-Urufarma/Logs"
    global project_name
    # BUG FIX: the remote path previously used the literal "(unknown)"
    # instead of the computed monthly filename.
    remote_path = f"{project_name}/{filename}"
    if not file_exists(repo_id=repo_id, filename=remote_path, repo_type="dataset", token=HF_TOKEN):
        print(f"File {filename} does not exist in the repository.")
        return
    local_filepath = hf_hub_download(
        repo_id=repo_id,
        filename=remote_path,
        repo_type="dataset",
        token=HF_TOKEN
    )
    df = pd.read_csv(local_filepath)
    normalized_value = normalize_text(text_value)
    df['normalized_response_text'] = df['response_text'].apply(normalize_text)
    # regex=False: treat the message as a literal substring — the original
    # let regex metacharacters in the response text break the lookup.
    response_indices = df.index[df['normalized_response_text'].str.contains(normalized_value, na=False, regex=False)].tolist()
    print(f"Response Indices: {response_indices}")
    if not response_indices:
        print("No matching response found to update.")
        return
    # Update the most recent matching row only.
    response_index = response_indices[-1]
    print(f"Updating index: {response_index} with value: {x.liked}")
    df['flag'] = df['flag'].astype(object)
    df.at[response_index, 'flag'] = str(x.liked)
    df = df.drop(columns=['normalized_response_text'])
    df.to_csv(filename, index=False)
    api = HfApi()
    api.upload_file(
        path_or_fileobj=filename,
        path_in_repo=remote_path,
        repo_id=repo_id,
        token=HF_TOKEN,
        repo_type="dataset"
    )
# save_to_dataset(message, chat_history[-1][1])
# chatbot.like(print_like_dislike, None, None)
##### Logs end ##########
# CSS applied to the Gradio Blocks UI (vote-button row styling).
# NOTE(review): the two `background: url("data:image/svg+xml; utf8, ")`
# data URIs below are EMPTY — the inline SVG icons appear to have been
# lost (possibly stripped during an export); confirm and restore them.
custom_css = """
#btns {
display: flex;
justify-content: center;
}
#btn_up, #btn_down {
background: none;
border: 3px solid rgba(147, 197, 253, 0.3);
border-radius: 3px;
position: relative; /* Necesario para que ::before use la posición relativa al botón */
padding-left: 24px; /* Espacio para el SVG */
}
#btn_up::before {
content: '';
display: inline-block;
width: 16px; /* Ancho del SVG */
height: 16px; /* Alto del SVG */
background: url("data:image/svg+xml; utf8, ");
background-size: contain;
background-repeat: no-repeat;
position: absolute;
left: 5px;
top: 50%;
transform: translateY(-50%);
}
#btn_down::before {
content: '';
display: inline-block;
width: 16px; /* Ancho del SVG */
height: 16px; /* Alto del SVG */
background: url("data:image/svg+xml; utf8, ");
background-size: contain;
background-repeat: no-repeat;
position: absolute;
left: 5px;
top: 50%;
transform: translateY(-50%);
}
"""
with gr.Blocks(theme='sudeepshouche/minimalist', css=custom_css) as demo:
    # --- UI callbacks ----------------------------------------------------
    def visible():
        """Reveal the graph button once a response is available."""
        return {btn_graph: gr.Button(value="Grafo", visible=True)}

    def enable_btns():
        """Show the like/dislike buttons after a response arrives."""
        return {btn_like: gr.Button(visible=True, elem_id="btn_up"),
                btn_dislike: gr.Button(visible=True, elem_id="btn_down")}

    def disable_btns():
        """Hide the like/dislike buttons."""
        return {btn_like: gr.Button(visible=False),
                btn_dislike: gr.Button(visible=False)}

    def responder(pregunta):
        """Stream the RAG answer for *pregunta*, then append source metadata.

        Uses the module-level ``query_engine`` (presumably injected by the
        exec'd context — TODO confirm).
        """
        respuesta = query_engine.query(pregunta)
        partial_message = ""
        for text in respuesta.response_gen:
            partial_message += text
            # BUG FIX: this handler feeds a single output component
            # (outputs=[response] below), but the original yielded the
            # 3-tuple (partial_message, None, None) on each chunk while the
            # final yield produced a single value — inconsistent and
            # mismatched with the wiring. Yield one value throughout.
            yield partial_message
        # Append the pages/documents that were used to build the answer.
        metadata_info = extraer_informacion_metadata(respuesta, max_results=10)
        if metadata_info:
            metadata_list = "\n".join(["- " + info for info in metadata_info])
            partial_message += "\n\n" + metadata_list
        yield partial_message

    # --- Layout ----------------------------------------------------------
    # NOTE(review): the garbled user-facing sentence ("utilizados generar tu
    # responder") was repaired below.
    gr.Markdown("""
# PharmaWise Data Integrity 4.5
Realiza preguntas a tus datos y obtén al final del texto las páginas y documentos utilizados para generar tu respuesta.
""")
    with gr.Row():
        with gr.Column():
            pregunta = gr.Text(label="Pregunta")
            with gr.Row():
                btn_send = gr.Button(value="Preguntar", variant="primary")
                clear = gr.Button(value="Limpiar")
                btn_graph = gr.Button(value="Grafo", visible=False)
            gr.Examples(label="Ejemplos", examples=["Explicar el concepto ALCOA"], inputs=[pregunta])
        with gr.Column():
            response = gr.Textbox(label="Respuesta")
            with gr.Row(elem_id="btns"):
                btn_dislike = gr.Button(value="", visible=False)
                btn_like = gr.Button(value="", visible=False)
            with gr.Row():
                grafo = gr.Image(label="Grafo", show_share_button=False)

    # --- Event wiring -----------------------------------------------------
    btn_send.click(responder, inputs=[pregunta], outputs=[response])
    btn_graph.click(draw_graph, outputs=[grafo])
    clear.click(lambda: None, None, pregunta, queue=False)
    response.change(visible, [], [btn_graph])
    response.change(enable_btns, [], [btn_like, btn_dislike])

demo.queue()
demo.launch()