import asyncio
asyncio.set_event_loop(asyncio.new_event_loop())

# Imports used by draw_graph() below (missing in the original)
from io import BytesIO
import networkx as nx
import matplotlib.pyplot as plt
from PIL import Image

from llama_index.core import (
    VectorStoreIndex,
    ServiceContext,
    SimpleDirectoryReader,
    load_index_from_storage,
)
from llama_index.core.storage import StorageContext
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.prompts import PromptTemplate
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.query_pipeline import InputComponent
from llama_index.core.indices.knowledge_graph import KGTableRetriever
from llama_index.legacy.vector_stores.faiss import FaissVectorStore
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
import openai
import os
from github import Github
from datetime import datetime
import gradio as gr

# OpenAI:
openai.api_key = os.environ.get('openai_key')
os.environ["OPENAI_API_KEY"] = os.environ.get('openai_key')

# GitHub: expected to define `github_token` and `repo_name`
exec(os.environ.get('logs_context'))

# Context: expected to define the retrieval objects used below,
# e.g. `query_engine` and `kg_data`
exec(os.environ.get('context'))

project_name = "DEV PharmaWise Data Integrity 4.5"
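# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original script): the exec'd `context` above
# is expected to provide at least `query_engine` (a *streaming* LlamaIndex
# query engine, since responder() iterates `response_gen`) and `kg_data`
# (a list of (source, relation, target) triples for draw_graph()), while
# `logs_context` should provide `github_token` and `repo_name`. The guarded
# fallback below only illustrates one plausible construction from the imports
# at the top of this file; the data path, model, and chunk size are
# assumptions, not the deployed configuration.
if "query_engine" not in globals():
    Settings.llm = OpenAI(model="gpt-4", temperature=0.1)  # assumed model
    Settings.embed_model = OpenAIEmbedding()
    documents = SimpleDirectoryReader("./data").load_data()  # assumed corpus path
    index = VectorStoreIndex.from_documents(
        documents, transformations=[SentenceSplitter(chunk_size=1024)]
    )
    query_engine = index.as_query_engine(streaming=True)
if "kg_data" not in globals():
    kg_data = []  # draw_graph() renders these triples; empty graph by default
if "github_token" not in globals():
    github_token, repo_name = None, None  # commit_to_github() then skips the push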
def draw_graph():
    global kg_data
    G = nx.DiGraph()
    for source, relation, target in kg_data:
        G.add_edge(source, target, label=relation)

    # Use spring_layout to improve node placement
    pos = nx.spring_layout(G)
    plt.figure(figsize=(12, 8))

    # Adjust node size
    nx.draw(G, pos, with_labels=True, node_color='skyblue', node_size=400,
            edge_color='k', linewidths=1, font_size=8, font_weight='bold')

    # Adjust edge label size and spacing
    edge_labels = {}
    for source, target, data in G.edges(data=True):
        if 'label' in data:
            edge_labels[(source, target)] = data['label']
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels,
                                 font_size=7, font_weight='normal')

    plt.title("Graph")
    plt.axis('off')

    buf = BytesIO()
    plt.savefig(buf, format='png')
    buf.seek(0)
    plt.close()
    return Image.open(buf)


def extraer_informacion_metadata(respuesta, max_results=10):
    # Get source_nodes from the response
    source_nodes = respuesta.source_nodes

    # Build page_label / file_name / score strings from source_nodes
    page_file_info = [
        f"Página {node.node.metadata.get('page_label', '')} del archivo "
        f"{node.node.metadata.get('file_name', '')} "
        f"(Relevance: {node.score:.2f} - Id: {node.node.id_})"
        for node in source_nodes
    ]

    # Cap the number of results
    page_file_info = page_file_info[:max_results]

    return page_file_info


# Commit question/answer pairs to GitHub
def commit_to_github(message, response):
    if github_token:
        g = Github(github_token)
        repo = g.get_repo(repo_name)

        # Current date (year-month)
        current_date = datetime.now().strftime("%Y-%m")

        # File name: project_name/year-month-project_name.txt
        file_name = f"{project_name}/{current_date}-{project_name}.txt"

        # Commit message
        commit_message = f"Actualización de {current_date}"

        # Content: date, question, answer
        content = (
            f"({datetime.now().strftime('%d/%m/%Y %H:%M')})\n"
            f"Pregunta: {message}\nRespuesta: {response}\n----------\n"
        )

        try:
            # If a .txt for this month already exists, append the new entry
            existing_file = repo.get_contents(file_name)
            existing_content = existing_file.decoded_content.decode('utf-8')
            new_content = f"{existing_content}{content}"
            repo.update_file(file_name, commit_message, new_content,
                             existing_file.sha, branch="main")
        except Exception:
            # If the file does not exist (e.g., a new month has started), create it
            repo.create_file(file_name, commit_message, content, branch="main")


##### Logs start ##########
import pandas as pd
from datasets import load_dataset, Dataset, DatasetDict
from huggingface_hub import login, HfApi, file_exists, hf_hub_download

# HuggingFace Token:
HF_TOKEN = os.environ.get('hf')

# NOTE: this overrides the project_name set above, so both the GitHub commits
# and the HF logs end up under "gmpcolombia_chat"
project_name = "gmpcolombia_chat"


def save_to_dataset(user_message, response_text):
    global project_name
    current_month = datetime.now().strftime('%Y-%m')
    filename = f"logs_{current_month}.csv"
    repo_id = "PharmaWise-Urufarma/Logs"

    # Download this month's log if it exists; otherwise start a fresh one
    if file_exists(repo_id=repo_id, filename=f"{project_name}/{filename}",
                   repo_type="dataset", token=HF_TOKEN):
        local_filepath = hf_hub_download(
            repo_id=repo_id,
            filename=f"{project_name}/{filename}",
            repo_type="dataset",
            token=HF_TOKEN
        )
        df = pd.read_csv(local_filepath)
    else:
        df = pd.DataFrame(columns=["timestamp", "user_message", "response_text", "flag"])

    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    new_data = pd.DataFrame([{
        "timestamp": timestamp,
        "user_message": user_message,
        "response_text": response_text,
        "flag": ""
    }])
    df = pd.concat([df, new_data], ignore_index=True)
    df.to_csv(filename, index=False)

    api = HfApi()
    api.upload_file(
        path_or_fileobj=filename,
        path_in_repo=f"{project_name}/{filename}",
        repo_id=repo_id,
        token=HF_TOKEN,
        repo_type="dataset"
    )


def normalize_text(text):
    return text.strip().lower()


def print_like_dislike(x: gr.LikeData):
    print(f"Value: {x.value}")
    print(f"Liked: {x.liked}")
    if x is not None:
        text_value = x.value if isinstance(x.value, str) else x.value.get('value', '')
        current_month = datetime.now().strftime('%Y-%m')
        filename = f"logs_{current_month}.csv"
        repo_id = "PharmaWise-Urufarma/Logs"
        global project_name

        if file_exists(repo_id=repo_id, filename=f"{project_name}/{filename}",
                       repo_type="dataset", token=HF_TOKEN):
            local_filepath = hf_hub_download(
                repo_id=repo_id,
                filename=f"{project_name}/{filename}",
                repo_type="dataset",
                token=HF_TOKEN
            )
            df = pd.read_csv(local_filepath)
            normalized_value = normalize_text(text_value)
            df['normalized_response_text'] = df['response_text'].apply(normalize_text)
            # regex=False: match the response as a literal string, not a pattern
            response_indices = df.index[df['normalized_response_text'].str.contains(
                normalized_value, na=False, regex=False)].tolist()
            print(f"Response Indices: {response_indices}")

            if response_indices:
                # Flag the most recent matching response
                response_index = response_indices[-1]
                print(f"Updating index: {response_index} with value: {x.liked}")
                df['flag'] = df['flag'].astype(object)
                df.at[response_index, 'flag'] = str(x.liked)
                df = df.drop(columns=['normalized_response_text'])
                df.to_csv(filename, index=False)

                api = HfApi()
                api.upload_file(
                    path_or_fileobj=filename,
                    path_in_repo=f"{project_name}/{filename}",
                    repo_id=repo_id,
                    token=HF_TOKEN,
                    repo_type="dataset"
                )
            else:
                print("No matching response found to update.")
        else:
            print(f"File {filename} does not exist in the repository.")
    else:
        print("x is None.")

# save_to_dataset(message, chat_history[-1][1])
# chatbot.like(print_like_dislike, None, None)
##### Logs end ##########
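# ---------------------------------------------------------------------------
# Hedged sketch (not in the original): the commented-out wiring above targets
# a gr.Chatbot (`chatbot.like(...)`), but the UI below uses plain like/dislike
# buttons with no click handlers. One way to reuse the same CSV log from those
# buttons is a small shim that mimics the two gr.LikeData attributes that
# print_like_dislike actually reads; the class and function names here are
# illustrative, not part of the deployed app.
class ButtonLikeData:
    def __init__(self, value, liked):
        self.value = value
        self.liked = liked


def registrar_feedback(response_text, liked):
    # Could be wired inside the Blocks below, e.g.:
    #   btn_like.click(lambda r: registrar_feedback(r, True), inputs=[response])
    #   btn_dislike.click(lambda r: registrar_feedback(r, False), inputs=[response])
    print_like_dislike(ButtonLikeData(response_text, liked))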
# Define the CSS with media queries
custom_css = """
#btns {
    display: flex;
    justify-content: center;
}

#btn_up, #btn_down {
    background: none;
    border: 3px solid rgba(147, 197, 253, 0.3);
    border-radius: 3px;
    position: relative; /* Needed so ::before is positioned relative to the button */
    padding-left: 24px; /* Room for the SVG */
}

#btn_up::before {
    content: '';
    display: inline-block;
    width: 16px;  /* SVG width */
    height: 16px; /* SVG height */
    background: url("data:image/svg+xml; utf8, ");
    background-size: contain;
    background-repeat: no-repeat;
    position: absolute;
    left: 5px;
    top: 50%;
    transform: translateY(-50%);
}

#btn_down::before {
    content: '';
    display: inline-block;
    width: 16px;  /* SVG width */
    height: 16px; /* SVG height */
    background: url("data:image/svg+xml; utf8, ");
    background-size: contain;
    background-repeat: no-repeat;
    position: absolute;
    left: 5px;
    top: 50%;
    transform: translateY(-50%);
}
"""

with gr.Blocks(theme='sudeepshouche/minimalist', css=custom_css) as demo:

    def visible():
        return {btn_graph: gr.Button(value="Grafo", visible=True)}

    def enable_btns():
        return {btn_like: gr.Button(visible=True, elem_id="btn_up"),
                btn_dislike: gr.Button(visible=True, elem_id="btn_down")}

    def disable_btns():
        return {btn_like: gr.Button(visible=False),
                btn_dislike: gr.Button(visible=False)}

    def responder(pregunta):
        # Query engine (streaming)
        respuesta = query_engine.query(pregunta)
        partial_message = ""
        for text in respuesta.response_gen:
            partial_message += text
            # There is a single output component, so yield a single value
            yield partial_message

        # Extract source_nodes info
        metadata_info = extraer_informacion_metadata(respuesta, max_results=10)

        # Append the source_nodes info as a bulleted list
        if metadata_info:
            metadata_list = "\n".join(["- " + info for info in metadata_info])
            partial_message += "\n\n" + metadata_list
            yield partial_message

    gr.Markdown("""
    # PharmaWise Data Integrity 4.5
    Realiza preguntas a tus datos y obtén al final del texto las páginas y documentos utilizados para generar tu respuesta.
    """)
    with gr.Row():
        with gr.Column():
            pregunta = gr.Text(label="Pregunta")
            with gr.Row():
                btn_send = gr.Button(value="Preguntar", variant="primary")
                clear = gr.Button(value="Limpiar")
                btn_graph = gr.Button(value="Grafo", visible=False)
            gr.Examples(label="Ejemplos",
                        examples=["Explicar el concepto ALCOA"],
                        inputs=[pregunta])
        with gr.Column():
            response = gr.Textbox(label="Respuesta")
            with gr.Row(elem_id="btns"):
                btn_dislike = gr.Button(value="", visible=False)
                btn_like = gr.Button(value="", visible=False)
            with gr.Row():
                grafo = gr.Image(label="Grafo", show_share_button=False)

    btn_send.click(responder, inputs=[pregunta], outputs=[response])
    btn_graph.click(draw_graph, outputs=[grafo])
    clear.click(lambda: None, None, pregunta, queue=False)
    response.change(visible, [], [btn_graph])
    response.change(enable_btns, [], [btn_like, btn_dislike])

# The queue enables the event loop that generator handlers like responder()
# rely on to stream partial answers
demo.queue()
demo.launch()
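# ---------------------------------------------------------------------------
# Hedged usage note (not in the original): demo.launch() above blocks until
# the server stops, so nothing placed after it runs while the app is live.
# For a quick offline check of the non-UI pieces, something like the following
# could be run instead of launching (the triple below is only an example):
#
#   kg_data = [("ALCOA", "es_principio_de", "Data Integrity")]
#   draw_graph().save("grafo.png")           # renders the triples to a PNG
#   print(normalize_text("  Hola MUNDO  "))  # -> "hola mundo"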