from climateqa.engine.embeddings import get_embeddings_function

embeddings_function = get_embeddings_function()

from climateqa.knowledge.openalex import OpenAlex
from sentence_transformers import CrossEncoder

# reranker = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
oa = OpenAlex()

import gradio as gr
from gradio_modal import Modal
import pandas as pd
import numpy as np
import os
import time
import re
import json

from gradio import ChatMessage
from io import BytesIO
import base64
from datetime import datetime
from azure.storage.fileshare import ShareServiceClient

from utils import create_user_id

# ClimateQ&A imports
from climateqa.engine.llm import get_llm
from climateqa.engine.vectorstore import get_pinecone_vectorstore
# from climateqa.knowledge.retriever import ClimateQARetriever
from climateqa.engine.reranker import get_reranker
from climateqa.engine.chains.prompts import audience_prompts
from climateqa.sample_questions import QUESTIONS
from climateqa.constants import POSSIBLE_REPORTS
from climateqa.utils import get_image_from_azure_blob_storage
from climateqa.engine.keywords import make_keywords_chain
from climateqa.engine.chains.answer_rag import make_rag_papers_chain
from climateqa.engine.graph import make_graph_agent, display_graph

from front.utils import (
    make_html_source,
    make_html_figure_sources,
    parse_output_llm_with_sources,
    serialize_docs,
    make_toolbox,
    make_html_df,
)

# Load environment variables in local mode
try:
    from dotenv import load_dotenv
    load_dotenv()
except Exception as e:
    pass

# Set up the Gradio theme
theme = gr.themes.Base(
    primary_hue="blue",
    secondary_hue="red",
    font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
)

init_prompt = ""

system_template = {
    "role": "system",
    "content": init_prompt,
}

# Azure File Share credentials (pad the account key to a valid base64 length if needed)
account_key = os.environ["BLOB_ACCOUNT_KEY"]
if len(account_key) == 86:
    account_key += "=="

credential = {
    "account_key": account_key,
    "account_name": os.environ["BLOB_ACCOUNT_NAME"],
}

account_url = os.environ["BLOB_ACCOUNT_URL"]
file_share_name = "climateqa"
service = ShareServiceClient(account_url=account_url, credential=credential)
share_client = service.get_share_client(file_share_name)

user_id = create_user_id()

CITATION_LABEL = "BibTeX citation for ClimateQ&A"
CITATION_TEXT = r"""@misc{climateqa,
    author = {Théo Alves Da Costa and Timothée Bohe},
    title = {ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
    year = {2024},
    howpublished = {\url{https://climateqa.com}},
}
@software{climateqa,
    author = {Théo Alves Da Costa and Timothée Bohe},
    publisher = {ClimateQ&A},
    title = {ClimateQ&A, AI-powered conversational assistant for climate change and biodiversity loss},
}
"""

# Create the vectorstore, LLM, reranker and agent
vectorstore = get_pinecone_vectorstore(embeddings_function)
llm = get_llm(provider="openai", max_tokens=1024, temperature=0.0)
reranker = get_reranker("nano")
agent = make_graph_agent(llm, vectorstore, reranker)
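
# The agent built above is a LangGraph graph; `chat` below consumes it through
# `astream_events`, which yields intermediate events (node starts/ends, LLM
# token chunks) rather than a single final answer. A minimal smoke test,
# assuming the same inputs schema used in `chat` (hypothetical, for
# illustration only):
#
#   import asyncio
#
#   async def smoke_test():
#       inputs = {"user_input": "What is climate change?",
#                 "audience": audience_prompts["general"],
#                 "sources_input": ["IPCC"]}
#       async for event in agent.astream_events(inputs, version="v1"):
#           print(event["event"], event.get("name"))
#
#   asyncio.run(smoke_test())
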
audience_prompts["children"] elif audience == "General public": audience_prompt = audience_prompts["general"] elif audience == "Experts": audience_prompt = audience_prompts["experts"] else: audience_prompt = audience_prompts["experts"] # Prepare default values if len(sources) == 0: sources = ["IPCC"] # if len(reports) == 0: # TODO reports = [] inputs = {"user_input": query,"audience": audience_prompt,"sources_input":sources} result = agent.astream_events(inputs,version = "v1") # path_reformulation = "/logs/reformulation/final_output" # path_keywords = "/logs/keywords/final_output" # path_retriever = "/logs/find_documents/final_output" # path_answer = "/logs/answer/streamed_output_str/-" docs = [] docs_html = "" output_query = "" output_language = "" output_keywords = "" gallery = [] start_streaming = False figures = '
    # Minimal event handling, assuming the v1 event names emitted by the graph
    # nodes: collect retrieved documents as they arrive and append answer
    # tokens to the last assistant message.
    answer_message_content = ""
    try:
        async for event in result:
            if event["name"] == "retrieve_documents" and event["event"] == "on_chain_end":
                docs = event["data"]["output"]["documents"]
                docs_html = "".join(make_html_source(d, i) for i, d in enumerate(docs, 1))
            elif event["event"] == "on_chat_model_stream":
                if not start_streaming:
                    start_streaming = True
                    history.append(ChatMessage(role="assistant", content=""))
                answer_message_content += event["data"]["chunk"].content
                history[-1].content = parse_output_llm_with_sources(answer_message_content)
            yield history, docs_html, output_query, output_language, gallery, figures
    except Exception as e:
        raise gr.Error(f"{e}")
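
    # Once streaming is done, extract the image chunks from the retrieved
    # documents, fetch each image from Azure Blob Storage, and inline it as a
    # base64-encoded PNG so it can be shown in the figures panel and gallery.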
")) docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"] for i, doc in enumerate(docs_figures): if doc.metadata["chunk_type"] == "image": try: key = f"Image {i+1}" image_path = doc.metadata["image_path"].split("documents/")[1] img = get_image_from_azure_blob_storage(image_path) # Convert the image to a byte buffer buffered = BytesIO() img.save(buffered, format="PNG") img_str = base64.b64encode(buffered.getvalue()).decode() figures = figures + make_html_figure_sources(doc, i, img_str) gallery.append(img) except Exception as e: print(f"Skipped adding image {i} because of {e}") yield history,docs_html,output_query,output_language,gallery, figures#,output_query,output_keywords def save_feedback(feed: str, user_id): if len(feed) > 1: timestamp = str(datetime.now().timestamp()) file = user_id + timestamp + ".json" logs = { "user_id": user_id, "feedback": feed, "time": timestamp, } log_on_azure(file, logs, share_client) return "Feedback submitted, thank you!" def log_on_azure(file, logs, share_client): logs = json.dumps(logs) file_client = share_client.get_file_client(file) file_client.upload_file(logs) def generate_keywords(query): chain = make_keywords_chain(llm) keywords = chain.invoke(query) keywords = " AND ".join(keywords["keywords"]) return keywords papers_cols_widths = { "id":100, "title":300, "doi":100, "publication_year":100, "abstract":500, "is_oa":50, } papers_cols = list(papers_cols_widths.keys()) papers_cols_widths = list(papers_cols_widths.values()) async def find_papers(query,after): summary = "" keywords = generate_keywords(query) df_works = oa.search(keywords,after = after) df_works = df_works.dropna(subset=["abstract"]) df_works = oa.rerank(query,df_works,reranker) df_works = df_works.sort_values("rerank_score",ascending=False) docs_html = [] for i in range(10): docs_html.append(make_html_df(df_works, i)) docs_html = "".join(docs_html) print(docs_html) G = oa.make_network(df_works) height = "750px" network = oa.show_network(G,color_by = "rerank_score",notebook=False,height = height) network_html = network.generate_html() network_html = network_html.replace("'", "\"") css_to_inject = "" network_html = network_html + css_to_inject network_html = f"""""" docs = df_works["content"].head(10).tolist() df_works = df_works.reset_index(drop = True).reset_index().rename(columns = {"index":"doc"}) df_works["doc"] = df_works["doc"] + 1 df_works = df_works[papers_cols] yield docs_html, network_html, summary chain = make_rag_papers_chain(llm) result = chain.astream_log({"question": query,"docs": docs,"language":"English"}) path_answer = "/logs/StrOutputParser/streamed_output/-" async for op in result: op = op.ops[0] if op['path'] == path_answer: # reforulated question new_token = op['value'] # str summary += new_token else: continue yield docs_html, network_html, summary # -------------------------------------------------------------------- # Gradio # -------------------------------------------------------------------- init_prompt = """ Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**. ❓ How to use - **Language**: You can ask me your questions in any language. - **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer. - **Sources**: You can choose to search in the IPCC or IPBES reports, or both. 
# --------------------------------------------------------------------
# Gradio
# --------------------------------------------------------------------

init_prompt = """
Hello, I am ClimateQ&A, a conversational assistant designed to help you understand climate change and biodiversity loss. I will answer your questions by **sifting through the IPCC and IPBES scientific reports**.

❓ How to use
- **Language**: You can ask me your questions in any language.
- **Audience**: You can specify your audience (children, general public, experts) to get a more adapted answer.
- **Sources**: You can choose to search in the IPCC or IPBES reports, or both.

⚠️ Limitations
*Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*

🛈 Information
Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.

What do you want to learn?
"""


def vote(data: gr.LikeData):
    if data.liked:
        print(data.value)
    else:
        print(data)


with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd() + "/style.css", theme=theme, elem_id="main-component") as demo:
    with gr.Tab("ClimateQ&A"):
        with gr.Row(elem_id="chatbot-row"):
            with gr.Column(scale=2):
                chatbot = gr.Chatbot(
                    value=[ChatMessage(role="assistant", content=init_prompt)],
                    type="messages",
                    show_copy_button=True,
                    show_label=False,
                    elem_id="chatbot",
                    layout="panel",
                    avatar_images=(None, "https://i.ibb.co/YNyd5W2/logo4.png"),
                    max_height="80vh",
                    height="100vh",
                )
                # bot.like(vote, None, None)

                with gr.Row(elem_id="input-message"):
                    textbox = gr.Textbox(
                        placeholder="Ask me anything here!",
                        show_label=False,
                        scale=7,
                        lines=1,
                        interactive=True,
                        elem_id="input-textbox",
                    )

            with gr.Column(scale=1, variant="panel", elem_id="right-panel"):
                with gr.Tabs() as tabs:
                    with gr.TabItem("Examples", elem_id="tab-examples", id=0):
                        examples_hidden = gr.Textbox(visible=False)
                        first_key = list(QUESTIONS.keys())[0]
                        dropdown_samples = gr.Dropdown(
                            QUESTIONS.keys(),
                            value=first_key,
                            interactive=True,
                            show_label=True,
                            label="Select a category of sample questions",
                            elem_id="dropdown-samples",
                        )

                        samples = []
                        for i, key in enumerate(QUESTIONS.keys()):
                            examples_visible = True if i == 0 else False
                            with gr.Row(visible=examples_visible) as group_examples:
                                examples_questions = gr.Examples(
                                    QUESTIONS[key],
                                    [examples_hidden],
                                    examples_per_page=8,
                                    run_on_click=False,
                                    elem_id=f"examples{i}",
                                    api_name=f"examples{i}",
                                    # label="Click on the example question or enter your own",
                                    # cache_examples=True,
                                )
                            samples.append(group_examples)

                    with gr.Tab("Sources", elem_id="tab-sources", id=1):
                        sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
                        docs_textbox = gr.State("")

                    # with Modal(visible=False) as config_modal:

                    with gr.Tab("Papers", elem_id="tab-citations", id=4):
                        btn_summary = gr.Button("Summary")
                        # Simulated pop-up window for the summary
                        with gr.Group(visible=False, elem_id="papers-summary-popup") as summary_popup:
                            papers_summary = gr.Markdown("### Summary Content", visible=True, elem_id="papers-summary")

                        btn_relevant_papers = gr.Button("Relevant papers")
                        # Simulated pop-up window for the relevant papers
                        with gr.Group(visible=False, elem_id="papers-relevant-popup") as relevant_popup:
                            papers_html = gr.HTML(show_label=False, elem_id="sources-textbox")
                            docs_textbox = gr.State("")

                        btn_citations_network = gr.Button("Citations network")
                        # Simulated pop-up window for the citations network
                        with Modal(visible=False) as modal:
                            citations_network = gr.HTML("")