"""Streamlit app combining Vectara retrieval, HHEM hallucination scoring,
and CrewAI single-agent summarization backed by several LLM search tools.

Tabs:
    1. Links to synthetic-data GPTs.
    2. Links to data-query GPTs.
    3. HHEM-Victara query tuner (Vectara search + HHEM factual-consistency
       scores for the top results against the generated summary).
    4. Model evaluation: run one of five CrewAI summarization pipelines.
"""

import streamlit as st
import requests
import json
import os
import pandas as pd
from sentence_transformers import CrossEncoder
import numpy as np
import re
from textwrap import dedent
import google.generativeai as genai

# Tool imports
from crewai.tools.gemini_tools import GeminiSearchTools
from crewai.tools.mixtral_tools import MixtralSearchTools
from crewai.tools.zephyr_tools import ZephyrSearchTools
from crewai.tools.phi2_tools import Phi2SearchTools

# Google Langchain
from langchain_google_genai import GoogleGenerativeAI

# Crew imports
from crewai import Agent, Task, Crew, Process

# Retrieve API key from environment variable
GOOGLE_AI_STUDIO = os.environ.get('GOOGLE_API_KEY')

# Ensure the API key is available.
if not GOOGLE_AI_STUDIO:
    # BUG FIX: the original message told the operator to set
    # "GOOGLE_AI_STUDIO2", but the code actually reads GOOGLE_API_KEY.
    raise ValueError("API key not found. Please set the GOOGLE_API_KEY environment variable.")

# Shared Gemini LLM used by every CrewAI agent below.
gemini_llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_AI_STUDIO)


# CrewAI +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

def _run_summary_crew(research_topic, role, goal, tools):
    """Build a one-agent, one-task sequential crew and run it.

    Consolidates the five near-identical ``crewai_process_*`` functions,
    which differed only in the agent's role, goal, and tool list.

    Args:
        research_topic: Topic string interpolated into the task prompt.
        role: Agent role label.
        goal: Agent goal description.
        tools: List of tool callables the agent may use.

    Returns:
        The result of ``crew.kickoff()`` (the task output).
    """
    agent = Agent(
        role=role,
        goal=goal,
        backstory="""Skilled in running query evaluation""",
        verbose=True,
        allow_delegation=False,
        llm=gemini_llm,
        tools=tools,
    )
    task = Task(
        description=f"""Summarize {research_topic}""",
        agent=agent,
    )
    crew = Crew(
        agents=[agent],
        tasks=[task],
        verbose=2,
        process=Process.sequential,
    )
    # Get your crew to work!
    return crew.kickoff()


def crewai_process_gemini(research_topic):
    """Summarize *research_topic* using the Gemini search tool."""
    return _run_summary_crew(
        research_topic,
        role='Summary Evaluator',
        # NOTE(review): this goal reads like a copy-paste from an
        # unrelated persona; preserved verbatim to keep behavior identical.
        goal='To learn how to manage her anxiety in social situations through group therapy.',
        tools=[GeminiSearchTools.gemini_search],
    )


def crewai_process_mixtral_crazy(research_topic):
    """Summarize *research_topic* using the high-temperature Mixtral tool."""
    return _run_summary_crew(
        research_topic,
        role='Summary Evaluator',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        tools=[MixtralSearchTools.mixtral_crazy],
    )


def crewai_process_mixtral_normal(research_topic):
    """Summarize *research_topic* using the standard Mixtral tool."""
    return _run_summary_crew(
        research_topic,
        role='Summary Evaluator',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        tools=[MixtralSearchTools.mixtral_normal],
    )


def crewai_process_zephyr_normal(research_topic):
    """Summarize *research_topic* using the Zephyr tool."""
    return _run_summary_crew(
        research_topic,
        role='Summary Evaluator',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        tools=[ZephyrSearchTools.zephyr_normal],
    )


def crewai_process_phi2(research_topic):
    """Summarize *research_topic* using the Phi-2 tool."""
    return _run_summary_crew(
        research_topic,
        # NOTE(review): role string preserved verbatim from the original.
        role='Emily Mental Patient Graphic Designer Anxiety',
        goal='Evaluate the summary using the HHEM-Victara Tuner',
        tools=[Phi2SearchTools.phi2_search],
    )


# Credentials ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Direct indexing (not .get) so a missing variable fails fast at import
# time, matching the original behavior.
corpus_id = os.environ['VECTARA_CORPUS_ID']
customer_id = os.environ['VECTARA_CUSTOMER_ID']
api_key = os.environ['VECTARA_API_KEY']


# Get Data +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

def get_post_headers() -> dict:
    """Returns headers that should be attached to each post request."""
    return {
        "x-api-key": api_key,
        "customer-id": customer_id,
        "Content-Type": "application/json",
    }


def query_vectara(query: str, filter_str="", lambda_val=0.0) -> str:
    """Run a Vectara query and return its generated summary text.

    Args:
        query: Search query string.
        filter_str: Optional Vectara metadata filter expression.
        lambda_val: Lexical-interpolation lambda (0.0 = pure neural).

    Returns:
        The summary with citation markers such as "[1,2]" stripped,
        or "" if the HTTP request fails (an error is shown in the UI).
    """
    corpus_key = {
        "customerId": customer_id,
        "corpusId": corpus_id,
        "lexicalInterpolationConfig": {"lambda": lambda_val},
    }
    if filter_str:
        corpus_key["metadataFilter"] = filter_str

    data = {
        "query": [
            {
                "query": query,
                "start": 0,
                "numResults": 10,
                "contextConfig": {
                    "sentencesBefore": 2,
                    "sentencesAfter": 2
                },
                "corpusKey": [corpus_key],
                "summary": [
                    {
                        "responseLang": "eng",
                        "maxSummarizedResults": 5,
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    },
                ]
            }
        ]
    }

    response = requests.post(
        "https://api.vectara.io/v1/query",
        headers=get_post_headers(),
        data=json.dumps(data),
        timeout=130,
    )
    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return ""

    result = response.json()
    answer = result["responseSet"][0]["summary"][0]["text"]
    # Strip inline citation markers like "[1]" or "[2,3]" from the summary.
    return re.sub(r'\[\d+(,\d+){0,5}\]', '', answer)


# Initialize the HHEM model +++++++++++++++++++++++++++++++++++++++++++++++
model = CrossEncoder('vectara/hallucination_evaluation_model')


def compute_hhem_scores(texts, summary):
    """Score each text against *summary* with the HHEM cross-encoder.

    Args:
        texts: Iterable of source passages.
        summary: The candidate summary to check for hallucination.

    Returns:
        Array of factual-consistency scores, one per text.
    """
    pairs = [[text, summary] for text in texts]
    scores = model.predict(pairs)
    return scores


def vectara_query(query: str, config: dict):
    """Query Vectara using per-call credentials and tuning parameters.

    Args:
        query: Search query string.
        config: Dict with keys "api_key", "customer_id", "corpus_id",
            and optional "lambda_val" (default 0.5) and "top_k" (default 10).

    Returns:
        Tuple ``(results, summary)`` where ``results`` is a list of
        ``[text, score]`` pairs and ``summary`` is the generated summary;
        ``([], "")`` on HTTP failure (an error is shown in the UI).
    """
    corpus_key = [{
        "customerId": config["customer_id"],
        "corpusId": config["corpus_id"],
        "lexicalInterpolationConfig": {"lambda": config.get("lambda_val", 0.5)},
    }]

    data = {
        "query": [{
            "query": query,
            "start": 0,
            "numResults": config.get("top_k", 10),
            "contextConfig": {
                "sentencesBefore": 2,
                "sentencesAfter": 2,
            },
            "corpusKey": corpus_key,
            "summary": [{
                "responseLang": "eng",
                "maxSummarizedResults": 5,
            }]
        }]
    }

    headers = {
        "x-api-key": config["api_key"],
        "customer-id": config["customer_id"],
        "Content-Type": "application/json",
    }
    response = requests.post(
        headers=headers,
        url="https://api.vectara.io/v1/query",
        data=json.dumps(data),
    )
    if response.status_code != 200:
        st.error(f"Query failed (code {response.status_code}, reason {response.reason}, details {response.text})")
        return [], ""

    result = response.json()
    responses = result["responseSet"][0]["response"]
    summary = result["responseSet"][0]["summary"][0]["text"]
    res = [[r['text'], r['score']] for r in responses]
    return res, summary


# UI ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# Maps the selectbox label (unchanged from the original strings) to the
# function it runs — replaces the original if/elif dispatch chain.
PROCESS_FUNCTIONS = {
    'crewai_process_gemini': crewai_process_gemini,
    'crewai_process_mixtral_crazy': crewai_process_mixtral_crazy,
    'crewai_process_mixtral_normal': crewai_process_mixtral_normal,
    'crewai_process_zephyr_normal': crewai_process_zephyr_normal,
    'crewai_process_phi2': crewai_process_phi2,
}

# Create the main app with four tabs.
tab1, tab2, tab3, tab4 = st.tabs(["Synthetic Data", "Data Query", "HHEM-Victara Query Tuner", "Model Evaluation"])

with tab1:
    st.header("Synthetic Data")
    st.link_button("Create Synthetic Medical Data", "https://chat.openai.com/g/g-XyHciw52w-synthetic-clinical-data")

with tab2:
    st.header("Data Query")
    st.link_button("Query & Summarize Data", "https://chat.openai.com/g/g-9tWqg4gRY-explore-summarize-medical-data")

with tab3:
    st.header("HHEM-Victara Query Tuner")

    # User inputs
    query = st.text_area("Enter your text for query tuning", "", height=75)
    lambda_val = st.slider("Lambda Value", min_value=0.0, max_value=1.0, value=0.5)
    top_k = st.number_input("Top K Results", min_value=1, max_value=50, value=10)

    if st.button("Query Vectara"):
        config = {
            "api_key": os.environ.get("VECTARA_API_KEY", ""),
            "customer_id": os.environ.get("VECTARA_CUSTOMER_ID", ""),
            "corpus_id": os.environ.get("VECTARA_CORPUS_ID", ""),
            "lambda_val": lambda_val,
            "top_k": top_k,
        }
        results, summary = vectara_query(query, config)

        if results:
            st.subheader("Summary")
            st.write(summary)

            st.subheader("Top Results")
            # Score only the top 5 passages against the summary.
            texts = [r[0] for r in results[:5]]
            scores = compute_hhem_scores(texts, summary)

            # Prepare and display the dataframe
            df = pd.DataFrame({'Fact': texts, 'HHEM Score': scores})
            st.dataframe(df)
        else:
            st.write("No results found.")

with tab4:
    st.header("Model Evaluation")

    # User input for the research topic
    research_topic = st.text_input('Enter your research topic:', '')

    # Selection box for the function to execute
    process_selection = st.selectbox(
        'Choose the process to run:',
        ('crewai_process_gemini', 'crewai_process_mixtral_crazy', 'crewai_process_mixtral_normal', 'crewai_process_zephyr_normal', 'crewai_process_phi2')
    )

    # Button to execute the chosen function
    if st.button('Run Process'):
        if research_topic:  # Ensure there's a topic provided
            result = PROCESS_FUNCTIONS[process_selection](research_topic)
            st.write(result)
        else:
            st.warning('Please enter a research topic.')