# LEAP_GAIA / app.py
import os
import json

import requests
import gradio as gr
from dotenv import load_dotenv
from gradio_client import Client
from langchain_openai import ChatOpenAI
from transformers import pipeline

# DSPy-based prompt generation. These agent/utility imports are assumed to be
# provided by this project's DSPy setup; they are not part of the public dspy
# API, so minimal local fallbacks are sketched below.
try:
    from dspy.agents import Agent
    from dspy import spawn_processes
    from dspy.utils import SentenceSplitter, SentimentAnalyzer, NamedEntityRecognizer
except ImportError:
    pass  # fall back to the local stand-ins defined below

# Choose model
model_name = "dolphin-phi"

# Load the chosen LLM model. Note: this repo hosts GGUF weights, which the
# transformers pipeline cannot load directly; in practice a GGUF-aware runtime
# (e.g. llama-cpp-python or ctransformers) would be needed for this checkpoint.
llm = pipeline("text-generation", model="TheBloke/dolphin-2_6-phi-2-GGUF")

# Vectara config (customer_id, corpus_id, and api_key are read from the
# environment inside query_vectara below).

# Brought from the Vectara example from Tonic. Global variable to hold component references.
components = {}

load_dotenv()

# Note: gradio_client.Client expects a Space; pointing it at a model repo is
# unlikely to work as-is, kept from the original code.
seamless_client = Client("TheBloke/dolphin-2_6-phi-2-GGUF")

hf_token = os.getenv("HuggingFace_Token")
base_model_id = os.getenv("BASE_MODEL_ID", "default_base_model_id")
model_directory = os.getenv("MODEL_DIRECTORY", "default_model_directory")
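# Hedged fallback stand-ins for the assumed DSPy utilities above, used only if
# the dspy imports failed. These are minimal sketches (an assumption, built on
# spaCy and a transformers sentiment pipeline) that match the interfaces used
# by dspy_generate_agent_prompts below; they are not the real DSPy classes.
# Agent and spawn_processes are left undefined in the fallback since nothing
# below uses them.
if "SentenceSplitter" not in globals():
    import spacy
    from transformers import pipeline as hf_pipeline

    _sentiment_pipeline = hf_pipeline("sentiment-analysis")

    class SentenceSplitter:
        """Split text into sentences with spaCy."""

        def __init__(self, model_name="en_core_web_sm"):
            self._nlp = spacy.load(model_name)

        def process(self, text):
            return [sent.text for sent in self._nlp(text).sents]

    class SentimentAnalyzer:
        """Return a sentiment label (e.g. POSITIVE/NEGATIVE) for one sentence."""

        def analyze(self, sentence):
            return _sentiment_pipeline(sentence)[0]["label"]

    class NamedEntityRecognizer:
        """Yield spaCy entity spans exposing .text and .label_."""

        def __init__(self, model_name="en_core_web_sm"):
            self._nlp = spacy.load(model_name)

        def process(self, sentence):
            return self._nlp(sentence).ents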
def dspy_generate_agent_prompts(prompt):
    """
    Generates prompts for different agents based on the provided prompt and DSPy functionalities.

    Args:
        prompt (str): The user-provided prompt (e.g., customer reviews).

    Returns:
        list: A list containing agent-specific prompts.
    """
    # 1. Split the prompt into individual sentences
    sentences = SentenceSplitter().process(prompt)

    # 2. Analyze sentiment for each sentence
    sentiment_analyzer = SentimentAnalyzer()
    sentiment_labels = [sentiment_analyzer.analyze(sentence) for sentence in sentences]

    # 3. Extract named entities related to specific topics
    ner = NamedEntityRecognizer(model_name="en_core_web_sm")
    extracted_entities = {}
    for sentence in sentences:
        entities = ner.process(sentence)
        for entity in entities:
            if entity.label_ in ["FOOD", "ORG", "LOCATION"]:  # Customize entity labels based on needs
                extracted_entities.setdefault(entity.label_, []).append(entity.text)

    # 4. Craft prompts for each agent
    agent_prompts = []

    # Sentiment Analyzer prompt
    sentiment_prompt = "Analyze the sentiment of the following sentences:\n" + "\n".join(sentences)
    agent_prompts.append(sentiment_prompt)

    # Topic Extractor prompt (modify based on your specific topics)
    topic_prompt = (
        "Extract the main topics discussed in the following text, "
        f"focusing on food, service, and ambiance:\n{prompt}"
    )
    agent_prompts.append(topic_prompt)

    # Recommendation Generator prompt (modify based on your requirements)
    positive_count = sum(label == "POSITIVE" for label in sentiment_labels)
    negative_count = sum(label == "NEGATIVE" for label in sentiment_labels)
    neutral_count = sum(label == "NEUTRAL" for label in sentiment_labels)
    topic_mentions = "\n".join(f"{k}: {','.join(v)}" for k, v in extracted_entities.items())
    recommendation_prompt = (
        f"Based on the sentiment analysis (positive: {positive_count}, "
        f"negative: {negative_count}, neutral: {neutral_count}) and extracted "
        f"topics ({topic_mentions}), suggest recommendations for the restaurant to improve."
    )
    agent_prompts.append(recommendation_prompt)

    return agent_prompts
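# Example usage (a sketch; the exact prompts depend on the sentence splitter,
# sentiment model, and NER model in use):
#   prompts = dspy_generate_agent_prompts("The pasta was great, but service was slow.")
#   prompts[0]  -> sentiment-analysis prompt over the split sentences
#   prompts[1]  -> topic-extraction prompt for the full text
#   prompts[2]  -> recommendation prompt summarizing sentiment counts and entities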
def query_vectara(text):
    user_message = text

    # Read authentication parameters from the .env file
    customer_id = os.getenv("CUSTOMER_ID")
    corpus_id = os.getenv("CORPUS_ID")
    api_key = os.getenv("API_KEY")

    # Define the headers
    api_key_header = {
        "customer-id": customer_id,
        "x-api-key": api_key,
    }

    # Define the request body in the structure provided in the example
    request_body = {
        "query": [
            {
                "query": user_message,
                "queryContext": "",
                "start": 1,
                "numResults": 25,
                "contextConfig": {
                    "charsBefore": 0,
                    "charsAfter": 0,
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                    "startTag": "%START_SNIPPET%",
                    "endTag": "%END_SNIPPET%",
                },
                "rerankingConfig": {
                    "rerankerId": 272725718,
                    "mmrConfig": {"diversityBias": 0.35},
                },
                "corpusKey": [
                    {
                        "customerId": customer_id,
                        "corpusId": corpus_id,
                        "semantics": 0,
                        "metadataFilter": "",
                        "lexicalInterpolationConfig": {"lambda": 0},
                        "dim": [],
                    }
                ],
                "summary": [
                    {
                        "maxSummarizedResults": 5,
                        "responseLang": "auto",
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0",
                    }
                ],
            }
        ]
    }

    # Make the API request with requests
    response = requests.post(
        "https://api.vectara.io/v1/query",
        json=request_body,  # json= serializes the request body automatically
        verify=True,
        headers=api_key_header,
    )

    if response.status_code == 200:
        query_data = response.json()
        if query_data:
            sources_info = []

            # Extract the summary
            summary = query_data["responseSet"][0]["summary"][0]["text"]

            # Iterate over all response sets
            for response_set in query_data.get("responseSet", []):
                # Extract sources, limited to the top 5
                for source in response_set.get("response", [])[:5]:
                    source_metadata = source.get("metadata", [])
                    source_info = {}
                    for metadata in source_metadata:
                        metadata_name = metadata.get("name", "")
                        metadata_value = metadata.get("value", "")
                        if metadata_name == "title":
                            source_info["title"] = metadata_value
                        elif metadata_name == "author":
                            source_info["author"] = metadata_value
                        elif metadata_name == "pageNumber":
                            source_info["page number"] = metadata_value

                    if source_info:
                        sources_info.append(source_info)

            result = {"summary": summary, "sources": sources_info}
            return json.dumps(result, indent=2)
        else:
            return "No data found in the response."
    else:
        return f"Error: {response.status_code}"
# Define the main function to be used with Gradio
def generate_outputs(user_prompt):
    # 1. The earlier langchain prompt-processing step was replaced by the DSPy
    #    prompt-generation function used in step 4 below.

    # 2. Generate synthetic data (see the local sketch of
    #    generate_synthetic_data_distributed above)
    synthetic_data = generate_synthetic_data_distributed(user_prompt)

    # 3. Combine the user prompt and the synthetic data
    combined_data = f"{user_prompt}\n{synthetic_data}"

    # 4. Generate prompts for the agents using DSPy
    agent_prompts = dspy_generate_agent_prompts(combined_data)

    # 5. Use the chosen LLM for two of the prompts and the Vectara tool for the
    #    third agent. (These outputs are computed but not yet wired into the
    #    final report below.)
    output_1 = llm(agent_prompts[0], max_length=100)[0]["generated_text"]
    output_2 = llm(agent_prompts[1], max_length=100)[0]["generated_text"]
    output_3 = query_vectara(user_prompt)

    # 6. Produce the final outputs (stand-in section; see produce_outputs below)
    report, recommendations, visualization = produce_outputs(combined_data)
    return report, recommendations, visualization
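# Hedged sketch of the missing produce_outputs() helper: the original
# "stand-in section" comment suggests Langchain or DSPy would build these
# outputs. Here the local `llm` pipeline drafts the report and recommendations,
# and a simple matplotlib bar chart stands in for the visualization; the
# intended implementation is an assumption.
def produce_outputs(combined_data):
    import matplotlib
    matplotlib.use("Agg")  # headless backend for server environments
    import matplotlib.pyplot as plt

    report = llm(
        f"Write a short report summarizing:\n{combined_data}",
        max_length=200,
    )[0]["generated_text"]
    recommendations = llm(
        f"List three recommendations based on:\n{combined_data}",
        max_length=150,
    )[0]["generated_text"]

    # Placeholder visualization: word count of the combined input
    fig, ax = plt.subplots()
    ax.bar(["words"], [len(combined_data.split())])
    ax.set_title("Combined input size (placeholder visualization)")
    fig.savefig("visualization.png")
    plt.close(fig)

    # Gradio's "image" output accepts a file path
    return report, recommendations, "visualization.png"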
# Create the Gradio interface
gr.Interface(
    fn=generate_outputs,
    inputs=gr.Textbox(label="Enter a prompt"),
    outputs=["textbox", "textbox", "image"],
    title="Multi-Agent Prompt Processor",
    description="Processes a prompt using Langchain, DSPy, and a chosen Hugging Face LLM to generate diverse outputs.",
).launch()