import gradio as gr

import torch
import dspy
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, pipeline
from threading import Thread

from sentence_splitter import SentenceSplitter  # SentimentAnalyzer and NamedEntityRecognizer were made-up imports; placeholder implementations are defined below
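
# --- Placeholder sketches (assumption, not from any real package): minimal
# stand-ins for the made-up SentimentAnalyzer and NamedEntityRecognizer imports
# noted above, defined here so the module stays runnable. Swap in your own
# implementations as needed; they only have to match the .analyze()/.process()
# calls made in dspy_generate_agent_prompts() below.
class SentimentAnalyzer:
    """Minimal stand-in wrapping the default transformers sentiment pipeline."""

    def __init__(self):
        # The default checkpoint emits "POSITIVE"/"NEGATIVE" labels only, so the
        # "NEUTRAL" count computed downstream will simply be zero with this stub.
        self._pipe = pipeline("sentiment-analysis", device_map="cpu")

    def analyze(self, sentence):
        return self._pipe(sentence)[0]["label"]


class NamedEntityRecognizer:
    """Minimal stand-in wrapping spaCy; entities expose the spaCy-style .label_ and .text attributes used below."""

    def __init__(self, model_name="en_core_web_sm"):
        import spacy  # assumes: pip install spacy && python -m spacy download en_core_web_sm
        self._nlp = spacy.load(model_name)

    def process(self, sentence):
        return self._nlp(sentence).ents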


# The huggingface model id for Microsoft's phi-2 model
checkpoint = "microsoft/phi-2"

# Download and load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True)

# Text generation pipeline
phi2 = pipeline(
    "text-generation", 
    tokenizer=tokenizer, 
    model=model,  
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    device_map="cpu"
)
# DSPy-based prompt generation
# from dspy import Agent
# from dspy import spawn_processes

def dspy_generate_agent_prompts(prompt):
    """
    Generates prompts for different agents based on the provided prompt and DSPy functionalities.

    Args:
        prompt (str): The user-provided prompt (e.g., farm location and crops).

    Returns:
        list: A list containing agent-specific prompts.
    """

    # 1. Split the prompt into individual sentences
    sentences = SentenceSplitter(language="en").split(prompt)

    # 2. Analyze sentiment for each sentence
    sentiment_analyzer = SentimentAnalyzer()
    sentiment_labels = []
    for sentence in sentences:
        sentiment_labels.append(sentiment_analyzer.analyze(sentence))

    # 3. Extract named entities related to specific topics
    ner = NamedEntityRecognizer(model_name="en_core_web_sm")
    extracted_entities = {}
    for sentence in sentences:
        entities = ner.process(sentence)
        for entity in entities:
            if entity.label_ in ["FOOD", "ORG", "LOCATION"]:  # customize to the labels your NER model actually emits (spaCy uses e.g. ORG, GPE, LOC)
                extracted_entities.setdefault(entity.label_, []).append(entity.text)

    # 4. Craft prompts for each agent (incomplete)
    agent_prompts = []

    # **Sentiment Analyzer Prompt:**
    sentiment_prompt = "Analyze the sentiment of the following sentences:\n" + "\n".join(sentences)
    agent_prompts.append(sentiment_prompt)

    # **Topic Extractor Prompt:** (Modify based on your specific topics)
    topic_prompt = f"Extract the main topics discussed in the following text, focusing on food, service, and ambiance:\n{prompt}"
    agent_prompts.append(topic_prompt)

    # **Recommendation Generator Prompt:** (Modify based on your requirements)
    positive_count = sum(label == "POSITIVE" for label in sentiment_labels)
    negative_count = sum(label == "NEGATIVE" for label in sentiment_labels)
    neutral_count = sum(label == "NEUTRAL" for label in sentiment_labels)
    topic_mentions = "\n".join(f"{k}: {','.join(v)}" for k, v in extracted_entities.items())

    recommendation_prompt = f"""Based on the sentiment analysis (positive: {positive_count}, negative: {negative_count}, neutral: {neutral_count}) and extracted topics ({topic_mentions}), suggest recommendations for organic farming methods to address user's concerns in their location."""
    agent_prompts.append(recommendation_prompt)

    return agent_prompts
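
# Example (illustrative assumption, not from the original):
#   dspy_generate_agent_prompts("My farm near Nakuru grows kale and maize.")
#   returns three prompts: one for sentiment analysis, one for topic
#   extraction, and one asking for organic farming recommendations.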

# Function that accepts a prompt and generates text using the phi2 pipeline
def generate(message, chat_history, max_new_tokens):

  # Build synthetic agent prompts for this message and fold them into the instruction
  synth_message = "\n".join(dspy_generate_agent_prompts(message))
  instruction = "You are a helpful organic farming assistant to 'User'. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'. You are an agricultural assistant committed to regenerative practices. You are being supplied with a list of tasks which you will need to walk the user through with the compassionate heart of a teacher and using easily understandable language."
  final_prompt = f"Instruction: {instruction} {synth_message}\n"

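  # Replay prior turns so the model sees the full conversation history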
  for sent, received in chat_history:
    final_prompt += "User: " + sent + "\n"
    final_prompt += "Assistant: " + received + "\n"

  final_prompt += "User: " + message + "\n"
  final_prompt += "Output:"

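  # Guard against overflowing the model's context window, leaving room for the new tokens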
  if len(tokenizer.tokenize(final_prompt)) >= tokenizer.model_max_length - max_new_tokens:
    final_prompt = "Instruction: Say 'Input exceeded context size, please clear the chat history and retry!' Output:"
    
  # Run generation in a background thread, streaming tokens through TextIteratorStreamer
  streamer = TextIteratorStreamer(tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0)
  thread = Thread(target=phi2, kwargs={"text_inputs":final_prompt, "max_new_tokens":max_new_tokens, "streamer":streamer})
  thread.start()

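  # Accumulate streamed tokens; trim any "User:"/"Assistant:" role tags the
  # model emits so only the assistant's current reply is yielded back to the UI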
  generated_text = ""
  for word in streamer:
    generated_text += word
    response = generated_text.strip()

    if "User:" in response:
      response = response.split("User:")[0].strip()

    if "Assistant:" in response:
      response = response.split("Assistant:")[1].strip()

    yield response

# Chat interface with gradio
with gr.Blocks() as demo:
  gr.Markdown("""
  # LEAP Phi-2 Agentic Chatbot Demo
  This multi-agent chatbot was created for the LEAP hackathon to offer an interface to a team of experts for organic farming advice, using Microsoft's 2.7-billion-parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model and DSPy-generated synthetic agent prompts.
  
  To reduce the response time on this hardware, `max_new_tokens` has been set to `21` in the text generation pipeline. With this default configuration, it takes approximately `60 seconds` for a response to start being generated, after which it is streamed one word at a time. Use the slider below to increase or decrease the length of the generated text.
  """)

  tokens_slider = gr.Slider(8, 128, value=21, label="Maximum new tokens", info="A larger `max_new_tokens` parameter value gives you longer text responses but at the cost of a slower response time.")

  chatbot = gr.ChatInterface(
    fn=generate,
    additional_inputs=[tokens_slider],
    stop_btn=None,
    examples=[["Who is Leonhard Euler?"]]
  )
  
demo.queue().launch()