# open-o1 / app.py
# qdqd's picture
# Update app.py
# 36d71df verified
import asyncio
import logging
import random
import re
from collections import deque

import gradio as gr
from duckduckgo_search import DDGS
# Configure the root logger at INFO level; the retry helper below reports
# failed chat attempts through logging.error.
logging.basicConfig(level=logging.INFO)
# Asynchronous function to get LLM response (with retry and backoff)
async def get_llm_response_async(prompt, model, max_retries=3):
    """Request a chat completion from DDGS, retrying on failure.

    The blocking ``DDGS().chat`` call is dispatched through
    ``asyncio.to_thread`` so it does not run on the event loop thread.

    Args:
        prompt: Full prompt text sent to the chat backend.
        model: Model identifier forwarded to ``DDGS().chat``.
        max_retries: Total number of attempts before giving up.

    Returns:
        The reply split on whitespace into a list of words. When every
        attempt fails, or when ``max_retries`` is less than 1, the list
        holds the words of an ``<error>...</error>`` message instead, so
        callers can always iterate the result and detect failure by
        searching for ``<error>``.
    """
    failure_words = f"<error>Unable to get response from AI after {max_retries} attempts.</error>".split()

    for attempt in range(max_retries):
        try:
            response = await asyncio.to_thread(DDGS().chat, prompt, model=model)
            return response.split()
        except Exception as e:
            # Broad catch is deliberate: any backend failure should lead to
            # a retry or to the <error> sentinel, never to a crashed stream.
            if attempt >= max_retries - 1:
                logging.error("Max retries reached. Error: %s", e)
                return failure_words
            logging.error("Error occurred: %s. Retrying in %s seconds...", e, 2**attempt)
            # Exponential backoff plus up to one second of random jitter.
            await asyncio.sleep(2**attempt + random.random())

    # Reached only when max_retries < 1 (the loop body never ran). Returning
    # the sentinel instead of None keeps callers that iterate the result safe.
    return failure_words
# Asynchronous generator running the analyze -> rethink -> refine pipeline
async def process_message_async(message, history, analysis_prompt, rethinking_prompt, refinement_prompt):
    """Stream a three-stage answer (analysis, rethinking, final response).

    Each stage calls ``get_llm_response_async`` with the "gpt-4o-mini" model
    and appends the returned words one by one to a growing transcript.

    Args:
        message: The current user query.
        history: Earlier exchanges; each item is indexed as ``h[0]`` (user
            text) and ``h[1]`` (assistant text).
            NOTE(review): assumes pair-style history entries - confirm this
            matches the chat component's configured history format.
        analysis_prompt: Instructions prepended to the stage-1 request.
        rethinking_prompt: Instructions prepended to the stage-2 request.
        refinement_prompt: Instructions prepended to the stage-3 request.

    Yields:
        The full transcript accumulated so far, once per appended word
        (and once for the fixed "satisfactory" closing line).
    """
    # Only the five most recent exchanges are kept as context.
    recent_turns = deque(
        (f"User: {h[0]}\nEcho-Refraction: {h[1]}" for h in history),
        maxlen=5,
    )
    context = "\n".join(recent_turns)
    full_response = ""

    # Stage 1: analyse the user's query.
    analysis_request = f"{analysis_prompt}\n\nConversation history:\n{context}\n\nUser query: {message}\n\nPlease analyze this query and respond accordingly."
    analysis_words = await get_llm_response_async(analysis_request, "gpt-4o-mini")
    analysis_text = " ".join(analysis_words)
    full_response += "Analysis:\n"
    for word in analysis_words:
        full_response += word + " "
        yield full_response
    # The error text has already been streamed to the user; stop here.
    if "<error>" in analysis_text:
        return

    # Stage 2: critique the stage-1 output.
    review_request = f"{rethinking_prompt}\n\nConversation history:\n{context}\n\nOriginal user query: {message}\n\nInitial response: {analysis_text}\n\nPlease review and suggest improvements or confirm if satisfactory."
    review_words = await get_llm_response_async(review_request, "gpt-4o-mini")
    review_text = " ".join(review_words)
    full_response += "\n\nRethinking:\n"
    for word in review_words:
        full_response += word + " "
        yield full_response
    if "<error>" in review_text:
        return

    # The reviewer signals "no changes needed" with the word "Done". Match it
    # as a whole word so that e.g. "abandoned" or "undone" inside a real
    # suggestion does not silently skip the refinement stage.
    if re.search(r"\bdone\b", review_text, re.IGNORECASE):
        full_response += "\n\nFinal Response: The initial response is satisfactory and no further refinement is needed."
        yield full_response
        return

    # Stage 3: produce the refined answer from the critique.
    refine_request = f"{refinement_prompt}\n\nConversation history:\n{context}\n\nOriginal user query: {message}\n\nInitial response: {analysis_text}\n\nSuggestion: {review_text}\n\nPlease provide a final response considering the suggestion."
    final_words = await get_llm_response_async(refine_request, "gpt-4o-mini")
    full_response += "\n\nFinal Response:\n"
    for word in final_words:
        full_response += word + " "
        yield full_response
# Asynchronous chat handler registered with the Gradio interface
async def respond_async(message, history, analysis_prompt=None, rethinking_prompt=None, refinement_prompt=None):
    """Stream the pipeline's partial transcripts for one chat turn.

    The ChatInterface in this file registers this function without any
    additional inputs, so it has to be callable with just ``message`` and
    ``history``. The three prompt parameters are therefore optional and fall
    back to the module-level prompts of the same name.

    Args:
        message: The current user query.
        history: Earlier exchanges, forwarded unchanged to the pipeline.
        analysis_prompt: Optional override for the analysis instructions.
        rethinking_prompt: Optional override for the rethinking instructions.
        refinement_prompt: Optional override for the refinement instructions.

    Yields:
        Each partial transcript produced by ``process_message_async``.
    """
    # The parameters shadow the module-level prompts, so read the defaults
    # from the module namespace. This happens at call time because the
    # prompts are defined further down the file than this function.
    module_defaults = globals()
    if analysis_prompt is None:
        analysis_prompt = module_defaults["analysis_prompt"]
    if rethinking_prompt is None:
        rethinking_prompt = module_defaults["rethinking_prompt"]
    if refinement_prompt is None:
        refinement_prompt = module_defaults["refinement_prompt"]

    async for chunk in process_message_async(message, history, analysis_prompt, rethinking_prompt, refinement_prompt):
        yield chunk
# Instruction text for the query-analysis step: asks the model to check the
# user's query for clarity, completeness and ambiguity, and to wrap its reply
# in <analyzing> tags. The text is sent verbatim, so whitespace matters.
analysis_prompt = """
You are Echo-Refraction, an AI assistant tasked with analyzing user queries. Your role is to:
1. Carefully examine the user's input for clarity, completeness, and potential ambiguities.
2. Identify if the query needs refinement or additional information.
3. If refinement is needed, suggest specific improvements or ask clarifying questions.
4. If the query is clear, respond with "Query is clear and ready for processing."
5. Provide a brief explanation of your analysis in all cases.
Enclose your response in <analyzing> tags.
"""
# Instruction text for the review step: asks the model to critique the
# initial response and either list concrete improvements or answer "Done."
# when none are needed, wrapping its reply in <rethinking> tags.
rethinking_prompt = """
You are Echo-Refraction, an advanced AI model responsible for critically evaluating and improving responses. Your task is to:
1. Carefully review the original user query and the initial response.
2. Analyze the response for accuracy, relevance, completeness, and potential improvements.
3. Consider perspectives or approaches that might enhance the response.
4. If you identify areas for improvement:
a. Clearly explain what aspects need refinement and why.
b. Provide specific suggestions for how the response could be enhanced.
c. If necessary, propose additional information or context that could be included.
5. If the initial response is satisfactory and you have no suggestions for improvement, respond with "Done."
Enclose your response in <rethinking> tags.
"""
# Instruction text for the final step: asks the model to merge the review
# suggestions into a final answer (explaining any suggestion it rejects) and
# to wrap that answer in <output> tags.
refinement_prompt = """
You are Echo-Refraction, an AI assistant tasked with providing a final, refined response to the user. Your role is to:
1. Review the original user query, your initial response, and the suggestions provided.
2. Consider the feedback and suggestions for improvement.
3. Integrate the suggested improvements into your response, ensuring that:
a. The information is accurate and up-to-date.
b. The response is comprehensive and addresses all aspects of the user's query.
c. The language is clear, concise, and appropriate for the user's level of understanding.
4. If you disagree with any suggestions, provide a brief explanation of why you chose not to incorporate them.
5. Deliver a final response that represents the best possible answer to the user's query.
Enclose your response in <output> tags.
"""
# Chat UI wired to the async generator respond_async. Each example is a
# one-element row holding just the message text; example caching is off.
# NOTE(review): no additional_inputs are configured here - confirm that
# respond_async can be invoked with only (message, history).
demo = gr.ChatInterface(
    title="Open-O1",
    description="Chat with Open-O1, an AI assistant that analyzes, rethinks, and refines responses. Watch as it streams its thought process word by word!",
    fn=respond_async,
    cache_examples=False,
    examples=[
        [question]
        for question in (
            "How many 'r' are there in the word 'strawberry'",
            "Five days ago I went to the fruit market and bought some bananas. On what day was I at the market?",
            "Bob likes representing his name as a number. If b represents 1 what number represents Bob?",
        )
    ],
)
# Start the app only when this file is run as a script (not on import):
# a share link is requested and the API page is hidden.
if __name__ == "__main__":
    demo.launch(share=True, show_api=False)