import asyncio
import json
import os
import re

import aiohttp
import gradio as gr
import requests

# Hugging Face API token, read from the environment
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
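
# Note: the Inference API expects a valid token; without one, requests are
# typically rejected (401) or rate-limited. Set it in the shell before launch:
#   export HF_API_TOKEN=hf_your_token_here   # placeholder value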

# Models setup
models = {
    "Mistral-7B-Instruct": "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2",
    "DeepSeek-7B-Instruct": "https://api-inference.huggingface.co/models/deepseek-ai/deepseek-llm-7b-instruct",
    "Qwen-7B-Chat": "https://api-inference.huggingface.co/models/Qwen/Qwen-7B-Chat",
}

# Judge model (Mixtral-8x7B)
judge_model_url = "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B-Instruct-v0.1"

HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}

# Async function to call a single model
async def query_model(session, model_name, question):
    payload = {"inputs": question, "parameters": {"max_new_tokens": 300}}
    try:
        async with session.post(
            models[model_name],
            headers=HEADERS,
            json=payload,
            timeout=aiohttp.ClientTimeout(total=60),
        ) as response:
            result = await response.json()
            # The Inference API returns either a list of generations or a single dict
            if isinstance(result, list) and len(result) > 0:
                return model_name, result[0]["generated_text"]
            elif isinstance(result, dict) and "generated_text" in result:
                return model_name, result["generated_text"]
            else:
                return model_name, str(result)
    except Exception as e:
        return model_name, f"Error: {e}"

# Async function to query all models concurrently
async def gather_model_answers(question):
    async with aiohttp.ClientSession() as session:
        tasks = [query_model(session, model_name, question) for model_name in models]
        results = await asyncio.gather(*tasks)
    return dict(results)
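
# A quick standalone check (hypothetical usage, not part of the app flow):
#   answers = asyncio.run(gather_model_answers("What is the capital of France?"))
#   print(answers["Qwen-7B-Chat"])
# `answers` maps each model name defined above to its generated text.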

# Function to ask the judge to pick the best answer
def judge_best_answer(question, answers):
    # Format the prompt for the judge
    judge_prompt = f"""
You are a wise AI Judge. A user asked the following question:

Question:
{question}

Here are the answers provided by different models:

Answer 1 (Mistral-7B-Instruct):
{answers['Mistral-7B-Instruct']}

Answer 2 (DeepSeek-7B-Instruct):
{answers['DeepSeek-7B-Instruct']}

Answer 3 (Qwen-7B-Chat):
{answers['Qwen-7B-Chat']}

Please carefully read all three answers. Your job:
- Pick the best answer (Answer 1, Answer 2, or Answer 3).
- Explain briefly why you chose that answer.

Respond in this JSON format:
{{"best_answer": "Answer X", "reason": "Your reasoning here"}}
""".strip()
payload = {"inputs": judge_prompt, "parameters": {"max_new_tokens": 300}}
response = requests.post(judge_model_url, headers=HEADERS, json=payload)
if response.status_code == 200:
result = response.json()
# Try to extract JSON from response
import json
import re
# Attempt to extract JSON block
match = re.search(r"\{.*\}", str(result))
if match:
try:
judge_decision = json.loads(match.group(0))
return judge_decision
except json.JSONDecodeError:
return {"best_answer": "Unknown", "reason": "Failed to parse judge output."}
else:
return {"best_answer": "Unknown", "reason": "No JSON found in judge output."}
else:
return {"best_answer": "Unknown", "reason": f"Judge API error: {response.status_code}"}

# Final app logic: fan the question out to all models, then ask the judge
def multi_model_qa(question):
    answers = asyncio.run(gather_model_answers(question))
    judge_decision = judge_best_answer(question, answers)

    # Map the judge's "Answer X" label back to the corresponding model's answer
    best_answer_key = judge_decision.get("best_answer", "")
    if "1" in best_answer_key:
        best_answer_text = answers["Mistral-7B-Instruct"]
    elif "2" in best_answer_key:
        best_answer_text = answers["DeepSeek-7B-Instruct"]
    elif "3" in best_answer_key:
        best_answer_text = answers["Qwen-7B-Chat"]
    else:
        best_answer_text = "Could not determine best answer."

    return (
        answers["Mistral-7B-Instruct"],
        answers["DeepSeek-7B-Instruct"],
        answers["Qwen-7B-Chat"],
        best_answer_text,
        judge_decision.get("reason", "No reasoning provided."),
    )
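
# Note: Gradio runs sync handlers in a worker thread, so asyncio.run() is safe
# here; multi_model_qa could also be declared `async def` and await the gather
# directly, since Gradio accepts async event handlers as well.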

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Multi-Model Answer Aggregator")
    gr.Markdown("Ask any question. The system queries multiple models and the AI Judge selects the best answer.")

    question_input = gr.Textbox(label="Enter your question", placeholder="Ask me anything...", lines=2)
    submit_btn = gr.Button("Get Best Answer")

    mistral_output = gr.Textbox(label="Mistral-7B-Instruct Answer")
    deepseek_output = gr.Textbox(label="DeepSeek-7B-Instruct Answer")
    qwen_output = gr.Textbox(label="Qwen-7B-Chat Answer")
    best_answer_output = gr.Textbox(label="🏆 Best Answer Selected")
    judge_reasoning_output = gr.Textbox(label="⚖️ Judge's Reasoning")

    submit_btn.click(
        multi_model_qa,
        inputs=[question_input],
        outputs=[mistral_output, deepseek_output, qwen_output, best_answer_output, judge_reasoning_output],
    )

demo.launch()