ibrahimlasfar committed on
Commit b8d38d2 · 1 Parent(s): 7250ede

Update chatbot with real-time audio/image input and model selection

Files changed (5)
  1. README.md +9 -2
  2. api/endpoints.py +33 -22
  3. main.py +74 -45
  4. utils/generation.py +61 -196
  5. utils/web_search.py +5 -5
README.md CHANGED
@@ -1,9 +1,9 @@
 ---
-title: MGZON FLAN-T5 API
+title: MGZon Chatbot
 emoji: "🤖"
 colorFrom: "blue"
 colorTo: "green"
-sdk: docker
+sdk: gradio
 app_file: main.py
 pinned: false
 ---
@@ -37,6 +37,13 @@ This model is a fine-tuned version of [MGZON/Veltrix](https://huggingface.co/MGZ
 It achieves the following results on the evaluation set:
 - Loss: nan
 
+## Features
+- Real-time voice input/output with Whisper and Parler-TTS.
+- Image capture and analysis with CLIP.
+- Web search integration with Google API.
+- Model selection for flexible query handling.
+- Enhanced UI with custom icons and responsive design.
+
 ## Model description
 
 More information needed
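The feature list above corresponds to the API and UI changes in the files below. As a quick orientation, a minimal sketch of querying the `/api/model-info` route to see which models the Space is configured with; the base URL is an assumption for a locally running instance, adjust host and port as needed:

```python
import requests

BASE_URL = "http://localhost:7860"  # assumed host/port, not part of this commit

# /api/model-info returns the configured model names (primary, secondary,
# tertiary, CLIP variants, ...) as a JSON object.
info = requests.get(f"{BASE_URL}/api/model-info", timeout=10).json()
print(info["model_name"], info["secondary_model"])
```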
api/endpoints.py CHANGED
@@ -1,21 +1,24 @@
 import os
 from fastapi import APIRouter, HTTPException, UploadFile, File
+from fastapi.responses import StreamingResponse
 from openai import OpenAI
 from api.models import QueryRequest
 from utils.generation import request_generation, select_model
 from utils.web_search import web_search
+import io
 
 router = APIRouter()
 
 HF_TOKEN = os.getenv("HF_TOKEN")
+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:fireworks-ai")
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
 
 @router.get("/api/model-info")
 def model_info():
     return {
         "model_name": MODEL_NAME,
-        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"),
+        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "openai/gpt-oss-20b:together"),
         "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1"),
         "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
         "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
@@ -33,7 +36,7 @@ async def performance_stats():
 
 @router.post("/api/chat")
 async def chat_endpoint(req: QueryRequest):
-    model_name, api_endpoint = select_model(req.message)
+    model_name, api_endpoint = select_model(req.message, model_choice=req.model_choice if hasattr(req, 'model_choice') else None)
     stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
@@ -44,17 +47,16 @@ async def chat_endpoint(req: QueryRequest):
         temperature=req.temperature,
         max_new_tokens=req.max_new_tokens,
         deep_search=req.enable_browsing,
+        output_type="text"
     )
-    response = "".join(list(stream))
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"response": response}
 
-
-    # in api/endpoints.py
 @router.post("/api/audio-transcription")
 async def audio_transcription_endpoint(file: UploadFile = File(...)):
     model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
     audio_data = await file.read()
-    response = "".join(list(request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message="Transcribe audio",
@@ -64,14 +66,16 @@ async def audio_transcription_endpoint(file: UploadFile = File(...)):
         max_new_tokens=128000,
         input_type="audio",
         audio_data=audio_data,
-    )))
+        output_type="text"
+    )
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"transcription": response}
 
 @router.post("/api/text-to-speech")
 async def text_to_speech_endpoint(req: dict):
     text = req.get("text", "")
     model_name, api_endpoint = select_model("text to speech", input_type="text")
-    response = request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=text,
@@ -80,8 +84,9 @@ async def text_to_speech_endpoint(req: dict):
         temperature=0.7,
         max_new_tokens=128000,
         input_type="text",
+        output_type="speech"
     )
-    audio_data = b"".join(list(response))
+    audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
     return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
 
 @router.post("/api/code")
@@ -91,7 +96,7 @@ async def code_endpoint(req: dict):
     code = req.get("code", "")
     prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
     model_name, api_endpoint = select_model(prompt)
-    response = "".join(list(request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=prompt,
@@ -99,14 +104,16 @@ async def code_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    )))
+        output_type="text"
+    )
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"generated_code": response}
 
 @router.post("/api/analysis")
 async def analysis_endpoint(req: dict):
     message = req.get("text", "")
     model_name, api_endpoint = select_model(message)
-    response = "".join(list(request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=message,
@@ -114,24 +121,28 @@ async def analysis_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    )))
+        output_type="text"
+    )
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"analysis": response}
 
 @router.post("/api/image-analysis")
-async def image_analysis_endpoint(req: dict):
-    image_url = req.get("image_url", "")
-    task = req.get("task", "describe")
-    prompt = f"Perform the following task on the image at {image_url}: {task}"
-    model_name, api_endpoint = select_model(prompt)
-    response = "".join(list(request_generation(
+async def image_analysis_endpoint(file: UploadFile = File(...)):
+    model_name, api_endpoint = select_model("image analysis", input_type="image")
+    image_data = await file.read()
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
-        message=prompt,
+        message="Analyze this image",
         system_prompt="You are an expert in image analysis. Provide detailed descriptions or classifications based on the query.",
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    )))
+        input_type="image",
+        image_data=image_data,
+        output_type="text"
+    )
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
    return {"image_analysis": response}
 
 @router.get("/api/test-model")
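Taken together, the endpoint changes standardize on a streaming `request_generation` generator whose string chunks are joined for text responses and whose byte chunks are joined for audio. A hedged sketch of how a client might exercise the updated routes; the `QueryRequest` field names are inferred from how `chat_endpoint` reads the request (`req.message`, `req.temperature`, `req.max_new_tokens`, `req.enable_browsing`, `req.model_choice`), the authoritative schema lives in api/models.py, and the base URL is an assumption:

```python
import requests

BASE_URL = "http://localhost:7860"  # assumed; point at the running FastAPI app

# Chat: plain JSON in, joined text chunks out.
payload = {
    "message": "Summarize the MGZon chatbot features.",
    "temperature": 0.7,
    "max_new_tokens": 1024,
    "enable_browsing": False,
    "model_choice": "openai/gpt-oss-20b:together",  # optional override
}
resp = requests.post(f"{BASE_URL}/api/chat", json=payload, timeout=120)
print(resp.json()["response"])

# Transcription (and image analysis) now take multipart file uploads.
with open("sample.wav", "rb") as f:
    r = requests.post(f"{BASE_URL}/api/audio-transcription", files={"file": f}, timeout=300)
print(r.json()["transcription"])
```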
main.py CHANGED
@@ -20,7 +20,7 @@ logger.info("Files in /app/: %s", os.listdir("/app"))
 
 # Set up the client for the Hugging Face Inference API
 HF_TOKEN = os.getenv("HF_TOKEN")
-BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")  # add the backup token
+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 if not HF_TOKEN:
     logger.error("HF_TOKEN is not set in environment variables.")
     raise ValueError("HF_TOKEN is required for Inference API.")
@@ -31,71 +31,84 @@ CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
 
 # CSS setup
 css = """
-.gradio-container { max-width: 1200px; margin: auto; }
-.chatbot { border: 1px solid #ccc; border-radius: 10px; padding: 15px; background-color: #f9f9f9; }
-.input-textbox { font-size: 16px; padding: 10px; }
+.gradio-container { max-width: 1200px; margin: auto; font-family: Arial, sans-serif; }
+.chatbot { border: 1px solid #ccc; border-radius: 12px; padding: 20px; background-color: #f0f4f8; }
+.input-textbox { font-size: 18px; padding: 12px; border-radius: 8px; }
 .upload-button::before {
-    content: '📷';
-    margin-right: 8px;
-    font-size: 22px;
+    content: '📸';
+    margin-right: 10px;
+    font-size: 24px;
 }
 .audio-input::before {
-    content: '🎤';
-    margin-right: 8px;
-    font-size: 22px;
+    content: '🎙️';
+    margin-right: 10px;
+    font-size: 24px;
 }
 .audio-output::before {
     content: '🔊';
-    margin-right: 8px;
-    font-size: 22px;
+    margin-right: 10px;
+    font-size: 24px;
+}
+.send-button {
+    background-color: #007bff;
+    color: white;
+    padding: 10px 20px;
+    border-radius: 8px;
+    cursor: pointer;
+    font-size: 16px;
+    transition: background-color 0.3s;
+}
+.send-button:hover {
+    background-color: #0056b3;
 }
 .loading::after {
     content: '';
     display: inline-block;
-    width: 16px;
-    height: 16px;
-    border: 2px solid #333;
+    width: 18px;
+    height: 18px;
+    border: 3px solid #007bff;
     border-top-color: transparent;
     border-radius: 50%;
     animation: spin 1s linear infinite;
-    margin-left: 8px;
+    margin-left: 10px;
 }
 @keyframes spin {
     to { transform: rotate(360deg); }
 }
 .output-container {
     margin-top: 20px;
-    padding: 10px;
+    padding: 15px;
     border: 1px solid #ddd;
-    border-radius: 8px;
+    border-radius: 10px;
+    background-color: #fff;
 }
 .audio-output-container {
     display: flex;
     align-items: center;
-    gap: 10px;
-    margin-top: 10px;
+    gap: 12px;
+    margin-top: 15px;
+}
+.model-selector {
+    border-radius: 8px;
+    padding: 10px;
+    font-size: 16px;
 }
 """
 
 # Function to process input (text, audio, images, files)
-def process_input(message, audio_input=None, file_input=None, history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000):
+def process_input(message, audio_input=None, image_input=None, model_choice="openai/gpt-oss-120b:cerebras", history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000, output_type="text"):
     input_type = "text"
     audio_data = None
     image_data = None
+
     if audio_input:
         input_type = "audio"
-        with open(audio_input, "rb") as f:
-            audio_data = f.read()
-        message = "Transcribe this audio"
-    elif file_input:
-        input_type = "file"
-        if file_input.endswith(('.png', '.jpg', '.jpeg')):
-            input_type = "image"
-            with open(file_input, "rb") as f:
-                image_data = f.read()
-            message = f"Analyze image: {file_input}"
-        else:
-            message = f"Analyze file: {file_input}"
+        audio_data = audio_input
+        message = "Transcribe this audio and respond accordingly"
+    elif image_input:
+        input_type = "image"
+        image_data = image_input
+        message = f"Analyze this image: {message or 'Describe the image'}"
 
     response_text = ""
     audio_response = None
@@ -109,7 +122,9 @@ def process_input(message, audio_input=None, file_input=None, history=None, syst
         max_new_tokens=max_new_tokens,
         input_type=input_type,
         audio_data=audio_data,
-        image_data=image_data
+        image_data=image_data,
+        model_choice=model_choice,
+        output_type=output_type
     ):
         if isinstance(chunk, bytes):
             audio_response = io.BytesIO(chunk)
@@ -122,7 +137,7 @@ def process_input(message, audio_input=None, file_input=None, history=None, syst
 chatbot_ui = gr.ChatInterface(
     fn=process_input,
     chatbot=gr.Chatbot(
-        label="MGZon Chatbot",
+        label="MGZon Chatbot",
         height=800,
         latex_delimiters=LATEX_DELIMS,
     ),
@@ -130,28 +145,42 @@ chatbot_ui = gr.ChatInterface(
     additional_inputs=[
         gr.Textbox(
             label="System Prompt",
-            value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image, and file inputs. For audio, transcribe using Whisper. For text-to-speech, use Parler-TTS. For images and files, analyze content appropriately. Continue generating content until the query is fully addressed, leveraging the full capacity of the model.",
+            value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image, and file inputs. For audio, transcribe using Whisper and respond with text or speech. For images, analyze using CLIP and provide detailed descriptions. For general queries, use the selected model to provide in-depth answers.",
             lines=4
         ),
         gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7),
         gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
         gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True),
         gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000),
-        gr.Audio(label="Voice Input", type="filepath", elem_classes="audio-input"),
-        gr.File(label="Upload Image/File", file_types=["image", ".pdf", ".txt"], elem_classes="upload-button"),
+        gr.Dropdown(
+            label="Model Choice",
+            choices=[
+                "openai/gpt-oss-120b:cerebras",
+                "openai/gpt-oss-20b:together",
+                "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
+                "mistralai/Mixtral-8x7B-Instruct-v0.1",
+                "openai/clip-vit-base-patch32",
+                "openai/whisper-large-v3-turbo",
+                "parler-tts/parler-tts-mini-v1"
+            ],
+            value="openai/gpt-oss-120b:cerebras",
+            elem_classes="model-selector"
+        ),
+        gr.Audio(label="Record & Send Voice", type="numpy", streaming=True, elem_classes="audio-input"),
+        gr.Image(label="Capture & Send Image", type="numpy", source="webcam", elem_classes="upload-button"),
+        gr.Radio(label="Output Type", choices=["text", "speech"], value="text")
     ],
     additional_outputs=[gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output", autoplay=True)],
     stop_btn="Stop",
     examples=[
-        ["Explain the difference between supervised and unsupervised learning in detail with examples."],
-        ["Generate a complete React component for a login form with form validation and error handling."],
-        ["Describe this image: https://example.com/image.jpg"],
-        ["Transcribe this audio: [upload audio file]."],
-        ["Convert this text to speech: Hello, welcome to MGZon!"],
-        ["Analyze this file: [upload PDF or text file]."],
+        ["Explain the history of AI in detail."],
+        ["Generate a React login component with validation."],
+        ["Describe this image: [capture image]."],
+        ["Transcribe and respond to this audio: [record audio]."],
+        ["Convert this text to speech: Welcome to MGZon!"],
     ],
     title="MGZon Chatbot",
-    description="A versatile chatbot powered by DeepSeek, CLIP, Whisper, and Parler-TTS for text, image, audio, and file queries. Supports long responses, voice input/output, file uploads with custom icons, and backup token switching. Licensed under Apache 2.0.",
+    description="A versatile chatbot powered by multiple models for text, image, and audio queries. Supports real-time voice and image input, model selection, and web search. Licensed under Apache 2.0.",
     theme="gradio/soft",
     css=css,
 )
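One detail worth flagging: with `type="numpy"`, `gr.Audio` and `gr.Image` hand `process_input` NumPy data (for audio, a `(sample_rate, samples)` tuple), while `request_generation` is annotated to accept `Optional[bytes]`. A small, hypothetical helper along these lines could bridge the two if raw WAV bytes are needed; `soundfile` is an assumed dependency and the helper is not part of this commit:

```python
import io

import numpy as np
import soundfile as sf  # assumed dependency; any WAV encoder would do


def numpy_audio_to_wav_bytes(audio_input: tuple[int, np.ndarray]) -> bytes:
    """Convert a Gradio (sample_rate, samples) tuple into WAV-encoded bytes."""
    sample_rate, samples = audio_input
    # Gradio typically delivers int16 samples; scale integer types to float32.
    if samples.dtype.kind == "i":
        samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    else:
        samples = samples.astype(np.float32)
    buf = io.BytesIO()
    sf.write(buf, samples, sample_rate, format="WAV")
    return buf.getvalue()
```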
utils/generation.py CHANGED
@@ -15,11 +15,12 @@ import torchaudio
 from PIL import Image
 from transformers import CLIPModel, CLIPProcessor, AutoProcessor
 from parler_tts import ParlerTTSForConditionalGeneration
+from utils.web_search import web_search  # moved the import out of the function
 
 logger = logging.getLogger(__name__)
 
 # Cache setup
-cache = TTLCache(maxsize=100, ttl=600)  # cache of 100 entries with a 10-minute TTL
+cache = TTLCache(maxsize=100, ttl=600)
 
 # LATEX_DELIMS definition
 LATEX_DELIMS = [
@@ -31,11 +32,11 @@ LATEX_DELIMS = [
 
 # Set up the client for the Hugging Face Inference API
 HF_TOKEN = os.getenv("HF_TOKEN")
-BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")  # backup token
+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
 FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co/v1"
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:fireworks-ai")
-SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
+SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "openai/gpt-oss-20b:together")
 TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
 CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32")
 CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
@@ -43,7 +44,6 @@ ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3-turbo")
 TTS_MODEL = os.getenv("TTS_MODEL", "parler-tts/parler-tts-mini-v1")
 
 def check_model_availability(model_name: str, api_base: str, api_key: str) -> tuple[bool, str]:
-    """Check that the model is available via the API, with backup-token support."""
     try:
         response = requests.get(
             f"{api_base}/models/{model_name}",
@@ -64,17 +64,18 @@ def check_model_availability(model_name: str, api_base: str, api_key: str) -> tu
             return check_model_availability(model_name, api_base, BACKUP_HF_TOKEN)
         return False, api_key
 
-def select_model(query: str, input_type: str = "text") -> tuple[str, str]:
+def select_model(query: str, input_type: str = "text", model_choice: Optional[str] = None) -> tuple[str, str]:
+    if model_choice:
+        logger.info(f"User-selected model: {model_choice}")
+        return model_choice, API_ENDPOINT if model_choice in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME] else FALLBACK_API_ENDPOINT
+
     query_lower = query.lower()
-    # Audio support
     if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
         logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
         return ASR_MODEL, FALLBACK_API_ENDPOINT
-    # Text-to-speech support
     if any(keyword in query_lower for keyword in ["text-to-speech", "tts", "تحويل نص إلى صوت"]):
         logger.info(f"Selected {TTS_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for text-to-speech")
         return TTS_MODEL, FALLBACK_API_ENDPOINT
-    # CLIP models for image-related queries
     image_patterns = [
         r"\bimage\b", r"\bpicture\b", r"\bphoto\b", r"\bvisual\b", r"\bصورة\b", r"\bتحليل\s+صورة\b",
         r"\bimage\s+analysis\b", r"\bimage\s+classification\b", r"\bimage\s+description\b"
@@ -83,16 +84,6 @@ def select_model(query: str, input_type: str = "text") -> tuple[str, str]:
         if re.search(pattern, query_lower, re.IGNORECASE):
             logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
             return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
-    # DeepSeek model for MGZon-related queries
-    mgzon_patterns = [
-        r"\bmgzon\b", r"\bmgzon\s+(products|services|platform|features|mission|technology|solutions|oauth)\b",
-        r"\bميزات\s+mgzon\b", r"\bخدمات\s+mgzon\b", r"\boauth\b"
-    ]
-    for pattern in mgzon_patterns:
-        if re.search(pattern, query_lower, re.IGNORECASE):
-            logger.info(f"Selected {SECONDARY_MODEL_NAME} with endpoint {FALLBACK_API_ENDPOINT} for MGZon-related query: {query}")
-            return SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT
-    # Default model for general queries
     logger.info(f"Selected {MODEL_NAME} with endpoint {API_ENDPOINT} for general query: {query}")
     return MODEL_NAME, API_ENDPOINT
 
@@ -113,16 +104,13 @@ def request_generation(
     input_type: str = "text",
     audio_data: Optional[bytes] = None,
     image_data: Optional[bytes] = None,
+    output_type: str = "text"
 ) -> Generator[bytes | str, None, None]:
-    from utils.web_search import web_search  # deferred import
-
-    # Check model availability, with backup-token support
     is_available, selected_api_key = check_model_availability(model_name, api_base, api_key)
     if not is_available:
         yield f"Error: Model {model_name} is not available. Please check the model endpoint or token."
         return
 
-    # Build a cache key
     cache_key = hashlib.md5(json.dumps({
         "message": message,
         "system_prompt": system_prompt,
@@ -143,7 +131,7 @@ def request_generation(
     enhanced_system_prompt = system_prompt
 
     # Audio (ASR) handling
-    if model_name == ASR_MODEL and audio_data:
+    if model_name == ASR_MODEL and audio_data is not None:
         task_type = "audio_transcription"
         try:
             audio_file = io.BytesIO(audio_data)
@@ -158,6 +146,15 @@ def request_generation(
                 response_format="text"
             )
             yield transcription
+            if output_type == "speech":
+                tts_model = TTS_MODEL
+                tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=transcription, return_tensors="pt")
+                tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
+                audio = tts_model_instance.generate(**tts_inputs)
+                audio_file = io.BytesIO()
+                torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                audio_file.seek(0)
+                yield audio_file.read()
             cache[cache_key] = [transcription]
             return
         except Exception as e:
@@ -166,11 +163,11 @@ def request_generation(
             return
 
     # Text-to-speech (TTS) handling
-    if model_name == TTS_MODEL:
+    if model_name == TTS_MODEL or output_type == "speech":
         task_type = "text_to_speech"
         try:
-            model = ParlerTTSForConditionalGeneration.from_pretrained(model_name)
-            processor = AutoProcessor.from_pretrained(model_name)
+            model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+            processor = AutoProcessor.from_pretrained(TTS_MODEL)
             inputs = processor(text=message, return_tensors="pt")
             audio = model.generate(**inputs)
             audio_file = io.BytesIO()
@@ -185,7 +182,7 @@ def request_generation(
             return
 
     # Image handling
-    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
+    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data is not None:
         task_type = "image_analysis"
         try:
             model = CLIPModel.from_pretrained(model_name)
@@ -195,8 +192,18 @@ def request_generation(
             outputs = model(**inputs)
             logits_per_image = outputs.logits_per_image
             probs = logits_per_image.softmax(dim=1)
-            yield f"Image analysis result: {probs.tolist()}"
-            cache[cache_key] = [f"Image analysis result: {probs.tolist()}"]
+            analysis = f"Image analysis result: {probs.tolist()}"
+            yield analysis
+            if output_type == "speech":
+                tts_model = TTS_MODEL
+                tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=analysis, return_tensors="pt")
+                tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
+                audio = tts_model_instance.generate(**tts_inputs)
+                audio_file = io.BytesIO()
+                torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                audio_file.seek(0)
+                yield audio_file.read()
+            cache[cache_key] = [analysis]
             return
         except Exception as e:
             logger.error(f"Image analysis failed: {e}")
@@ -206,27 +213,16 @@ def request_generation(
     # Refine the system_prompt based on the task type
     if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
         task_type = "image"
-        enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query. Continue until the query is fully addressed."
+        enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query."
     elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
         task_type = "code"
-        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations. Support frameworks like React, Django, Flask, and others. Format code with triple backticks (```) and specify the language. Continue until the task is fully addressed."
+        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations."
     elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
         task_type = "analysis"
-        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights. Continue until all aspects of the query are thoroughly covered."
-    elif any(keyword in message.lower() for keyword in ["review", "مراجعة"]):
-        task_type = "review"
-        enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations. Ensure the response is complete and detailed."
-    elif any(keyword in message.lower() for keyword in ["publish", "نشر"]):
-        task_type = "publish"
-        enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices. Provide a complete and detailed response."
+        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights."
     else:
-        enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable. Continue generating content until the query is fully answered, leveraging the full capacity of the model."
-
-    # If the query is short, encourage a detailed answer
-    if len(message.split()) < 5:
-        enhanced_system_prompt += "\nEven for short or general queries, provide a detailed, in-depth response with examples, explanations, and additional context to ensure completeness."
+        enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable."
 
-    logger.info(f"Task type detected: {task_type}")
     input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
     if chat_history:
         for msg in chat_history:
@@ -262,8 +258,6 @@ def request_generation(
         reasoning_started = False
         reasoning_closed = False
         saw_visible_output = False
-        last_tool_name = None
-        last_tool_args = None
         buffer = ""
 
         for chunk in stream:
@@ -291,16 +285,6 @@ def request_generation(
                         buffer = ""
                     continue
 
-                if chunk.choices[0].delta.tool_calls and model_name in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME]:
-                    tool_call = chunk.choices[0].delta.tool_calls[0]
-                    name = getattr(tool_call, "function", {}).get("name", None)
-                    args = getattr(tool_call, "function", {}).get("arguments", None)
-                    if name:
-                        last_tool_name = name
-                    if args:
-                        last_tool_args = args
-                    continue
-
                 if chunk.choices[0].finish_reason in ("stop", "tool_calls", "error", "length"):
                     if buffer:
                         cached_chunks.append(buffer)
@@ -313,16 +297,8 @@ def request_generation(
                         reasoning_closed = True
 
                     if not saw_visible_output:
-                        msg = "I attempted to call a tool, but tools aren't executed in this environment, so no final answer was produced."
-                        if last_tool_name:
-                            try:
-                                args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
-                            except Exception:
-                                args_text = str(last_tool_args)
-                            msg += f"\n\n• Tool requested: **{last_tool_name}**\n• Arguments: `{args_text}`"
-                        cached_chunks.append(msg)
-                        yield msg
-
+                        cached_chunks.append("No visible output produced.")
+                        yield "No visible output produced."
                     if chunk.choices[0].finish_reason == "error":
                         cached_chunks.append(f"Error: Unknown error")
                         yield f"Error: Unknown error"
@@ -335,6 +311,16 @@ def request_generation(
             cached_chunks.append(buffer)
             yield buffer
 
+        if output_type == "speech":
+            tts_model = TTS_MODEL
+            tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=buffer, return_tensors="pt")
+            tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
+            audio = tts_model_instance.generate(**tts_inputs)
+            audio_file = io.BytesIO()
+            torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+            audio_file.seek(0)
+            yield audio_file.read()
+
         cache[cache_key] = cached_chunks
 
     except Exception as e:
@@ -357,134 +343,12 @@ def request_generation(
             input_type=input_type,
             audio_data=audio_data,
            image_data=image_data,
+            output_type=output_type
         ):
             yield chunk
         return
-        if model_name == MODEL_NAME:
-            fallback_model = SECONDARY_MODEL_NAME
-            fallback_endpoint = FALLBACK_API_ENDPOINT
-            logger.info(f"Retrying with fallback model: {fallback_model} on {fallback_endpoint}")
-            try:
-                is_available, selected_api_key = check_model_availability(fallback_model, fallback_endpoint, selected_api_key)
-                if not is_available:
-                    yield f"Error: Fallback model {fallback_model} is not available."
-                    return
-                client = OpenAI(api_key=selected_api_key, base_url=fallback_endpoint, timeout=120.0)
-                stream = client.chat.completions.create(
-                    model=fallback_model,
-                    messages=input_messages,
-                    temperature=temperature,
-                    max_tokens=max_new_tokens,
-                    stream=True,
-                    tools=[],
-                    tool_choice="none",
-                )
-                for chunk in stream:
-                    if chunk.choices[0].delta.content:
-                        content = chunk.choices[0].delta.content
-                        if content == "<|channel|>analysis<|message|>":
-                            if not reasoning_started:
-                                cached_chunks.append("analysis")
-                                yield "analysis"
-                                reasoning_started = True
-                            continue
-                        if content == "<|channel|>final<|message|>":
-                            if reasoning_started and not reasoning_closed:
-                                cached_chunks.append("assistantfinal")
-                                yield "assistantfinal"
-                                reasoning_closed = True
-                            continue
-
-                        saw_visible_output = True
-                        buffer += content
-
-                        if "\n" in buffer or len(buffer) > 5000:
-                            cached_chunks.append(buffer)
-                            yield buffer
-                            buffer = ""
-                        continue
-
-                    if chunk.choices[0].finish_reason in ("stop", "error", "length"):
-                        if buffer:
-                            cached_chunks.append(buffer)
-                            yield buffer
-                            buffer = ""
-
-                        if reasoning_started and not reasoning_closed:
-                            cached_chunks.append("assistantfinal")
-                            yield "assistantfinal"
-                            reasoning_closed = True
-
-                        if not saw_visible_output:
-                            cached_chunks.append("No visible output produced.")
-                            yield "No visible output produced."
-                        if chunk.choices[0].finish_reason == "error":
-                            cached_chunks.append(f"Error: Unknown error with fallback model {fallback_model}")
-                            yield f"Error: Unknown error with fallback model {fallback_model}"
-                        elif chunk.choices[0].finish_reason == "length":
-                            cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
-                            yield "Response truncated due to token limit. Please refine your query or request continuation."
-                        break
-
-                if buffer:
-                    cached_chunks.append(buffer)
-                    yield buffer
-
-                cache[cache_key] = cached_chunks
-
-            except Exception as e2:
-                logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
-                try:
-                    is_available, selected_api_key = check_model_availability(TERTIARY_MODEL_NAME, FALLBACK_API_ENDPOINT, selected_api_key)
-                    if not is_available:
-                        yield f"Error: Tertiary model {TERTIARY_MODEL_NAME} is not available."
-                        return
-                    client = OpenAI(api_key=selected_api_key, base_url=FALLBACK_API_ENDPOINT, timeout=120.0)
-                    stream = client.chat.completions.create(
-                        model=TERTIARY_MODEL_NAME,
-                        messages=input_messages,
-                        temperature=temperature,
-                        max_tokens=max_new_tokens,
-                        stream=True,
-                        tools=[],
-                        tool_choice="none",
-                    )
-                    for chunk in stream:
-                        if chunk.choices[0].delta.content:
-                            content = chunk.choices[0].delta.content
-                            saw_visible_output = True
-                            buffer += content
-                            if "\n" in buffer or len(buffer) > 5000:
-                                cached_chunks.append(buffer)
-                                yield buffer
-                                buffer = ""
-                            continue
-                        if chunk.choices[0].finish_reason in ("stop", "error", "length"):
-                            if buffer:
-                                cached_chunks.append(buffer)
-                                yield buffer
-                                buffer = ""
-                            if not saw_visible_output:
-                                cached_chunks.append("No visible output produced.")
-                                yield "No visible output produced."
-                            if chunk.choices[0].finish_reason == "error":
-                                cached_chunks.append(f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}")
-                                yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
-                            elif chunk.choices[0].finish_reason == "length":
-                                cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
-                                yield "Response truncated due to token limit. Please refine your query or request continuation."
-                            break
-                    if buffer:
-                        cached_chunks.append(buffer)
-                        yield buffer
-                    cache[cache_key] = cached_chunks
-                except Exception as e3:
-                    logger.exception(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
-                    yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({fallback_model}), Tertiary ({TERTIARY_MODEL_NAME}). Please check your model configurations."
-                return
-        else:
-            yield f"Error: Failed to load model {model_name}: {e}"
-            return
+        yield f"Error: Failed to load model {model_name}: {e}"
+        return
 
 def format_final(analysis_text: str, visible_text: str) -> str:
     reasoning_safe = html.escape((analysis_text or "").strip())
@@ -500,12 +364,12 @@ def format_final(analysis_text: str, visible_text: str) -> str:
         f"{response}" if response else "No final response available."
     )
 
-def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None):
+def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None, model_choice=None, output_type="text"):
     if not message.strip() and not audio_data and not image_data:
-        yield "Please enter a prompt or upload a file."
+        yield "Please enter a prompt, record audio, or capture an image."
        return
 
-    model_name, api_endpoint = select_model(message, input_type=input_type)
+    model_name, api_endpoint = select_model(message, input_type=input_type, model_choice=model_choice)
     chat_history = []
     for h in history:
         if isinstance(h, dict):
@@ -534,7 +398,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
                 "type": "function",
                 "function": {
                     "name": "code_generation",
-                    "description": "Generate or modify code for various frameworks (React, Django, Flask, etc.)",
+                    "description": "Generate or modify code for various frameworks",
                     "parameters": {
                         "type": "object",
                         "properties": {
@@ -612,6 +476,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
         input_type=input_type,
         audio_data=audio_data,
         image_data=image_data,
+        output_type=output_type
     )
 
     for chunk in stream:
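The simplified `select_model` now has two paths: an explicit `model_choice` passed down from the UI or API, and keyword/regex-based routing for audio, TTS, and image queries, with everything else going to the primary model. An illustrative sketch of the expected routing under the default environment variables (no `MODEL_NAME`/`ASR_MODEL` overrides set); importing `utils.generation` pulls in the heavy model dependencies, so this is for orientation rather than a lightweight test:

```python
from utils.generation import select_model

# Audio input routes to the Whisper ASR model on the fallback endpoint.
print(select_model("transcribe audio", input_type="audio"))
# expected: ("openai/whisper-large-v3-turbo", "https://api-inference.huggingface.co/v1")

# An explicit user selection wins; chat models go through the router endpoint.
print(select_model("anything", model_choice="openai/gpt-oss-20b:together"))
# expected: ("openai/gpt-oss-20b:together", "https://router.huggingface.co/v1")
```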
utils/web_search.py CHANGED
@@ -11,23 +11,23 @@ def web_search(query: str) -> str:
     google_cse_id = os.getenv("GOOGLE_CSE_ID")
     if not google_api_key or not google_cse_id:
         return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
-    url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}+site:https://hager-zon.vercel.app/"
-    response = requests.get(url, timeout=10)
+    url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
+    response = requests.get(url, timeout=5)
     response.raise_for_status()
     results = response.json().get("items", [])
     if not results:
         return "No web results found."
     search_results = []
-    for i, item in enumerate(results[:5]):
+    for i, item in enumerate(results[:3]):  # fewer results to speed up the search
         title = item.get("title", "")
         snippet = item.get("snippet", "")
         link = item.get("link", "")
         try:
-            page_response = requests.get(link, timeout=5)
+            page_response = requests.get(link, timeout=3)
             page_response.raise_for_status()
             soup = BeautifulSoup(page_response.text, "html.parser")
             paragraphs = soup.find_all("p")
-            page_content = " ".join([p.get_text() for p in paragraphs][:1000])
+            page_content = " ".join([p.get_text() for p in paragraphs][:500])
         except Exception as e:
             logger.warning(f"Failed to fetch page content for {link}: {e}")
             page_content = snippet
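The web_search changes drop the hard-coded site restriction and tighten the timeouts and result count. A minimal sketch of calling it directly; the placeholder credentials are assumptions and must be replaced with real Google Custom Search values:

```python
import os

from utils.web_search import web_search

# web_search returns a plain string: formatted results, "No web results found.",
# or an explanatory message when the credentials below are missing.
os.environ.setdefault("GOOGLE_API_KEY", "<your-google-api-key>")
os.environ.setdefault("GOOGLE_CSE_ID", "<your-cse-id>")
print(web_search("MGZon chatbot"))
```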