ibrahimlasfar committed
Commit bb3c951 · 1 Parent(s): 2ee9112

Update app with audio/image buttons, model fixes, and UI enhancements

Files changed (8)
  1. Dockerfile +2 -1
  2. README.md +1 -1
  3. api/endpoints.py +39 -12
  4. api/models.py +3 -2
  5. main.py +137 -63
  6. requirements.txt +3 -3
  7. utils/generation.py +190 -42
  8. utils/web_search.py +13 -4
Dockerfile CHANGED
@@ -3,12 +3,13 @@ FROM python:3.10-slim
 # Set working directory
 WORKDIR /app

-# Install chromium-driver and build dependencies
+# Install system dependencies
 RUN apt-get update && apt-get install -y \
     chromium-driver \
     git \
     gcc \
     libc-dev \
+    ffmpeg \
     && apt-get clean && rm -rf /var/lib/apt/lists/*

 # Update pip
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: MGZON Chat
+title: MGZon Chatbot
 emoji: "🤖"
 colorFrom: "blue"
 colorTo: "green"
api/endpoints.py CHANGED
@@ -5,6 +5,7 @@ import io
 from openai import OpenAI
 from api.models import QueryRequest
 from utils.generation import request_generation, select_model
+from utils.web_search import web_search

 router = APIRouter()

@@ -12,13 +13,15 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
 MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:together")
+SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B:featherless-ai")
+TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "openai/gpt-oss-120b:cerebras")

 @router.get("/api/model-info")
 def model_info():
     return {
         "model_name": MODEL_NAME,
-        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B:featherless-ai"),
-        "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "openai/gpt-oss-120b:cerebras"),
+        "secondary_model": SECONDARY_MODEL_NAME,
+        "tertiary_model": TERTIARY_MODEL_NAME,
         "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
         "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
         "api_base": API_ENDPOINT,
@@ -46,7 +49,11 @@ async def chat_endpoint(req: QueryRequest):
         temperature=req.temperature,
         max_new_tokens=req.max_new_tokens,
         deep_search=req.enable_browsing,
+        output_format=req.output_format
     )
+    if req.output_format == "audio":
+        audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
+        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
     response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"response": response}

@@ -54,7 +61,7 @@ async def chat_endpoint(req: QueryRequest):
 async def audio_transcription_endpoint(file: UploadFile = File(...)):
     model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
     audio_data = await file.read()
-    response = "".join([chunk for chunk in request_generation(
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message="Transcribe audio",
@@ -64,14 +71,15 @@ async def audio_transcription_endpoint(file: UploadFile = File(...)):
         max_new_tokens=128000,
         input_type="audio",
         audio_data=audio_data,
-    ) if isinstance(chunk, str)])
+        output_format="text"
+    )))
     return {"transcription": response}

 @router.post("/api/text-to-speech")
 async def text_to_speech_endpoint(req: dict):
     text = req.get("text", "")
     model_name, api_endpoint = select_model("text to speech", input_type="text")
-    response = request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=text,
@@ -80,8 +88,9 @@ async def text_to_speech_endpoint(req: dict):
         temperature=0.7,
         max_new_tokens=128000,
         input_type="text",
+        output_format="audio"
     )
-    audio_data = b"".join([chunk for chunk in response if isinstance(chunk, bytes)])
+    audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
     return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")

 @router.post("/api/code")
@@ -89,9 +98,10 @@ async def code_endpoint(req: dict):
     framework = req.get("framework")
     task = req.get("task")
     code = req.get("code", "")
+    output_format = req.get("output_format", "text")
     prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
     model_name, api_endpoint = select_model(prompt)
-    response = "".join([chunk for chunk in request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=prompt,
@@ -99,14 +109,20 @@ async def code_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    ) if isinstance(chunk, str)])
+        output_format=output_format
+    )
+    if output_format == "audio":
+        audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
+        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"generated_code": response}

 @router.post("/api/analysis")
 async def analysis_endpoint(req: dict):
     message = req.get("text", "")
+    output_format = req.get("output_format", "text")
     model_name, api_endpoint = select_model(message)
-    response = "".join([chunk for chunk in request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=message,
@@ -114,14 +130,20 @@ async def analysis_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    ) if isinstance(chunk, str)])
+        output_format=output_format
+    )
+    if output_format == "audio":
+        audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
+        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"analysis": response}

 @router.post("/api/image-analysis")
 async def image_analysis_endpoint(file: UploadFile = File(...)):
+    output_format = "text"  # could be extended to support audio output
     model_name, api_endpoint = select_model("analyze image", input_type="image")
     image_data = await file.read()
-    response = "".join([chunk for chunk in request_generation(
+    stream = request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message="Analyze this image",
@@ -131,7 +153,12 @@ async def image_analysis_endpoint(file: UploadFile = File(...)):
         max_new_tokens=128000,
         input_type="image",
         image_data=image_data,
-    ) if isinstance(chunk, str)])
+        output_format=output_format
+    )
+    if output_format == "audio":
+        audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
+        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"image_analysis": response}

 @router.get("/api/test-model")
api/models.py CHANGED
@@ -3,8 +3,9 @@ from typing import List, Optional
 
 class QueryRequest(BaseModel):
     message: str
-    system_prompt: str = "You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image inputs. For audio, transcribe using Whisper. For text-to-speech, use Parler-TTS. For images, analyze using CLIP. Respond with voice output when requested. Continue until the query is fully addressed."
+    system_prompt: str = "You are an expert assistant providing detailed, comprehensive, and well-structured responses. For code, include comments, examples, and complete implementations. For image-related queries, provide detailed analysis or descriptions. For general queries, provide in-depth explanations with examples and additional context where applicable. Respond in the requested output format (text or audio)."
     history: Optional[List[dict]] = None
     temperature: float = 0.7
     max_new_tokens: int = 128000
-    enable_browsing: bool = True
+    enable_browsing: bool = False
+    output_format: str = "text"  # new: output format selection
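For illustration only (not part of the commit): a request payload matching the updated QueryRequest schema. Field names and defaults come from the model above; the values are made up.

payload = {
    "message": "Generate a React login form component",
    "history": [],                 # Optional[List[dict]]
    "temperature": 0.7,
    "max_new_tokens": 128000,
    "enable_browsing": False,      # new default in this commit
    "output_format": "text",       # or "audio" for a WAV response
    # system_prompt is omitted here, so the default defined on the model applies
}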
main.py CHANGED
@@ -32,42 +32,88 @@ CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
 # CSS setup
 css = """
 .gradio-container { max-width: 1200px; margin: auto; font-family: Arial, sans-serif; }
-.chatbot { border: 1px solid #ccc; border-radius: 12px; padding: 20px; background-color: #f5f5f5; }
-.input-textbox { font-size: 16px; padding: 12px; border-radius: 8px; }
-.upload-button, .capture-button, .record-button {
-    background-color: #4CAF50; color: white; padding: 10px 20px; border-radius: 8px; font-size: 16px; cursor: pointer;
+.chatbot {
+    border: 1px solid #ccc;
+    border-radius: 15px;
+    padding: 20px;
+    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
+}
+.input-textbox {
+    font-size: 18px;
+    padding: 12px;
+    border-radius: 8px;
+    border: 1px solid #aaa;
+}
+.upload-button, .audio-input-button, .audio-record-button {
+    background: #4CAF50;
+    color: white;
+    border-radius: 8px;
+    padding: 10px 20px;
+    font-size: 16px;
+    cursor: pointer;
+}
+.upload-button:hover, .audio-input-button:hover, .audio-record-button:hover {
+    background: #45a049;
+}
+.upload-button::before {
+    content: '📷 ';
+    font-size: 20px;
+}
+.audio-input-button::before {
+    content: '🎤 ';
+    font-size: 20px;
+}
+.audio-record-button::before {
+    content: '🔊 ';
+    font-size: 20px;
 }
-.upload-button:hover, .capture-button:hover, .record-button:hover { background-color: #45a049; }
-.upload-button::before { content: '📷 '; font-size: 20px; }
-.capture-button::before { content: '🎥 '; font-size: 20px; }
-.record-button::before { content: '🎤 '; font-size: 20px; }
-.audio-output::before { content: '🔊 '; font-size: 20px; }
 .loading::after {
-    content: ''; display: inline-block; width: 18px; height: 18px; border: 3px solid #333;
-    border-top-color: transparent; border-radius: 50%; animation: spin 1s linear infinite; margin-left: 10px;
+    content: '';
+    display: inline-block;
+    width: 18px;
+    height: 18px;
+    border: 3px solid #333;
+    border-top-color: transparent;
+    border-radius: 50%;
+    animation: spin 1s linear infinite;
+    margin-left: 10px;
+}
+@keyframes spin {
+    to { transform: rotate(360deg); }
 }
-@keyframes spin { to { transform: rotate(360deg); } }
 .output-container {
-    margin-top: 20px; padding: 15px; border: 1px solid #ddd; border-radius: 10px; background-color: #fff;
+    margin-top: 25px;
+    padding: 15px;
+    border: 1px solid #ddd;
+    border-radius: 10px;
+    background: #fff;
 }
 .audio-output-container {
-    display: flex; align-items: center; gap: 12px; margin-top: 15px;
+    display: flex;
+    align-items: center;
+    gap: 15px;
+    margin-top: 15px;
+}
+.output-format-radio {
+    margin-top: 10px;
 }
 """

 # Input processing function
-def process_input(message, audio_input=None, image_input=None, history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000):
+def process_input(message, audio_input=None, image_input=None, history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000, output_format="text"):
     input_type = "text"
     audio_data = None
     image_data = None
     if audio_input:
         input_type = "audio"
-        audio_data = audio_input
+        with open(audio_input, "rb") as f:
+            audio_data = f.read()
         message = "Transcribe this audio"
     elif image_input:
         input_type = "image"
-        image_data = image_input
-        message = "Analyze this image"
+        with open(image_input, "rb") as f:
+            image_data = f.read()
+        message = f"Analyze this image"

     response_text = ""
     audio_response = None
@@ -81,7 +127,8 @@ def process_input(message, audio_input=None, image_input=None, history=None, sys
         max_new_tokens=max_new_tokens,
         input_type=input_type,
         audio_data=audio_data,
-        image_data=image_data
+        image_data=image_data,
+        output_format=output_format
     ):
         if isinstance(chunk, bytes):
             audio_response = io.BytesIO(chunk)
@@ -90,56 +137,78 @@ def process_input(message, audio_input=None, image_input=None, history=None, sys
             response_text += chunk
         yield response_text, audio_response

-# Function to enable audio recording
-def start_recording():
-    return gr.update(visible=True)
+# Handler for the audio submit button
+def submit_audio(audio_input, output_format):
+    if not audio_input:
+        return "Please upload or record an audio file.", None
+    return process_input(message="", audio_input=audio_input, output_format=output_format)

-# Function to enable image capture
-def start_image_capture():
-    return gr.update(visible=True)
+# Handler for the image submit button
+def submit_image(image_input, output_format):
+    if not image_input:
+        return "Please upload an image.", None
+    return process_input(message="", image_input=image_input, output_format=output_format)

 # Gradio interface setup
-chatbot_ui = gr.Interface(
-    fn=process_input,
-    inputs=[
-        gr.Textbox(label="Message", placeholder="Type your message or use buttons below...", elem_classes="input-textbox"),
-        gr.Audio(label="Record Audio", sources=["microphone"], type="numpy", streaming=True, visible=False, elem_classes="record-button"),
-        gr.Image(label="Capture/Upload Image", sources=["webcam", "upload"], type="numpy", visible=False, elem_classes="capture-button"),
-        gr.State(value=[]),  # History
-        gr.Textbox(
-            label="System Prompt",
-            value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image inputs. For audio, transcribe using Whisper. For text-to-speech, use Parler-TTS. For images, analyze using CLIP. Respond with voice output when requested. Continue until the query is fully addressed.",
-            lines=4
-        ),
-        gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7),
-        gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
-        gr.Checkbox(label="Enable DeepSearch", value=True),
-        gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000),
-    ],
-    outputs=[
-        gr.Markdown(label="Response", elem_classes="output-container"),
-        gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output", autoplay=True)
-    ],
-    additional_inputs=[
-        gr.Button("Record Audio", elem_classes="record-button", onclick=start_recording),
-        gr.Button("Capture/Upload Image", elem_classes="capture-button", onclick=start_image_capture),
-    ],
-    examples=[
-        ["Explain the history of AI in detail."],
-        ["Generate a React component for a login form."],
-        ["Transcribe this audio: [record audio]."],
-        ["Convert this text to speech: Hello, welcome to MGZon!"],
-        ["Analyze this image: [capture/upload image]."],
-    ],
-    title="MGZon Chatbot",
-    description="A versatile chatbot powered by advanced AI models. Supports text, audio, and image inputs with voice responses. Licensed under Apache 2.0.",
-    theme="gradio/soft",
-    css=css,
-)
+with gr.Blocks(css=css, theme="gradio/soft") as chatbot_ui:
+    gr.Markdown(
+        """
+        # MGZon Chatbot 🤖
+        A versatile chatbot powered by DeepSeek, GPT-OSS, CLIP, Whisper, and Parler-TTS. Supports text, audio, and image inputs with text or voice outputs. Upload files, record audio, or type your query and choose your output format!
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=3):
+            chatbot = gr.Chatbot(label="Chat", height=500, latex_delimiters=LATEX_DELIMS)
+        with gr.Column(scale=1):
+            with gr.Accordion("⚙️ Settings", open=True):
+                system_prompt = gr.Textbox(
+                    label="System Prompt",
+                    value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image, and file inputs. For audio, transcribe using Whisper. For text-to-speech, use Parler-TTS. For images, analyze content appropriately. Respond in the requested output format (text or audio).",
+                    lines=4
+                )
+                temperature = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7)
+                reasoning_effort = gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium")
+                enable_browsing = gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True)
+                max_new_tokens = gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000)
+                output_format = gr.Radio(
+                    label="Output Format",
+                    choices=["text", "audio"],
+                    value="text",
+                    elem_classes="output-format-radio"
+                )
+    with gr.Row():
+        message = gr.Textbox(label="Type your message", placeholder="Enter your query or describe your request...", lines=2, elem_classes="input-textbox")
+        submit_btn = gr.Button("Send", variant="primary")
+    with gr.Row():
+        with gr.Column(scale=1):
+            audio_input = gr.Audio(label="Record or Upload Audio", type="filepath", elem_classes="audio-input")
+            audio_submit_btn = gr.Button("Send Audio", elem_classes="audio-input-button")
+        with gr.Column(scale=1):
+            image_input = gr.File(label="Upload Image", file_types=["image"], elem_classes="upload-button")
+            image_submit_btn = gr.Button("Send Image", elem_classes="upload-button")
+    output_text = gr.Textbox(label="Response", lines=10, elem_classes="output-container")
+    output_audio = gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output-container", autoplay=True)
+
+    # Wire up the buttons
+    submit_btn.click(
+        fn=process_input,
+        inputs=[message, audio_input, image_input, chatbot, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, output_format],
+        outputs=[output_text, output_audio]
+    )
+    audio_submit_btn.click(
+        fn=submit_audio,
+        inputs=[audio_input, output_format],
+        outputs=[output_text, output_audio]
+    )
+    image_submit_btn.click(
+        fn=submit_image,
+        inputs=[image_input, output_format],
+        outputs=[output_text, output_audio]
+    )

 # FastAPI setup
 app = FastAPI(title="MGZon Chatbot API")
-app.include_router(api_router)

 # Mount Gradio on FastAPI
 app = gr.mount_gradio_app(app, chatbot_ui, path="/gradio")
@@ -163,22 +232,27 @@ class NotFoundMiddleware(BaseHTTPMiddleware):

 app.add_middleware(NotFoundMiddleware)

+# Root endpoint
 @app.get("/", response_class=HTMLResponse)
 async def root(request: Request):
     return templates.TemplateResponse("index.html", {"request": request})

+# Docs endpoint
 @app.get("/docs", response_class=HTMLResponse)
 async def docs(request: Request):
     return templates.TemplateResponse("docs.html", {"request": request})

+# Swagger UI endpoint
 @app.get("/swagger", response_class=HTMLResponse)
 async def swagger_ui():
     return get_swagger_ui_html(openapi_url="/openapi.json", title="MGZon API Documentation")

+# Redirect to /gradio
 @app.get("/launch-chatbot", response_class=RedirectResponse)
 async def launch_chatbot():
     return RedirectResponse(url="/gradio", status_code=302)

+# Run the server
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
requirements.txt CHANGED
@@ -1,11 +1,11 @@
 fastapi==0.115.2
 uvicorn==0.30.6
-gradio>=4.44.1
+gradio==4.48.0
 openai==1.42.0
 httpx==0.27.0
 python-dotenv==1.0.1
 pydocstyle==6.3.0
-requests==2.32.5
+requests==2.32.3
 beautifulsoup4==4.12.3
 tenacity==8.5.0
 selenium==4.25.0
@@ -18,7 +18,7 @@ numpy==1.26.4
 parler-tts @ git+https://github.com/huggingface/parler-tts.git@5d0aca9753ab74ded179732f5bd797f7a8c6f8ee
 torch==2.4.1
 torchaudio==2.4.1
-transformers==4.43.3
+transformers==4.45.1
 webrtcvad==2.0.10
 Pillow==10.4.0
 urllib3==2.0.7
utils/generation.py CHANGED
@@ -15,7 +15,7 @@ import torchaudio
 from PIL import Image
 from transformers import CLIPModel, CLIPProcessor, AutoProcessor
 from parler_tts import ParlerTTSForConditionalGeneration
-from utils.web_search import web_search  # direct import
+from utils.web_search import web_search  # moved the import to the top

 logger = logging.getLogger(__name__)

@@ -66,19 +66,35 @@ def check_model_availability(model_name: str, api_base: str, api_key: str) -> tu

 def select_model(query: str, input_type: str = "text") -> tuple[str, str]:
     query_lower = query.lower()
+    # Audio support
     if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
         logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
         return ASR_MODEL, FALLBACK_API_ENDPOINT
+    # Text-to-speech support
     if any(keyword in query_lower for keyword in ["text-to-speech", "tts", "تحويل نص إلى صوت"]):
         logger.info(f"Selected {TTS_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for text-to-speech")
         return TTS_MODEL, FALLBACK_API_ENDPOINT
-    if input_type == "image" or any(pattern in query_lower for pattern in [
+    # CLIP models for images
+    image_patterns = [
         r"\bimage\b", r"\bpicture\b", r"\bphoto\b", r"\bvisual\b", r"\bصورة\b", r"\bتحليل\s+صورة\b",
         r"\bimage\s+analysis\b", r"\bimage\s+classification\b", r"\bimage\s+description\b"
-    ]):
-        logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
-        return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
-    logger.info(f"Selected {MODEL_NAME} with endpoint {API_ENDPOINT} for general query: {query}")
+    ]
+    for pattern in image_patterns:
+        if re.search(pattern, query_lower, re.IGNORECASE):
+            logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
+            return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
+    # Pick a model based on availability
+    available_models = [
+        (MODEL_NAME, API_ENDPOINT),
+        (SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT),
+        (TERTIARY_MODEL_NAME, FALLBACK_API_ENDPOINT)
+    ]
+    for model_name, api_endpoint in available_models:
+        is_available, _ = check_model_availability(model_name, api_endpoint, HF_TOKEN)
+        if is_available:
+            logger.info(f"Selected {model_name} with endpoint {api_endpoint} for query: {query}")
+            return model_name, api_endpoint
+    logger.error("No models available. Falling back to default.")
     return MODEL_NAME, API_ENDPOINT

 @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=2, min=4, max=60))
@@ -98,6 +114,7 @@ def request_generation(
     input_type: str = "text",
     audio_data: Optional[bytes] = None,
     image_data: Optional[bytes] = None,
+    output_format: str = "text"  # new: output type (text or audio)
 ) -> Generator[bytes | str, None, None]:
     is_available, selected_api_key = check_model_availability(model_name, api_base, api_key)
     if not is_available:
@@ -110,7 +127,8 @@
         "model_name": model_name,
         "chat_history": chat_history,
         "temperature": temperature,
-        "max_new_tokens": max_new_tokens
+        "max_new_tokens": max_new_tokens,
+        "output_format": output_format
     }, sort_keys=True).encode()).hexdigest()

     if cache_key in cache:
@@ -123,7 +141,8 @@
     task_type = "general"
     enhanced_system_prompt = system_prompt

-    if model_name == ASR_MODEL and audio_data is not None:
+    # Audio handling (ASR)
+    if model_name == ASR_MODEL and audio_data:
         task_type = "audio_transcription"
         try:
             audio_file = io.BytesIO(audio_data)
@@ -145,11 +164,12 @@
             yield f"Error: Audio transcription failed: {e}"
             return

-    if model_name == TTS_MODEL:
+    # Text-to-speech handling (TTS)
+    if model_name == TTS_MODEL or output_format == "audio":
         task_type = "text_to_speech"
         try:
-            model = ParlerTTSForConditionalGeneration.from_pretrained(model_name, token=selected_api_key)
-            processor = AutoProcessor.from_pretrained(model_name, token=selected_api_key)
+            model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+            processor = AutoProcessor.from_pretrained(TTS_MODEL)
             inputs = processor(text=message, return_tensors="pt")
             audio = model.generate(**inputs)
             audio_file = io.BytesIO()
@@ -163,44 +183,58 @@
             yield f"Error: Text-to-speech failed: {e}"
             return

-    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data is not None:
+    # Image handling
+    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
         task_type = "image_analysis"
         try:
-            model = CLIPModel.from_pretrained(model_name, token=selected_api_key)
-            processor = CLIPProcessor.from_pretrained(model_name, token=selected_api_key)
+            model = CLIPModel.from_pretrained(model_name)
+            processor = CLIPProcessor.from_pretrained(model_name)
             image = Image.open(io.BytesIO(image_data)).convert("RGB")
             inputs = processor(text=message, images=image, return_tensors="pt", padding=True)
             outputs = model(**inputs)
             logits_per_image = outputs.logits_per_image
             probs = logits_per_image.softmax(dim=1)
-            yield f"Image analysis result: {probs.tolist()}"
-            cache[cache_key] = [f"Image analysis result: {probs.tolist()}"]
+            result = f"Image analysis result: {probs.tolist()}"
+            if output_format == "audio":
+                # Convert the result to speech
+                model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+                processor = AutoProcessor.from_pretrained(TTS_MODEL)
+                inputs = processor(text=result, return_tensors="pt")
+                audio = model.generate(**inputs)
+                audio_file = io.BytesIO()
+                torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                audio_file.seek(0)
+                yield audio_file.read()
+            else:
+                yield result
+            cache[cache_key] = [result]
             return
         except Exception as e:
             logger.error(f"Image analysis failed: {e}")
             yield f"Error: Image analysis failed: {e}"
             return

+    # Tailor the system prompt to the task type
     if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
         task_type = "image"
-        enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query."
+        enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query. Continue until the query is fully addressed."
     elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
         task_type = "code"
-        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations."
+        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations. Support frameworks like React, Django, Flask, and others. Format code with triple backticks (```) and specify the language. Continue until the task is fully addressed."
     elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
         task_type = "analysis"
-        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights."
+        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights. Continue until all aspects of the query are thoroughly covered."
     elif any(keyword in message.lower() for keyword in ["review", "مراجعة"]):
         task_type = "review"
-        enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations."
+        enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations. Ensure the response is complete and detailed."
     elif any(keyword in message.lower() for keyword in ["publish", "نشر"]):
         task_type = "publish"
-        enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices."
+        enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices. Provide a complete and detailed response."
     else:
-        enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable."
+        enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable. Continue generating content until the query is fully answered, leveraging the full capacity of the model."

     if len(message.split()) < 5:
-        enhanced_system_prompt += "\nEven for short queries, provide a detailed, in-depth response with examples and context."
+        enhanced_system_prompt += "\nEven for short or general queries, provide a detailed, in-depth response with examples, explanations, and additional context to ensure completeness."

     logger.info(f"Task type detected: {task_type}")
     input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
@@ -289,7 +323,7 @@
                     reasoning_closed = True

                 if not saw_visible_output:
-                    msg = "I attempted to call a tool, but tools aren't executed in this environment."
+                    msg = "I attempted to call a tool, but tools aren't executed in this environment, so no final answer was produced."
                     if last_tool_name:
                         try:
                             args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
@@ -303,14 +337,30 @@
                     cached_chunks.append(f"Error: Unknown error")
                     yield f"Error: Unknown error"
                 elif chunk.choices[0].finish_reason == "length":
-                    cached_chunks.append("Response truncated due to token limit. Please refine your query.")
-                    yield "Response truncated due to token limit. Please refine your query."
+                    cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
+                    yield "Response truncated due to token limit. Please refine your query or request continuation."
                 break

         if buffer:
             cached_chunks.append(buffer)
             yield buffer

+        # If audio output was requested
+        if output_format == "audio" and buffer:
+            try:
+                model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+                processor = AutoProcessor.from_pretrained(TTS_MODEL)
+                inputs = processor(text=buffer, return_tensors="pt")
+                audio = model.generate(**inputs)
+                audio_file = io.BytesIO()
+                torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                audio_file.seek(0)
+                cached_chunks.append(audio_file.read())
+                yield audio_file.read()
+            except Exception as e:
+                logger.error(f"Text-to-speech conversion failed: {e}")
+                yield f"Error: Text-to-speech conversion failed: {e}"
+
         cache[cache_key] = cached_chunks

     except Exception as e:
@@ -333,16 +383,20 @@
                 input_type=input_type,
                 audio_data=audio_data,
                 image_data=image_data,
+                output_format=output_format,
             ):
                 yield chunk
             return
-        for fallback_model in [SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME]:
-            logger.info(f"Retrying with fallback model: {fallback_model}")
+        if model_name == MODEL_NAME:
+            fallback_model = SECONDARY_MODEL_NAME
+            fallback_endpoint = FALLBACK_API_ENDPOINT
+            logger.info(f"Retrying with fallback model: {fallback_model} on {fallback_endpoint}")
             try:
-                is_available, selected_api_key = check_model_availability(fallback_model, FALLBACK_API_ENDPOINT, selected_api_key)
+                is_available, selected_api_key = check_model_availability(fallback_model, fallback_endpoint, selected_api_key)
                 if not is_available:
-                    continue
-                client = OpenAI(api_key=selected_api_key, base_url=FALLBACK_API_ENDPOINT, timeout=120.0)
+                    yield f"Error: Fallback model {fallback_model} is not available."
+                    return
+                client = OpenAI(api_key=selected_api_key, base_url=fallback_endpoint, timeout=120.0)
                 stream = client.chat.completions.create(
                     model=fallback_model,
                     messages=input_messages,
@@ -355,18 +409,39 @@
                 for chunk in stream:
                     if chunk.choices[0].delta.content:
                         content = chunk.choices[0].delta.content
+                        if content == "<|channel|>analysis<|message|>":
+                            if not reasoning_started:
+                                cached_chunks.append("analysis")
+                                yield "analysis"
+                                reasoning_started = True
+                            continue
+                        if content == "<|channel|>final<|message|>":
+                            if reasoning_started and not reasoning_closed:
+                                cached_chunks.append("assistantfinal")
+                                yield "assistantfinal"
+                                reasoning_closed = True
+                            continue
+
                         saw_visible_output = True
                         buffer += content
+
                         if "\n" in buffer or len(buffer) > 5000:
                             cached_chunks.append(buffer)
                             yield buffer
                             buffer = ""
                         continue
+
                     if chunk.choices[0].finish_reason in ("stop", "error", "length"):
                         if buffer:
                             cached_chunks.append(buffer)
                             yield buffer
                             buffer = ""
+
+                        if reasoning_started and not reasoning_closed:
+                            cached_chunks.append("assistantfinal")
+                            yield "assistantfinal"
+                            reasoning_closed = True
+
                         if not saw_visible_output:
                             cached_chunks.append("No visible output produced.")
                             yield "No visible output produced."
@@ -374,19 +449,91 @@
                             cached_chunks.append(f"Error: Unknown error with fallback model {fallback_model}")
                             yield f"Error: Unknown error with fallback model {fallback_model}"
                         elif chunk.choices[0].finish_reason == "length":
-                            cached_chunks.append("Response truncated due to token limit.")
-                            yield "Response truncated due to token limit."
+                            cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
+                            yield "Response truncated due to token limit. Please refine your query or request continuation."
                         break
-                if buffer:
-                    cached_chunks.append(buffer)
-                    yield buffer
+
+                if buffer and output_format == "audio":
+                    try:
+                        model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+                        processor = AutoProcessor.from_pretrained(TTS_MODEL)
+                        inputs = processor(text=buffer, return_tensors="pt")
+                        audio = model.generate(**inputs)
+                        audio_file = io.BytesIO()
+                        torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                        audio_file.seek(0)
+                        cached_chunks.append(audio_file.read())
+                        yield audio_file.read()
+                    except Exception as e:
+                        logger.error(f"Text-to-speech conversion failed: {e}")
+                        yield f"Error: Text-to-speech conversion failed: {e}"
+
                 cache[cache_key] = cached_chunks
-                return
+
             except Exception as e2:
                 logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
-                continue
-        yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({SECONDARY_MODEL_NAME}), Tertiary ({TERTIARY_MODEL_NAME})."
-        return
+                try:
+                    is_available, selected_api_key = check_model_availability(TERTIARY_MODEL_NAME, FALLBACK_API_ENDPOINT, selected_api_key)
+                    if not is_available:
+                        yield f"Error: Tertiary model {TERTIARY_MODEL_NAME} is not available."
+                        return
+                    client = OpenAI(api_key=selected_api_key, base_url=FALLBACK_API_ENDPOINT, timeout=120.0)
+                    stream = client.chat.completions.create(
+                        model=TERTIARY_MODEL_NAME,
+                        messages=input_messages,
+                        temperature=temperature,
+                        max_tokens=max_new_tokens,
+                        stream=True,
+                        tools=[],
+                        tool_choice="none",
+                    )
+                    for chunk in stream:
+                        if chunk.choices[0].delta.content:
+                            content = chunk.choices[0].delta.content
+                            saw_visible_output = True
+                            buffer += content
+                            if "\n" in buffer or len(buffer) > 5000:
+                                cached_chunks.append(buffer)
+                                yield buffer
+                                buffer = ""
+                            continue
+                        if chunk.choices[0].finish_reason in ("stop", "error", "length"):
+                            if buffer:
+                                cached_chunks.append(buffer)
+                                yield buffer
+                                buffer = ""
+                            if not saw_visible_output:
+                                cached_chunks.append("No visible output produced.")
+                                yield "No visible output produced."
+                            if chunk.choices[0].finish_reason == "error":
+                                cached_chunks.append(f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}")
+                                yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
+                            elif chunk.choices[0].finish_reason == "length":
+                                cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
+                                yield "Response truncated due to token limit. Please refine your query or request continuation."
+                            break
+                    if buffer and output_format == "audio":
+                        try:
+                            model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
+                            processor = AutoProcessor.from_pretrained(TTS_MODEL)
+                            inputs = processor(text=buffer, return_tensors="pt")
+                            audio = model.generate(**inputs)
+                            audio_file = io.BytesIO()
+                            torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
+                            audio_file.seek(0)
+                            cached_chunks.append(audio_file.read())
+                            yield audio_file.read()
+                        except Exception as e:
+                            logger.error(f"Text-to-speech conversion failed: {e}")
+                            yield f"Error: Text-to-speech conversion failed: {e}"
+                    cache[cache_key] = cached_chunks
+                except Exception as e3:
+                    logger.exception(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
+                    yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({fallback_model}), Tertiary ({TERTIARY_MODEL_NAME}). Please check your model configurations."
+                    return
+        else:
+            yield f"Error: Failed to load model {model_name}: {e}"
+            return

 def format_final(analysis_text: str, visible_text: str) -> str:
     reasoning_safe = html.escape((analysis_text or "").strip())
@@ -402,7 +549,7 @@ def format_final(analysis_text: str, visible_text: str) -> str:
         f"{response}" if response else "No final response available."
     )

-def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None):
+def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None, output_format="text"):
     if not message.strip() and not audio_data and not image_data:
         yield "Please enter a prompt or upload a file."
         return
@@ -436,7 +583,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
             "type": "function",
             "function": {
                 "name": "code_generation",
-                "description": "Generate or modify code for various frameworks",
+                "description": "Generate or modify code for various frameworks (React, Django, Flask, etc.)",
                 "parameters": {
                     "type": "object",
                     "properties": {
@@ -514,6 +661,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
         input_type=input_type,
         audio_data=audio_data,
         image_data=image_data,
+        output_format=output_format,
     )

     for chunk in stream:
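A minimal consumer sketch for the updated generator (illustrative, not part of the commit). It assumes HF_TOKEN is set and mirrors the keyword arguments visible in the endpoint calls above; the full signature in utils/generation.py may require further arguments (for example a system prompt or chat history). The generator yields str chunks for text and bytes chunks for WAV audio.

import os
from utils.generation import request_generation, select_model

model_name, api_endpoint = select_model("Summarize the history of AI")
text_parts, audio_bytes = [], b""
for chunk in request_generation(
    api_key=os.getenv("HF_TOKEN"),
    api_base=api_endpoint,
    message="Summarize the history of AI",
    model_name=model_name,
    temperature=0.7,
    max_new_tokens=1024,
    output_format="text",
):
    if isinstance(chunk, bytes):
        audio_bytes += chunk          # only produced when output_format="audio"
    else:
        text_parts.append(chunk)
print("".join(text_parts))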
utils/web_search.py CHANGED
@@ -12,18 +12,27 @@ def web_search(query: str) -> str:
         if not google_api_key or not google_cse_id:
             return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
         url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
-        response = requests.get(url, timeout=5)
+        response = requests.get(url, timeout=10)
         response.raise_for_status()
         results = response.json().get("items", [])
         if not results:
             return "No web results found."
         search_results = []
-        for i, item in enumerate(results[:3]):  # fewer results to speed up the search
+        for i, item in enumerate(results[:5]):
             title = item.get("title", "")
             snippet = item.get("snippet", "")
             link = item.get("link", "")
-            search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {snippet}\n")
+            try:
+                page_response = requests.get(link, timeout=5)
+                page_response.raise_for_status()
+                soup = BeautifulSoup(page_response.text, "html.parser")
+                paragraphs = soup.find_all("p")
+                page_content = " ".join([p.get_text() for p in paragraphs][:1000])
+            except Exception as e:
+                logger.warning(f"Failed to fetch page content for {link}: {e}")
+                page_content = snippet
+            search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n")
         return "\n".join(search_results)
     except Exception as e:
-        logger.exception(f"Web search failed: {e}")
+        logger.exception("Web search failed")
        return f"Web search error: {e}"