Mark-Lasfar committed
Commit · dcad397
1 Parent(s): a1a7a58

endpoints.py generation.py

Files changed:
- api/endpoints.py +92 -21
- generated_image.png +3 -0
- requirements.txt +8 -1
- templates/index.html +1 -1
- utils/generation.py +123 -12
- utils/utils/constants.py +21 -0
api/endpoints.py CHANGED

@@ -20,7 +20,9 @@ from motor.motor_asyncio import AsyncIOMotorClient
 from datetime import datetime
 import logging
 from typing import List, Optional
-
+from utils.constants import MODEL_ALIASES, MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME, CLIP_BASE_MODEL, CLIP_LARGE_MODEL, ASR_MODEL, TTS_MODEL, IMAGE_GEN_MODEL, SECONDARY_IMAGE_GEN_MODEL
+import psutil
+import time
 router = APIRouter()
 logger = logging.getLogger(__name__)
 
@@ -37,24 +39,7 @@ if not BACKUP_HF_TOKEN:
 ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
 API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
 FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co/v1")
-
-SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
-TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3-8b-chat-hf")
-CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
-CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
-ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
-TTS_MODEL = os.getenv("TTS_MODEL", "facebook/mms-tts-ara")
-
-# Model alias mapping for user-friendly names
-MODEL_ALIASES = {
-    "advanced": MODEL_NAME,
-    "standard": SECONDARY_MODEL_NAME,
-    "light": TERTIARY_MODEL_NAME,
-    "image_base": CLIP_BASE_MODEL,
-    "image_advanced": CLIP_LARGE_MODEL,
-    "audio": ASR_MODEL,
-    "tts": TTS_MODEL
-}
+
 
 # MongoDB setup
 MONGO_URI = os.getenv("MONGODB_URI")
@@ -62,6 +47,10 @@ client = AsyncIOMotorClient(MONGO_URI)
 db = client["hager"]
 session_message_counts = db["session_message_counts"]
 
+class ImageGenRequest(BaseModel):
+    prompt: str
+    output_format: str = "image"
+
 # Helper function to handle sessions for non-logged-in users
 async def handle_session(request: Request):
     if not hasattr(request, "session"):
@@ -142,7 +131,7 @@ async def performance_stats():
     return {
         "queue_size": int(os.getenv("QUEUE_SIZE", 80)),
         "concurrency_limit": int(os.getenv("CONCURRENCY_LIMIT", 20)),
-        "uptime":
+        "uptime": time.time() - psutil.boot_time()  # system uptime in seconds
     }
 
 @router.post("/api/chat")
@@ -287,6 +276,88 @@ async def chat_endpoint(
 
     return {"response": response}
 
+
+@router.post("/api/image-generation")
+async def image_generation_endpoint(
+    request: Request,
+    req: dict,
+    file: Optional[UploadFile] = File(None),
+    user: User = Depends(current_active_user),
+    db: AsyncSession = Depends(get_db)
+):
+    if not user:
+        await handle_session(request)
+
+    prompt = req.get("prompt", "")
+    output_format = req.get("output_format", "image")
+    if not prompt.strip():
+        raise HTTPException(status_code=400, detail="Prompt is required for image generation.")
+
+    model_name, api_endpoint = select_model(prompt, input_type="image_gen")
+
+    is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
+    if not is_available:
+        logger.error(f"Model {model_name} is not available at {api_endpoint}")
+        raise HTTPException(status_code=503, detail=f"Model {model_name} is not available. Please try another model.")
+
+    image_data = None
+    if file:
+        image_data = await file.read()
+
+    system_prompt = enhance_system_prompt(
+        "You are an expert in generating high-quality images based on detailed prompts. Ensure the output is visually appealing and matches the user's description.",
+        prompt, user
+    )
+
+    stream = request_generation(
+        api_key=api_key,
+        api_base=selected_endpoint,
+        message=prompt,
+        system_prompt=system_prompt,
+        model_name=model_name,
+        temperature=0.7,
+        max_new_tokens=2048,
+        input_type="image_gen",
+        image_data=image_data,
+        output_format=output_format
+    )
+
+    if output_format == "image":
+        image_chunks = []
+        try:
+            for chunk in stream:
+                logger.debug(f"Processing image chunk: {chunk[:100] if isinstance(chunk, str) else 'bytes'}")
+                if isinstance(chunk, bytes):
+                    image_chunks.append(chunk)
+                else:
+                    logger.warning(f"Unexpected non-bytes chunk in image stream: {chunk}")
+            if not image_chunks:
+                logger.error("No image data generated.")
+                raise HTTPException(status_code=500, detail="No image data generated for image generation.")
+            image_data = b"".join(image_chunks)
+            return StreamingResponse(io.BytesIO(image_data), media_type="image/png")
+        except Exception as e:
+            logger.error(f"Image generation failed: {e}")
+            raise HTTPException(status_code=500, detail=f"Image generation failed: {str(e)}")
+
+    response_chunks = []
+    try:
+        for chunk in stream:
+            logger.debug(f"Processing text chunk: {chunk[:100]}...")
+            if isinstance(chunk, str) and chunk.strip() and chunk not in ["analysis", "assistantfinal"]:
+                response_chunks.append(chunk)
+            else:
+                logger.warning(f"Skipping chunk: {chunk}")
+        response = "".join(response_chunks)
+        if not response.strip():
+            logger.error("Empty response generated.")
+            raise HTTPException(status_code=500, detail="Empty response generated from model.")
+        return {"response": response}
+    except Exception as e:
+        logger.error(f"Image generation failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Image generation failed: {str(e)}")
+
+
 @router.post("/api/audio-transcription")
 async def audio_transcription_endpoint(
     request: Request,
@@ -824,7 +895,7 @@ async def verify_token(user: User = Depends(current_active_user)):
         raise HTTPException(status_code=401, detail="Invalid or expired token")
     return {"status": "valid"}
 
-
+
 @router.put("/users/me")
 async def update_user_settings(
     settings: UserUpdate,
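For a quick sanity check of the new /api/image-generation route, a client call could look roughly like the sketch below. The base URL, timeout, and output filename are assumptions rather than part of this commit, and the authenticated-session and file-upload paths are omitted.

import requests

resp = requests.post(
    "http://localhost:8000/api/image-generation",  # assumed local dev server
    json={"prompt": "a lighthouse at sunset over a calm sea", "output_format": "image"},
    timeout=300,
)
resp.raise_for_status()
if resp.headers.get("content-type", "").startswith("image/"):
    # output_format == "image" streams PNG bytes back
    with open("generated_image.png", "wb") as f:
        f.write(resp.content)
else:
    # any other output_format returns {"response": "..."}
    print(resp.json()["response"])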
generated_image.png ADDED

Git LFS Details
requirements.txt CHANGED

@@ -2,6 +2,7 @@ fastapi==0.95.2
 fastapi-users[sqlalchemy,oauth2]==10.4.2
 pydantic==1.10.13
 email-validator==1.3.1
+sqlalchemy[asyncio]
 aiosqlite==0.21.0
 sqlalchemy==2.0.43
 python-jose[cryptography]==3.3.0
@@ -39,9 +40,15 @@ pymongo==4.10.1
 parler-tts @ git+https://github.com/huggingface/parler-tts.git@5d0aca9753ab74ded179732f5bd797f7a8c6f8ee
 soupsieve>=2.5
 tqdm>=4.66.0
+git+https://github.com/Dao-AILab/flash-attention.git
 argon2-cffi>=23.1.0
 wsproto>=1.2.0
 descript-audiotools>=0.7.2
 scipy>=1.15.0
 librosa>=0.10.0
-matplotlib>=3.10.0
+matplotlib>=3.10.0
+vllm
+accelerate
+flash-attn
+diffusers
+psutil
templates/index.html CHANGED

@@ -217,7 +217,7 @@
 <div class="glass p-6">
 <h3 class="text-xl font-semibold mb-2">New AI Features</h3>
 <p>Explore our latest AI updates for smarter code and e-commerce tools.</p>
-<a href="
+<a href="/blog" target="_blank" class="text-emerald-300 hover:underline">Read More →</a>
 </div>
 <div class="glass p-6">
 <h3 class="text-xl font-semibold mb-2">Global Expansion</h3>
utils/generation.py CHANGED

@@ -1,7 +1,4 @@
 # utils/generation.py
-# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
-# SPDX-License-Identifier: Apache-2.0
-
 import os
 import re
 import json
@@ -20,7 +17,11 @@ from PIL import Image
 from transformers import CLIPModel, CLIPProcessor, AutoProcessor
 from parler_tts import ParlerTTSForConditionalGeneration
 from utils.web_search import web_search
-
+from huggingface_hub import snapshot_download
+import torch
+from qwenimage.pipeline_qwen_image_edit import QwenImageEditPipeline
+from qwenimage.pipeline_qwen_image import QwenImagePipeline
+from utils.constants import MODEL_ALIASES, MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME, CLIP_BASE_MODEL, CLIP_LARGE_MODEL, ASR_MODEL, TTS_MODEL, IMAGE_GEN_MODEL, SECONDARY_IMAGE_GEN_MODEL
 logger = logging.getLogger(__name__)
 
 # Cache setup
@@ -34,20 +35,46 @@ LATEX_DELIMS = [
     {"left": "\\(", "right": "\\)", "display": False},
 ]
 
+
 # Client setup for the Hugging Face API
 HF_TOKEN = os.getenv("HF_TOKEN")
 BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
 API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
 FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co/v1")
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
-SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
-TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3-8b-chat-hf")
-CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
-CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
-ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
-TTS_MODEL = os.getenv("TTS_MODEL", "facebook/mms-tts-ara")
 
+# Pre-download the FLUX.1-dev model if needed
+try:
+    model_path = snapshot_download(
+        repo_id="black-forest-labs/FLUX.1-dev",
+        repo_type="model",
+        ignore_patterns=["*.md", "*..gitattributes"],
+        local_dir="FLUX.1-dev",
+    )
+except Exception as e:
+    logger.error(f"Failed to download FLUX.1-dev: {e}")
+
+
+# FlashAttention-3 support
+_flash_attn_func = None
+_kernels_err = None
+try:
+    _k = get_kernel("kernels-community/vllm-flash-attn3")
+    _flash_attn_func = _k.flash_attn_func
+except Exception as e:
+    _flash_attn_func = None
+    _kernels_err = e
+
+def _ensure_fa3_available():
+    if _flash_attn_func is None:
+        raise ImportError(
+            "FlashAttention-3 via Hugging Face `kernels` is required. "
+            f"Tried `get_kernel('kernels-community/vllm-flash-attn3')` and failed with:\n{_kernels_err}"
+        )
 # PROVIDER_ENDPOINTS disabled because only Hugging Face is used
 PROVIDER_ENDPOINTS = {
     "huggingface": API_ENDPOINT
@@ -95,10 +122,18 @@ def select_model(query: str, input_type: str = "text", preferred_model: Optional
         r"\bimage\b", r"\bpicture\b", r"\bphoto\b", r"\bvisual\b", r"\bصورة\b", r"\bتحليل\s+صورة\b",
         r"\bimage\s+analysis\b", r"\bimage\s+classification\b", r"\bimage\s+description\b"
     ]
+    image_gen_patterns = [
+        r"\bgenerate\s+image\b", r"\bcreate\s+image\b", r"\bimage\s+generation\b", r"\bصورة\s+توليد\b",
+        r"\bimage\s+edit\b", r"\bتحرير\s+صورة\b"
+    ]
     for pattern in image_patterns:
         if re.search(pattern, query_lower, re.IGNORECASE):
             logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query[:50]}...")
             return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
+    for pattern in image_gen_patterns:
+        if re.search(pattern, query_lower, re.IGNORECASE) or input_type == "image_gen":
+            logger.info(f"Selected {IMAGE_GEN_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image generation query: {query[:50]}...")
+            return IMAGE_GEN_MODEL, FALLBACK_API_ENDPOINT
     available_models = [
         (MODEL_NAME, API_ENDPOINT),
         (SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT),
@@ -112,6 +147,7 @@ def select_model(query: str, input_type: str = "text", preferred_model: Optional
         logger.error("No models available. Falling back to default.")
         return MODEL_NAME, API_ENDPOINT
 
+
 @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=2, min=4, max=60))
 def request_generation(
     api_key: str,
@@ -157,6 +193,7 @@ def request_generation(
     enhanced_system_prompt = system_prompt
     buffer = ""
 
+    # Audio processing
     if model_name == ASR_MODEL and audio_data:
         task_type = "audio_transcription"
         try:
@@ -180,6 +217,7 @@ def request_generation(
             yield f"Error: Audio transcription failed: {e}"
             return
 
+    # Text-to-speech processing
     if model_name == TTS_MODEL or output_format == "audio":
         task_type = "text_to_speech"
         try:
@@ -200,6 +238,7 @@ def request_generation(
             yield f"Error: Text-to-speech failed: {e}"
             return
 
+    # Image analysis processing
     if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
         task_type = "image_analysis"
         try:
@@ -231,6 +270,51 @@ def request_generation(
             yield f"Error: Image analysis failed: {e}"
             return
 
+    # Image generation or editing
+    if model_name in [IMAGE_GEN_MODEL, SECONDARY_IMAGE_GEN_MODEL] or input_type == "image_gen":
+        task_type = "image_generation"
+        try:
+            dtype = torch.float16  # can be adjusted for the available hardware
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            _ensure_fa3_available()  # make sure FlashAttention-3 is available
+            if model_name == IMAGE_GEN_MODEL:
+                pipe = QwenImagePipeline.from_pretrained(model_name, torch_dtype=dtype).to(device)
+                pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
+            else:
+                pipe = QwenImageEditPipeline.from_pretrained(model_name, torch_dtype=dtype).to(device)
+                pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
+
+            # Set up the image generation parameters
+            polished_prompt = polish_prompt(message)
+            image_params = {
+                "prompt": polished_prompt,
+                "seed": 0,
+                "randomize_seed": True,
+                "aspect_ratio": "16:9",
+                "guidance_scale": 4,
+                "num_inference_steps": 50,
+                "prompt_enhance": True
+            }
+            if input_type == "image_gen" and image_data:
+                image = Image.open(io.BytesIO(image_data)).convert("RGB")
+                image_params["image"] = image
+
+            # Generate the image
+            output = pipe(**image_params)
+            image_file = io.BytesIO()
+            output.images[0].save(image_file, format="PNG")
+            image_file.seek(0)
+            image_data = image_file.read()
+            logger.debug(f"Generated image data of length: {len(image_data)} bytes")
+            yield image_data
+            cache[cache_key] = [image_data]
+            return
+        except Exception as e:
+            logger.error(f"Image generation failed: {e}")
+            yield f"Error: Image generation failed: {e}"
+            return
+
+    # Text handling (as in the original code)
     if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
         task_type = "image"
         enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query."
@@ -259,7 +343,7 @@ def request_generation(
         clean_msg = {"role": msg.get("role"), "content": msg.get("content")}
         if clean_msg["content"]:
             input_messages.append(clean_msg)
-
+
     if deep_search:
         try:
             search_result = web_search(message)
@@ -563,6 +647,7 @@ def request_generation(
         yield f"Error: Failed to load model {model_name}: {e}"
         return
 
+
 def format_final(analysis_text: str, visible_text: str) -> str:
     reasoning_safe = html.escape((analysis_text or "").strip())
     response = (visible_text or "").strip()
@@ -577,6 +662,32 @@ def format_final(analysis_text: str, visible_text: str) -> str:
         f"{response}" if response else "No final response available."
     )
 
+
+def polish_prompt(original_prompt: str, image: Optional[Image.Image] = None) -> str:
+    original_prompt = original_prompt.strip()
+    system_prompt = "You are an expert in generating high-quality prompts for image generation. Rewrite the user input to be clear, descriptive, and optimized for creating visually appealing images."
+    if any(0x0600 <= ord(char) <= 0x06FF for char in original_prompt):
+        system_prompt += "\nRespond in Arabic with a polished prompt suitable for image generation."
+    prompt = f"{system_prompt}\n\nUser Input: {original_prompt}\n\nRewritten Prompt:"
+    magic_prompt = "Ultra HD, 4K, cinematic composition"
+    success = False
+    while not success:
+        try:
+            polished_prompt = client.chat.completions.create(
+                model=MODEL_NAME,
+                messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}],
+                temperature=0.7,
+                max_tokens=200
+            ).choices[0].message.content.strip()
+            polished_prompt = polished_prompt.replace("\n", " ")
+            success = True
+        except Exception as e:
+            logger.error(f"Error during prompt polishing: {e}")
+            polished_prompt = original_prompt
+            break
+    return polished_prompt + " " + magic_prompt
+
+
 def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None, output_format="text"):
     if not message.strip() and not audio_data and not image_data:
         yield "Please enter a prompt or upload a file."
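To see how the new branches in utils/generation.py fit together: select_model now routes generation-style prompts (or input_type="image_gen") to IMAGE_GEN_MODEL, and request_generation yields raw PNG bytes for that task instead of text chunks. A minimal sketch under those assumptions, with parameter values mirroring the call in api/endpoints.py; error chunks arrive as strings.

import os
from utils.generation import select_model, request_generation

model_name, endpoint = select_model("generate image of a desert fox", input_type="image_gen")
# Expected to resolve to IMAGE_GEN_MODEL with the fallback endpoint.

chunks = request_generation(
    api_key=os.getenv("HF_TOKEN"),
    api_base=endpoint,
    message="generate image of a desert fox",
    system_prompt="You are an expert image generator.",
    model_name=model_name,
    temperature=0.7,
    max_new_tokens=2048,
    input_type="image_gen",
    output_format="image",
)
png_bytes = b"".join(c for c in chunks if isinstance(c, bytes))  # non-bytes chunks are error strings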
utils/utils/constants.py ADDED

@@ -0,0 +1,21 @@
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
+SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
+TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct:featherless-ai")
+CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
+CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
+ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
+TTS_MODEL = os.getenv("TTS_MODEL", "facebook/mms-tts-ara")
+IMAGE_GEN_MODEL = os.getenv("IMAGE_GEN_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
+SECONDARY_IMAGE_GEN_MODEL = os.getenv("SECONDARY_IMAGE_GEN_MODEL", "black-forest-labs/FLUX.1-dev")
+
+MODEL_ALIASES = {
+    "advanced": MODEL_NAME,
+    "standard": SECONDARY_MODEL_NAME,
+    "light": TERTIARY_MODEL_NAME,
+    "image_base": CLIP_BASE_MODEL,
+    "image_advanced": CLIP_LARGE_MODEL,
+    "audio": ASR_MODEL,
+    "tts": TTS_MODEL,
+    "image_gen": IMAGE_GEN_MODEL,
+    "secondary_image_gen": SECONDARY_IMAGE_GEN_MODEL
+}
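The new constants module keeps every model name behind an environment variable and exposes MODEL_ALIASES for user-facing names, so resolving an alias is a plain dict lookup. The helper below is illustrative only; it assumes the module is importable as utils.constants (the file lands under utils/utils/ in this commit) and that the missing import os is added, since the file calls os.getenv.

from utils.constants import MODEL_ALIASES, MODEL_NAME

def resolve_model(alias: str) -> str:
    # Fall back to the primary model for unknown aliases; this policy is an assumption.
    return MODEL_ALIASES.get(alias, MODEL_NAME)

print(resolve_model("image_gen"))  # "Qwen/Qwen2.5-VL-7B-Instruct" unless IMAGE_GEN_MODEL overrides it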