Mark-Lasfar committed on
Commit a20530e · 1 Parent(s): dcad397

endpoints.py generation.py

requirements.txt CHANGED
@@ -40,15 +40,15 @@ pymongo==4.10.1
parler-tts @ git+https://github.com/huggingface/parler-tts.git@5d0aca9753ab74ded179732f5bd797f7a8c6f8ee
soupsieve>=2.5
tqdm>=4.66.0
- git+https://github.com/Dao-AILab/flash-attention.git
argon2-cffi>=23.1.0
wsproto>=1.2.0
descript-audiotools>=0.7.2
scipy>=1.15.0
librosa>=0.10.0
matplotlib>=3.10.0
- vllm
- accelerate
- flash-attn
- diffusers
- psutil
+ vllm==0.5.5
+ accelerate>=0.26.0
+ diffusers>=0.30.0
+ psutil>=5.9.0
+ xformers>=0.0.27
+ anyio==4.6.0
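
The hunk replaces the previously unversioned vllm, accelerate, diffusers, and psutil entries with pinned or floored versions, drops flash-attn, and adds xformers and anyio. A quick startup check can confirm the resolved environment matches these specifiers; this is a minimal sketch, with package names and versions copied from the hunk above (the >= entries are treated as floors):

from importlib.metadata import version, PackageNotFoundError

# Pins and floors taken from the updated requirements.txt hunk above.
EXPECTED = {
    "vllm": "0.5.5",        # exact pin
    "anyio": "4.6.0",       # exact pin
    "accelerate": "0.26.0", # floor
    "diffusers": "0.30.0",  # floor
    "psutil": "5.9.0",      # floor
    "xformers": "0.0.27",   # floor
}

for name, expected in EXPECTED.items():
    try:
        print(f"{name}: installed {version(name)}, expected {expected}")
    except PackageNotFoundError:
        print(f"{name}: NOT INSTALLED (expected {expected})")
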
utils/generation.py CHANGED
@@ -21,6 +21,7 @@ from huggingface_hub import snapshot_download
import torch
from qwenimage.pipeline_qwen_image_edit import QwenImageEditPipeline
from qwenimage.pipeline_qwen_image import QwenImagePipeline
+ from diffusers import DiffusionPipeline
from utils.constants import MODEL_ALIASES, MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME, CLIP_BASE_MODEL, CLIP_LARGE_MODEL, ASR_MODEL, TTS_MODEL, IMAGE_GEN_MODEL, SECONDARY_IMAGE_GEN_MODEL
logger = logging.getLogger(__name__)

@@ -44,6 +45,7 @@ API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co/v1")

# Preload the FLUX.1-dev model if needed
+ model_path = None
try:
    model_path = snapshot_download(
        repo_id="black-forest-labs/FLUX.1-dev",

@@ -53,28 +55,28 @@ try:
    )
except Exception as e:
    logger.error(f"Failed to download FLUX.1-dev: {e}")
-
+     model_path = None

# FlashAttention-3 support
- _flash_attn_func = None
- _kernels_err = None
- try:
-     _k = get_kernel("kernels-community/vllm-flash-attn3")
-     _flash_attn_func = _k.flash_attn_func
- except Exception as e:
-     _flash_attn_func = None
-     _kernels_err = e
-
- def _ensure_fa3_available():
-     if _flash_attn_func is None:
-         raise ImportError(
-             "FlashAttention-3 via Hugging Face `kernels` is required. "
-             f"Tried `get_kernel('kernels-community/vllm-flash-attn3')` and failed with:\n{_kernels_err}"
-         )
+ # _flash_attn_func = None
+ # _kernels_err = None
+ # try:
+ #     _k = get_kernel("kernels-community/vllm-flash-attn3")
+ #     _flash_attn_func = _k.flash_attn_func
+ # except Exception as e:
+ #     _flash_attn_func = None
+ #     _kernels_err = e
+
+ # def _ensure_fa3_available():
+ #     if _flash_attn_func is None:
+ #         raise ImportError(
+ #             "FlashAttention-3 via Hugging Face `kernels` is required. "
+ #             f"Tried `get_kernel('kernels-community/vllm-flash-attn3')` and failed with:\n{_kernels_err}"
+ #         )
# Disable PROVIDER_ENDPOINTS because we only use Hugging Face
PROVIDER_ENDPOINTS = {
    "huggingface": API_ENDPOINT

@@ -271,48 +273,41 @@ def request_generation(
        return

    # Handle image generation or editing
-     if model_name in [IMAGE_GEN_MODEL, SECONDARY_IMAGE_GEN_MODEL] or input_type == "image_gen":
-         task_type = "image_generation"
-         try:
-             dtype = torch.float16  # can be adjusted for the hardware
-             device = "cuda" if torch.cuda.is_available() else "cpu"
-             _ensure_fa3_available()  # make sure FlashAttention-3 is available
-             if model_name == IMAGE_GEN_MODEL:
-                 pipe = QwenImagePipeline.from_pretrained(model_name, torch_dtype=dtype).to(device)
-                 pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
-             else:
-                 pipe = QwenImageEditPipeline.from_pretrained(model_name, torch_dtype=dtype).to(device)
-                 pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
-
-             # Set up the image generation parameters
-             polished_prompt = polish_prompt(message)
-             image_params = {
-                 "prompt": polished_prompt,
-                 "seed": 0,
-                 "randomize_seed": True,
-                 "aspect_ratio": "16:9",
-                 "guidance_scale": 4,
-                 "num_inference_steps": 50,
-                 "prompt_enhance": True
-             }
-             if input_type == "image_gen" and image_data:
-                 image = Image.open(io.BytesIO(image_data)).convert("RGB")
-                 image_params["image"] = image
-
-             # Generate the image
-             output = pipe(**image_params)
-             image_file = io.BytesIO()
-             output.images[0].save(image_file, format="PNG")
-             image_file.seek(0)
-             image_data = image_file.read()
-             logger.debug(f"Generated image data of length: {len(image_data)} bytes")
-             yield image_data
-             cache[cache_key] = [image_data]
-             return
-         except Exception as e:
-             logger.error(f"Image generation failed: {e}")
-             yield f"Error: Image generation failed: {e}"
-             return
+
+     if model_name in [IMAGE_GEN_MODEL, SECONDARY_IMAGE_GEN_MODEL] or input_type == "image_gen":
+         task_type = "image_generation"
+         try:
+             dtype = torch.float16
+             device = "cuda" if torch.cuda.is_available() else "cpu"
+             if model_name == IMAGE_GEN_MODEL:
+                 pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=dtype).to(device)
+             else:
+                 pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=dtype).to(device)
+
+             polished_prompt = polish_prompt(message)
+             image_params = {
+                 "prompt": polished_prompt,
+                 "num_inference_steps": 50,
+                 "guidance_scale": 7.5,
+             }
+             if input_type == "image_gen" and image_data:
+                 image = Image.open(io.BytesIO(image_data)).convert("RGB")
+                 image_params["image"] = image
+
+             output = pipe(**image_params)
+             image_file = io.BytesIO()
+             output.images[0].save(image_file, format="PNG")
+             image_file.seek(0)
+             image_data = image_file.read()
+             logger.debug(f"Generated image data of length: {len(image_data)} bytes")
+             yield image_data
+             cache[cache_key] = [image_data]
+             return
+         except Exception as e:
+             logger.error(f"Image generation failed: {e}")
+             yield f"Error: Image generation failed: {e}"
+             return
+

    # Handle text (as in the original code)
    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
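
This commit disables the FlashAttention-3 kernel lookup and drops the Qwen image pipelines from the image branch, routing both image models through diffusers' DiffusionPipeline and serializing the first generated image to PNG bytes before yielding it. Below is a minimal standalone sketch of that new path; it assumes diffusers>=0.30.0 from the updated requirements, reuses the checkpoints and parameters from the committed branch, and swaps polish_prompt and the response cache for a plain placeholder prompt:

import io

import torch
from diffusers import DiffusionPipeline

# Same text-to-image route the commit switches to: DiffusionPipeline instead of
# QwenImagePipeline/QwenImageEditPipeline with FlashAttention-3 processors.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32  # assumption: fall back to fp32 on CPU

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=dtype
).to(device)

output = pipe(
    prompt="a lighthouse on a cliff at sunset",  # placeholder; generation.py uses polish_prompt(message)
    num_inference_steps=50,
    guidance_scale=7.5,
)

# Serialize to PNG bytes, mirroring what request_generation() yields to the caller.
buf = io.BytesIO()
output.images[0].save(buf, format="PNG")
png_bytes = buf.getvalue()
print(f"Generated {len(png_bytes)} bytes of PNG data")
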
utils/utils/constants.py CHANGED
@@ -1,11 +1,11 @@
MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
- TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct:featherless-ai")
+ TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "llama/Llama-3.1-8B-Instruct:featherless-ai")
CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
TTS_MODEL = os.getenv("TTS_MODEL", "facebook/mms-tts-ara")
- IMAGE_GEN_MODEL = os.getenv("IMAGE_GEN_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")
+ IMAGE_GEN_MODEL = os.getenv("IMAGE_GEN_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct:novita")
SECONDARY_IMAGE_GEN_MODEL = os.getenv("SECONDARY_IMAGE_GEN_MODEL", "black-forest-labs/FLUX.1-dev")

MODEL_ALIASES = {
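
Each of these names resolves through os.getenv, so the committed strings are only defaults and a deployment can repoint a model without editing the file. A minimal sketch of overriding one of them follows; it assumes that import os appears above these assignments and that the module is importable as utils.constants (the path generation.py imports from), and the override value is only an illustration:

import os

# Set the override before the constants module is first imported.
os.environ["IMAGE_GEN_MODEL"] = "black-forest-labs/FLUX.1-dev"

from utils.constants import IMAGE_GEN_MODEL, SECONDARY_IMAGE_GEN_MODEL

print(IMAGE_GEN_MODEL)            # black-forest-labs/FLUX.1-dev (from the environment)
print(SECONDARY_IMAGE_GEN_MODEL)  # black-forest-labs/FLUX.1-dev (committed default)
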