Spaces:

Derr11
/

Der11

Paused

App Files Files Community

Derr11 committed 12 days ago

Commit

2d1791a

verified ·

1 Parent(s): 0db7e85

Delete qwen_omni_utils.py

Browse files

Files changed (1) hide show

qwen_omni_utils.py +0 -125

qwen_omni_utils.py DELETED Viewed

@@ -1,125 +0,0 @@
-import copy
-import os
-import requests
-import io
-import numpy as np
-import soundfile as sf
-from PIL import Image
-# Optional dependency: try to import decord for video processing. If it is
-# missing, the rest of the module still loads; decord is set to None and a
-# warning is printed.
-try:
-    import decord
-    # Select decord's 'torch' bridge for decoded frames.
-    # NOTE(review): presumably frames then come back as torch tensors, which
-    # would not offer the .asnumpy() call used in _load_video — TODO confirm.
-    decord.bridge.set_bridge('torch')
-except ImportError:
-    decord = None
-    print("Warning: 'decord' module not found. Video processing will not work.")
-# Optional dependency: try to import librosa for advanced audio processing.
-# When it is missing, librosa is set to None and no warning is printed.
-try:
-    import librosa
-except ImportError:
-    librosa = None
-def _load_image(image_path):
-    """Load an image from a URL or a local path and convert it to RGB.
-
-    Args:
-        image_path: String. Treated as a URL when it starts with "http://"
-            or "https://"; otherwise handed to Image.open as-is.
-
-    Returns:
-        The opened image after ``convert("RGB")``.
-    """
-    if image_path.startswith("http://") or image_path.startswith("https://"):
-        # Remote image: fetch the full body (10 second timeout) and decode it
-        # from an in-memory buffer.
-        # NOTE(review): the HTTP status is not checked, so an error response
-        # body is passed straight to Image.open.
-        response = requests.get(image_path, timeout=10)
-        image = Image.open(io.BytesIO(response.content))
-    else:
-        image = Image.open(image_path)
-    return image.convert("RGB")
-def _load_audio(audio_path, target_sr=16000):
-    """Load audio and resample it to the target sampling rate when possible.
-
-    Args:
-        audio_path: String. Treated as a URL when it starts with "http://"
-            or "https://"; otherwise handed to soundfile's read as-is.
-        target_sr: Desired sampling rate; defaults to 16000.
-
-    Returns:
-        The audio samples only; the sampling rate is not returned.
-    """
-    if audio_path.startswith("http://") or audio_path.startswith("https://"):
-        # NOTE(review): the HTTP status is not checked before decoding.
-        response = requests.get(audio_path, timeout=10)
-        # Use io.BytesIO to read from memory.
-        audio_data, sr = sf.read(io.BytesIO(response.content))
-    else:
-        audio_data, sr = sf.read(audio_path)
-    # Convert to mono if the data has more than one dimension (e.g. stereo)
-    # by averaging over axis 1.
-    if len(audio_data.shape) > 1:
-        audio_data = audio_data.mean(axis=1)
-    # Resample if librosa is available and the rate differs from the target.
-    # NOTE(review): when librosa is None the audio is returned at its original
-    # rate, with no warning, even if it differs from target_sr.
-    if librosa and sr != target_sr:
-        audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=target_sr)
-    return audio_data
-def _load_video(video_path, n_frames=8, use_audio=True):
-    """Process a video: sample frames from it (audio extraction is a stub).
-
-    Args:
-        video_path: String. Downloaded first when it starts with "http";
-            otherwise handed to decord.VideoReader as-is.
-        n_frames: Number of frame indices to sample; defaults to 8.
-        use_audio: Accepted, but the audio branch below does nothing.
-
-    Returns:
-        A ``(pil_frames, audio_data)`` tuple: a list of PIL images and an
-        audio value that is always None in this implementation.
-
-    Raises:
-        ImportError: If decord could not be imported at module load time.
-    """
-    if decord is None:
-        raise ImportError("Please install 'decord' to support video processing.")
-    # Load the video (some decord versions support direct URLs, but downloading
-    # it to a temporary file first is preferred).
-    # NOTE(review): this prefix test is looser than the "http://" / "https://"
-    # checks used for images and audio; any path starting with "http" matches.
-    if video_path.startswith("http"):
-        # Download the file temporarily.
-        # NOTE(review): no timeout is passed and the HTTP status is not checked.
-        response = requests.get(video_path, stream=True)
-        # NOTE(review): fixed file name in the current working directory; it is
-        # overwritten on every call and never removed afterwards.
-        temp_filename = "temp_video.mp4"
-        with open(temp_filename, 'wb') as f:
-            for chunk in response.iter_content(chunk_size=1024):
-                if chunk:
-                    f.write(chunk)
-        vr = decord.VideoReader(temp_filename)
-    else:
-        vr = decord.VideoReader(video_path)
-    # Extract the frames (frame sampling).
-    total_frames = len(vr)
-    # Pick evenly spaced frame indices from the first to the last frame.
-    frame_indices = np.linspace(0, total_frames - 1, n_frames, dtype=int)
-    # NOTE(review): the module sets decord's 'torch' bridge at import time;
-    # presumably get_batch then returns a torch tensor without .asnumpy() —
-    # TODO confirm.
-    frames = vr.get_batch(frame_indices).asnumpy()
-    # Convert the frames into a list of PIL images.
-    pil_frames = [Image.fromarray(frame) for frame in frames]
-    audio_data = None
-    if use_audio:
-        # Note: extracting audio from a video needs extra processing (usually
-        # via ffmpeg). The value is left empty here because decord focuses on
-        # images; real applications use moviepy or ffmpeg to extract the
-        # audio track.
-        pass
-    return pil_frames, audio_data
-def process_mm_info(conversation, use_audio_in_video=True):
-    """
-    Main entry point for multimodal processing.
-
-    Walks the conversation and turns the media references found in list-style
-    message content into loaded data objects, grouped by media type.
-
-    Args:
-        conversation: Iterable of messages. Only messages that contain a
-            "content" entry which is a list are inspected; each item of that
-            list is dispatched on item["type"] ("audio", "image" or "video").
-        use_audio_in_video: Forwarded to _load_video as use_audio; when true,
-            a non-None audio value returned for a video is added to the audios.
-
-    Returns:
-        A ``(audios, images, videos)`` tuple of lists. Each entry of videos is
-        the frame list returned by _load_video.
-    """
-    # Work on a deep copy rather than on the caller's object.
-    conversation = copy.deepcopy(conversation)
-    audios = []
-    images = []
-    videos = []
-    for message in conversation:
-        if "content" in message and isinstance(message["content"], list):
-            for item in message["content"]:
-                try:
-                    if item["type"] == "audio":
-                        # Load and process the audio.
-                        audio_data = _load_audio(item["audio"])
-                        audios.append(audio_data)
-                    elif item["type"] == "image":
-                        # Load and process the image.
-                        image_data = _load_image(item["image"])
-                        images.append(image_data)
-                    elif item["type"] == "video":
-                        # Load and process the video.
-                        video_frames, video_audio = _load_video(
-                            item["video"],
-                            use_audio=use_audio_in_video
-                        )
-                        videos.append(video_frames)
-                        if use_audio_in_video and video_audio is not None:
-                            audios.append(video_audio)
-                except Exception as e:
-                    # NOTE(review): if the failure was a missing "type" key,
-                    # the item['type'] lookup in this message raises again
-                    # inside the handler and the error propagates.
-                    print(f"Error processing {item['type']}: {e}")
-                    # On error the item is skipped (nothing is appended), so
-                    # one bad item does not crash the whole run.
-                    pass
-    return audios, images, videos