import streamlit as st
from PIL import Image
import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video
import tempfile
import os

# Page setup
st.set_page_config(page_title="Image to Video with Editing", page_icon="🎥")

# Page title
st.title("Stable Video Diffusion - Image to Video")

# Instructions
st.write("Upload an image to generate a video. You can also adjust settings for video generation.")

# Load the model once and cache it across Streamlit reruns.
# Note: SVD is not exposed as a transformers pipeline task; it is loaded
# here via diffusers' StableVideoDiffusionPipeline instead.
@st.cache_resource
def load_model():
    model_id = "stabilityai/stable-video-diffusion-img2vid-xt"
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        model_id, torch_dtype=torch.float16, variant="fp16"
    )
    pipe.to("cuda")
    return pipe

pipe = load_model()

# User input
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

# Video generation options (SVD-XT was fine-tuned to generate 25 frames)
frame_count = st.slider("Number of frames", min_value=10, max_value=50, value=25, step=5)

if uploaded_image is not None:
    image = Image.open(uploaded_image).convert("RGB")
    st.image(image, caption="Uploaded Image", use_column_width=True)

    if st.button("Generate Video"):
        # Generate the video frames from the image
        with st.spinner("Generating video..."):
            # SVD expects a 1024x576 conditioning frame (PIL uses width x height).
            conditioning = image.resize((1024, 576))
            # decode_chunk_size trades speed for lower VRAM use when decoding latents.
            frames = pipe(conditioning, num_frames=frame_count, decode_chunk_size=8).frames[0]
        st.success("Video generated successfully!")

        # Encode the PIL frames to an MP4 file, then read it back as bytes
        # for both display and download.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            video_path = tmp.name
        export_to_video(frames, video_path, fps=7)
        with open(video_path, "rb") as f:
            video_bytes = f.read()
        os.remove(video_path)

        # Display the video
        st.video(video_bytes, format="video/mp4")

        # Download the video
        st.download_button(
            label="Download Video",
            data=video_bytes,
            file_name="generated_video.mp4",
            mime="video/mp4",
        )

# Some information about the model
st.write("""
### About the Model
SVD Image-to-Video is a latent diffusion model trained to generate short video
clips from a single conditioning image. It generates frames at a resolution of
576x1024 given a context frame of the same size, and was fine-tuned from the
SVD Image-to-Video [14 frames] model. The widely used f8-decoder is also
fine-tuned for temporal consistency, making the output videos more stable and
coherent.
""")
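
# ---------------------------------------------------------------------------
# Usage sketch (assumptions: this file is saved as app.py and a CUDA-capable
# GPU is available; package names follow the standard diffusers SVD setup):
#
#   pip install streamlit torch diffusers transformers accelerate
#   streamlit run app.py
#
# The model weights (several GB) are downloaded from the Hugging Face Hub on
# first run; @st.cache_resource keeps the loaded pipeline in memory across
# Streamlit reruns so the download and GPU transfer happen only once.
# ---------------------------------------------------------------------------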