Spaces:

XCarleX
/

Xxx

Paused

App Files Files Community

XCarleX committed on Sep 18

Commit

3a0e4bf

verified ·

1 Parent(s): 088bf18

Create vincie_service.py

Browse files

Files changed (1) hide show

vincie_service.py +527 -0

vincie_service.py ADDED Viewed

	@@ -0,0 +1,527 @@

+#!/usr/bin/env python3
+"""
+VINCIE Service - Multi-turn Image Editing Service
+Serviço completo para VINCIE: Unlocking In-context Image Editing from Video
+"""
+import os
+import sys
+import time
+import uuid
+import json
+import torch
+import gradio as gr
+import argparse
+from pathlib import Path
+from typing import List, Dict, Any, Optional, Tuple
+from datetime import datetime
+from PIL import Image
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+class VINCIEService:
+    """Serviço completo para VINCIE Multi-turn Image Editing"""
+    def __init__(self, model_path: str = "ckpt/VINCIE-3B"):
+        self.model_path = model_path
+        self.model = None
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.output_dir = Path("outputs")
+        self.output_dir.mkdir(exist_ok=True)
+        logger.info(f"🎨 VINCIE Service iniciado")
+        logger.info(f"   Device: {self.device}")
+        logger.info(f"   Model path: {self.model_path}")
+    def load_model(self):
+        """Carregar modelo VINCIE"""
+        try:
+            logger.info("📦 Carregando modelo VINCIE...")
+            # Importar módulos VINCIE
+            sys.path.append('.')
+            from vincie.models import VINCIE
+            # Carregar checkpoint
+            self.model = VINCIE.from_pretrained(
+                self.model_path,
+                torch_dtype=torch.bfloat16,
+                device_map="auto"
+            )
+            logger.info("✅ Modelo VINCIE carregado com sucesso!")
+            return True
+        except Exception as e:
+            logger.error(f"❌ Erro ao carregar modelo: {e}")
+            return False
+    def multi_turn_editing(
+        self,
+        input_image: str,
+        prompts: List[str],
+        output_name: str = None
+    ) -> List[str]:
+        """
+        Multi-turn image editing
+        Args:
+            input_image: Caminho da imagem inicial
+            prompts: Lista de prompts para edição sequencial
+            output_name: Nome base para outputs
+        Returns:
+            Lista com caminhos das imagens geradas
+        """
+        if not self.model:
+            if not self.load_model():
+                return []
+        try:
+            # Preparar nome de saída
+            if not output_name:
+                output_name = f"multiturn_{int(time.time())}"
+            output_folder = self.output_dir / output_name
+            output_folder.mkdir(exist_ok=True)
+            logger.info(f"🎨 Multi-turn editing iniciado:")
+            logger.info(f"   Input: {input_image}")
+            logger.info(f"   Turns: {len(prompts)}")
+            # Processar cada turn
+            results = []
+            current_image = input_image
+            for i, prompt in enumerate(prompts, 1):
+                logger.info(f"   Turn {i}: {prompt}")
+                # Gerar imagem editada
+                result = self._generate_single_edit(current_image, prompt)
+                if result:
+                    # Salvar resultado
+                    output_path = output_folder / f"turn_{i:02d}.png"
+                    result.save(output_path)
+                    results.append(str(output_path))
+                    # Usar resultado como input do próximo turn
+                    current_image = str(output_path)
+                    logger.info(f"   ✅ Turn {i} concluído: {output_path}")
+                else:
+                    logger.error(f"   ❌ Turn {i} falhou")
+                    break
+            # Criar GIF animado dos resultados
+            if len(results) > 1:
+                self._create_editing_animation(results, output_folder / "animation.gif")
+            logger.info(f"✅ Multi-turn editing concluído: {len(results)} imagens")
+            return results
+        except Exception as e:
+            logger.error(f"❌ Erro no multi-turn editing: {e}")
+            return []
+    def multi_concept_composition(
+        self,
+        concept_images: List[str],
+        concept_descriptions: List[str],
+        final_prompt: str,
+        output_name: str = None
+    ) -> Optional[str]:
+        """
+        Multi-concept composition
+        Args:
+            concept_images: Lista de imagens dos conceitos
+            concept_descriptions: Descrições de cada conceito
+            final_prompt: Prompt final para composição
+            output_name: Nome do arquivo de saída
+        Returns:
+            Caminho da imagem composta
+        """
+        if not self.model:
+            if not self.load_model():
+                return None
+        try:
+            if not output_name:
+                output_name = f"composition_{int(time.time())}.png"
+            logger.info(f"🎭 Multi-concept composition:")
+            logger.info(f"   Concepts: {len(concept_images)}")
+            logger.info(f"   Final prompt: {final_prompt}")
+            # Preparar prompts no formato VINCIE
+            prompts = []
+            for i, desc in enumerate(concept_descriptions):
+                prompts.append(f"<IMG{i}>: {desc}")
+            prompts.append(final_prompt)
+            # Gerar composição
+            result = self._generate_composition(concept_images, prompts)
+            if result:
+                output_path = self.output_dir / output_name
+                result.save(output_path)
+                logger.info(f"✅ Composição criada: {output_path}")
+                return str(output_path)
+            else:
+                logger.error("❌ Falha na geração da composição")
+                return None
+        except Exception as e:
+            logger.error(f"❌ Erro na composição: {e}")
+            return None
+    def story_generation(
+        self,
+        story_prompts: List[str],
+        initial_image: Optional[str] = None,
+        output_name: str = None
+    ) -> List[str]:
+        """
+        Story generation através de sequência de imagens
+        Args:
+            story_prompts: Lista de prompts da história
+            initial_image: Imagem inicial (opcional)
+            output_name: Nome base para a história
+        Returns:
+            Lista com caminhos das imagens da história
+        """
+        if not self.model:
+            if not self.load_model():
+                return []
+        try:
+            if not output_name:
+                output_name = f"story_{int(time.time())}"
+            story_folder = self.output_dir / output_name
+            story_folder.mkdir(exist_ok=True)
+            logger.info(f"📖 Story generation:")
+            logger.info(f"   Chapters: {len(story_prompts)}")
+            results = []
+            current_context = []
+            # Adicionar imagem inicial se fornecida
+            if initial_image:
+                current_context.append(initial_image)
+            for i, prompt in enumerate(story_prompts, 1):
+                logger.info(f"   Chapter {i}: {prompt}")
+                # Gerar próxima imagem da história
+                result = self._generate_story_frame(current_context, prompt)
+                if result:
+                    output_path = story_folder / f"chapter_{i:02d}.png"
+                    result.save(output_path)
+                    results.append(str(output_path))
+                    # Adicionar ao contexto
+                    current_context.append(str(output_path))
+                    logger.info(f"   ✅ Chapter {i} gerado: {output_path}")
+                else:
+                    logger.error(f"   ❌ Chapter {i} falhou")
+            # Criar storyboard
+            if len(results) > 1:
+                self._create_storyboard(results, story_folder / "storyboard.png")
+            logger.info(f"✅ História gerada: {len(results)} capítulos")
+            return results
+        except Exception as e:
+            logger.error(f"❌ Erro na story generation: {e}")
+            return []
+    def _generate_single_edit(self, input_image: str, prompt: str) -> Optional[Image.Image]:
+        """Gerar uma única edição"""
+        try:
+            # Implementação específica do VINCIE para edição
+            # Usar a API do modelo carregado
+            result = self.model.generate(
+                input_image=input_image,
+                prompt=prompt,
+                num_inference_steps=50,
+                guidance_scale=7.5,
+                height=512,
+                width=512
+            )
+            return result
+        except Exception as e:
+            logger.error(f"Erro na edição: {e}")
+            return None
+    def _generate_composition(self, images: List[str], prompts: List[str]) -> Optional[Image.Image]:
+        """Gerar composição multi-conceito"""
+        try:
+            result = self.model.generate_composition(
+                concept_images=images,
+                prompts=prompts,
+                num_inference_steps=50,
+                guidance_scale=7.5,
+                height=768,
+                width=768
+            )
+            return result
+        except Exception as e:
+            logger.error(f"Erro na composição: {e}")
+            return None
+    def _generate_story_frame(self, context: List[str], prompt: str) -> Optional[Image.Image]:
+        """Gerar frame da história"""
+        try:
+            result = self.model.generate_story_frame(
+                context_images=context,
+                prompt=prompt,
+                num_inference_steps=40,
+                guidance_scale=6.0,
+                height=512,
+                width=768
+            )
+            return result
+        except Exception as e:
+            logger.error(f"Erro no story frame: {e}")
+            return None
+    def _create_editing_animation(self, image_paths: List[str], output_path: Path):
+        """Criar animação GIF das edições"""
+        try:
+            images = [Image.open(path) for path in image_paths]
+            images[0].save(
+                output_path,
+                save_all=True,
+                append_images=images[1:],
+                duration=1000,  # 1 segundo por frame
+                loop=0
+            )
+            logger.info(f"📹 Animação criada: {output_path}")
+        except Exception as e:
+            logger.error(f"Erro na animação: {e}")
+    def _create_storyboard(self, image_paths: List[str], output_path: Path):
+        """Criar storyboard das imagens"""
+        try:
+            images = [Image.open(path) for path in image_paths]
+            # Calcular grid
+            cols = min(3, len(images))
+            rows = (len(images) + cols - 1) // cols
+            # Tamanho de cada thumbnail
+            thumb_width, thumb_height = 256, 256
+            # Criar canvas
+            canvas = Image.new(
+                'RGB',
+                (cols * thumb_width, rows * thumb_height),
+                'white'
+            )
+            # Colar imagens
+            for i, img in enumerate(images):
+                row = i // cols
+                col = i % cols
+                # Redimensionar mantendo aspecto
+                img.thumbnail((thumb_width, thumb_height), Image.Resampling.LANCZOS)
+                # Posição no canvas
+                x = col * thumb_width + (thumb_width - img.width) // 2
+                y = row * thumb_height + (thumb_height - img.height) // 2
+                canvas.paste(img, (x, y))
+            canvas.save(output_path)
+            logger.info(f"📋 Storyboard criado: {output_path}")
+        except Exception as e:
+            logger.error(f"Erro no storyboard: {e}")
+def create_gradio_interface(service: VINCIEService):
+    """Criar interface Gradio para VINCIE"""
+    def multi_turn_interface(input_image, turns_text, output_name):
+        if not input_image or not turns_text:
+            return [], "❌ Forneça uma imagem e os prompts"
+        # Parse dos turns (um por linha)
+        prompts = [line.strip() for line in turns_text.split('\n') if line.strip()]
+        if not prompts:
+            return [], "❌ Nenhum prompt válido fornecido"
+        results = service.multi_turn_editing(input_image, prompts, output_name)
+        if results:
+            return results, f"✅ {len(results)} edições geradas com sucesso!"
+        else:
+            return [], "❌ Falha na geração"
+    def composition_interface(concept_images, descriptions_text, final_prompt, output_name):
+        if not concept_images or not descriptions_text or not final_prompt:
+            return None, "❌ Forneça imagens, descrições e prompt final"
+        # Parse das descrições
+        descriptions = [line.strip() for line in descriptions_text.split('\n') if line.strip()]
+        if len(descriptions) != len(concept_images):
+            return None, "❌ Número de descrições deve ser igual ao de imagens"
+        result = service.multi_concept_composition(
+            concept_images, descriptions, final_prompt, output_name
+        )
+        if result:
+            return result, "✅ Composição gerada com sucesso!"
+        else:
+            return None, "❌ Falha na composição"
+    def story_interface(story_prompts_text, initial_image, output_name):
+        if not story_prompts_text:
+            return [], "❌ Forneça os prompts da história"
+        # Parse dos prompts da história
+        prompts = [line.strip() for line in story_prompts_text.split('\n') if line.strip()]
+        if not prompts:
+            return [], "❌ Nenhum prompt válido fornecido"
+        results = service.story_generation(prompts, initial_image, output_name)
+        if results:
+            return results, f"✅ História gerada: {len(results)} capítulos!"
+        else:
+            return [], "❌ Falha na geração da história"
+    # Interface Gradio
+    with gr.Blocks(title="VINCIE Service", theme=gr.themes.Soft()) as interface:
+        gr.Markdown("""
+        # 🎨 VINCIE Multi-turn Image Editing Service
+        **VINCIE**: Unlocking In-context Image Editing from Video
+        Três modos disponíveis:
+        - **Multi-turn Editing**: Edite uma imagem em múltiplas etapas
+        - **Multi-concept Composition**: Combine conceitos de várias imagens
+        - **Story Generation**: Gere uma sequência de imagens contando uma história
+        """)
+        with gr.Tabs():
+            # Tab 1: Multi-turn Editing
+            with gr.TabItem("🔄 Multi-turn Editing"):
+                gr.Markdown("### Edição sequencial de uma imagem")
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        mt_input_image = gr.Image(
+                            label="Imagem inicial",
+                            type="filepath"
+                        )
+                        mt_turns = gr.Textbox(
+                            label="Prompts de edição (um por linha)",
+                            placeholder="Lower the pineapple beside her face\nAdd a crown to the woman's head\nChange the woman's expression to laughing",
+                            lines=5
+                        )
+                        mt_output_name = gr.Textbox(
+                            label="Nome da saída (opcional)",
+                            placeholder="minha_edicao"
+                        )
+                        mt_generate_btn = gr.Button("🎨 Gerar Edições", variant="primary")
+                    with gr.Column(scale=2):
+                        mt_output_gallery = gr.Gallery(
+                            label="Edições geradas",
+                            show_label=True,
+                            elem_id="mt_gallery",
+                            columns=3,
+                            rows=2,
+                            height="auto"
+                        )
+                        mt_status = gr.Textbox(label="Status", interactive=False)
+                mt_generate_btn.click(
+                    fn=multi_turn_interface,
+                    inputs=[mt_input_image, mt_turns, mt_output_name],
+                    outputs=[mt_output_gallery, mt_status]
+                )
+            # Tab 2: Multi-concept Composition
+            with gr.TabItem("🎭 Multi-concept Composition"):
+                gr.Markdown("### Combine conceitos de múltiplas imagens")
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        mc_images = gr.File(
+                            label="Imagens dos conceitos",
+                            file_count="multiple",
+                            file_types=["image"]
+                        )
+                        mc_descriptions = gr.Textbox(
+                            label="Descrições dos conceitos (uma por linha)",
+                            placeholder="father in casual clothes\nmother with blonde hair\nson with school backpack",
+                            lines=4
+                        )
+                        mc_final_prompt = gr.Textbox(
+                            label="Prompt final da composição",
+                            placeholder="A happy family portrait in a sunny park with trees in the background"
+                        )
+                        mc_output_name = gr.Textbox(
+                            label="Nome da saída (opcional)",
+                            placeholder="composicao_familia"
+                        )
+                        mc_generate_btn = gr.Button("🎭 Gerar Composição", variant="primary")
+                    with gr.Column(scale=2):
+                        mc_output = gr.Image(label="Composição gerada")
+                        mc_status = gr.Textbox(label="Status", interactive=False)
+                mc_generate_btn.click(
+                    fn=composition_interface,
+                    inputs=[mc_images, mc_descriptions, mc_final_prompt, mc_output_name],
+                    outputs=[mc_output, mc_status]
+                )
+            # Tab 3: Story Generation
+            with gr.TabItem("📖 Story Generation"):
+                gr.Markdown("### Gere uma sequência de imagens contando uma história")
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        sg_initial = gr.Image(
+                            label="Imagem inicial (opcional)",
+                            type="filepath"
+                        )
+                        sg_prompts = gr.Textbox(
+                            label="Prompts da história (um por linha)",
+                            placeholder="A brave knight starts his journey\nHe encounters a dragon in a cave\nHe befriends the dragon\nThey fly together into the sunset",
+                            lines=6
+                        )
+                        sg_output_name = gr.Textbox(
+                            label="Nome da história (opcional)",
+                            placeholder="historia_cavaleiro"
+                        )
+                        sg_generate_btn = gr.Button("📖 Gerar História", variant="primary")
+                    with gr.Column(scale=2):
+                        sg_output_gallery = gr.Gallery(
+                            label="Capítulo