Spaces:

XCarleX
/

Xxx

Paused

App Files Files Community

XCarleX committed on Sep 18

Commit

699a61f

verified ·

1 Parent(s): 3a0e4bf

Update vincie_service.py

Browse files

Files changed (1) hide show

vincie_service.py +105 -516

vincie_service.py CHANGED Viewed

@@ -1,527 +1,116 @@
 #!/usr/bin/env python3
-"""
-VINCIE Service - Multi-turn Image Editing Service
-Serviço completo para VINCIE: Unlocking In-context Image Editing from Video
-"""
 import os
-import sys
-import time
-import uuid
 import json
-import torch
-import gradio as gr
-import argparse
 from pathlib import Path
-from typing import List, Dict, Any, Optional, Tuple
-from datetime import datetime
-from PIL import Image
-import logging
-# Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-class VINCIEService:
-    """Serviço completo para VINCIE Multi-turn Image Editing"""
-    def __init__(self, model_path: str = "ckpt/VINCIE-3B"):
-        self.model_path = model_path
-        self.model = None
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.output_dir = Path("outputs")
-        self.output_dir.mkdir(exist_ok=True)
-        logger.info(f"🎨 VINCIE Service iniciado")
-        logger.info(f"   Device: {self.device}")
-        logger.info(f"   Model path: {self.model_path}")
-    def load_model(self):
-        """Carregar modelo VINCIE"""
-        try:
-            logger.info("📦 Carregando modelo VINCIE...")
-            # Importar módulos VINCIE
-            sys.path.append('.')
-            from vincie.models import VINCIE
-            # Carregar checkpoint
-            self.model = VINCIE.from_pretrained(
-                self.model_path,
-                torch_dtype=torch.bfloat16,
-                device_map="auto"
-            )
-            logger.info("✅ Modelo VINCIE carregado com sucesso!")
-            return True
-        except Exception as e:
-            logger.error(f"❌ Erro ao carregar modelo: {e}")
-            return False
-    def multi_turn_editing(
-        self,
-        input_image: str,
-        prompts: List[str],
-        output_name: str = None
-    ) -> List[str]:
-        """
-        Multi-turn image editing
-        Args:
-            input_image: Caminho da imagem inicial
-            prompts: Lista de prompts para edição sequencial
-            output_name: Nome base para outputs
-        Returns:
-            Lista com caminhos das imagens geradas
-        """
-        if not self.model:
-            if not self.load_model():
-                return []
-        try:
-            # Preparar nome de saída
-            if not output_name:
-                output_name = f"multiturn_{int(time.time())}"
-            output_folder = self.output_dir / output_name
-            output_folder.mkdir(exist_ok=True)
-            logger.info(f"🎨 Multi-turn editing iniciado:")
-            logger.info(f"   Input: {input_image}")
-            logger.info(f"   Turns: {len(prompts)}")
-            # Processar cada turn
-            results = []
-            current_image = input_image
-            for i, prompt in enumerate(prompts, 1):
-                logger.info(f"   Turn {i}: {prompt}")
-                # Gerar imagem editada
-                result = self._generate_single_edit(current_image, prompt)
-                if result:
-                    # Salvar resultado
-                    output_path = output_folder / f"turn_{i:02d}.png"
-                    result.save(output_path)
-                    results.append(str(output_path))
-                    # Usar resultado como input do próximo turn
-                    current_image = str(output_path)
-                    logger.info(f"   ✅ Turn {i} concluído: {output_path}")
-                else:
-                    logger.error(f"   ❌ Turn {i} falhou")
-                    break
-            # Criar GIF animado dos resultados
-            if len(results) > 1:
-                self._create_editing_animation(results, output_folder / "animation.gif")
-            logger.info(f"✅ Multi-turn editing concluído: {len(results)} imagens")
-            return results
-        except Exception as e:
-            logger.error(f"❌ Erro no multi-turn editing: {e}")
-            return []
-    def multi_concept_composition(
-        self,
-        concept_images: List[str],
-        concept_descriptions: List[str],
-        final_prompt: str,
-        output_name: str = None
-    ) -> Optional[str]:
         """
-        Multi-concept composition
-        Args:
-            concept_images: Lista de imagens dos conceitos
-            concept_descriptions: Descrições de cada conceito
-            final_prompt: Prompt final para composição
-            output_name: Nome do arquivo de saída
-        Returns:
-            Caminho da imagem composta
         """
-        if not self.model:
-            if not self.load_model():
-                return None
-        try:
-            if not output_name:
-                output_name = f"composition_{int(time.time())}.png"
-            logger.info(f"🎭 Multi-concept composition:")
-            logger.info(f"   Concepts: {len(concept_images)}")
-            logger.info(f"   Final prompt: {final_prompt}")
-            # Preparar prompts no formato VINCIE
-            prompts = []
-            for i, desc in enumerate(concept_descriptions):
-                prompts.append(f"<IMG{i}>: {desc}")
-            prompts.append(final_prompt)
-            # Gerar composição
-            result = self._generate_composition(concept_images, prompts)
-            if result:
-                output_path = self.output_dir / output_name
-                result.save(output_path)
-                logger.info(f"✅ Composição criada: {output_path}")
-                return str(output_path)
-            else:
-                logger.error("❌ Falha na geração da composição")
-                return None
-        except Exception as e:
-            logger.error(f"❌ Erro na composição: {e}")
-            return None
-    def story_generation(
-        self,
-        story_prompts: List[str],
-        initial_image: Optional[str] = None,
-        output_name: str = None
-    ) -> List[str]:
         """
-        Story generation através de sequência de imagens
-        Args:
-            story_prompts: Lista de prompts da história
-            initial_image: Imagem inicial (opcional)
-            output_name: Nome base para a história
-        Returns:
-            Lista com caminhos das imagens da história
         """
-        if not self.model:
-            if not self.load_model():
-                return []
-        try:
-            if not output_name:
-                output_name = f"story_{int(time.time())}"
-            story_folder = self.output_dir / output_name
-            story_folder.mkdir(exist_ok=True)
-            logger.info(f"📖 Story generation:")
-            logger.info(f"   Chapters: {len(story_prompts)}")
-            results = []
-            current_context = []
-            # Adicionar imagem inicial se fornecida
-            if initial_image:
-                current_context.append(initial_image)
-            for i, prompt in enumerate(story_prompts, 1):
-                logger.info(f"   Chapter {i}: {prompt}")
-                # Gerar próxima imagem da história
-                result = self._generate_story_frame(current_context, prompt)
-                if result:
-                    output_path = story_folder / f"chapter_{i:02d}.png"
-                    result.save(output_path)
-                    results.append(str(output_path))
-                    # Adicionar ao contexto
-                    current_context.append(str(output_path))
-                    logger.info(f"   ✅ Chapter {i} gerado: {output_path}")
-                else:
-                    logger.error(f"   ❌ Chapter {i} falhou")
-            # Criar storyboard
-            if len(results) > 1:
-                self._create_storyboard(results, story_folder / "storyboard.png")
-            logger.info(f"✅ História gerada: {len(results)} capítulos")
-            return results
-        except Exception as e:
-            logger.error(f"❌ Erro na story generation: {e}")
-            return []
-    def _generate_single_edit(self, input_image: str, prompt: str) -> Optional[Image.Image]:
-        """Gerar uma única edição"""
-        try:
-            # Implementação específica do VINCIE para edição
-            # Usar a API do modelo carregado
-            result = self.model.generate(
-                input_image=input_image,
-                prompt=prompt,
-                num_inference_steps=50,
-                guidance_scale=7.5,
-                height=512,
-                width=512
-            )
-            return result
-        except Exception as e:
-            logger.error(f"Erro na edição: {e}")
-            return None
-    def _generate_composition(self, images: List[str], prompts: List[str]) -> Optional[Image.Image]:
-        """Gerar composição multi-conceito"""
-        try:
-            result = self.model.generate_composition(
-                concept_images=images,
-                prompts=prompts,
-                num_inference_steps=50,
-                guidance_scale=7.5,
-                height=768,
-                width=768
-            )
-            return result
-        except Exception as e:
-            logger.error(f"Erro na composição: {e}")
-            return None
-    def _generate_story_frame(self, context: List[str], prompt: str) -> Optional[Image.Image]:
-        """Gerar frame da história"""
-        try:
-            result = self.model.generate_story_frame(
-                context_images=context,
-                prompt=prompt,
-                num_inference_steps=40,
-                guidance_scale=6.0,
-                height=512,
-                width=768
-            )
-            return result
-        except Exception as e:
-            logger.error(f"Erro no story frame: {e}")
-            return None
-    def _create_editing_animation(self, image_paths: List[str], output_path: Path):
-        """Criar animação GIF das edições"""
-        try:
-            images = [Image.open(path) for path in image_paths]
-            images[0].save(
-                output_path,
-                save_all=True,
-                append_images=images[1:],
-                duration=1000,  # 1 segundo por frame
-                loop=0
-            )
-            logger.info(f"📹 Animação criada: {output_path}")
-        except Exception as e:
-            logger.error(f"Erro na animação: {e}")
-    def _create_storyboard(self, image_paths: List[str], output_path: Path):
-        """Criar storyboard das imagens"""
-        try:
-            images = [Image.open(path) for path in image_paths]
-            # Calcular grid
-            cols = min(3, len(images))
-            rows = (len(images) + cols - 1) // cols
-            # Tamanho de cada thumbnail
-            thumb_width, thumb_height = 256, 256
-            # Criar canvas
-            canvas = Image.new(
-                'RGB',
-                (cols * thumb_width, rows * thumb_height),
-                'white'
-            )
-            # Colar imagens
-            for i, img in enumerate(images):
-                row = i // cols
-                col = i % cols
-                # Redimensionar mantendo aspecto
-                img.thumbnail((thumb_width, thumb_height), Image.Resampling.LANCZOS)
-                # Posição no canvas
-                x = col * thumb_width + (thumb_width - img.width) // 2
-                y = row * thumb_height + (thumb_height - img.height) // 2
-                canvas.paste(img, (x, y))
-            canvas.save(output_path)
-            logger.info(f"📋 Storyboard criado: {output_path}")
-        except Exception as e:
-            logger.error(f"Erro no storyboard: {e}")
-def create_gradio_interface(service: VINCIEService):
-    """Criar interface Gradio para VINCIE"""
-    def multi_turn_interface(input_image, turns_text, output_name):
-        if not input_image or not turns_text:
-            return [], "❌ Forneça uma imagem e os prompts"
-        # Parse dos turns (um por linha)
-        prompts = [line.strip() for line in turns_text.split('\n') if line.strip()]
-        if not prompts:
-            return [], "❌ Nenhum prompt válido fornecido"
-        results = service.multi_turn_editing(input_image, prompts, output_name)
-        if results:
-            return results, f"✅ {len(results)} edições geradas com sucesso!"
-        else:
-            return [], "❌ Falha na geração"
-    def composition_interface(concept_images, descriptions_text, final_prompt, output_name):
-        if not concept_images or not descriptions_text or not final_prompt:
-            return None, "❌ Forneça imagens, descrições e prompt final"
-        # Parse das descrições
-        descriptions = [line.strip() for line in descriptions_text.split('\n') if line.strip()]
-        if len(descriptions) != len(concept_images):
-            return None, "❌ Número de descrições deve ser igual ao de imagens"
-        result = service.multi_concept_composition(
-            concept_images, descriptions, final_prompt, output_name
-        )
-        if result:
-            return result, "✅ Composição gerada com sucesso!"
-        else:
-            return None, "❌ Falha na composição"
-    def story_interface(story_prompts_text, initial_image, output_name):
-        if not story_prompts_text:
-            return [], "❌ Forneça os prompts da história"
-        # Parse dos prompts da história
-        prompts = [line.strip() for line in story_prompts_text.split('\n') if line.strip()]
-        if not prompts:
-            return [], "❌ Nenhum prompt válido fornecido"
-        results = service.story_generation(prompts, initial_image, output_name)
-        if results:
-            return results, f"✅ História gerada: {len(results)} capítulos!"
-        else:
-            return [], "❌ Falha na geração da história"
-    # Interface Gradio
-    with gr.Blocks(title="VINCIE Service", theme=gr.themes.Soft()) as interface:
-        gr.Markdown("""
-        # 🎨 VINCIE Multi-turn Image Editing Service
-        **VINCIE**: Unlocking In-context Image Editing from Video
-        Três modos disponíveis:
-        - **Multi-turn Editing**: Edite uma imagem em múltiplas etapas
-        - **Multi-concept Composition**: Combine conceitos de várias imagens
-        - **Story Generation**: Gere uma sequência de imagens contando uma história
-        """)
-        with gr.Tabs():
-            # Tab 1: Multi-turn Editing
-            with gr.TabItem("🔄 Multi-turn Editing"):
-                gr.Markdown("### Edição sequencial de uma imagem")
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        mt_input_image = gr.Image(
-                            label="Imagem inicial",
-                            type="filepath"
-                        )
-                        mt_turns = gr.Textbox(
-                            label="Prompts de edição (um por linha)",
-                            placeholder="Lower the pineapple beside her face\nAdd a crown to the woman's head\nChange the woman's expression to laughing",
-                            lines=5
-                        )
-                        mt_output_name = gr.Textbox(
-                            label="Nome da saída (opcional)",
-                            placeholder="minha_edicao"
-                        )
-                        mt_generate_btn = gr.Button("🎨 Gerar Edições", variant="primary")
-                    with gr.Column(scale=2):
-                        mt_output_gallery = gr.Gallery(
-                            label="Edições geradas",
-                            show_label=True,
-                            elem_id="mt_gallery",
-                            columns=3,
-                            rows=2,
-                            height="auto"
-                        )
-                        mt_status = gr.Textbox(label="Status", interactive=False)
-                mt_generate_btn.click(
-                    fn=multi_turn_interface,
-                    inputs=[mt_input_image, mt_turns, mt_output_name],
-                    outputs=[mt_output_gallery, mt_status]
-                )
-            # Tab 2: Multi-concept Composition
-            with gr.TabItem("🎭 Multi-concept Composition"):
-                gr.Markdown("### Combine conceitos de múltiplas imagens")
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        mc_images = gr.File(
-                            label="Imagens dos conceitos",
-                            file_count="multiple",
-                            file_types=["image"]
-                        )
-                        mc_descriptions = gr.Textbox(
-                            label="Descrições dos conceitos (uma por linha)",
-                            placeholder="father in casual clothes\nmother with blonde hair\nson with school backpack",
-                            lines=4
-                        )
-                        mc_final_prompt = gr.Textbox(
-                            label="Prompt final da composição",
-                            placeholder="A happy family portrait in a sunny park with trees in the background"
-                        )
-                        mc_output_name = gr.Textbox(
-                            label="Nome da saída (opcional)",
-                            placeholder="composicao_familia"
-                        )
-                        mc_generate_btn = gr.Button("🎭 Gerar Composição", variant="primary")
-                    with gr.Column(scale=2):
-                        mc_output = gr.Image(label="Composição gerada")
-                        mc_status = gr.Textbox(label="Status", interactive=False)
-                mc_generate_btn.click(
-                    fn=composition_interface,
-                    inputs=[mc_images, mc_descriptions, mc_final_prompt, mc_output_name],
-                    outputs=[mc_output, mc_status]
-                )
-            # Tab 3: Story Generation
-            with gr.TabItem("📖 Story Generation"):
-                gr.Markdown("### Gere uma sequência de imagens contando uma história")
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        sg_initial = gr.Image(
-                            label="Imagem inicial (opcional)",
-                            type="filepath"
-                        )
-                        sg_prompts = gr.Textbox(
-                            label="Prompts da história (um por linha)",
-                            placeholder="A brave knight starts his journey\nHe encounters a dragon in a cave\nHe befriends the dragon\nThey fly together into the sunset",
-                            lines=6
-                        )
-                        sg_output_name = gr.Textbox(
-                            label="Nome da história (opcional)",
-                            placeholder="historia_cavaleiro"
-                        )
-                        sg_generate_btn = gr.Button("📖 Gerar História", variant="primary")
-                    with gr.Column(scale=2):
-                        sg_output_gallery = gr.Gallery(
-                            label="Capítulo

 #!/usr/bin/env python3
 import os
 import json
+import shlex
+import subprocess
 from pathlib import Path
+from typing import List, Optional
+from huggingface_hub import snapshot_download
class VincieService:
    """Thin wrapper that drives the VINCIE command-line entry point.

    The service:
    - makes sure the VINCIE repository is cloned and the model files are
      downloaded (``ensure_repo`` / ``ensure_model``);
    - runs ``python main.py configs/generate.yaml`` inside the repository
      with command-line overrides;
    - exposes high-level helpers for multi-turn editing and multi-concept
      composition.
    """

    def __init__(self,
                 repo_dir: str = "/app/VINCIE",
                 ckpt_dir: str = "/app/ckpt/VINCIE-3B",
                 python_bin: str = "python"):
        """Store the paths used by the service and create the output root.

        Args:
            repo_dir: Directory of the VINCIE checkout.
            ckpt_dir: Directory holding the model checkpoint files.
            python_bin: Interpreter used to launch ``main.py``.
        """
        self.repo_dir = Path(repo_dir)
        self.ckpt_dir = Path(ckpt_dir)
        self.python = python_bin
        self.generate_yaml = self.repo_dir / "configs" / "generate.yaml"
        self.assets_dir = self.repo_dir / "assets"
        self.output_root = Path("/app/outputs")
        # Side effect: the output root is created as soon as the service is built.
        self.output_root.mkdir(parents=True, exist_ok=True)

    # ---------- Setup ----------
    def ensure_repo(self, git_url: str = "https://github.com/ByteDance-Seed/VINCIE") -> None:
        """Clone ``git_url`` into ``repo_dir`` when that directory is missing.

        An existing directory is left untouched (no pull / update is done).

        Raises:
            subprocess.CalledProcessError: If ``git clone`` exits non-zero.
        """
        if not self.repo_dir.exists():
            subprocess.run(["git", "clone", git_url, str(self.repo_dir)], check=True)

    def ensure_model(self, repo_id: str = "ByteDance-Seed/VINCIE-3B") -> None:
        """Download the ``repo_id`` snapshot from the Hugging Face Hub into ``ckpt_dir``."""
        self.ckpt_dir.mkdir(parents=True, exist_ok=True)
        # NOTE(review): `local_dir_use_symlinks` and `resume_download` are
        # reported as deprecated by recent huggingface_hub releases - confirm
        # against the pinned version before removing them.
        snapshot_download(
            repo_id=repo_id,
            local_dir=str(self.ckpt_dir),
            local_dir_use_symlinks=False,
            resume_download=True
        )

    def ready(self) -> bool:
        """Return True when the repo, its generate config and a checkpoint are present.

        The checkpoint directory must contain at least one entry:
        ``ensure_model`` creates the directory *before* downloading, so a
        failed download would otherwise leave an empty directory that looks
        ready.
        """
        if not (self.repo_dir.exists() and self.generate_yaml.exists()):
            return False
        if not self.ckpt_dir.is_dir():
            return False
        return any(self.ckpt_dir.iterdir())

    # ---------- Core runner ----------
    def _run_vincie(self, overrides: List[str], work_output: Path) -> None:
        """Run ``main.py`` with ``overrides`` and write results to ``work_output``.

        Args:
            overrides: ``key=value`` command-line overrides, passed verbatim.
            work_output: Output directory; created if missing and appended
                as the final ``generation.output.dir=...`` override.

        Raises:
            subprocess.CalledProcessError: If the VINCIE process exits non-zero.
        """
        work_output.mkdir(parents=True, exist_ok=True)
        cmd = [
            self.python,
            "main.py",
            str(self.generate_yaml),
            *overrides,
            f"generation.output.dir={str(work_output)}"
        ]
        # main.py is resolved relative to the VINCIE checkout, so run from there.
        subprocess.run(cmd, cwd=self.repo_dir, check=True)

    # ---------- Multi-turn editing ----------
    def multi_turn_edit(self,
                        input_image: str,
                        turns: List[str],
                        out_dir_name: Optional[str] = None) -> Path:
        r"""Apply a sequence of edit prompts to one input image.

        Equivalent to the command-line example::

            python main.py configs/generate.yaml \
              generation.positive_prompt.image_path="[...]" \
              generation.positive_prompt.prompts="[...]" \
              generation.output.dir=...

        Args:
            input_image: Path of the starting image.
            turns: Edit prompts, one per turn.
            out_dir_name: Output folder name under the output root; defaults
                to ``multi_turn_<slug of input_image>``.

        Returns:
            The output directory passed to VINCIE.
        """
        out_dir = self.output_root / (out_dir_name or f"multi_turn_{self._slug(input_image)}")
        # Lists are serialised as JSON so they reach main.py as a single argv item.
        image_json = json.dumps([str(input_image)])
        prompts_json = json.dumps(turns)
        overrides = [
            f'generation.positive_prompt.image_path={image_json}',
            f'generation.positive_prompt.prompts={prompts_json}',
            f'ckpt.path={str(self.ckpt_dir)}'
        ]
        self._run_vincie(overrides, out_dir)
        return out_dir

    # ---------- Multi-concept composition ----------
    def multi_concept_compose(self,
                              concept_images: List[str],
                              concept_prompts: List[str],
                              final_prompt: str,
                              out_dir_name: Optional[str] = None) -> Path:
        """Compose several concept images into one result.

        Args:
            concept_images: Image paths, one per concept.
            concept_prompts: Per-concept prompts, intended to line up with
                ``<IMG0>``, ``<IMG1>``, ... (any sequence is accepted).
            final_prompt: Composition prompt, appended after the concept prompts.
            out_dir_name: Output folder name under the output root; defaults
                to ``multi_concept``.

        Returns:
            The output directory passed to VINCIE.
        """
        # NOTE(review): the default folder name is constant, so successive
        # calls without `out_dir_name` share one directory.
        out_dir = self.output_root / (out_dir_name or "multi_concept")
        imgs_json = json.dumps([str(p) for p in concept_images])
        # Unpacking (rather than `+`) also works when a tuple is passed.
        prompts_all = [*concept_prompts, final_prompt]
        prompts_json = json.dumps(prompts_all)
        overrides = [
            f'generation.positive_prompt.image_path={imgs_json}',
            f'generation.positive_prompt.prompts={prompts_json}',
            # Key spelling kept exactly as in the original, which notes that it
            # follows the public example - TODO confirm before "fixing" it.
            "generation.pad_img_placehoder=False",
            f'ckpt.path={str(self.ckpt_dir)}'
        ]
        self._run_vincie(overrides, out_dir)
        return out_dir

    # ---------- Helpers ----------
    @staticmethod
    def _slug(path_or_text: str) -> str:
        """Return a filesystem-friendly name of at most 64 characters.

        If ``path_or_text`` names an existing path its stem is used, otherwise
        the text itself. Every character that is not alphanumeric or one of
        ``-_.`` becomes ``_``.
        """
        try:
            is_existing_path = Path(path_or_text).exists()
        except (OSError, ValueError):
            # Free text (e.g. an over-long string) can make the filesystem
            # probe itself fail; treat that as "not a path".
            is_existing_path = False
        base = Path(path_or_text).stem if is_existing_path else path_or_text
        keep = "".join(c if c.isalnum() or c in "-_." else "_" for c in str(base))
        return keep[:64]