# (Hugging Face Spaces page residue — "Spaces: Sleeping" status banner;
# not part of the application code.)
| import os | |
| import re | |
| import json | |
| import xml.etree.ElementTree as ET | |
| from dataclasses import dataclass | |
| from typing import List, Tuple, Optional, Callable | |
| import gradio as gr | |
# =========================
# General configuration
# =========================
OUTPUT_DIR = "./Output"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# =========================
# LLM (Gemini)
# =========================
# Whether the "use AI" checkbox starts enabled in the UI.
USE_LLM_DEFAULT = True
# API key comes from the environment; empty string disables the LLM path.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
LLM_AVAILABLE = False
LLM_MODEL_NAME = "gemini-2.0-flash-exp"
try:
    if GEMINI_API_KEY:
        # Imported lazily so the app still starts when the SDK is absent.
        import google.generativeai as genai
        genai.configure(api_key=GEMINI_API_KEY)
        LLM = genai.GenerativeModel(LLM_MODEL_NAME)
        LLM_AVAILABLE = True
    else:
        LLM = None
except Exception:
    # Any SDK/config failure degrades gracefully to the no-LLM mode.
    LLM = None
    LLM_AVAILABLE = False
| # ========================= | |
| # Modelos | |
| # ========================= | |
@dataclass
class Segment:
    """One selected cut: source timecodes/frames, transcript text and score.

    BUG FIX: the class had dataclass-style field annotations but no
    @dataclass decorator, so every keyword construction elsewhere in the
    file (Segment(start_tc=..., ...)) raised TypeError. The decorator was
    already imported at the top of the file.
    """
    start_tc: str   # start timecode, "HH:MM:SS:FF"
    end_tc: str     # end timecode, "HH:MM:SS:FF"
    start_f: int    # start frame (absolute)
    end_f: int      # end frame (absolute, exclusive of the cut)
    text: str       # transcript excerpt for this cut
    score: float    # ranking score assigned by the selection mode
| # ========================= | |
| # Funções de Timecode | |
| # ========================= | |
| def _tc_to_hmsf(tc: str, fps: int) -> Tuple[int, int, int, int]: | |
| """Converte timecode para (hh, mm, ss, ff).""" | |
| s = tc.strip() | |
| m = re.match(r'^(\d{1,2}):(\d{2}):(\d{2})[:;](\d{2})$', s) | |
| if m: | |
| hh, mm, ss, ff = map(int, m.groups()) | |
| return hh, mm, ss, ff | |
| m = re.match(r'^(\d{1,2}):(\d{2}):(\d{2})[.,](\d{1,3})$', s) | |
| if m: | |
| hh, mm, ss, ms = map(int, m.groups()) | |
| ff = int(round((ms / 1000.0) * fps)) | |
| if ff >= fps: | |
| ss += 1 | |
| ff = 0 | |
| return hh, mm, ss, ff | |
| m = re.match(r'^(\d{1,2}):(\d{2}):(\d{2})$', s) | |
| if m: | |
| hh, mm, ss = map(int, m.groups()) | |
| return hh, mm, ss, 0 | |
| raise ValueError(f"Timecode inválido: {tc}") | |
def parse_timecode_to_frames(tc: str, fps: int) -> int:
    """Return the absolute frame number for timecode *tc* at *fps* frames/sec."""
    hh, mm, ss, ff = _tc_to_hmsf(tc, fps)
    total_seconds = (hh * 60 + mm) * 60 + ss
    return total_seconds * fps + ff
def frames_to_timecode(frames: int, fps: int) -> str:
    """Format an absolute frame count as an "HH:MM:SS:FF" timecode string."""
    total_seconds, ff = divmod(frames, fps)
    total_minutes, ss = divmod(total_seconds, 60)
    hh, mm = divmod(total_minutes, 60)
    return f"{hh:02d}:{mm:02d}:{ss:02d}:{ff:02d}"
| # ========================= | |
| # Parser de Transcrição | |
| # ========================= | |
def parse_transcript(txt: str, fps: int) -> List[Segment]:
    """Robust transcript parser supporting multiple formats.

    Handles both "start - end [text]" range lines (optionally bracketed)
    and WEBVTT/SRT-style blocks ("start --> end" followed by text lines,
    possibly preceded by an SRT cue number). Entries with non-positive
    duration or unparseable timecodes are silently skipped.
    """
    if not txt or not txt.strip():
        return []
    lines = [l.rstrip() for l in txt.splitlines()]
    results: List[Segment] = []
    # "HH:MM:SS[:FF|.mmm] - HH:MM:SS[...]" with optional brackets and trailing text.
    line_range = re.compile(
        r'^\s*\[?\s*(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*[-—–]\s*'
        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*\]?\s*(.*)$'
    )
    # WEBVTT/SRT cue timing line: "start --> end".
    arrow = re.compile(
        r'(\d{1,2}:\d{2}:\d{2}(?:[.,]\d{1,3}|[:;]\d{2})?)\s*-->\s*'
        r'(\d{1,2}:\d{2}:\d{2}(?:[.,]\d{1,3}|[:;]\d{2})?)'
    )
    i = 0
    while i < len(lines):
        raw = lines[i].strip()
        if not raw or raw.lower() == "desconhecido":
            # Skip blanks and "unknown speaker" placeholder lines.
            i += 1
            continue
        m = line_range.match(raw)
        if m:
            start_tc, end_tc, trailing_text = m.groups()
            text_parts = []
            if trailing_text.strip():
                text_parts.append(trailing_text.strip())
            else:
                # No inline text: collect following lines until the next
                # cue marker, cue number, timing line or blank.
                j = i + 1
                while j < len(lines):
                    nxt = lines[j].strip()
                    if not nxt or line_range.match(nxt) or re.match(r'^\d+\s*$', nxt) or arrow.search(nxt):
                        break
                    text_parts.append(nxt)
                    j += 1
                # Position so the `i += 1` below lands on the first unconsumed line.
                i = j - 1
            text = " ".join(text_parts).strip()
            try:
                sf = parse_timecode_to_frames(start_tc, fps)
                ef = parse_timecode_to_frames(end_tc, fps)
                if ef > sf:
                    results.append(Segment(
                        start_tc=frames_to_timecode(sf, fps),
                        end_tc=frames_to_timecode(ef, fps),
                        start_f=sf,
                        end_f=ef,
                        text=text if text else f"{start_tc} - {end_tc}",
                        score=0.0
                    ))
            except Exception:
                # Malformed timecodes: drop this entry, keep parsing.
                pass
            i += 1
            continue
        # VTT/SRT-style block: "00:00:01,000 --> 00:00:03,000"
        if arrow.search(raw) or (i + 1 < len(lines) and arrow.search(lines[i + 1])):
            # The timing may be on this line or the next one (after an SRT cue number).
            line_with_tc = raw if arrow.search(raw) else lines[i + 1]
            mm = arrow.search(line_with_tc)
            if mm:
                start_tc, end_tc = mm.groups()
                j = i + 1 if line_with_tc == raw else i + 2
                text_parts = []
                while j < len(lines):
                    nxt = lines[j].strip()
                    if not nxt:
                        break
                    # A bare number followed by a timing line starts the next cue.
                    if re.match(r'^\d+\s*$', nxt) and (j + 1 < len(lines) and arrow.search(lines[j + 1])):
                        break
                    if arrow.search(nxt):
                        break
                    text_parts.append(nxt)
                    j += 1
                text = " ".join(text_parts).strip()
                try:
                    sf = parse_timecode_to_frames(start_tc, fps)
                    ef = parse_timecode_to_frames(end_tc, fps)
                    if ef > sf:
                        results.append(Segment(
                            start_tc=frames_to_timecode(sf, fps),
                            end_tc=frames_to_timecode(ef, fps),
                            start_f=sf,
                            end_f=ef,
                            text=text,
                            score=0.0
                        ))
                except Exception:
                    pass
                i = j + 1
                continue
        i += 1
    return results
| # ========================= | |
| # Minutagens Manuais | |
| # ========================= | |
def parse_manual_timecodes(manual_input: str) -> List[Tuple[str, str]]:
    """Extract (start, end) timecode pairs from user-entered text.

    Accepts one range per chunk (commas also act as separators); any of
    "-", "–" or "—" may separate the two timecodes. Chunks that do not
    contain a range are ignored.
    """
    if not manual_input or not manual_input.strip():
        return []
    pattern = re.compile(
        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*[-–—]\s*'
        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)'
    )
    found = []
    for chunk in manual_input.replace(",", "\n").splitlines():
        hit = pattern.search(chunk.strip())
        if hit is not None:
            found.append((hit.group(1), hit.group(2)))
    return found
| # ========================= | |
| # Helpers para JSON do LLM | |
| # ========================= | |
| def _extract_json_block(text: str) -> str: | |
| """ | |
| Extrai o melhor bloco JSON da resposta do LLM. | |
| 1) Prioriza bloco entre ```json ... ``` | |
| 2) Senão, recorta do primeiro '{' até o '}' pareado. | |
| """ | |
| if not text: | |
| raise ValueError("Resposta vazia do LLM") | |
| m = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text, flags=re.IGNORECASE) | |
| if m: | |
| return m.group(1).strip() | |
| start = text.find("{") | |
| if start == -1: | |
| raise ValueError("Nenhum '{' encontrado na resposta do LLM") | |
| depth = 0 | |
| for i in range(start, len(text)): | |
| c = text[i] | |
| if c == "{": | |
| depth += 1 | |
| elif c == "}": | |
| depth -= 1 | |
| if depth == 0: | |
| return text[start:i + 1].strip() | |
| end = text.rfind("}") | |
| if end != -1 and end > start: | |
| return text[start:end + 1].strip() | |
| raise ValueError("Não foi possível delimitar um JSON na resposta do LLM") | |
| def _coerce_to_strict_json(s: str) -> str: | |
| """ | |
| Tenta consertar JSONs “quase-JSON”. | |
| - remove comentários | |
| - troca aspas curvas por retas | |
| - remove vírgulas finais | |
| - adiciona aspas em chaves não-aspadas | |
| - tenta converter aspas simples para duplas | |
| """ | |
| t = s.strip() | |
| t = re.sub(r"^json\s*", "", t, flags=re.IGNORECASE).strip().strip("`") | |
| t = (t.replace("\u201c", '"') | |
| .replace("\u201d", '"') | |
| .replace("\u2018", "'") | |
| .replace("\u2019", "'")) | |
| t = re.sub(r"//.*?$", "", t, flags=re.MULTILINE) | |
| t = re.sub(r"/\*[\s\S]*?\*/", "", t) | |
| t = re.sub(r",(\s*[}\]])", r"\1", t) | |
| t = re.sub(r'([{,]\s*)([A-Za-z_][A-Za-z0-9_]*)\s*:', r'\1"\2":', t) | |
| try: | |
| json.loads(t) | |
| return t | |
| except Exception: | |
| t2 = re.sub(r"'", '"', t) | |
| return t2 | |
| # ========================= | |
| # IA: Análise Inteligente com Gemini | |
| # ========================= | |
def ai_analyze_and_select(
    segments: List[Segment],
    command: str,
    fps: int,
    progress_callback: Optional[Callable[[str], None]] = None
) -> List[Segment]:
    """
    Use Gemini to analyze the full transcript and pick the best cuts.

    Sends the whole indexed transcript plus the user command to the model,
    expects a strict-JSON reply listing cut start indices and durations,
    and converts each cut into a Segment. JSON extraction/repair is done
    defensively so malformed model output does not crash the pipeline.

    Raises ValueError when the LLM is unavailable, there are no segments,
    or the model reply cannot be parsed into a valid "cuts" list.
    """
    if not LLM_AVAILABLE or not segments:
        raise ValueError("IA não disponível ou sem segmentos para analisar")
    if progress_callback:
        progress_callback("Etapa 1/3: preparando dados para análise...")
    # Build the full transcript as an indexed JSON payload for the prompt;
    # each text is capped at 200 chars to keep the prompt small.
    transcript_data = []
    for i, seg in enumerate(segments):
        duration_sec = max(0, (seg.end_f - seg.start_f) / fps)
        transcript_data.append({
            "index": i,
            "timecode": seg.start_tc,
            "duration_sec": round(duration_sec, 1),
            "text": (seg.text or "")[:200]
        })
    transcript_json = json.dumps(transcript_data, ensure_ascii=False, indent=2)
    if progress_callback:
        progress_callback(f"Etapa 2/3: analisando {len(segments)} segmentos com IA...")
    prompt = f"""Você é um especialista em edição de vídeo. Analise a transcrição e identifique os MELHORES trechos baseado no comando do usuário.
COMANDO DO USUÁRIO:
{command}
TRANSCRIÇÃO COMPLETA (formato JSON com index, timecode, duração e texto):
{transcript_json}
INSTRUÇÕES:
1. Leia o comando com atenção e identifique:
- Quantidade de cortes desejada
- Duração de cada corte (em segundos)
- Tema/assunto/palavras-chave mencionados
- Timecode de início (se mencionado)
2. Analise TODA a transcrição e identifique os segmentos que melhor correspondem ao comando
3. Responda APENAS com JSON estrito:
{{
"cuts": [
{{
"start_index": <int>,
"duration_seconds": <int>,
"reason": "<string curta>"
}}
]
}}"""
    # Call the LLM; some SDK versions accept a JSON mime type — fall back
    # to a plain call if that generation_config key is rejected.
    try:
        response = LLM.generate_content(
            prompt,
            generation_config={
                "temperature": 0.2,
                "max_output_tokens": 2000,
                "response_mime_type": "application/json"
            }
        )
    except Exception:
        response = LLM.generate_content(
            prompt,
            generation_config={
                "temperature": 0.2,
                "max_output_tokens": 2000
            }
        )
    response_text = (getattr(response, "text", "") or "").strip()
    if progress_callback:
        progress_callback("Etapa 3/3: processando resposta da IA...")
    # Extract and parse the JSON resiliently (fenced block, brace matching,
    # then best-effort repair via _coerce_to_strict_json).
    try:
        raw_json = _extract_json_block(response_text)
        try:
            result = json.loads(raw_json)
        except json.JSONDecodeError:
            fixed = _coerce_to_strict_json(raw_json)
            result = json.loads(fixed)
    except Exception as e:
        snippet = response_text[:600].replace("\n", " ")
        raise ValueError(f"Erro ao processar resposta da IA (JSON inválido): {e}. Amostra: {snippet}")
    cuts_data = result.get("cuts", [])
    if not isinstance(cuts_data, list) or not cuts_data:
        raise ValueError("IA não retornou a lista 'cuts' com itens válidos")
    selected_segments: List[Segment] = []
    for cut_info in cuts_data:
        try:
            start_idx = int(cut_info.get("start_index", 0))
            duration_sec = int(cut_info.get("duration_seconds", 60))
            reason = str(cut_info.get("reason", "")).strip()
        except Exception:
            # Malformed cut entry: skip it.
            continue
        if start_idx < 0 or start_idx >= len(segments):
            # Index out of range: the model hallucinated a segment.
            continue
        start_seg = segments[start_idx]
        start_frame = start_seg.start_f
        duration_frames = max(0, int(duration_sec * fps))
        end_frame = start_frame + duration_frames
        text_parts = [f"[IA] {reason}"] if reason else []
        # Collect the text of every segment that starts inside the cut window.
        for seg in segments[start_idx:]:
            if seg.start_f < end_frame:
                if seg.text:
                    text_parts.append(seg.text[:150])
            else:
                break
        combined_text = " [...] ".join(text_parts)[:500]
        selected_segments.append(Segment(
            start_tc=frames_to_timecode(start_frame, fps),
            end_tc=frames_to_timecode(end_frame, fps),
            start_f=start_frame,
            end_f=end_frame,
            text=combined_text,
            score=100.0
        ))
    return selected_segments
| # ========================= | |
| # Processamento com Comando Manual (sem IA) | |
| # ========================= | |
def manual_command_processing(segments: List[Segment], command: str, fps: int) -> List[Segment]:
    """Fallback: basic, AI-free handling of simple natural-language commands.

    Recognizes the number of cuts ("N cortes/clipes/segmentos"), their
    duration ("N segundos" / "N minutos") and an optional starting
    timecode ("começando"/"a partir de HH:MM:SS[:FF]"), then emits that
    many back-to-back cuts from the starting frame.
    """
    lowered = (command or "").lower()
    # How many cuts were requested (default: 1).
    count = 1
    match = re.search(r'(\d+)\s*(?:cortes?|clipes?|segmentos?)', lowered)
    if match:
        count = int(match.group(1))
    # Cut duration in seconds (default: 60).
    duration_sec = 60
    match = re.search(r'(\d+)\s*(?:segundos?|s\b)', lowered)
    if match:
        duration_sec = int(match.group(1))
    else:
        match = re.search(r'(\d+)\s*(?:minutos?|min\b)', lowered)
        if match:
            duration_sec = int(match.group(1)) * 60
    # Optional starting timecode; ignore it if it fails to parse.
    start_frame = 0
    match = re.search(r'(?:começando|a partir de)\s+(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)', lowered)
    if match:
        try:
            start_frame = parse_timecode_to_frames(match.group(1), fps)
        except Exception:
            pass
    # Emit consecutive, contiguous cuts from the starting frame.
    results = []
    cursor = start_frame
    for idx in range(count):
        cut_end = cursor + duration_sec * fps
        # Gather text of segments starting inside this cut's window.
        snippets = [
            seg.text[:100]
            for seg in segments
            if cursor <= seg.start_f < cut_end and seg.text
        ]
        combined_text = " [...] ".join(snippets[:10])[:400]
        results.append(Segment(
            start_tc=frames_to_timecode(cursor, fps),
            end_tc=frames_to_timecode(cut_end, fps),
            start_f=cursor,
            end_f=cut_end,
            text=combined_text if combined_text else f"Corte {idx+1}",
            score=50.0
        ))
        cursor = cut_end
    return results
| # ========================= | |
| # Modo Automático | |
| # ========================= | |
def auto_score_segments(
    segs: List[Segment],
    num_segments: int,
    custom_keywords: str,
    weight_emotion: float,
    weight_break: float,
    weight_learn: float,
    weight_viral: float
) -> List[Segment]:
    """Keyword-based automatic scoring: rank segments and keep the top ones.

    Each built-in keyword category contributes its weight once per matching
    word; user-supplied comma-separated keywords add a flat 5.0 each.
    Mutates each segment's score in place, sorts the list in place by
    descending score and returns the num_segments best (at least one).
    """
    categories = [
        (['medo', 'coragem', 'amor', 'ódio', 'paixão', 'alegria', 'tristeza'], weight_emotion),
        (['nunca', 'de repente', 'surpreendente', 'inesperado', 'incrível'], weight_break),
        (['aprendi', 'descobri', 'entendi', 'percebi', 'lição'], weight_learn),
        (['segredo', 'verdade', 'revelação', 'exclusivo', 'confissão'], weight_viral),
    ]
    for seg in segs:
        haystack = (seg.text or "").lower()
        total = 0.0
        for words, weight in categories:
            total += sum(weight for word in words if word in haystack)
        if custom_keywords:
            for raw_kw in custom_keywords.split(","):
                kw = raw_kw.strip().lower()
                if kw and kw in haystack:
                    total += 5.0
        seg.score = total
    segs.sort(key=lambda x: x.score, reverse=True)
    return segs[:max(1, num_segments)]
| # ========================= | |
| # Edição de XML | |
| # ========================= | |
def deep_copy_element(elem: ET.Element) -> ET.Element:
    """Recursively clone an Element, including attributes, text, tail and children."""
    clone = ET.Element(elem.tag, attrib=dict(elem.attrib))
    clone.text = elem.text
    clone.tail = elem.tail
    clone.extend(deep_copy_element(child) for child in elem)
    return clone
def _build_clipitem(clip_id: str, i: int, timeline_pos: int, duration: int, seg, template) -> ET.Element:
    """Create one <clipitem> placed at timeline_pos, copying <rate>/<file> from template."""
    clip = ET.Element("clipitem", {"id": clip_id})
    ET.SubElement(clip, "name").text = f"Clip {i}"
    ET.SubElement(clip, "start").text = str(timeline_pos)
    ET.SubElement(clip, "end").text = str(timeline_pos + duration)
    ET.SubElement(clip, "in").text = str(seg.start_f)
    ET.SubElement(clip, "out").text = str(seg.end_f)
    if template is not None:
        rate = template.find("rate")
        if rate is not None:
            clip.append(deep_copy_element(rate))
        file_elem = template.find("file")
        if file_elem is not None:
            # Reuse the original media reference so Premiere relinks correctly.
            clip.append(deep_copy_element(file_elem))
    return clip
def edit_xml(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
    """Rebuild the sequence's first video and audio tracks from the selected segments.

    Removes all existing clipitems and appends, for each segment, one video
    and one audio clipitem laid back-to-back on the timeline, reusing the
    first original clipitem's <rate> and <file> nodes as templates.

    Raises ValueError when the XML lacks a <sequence> or the tracks.
    """
    root = tree.getroot()
    seq = root.find(".//sequence")
    if seq is None:
        raise ValueError("Sequence não encontrada no XML")
    v_track = seq.find(".//media/video/track")
    a_track = seq.find(".//media/audio/track")
    # BUG FIX: Element truthiness reflects the child count, so the previous
    # `if not v_track or not a_track` wrongly rejected an existing-but-empty
    # track. Test for None explicitly.
    if v_track is None or a_track is None:
        raise ValueError("Trilhas de vídeo/áudio não encontradas")
    # Keep the first original clipitems as templates before clearing the tracks.
    v_template = v_track.find("./clipitem")
    a_template = a_track.find("./clipitem")
    for clip in list(v_track.findall("./clipitem")):
        v_track.remove(clip)
    for clip in list(a_track.findall("./clipitem")):
        a_track.remove(clip)
    timeline_pos = 0
    for i, seg in enumerate(segs, 1):
        duration = seg.end_f - seg.start_f
        if duration <= 0:
            continue  # skip degenerate segments
        v_track.append(_build_clipitem(f"clip-v{i}", i, timeline_pos, duration, seg, v_template))
        a_track.append(_build_clipitem(f"clip-a{i}", i, timeline_pos, duration, seg, a_template))
        timeline_pos += duration
    return tree
| # ========================= | |
| # Seleção (orquestração) | |
| # ========================= | |
def select_segments(
    transcript_txt: str,
    use_llm: bool,
    num_segments: int,
    custom_keywords: str,
    manual_timecodes: str,
    natural_instructions: str,
    weight_emotion: float,
    weight_break: float,
    weight_learn: float,
    weight_viral: float,
    fps: int,
    progress_callback: Optional[Callable[[str], None]] = None
) -> List[Segment]:
    """Orchestrate segment selection, in priority order:

    1. Manual timecodes (exact; bypasses every other mode).
    2. Natural-language command — via Gemini when enabled and available,
       otherwise the basic regex-based command parser.
    3. Automatic keyword scoring over the parsed transcript.

    Raises ValueError when no usable input is provided for the chosen mode.
    """
    # 1) Manual timecodes win outright.
    manual = parse_manual_timecodes(manual_timecodes)
    if manual:
        result = []
        for start_tc, end_tc in manual:
            try:
                # Parse each timecode once and reuse the frame values
                # (previously each timecode was parsed twice).
                sf = parse_timecode_to_frames(start_tc, fps)
                ef = parse_timecode_to_frames(end_tc, fps)
                result.append(Segment(
                    start_tc=frames_to_timecode(sf, fps),
                    end_tc=frames_to_timecode(ef, fps),
                    start_f=sf,
                    end_f=ef,
                    text=f"Manual: {start_tc} - {end_tc}",
                    score=100.0
                ))
            except Exception:
                # Unparseable range: skip it, keep the rest.
                pass
        return result
    # 2) Parse the transcript (needed by modes 3 and 4).
    segs = parse_transcript(transcript_txt, fps) if transcript_txt else []
    # 3) Natural-language command.
    if natural_instructions.strip():
        if use_llm and LLM_AVAILABLE and segs:
            return ai_analyze_and_select(segs, natural_instructions, fps, progress_callback)
        elif segs:
            return manual_command_processing(segs, natural_instructions, fps)
        else:
            raise ValueError(
                "Para usar comandos em linguagem natural, forneça uma transcrição "
                "ou use minutagens manuais."
            )
    # 4) Automatic keyword scoring.
    if not segs:
        raise ValueError("Nenhum segmento encontrado. Envie transcrição, minutagens ou um comando em linguagem natural.")
    return auto_score_segments(
        segs, num_segments, custom_keywords,
        weight_emotion, weight_break, weight_learn, weight_viral
    )
| # ========================= | |
| # Pipeline principal | |
| # ========================= | |
def process_files(
    xml_file, txt_file, use_llm, num_segments,
    custom_keywords, manual_timecodes, natural_instructions,
    weight_emotion, weight_break, weight_learn, weight_viral,
    fps,
    progress=gr.Progress()
):
    """Gradio handler: run the full pipeline and return (summary, output_path, status).

    Loads the Premiere XML (and the transcript, unless manual timecodes are
    given), selects segments according to the active mode, rewrites the XML
    timeline and writes it to OUTPUT_DIR. On any error, returns the error
    text instead of raising so the UI stays responsive.

    NOTE(review): `progress=gr.Progress()` as a default argument is the
    documented Gradio pattern for progress tracking, not an accidental
    mutable-default bug.
    """
    if not xml_file:
        return "Envie o XML do Premiere", None, f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
    try:
        debug_info = []
        def progress_callback(msg):
            # Forward LLM-stage messages to the progress bar and keep them for the log.
            progress(0.5, desc=msg)
            debug_info.append(msg)
        progress(0.1, desc="Carregando arquivos...")
        transcript = ""
        manual = parse_manual_timecodes(manual_timecodes)
        if not manual and txt_file:
            # utf-8-sig transparently strips a BOM if present.
            with open(txt_file.name, "r", encoding="utf-8-sig") as f:
                transcript = f.read()
            debug_info.append(f"Transcrição: {len(transcript)} caracteres")
        progress(0.2, desc="Selecionando segmentos...")
        segments = select_segments(
            transcript, bool(use_llm) and LLM_AVAILABLE, int(num_segments),
            custom_keywords, manual_timecodes, natural_instructions,
            float(weight_emotion), float(weight_break), float(weight_learn), float(weight_viral),
            int(fps),
            progress_callback
        )
        if not segments:
            return "Nenhum segmento selecionado", None, f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
        # Enforce a minimum duration of at least one second per segment.
        valid_segments = []
        for seg in segments:
            if seg.end_f > seg.start_f and (seg.end_f - seg.start_f) >= max(1, int(fps)):
                valid_segments.append(seg)
        if not valid_segments:
            return "Segmentos inválidos (duração muito curta)", None, f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
        segments = valid_segments
        debug_info.append(f"{len(segments)} segmento(s) válidos")
        progress(0.7, desc="Editando XML...")
        tree = ET.parse(xml_file.name)
        tree = edit_xml(tree, segments)
        basename = os.path.splitext(os.path.basename(xml_file.name))[0]
        output_path = os.path.join(OUTPUT_DIR, f"{basename}_EDITADO.xml")
        tree.write(output_path, encoding="utf-8", xml_declaration=True)
        progress(0.9, desc="Gerando resumo...")
        total_sec = sum((s.end_f - s.start_f) / fps for s in segments)
        total_min = total_sec / 60.0
        # Label which selection mode actually ran (mirrors select_segments' priority).
        if manual:
            mode = "Manual"
        elif natural_instructions.strip() and use_llm and LLM_AVAILABLE:
            mode = "IA Completa (Gemini)"
        elif natural_instructions.strip():
            mode = "Básico (sem IA)"
        else:
            mode = "Automático"
        summary_lines = [
            "RESULTADO",
            f"- Cortes: {len(segments)}",
            f"- Duração total: {total_min:.1f} min",
            f"- Modo: {mode}",
            ""
        ]
        for i, seg in enumerate(segments, 1):
            dur_sec = (seg.end_f - seg.start_f) / fps
            dur_min = dur_sec / 60.0
            line = f"Corte {i}\n {seg.start_tc} -> {seg.end_tc} ({dur_min:.2f} min / {dur_sec:.0f}s)"
            if seg.text and len(seg.text.strip()) > 10:
                text_preview = seg.text[:200].strip()
                if len(seg.text) > 200:
                    text_preview += "..."
                line += f"\n {text_preview}"
            summary_lines.append(line)
            summary_lines.append("")
        if debug_info:
            summary_lines.append("Log do processamento:")
            summary_lines.extend(f"- {info}" for info in debug_info)
        summary = "\n".join(summary_lines)
        status = f"Sucesso | {mode} | {total_min:.1f} min | LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
        progress(1.0, desc="Concluído")
        return summary, output_path, status
    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        # Print the full traceback to the server log; return a truncated copy to the UI.
        print(error_trace)
        error_msg = f"Erro: {str(e)}\n\nDetalhes:\n{error_trace[:800]}"
        return error_msg, None, f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
# =========================
# Gradio interface
# =========================
with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere - IA") as demo:
    gr.Markdown("# Editor XML Premiere - IA Completa (Gemini)")
    status_inicial = f"{'IA Gemini ativa' if LLM_AVAILABLE else 'IA desabilitada: configure GEMINI_API_KEY'}"
    gr.Markdown(f"Status: {status_inicial}")
    # Input files.
    with gr.Row():
        xml_in = gr.File(label="XML do Premiere", file_types=[".xml"])
        txt_in = gr.File(label="Transcrição (.txt) — obrigatória para IA", file_types=[".txt"])
    with gr.Row():
        use_llm = gr.Checkbox(
            label="Usar IA Gemini (análise completa — recomendado)",
            value=USE_LLM_DEFAULT and LLM_AVAILABLE,
            interactive=LLM_AVAILABLE  # greyed out when no API key is configured
        )
        num_segments = gr.Slider(2, 20, 5, 1, label="Quantidade de segmentos (modo automático)")
        fps_in = gr.Slider(12, 60, 24, 1, label="FPS")
    # Mode: natural-language command (primary).
    with gr.Accordion("Comando em linguagem natural (modo principal)", open=True):
        gr.Markdown(
            "Exemplos: \n"
            '- "Crie 3 cortes de 30 segundos sobre disciplina"\n'
            '- "2 clipes de 1 minuto falando sobre Maria"\n'
            '- "Corte de 5 minutos começando em 00:02:00:00 sobre tecnologia"'
        )
        natural_instructions = gr.Textbox(
            label="Digite seu comando",
            placeholder='Ex: "Crie 3 cortes de 45 segundos sobre os momentos de disciplina e superação"',
            lines=4
        )
    # Mode: exact manual timecodes (overrides everything else).
    with gr.Accordion("Minutagens manuais (precisão total)", open=False):
        gr.Markdown("Ignora IA e outros modos.")
        manual_timecodes = gr.Textbox(
            label="Timecodes (um por linha)",
            placeholder="00:21:18:09 - 00:31:18:09\n00:45:20:15 - 00:50:10:22",
            lines=4
        )
    # Mode: automatic keyword scoring.
    with gr.Accordion("Modo automático (sem comando)", open=False):
        gr.Markdown("Sistema de pontuação simples por palavras-chave.")
        custom_keywords = gr.Textbox(
            label="Palavras-chave (separadas por vírgula)",
            placeholder="coragem, superação, vitória"
        )
        with gr.Row():
            weight_emotion = gr.Slider(0, 5, 2.0, 0.1, label="Peso: emoção")
            weight_break = gr.Slider(0, 5, 1.5, 0.1, label="Peso: quebra")
        with gr.Row():
            weight_learn = gr.Slider(0, 5, 1.2, 0.1, label="Peso: aprendizado")
            weight_viral = gr.Slider(0, 5, 1.0, 0.1, label="Peso: viral")
    btn = gr.Button("Processar")
    # Outputs.
    with gr.Row():
        with gr.Column(scale=2):
            summary_out = gr.Textbox(label="Resumo dos cortes", lines=20, max_lines=30)
        with gr.Column(scale=1):
            status_out = gr.Textbox(label="Status", lines=3)
            file_out = gr.File(label="Download XML editado")
    # Wire the single button to the full pipeline.
    btn.click(
        process_files,
        [xml_in, txt_in, use_llm, num_segments, custom_keywords,
         manual_timecodes, natural_instructions,
         weight_emotion, weight_break, weight_learn, weight_viral, fps_in],
        [summary_out, file_out, status_out]
    )
if __name__ == "__main__":
    demo.launch()