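"""Editor XML Premiere - IA (Gemini).

Gradio app that rebuilds a Premiere Pro XML sequence from selected cuts. The cuts can come
from manual timecode ranges, from a natural-language command (analysed by Gemini when
GEMINI_API_KEY is set, with a regex-based fallback otherwise), or from a simple
keyword-scoring automatic mode applied to a transcript (.txt)."""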
import os
import re
import json
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from typing import List, Tuple, Optional, Callable
import gradio as gr
# =========================
# General settings
# =========================
OUTPUT_DIR = "./Output"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# =========================
# LLM (Gemini)
# =========================
USE_LLM_DEFAULT = True
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
LLM_AVAILABLE = False
LLM_MODEL_NAME = "gemini-2.0-flash-exp"
try:
    if GEMINI_API_KEY:
        import google.generativeai as genai
        genai.configure(api_key=GEMINI_API_KEY)
        LLM = genai.GenerativeModel(LLM_MODEL_NAME)
        LLM_AVAILABLE = True
    else:
        LLM = None
except Exception:
    LLM = None
    LLM_AVAILABLE = False
# =========================
# Models
# =========================
@dataclass
class Segment:
    start_tc: str
    end_tc: str
    start_f: int
    end_f: int
    text: str
    score: float
# =========================
# Timecode functions
# =========================
def _tc_to_hmsf(tc: str, fps: int) -> Tuple[int, int, int, int]:
    """Converts a timecode string to (hh, mm, ss, ff)."""
    s = tc.strip()
    m = re.match(r'^(\d{1,2}):(\d{2}):(\d{2})[:;](\d{2})$', s)
    if m:
        hh, mm, ss, ff = map(int, m.groups())
        return hh, mm, ss, ff
    m = re.match(r'^(\d{1,2}):(\d{2}):(\d{2})[.,](\d{1,3})$', s)
    if m:
        hh, mm, ss, ms = map(int, m.groups())
        ff = int(round((ms / 1000.0) * fps))
        if ff >= fps:
            ss += 1
            ff = 0
        return hh, mm, ss, ff
    m = re.match(r'^(\d{1,2}):(\d{2}):(\d{2})$', s)
    if m:
        hh, mm, ss = map(int, m.groups())
        return hh, mm, ss, 0
    raise ValueError(f"Timecode inválido: {tc}")
def parse_timecode_to_frames(tc: str, fps: int) -> int:
    """Converts a timecode string to an absolute frame count at the given fps."""
    hh, mm, ss, ff = _tc_to_hmsf(tc, fps)
    return hh * 3600 * fps + mm * 60 * fps + ss * fps + ff
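# Illustrative conversions: parse_timecode_to_frames("00:00:01:00", 24) == 24, and
# "00:01:00.500" at 24 fps -> 1 * 60 * 24 + round(0.5 * 24) = 1452 frames.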
def frames_to_timecode(frames: int, fps: int) -> str:
    hh = frames // (3600 * fps)
    rem = frames % (3600 * fps)
    mm = rem // (60 * fps)
    rem = rem % (60 * fps)
    ss = rem // fps
    ff = rem % fps
    return f"{hh:02d}:{mm:02d}:{ss:02d}:{ff:02d}"
# =========================
# Transcript parser
# =========================
def parse_transcript(txt: str, fps: int) -> List[Segment]:
    """Robust parser for multiple formats: "[TC - TC] text" ranges and WEBVTT/SRT blocks."""
    if not txt or not txt.strip():
        return []
    lines = [l.rstrip() for l in txt.splitlines()]
    results: List[Segment] = []
    line_range = re.compile(
        r'^\s*\[?\s*(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*[-—–]\s*'
        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*\]?\s*(.*)$'
    )
    arrow = re.compile(
        r'(\d{1,2}:\d{2}:\d{2}(?:[.,]\d{1,3}|[:;]\d{2})?)\s*-->\s*'
        r'(\d{1,2}:\d{2}:\d{2}(?:[.,]\d{1,3}|[:;]\d{2})?)'
    )
    i = 0
    while i < len(lines):
        raw = lines[i].strip()
        if not raw or raw.lower() == "desconhecido":
            i += 1
            continue
        m = line_range.match(raw)
        if m:
            start_tc, end_tc, trailing_text = m.groups()
            text_parts = []
            if trailing_text.strip():
                text_parts.append(trailing_text.strip())
            else:
                j = i + 1
                while j < len(lines):
                    nxt = lines[j].strip()
                    if not nxt or line_range.match(nxt) or re.match(r'^\d+\s*$', nxt) or arrow.search(nxt):
                        break
                    text_parts.append(nxt)
                    j += 1
                i = j - 1
            text = " ".join(text_parts).strip()
            try:
                sf = parse_timecode_to_frames(start_tc, fps)
                ef = parse_timecode_to_frames(end_tc, fps)
                if ef > sf:
                    results.append(Segment(
                        start_tc=frames_to_timecode(sf, fps),
                        end_tc=frames_to_timecode(ef, fps),
                        start_f=sf,
                        end_f=ef,
                        text=text if text else f"{start_tc} - {end_tc}",
                        score=0.0
                    ))
            except Exception:
                pass
            i += 1
            continue
        # VTT/SRT-style block: "00:00:01,000 --> 00:00:03,000"
        if arrow.search(raw) or (i + 1 < len(lines) and arrow.search(lines[i + 1])):
            line_with_tc = raw if arrow.search(raw) else lines[i + 1]
            mm = arrow.search(line_with_tc)
            if mm:
                start_tc, end_tc = mm.groups()
                j = i + 1 if line_with_tc == raw else i + 2
                text_parts = []
                while j < len(lines):
                    nxt = lines[j].strip()
                    if not nxt:
                        break
                    if re.match(r'^\d+\s*$', nxt) and (j + 1 < len(lines) and arrow.search(lines[j + 1])):
                        break
                    if arrow.search(nxt):
                        break
                    text_parts.append(nxt)
                    j += 1
                text = " ".join(text_parts).strip()
                try:
                    sf = parse_timecode_to_frames(start_tc, fps)
                    ef = parse_timecode_to_frames(end_tc, fps)
                    if ef > sf:
                        results.append(Segment(
                            start_tc=frames_to_timecode(sf, fps),
                            end_tc=frames_to_timecode(ef, fps),
                            start_f=sf,
                            end_f=ef,
                            text=text,
                            score=0.0
                        ))
                except Exception:
                    pass
                # Resume at the line that ended this block (blank line, counter or next
                # "-->" line); advancing by j + 1 would silently drop a cue starting there.
                i = j
                continue
        i += 1
    return results
# =========================
# Manual timecodes
# =========================
def parse_manual_timecodes(manual_input: str) -> List[Tuple[str, str]]:
    """Extracts (start, end) timecode pairs from free-form text, one range per line or comma-separated."""
    if not manual_input or not manual_input.strip():
        return []
    manual_ranges = []
    lines = manual_input.replace(",", "\n").splitlines()
    pattern = re.compile(
        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*[-–—]\s*'
        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)'
    )
    for line in lines:
        m = pattern.search(line.strip())
        if m:
            manual_ranges.append((m.group(1), m.group(2)))
    return manual_ranges
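# Example accepted input (same format as the UI placeholder), one range per line:
#   00:21:18:09 - 00:31:18:09
#   00:45:20:15 - 00:50:10:22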
# =========================
# LLM JSON helpers
# =========================
def _extract_json_block(text: str) -> str:
    """
    Extracts the best JSON block from the LLM response.
    1) Prefers a block fenced by ```json ... ```
    2) Otherwise slices from the first '{' to its matching '}'.
    """
    if not text:
        raise ValueError("Resposta vazia do LLM")
    m = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text, flags=re.IGNORECASE)
    if m:
        return m.group(1).strip()
    start = text.find("{")
    if start == -1:
        raise ValueError("Nenhum '{' encontrado na resposta do LLM")
    depth = 0
    for i in range(start, len(text)):
        c = text[i]
        if c == "{":
            depth += 1
        elif c == "}":
            depth -= 1
            if depth == 0:
                return text[start:i + 1].strip()
    end = text.rfind("}")
    if end != -1 and end > start:
        return text[start:end + 1].strip()
    raise ValueError("Não foi possível delimitar um JSON na resposta do LLM")
def _coerce_to_strict_json(s: str) -> str:
    """
    Tries to repair "almost JSON" strings:
    - strips comments
    - replaces curly quotes with straight ones
    - removes trailing commas
    - quotes bare object keys
    - as a last resort, converts single quotes to double quotes
    """
    t = s.strip()
    t = re.sub(r"^json\s*", "", t, flags=re.IGNORECASE).strip().strip("`")
    t = (t.replace("\u201c", '"')
         .replace("\u201d", '"')
         .replace("\u2018", "'")
         .replace("\u2019", "'"))
    t = re.sub(r"//.*?$", "", t, flags=re.MULTILINE)
    t = re.sub(r"/\*[\s\S]*?\*/", "", t)
    t = re.sub(r",(\s*[}\]])", r"\1", t)
    t = re.sub(r'([{,]\s*)([A-Za-z_][A-Za-z0-9_]*)\s*:', r'\1"\2":', t)
    try:
        json.loads(t)
        return t
    except Exception:
        t2 = re.sub(r"'", '"', t)
        return t2
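# Illustrative repair (hypothetical LLM output): "{'cuts': [{'start_index': 3, 'duration_seconds': 45,},]}"
# loses its trailing commas, still fails json.loads because of the single quotes, and the
# final quote swap yields '{"cuts": [{"start_index": 3, "duration_seconds": 45}]}'.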
# =========================
# AI: intelligent analysis with Gemini
# =========================
def ai_analyze_and_select(
    segments: List[Segment],
    command: str,
    fps: int,
    progress_callback: Optional[Callable[[str], None]] = None
) -> List[Segment]:
    """
    Uses Gemini to analyse the full transcript and identify the best excerpts.
    JSON extraction/sanitising is done defensively so a sloppy response does not crash the app.
    """
    if not LLM_AVAILABLE or not segments:
        raise ValueError("IA não disponível ou sem segmentos para analisar")
    if progress_callback:
        progress_callback("Etapa 1/3: preparando dados para análise...")
    # Build the full transcript with indices
    transcript_data = []
    for i, seg in enumerate(segments):
        duration_sec = max(0, (seg.end_f - seg.start_f) / fps)
        transcript_data.append({
            "index": i,
            "timecode": seg.start_tc,
            "duration_sec": round(duration_sec, 1),
            "text": (seg.text or "")[:200]
        })
    transcript_json = json.dumps(transcript_data, ensure_ascii=False, indent=2)
    if progress_callback:
        progress_callback(f"Etapa 2/3: analisando {len(segments)} segmentos com IA...")
    prompt = f"""Você é um especialista em edição de vídeo. Analise a transcrição e identifique os MELHORES trechos baseado no comando do usuário.
COMANDO DO USUÁRIO:
{command}
TRANSCRIÇÃO COMPLETA (formato JSON com index, timecode, duração e texto):
{transcript_json}
INSTRUÇÕES:
1. Leia o comando com atenção e identifique:
- Quantidade de cortes desejada
- Duração de cada corte (em segundos)
- Tema/assunto/palavras-chave mencionados
- Timecode de início (se mencionado)
2. Analise TODA a transcrição e identifique os segmentos que melhor correspondem ao comando
3. Responda APENAS com JSON estrito:
{{
"cuts": [
{{
"start_index": <int>,
"duration_seconds": <int>,
"reason": "<string curta>"
}}
]
}}"""
    # LLM call (some SDK versions accept a JSON mime_type; fall back to a plain call if not)
    try:
        response = LLM.generate_content(
            prompt,
            generation_config={
                "temperature": 0.2,
                "max_output_tokens": 2000,
                "response_mime_type": "application/json"
            }
        )
    except Exception:
        response = LLM.generate_content(
            prompt,
            generation_config={
                "temperature": 0.2,
                "max_output_tokens": 2000
            }
        )
    response_text = (getattr(response, "text", "") or "").strip()
    if progress_callback:
        progress_callback("Etapa 3/3: processando resposta da IA...")
    # Extract and parse the JSON resiliently
    try:
        raw_json = _extract_json_block(response_text)
        try:
            result = json.loads(raw_json)
        except json.JSONDecodeError:
            fixed = _coerce_to_strict_json(raw_json)
            result = json.loads(fixed)
    except Exception as e:
        snippet = response_text[:600].replace("\n", " ")
        raise ValueError(f"Erro ao processar resposta da IA (JSON inválido): {e}. Amostra: {snippet}")
    cuts_data = result.get("cuts", [])
    if not isinstance(cuts_data, list) or not cuts_data:
        raise ValueError("IA não retornou a lista 'cuts' com itens válidos")
    selected_segments: List[Segment] = []
    for cut_info in cuts_data:
        try:
            start_idx = int(cut_info.get("start_index", 0))
            duration_sec = int(cut_info.get("duration_seconds", 60))
            reason = str(cut_info.get("reason", "")).strip()
        except Exception:
            continue
        if start_idx < 0 or start_idx >= len(segments):
            continue
        start_seg = segments[start_idx]
        start_frame = start_seg.start_f
        duration_frames = max(0, int(duration_sec * fps))
        end_frame = start_frame + duration_frames
        text_parts = [f"[IA] {reason}"] if reason else []
        for seg in segments[start_idx:]:
            if seg.start_f < end_frame:
                if seg.text:
                    text_parts.append(seg.text[:150])
            else:
                break
        combined_text = " [...] ".join(text_parts)[:500]
        selected_segments.append(Segment(
            start_tc=frames_to_timecode(start_frame, fps),
            end_tc=frames_to_timecode(end_frame, fps),
            start_f=start_frame,
            end_f=end_frame,
            text=combined_text,
            score=100.0
        ))
    return selected_segments
# =========================
# Manual command processing (no AI)
# =========================
def manual_command_processing(segments: List[Segment], command: str, fps: int) -> List[Segment]:
    """Fallback: basic regex-only processing of simple commands when the LLM is unavailable."""
    s = (command or "").lower()
    # number of cuts
    count = 1
    m = re.search(r'(\d+)\s*(?:cortes?|clipes?|segmentos?)', s)
    if m:
        count = int(m.group(1))
    # duration
    duration_sec = 60
    m = re.search(r'(\d+)\s*(?:segundos?|s\b)', s)
    if m:
        duration_sec = int(m.group(1))
    else:
        m = re.search(r'(\d+)\s*(?:minutos?|min\b)', s)
        if m:
            duration_sec = int(m.group(1)) * 60
    # starting timecode (accepts "começando", "começando em" and "a partir de")
    start_frame = 0
    m = re.search(r'(?:começando(?:\s+em)?|a partir de)\s+(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)', s)
    if m:
        try:
            start_frame = parse_timecode_to_frames(m.group(1), fps)
        except Exception:
            pass
    # consecutive cuts
    results = []
    base_frame = start_frame
    for i in range(count):
        duration_frames = duration_sec * fps
        end_frame = base_frame + duration_frames
        text_parts = []
        for seg in segments:
            if seg.start_f >= base_frame and seg.start_f < end_frame:
                if seg.text:
                    text_parts.append(seg.text[:100])
        combined_text = " [...] ".join(text_parts[:10])[:400]
        results.append(Segment(
            start_tc=frames_to_timecode(base_frame, fps),
            end_tc=frames_to_timecode(end_frame, fps),
            start_f=base_frame,
            end_f=end_frame,
            text=combined_text if combined_text else f"Corte {i+1}",
            score=50.0
        ))
        base_frame = end_frame
    return results
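# Illustrative parse: "3 cortes de 45 segundos a partir de 00:02:00:00" -> count=3,
# duration_sec=45, start_frame = parse_timecode_to_frames("00:02:00:00", fps);
# the cuts are then laid out back to back from that start frame.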
# =========================
# Automatic mode
# =========================
def auto_score_segments(
    segs: List[Segment],
    num_segments: int,
    custom_keywords: str,
    weight_emotion: float,
    weight_break: float,
    weight_learn: float,
    weight_viral: float
) -> List[Segment]:
    """Simple automatic scoring by keyword hits (word lists are in Portuguese, matching the transcripts)."""
    emotion_words = ['medo', 'coragem', 'amor', 'ódio', 'paixão', 'alegria', 'tristeza']
    break_words = ['nunca', 'de repente', 'surpreendente', 'inesperado', 'incrível']
    learn_words = ['aprendi', 'descobri', 'entendi', 'percebi', 'lição']
    viral_words = ['segredo', 'verdade', 'revelação', 'exclusivo', 'confissão']
    for s in segs:
        score = 0.0
        text = (s.text or "").lower()
        for word in emotion_words:
            if word in text:
                score += weight_emotion
        for word in break_words:
            if word in text:
                score += weight_break
        for word in learn_words:
            if word in text:
                score += weight_learn
        for word in viral_words:
            if word in text:
                score += weight_viral
        if custom_keywords:
            for kw in custom_keywords.split(","):
                kw_clean = kw.strip().lower()
                if kw_clean and kw_clean in text:
                    score += 5.0
        s.score = score
    segs.sort(key=lambda x: x.score, reverse=True)
    return segs[:max(1, num_segments)]
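# Scoring example: a segment whose text contains "coragem" (emotion list) plus one custom
# keyword scores weight_emotion + 5.0; equal scores keep their original order (stable sort).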
# =========================
# XML editing
# =========================
def deep_copy_element(elem: ET.Element) -> ET.Element:
    """Recursively copies an Element (tag, attributes, text, tail and children)."""
    new = ET.Element(elem.tag, attrib=dict(elem.attrib))
    new.text = elem.text
    new.tail = elem.tail
    for child in elem:
        new.append(deep_copy_element(child))
    return new
def edit_xml(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
    """Replaces the clipitems of the first video/audio tracks with one clip per selected segment."""
    root = tree.getroot()
    seq = root.find(".//sequence")
    if seq is None:
        raise ValueError("Sequence não encontrada no XML")
    v_track = seq.find(".//media/video/track")
    a_track = seq.find(".//media/audio/track")
    # Use "is None": an Element with no children is falsy, so "not track" would misfire here.
    if v_track is None or a_track is None:
        raise ValueError("Trilhas de vídeo/áudio não encontradas")
    v_template = v_track.find("./clipitem")
    a_template = a_track.find("./clipitem")
    for clip in list(v_track.findall("./clipitem")):
        v_track.remove(clip)
    for clip in list(a_track.findall("./clipitem")):
        a_track.remove(clip)
    timeline_pos = 0
    for i, seg in enumerate(segs, 1):
        duration = seg.end_f - seg.start_f
        if duration <= 0:
            continue
        v_clip = ET.Element("clipitem", {"id": f"clip-v{i}"})
        ET.SubElement(v_clip, "name").text = f"Clip {i}"
        ET.SubElement(v_clip, "start").text = str(timeline_pos)
        ET.SubElement(v_clip, "end").text = str(timeline_pos + duration)
        ET.SubElement(v_clip, "in").text = str(seg.start_f)
        ET.SubElement(v_clip, "out").text = str(seg.end_f)
        if v_template is not None:
            rate = v_template.find("rate")
            if rate is not None:
                v_clip.append(deep_copy_element(rate))
            file_elem = v_template.find("file")
            if file_elem is not None:
                v_clip.append(deep_copy_element(file_elem))
        a_clip = ET.Element("clipitem", {"id": f"clip-a{i}"})
        ET.SubElement(a_clip, "name").text = f"Clip {i}"
        ET.SubElement(a_clip, "start").text = str(timeline_pos)
        ET.SubElement(a_clip, "end").text = str(timeline_pos + duration)
        ET.SubElement(a_clip, "in").text = str(seg.start_f)
        ET.SubElement(a_clip, "out").text = str(seg.end_f)
        if a_template is not None:
            rate = a_template.find("rate")
            if rate is not None:
                a_clip.append(deep_copy_element(rate))
            file_elem = a_template.find("file")
            if file_elem is not None:
                a_clip.append(deep_copy_element(file_elem))
        v_track.append(v_clip)
        a_track.append(a_clip)
        timeline_pos += duration
    return tree
# =========================
# Selection (orchestration)
# =========================
def select_segments(
    transcript_txt: str,
    use_llm: bool,
    num_segments: int,
    custom_keywords: str,
    manual_timecodes: str,
    natural_instructions: str,
    weight_emotion: float,
    weight_break: float,
    weight_learn: float,
    weight_viral: float,
    fps: int,
    progress_callback: Optional[Callable[[str], None]] = None
) -> List[Segment]:
    """Picks segments by priority: manual timecodes, then natural-language command, then automatic scoring."""
    # 1) Manual timecodes
    manual = parse_manual_timecodes(manual_timecodes)
    if manual:
        result = []
        for start_tc, end_tc in manual:
            try:
                sf = parse_timecode_to_frames(start_tc, fps)
                ef = parse_timecode_to_frames(end_tc, fps)
                result.append(Segment(
                    start_tc=frames_to_timecode(sf, fps),
                    end_tc=frames_to_timecode(ef, fps),
                    start_f=sf,
                    end_f=ef,
                    text=f"Manual: {start_tc} - {end_tc}",
                    score=100.0
                ))
            except Exception:
                pass
        return result
    # 2) Transcript parsing
    segs = parse_transcript(transcript_txt, fps) if transcript_txt else []
    # 3) Natural-language command
    if natural_instructions.strip():
        if use_llm and LLM_AVAILABLE and segs:
            return ai_analyze_and_select(segs, natural_instructions, fps, progress_callback)
        elif segs:
            return manual_command_processing(segs, natural_instructions, fps)
        else:
            raise ValueError(
                "Para usar comandos em linguagem natural, forneça uma transcrição "
                "ou use minutagens manuais."
            )
    # 4) Automatic scoring
    if not segs:
        raise ValueError("Nenhum segmento encontrado. Envie transcrição, minutagens ou um comando em linguagem natural.")
    return auto_score_segments(
        segs, num_segments, custom_keywords,
        weight_emotion, weight_break, weight_learn, weight_viral
    )
# =========================
# Main pipeline
# =========================
def process_files(
    xml_file, txt_file, use_llm, num_segments,
    custom_keywords, manual_timecodes, natural_instructions,
    weight_emotion, weight_break, weight_learn, weight_viral,
    fps,
    progress=gr.Progress()
):
    """Gradio callback: loads the inputs, selects segments, rewrites the XML and builds the summary."""
    if not xml_file:
        return "Envie o XML do Premiere", None, f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
    try:
        debug_info = []

        def progress_callback(msg):
            progress(0.5, desc=msg)
            debug_info.append(msg)

        progress(0.1, desc="Carregando arquivos...")
        transcript = ""
        manual = parse_manual_timecodes(manual_timecodes)
        if not manual and txt_file:
            with open(txt_file.name, "r", encoding="utf-8-sig") as f:
                transcript = f.read()
            debug_info.append(f"Transcrição: {len(transcript)} caracteres")
        progress(0.2, desc="Selecionando segmentos...")
        segments = select_segments(
            transcript, bool(use_llm) and LLM_AVAILABLE, int(num_segments),
            custom_keywords, manual_timecodes, natural_instructions,
            float(weight_emotion), float(weight_break), float(weight_learn), float(weight_viral),
            int(fps),
            progress_callback
        )
        if not segments:
            return "Nenhum segmento selecionado", None, f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
        # Validate minimum duration: at least 1 second
        valid_segments = []
        for seg in segments:
            if seg.end_f > seg.start_f and (seg.end_f - seg.start_f) >= max(1, int(fps)):
                valid_segments.append(seg)
        if not valid_segments:
            return "Segmentos inválidos (duração muito curta)", None, f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
        segments = valid_segments
        debug_info.append(f"{len(segments)} segmento(s) válidos")
        progress(0.7, desc="Editando XML...")
        tree = ET.parse(xml_file.name)
        tree = edit_xml(tree, segments)
        basename = os.path.splitext(os.path.basename(xml_file.name))[0]
        output_path = os.path.join(OUTPUT_DIR, f"{basename}_EDITADO.xml")
        tree.write(output_path, encoding="utf-8", xml_declaration=True)
        progress(0.9, desc="Gerando resumo...")
        total_sec = sum((s.end_f - s.start_f) / fps for s in segments)
        total_min = total_sec / 60.0
        if manual:
            mode = "Manual"
        elif natural_instructions.strip() and use_llm and LLM_AVAILABLE:
            mode = "IA Completa (Gemini)"
        elif natural_instructions.strip():
            mode = "Básico (sem IA)"
        else:
            mode = "Automático"
        summary_lines = [
            "RESULTADO",
            f"- Cortes: {len(segments)}",
            f"- Duração total: {total_min:.1f} min",
            f"- Modo: {mode}",
            ""
        ]
        for i, seg in enumerate(segments, 1):
            dur_sec = (seg.end_f - seg.start_f) / fps
            dur_min = dur_sec / 60.0
            line = f"Corte {i}\n {seg.start_tc} -> {seg.end_tc} ({dur_min:.2f} min / {dur_sec:.0f}s)"
            if seg.text and len(seg.text.strip()) > 10:
                text_preview = seg.text[:200].strip()
                if len(seg.text) > 200:
                    text_preview += "..."
                line += f"\n {text_preview}"
            summary_lines.append(line)
            summary_lines.append("")
        if debug_info:
            summary_lines.append("Log do processamento:")
            summary_lines.extend(f"- {info}" for info in debug_info)
        summary = "\n".join(summary_lines)
        status = f"Sucesso | {mode} | {total_min:.1f} min | LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
        progress(1.0, desc="Concluído")
        return summary, output_path, status
    except Exception as e:
        import traceback
        error_trace = traceback.format_exc()
        print(error_trace)
        error_msg = f"Erro: {str(e)}\n\nDetalhes:\n{error_trace[:800]}"
        return error_msg, None, f"LLM: {'OK' if LLM_AVAILABLE else 'OFF'}"
# =========================
# Gradio interface
# =========================
with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere - IA") as demo:
    gr.Markdown("# Editor XML Premiere - IA Completa (Gemini)")
    status_inicial = "IA Gemini ativa" if LLM_AVAILABLE else "IA desabilitada: configure GEMINI_API_KEY"
    gr.Markdown(f"Status: {status_inicial}")
    with gr.Row():
        xml_in = gr.File(label="XML do Premiere", file_types=[".xml"])
        txt_in = gr.File(label="Transcrição (.txt) — obrigatória para IA", file_types=[".txt"])
    with gr.Row():
        use_llm = gr.Checkbox(
            label="Usar IA Gemini (análise completa — recomendado)",
            value=USE_LLM_DEFAULT and LLM_AVAILABLE,
            interactive=LLM_AVAILABLE
        )
        num_segments = gr.Slider(minimum=2, maximum=20, value=5, step=1, label="Quantidade de segmentos (modo automático)")
        fps_in = gr.Slider(minimum=12, maximum=60, value=24, step=1, label="FPS")
    with gr.Accordion("Comando em linguagem natural (modo principal)", open=True):
        gr.Markdown(
            "Exemplos: \n"
            '- "Crie 3 cortes de 30 segundos sobre disciplina"\n'
            '- "2 clipes de 1 minuto falando sobre Maria"\n'
            '- "Corte de 5 minutos começando em 00:02:00:00 sobre tecnologia"'
        )
        natural_instructions = gr.Textbox(
            label="Digite seu comando",
            placeholder='Ex: "Crie 3 cortes de 45 segundos sobre os momentos de disciplina e superação"',
            lines=4
        )
    with gr.Accordion("Minutagens manuais (precisão total)", open=False):
        gr.Markdown("Ignora IA e outros modos.")
        manual_timecodes = gr.Textbox(
            label="Timecodes (um por linha)",
            placeholder="00:21:18:09 - 00:31:18:09\n00:45:20:15 - 00:50:10:22",
            lines=4
        )
    with gr.Accordion("Modo automático (sem comando)", open=False):
        gr.Markdown("Sistema de pontuação simples por palavras-chave.")
        custom_keywords = gr.Textbox(
            label="Palavras-chave (separadas por vírgula)",
            placeholder="coragem, superação, vitória"
        )
        with gr.Row():
            weight_emotion = gr.Slider(minimum=0, maximum=5, value=2.0, step=0.1, label="Peso: emoção")
            weight_break = gr.Slider(minimum=0, maximum=5, value=1.5, step=0.1, label="Peso: quebra")
        with gr.Row():
            weight_learn = gr.Slider(minimum=0, maximum=5, value=1.2, step=0.1, label="Peso: aprendizado")
            weight_viral = gr.Slider(minimum=0, maximum=5, value=1.0, step=0.1, label="Peso: viral")
    btn = gr.Button("Processar")
    with gr.Row():
        with gr.Column(scale=2):
            summary_out = gr.Textbox(label="Resumo dos cortes", lines=20, max_lines=30)
        with gr.Column(scale=1):
            status_out = gr.Textbox(label="Status", lines=3)
            file_out = gr.File(label="Download XML editado")
    btn.click(
        process_files,
        [xml_in, txt_in, use_llm, num_segments, custom_keywords,
         manual_timecodes, natural_instructions,
         weight_emotion, weight_break, weight_learn, weight_viral, fps_in],
        [summary_out, file_out, status_out]
    )
if __name__ == "__main__":
    demo.launch()