import json
import re
from typing import List, Dict, Tuple
import csv, os, time

import gradio as gr
import matplotlib.pyplot as plt

# ==========================
# Config & styles
# ==========================
DEFAULT_COLS = [
    "Código", "Indicador", "Score (0–4)",
    "Entailment medio", "Evidencias (hipótesis)", "Descripción"
]

CUSTOM_CSS = """
#app {max-width: 1200px; margin: 0 auto;}
.badge {
  display:inline-block; padding:10px 14px; border-radius:12px; font-weight:700;
  background:linear-gradient(135deg,#1f6feb,#5ac8fa); color:white; box-shadow:0 6px 20px rgba(0,0,0,.2);
}
.card {
  background: rgba(255,255,255,.03);
  border: 1px solid rgba(255,255,255,.08);
  border-radius: 14px; padding: 14px;
  box-shadow: 0 8px 24px rgba(0,0,0,.18);
}
.small {font-size: 12px; opacity: .9;}
"""

# ==========================
# IPMA ICB4 4.4.5.x metadata
# ==========================
INDICATOR_META = {
    "4.4.5.1": ("Iniciativa y ayuda proactiva",
                "Inicia acciones sin que se lo pidan; ofrece ayuda, anticipa y equilibra riesgos."),
    "4.4.5.2": ("Ownership y compromiso",
                "Asume responsabilidad; impulsa el proyecto; define/monitorea indicadores y mejora procesos."),
    "4.4.5.3": ("Dirección, coaching y mentoring",
                "Da dirección; coach/mentor al equipo; alinea visión, valores y objetivos."),
    "4.4.5.4": ("Poder e influencia",
                "Usa influencia adecuada; elige bien el canal; es percibido como líder por stakeholders."),
    "4.4.5.5": ("Decisiones",
                "Toma decisiones bajo incertidumbre; explica razones; revisa con nueva evidencia; comunica con claridad.")
}

# ==========================
# Models (CPU Basic friendly)
# ==========================
_llm = None
_llm_tok = None
_gen = None
_nli_cache: Dict[str, object] = {}  # cache of NLI pipelines, keyed by model_id

LLM_ID = "Qwen/Qwen2.5-0.5B-Instruct"  # small multilingual LLM used to extract STAR

# NLI selector with its associated configuration
MODEL_CHOICES = {
    "Velocidad (MiniLM)": {
        "id": "MoritzLaurer/multilingual-MiniLMv2-L12-mnli-xnli",
        "calibrate": True,
        "thresholds": (0.70, 0.50, 0.30, 0.15)  # cut-offs for scores 4, 3, 2, 1
    },
    "Precisión (DeBERTa)": {
        "id": "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
        "calibrate": False,
        "thresholds": (0.80, 0.60, 0.40, 0.20)
    }
}
DEFAULT_MODEL_KEY = "Velocidad (MiniLM)"  # default on free Spaces hardware

STAR_PROMPT = """Eres evaluador ICB4. Toma el texto del candidato y devuélvelo en formato STAR como JSON válido con claves:
"situation" (<=3 frases), "task" (<=2 frases), "action" (lista de viñetas, verbos de acción), "result" (lista de viñetas, resultados/indicadores/aprendizajes).
Siempre responde SOLO con JSON válido y conciso en español, sin comentarios adicionales.
TEXTO:
{texto}
"""

HYP: Dict[str, List[str]] = {
    "4.4.5.1": [
        "Tomó la iniciativa sin que se lo pidieran.",
        "Ofreció ayuda o asesoría no solicitada.",
        "Pensó con orientación al futuro.",
        "Equilibró iniciativa y riesgo."
    ],
    "4.4.5.2": [
        "Mostró compromiso personal con los objetivos.",
        "Promovió el proyecto y generó entusiasmo.",
        "Definió o monitoreó indicadores de desempeño.",
        "Buscó mejoras en procesos."
    ],
    "4.4.5.3": [
        "Proporcionó dirección clara al equipo.",
        "Realizó coaching o mentoring para mejorar capacidades.",
        "Estableció y comunicó visión y valores.",
        "Alineó objetivos individuales con los comunes."
    ],
    "4.4.5.4": [
        "Usó apropiadamente poder e influencia.",
        "Seleccionó el canal de comunicación adecuado para influir.",
        "Fue percibido como líder por los stakeholders."
    ],
    "4.4.5.5": [
        "Tomó decisiones bajo incertidumbre considerando pros y contras.",
        "Explicó el razonamiento de las decisiones.",
        "Revisó decisiones con nueva evidencia.",
        "Comunicó claramente la decisión e influyó su adopción."
    ]
}

# ==========================
# Lazy model loading
# ==========================
def lazy_load_llm():
    """Text-generation pipeline (Qwen 0.5B) used to extract STAR."""
    global _llm, _llm_tok, _gen
    if _gen is not None:
        return _gen
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
    _llm_tok = AutoTokenizer.from_pretrained(LLM_ID)
    _llm = AutoModelForCausalLM.from_pretrained(LLM_ID, device_map="auto")
    _gen = pipeline(
        "text-generation",
        model=_llm,
        tokenizer=_llm_tok,
        max_new_tokens=512,
        do_sample=False,
        repetition_penalty=1.1,
    )
    return _gen


def lazy_load_nli(model_id: str):
    """NLI pipeline with full label scores and safe truncation. Cached per model_id."""
    from transformers import pipeline
    if model_id in _nli_cache:
        return _nli_cache[model_id]
    nli = pipeline(
        "text-classification",
        model=model_id,
        tokenizer=model_id,
        return_all_scores=True,  # returns {label, score} for every class
        truncation=True          # avoids degradation on long inputs
    )
    _nli_cache[model_id] = nli
    return nli


# ==========================
# STAR extraction utilities
# ==========================
def extract_json_block(text: str) -> str:
    start = text.find("{")
    end = text.rfind("}")
    if start != -1 and end != -1 and end > start:
        return text[start:end + 1]
    return '{"situation":"","task":"","action":[],"result":[]}'


def quick_parse_star(txt: str):
    t = (txt or "").strip()
    if not t:
        return None
    keys = ("SITUATION", "TASK", "ACTION", "RESULT", "S:", "T:", "A:", "R:")
    # Case-insensitive check, consistent with the case-insensitive regex below
    if not any(k in t.upper() for k in keys):
        return None
    sections = {"situation": "", "task": "", "action": [], "result": []}
    blocks = re.split(r'(?im)^(SITUATION|TASK|ACTION|RESULT|S:|T:|A:|R:)\s*:?', t)
    for i in range(1, len(blocks), 2):
        key = blocks[i].lower()[0]
        val = blocks[i + 1].strip()
        if key == "s":
            sections["situation"] = val
        elif key == "t":
            sections["task"] = val
        elif key == "a":
            sections["action"] = [x.strip("•- ") for x in val.splitlines() if x.strip()]
        elif key == "r":
            sections["result"] = [x.strip("•- ") for x in val.splitlines() if x.strip()]
    return sections
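# Illustrative parse (hypothetical input; a sketch of the expected output):
#   quick_parse_star("S: Proyecto retrasado\nT: Recuperar el plan\nA: - Dailies\nR: - NPS +22")
#   -> {"situation": "Proyecto retrasado", "task": "Recuperar el plan",
#       "action": ["Dailies"], "result": ["NPS +22"]}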
def extract_star(user_text: str) -> Dict:
    """Parses STAR directly when explicit headers are present; otherwise asks the LLM for JSON."""
    parsed = quick_parse_star(user_text)
    if parsed:
        return parsed
    gen = lazy_load_llm()
    prompt = STAR_PROMPT.format(texto=(user_text or "").strip())
    out = gen(prompt)[0]["generated_text"]
    raw = extract_json_block(out)
    try:
        data = json.loads(raw)
    except Exception:
        # Fallback when the LLM does not return valid JSON
        data = {"situation": "", "task": "", "action": [], "result": []}
        m = re.search(r'Situation[::]\s*(.*)', user_text or "", flags=re.I)
        if m:
            data["situation"] = m.group(1).strip()
    data["action"] = data.get("action", []) or []
    data["result"] = data.get("result", []) or []
    if isinstance(data["action"], str):
        data["action"] = [data["action"]]
    if isinstance(data["result"], str):
        data["result"] = [data["result"]]
    return {
        "situation": (data.get("situation", "") or "").strip(),
        "task": (data.get("task", "") or "").strip(),
        "action": [str(a).strip(" •-") for a in data["action"] if str(a).strip()],
        "result": [str(r).strip(" •-") for r in data["result"] if str(r).strip()],
    }


# ==========================
# NLI + scoring (dynamic per model)
# ==========================
def calibrate_prob(p: float, use_calibration: bool) -> float:
    """Light calibration applied only to MiniLM (p ** 0.9)."""
    p = max(0.0, min(1.0, float(p)))
    return (p ** 0.9) if use_calibration else p
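# Illustrative (values follow directly from the formula above): calibrate_prob(0.50, True)
# = 0.50 ** 0.9 ≈ 0.536, so the exponent < 1 nudges mid-range probabilities slightly
# upward for MiniLM, while use_calibration=False passes the probability through unchanged.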
def nli_entails(premise: str, hypothesis: str, model_id: str) -> float:
    """ENTAILMENT probability (0..1), robust to the different output shapes of the pipeline."""
    nli = lazy_load_nli(model_id)

    def _trim(s: str, limit=900):
        s = (s or "").strip()
        return s[:limit]

    text_a = _trim(premise)
    text_b = _trim(hypothesis)
    if not text_a or not text_b:
        return 0.0
    try:
        res = nli({"text": text_a, "text_pair": text_b})
    except Exception:
        return 0.0
    # return_all_scores=True → [{label, score}, ...] or [[{...}]]
    if isinstance(res, dict):
        candidates = [res]
    elif isinstance(res, list):
        candidates = res[0] if (res and isinstance(res[0], list)) else res
    else:
        return 0.0
    for c in (d for d in candidates if isinstance(d, dict)):
        lab = str(c.get("label", "")).lower()
        if "entail" in lab:
            try:
                return float(c.get("score", 0.0))
            except Exception:
                return 0.0
    return 0.0


def map_prob_to_score(p: float, thresholds: Tuple[float, float, float, float]) -> int:
    """Maps a mean entailment probability to the 0–4 ICB4 scale via the model's thresholds."""
    t4, t3, t2, t1 = thresholds
    if p >= t4:
        return 4
    if p >= t3:
        return 3
    if p >= t2:
        return 2
    if p >= t1:
        return 1
    return 0
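# Illustrative: with the MiniLM thresholds (0.70, 0.50, 0.30, 0.15),
# map_prob_to_score(0.55, ...) returns 3 and map_prob_to_score(0.12, ...) returns 0.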
def score_indicator(premise: str, hyps: List[str], model_id: str, use_calibration: bool,
                    thresholds: Tuple[float, float, float, float]) -> Tuple[int, List[Tuple[str, float]], float]:
    """Returns (score 0–4, top-2 (hypothesis, prob) pairs as evidence, mean probability)."""
    raw = [(h, nli_entails(premise, h, model_id)) for h in hyps]
    probs = [(h, calibrate_prob(p, use_calibration)) for h, p in raw]
    avg = sum(p for _, p in probs) / max(1, len(probs))
    score = map_prob_to_score(avg, thresholds)
    probs_sorted = sorted(probs, key=lambda x: x[1], reverse=True)[:2]
    return score, probs_sorted, avg


# ==========================
# Orchestrated evaluation
# ==========================
def evaluate(texto: str, model_key: str):
    """Returns: status_msg, matplotlib_fig, {"columns": [...], "data": [...]}."""
    try:
        if not texto or not texto.strip():
            return "Introduce un caso en formato STAR (o texto libre).", None, {"columns": [], "data": []}

        # Configuration of the selected model
        cfg = MODEL_CHOICES.get(model_key, MODEL_CHOICES[DEFAULT_MODEL_KEY])
        model_id = cfg["id"]
        use_calibration = cfg["calibrate"]
        thresholds = cfg["thresholds"]

        star = extract_star(texto)
        # Limit the premise so the NLI gets a clear signal (up to 6 actions + 4 results)
        actions = (star.get("action", []) or [])[:6]
        results = (star.get("result", []) or [])[:4]
        premise = " ".join(actions) + " " + " ".join(results)

        # Per-indicator scoring
        scores, table_rows, per_indicator_values = [], [], []
        for ind, hyps in HYP.items():
            s, ev, avg = score_indicator(premise, hyps, model_id, use_calibration, thresholds)
            scores.append(s)
            per_indicator_values.append((ind, s))
            best_evid = " / ".join([h for h, _ in ev])
            name, desc = INDICATOR_META[ind]
            table_rows.append([ind, name, s, f"{avg:.2f}", best_evid, desc])
        overall = round(sum(scores) / max(1, len(scores)), 2)

        # Chart
        labels = [f"{k.split('.')[-1]}" for k, _ in per_indicator_values]
        values = [v for _, v in per_indicator_values]
        fig, ax = plt.subplots(figsize=(8.2, 4.0))
        ax.bar(labels, values)
        ax.set_ylim(0, 4)
        ax.set_xlabel("Indicadores 4.4.5.x")
        ax.set_ylabel("Score (0–4)")
        fig.suptitle(f"ICB4 4.4.5 Leadership — Score global: {overall} | Modelo: {model_key}", y=0.97)
        fig.subplots_adjust(top=0.86)
        for i, v in enumerate(values):
            ax.text(i, v + 0.08, f"{v}", ha="center", va="bottom")
        fig.tight_layout()

        table = {
            "columns": DEFAULT_COLS,
            "data": table_rows,
            "model_key": model_key,  # label chosen in the dropdown (MiniLM / DeBERTa)
            "model_id": model_id     # actual HF repo, kept for traceability
        }
        msg = (
            f"Evaluación completada. Score global (0–4): {overall}\n"
            f"Modelo: {model_key}\n"
            f"Sugerencia: revisa evidencias y ajusta umbrales según tu rúbrica."
        )
        return msg, fig, table
    except Exception as e:
        return f"⚠️ Error en evaluate(): {type(e).__name__}: {e}", None, {"columns": [], "data": []}


# ==========================
# CSV helper
# ==========================
def make_csv_from_table(table: dict) -> str:
    """Writes a temporary CSV, dropping the 'Modelo (repo)' column if present while keeping 'Modelo (etiqueta)'."""
    cols = table.get("columns", [])
    rows = table.get("data", [])
    ts = int(time.time())
    path = f"/tmp/icb4_leadership_{ts}.csv"
    # Detect and remove only the 'Modelo (repo)' column
    if "Modelo (repo)" in cols:
        idx_repo = cols.index("Modelo (repo)")
        cols = [c for i, c in enumerate(cols) if i != idx_repo]
        new_rows = []
        for r in rows:
            if len(r) > idx_repo:
                # Drop only the cell that belongs to 'Modelo (repo)'
                r = [c for i, c in enumerate(r) if i != idx_repo]
            new_rows.append(r)
        rows = new_rows
    # Write the final CSV
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(cols)
        for r in rows:
            writer.writerow(r)
    return path if os.path.exists(path) else ""
# ==========================
# UI (2 columns + model selector + CSV)
# ==========================
with gr.Blocks(title="ICB4 4.4.5 Leadership — Evaluación STAR (FRAQX)", css=CUSTOM_CSS, elem_id="app") as demo:
    gr.Markdown(
        """
<div style="display:flex;align-items:center;gap:12px;margin:8px 0 2px 0;">
  <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" height="28">
  <h1 style="margin:0;">ICB4 • 4.4.5 Leadership — Evaluación STAR + NLI</h1>
</div>
<div class="small">Extracción STAR, scoring (4.4.5.1–4.4.5.5), gráfica y reporte descargable. Elige el modelo NLI según tu prioridad.</div>
        """
    )

    with gr.Row(equal_height=True):
        # Input column
        with gr.Column(scale=5):
            gr.Markdown("<div class='card'><b>Entrada</b></div>")
            model_key = gr.Dropdown(
                choices=list(MODEL_CHOICES.keys()),
                value=DEFAULT_MODEL_KEY,
                label="Modelo NLI",
                info="Velocidad (MiniLM) = más rápido | Precisión (DeBERTa) = mejor calidad"
            )
            texto = gr.Textbox(
                label="Caso (STAR o texto libre)",
                lines=16,
                placeholder="Pega aquí tu caso en formato STAR (S, T, A, R) o texto libre…"
            )
            with gr.Row():
                btn = gr.Button("Evaluar", variant="primary", scale=3)
                gr.ClearButton([texto], value="Limpiar", scale=1)
            gr.Markdown(
                """
<details>
<summary>Ejemplo rápido (clic para autocompletar)</summary>
<div class="small">
S: El proyecto CRM estaba retrasado 6 semanas y el equipo estaba desmotivado.<br/>
T: Recuperar el plan y mejorar la colaboración en 2 sprints.<br/>
A: Organicé una sesión de visión y valores; definí métricas; implementé dailies; mentoring a líderes junior;
negocié con stakeholders; prioricé backlog mínimo; comuniqué riesgos y fechas realistas.<br/>
R: Recuperamos 4 semanas en 2 sprints; NPS interno +22; retrabajo -18%; se mantuvieron prácticas; dos líderes promovidos.
</div>
</details>
                """,
            )

        # Output column
        with gr.Column(scale=7):
            gr.Markdown("<div class='card'><b>Resultados</b></div>")
            status = gr.Markdown(value="**Estado**: —", elem_id="status_md")
            score_badge = gr.Markdown(value="<span class='badge'>Score global: —</span>")
            plot = gr.Plot(label="Gráfica de evaluación (0–4)")
            table = gr.Dataframe(
                headers=DEFAULT_COLS,
                datatype=["str", "str", "number", "str", "str", "str"],
                interactive=False,
                label="Detalle por indicador"
            )
            with gr.Row():
                download_btn = gr.Button("Descargar CSV")
                csv_file = gr.File(label="Archivo CSV", visible=False)

    # Logic
    def run_eval(t: str, mk: str):
        msg, fig, tbl = evaluate(t, mk)
        status_md = "**Estado**  \n" + (msg or "").replace("\n", "  \n")
        badge_html = "<span class='badge'>Score global: —</span>"
        try:
            m = re.search(r"Score global \(0–4\):\s*([0-4](?:\.[0-9])?)", msg or "")
            if m:
                badge_html = f"<span class='badge'>Score global: {m.group(1)}</span>"
        except Exception:
            pass
        cols = (tbl or {}).get("columns") or DEFAULT_COLS
        data = (tbl or {}).get("data") or []
        # Pad or trim rows so every row matches the number of columns
        safe_data = []
        for row in data:
            r = list(row)
            if len(r) < len(cols):
                r += [""] * (len(cols) - len(r))
            elif len(r) > len(cols):
                r = r[:len(cols)]
            safe_data.append(r)
        if fig is None:
            fig, ax = plt.subplots(figsize=(6, 2))
            ax.axis("off")
            ax.text(0.5, 0.5, "Sin datos para graficar", ha="center", va="center")
        return status_md, badge_html, fig, gr.update(value=safe_data, headers=cols)

    btn.click(fn=run_eval, inputs=[texto, model_key], outputs=[status, score_badge, plot, table])
    def export_csv_handler(t: str, mk: str):
        _, _, tbl = evaluate(t, mk)
        path = make_csv_from_table(tbl)
        # A single gr.update sets the file value and makes the component visible,
        # so csv_file does not need to be listed twice in `outputs`.
        return gr.update(value=path, visible=True)

    download_btn.click(fn=export_csv_handler, inputs=[texto, model_key], outputs=[csv_file])


# Launch
if __name__ == "__main__":
    demo.queue(max_size=16).launch(ssr_mode=False, show_error=True)
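# --------------------------------------------------------------------------
# Local-run sketch (assumed setup; the Space's actual requirements.txt and entry
# file name are not shown above, so both are assumptions):
#   pip install gradio transformers torch matplotlib
#   python app.py
# Both the Qwen LLM and the selected NLI model are loaded lazily, so the first
# evaluation after a cold start is noticeably slower than subsequent ones.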