Spaces:

Iker
/

ClickbaitFighter

Running on Zero

App Files Files Community

ClickbaitFighter / app.py

Iker

Test style

4e7f729 verified about 1 year ago

raw

history blame

9.48 kB

	import datetime
	import os
	from collections import OrderedDict
	from collections.abc import Generator
	from typing import Any

	import gradio as gr
	from gradio_client import Client

	from cache_system import CacheHandler
	from download_url import download_text_and_title


	# Backend endpoint and access token, both taken from the environment.
	# NOTE(review): an unset or empty variable falls back to the literal True;
	# whether Client accepts True as its first argument is not visible here --
	# confirm before relying on that fallback.
	server = os.getenv("SERVER") or True
	auth_token = os.getenv("TOKEN") or True

	# Client used by generate_text to submit articles to the "/predict" endpoint.
	client = Client(server, hf_token=auth_token, verbose=False)

	# Counts generate_text invocations since the process started (logging only).
	total_runs = 0


	class HuggingFaceDatasetSaver_custom(gr.HuggingFaceDatasetSaver):
	    """Dataset saver whose flagged rows declare every column as a string feature."""

	    def _deserialize_components(
	        self,
	        data_dir,
	        flag_data: list[Any],
	        flag_option: str = "",
	        username: str = "",
	    ) -> tuple[dict[Any, Any], list[Any]]:
	        """Build the feature schema and the row for one flagged sample.

	        Every component value is passed through unchanged and declared as a
	        string feature keyed by the component's label. No files are written
	        by this override, so ``data_dir`` is accepted only to keep the
	        signature compatible with the parent class.

	        Args:
	            data_dir: Unused.
	            flag_data: Flagged values, paired positionally with
	                ``self.components``.
	            flag_option: Flag chosen by the user; stored in the "flag" column.
	            username: Stored in the "username" column.

	        Returns:
	            A ``(features, row)`` pair: an ordered mapping from column name to
	            its feature description, and the list of values for the row.
	        """
	        features = OrderedDict()
	        row = []
	        # zip() stops at the shorter sequence, so surplus components or
	        # surplus samples are ignored.
	        for component, sample in zip(self.components, flag_data):
	            # NOTE: components sharing a label (or lacking one, which maps to
	            # "") collapse into a single feature key while each still adds a
	            # value to the row.
	            features[component.label or ""] = {"dtype": "string", "_type": "Value"}
	            row.append(sample)

	        # Two trailing bookkeeping columns, in the same order as their values.
	        features["flag"] = {"dtype": "string", "_type": "Value"}
	        features["username"] = {"dtype": "string", "_type": "Value"}
	        row.append(flag_option)
	        row.append(username)
	        return features, row


	def finish_generation(text: str) -> str:
	    """Return ``text`` followed by a blank line and the feedback request footer."""
	    footer = "⬇️ Ayuda a mejorar la herramienta marcando si el resumen es correcto o no.⬇️"
	    return "{}\n\n{}".format(text, footer)


	def generate_text(
	    url: str, mode: int, progress=gr.Progress(track_tqdm=False)
	) -> Generator[tuple[str, str, str], None, tuple[str, str, str]]:
	    """Stream the title, summary and full text for the article at ``url``.

	    The function is a generator: every yielded value is a
	    ``(title, summary, article_text)`` triple. Failures are reported through
	    the same triple, with user-facing messages in the first two slots and
	    ``"Error"`` in the third. The last yielded triple is also the generator's
	    return value.

	    Steps: reject tweet URLs, look the URL up in the cache, download the
	    article, look the URL returned by the downloader up in the cache, and
	    finally ask the remote backend for a summary, relaying each partial
	    output as it arrives.

	    Args:
	        url: Address of the article; surrounding whitespace is stripped.
	        mode: Summary level, passed to the cache lookups and to the backend.
	        progress: Gradio progress tracker used to report the current step.

	    Yields:
	        ``(title, summary, article_text)`` triples.
	    """
	    global total_runs

	    total_runs += 1
	    print(f"Total runs: {total_runs}. Last run: {datetime.datetime.now()}")

	    url = url.strip()

	    if url.startswith(("https://twitter.com/", "https://x.com/")):
	        response = (
	            "🤖 Vaya, parece que has introducido la url de un tweet. No puedo acceder a tweets, tienes que introducir la URL de una noticia.",
	            "❌❌❌ Si el tweet contiene una noticia, dame la URL de la noticia ❌❌❌",
	            "Error",
	        )
	        yield response
	        return response

	    # 1) Download the article

	    progress(0, desc="🤖 Accediendo a la noticia")

	    # First, check if the URL as typed by the user is in the cache
	    title, text, summary = cache_handler.get_from_cache(url, mode)
	    if title is not None and text is not None and summary is not None:
	        response = (title, finish_generation(summary), text)
	        yield response
	        return response

	    try:
	        title, text, url = download_text_and_title(url)
	    except Exception as error:
	        # Best effort: any download failure is reported to the user below,
	        # but leave a trace in the logs instead of hiding the cause.
	        print(f"Article download failed: {error!r}")
	        title = None
	        text = None

	    if title is None or text is None:
	        response = (
	            "🤖 No he podido acceder a la notica, asegurate que la URL es correcta y que es posible acceder a la noticia desde un navegador.",
	            "❌❌❌ Inténtalo de nuevo ❌❌❌",
	            "Error",
	        )
	        yield response
	        return response

	    # Test if the URL returned by the downloader is in the cache
	    _, _, summary = cache_handler.get_from_cache(url, mode, second_try=True)
	    if summary is not None:
	        response = (title, finish_generation(summary), text)
	        yield response
	        return response

	    # 2) Ask the backend for the summary

	    progress(0.5, desc="🤖 Leyendo noticia")

	    server_unavailable = (
	        "🤖 El servidor no se encuentra disponible.",
	        "❌❌❌ Inténtalo de nuevo más tarde ❌❌❌",
	        "Error",
	    )

	    try:
	        job = client.submit(
	            url,  # str in '🌐 URL de la noticia' Textbox component
	            title,  # str in '🌐 Título de la noticia' Textbox component
	            text,  # str in '📰 Cuerpo de la noticia' Textbox component
	            mode,  # float (numeric value between 0 and 100) in '🎚️ Nivel de resumen' Slider component
	            api_name="/predict",
	        )

	        # Relay partial outputs as they arrive.
	        for partial_summary in job:
	            yield title, partial_summary, text

	        outputs = job.outputs()
	    except Exception as error:
	        print(f"Summary backend request failed: {error!r}")
	        yield server_unavailable
	        return server_unavailable

	    if not outputs:
	        # The job finished without producing anything; treat it like a
	        # backend failure rather than crashing on outputs[-1].
	        yield server_unavailable
	        return server_unavailable

	    summary = outputs[-1]
	    cache_handler.add_to_cache(
	        url=url, title=title, text=text, summary_type=mode, summary=summary
	    )
	    response = (title, finish_generation(summary), text)
	    yield response

	    hits, misses, cache_len = cache_handler.get_cache_stats()
	    lookups = hits + misses
	    # Guard against a division by zero when no lookups have been counted.
	    hit_rate = round(hits / lookups * 100, 2) if lookups else 0.0
	    print(
	        f"Hits: {hits}, misses: {misses}, cache length: {cache_len}. Percent hits: {hit_rate}%."
	    )
	    return response


	# Cache consulted and filled by generate_text; built with max_cache_size=1000.
	cache_handler = CacheHandler(max_cache_size=1000)

	# Flagging callback handed to the interface; targets the private
	# "Iker/Clickbait-News" dataset using the token read from the environment.
	hf_writer = HuggingFaceDatasetSaver_custom(
	    auth_token,
	    "Iker/Clickbait-News",
	    private=True,
	    separate_dirs=False,
	)


	# Gradio interface: takes an article URL and a summary level, shows the
	# values yielded by generate_text, and offers manual 👍/👎 flagging whose
	# samples are handled by hf_writer.
	demo = gr.Interface(
	generate_text,
	# Inputs are passed positionally to generate_text(url, mode).
	inputs=[
	gr.Textbox(
	label="🌐 URL de la noticia",
	info="Introduce la URL de la noticia que deseas resumir.",
	value="https://www.heraldo.es/noticias/salud/2024/01/08/atun-alimento-grasa-muscular-ayuda-combatir-colesterol-1702116.html",
	interactive=True,
	),
	# With minimum=0, maximum=100 and step=50 the slider offers 0, 50 and 100.
	gr.Slider(
	minimum=0,
	maximum=100,
	step=50,
	value=50,
	label="🎚️ Nivel de resumen",
	info="""¿Hasta qué punto quieres resumir la noticia?

	Si solo deseas un resumen, selecciona 0.

	Si buscas un resumen y desmontar el clickbait, elige 50.

	Para obtener solo la respuesta al clickbait, selecciona 100""",
	interactive=True,
	),
	],
	# Outputs receive, in order, the (title, summary, article text) triples
	# yielded by generate_text.
	outputs=[
	gr.Textbox(
	label="📰 Titular de la noticia",
	interactive=False,
	placeholder="Aquí aparecerá el título de la noticia",
	),
	gr.Textbox(
	label="🗒️ Resumen",
	interactive=False,
	placeholder="Aquí aparecerá el resumen de la noticia.",
	),
	# Hidden, non-rendered slot for the third element (the article text).
	# NOTE(review): presumably kept so the full text is part of flagged
	# samples -- confirm.
	gr.Textbox(
	label="Noticia completa",
	visible=False,
	render=False,
	interactive=False,
	placeholder="Aquí aparecerá el resumen de la noticia.",
	),
	],
	# The title argument is disabled; the heading is part of the HTML
	# description below.
	#title="⚔️ Clickbait Fighter! ⚔️",
	thumbnail="https://huggingface.co/spaces/Iker/ClickbaitFighter/resolve/main/logo2.png",
	theme="JohnSmith9982/small_and_pretty",
	description="""
	<table>
	<tr>
	<td style="width:10%"><img src="https://huggingface.co/spaces/Iker/ClickbaitFighter/resolve/main/logo2.png" align="left" width="30%"> </td>
	<td style="width:10%"><center><h1>Clickbait Fighter!</h1></center></td>
	<td style="width:10%"><img src="https://huggingface.co/spaces/Iker/ClickbaitFighter/resolve/main/logo2.png" align="right" width="30%"> </td>
	</tr>
	</table>
	<p align="justify">Esta Inteligencia Artificial es capaz de generar un resumen de una sola frase que revela la verdad detrás de un titular sensacionalista o clickbait. Solo tienes que introducir la URL de la noticia. La IA accederá a la noticia, la leerá y en cuestión de segundos generará un resumen de una sola frase que revele la verdad detrás del titular.</p>

	🎚 Ajusta el nivel de resumen con el control deslizante. Cuanto maś alto, más corto será el resumen.

	🗒 La IA no es capaz de acceder a todas las webs, por ejemplo, si introduces un enlace a una noticia que requiere suscripción, la IA no podrá acceder a ella. Algunas webs pueden tener tecnologías para bloquear bots.

	⌚ La IA se encuentra corriendo en un hardware bastante modesto, debería tardar menos de 30 segundos en generar el resumen, pero si muchos usuarios usan la app a la vez, tendrás que esperar tu turno.

	💸 Este es un projecto sin ánimo de lucro, no se genera ningún tipo de ingreso con esta app. Los datos, la IA y el código se publicarán para su uso en la investigación académica. No puedes usar esta app para ningún uso comercial.

	🧪 El modelo se encuentra en fase de desarrollo, si quieres ayudar a mejorarlo puedes usar los botones 👍 y 👎 para valorar el resumen. ¡Gracias por tu ayuda!""",
	article="Esta Inteligencia Artificial ha sido generada por Iker García-Ferrero. Puedes saber más sobre mi trabajo en mi [página web](https://ikergarcia1996.github.io/Iker-Garcia-Ferrero/) o mi perfil de [X](https://twitter.com/iker_garciaf). Puedes ponerte en contacto conmigo a través de correo electrónico (ver web) y X.",
	cache_examples=False,
	concurrency_limit=1,
	# Manual flagging with two options; each pair is (button text, stored value).
	# NOTE(review): pair order assumed from the values used -- confirm against
	# the Gradio version in use.
	allow_flagging="manual",
	flagging_options=[("👍", "correct"), ("👎", "incorrect")],
	flagging_callback=hf_writer,
	)

	# Enable the request queue (max_size=None) and start the app without
	# requesting a public share link.
	demo.queue(max_size=None).launch(share=False)