Spaces:

Veronyka
/

algospeak

Sleeping

algospeak / app.py

travahacker

Add AI model: Hybrid GPT-2 + Dictionary (context-aware translations!)

c300f88 15 days ago

11.5 kB

	import gradio as gr
	from huggingface_hub import InferenceClient
	from algospeak_dictionary import get_algospeak_context, ALGOSPEAK_DICT
	import os
	import re

	# Initialize inference client with HF token (automatically provided in Spaces).
	# HF_TOKEN takes precedence; HUGGINGFACE_TOKEN is only consulted when HF_TOKEN
	# is unset or empty. If neither yields a value, a falsy token is passed through.
	hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
	client = InferenceClient(token=hf_token)

	# Complete AlgoSpeak dictionary for prompts.
	# NOTE(review): presumably a pre-formatted text rendering of the dictionary;
	# the return type of get_algospeak_context() is not visible here — confirm.
	ALGOSPEAK_EXAMPLES = get_algospeak_context()

	# Model selection for the HF Inference API text-generation calls.
	# Both are small GPT-2 variants picked by the author for availability and speed.
	MODEL = "gpt2-medium" # Primary generation model
	BACKUP_MODEL = "distilgpt2" # Even smaller fallback

	# Plain term -> AlgoSpeak replacement. Keys are matched case-insensitively.
	_ALGOSPEAK_REPLACEMENTS = {
	    # Death and violence
	    "suicide": "sewerslide",
	    "kill": "unalive",
	    "killing": "unaliving",
	    "killed": "unalived",
	    "die": "unalive",
	    "died": "unalived",
	    "death": "unalive",
	    "dead": "unalived",
	    # Weapons and war
	    "gun": "pew pew",
	    "guns": "pew pews",
	    "shooting": "pew pew",
	    "shot": "pew pew",
	    "weapon": "noodle",
	    "weapons": "noodles",
	    "war": "cornucopia",
	    "bomb": "kaboom",
	    # Adult content
	    "sex": "seggs",
	    "sexual": "seggs",
	    "porn": "corn",
	    "pornography": "corn",
	    "lesbian": "le$bian",
	    "gay": "g@y",
	    # Health
	    "COVID": "mascara",
	    "COVID-19": "mascara",
	    "coronavirus": "mascara",
	    "vaccine": "backshot",
	    "pandemic": "panini",
	    # LGBTQ+
	    "LGBTQ": "leg booty",
	    "LGBTQ+": "leg booty",
	    # Sexual violence
	    "rape": "grape",
	    "sexual assault": "SA",
	    # Other
	    "sex worker": "accountant",
	    "stripper": "skripper",
	    "marijuana": "lettuce",
	    "weed": "lettuce",
	}

	# Case-folded lookup used to map whatever casing was matched back to its code word.
	_ALGOSPEAK_LOOKUP = {
	    plain.casefold(): coded for plain, coded in _ALGOSPEAK_REPLACEMENTS.items()
	}

	# One alternation, longest term first so "sexual assault" wins over "sexual"
	# and "COVID-19" over "COVID". The lookarounds are used instead of \b because
	# some terms end in a non-word character ("LGBTQ+"). The optional trailing "s"
	# keeps simple plurals working ("deaths" -> "unalives") without matching inside
	# unrelated words ("warning", "diet", "skill").
	_ALGOSPEAK_PATTERN = re.compile(
	    r"(?<!\w)("
	    + "|".join(
	        re.escape(plain)
	        for plain in sorted(_ALGOSPEAK_REPLACEMENTS, key=len, reverse=True)
	    )
	    + r")(s?)(?!\w)",
	    re.IGNORECASE,
	)


	def translate_to_algospeak_simple(text):
	    """Translate plain text to AlgoSpeak using the built-in replacement table.

	    Whole terms (optionally followed by a plural "s") are replaced
	    case-insensitively in a single pass, so a replacement is never itself
	    re-scanned and terms embedded in longer words are left alone.

	    Args:
	        text: The plain-language input string.

	    Returns:
	        A message containing the translated text, or a "no sensitive terms"
	        message (echoing the input) when nothing was replaced.
	    """

	    def _encode(match):
	        coded = _ALGOSPEAK_LOOKUP.get(match.group(1).casefold())
	        if coded is None:
	            # Exotic case-insensitive match with no table entry: leave as-is.
	            return match.group(0)
	        return coded + match.group(2)

	    result = _ALGOSPEAK_PATTERN.sub(_encode, text)

	    if result == text:
	        return f"✨ No sensitive terms detected!\n\nOriginal: {text}\n\n💡 Tip: Try terms like 'suicide', 'sex', 'war', 'COVID', etc."

	    return f"🔄 Translated to AlgoSpeak:\n\n{result}"

	def translate_to_algospeak_ai(text):
	    """Translate plain text to AlgoSpeak with a text-generation model.

	    MODEL is tried first, then BACKUP_MODEL. If every attempt raises or
	    produces empty output, the dictionary-based translation is returned
	    instead, prefixed with an "AI model unavailable" notice.

	    Args:
	        text: The plain-language input string.

	    Returns:
	        A message string holding either the generated translation or the
	        dictionary fallback.
	    """
	    # Built from explicit lines so the prompt never depends on source indentation.
	    prompt = (
	        "Task: Convert text to AlgoSpeak (coded language to avoid censorship).\n"
	        "\n"
	        "Examples:\n"
	        '"someone died" → "someone unalived"\n'
	        '"talking about sex" → "talking about seggs"\n'
	        '"the war continues" → "the cornucopia continues"\n'
	        '"COVID vaccine" → "mascara backshot"\n'
	        "\n"
	        f"Now convert: {text}\n"
	        "\n"
	        "AlgoSpeak:"
	    )

	    for model_name in (MODEL, BACKUP_MODEL):
	        try:
	            response = client.text_generation(
	                prompt,
	                model=model_name,
	                max_new_tokens=100,
	                temperature=0.7,
	                do_sample=True,
	                top_p=0.9,
	            )
	            result = response.strip()
	        except Exception:
	            # Deliberate best-effort boundary: any inference failure just
	            # moves on to the next model, then to the dictionary.
	            continue

	        # Remove a wrapping pair of quotes echoed from the few-shot examples.
	        if result.startswith('"') and result.endswith('"'):
	            result = result[1:-1]
	        # An empty generation is treated as a failure, not as a translation.
	        if result:
	            return f"🤖 AI Translation:\n\n{result}"

	    # Fallback to dictionary-based
	    return f"⚠️ AI model unavailable, using dictionary:\n\n{translate_to_algospeak_simple(text)}"

	def translate_to_algospeak(text):
	    """Translate plain text to AlgoSpeak using the hybrid dictionary + AI approach.

	    The dictionary pass runs first. Only when it replaced something is the
	    AI translation requested and shown above the dictionary version.

	    Args:
	        text: The plain-language input (may be empty or None).

	    Returns:
	        A user-facing message string.
	    """
	    if not text or not text.strip():
	        return "⚠️ Please enter some text to translate."

	    dict_result = translate_to_algospeak_simple(text)

	    # Check the message prefix, not a substring: the message echoes the
	    # user's text, which could itself contain the sentinel phrase.
	    if dict_result.startswith("✨ No sensitive terms detected!"):
	        return dict_result

	    ai_result = translate_to_algospeak_ai(text)

	    # When the AI path already fell back to the dictionary, its message
	    # contains the dictionary translation; do not print it a second time.
	    if ai_result.startswith("⚠️ AI model unavailable"):
	        return ai_result

	    return f"{ai_result}\n\n---\n\n📖 Dictionary version:\n{dict_result}"

	def interpret_algospeak_simple(text):
	    """Interpret AlgoSpeak text using ALGOSPEAK_DICT (term -> meaning).

	    Whole terms (optionally followed by a plural "s") are matched
	    case-insensitively in one pass, longest term first, and replaced by
	    "[meaning]". Matching whole terms avoids hits inside ordinary words,
	    and the single pass means inserted meanings are never re-scanned.

	    Args:
	        text: The AlgoSpeak input string.

	    Returns:
	        A message with the annotated text and the list of terms found (in
	        order of first appearance), or a "no terms detected" message.
	    """
	    # Case-folded term -> (term as spelled in the dictionary, meaning).
	    lookup = {}
	    for term, meaning in ALGOSPEAK_DICT.items():
	        if term:  # an empty term would match everywhere
	            lookup.setdefault(term.casefold(), (term, meaning))

	    found_terms = []
	    result = text

	    if lookup:
	        longest_first = sorted(
	            (term for term, _ in lookup.values()), key=len, reverse=True
	        )
	        alternatives = "|".join(re.escape(term) for term in longest_first)
	        # Lookarounds rather than \b: some terms contain symbols ("le$bian").
	        pattern = re.compile(rf"(?<!\w)({alternatives})(s?)(?!\w)", re.IGNORECASE)

	        def _decode(match):
	            entry = lookup.get(match.group(1).casefold())
	            if entry is None:
	                return match.group(0)
	            term, meaning = entry
	            note = f"'{term}' → {meaning}"
	            if note not in found_terms:
	                found_terms.append(note)
	            # Returned from a function so the meaning is inserted literally;
	            # a template string would interpret backslashes and group refs.
	            return f"[{meaning}]{match.group(2)}"

	        result = pattern.sub(_decode, text)

	    if not found_terms:
	        return f"✨ No AlgoSpeak terms detected!\n\nOriginal: {text}\n\n💡 Tip: Try terms like 'unalive', 'seggs', 'pew pew', 'mascara', etc."

	    explanation = "\n".join(found_terms)
	    return f"🔍 Interpreted:\n\n{result}\n\n📖 Terms found:\n{explanation}"

	def interpret_algospeak_ai(text):
	    """Interpret AlgoSpeak text as plain English with a text-generation model.

	    MODEL is tried first, then BACKUP_MODEL. If every attempt raises or
	    produces empty output, the dictionary-based interpretation is returned
	    instead, prefixed with an "AI model unavailable" notice.

	    Args:
	        text: The AlgoSpeak input string.

	    Returns:
	        A message string holding either the generated interpretation or the
	        dictionary fallback.
	    """
	    # Built from explicit lines so the prompt never depends on source indentation.
	    prompt = (
	        "Task: Translate AlgoSpeak (coded language) to plain English.\n"
	        "\n"
	        "Examples:\n"
	        '"someone unalived" → "someone died/killed themselves"\n'
	        '"talking about seggs" → "talking about sex"\n'
	        '"the cornucopia continues" → "the war continues"\n'
	        '"got my backshot for mascara" → "got my vaccine for COVID"\n'
	        "\n"
	        f"Now translate: {text}\n"
	        "\n"
	        "Plain English:"
	    )

	    for model_name in (MODEL, BACKUP_MODEL):
	        try:
	            response = client.text_generation(
	                prompt,
	                model=model_name,
	                max_new_tokens=100,
	                temperature=0.5,
	                do_sample=True,
	                top_p=0.9,
	            )
	            result = response.strip()
	        except Exception:
	            # Deliberate best-effort boundary: any inference failure just
	            # moves on to the next model, then to the dictionary.
	            continue

	        # Remove a wrapping pair of quotes echoed from the few-shot examples.
	        if result.startswith('"') and result.endswith('"'):
	            result = result[1:-1]
	        # An empty generation is treated as a failure, not as an answer.
	        if result:
	            return f"🤖 AI Interpretation:\n\n{result}"

	    return f"⚠️ AI model unavailable, using dictionary:\n\n{interpret_algospeak_simple(text)}"

	def interpret_algospeak(text):
	    """Interpret AlgoSpeak as plain language using the hybrid dictionary + AI approach.

	    The dictionary pass runs first. Only when it recognised at least one
	    term is the AI interpretation requested and shown above the dictionary
	    result.

	    Args:
	        text: The AlgoSpeak input (may be empty or None).

	    Returns:
	        A user-facing message string.
	    """
	    if not text or not text.strip():
	        return "⚠️ Please enter some AlgoSpeak text to interpret."

	    # Get dictionary result
	    dict_result = interpret_algospeak_simple(text)

	    # Check the message prefix, not a substring: the message echoes the
	    # user's text, which could itself contain the sentinel phrase.
	    if dict_result.startswith("✨ No AlgoSpeak terms detected!"):
	        return dict_result

	    ai_result = interpret_algospeak_ai(text)

	    # When the AI path already fell back to the dictionary, its message
	    # contains the dictionary interpretation; do not print it a second time.
	    if ai_result.startswith("⚠️ AI model unavailable"):
	        return ai_result

	    return f"{ai_result}\n\n---\n\n{dict_result}"

	def search_dictionary(query):
	    """Search ALGOSPEAK_DICT by coded term or by meaning.

	    The match is a case-insensitive substring test against both the term
	    and its meaning. An empty query is a substring of everything, so it
	    lists the whole dictionary.

	    Args:
	        query: The search text; None is treated as an empty query.

	    Returns:
	        The matching "term → meaning" lines separated by blank lines, or a
	        "no terms found" message.
	    """
	    # A missing value must not crash the handler with AttributeError.
	    needle = (query or "").lower().strip()

	    results = [
	        f"{term} → {meaning}"
	        for term, meaning in ALGOSPEAK_DICT.items()
	        if needle in term.lower() or needle in meaning.lower()
	    ]

	    if not results:
	        return "❌ No terms found. Try another search!"

	    return "\n\n".join(results)

	# Gradio interface: three tabs (translate, interpret, dictionary search),
	# each wiring one textbox and button to the matching handler defined above.
	with gr.Blocks(theme=gr.themes.Soft(), title="AlgoSpeak AI") as demo:
	    # Plain string: the intro text has no placeholders, so no f-prefix is needed.
	    gr.Markdown("""
	    # 🗣️ AlgoSpeak AI Translator

	    AlgoSpeak is a language used to circumvent content moderation algorithms on social media platforms.

	    This tool can:
	    - 📝 Translate plain text → AlgoSpeak (AI + dictionary hybrid)
	    - 🔍 Interpret AlgoSpeak → plain language (AI-enhanced)
	    - 📖 Search through 60+ catalogued terms

	    🤖 Powered by: GPT-2 AI model + curated dictionary
	    💡 Hybrid approach: AI for context + dictionary for accuracy
	    """)

	    with gr.Tab("🌐 Translate to AlgoSpeak"):
	        with gr.Row():
	            with gr.Column():
	                input_normal = gr.Textbox(
	                    label="Plain Text",
	                    placeholder="Type something...",
	                    lines=3
	                )
	                btn_translate = gr.Button("Translate to AlgoSpeak", variant="primary")
	            with gr.Column():
	                output_algospeak = gr.Textbox(
	                    label="AlgoSpeak Result",
	                    lines=3
	                )

	        gr.Examples(
	            examples=[
	                ["Let's talk about mental health and teenage suicide"],
	                ["The war in the region had many shootings and civilian deaths"],
	                ["We need to discuss LGBTQ+ rights and responsible adult content"],
	                ["COVID-19 caused millions of deaths in the pandemic"],
	                ["Sex workers deserve rights and protection"]
	            ],
	            inputs=input_normal
	        )

	        btn_translate.click(
	            translate_to_algospeak,
	            inputs=input_normal,
	            outputs=output_algospeak
	        )

	    with gr.Tab("🔍 Interpret AlgoSpeak"):
	        with gr.Row():
	            with gr.Column():
	                input_algospeak = gr.Textbox(
	                    label="AlgoSpeak Text",
	                    placeholder="Paste AlgoSpeak text...",
	                    lines=3
	                )
	                btn_interpret = gr.Button("Interpret", variant="primary")
	            with gr.Column():
	                output_normal = gr.Textbox(
	                    label="Plain Language",
	                    lines=3
	                )

	        gr.Examples(
	            examples=[
	                ["Someone tried to unalive themselves"],
	                ["The seggs worker talked about their job"],
	                ["There was a cornucopia with many pew pews"],
	                ["Got my backshot today, feeling safer from the mascara"],
	                ["The accountant shared spicy content on the corn site"]
	            ],
	            inputs=input_algospeak
	        )

	        btn_interpret.click(
	            interpret_algospeak,
	            inputs=input_algospeak,
	            outputs=output_normal
	        )

	    with gr.Tab("📖 Dictionary Search"):
	        gr.Markdown("""
	        ### Explore the AlgoSpeak dictionary
	        Search by coded term OR by real meaning.
	        """)

	        with gr.Row():
	            with gr.Column():
	                search_input = gr.Textbox(
	                    label="Type your search",
	                    placeholder="e.g. unalive, sex, weapon...",
	                    lines=1
	                )
	                btn_search = gr.Button("🔍 Search", variant="primary")
	            with gr.Column():
	                # Initial content shows the live dictionary size.
	                search_output = gr.Markdown(
	                    label="Results",
	                    value=f"💡 Tip: Type any word to search!\n\nTotal terms in dictionary: {len(ALGOSPEAK_DICT)}"
	                )

	        gr.Examples(
	            examples=[
	                ["unalive"],
	                ["sex"],
	                ["weapon"],
	                ["COVID"],
	                ["LGBTQ"]
	            ],
	            inputs=search_input
	        )

	        btn_search.click(
	            search_dictionary,
	            inputs=search_input,
	            outputs=search_output
	        )

	    # Footer shown below all tabs.
	    gr.Markdown("""
	    ---
	    ### ℹ️ About AlgoSpeak
	    AlgoSpeak is a form of linguistic resistance against algorithmic censorship.
	    Learn more at [algospeak.net](https://www.algospeak.net/)

	    Note: This is an educational prototype developed during a workshop.
	    """)

	# Start the app only when run as a script, not when imported.
	if __name__ == "__main__":
	    demo.launch()