|
|
import gradio as gr |
|
|
from huggingface_hub import InferenceClient |
|
|
from algospeak_dictionary import get_algospeak_context, ALGOSPEAK_DICT |
|
|
import os |
|
|
import re |
|
|
|
|
|
|
|
|
# Hugging Face API token. HF_TOKEN wins when it is set to a non-empty value;
# otherwise HUGGINGFACE_TOKEN is used. The result is None if neither is set.
hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")

# Inference client shared by the AI-backed translate/interpret helpers below.
client = InferenceClient(token=hf_token)

# Value returned by the dictionary module's context builder.
ALGOSPEAK_EXAMPLES = get_algospeak_context()

# Model name passed to client.text_generation by the AI helpers.
MODEL = "gpt2-medium"

# Alternative model name (not referenced by the code visible in this file).
BACKUP_MODEL = "distilgpt2"
|
|
|
|
|
# Plain-language term -> AlgoSpeak replacement. Keys are matched
# case-insensitively; the replacement is inserted exactly as written here.
_ALGOSPEAK_REPLACEMENTS = {
    "suicide": "sewerslide",
    "kill": "unalive",
    "killing": "unaliving",
    "killed": "unalived",
    "die": "unalive",
    "died": "unalived",
    "death": "unalive",
    "dead": "unalived",

    "gun": "pew pew",
    "guns": "pew pews",
    "shooting": "pew pew",
    "shot": "pew pew",
    "weapon": "noodle",
    "weapons": "noodles",
    "war": "cornucopia",
    "bomb": "kaboom",

    "sex": "seggs",
    "sexual": "seggs",
    "porn": "corn",
    "pornography": "corn",
    "lesbian": "le$bian",
    "gay": "g@y",

    "COVID": "mascara",
    "COVID-19": "mascara",
    "coronavirus": "mascara",
    "vaccine": "backshot",
    "pandemic": "panini",

    "LGBTQ": "leg booty",
    "LGBTQ+": "leg booty",

    "rape": "grape",
    "sexual assault": "SA",

    "sex worker": "accountant",
    "stripper": "skripper",
    "marijuana": "lettuce",
    "weed": "lettuce",
}

# Case-folded lookup used to map a matched span back to its replacement.
_ALGOSPEAK_LOOKUP = {
    term.casefold(): coded for term, coded in _ALGOSPEAK_REPLACEMENTS.items()
}

# One alternation over every term, longest first so that e.g. "sex worker"
# wins over "sex" and "COVID-19" over "COVID". The lookarounds keep matches on
# word edges ("war" must not fire inside "toward") while still accepting a
# trailing plural "s" ("shootings" -> "pew pews"), which stays in the text.
_ALGOSPEAK_PATTERN = re.compile(
    r"(?<!\w)(?:"
    + "|".join(
        re.escape(term)
        for term in sorted(_ALGOSPEAK_REPLACEMENTS, key=len, reverse=True)
    )
    + r")(?=s?(?!\w))",
    re.IGNORECASE,
)


def translate_to_algospeak_simple(text):
    """Translate plain text to AlgoSpeak using the built-in term table.

    All terms are replaced in a single left-to-right pass, so text that was
    just inserted is never scanned again (previously "vaccine" became
    "backshot" and was then rewritten to "backpew pew" by the "shot" rule).
    Matching is case-insensitive and restricted to whole words, optionally
    followed by a plural "s".

    Args:
        text: The plain-language input string.

    Returns:
        A message string: either the translated text under a "Translated to
        AlgoSpeak" heading, or a "No sensitive terms detected" notice that
        echoes the original text when nothing was replaced.
    """

    def _encode(match):
        found = match.group(0)
        # Fall back to the matched text for exotic case-insensitive matches
        # whose case-folded form is not a key of the lookup table.
        return _ALGOSPEAK_LOOKUP.get(found.casefold(), found)

    result = _ALGOSPEAK_PATTERN.sub(_encode, text)

    if result == text:
        return f"β¨ No sensitive terms detected!\n\nOriginal: {text}\n\nπ‘ Tip: Try terms like 'suicide', 'sex', 'war', 'COVID', etc."

    return f"π Translated to AlgoSpeak:\n\n{result}"
|
|
|
|
|
def translate_to_algospeak_ai(text):
    """Ask the text-generation model to rewrite ``text`` in AlgoSpeak.

    A few-shot prompt is sent through the shared inference client using
    ``MODEL``. The completion is whitespace-stripped and, when it is wrapped
    in double quotes, one pair of quotes is removed.

    Args:
        text: The plain-language input string.

    Returns:
        The model output under an "AI Translation" heading, or, if anything
        in the generation step raises, a notice followed by the result of
        ``translate_to_algospeak_simple(text)``.
    """
    few_shot_prompt = f"""Task: Convert text to AlgoSpeak (coded language to avoid censorship).

Examples:
"someone died" β "someone unalived"
"talking about sex" β "talking about seggs"
"the war continues" β "the cornucopia continues"
"COVID vaccine" β "mascara backshot"

Now convert: {text}

AlgoSpeak:"""

    try:
        generated = client.text_generation(
            few_shot_prompt,
            model=MODEL,
            max_new_tokens=100,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
        ).strip()

        # Remove a single pair of enclosing double quotes, if present.
        is_quoted = generated.startswith('"') and generated.endswith('"')
        cleaned = generated[1:-1] if is_quoted else generated
        return f"π€ AI Translation:\n\n{cleaned}"
    except Exception:
        # Any failure in the generation step degrades to the rule-based path.
        fallback = translate_to_algospeak_simple(text)
        return f"β οΈ AI model unavailable, using dictionary:\n\n{fallback}"
|
|
|
|
|
def translate_to_algospeak(text):
    """Translate plain text to AlgoSpeak, combining AI and dictionary output.

    The dictionary translation runs first. Only when it actually replaced
    something is the AI helper consulted, and both results are then shown.

    Args:
        text: The plain-language input string (may be empty or None).

    Returns:
        A prompt to enter text when the input is blank; the dictionary
        "nothing detected" notice when no term matched; otherwise the AI
        result, followed by the dictionary version unless the AI result
        already ends with it.
    """
    if not text or not text.strip():
        return "β οΈ Please enter some text to translate."

    dict_result = translate_to_algospeak_simple(text)

    # Nothing to translate: skip the AI call entirely.
    if "No sensitive terms detected" in dict_result:
        return dict_result

    ai_result = translate_to_algospeak_ai(text)

    # When the AI helper falls back it embeds the dictionary translation
    # itself; appending it again would show the same text twice.
    if ai_result.endswith(dict_result):
        return ai_result

    return f"{ai_result}\n\n---\n\nπ Dictionary version:\n{dict_result}"
|
|
|
|
|
def interpret_algospeak_simple(text):
    """Decode AlgoSpeak terms in ``text`` using ``ALGOSPEAK_DICT``.

    Every dictionary term is matched in one left-to-right pass, longest term
    first, so a short term can no longer fire inside a longer one and the
    bracketed meanings that were just inserted are never re-scanned. Matching
    is case-insensitive and limited to whole words, optionally followed by a
    plural "s" (which is left in place after the bracketed meaning).

    Args:
        text: The AlgoSpeak input string.

    Returns:
        A message string: the text with each recognised term replaced by
        ``[meaning]`` plus a list of the terms found (in dictionary order),
        or a "No AlgoSpeak terms detected" notice echoing the input.
    """
    not_found = f"β¨ No AlgoSpeak terms detected!\n\nOriginal: {text}\n\nπ‘ Tip: Try terms like 'unalive', 'seggs', 'pew pew', 'mascara', etc."

    # Case-folded term -> (term as written in the dictionary, meaning).
    # An empty term is skipped: it would match at every position.
    lookup = {
        term.casefold(): (term, meaning)
        for term, meaning in ALGOSPEAK_DICT.items()
        if term
    }
    if not lookup:
        return not_found

    written_terms = sorted(
        (term for term, _ in lookup.values()), key=len, reverse=True
    )
    pattern = re.compile(
        r"(?<!\w)(?:"
        + "|".join(re.escape(term) for term in written_terms)
        + r")(?=s?(?!\w))",
        re.IGNORECASE,
    )

    seen = set()

    def _decode(match):
        entry = lookup.get(match.group(0).casefold())
        if entry is None:
            # Case-insensitive match whose folded form is not a known key:
            # leave the text untouched rather than guess.
            return match.group(0)
        seen.add(entry[0])
        return f"[{entry[1]}]"

    result = pattern.sub(_decode, text)

    if not seen:
        return not_found

    explanation = "\n".join(
        f"'{term}' β {meaning}"
        for term, meaning in ALGOSPEAK_DICT.items()
        if term in seen
    )
    return f"π Interpreted:\n\n{result}\n\nπ Terms found:\n{explanation}"
|
|
|
|
|
def interpret_algospeak_ai(text):
    """Ask the text-generation model to restate AlgoSpeak in plain English.

    A few-shot prompt is sent through the shared inference client using
    ``MODEL``. The completion is whitespace-stripped and, when it is wrapped
    in double quotes, one pair of quotes is removed.

    Args:
        text: The AlgoSpeak input string.

    Returns:
        The model output under an "AI Interpretation" heading, or, if
        anything in the generation step raises, a notice followed by the
        result of ``interpret_algospeak_simple(text)``.
    """
    few_shot_prompt = f"""Task: Translate AlgoSpeak (coded language) to plain English.

Examples:
"someone unalived" β "someone died/killed themselves"
"talking about seggs" β "talking about sex"
"the cornucopia continues" β "the war continues"
"got my backshot for mascara" β "got my vaccine for COVID"

Now translate: {text}

Plain English:"""

    try:
        generated = client.text_generation(
            few_shot_prompt,
            model=MODEL,
            max_new_tokens=100,
            temperature=0.5,
            do_sample=True,
            top_p=0.9,
        ).strip()

        # Remove a single pair of enclosing double quotes, if present.
        is_quoted = generated.startswith('"') and generated.endswith('"')
        cleaned = generated[1:-1] if is_quoted else generated
        return f"π€ AI Interpretation:\n\n{cleaned}"
    except Exception:
        # Any failure in the generation step degrades to the rule-based path.
        fallback = interpret_algospeak_simple(text)
        return f"β οΈ AI model unavailable, using dictionary:\n\n{fallback}"
|
|
|
|
|
def interpret_algospeak(text):
    """Interpret AlgoSpeak as plain language, combining AI and dictionary.

    The dictionary interpretation runs first. Only when it recognised at
    least one term is the AI helper consulted, and both results are shown.

    Args:
        text: The AlgoSpeak input string (may be empty or None).

    Returns:
        A prompt to enter text when the input is blank; the dictionary
        "nothing detected" notice when no term matched; otherwise the AI
        result, followed by the dictionary result unless the AI result
        already ends with it.
    """
    if not text or not text.strip():
        return "β οΈ Please enter some AlgoSpeak text to interpret."

    dict_result = interpret_algospeak_simple(text)

    # Nothing recognised: skip the AI call entirely.
    if "No AlgoSpeak terms detected" in dict_result:
        return dict_result

    ai_result = interpret_algospeak_ai(text)

    # When the AI helper falls back it embeds the dictionary interpretation
    # itself; appending it again would show the same text twice.
    if ai_result.endswith(dict_result):
        return ai_result

    return f"{ai_result}\n\n---\n\n{dict_result}"
|
|
|
|
|
def search_dictionary(query):
    """Find dictionary entries whose term or meaning contains ``query``.

    The query is lower-cased and stripped, then compared as a substring
    against the lower-cased term and meaning of every ``ALGOSPEAK_DICT``
    entry.

    Args:
        query: The search string.

    Returns:
        The matching entries as Markdown lines separated by blank lines, or
        a "No terms found" message when nothing matches.
    """
    needle = query.lower().strip()

    matches = [
        f"**{term}** β {meaning}"
        for term, meaning in ALGOSPEAK_DICT.items()
        if needle in term.lower() or needle in meaning.lower()
    ]

    if matches:
        return "\n\n".join(matches)
    return "β No terms found. Try another search!"
|
|
|
|
|
|
|
|
# Gradio UI: three tabs (translate, interpret, dictionary search) plus a
# header and footer. Components are created in the same order as before.
with gr.Blocks(theme=gr.themes.Soft(), title="AlgoSpeak AI") as demo:
    # Page header.
    gr.Markdown("""
    # π£οΈ AlgoSpeak AI Translator

    **AlgoSpeak** is a language used to circumvent content moderation algorithms on social media platforms.

    This tool can:
    - π Translate plain text β AlgoSpeak (AI + dictionary hybrid)
    - π Interpret AlgoSpeak β plain language (AI-enhanced)
    - π Search through 60+ catalogued terms

    π€ **Powered by:** GPT-2 AI model + curated dictionary
    π‘ **Hybrid approach:** AI for context + dictionary for accuracy
    """)

    # --- Tab 1: plain text -> AlgoSpeak -------------------------------------
    with gr.Tab("π Translate to AlgoSpeak"):
        translate_examples = [
            ["Let's talk about mental health and teenage suicide"],
            ["The war in the region had many shootings and civilian deaths"],
            ["We need to discuss LGBTQ+ rights and responsible adult content"],
            ["COVID-19 caused millions of deaths in the pandemic"],
            ["Sex workers deserve rights and protection"],
        ]

        with gr.Row():
            with gr.Column():
                input_normal = gr.Textbox(
                    label="Plain Text", placeholder="Type something...", lines=3
                )
                btn_translate = gr.Button("Translate to AlgoSpeak", variant="primary")
            with gr.Column():
                output_algospeak = gr.Textbox(label="AlgoSpeak Result", lines=3)

        gr.Examples(examples=translate_examples, inputs=input_normal)

        # Run the hybrid translator when the button is pressed.
        btn_translate.click(
            translate_to_algospeak, inputs=input_normal, outputs=output_algospeak
        )

    # --- Tab 2: AlgoSpeak -> plain language ---------------------------------
    with gr.Tab("π Interpret AlgoSpeak"):
        interpret_examples = [
            ["Someone tried to unalive themselves"],
            ["The seggs worker talked about their job"],
            ["There was a cornucopia with many pew pews"],
            ["Got my backshot today, feeling safer from the mascara"],
            ["The accountant shared spicy content on the corn site"],
        ]

        with gr.Row():
            with gr.Column():
                input_algospeak = gr.Textbox(
                    label="AlgoSpeak Text",
                    placeholder="Paste AlgoSpeak text...",
                    lines=3,
                )
                btn_interpret = gr.Button("Interpret", variant="primary")
            with gr.Column():
                output_normal = gr.Textbox(label="Plain Language", lines=3)

        gr.Examples(examples=interpret_examples, inputs=input_algospeak)

        # Run the hybrid interpreter when the button is pressed.
        btn_interpret.click(
            interpret_algospeak, inputs=input_algospeak, outputs=output_normal
        )

    # --- Tab 3: dictionary search -------------------------------------------
    with gr.Tab("π Dictionary Search"):
        search_examples = [["unalive"], ["sex"], ["weapon"], ["COVID"], ["LGBTQ"]]

        gr.Markdown("""
        ### Explore the AlgoSpeak dictionary
        Search by coded term OR by real meaning.
        """)

        with gr.Row():
            with gr.Column():
                search_input = gr.Textbox(
                    label="Type your search",
                    placeholder="e.g. unalive, sex, weapon...",
                    lines=1,
                )
                btn_search = gr.Button("π Search", variant="primary")
            with gr.Column():
                # Initial text shows the dictionary size until a search runs.
                search_output = gr.Markdown(
                    label="Results",
                    value=f"π‘ Tip: Type any word to search!\n\n**Total terms in dictionary:** {len(ALGOSPEAK_DICT)}",
                )

        gr.Examples(examples=search_examples, inputs=search_input)

        btn_search.click(search_dictionary, inputs=search_input, outputs=search_output)

    # Page footer.
    gr.Markdown("""
    ---
    ### βΉοΈ About AlgoSpeak
    AlgoSpeak is a form of linguistic resistance against algorithmic censorship.
    Learn more at [algospeak.net](https://www.algospeak.net/)

    **Note**: This is an educational prototype developed during a workshop.
    """)


# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|
|
|
|