import gradio as gr
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load model and tokenizer
HF_USER = "ai01firebird"
MODEL_NAME = "emojinator-gpt2-v3"

# fine-tuned model
model = AutoModelForCausalLM.from_pretrained(f"{HF_USER}/{MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(f"{HF_USER}/{MODEL_NAME}")
# gpt2 outputs text!
#tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
#model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")

# distilgpt2 is only 80MB -> NOK, no emojis
#tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
#model = AutoModelForCausalLM.from_pretrained("distilgpt2")

# tiny-gpt2 is only 20MB -> NOK, no emojis
#tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
#model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")

# TinyLlama
#tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
#model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
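
# Optional sketch (assumption, not part of the original Space): free Spaces run
# on CPU, but if a GPU were available the model could be moved over. The tensors
# returned by tokenizer(...) would then also need .to(device) before generate().
#device = "cuda" if torch.cuda.is_available() else "cpu"
#model = model.to(device)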
# OLD conversion method (instruction-style prompt)
def text_to_emoji_OLD_OLD(input_text):
    # Clean up the input text (optional)
    cleaned_text = re.sub(r"[.,!?;:]", "", input_text)

    # Prepare the prompt
    prompt = f'Convert the following sentence into an emoji-sequence which conveys a similar meaning and return only the emojis, no explanation:\n\n"{cleaned_text}"\n\n'

    # Tokenization and generation
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=30,
        do_sample=True,
        temperature=0.8,
        top_k=50
    )

    # Decode
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract only the part after the prompt
    emoji_part = generated_text[len(prompt):].strip()
    return emoji_part
# OLD conversion method (few-shot prompt)
def text_to_emoji_OLD(input_text):
    # Clean up the input text (optional)
    cleaned_text = re.sub(r"[.,!?;:]", "", input_text)

    # Pure pattern-based prompt
    prompt = (
        "Hi there → 👋🙂\n"
        "Good night → 🌙😴\n"
        "I love pizza → ❤️🍕\n"
        "It's raining → 🌧️☔\n"
        "Happy birthday → 🎂🎉🥳\n"
        "I am so tired → 😴💤\n"
        "Let's go to the beach → 🏖️🌊😎\n"
        "I'm feeling lucky → 🍀🤞\n"
        "We're getting married → 💍👰🤵\n"
        "Merry Christmas → 🎄🎅🎁\n"
        "Let's party → 🎉🕺🎊\n"
        f"{cleaned_text} →"
    )

    # Tokenization and generation
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        do_sample=True,
        temperature=0.9,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id  # prevents a warning
    )

    # Decode
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the generated part after the last "→"
    emoji_part = generated_text.split("→")[-1].strip().split("\n")[0]
    return emoji_part
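
# Reproducibility sketch (assumption, not in the original): do_sample=True makes
# the output non-deterministic; seeding torch before a call makes runs repeatable
# for debugging.
#torch.manual_seed(42)
#print(text_to_emoji_OLD("Good morning"))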
# conversion method (fine-tuned model, "Text:/Emoji:" format)
def text_to_emoji(input_text):
    # Clean up the input text (optional)
    cleaned_text = re.sub(r"[.,!?;:]", "", input_text)

    prompt = f"Text: {cleaned_text}\nEmoji:"

    # Tokenization and generation
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        do_sample=True,
        temperature=0.9,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id  # prevents a warning
    )

    # Decode
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the generated part after "Emoji:" (this prompt contains no "→",
    # so splitting on "→" as before would return the whole text)
    emoji_part = generated_text.split("Emoji:")[-1].strip().split("\n")[0]
    return emoji_part
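
# Hedged sketch (not part of the original app): GPT-2 variants often mix plain
# words into the completion, so the returned string could additionally be
# filtered down to emoji-range codepoints. EMOJI_RE and keep_only_emojis are
# illustrative names (assumptions), not existing helpers; `re` is imported above.
EMOJI_RE = re.compile("[\U0001F300-\U0001FAFF\u2600-\u27BF\uFE0F]+")

def keep_only_emojis(text):
    # Concatenate all emoji-range matches; returns "" if none were generated.
    return "".join(EMOJI_RE.findall(text))

# Usage (hypothetical): keep_only_emojis(text_to_emoji("Good morning"))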
# Gradio UI
iface = gr.Interface(
    fn=text_to_emoji,
    inputs=gr.Textbox(lines=2, placeholder="Enter a sentence..."),
    outputs="text",
    title="AI-Powered Emoji Translator",
    description="Enter a sentence, and the AI will transform it into an emoji version 🥳"
)

iface.launch()
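
# Note: on Hugging Face Spaces, launch() without arguments is sufficient;
# share=True is only needed for a temporary public link when running locally.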