Emojinator / app_OLD.py
ai01firebird's picture
Rename app.py to app_OLD.py
83e5b4d verified
raw
history blame
4.13 kB
import gradio as gr
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load the fine-tuned model and tokenizer from the Hugging Face Hub.
HF_USER = "ai01firebird"
MODEL_NAME = "emojinator-gpt2-v3"
# fine-tuned GPT-2 checkpoint trained for text -> emoji conversion
model = AutoModelForCausalLM.from_pretrained(f"{HF_USER}/{MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(f"{HF_USER}/{MODEL_NAME}")
# Alternatives that were tried and rejected:
# base gpt2 outputs text, not emojis
#tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
#model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
# distilgpt2 is only 80MB -> NOK, no emojis
#tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
#model = AutoModelForCausalLM.from_pretrained("distilgpt2")
# tiny-gpt2 is only 20MB -> NOK, no emojis
#tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
#model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
# TinyLlama (1.1B chat model)
#tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
#model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
# OLD conversion method (first attempt): free-form instruction prompt.
def text_to_emoji_OLD_OLD(input_text):
    """Ask the model directly, via an instruction prompt, for an emoji version.

    Returns whatever the model generates after the prompt, stripped.
    """
    # Strip common punctuation so it does not leak into the prompt.
    text = re.sub(r"[.,!?;:]", "", input_text)
    prompt = f'Convert the following sentence into an emoji-sequence which conveys a similar meaning and return only the emojis, no explanation:\n\n"{text}"\n\n'
    # Tokenize and sample a short continuation.
    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model.generate(
        **encoded,
        max_new_tokens=30,
        do_sample=True,
        temperature=0.8,
        top_k=50,
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Keep only the continuation that follows the prompt text.
    # NOTE(review): assumes decode() reproduces the prompt verbatim at the
    # start of the output — true for GPT-2-style tokenizers, verify otherwise.
    return decoded[len(prompt):].strip()
# Conversion method (second attempt): few-shot "sentence → emojis" pattern.
def text_to_emoji_OLD(input_text):
    """Map *input_text* to emojis by continuing a few-shot arrow-pattern prompt.

    Returns the first line the model generates after the final "→" marker.
    """
    # Drop punctuation that is irrelevant for the mapping.
    text = re.sub(r"[.,!?;:]", "", input_text)
    # Pure pattern-based prompt: eleven worked examples, then the query.
    examples = [
        "Hi there → 👋🙂",
        "Good night → 🌙😴",
        "I love pizza → ❤️🍕",
        "It's raining → 🌧️☔",
        "Happy birthday → 🎉🎂🥳",
        "I am so tired → 😴💤",
        "Let’s go to the beach → 🏖️🌊😎",
        "I’m feeling lucky → 🍀🤞",
        "We’re getting married → 💍👰🤵",
        "Merry Christmas → 🎄🎁🎅",
        "Let’s party → 🎉🕺💃",
    ]
    prompt = "\n".join(examples) + "\n" + f"{text} →"
    # Tokenize and sample a short continuation.
    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model.generate(
        **encoded,
        max_new_tokens=10,
        do_sample=True,
        temperature=0.9,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id,  # prevents the missing-pad-token warning
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    # Take only what follows the last "→", truncated at the first newline.
    return decoded.split("→")[-1].strip().split("\n")[0]
# conversion method (current): matches the fine-tuned "Text:/Emoji:" format.
def text_to_emoji(input_text):
    """Convert *input_text* to an emoji sequence using the fine-tuned model.

    The model was prompted with the "Text: ...\nEmoji:" scheme, so the answer
    is whatever it generates after the "Emoji:" marker.

    Returns the first generated line after "Emoji:", stripped.
    """
    # Eingabetext bereinigen: drop punctuation before prompting (optional).
    cleaned_text = re.sub(r"[.,!?;:]", "", input_text)
    prompt = f"Text: {cleaned_text}\nEmoji:"
    # Tokenize and sample a short continuation.
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        do_sample=True,
        temperature=0.9,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id,  # prevents the missing-pad-token warning
    )
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # BUG FIX: the old code split on "→" (left over from the few-shot prompt),
    # but this prompt contains no "→", so the whole decoded text — prompt
    # included — was returned. Split on the "Emoji:" marker instead and keep
    # only the first generated line.
    emoji_part = generated_text.split("Emoji:")[-1].strip().split("\n")[0]
    return emoji_part
# Gradio UI
iface = gr.Interface(
fn=text_to_emoji,
inputs=gr.Textbox(lines=2, placeholder="Enter a sentence..."),
outputs="text",
title="AI-Powered Emoji Translator",
description="Enter a sentence, and the AI will transform it into an emoji-version πŸ₯³"
)
iface.launch()