import gradio as gr import re from transformers import AutoTokenizer, AutoModelForCausalLM import torch # Modell und Tokenizer laden HF_USER = "ai01firebird" MODEL_NAME = "emojinator-gpt2-v3" # fine-tuned model = AutoModelForCausalLM.from_pretrained(f"{HF_USER}/{MODEL_NAME}") tokenizer = AutoTokenizer.from_pretrained(f"{HF_USER}/{MODEL_NAME}") # gpt2 outputs text! #tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2") #model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2") # distilgpt2 is only 80MB -> NOK, no emojis #tokenizer = AutoTokenizer.from_pretrained("distilgpt2") #model = AutoModelForCausalLM.from_pretrained("distilgpt2") # tiny-gpt2 is only 20MB -> NOK, no emojis #tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2") #model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2") # TinyLlama #tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0") #model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0") # OLD conversion method def text_to_emoji_OLD_OLD(input_text): # Eingabetext bereinigen (optional) cleaned_text = re.sub(r"[.,!?;:]", "", input_text) # Prompt vorbereiten prompt = f'Convert the following sentence into an emoji-sequence which conveys a similar meaning and return only the emojis, no explanation:\n\n"{cleaned_text}"\n\n' # Tokenisierung und Generation inputs = tokenizer(prompt, return_tensors="pt") outputs = model.generate( **inputs, max_new_tokens=30, do_sample=True, temperature=0.8, top_k=50 ) # Decodieren generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) # Nur den Teil nach dem Prompt extrahieren emoji_part = generated_text[len(prompt):].strip() return emoji_part # conversion method def text_to_emoji_OLD(input_text): # Eingabetext bereinigen (optional) cleaned_text = re.sub(r"[.,!?;:]", "", input_text) # Pure pattern-based prompt prompt = ( "Hi there β†’ πŸ‘‹πŸ™‚\n" "Good night β†’ πŸŒ™πŸ˜΄\n" "I love pizza β†’ β€οΈπŸ•\n" "It's raining β†’ πŸŒ§οΈβ˜”\n" "Happy birthday β†’ πŸŽ‰πŸŽ‚πŸ₯³\n" "I am so tired β†’ πŸ˜΄πŸ’€\n" "Let’s go to the beach β†’ πŸ–οΈπŸŒŠπŸ˜Ž\n" "I’m feeling lucky β†’ πŸ€πŸ€ž\n" "We’re getting married β†’ πŸ’πŸ‘°πŸ€΅\n" "Merry Christmas β†’ πŸŽ„πŸŽπŸŽ…\n" "Let’s party β†’ πŸŽ‰πŸ•ΊπŸ’ƒ\n" f"{cleaned_text} β†’" ) # Tokenisierung und Generation inputs = tokenizer(prompt, return_tensors="pt") outputs = model.generate( **inputs, max_new_tokens=10, do_sample=True, temperature=0.9, top_k=50, pad_token_id=tokenizer.eos_token_id # Prevents warning ) # Decodieren generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) # Nur den generierten Teil nach dem letzten "β†’" emoji_part = generated_text.split("β†’")[-1].strip().split("\n")[0] return emoji_part # conversion method def text_to_emoji(input_text): # Eingabetext bereinigen (optional) cleaned_text = re.sub(r"[.,!?;:]", "", input_text) prompt = f"Text: {cleaned_text}\nEmoji:" # Tokenisierung und Generation inputs = tokenizer(prompt, return_tensors="pt") outputs = model.generate( **inputs, max_new_tokens=10, do_sample=True, temperature=0.9, top_k=50, pad_token_id=tokenizer.eos_token_id # Prevents warning ) # Decodieren generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) # Nur den generierten Teil nach dem letzten "β†’" emoji_part = generated_text.split("β†’")[-1].strip().split("\n")[0] return emoji_part # Gradio UI iface = gr.Interface( fn=text_to_emoji, inputs=gr.Textbox(lines=2, placeholder="Enter a sentence..."), outputs="text", title="AI-Powered Emoji Translator", description="Enter a sentence, and the AI will transform it into an emoji-version πŸ₯³" ) iface.launch()