File size: 4,134 Bytes
e8a2d7c
c644909
3db21cd
 
78f153b
3db21cd
8634e34
0abc7a9
 
 
 
 
73f55cc
 
 
 
 
4352744
8634e34
 
 
4352744
16d4b86
 
e8a2d7c
4352744
6eb3e9b
 
4352744
6eb3e9b
0abc7a9
0a0ba7a
 
8a0b589
0a0ba7a
 
 
 
3db21cd
0a0ba7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6eb3e9b
0abc7a9
6eb3e9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8a2d7c
0abc7a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8a2d7c
 
 
6c5550d
e8a2d7c
6c5550d
bf8b184
e8a2d7c
 
e374794
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import gradio as gr
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the fine-tuned model and tokenizer from the Hugging Face Hub.
# NOTE: from_pretrained downloads the weights on first run (network I/O at import time).
HF_USER = "ai01firebird"
MODEL_NAME = "emojinator-gpt2-v3"

# fine-tuned GPT-2 used for the text -> emoji conversion
model = AutoModelForCausalLM.from_pretrained(f"{HF_USER}/{MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(f"{HF_USER}/{MODEL_NAME}")

# Alternatives tried during development (kept for reference):
# plain gpt2 outputs text, not emojis
#tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
#model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")

# distilgpt2 is only 80MB -> NOK, no emojis
#tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
#model = AutoModelForCausalLM.from_pretrained("distilgpt2")

# tiny-gpt2 is only 20MB -> NOK, no emojis
#tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
#model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")

# TinyLlama
#tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
#model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# OLD conversion method (kept for reference; superseded by text_to_emoji)
def text_to_emoji_OLD_OLD(input_text):
    """Legacy converter: instruction-style prompt, sampled generation,
    emoji part recovered by slicing off the prompt prefix."""
    # Drop basic punctuation from the input (optional cleanup).
    stripped = re.sub(r"[.,!?;:]", "", input_text)

    # Build the instruction prompt around the cleaned sentence.
    prompt = (
        "Convert the following sentence into an emoji-sequence which conveys "
        "a similar meaning and return only the emojis, no explanation:\n\n"
        f'"{stripped}"\n\n'
    )

    # Tokenize and sample a continuation from the model.
    encoded = tokenizer(prompt, return_tensors="pt")
    sampled = model.generate(
        **encoded,
        max_new_tokens=30,
        do_sample=True,
        temperature=0.8,
        top_k=50
    )

    # Decode the full sequence, then keep only what follows the prompt.
    decoded = tokenizer.decode(sampled[0], skip_special_tokens=True)
    return decoded[len(prompt):].strip()

# conversion method (few-shot variant; superseded by text_to_emoji)
def text_to_emoji_OLD(input_text):
    """Legacy converter: few-shot "sentence → emojis" prompt, sampled
    generation, output taken from after the last arrow marker."""
    # Drop basic punctuation from the input (optional cleanup).
    stripped = re.sub(r"[.,!?;:]", "", input_text)

    # Few-shot examples teaching the "sentence → emojis" pattern.
    examples = [
        "Hi there → 👋🙂",
        "Good night → 🌙😴",
        "I love pizza → ❤️🍕",
        "It's raining → 🌧️☔",
        "Happy birthday → 🎉🎂🥳",
        "I am so tired → 😴💤",
        "Let’s go to the beach → 🏖️🌊😎",
        "I’m feeling lucky → 🍀🤞",
        "We’re getting married → 💍👰🤵",
        "Merry Christmas → 🎄🎁🎅",
        "Let’s party → 🎉🕺💃",
    ]
    prompt = "\n".join(examples) + "\n" + f"{stripped} →"

    # Tokenize and sample a short continuation.
    encoded = tokenizer(prompt, return_tensors="pt")
    sampled = model.generate(
        **encoded,
        max_new_tokens=10,
        do_sample=True,
        temperature=0.9,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id  # Prevents warning
    )

    # Decode, then keep only the first line after the final "→".
    decoded = tokenizer.decode(sampled[0], skip_special_tokens=True)
    return decoded.split("→")[-1].strip().split("\n")[0]

# conversion method
def text_to_emoji(input_text):
    """Translate a sentence into an emoji sequence with the fine-tuned model.

    Parameters:
        input_text: free-form user sentence from the UI textbox.
    Returns:
        The model's generated emoji string (first line after the prompt).
    """
    # Remove basic punctuation so the input matches the training format.
    cleaned_text = re.sub(r"[.,!?;:]", "", input_text)

    prompt = f"Text: {cleaned_text}\nEmoji:"

    # Tokenize and sample a short continuation.
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        do_sample=True,
        temperature=0.9,
        top_k=50,
        pad_token_id=tokenizer.eos_token_id  # Prevents warning
    )

    # Decode the full sequence (prompt + continuation).
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # BUG FIX: the previous code split on "→", a marker that never occurs in
    # this "Text: ...\nEmoji:" prompt, so the whole prompt leaked into the
    # returned string. Split on the actual "Emoji:" marker instead and keep
    # only the first generated line.
    emoji_part = generated_text.split("Emoji:")[-1].strip().split("\n")[0]

    return emoji_part
    
# Gradio UI
iface = gr.Interface(
    fn=text_to_emoji,
    inputs=gr.Textbox(lines=2, placeholder="Enter a sentence..."),
    outputs="text",
    title="AI-Powered Emoji Translator",
    description="Enter a sentence, and the AI will transform it into an emoji-version πŸ₯³"
)

iface.launch()