File size: 3,887 Bytes
8bff16c
 
 
 
56e2e37
 
 
46677b4
8bff16c
71eacb0
d114b79
 
 
 
 
71eacb0
 
db1702e
8bff16c
 
 
 
 
 
f397f4b
8bff16c
 
 
f397f4b
8bff16c
 
db1702e
8bff16c
 
 
 
 
 
 
 
 
 
db1702e
8bff16c
 
 
 
 
 
 
 
 
 
 
 
 
 
8e7d1f2
 
8bff16c
 
 
 
8e7d1f2
71eacb0
8e7d1f2
db1702e
8bff16c
db1702e
8bff16c
56e2e37
 
8bff16c
56e2e37
 
 
db1702e
8bff16c
db1702e
8bff16c
769b214
56e2e37
71eacb0
5a6a071
8bff16c
 
5a6a071
8bff16c
 
56e2e37
 
 
8bff16c
db1702e
 
 
8bff16c
 
 
 
 
 
 
 
db1702e
 
 
56e2e37
8bff16c
56e2e37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
# Pin the process to GPU 1; must be set before any CUDA-using library initializes.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import re
import gradio as gr
from NeuralTextGenerator import BertTextGenerator

# Load models
## Special tokens: sentiment control tokens prepended to the seed text so the
## finetuned generators can condition generation on sentiment.
special_tokens = [
    '[POSITIVE-0]', 
    '[POSITIVE-1]', 
    '[POSITIVE-2]',
    '[NEGATIVE-0]', 
    '[NEGATIVE-1]', 
    '[NEGATIVE-2]'
    ]

## Finetuned RoBERTa — sentiment-conditioned generator.
finetunned_RoBERTa_model_name = "JuanJoseMV/XLM_RoBERTa_text_gen"
finetunned_RoBERTa = BertTextGenerator(finetunned_RoBERTa_model_name)

# Register the control tokens and grow the embedding matrix to match the new
# vocabulary size (required after add_special_tokens).
finetunned_RoBERTa.tokenizer.add_special_tokens({'additional_special_tokens': special_tokens})
finetunned_RoBERTa.model.resize_token_embeddings(len(finetunned_RoBERTa.tokenizer))

## Finetuned RoBERTa hate — presumably already has its control tokens baked in,
## since the add_special_tokens/resize step below is commented out (TODO confirm).
finetunned_RoBERTa_Hate_model_name = "JuanJoseMV/XLM_RoBERTa_text_gen_FT_Hate"
finetunned_RoBERTa_Hate = BertTextGenerator(finetunned_RoBERTa_Hate_model_name)

# finetunned_RoBERTa_Hate.tokenizer.add_special_tokens({'additional_special_tokens': special_tokens})
# finetunned_RoBERTa_Hate.model.resize_token_embeddings(len(finetunned_RoBERTa_Hate.tokenizer))

# ## Finetuned BERT
# finetunned_BERT_model_name = "JuanJoseMV/BERT_text_gen"
# finetunned_BERT = BertTextGenerator(finetunned_BERT_model_name, tokenizer='Twitter/twhin-bert-large')

# finetunned_BERT.tokenizer.add_special_tokens({'additional_special_tokens': special_tokens})
# finetunned_BERT.model.resize_token_embeddings(len(finetunned_BERT.tokenizer))

## RoBERTa — base (non-finetuned) multilingual Twitter model.
RoBERTa_model_name = "cardiffnlp/twitter-xlm-roberta-base"
RoBERTa = BertTextGenerator(RoBERTa_model_name)

## BERT — base (non-finetuned) multilingual Twitter model.
BERT_model_name = "Twitter/twhin-bert-large"
BERT = BertTextGenerator(BERT_model_name)

def sentence_builder(
        selected_model, 
        n_sentences, 
        max_iter, 
        temperature, 
        top_k, 
        sentiment, 
        seed_text
        ):
    """Generate tweets with the selected model and return them as display text.

    Args:
        selected_model: One of "BERT", "RoBERTa", "Finetuned_RoBERTa",
            "Finetuned_RoBERTa_Hate" (any other value falls back to BERT).
        n_sentences: Number of tweets to generate.
        max_iter: Maximum number of generation iterations.
        temperature: Sampling temperature.
        top_k: Top-k sampling cutoff.
        sentiment: "POSITIVE" or "NEGATIVE"; forced to "HATE" for the
            hate-finetuned model.
        seed_text: Text used to seed the generation.

    Returns:
        A single string with one "- GENERATED TWEET #i: ..." entry per tweet.
    """
    # Select model.
    # BUG FIX: the third branch was `if` instead of `elif`, so selecting either
    # finetuned model fell through to the `else` and silently used BERT.
    if selected_model == "Finetuned_RoBERTa":
        generator = finetunned_RoBERTa
    elif selected_model == "Finetuned_RoBERTa_Hate":
        generator = finetunned_RoBERTa_Hate
        sentiment = 'HATE'
    elif selected_model == "RoBERTa":
        generator = RoBERTa
    else:
        generator = BERT

    # Generate
    parameters = {'n_sentences': n_sentences,
              # Cap the batch size at 10 to bound memory use.
              'batch_size': n_sentences if n_sentences < 10 else 10,
              'avg_len': 30,
              'max_len': 50,
              'std_len': 3,
              'generation_method': 'parallel',
              'sample': True,
              'burnin': 450,
              'max_iter': max_iter,
              'top_k': top_k,
              # Prepend the three sentiment control tokens to the seed text.
              'seed_text': f"[{sentiment}-0] [{sentiment}-1] [{sentiment}-2] {seed_text}",
              'temperature': temperature,
              'verbose': True
              }
    sents = generator.generate(**parameters)

    # Clean: strip any bracketed control tokens and number each tweet.
    gen_text = ''
    for i, s in enumerate(sents):
        clean_sent = re.sub(r'\[.*?\]', '', s)
        gen_text += f'- GENERATED TWEET #{i + 1}: {clean_sent}\n\n'

    return gen_text

# Set Demo
demo = gr.Interface(
    sentence_builder,
    [
        gr.Radio(["BERT", "RoBERTa", "Finetuned_RoBERTa", "Finetuned_RoBERTa_Hate"], value="RoBERTa", label="Generator model"),
        # gr.Radio(["BERT", "RoBERTa"], value="BERT", label="Generator model"),
        gr.Slider(1, 15, value=5, label="Num. Tweets", step=1, info="Number of tweets to be generated."),
        gr.Slider(50, 500, value=300, label="Max. iter", info="Maximum number of iterations for the generation."),
        gr.Slider(0, 1.0, value=0.8, step=0.05, label="Temperature", info="Temperature parameter for the generation."),
        gr.Slider(1, 200, value=130, step=1, label="Top k", info="Top k parameter for the generation."),
        gr.Radio(["POSITIVE", "NEGATIVE"], value="NEGATIVE", label="Sentiment to generate"),
        gr.Textbox('ATP Finals in Turin', label="Seed text", info="Seed text for the generation.")
    ],
    "text",
)

# Run Demo
demo.launch()