Spaces:
Sleeping
Sleeping
Upload the model
Browse files- README.md +2 -1
- app.py +83 -0
- requirements.txt +2 -0
README.md
CHANGED
@@ -9,4 +9,5 @@ app_file: app.py
|
|
9 |
pinned: false
|
10 |
---
|
11 |
|
12 |
-
|
|
|
|
9 |
pinned: false
|
10 |
---
|
11 |
|
12 |
+
This is the Space that serves the [Political Speech model](https://huggingface.co/MarkelFe/PoliticalSpeech2), which was built for an NLP class at university.
|
13 |
+
It uses data from the Basque Parliament. It only works in Spanish, although the output may include some Basque words.
|
app.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
2 |
+
import torch
|
3 |
+
import torch.nn
|
4 |
+
import gradio as gr
|
5 |
+
import re
|
6 |
+
|
7 |
+
# Configuration
# Upper bound offered by the "Luzera" (new-token count) sliders in the UI.
MAX_LENGTH = 1024

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Using:", device)

# Left padding is requested so that, when several prompts are batched with
# padding, the pad tokens go in front of each prompt rather than after it.
tokenizer = AutoTokenizer.from_pretrained(
    "MarkelFe/PoliticalSpeech2",
    padding_side='left',
)

# Load the causal LM once at import time and move it to the chosen device.
model = AutoModelForCausalLM.from_pretrained("MarkelFe/PoliticalSpeech2")
model = model.to(device)
|
13 |
+
|
14 |
+
def return_conf(max_tokens, conf, ngram, beams, top_k, top_p):
    """Build the keyword arguments for ``model.generate`` for one strategy.

    Args:
        max_tokens: Number of new tokens to generate (coerced to ``int``).
        conf: Strategy name, one of ``"Ezer"`` (plain greedy decoding),
            ``"Beam Search"``, ``"Top K"`` or ``"Top P"``.
        ngram: ``no_repeat_ngram_size``; only used for ``"Beam Search"``.
        beams: ``num_beams``; only used for ``"Beam Search"``.
        top_k: ``top_k`` value; only used for ``"Top K"``.
        top_p: ``top_p`` value; only used for ``"Top P"``.

    Returns:
        A dict of generation options containing ``max_new_tokens``,
        ``do_sample`` and the parameters of the selected strategy.

    Raises:
        ValueError: If ``conf`` is not one of the known strategy names.
    """
    # UI sliders may deliver numbers as floats; generate() expects integers
    # for token counts, beam counts and k.
    options = {"max_new_tokens": int(max_tokens)}

    if conf == "Ezer":
        options["do_sample"] = False
    elif conf == "Beam Search":
        options["no_repeat_ngram_size"] = int(ngram)
        options["num_beams"] = int(beams)
        options["do_sample"] = False
    elif conf == "Top K":
        # top_k only has an effect when sampling is enabled; with
        # do_sample=False the model decodes greedily and ignores it.
        options["top_k"] = int(top_k)
        options["do_sample"] = True
    elif conf == "Top P":
        # Same as above for top_p. top_k=0 switches the top-k filter off so
        # that only the nucleus (top-p) filter shapes the distribution.
        options["top_p"] = float(top_p)
        options["top_k"] = 0
        options["do_sample"] = True
    else:
        raise ValueError(f"Unknown generation strategy: {conf!r}")

    return options
|
24 |
+
|
25 |
+
def sortu_testua(alderdia, testua, max_tokens, conf, ngram, beams, top_k, top_p):
    """Generate a continuation of ``testua`` for a single party.

    The prompt sent to the model is ``"[<alderdia>] <testua>"``. The decoded
    output is split on bracketed tags followed by a space, and the text after
    the last such tag is returned.

    Args:
        alderdia: Party name placed inside the leading ``[...]`` tag.
        testua: Text the model should continue.
        max_tokens, conf, ngram, beams, top_k, top_p: Forwarded unchanged to
            ``return_conf`` to build the generation options.

    Returns:
        The decoded text following the last ``[...] `` tag.
    """
    options = return_conf(max_tokens, conf, ngram, beams, top_k, top_p)

    # NOTE(review): sortu_testu_guztiak wraps the party name in double quotes
    # (["EAJ"]) while this prompt does not - confirm which tag format the
    # model expects.
    prompt = f"[{alderdia}] {testua}"
    tokens = tokenizer(prompt, return_tensors="pt").to(device)
    generation = model.generate(
        inputs=tokens['input_ids'],
        attention_mask=tokens['attention_mask'],
        **options,
    )[0]
    text = tokenizer.decode(generation)

    # Raw string: "\[" and "\]" are not valid Python string escapes and
    # trigger a warning when written in a normal string literal.
    parts = re.split(r"\[(.*?)\] ", text)
    print(parts)
    return parts[-1]
|
33 |
+
|
34 |
+
def sortu_testu_guztiak(testua, max_tokens, conf, ngram, beams, top_k, top_p):
    """Generate a continuation of ``testua`` for every party in one batch.

    One prompt of the form ``'["<party>"] <testua>'`` is built per party and
    all prompts are tokenized together with padding and generated in a single
    ``model.generate`` call.

    Args:
        testua: Text the model should continue.
        max_tokens, conf, ngram, beams, top_k, top_p: Forwarded unchanged to
            ``return_conf`` to build the generation options.

    Returns:
        A 6-tuple of strings in the order EAJ, EH Bildu, PP, PSE-EE, EP,
        UPyD; each is the decoded text following the last ``[...] `` tag.
    """
    options = return_conf(max_tokens, conf, ngram, beams, top_k, top_p)

    parties = ("EAJ", "EH Bildu", "PP", "PSE-EE", "EP", "UPyD")
    prompts = [f'["{party}"] {testua}' for party in parties]

    tokens = tokenizer(prompts, padding=True, return_tensors="pt").to(device)
    generation = model.generate(
        inputs=tokens['input_ids'],
        attention_mask=tokens['attention_mask'],
        **options,
    )
    texts = tokenizer.batch_decode(generation)

    # Raw string: "\[" and "\]" are not valid Python string escapes and
    # trigger a warning when written in a normal string literal.
    tag = re.compile(r"\[(.*?)\] ")
    return tuple(tag.split(text)[-1] for text in texts)
|
42 |
+
|
43 |
+
def _build_generation_controls():
    """Create the generation-settings widgets in the current layout context.

    Both tabs show the same set of controls, so they are built here once per
    tab instead of being spelled out twice.

    Returns:
        A list ``[length, strategy, ngram, beams, top_k, top_p]`` of
        components, in the order the generation callbacks expect them.
    """
    gr.Markdown("""Aldatu konfigurazioa""")
    # step=1 keeps the token count an integer and selectable one by one,
    # matching the other integer sliders below.
    length = gr.Slider(minimum=1, maximum=MAX_LENGTH, value=30, step=1, label="Luzera", info="Zenbat token berri sortuko diren.")
    strategy = gr.Radio(["Ezer", "Beam Search", "Top K", "Top P"], value="Beam Search", label="Estrategia", info="Aukeratu ze estrategia erabiliko den erantzunak hobetzeko")
    ngram_size = gr.Slider(minimum=1, maximum=50, value=4, step=1, label="ngram kopurua", info="Bakarrik kontuan hartuko da \"Beam Search\" aukeratuta badago")
    beam_count = gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Beam kopurua", info="Bakarrik kontuan hartuko da \"Beam Search\" aukeratuta badago")
    k_value = gr.Slider(minimum=1, maximum=50, value=5, step=1, label="K-balioa", info="Bakarrik kontuan hartuko da \"Top K\" aukeratuta badago")
    p_value = gr.Slider(minimum=0, maximum=1, value=0.9, step=0.01, label="P-balioa", info="Bakarrik kontuan hartuko da \"Top P\" aukeratuta badago")
    return [length, strategy, ngram_size, beam_count, k_value, p_value]


with gr.Blocks() as demo:
    # Tab 1: one prompt, one output box per party.
    with gr.Tab("Alderdi guztiak"):
        with gr.Row():
            with gr.Column(scale=4, min_width=400):
                testua2 = gr.Textbox(label="Testua")
                greet_btn2 = gr.Button("Sortu testuak")
                controls2 = _build_generation_controls()
            with gr.Column(scale=3, min_width=200):
                outputEAJ = gr.Textbox(label="EAJ")
                outputBildu = gr.Textbox(label="EH Bildu")
                outputPP = gr.Textbox(label="PP")
                outputPSE = gr.Textbox(label="PSE-EE")
                outputEP = gr.Textbox(label="EP")
                outputUPyD = gr.Textbox(label="UPyD")

    # Tab 2: the user picks a single party.
    with gr.Tab("Alderdi bakarra"):
        with gr.Row():
            with gr.Column(scale=4, min_width=400):
                # A default selection avoids sending "[None] ..." as the
                # prompt when the user clicks the button without choosing.
                alderdia = gr.Dropdown(["EAJ", "EH Bildu", "PP", "PSE-EE", "EP", "UPyD"], value="EAJ", label="Alderdi politikoa")
                testua = gr.Textbox(label="Testua")
                greet_btn = gr.Button("Sortu testua")
                controls = _build_generation_controls()
            with gr.Column(scale=3, min_width=200):
                output = gr.Textbox(label="Output")

    # Wire the buttons to the generation callbacks; the input order matches
    # the positional parameters of each callback.
    greet_btn.click(fn=sortu_testua, inputs=[alderdia, testua, *controls], outputs=output, api_name="sortu_testua")
    greet_btn2.click(fn=sortu_testu_guztiak, inputs=[testua2, *controls2], outputs=[outputEAJ, outputBildu, outputPP, outputPSE, outputEP, outputUPyD], api_name="sortu_testu_guztiak")


demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
torch
|