MarkelFe committed on
Commit
a5f5754
1 Parent(s): f4638ed

Upload the model

Browse files
Files changed (3) hide show
  1. README.md +2 -1
  2. app.py +83 -0
  3. requirements.txt +2 -0
README.md CHANGED
@@ -9,4 +9,5 @@ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
9
  pinned: false
10
  ---
11
 
12
+ This is the Space that implements the [Political Speech model](https://huggingface.co/MarkelFe/PoliticalSpeech2), made for an NLP class at university.
13
+ It uses data from the Basque Parliament and only works in Spanish, although it may include some Basque words.
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForCausalLM
2
+ import torch
3
+ import torch.nn
4
+ import gradio as gr
5
+ import re
6
+
7
# Configuration
# Upper bound of the "Luzera" (new-token count) sliders in the UI.
MAX_LENGTH = 1024

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Using:", device)

# The tokenizer pads on the left; the batched call in sortu_testu_guztiak
# tokenizes several prompts of different lengths with padding=True.
tokenizer = AutoTokenizer.from_pretrained(
    "MarkelFe/PoliticalSpeech2",
    padding_side='left',
)
# Load the causal language model and move it to the selected device.
model = AutoModelForCausalLM.from_pretrained("MarkelFe/PoliticalSpeech2")
model = model.to(device)
13
+
14
def return_conf(max_tokens, conf, ngram, beams, top_k, top_p):
    """Build the keyword arguments passed to ``model.generate`` for a strategy.

    Args:
        max_tokens: Number of new tokens to generate (``max_new_tokens``).
        conf: Strategy label selected in the UI; one of "Ezer" (plain greedy
            decoding), "Beam Search", "Top K" or "Top P".
        ngram: ``no_repeat_ngram_size``; only used for "Beam Search".
        beams: ``num_beams``; only used for "Beam Search".
        top_k: ``top_k``; only used for "Top K".
        top_p: ``top_p``; only used for "Top P".

    Returns:
        dict: Generation options containing ``max_new_tokens``, ``do_sample``
        and the parameters specific to the chosen strategy.

    Raises:
        ValueError: If ``conf`` is not one of the four known labels.
    """
    # Count-like values are coerced to int: they come from UI sliders, which
    # may hand numbers over as floats, while these options are whole numbers.
    options = {"max_new_tokens": int(max_tokens)}

    if conf == "Ezer":
        options["do_sample"] = False
    elif conf == "Beam Search":
        options["no_repeat_ngram_size"] = int(ngram)
        options["num_beams"] = int(beams)
        options["do_sample"] = False
    elif conf == "Top K":
        # top_k only takes effect when sampling is enabled; with
        # do_sample=False generation falls back to greedy decoding.
        options["top_k"] = int(top_k)
        options["do_sample"] = True
    elif conf == "Top P":
        # Same as above: nucleus sampling requires do_sample=True.
        options["top_p"] = top_p
        options["do_sample"] = True
    else:
        raise ValueError(f"Unknown generation strategy: {conf!r}")

    return options
24
+
25
def sortu_testua(alderdia, testua, max_tokens, conf, ngram, beams, top_k, top_p):
    """Generate a continuation of ``testua`` for a single party.

    Args:
        alderdia: Party label placed in the ``[...]`` tag that prefixes the prompt.
        testua: Text the model should continue.
        max_tokens, conf, ngram, beams, top_k, top_p: Generation settings,
            forwarded unchanged to ``return_conf``.

    Returns:
        str: The decoded output that follows the last ``[...] `` tag, i.e. the
        prompt text plus its generated continuation when the output contains
        no further tag.
    """
    options = return_conf(max_tokens, conf, ngram, beams, top_k, top_p)

    # NOTE(review): the tag is unquoted here ("[EAJ] ..."), whereas
    # sortu_testu_guztiak builds quoted tags ('["EAJ"] ...'). Confirm which
    # format the model was trained on.
    prompt = f"[{alderdia}] {testua}"
    tokens = tokenizer(prompt, return_tensors="pt").to(device)

    # generate() returns one row per input sequence; a single prompt -> row 0.
    generation = model.generate(
        inputs=tokens['input_ids'],
        attention_mask=tokens['attention_mask'],
        **options,
    )[0]
    text = tokenizer.decode(generation)

    # Raw string: in a normal literal "\[" is an invalid escape sequence.
    # The split is computed once and reused for both the log line and the result.
    parts = re.split(r"\[(.*?)\] ", text)
    print(parts)
    return parts[-1]
33
+
34
def sortu_testu_guztiak(testua, max_tokens, conf, ngram, beams, top_k, top_p):
    """Generate a continuation of ``testua`` for every party in one batch.

    Args:
        testua: Text the model should continue.
        max_tokens, conf, ngram, beams, top_k, top_p: Generation settings,
            forwarded unchanged to ``return_conf``.

    Returns:
        tuple[str, ...]: Six strings in the order EAJ, EH Bildu, PP, PSE-EE,
        EP, UPyD. Each is the part of the decoded output that follows the last
        ``[...] `` tag.
    """
    options = return_conf(max_tokens, conf, ngram, beams, top_k, top_p)

    # Order matters: the UI maps the returned tuple onto its output boxes
    # positionally.
    parties = ("EAJ", "EH Bildu", "PP", "PSE-EE", "EP", "UPyD")
    # NOTE(review): tags are quoted here ('["EAJ"] ...'), whereas sortu_testua
    # builds unquoted tags ("[EAJ] ..."). Confirm which format the model
    # was trained on.
    prompts = [f"[\"{party}\"] {testua}" for party in parties]

    # padding=True aligns the prompts to a common length so they can be
    # generated as a single batch.
    tokens = tokenizer(prompts, padding=True, return_tensors="pt").to(device)
    generation = model.generate(
        inputs=tokens['input_ids'],
        attention_mask=tokens['attention_mask'],
        **options,
    )
    texts = tokenizer.batch_decode(generation)

    # Raw string: in a normal literal "\[" is an invalid escape sequence.
    # Keeping only the last split part also drops anything decoded before the
    # tag.
    return tuple(re.split(r"\[(.*?)\] ", text)[-1] for text in texts)
42
+
43
def _generation_controls():
    """Create the generation-settings widgets shared by both tabs.

    Call this inside the layout context (column) where the widgets should
    appear. The widgets are returned in the order the click handlers expect:
    (length, strategy, ngram, beams, top_k, top_p).
    """
    gr.Markdown("""Aldatu konfigurazioa""")
    # step=1 keeps the token count a whole number, like the other count sliders.
    length = gr.Slider(minimum=1, maximum=MAX_LENGTH, value=30, step=1, label="Luzera", info="Zenbat token berri sortuko diren.")
    strategy = gr.Radio(["Ezer", "Beam Search", "Top K", "Top P"], value="Beam Search", label="Estrategia", info="Aukeratu ze estrategia erabiliko den erantzunak hobetzeko")
    ngram_size = gr.Slider(minimum=1, maximum=50, value=4, step=1, label="ngram kopurua", info="Bakarrik kontuan hartuko da \"Beam Search\" aukeratuta badago")
    beam_count = gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Beam kopurua", info="Bakarrik kontuan hartuko da \"Beam Search\" aukeratuta badago")
    k_value = gr.Slider(minimum=1, maximum=50, value=5, step=1, label="K-balioa", info="Bakarrik kontuan hartuko da \"Top K\" aukeratuta badago")
    p_value = gr.Slider(minimum=0, maximum=1, value=0.9, step=0.01, label="P-balioa", info="Bakarrik kontuan hartuko da \"Top P\" aukeratuta badago")
    return length, strategy, ngram_size, beam_count, k_value, p_value


with gr.Blocks() as demo:
    # Tab 1: one input text, one generated output per party.
    with gr.Tab("Alderdi guztiak"):
        with gr.Row():
            with gr.Column(scale=4, min_width=400):
                testua2 = gr.Textbox(label="Testua")
                greet_btn2 = gr.Button("Sortu testuak")
                new_token2, confi2, ngram2, beams2, top_k2, top_p2 = _generation_controls()
            with gr.Column(scale=3, min_width=200):
                outputEAJ = gr.Textbox(label="EAJ")
                outputBildu = gr.Textbox(label="EH Bildu")
                outputPP = gr.Textbox(label="PP")
                outputPSE = gr.Textbox(label="PSE-EE")
                outputEP = gr.Textbox(label="EP")
                outputUPyD = gr.Textbox(label="UPyD")
    # Tab 2: generate for a single, user-selected party.
    with gr.Tab("Alderdi bakarra"):
        with gr.Row():
            with gr.Column(scale=4, min_width=400):
                # A default selection avoids building the prompt "[None] ..."
                # when the user never touches the dropdown.
                alderdia = gr.Dropdown(["EAJ", "EH Bildu", "PP", "PSE-EE", "EP", "UPyD"], value="EAJ", label="Alderdi politikoa")
                testua = gr.Textbox(label="Testua")
                greet_btn = gr.Button("Sortu testua")
                new_token, confi, ngram, beams, top_k, top_p = _generation_controls()
            with gr.Column(scale=3, min_width=200):
                output = gr.Textbox(label="Output")

    # Wire the buttons to the generation functions; the input lists follow the
    # positional parameter order of each handler.
    greet_btn.click(fn=sortu_testua, inputs=[alderdia, testua, new_token, confi, ngram, beams, top_k, top_p], outputs=output, api_name="sortu_testua")
    greet_btn2.click(fn=sortu_testu_guztiak, inputs=[testua2, new_token2, confi2, ngram2, beams2, top_k2, top_p2], outputs=[outputEAJ, outputBildu, outputPP, outputPSE, outputEP, outputUPyD], api_name="sortu_testu_guztiak")


demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers
2
+ torch