Cicciokr committed on
Commit
ad7b7bc
·
verified ·
1 Parent(s): 1f69fb9

Add model GPT2 to compare result

Browse files
Files changed (1) hide show
  1. app.py +13 -2
app.py CHANGED
@@ -12,19 +12,30 @@ st.write("Inserisci un testo con il token [MASK] per vedere le previsioni del mo
12
  #dvces et reges carthaginiensivm hanno et mago qui [MASK] punico bello cornelium consulem aput liparas ceperunt
13
  input_text = st.text_input("Testo:", value="Lorem ipsum dolor sit amet, [MASK] adipiscing elit.")
14
 
 
15
  #modelname = "./models/latin_bert/"
16
  #modelname = "LuisAVasquez/simple-latin-bert-uncased"
17
  modelname = "./models/bert-base-latin-uncased"
 
 
 
18
  tokenizer = AutoTokenizer.from_pretrained(modelname)
19
  model = AutoModelForMaskedLM.from_pretrained(modelname)
20
- # Creare un pipeline di riempimento maschere
21
  fill_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer)
22
- #fill_mask = pipeline("fill-mask", model=modelname)
 
 
 
 
23
 
24
  if input_text:
25
  predictions = fill_mask(input_text)
26
  st.subheader("Risultati delle previsioni con Simple Latin Bert:")
27
  for pred in predictions:
28
  st.write(f"**Parola**: {pred['token_str']}, **Probabilità**: {pred['score']:.4f}, **Sequence**: {pred['sequence']}")
 
 
 
 
29
 
30
 
 
12
  #dvces et reges carthaginiensivm hanno et mago qui [MASK] punico bello cornelium consulem aput liparas ceperunt
13
  input_text = st.text_input("Testo:", value="Lorem ipsum dolor sit amet, [MASK] adipiscing elit.")
14
 
15
+ # Model based on BERT
16
  #modelname = "./models/latin_bert/"
17
  #modelname = "LuisAVasquez/simple-latin-bert-uncased"
18
  modelname = "./models/bert-base-latin-uncased"
19
+ # Model based on GPT 2
20
+ modelname_gpt = "itserr/scratch_2-nodes_tokenizer_latbert-original_packing_fcocchi"
21
+
22
  tokenizer = AutoTokenizer.from_pretrained(modelname)
23
  model = AutoModelForMaskedLM.from_pretrained(modelname)
 
24
  fill_mask = pipeline("fill-mask", model=model, tokenizer=tokenizer)
25
+
26
+ #Use GPT 2 to compare with BERT
27
+ tokenizer_gpt = AutoTokenizer.from_pretrained(modelname_gpt)
28
+ model_gpt = AutoModelForMaskedLM.from_pretrained(modelname_gpt)
29
+ fill_mask_gpt = pipeline("fill-mask", model=model_gpt, tokenizer=tokenizer_gpt)
30
 
31
  if input_text:
32
  predictions = fill_mask(input_text)
33
  st.subheader("Risultati delle previsioni con Simple Latin Bert:")
34
  for pred in predictions:
35
  st.write(f"**Parola**: {pred['token_str']}, **Probabilità**: {pred['score']:.4f}, **Sequence**: {pred['sequence']}")
36
+ predictions_gpt = fill_mask_gpt(input_text)
37
+ st.subheader("Risultati delle previsioni con Simple Latin GPT2:")
38
+ for pred_gpt in predictions_gpt:
39
+ st.write(f"**Parola**: {pred_gpt['token_str']}, **Probabilità**: {pred_gpt['score']:.4f}, **Sequence**: {pred_gpt['sequence']}")
40
 
41