rendy-ad89 committed
Commit 0f49bdc
1 Parent(s): 28efa8b

updated to 14 classes

Files changed (1)
  1. app.py +21 -8
app.py CHANGED
@@ -1,21 +1,35 @@
 from transformers import BertTokenizer, BertForSequenceClassification
 import torch
 import gradio as gr
+from transformers import TextClassificationPipeline

-model_name='indobenchmark/indobert-base-p1'
+model_name='indolem/indobert-base-uncased'
 label_dict={'Pasal 112 UU RI No. 35 Thn 2009': 0,
-'Pasal 363 KUHP': 1,
-'Pasal 372 KUHP': 2,
-'Pasal 378 KUHP': 3}
+'Pasal 114 UU RI No. 35 Thn 2009': 1,
+'Pasal 111 UU RI No. 35 Thn 2009': 2,
+'Pasal 127 UU RI No. 35 Thn 2009': 3,
+'Pasal 363 KUHP': 4,
+'Pasal 365 KUHP': 5,
+'Pasal 362 KUHP': 6,
+'Pasal 338 KUHP': 7,
+'Pasal 340 KUHP': 8,
+'Pasal 374 KUHP': 9,
+'Pasal 372 KUHP': 10,
+'Pasal 378 KUHP': 11,
+'Pasal 351 KUHP': 12,
+'Pasal 303 KUHP': 13}

 tokenizer = BertTokenizer.from_pretrained(model_name)
 model = BertForSequenceClassification.from_pretrained(model_name,
 num_labels=len(label_dict),
 output_attentions=False,
 output_hidden_states=False)
-model.load_state_dict(torch.load('finetuned_BERT_epoch_9.model', map_location=torch.device('cpu')))

-from transformers import TextClassificationPipeline
+torch_model = torch.load('FineTune_IndoLEM_BERT_H_Mean_Pooling_LR1E-5_BS2_epoch_9.model')
+torch_model['classifier.weight'] = torch_model.pop('out.weight')
+torch_model['classifier.bias'] = torch_model.pop('out.bias')
+model.load_state_dict(torch_model)
+
 pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)

 def get_nth_key(dictionary, n=0):
@@ -24,11 +38,10 @@ def get_nth_key(dictionary, n=0):
     for i, key in enumerate(dictionary.keys()):
         if i == n:
             return key
-    raise IndexError("dictionary index out of range")
+    raise IndexError("dictionary index out of range")

 def predict(text):
     predictions = pipe(text)[0]
-    print(predictions)
     max = 0
     idx = -1
     for i in range(len(predictions)):
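
The new checkpoint-loading block renames two keys before calling load_state_dict. A plausible reading (an assumption, not stated in the commit) is that the fine-tuned checkpoint was saved from a model whose classification head was named `out`, so its weight and bias are moved to the `classifier.*` names that BertForSequenceClassification expects. The new torch.load call also drops the old map_location argument; the sketch below keeps it so the checkpoint still loads on CPU-only hosts, and assumes `model` is the BertForSequenceClassification instance built earlier in app.py.

import torch

# Sketch of the checkpoint-loading step (assumption: the saved head was named `out`).
state_dict = torch.load(
    'FineTune_IndoLEM_BERT_H_Mean_Pooling_LR1E-5_BS2_epoch_9.model',
    map_location=torch.device('cpu'),  # kept so this also works without a GPU
)
# Rename the custom head's parameters to the names the HF model expects.
state_dict['classifier.weight'] = state_dict.pop('out.weight')
state_dict['classifier.bias'] = state_dict.pop('out.bias')
# strict=False reports any leftover mismatches instead of raising immediately.
result = model.load_state_dict(state_dict, strict=False)
print(result)  # sanity check: missing_keys and unexpected_keys should both be empty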
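Because no id2label mapping is passed to from_pretrained, the pipeline returns generic labels (LABEL_0 ... LABEL_13), which is why the app keeps label_dict and get_nth_key around to translate an index back into a statute name. An alternative sketch, not what this commit does, is to hand the mapping to the config so the pipeline reports the statute names directly:

# Alternative sketch: give the config human-readable label names up front.
id2label = {v: k for k, v in label_dict.items()}
model = BertForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(label_dict),
    id2label=id2label,
    label2id=label_dict,
)
# With this, pipe(text) yields e.g. {'label': 'Pasal 363 KUHP', 'score': ...}
# and get_nth_key is no longer needed.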
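The hunk cuts off inside predict, so the rest of the function and the Gradio wiring are not visible in this diff. A hypothetical completion, following the visible max/idx bookkeeping: scan the scores for the best index, map it back to a statute name through get_nth_key (the pipeline lists scores in label-id order, matching label_dict's insertion order), and serve it with gr.Interface. Note that recent transformers releases deprecate return_all_scores in favour of top_k=None.

# Hypothetical completion of predict() and the Gradio interface;
# the actual code after this point is not shown in the diff.
def predict(text):
    predictions = pipe(text)[0]  # list of {'label': 'LABEL_i', 'score': float}
    max_score = 0
    idx = -1
    for i in range(len(predictions)):
        if predictions[i]['score'] > max_score:
            max_score = predictions[i]['score']
            idx = i
    # idx follows label-id order, so it maps onto label_dict via get_nth_key.
    return {get_nth_key(label_dict, idx): max_score}

demo = gr.Interface(fn=predict, inputs='text', outputs='label')
demo.launch()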