Update app.py
app.py
CHANGED
@@ -27,21 +27,14 @@ def load_model():
     return trainer, model, tokenizer
 
 def align_word_ids(texts):
-
     trainer, model, tokenizer = load_model()
-
     tokenized_inputs = tokenizer(texts, padding='max_length', max_length=218, truncation=True)
-
     word_ids = tokenized_inputs.word_ids()
-
     previous_word_idx = None
     label_ids = []
-
     for word_idx in word_ids:
-
         if word_idx is None:
             label_ids.append(-100)
-
         elif word_idx != previous_word_idx:
             try:
                 label_ids.append(1)
@@ -53,54 +46,30 @@ def align_word_ids(texts):
         except:
             label_ids.append(-100)
         previous_word_idx = word_idx
-
     return label_ids
 
-
 def predict_ner_labels(model, tokenizer, sentence):
     use_cuda = torch.cuda.is_available()
     device = torch.device("cuda" if use_cuda else "cpu")
-
     if use_cuda:
         model = model.cuda()
-
     text = tokenizer(sentence, padding='max_length', max_length=218, truncation=True, return_tensors="pt")
     mask = text['attention_mask'].to(device)
     input_id = text['input_ids'].to(device)
     label_ids = torch.Tensor(align_word_ids(sentence)).unsqueeze(0).to(device)
-
     logits = model(input_id, mask, None)
     logits_clean = logits[0][label_ids != -100]
-
     predictions = logits_clean.argmax(dim=1).tolist()
     prediction_label = [id2tag[i] for i in predictions]
-
     return prediction_label
 
 id2tag = {0: 'O', 1: 'B-LOC', 2: 'B-PER', 3: 'I-PER', 4: 'B-ORG', 5: 'I-DATE', 6: 'B-DATE', 7: 'I-ORG', 8: 'I-LOC'}
 
-
 def tag_sentence(text):
     trainer, model, tokenizer = load_model()
-
-    # Use your model to predict the tags
     predictions = predict_ner_labels(model, tokenizer, text)
-
-
-    df = pd.DataFrame({'word': text.split(), 'tag': predictions})
-
-    # Replace the labels with numeric values
-    df['tag'] = df['tag'].map(id2tag)
-
-    # Apply conditional formatting to color the tags in the text
-    def color_tags(tag):
-        if tag == 'O':
-            return ''
-        else:
-            return 'color: blue'
-
-    df['word'] = df.apply(lambda row: f'<span style="{color_tags(row["tag"])}">{row["word"]}</span>', axis=1)
-
+    # Create a DataFrame with "words" and "tags" columns
+    df = pd.DataFrame({'words': text.split(), 'tags': predictions})
     return df
 
 st.title("📘 Named Entity Recognition Wolof")
@@ -117,11 +86,8 @@ if submit_button:
     else:
         st.markdown("### Tagged Sentence")
         st.header("")
-
         results = tag_sentence(x1)
-
         cs, c1, c2, c3, cLast = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
-
         with c1:
             csvbutton = st.download_button(label="📥 Download .csv", data=convert_df(results),
                                            file_name="results.csv", mime='text/csv', key='csv')
@@ -131,13 +97,10 @@ if submit_button:
         with c3:
             jsonbutton = st.download_button(label="📥 Download .json", data=convert_json(results),
                                             file_name="results.json", mime='application/json', key='json')
-
         st.header("")
-
         c1, c2, c3 = st.columns([1, 3, 1])
-
         with c2:
-            st.
+            st.table(results[['words', 'tags']])
 
         st.header("")
         st.header("")
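A note on `align_word_ids`: despite the name, it builds a keep/ignore mask rather than real labels. -100 marks the positions that `predict_ner_labels` later drops through `label_ids != -100` (special tokens, padding, subword continuations), and 1 marks each word's first subword, so exactly one logit row per input word survives the filter before the argmax. A standalone sketch of the same pattern; `bert-base-cased` is only a placeholder checkpoint, since the app's tokenizer actually comes from `load_model()`:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")  # placeholder checkpoint

enc = tokenizer("Dakar la dëkk", padding='max_length', max_length=10, truncation=True)
print(enc.word_ids())   # e.g. [None, 0, 1, 2, 2, None, None, None, None, None]

label_ids, prev = [], None
for idx in enc.word_ids():
    # -100 = ignored (special token, padding, repeated subword); 1 = word start
    label_ids.append(1 if idx is not None and idx != prev else -100)
    prev = idx
print(label_ids)        # one 1 per word, -100 everywhere else

Also worth noting: `align_word_ids` calls `load_model()` on every invocation just to obtain the tokenizer, and `tag_sentence` loads it again, so unless `load_model` is cached elsewhere in app.py, each tagged sentence pays for two model loads.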
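The download buttons pass `results` through `convert_df` and `convert_json`, which are defined elsewhere in app.py and untouched by this commit. A plausible shape for those helpers, following the usual Streamlit download pattern (an assumption, not the file's actual code):

import pandas as pd

def convert_df(df: pd.DataFrame) -> bytes:
    # CSV bytes for st.download_button's data argument
    return df.to_csv(index=False).encode('utf-8')

def convert_json(df: pd.DataFrame) -> str:
    # One JSON object per tagged word
    return df.to_json(orient='records')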
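The dangling `st.` under `with c2:` was a syntax error in the old file; the commit completes it as `st.table(results[['words', 'tags']])`, matching the two columns `tag_sentence` now returns (the column selection is redundant but harmless). A minimal self-contained illustration with sample rows in that shape, not actual model output:

import pandas as pd
import streamlit as st

# Sample rows in the shape tag_sentence now returns
results = pd.DataFrame({'words': ['Dakar', 'la', 'dëkk'],
                        'tags': ['B-LOC', 'O', 'O']})

# st.table renders a static table (no sorting or scrolling),
# which suits a short tagged sentence
st.table(results[['words', 'tags']])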