LM-Explanation-Demo-Soft

Sleeping

App Files Files Community

JRQi committed on Sep 28, 2023

Commit

cf3bdbb

1 Parent(s): 6d0928a

Update game1.py

Browse files

Files changed (1) hide show

game1.py +36 -1

game1.py CHANGED Viewed

@@ -312,6 +312,7 @@ def interpre1(lang_selected, num_selected):
             interpretation_combined.append((text_combined, score_combinded/length))
             index_tmp += length
     print(interpretation_combined)
     res = {"original": text['text'], "interpretation": interpretation_combined}
     # pos = []
@@ -408,15 +409,49 @@ def func1_written(text_written, human_predict, lang_written):
     if lang_written == "Dutch":
         sentiment_classifier = pipeline("text-classification", model='DTAI-KULeuven/robbert-v2-dutch-sentiment', return_all_scores=True, device=device)
     else:
         sentiment_classifier = pipeline("text-classification", model='distilbert-base-uncased-finetuned-sst-2-english', return_all_scores=True, device=device)
     explainer = shap.Explainer(sentiment_classifier)
     shap_values = explainer([text_written])
     interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))
-    res = {"original": text_written, "interpretation": interpretation}
     print(res)
     return res, ai_predict, chatbot

             interpretation_combined.append((text_combined, score_combinded/length))
             index_tmp += length
+    interpretation_combined.append(('', 0.0))
     print(interpretation_combined)
     res = {"original": text['text'], "interpretation": interpretation_combined}
     # pos = []
     if lang_written == "Dutch":
         sentiment_classifier = pipeline("text-classification", model='DTAI-KULeuven/robbert-v2-dutch-sentiment', return_all_scores=True, device=device)
+        tokenizer = AutoTokenizer.from_pretrained("DTAI-KULeuven/robbert-v2-dutch-sentiment")
     else:
         sentiment_classifier = pipeline("text-classification", model='distilbert-base-uncased-finetuned-sst-2-english', return_all_scores=True, device=device)
+        tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
     explainer = shap.Explainer(sentiment_classifier)
     shap_values = explainer([text_written])
     interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))
+    encodings = tokenizer(text_written, return_offsets_mapping=True)
+    print(encodings['offset_mapping'])
+    is_subword = [False, False]
+    for i in range(2, len(encodings['offset_mapping'])):
+        if encodings['offset_mapping'][i][0] == encodings['offset_mapping'][i-1][1]:
+            is_subword.append(True)
+        else:
+            is_subword.append(False)
+    print(is_subword)
+    interpretation_combined = []
+    index_tmp = 0
+    while index_tmp < (len(interpretation) - 1):
+        if not is_subword[index_tmp+1]:
+            interpretation_combined.append(interpretation[index_tmp])
+            index_tmp += 1
+        else:
+            text_combined = interpretation[index_tmp][0]
+            score_combinded = interpretation[index_tmp][1]
+            length = 1
+            while is_subword[index_tmp+length]:
+                text_combined += interpretation[index_tmp+length][0]
+                score_combinded += interpretation[index_tmp+length][1]
+                length += 1
+            interpretation_combined.append((text_combined, score_combinded/length))
+            index_tmp += length
+    interpretation_combined.append(('', 0.0))
+    print(interpretation_combined)
+    res = {"original": text_written, "interpretation": interpretation_combined}
     print(res)
     return res, ai_predict, chatbot