JRQi committed on
Commit
cf3bdbb
1 Parent(s): 6d0928a

Update game1.py

Browse files
Files changed (1) hide show
  1. game1.py +36 -1
game1.py CHANGED
@@ -312,6 +312,7 @@ def interpre1(lang_selected, num_selected):
312
  interpretation_combined.append((text_combined, score_combinded/length))
313
  index_tmp += length
314
 
 
315
  print(interpretation_combined)
316
  res = {"original": text['text'], "interpretation": interpretation_combined}
317
  # pos = []
@@ -408,15 +409,49 @@ def func1_written(text_written, human_predict, lang_written):
408
 
409
  if lang_written == "Dutch":
410
  sentiment_classifier = pipeline("text-classification", model='DTAI-KULeuven/robbert-v2-dutch-sentiment', return_all_scores=True, device=device)
 
411
  else:
412
  sentiment_classifier = pipeline("text-classification", model='distilbert-base-uncased-finetuned-sst-2-english', return_all_scores=True, device=device)
 
413
 
414
  explainer = shap.Explainer(sentiment_classifier)
415
 
416
  shap_values = explainer([text_written])
417
  interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))
 
418
 
419
- res = {"original": text_written, "interpretation": interpretation}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  print(res)
421
 
422
  return res, ai_predict, chatbot
 
312
  interpretation_combined.append((text_combined, score_combinded/length))
313
  index_tmp += length
314
 
315
+ interpretation_combined.append(('', 0.0))
316
  print(interpretation_combined)
317
  res = {"original": text['text'], "interpretation": interpretation_combined}
318
  # pos = []
 
409
 
410
  if lang_written == "Dutch":
411
  sentiment_classifier = pipeline("text-classification", model='DTAI-KULeuven/robbert-v2-dutch-sentiment', return_all_scores=True, device=device)
412
+ tokenizer = AutoTokenizer.from_pretrained("DTAI-KULeuven/robbert-v2-dutch-sentiment")
413
  else:
414
  sentiment_classifier = pipeline("text-classification", model='distilbert-base-uncased-finetuned-sst-2-english', return_all_scores=True, device=device)
415
+ tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
416
 
417
  explainer = shap.Explainer(sentiment_classifier)
418
 
419
  shap_values = explainer([text_written])
420
  interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))
421
+
422
 
423
+ encodings = tokenizer(text_written, return_offsets_mapping=True)
424
+
425
+ print(encodings['offset_mapping'])
426
+ is_subword = [False, False]
427
+ for i in range(2, len(encodings['offset_mapping'])):
428
+ if encodings['offset_mapping'][i][0] == encodings['offset_mapping'][i-1][1]:
429
+ is_subword.append(True)
430
+ else:
431
+ is_subword.append(False)
432
+ print(is_subword)
433
+ interpretation_combined = []
434
+
435
+ index_tmp = 0
436
+ while index_tmp < (len(interpretation) - 1):
437
+ if not is_subword[index_tmp+1]:
438
+ interpretation_combined.append(interpretation[index_tmp])
439
+ index_tmp += 1
440
+ else:
441
+ text_combined = interpretation[index_tmp][0]
442
+ score_combinded = interpretation[index_tmp][1]
443
+ length = 1
444
+ while is_subword[index_tmp+length]:
445
+ text_combined += interpretation[index_tmp+length][0]
446
+ score_combinded += interpretation[index_tmp+length][1]
447
+ length += 1
448
+ interpretation_combined.append((text_combined, score_combinded/length))
449
+ index_tmp += length
450
+
451
+ interpretation_combined.append(('', 0.0))
452
+ print(interpretation_combined)
453
+
454
+ res = {"original": text_written, "interpretation": interpretation_combined}
455
  print(res)
456
 
457
  return res, ai_predict, chatbot