beweinreich committed on
Commit
988ba41
1 Parent(s): fc0306d

more improvements to chatgpt audit

Browse files
audits/1720097343.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ input_word,original_dictionary_word,new_dictionary_word
audits/1720097580.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ input_word,original_dictionary_word,new_dictionary_word
chatgpt_audit.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import csv
3
  import json
@@ -32,7 +33,9 @@ model_name = 'sentence-transformers/all-mpnet-base-v2'
32
  model = SentenceTransformer(model_name)
33
 
34
  def remove_misc_words(text):
35
- return text.replace('raw', '').replace('fresh', '').replace('canned', '').replace('prepared', '').replace('with', '').replace('and', '').replace('or', '').replace('in', '').replace('NFS', '').strip()
 
 
36
 
37
  def compare_embeddings(old_dictionary_word, new_dictionary_word):
38
  old_dictionary_word = remove_misc_words(old_dictionary_word)
 
1
+ import re
2
  import os
3
  import csv
4
  import json
 
33
  model = SentenceTransformer(model_name)
34
 
35
  def remove_misc_words(text):
36
+ text = text.replace('raw', '').replace('fresh', '').replace('canned', '').replace('prepared', '').replace('with', '').replace('and', '').replace('or', '').replace('in', '').replace('NFS', '').strip()
37
+ text = re.sub(r'\(.*?\)', '', text)
38
+ return text
39
 
40
  def compare_embeddings(old_dictionary_word, new_dictionary_word):
41
  old_dictionary_word = remove_misc_words(old_dictionary_word)