XufengDuan committed on
Commit 3f72150 • 1 Parent(s): fb7f810

update scripts

.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
src/backend/model_operations.py CHANGED
@@ -43,7 +43,7 @@ logging.basicConfig(level=logging.INFO,
 # Load spacy model for word tokenization
 nlp = spacy.load("en_core_web_sm")
 nlp1 = spacy.load("en_core_web_trf")
-os.environ["HUGGINGFACE_API_KEY"] = envs.TOKEN
+# os.environ["HUGGINGFACE_API_KEY"] = envs.TOKEN
 
 def load_evaluation_model(model_path):
     """Load the evaluation model from the given path
@@ -173,7 +173,7 @@ class SummaryGenerator:
         # print(ID, q_ID, prompt_value)
         system_prompt = envs.SYSTEM_PROMPT
         _user_prompt = prompt_value
-        for ii in range(10):
+        for ii in range(1):
             # user_prompt = f"{envs.USER_PROMPT}\nPassage:\n{_source}"
             while True:
                 try:
@@ -405,20 +405,27 @@ class SummaryGenerator:
                     #     max_tokens=1024,
                     #     api_base= "https://api-inference.huggingface.co/models/" + self.model_id,
                     # )
-                    self.model_id = 'command-r-plus' if 'command' in self.model_id else self.model_id
-                    response = litellm.completion(
-                        model="huggingface/" + self.model_id,
-                        # mistralai/Mistral-7B-Instruct-v0.1",
-                        messages=[{"role": "system", "content": system_prompt},
-                                  {"role": "user", "content": user_prompt}],
-                        #temperature=0.0,
-                        max_tokens=1024,
-                        api_base="https://api-inference.huggingface.co/models/" + self.model_id)
-                    print("model response", response)
-                    print("end of model response")
-                    # exit()
-                    result = response['choices'][0]['message']['content']
-                    print(result)
+                    # self.model_id = 'command-r-plus' if 'command' in self.model_id else self.model_id
+                    # response = litellm.completion(
+                    #     model="huggingface/" + self.model_id,
+                    #     # mistralai/Mistral-7B-Instruct-v0.1",
+                    #     messages=[{"role": "system", "content": system_prompt},
+                    #               {"role": "user", "content": user_prompt}],
+                    #     #temperature=0.0,
+                    #     max_tokens=1024,
+                    #     api_base="https://api-inference.huggingface.co/models/" + self.model_id)
+                    # print("model response", response)
+                    # print("end of model response")
+                    # # exit()
+                    # result = response['choices'][0]['message']['content']
+                    # print(result)
+                    from huggingface_hub import InferenceClient
+
+                    client = InferenceClient(self.model_id, api_key=envs.TOKEN)
+                    messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
+                    outputs = client.chat_completion(messages, max_tokens=50)
+                    result = outputs['choices'][0]['message']['content']
+
                     return result
                     # exit()
                 except:  # fail to call api. run it locally.
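The litellm call above is replaced by huggingface_hub's InferenceClient. For reference, a minimal standalone sketch of the new call path (the helper name is illustrative; envs.TOKEN is assumed to hold a valid Hugging Face token):

from huggingface_hub import InferenceClient

def query_chat(model_id: str, token: str, system_prompt: str, user_prompt: str) -> str:
    # Mirrors the new code path: one chat completion against the HF Inference API.
    client = InferenceClient(model_id, api_key=token)
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    outputs = client.chat_completion(messages, max_tokens=50)  # max_tokens=50 as in the diff
    # ChatCompletionOutput supports dict-style access, as used in the diff.
    return outputs["choices"][0]["message"]["content"]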
@@ -544,6 +551,11 @@ class EvaluationModel:
                 output.append("Other")
                 continue
             rs = summaries_df["Response"][i].strip().lower()
+            sentences = rs.split('\n')
+            sentences = [sentence.split(':', 1)[-1].strip() if ':' in sentence else sentence
+                         for sentence in sentences]
+            rs = [sentence.strip() for sentence in sentences if sentence.strip()]
+
             '''Exp1'''
             if summaries_df["Experiment"][i] == "E1":
                 print("E1", rs)
@@ -864,13 +876,22 @@ class EvaluationModel:
                     output.append("Other")
 
             '''Exp4'''
-
+
             elif summaries_df["Experiment"][i] == "E4":
-                # rs = summaries_df["Response"][i].strip()
-                meaning_word = rs.split(";")[4].replace(" ",'')
+                try:
+                    meaning_word = rs.split(";")[4].replace(" ", '')
+                except IndexError:
+                    output.append("Other")
+                    continue
+                except Exception as e:
+                    print(f"Unexpected error: {e}")
+                    output.append("Other")
+                    continue
+
                 target = summaries_df["Factor 2"][i].strip().lower()
                 pair = target + "_" + meaning_word
                 print("E4:", pair)
+
                 if pair in wordpair2code.keys():
                     output.append(wordpair2code[pair])
                 else:
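E4 coding expects a semicolon-separated response whose fifth field is the meaning word; the new try/except codes malformed responses as "Other" instead of crashing on the index lookup. A small example with an assumed well-formed response:

# Assumed E4 response shape: at least five semicolon-separated fields.
rs = "dog; cat; bird; fish; warmth"
try:
    meaning_word = rs.split(";")[4].replace(" ", '')
except IndexError:
    meaning_word = "Other"  # the real loop appends "Other" and continues
print(meaning_word)  # 'warmth'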
@@ -1068,7 +1089,7 @@ class EvaluationModel:
             float: The average JS divergence across all common Question_IDs.
         """
         # Load the datasets
-        human_df = pd.read_excel(file_path_1)
+        human_df = pd.read_csv(file_path_1, encoding='ISO-8859-1')
         llm_df = pd.read_csv(file_path_2)
 
         def create_e5_entries(df):
@@ -1146,7 +1167,7 @@ class EvaluationModel:
         return avg_js_divergence
 
 
-    def evaluate_humanlike(self, summaries_df, human_data_path, result_save_path):
+    def evaluate_humanlike(self, summaries_df: object, human_data_path: object, result_save_path: object) -> object:
         '''
         evaluate humanlike score
         1. code the result
@@ -1156,8 +1177,8 @@ class EvaluationModel:
         '''coding human data'''
         # self.huamn_df = pd.read_csv(human_data_path)
         # self.data = self.code_results(self.huamn_df)
-        save_path = human_data_path.replace('.csv','_coding.csv')
-        human_save_path = "./src/datasets/coding_human.xlsx"
+        #save_path = human_data_path.replace('.csv','_coding.csv')
+        #human_save_path = "./src/datasets/coding_human.xlsx"
         # if save_path is not None:
         #     print(f'Save human coding results to {save_path}')
         #     fpath = Path(save_path)
@@ -1175,7 +1196,7 @@ class EvaluationModel:
         self.llm_df.to_csv(fpath)
         # file_path_1 = '/Users/simon/Downloads/coding_human.xlsx'
         # file_path_2 = '/Users/simon/Downloads/Meta-Llama-3.1-70B-Instruct_coding.csv'
-        avg_js_divergence = self.calculate_js_divergence("./src/datasets/coding_human.xlsx", save_path)
+        avg_js_divergence = self.calculate_js_divergence(human_data_path, save_path)
 
         return avg_js_divergence
 
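Only the loading step of calculate_js_divergence changes (the human file is now a pre-coded CSV rather than an Excel sheet). For orientation, a generic sketch of averaging Jensen-Shannon divergence over shared Question_IDs, assuming per-question response-category distributions have already been built; this is not the repository's own implementation:

import numpy as np
from scipy.spatial.distance import jensenshannon

def average_js_divergence(human: dict, llm: dict) -> float:
    # human/llm map Question_ID -> probability vector over response categories.
    common = sorted(human.keys() & llm.keys())
    # jensenshannon() returns the JS distance; square it to get the divergence.
    return float(np.mean([jensenshannon(human[q], llm[q], base=2) ** 2 for q in common]))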
 
src/backend/util.py CHANGED
@@ -61,7 +61,7 @@ def format_results(model_name: str, revision: str, precision: str,
         },
         "results": {
             "hallucination_rate": {
-                "hallucination_rate": round(hallucination_rate,1)
+                "hallucination_rate": round(hallucination_rate,3)
             },
             "factual_consistency_rate": {
                 "factual_consistency_rate": round(factual_consistency_rate,1)
src/envs.py CHANGED
@@ -36,7 +36,7 @@ API = HfApi(token=TOKEN)
 DATASET_PATH = "./src/datasets/Material_Llama2_0603.xlsx" #experiment data
 PROMPT_PATH = "./src/datasets/prompt.xlsx" #prompt for each experiment
 HEM_PATH = 'vectara/hallucination_evaluation_model'
-HUMAN_DATA = "./src/datasets/human_data.csv" #experiment data
+HUMAN_DATA = "./src/datasets/human_data_coding.csv" #experiment data
 ITEM_4_DATA = "./src/datasets/associataion_dataset.csv" #database
 ITEM_5_DATA = "./src/datasets/Items_5.csv" #experiment 5 need verb words