def is_summary_valid(summary: str) -> bool:
    """
    Checks whether a summary is valid.

    A summary is valid if it is a string containing at least five
    whitespace-separated words (which also implies it is non-empty).

    Args:
        summary (str): The summary to check.

    Returns:
        bool: True if the summary is valid, False otherwise.
    """
    if isinstance(summary, str):
        words = summary.split()
        if len(words) >= 5:
            return True
    return False
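
# Illustrative behavior (a sketch of expected outcomes; the sample inputs are
# made up and not part of the original module):
#   is_summary_valid("The cat sat on the mat.")  -> True   (6 words)
#   is_summary_valid("Too short")                -> False  (2 words)
#   is_summary_valid(None)                       -> False  (not a str)
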
def create_pairs(df):
    """
    Creates pairs of source and summary from the dataframe.

    Args:
        df (DataFrame): The dataframe containing 'source' and 'summary' columns.

    Returns:
        list: A list of pairs [source, summary], one per row.
    """
    pairs = []
    for _, row in df.iterrows():
        pairs.append([row['source'], row['summary']])
    return pairs
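
# Illustrative usage (assumes pandas; the column names 'source' and 'summary'
# come from the docstring above, the sample rows are made up):
#   import pandas as pd
#   df = pd.DataFrame({"source":  ["Article text one", "Article text two"],
#                      "summary": ["Summary one", "Summary two"]})
#   create_pairs(df)
#   -> [["Article text one", "Summary one"],
#       ["Article text two", "Summary two"]]
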
def format_results(model_name: str, revision: str, precision: str,
                   factual_consistency_rate: float, hallucination_rate: float,
                   answer_rate: float, avg_summary_len: float) -> dict:
    """
    Formats the evaluation results into a structured dictionary.

    Args:
        model_name (str): The name of the evaluated model.
        revision (str): The revision hash of the model.
        precision (str): The precision with which the evaluation was run.
        factual_consistency_rate (float): The factual consistency rate.
        hallucination_rate (float): The hallucination rate.
        answer_rate (float): The answer rate.
        avg_summary_len (float): The average summary length.

    Returns:
        dict: A dictionary containing the structured evaluation results.
    """
    results = {
        "config": {
            "model_dtype": precision,   # Precision with which the evaluation was run
            "model_name": model_name,   # Name of the model
            "model_sha": revision       # Hash of the model
        },
        "results": {
            "hallucination_rate": {
                "hallucination_rate": round(hallucination_rate, 1)
            },
            "factual_consistency_rate": {
                "factual_consistency_rate": round(factual_consistency_rate, 1)
            },
            "answer_rate": {
                "answer_rate": round(answer_rate * 100, 1)
            },
            "average_summary_length": {
                "average_summary_length": round(avg_summary_len, 1)
            },
        }
    }
    return results
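
# Illustrative call (values are made up; note that answer_rate is passed as a
# fraction and scaled by 100 inside the function, while the other rates are
# expected to be percentages already):
#   format_results(model_name="org/model", revision="abc1234",
#                  precision="float16", factual_consistency_rate=94.3,
#                  hallucination_rate=5.7, answer_rate=0.998,
#                  avg_summary_len=67.2)
#   -> {"config": {"model_dtype": "float16", "model_name": "org/model",
#                  "model_sha": "abc1234"},
#       "results": {"hallucination_rate": {"hallucination_rate": 5.7},
#                   "factual_consistency_rate": {"factual_consistency_rate": 94.3},
#                   "answer_rate": {"answer_rate": 99.8},
#                   "average_summary_length": {"average_summary_length": 67.2}}}
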