Spaces:
Runtime error
Runtime error
import nltk | |
from nltk.translate.meteor_score import meteor_score | |
from nltk.translate.bleu_score import corpus_bleu | |
from indicnlp.tokenize import indic_tokenize | |
import pandas as pd | |
import numpy as np | |
def configure_nltk():
    """Download the NLTK data packages used by this script.

    Calls ``nltk.download`` for ``punkt`` and then ``wordnet``.
    """
    for resource in ('punkt', 'wordnet'):
        nltk.download(resource)
def tokenize_hindi(text):
    """Tokenize *text* with the Indic NLP trivial tokenizer.

    The tokenizer is called with ``lang='hi'`` and its result is returned
    unchanged.
    """
    tokens = indic_tokenize.trivial_tokenize(text, lang='hi')
    return tokens
def calculate_meteor_score(ref, output):
    """Return the METEOR score of *output* measured against *ref*.

    Both strings are tokenized with ``tokenize_hindi``. The reference tokens
    are wrapped in a one-element list before being handed to
    ``meteor_score`` together with the tokenized output.
    """
    references = [tokenize_hindi(ref)]
    hypothesis = tokenize_hindi(output)
    return meteor_score(references, hypothesis)
def calculate_bleu_score(ref, output):
    """Return the BLEU score of *output* measured against the single reference *ref*.

    Both strings are tokenized with ``tokenize_hindi`` and scored with
    ``corpus_bleu`` as a one-sentence corpus.

    Args:
        ref: Reference text.
        output: Candidate text to evaluate.

    Returns:
        The value returned by ``corpus_bleu`` for this one hypothesis.
    """
    ref_tokens = tokenize_hindi(ref)
    output_tokens = tokenize_hindi(output)
    # corpus_bleu takes, for EACH hypothesis, a list of tokenized references.
    # With one hypothesis and one reference that is [[ref_tokens]]. Passing
    # only [ref_tokens] makes every token string act as its own "reference",
    # so character n-grams get compared against token n-grams.
    list_of_references = [[ref_tokens]]
    hypotheses = [output_tokens]
    return corpus_bleu(list_of_references, hypotheses)
def read_hindi_data(filepath):
    """Load the CSV at *filepath* and drop incomplete rows.

    Cells equal to the string 'नेन' are replaced with NaN, then every row
    that contains a NaN is removed.
    """
    raw = pd.read_csv(filepath)
    return raw.replace('नेन', np.nan).dropna()
def create_reference_df(hindi_df):
    """Build the frame of question/answer columns used for scoring.

    Copies ``Question1``, ``Answer1``, ``Question2`` and ``Answer2`` (in that
    order) from *hindi_df*. Any matching ``<name>_hindi`` column that exists
    in *hindi_df* is copied as well, placed after the four base columns.
    ``calculate_scores`` reads those ``_hindi`` columns from the same frame,
    so leaving them out makes it fail with ``KeyError``.

    Args:
        hindi_df: Source DataFrame; must contain the four base columns.

    Returns:
        A new DataFrame with the selected columns and the index of
        *hindi_df*.

    Raises:
        KeyError: If one of the four base columns is missing.
    """
    base_columns = []
    for i in range(1, 3):
        base_columns.append(f'Question{i}')
        base_columns.append(f'Answer{i}')

    # Translated columns are optional: include only the ones present, so
    # inputs without them yield the same four-column frame as before.
    hindi_columns = [
        f'{name}_hindi'
        for name in base_columns
        if f'{name}_hindi' in hindi_df.columns
    ]

    return hindi_df[base_columns + hindi_columns].copy()
def select_first_n_rows(df, n=10):
    """Return the leading *n* rows of *df* by position (default 10)."""
    leading = slice(None, n)
    return df.iloc[leading]
def calculate_scores(reference_df, n=10):
    """Average METEOR and BLEU over every question/answer pair in *reference_df*.

    For ``i`` in 1..2 the reference columns ``Question{i}`` / ``Answer{i}``
    are paired row by row with ``Question{i}_hindi`` / ``Answer{i}_hindi``.
    Each pair is scored with ``calculate_meteor_score`` and
    ``calculate_bleu_score``.

    Args:
        reference_df: DataFrame containing all eight columns named above.
        n: Unused; kept so existing callers that pass it keep working.

    Returns:
        ``(average_meteor_score, average_bleu_score)``. Both are ``nan``
        when the frame has no rows, instead of raising
        ``ZeroDivisionError``.

    Raises:
        KeyError: If any of the required columns is missing.
    """
    meteor_scores = []
    bleu_scores = []
    for i in range(1, 3):
        # Questions first, then answers, for each index.
        for kind in ('Question', 'Answer'):
            ref_col = f'{kind}{i}'
            output_col = f'{kind}{i}_hindi'
            pairs = zip(reference_df[ref_col], reference_df[output_col])
            for ref_text, output_text in pairs:
                meteor_scores.append(calculate_meteor_score(ref_text, output_text))
                bleu_scores.append(calculate_bleu_score(ref_text, output_text))

    # Both lists grow in lockstep, so checking one is enough. With nothing
    # scored there is no meaningful average to report.
    if not meteor_scores:
        return float('nan'), float('nan')

    average_meteor_score = sum(meteor_scores) / len(meteor_scores)
    average_bleu_score = sum(bleu_scores) / len(bleu_scores)
    return average_meteor_score, average_bleu_score
def main(filepath='/content/Hindi_QnA.csv'):
    """Run the evaluation pipeline and print the average scores.

    Steps: download the NLTK data, load the CSV, build the reference frame,
    keep its first 10 rows, score them, and print both averages.

    Args:
        filepath: CSV file to evaluate. Defaults to the path that was
            previously hard-coded, so calling ``main()`` behaves as before.
    """
    # Configure NLTK
    configure_nltk()

    # Read Hindi data
    hindi_df = read_hindi_data(filepath)

    # Create reference dataframe
    reference_df = create_reference_df(hindi_df)

    # Select first 10 rows (the default of select_first_n_rows)
    reference_df_first10 = select_first_n_rows(reference_df)

    # Calculate scores
    average_meteor_score, average_bleu_score = calculate_scores(reference_df_first10)
    print("Average METEOR Score:", average_meteor_score)
    print("Average BLEU Score:", average_bleu_score)
# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()