Spaces:
Runtime error
Runtime error
| import nltk | |
| from nltk.translate.meteor_score import meteor_score | |
| from nltk.translate.bleu_score import corpus_bleu | |
| from indicnlp.tokenize import indic_tokenize | |
| import pandas as pd | |
| import numpy as np | |
def configure_nltk():
    """Fetch the NLTK resources this script requests.

    Calls ``nltk.download`` once for ``punkt`` and once for ``wordnet``,
    in that order.
    """
    for resource in ('punkt', 'wordnet'):
        nltk.download(resource)
def tokenize_hindi(text):
    """Tokenize *text* with the Indic NLP trivial tokenizer.

    The tokenizer is called with the language code ``'hi'`` and its result
    is returned unchanged.
    """
    tokens = indic_tokenize.trivial_tokenize(text, lang='hi')
    return tokens
def calculate_meteor_score(ref, output):
    """Return the METEOR score of *output* measured against *ref*.

    Both strings are tokenized with ``tokenize_hindi``. The reference is
    passed to ``meteor_score`` as a one-element list of tokenized
    references; the output is passed as the tokenized hypothesis.
    """
    references = [tokenize_hindi(ref)]
    hypothesis = tokenize_hindi(output)
    return meteor_score(references, hypothesis)
def calculate_bleu_score(ref, output):
    """Return the BLEU score of *output* measured against *ref*.

    Both strings are tokenized with ``tokenize_hindi`` and scored with
    ``corpus_bleu`` as a one-sentence corpus that has a single reference.

    Args:
        ref: Reference text.
        output: Candidate text to be scored.

    Returns:
        The value returned by ``corpus_bleu`` for this sentence pair.
    """
    ref_tokens = tokenize_hindi(ref)
    output_tokens = tokenize_hindi(output)
    # corpus_bleu takes one *list of references* per hypothesis, and each
    # reference is itself a list of tokens. The nesting is therefore
    # [[ref_tokens]]. Passing only [ref_tokens] makes every reference word
    # count as a separate reference that is then split into characters,
    # which drives the score towards zero.
    return corpus_bleu([[ref_tokens]], [output_tokens])
def read_hindi_data(filepath):
    """Load the CSV at *filepath* and discard incomplete rows.

    Cells whose value is exactly the string 'नेन' are converted to NaN
    first, so rows containing that placeholder are dropped together with
    rows that have any other missing value.
    """
    return (
        pd.read_csv(filepath)
        .replace('नेन', np.nan)
        .dropna()
    )
def create_reference_df(hindi_df):
    """Build the dataframe that ``calculate_scores`` consumes.

    The ``Question{i}`` and ``Answer{i}`` columns (i = 1, 2) are always
    copied, in that order. The matching ``Question{i}_hindi`` and
    ``Answer{i}_hindi`` columns are copied after them when *hindi_df* has
    them. ``calculate_scores`` reads those ``_hindi`` columns from the same
    frame, so leaving them out makes it fail with ``KeyError``.

    Args:
        hindi_df: Source dataframe holding at least ``Question1``,
            ``Answer1``, ``Question2`` and ``Answer2``.

    Returns:
        A new dataframe with the selected columns.

    Raises:
        KeyError: If one of the four required columns is missing.
    """
    reference_df = pd.DataFrame()
    for i in range(1, 3):
        for column in (f'Question{i}', f'Answer{i}'):
            reference_df[column] = hindi_df[column]
    # Second pass so the four original columns keep their original positions
    # and the optional output columns are appended after them.
    for i in range(1, 3):
        for column in (f'Question{i}_hindi', f'Answer{i}_hindi'):
            if column in hindi_df.columns:
                reference_df[column] = hindi_df[column]
    return reference_df
def select_first_n_rows(df, n=10):
    """Return the leading *n* rows of *df* via positional slicing."""
    leading = slice(n)
    return df.iloc[leading]
def calculate_scores(reference_df, n=10):
    """Average METEOR and BLEU over every question/answer pair.

    For i = 1, 2 each value in ``Question{i}`` / ``Answer{i}`` is scored
    against the value on the same row in ``Question{i}_hindi`` /
    ``Answer{i}_hindi``. All individual scores are pooled and averaged.

    Args:
        reference_df: Dataframe holding the eight columns named above.
        n: Unused. Kept so existing callers that pass it continue to work;
            every row of *reference_df* is scored regardless of its value.

    Returns:
        A tuple ``(average_meteor_score, average_bleu_score)``. When
        *reference_df* has no rows, ``(0.0, 0.0)`` is returned instead of
        dividing by zero.

    Raises:
        KeyError: If any required column is missing from *reference_df*.
    """
    column_pairs = [
        (ref_col, f'{ref_col}_hindi')
        for i in range(1, 3)
        for ref_col in (f'Question{i}', f'Answer{i}')
    ]

    # Check the schema up front so a missing column is reported by name
    # rather than through a bare pandas lookup failure.
    missing = [
        col
        for pair in column_pairs
        for col in pair
        if col not in reference_df.columns
    ]
    if missing:
        raise KeyError(f"reference_df is missing required columns: {missing}")

    meteor_scores = []
    bleu_scores = []
    for ref_col, output_col in column_pairs:
        for ref_text, output_text in zip(reference_df[ref_col], reference_df[output_col]):
            meteor_scores.append(calculate_meteor_score(ref_text, output_text))
            bleu_scores.append(calculate_bleu_score(ref_text, output_text))

    # Nothing to average: avoid ZeroDivisionError on an empty dataframe.
    if not meteor_scores:
        return 0.0, 0.0

    average_meteor_score = sum(meteor_scores) / len(meteor_scores)
    average_bleu_score = sum(bleu_scores) / len(bleu_scores)
    return average_meteor_score, average_bleu_score
def main(filepath='/content/Hindi_QnA.csv', n=10):
    """Run the evaluation pipeline and print the average scores.

    Args:
        filepath: CSV file to evaluate. The default is the path that was
            previously hard-coded.
        n: Number of leading rows to score. The default of 10 matches the
            previous behaviour.
    """
    # Configure NLTK
    configure_nltk()

    # Read Hindi data
    hindi_df = read_hindi_data(filepath)

    # Create reference dataframe
    reference_df = create_reference_df(hindi_df)

    # Select the first n rows
    reference_df_first_n = select_first_n_rows(reference_df, n)

    # Calculate scores
    average_meteor_score, average_bleu_score = calculate_scores(reference_df_first_n, n)
    print("Average METEOR Score:", average_meteor_score)
    print("Average BLEU Score:", average_bleu_score)


if __name__ == "__main__":
    main()