Constrained-Track-Document-Bassline-Readability-Arabertv2-d3tok-reg / Constrained-Track-Document-Bassline-Readability-Arabertv2-d3tok-reg.py
| # -*- coding: utf-8 -*- | |
| """arabertv2-d3tok_Sentence_Constrained.ipynb | |
| Automatically generated by Colab. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1vHngBRGkcXHJIwi317DS0AsZfsXguIc0 | |
| """ | |
| # Authorize Colab to access your Google Drive | |
| from google.colab import drive | |
| drive.mount('/content/drive') | |
| import pandas as pd | |
| import numpy as np | |
| import os | |
| import torch | |
| import torch.nn as nn | |
| import zipfile | |
| from sklearn.metrics import cohen_kappa_score | |
| from torch.utils.data import Dataset as TorchDataset | |
| from transformers import ( | |
| AutoTokenizer, | |
| AutoModel, | |
| TrainingArguments, | |
| Trainer, | |
| EarlyStoppingCallback | |
| ) | |
| import gc | |
| import ast # To safely evaluate string-formatted lists | |
| import json # Added import for json | |
| # --- Hugging Face Authentication --- | |
| from huggingface_hub import login | |
| from google.colab import userdata | |
# Log in to Hugging Face using the token stored in Colab secrets.
# `userdata.get` reports a missing or inaccessible secret through its own
# exception classes rather than KeyError, so those are listed explicitly;
# NameError/KeyError are kept so the previous fallback cases still apply.
try:
    login(token=userdata.get('HF_TOKEN'))
    print("✔️ Successfully logged into Hugging Face.")
except (
    NameError,
    KeyError,
    userdata.SecretNotFoundError,
    userdata.NotebookAccessError,
):
    print("⚠️ Hugging Face token not found in Colab secrets. Please add it as 'HF_TOKEN'.")
    # Fall back to the interactive prompt (local execution, or manual login preferred).
    login()
# --- Model & training constants ---
MODEL_NAME = "CAMeL-Lab/readability-arabertv2-d3tok-reg"
NUM_LABELS = 1        # single regression output
TARGET_CLASSES = 19   # number of readability levels used when clipping predictions
NUM_FEATURES = 7      # length of the extra feature vector passed to the model

# --- IMPORTANT: project folder on Google Drive; every other path hangs off it ---
PROJECT_DRIVE_PATH = '/content/drive/MyDrive/BAREC_Competition'
BASE_DIR = PROJECT_DRIVE_PATH

# --- Directories ---
PROCESSED_DATA_DIR = os.path.join(BASE_DIR, "lex")
CHECKPOINT_DIR = os.path.join(
    BASE_DIR,
    "results",
    "hybrid_constrained_samer_regression_v2_" + MODEL_NAME.rsplit('/', 1)[-1],
)
SUBMISSION_DIR = os.path.join(BASE_DIR, "submission")

# Output directories are created up front so later writes cannot fail on a missing folder.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(SUBMISSION_DIR, exist_ok=True)

# --- Preprocessed input files ---
TRAIN_PROCESSED_PATH = os.path.join(PROCESSED_DATA_DIR, 'train_processed_full.csv')
DEV_PROCESSED_PATH = os.path.join(PROCESSED_DATA_DIR, 'dev_processed_full.csv')
TEST_PROCESSED_PATH = os.path.join(PROCESSED_DATA_DIR, 'test_processed_full.csv')

# --- Submission artefacts ---
SUBMISSION_PATH = os.path.join(SUBMISSION_DIR, "submission_hybrid_constrained_samer_regression.csv")
ZIPPED_SUBMISSION_PATH = os.path.join(SUBMISSION_DIR, "submission_hybrid_constrained_samer_regression.zip")

print(f"✔️ All paths configured to use Google Drive folder: {BASE_DIR}")
| """# 2. DATA LOADING FUNCTION""" | |
| # ===================================================================================== | |
| # 2. DATA LOADING FUNCTION | |
| # ===================================================================================== | |
def load_preprocessed_data():
    """Read the train and dev CSVs and prepare them for regression training.

    The ``features`` column is parsed from its string form with
    ``ast.literal_eval``; ``label`` is cast to int, shifted down by one and
    stored as float.

    Returns:
        ``(train_df, val_df)`` on success, or ``(None, None)`` when a file is
        missing or any other error occurs (the error is printed, not raised).
    """
    print("\n--- Loading Preprocessed Data from Google Drive ---")
    try:
        train_df = pd.read_csv(TRAIN_PROCESSED_PATH)
        val_df = pd.read_csv(DEV_PROCESSED_PATH)
        frames = (train_df, val_df)

        print("Converting 'features' column from string to list...")
        for frame in frames:
            frame['features'] = frame['features'].apply(ast.literal_eval)

        # Regression target: original label minus one, as a float.
        for frame in frames:
            shifted = frame['label'].astype(int) - 1
            frame['label'] = shifted.astype(float)

        print(f"✔ Successfully loaded {len(train_df)} training and {len(val_df)} validation records.")
        return train_df, val_df
    except FileNotFoundError as e:
        print(f"❌ ERROR: Preprocessed file not found: {e}.")
        print("Please make sure your data is uploaded to the correct Google Drive folder.")
    except Exception as e:
        print(f"❌ ERROR during data loading: {e}")
    # Both failure paths report the same sentinel pair to the caller.
    return None, None
| """# 3. MODEL, DATASET, AND METRICS DEFINITIONS""" | |
| # ===================================================================================== | |
| # 3. MODEL, DATASET, AND METRICS DEFINITIONS | |
| # ===================================================================================== | |
class ReadabilityDataset(TorchDataset):
    """Torch dataset pairing tokenized text with an extra numeric feature vector.

    Args:
        texts: Sequence of input texts (each item is passed through ``str``).
        features: Sequence of numeric feature vectors, one per text.
        labels: Optional sequence of numeric targets; omit for unlabeled data.
        tokenizer_obj: Callable tokenizer invoked as ``tokenizer_obj(text, ...)``
            that returns a mapping with ``input_ids`` and ``attention_mask``.
        max_len: Length every sample is padded or truncated to.
    """

    def __init__(self, texts, features, labels=None, tokenizer_obj=None, max_len=256):
        self.texts = texts
        self.features = features
        self.labels = labels
        self.tokenizer = tokenizer_obj
        self.max_len = max_len

    def __len__(self):
        """Return the number of samples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the model inputs for sample ``idx``.

        The result holds ``input_ids``, ``attention_mask`` and
        ``extra_features``; ``labels`` is added only when labels were supplied.
        """
        text = str(self.texts[idx])
        feature_vec = torch.tensor(self.features[idx], dtype=torch.float)

        # Call the tokenizer directly: `encode_plus` is deprecated in favour of
        # `__call__`, which accepts the same keyword arguments.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        # `return_tensors='pt'` yields a leading batch axis of size one;
        # flatten so each sample is a 1-D tensor.
        item = {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'extra_features': feature_vec,
        }
        if self.labels is not None:
            item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item
class HybridRegressionModel(nn.Module):
    """Transformer encoder plus extra numeric features feeding one linear regressor.

    Args:
        model_name: Identifier passed to ``AutoModel.from_pretrained``.
        num_extra_features: Width of the feature vector concatenated to the
            pooled transformer representation.
    """

    def __init__(self, model_name, num_extra_features):
        super().__init__()
        self.transformer = AutoModel.from_pretrained(model_name)
        # The regression head sees the pooled representation followed by the extra features.
        self.regressor = nn.Linear(self.transformer.config.hidden_size + num_extra_features, 1)
        # Attribute the Hugging Face Trainer looks up on the model around
        # checkpoint handling; None means no keys are skipped when saving.
        self._keys_to_ignore_on_save = None

    def forward(self, input_ids, attention_mask, extra_features, labels=None):
        """Run the model.

        Returns:
            ``logits`` alone when ``labels`` is None, otherwise the tuple
            ``(loss, logits)`` where ``loss`` is the mean squared error.
        """
        outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)

        # Some backbones expose no pooled output; fall back to the first
        # token's hidden state instead of failing inside torch.cat.
        pooled = getattr(outputs, "pooler_output", None)
        if pooled is None:
            pooled = outputs.last_hidden_state[:, 0]

        combined_features = torch.cat((pooled, extra_features), dim=1)
        logits = self.regressor(combined_features)
        if labels is None:
            return logits

        # Flatten both sides explicitly so the loss always compares two 1-D
        # tensors, whatever the batch size or label shape.
        loss = nn.MSELoss()(logits.reshape(-1), labels.reshape(-1))
        return loss, logits
def compute_metrics(p):
    """Compute the Quadratic Weighted Kappa for regression predictions.

    Args:
        p: Pair ``(predictions, labels)``. ``predictions`` may be an array of
            any shape holding one value per sample, or a tuple whose first
            element is that array.

    Returns:
        ``{'qwk': <float>}``.
    """
    predictions, labels = p
    if isinstance(predictions, tuple):
        # Keep only the first element when the model output is a tuple.
        predictions = predictions[0]

    # The regression head emits one column per sample while labels are 1-D;
    # flatten both so the scorer receives matching shapes, as the prediction
    # code in this file already does.
    flat_preds = np.asarray(predictions).reshape(-1)
    flat_labels = np.asarray(labels).reshape(-1)

    # Round to the nearest integer and clip to the valid range [0, TARGET_CLASSES - 1].
    clipped_preds = np.clip(np.round(flat_preds), 0, TARGET_CLASSES - 1)
    qwk = cohen_kappa_score(flat_labels, clipped_preds, weights='quadratic')
    return {'qwk': qwk}
| """# 4. & 5. MAIN EXECUTION FUNCTIONS""" | |
| # ===================================================================================== | |
| # 4. & 5. MAIN EXECUTION FUNCTIONS | |
| # ===================================================================================== | |
def main_train():
    """Train the hybrid regression model, resuming from the newest checkpoint if one exists.

    Loads the tokenizer and the preprocessed train/dev data, builds the
    datasets and model, configures the Trainer (evaluation and saving once
    per epoch, best model selected on ``qwk``, early stopping) and runs
    training. Returns early, after printing a message, if data loading failed.
    """
    print("===== 🚀 STARTING HYBRID REGRESSION MODEL PIPELINE =====\n")

    print("Initializing Tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    train_df, val_df = load_preprocessed_data()
    if train_df is None:
        print("\n! Aborting script due to data loading failure.")
        return

    def build_dataset(frame):
        # Text, feature and label columns become plain Python lists.
        return ReadabilityDataset(
            frame['d3tok_text'].tolist(),
            frame['features'].tolist(),
            frame['label'].tolist(),
            tokenizer,
        )

    print("\nCreating Torch Datasets...")
    train_dataset = build_dataset(train_df)
    val_dataset = build_dataset(val_df)
    print("✔ Datasets created.")

    print("\nInitializing Hybrid Regression Model...")
    model = HybridRegressionModel(MODEL_NAME, num_extra_features=NUM_FEATURES)

    training_args = TrainingArguments(
        output_dir=CHECKPOINT_DIR,
        # Optimisation schedule
        num_train_epochs=15,
        learning_rate=3e-5,
        warmup_ratio=0.1,
        weight_decay=0.01,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=64,
        fp16=torch.cuda.is_available(),
        # Evaluation / checkpointing: once per epoch, keep the best model by QWK
        eval_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=2,
        load_best_model_at_end=True,
        metric_for_best_model="qwk",
        greater_is_better=True,
        # Logging
        logging_steps=100,
        report_to="none",
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=4)],
    )

    print("\nStarting model training... Checkpoints will be saved to Google Drive.")
    latest_checkpoint = None
    if not os.path.exists(CHECKPOINT_DIR):
        print("Checkpoint directory not found. Starting training from scratch.")
    else:
        # Order the checkpoint folders by their numeric step suffix.
        saved = sorted(
            (entry for entry in os.listdir(CHECKPOINT_DIR) if entry.startswith("checkpoint-")),
            key=lambda entry: int(entry.split('-')[-1]),
        )
        if not saved:
            print("No checkpoints found to resume training from. Starting from scratch.")
        else:
            latest_checkpoint = os.path.join(CHECKPOINT_DIR, saved[-1])
            print(f"Resuming training from latest checkpoint: {latest_checkpoint}")

    trainer.train(resume_from_checkpoint=latest_checkpoint)
    print("✔ Training finished.")

    # Drop the large objects and release cached GPU memory before prediction.
    del model, trainer, train_dataset, val_dataset, train_df, val_df
    gc.collect()
    torch.cuda.empty_cache()
def _find_weights_file(checkpoint_path):
    """Return the weights file inside ``checkpoint_path``, or None if there is none."""
    # The prediction code later in this file reads 'model.safetensors';
    # 'pytorch_model.bin' is still accepted for older checkpoints.
    for filename in ("model.safetensors", "pytorch_model.bin"):
        candidate = os.path.join(checkpoint_path, filename)
        if os.path.exists(candidate):
            return candidate
    return None


def _load_weights(weights_path):
    """Load a state dict from a ``.safetensors`` file or a ``torch.save`` file."""
    if weights_path.endswith(".safetensors"):
        # Imported locally so a .bin-only setup does not need safetensors.
        from safetensors.torch import load_file
        return load_file(weights_path)
    # Load onto CPU first; the Trainer moves the model to its device.
    return torch.load(weights_path, map_location="cpu")


def _select_best_checkpoint(checkpoint_dir, checkpoint_names):
    """Return the path of the best checkpoint recorded in the trainer states, or None.

    Each ``trainer_state.json`` stores the best metric seen so far and the
    checkpoint that achieved it, so the best model is the recorded
    ``best_model_checkpoint`` and not the folder the state file sits in.
    """
    best_path = None
    best_metric = None
    for name in checkpoint_names:
        checkpoint_path = os.path.join(checkpoint_dir, name)
        state_path = os.path.join(checkpoint_path, "trainer_state.json")
        if not os.path.exists(state_path):
            continue
        try:
            with open(state_path, 'r', encoding='utf-8') as f:
                trainer_state = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Warning: Could not parse trainer_state.json in {checkpoint_path}: {e}")
            continue
        if not isinstance(trainer_state, dict):
            continue

        metric = trainer_state.get('best_metric')
        if not isinstance(metric, (int, float)):
            continue
        if best_metric is not None and metric <= best_metric:
            continue

        recorded = trainer_state.get('best_model_checkpoint')
        if recorded:
            # Re-anchor on checkpoint_dir in case the recorded path is stale.
            candidate = os.path.join(checkpoint_dir, os.path.basename(os.path.normpath(recorded)))
        else:
            candidate = checkpoint_path
        if _find_weights_file(candidate) is None:
            continue

        best_metric, best_path = metric, candidate
        print(f"Found improved eval_qwk {best_metric} in {candidate}")
    return best_path


def main_predict():
    """Predict on the test set with the best saved checkpoint and write the submission.

    Reads the preprocessed test CSV, picks the best checkpoint (falling back
    to the newest one that has a weights file), loads the weights, and runs
    prediction. Predictions are rounded, clipped to
    ``[0, TARGET_CLASSES - 1]`` and shifted by one before the CSV and its
    zipped copy are written. Errors are printed rather than raised.
    """
    print("\n===== 🏆 GENERATING FINAL PREDICTIONS & SUBMISSION =====\n")
    try:
        print("Initializing Tokenizer for prediction...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

        print("Loading preprocessed test data from Google Drive...")
        test_df = pd.read_csv(TEST_PROCESSED_PATH)
        test_df['features'] = test_df['features'].apply(ast.literal_eval)

        print("\nLoading the best trained model from Google Drive checkpoints...")
        if not os.path.exists(CHECKPOINT_DIR):
            raise FileNotFoundError(f"Checkpoint directory not found on Google Drive: {CHECKPOINT_DIR}.")

        # Keep only folders with a numeric step suffix so sorting cannot fail.
        checkpoints = [
            d for d in os.listdir(CHECKPOINT_DIR)
            if d.startswith("checkpoint-") and d.split('-')[-1].isdigit()
        ]
        if not checkpoints:
            raise FileNotFoundError(f"No checkpoint found in the results directory on Google Drive: {CHECKPOINT_DIR}.")
        checkpoints.sort(key=lambda x: int(x.split('-')[-1]), reverse=True)  # newest first

        best_checkpoint_path = _select_best_checkpoint(CHECKPOINT_DIR, checkpoints)
        if not best_checkpoint_path:
            print("Could not find best checkpoint via trainer_state.json. Falling back to the latest checkpoint with a model file.")
            for checkpoint in checkpoints:
                checkpoint_path = os.path.join(CHECKPOINT_DIR, checkpoint)
                if _find_weights_file(checkpoint_path) is not None:
                    best_checkpoint_path = checkpoint_path
                    print(f"Using latest valid checkpoint: {best_checkpoint_path}")
                    break
        if not best_checkpoint_path:
            raise FileNotFoundError(
                f"No valid checkpoint with 'model.safetensors' or 'pytorch_model.bin' found in: {CHECKPOINT_DIR}."
            )

        print(f"Loading model from: {best_checkpoint_path}")
        model = HybridRegressionModel(MODEL_NAME, num_extra_features=NUM_FEATURES)
        model.load_state_dict(_load_weights(_find_weights_file(best_checkpoint_path)))
        trainer = Trainer(model=model, args=TrainingArguments(output_dir=CHECKPOINT_DIR))

        print("Generating predictions on the test set...")
        # Note: no labels for the test dataset.
        test_dataset = ReadabilityDataset(test_df['d3tok_text'].tolist(), test_df['features'].tolist(), tokenizer_obj=tokenizer)
        predictions = trainer.predict(test_dataset)

        # Round, clip to [0, TARGET_CLASSES - 1], then shift back up by one.
        clipped_preds = np.clip(np.round(predictions.predictions.flatten()), 0, TARGET_CLASSES - 1)
        test_df['Prediction'] = (clipped_preds + 1).astype(int)
        submission_df = test_df.rename(columns={'ID': 'id'})[['id', 'Prediction']]

        print(f"\nSaving prediction file to: {SUBMISSION_PATH}")
        submission_df.to_csv(SUBMISSION_PATH, index=False)

        print(f"Compressing into {os.path.basename(ZIPPED_SUBMISSION_PATH)}...")
        with zipfile.ZipFile(ZIPPED_SUBMISSION_PATH, 'w', zipfile.ZIP_DEFLATED) as zipf:
            zipf.write(SUBMISSION_PATH, arcname=os.path.basename(SUBMISSION_PATH))
        print(f"✔ Submission file '{os.path.basename(ZIPPED_SUBMISSION_PATH)}' created successfully in your Drive!")
    except FileNotFoundError as e:
        print(f"❌ ERROR: File not found: {e}. Ensure training was completed and checkpoints exist.")
    except Exception as e:
        print(f"❌ An error occurred during final prediction: {e}")
| """# Running script""" | |
| # ===================================================================================== | |
| # 6. SCRIPT RUNNER | |
| # ===================================================================================== | |
# Stage 1: train the model (resumes from the newest checkpoint when one exists).
main_train()

# Stage 2: predict on the test set and write the submission files.
main_predict()

print("\n--- ✅ All Done! Check your Google Drive for results and submission files. ---")
# Checkpoint directory the stand-alone prediction step loads its weights from.
BEST_CHECKPOINT_PATH = '/content/drive/MyDrive/BAREC_Competition/results/hybrid_constrained_samer_regression_v2_readability-arabertv2-d3tok-reg/checkpoint-42826'

# --- Input and output directories, derived from the project folder ---
PROCESSED_DATA_DIR = os.path.join(PROJECT_DRIVE_PATH, "lex")
SUBMISSION_DIR = os.path.join(PROJECT_DRIVE_PATH, "submission")

# The submission folder must exist before anything is written into it.
os.makedirs(SUBMISSION_DIR, exist_ok=True)

# --- File paths (these rebind the submission names defined earlier in the file) ---
TEST_PROCESSED_PATH = os.path.join(PROCESSED_DATA_DIR, 'test_processed_full.csv')
SUBMISSION_PATH = os.path.join(SUBMISSION_DIR, "submission_final_prediction.csv")
ZIPPED_SUBMISSION_PATH = os.path.join(SUBMISSION_DIR, "submission.zip")

print(f"✔️ Configuration loaded. Model will be loaded from: {BEST_CHECKPOINT_PATH}")
| """# 3. MODEL AND DATASET CLASS DEFINITIONS""" | |
| # ===================================================================================== | |
| # 3. MODEL AND DATASET CLASS DEFINITIONS | |
| # ===================================================================================== | |
class ReadabilityDataset(TorchDataset):
    """Torch dataset pairing tokenized text with an extra numeric feature vector.

    Args:
        texts: Sequence of input texts (each item is passed through ``str``).
        features: Sequence of numeric feature vectors, one per text.
        labels: Optional sequence of numeric targets; omit for unlabeled data.
        tokenizer_obj: Callable tokenizer invoked as ``tokenizer_obj(text, ...)``
            that returns a mapping with ``input_ids`` and ``attention_mask``.
        max_len: Length every sample is padded or truncated to.
    """

    def __init__(self, texts, features, labels=None, tokenizer_obj=None, max_len=256):
        self.texts = texts
        self.features = features
        self.labels = labels
        self.tokenizer = tokenizer_obj
        self.max_len = max_len

    def __len__(self):
        """Return the number of samples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the model inputs for sample ``idx``.

        The result holds ``input_ids``, ``attention_mask`` and
        ``extra_features``; ``labels`` is added only when labels were supplied.
        """
        text = str(self.texts[idx])
        feature_vec = torch.tensor(self.features[idx], dtype=torch.float)

        # Call the tokenizer directly: `encode_plus` is deprecated in favour of
        # `__call__`, which accepts the same keyword arguments.
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        # `return_tensors='pt'` yields a leading batch axis of size one;
        # flatten so each sample is a 1-D tensor.
        item = {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'extra_features': feature_vec,
        }
        if self.labels is not None:
            item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item
class HybridRegressionModel(nn.Module):
    """Transformer encoder plus extra numeric features feeding one linear regressor.

    Args:
        model_name: Identifier passed to ``AutoModel.from_pretrained``.
        num_extra_features: Width of the feature vector concatenated to the
            pooled transformer representation.
    """

    def __init__(self, model_name, num_extra_features):
        super().__init__()
        self.transformer = AutoModel.from_pretrained(model_name)
        self.regressor = nn.Linear(self.transformer.config.hidden_size + num_extra_features, 1)
        # Kept in step with the training-time definition of this class, which
        # sets this attribute for the Hugging Face Trainer.
        self._keys_to_ignore_on_save = None

    def forward(self, input_ids, attention_mask, extra_features, labels=None):
        """Run the model.

        Returns:
            ``logits`` alone when ``labels`` is None, otherwise the tuple
            ``(loss, logits)`` where ``loss`` is the mean squared error.
        """
        outputs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)

        # Some backbones expose no pooled output; fall back to the first
        # token's hidden state instead of failing inside torch.cat.
        pooled = getattr(outputs, "pooler_output", None)
        if pooled is None:
            pooled = outputs.last_hidden_state[:, 0]

        combined_features = torch.cat((pooled, extra_features), dim=1)
        logits = self.regressor(combined_features)
        if labels is None:
            return logits

        # MSE loss for regression; flatten both sides explicitly so two 1-D
        # tensors are compared whatever the batch size or label shape.
        loss = nn.MSELoss()(logits.reshape(-1), labels.reshape(-1))
        return loss, logits
| """# 4. PREDICTION AND SUBMISSION SCRIPT""" | |
| # ===================================================================================== | |
| # 4. PREDICTION AND SUBMISSION SCRIPT | |
| # ===================================================================================== | |
| from safetensors.torch import load_file # Import load_file | |
def generate_predictions():
    """Predict on the test set with the weights stored under ``BEST_CHECKPOINT_PATH``.

    Stops early with a message when ``model.safetensors`` is missing from the
    checkpoint directory. Otherwise loads the tokenizer, the test data and the
    model weights, runs prediction, converts the raw outputs to class labels
    and writes the submission CSV plus a zipped copy. Errors are printed
    rather than raised.
    """
    print("\n===== 🏆 STARTING PREDICTION PIPELINE =====\n")
    try:
        # --- Validate the checkpoint before doing any expensive work ---
        model_weights_path = os.path.join(BEST_CHECKPOINT_PATH, 'model.safetensors')
        if not os.path.exists(model_weights_path):
            print(f"❌ ERROR: 'model.safetensors' not found at the specified path: {model_weights_path}")
            print("Please ensure the BEST_CHECKPOINT_PATH variable is set correctly to the directory containing the model weights.")
            return

        # --- Tokenizer ---
        print("1. Initializing tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

        # --- Test data ---
        print(f"2. Loading preprocessed test data from: {TEST_PROCESSED_PATH}")
        test_df = pd.read_csv(TEST_PROCESSED_PATH)
        test_df['features'] = test_df['features'].apply(ast.literal_eval)
        print(f" Loaded {len(test_df)} test records.")

        # --- Model weights ---
        print(f"3. Loading model from checkpoint: {BEST_CHECKPOINT_PATH}")
        model = HybridRegressionModel(MODEL_NAME, num_extra_features=NUM_FEATURES)
        state_dict = load_file(model_weights_path)

        # List every key in the loaded state dict before applying it.
        print("\n--- State Dictionary Keys ---")
        for weight_name in state_dict:
            print(weight_name)
        print("-----------------------------\n")

        model.load_state_dict(state_dict)
        print(" Model weights loaded successfully from model.safetensors.")

        # A Trainer with default arguments is used only to run prediction.
        predict_args = TrainingArguments(output_dir="./temp_results")
        trainer = Trainer(model=model, args=predict_args)

        # --- Test dataset (no labels) ---
        print("4. Creating test dataset...")
        test_dataset = ReadabilityDataset(
            texts=test_df['d3tok_text'].tolist(),
            features=test_df['features'].tolist(),
            tokenizer_obj=tokenizer,
        )

        # --- Prediction ---
        print("5. Generating predictions on the test set...")
        raw_predictions = trainer.predict(test_dataset)
        flat_outputs = raw_predictions.predictions.flatten()

        # Round to the nearest integer, clip to [0, TARGET_CLASSES - 1], then
        # add one to return to the original label numbering.
        rounded = np.round(flat_outputs)
        clipped_preds = np.clip(rounded, 0, TARGET_CLASSES - 1)
        test_df['Prediction'] = (clipped_preds + 1).astype(int)
        print(" Predictions generated and processed.")

        # --- Submission files ---
        renamed = test_df.rename(columns={'ID': 'id'})
        submission_df = renamed[['id', 'Prediction']]
        print(f"\n6. Saving final prediction CSV to: {SUBMISSION_PATH}")
        submission_df.to_csv(SUBMISSION_PATH, index=False)

        print(f"7. Compressing submission file into: {ZIPPED_SUBMISSION_PATH}")
        archive_member = os.path.basename(SUBMISSION_PATH)
        with zipfile.ZipFile(ZIPPED_SUBMISSION_PATH, 'w', zipfile.ZIP_DEFLATED) as zipf:
            zipf.write(SUBMISSION_PATH, arcname=archive_member)

        print("\n--- ✅ All Done! ---")
        print(f"Submission file '{os.path.basename(ZIPPED_SUBMISSION_PATH)}' has been saved to your Google Drive.")
        print(f"Location: {SUBMISSION_DIR}")
    except FileNotFoundError as e:
        print(f"❌ ERROR: A required file was not found: {e}")
        print(" Please ensure all paths in the configuration section are correct.")
    except Exception as e:
        print(f"❌ An unexpected error occurred: {e}")
| """# 5. EXECUTE SCRIPT""" | |
| # ===================================================================================== | |
| # 5. EXECUTE SCRIPT | |
| # ===================================================================================== | |
# Run the stand-alone prediction step only when executed as the main program.
if __name__ == "__main__":
    generate_predictions()
| """# Results of Sentence-level Readability Assessment - Constrained on The Blind Test | |
| {'accuracy': 42.1, 'accuracy+-1': 71.6, 'avg_abs_dist': 1.2, 'qwk': 82.1, 'accuracy_7': 59.9, 'accuracy_5': 65.4, 'accuracy_3': 73.4} | |
| """ |