|
from pathlib import Path |
|
from typing import Union |
|
|
|
import pandas as pd |
|
from sklearn.model_selection import train_test_split |
|
|
|
from src.data_reader import load_train_test_df |
|
from src.metrics import MSEMetric |
|
from src.solutions.base_solution import BaseSolution |
|
|
|
|
|
class ConstantPredictorSolution(BaseSolution):
    """Baseline solution that predicts the same constant for every target.

    The constant is the model's only state. ``save`` writes it as plain text
    to ``weights.ckpt`` inside the given directory and ``load`` reads it back.
    """

    # Target columns emitted by ``predict``, in output order.
    _TARGET_COLUMNS = (
        'cohesion',
        'syntax',
        'vocabulary',
        'phraseology',
        'grammar',
        'conventions',
    )
    # Name of the checkpoint file inside the save/load directory.
    _WEIGHTS_FILENAME = "weights.ckpt"

    def __init__(self, const: float = 3.0):
        """Create the predictor.

        Args:
            const: Value predicted for every target column of every row.
        """
        super().__init__()
        self.const = const

    def fit(self, X: pd.DataFrame, y: pd.DataFrame, **kwargs) -> None:
        """Do nothing: the prediction does not depend on training data."""

    def predict(self, X: pd.DataFrame) -> pd.DataFrame:
        """Return one row per input row with every target set to ``self.const``.

        Args:
            X: Frame containing a ``text_id`` column.

        Returns:
            A frame with a fresh ``0..n-1`` index holding ``text_id`` followed
            by the six target columns. An empty ``X`` yields an empty frame
            that still carries all of these columns.

        Raises:
            KeyError: If ``X`` has no ``text_id`` column.
        """
        # Column-wise construction avoids the per-row overhead of ``iterrows``
        # and keeps the original dtype of ``text_id``.
        text_ids = X[['text_id']].reset_index(drop=True)
        return text_ids.assign(
            **{column: self.const for column in self._TARGET_COLUMNS}
        )

    def save(self, directory: Union[str, Path]) -> None:
        """Write the constant to ``<directory>/weights.ckpt``.

        The directory (including missing parents) is created when needed.

        Args:
            directory: Target checkpoint directory.
        """
        directory = Path(directory)
        # ``exist_ok`` removes the check-then-create race of a manual
        # ``exists()`` test.
        directory.mkdir(parents=True, exist_ok=True)
        checkpoint = directory / self._WEIGHTS_FILENAME
        checkpoint.write_text(str(self.const), encoding="utf-8")

    def load(self, directory: Union[str, Path]) -> None:
        """Restore the constant from ``<directory>/weights.ckpt``.

        Loading never creates anything on disk: a missing directory or file
        is reported as an error instead of leaving an empty directory behind.

        Args:
            directory: Checkpoint directory previously passed to ``save``.

        Raises:
            FileNotFoundError: If the checkpoint file does not exist.
            ValueError: If the file content is not a valid float.
        """
        checkpoint = Path(directory) / self._WEIGHTS_FILENAME
        self.const = float(checkpoint.read_text(encoding="utf-8"))

    def to(self, device: str) -> 'BaseSolution':
        """Return ``self`` unchanged; ``device`` is ignored."""
        return self
|
|
|
|
|
def main():
    """Score the constant baseline on a hold-out split and write a submission.

    Loads the train/test frames, evaluates ``ConstantPredictorSolution`` on a
    random 20% hold-out of the training frame, prints the class RMSE metric,
    and writes the predictions for the test frame to ``submission.csv``
    (relative to the current working directory).
    """
    train_df, test_df = load_train_test_df()

    predictor = ConstantPredictorSolution()

    # The predictor is never fitted, so only the hold-out part of the split is
    # used. No ``random_state`` is passed, so the hold-out rows (and therefore
    # the printed score) can differ between runs.
    _, holdout_df = train_test_split(train_df, test_size=0.2)
    y_pred = predictor.predict(holdout_df)
    y_true = holdout_df[
        [
            'text_id',
            'cohesion',
            'syntax',
            'vocabulary',
            'phraseology',
            'grammar',
            'conventions',
        ]
    ]

    # Evaluate once and report once; the original repeated this exact
    # computation and print statement twice.
    class_rmse = MSEMetric().evaluate_class_rmse(y_pred, y_true)
    print(f"Calculation class metric: {class_rmse}")

    submission_df = predictor.predict(test_df)
    submission_df.to_csv("submission.csv", index=False)
|
|
|
|
|
# Script entry point: run the baseline pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|
|