linguask / src /solutions /constant_predictor.py
GitHub Action
refs/heads/ci-cd/hugging-face
8b414b0
from pathlib import Path
from typing import Union
import pandas as pd
from sklearn.model_selection import train_test_split
from src.data_reader import load_train_test_df
from src.metrics import MSEMetric
from src.solutions.base_solution import BaseSolution
class ConstantPredictorSolution(BaseSolution):
    """Baseline solution that predicts one constant for every target column.

    The constant is the only "weight" of the model: ``fit`` does nothing,
    ``predict`` fills every target column with it, and ``save``/``load``
    persist it as plain text in ``weights.ckpt`` inside a directory.
    """

    # Columns filled with the constant in every prediction frame.
    _TARGET_COLUMNS = (
        'cohesion',
        'syntax',
        'vocabulary',
        'phraseology',
        'grammar',
        'conventions',
    )
    # File (inside the checkpoint directory) that stores the constant.
    _WEIGHTS_FILENAME = "weights.ckpt"

    def __init__(self, const=3.0):
        """Create the predictor.

        Args:
            const: Value written into every target column by ``predict``.
        """
        super().__init__()
        self.const = const

    def fit(self, X: pd.DataFrame, y: pd.DataFrame, **kwargs) -> None:
        """Do nothing: a constant predictor has nothing to learn."""

    def predict(self, X: pd.DataFrame) -> pd.DataFrame:
        """Return a prediction frame with one row per row of ``X``.

        Args:
            X: Input frame; must contain a ``text_id`` column.

        Returns:
            A frame with a fresh 0..n-1 index, the ``text_id`` values of
            ``X`` and every target column set to ``self.const``. The columns
            are present even when ``X`` has no rows.
        """
        # Built column-wise instead of row-by-row: faster than iterrows(),
        # and text_id is not upcast by sharing a row with float columns.
        # to_numpy() drops X's index so the result gets a default RangeIndex.
        submission_df = pd.DataFrame({'text_id': X['text_id'].to_numpy()})
        for column in self._TARGET_COLUMNS:
            submission_df[column] = self.const
        return submission_df

    def save(self, directory: Union[str, Path]) -> None:
        """Write the constant to ``<directory>/weights.ckpt`` as text.

        The directory (and any missing parents) is created when absent.

        Args:
            directory: Folder that receives the checkpoint file.
        """
        directory = Path(directory)
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        directory.mkdir(parents=True, exist_ok=True)
        path = directory / self._WEIGHTS_FILENAME
        with open(path, 'w') as file:
            file.write(str(self.const))

    def load(self, directory: Union[str, Path]) -> None:
        """Read the constant back from ``<directory>/weights.ckpt``.

        Loading never creates anything on disk.

        Args:
            directory: Folder that contains the checkpoint file.

        Raises:
            FileNotFoundError: If the directory or the checkpoint file
                does not exist.
            ValueError: If the file content is not a valid float.
        """
        path = Path(directory) / self._WEIGHTS_FILENAME
        with open(path, 'r') as file:
            self.const = float(file.read())

    def to(self, device: str) -> 'BaseSolution':
        """Return ``self`` unchanged; ``device`` is ignored by this model."""
        return self
def main():
    """Evaluate the constant baseline on a hold-out split and write a submission.

    A random 20% of the training frame is held out, the default constant
    predictor is scored on it with ``MSEMetric.evaluate_class_rmse``, and the
    predictions for the test frame are written to ``submission.csv`` in the
    current working directory.
    """
    train_df, test_df = load_train_test_df()
    predictor = ConstantPredictorSolution()

    # Only the hold-out part is needed: the predictor has nothing to fit.
    # NOTE(review): no random_state is passed, so the split (and therefore
    # the printed metric) differs between runs.
    _, test_data = train_test_split(train_df, test_size=0.2)

    y_pred = predictor.predict(test_data)
    y_true = test_data[['text_id', 'cohesion', 'syntax', 'vocabulary', 'phraseology', 'grammar', 'conventions']]

    metric = MSEMetric()
    # The metric used to be evaluated and printed twice with identical
    # arguments; a single evaluation carries the same information.
    print(f"Calculation class metric: {metric.evaluate_class_rmse(y_pred, y_true)}")

    submission_df = predictor.predict(test_df)
    submission_df.to_csv("submission.csv", index=False)
# Run the baseline evaluation only when executed as a script, not on import.
if __name__ == '__main__':
    main()