Tymec committed on
Commit
b43b167
2 Parent(s): 85ac990 391bd16

Merge branch 'master' of https://github.com/Tymec/projekt-psi

Browse files
Files changed (4) hide show
  1. README.md +4 -0
  2. app/model/__init__.py +0 -0
  3. app/model/base.py +49 -0
  4. app/model/tfid_lr.py +35 -0
README.md CHANGED
@@ -12,6 +12,10 @@ Sentiment Analysis
12
  - [IMDb](https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews)
13
  - [Amazon Reviews](https://www.kaggle.com/datasets/bittlingmayer/amazonreviews)
14
 
 
 
 
 
15
  ### TODO
16
  - [ ] CLI using `click` (commands: predict, train, evaluate) with settings set via flags or environment variables
17
  - [ ] GUI using `gradio` (tabs: predict, train, evaluate, compare, settings)
 
12
  - [IMDb](https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews)
13
  - [Amazon Reviews](https://www.kaggle.com/datasets/bittlingmayer/amazonreviews)
14
 
15
+ ### Required tools
16
+ - `just`
17
+ - `poetry`
18
+
19
  ### TODO
20
  - [ ] CLI using `click` (commands: predict, train, evaluate) with settings set via flags or environment variables
21
  - [ ] GUI using `gradio` (tabs: predict, train, evaluate, compare, settings)
app/model/__init__.py ADDED
File without changes
app/model/base.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import TYPE_CHECKING
5
+
6
+ import joblib
7
+
8
+ if TYPE_CHECKING:
9
+ from pathlib import Path
10
+
11
+ from sklearn.pipeline import Pipeline
12
+
13
+
14
class Model(ABC):
    """Base class for all models.

    Subclasses provide the pipeline, a description of the architecture and the
    single-text prediction routine (``_predict``). Saving, loading, the public
    ``predict`` entry point and training are implemented here on top of those.
    """

    @property
    @abstractmethod
    def pipeline(self) -> Pipeline:
        """Pipeline used for the model."""
        ...

    @property
    @abstractmethod
    def description(self) -> str:
        """Description of the architecture."""
        ...

    @abstractmethod
    def _predict(self, text: str) -> int:
        """Predict the sentiment of the given text."""
        ...

    @staticmethod
    def from_file(path: Path) -> Model:
        """Load the model from the given file.

        Args:
            path: Location of a file previously written by ``to_file``.

        Returns:
            The deserialized model.

        Raises:
            TypeError: If the file deserializes to something that is not a
                ``Model`` instance.
        """
        # SECURITY: joblib persistence is pickle-based, so loading a file can
        # execute arbitrary code. Only load files from trusted sources.
        loaded = joblib.load(path)
        # Fail here with a clear message rather than later with an unrelated
        # AttributeError when the caller tries to use the object as a model.
        if not isinstance(loaded, Model):
            raise TypeError(
                f"Expected a Model instance in {path}, got {type(loaded).__name__}"
            )
        return loaded

    def to_file(self, path: Path) -> None:
        """Save the model to the given file.

        The whole model object (not just its pipeline) is dumped with joblib.
        """
        joblib.dump(self, path)

    def predict(self, text: str) -> int:
        """Perform sentiment analysis on the given text.

        Delegates to the subclass-specific ``_predict``.
        """
        return self._predict(text)

    def train(self, x: list[str], y: list[int]) -> None:
        """Train the model on the given data.

        Args:
            x: Input texts.
            y: Labels, one per entry of ``x``.
        """
        self.pipeline.fit(x, y)
app/model/tfid_lr.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
4
+ from sklearn.linear_model import LogisticRegression
5
+ from sklearn.pipeline import Pipeline
6
+
7
+ from .base import Model
8
+
9
+
10
class TfidfLR(Model):
    """Sentiment analysis model using TF-IDF and Logistic Regression."""

    def __init__(self, rng: int | None = None, cache: str | None = None) -> None:
        """Build the vectorize -> tfidf -> clf pipeline.

        Args:
            rng: Forwarded to ``LogisticRegression(random_state=...)``.
            cache: Forwarded to ``Pipeline(memory=...)``.
        """
        super().__init__()
        # These attributes were previously read below without ever being
        # assigned, which made construction fail with AttributeError. They are
        # now set from the (optional) constructor arguments first.
        self.rng = rng
        self.cache = cache
        self._pipeline = Pipeline(
            [
                (
                    "vectorize",
                    CountVectorizer(stop_words="english", ngram_range=(1, 2), max_features=10000),
                ),
                ("tfidf", TfidfTransformer()),
                ("clf", LogisticRegression(max_iter=1000, random_state=self.rng)),
            ],
            memory=self.cache,
        )

    @property
    def pipeline(self) -> Pipeline:
        """Pipeline built in ``__init__``."""
        return self._pipeline

    @property
    def description(self) -> str:
        """Description of the architecture."""
        return "TF-IDF with Logistic Regression"

    def _predict(self, text: str) -> int:
        """Predict the sentiment of the given text.

        The pipeline predicts on a one-element batch; the single result is
        converted with ``int()`` so the return value matches the annotation
        (the raw element is presumably a NumPy scalar rather than a built-in
        ``int``).
        """
        return int(self.pipeline.predict([text])[0])