Spaces:
Sleeping
Sleeping
Slight optimizations
Browse files
- app/data.py +7 -7
- app/model.py +1 -1
- justfile +2 -0
app/data.py
CHANGED
@@ -93,19 +93,19 @@ def load_amazonreviews(merge: bool = True) -> tuple[list[str], list[int]]:
|
|
93 |
raise FileNotFoundError(msg)
|
94 |
|
95 |
# Load the datasets
|
|
|
96 |
with bz2.BZ2File(AMAZONREVIEWS_PATH[1]) as train_file:
|
97 |
-
|
98 |
|
99 |
-
test_data = []
|
100 |
if merge:
|
101 |
with bz2.BZ2File(AMAZONREVIEWS_PATH[0]) as test_file:
|
102 |
-
|
103 |
-
|
104 |
-
# Merge the datasets
|
105 |
-
data = train_data + test_data
|
106 |
|
107 |
# Split the data into labels and text
|
108 |
-
labels, texts = zip(*(line.split(" ", 1) for line in data))
|
|
|
|
|
|
|
109 |
|
110 |
# Map sentiment values
|
111 |
sentiments = [int(label.split("__label__")[1]) - 1 for label in labels]
|
|
|
93 |
raise FileNotFoundError(msg)
|
94 |
|
95 |
# Load the datasets
|
96 |
+
dataset = []
|
97 |
with bz2.BZ2File(AMAZONREVIEWS_PATH[1]) as train_file:
|
98 |
+
dataset.extend([line.decode("utf-8") for line in train_file])
|
99 |
|
|
|
100 |
if merge:
|
101 |
with bz2.BZ2File(AMAZONREVIEWS_PATH[0]) as test_file:
|
102 |
+
dataset.extend([line.decode("utf-8") for line in test_file])
|
|
|
|
|
|
|
103 |
|
104 |
# Split the data into labels and text
|
105 |
+
labels, texts = zip(*(line.split(" ", 1) for line in dataset)) # NOTE: Occasionally OOM
|
106 |
+
|
107 |
+
# Free up memory
|
108 |
+
del dataset
|
109 |
|
110 |
# Map sentiment values
|
111 |
sentiments = [int(label.split("__label__")[1]) - 1 for label in labels]
|
app/model.py
CHANGED
@@ -121,7 +121,7 @@ def create_model(
|
|
121 |
token_pattern=None,
|
122 |
),
|
123 |
),
|
124 |
-
("classifier", LogisticRegression(max_iter=1000,
|
125 |
],
|
126 |
memory=Memory(CACHE_DIR, verbose=0),
|
127 |
verbose=verbose,
|
|
|
121 |
token_pattern=None,
|
122 |
),
|
123 |
),
|
124 |
+
("classifier", LogisticRegression(max_iter=1000, random_state=seed)),
|
125 |
],
|
126 |
memory=Memory(CACHE_DIR, verbose=0),
|
127 |
verbose=verbose,
|
justfile
CHANGED
@@ -13,9 +13,11 @@
|
|
13 |
@install-dev:
|
14 |
poetry self add poetry-plugin-export
|
15 |
poetry install
|
|
|
16 |
|
17 |
@requirements:
|
18 |
poetry export -f requirements.txt --output requirements.txt --without dev
|
|
|
19 |
|
20 |
[no-exit-message]
|
21 |
@app *ARGS:
|
|
|
13 |
@install-dev:
|
14 |
poetry self add poetry-plugin-export
|
15 |
poetry install
|
16 |
+
poetry run spacy download en_core_web_sm
|
17 |
|
18 |
@requirements:
|
19 |
poetry export -f requirements.txt --output requirements.txt --without dev
|
20 |
+
poetry export -f requirements.txt --output requirements-dev.txt
|
21 |
|
22 |
[no-exit-message]
|
23 |
@app *ARGS:
|