Spaces:

mlgeis
/

arxiv-subject-classifier-demo

Runtime error

Michael-Geis committed on Jul 29, 2023

Commit

1c04d6f

•

1 Parent(s): 8cfea01

modified dependencies

Files changed (3) hide show

postprocess.py CHANGED Viewed

@@ -1,7 +1,4 @@
-from sklearn.base import TransformerMixin, BaseEstimator
 import json
-import pandas as pd
-import numpy as np
 def postprocess(model_output):
@@ -15,38 +12,38 @@ def postprocess(model_output):
     return sorted([subject_dict[tag] for tag in predicted_tags])
-class ModelOutputDecoder(BaseEstimator, TransformerMixin):
-    def fit(self, X, y=None):
-        return self
-    def transform(self, X, y=None):
-        if y is None:
-            return X
-        ## Load label dictionary
-        with open("./data/arxiv-label-dict.json") as file:
-            string_dict = file.read()
-            label_dict = json.loads(string_dict)
-            col_list = list(label_dict.keys())
-        def decode_label(label):
-            ## For a row of y (individual label) returns the list of english subjects corresponding to this label
-            return [label_dict[col_list[index]] for index in np.where(label == 1)[0]]
-        num_rows, _ = y.shape
-        decoded_labels = []
-        for i in range(num_rows):
-            decoded_labels.append(decode_label(y[i, :]))
-        decoded_labels_as_series = pd.Series(
-            decoded_labels, name="decoded_labels", index=X.index
-        )
-        return pd.merge(
-            left=X,
-            left_index=True,
-            right=decoded_labels_as_series,
-            right_index=True,
-            validate="1:1",
-        )

 import json
 def postprocess(model_output):
     return sorted([subject_dict[tag] for tag in predicted_tags])
+# class ModelOutputDecoder(BaseEstimator, TransformerMixin):
+#     def fit(self, X, y=None):
+#         return self
+#     def transform(self, X, y=None):
+#         if y is None:
+#             return X
+#         ## Load label dictionary
+#         with open("./data/arxiv-label-dict.json") as file:
+#             string_dict = file.read()
+#             label_dict = json.loads(string_dict)
+#             col_list = list(label_dict.keys())
+#         def decode_label(label):
+#             ## For a row of y (individual label) returns the list of english subjects corresponding to this label
+#             return [label_dict[col_list[index]] for index in np.where(label == 1)[0]]
+#         num_rows, _ = y.shape
+#         decoded_labels = []
+#         for i in range(num_rows):
+#             decoded_labels.append(decode_label(y[i, :]))
+#         decoded_labels_as_series = pd.Series(
+#             decoded_labels, name="decoded_labels", index=X.index
+#         )
+#         return pd.merge(
+#             left=X,
+#             left_index=True,
+#             right=decoded_labels_as_series,
+#             right_index=True,
+#             validate="1:1",
+#         )

preprocess.py CHANGED Viewed

@@ -1,6 +1,3 @@
-from sklearn.pipeline import Pipeline
-from sklearn.base import BaseEstimator, TransformerMixin
-import pandas as pd
 import regex

requirements.txt CHANGED Viewed

@@ -1,8 +1,5 @@
-numpy
-pandas
-scikit-learn
-scikit-multilearn
 arxiv
 transformers
-torch
-datasets

 arxiv
+regex
+scikit-learn
 transformers
+torch