Update tasks/text.py
tasks/text.py  CHANGED  (+21 -3)
@@ -18,13 +18,18 @@ async def evaluate_text(request: TextEvaluationRequest):
     """
     Evaluate text classification for climate disinformation detection.
 
-    Current Model:
-    - Makes random predictions from the label space (0-7)
+    Current Model: Logistic regression
     - Used as a baseline for comparison
     """
     # Get space info
     username, space_url = get_space_info()
 
+    from sklearn.linear_model import LogisticRegression
+    from sklearn.feature_extraction.text import TfidfVectorizer
+    from sklearn.model_selection import train_test_split
+    from sklearn import metrics
+    from datetime import datetime
+
     # Define the label mapping
     LABEL_MAPPING = {
         "0_not_relevant": 0,
@@ -44,9 +49,19 @@ async def evaluate_text(request: TextEvaluationRequest):
     dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
 
     # Split dataset
+    #train_test = dataset.train_test_split(test_size=.33, seed=42)
     train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
     test_dataset = train_test["test"]
+
+    #test_dataset = train_test["test"]
+    #train_dataset = train_test["train"]
+
+    tfidf_vect = TfidfVectorizer(stop_words = 'english')
 
+    tfidf_train = tfidf_vect.fit_transform(train_dataset['quote'])
+    tfidf_test = tfidf_vect.transform(test_dataset['quote'])
+
+
     # Start tracking emissions
     tracker.start()
     tracker.start_task("inference")
@@ -58,8 +73,11 @@ async def evaluate_text(request: TextEvaluationRequest):
 
     # Make random predictions (placeholder for actual model inference)
     true_labels = test_dataset["label"]
-    predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
 
+    LR = LogisticRegression(class_weight='balanced', max_iter=20, random_state=1234,
+                            solver='liblinear')
+    LR.fit(pd.DataFrame.sparse.from_spmatrix(tfidf_train), pd.DataFrame(y_train_v))
+    predictions=LR.predict(pd.DataFrame.sparse.from_spmatrix(tfidf_test))
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------
|