fdsafdas
Browse files- hugging_face/dataset.py +7 -14
hugging_face/dataset.py
CHANGED
@@ -4,20 +4,13 @@ from pandas import DataFrame, read_csv
|
|
4 |
REPO_ID = "kompiangg/twitter_hate_speech_classification"
|
5 |
|
6 |
def load_dataset(filename) -> DataFrame:
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
encoding='latin-1',
|
14 |
-
cache_dir='./.cache'
|
15 |
-
)
|
16 |
|
17 |
-
|
18 |
-
df.to_csv(filename, sep=',', index=False)
|
19 |
-
|
20 |
-
df = df.astype({ 'HS': 'category' })
|
21 |
-
df = df.astype({ 'Tweet': 'string' })
|
22 |
|
23 |
return df
|
|
|
4 |
REPO_ID = "kompiangg/twitter_hate_speech_classification"
|
5 |
|
6 |
def load_dataset(filename) -> DataFrame:
|
7 |
+
datasets = hf_load_dataset(
|
8 |
+
REPO_ID,
|
9 |
+
data_files=filename,
|
10 |
+
encoding='latin-1',
|
11 |
+
cache_dir='./.cache'
|
12 |
+
)
|
|
|
|
|
|
|
13 |
|
14 |
+
df = DataFrame(data=datasets['train'])
|
|
|
|
|
|
|
|
|
15 |
|
16 |
return df
|