kompiangg committed on
Commit
495c5c8
1 Parent(s): 7601722
Files changed (1) hide show
  1. hugging_face/dataset.py +7 -14
hugging_face/dataset.py CHANGED
@@ -4,20 +4,13 @@ from pandas import DataFrame, read_csv
4
  REPO_ID = "kompiangg/twitter_hate_speech_classification"
5
 
6
  def load_dataset(filename) -> DataFrame:
7
- try:
8
- df = read_csv(filename, encoding='latin-1')
9
- except:
10
- datasets = hf_load_dataset(
11
- REPO_ID,
12
- data_files=filename,
13
- encoding='latin-1',
14
- cache_dir='./.cache'
15
- )
16
 
17
- df = DataFrame(data=datasets['train'])
18
- df.to_csv(filename, sep=',', index=False)
19
-
20
- df = df.astype({ 'HS': 'category' })
21
- df = df.astype({ 'Tweet': 'string' })
22
 
23
  return df
 
4
  REPO_ID = "kompiangg/twitter_hate_speech_classification"
5
 
6
  def load_dataset(filename) -> DataFrame:
7
+ datasets = hf_load_dataset(
8
+ REPO_ID,
9
+ data_files=filename,
10
+ encoding='latin-1',
11
+ cache_dir='./.cache'
12
+ )
 
 
 
13
 
14
+ df = DataFrame(data=datasets['train'])
 
 
 
 
15
 
16
  return df