Spaces:
Runtime error
Runtime error
updates
Browse files
- .gitignore +1 -0
- data.dvc +4 -4
- src/data/process_data.py +3 -3
.gitignore
CHANGED
@@ -91,3 +91,4 @@ coverage.xml
|
|
91 |
|
92 |
.idea
|
93 |
.vscode
|
|
|
|
91 |
|
92 |
.idea
|
93 |
.vscode
|
94 |
+
/data
|
data.dvc
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
outs:
|
2 |
-
- md5:
|
3 |
-
path: data
|
4 |
-
size:
|
5 |
-
nfiles:
|
|
|
1 |
outs:
|
2 |
+
- md5: 4088e0a288132d141c28bd020548d107.dir
|
3 |
+
path: data
|
4 |
+
size: 2720315628
|
5 |
+
nfiles: 6
|
src/data/process_data.py
CHANGED
@@ -3,12 +3,12 @@ import pandas as pd
|
|
3 |
|
4 |
def process_data(split='train'):
|
5 |
df = pd.read_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/raw/{}.csv'.format(split))
|
6 |
-
df.
|
7 |
-
print(df.
|
8 |
df.to_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/processed/{}.csv'.format(split))
|
9 |
|
10 |
|
11 |
-
if __name__ == '
|
12 |
process_data(split='train')
|
13 |
process_data(split='test')
|
14 |
process_data(split='validation')
|
|
|
3 |
|
4 |
def process_data(split='train'):
|
5 |
df = pd.read_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/raw/{}.csv'.format(split))
|
6 |
+
df.columns = ['Unnamed: 0', 'input_text', 'output_text']
|
7 |
+
print(df.columns)
|
8 |
df.to_csv('C:/Users/gbhat/Documents/GitHub/summarization/data/processed/{}.csv'.format(split))
|
9 |
|
10 |
|
11 |
+
if __name__ == '__main__':
|
12 |
process_data(split='train')
|
13 |
process_data(split='test')
|
14 |
process_data(split='validation')
|