admin committed · Commit 0a5fc93 · 1 Parent(s): af319cd · 2 en
app.py CHANGED
@@ -11,7 +11,7 @@ def infer(input_file: str):
             return output_file, pd.DataFrame(data_list)
 
     except Exception as e:
-        return None, pd.DataFrame([{"
+        return None, pd.DataFrame([{"Please upload a standard data file": f"{e}"}])
 
 
 if __name__ == "__main__":
@@ -26,21 +26,21 @@ if __name__ == "__main__":
                     f"{types[0]} → {types[1]}",
                     f"{types[0]} ← {types[1]}",
                 ],
-                label="
+                label="Mode",
                 value=f"{types[0]} → {types[1]}",
             )
             input_file = gr.components.File(
                 type="filepath",
-                label="
+                label="Upload input file",
                 file_types=[f".{types[0]}", f".{types[1]}"],
             )
-            convert_btn = gr.Button("
+            convert_btn = gr.Button("Convert")
 
         with gr.Column():
             output_file = gr.components.File(
-                type="filepath", label="
+                type="filepath", label="Download output file"
             )
-            data_viewer = gr.Dataframe(label="
+            data_viewer = gr.Dataframe(label="Data viewer")
 
     option.change(change_mode, inputs=option)
     tab.select(change_mode, inputs=option)
@@ -50,7 +50,7 @@ if __name__ == "__main__":
 
     gr.Markdown(
         """
-        ## 
+        ## Supported JSON format
         ```
         [
             {
@@ -66,13 +66,13 @@ if __name__ == "__main__":
             ...
         ]
         ```
-        ## 
+        ## Supported jsonl format
        ```
        {"key1": "val11", "key2": "val12", ...}
        {"key1": "val21", "key2": "val22", ...}
        ...
        ```
-        ## 
+        ## Supported CSV format
        ```
        key1, key2, ...
        val11, val12, ...
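The new `except` branch in `infer` settles on a small convention for the error path: return no output file and a one-row pandas DataFrame whose single column header carries the user-facing message, so the same `Dataframe` viewer that normally shows the converted data displays the error instead. Below is a minimal sketch of that pattern using only pandas; `error_result` is a hypothetical helper for illustration, not part of app.py.

```python
import pandas as pd

# User-facing label added in this commit (column header of the error table).
ERROR_LABEL = "Please upload a standard data file"

def error_result(e: Exception):
    # Mirror the convention from infer's except branch:
    # no output file, plus a one-row DataFrame keyed by the message.
    return None, pd.DataFrame([{ERROR_LABEL: f"{e}"}])

output_file, table = error_result(ValueError("not a .jsonl/.csv file"))
print(output_file)  # None -> the file output component stays empty
print(table)        # the data viewer shows the message as the column name,
                    # with the exception text as the single cell
```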
utils.py CHANGED
@@ -4,9 +4,7 @@ import json
 import shutil
 
 TMP_DIR = "./__pycache__"
-
 TAB_CONFIG = ["jsonl ⇆ csv", "json ⇆ csv", "json ⇆ jsonl"]
-
 MODE = {"from": "jsonl", "to": "csv"}
 
 
@@ -29,7 +27,6 @@ def encoder_jsonl(file_path: str):
     data_list = []
     with open(file_path, "r", encoding="utf-8") as file:
         for line in file:
-            # 加载每一行的 JSON 数据 (load each line's JSON data)
             json_data = json.loads(line.strip())
             data_list.append(json_data)
 
@@ -49,7 +46,6 @@ def encoder_csv(file_path: str):
 def decoder_json(data_list: list, file_path=f"{TMP_DIR}/output.json"):
     if data_list:
         with open(file_path, "w", encoding="utf-8") as file:
-            # 将整个列表转换成 JSON 格式并写入文件 (convert the whole list to JSON and write it to the file)
             json.dump(data_list, file, ensure_ascii=False, indent=4)
 
     return file_path
@@ -57,13 +53,10 @@ def decoder_json(data_list: list, file_path=f"{TMP_DIR}/output.json"):
 
 def decoder_csv(data_list: list, file_path=f"{TMP_DIR}/output.csv"):
     if data_list:
-        # 提取第一个字典的键作为表头 (use the first dict's keys as the header row)
         header = list(data_list[0].keys())
         with open(file_path, "w", newline="", encoding="utf-8") as file:
             csv_writer = csv.writer(file)
-            # 写入表头 (write the header row)
             csv_writer.writerow(header)
-            # 逐项写入字典的值 (write each dict's values, row by row)
             for item in data_list:
                 csv_writer.writerow([item[key] for key in header])
 
@@ -74,7 +67,6 @@ def decoder_jsonl(data_list: list, file_path=f"{TMP_DIR}/output.jsonl"):
     if data_list:
         with open(file_path, "w", encoding="utf-8") as file:
             for data in data_list:
-                # 将每个 JSON 对象转换成字符串并写入文件，每行一个对象 (convert each JSON object to a string and write one per line)
                 json_line = json.dumps(data, ensure_ascii=False)
                 file.write(json_line + "\n")
 
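Taken together, the helpers touched above form a simple pipeline: an `encoder_*` function reads an uploaded file into a list of dicts, and a `decoder_*` function writes that list back out in the target format under `TMP_DIR`. The sketch below shows the jsonl → csv direction; the sample file name is made up, and it assumes `encoder_jsonl` returns the parsed list and `decoder_csv` returns its output path (both returns fall outside the hunks shown, so they are assumptions).

```python
import os
from utils import TMP_DIR, encoder_jsonl, decoder_csv

# Default output paths live under TMP_DIR ("./__pycache__"),
# so make sure it exists before writing anything there.
os.makedirs(TMP_DIR, exist_ok=True)

# Hypothetical sample input in the jsonl layout documented in app.py.
sample_path = os.path.join(TMP_DIR, "sample.jsonl")
with open(sample_path, "w", encoding="utf-8") as f:
    f.write('{"key1": "val11", "key2": "val12"}\n')
    f.write('{"key1": "val21", "key2": "val22"}\n')

data_list = encoder_jsonl(sample_path)  # assumed to return [{"key1": ...}, ...]
csv_path = decoder_csv(data_list)       # assumed to return f"{TMP_DIR}/output.csv"

with open(csv_path, encoding="utf-8") as f:
    print(f.read())
# Expected:
# key1,key2
# val11,val12
# val21,val22
```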
|