Spaces:
Sleeping
Sleeping
use tempfile
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
import json
|
3 |
import PyPDF2
|
4 |
import markdown
|
|
|
5 |
|
6 |
def convert_to_jsonl(file):
|
7 |
content = ""
|
@@ -22,12 +23,17 @@ def convert_to_jsonl(file):
|
|
22 |
json_obj = {"id": i, "text": line.strip()}
|
23 |
jsonl_output += json.dumps(json_obj) + "\n"
|
24 |
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
iface = gr.Interface(
|
28 |
fn=convert_to_jsonl,
|
29 |
inputs=gr.File(label="Upload .txt, .pdf, or .md file"),
|
30 |
-
outputs=gr.File(label="Download JSONL file"
|
31 |
title="Raw Text to Indexed JSONL Converter",
|
32 |
description="Upload a .txt, .pdf, or .md file to convert it to an indexed JSONL file for LLM training."
|
33 |
)
|
|
|
2 |
import json
|
3 |
import PyPDF2
|
4 |
import markdown
|
5 |
+
import tempfile
|
6 |
|
7 |
def convert_to_jsonl(file):
|
8 |
content = ""
|
|
|
23 |
json_obj = {"id": i, "text": line.strip()}
|
24 |
jsonl_output += json.dumps(json_obj) + "\n"
|
25 |
|
26 |
+
# Save the JSONL output to a temporary file with an auto-generated name ending in ".jsonl"; delete=False keeps it on disk so its path can be returned
|
27 |
+
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".jsonl")
|
28 |
+
temp_file.write(jsonl_output.encode('utf-8'))
|
29 |
+
temp_file.close()
|
30 |
+
|
31 |
+
return temp_file.name
|
32 |
|
33 |
iface = gr.Interface(
|
34 |
fn=convert_to_jsonl,
|
35 |
inputs=gr.File(label="Upload .txt, .pdf, or .md file"),
|
36 |
+
outputs=gr.File(label="Download JSONL file"),
|
37 |
title="Raw Text to Indexed JSONL Converter",
|
38 |
description="Upload a .txt, .pdf, or .md file to convert it to an indexed JSONL file for LLM training."
|
39 |
)
|