Spaces:
Sleeping
Sleeping
luminoussg
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -6,13 +6,9 @@ import json
|
|
6 |
def count_tokens(json_file, encoding_name):
|
7 |
encoding = tiktoken.get_encoding(encoding_name)
|
8 |
|
9 |
-
#
|
10 |
-
if not json_file.name.endswith('.jsonl'):
|
11 |
-
return {"error": "Please upload a valid .jsonl file."}, 0
|
12 |
-
|
13 |
-
# Load the JSONL data
|
14 |
with open(json_file.name, 'r') as f:
|
15 |
-
data = [json.loads(line) for line in f.readlines()]
|
16 |
|
17 |
total_token_count = 0
|
18 |
token_counts = []
|
@@ -56,10 +52,10 @@ encoding_options = [
|
|
56 |
|
57 |
# Gradio UI setup
|
58 |
with gr.Blocks() as app:
|
59 |
-
gr.Markdown("# Token Counter for JSONL Datasets")
|
60 |
|
61 |
with gr.Row():
|
62 |
-
json_input = gr.File(label="Upload
|
63 |
encoding_dropdown = gr.Dropdown(choices=encoding_options, label="Select Encoding", value="o200k_base (gpt-4o, gpt-4o-mini)")
|
64 |
|
65 |
# Output for individual conversation token counts
|
|
|
6 |
def count_tokens(json_file, encoding_name):
|
7 |
encoding = tiktoken.get_encoding(encoding_name)
|
8 |
|
9 |
+
# Load the JSON or JSONL data
|
|
|
|
|
|
|
|
|
10 |
with open(json_file.name, 'r') as f:
|
11 |
+
data = json.load(f) if json_file.name.endswith('.json') else [json.loads(line) for line in f.readlines()]
|
12 |
|
13 |
total_token_count = 0
|
14 |
token_counts = []
|
|
|
52 |
|
53 |
# Gradio UI setup
|
54 |
with gr.Blocks() as app:
|
55 |
+
gr.Markdown("# Token Counter for JSON/JSONL Datasets")
|
56 |
|
57 |
with gr.Row():
|
58 |
+
json_input = gr.File(label="Upload JSON/JSONL File")
|
59 |
encoding_dropdown = gr.Dropdown(choices=encoding_options, label="Select Encoding", value="o200k_base (gpt-4o, gpt-4o-mini)")
|
60 |
|
61 |
# Output for individual conversation token counts
|