luminoussg committed on
Commit
0cc0797
·
verified ·
1 Parent(s): ee3031b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -8
app.py CHANGED
@@ -6,13 +6,9 @@ import json
6
  def count_tokens(json_file, encoding_name):
7
  encoding = tiktoken.get_encoding(encoding_name)
8
 
9
- # Validate that the file is a .jsonl file
10
- if not json_file.name.endswith('.jsonl'):
11
- return {"error": "Please upload a valid .jsonl file."}, 0
12
-
13
- # Load the JSONL data
14
  with open(json_file.name, 'r') as f:
15
- data = [json.loads(line) for line in f.readlines()]
16
 
17
  total_token_count = 0
18
  token_counts = []
@@ -56,10 +52,10 @@ encoding_options = [
56
 
57
  # Gradio UI setup
58
  with gr.Blocks() as app:
59
- gr.Markdown("# Token Counter for JSONL Datasets (OpenAI Fine-Tuning)")
60
 
61
  with gr.Row():
62
- json_input = gr.File(label="Upload .jsonl File", type="file") # Accept only file uploads
63
  encoding_dropdown = gr.Dropdown(choices=encoding_options, label="Select Encoding", value="o200k_base (gpt-4o, gpt-4o-mini)")
64
 
65
  # Output for individual conversation token counts
 
6
  def count_tokens(json_file, encoding_name):
7
  encoding = tiktoken.get_encoding(encoding_name)
8
 
9
+ # Load the JSON or JSONL data
 
 
 
 
10
  with open(json_file.name, 'r') as f:
11
+ data = json.load(f) if json_file.name.endswith('.json') else [json.loads(line) for line in f.readlines()]
12
 
13
  total_token_count = 0
14
  token_counts = []
 
52
 
53
  # Gradio UI setup
54
  with gr.Blocks() as app:
55
+ gr.Markdown("# Token Counter for JSON/JSONL Datasets")
56
 
57
  with gr.Row():
58
+ json_input = gr.File(label="Upload JSON/JSONL File")
59
  encoding_dropdown = gr.Dropdown(choices=encoding_options, label="Select Encoding", value="o200k_base (gpt-4o, gpt-4o-mini)")
60
 
61
  # Output for individual conversation token counts