Aivis committed on
Commit
400e980
·
verified ·
1 Parent(s): d49c2cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -15
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import gradio as gr
 
2
  import pandas as pd
3
  import tiktoken
4
  import anthropic
5
- #import os
6
 
7
  def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
8
  # Check if file is uploaded
@@ -27,7 +27,10 @@ def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthr
27
  openai_encoding = tiktoken.get_encoding("cl100k_base")
28
 
29
  token_counts_openai = {}
30
- total_tokens_openai = len(openai_encoding.encode(df.to_csv(index=False)))
 
 
 
31
 
32
  # Iterate over columns
33
  for col in df.columns:
@@ -43,7 +46,7 @@ def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthr
43
  #total_tokens_openai += tokens_openai
44
 
45
  # Prepare OpenAI output
46
- output += f"**Total OpenAI Tokens ({openai_model}): {total_tokens_openai}**\n"
47
  output += f"\n**OpenAI Token Counts per Column ({openai_model}):**\n\n"
48
  for col, count in token_counts_openai.items():
49
  output += f"- {col}: {count} tokens\n"
@@ -57,24 +60,39 @@ def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthr
57
 
58
  # Initialize the Anthropic client
59
  #client = anthropic.Anthropic(api_key=anthropic_api_key)
60
- #client = anthropic.Anthropic()
61
- try:
62
- client = anthropic.Anthropic()
63
- print("Anthropic client initialized successfully")
64
- except Exception as e:
65
- return f"Error initializing Anthropic client: {e}"
66
 
67
  token_counts_anthropic = {}
 
68
  try:
69
- total_tokens_anthropic = client.count_tokens(df.to_csv(index=False))
 
 
 
 
 
 
 
 
 
70
  except Exception as e:
71
- return f"Error counting tokens with Anthropic model ({anthropic.__version__}): {e}"
 
72
 
73
  # Iterate over columns
74
  for col in df.columns:
75
  #tokens_col_anthropic = 0
76
  try:
77
- tokens_anthropic = client.count_tokens('\n'.join([col]+list(df[col].astype(str).values)))
 
 
 
 
 
 
 
 
 
78
  except Exception as e:
79
  return f"Error counting tokens with Anthropic model: {e}"
80
  # for cell in df[col].astype(str):
@@ -87,7 +105,7 @@ def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthr
87
  #total_tokens_anthropic += tokens_anthropic
88
 
89
  # Prepare Anthropic output
90
- output += f"**Total Anthropic Tokens ({anthropic_model}): {total_tokens_anthropic}**\n"
91
  output += f"\n**Anthropic Token Counts per Column ({anthropic_model}):**\n"
92
  for col, count in token_counts_anthropic.items():
93
  output += f"- {col}: {count} tokens\n"
@@ -102,6 +120,10 @@ def main():
102
  with gr.Blocks() as demo:
103
  gr.Markdown("# Token Counter")
104
  gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")
 
 
 
 
105
 
106
  with gr.Row():
107
  file_input = gr.File(label="Upload CSV File", type="filepath")
@@ -117,7 +139,7 @@ def main():
117
  visible=False
118
  )
119
  anthropic_model = gr.Dropdown(
120
- choices=['claude-3-5-sonnet-latest', 'claude-3-5-haiku-latest', 'claude-3-opus-latest'],
121
  label="Select Anthropic Model",
122
  visible=False
123
  )
@@ -137,7 +159,8 @@ def main():
137
  inputs = [file_input, calculate_openai, openai_model, calculate_anthropic, anthropic_model]
138
  submit_button.click(fn=process_csv, inputs=inputs, outputs=output)
139
 
140
- demo.launch(share=True)
 
141
 
142
  if __name__ == "__main__":
143
  main()
 
1
  import gradio as gr
2
+ import json
3
  import pandas as pd
4
  import tiktoken
5
  import anthropic
 
6
 
7
  def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
8
  # Check if file is uploaded
 
27
  openai_encoding = tiktoken.get_encoding("cl100k_base")
28
 
29
  token_counts_openai = {}
30
+ try:
31
+ total_tokens_openai = len(openai_encoding.encode(df.to_csv(index=False)))
32
+ except Exception as e:
33
+ return f"Error counting tokens with OpenAI model: {e}"
34
 
35
  # Iterate over columns
36
  for col in df.columns:
 
46
  #total_tokens_openai += tokens_openai
47
 
48
  # Prepare OpenAI output
49
+ output += f"\n**Total OpenAI Tokens ({openai_model}): {total_tokens_openai}**\n"
50
  output += f"\n**OpenAI Token Counts per Column ({openai_model}):**\n\n"
51
  for col, count in token_counts_openai.items():
52
  output += f"- {col}: {count} tokens\n"
 
60
 
61
  # Initialize the Anthropic client
62
  #client = anthropic.Anthropic(api_key=anthropic_api_key)
63
+ client = anthropic.Anthropic()
 
 
 
 
 
64
 
65
  token_counts_anthropic = {}
66
+ #total_tokens_anthropic = client.count_tokens(df.to_csv(index=False))
67
  try:
68
+ response = client.beta.messages.count_tokens(
69
+ betas=["token-counting-2024-11-01"],
70
+ model=anthropic_model, #"claude-3-5-sonnet-20241022",
71
+ #system="You are a scientist",
72
+ messages=[{
73
+ "role": "user",
74
+ "content": df.to_csv(index=False)
75
+ }],
76
+ )
77
+ total_tokens_anthropic = json.loads(response.json())['input_tokens']
78
  except Exception as e:
79
+ return f"Error counting tokens with Anthropic model: {e}"
80
+
81
 
82
  # Iterate over columns
83
  for col in df.columns:
84
  #tokens_col_anthropic = 0
85
  try:
86
+ #tokens_anthropic = client.count_tokens('\n'.join([col]+list(df[col].astype(str).values))) #0.37.1 version
87
+ response = client.beta.messages.count_tokens(
88
+ betas=["token-counting-2024-11-01"],
89
+ model=anthropic_model,
90
+ messages=[{
91
+ "role": "user",
92
+ "content": '\n'.join([col]+list(df[col].astype(str).values))
93
+ }],
94
+ )
95
+ tokens_anthropic = json.loads(response.json())['input_tokens']
96
  except Exception as e:
97
  return f"Error counting tokens with Anthropic model: {e}"
98
  # for cell in df[col].astype(str):
 
105
  #total_tokens_anthropic += tokens_anthropic
106
 
107
  # Prepare Anthropic output
108
+ output += f"\n**Total Anthropic Tokens ({anthropic_model}): {total_tokens_anthropic}**\n"
109
  output += f"\n**Anthropic Token Counts per Column ({anthropic_model}):**\n"
110
  for col, count in token_counts_anthropic.items():
111
  output += f"- {col}: {count} tokens\n"
 
120
  with gr.Blocks() as demo:
121
  gr.Markdown("# Token Counter")
122
  gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")
123
+ gr.Markdown("""
124
+ For OpenAI models Python package `tiktoken` is used.
125
+ For Anthropic models beta version of [Token counting](https://docs.anthropic.com/en/docs/build-with-claude/token-counting) is used.
126
+ """)
127
 
128
  with gr.Row():
129
  file_input = gr.File(label="Upload CSV File", type="filepath")
 
139
  visible=False
140
  )
141
  anthropic_model = gr.Dropdown(
142
+ choices=['claude-3-5-sonnet-latest', 'claude-3-5-haiku-latest', 'claude-3-opus-latest', 'claude-3-haiku-20240307'],
143
  label="Select Anthropic Model",
144
  visible=False
145
  )
 
159
  inputs = [file_input, calculate_openai, openai_model, calculate_anthropic, anthropic_model]
160
  submit_button.click(fn=process_csv, inputs=inputs, outputs=output)
161
 
162
+ #demo.launch(share=True)
163
+ demo.launch()
164
 
165
  if __name__ == "__main__":
166
  main()