import json

import anthropic
import gradio as gr
import pandas as pd
import tiktoken


def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
    """Count tokens in an uploaded CSV, per column and in total, for the selected models."""
    # Check that a file was uploaded
    if file is None:
        return "Please upload a CSV file."

    # Read the CSV file
    try:
        df = pd.read_csv(file)
    except Exception as e:
        return f"Error reading CSV file: {e}"

    # Build the Markdown output incrementally
    output = ""

    if calculate_openai:
        # Get the tiktoken encoding for the selected model,
        # falling back to cl100k_base if the model is unknown
        try:
            openai_encoding = tiktoken.encoding_for_model(openai_model)
        except KeyError:
            openai_encoding = tiktoken.get_encoding("cl100k_base")

        token_counts_openai = {}

        # Total tokens for the whole file, rendered back to CSV text
        try:
            total_tokens_openai = len(openai_encoding.encode(df.to_csv(index=False)))
        except Exception as e:
            return f"Error counting tokens with OpenAI model: {e}"

        # Per-column counts: header plus all cell values, newline-separated
        for col in df.columns:
            try:
                tokens_openai = openai_encoding.encode(
                    "\n".join([col] + list(df[col].astype(str).values))
                )
            except Exception as e:
                return f"Error counting tokens with OpenAI model: {e}"
            token_counts_openai[col] = len(tokens_openai)

        # Prepare OpenAI output
        output += f"\n**Total OpenAI Tokens ({openai_model}): {total_tokens_openai}**\n"
        output += f"\n**OpenAI Token Counts per Column ({openai_model}):**\n\n"
        for col, count in token_counts_openai.items():
            output += f"- {col}: {count} tokens\n"

    if calculate_anthropic:
        # The Anthropic client reads the API key from the ANTHROPIC_API_KEY
        # environment variable
        client = anthropic.Anthropic()

        token_counts_anthropic = {}

        # Total tokens for the whole file, via the beta token-counting endpoint
        # (older SDKs, e.g. 0.37.1, exposed client.count_tokens() instead)
        try:
            response = client.beta.messages.count_tokens(
                betas=["token-counting-2024-11-01"],
                model=anthropic_model,
                messages=[{"role": "user", "content": df.to_csv(index=False)}],
            )
            total_tokens_anthropic = json.loads(response.json())["input_tokens"]
        except Exception as e:
            return f"Error counting tokens with Anthropic model: {e}"

        # Per-column counts: header plus all cell values, newline-separated
        for col in df.columns:
            try:
                response = client.beta.messages.count_tokens(
                    betas=["token-counting-2024-11-01"],
                    model=anthropic_model,
                    messages=[{
                        "role": "user",
                        "content": "\n".join([col] + list(df[col].astype(str).values)),
                    }],
                )
                tokens_anthropic = json.loads(response.json())["input_tokens"]
            except Exception as e:
                return f"Error counting tokens with Anthropic model: {e}"
            token_counts_anthropic[col] = tokens_anthropic

        # Prepare Anthropic output
        output += f"\n**Total Anthropic Tokens ({anthropic_model}): {total_tokens_anthropic}**\n"
        output += f"\n**Anthropic Token Counts per Column ({anthropic_model}):**\n"
        for col, count in token_counts_anthropic.items():
            output += f"- {col}: {count} tokens\n"

    if not calculate_openai and not calculate_anthropic:
        output = "Please select at least one model to calculate tokens."

    return output


def main():
    with gr.Blocks() as demo:
        gr.Markdown("# Token Counter")
        gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")
        gr.Markdown("""
        For OpenAI models, the Python package `tiktoken` is used.
        For Anthropic models, the beta [token counting](https://docs.anthropic.com/en/docs/build-with-claude/token-counting) API is used.
        """)

        with gr.Row():
            file_input = gr.File(label="Upload CSV File", type="filepath")
        with gr.Row():
            calculate_openai = gr.Checkbox(label="Calculate tokens for OpenAI models")
            calculate_anthropic = gr.Checkbox(label="Calculate tokens for Anthropic models")
        with gr.Row():
            openai_model = gr.Dropdown(
                choices=["gpt-4o", "gpt-4o-mini", "gpt-4"],
                label="Select OpenAI Model",
                visible=False,
            )
            anthropic_model = gr.Dropdown(
                choices=[
                    "claude-3-5-sonnet-latest",
                    "claude-3-5-haiku-latest",
                    "claude-3-opus-latest",
                    "claude-3-haiku-20240307",
                ],
                label="Select Anthropic Model",
                visible=False,
            )

        # Show each model dropdown only while its checkbox is ticked
        def update_openai_visibility(selected):
            return gr.update(visible=selected)

        def update_anthropic_visibility(selected):
            return gr.update(visible=selected)

        calculate_openai.change(fn=update_openai_visibility, inputs=calculate_openai, outputs=openai_model)
        calculate_anthropic.change(fn=update_anthropic_visibility, inputs=calculate_anthropic, outputs=anthropic_model)

        submit_button = gr.Button("Calculate Tokens")
        output = gr.Markdown()

        inputs = [file_input, calculate_openai, openai_model, calculate_anthropic, anthropic_model]
        submit_button.click(fn=process_csv, inputs=inputs, outputs=output)

    demo.launch()


if __name__ == "__main__":
    main()
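# ---------------------------------------------------------------------------
# Minimal sketch of the same counting logic outside the Gradio UI, kept here
# as a commented-out reference. It assumes a local "data.csv" (placeholder
# name) and, for the Anthropic part, a valid ANTHROPIC_API_KEY; it mirrors the
# calls made in process_csv above.
#
#   import json
#   import anthropic
#   import pandas as pd
#   import tiktoken
#
#   df = pd.read_csv("data.csv")
#   col = df.columns[0]
#   text = "\n".join([col] + list(df[col].astype(str).values))
#
#   enc = tiktoken.encoding_for_model("gpt-4o")
#   print(col, len(enc.encode(text)), "tokens (tiktoken)")
#
#   client = anthropic.Anthropic()
#   resp = client.beta.messages.count_tokens(
#       betas=["token-counting-2024-11-01"],
#       model="claude-3-5-haiku-latest",
#       messages=[{"role": "user", "content": text}],
#   )
#   print(col, json.loads(resp.json())["input_tokens"], "tokens (Anthropic)")
# ---------------------------------------------------------------------------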