"""Gradio app: count OpenAI / Anthropic tokens per column of an uploaded CSV."""

import gradio as gr
import pandas as pd
import tiktoken

try:
    # NOTE(review): `from anthropic import tokenizer` (as originally written) is
    # not a public API of any released anthropic SDK. Old 0.2.x SDKs exposed
    # `get_tokenizer()`; current SDKs removed local tokenization entirely.
    # TODO: confirm the installed anthropic version supports this.
    from anthropic import get_tokenizer as _get_anthropic_tokenizer
except ImportError:
    _get_anthropic_tokenizer = None


def _column_token_counts(df, count_fn):
    """Return ({column: token_count}, grand_total) for *df*.

    Args:
        df: DataFrame whose cells are counted. Cells are stringified first so
            numeric/NaN columns are counted too (matches original behavior).
        count_fn: maps one cell string to its integer token count.
    """
    per_column = {
        col: sum(count_fn(cell) for cell in df[col].astype(str))
        for col in df.columns
    }
    return per_column, sum(per_column.values())


def process_csv(file, calculate_openai, openai_model, calculate_anthropic, anthropic_model):
    """Build the Markdown token report for the uploaded CSV.

    Args:
        file: the uploaded file — either a path string (gr.File type="filepath")
            or a tempfile-like object with a ``.name`` attribute (legacy type).
        calculate_openai / calculate_anthropic: checkbox states.
        openai_model / anthropic_model: dropdown selections.

    Returns:
        Markdown string with per-column and total token counts, or an
        instructional/error message.
    """
    if file is None:
        return "Please upload a CSV file."

    # Support both the modern "filepath" (str) and legacy file-object inputs.
    path = file if isinstance(file, str) else file.name
    try:
        df = pd.read_csv(path)
    except Exception as exc:
        # Surface parse errors in the UI instead of a server-side traceback.
        return f"Could not read CSV file: {exc}"

    if not calculate_openai and not calculate_anthropic:
        return "Please select at least one model to calculate tokens."

    sections = []

    if calculate_openai:
        try:
            encoding = tiktoken.encoding_for_model(openai_model)
        except KeyError:
            # Unknown model name: fall back to the cl100k_base encoding.
            encoding = tiktoken.get_encoding("cl100k_base")
        per_column, total = _column_token_counts(
            df, lambda cell: len(encoding.encode(cell))
        )
        part = f"**OpenAI Token Counts per Column ({openai_model}):**\n"
        for col, count in per_column.items():
            part += f"- {col}: {count} tokens\n"
        part += f"\n**Total OpenAI Tokens ({openai_model}): {total}**\n\n"
        sections.append(part)

    if calculate_anthropic:
        if _get_anthropic_tokenizer is None:
            sections.append(
                "Anthropic tokenizer is not available in the installed "
                "`anthropic` package; please install a version that provides "
                "`get_tokenizer()`.\n"
            )
        else:
            tok = _get_anthropic_tokenizer()
            # Same tokenizer across Claude v1-era models; `encode` returns a
            # tokenizers.Encoding, whose token ids live on `.ids`.
            per_column, total = _column_token_counts(
                df, lambda cell: len(tok.encode(cell).ids)
            )
            part = f"**Anthropic Token Counts per Column ({anthropic_model}):**\n"
            for col, count in per_column.items():
                part += f"- {col}: {count} tokens\n"
            part += f"\n**Total Anthropic Tokens ({anthropic_model}): {total}**\n"
            sections.append(part)

    return "".join(sections)


def main():
    """Wire up and launch the Gradio UI."""
    with gr.Blocks() as demo:
        gr.Markdown("# Token Counter")
        gr.Markdown("Upload a CSV file to see token counts per column and total tokens.")

        with gr.Row():
            # "filepath" replaces the removed legacy type="file"; process_csv
            # accepts either form.
            file_input = gr.File(label="Upload CSV File", type="filepath")

        with gr.Row():
            calculate_openai = gr.Checkbox(label="Calculate tokens for OpenAI models")
            calculate_anthropic = gr.Checkbox(label="Calculate tokens for Anthropic models")

        with gr.Row():
            openai_model = gr.Dropdown(
                choices=['gpt-4', 'gpt-3.5-turbo', 'text-davinci-003'],
                label="Select OpenAI Model",
                visible=False,
            )
            anthropic_model = gr.Dropdown(
                choices=['claude-v1', 'claude-v1.3', 'claude-instant-v1'],
                label="Select Anthropic Model",
                visible=False,
            )

        def update_openai_visibility(selected):
            # Show the model dropdown only while its checkbox is ticked.
            return gr.update(visible=selected)

        def update_anthropic_visibility(selected):
            return gr.update(visible=selected)

        calculate_openai.change(
            fn=update_openai_visibility, inputs=calculate_openai, outputs=openai_model
        )
        calculate_anthropic.change(
            fn=update_anthropic_visibility, inputs=calculate_anthropic, outputs=anthropic_model
        )

        submit_button = gr.Button("Calculate Tokens")
        output = gr.Markdown()

        inputs = [file_input, calculate_openai, openai_model, calculate_anthropic, anthropic_model]
        submit_button.click(fn=process_csv, inputs=inputs, outputs=output)

    demo.launch()


if __name__ == "__main__":
    main()