# -*- coding: utf-8 -*-
"""Gradio demo: semantic search over a codebase using Cohere embeddings.

The script loads a table of functions (``functions_data.csv``) and a pre-built
Annoy index (``code.ann``), embeds the user's query with the Cohere embed API,
and shows the function whose embedding is nearest to the query.
"""
import os
import warnings

# Cohere provides the embeddings (install the `cohere` package first).
import cohere
import gradio as gr
import numpy as np
import pandas as pd
from annoy import AnnoyIndex
from sklearn.metrics.pairwise import cosine_similarity

warnings.filterwarnings('ignore')
pd.set_option('display.max_colwidth', None)

# Vector length the Annoy index is declared with; presumably this must match
# the dimensionality used when 'code.ann' was built.
EMBEDDING_DIM = 4096

data_df = pd.read_csv('functions_data.csv')
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form is deprecated and does not update the
# frame when pandas Copy-on-Write is enabled.
data_df['docstring'] = data_df['docstring'].fillna('not specified')

# The API key is read from the environment so it is never committed or shared
# publicly. Create and retrieve a Cohere API key from
# dashboard.cohere.ai/welcome/register
key = os.environ.get('API_KEY')
api_key = key
co = cohere.Client(api_key)

search_index = AnnoyIndex(EMBEDDING_DIM, 'angular')
search_index.load('code.ann')  # super fast, will just mmap the file


def get_code(query: str) -> tuple:
    """Return the function that best matches a natural-language query.

    Args:
        query: Free-text description of the code being searched for.

    Returns:
        A ``(function_body, file_path)`` pair taken from the row of
        ``data_df`` at the position of the nearest neighbour in the index.
        A pair of empty strings is returned when the query is blank or the
        index yields no neighbour.
    """
    if not query or not query.strip():
        # Nothing to search for; skip the API call entirely.
        return "", ""

    # Get the query's embedding.
    query_embed = co.embed(
        texts=[query], model="large", truncate="LEFT"
    ).embeddings

    # Retrieve the single nearest neighbour.
    similar_item_ids = search_index.get_nns_by_vector(query_embed[0], 1)
    if not similar_item_ids:
        return "", ""

    # Look the row up once and read both columns from it.
    match = data_df.iloc[similar_item_ids[0]]
    return match['function_body'], match['file_path']


examples = ['compute diffusion of given data']

inputs = gr.Textbox(label='query')
outputs = [
    gr.Textbox(label='matched function'),
    gr.Textbox(label='File path'),
]

title = "Search Code"
description = "Semantically search codebase using Cohere embed API. This demo uses Open AI point cloud codebase https://github.com/openai/point-e as an example"

iface = gr.Interface(
    fn=get_code,
    inputs=inputs,
    outputs=outputs,
    description=description,
    examples=examples,
    title=title,
)
iface.launch()