# -*- coding: utf-8 -*-
"""code-search.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1-TlihNx5XCiVSxUHDF1oHFNcfpuy_k0N

Semantic code search demo: a free-text query is embedded with Cohere, the
nearest function embeddings are looked up in a prebuilt Annoy index, and the
matching function bodies and file paths are shown in a Gradio interface.
"""

import os
import warnings

# Install Cohere for embeddings
import cohere
import numpy as np
import pandas as pd
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity
from annoy import AnnoyIndex

warnings.filterwarnings('ignore')
# get_code returns pandas Series; without this their text form would be
# truncated to pandas' default column width.
pd.set_option('display.max_colwidth', None)

DATA_PATH = 'functions_data.csv'
INDEX_PATH = 'code.ann'
# Vector length the Annoy index is opened with; it has to agree with the
# length of the embeddings returned for queries.
EMBEDDING_DIM = 4096
# Number of nearest neighbours returned per query.
TOP_K = 3

data_df = pd.read_csv(DATA_PATH)
# Assign the result back instead of using a chained ``inplace=True`` call:
# the chained form acts on a temporary under pandas Copy-on-Write and would
# leave data_df unchanged.
data_df['docstring'] = data_df['docstring'].fillna('not specified')

# Create and retrieve a Cohere API key from dashboard.cohere.ai/welcome/register
# The key is read from the environment so that it is never committed to
# source control. Any key that was previously hard-coded in this file should
# be considered leaked and revoked.
api_key = os.environ.get('COHERE_API_KEY')
if not api_key:
    raise RuntimeError(
        'Set the COHERE_API_KEY environment variable to your Cohere API key.'
    )
co = cohere.Client(api_key)

search_index = AnnoyIndex(EMBEDDING_DIM, 'angular')
search_index.load(INDEX_PATH)  # super fast, will just mmap the file


def get_code(query):
    """Return the functions whose embeddings are closest to *query*.

    Args:
        query: Free-text search string entered by the user.

    Returns:
        A ``(function_bodies, file_paths)`` tuple of pandas Series holding
        the ``function_body`` and ``file_path`` columns of the ``TOP_K``
        nearest rows of ``data_df``, ordered as returned by the index.
    """
    # Get the query's embedding
    query_embed = co.embed(
        texts=[query], model="large", truncate="LEFT"
    ).embeddings

    # Retrieve the nearest neighbors. With include_distances=True Annoy
    # returns an (ids, distances) pair; only the ids are needed here.
    neighbor_ids, _distances = search_index.get_nns_by_vector(
        query_embed[0], TOP_K, include_distances=True
    )

    # Select the matching rows once and project both output columns from them.
    matches = data_df.iloc[neighbor_ids]
    return matches['function_body'], matches['file_path']


iface = gr.Interface(fn=get_code, inputs="text", outputs=[gr.Markdown(), "text"])
iface.launch()