Spaces:
Sleeping
Sleeping
File size: 4,995 Bytes
287a0bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Cohere\n",
"\n",
"This notebook demonstrates how to use Cohere Embeddings with Chroma.\n",
"\n",
"If you have not already, [create a Cohere account](https://dashboard.cohere.ai/welcome/register) and get your API Key.\n",
"\n",
"First a basic example:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n"
]
}
],
"source": [
"! pip install chromadb --quiet\n",
"! pip install cohere --quiet"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import getpass\n",
"\n",
"os.environ[\"COHERE_API_KEY\"] = getpass.getpass(\"Cohere API Key:\")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'ids': [['3']], 'embeddings': None, 'documents': [['I like oranges']], 'metadatas': [[{'fruit': 'orange'}]], 'distances': [[6729.3291015625]]}\n"
]
}
],
"source": [
"import chromadb\n",
"from chromadb.utils import embedding_functions\n",
"\n",
"cohere_ef = embedding_functions.CohereEmbeddingFunction(api_key=os.environ[\"COHERE_API_KEY\"], model_name=\"large\")\n",
"\n",
"client = chromadb.Client()\n",
"collection = client.create_collection(\"cohere_python\", embedding_function=cohere_ef)\n",
"\n",
"collection.add(\n",
" ids=[\"1\", \"2\", \"3\"],\n",
" documents=[\"I like apples\", \"I like bananas\", \"I like oranges\"],\n",
" metadatas=[{\"fruit\": \"apple\"}, {\"fruit\": \"banana\"}, {\"fruit\": \"orange\"}],\n",
")\n",
"\n",
"print(collection.query(query_texts=[\"citrus\"], n_results=1))\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Multilingual Example\n",
"\n",
"Cohere can support many languages! In this example we store text in many languages, and then query in English."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'ids': [['9']], 'embeddings': None, 'documents': [['나는 오렌지를 좋아한다']], 'metadatas': [[None]], 'distances': [[30.728900909423828]]}\n"
]
}
],
"source": [
"cohere_mutlilingual = embedding_functions.CohereEmbeddingFunction(\n",
" api_key=os.environ[\"COHERE_API_KEY\"], \n",
" model_name=\"multilingual-22-12\")\n",
"\n",
"# 나는 오렌지를 좋아한다 is \"I like oranges\" in Korean\n",
"multilingual_texts = [ 'Hello from Cohere!', 'مرحبًا من كوهير!', \n",
" 'Hallo von Cohere!', 'Bonjour de Cohere!', \n",
" '¡Hola desde Cohere!', 'Olá do Cohere!', \n",
" 'Ciao da Cohere!', '您好,来自 Cohere!',\n",
" 'कोहेरे से नमस्ते!', '나는 오렌지를 좋아한다' ]\n",
"\n",
"collection = client.create_collection(\"cohere_multilingual\", embedding_function=cohere_mutlilingual)\n",
"\n",
"collection.add(\n",
" ids=[str(i) for i in range(len(multilingual_texts))],\n",
" documents=multilingual_texts\n",
")\n",
"\n",
"print(collection.query(query_texts=[\"citrus\"], n_results=1))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
|