Add example demonstrating using openai & cohere embeddings

This commit is contained in:
fr0th
2023-04-10 19:48:37 +12:00
parent ce0bc89777
commit ab78b15b00

View File

@@ -0,0 +1,205 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
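"# Alternative Embeddings\n",
"\n",
"This notebook demonstrates how to use alternative embedding functions (OpenAI and Cohere) with Chroma collections.\n",
"\n",
"Each example assumes the provider's Python package is installed and that you supply your own API key."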
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import chromadb"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using embedded DuckDB without persistence: data will be transient\n"
]
}
],
"source": [
"# Default client. Per the recorded stderr for this cell, it runs on embedded DuckDB\n",
"# without persistence, so everything stored through it is transient.\n",
"client = chromadb.Client()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from chromadb.utils import embedding_functions"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Using OpenAI Embeddings. This assumes you have the openai package installed.\n",
"# The key is read from the OPENAI_API_KEY environment variable so that a real secret\n",
"# is never saved inside the notebook; set the variable before starting Jupyter.\n",
"openai_ef = embedding_functions.OpenAIEmbeddingFunction(\n",
"    api_key=os.environ[\"OPENAI_API_KEY\"],  # raises KeyError if the variable is unset\n",
"    model_name=\"text-embedding-ada-002\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# New Chroma collection wired to the OpenAI embedding function\n",
"openai_collection = client.create_collection(\n",
"    name=\"openai_embeddings\",\n",
"    embedding_function=openai_ef,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Insert two documents with explicit ids and per-document metadata;\n",
"# no precomputed embeddings are passed in this call.\n",
"openai_collection.add(\n",
"    ids=[\"id1\", \"id2\"],\n",
"    documents=[\"This is a document\", \"This is another document\"],\n",
"    metadatas=[{\"source\": \"my_source\"}, {\"source\": \"my_source\"}],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'ids': [['id1', 'id2']],\n",
" 'embeddings': None,\n",
" 'documents': [['This is a document', 'This is another document']],\n",
" 'metadatas': [[{'source': 'my_source'}, {'source': 'my_source'}]],\n",
" 'distances': [[0.13865342736244202, 0.20187020301818848]]}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Query the OpenAI-backed collection with one text and request 2 results.\n",
"# Stored under a provider-specific name so a later query does not overwrite it;\n",
"# the bare last line displays the returned dict (ids, documents, metadatas, distances).\n",
"openai_results = openai_collection.query(\n",
"    query_texts=[\"This is a query document\"],\n",
"    n_results=2\n",
")\n",
"openai_results"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Using Cohere Embeddings. This assumes you have the cohere package installed.\n",
"# The key is read from the COHERE_API_KEY environment variable so that a real secret\n",
"# is never saved inside the notebook; set the variable before starting Jupyter.\n",
"cohere_ef = embedding_functions.CohereEmbeddingFunction(\n",
"    api_key=os.environ[\"COHERE_API_KEY\"],  # raises KeyError if the variable is unset\n",
"    model_name=\"large\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# New Chroma collection wired to the Cohere embedding function\n",
"cohere_collection = client.create_collection(\n",
"    name=\"cohere_embeddings\",\n",
"    embedding_function=cohere_ef,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Insert two documents with explicit ids and per-document metadata;\n",
"# no precomputed embeddings are passed in this call.\n",
"cohere_collection.add(\n",
"    ids=[\"id1\", \"id2\"],\n",
"    documents=[\"This is a document\", \"This is another document\"],\n",
"    metadatas=[{\"source\": \"my_source\"}, {\"source\": \"my_source\"}],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'ids': [['id1', 'id2']],\n",
" 'embeddings': None,\n",
" 'documents': [['This is a document', 'This is another document']],\n",
" 'metadatas': [[{'source': 'my_source'}, {'source': 'my_source'}]],\n",
" 'distances': [[4343.1328125, 5653.28759765625]]}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Query the Cohere-backed collection with one text and request 2 results.\n",
"# Stored under a provider-specific name so it does not overwrite another query's result;\n",
"# the bare last line displays the returned dict (ids, documents, metadatas, distances).\n",
"cohere_results = cohere_collection.query(\n",
"    query_texts=[\"This is a query document\"],\n",
"    n_results=2\n",
")\n",
"cohere_results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}