mirror of
https://github.com/placeholder-soft/chroma.git
synced 2026-04-29 12:24:58 +08:00
Add example demonstrating using openai & cohere embeddings
This commit is contained in:
205
examples/alternative_embeddings.ipynb
Normal file
205
examples/alternative_embeddings.ipynb
Normal file
@@ -0,0 +1,205 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Alternative Embeddings\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to use alternative embedding functions.\n",
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using embedded DuckDB without persistence: data will be transient\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"client = chromadb.Client()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from chromadb.utils import embedding_functions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Using OpenAI Embeddings. This assumes you have the openai package installed\n",
|
||||
"openai_ef = embedding_functions.OpenAIEmbeddingFunction(\n",
|
||||
" api_key=\"OPENAI_API_KEY\", # Replace with your own OpenAI API key\n",
|
||||
" model_name=\"text-embedding-ada-002\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a new chroma collection\n",
|
||||
"openai_collection = client.create_collection(name=\"openai_embeddings\", embedding_function=openai_ef)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai_collection.add(\n",
|
||||
" documents=[\"This is a document\", \"This is another document\"],\n",
|
||||
" metadatas=[{\"source\": \"my_source\"}, {\"source\": \"my_source\"}],\n",
|
||||
" ids=[\"id1\", \"id2\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'ids': [['id1', 'id2']],\n",
|
||||
" 'embeddings': None,\n",
|
||||
" 'documents': [['This is a document', 'This is another document']],\n",
|
||||
" 'metadatas': [[{'source': 'my_source'}, {'source': 'my_source'}]],\n",
|
||||
" 'distances': [[0.13865342736244202, 0.20187020301818848]]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = openai_collection.query(\n",
|
||||
" query_texts=[\"This is a query document\"],\n",
|
||||
" n_results=2\n",
|
||||
")\n",
|
||||
"results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Using Cohere Embeddings. This assumes you have the cohere package installed\n",
|
||||
"cohere_ef = embedding_functions.CohereEmbeddingFunction(\n",
|
||||
"    api_key=\"COHERE_API_KEY\", # Replace with your own Cohere API key\n",
|
||||
" model_name=\"large\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a new chroma collection\n",
|
||||
"cohere_collection = client.create_collection(name=\"cohere_embeddings\", embedding_function=cohere_ef)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cohere_collection.add(\n",
|
||||
" documents=[\"This is a document\", \"This is another document\"],\n",
|
||||
" metadatas=[{\"source\": \"my_source\"}, {\"source\": \"my_source\"}],\n",
|
||||
" ids=[\"id1\", \"id2\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'ids': [['id1', 'id2']],\n",
|
||||
" 'embeddings': None,\n",
|
||||
" 'documents': [['This is a document', 'This is another document']],\n",
|
||||
" 'metadatas': [[{'source': 'my_source'}, {'source': 'my_source'}]],\n",
|
||||
" 'distances': [[4343.1328125, 5653.28759765625]]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = cohere_collection.query(\n",
|
||||
" query_texts=[\"This is a query document\"],\n",
|
||||
" n_results=2\n",
|
||||
")\n",
|
||||
"results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user