mirror of
https://github.com/placeholder-soft/chroma.git
synced 2026-04-27 17:34:57 +08:00
Examples folder refactor (#736)
Reorganizes the examples folder and adds guidelines and a scaffold to flesh it out
This commit is contained in:
332
examples/basic_functionality/alternative_embeddings.ipynb
Normal file
332
examples/basic_functionality/alternative_embeddings.ipynb
Normal file
@@ -0,0 +1,332 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" # Alternative Embeddings\n",
|
||||
" \n",
|
||||
" This notebook demonstrates how to use alternative embedding functions.\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using embedded DuckDB without persistence: data will be transient\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"client = chromadb.Client()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from chromadb.utils import embedding_functions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Using OpenAI Embeddings. This assumes you have the openai package installed\n",
|
||||
"openai_ef = embedding_functions.OpenAIEmbeddingFunction(\n",
|
||||
" api_key=\"OPENAI_API_KEY\", # Replace with your own OpenAI API key\n",
|
||||
" model_name=\"text-embedding-ada-002\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a new chroma collection\n",
|
||||
"openai_collection = client.create_collection(name=\"openai_embeddings\", embedding_function=openai_ef)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai_collection.add(\n",
|
||||
" documents=[\"This is a document\", \"This is another document\"],\n",
|
||||
" metadatas=[{\"source\": \"my_source\"}, {\"source\": \"my_source\"}],\n",
|
||||
" ids=[\"id1\", \"id2\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'ids': [['id1', 'id2']],\n",
|
||||
" 'embeddings': None,\n",
|
||||
" 'documents': [['This is a document', 'This is another document']],\n",
|
||||
" 'metadatas': [[{'source': 'my_source'}, {'source': 'my_source'}]],\n",
|
||||
" 'distances': [[0.13865342736244202, 0.20187020301818848]]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = openai_collection.query(\n",
|
||||
" query_texts=[\"This is a query document\"],\n",
|
||||
" n_results=2\n",
|
||||
")\n",
|
||||
"results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Using Cohere Embeddings. This assumes you have the cohere package installed\n",
|
||||
"cohere_ef = embedding_functions.CohereEmbeddingFunction(\n",
|
||||
" api_key=\"COHERE_API_KEY\", \n",
|
||||
" model_name=\"large\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a new chroma collection\n",
|
||||
"cohere_collection = client.create_collection(name=\"cohere_embeddings\", embedding_function=cohere_ef)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cohere_collection.add(\n",
|
||||
" documents=[\"This is a document\", \"This is another document\"],\n",
|
||||
" metadatas=[{\"source\": \"my_source\"}, {\"source\": \"my_source\"}],\n",
|
||||
" ids=[\"id1\", \"id2\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'ids': [['id1', 'id2']],\n",
|
||||
" 'embeddings': None,\n",
|
||||
" 'documents': [['This is a document', 'This is another document']],\n",
|
||||
" 'metadatas': [[{'source': 'my_source'}, {'source': 'my_source'}]],\n",
|
||||
" 'distances': [[4343.1328125, 5653.28759765625]]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = cohere_collection.query(\n",
|
||||
" query_texts=[\"This is a query document\"],\n",
|
||||
" n_results=2\n",
|
||||
")\n",
|
||||
"results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Using Instructor models. The embedding function requires the InstructorEmbedding package. \n",
|
||||
"# To install it, run pip install InstructorEmbedding\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"#uses base model and cpu\n",
|
||||
"instructor_ef = embedding_functions.InstructorEmbeddingFunction() \n",
|
||||
"\n",
|
||||
"# For task specific embeddings, add an instruction\n",
|
||||
"# instructor_ef = embedding_functions.InstructorEmbeddingFunction(\n",
|
||||
"# instruction=\"Represent the Wikipedia document for retrieval: \"\n",
|
||||
"# )\n",
|
||||
"\n",
|
||||
"# Uses hkunlp/instructor-xl model and GPU\n",
|
||||
"#instructor_ef = embedding_functions.InstructorEmbeddingFunction(model_name=\"hkunlp/instructor-xl\", device=\"cuda\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a collection with the instructor embedding function\n",
|
||||
"instructor_collection = client.create_collection(name=\"instructor_embeddings\", embedding_function=instructor_ef)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"instructor_collection.add(\n",
|
||||
" documents=[\"This is a document\", \"This is another document\"],\n",
|
||||
" metadatas=[{\"source\": \"my_source\"}, {\"source\": \"my_source\"}],\n",
|
||||
" ids=[\"id1\", \"id2\"]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Adding documents with an instruction\n",
|
||||
"# instructor_ef = embedding_functions.InstructorEmbeddingFunction(\n",
|
||||
"# instruction=\"Represent the Science sentence: \"\n",
|
||||
"# )\n",
|
||||
"# instructor_collection = client.create_collection(name=\"instructor_embeddings\", embedding_function=instructor_ef)\n",
|
||||
"# instructor_collection.add(documents=[\"Parton energy loss in QCD matter\"], ids=[\"id1\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = instructor_collection.query(\n",
|
||||
" query_texts=[\"This is a query document\"],\n",
|
||||
" n_results=2\n",
|
||||
")\n",
|
||||
"results\n",
|
||||
"\n",
|
||||
"# Querying with an instruction\n",
|
||||
"# instructor_ef = embedding_functions.InstructorEmbeddingFunction(instruction=\"Represent the Wikipedia question for retrieving supporting documents: \")\n",
|
||||
"# instructor_collection = client.get_collection(name=\"instructor_embeddings\", embedding_function=instructor_ef)\n",
|
||||
"# results = instructor_collection.query(query_texts=[\"where is the food stored in a yam plant\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Using HuggingFace models. The embedding function requires a huggingface api_key\n",
|
||||
"huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(\n",
|
||||
" api_key=\"HUGGINGFACE_API_KEY\", # Replace with your own HuggingFace API key\n",
|
||||
" model_name=\"sentence-transformers/all-MiniLM-L6-v2\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create a new HuggingFace collection\n",
|
||||
"huggingface_collection = client.create_collection(name=\"huggingface_embeddings\", embedding_function=huggingface_ef)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"huggingface_collection.add(\n",
|
||||
" documents=[\"This is a document\", \"This is another document\"],\n",
|
||||
" metadatas=[{\"source\": \"my_source\"}, {\"source\": \"my_source\"}],\n",
|
||||
" ids=[\"id1\", \"id2\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'ids': [['id1', 'id2']],\n",
|
||||
" 'embeddings': None,\n",
|
||||
" 'documents': [['This is a document', 'This is another document']],\n",
|
||||
" 'metadatas': [[{'source': 'my_source'}, {'source': 'my_source'}]],\n",
|
||||
" 'distances': [[0.7111215591430664, 1.010978102684021]]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = huggingface_collection.query(\n",
|
||||
" query_texts=[\"This is a query document\"],\n",
|
||||
" n_results=2\n",
|
||||
")\n",
|
||||
"results"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
227
examples/basic_functionality/local_persistence.ipynb
Normal file
227
examples/basic_functionality/local_persistence.ipynb
Normal file
@@ -0,0 +1,227 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Local Persistence Demo\n",
|
||||
"This notebook demonstrates how to persist the in-memory version of Chroma to disk, then load it back in. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"from chromadb.config import Settings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Running Chroma using direct local API.\n",
|
||||
"No existing DB found in db, skipping load\n",
|
||||
"No existing DB found in db, skipping load\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/antontroynikov/miniforge3/envs/chroma/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a new Chroma client with persistence enabled. \n",
|
||||
"persist_directory = \"db\"\n",
|
||||
"\n",
|
||||
"client = chromadb.Client(\n",
|
||||
" Settings(\n",
|
||||
" persist_directory=persist_directory,\n",
|
||||
" chroma_db_impl=\"duckdb+parquet\",\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Start from scratch\n",
|
||||
"client.reset()\n",
|
||||
"\n",
|
||||
"# Create a new chroma collection\n",
|
||||
"collection_name = \"peristed_collection\"\n",
|
||||
"collection = client.create_collection(name=collection_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Add some data to the collection\n",
|
||||
"collection.add(\n",
|
||||
" embeddings=[\n",
|
||||
" [1.1, 2.3, 3.2],\n",
|
||||
" [4.5, 6.9, 4.4],\n",
|
||||
" [1.1, 2.3, 3.2],\n",
|
||||
" [4.5, 6.9, 4.4],\n",
|
||||
" [1.1, 2.3, 3.2],\n",
|
||||
" [4.5, 6.9, 4.4],\n",
|
||||
" [1.1, 2.3, 3.2],\n",
|
||||
" [4.5, 6.9, 4.4],\n",
|
||||
" ],\n",
|
||||
" metadatas=[\n",
|
||||
" {\"uri\": \"img1.png\", \"style\": \"style1\"},\n",
|
||||
" {\"uri\": \"img2.png\", \"style\": \"style2\"},\n",
|
||||
" {\"uri\": \"img3.png\", \"style\": \"style1\"},\n",
|
||||
" {\"uri\": \"img4.png\", \"style\": \"style1\"},\n",
|
||||
" {\"uri\": \"img5.png\", \"style\": \"style1\"},\n",
|
||||
" {\"uri\": \"img6.png\", \"style\": \"style1\"},\n",
|
||||
" {\"uri\": \"img7.png\", \"style\": \"style1\"},\n",
|
||||
" {\"uri\": \"img8.png\", \"style\": \"style1\"},\n",
|
||||
" ],\n",
|
||||
" documents=[\"doc1\", \"doc2\", \"doc3\", \"doc4\", \"doc5\", \"doc6\", \"doc7\", \"doc8\"],\n",
|
||||
" ids=[\"id1\", \"id2\", \"id3\", \"id4\", \"id5\", \"id6\", \"id7\", \"id8\"],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Persisting DB to disk, putting it in the save folder db\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Persist the DB. This also happens automatically when the client is garbage collected.\n",
|
||||
"# In a notebook, prefer to call persist explicitly.\n",
|
||||
"client.persist()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Running Chroma using direct local API.\n",
|
||||
"loaded in 8 embeddings\n",
|
||||
"loaded in 1 collections\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a new client with the same settings\n",
|
||||
"client = chromadb.Client(\n",
|
||||
" Settings(\n",
|
||||
" persist_directory=persist_directory,\n",
|
||||
" chroma_db_impl=\"duckdb+parquet\",\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Load the collection\n",
|
||||
"collection = client.get_collection(collection_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'embeddings': [[[1.1, 2.3, 3.2]]], 'documents': [['doc5']], 'ids': [['id5']], 'metadatas': [[{'uri': 'img5.png', 'style': 'style1'}]], 'distances': [[0.0]]}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Query the collection\n",
|
||||
"results = collection.query(\n",
|
||||
" query_embeddings=[[1.1, 2.3, 3.2]],\n",
|
||||
" n_results=1\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Persisting DB to disk, putting it in the save folder db\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Clean up\n",
|
||||
"client.reset()\n",
|
||||
"client.persist()\n",
|
||||
"\n",
|
||||
"# You can also just delete the persist directory\n",
|
||||
"!rm -rf db/"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "chroma",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "88f09714c9334832bac29166716f9f6a879ee2a4ed4822c1d4120cb2393b58dd"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
197
examples/basic_functionality/where_filtering.ipynb
Normal file
197
examples/basic_functionality/where_filtering.ipynb
Normal file
@@ -0,0 +1,197 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Where Filtering\n",
|
||||
"This notebook demonstrates how to use where filtering to filter the data returned from get or query."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import chromadb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using embedded DuckDB without persistence: data will be transient\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"client = chromadb.Client()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Create a new chroma collection\n",
|
||||
"collection_name = \"filter_example_collection\"\n",
|
||||
"collection = client.create_collection(name=collection_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Add some data to the collection\n",
|
||||
"collection.add(\n",
|
||||
" embeddings=[\n",
|
||||
" [1.1, 2.3, 3.2],\n",
|
||||
" [4.5, 6.9, 4.4],\n",
|
||||
" [1.1, 2.3, 3.2],\n",
|
||||
" [4.5, 6.9, 4.4],\n",
|
||||
" [1.1, 2.3, 3.2],\n",
|
||||
" [4.5, 6.9, 4.4],\n",
|
||||
" [1.1, 2.3, 3.2],\n",
|
||||
" [4.5, 6.9, 4.4],\n",
|
||||
" ],\n",
|
||||
" metadatas=[\n",
|
||||
" {\"status\": \"read\"},\n",
|
||||
" {\"status\": \"unread\"},\n",
|
||||
" {\"status\": \"read\"},\n",
|
||||
" {\"status\": \"unread\"},\n",
|
||||
" {\"status\": \"read\"},\n",
|
||||
" {\"status\": \"unread\"},\n",
|
||||
" {\"status\": \"read\"},\n",
|
||||
" {\"status\": \"unread\"},\n",
|
||||
" ],\n",
|
||||
" documents=[\"A document that discusses domestic policy\", \"A document that discusses international affairs\", \"A document that discusses kittens\", \"A document that discusses dogs\", \"A document that discusses chocolate\", \"A document that is sixth that discusses government\", \"A document that discusses international affairs\", \"A document that discusses global affairs\"],\n",
|
||||
" ids=[\"id1\", \"id2\", \"id3\", \"id4\", \"id5\", \"id6\", \"id7\", \"id8\"],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'ids': ['id7'],\n",
|
||||
" 'embeddings': None,\n",
|
||||
" 'documents': ['A document that discusses international affairs'],\n",
|
||||
" 'metadatas': [{'status': 'read'}]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Get documents that are read and about affairs\n",
|
||||
"collection.get(where={\"status\": \"read\"}, where_document={\"$contains\": \"affairs\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'ids': ['id8', 'id1'],\n",
|
||||
" 'embeddings': None,\n",
|
||||
" 'documents': ['A document that discusses global affairs',\n",
|
||||
" 'A document that discusses domestic policy'],\n",
|
||||
" 'metadatas': [{'status': 'unread'}, {'status': 'read'}]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Get documents that are about global affairs or domestic policy\n",
|
||||
"collection.get(where_document={\"$or\": [{\"$contains\": \"global affairs\"}, {\"$contains\": \"domestic policy\"}]})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'ids': [['id7', 'id2', 'id8']],\n",
|
||||
" 'embeddings': None,\n",
|
||||
" 'documents': [['A document that discusses international affairs',\n",
|
||||
" 'A document that discusses international affairs',\n",
|
||||
" 'A document that discusses global affairs']],\n",
|
||||
" 'metadatas': [[{'status': 'read'},\n",
|
||||
" {'status': 'unread'},\n",
|
||||
" {'status': 'unread'}]],\n",
|
||||
" 'distances': [[16.740001678466797, 87.22000122070312, 87.22000122070312]]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Get 5 closest vectors to [0, 0, 0] that are about affairs\n",
|
||||
"# Outputs 3 docs because collection only has 3 docs about affairs\n",
|
||||
"collection.query(query_embeddings=[[0, 0, 0]], where_document={\"$contains\": \"affairs\"}, n_results=5)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "2395417914bce3169eff793a7d01bf858f95b138000d8d354eed93ead856f5e6"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user