Where filtering with logical operators - updated examples (#966)

## Description of changes

*Summarize the changes made by this PR.*
 - Improvements & Bug fixes
- Added enhanced examples of how to use `where` filtering with logical
operators based on community questions

## Test plan
Run the jupyter notebook
`examples/basic_functionality/where_filtering.ipynb`

## Documentation Changes
No document updates.
This commit is contained in:
Trayan Azarov
2023-08-13 08:18:46 +03:00
committed by GitHub
parent 6739259ef8
commit a014870f40

View File

@@ -12,7 +12,12 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-11T18:29:08.687703Z",
"start_time": "2023-08-11T18:29:07.757276Z"
}
},
"outputs": [],
"source": [
"import chromadb"
@@ -150,12 +155,168 @@
"collection.query(query_embeddings=[[0, 0, 0]], where_document={\"$contains\": \"affairs\"}, n_results=5)"
]
},
{
"cell_type": "markdown",
"source": [
"# Where Filtering With Logical Operators\n",
"This section demonstrates how one can use the logical operators in `where` filtering.\n",
"\n",
"Chroma currently supports the `$and` and `$or` operators.\n",
"\n",
"> Note: Logical operators can be nested."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/tazarov/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [02:59<00:00, 463kiB/s] \n"
]
},
{
"data": {
"text/plain": "{'ids': ['1', '2'],\n 'embeddings': None,\n 'metadatas': [{'author': 'john'}, {'author': 'jack'}],\n 'documents': ['Article by john', 'Article by Jack']}"
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Or Logical Operator Filtering\n",
"# import chromadb\n",
"client = chromadb.Client()\n",
"collection = client.get_or_create_collection(\"test-where-list\")\n",
"collection.add(documents=[\"Article by john\", \"Article by Jack\", \"Article by Jill\"],\n",
" metadatas=[{\"author\": \"john\"}, {\"author\": \"jack\"}, {\"author\": \"jill\"}], ids=[\"1\", \"2\", \"3\"])\n",
"\n",
"collection.get(where={\"$or\": [{\"author\": \"john\"}, {\"author\": \"jack\"}]})\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-11T18:45:52.663345Z",
"start_time": "2023-08-11T18:42:50.970414Z"
}
}
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [
{
"data": {
"text/plain": "{'ids': ['1'],\n 'embeddings': None,\n 'metadatas': [{'author': 'john', 'category': 'chroma'}],\n 'documents': ['Article by john']}"
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# And Logical Operator Filtering\n",
"collection = client.get_or_create_collection(\"test-where-list\")\n",
"collection.upsert(documents=[\"Article by john\", \"Article by Jack\", \"Article by Jill\"],\n",
" metadatas=[{\"author\": \"john\",\"category\":\"chroma\"}, {\"author\": \"jack\",\"category\":\"ml\"}, {\"author\": \"jill\",\"category\":\"lifestyle\"}], ids=[\"1\", \"2\", \"3\"])\n",
"collection.get(where={\"$and\": [{\"category\": \"chroma\"}, {\"author\": \"john\"}]})"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-11T18:49:31.174811Z",
"start_time": "2023-08-11T18:49:31.056618Z"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [
{
"data": {
"text/plain": "{'ids': [], 'embeddings': None, 'metadatas': [], 'documents': []}"
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# And Logical Operator Filtering that doesn't match anything\n",
"collection.get(where={\"$and\": [{\"category\": \"chroma\"}, {\"author\": \"jill\"}]})"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-11T18:49:35.758816Z",
"start_time": "2023-08-11T18:49:35.741477Z"
}
}
},
{
"cell_type": "code",
"execution_count": 11,
"outputs": [
{
"data": {
"text/plain": "{'ids': ['1'],\n 'embeddings': None,\n 'metadatas': [{'author': 'john', 'category': 'chroma'}],\n 'documents': ['Article by john']}"
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Combined And and Or Logical Operator Filtering\n",
"collection.get(where={\"$and\": [{\"category\": \"chroma\"}, {\"$or\": [{\"author\": \"john\"}, {\"author\": \"jack\"}]}]})"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-11T18:49:40.463045Z",
"start_time": "2023-08-11T18:49:40.450240Z"
}
}
},
{
"cell_type": "code",
"execution_count": 13,
"outputs": [
{
"data": {
"text/plain": "{'ids': ['1'],\n 'embeddings': None,\n 'metadatas': [{'author': 'john', 'category': 'chroma'}],\n 'documents': ['Article by john']}"
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"collection.get(where_document={\"$contains\": \"Article\"},where={\"$and\": [{\"category\": \"chroma\"}, {\"$or\": [{\"author\": \"john\"}, {\"author\": \"jack\"}]}]})"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-11T18:51:12.328062Z",
"start_time": "2023-08-11T18:51:12.315943Z"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [],
"metadata": {
"collapsed": false
}
}
],
"metadata": {