mirror of
https://github.com/placeholder-soft/chroma.git
synced 2026-01-12 22:44:55 +08:00
[ENH]: CIP-4: In and Not In Metadata Filters (#1081)
Cherry-picked from #1029 ## Description of changes *Summarize the changes made by this PR.* - Improvements & Bug fixes - Added support for `$in` and `$nin` metadata filters > Note: See CIP in `docs/` or example notebook for more info ## Test plan *How are these changes tested?* - [x] Tests pass locally with `pytest` for python ## Documentation Changes TBD --------- Co-authored-by: Hammad Bashir <HammadB@users.noreply.github.com>
This commit is contained in:
149
examples/basic_functionality/in_not_in_filtering.ipynb
Normal file
149
examples/basic_functionality/in_not_in_filtering.ipynb
Normal file
@@ -0,0 +1,149 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-30T12:48:38.227653Z",
|
||||
"start_time": "2023-08-30T12:48:27.744069Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Number of requested results 10 is greater than number of elements in index 3, updating n_results = 3\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'ids': [['1', '3']], 'distances': [[0.28824201226234436, 1.017508625984192]], 'metadatas': [[{'author': 'john'}, {'author': 'jill'}]], 'embeddings': None, 'documents': [['Article by john', 'Article by Jill']]}\n",
|
||||
"{'ids': ['1', '3'], 'embeddings': None, 'metadatas': [{'author': 'john'}, {'author': 'jill'}], 'documents': ['Article by john', 'Article by Jill']}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import chromadb\n",
|
||||
"\n",
|
||||
"from chromadb.utils import embedding_functions\n",
|
||||
"\n",
|
||||
"sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=\"all-MiniLM-L6-v2\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"client = chromadb.Client()\n",
|
||||
"# client.heartbeat()\n",
|
||||
"# client.reset()\n",
|
||||
"collection = client.get_or_create_collection(\"test-where-list\", embedding_function=sentence_transformer_ef)\n",
|
||||
"collection.add(documents=[\"Article by john\", \"Article by Jack\", \"Article by Jill\"],\n",
|
||||
" metadatas=[{\"author\": \"john\"}, {\"author\": \"jack\"}, {\"author\": \"jill\"}], ids=[\"1\", \"2\", \"3\"])\n",
|
||||
"\n",
|
||||
"query = [\"Give me articles by john\"]\n",
|
||||
"res = collection.query(query_texts=query,where={'author': {'$in': ['john', 'jill']}}, n_results=10)\n",
|
||||
"print(res)\n",
|
||||
"\n",
|
||||
"res_get = collection.get(where={'author': {'$in': ['john', 'jill']}})\n",
|
||||
"print(res_get)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Interactions with existing Where operators"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "752cef843ba2f900"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "{'ids': [['1']],\n 'distances': [[0.28824201226234436]],\n 'metadatas': [[{'article_type': 'blog', 'author': 'john'}]],\n 'embeddings': None,\n 'documents': [['Article by john']]}"
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"collection.upsert(documents=[\"Article by john\", \"Article by Jack\", \"Article by Jill\"],\n",
|
||||
" metadatas=[{\"author\": \"john\",\"article_type\":\"blog\"}, {\"author\": \"jack\",\"article_type\":\"social\"}, {\"author\": \"jill\",\"article_type\":\"paper\"}], ids=[\"1\", \"2\", \"3\"])\n",
|
||||
"\n",
|
||||
"collection.query(query_texts=query,where={\"$and\":[{\"author\": {'$in': ['john', 'jill']}},{\"article_type\":{\"$eq\":\"blog\"}}]}, n_results=3)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-30T12:48:49.974353Z",
|
||||
"start_time": "2023-08-30T12:48:49.938985Z"
|
||||
}
|
||||
},
|
||||
"id": "ca56cda318f9e94d"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "{'ids': [['1', '3']],\n 'distances': [[0.28824201226234436, 1.017508625984192]],\n 'metadatas': [[{'article_type': 'blog', 'author': 'john'},\n {'article_type': 'paper', 'author': 'jill'}]],\n 'embeddings': None,\n 'documents': [['Article by john', 'Article by Jill']]}"
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"collection.query(query_texts=query,where={\"$or\":[{\"author\": {'$in': ['john']}},{\"article_type\":{\"$in\":[\"paper\"]}}]}, n_results=3)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-30T12:48:53.501431Z",
|
||||
"start_time": "2023-08-30T12:48:53.481571Z"
|
||||
}
|
||||
},
|
||||
"id": "f10e79ec90c797c1"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "d97b8b6dd96261d0"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user