[ENH] CIP-2: Auth Providers Proposal (#986)

## Description of changes

*Summarize the changes made by this PR.*
 - New functionality
	 - Auth Provider client-side and server-side abstractions
	 - Basic Auth Provider

## Test plan
Unit tests for authorized endpoints

## Documentation Changes
Docs should change to describe how to use auth providers on the client
and server. CIP added in `docs/`
This commit is contained in:
Trayan Azarov
2023-08-23 05:48:55 +03:00
committed by GitHub
parent 8a7f0ba36f
commit 48700dd07f
24 changed files with 1460 additions and 45 deletions

View File

@@ -1,6 +1,15 @@
import json
from typing import Optional, cast
from typing import Sequence
from uuid import UUID
import requests
from overrides import override
import chromadb.errors as errors
import chromadb.utils.embedding_functions as ef
from chromadb.api import API
from chromadb.config import Settings, System
from chromadb.api.models.Collection import Collection
from chromadb.api.types import (
Documents,
Embeddings,
@@ -14,15 +23,13 @@ from chromadb.api.types import (
QueryResult,
CollectionMetadata,
)
import chromadb.utils.embedding_functions as ef
import requests
import json
from typing import Sequence
from chromadb.api.models.Collection import Collection
import chromadb.errors as errors
from uuid import UUID
from chromadb.auth import (
ClientAuthProvider,
)
from chromadb.auth.providers import RequestsClientAuthProtocolAdapter
from chromadb.auth.registry import resolve_provider
from chromadb.config import Settings, System
from chromadb.telemetry import Telemetry
from overrides import override
class FastAPI(API):
@@ -47,7 +54,27 @@ class FastAPI(API):
)
self._header = system.settings.chroma_server_headers
self._session = requests.Session()
if (
system.settings.chroma_client_auth_provider
and system.settings.chroma_client_auth_protocol_adapter
):
self._auth_provider = self.require(
resolve_provider(
system.settings.chroma_client_auth_provider, ClientAuthProvider
)
)
self._adapter = cast(
RequestsClientAuthProtocolAdapter,
system.require(
resolve_provider(
system.settings.chroma_client_auth_protocol_adapter,
RequestsClientAuthProtocolAdapter,
)
),
)
self._session = self._adapter.session
else:
self._session = requests.Session()
if self._header is not None:
self._session.headers.update(self._header)

207
chromadb/auth/__init__.py Normal file
View File

@@ -0,0 +1,207 @@
"""
Contains only Auth abstractions, no implementations.
"""
import base64
import logging
from abc import ABC, abstractmethod
from enum import Enum
from typing import (
Optional,
Dict,
TypeVar,
Tuple,
Generic,
)
from overrides import EnforceOverrides, override
from pydantic import SecretStr
from chromadb.config import (
Component,
System,
)
from chromadb.errors import ChromaError
logger = logging.getLogger(__name__)
T = TypeVar("T")
S = TypeVar("S")
class AuthInfoType(Enum):
    """Enumerates the places on a request where auth information can be carried."""

    COOKIE = "cookie"
    HEADER = "header"
    URL = "url"
    # Used for gRPC.
    METADATA = "metadata"
class ClientAuthResponse(EnforceOverrides, ABC):
    """Outcome of a client-side authentication: the auth info and its carrier type."""

    @abstractmethod
    def get_auth_info_type(self) -> AuthInfoType:
        """Return the kind of carrier (header, cookie, ...) this auth info uses."""

    @abstractmethod
    def get_auth_info(self) -> Tuple[str, SecretStr]:
        """Return the auth info as a ``(name, secret value)`` pair."""
class ClientAuthProvider(Component):
    """Abstract component that performs client-side authentication."""

    def __init__(self, system: System) -> None:
        super().__init__(system)

    @abstractmethod
    def authenticate(self) -> ClientAuthResponse:
        """Authenticate and return the resulting ``ClientAuthResponse``."""
class ClientAuthConfigurationProvider(Component):
    """Abstract component that supplies client-side auth configuration."""

    def __init__(self, system: System) -> None:
        super().__init__(system)

    @abstractmethod
    def get_configuration(self) -> Optional[T]:
        """Return the configuration object, or ``None``."""
class ClientAuthCredentialsProvider(Component, Generic[T]):
    """Abstract component that supplies client credentials of type ``T``."""

    def __init__(self, system: System) -> None:
        super().__init__(system)

    @abstractmethod
    def get_credentials(self) -> T:
        """Return the credentials held by this provider."""
class ClientAuthProtocolAdapter(Component, Generic[T]):
    """Abstract component that injects credentials into a protocol-specific context ``T``."""

    def __init__(self, system: System) -> None:
        super().__init__(system)

    @abstractmethod
    def inject_credentials(self, injection_context: T) -> None:
        """Add the client's credentials to ``injection_context`` in place."""
# SERVER-SIDE Abstractions
class ServerAuthenticationRequest(EnforceOverrides, ABC, Generic[T]):
    """Server-side view of an incoming request from which auth info can be read."""

    @abstractmethod
    def get_auth_info(
        self, auth_info_type: AuthInfoType, auth_info_id: Optional[str] = None
    ) -> T:
        """
        Look up auth info by carrier type and identifier.

        :param auth_info_type: Where the auth info lives (e.g. header, cookie, url)
        :param auth_info_id: Identifier within that carrier (e.g. the header name,
            cookie name or url parameter name)
        :return: The auth info; its concrete type is implementation specific
        """
class ServerAuthenticationResponse(EnforceOverrides, ABC):
    """Result of a server-side authentication attempt."""

    def success(self) -> bool:
        """Report whether authentication succeeded; subclasses must override."""
        raise NotImplementedError()
class ServerAuthProvider(Component):
    """Abstract component that authenticates incoming server requests."""

    def __init__(self, system: System) -> None:
        super().__init__(system)

    @abstractmethod
    def authenticate(self, request: ServerAuthenticationRequest[T]) -> bool:
        """Return ``True`` when ``request`` is authenticated, ``False`` otherwise."""
class ChromaAuthMiddleware(Component):
    """Abstract, server-framework-agnostic authentication middleware."""

    def __init__(self, system: System) -> None:
        super().__init__(system)

    @abstractmethod
    def authenticate(
        self, request: ServerAuthenticationRequest[T]
    ) -> Optional[ServerAuthenticationResponse]:
        """Authenticate ``request`` and return the response (or ``None``)."""

    @abstractmethod
    def ignore_operation(self, verb: str, path: str) -> bool:
        """Return ``True`` when the given verb/path pair should bypass auth."""

    @abstractmethod
    def instrument_server(self, app: T) -> None:
        """Hook allowing the middleware to modify the server application ``app``."""
class ServerAuthConfigurationProvider(Component):
    """Abstract component that supplies server-side auth configuration."""

    def __init__(self, system: System) -> None:
        super().__init__(system)

    @abstractmethod
    def get_configuration(self) -> Optional[T]:
        """Return the configuration object, or ``None``."""
class AuthenticationError(ChromaError):
    """Chroma error reported with code 401."""

    @override
    def code(self) -> int:
        """Return the numeric code for this error (401)."""
        return 401

    @classmethod
    @override
    def name(cls) -> str:
        """Return the error's name."""
        return "AuthenticationError"
class AbstractCredentials(EnforceOverrides, ABC, Generic[T]):
    """
    Container auth providers use to wrap received credentials before handing
    them to a ServerAuthCredentialsProvider for validation.
    """

    @abstractmethod
    def get_credentials(self) -> Dict[str, T]:
        """
        Return the wrapped credential data as a name -> value mapping.
        """
class SecretStrAbstractCredentials(AbstractCredentials[SecretStr]):
    """Credentials container whose values are ``SecretStr`` instances."""

    @abstractmethod
    @override
    def get_credentials(self) -> Dict[str, SecretStr]:
        """
        Return the wrapped credential data as a name -> ``SecretStr`` mapping.
        """
class BasicAuthCredentials(SecretStrAbstractCredentials):
    """Username/password pair as used by HTTP Basic authentication."""

    def __init__(self, username: SecretStr, password: SecretStr) -> None:
        self.username = username
        self.password = password

    @override
    def get_credentials(self) -> Dict[str, SecretStr]:
        """Return the credentials under the keys ``username`` and ``password``."""
        return {"username": self.username, "password": self.password}

    @staticmethod
    def from_header(header: str) -> "BasicAuthCredentials":
        """
        Parses a basic auth header and returns a BasicAuthCredentials object.

        :param header: Header value, i.e. ``Basic <base64(username:password)>``
        :return: The decoded credentials
        :raises ValueError: If the decoded payload contains no ``:`` separator
            (base64/UTF-8 decoding errors from the standard library propagate too)
        """
        token = header.strip()
        # Remove only the leading scheme marker, not every occurrence in the value.
        if token.startswith("Basic "):
            token = token[len("Basic "):]
        base64_decoded = base64.b64decode(token.strip()).decode("utf-8")
        # Split on the first colon only: the password itself may contain colons.
        username, password = base64_decoded.split(":", 1)
        return BasicAuthCredentials(SecretStr(username), SecretStr(password))
class ServerAuthCredentialsProvider(Component):
    """Abstract component that validates credentials presented to the server."""

    def __init__(self, system: System) -> None:
        super().__init__(system)

    @abstractmethod
    def validate_credentials(self, credentials: AbstractCredentials[T]) -> bool:
        """Return ``True`` when ``credentials`` are valid, ``False`` otherwise."""

View File

@@ -0,0 +1,96 @@
import base64
import logging
from typing import Tuple, Any, cast
from overrides import override
from pydantic import SecretStr
from chromadb.auth import (
ServerAuthProvider,
ClientAuthProvider,
ServerAuthenticationRequest,
ServerAuthCredentialsProvider,
AuthInfoType,
BasicAuthCredentials,
ClientAuthCredentialsProvider,
ClientAuthResponse,
)
from chromadb.auth.registry import register_provider, resolve_provider
from chromadb.config import System
from chromadb.utils import get_class
logger = logging.getLogger(__name__)
__all__ = ["BasicAuthServerProvider", "BasicAuthClientProvider"]
class BasicAuthClientAuthResponse(ClientAuthResponse):
    """Client auth response carrying Basic credentials in the Authorization header."""

    def __init__(self, credentials: SecretStr) -> None:
        # The secret is placed verbatim after the "Basic " scheme prefix.
        self._credentials = credentials

    @override
    def get_auth_info_type(self) -> AuthInfoType:
        """Basic auth info is always sent as a header."""
        return AuthInfoType.HEADER

    @override
    def get_auth_info(self) -> Tuple[str, SecretStr]:
        """Return the header name and its ``Basic ...`` value."""
        header_value = SecretStr(f"Basic {self._credentials.get_secret_value()}")
        return "Authorization", header_value
@register_provider("basic")
class BasicAuthClientProvider(ClientAuthProvider):
    """Client auth provider that base64-encodes configured credentials for Basic auth."""

    _credentials_provider: ClientAuthCredentialsProvider[Any]

    def __init__(self, system: System) -> None:
        super().__init__(system)
        settings = system.settings
        self._settings = settings
        settings.require("chroma_client_auth_credentials_provider")
        # Load the credentials provider class named in the settings.
        provider_cls = get_class(
            str(settings.chroma_client_auth_credentials_provider),
            ClientAuthCredentialsProvider,
        )
        self._credentials_provider = system.require(provider_cls)

    @override
    def authenticate(self) -> ClientAuthResponse:
        """Build the Basic auth response from the provider's credentials."""
        _creds = self._credentials_provider.get_credentials()
        raw_bytes = f"{_creds.get_secret_value()}".encode("utf-8")
        encoded = base64.b64encode(raw_bytes).decode("utf-8")
        return BasicAuthClientAuthResponse(SecretStr(encoded))
@register_provider("basic")
class BasicAuthServerProvider(ServerAuthProvider):
    """Server auth provider that validates the request's Basic Authorization header."""

    _credentials_provider: ServerAuthCredentialsProvider

    def __init__(self, system: System) -> None:
        super().__init__(system)
        settings = system.settings
        self._settings = settings
        settings.require("chroma_server_auth_credentials_provider")
        provider_cls = resolve_provider(
            str(settings.chroma_server_auth_credentials_provider),
            ServerAuthCredentialsProvider,
        )
        self._credentials_provider = cast(
            ServerAuthCredentialsProvider, system.require(provider_cls)
        )

    @override
    def authenticate(self, request: ServerAuthenticationRequest[Any]) -> bool:
        """Return whether the request's credentials validate; any failure yields False."""
        try:
            header_value = request.get_auth_info(AuthInfoType.HEADER, "Authorization")
            parsed = BasicAuthCredentials.from_header(header_value)
            return self._credentials_provider.validate_credentials(parsed)
        except Exception as e:
            # Any lookup/parsing/validation error is logged and treated as a failed auth.
            logger.error(f"BasicAuthServerProvider.authenticate failed: {repr(e)}")
            return False

121
chromadb/auth/fastapi.py Normal file
View File

@@ -0,0 +1,121 @@
# FAST API code
import logging
from typing import Optional, Dict, List, cast, Any
from overrides import override
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
from starlette.requests import Request
from starlette.responses import Response, JSONResponse
from starlette.types import ASGIApp
from chromadb.config import System
from chromadb.auth import (
ServerAuthenticationRequest,
AuthInfoType,
ServerAuthenticationResponse,
ServerAuthProvider,
ChromaAuthMiddleware,
)
from chromadb.auth.registry import resolve_provider
logger = logging.getLogger(__name__)
class FastAPIServerAuthenticationRequest(ServerAuthenticationRequest[Optional[str]]):
    """Reads auth info from a Starlette ``Request``."""

    def __init__(self, request: Request) -> None:
        self._request = request

    @override
    def get_auth_info(
        self, auth_info_type: AuthInfoType, auth_info_id: Optional[str] = None
    ) -> Optional[str]:
        """
        Return the header, cookie or query parameter named ``auth_info_id`` as a string.

        :raises ValueError: For METADATA or any unrecognised auth info type
        """
        if auth_info_type == AuthInfoType.HEADER:
            return str(self._request.headers[auth_info_id])
        if auth_info_type == AuthInfoType.COOKIE:
            return str(self._request.cookies[auth_info_id])
        if auth_info_type == AuthInfoType.URL:
            return str(self._request.query_params[auth_info_id])
        if auth_info_type == AuthInfoType.METADATA:
            raise ValueError("Metadata not supported for FastAPI")
        raise ValueError(f"Unknown auth info type: {auth_info_type}")
class FastAPIServerAuthenticationResponse(ServerAuthenticationResponse):
    """Authentication response that simply stores a success flag."""

    _auth_success: bool

    def __init__(self, auth_success: bool) -> None:
        self._auth_success = auth_success

    @override
    def success(self) -> bool:
        """Return the stored success flag."""
        return self._auth_success
class FastAPIChromaAuthMiddleware(ChromaAuthMiddleware):
    """Auth middleware that delegates to the server auth provider named in settings."""

    _auth_provider: ServerAuthProvider

    def __init__(self, system: System) -> None:
        super().__init__(system)
        self._system = system
        self._settings = system.settings
        self._settings.require("chroma_server_auth_provider")
        # Maps a path to the HTTP verbs for which auth is skipped.
        self._ignore_auth_paths: Dict[
            str, List[str]
        ] = self._settings.chroma_server_auth_ignore_paths
        provider_name = self._settings.chroma_server_auth_provider
        if provider_name:
            logger.debug(f"Server Auth Provider: {provider_name}")
            provider_cls = resolve_provider(provider_name, ServerAuthProvider)
            self._auth_provider = cast(ServerAuthProvider, self.require(provider_cls))

    @override
    def authenticate(
        self, request: ServerAuthenticationRequest[Any]
    ) -> Optional[ServerAuthenticationResponse]:
        """Run the provider on ``request`` and wrap its boolean result."""
        outcome = self._auth_provider.authenticate(request)
        return FastAPIServerAuthenticationResponse(outcome)

    @override
    def ignore_operation(self, verb: str, path: str) -> bool:
        """Return True when ``path`` is listed with ``verb`` (upper-cased) as auth-exempt."""
        if path not in self._ignore_auth_paths:
            return False
        if verb.upper() not in self._ignore_auth_paths[path]:
            return False
        logger.debug(f"Skipping auth for path {path} and method {verb}")
        return True

    @override
    def instrument_server(self, app: ASGIApp) -> None:
        """Currently a no-op."""
        # We can potentially add an `/auth` endpoint to the server to allow for more complex auth flows
        return
class FastAPIChromaAuthMiddlewareWrapper(BaseHTTPMiddleware):  # type: ignore
    """Starlette middleware that gates requests through a ``FastAPIChromaAuthMiddleware``."""

    def __init__(
        self, app: ASGIApp, auth_middleware: FastAPIChromaAuthMiddleware
    ) -> None:
        super().__init__(app)
        self._middleware = auth_middleware
        self._middleware.instrument_server(app)

    @override
    async def dispatch(
        self, request: Request, call_next: RequestResponseEndpoint
    ) -> Response:
        """Forward exempt or authenticated requests; answer 401 JSON otherwise."""
        if not self._middleware.ignore_operation(request.method, request.url.path):
            auth_request = FastAPIServerAuthenticationRequest(request)
            response = self._middleware.authenticate(auth_request)
            if not (response and response.success()):
                return JSONResponse({"error": "Unauthorized"}, status_code=401)
        else:
            logger.debug(
                f"Skipping auth for path {request.url.path} and method {request.method}"
            )
        return await call_next(request)

171
chromadb/auth/providers.py Normal file
View File

@@ -0,0 +1,171 @@
import importlib
import logging
from typing import cast, Dict, TypeVar, Any
import requests
from overrides import override
from pydantic import SecretStr
from chromadb.auth import (
ServerAuthCredentialsProvider,
AbstractCredentials,
ClientAuthCredentialsProvider,
AuthInfoType,
ClientAuthProvider,
ClientAuthProtocolAdapter,
)
from chromadb.auth.registry import register_provider, resolve_provider
from chromadb.config import System
T = TypeVar("T")
logger = logging.getLogger(__name__)
class HtpasswdServerAuthCredentialsProvider(ServerAuthCredentialsProvider):
    """
    Base credentials provider that checks a username and a bcrypt-hashed password.

    Subclasses are expected to populate ``self._creds`` with the keys
    ``username`` and ``password`` (the latter holding the bcrypt hash).
    """

    _creds: Dict[str, SecretStr]

    def __init__(self, system: System) -> None:
        super().__init__(system)
        try:
            # Equivalent to `import bcrypt`, resolved at construction time.
            self.bc = importlib.import_module("bcrypt")
        except ImportError as e:
            raise ValueError(
                "The bcrypt python package is not installed. Please install it with `pip install bcrypt`"
            ) from e

    @override
    def validate_credentials(self, credentials: AbstractCredentials[T]) -> bool:
        """
        Check the presented username and password against the stored pair.

        :param credentials: Credentials exposing ``username`` and ``password`` entries
        :return: ``True`` only if the username matches and bcrypt accepts the password
        """
        _creds = cast(Dict[str, SecretStr], credentials.get_credentials())
        if len(_creds) != 2:
            logger.error(
                "Returned credentials did not match expected format: dict[username:SecretStr, password: SecretStr]"
            )
            return False
        if "username" not in _creds or "password" not in _creds:
            logger.error("Returned credentials do not contain username or password")
            return False
        _usr_check = bool(
            _creds["username"].get_secret_value()
            == self._creds["username"].get_secret_value()
        )
        # bcrypt is only consulted when the username already matched.
        return _usr_check and self.bc.checkpw(
            _creds["password"].get_secret_value().encode("utf-8"),
            self._creds["password"].get_secret_value().encode("utf-8"),
        )
@register_provider("htpasswd_file")
class HtpasswdFileServerAuthCredentialsProvider(HtpasswdServerAuthCredentialsProvider):
    """Htpasswd credentials provider that reads ``<username>:<bcrypt passwd>`` from a file."""

    def __init__(self, system: System) -> None:
        """
        Load credentials from the first line of ``chroma_server_auth_credentials_file``.

        :raises ValueError: If that line is not exactly ``<username>:<bcrypt passwd>``
        """
        super().__init__(system)
        system.settings.require("chroma_server_auth_credentials_file")
        _file = str(system.settings.chroma_server_auth_credentials_file)
        with open(_file) as f:
            _raw_creds = f.readline().strip().split(":")
        # Validate the parsed fields themselves: checking the dict afterwards can
        # never fail, and indexing a short list would raise IndexError instead.
        if len(_raw_creds) != 2 or not all(_raw_creds):
            raise ValueError(
                "Invalid Htpasswd credentials found in [chroma_server_auth_credentials_file]. "
                "Must be <username>:<bcrypt passwd>."
            )
        self._creds = {
            "username": SecretStr(_raw_creds[0]),
            "password": SecretStr(_raw_creds[1]),
        }
class HtpasswdConfigurationServerAuthCredentialsProvider(
    HtpasswdServerAuthCredentialsProvider
):
    """Htpasswd credentials provider that reads ``<username>:<bcrypt passwd>`` from settings."""

    def __init__(self, system: System) -> None:
        """
        Load credentials from the ``chroma_server_auth_credentials`` setting.

        :raises ValueError: If the setting is not exactly ``<username>:<bcrypt passwd>``
        """
        super().__init__(system)
        system.settings.require("chroma_server_auth_credentials")
        _raw_creds = (
            str(system.settings.chroma_server_auth_credentials).strip().split(":")
        )
        # Validate the parsed fields themselves: checking the dict afterwards can
        # never fail, and indexing a short list would raise IndexError instead.
        if len(_raw_creds) != 2 or not all(_raw_creds):
            raise ValueError(
                "Invalid Htpasswd credentials found in [chroma_server_auth_credentials]. "
                "Must be <username>:<bcrypt passwd>."
            )
        self._creds = {
            "username": SecretStr(_raw_creds[0]),
            "password": SecretStr(_raw_creds[1]),
        }
class RequestsClientAuthProtocolAdapter(
    ClientAuthProtocolAdapter[requests.PreparedRequest]
):
    """Protocol adapter exposing a ``requests`` session that injects credentials on send."""

    class _Session(requests.Session):
        """Session that lets the owning adapter modify each request before sending."""

        _protocol_adapter: ClientAuthProtocolAdapter[requests.PreparedRequest]

        def __init__(
            self, protocol_adapter: ClientAuthProtocolAdapter[requests.PreparedRequest]
        ) -> None:
            super().__init__()
            self._protocol_adapter = protocol_adapter

        @override
        def send(
            self, request: requests.PreparedRequest, **kwargs: Any
        ) -> requests.Response:
            """Inject credentials into ``request``, then send it as usual."""
            self._protocol_adapter.inject_credentials(request)
            return super().send(request, **kwargs)

    _session: _Session
    _auth_provider: ClientAuthProvider

    def __init__(self, system: System) -> None:
        super().__init__(system)
        system.settings.require("chroma_client_auth_provider")
        provider_cls = resolve_provider(
            str(system.settings.chroma_client_auth_provider), ClientAuthProvider
        )
        self._auth_provider = cast(ClientAuthProvider, system.require(provider_cls))
        self._session = self._Session(self)
        # Authenticate once up front; the result is reused for every request.
        self._auth_header = self._auth_provider.authenticate()

    @property
    def session(self) -> requests.Session:
        """The credential-injecting session owned by this adapter."""
        return self._session

    @override
    def inject_credentials(self, injection_context: requests.PreparedRequest) -> None:
        """
        Set the auth header on ``injection_context``.

        :raises ValueError: If the auth response is not header based
        """
        if self._auth_header.get_auth_info_type() != AuthInfoType.HEADER:
            raise ValueError(
                f"Unsupported auth type: {self._auth_header.get_auth_info_type()}"
            )
        header_name, header_secret = self._auth_header.get_auth_info()
        injection_context.headers[header_name] = header_secret.get_secret_value()
class ConfigurationClientAuthCredentialsProvider(
    ClientAuthCredentialsProvider[SecretStr]
):
    """Client credentials provider backed by the ``chroma_client_auth_credentials`` setting."""

    _creds: SecretStr

    def __init__(self, system: System) -> None:
        super().__init__(system)
        system.settings.require("chroma_client_auth_credentials")
        configured = str(system.settings.chroma_client_auth_credentials)
        self._creds = SecretStr(configured)

    @override
    def get_credentials(self) -> SecretStr:
        """Return the configured credentials wrapped in a ``SecretStr``."""
        return self._creds

107
chromadb/auth/registry.py Normal file
View File

@@ -0,0 +1,107 @@
import importlib
import logging
import pkgutil
from typing import Union, Dict, Type, Callable
from chromadb.auth import (
ClientAuthConfigurationProvider,
ClientAuthCredentialsProvider,
ClientAuthProtocolAdapter,
ServerAuthProvider,
ServerAuthConfigurationProvider,
ServerAuthCredentialsProvider,
ClientAuthProvider,
)
from chromadb.utils import get_class
logger = logging.getLogger(__name__)
ProviderTypes = Union[
"ClientAuthProvider",
"ClientAuthConfigurationProvider",
"ClientAuthCredentialsProvider",
"ServerAuthProvider",
"ServerAuthConfigurationProvider",
"ServerAuthCredentialsProvider",
"ClientAuthProtocolAdapter",
]
_provider_registry = {
"client_auth_providers": {},
"client_auth_config_providers": {},
"client_auth_credentials_providers": {},
"client_auth_protocol_adapters": {},
"server_auth_providers": {},
"server_auth_config_providers": {},
"server_auth_credentials_providers": {},
} # type: Dict[str, Dict[str, Type[ProviderTypes]]]
def register_classes_from_package(package_name: str) -> None:
    """Import every direct submodule of ``package_name``.

    Importing a module executes its ``@register_provider`` decorators, which is
    what populates the provider registry.
    """
    package = importlib.import_module(package_name)
    for module_info in pkgutil.iter_modules(package.__path__):
        importlib.import_module(f"{package_name}.{module_info.name}")
def register_provider(
    short_hand: str,
) -> Callable[[Type[ProviderTypes]], Type[ProviderTypes]]:
    """Class decorator that registers a provider under the name ``short_hand``.

    The registry section is chosen from the first matching provider base class.

    :param short_hand: Name under which the decorated class can later be resolved
    :return: A decorator that registers the class and returns it unchanged
    :raises ValueError: (from the decorator) if the class is not a supported
        provider type
    """

    def decorator(cls: Type[ProviderTypes]) -> Type[ProviderTypes]:
        # Registration is routine and happens at import time, so it is logged
        # at debug level rather than as an error.
        logger.debug("Registering provider: %s", short_hand)
        # Order matters: the first base class that matches wins.
        _sections = (
            (ClientAuthProvider, "client_auth_providers"),
            (ClientAuthConfigurationProvider, "client_auth_config_providers"),
            (ClientAuthCredentialsProvider, "client_auth_credentials_providers"),
            (ClientAuthProtocolAdapter, "client_auth_protocol_adapters"),
            (ServerAuthProvider, "server_auth_providers"),
            (ServerAuthConfigurationProvider, "server_auth_config_providers"),
            (ServerAuthCredentialsProvider, "server_auth_credentials_providers"),
        )
        for _base, _key in _sections:
            if issubclass(cls, _base):
                _provider_registry[_key][short_hand] = cls
                return cls
        raise ValueError(
            "Only ClientAuthProvider, ClientAuthConfigurationProvider, "
            "ClientAuthCredentialsProvider, ServerAuthProvider, "
            "ServerAuthConfigurationProvider, and ServerAuthCredentialsProvider, ClientAuthProtocolAdapter "
            "can be registered."
        )

    return decorator
def resolve_provider(
    class_or_name: str, cls: Type[ProviderTypes]
) -> Type[ProviderTypes]:
    """Resolve a provider class from a registered short name or a class path.

    :param class_or_name: Registered short-hand, or a name passed on to ``get_class``
    :param cls: Provider base type; selects which registry section is searched
    :return: The registered class, or whatever ``get_class`` returns for the name
    :raises ValueError: If ``cls`` is not one of the supported provider types
    """
    # Importing the auth package's modules runs their registration decorators.
    register_classes_from_package("chromadb.auth")
    _sections = (
        (ClientAuthProvider, "client_auth_providers"),
        (ClientAuthConfigurationProvider, "client_auth_config_providers"),
        (ClientAuthCredentialsProvider, "client_auth_credentials_providers"),
        (ClientAuthProtocolAdapter, "client_auth_protocol_adapters"),
        (ServerAuthProvider, "server_auth_providers"),
        (ServerAuthConfigurationProvider, "server_auth_config_providers"),
        (ServerAuthCredentialsProvider, "server_auth_credentials_providers"),
    )
    _key = None
    for _base, _section in _sections:
        if issubclass(cls, _base):
            _key = _section
            break
    if _key is None:
        raise ValueError(
            "Only ClientAuthProvider, ClientAuthConfigurationProvider, "
            "ClientAuthCredentialsProvider, ServerAuthProvider, "
            "ServerAuthConfigurationProvider, and ServerAuthCredentialsProvider,ClientAuthProtocolAdapter "
            "can be registered."
        )
    _registered = _provider_registry[_key]
    if class_or_name in _registered:
        return _registered[class_or_name]
    return get_class(class_or_name, cls)  # type: ignore

View File

View File

@@ -1,12 +1,16 @@
from pydantic import BaseSettings
from typing import Optional, List, Any, Dict, TypeVar, Set, cast, Iterable, Type
from typing_extensions import Literal
from abc import ABC
import importlib
import logging
from overrides import EnforceOverrides, override
from graphlib import TopologicalSorter
import inspect
import logging
import os
from abc import ABC
from graphlib import TopologicalSorter
from typing import Optional, List, Any, Dict, Set, Iterable
from typing import Type, TypeVar, cast
from overrides import EnforceOverrides
from overrides import override
from pydantic import BaseSettings, validator
from typing_extensions import Literal
# The thin client will have a flag to control which implementations to use
is_thin_client = False
@@ -15,10 +19,8 @@ try:
except ImportError:
is_thin_client = False
logger = logging.getLogger(__name__)
LEGACY_ERROR = """\033[91mYou are using a deprecated configuration of Chroma.
\033[94mIf you do not have data you wish to migrate, you only need to change how you construct
@@ -59,7 +61,7 @@ _abstract_type_keys: Dict[str, str] = {
}
class Settings(BaseSettings):
class Settings(BaseSettings): # type: ignore
environment: str = ""
# Legacy config has to be kept around because pydantic will error on nonexisting keys
@@ -89,6 +91,54 @@ class Settings(BaseSettings):
chroma_server_grpc_port: Optional[str] = None
chroma_server_cors_allow_origins: List[str] = [] # eg ["http://localhost:3000"]
chroma_server_auth_provider: Optional[str] = None
@validator("chroma_server_auth_provider", pre=True, always=True)
def chroma_server_auth_provider_non_empty(
    cls: Type["Settings"], v: str
) -> Optional[str]:
    """Reject a whitespace-only ``chroma_server_auth_provider``; pass other values through."""
    whitespace_only = bool(v) and not v.strip()
    if whitespace_only:
        raise ValueError(
            "chroma_server_auth_provider cannot be empty or just whitespace"
        )
    return v
chroma_server_auth_configuration_provider: Optional[str] = None
chroma_server_auth_configuration_file: Optional[str] = None
chroma_server_auth_credentials_provider: Optional[str] = None
chroma_server_auth_credentials_file: Optional[str] = None
chroma_server_auth_credentials: Optional[str] = None
@validator("chroma_server_auth_credentials_file", pre=True, always=True)
def chroma_server_auth_credentials_file_non_empty_file_exists(
    cls: Type["Settings"], v: str
) -> Optional[str]:
    """Reject a whitespace-only or non-existent credentials file path.

    Falsy values (e.g. ``None``) are returned unchanged without any check.
    """
    if not v:
        return v
    if not v.strip():
        raise ValueError(
            "chroma_server_auth_credentials_file cannot be empty or just whitespace"
        )
    if not os.path.isfile(os.path.join(v)):
        raise ValueError(
            f"chroma_server_auth_credentials_file [{v}] does not exist"
        )
    return v
chroma_client_auth_provider: Optional[str] = None
chroma_server_auth_ignore_paths: Dict[str, List[str]] = {
"/api/v1": ["GET"],
"/api/v1/heartbeat": ["GET"],
"/api/v1/version": ["GET"],
}
chroma_client_auth_credentials_provider: Optional[
str
] = "chromadb.auth.providers.ConfigurationClientAuthCredentialsProvider"
chroma_client_auth_protocol_adapter: Optional[
str
] = "chromadb.auth.providers.RequestsClientAuthProtocolAdapter"
chroma_client_auth_credentials_file: Optional[str] = None
chroma_client_auth_credentials: Optional[str] = None
anonymized_telemetry: bool = True
allow_reset: bool = False
@@ -106,7 +156,7 @@ class Settings(BaseSettings):
def __getitem__(self, key: str) -> Any:
val = getattr(self, key)
# Error on legacy config values
if val in _legacy_config_values:
if isinstance(val, str) and val in _legacy_config_values:
raise ValueError(LEGACY_ERROR)
return val
@@ -158,7 +208,6 @@ class Component(ABC, EnforceOverrides):
class System(Component):
settings: Settings
_instances: Dict[Type[Component], Component]
def __init__(self, settings: Settings):
@@ -169,7 +218,6 @@ class System(Component):
"Chroma is running in http-only client mode, and can only be run with 'chromadb.api.fastapi.FastAPI' as the chroma_api_impl. \
see https://docs.trychroma.com/usage-guide?lang=py#using-the-python-http-only-client for more information."
)
# Validate settings don't contain any legacy config values
for key in _legacy_config_keys:
if settings[key] is not None:
@@ -225,7 +273,9 @@ class System(Component):
def reset_state(self) -> None:
"""Reset the state of this system and all constituents in reverse dependency order"""
if not self.settings.allow_reset:
raise ValueError("Resetting is not allowed by this configuration (to enable it, set `allow_reset` to `True` in your Settings() or include `ALLOW_RESET=TRUE` in your environment variables)")
raise ValueError(
"Resetting is not allowed by this configuration (to enable it, set `allow_reset` to `True` in your Settings() or include `ALLOW_RESET=TRUE` in your environment variables)"
)
for component in reversed(list(self.components())):
component.reset_state()

View File

@@ -8,10 +8,13 @@ from fastapi.routing import APIRoute
from fastapi import HTTPException, status
from uuid import UUID
import chromadb
from chromadb.api.models.Collection import Collection
from chromadb.api.types import GetResult, QueryResult
from chromadb.auth.fastapi import (
FastAPIChromaAuthMiddleware,
FastAPIChromaAuthMiddlewareWrapper,
)
from chromadb.config import Settings
import chromadb.server
import chromadb.api
@@ -110,6 +113,12 @@ class FastAPI(chromadb.server.Server):
allow_origins=settings.chroma_server_cors_allow_origins,
allow_methods=["*"],
)
if settings.chroma_server_auth_provider:
self._auth_middleware = self._api.require(FastAPIChromaAuthMiddleware)
self._app.add_middleware(
FastAPIChromaAuthMiddlewareWrapper,
auth_middleware=self._auth_middleware,
)
self.router = ChromaAPIRouter()

View File

@@ -0,0 +1,12 @@
import pytest
def test_invalid_auth_cred(api_wrong_cred):
    """Listing collections through the wrong-credentials client must fail as Unauthorized."""
    with pytest.raises(Exception) as exc_info:
        api_wrong_cred.list_collections()
    message = str(exc_info.value)
    assert "Unauthorized" in message
def test_server_basic_auth(api_with_server_auth):
    """An authenticated client can list collections and gets none back."""
    collection_count = len(api_with_server_auth.list_collections())
    assert collection_count == 0

View File

@@ -1,14 +1,10 @@
from chromadb.config import Settings, System
from chromadb.api import API
from chromadb.ingest import Producer
import chromadb.server.fastapi
from requests.exceptions import ConnectionError
import hypothesis
import tempfile
import logging
import multiprocessing
import os
import uvicorn
import shutil
import socket
import tempfile
import time
import pytest
from typing import (
Generator,
Iterator,
@@ -18,19 +14,23 @@ from typing import (
Tuple,
Callable,
)
from typing_extensions import Protocol
import shutil
import logging
import socket
import multiprocessing
import hypothesis
import pytest
import uvicorn
from requests.exceptions import ConnectionError
from typing_extensions import Protocol
import chromadb.server.fastapi
from chromadb.api import API
from chromadb.config import Settings, System
from chromadb.ingest import Producer
from chromadb.types import SeqId, SubmitEmbeddingRecord
from chromadb.db.mixins import embeddings_queue
root_logger = logging.getLogger()
root_logger.setLevel(logging.DEBUG) # This will only run when testing
logger = logging.getLogger(__name__)
hypothesis.settings.register_profile(
@@ -53,7 +53,12 @@ def find_free_port() -> int:
def _run_server(
port: int, is_persistent: bool = False, persist_directory: Optional[str] = None
port: int,
is_persistent: bool = False,
persist_directory: Optional[str] = None,
chroma_server_auth_provider: Optional[str] = None,
chroma_server_auth_credentials_provider: Optional[str] = None,
chroma_server_auth_credentials_file: Optional[str] = None,
) -> None:
"""Run a Chroma server locally"""
if is_persistent and persist_directory:
@@ -66,6 +71,9 @@ def _run_server(
is_persistent=is_persistent,
persist_directory=persist_directory,
allow_reset=True,
chroma_server_auth_provider=chroma_server_auth_provider,
chroma_server_auth_credentials_provider=chroma_server_auth_credentials_provider,
chroma_server_auth_credentials_file=chroma_server_auth_credentials_file,
)
else:
settings = Settings(
@@ -76,6 +84,9 @@ def _run_server(
chroma_segment_manager_impl="chromadb.segment.impl.manager.local.LocalSegmentManager",
is_persistent=False,
allow_reset=True,
chroma_server_auth_provider=chroma_server_auth_provider,
chroma_server_auth_credentials_provider=chroma_server_auth_credentials_provider,
chroma_server_auth_credentials_file=chroma_server_auth_credentials_file,
)
server = chromadb.server.fastapi.FastAPI(settings)
uvicorn.run(server.app(), host="0.0.0.0", port=port, log_level="error")
@@ -94,18 +105,41 @@ def _await_server(api: API, attempts: int = 0) -> None:
_await_server(api, attempts + 1)
def _fastapi_fixture(is_persistent: bool = False) -> Generator[System, None, None]:
def _fastapi_fixture(
is_persistent: bool = False,
chroma_server_auth_provider: Optional[str] = None,
chroma_server_auth_credentials_provider: Optional[str] = None,
chroma_client_auth_provider: Optional[str] = None,
chroma_server_auth_credentials_file: Optional[str] = None,
chroma_client_auth_credentials: Optional[str] = None,
) -> Generator[System, None, None]:
"""Fixture generator that launches a server in a separate process, and yields a
fastapi client connect to it"""
port = find_free_port()
logger.info(f"Running test FastAPI server on port {port}")
ctx = multiprocessing.get_context("spawn")
args: Tuple[int, bool, Optional[str]] = (port, False, None)
args: Tuple[
int, bool, Optional[str], Optional[str], Optional[str], Optional[str]
] = (
port,
False,
None,
chroma_server_auth_provider,
chroma_server_auth_credentials_provider,
chroma_server_auth_credentials_file,
)
persist_directory = None
if is_persistent:
persist_directory = tempfile.mkdtemp()
args = (port, is_persistent, persist_directory)
args = (
port,
is_persistent,
persist_directory,
chroma_server_auth_provider,
chroma_server_auth_credentials_provider,
chroma_server_auth_credentials_file,
)
proc = ctx.Process(target=_run_server, args=args, daemon=True)
proc.start()
settings = Settings(
@@ -113,6 +147,8 @@ def _fastapi_fixture(is_persistent: bool = False) -> Generator[System, None, Non
chroma_server_host="localhost",
chroma_server_http_port=str(port),
allow_reset=True,
chroma_client_auth_provider=chroma_client_auth_provider,
chroma_client_auth_credentials=chroma_client_auth_credentials,
)
system = System(settings)
api = system.instance(API)
@@ -134,6 +170,88 @@ def fastapi_persistent() -> Generator[System, None, None]:
return _fastapi_fixture(is_persistent=True)
# Fully qualified provider names shared by the basic-auth fixtures below.
_BASIC_AUTH_SERVER_PROVIDER = "chromadb.auth.basic.BasicAuthServerProvider"
_BASIC_AUTH_CLIENT_PROVIDER = "chromadb.auth.basic.BasicAuthClientProvider"
_HTPASSWD_CREDENTIALS_PROVIDER = (
    "chromadb.auth.providers.HtpasswdFileServerAuthCredentialsProvider"
)


def _basic_auth_fixture(
    chroma_server_auth_provider: str,
    chroma_server_auth_credentials_provider: str,
    chroma_client_auth_provider: str,
    chroma_client_auth_credentials: str,
) -> Generator[System, None, None]:
    """Write the test htpasswd file, run a non-persistent server, then clean up.

    Shared implementation for the ``fastapi_server_auth*`` fixtures, which only
    differ in the provider names and client credentials they pass in.

    Args:
        chroma_server_auth_provider: Server auth provider name forwarded to
            ``_fastapi_fixture``.
        chroma_server_auth_credentials_provider: Server credentials provider
            name forwarded to ``_fastapi_fixture``.
        chroma_client_auth_provider: Client auth provider name forwarded to
            ``_fastapi_fixture``.
        chroma_client_auth_credentials: Client credentials string forwarded to
            ``_fastapi_fixture``.

    Yields:
        Every item produced by ``_fastapi_fixture``.
    """
    server_auth_file = os.path.abspath(os.path.join(".", "server.htpasswd"))
    with open(server_auth_file, "w") as f:
        # Single htpasswd entry for user "admin".
        # NOTE(review): presumably the hash of the password "admin" - confirm.
        f.write("admin:$2y$05$e5sRb6NCcSH3YfbIxe1AGu2h5K7OOd982OXKmd8WyQ3DRQ4MvpnZS\n")
    yield from _fastapi_fixture(
        is_persistent=False,
        chroma_server_auth_provider=chroma_server_auth_provider,
        chroma_server_auth_credentials_provider=chroma_server_auth_credentials_provider,
        # The server is handed the relative path of the file written above.
        chroma_server_auth_credentials_file="./server.htpasswd",
        chroma_client_auth_provider=chroma_client_auth_provider,
        chroma_client_auth_credentials=chroma_client_auth_credentials,
    )
    # Only reached once the underlying fixture generator has been exhausted.
    os.remove(server_auth_file)


def fastapi_server_auth() -> Generator[System, None, None]:
    """Basic-auth server/client pair configured with fully qualified provider names."""
    yield from _basic_auth_fixture(
        chroma_server_auth_provider=_BASIC_AUTH_SERVER_PROVIDER,
        chroma_server_auth_credentials_provider=_HTPASSWD_CREDENTIALS_PROVIDER,
        chroma_client_auth_provider=_BASIC_AUTH_CLIENT_PROVIDER,
        chroma_client_auth_credentials="admin:admin",
    )


def fastapi_server_auth_param() -> Generator[System, None, None]:
    """Basic-auth server/client pair; same configuration as ``fastapi_server_auth``."""
    yield from _basic_auth_fixture(
        chroma_server_auth_provider=_BASIC_AUTH_SERVER_PROVIDER,
        chroma_server_auth_credentials_provider=_HTPASSWD_CREDENTIALS_PROVIDER,
        chroma_client_auth_provider=_BASIC_AUTH_CLIENT_PROVIDER,
        chroma_client_auth_credentials="admin:admin",
    )


# TODO we need a generator for auth providers
def fastapi_server_auth_file() -> Generator[System, None, None]:
    """Basic-auth server/client pair; same configuration as ``fastapi_server_auth``."""
    yield from _basic_auth_fixture(
        chroma_server_auth_provider=_BASIC_AUTH_SERVER_PROVIDER,
        chroma_server_auth_credentials_provider=_HTPASSWD_CREDENTIALS_PROVIDER,
        chroma_client_auth_provider=_BASIC_AUTH_CLIENT_PROVIDER,
        chroma_client_auth_credentials="admin:admin",
    )


def fastapi_server_auth_shorthand() -> Generator[System, None, None]:
    """Basic-auth server/client pair configured via the shorthand provider names."""
    yield from _basic_auth_fixture(
        chroma_server_auth_provider="basic",
        chroma_server_auth_credentials_provider="htpasswd_file",
        chroma_client_auth_provider="basic",
        chroma_client_auth_credentials="admin:admin",
    )


@pytest.fixture(scope="function")
def fastapi_server_auth_invalid_cred() -> Generator[System, None, None]:
    """Basic-auth server whose client is configured with ``admin:admin1``.

    The client credentials differ from the ``admin:admin`` pair used by the
    other auth fixtures in this file.
    """
    yield from _basic_auth_fixture(
        chroma_server_auth_provider=_BASIC_AUTH_SERVER_PROVIDER,
        chroma_server_auth_credentials_provider=_HTPASSWD_CREDENTIALS_PROVIDER,
        chroma_client_auth_provider=_BASIC_AUTH_CLIENT_PROVIDER,
        chroma_client_auth_credentials="admin:admin1",
    )
def integration() -> Generator[System, None, None]:
"""Fixture generator for returning a client configured via environmenet
variables, intended for externally configured integration tests
@@ -192,11 +310,25 @@ def system_fixtures() -> List[Callable[[], Generator[System, None, None]]]:
return fixtures
def system_fixtures_auth() -> List[Callable[[], Generator[System, None, None]]]:
    """Return the auth-enabled system fixture factories used to parametrize tests."""
    return [
        fastapi_server_auth_param,
        fastapi_server_auth_file,
        fastapi_server_auth_shorthand,
    ]
@pytest.fixture(scope="module", params=system_fixtures())
def system(request: pytest.FixtureRequest) -> Generator[System, None, None]:
    """Module-scoped fixture parametrized over every factory in ``system_fixtures()``.

    Yields the first item produced by the selected factory's generator.
    """
    # The factory's generator is advanced exactly once and never resumed from
    # here, so statements after the factory's own `yield` are not run by this
    # fixture.
    yield next(request.param())
@pytest.fixture(scope="module", params=system_fixtures_auth())
def system_auth(request: pytest.FixtureRequest) -> Generator[System, None, None]:
    """Module-scoped fixture parametrized over every factory in ``system_fixtures_auth()``.

    Yields the first item produced by the selected factory's generator.
    """
    # The factory's generator is advanced exactly once and never resumed from
    # here, so statements after the factory's own `yield` are not run by this
    # fixture.
    yield next(request.param())
@pytest.fixture(scope="function")
def api(system: System) -> Generator[API, None, None]:
system.reset_state()
@@ -204,6 +336,23 @@ def api(system: System) -> Generator[API, None, None]:
yield api
@pytest.fixture(scope="function")
def api_wrong_cred(
    fastapi_server_auth_invalid_cred: System,
) -> Generator[API, None, None]:
    """Yield the API instance of the system configured with the invalid-credential fixture.

    The system's state is reset before the API instance is looked up.
    """
    auth_system = fastapi_server_auth_invalid_cred
    auth_system.reset_state()
    yield auth_system.instance(API)
@pytest.fixture(scope="function")
def api_with_server_auth(system_auth: System) -> Generator[API, None, None]:
    """Yield the API instance of the current auth-enabled system.

    The system's state is reset before the API instance is looked up.
    """
    system_auth.reset_state()
    yield system_auth.instance(API)
# Producer / Consumer fixtures #

View File

@@ -1,4 +1,5 @@
# type: ignore
import chromadb
from chromadb.api.types import QueryResult
from chromadb.config import Settings

View File

@@ -0,0 +1,12 @@
import importlib
from typing import Type, TypeVar, cast
C = TypeVar("C")


def get_class(fqn: str, type: Type[C]) -> Type[C]:
    """Given a fully qualified class name, import the module and return the class.

    Args:
        fqn: Dotted path of the form ``package.module.ClassName``.
        type: Expected class type. It is used for static typing only; the
            resolved attribute is not checked against it at runtime.

    Returns:
        The attribute named by the last component of ``fqn``, looked up on the
        imported module and cast to ``Type[C]``.

    Raises:
        ValueError: If ``fqn`` contains no ``.`` separating module and class.
        ImportError: If the module part cannot be imported.
        AttributeError: If the module has no attribute with the class name.
    """
    # rpartition never fails to unpack, so an unqualified name can be reported
    # with a clear message instead of "not enough values to unpack".
    module_name, separator, class_name = fqn.rpartition(".")
    if not separator:
        raise ValueError(
            f"Expected a fully qualified class name (module.ClassName), got: {fqn!r}"
        )
    module = importlib.import_module(module_name)
    cls = getattr(module, class_name)
    return cast(Type[C], cls)

View File

@@ -16,6 +16,8 @@ services:
command: uvicorn chromadb.app:app --reload --workers 1 --host 0.0.0.0 --port 8000 --log-config log_config.yml
environment:
- IS_PERSISTENT=TRUE
- CHROMA_SERVER_AUTH_PROVIDER=${CHROMA_SERVER_AUTH_PROVIDER}
- CHROMA_SERVER_AUTH_PROVIDER_CONFIG=${CHROMA_SERVER_AUTH_PROVIDER_CONFIG}
ports:
- 8000:8000
networks:

View File

@@ -0,0 +1,190 @@
# CIP-2: Auth Providers Proposal
## Status
Current Status: `Under Discussion`
## **Motivation**
Currently, Chroma does not provide any authentication mechanism. This CIP proposes to
add authentication abstractions and basic authentication mechanisms to Chroma.
There are intrinsic and extrinsic motivations for this CIP. The intrinsic motivation
is to provide a secure way to access Chroma as adoption grows and the team is gearing up to release a cloud offering.
The extrinsic motivation is driven by the community which is deploying Chroma in both public and private clouds and
in test and production environments. The community has expressed the need for authentication and authorization.
> Observation: We consider the Auth to be applicable to client-server mode.
## **Public Interfaces**
Changes to the public interface are related to the `Settings` class where we introduce new optional attributes to
control server and client-side auth providers.
## **Proposed Changes**
We propose two abstraction groups, one for the server-side and another for the client-side. In
addition we also introduce a FastAPI/Starlette middleware adapter which will allow using the server-side abstractions
in the context of FastAPI. For client-side we rely on `requests`
### Architecture Overview
Architecture Overview:
![cip-2-arch.png](assets/cip-2-arch.png)
Request Sequence:
![cip-2-seq.png](assets/cip-2-seq.png)
### Constraints
This section provides the architectural constraints for the authentication framework. The constraints are a set of
restrictions we impose to make the design simpler and more robust.
- There must be at most one active client-side auth provider
- There must be at most one active client-side credentials provider
- There must be at most one active server-side auth provider
- There must be at most one active server-side auth configuration provider
- There must be at most one active server-side auth credentials provider
### Core Concepts
- Auth Provider - an abstraction that provides authentication functionality for either client or server-side. The
provider is responsible for validating client credentials using (if available) configuration and credentials
providers. The auth provider is also responsible for carrying the Chroma-leg of any authentication flow.
- Auth Configuration Provider - an abstraction that provides configuration for auth providers. The configuration can be
loaded from a file, env vars or programmatically. The configuration is used for validating and/or accessing user
credentials. Examples: secret key for JWT token based auth, DB URL for DB based auth, etc. Depending on sensitivity of
the information stored in the configuration, the provider should implement the necessary interfaces to access such
information in a secure way.
- Auth Credentials Provider - an abstraction that provides credentials for auth providers. The credentials can be
loaded from a file, env vars or programmatically. The credentials are used for validating client-side credentials (for
server-side auth) and retrieving or generating client-side credentials (for client-side auth).
#### Abstractions
##### Server-Side
We suggest multiple abstractions on the server-side to allow for easy integration with different auth providers.
We suggest the following abstractions:
> Note: All abstractions are defined under `chromadb.auth` package
- `ServerAuthProvider` - this is the base server auth provider abstraction that allows any server implementation of
Chroma to support variety of auth providers. The main responsibility of the auth provider is to orchestrate the auth
flow by gluing together the auth configuration and credentials providers.
- `ChromaAuthMiddleware` - The auth middleware is effectively an adapter responsible for providing server specific
implementation of the auth middleware. This includes three general types of operations - forwarding authentication to
the auth provider, instrumenting the server if needed to support a specific auth flow, ignore certain
actions/operations (e.g. in REST this would be verb+path) that should not be authenticated.
- `ServerAuthenticationRequest` - An abstraction for querying for authentication data from server specific
implementation.
- `ServerAuthenticationResponse` - An abstraction for returning authentication data to server specific implementation.
- `ServerAuthConfigurationProvider` - this is the base abstraction for auth configuration providers. The provider is
responsible for loading auth configuration from a file, env vars or programmatically.
- `AbstractCredentials` - base abstraction for credentials encapsulation from server to Auth Credentials Provider.
- `ServerAuthCredentialsProvider` - this is the base abstraction for auth credentials providers. The provider is
responsible for verifying client credentials.
##### Client-Side
We suggest multiple abstractions on the client-side to allow for easy integration with different auth providers.
- `ClientAuthProvider` - this is the base client auth provider abstraction that allows any client implementation of
Chroma to support variety of auth providers. The main responsibility of the auth provider is to orchestrate the auth
flow by gluing together the auth configuration and credentials providers, and any possible auth workflows (e.g. OAuth)
- `ClientAuthConfigurationProvider` - this is the base abstraction for auth configuration providers. The provider is
responsible for loading auth configuration from a file, env vars or programmatically.
- `ClientAuthCredentialsProvider` - this is the base abstraction for auth credentials providers. The provider is
responsible for retrieving or generating client credentials.
- `AbstractCredentials` - base abstraction for credentials encapsulation from client to Auth Credentials Provider.
- `ClientAuthProtocolAdapter` - this is an abstraction that allows for client-side auth providers to communicate with
backends using variety of protocols and libraries (e.g. `requests`, `gRPC` etc). The adapter is responsible for
translating the auth requests generated by the credentials provider to a protocol specific message.
#### Workflows
##### Server-Side
![cip-2-server-side-wf.png](assets/cip-2-server-side-wf.png)
##### Client-Side
![cip-2-client-side-wf.png](assets/cip-2-client-side-wf.png)
### Configuration
#### Server-side
TBD
#### Client-side
### Reasoning
- Server-side abstraction - it is very useful as the intention is to support a variety of auth providers.
- Client-side abstraction - similar reasoning but from client's perspective. It will allow for both standard and
non-standard auth provider plugins to be added without further impacting the client side
- Backend (fastAPI) adapter - this is a backend-specific way of loading server-side auth provider plugins. It will also
serve as a template/blueprint when it comes to introducing the auth plugins to another backend framework (e.g. Flask)
We also propose that each auth provider on either side must be configurable via three main methods depending on
developer preference:
- File-based - a configuration file that provides the requisite config and credentials (recommended for production)
- Env - configuration through environment variables (this can also apply for the file-based config, which can be
specified in env var)
- Programmatically - provide requisite configuration through CLI or directly in code (it is left for the developer to
decide how such configuration is loaded and made available to the auth provider) - this is possibly the least secure
and should be used for testing
The intention is to start with two minimal but useful Auth providers:
- Basic Auth - base64 encoded user and password credentials. The credentials will be static in nature and defined via
auth provider config
- Token - A simple static token implementation
Both of the above providers will rely on the `Authorization` header to achieve their functionality.
> Both initial providers are there to help introduce a bare minimum of security but are not recommended for production use
Further work:
- Introduction of JWT and mTLS auth providers
- API Keys
- Chroma managed user store - this would be similar to what standard DBMS are doing today - maintain a table with users
and salted password hashes
- K8s RBAC integration (for cloud-native deployments)
- GCP service accounts?
- SPIFFE and SPIRE integrations
- Go and Java client-side auth providers (for other impl like Rust and Ruby, we need to discuss with respective
maintainers)
> Note: this CIP intentionally does not tackle authZ but acknowledges that authN and authZ must work in tandem in future
> releases
## **Compatibility, Deprecation, and Migration Plan**
This change, introducing a pluggable auth framework, does not impact the compatibility of existing deployments, and users can
upgrade and use the new framework without the need for migration.
No deprecations.
## **Test Plan**
We will introduce a new set of tests to verify both client and server-side auth providers.
## **Rejected Alternatives**
We have considered direct middleware Auth or existing third-party libraries for FastAPI integration with auth providers,
but that will create a dependency for Chroma on FastAPI itself.
We have also considered using OAuth 2.0 or OIDC however the challenge there is that both of these protocols are
generally intended for User (human) auth whereas in our case we have a system-to-system auth. That said there still
might be room for either of these protocols, but further more in-depth use case analysis is required.
Relying entirely on external providers: while this is possible, not providing out-of-the-box integrated auth capabilities
is a non-starter for many enterprise customers.

BIN
docs/assets/cip-2-arch.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

BIN
docs/assets/cip-2-seq.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 158 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

View File

@@ -0,0 +1,259 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Chroma Authentication\n",
"\n",
"This tutorial aims to explain how authentication can be set up in Chroma.\n",
"\n",
"> **Important**: The concept of authentication is only applicable to Client/Server deployments. If you are using Chroma in a standalone mode, authentication is not applicable.\n",
"\n",
"## Concepts\n",
"\n",
"### Architecture Overview\n",
"\n",
"![Authentication Architecture](assets/auth-architecture.png \"Authentication Architecture\")\n",
"\n",
"### Authentication Flow (Sequence)\n",
"\n",
"The authentication sequence is applied for every request. It is important to understand that credential computation or retrieval (e.g. from external auth providers) is only done once for the first authenticated request. Subsequent requests will use the same credentials.\n",
"\n",
"The authentication flow is as follows:\n",
"\n",
"![Authentication Flow](assets/auh-sequence.png \"Authentication Flow\")\n",
"\n",
"### Preemptive Authentication\n",
"\n",
"In its current release the authentication in Chroma works in a preemptive mode. This means that the client is responsible for sending the authentication information on every request. The server will not challenge the client for authentication.\n",
"\n",
"> **Warning**: There are security risks involved with preemptive authentication in that the client might unintentionally send credentials to malicious or unintended server. When deploying authentication users are encouraged to use HTTPS (always verify server certs), to use secure providers (e.g. JWT) \n",
"> and apply good security practices.\n",
"\n",
"### Authentication Provider\n",
"\n",
"Authentication in Chroma is handled by Authentication Providers. Providers are pluggable modules that allow Chroma to abstract the authentication mechanism from the rest of the system.\n",
"\n",
"Chroma ships with the following built-in providers:\n",
"- Basic Authentication\n",
"- JWT Authentication (work in progress)\n",
"\n",
"### Client-side Authentication\n",
"\n",
"Client-side authentication refers to the process of preparing and communicating credentials information on the client-side and sending that information to the Chroma server.\n",
"\n",
"### Server-side Authentication\n",
"\n",
"Server-side authentication refers to the process of validating the credentials information received from the client and authenticating the client.\n"
],
"metadata": {
"collapsed": false
},
"id": "eae631e46b4c1115"
},
{
"cell_type": "markdown",
"source": [
"## Configuration\n",
"\n",
"### Server Configuration\n",
"\n",
"In order for the server to provide auth it needs several pieces of information and depending on the authentication provider you may or may not need to provide all of them.\n",
"\n",
"- `CHROMA_SERVER_AUTH_PROVIDER` - It indicates the authentication provider class to use. In this case we are using the `chromadb.auth.basic.BasicAuthServerProvider` class (it is also possible to use `basic` as a shorthand).\n",
"- `CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER` - The credentials provider is a way for the server to validate the provided auth information from the client. You can use `chromadb.auth.providers.HtpasswdFileServerAuthCredentialsProvider` to validate against a file in htpasswd format (user:password) - single line with bcrypt hash for password. Alternatively you can use a shorthand to load providers (e.g. `htpasswd_file` for `chromadb.auth.providers.HtpasswdFileServerAuthCredentialsProvider`).\n",
"- `CHROMA_SERVER_AUTH_CREDENTIALS_FILE` - The path to the credentials file in case the credentials provider requires it. In this case we are using the `chromadb.auth.providers.HtpasswdFileServerAuthCredentialsProvider` provider which requires a file path.\n",
"\n",
"\n",
"### Client Configuration\n",
"\n",
"Similarly on the client side we need to provide the following configuration parameters:\n",
"\n",
"- `CHROMA_CLIENT_AUTH_PROVIDER` - It indicates the authentication provider class to use. In this case we are using the `chromadb.auth.basic.BasicAuthClientProvider` class or `basic` shorthand.\n",
"- `CHROMA_CLIENT_AUTH_CREDENTIALS` - The auth credentials to be passed to the provider. In this case we are using the `admin:admin` credentials as we'll be using Basic Auth.\n"
],
"metadata": {
"collapsed": false
},
"id": "87d45f79aed65e21"
},
{
"cell_type": "markdown",
"source": [
"## Setting Up\n",
"\n",
"### Before You Begin\n",
"\n",
"Make sure you have either `chromadb` or `chromadb-client` installed. You can do that by running the following command:\n",
"\n",
"```bash\n",
"pip install chromadb\n",
"```\n",
"or\n",
"\n",
"```bash\n",
"pip install chromadb-client\n",
"```\n",
"\n",
"Make sure Chroma Server is running. Use one of the following methods to start the server:\n",
"\n",
"From the command line:\n",
"\n",
"> Note: The below options will configure the server to use Basic Authentication with the username `admin` and password `admin`.\n",
"\n",
"```bash\n",
"export CHROMA_USER=admin\n",
"export CHROMA_PASSWORD=admin\n",
"docker run --rm --entrypoint htpasswd httpd:2 -Bbn ${CHROMA_USER} ${CHROMA_PASSWORD} > server.htpasswd\n",
"CHROMA_SERVER_AUTH_CREDENTIALS_FILE=\"./server.htpasswd\" \\\n",
"CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER='chromadb.auth.providers.HtpasswdFileServerAuthCredentialsProvider' \\\n",
"CHROMA_SERVER_AUTH_PROVIDER='chromadb.auth.basic.BasicAuthServerProvider' \\\n",
"uvicorn chromadb.app:app --workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config log_config.yml\n",
"```\n",
"\n",
"With Docker Compose:\n",
"\n",
"> Note: You need to clone the git repository first and run the command from the repository root.\n",
"\n",
"```bash\n",
"export CHROMA_USER=admin\n",
"export CHROMA_PASSWORD=admin\n",
"docker run --rm --entrypoint htpasswd httpd:2 -Bbn ${CHROMA_USER} ${CHROMA_PASSWORD} > server.htpasswd\n",
"cat << EOF > .env\n",
"CHROMA_SERVER_AUTH_CREDENTIALS_FILE=\"/chroma/server.htpasswd\"\n",
"CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER='chromadb.auth.providers.HtpasswdFileServerAuthCredentialsProvider'\n",
"CHROMA_SERVER_AUTH_PROVIDER='chromadb.auth.basic.BasicAuthServerProvider'\n",
"EOF\n",
"docker-compose up -d --build \n",
"```\n"
],
"metadata": {
"collapsed": false
},
"id": "af49d8c78f2f7347"
},
{
"cell_type": "markdown",
"source": [
"### Basic Authentication"
],
"metadata": {
"collapsed": false
},
"id": "fc77d909233f2645"
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [
{
"data": {
"text/plain": "[]"
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import chromadb\n",
"from chromadb import Settings\n",
"\n",
"client = chromadb.HttpClient(\n",
" settings=Settings(chroma_client_auth_provider=\"chromadb.auth.basic.BasicAuthClientProvider\",chroma_client_auth_credentials=\"admin:admin\"))\n",
"client.heartbeat() # this should work with or without authentication - it is a public endpoint\n",
"\n",
"client.get_version() # this should work with or without authentication - it is a public endpoint\n",
"\n",
"client.list_collections() # this is a protected endpoint and requires authentication\n",
"\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-22T00:33:16.354523Z",
"start_time": "2023-08-22T00:33:15.715736Z"
}
},
"id": "8f9307acce25f672"
},
{
"cell_type": "markdown",
"source": [
"#### Verifying Authentication (Negative Test)"
],
"metadata": {
"collapsed": false
},
"id": "6b75f04e59cb1d42"
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"As expected, you are not authorized to access protected endpoints.\n"
]
}
],
"source": [
"# Try to access a protected endpoint without authentication\n",
"import sys\n",
"\n",
"client = chromadb.HttpClient()\n",
"try:\n",
" client.list_collections()\n",
"except Exception as e:\n",
" if \"Unauthorized\" in str(e):\n",
" print(\"As expected, you are not authorized to access protected endpoints.\", file=sys.stderr)\n",
" else:\n",
" raise e"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-22T00:33:19.119718Z",
"start_time": "2023-08-22T00:33:19.097558Z"
}
},
"id": "c0c3240ed4d70a79"
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2023-08-11T15:58:07.272237Z"
}
},
"id": "ab8b90d83f02eda"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -30,7 +30,8 @@ dependencies = [
'tqdm >= 4.65.0',
'overrides >= 7.3.1',
'importlib-resources',
'graphlib_backport >= 1.0.3; python_version < "3.9"'
'graphlib_backport >= 1.0.3; python_version < "3.9"',
'bcrypt >= 4.0.1'
]
[tool.black]

View File

@@ -1,3 +1,4 @@
bcrypt==4.0.1
chroma-hnswlib==0.7.2
fastapi>=0.95.2, <0.100.0
graphlib_backport==1.0.3; python_version < '3.9'