sherpa_ai.models package#
Submodules#
sherpa_ai.models.chat_model_with_logging module#
- class sherpa_ai.models.chat_model_with_logging.ChatModelWithLogging(*args: Any, name: str | None = None, cache: BaseCache | bool | None = None, verbose: bool = None, callbacks: Callbacks = None, tags: List[str] | None = None, metadata: Dict[str, Any] | None = None, custom_get_token_ids: Callable[[str], List[int]] | None = None, callback_manager: BaseCallbackManager | None = None, rate_limiter: BaseRateLimiter | None = None, disable_streaming: bool | Literal['tool_calling'] = False, llm: BaseChatModel, logger: Logger)[source]#
Bases: BaseChatModel
- llm: BaseChatModel#
- logger: Logger#
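A minimal usage sketch (not taken from the package itself): it assumes the Logger field accepts loguru's logger object and that an OpenAI API key is available in the environment. The wrapper behaves like any other LangChain chat model while logging each call through the supplied logger.

```python
from langchain_openai import ChatOpenAI
from loguru import logger

from sherpa_ai.models.chat_model_with_logging import ChatModelWithLogging

# Wrap an existing chat model so each generation is also written to the logger.
# Using loguru's logger here is an assumption; any object matching the declared
# Logger field should work.
logged_llm = ChatModelWithLogging(
    llm=ChatOpenAI(model="gpt-3.5-turbo"),
    logger=logger,
)

# ChatModelWithLogging is itself a BaseChatModel, so the usual LangChain API applies.
response = logged_llm.invoke("Summarize what a chat model wrapper does in one sentence.")
print(response.content)
```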
sherpa_ai.models.sherpa_base_chat_model module#
- class sherpa_ai.models.sherpa_base_chat_model.SherpaBaseChatModel(*args: Any, name: str | None = None, cache: BaseCache | bool | None = None, verbose: bool = None, callbacks: Callbacks = None, tags: List[str] | None = None, metadata: Dict[str, Any] | None = None, custom_get_token_ids: Callable[[str], List[int]] | None = None, callback_manager: BaseCallbackManager | None = None, rate_limiter: BaseRateLimiter | None = None, disable_streaming: bool | Literal['tool_calling'] = False, user_id: str | None = None, verbose_logger: BaseVerboseLogger = None)[source]#
Bases: BaseChatModel
- verbose_logger: BaseVerboseLogger#
- class sherpa_ai.models.sherpa_base_chat_model.SherpaChatOpenAI(*args: Any, name: str | None = None, cache: BaseCache | bool | None = None, verbose: bool = None, callbacks: Callbacks = None, tags: List[str] | None = None, metadata: Dict[str, Any] | None = None, custom_get_token_ids: Callable[[str], List[int]] | None = None, callback_manager: BaseCallbackManager | None = None, rate_limiter: BaseRateLimiter | None = None, disable_streaming: bool | Literal['tool_calling'] = False, client: Any = None, async_client: Any = None, model: str = 'gpt-3.5-turbo', temperature: float = 0.7, model_kwargs: Dict[str, Any] = None, api_key: SecretStr | None = None, base_url: str | None = None, organization: str | None = None, openai_proxy: str | None = None, timeout: float | Tuple[float, float] | Any | None = None, max_retries: int = 2, streaming: bool = False, n: int = 1, max_tokens: int | None = None, tiktoken_model_name: str | None = None, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, http_client: Any | None = None, http_async_client: Any | None = None, stop_sequences: List[str] | str | None = None, extra_body: Mapping[str, Any] | None = None, stream_usage: bool = False, user_id: str | None = None, verbose_logger: BaseVerboseLogger = None)[source]#
Bases: ChatOpenAI
- verbose_logger: BaseVerboseLogger#
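A hedged instantiation sketch. The user_id value, the BaseVerboseLogger import path, and its log() method are assumptions for illustration; only the constructor parameters are taken from the signature above.

```python
from sherpa_ai.models.sherpa_base_chat_model import SherpaChatOpenAI
# Assumed import path for the verbose-logger base class; adjust if it differs.
from sherpa_ai.verbose_loggers.base import BaseVerboseLogger


class PrintVerboseLogger(BaseVerboseLogger):
    """Hypothetical logger that just prints; assumes a log() method on the base class."""

    def log(self, message: str) -> None:
        print(f"[sherpa] {message}")


llm = SherpaChatOpenAI(
    model="gpt-3.5-turbo",            # default model from the signature above
    temperature=0.7,
    user_id="U123456",                # hypothetical user identifier
    verbose_logger=PrintVerboseLogger(),
)

print(llm.invoke("Hello!").content)
```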
sherpa_ai.models.sherpa_base_model module#
- class sherpa_ai.models.sherpa_base_model.SherpaOpenAI(*args: Any, name: str | None = None, cache: BaseCache | bool | None = None, verbose: bool = None, callbacks: Callbacks = None, tags: List[str] | None = None, metadata: Dict[str, Any] | None = None, custom_get_token_ids: Callable[[str], List[int]] | None = None, callback_manager: BaseCallbackManager | None = None, rate_limiter: BaseRateLimiter | None = None, disable_streaming: bool | Literal['tool_calling'] = False, client: Any = None, async_client: Any = None, model: str = 'gpt-3.5-turbo', temperature: float = 0.7, model_kwargs: Dict[str, Any] = None, api_key: SecretStr | None = None, base_url: str | None = None, organization: str | None = None, openai_proxy: str | None = None, timeout: float | Tuple[float, float] | Any | None = None, max_retries: int = 2, streaming: bool = False, n: int = 1, max_tokens: int | None = None, tiktoken_model_name: str | None = None, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, http_client: Any | None = None, http_async_client: Any | None = None, stop_sequences: List[str] | str | None = None, extra_body: Mapping[str, Any] | None = None, stream_usage: bool = False, user_id: str | None = None)[source]#
Bases: ChatOpenAI
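SherpaOpenAI accepts the usual ChatOpenAI parameters plus a user_id. A short sketch, assuming OPENAI_API_KEY is set in the environment; the user_id value is a placeholder.

```python
from sherpa_ai.models.sherpa_base_model import SherpaOpenAI

llm = SherpaOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.0,
    user_id="U123456",  # placeholder identifier; its exact use is not documented here
)

print(llm.invoke("What is 2 + 2?").content)
```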
Module contents#
- class sherpa_ai.models.SherpaChatOpenAI(*args: Any, name: str | None = None, cache: BaseCache | bool | None = None, verbose: bool = None, callbacks: Callbacks = None, tags: List[str] | None = None, metadata: Dict[str, Any] | None = None, custom_get_token_ids: Callable[[str], List[int]] | None = None, callback_manager: BaseCallbackManager | None = None, rate_limiter: BaseRateLimiter | None = None, disable_streaming: bool | Literal['tool_calling'] = False, client: Any = None, async_client: Any = None, model: str = 'gpt-3.5-turbo', temperature: float = 0.7, model_kwargs: Dict[str, Any] = None, api_key: SecretStr | None = None, base_url: str | None = None, organization: str | None = None, openai_proxy: str | None = None, timeout: float | Tuple[float, float] | Any | None = None, max_retries: int = 2, streaming: bool = False, n: int = 1, max_tokens: int | None = None, tiktoken_model_name: str | None = None, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, http_client: Any | None = None, http_async_client: Any | None = None, stop_sequences: List[str] | str | None = None, extra_body: Mapping[str, Any] | None = None, stream_usage: bool = False, user_id: str | None = None, verbose_logger: BaseVerboseLogger = None)[source]#
Bases: ChatOpenAI
- cache: BaseCache | bool | None#
Whether to cache the response.
If true, will use the global cache.
If false, will not use a cache.
If None, will use the global cache if it’s set, otherwise no cache.
If an instance of BaseCache, will use the provided cache.
Caching is not currently supported for streaming methods of models.
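A sketch of enabling the global cache through the standard LangChain helpers (set_llm_cache and InMemoryCache); with cache=True, repeated identical prompts are answered from the cache.

```python
from langchain_core.caches import InMemoryCache
from langchain_core.globals import set_llm_cache

from sherpa_ai.models import SherpaChatOpenAI

# Register a process-wide cache, then opt this model into it.
set_llm_cache(InMemoryCache())
llm = SherpaChatOpenAI(model="gpt-3.5-turbo", cache=True)

first = llm.invoke("Name one prime number.")   # hits the API
second = llm.invoke("Name one prime number.")  # served from the in-memory cache
```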
- callbacks: Callbacks#
Callbacks to add to the run trace.
- custom_get_token_ids: Callable[[str], List[int]] | None#
Optional encoder to use for counting tokens.
- disable_streaming: bool | Literal['tool_calling']#
Whether to disable streaming for this model.
If streaming is bypassed, then stream()/astream() will defer to invoke()/ainvoke().
If True, will always bypass streaming case.
If “tool_calling”, will bypass streaming case only when the model is called with a tools keyword argument.
If False (default), will always use streaming case if available.
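A sketch of the two non-default settings; with streaming bypassed, stream() still works but yields a single chunk produced by invoke().

```python
from sherpa_ai.models import SherpaChatOpenAI

# Always bypass streaming: stream()/astream() defer to invoke()/ainvoke().
non_streaming = SherpaChatOpenAI(model="gpt-3.5-turbo", disable_streaming=True)

# Bypass streaming only when the call passes a `tools` keyword argument.
tool_safe = SherpaChatOpenAI(model="gpt-3.5-turbo", disable_streaming="tool_calling")

for chunk in non_streaming.stream("Count to three."):
    print(chunk.content, end="")  # one chunk containing the full response
```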
- extra_body: Mapping[str, Any] | None#
Optional additional JSON properties to include in the request parameters when making requests to OpenAI-compatible APIs, such as vLLM.
- http_async_client: Any | None#
Optional httpx.AsyncClient. Only used for async invocations. Must specify http_client as well if you’d like a custom client for sync invocations.
- http_client: Any | None#
Optional httpx.Client. Only used for sync invocations. Must specify http_async_client as well if you’d like a custom client for async invocations.
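A sketch of supplying both clients with httpx; the timeout value is arbitrary and only illustrates passing pre-configured clients for sync and async use.

```python
import httpx

from sherpa_ai.models import SherpaChatOpenAI

llm = SherpaChatOpenAI(
    model="gpt-3.5-turbo",
    http_client=httpx.Client(timeout=30.0),              # used by invoke()/stream()
    http_async_client=httpx.AsyncClient(timeout=30.0),   # used by ainvoke()/astream()
)
```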
- max_retries: int#
Maximum number of retries to make when generating.
- model_kwargs: Dict[str, Any]#
Holds any model parameters valid for create call not explicitly specified.
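For example, a chat-completions parameter without a dedicated constructor argument can be forwarded through model_kwargs; the logit_bias value below is a made-up illustration.

```python
from sherpa_ai.models import SherpaChatOpenAI

llm = SherpaChatOpenAI(
    model="gpt-3.5-turbo",
    # Forwarded verbatim to the underlying create() call.
    model_kwargs={"logit_bias": {1734: -100}},  # hypothetical token id to suppress
)
```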
- model_name: str#
Model name to use.
- n: int#
Number of chat completions to generate for each prompt.
- openai_api_base: str | None#
Base URL path for API requests; leave blank if not using a proxy or service emulator.
- openai_api_key: SecretStr | None#
Automatically inferred from env var OPENAI_API_KEY if not provided.
- rate_limiter: BaseRateLimiter | None#
An optional rate limiter to use for limiting the number of requests.
- request_timeout: float | Tuple[float, float] | Any | None#
Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or None.
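A sketch combining a per-request timeout with a retry budget; httpx.Timeout allows a separate connect timeout, and the values here are arbitrary.

```python
import httpx

from sherpa_ai.models import SherpaChatOpenAI

llm = SherpaChatOpenAI(
    model="gpt-3.5-turbo",
    timeout=httpx.Timeout(60.0, connect=5.0),  # 60 s overall, 5 s to connect
    max_retries=3,
)
```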
- stream_usage: bool#
Whether to include usage metadata in streaming output. If True, additional message chunks will be generated during the stream including usage metadata.
- streaming: bool#
Whether to stream the results or not.
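A sketch of streaming with usage metadata enabled; with stream_usage=True, one of the final chunks carries token counts in its usage_metadata field.

```python
from sherpa_ai.models import SherpaChatOpenAI

llm = SherpaChatOpenAI(model="gpt-3.5-turbo", streaming=True, stream_usage=True)

usage = None
for chunk in llm.stream("Write a haiku about mountains."):
    print(chunk.content, end="", flush=True)
    if chunk.usage_metadata is not None:  # populated near the end of the stream
        usage = chunk.usage_metadata

print("\n", usage)
```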
- temperature: float#
What sampling temperature to use.
- tiktoken_model_name: str | None#
The model name to pass to tiktoken when using this class. Tiktoken is used to count the number of tokens in documents to constrain them to be under a certain limit. By default, when set to None, this will be the same as the model name. However, there are some cases where you may want to use this class with a model name not supported by tiktoken. This can include when using Azure deployments or when using one of the many model providers that expose an OpenAI-like API but with different models. In those cases, to avoid errors when tiktoken is called, you can specify a model name to use here.
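A sketch for an OpenAI-compatible server whose model name tiktoken does not recognize; the base_url and model name are placeholders, and token counting is pinned to a model tiktoken does know.

```python
from sherpa_ai.models import SherpaChatOpenAI

llm = SherpaChatOpenAI(
    model="my-local-model",                  # placeholder model name
    base_url="http://localhost:8000/v1",     # placeholder OpenAI-compatible endpoint
    tiktoken_model_name="gpt-3.5-turbo",     # used only for token counting
)

print(llm.get_num_tokens("How many tokens is this sentence?"))
```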
- verbose: bool#
Whether to print out response text.
- verbose_logger: BaseVerboseLogger#
- class sherpa_ai.models.SherpaOpenAI(*args: Any, name: str | None = None, cache: BaseCache | bool | None = None, verbose: bool = None, callbacks: Callbacks = None, tags: List[str] | None = None, metadata: Dict[str, Any] | None = None, custom_get_token_ids: Callable[[str], List[int]] | None = None, callback_manager: BaseCallbackManager | None = None, rate_limiter: BaseRateLimiter | None = None, disable_streaming: bool | Literal['tool_calling'] = False, client: Any = None, async_client: Any = None, model: str = 'gpt-3.5-turbo', temperature: float = 0.7, model_kwargs: Dict[str, Any] = None, api_key: SecretStr | None = None, base_url: str | None = None, organization: str | None = None, openai_proxy: str | None = None, timeout: float | Tuple[float, float] | Any | None = None, max_retries: int = 2, streaming: bool = False, n: int = 1, max_tokens: int | None = None, tiktoken_model_name: str | None = None, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, http_client: Any | None = None, http_async_client: Any | None = None, stop_sequences: List[str] | str | None = None, extra_body: Mapping[str, Any] | None = None, stream_usage: bool = False, user_id: str | None = None)[source]#
Bases: ChatOpenAI
- cache: BaseCache | bool | None#
Whether to cache the response.
If true, will use the global cache.
If false, will not use a cache.
If None, will use the global cache if it’s set, otherwise no cache.
If an instance of BaseCache, will use the provided cache.
Caching is not currently supported for streaming methods of models.
- callbacks: Callbacks#
Callbacks to add to the run trace.
- custom_get_token_ids: Callable[[str], List[int]] | None#
Optional encoder to use for counting tokens.
- disable_streaming: bool | Literal['tool_calling']#
Whether to disable streaming for this model.
If streaming is bypassed, then stream()/astream() will defer to invoke()/ainvoke().
If True, will always bypass streaming case.
If “tool_calling”, will bypass streaming case only when the model is called with a tools keyword argument.
If False (default), will always use streaming case if available.
- extra_body: Mapping[str, Any] | None#
Optional additional JSON properties to include in the request parameters when making requests to OpenAI-compatible APIs, such as vLLM.
- http_async_client: Any | None#
Optional httpx.AsyncClient. Only used for async invocations. Must specify http_client as well if you’d like a custom client for sync invocations.
- http_client: Any | None#
Optional httpx.Client. Only used for sync invocations. Must specify http_async_client as well if you’d like a custom client for async invocations.
- max_retries: int#
Maximum number of retries to make when generating.
- model_kwargs: Dict[str, Any]#
Holds any model parameters valid for create call not explicitly specified.
- model_name: str#
Model name to use.
- n: int#
Number of chat completions to generate for each prompt.
- openai_api_base: str | None#
Base URL path for API requests; leave blank if not using a proxy or service emulator.
- openai_api_key: SecretStr | None#
Automatically inferred from env var OPENAI_API_KEY if not provided.
- rate_limiter: BaseRateLimiter | None#
An optional rate limiter to use for limiting the number of requests.
- request_timeout: float | Tuple[float, float] | Any | None#
Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or None.
- stream_usage: bool#
Whether to include usage metadata in streaming output. If True, additional message chunks will be generated during the stream including usage metadata.
- streaming: bool#
Whether to stream the results or not.
- temperature: float#
What sampling temperature to use.
- tiktoken_model_name: str | None#
The model name to pass to tiktoken when using this class. Tiktoken is used to count the number of tokens in documents to constrain them to be under a certain limit. By default, when set to None, this will be the same as the model name. However, there are some cases where you may want to use this class with a model name not supported by tiktoken. This can include when using Azure deployments or when using one of the many model providers that expose an OpenAI-like API but with different models. In those cases, to avoid errors when tiktoken is called, you can specify a model name to use here.
- verbose: bool#
Whether to print out response text.