
sherpa_ai.models package#

Submodules#

sherpa_ai.models.chat_model_with_logging module#

class sherpa_ai.models.chat_model_with_logging.ChatModelWithLogging(*args: Any, name: str | None = None, cache: BaseCache | bool | None = None, verbose: bool = None, callbacks: Callbacks = None, tags: List[str] | None = None, metadata: Dict[str, Any] | None = None, custom_get_token_ids: Callable[[str], List[int]] | None = None, callback_manager: BaseCallbackManager | None = None, rate_limiter: BaseRateLimiter | None = None, disable_streaming: bool | Literal['tool_calling'] = False, llm: BaseChatModel, logger: Logger)[source]#

Bases: BaseChatModel

llm: BaseChatModel#
logger: Logger#
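
ChatModelWithLogging wraps another chat model and records each generation through the supplied logger. A minimal sketch, assuming a loguru logger and an arbitrary log file path (the wrapped ChatOpenAI model and the prompt are illustrative, not part of this API):

    from langchain_openai import ChatOpenAI
    from loguru import logger

    from sherpa_ai.models.chat_model_with_logging import ChatModelWithLogging

    # Send log records to a JSON-lines file; the path is an example value.
    logger.add("llm_calls.jsonl", serialize=True)

    # Wrap any BaseChatModel; each call is delegated to `llm` and logged.
    logged_llm = ChatModelWithLogging(
        llm=ChatOpenAI(model="gpt-4o-mini"),
        logger=logger,
    )

    print(logged_llm.invoke("Summarize what a logging wrapper does.").content)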

sherpa_ai.models.sherpa_base_chat_model module#

class sherpa_ai.models.sherpa_base_chat_model.SherpaBaseChatModel(*args: Any, name: str | None = None, cache: BaseCache | bool | None = None, verbose: bool = None, callbacks: Callbacks = None, tags: List[str] | None = None, metadata: Dict[str, Any] | None = None, custom_get_token_ids: Callable[[str], List[int]] | None = None, callback_manager: BaseCallbackManager | None = None, rate_limiter: BaseRateLimiter | None = None, disable_streaming: bool | Literal['tool_calling'] = False, user_id: str | None = None, verbose_logger: BaseVerboseLogger = None)[source]#

Bases: BaseChatModel

user_id: str | None#
verbose_logger: BaseVerboseLogger#
class sherpa_ai.models.sherpa_base_chat_model.SherpaChatOpenAI(*args: Any, name: str | None = None, cache: BaseCache | bool | None = None, verbose: bool = None, callbacks: Callbacks = None, tags: List[str] | None = None, metadata: Dict[str, Any] | None = None, custom_get_token_ids: Callable[[str], List[int]] | None = None, callback_manager: BaseCallbackManager | None = None, rate_limiter: BaseRateLimiter | None = None, disable_streaming: bool | Literal['tool_calling'] = False, client: Any = None, async_client: Any = None, model: str = 'gpt-3.5-turbo', temperature: float = 0.7, model_kwargs: Dict[str, Any] = None, api_key: SecretStr | None = None, base_url: str | None = None, organization: str | None = None, openai_proxy: str | None = None, timeout: float | Tuple[float, float] | Any | None = None, max_retries: int = 2, streaming: bool = False, n: int = 1, max_tokens: int | None = None, tiktoken_model_name: str | None = None, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, http_client: Any | None = None, http_async_client: Any | None = None, stop_sequences: List[str] | str | None = None, extra_body: Mapping[str, Any] | None = None, stream_usage: bool = False, user_id: str | None = None, verbose_logger: BaseVerboseLogger = None)[source]#

Bases: ChatOpenAI

user_id: str | None#
verbose_logger: BaseVerboseLogger#
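
SherpaChatOpenAI extends ChatOpenAI with the user_id and verbose_logger fields defined on SherpaBaseChatModel, so a call can be associated with a particular user and surface verbose progress output. A minimal construction sketch (the model name, user id, and environment-variable handling are illustrative assumptions):

    import os

    from sherpa_ai.models.sherpa_base_chat_model import SherpaChatOpenAI

    llm = SherpaChatOpenAI(
        model="gpt-3.5-turbo",
        temperature=0.0,
        api_key=os.environ["OPENAI_API_KEY"],
        user_id="slack-user-123",   # example id used to associate the call with a user
    )

    print(llm.invoke("Hello!").content)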

sherpa_ai.models.sherpa_base_model module#

class sherpa_ai.models.sherpa_base_model.SherpaOpenAI(*args: Any, name: str | None = None, cache: BaseCache | bool | None = None, verbose: bool = None, callbacks: Callbacks = None, tags: List[str] | None = None, metadata: Dict[str, Any] | None = None, custom_get_token_ids: Callable[[str], List[int]] | None = None, callback_manager: BaseCallbackManager | None = None, rate_limiter: BaseRateLimiter | None = None, disable_streaming: bool | Literal['tool_calling'] = False, client: Any = None, async_client: Any = None, model: str = 'gpt-3.5-turbo', temperature: float = 0.7, model_kwargs: Dict[str, Any] = None, api_key: SecretStr | None = None, base_url: str | None = None, organization: str | None = None, openai_proxy: str | None = None, timeout: float | Tuple[float, float] | Any | None = None, max_retries: int = 2, streaming: bool = False, n: int = 1, max_tokens: int | None = None, tiktoken_model_name: str | None = None, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, http_client: Any | None = None, http_async_client: Any | None = None, stop_sequences: List[str] | str | None = None, extra_body: Mapping[str, Any] | None = None, stream_usage: bool = False, user_id: str | None = None)[source]#

Bases: ChatOpenAI

user_id: str | None#
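
SherpaOpenAI is the same ChatOpenAI extension but adds only user_id, without verbose_logger. A sketch that pairs it with LangChain's OpenAI callback to inspect token usage (the get_openai_callback import path follows recent langchain-community releases, and the values are illustrative):

    from langchain_community.callbacks import get_openai_callback

    from sherpa_ai.models.sherpa_base_model import SherpaOpenAI

    llm = SherpaOpenAI(model="gpt-3.5-turbo", user_id="user-42")

    # Collect token counts for everything run inside the context manager.
    with get_openai_callback() as cb:
        result = llm.invoke("Give me one sentence about glaciers.")

    print(result.content)
    print(cb.total_tokens)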

Module contents#

class sherpa_ai.models.SherpaChatOpenAI(*args: Any, name: str | None = None, cache: BaseCache | bool | None = None, verbose: bool = None, callbacks: Callbacks = None, tags: List[str] | None = None, metadata: Dict[str, Any] | None = None, custom_get_token_ids: Callable[[str], List[int]] | None = None, callback_manager: BaseCallbackManager | None = None, rate_limiter: BaseRateLimiter | None = None, disable_streaming: bool | Literal['tool_calling'] = False, client: Any = None, async_client: Any = None, model: str = 'gpt-3.5-turbo', temperature: float = 0.7, model_kwargs: Dict[str, Any] = None, api_key: SecretStr | None = None, base_url: str | None = None, organization: str | None = None, openai_proxy: str | None = None, timeout: float | Tuple[float, float] | Any | None = None, max_retries: int = 2, streaming: bool = False, n: int = 1, max_tokens: int | None = None, tiktoken_model_name: str | None = None, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, http_client: Any | None = None, http_async_client: Any | None = None, stop_sequences: List[str] | str | None = None, extra_body: Mapping[str, Any] | None = None, stream_usage: bool = False, user_id: str | None = None, verbose_logger: BaseVerboseLogger = None)[source]#

Bases: ChatOpenAI

cache: BaseCache | bool | None#

Whether to cache the response.

  • If true, will use the global cache.

  • If false, will not use a cache.

  • If None, will use the global cache if it’s set, otherwise no cache.

  • If instance of BaseCache, will use the provided cache.

Caching is not currently supported for streaming methods of models.
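
A short sketch of these cache settings; the set_llm_cache and InMemoryCache import paths follow recent langchain-core releases and may differ in older versions:

    from langchain_core.caches import InMemoryCache
    from langchain_core.globals import set_llm_cache

    from sherpa_ai.models import SherpaChatOpenAI

    # Register a global cache; models created with cache=None or cache=True use it.
    set_llm_cache(InMemoryCache())

    cached_llm = SherpaChatOpenAI(model="gpt-3.5-turbo")                  # global cache
    uncached_llm = SherpaChatOpenAI(model="gpt-3.5-turbo", cache=False)   # no cache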

callback_manager: BaseCallbackManager | None#
callbacks: Callbacks#

Callbacks to add to the run trace.

custom_get_token_ids: Callable[[str], List[int]] | None#

Optional encoder to use for counting tokens.

default_headers: Mapping[str, str] | None#
default_query: Mapping[str, object] | None#
disable_streaming: bool | Literal['tool_calling']#

Whether to disable streaming for this model.

If streaming is bypassed, then stream()/astream() will defer to invoke()/ainvoke().

  • If True, will always bypass streaming case.

  • If “tool_calling”, will bypass streaming case only when the model is called with a tools keyword argument.

  • If False (default), will always use streaming case if available.
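
For example, tool-calling responses can be forced through the non-streaming path while plain text continues to stream; a sketch (the model name and prompt are illustrative):

    from sherpa_ai.models import SherpaChatOpenAI

    # Defer to invoke()/ainvoke() whenever the model is called with tools.
    llm = SherpaChatOpenAI(model="gpt-3.5-turbo", disable_streaming="tool_calling")

    # Plain text calls still stream chunk by chunk.
    for chunk in llm.stream("Name three prime numbers."):
        print(chunk.content, end="", flush=True)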

extra_body: Mapping[str, Any] | None#

Optional additional JSON properties to include in the request parameters when making requests to OpenAI-compatible APIs, such as vLLM.
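
A sketch of pointing the model at a local vLLM-style server and passing a provider-specific sampling parameter through extra_body (the URL, model name, and top_k value are placeholders):

    from sherpa_ai.models import SherpaChatOpenAI

    llm = SherpaChatOpenAI(
        base_url="http://localhost:8000/v1",        # placeholder endpoint
        api_key="not-needed",                       # many local servers ignore the key
        model="meta-llama/Llama-3-8B-Instruct",     # placeholder model name
        extra_body={"top_k": 20},                   # forwarded verbatim in the request body
    )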

http_async_client: Any | None#

Optional httpx.AsyncClient. Only used for async invocations. Must specify http_client as well if you’d like a custom client for sync invocations.

http_client: Any | None#

Optional httpx.Client. Only used for sync invocations. Must specify http_async_client as well if you’d like a custom client for async invocations.
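
A sketch of supplying matching sync and async httpx clients, for example to set a custom transport timeout (the 30-second value is arbitrary):

    import httpx

    from sherpa_ai.models import SherpaChatOpenAI

    llm = SherpaChatOpenAI(
        model="gpt-3.5-turbo",
        http_client=httpx.Client(timeout=httpx.Timeout(30.0)),
        http_async_client=httpx.AsyncClient(timeout=httpx.Timeout(30.0)),
    )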

max_retries: int#

Maximum number of retries to make when generating.

max_tokens: int | None#

Maximum number of tokens to generate.

metadata: Dict[str, Any] | None#

Metadata to add to the run trace.

model_kwargs: Dict[str, Any]#

Holds any model parameters valid for the create call that are not explicitly specified.

model_name: str#

Model name to use.

n: int#

Number of chat completions to generate for each prompt.

openai_api_base: str | None#

Base URL path for API requests; leave blank if not using a proxy or service emulator.

openai_api_key: SecretStr | None#

Automatically inferred from env var OPENAI_API_KEY if not provided.

openai_organization: str | None#

Automatically inferred from env var OPENAI_ORG_ID if not provided.

openai_proxy: str | None#
rate_limiter: BaseRateLimiter | None#

An optional rate limiter to use for limiting the number of requests.

request_timeout: float | Tuple[float, float] | Any | None#

Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or None.

stop: List[str] | str | None#

Default stop sequences.

stream_usage: bool#

Whether to include usage metadata in streaming output. If True, additional message chunks containing usage metadata will be generated during the stream.
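
A sketch of reading that usage metadata from the stream; the usage_metadata attribute on message chunks follows recent langchain-core releases, so it is read defensively here:

    from sherpa_ai.models import SherpaChatOpenAI

    llm = SherpaChatOpenAI(model="gpt-3.5-turbo", streaming=True, stream_usage=True)

    usage = None
    for chunk in llm.stream("Write a haiku about mountains."):
        print(chunk.content, end="", flush=True)
        # The trailing chunk(s) carry token counts when stream_usage=True.
        if getattr(chunk, "usage_metadata", None):
            usage = chunk.usage_metadata

    print(usage)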

streaming: bool#

Whether to stream the results or not.

tags: List[str] | None#

Tags to add to the run trace.

temperature: float#

What sampling temperature to use.

tiktoken_model_name: str | None#

The model name to pass to tiktoken when using this class. Tiktoken is used to count the number of tokens in documents so they can be constrained to fit under a given limit. By default, when set to None, this is the same as the model name. However, you may want to use this class with a model name that tiktoken does not support, for example when using Azure OpenAI deployments or one of the many providers that expose an OpenAI-like API under different model names. In those cases, to avoid errors when tiktoken is called, you can specify a model name to use here.
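
A sketch of pinning token counting to a model tiktoken knows while talking to an OpenAI-like gateway (the base URL and served model name are placeholders):

    from sherpa_ai.models import SherpaChatOpenAI

    llm = SherpaChatOpenAI(
        base_url="https://my-gateway.example.com/v1",  # placeholder endpoint
        model="internal-chat-model",                   # not known to tiktoken
        tiktoken_model_name="gpt-3.5-turbo",           # used only for token counting
    )

    print(llm.get_num_tokens("How many tokens is this sentence?"))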

user_id: str | None#
verbose: bool#

Whether to print out response text.

verbose_logger: BaseVerboseLogger#
class sherpa_ai.models.SherpaOpenAI(*args: Any, name: str | None = None, cache: BaseCache | bool | None = None, verbose: bool = None, callbacks: Callbacks = None, tags: List[str] | None = None, metadata: Dict[str, Any] | None = None, custom_get_token_ids: Callable[[str], List[int]] | None = None, callback_manager: BaseCallbackManager | None = None, rate_limiter: BaseRateLimiter | None = None, disable_streaming: bool | Literal['tool_calling'] = False, client: Any = None, async_client: Any = None, model: str = 'gpt-3.5-turbo', temperature: float = 0.7, model_kwargs: Dict[str, Any] = None, api_key: SecretStr | None = None, base_url: str | None = None, organization: str | None = None, openai_proxy: str | None = None, timeout: float | Tuple[float, float] | Any | None = None, max_retries: int = 2, streaming: bool = False, n: int = 1, max_tokens: int | None = None, tiktoken_model_name: str | None = None, default_headers: Mapping[str, str] | None = None, default_query: Mapping[str, object] | None = None, http_client: Any | None = None, http_async_client: Any | None = None, stop_sequences: List[str] | str | None = None, extra_body: Mapping[str, Any] | None = None, stream_usage: bool = False, user_id: str | None = None)[source]#

Bases: ChatOpenAI

cache: BaseCache | bool | None#

Whether to cache the response.

  • If true, will use the global cache.

  • If false, will not use a cache.

  • If None, will use the global cache if it’s set, otherwise no cache.

  • If instance of BaseCache, will use the provided cache.

Caching is not currently supported for streaming methods of models.

callback_manager: BaseCallbackManager | None#
callbacks: Callbacks#

Callbacks to add to the run trace.

custom_get_token_ids: Callable[[str], List[int]] | None#

Optional encoder to use for counting tokens.

default_headers: Mapping[str, str] | None#
default_query: Mapping[str, object] | None#
disable_streaming: bool | Literal['tool_calling']#

Whether to disable streaming for this model.

If streaming is bypassed, then stream()/astream() will defer to invoke()/ainvoke().

  • If True, will always bypass streaming case.

  • If “tool_calling”, will bypass streaming case only when the model is called with a tools keyword argument.

  • If False (default), will always use streaming case if available.

extra_body: Mapping[str, Any] | None#

Optional additional JSON properties to include in the request parameters when making requests to OpenAI-compatible APIs, such as vLLM.

http_async_client: Any | None#

Optional httpx.AsyncClient. Only used for async invocations. Must specify http_client as well if you’d like a custom client for sync invocations.

http_client: Any | None#

Optional httpx.Client. Only used for sync invocations. Must specify http_async_client as well if you’d like a custom client for async invocations.

max_retries: int#

Maximum number of retries to make when generating.

max_tokens: int | None#

Maximum number of tokens to generate.

metadata: Dict[str, Any] | None#

Metadata to add to the run trace.
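
Tags and metadata travel with the run trace, which makes calls easy to filter in tracing tools; a sketch with illustrative values:

    from sherpa_ai.models import SherpaOpenAI

    llm = SherpaOpenAI(
        model="gpt-3.5-turbo",
        tags=["sherpa", "qa-agent"],
        metadata={"team": "research", "ticket": "ABC-123"},
    )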

model_kwargs: Dict[str, Any]#

Holds any model parameters valid for the create call that are not explicitly specified.

model_name: str#

Model name to use.

n: int#

Number of chat completions to generate for each prompt.

openai_api_base: str | None#

Base URL path for API requests; leave blank if not using a proxy or service emulator.

openai_api_key: SecretStr | None#

Automatically inferred from env var OPENAI_API_KEY if not provided.

openai_organization: str | None#

Automatically inferred from env var OPENAI_ORG_ID if not provided.

openai_proxy: str | None#
rate_limiter: BaseRateLimiter | None#

An optional rate limiter to use for limiting the number of requests.
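
A sketch of throttling requests with LangChain's in-memory rate limiter; the InMemoryRateLimiter import path follows recent langchain-core releases and the limits are illustrative:

    from langchain_core.rate_limiters import InMemoryRateLimiter

    from sherpa_ai.models import SherpaOpenAI

    # Allow roughly one request per second with a small burst budget.
    limiter = InMemoryRateLimiter(requests_per_second=1.0, max_bucket_size=5)

    llm = SherpaOpenAI(model="gpt-3.5-turbo", rate_limiter=limiter)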

request_timeout: float | Tuple[float, float] | Any | None#

Timeout for requests to OpenAI completion API. Can be float, httpx.Timeout or None.

stop: List[str] | str | None#

Default stop sequences.
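
Default stop sequences are supplied through the stop_sequences constructor argument; a sketch combining them with temperature and max_tokens for short, deterministic completions (the values are illustrative):

    from sherpa_ai.models import SherpaOpenAI

    llm = SherpaOpenAI(
        model="gpt-3.5-turbo",
        temperature=0.0,          # greedy-ish sampling
        max_tokens=64,            # cap completion length
        stop_sequences=["\n\n"],  # stop at the first blank line
    )

    print(llm.invoke("List two uses of a rate limiter.").content)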

stream_usage: bool#

Whether to include usage metadata in streaming output. If True, additional message chunks containing usage metadata will be generated during the stream.

streaming: bool#

Whether to stream the results or not.

tags: List[str] | None#

Tags to add to the run trace.

temperature: float#

What sampling temperature to use.

tiktoken_model_name: str | None#

The model name to pass to tiktoken when using this class. Tiktoken is used to count the number of tokens in documents so they can be constrained to fit under a given limit. By default, when set to None, this is the same as the model name. However, you may want to use this class with a model name that tiktoken does not support, for example when using Azure OpenAI deployments or one of the many providers that expose an OpenAI-like API under different model names. In those cases, to avoid errors when tiktoken is called, you can specify a model name to use here.

user_id: str | None#
verbose: bool#

Whether to print out response text.