Skip to content

Providers

llm_expose.providers.base

Abstract base class for LLM providers.

BaseProvider

Bases: ABC

Common interface that all LLM provider adapters must implement.

Providers are responsible for sending a conversation history to an LLM and returning the model's reply. The conversation history follows the OpenAI-style message format::

[{"role": "user", "content": "Hello!"}]
Source code in llm_expose/providers/base.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
class BaseProvider(ABC):
    """Abstract contract that every LLM provider adapter fulfils.

    A provider accepts a conversation history, forwards it to an LLM
    backend, and returns the model's reply. Histories use the
    OpenAI-style message format::

        [{"role": "user", "content": "Hello!"}]
    """

    @abstractmethod
    async def complete(
        self,
        messages: list[Message],
        *,
        tools: list[ToolSpec] | None = None,
        tool_choice: ToolChoice | None = None,
    ) -> str:
        """Produce one completion for *messages*.

        Args:
            messages: Message dicts in the OpenAI chat-completion format.
            tools: Optional tool definitions supported by the provider.
            tool_choice: Optional tool selection policy.

        Returns:
            The model's reply as a plain string.
        """

    @abstractmethod
    def stream(
        self,
        messages: list[Message],
        *,
        tools: list[ToolSpec] | None = None,
        tool_choice: ToolChoice | None = None,
    ) -> AsyncIterator[str]:
        """Stream completion text for *messages* chunk by chunk.

        Args:
            messages: Same format as :meth:`complete`.
            tools: Optional tool definitions supported by the provider.
            tool_choice: Optional tool selection policy.

        Yields:
            Individual text tokens/chunks from the model response.
        """

    def supports_vision(self) -> bool:
        """Return whether the configured model supports image input.

        Subclasses override this; the base implementation says no.
        """
        return False

complete(messages, *, tools=None, tool_choice=None) abstractmethod async

Return a completion for messages.

Parameters:

Name Type Description Default
messages list[Message]

A list of message dicts following the OpenAI chat completion format.

required
tools list[ToolSpec] | None

Optional tool definitions supported by the provider.

None
tool_choice ToolChoice | None

Optional tool selection policy.

None

Returns:

Type Description
str

The model's reply as a plain string.

Source code in llm_expose/providers/base.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
@abstractmethod
async def complete(
    self,
    messages: list[Message],
    *,
    tools: list[ToolSpec] | None = None,
    tool_choice: ToolChoice | None = None,
) -> str:
    """Produce one completion for *messages*.

    Args:
        messages: Message dicts in the OpenAI chat-completion format.
        tools: Optional tool definitions supported by the provider.
        tool_choice: Optional tool selection policy.

    Returns:
        The model's reply as a plain string.
    """

stream(messages, *, tools=None, tool_choice=None) abstractmethod

Yield completion tokens for messages as they arrive.

Parameters:

Name Type Description Default
messages list[Message]

Same format as `complete`.

required
tools list[ToolSpec] | None

Optional tool definitions supported by the provider.

None
tool_choice ToolChoice | None

Optional tool selection policy.

None

Yields:

Type Description
AsyncIterator[str]

Individual text tokens/chunks from the model response.

Source code in llm_expose/providers/base.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
@abstractmethod
def stream(
    self,
    messages: list[Message],
    *,
    tools: list[ToolSpec] | None = None,
    tool_choice: ToolChoice | None = None,
) -> AsyncIterator[str]:
    """Stream completion text for *messages* chunk by chunk.

    Args:
        messages: Same format as :meth:`complete`.
        tools: Optional tool definitions supported by the provider.
        tool_choice: Optional tool selection policy.

    Yields:
        Individual text tokens/chunks from the model response.
    """

supports_vision()

Return whether the configured model supports image input.

Source code in llm_expose/providers/base.py
63
64
65
def supports_vision(self) -> bool:
    """Report whether the configured model accepts image input (base: no)."""
    return False

llm_expose.providers.litellm_provider

LiteLLM-backed provider implementation.

LiteLLMProvider

Bases: BaseProvider


              flowchart TD
              llm_expose.providers.litellm_provider.LiteLLMProvider[LiteLLMProvider]
              llm_expose.providers.base.BaseProvider[BaseProvider]

                              llm_expose.providers.base.BaseProvider --> llm_expose.providers.litellm_provider.LiteLLMProvider
                


              click llm_expose.providers.litellm_provider.LiteLLMProvider href "" "llm_expose.providers.litellm_provider.LiteLLMProvider"
              click llm_expose.providers.base.BaseProvider href "" "llm_expose.providers.base.BaseProvider"
            

LLM provider that delegates to litellm.

LiteLLM supports OpenAI, Anthropic, Google, and many other backends, as well as local models that expose an OpenAI-compatible REST API (e.g. LM Studio, Ollama with the OpenAI proxy, vLLM).

Parameters:

Name Type Description Default
config ProviderConfig

The `llm_expose.config.models.ProviderConfig` that controls which model and settings are used.

required
Source code in llm_expose/providers/litellm_provider.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
class LiteLLMProvider(BaseProvider):
    """LLM provider that delegates to `litellm`.

    LiteLLM supports OpenAI, Anthropic, Google, and many other backends, as
    well as local models that expose an OpenAI-compatible REST API (e.g.
    LM Studio, Ollama with the OpenAI proxy, vLLM).

    Args:
        config: The :class:`~llm_expose.config.models.ProviderConfig` that
            controls which model and settings are used.
    """

    def __init__(self, config: ProviderConfig) -> None:
        self._config = config
        # OpenAI SDK client used only for the "local" provider path.
        self._openai_client: AsyncOpenAI | None = None
        self._supports_vision = self._detect_vision_support()
        # Usage payload captured by the most recent non-streaming completion.
        self._last_usage: dict[str, Any] | None = None
        if config.api_key:
            # NOTE(review): this mutates litellm's module-level state, so the
            # most recently constructed provider wins when several coexist —
            # confirm that is acceptable for this application.
            litellm.api_key = config.api_key
            # Preserve historical behavior expected by tests and local users.
            if config.provider_name.lower() == "openai" and not os.environ.get(
                "OPENAI_API_KEY"
            ):
                os.environ["OPENAI_API_KEY"] = config.api_key
        if self._is_local_provider():
            # Most local OpenAI-compatible servers ignore API keys, but the
            # SDK expects one; use a harmless default when omitted.
            self._openai_client = AsyncOpenAI(
                base_url=config.base_url or "http://localhost:1234/v1",
                api_key=config.api_key or "local-not-required",
            )

    def _detect_vision_support(self) -> bool:
        """Determine vision capability from config override or LiteLLM metadata."""
        # An explicit config value always wins over metadata lookup.
        if self._config.supports_vision is not None:
            return bool(self._config.supports_vision)

        try:
            model_info = litellm.get_model_info(self._config.model.strip())
            if isinstance(model_info, dict):
                supports_vision = model_info.get("supports_vision")
                if isinstance(supports_vision, bool):
                    return supports_vision
        except Exception:
            # Conservative fallback when model metadata cannot be resolved.
            pass

        return False

    def supports_vision(self) -> bool:
        """Return whether the configured model supports image input."""
        return self._supports_vision

    def _prepare_messages(self, messages: list[Message]) -> list[Message]:
        """Normalize message payload according to model capabilities."""
        # Pass through untouched when vision is supported or nothing to strip.
        if self._supports_vision or not messages_have_images(messages):
            return messages

        # Non-vision model received images: drop them and warn the caller.
        normalized, stripped_count = strip_image_parts(messages)
        if stripped_count:
            warning_message = (
                f"Model '{self._config.model}' does not support vision; "
                f"skipping {stripped_count} image part(s)."
            )
            warnings.warn(warning_message, stacklevel=2)
        return normalized

    def _is_local_provider(self) -> bool:
        """Return ``True`` when configuration targets a local model server."""
        return self._config.provider_name.lower() == "local"

    def _local_model_id(self) -> str:
        """Return the model identifier expected by OpenAI-compatible local APIs."""
        model = self._config.model.strip()
        # Backward compatibility for existing configs saved as openai/<model>.
        if model.startswith("openai/"):
            return model.split("/", 1)[1]
        return model

    def _common_kwargs(self) -> dict:
        """Build the kwargs shared by both sync and streaming calls."""
        kwargs: dict = {
            "model": self._config.model.strip(),
        }
        # Only forward optional settings when they are configured.
        if self._config.base_url:
            kwargs["base_url"] = self._config.base_url
        if self._config.api_key:
            kwargs["api_key"] = self._config.api_key
        return kwargs

    @staticmethod
    def _message_to_dict(message: Any) -> Message:
        """Normalize provider message objects into plain dicts."""
        if isinstance(message, dict):
            return message
        # Pydantic-style objects expose model_dump(); prefer it when present.
        model_dump = getattr(message, "model_dump", None)
        if callable(model_dump):
            dumped = model_dump(exclude_none=True)
            if isinstance(dumped, dict):
                return dumped
        # Last resort: pick the fields downstream tool-call handling needs.
        return {
            "role": getattr(message, "role", "assistant"),
            "content": getattr(message, "content", ""),
            "tool_calls": getattr(message, "tool_calls", None),
        }

    @staticmethod
    def _as_int(value: Any) -> int | None:
        # Best-effort int coercion; None for anything unconvertible.
        if value is None:
            return None
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _as_float(value: Any) -> float | None:
        # Best-effort float coercion; None for anything unconvertible.
        if value is None:
            return None
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    def _extract_completion_usage(
        self, response: Any, *, elapsed_ms: int
    ) -> dict[str, Any] | None:
        """Build a usage dict (tokens, cost, latency) from a completion response.

        Returns ``None`` when the response carries no usable usage data at all.
        """
        # Responses may be attribute-style objects or plain dicts.
        usage_obj = getattr(response, "usage", None)
        if usage_obj is None and isinstance(response, dict):
            usage_obj = response.get("usage")

        prompt_tokens: int | None = None
        completion_tokens: int | None = None
        total_tokens: int | None = None

        if usage_obj is not None:
            if isinstance(usage_obj, dict):
                prompt_tokens = self._as_int(usage_obj.get("prompt_tokens"))
                completion_tokens = self._as_int(usage_obj.get("completion_tokens"))
                total_tokens = self._as_int(usage_obj.get("total_tokens"))
            else:
                prompt_tokens = self._as_int(getattr(usage_obj, "prompt_tokens", None))
                completion_tokens = self._as_int(
                    getattr(usage_obj, "completion_tokens", None)
                )
                total_tokens = self._as_int(getattr(usage_obj, "total_tokens", None))

        # Compute total when provider omits it but gives partial counters.
        if (
            total_tokens is None
            and prompt_tokens is not None
            and completion_tokens is not None
        ):
            total_tokens = prompt_tokens + completion_tokens

        cost_usd: float | None = None
        try:
            # Estimated value from LiteLLM pricing metadata when available.
            cost_usd = self._as_float(
                litellm.completion_cost(completion_response=response)
            )
        except Exception:
            cost_usd = None

        # Nothing usable at all: report "no usage" rather than an empty dict.
        if (
            prompt_tokens is None
            and completion_tokens is None
            and total_tokens is None
            and cost_usd is None
        ):
            return None

        model_name = getattr(response, "model", None)
        if isinstance(response, dict):
            model_name = model_name or response.get("model")

        usage: dict[str, Any] = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": total_tokens,
            "cost_usd": cost_usd,
            "model": str(model_name or self._config.model.strip()),
            "latency_ms": int(elapsed_ms),
        }
        return usage

    def get_last_usage(self) -> dict[str, Any] | None:
        """Return the most recent completion usage payload when available."""
        if self._last_usage is None:
            return None
        # Shallow copy so callers cannot mutate our cached payload.
        return dict(self._last_usage)

    async def complete_with_message(
        self,
        messages: list[Message],
        *,
        tools: list[ToolSpec] | None = None,
        tool_choice: ToolChoice | None = None,
    ) -> Message:
        """Return the full assistant message payload for tool-call handling.

        Args:
            messages: OpenAI-style message list.
            tools: Optional tool definitions forwarded to the backend.
            tool_choice: Optional tool selection policy.

        Returns:
            The assistant message as a plain dict (may include tool calls).
        """
        messages = self._prepare_messages(messages)
        # Reset before the call so stale usage never survives a failure.
        self._last_usage = None
        started = time.monotonic()
        request_kwargs: dict[str, Any] = {}
        if tools is not None:
            request_kwargs["tools"] = tools
        if tool_choice is not None:
            request_kwargs["tool_choice"] = tool_choice

        if self._is_local_provider():
            # __init__ always creates the client for local providers.
            assert self._openai_client is not None
            response = await self._openai_client.chat.completions.create(
                model=self._local_model_id(),
                messages=cast(Any, messages),
                **request_kwargs,
            )
            elapsed_ms = (time.monotonic() - started) * 1000
            self._last_usage = self._extract_completion_usage(
                response, elapsed_ms=int(elapsed_ms)
            )
            return self._message_to_dict(response.choices[0].message)

        response = await litellm.acompletion(
            messages=cast(Any, messages),
            **request_kwargs,
            **self._common_kwargs(),
        )
        elapsed_ms = (time.monotonic() - started) * 1000
        self._last_usage = self._extract_completion_usage(
            response, elapsed_ms=int(elapsed_ms)
        )
        return self._message_to_dict(response.choices[0].message)

    async def complete(
        self,
        messages: list[Message],
        *,
        tools: list[ToolSpec] | None = None,
        tool_choice: ToolChoice | None = None,
    ) -> str:
        """Return a single completion from the configured model.

        Args:
            messages: OpenAI-style message list.
            tools: Optional tool definitions forwarded to the backend.
            tool_choice: Optional tool selection policy.

        Returns:
            The model's reply as a plain string.

        Raises:
            litellm.exceptions.APIError: On provider-level errors.
        """
        assistant_message = await self.complete_with_message(
            messages,
            tools=tools,
            tool_choice=tool_choice,
        )
        # Content may be None (e.g. pure tool-call replies); coerce to "".
        return assistant_message.get("content") or ""

    def stream(
        self,
        messages: list[Message],
        *,
        tools: list[ToolSpec] | None = None,
        tool_choice: ToolChoice | None = None,
    ) -> AsyncIterator[str]:
        """Yield text chunks from a streaming completion.

        Note: unlike :meth:`complete`, streaming does not record usage into
        ``get_last_usage``.

        Args:
            messages: OpenAI-style message list.
            tools: Optional tool definitions forwarded to the backend.
            tool_choice: Optional tool selection policy.

        Yields:
            Text delta strings as they are received from the model.
        """

        # Inner async generator so stream() itself can stay a plain method
        # returning an AsyncIterator (matching BaseProvider's signature).
        async def _stream() -> AsyncIterator[str]:
            prepared_messages = self._prepare_messages(messages)
            request_kwargs: dict[str, Any] = {}
            if tools is not None:
                request_kwargs["tools"] = tools
            if tool_choice is not None:
                request_kwargs["tool_choice"] = tool_choice

            if self._is_local_provider():
                # __init__ always creates the client for local providers.
                assert self._openai_client is not None
                response = await self._openai_client.chat.completions.create(
                    model=self._local_model_id(),
                    messages=cast(Any, prepared_messages),
                    stream=True,
                    **request_kwargs,
                )
                async for chunk in cast(Any, response):
                    delta = chunk.choices[0].delta.content
                    if delta:
                        yield delta
                return

            response = await litellm.acompletion(
                messages=cast(Any, prepared_messages),
                stream=True,
                **request_kwargs,
                **self._common_kwargs(),
            )
            async for chunk in cast(Any, response):
                delta = chunk.choices[0].delta.content
                if delta:
                    yield delta

        return _stream()

complete(messages, *, tools=None, tool_choice=None) async

Return a single completion from the configured model.

Parameters:

Name Type Description Default
messages list[Message]

OpenAI-style message list.

required

Returns:

Type Description
str

The model's reply as a plain string.

Raises:

Type Description
APIError

On provider-level errors.

Source code in llm_expose/providers/litellm_provider.py
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
async def complete(
    self,
    messages: list[Message],
    *,
    tools: list[ToolSpec] | None = None,
    tool_choice: ToolChoice | None = None,
) -> str:
    """Return a single completion from the configured model.

    Args:
        messages: OpenAI-style message list.

    Returns:
        The model's reply as a plain string.

    Raises:
        litellm.exceptions.APIError: On provider-level errors.
    """
    reply = await self.complete_with_message(
        messages, tools=tools, tool_choice=tool_choice
    )
    content = reply.get("content")
    return content if content else ""

complete_with_message(messages, *, tools=None, tool_choice=None) async

Return the full assistant message payload for tool-call handling.

Source code in llm_expose/providers/litellm_provider.py
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
async def complete_with_message(
    self,
    messages: list[Message],
    *,
    tools: list[ToolSpec] | None = None,
    tool_choice: ToolChoice | None = None,
) -> Message:
    """Return the full assistant message payload for tool-call handling.

    Args:
        messages: OpenAI-style message list.
        tools: Optional tool definitions forwarded to the backend.
        tool_choice: Optional tool selection policy.

    Returns:
        The assistant message as a plain dict (may include tool calls).
    """
    messages = self._prepare_messages(messages)
    # Reset before the call so stale usage never survives a failure.
    self._last_usage = None
    started = time.monotonic()
    request_kwargs: dict[str, Any] = {}
    if tools is not None:
        request_kwargs["tools"] = tools
    if tool_choice is not None:
        request_kwargs["tool_choice"] = tool_choice

    if self._is_local_provider():
        # __init__ always creates the client for local providers.
        assert self._openai_client is not None
        response = await self._openai_client.chat.completions.create(
            model=self._local_model_id(),
            messages=cast(Any, messages),
            **request_kwargs,
        )
        elapsed_ms = (time.monotonic() - started) * 1000
        self._last_usage = self._extract_completion_usage(
            response, elapsed_ms=int(elapsed_ms)
        )
        return self._message_to_dict(response.choices[0].message)

    response = await litellm.acompletion(
        messages=cast(Any, messages),
        **request_kwargs,
        **self._common_kwargs(),
    )
    elapsed_ms = (time.monotonic() - started) * 1000
    self._last_usage = self._extract_completion_usage(
        response, elapsed_ms=int(elapsed_ms)
    )
    return self._message_to_dict(response.choices[0].message)

get_last_usage()

Return the most recent completion usage payload when available.

Source code in llm_expose/providers/litellm_provider.py
205
206
207
208
209
def get_last_usage(self) -> dict[str, Any] | None:
    """Return the most recent completion usage payload when available."""
    if self._last_usage is None:
        return None
    return dict(self._last_usage)

stream(messages, *, tools=None, tool_choice=None)

Yield text chunks from a streaming completion.

Parameters:

Name Type Description Default
messages list[Message]

OpenAI-style message list.

required

Yields:

Type Description
AsyncIterator[str]

Text delta strings as they are received from the model.

Source code in llm_expose/providers/litellm_provider.py
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
def stream(
    self,
    messages: list[Message],
    *,
    tools: list[ToolSpec] | None = None,
    tool_choice: ToolChoice | None = None,
) -> AsyncIterator[str]:
    """Yield text chunks from a streaming completion.

    Note: streaming does not record usage into ``get_last_usage``.

    Args:
        messages: OpenAI-style message list.
        tools: Optional tool definitions forwarded to the backend.
        tool_choice: Optional tool selection policy.

    Yields:
        Text delta strings as they are received from the model.
    """

    # Inner async generator so stream() itself can stay a plain method
    # returning an AsyncIterator (matching the base class signature).
    async def _stream() -> AsyncIterator[str]:
        prepared_messages = self._prepare_messages(messages)
        request_kwargs: dict[str, Any] = {}
        if tools is not None:
            request_kwargs["tools"] = tools
        if tool_choice is not None:
            request_kwargs["tool_choice"] = tool_choice

        if self._is_local_provider():
            # __init__ always creates the client for local providers.
            assert self._openai_client is not None
            response = await self._openai_client.chat.completions.create(
                model=self._local_model_id(),
                messages=cast(Any, prepared_messages),
                stream=True,
                **request_kwargs,
            )
            async for chunk in cast(Any, response):
                delta = chunk.choices[0].delta.content
                if delta:
                    yield delta
            return

        response = await litellm.acompletion(
            messages=cast(Any, prepared_messages),
            stream=True,
            **request_kwargs,
            **self._common_kwargs(),
        )
        async for chunk in cast(Any, response):
            delta = chunk.choices[0].delta.content
            if delta:
                yield delta

    return _stream()

supports_vision()

Return whether the configured model supports image input.

Source code in llm_expose/providers/litellm_provider.py
68
69
70
def supports_vision(self) -> bool:
    """Report whether the configured model can accept image input."""
    return self._supports_vision