Skip to content

Index

ToolSelection #

Bases: BaseModel

Tool selection.

Parameters:

Name Type Description Default
tool_id str

Tool ID to select.

required
tool_name str

Tool name to select.

required
tool_kwargs Dict[str, Any]

Keyword arguments for the tool.

required
Source code in llama-index-core/llama_index/core/llms/llm.py
70
71
72
73
74
75
76
77
78
79
80
81
82
83
class ToolSelection(BaseModel):
    """A single tool invocation chosen by an LLM.

    Captures which tool was selected (by id and name) together with the
    keyword arguments the model produced for it.
    """

    tool_id: str = Field(description="Tool ID to select.")
    tool_name: str = Field(description="Tool name to select.")
    tool_kwargs: Dict[str, Any] = Field(description="Keyword arguments for the tool.")

    @field_validator("tool_kwargs", mode="wrap")
    @classmethod
    def ignore_non_dict_arguments(cls, raw_value: Any, handler: Any) -> Dict[str, Any]:
        # Some models emit malformed (non-dict) kwargs; rather than failing
        # the whole selection, fall back to validating an empty dict.
        try:
            return handler(raw_value)
        except ValidationError:
            return handler({})

LLM #

Bases: BaseLLM

The LLM class is the main class for interacting with language models.

Attributes:

Name Type Description

Parameters:

Name Type Description Default
system_prompt str | None

System prompt for LLM calls.

None
messages_to_prompt MessagesToPromptType | None

Function to convert a list of messages to an LLM prompt.

None
completion_to_prompt CompletionToPromptType | None

Function to convert a completion to an LLM prompt.

None
output_parser BaseOutputParser | None

Output parser to parse, validate, and correct errors programmatically.

None
pydantic_program_mode PydanticProgramMode
<PydanticProgramMode.DEFAULT: 'default'>
query_wrapper_prompt BasePromptTemplate | None

Query wrapper prompt for LLM calls.

None
Source code in llama-index-core/llama_index/core/llms/llm.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
class LLM(BaseLLM):
    """
    The LLM class is the main class for interacting with language models.

    Attributes:
        system_prompt (Optional[str]):
            System prompt for LLM calls.
        messages_to_prompt (Callable):
            Function to convert a list of messages to an LLM prompt.
        completion_to_prompt (Callable):
            Function to convert a completion to an LLM prompt.
        output_parser (Optional[BaseOutputParser]):
            Output parser to parse, validate, and correct errors programmatically.
        pydantic_program_mode (PydanticProgramMode):
            Pydantic program mode to use for structured prediction.

    """

    system_prompt: Optional[str] = Field(
        default=None, description="System prompt for LLM calls."
    )
    messages_to_prompt: MessagesToPromptCallable = Field(
        description="Function to convert a list of messages to an LLM prompt.",
        default=None,
        exclude=True,
    )
    completion_to_prompt: CompletionToPromptCallable = Field(
        description="Function to convert a completion to an LLM prompt.",
        default=None,
        exclude=True,
    )
    output_parser: Optional[BaseOutputParser] = Field(
        description="Output parser to parse, validate, and correct errors programmatically.",
        default=None,
        exclude=True,
    )
    pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT

    # deprecated
    query_wrapper_prompt: Optional[BasePromptTemplate] = Field(
        description="Query wrapper prompt for LLM calls.",
        default=None,
        exclude=True,
    )

    # -- Pydantic Configs --

    @field_validator("messages_to_prompt")
    @classmethod
    def set_messages_to_prompt(
        cls, messages_to_prompt: Optional[MessagesToPromptType]
    ) -> MessagesToPromptType:
        return messages_to_prompt or generic_messages_to_prompt

    @field_validator("completion_to_prompt")
    @classmethod
    def set_completion_to_prompt(
        cls, completion_to_prompt: Optional[CompletionToPromptType]
    ) -> CompletionToPromptType:
        return completion_to_prompt or default_completion_to_prompt

    @model_validator(mode="after")
    def check_prompts(self) -> "LLM":
        # NOTE: field validators do not run on default values, so when the
        # fields are left unset they remain None; backfill the defaults here.
        if self.completion_to_prompt is None:
            self.completion_to_prompt = default_completion_to_prompt
        if self.messages_to_prompt is None:
            self.messages_to_prompt = generic_messages_to_prompt
        return self

    # -- Utils --

    def _log_template_data(
        self, prompt: BasePromptTemplate, **prompt_args: Any
    ) -> None:
        """Emit a TEMPLATING callback event describing the prompt and its variables."""
        template_vars = {
            k: v
            for k, v in ChainMap(prompt.kwargs, prompt_args).items()
            if k in prompt.template_vars
        }
        with self.callback_manager.event(
            CBEventType.TEMPLATING,
            payload={
                EventPayload.TEMPLATE: prompt.get_template(llm=self),
                EventPayload.TEMPLATE_VARS: template_vars,
                EventPayload.SYSTEM_PROMPT: self.system_prompt,
                EventPayload.QUERY_WRAPPER_PROMPT: self.query_wrapper_prompt,
            },
        ):
            pass

    def _get_prompt(self, prompt: BasePromptTemplate, **prompt_args: Any) -> str:
        """Format a prompt template into a completion-style string."""
        formatted_prompt = prompt.format(
            llm=self,
            messages_to_prompt=self.messages_to_prompt,
            completion_to_prompt=self.completion_to_prompt,
            **prompt_args,
        )
        if self.output_parser is not None:
            formatted_prompt = self.output_parser.format(formatted_prompt)
        return self._extend_prompt(formatted_prompt)

    def _get_messages(
        self, prompt: BasePromptTemplate, **prompt_args: Any
    ) -> List[ChatMessage]:
        """Format a prompt template into a chat message list."""
        messages = prompt.format_messages(llm=self, **prompt_args)
        if self.output_parser is not None:
            messages = self.output_parser.format_messages(messages)
        return self._extend_messages(messages)

    def _parse_output(self, output: str) -> str:
        """Run the configured output parser over raw LLM text, if one is set."""
        if self.output_parser is not None:
            return str(self.output_parser.parse(output))

        return output

    def _extend_prompt(
        self,
        formatted_prompt: str,
    ) -> str:
        """Add system and query wrapper prompts to base prompt."""
        extended_prompt = formatted_prompt

        if self.system_prompt:
            extended_prompt = self.system_prompt + "\n\n" + extended_prompt

        if self.query_wrapper_prompt:
            extended_prompt = self.query_wrapper_prompt.format(
                query_str=extended_prompt
            )

        return extended_prompt

    def _extend_messages(self, messages: List[ChatMessage]) -> List[ChatMessage]:
        """Add system prompt to chat message list."""
        if self.system_prompt:
            messages = [
                ChatMessage(role=MessageRole.SYSTEM, content=self.system_prompt),
                *messages,
            ]
        return messages

    # -- Structured outputs --

    @dispatcher.span
    def structured_predict(
        self,
        output_cls: Type[Model],
        prompt: PromptTemplate,
        llm_kwargs: Optional[Dict[str, Any]] = None,
        **prompt_args: Any,
    ) -> Model:
        r"""
        Structured predict.

        Args:
            output_cls (BaseModel):
                Output class to use for structured prediction.
            prompt (PromptTemplate):
                Prompt template to use for structured prediction.
            llm_kwargs (Optional[Dict[str, Any]]):
                Arguments that are passed down to the LLM invoked by the program.
            prompt_args (Any):
                Additional arguments to format the prompt with.

        Returns:
            BaseModel: The structured prediction output.

        Raises:
            TypeError: If the program does not return an instance of ``output_cls``.

        Examples:
            ```python
            from pydantic import BaseModel

            class Test(BaseModel):
                \"\"\"My test class.\"\"\"
                name: str

            from llama_index.core.prompts import PromptTemplate

            prompt = PromptTemplate("Please predict a Test with a random name related to {topic}.")
            output = llm.structured_predict(Test, prompt, topic="cats")
            print(output.name)
            ```

        """
        from llama_index.core.program.utils import get_program_for_llm

        dispatcher.event(
            LLMStructuredPredictStartEvent(
                output_cls=output_cls, template=prompt, template_args=prompt_args
            )
        )
        program = get_program_for_llm(
            output_cls,
            prompt,
            self,
            pydantic_program_mode=self.pydantic_program_mode,
        )

        result = program(llm_kwargs=llm_kwargs, **prompt_args)
        assert not isinstance(result, list)

        if not isinstance(result, BaseModel):
            raise TypeError(
                f"structured_predict expected a {output_cls.__name__} instance "
                f"but got {type(result).__name__}: {result!r}. "
                f"The LLM failed to produce valid structured output."
            )

        dispatcher.event(LLMStructuredPredictEndEvent(output=result))
        return result

    @dispatcher.span
    async def astructured_predict(
        self,
        output_cls: Type[Model],
        prompt: PromptTemplate,
        llm_kwargs: Optional[Dict[str, Any]] = None,
        **prompt_args: Any,
    ) -> Model:
        r"""
        Async Structured predict.

        Args:
            output_cls (BaseModel):
                Output class to use for structured prediction.
            prompt (PromptTemplate):
                Prompt template to use for structured prediction.
            llm_kwargs (Optional[Dict[str, Any]]):
                Arguments that are passed down to the LLM invoked by the program.
            prompt_args (Any):
                Additional arguments to format the prompt with.

        Returns:
            BaseModel: The structured prediction output.

        Raises:
            TypeError: If the program does not return an instance of ``output_cls``.

        Examples:
            ```python
            from pydantic import BaseModel

            class Test(BaseModel):
                \"\"\"My test class.\"\"\"
                name: str

            from llama_index.core.prompts import PromptTemplate

            prompt = PromptTemplate("Please predict a Test with a random name related to {topic}.")
            output = await llm.astructured_predict(Test, prompt, topic="cats")
            print(output.name)
            ```

        """
        from llama_index.core.program.utils import get_program_for_llm

        dispatcher.event(
            LLMStructuredPredictStartEvent(
                output_cls=output_cls, template=prompt, template_args=prompt_args
            )
        )

        program = get_program_for_llm(
            output_cls,
            prompt,
            self,
            pydantic_program_mode=self.pydantic_program_mode,
        )

        result = await program.acall(llm_kwargs=llm_kwargs, **prompt_args)
        assert not isinstance(result, list)

        if not isinstance(result, BaseModel):
            raise TypeError(
                f"astructured_predict expected a {output_cls.__name__} instance "
                f"but got {type(result).__name__}: {result!r}. "
                f"The LLM failed to produce valid structured output."
            )

        dispatcher.event(LLMStructuredPredictEndEvent(output=result))
        return result

    def _structured_stream_call(
        self,
        output_cls: Type[Model],
        prompt: PromptTemplate,
        llm_kwargs: Optional[Dict[str, Any]] = None,
        **prompt_args: Any,
    ) -> Generator[
        Union[Model, List[Model], "FlexibleModel", List["FlexibleModel"]], None, None
    ]:
        """Build a streaming structured-prediction program and return its generator."""
        from llama_index.core.program.utils import get_program_for_llm

        program = get_program_for_llm(
            output_cls,
            prompt,
            self,
            pydantic_program_mode=self.pydantic_program_mode,
        )
        return program.stream_call(llm_kwargs=llm_kwargs, **prompt_args)

    @dispatcher.span
    def stream_structured_predict(
        self,
        output_cls: Type[Model],
        prompt: PromptTemplate,
        llm_kwargs: Optional[Dict[str, Any]] = None,
        **prompt_args: Any,
    ) -> Generator[Union[Model, "FlexibleModel"], None, None]:
        r"""
        Stream Structured predict.

        Args:
            output_cls (BaseModel):
                Output class to use for structured prediction.
            prompt (PromptTemplate):
                Prompt template to use for structured prediction.
            llm_kwargs (Optional[Dict[str, Any]]):
                Arguments that are passed down to the LLM invoked by the program.
            prompt_args (Any):
                Additional arguments to format the prompt with.

        Returns:
            Generator: A generator returning partial copies of the model or list of models.

        Examples:
            ```python
            from pydantic import BaseModel

            class Test(BaseModel):
                \"\"\"My test class.\"\"\"
                name: str

            from llama_index.core.prompts import PromptTemplate

            prompt = PromptTemplate("Please predict a Test with a random name related to {topic}.")
            stream_output = llm.stream_structured_predict(Test, prompt, topic="cats")
            for partial_output in stream_output:
                # stream partial outputs until completion
                print(partial_output.name)
            ```

        """
        dispatcher.event(
            LLMStructuredPredictStartEvent(
                output_cls=output_cls, template=prompt, template_args=prompt_args
            )
        )

        result = self._structured_stream_call(
            output_cls, prompt, llm_kwargs, **prompt_args
        )
        # Track the last yielded partial so the end event can be emitted
        # safely: referencing the loop variable after the loop would raise
        # NameError if the program stream yields nothing.
        last_partial: Optional[Union[Model, "FlexibleModel"]] = None
        for partial in result:
            dispatcher.event(LLMStructuredPredictInProgressEvent(output=partial))
            assert not isinstance(partial, list)
            last_partial = partial
            yield partial

        if last_partial is not None:
            dispatcher.event(LLMStructuredPredictEndEvent(output=last_partial))

    async def _structured_astream_call(
        self,
        output_cls: Type[Model],
        prompt: PromptTemplate,
        llm_kwargs: Optional[Dict[str, Any]] = None,
        **prompt_args: Any,
    ) -> AsyncGenerator[
        Union[Model, List[Model], "FlexibleModel", List["FlexibleModel"]], None
    ]:
        """Build an async streaming structured-prediction program and return its generator."""
        from llama_index.core.program.utils import get_program_for_llm

        program = get_program_for_llm(
            output_cls,
            prompt,
            self,
            pydantic_program_mode=self.pydantic_program_mode,
        )

        return await program.astream_call(llm_kwargs=llm_kwargs, **prompt_args)

    @dispatcher.span
    async def astream_structured_predict(
        self,
        output_cls: Type[Model],
        prompt: PromptTemplate,
        llm_kwargs: Optional[Dict[str, Any]] = None,
        **prompt_args: Any,
    ) -> AsyncGenerator[Union[Model, "FlexibleModel"], None]:
        r"""
        Async Stream Structured predict.

        Args:
            output_cls (BaseModel):
                Output class to use for structured prediction.
            prompt (PromptTemplate):
                Prompt template to use for structured prediction.
            llm_kwargs (Optional[Dict[str, Any]]):
                Arguments that are passed down to the LLM invoked by the program.
            prompt_args (Any):
                Additional arguments to format the prompt with.

        Returns:
            Generator: A generator returning partial copies of the model or list of models.

        Examples:
            ```python
            from pydantic import BaseModel

            class Test(BaseModel):
                \"\"\"My test class.\"\"\"
                name: str

            from llama_index.core.prompts import PromptTemplate

            prompt = PromptTemplate("Please predict a Test with a random name related to {topic}.")
            stream_output = await llm.astream_structured_predict(Test, prompt, topic="cats")
            async for partial_output in stream_output:
                # stream partial outputs until completion
                print(partial_output.name)
            ```

        """

        async def gen() -> AsyncGenerator[Union[Model, "FlexibleModel"], None]:
            dispatcher.event(
                LLMStructuredPredictStartEvent(
                    output_cls=output_cls, template=prompt, template_args=prompt_args
                )
            )

            result = await self._structured_astream_call(
                output_cls, prompt, llm_kwargs, **prompt_args
            )
            # Track the last yielded partial so the end event can be emitted
            # safely even when the stream yields nothing (avoids NameError).
            last_partial: Optional[Union[Model, "FlexibleModel"]] = None
            async for partial in result:
                dispatcher.event(LLMStructuredPredictInProgressEvent(output=partial))
                assert not isinstance(partial, list)
                last_partial = partial
                yield partial

            if last_partial is not None:
                dispatcher.event(LLMStructuredPredictEndEvent(output=last_partial))

        return gen()

    # -- Prompt Chaining --

    @dispatcher.span
    def predict(
        self,
        prompt: BasePromptTemplate,
        **prompt_args: Any,
    ) -> str:
        """
        Predict for a given prompt.

        Args:
            prompt (BasePromptTemplate):
                The prompt to use for prediction.
            prompt_args (Any):
                Additional arguments to format the prompt with.

        Returns:
            str: The prediction output.

        Examples:
            ```python
            from llama_index.core.prompts import PromptTemplate

            prompt = PromptTemplate("Please write a random name related to {topic}.")
            output = llm.predict(prompt, topic="cats")
            print(output)
            ```

        """
        dispatcher.event(
            LLMPredictStartEvent(template=prompt, template_args=prompt_args)
        )
        self._log_template_data(prompt, **prompt_args)

        if self.metadata.is_chat_model:
            messages = self._get_messages(prompt, **prompt_args)
            chat_response = self.chat(messages)
            output = chat_response.message.content or ""
        else:
            formatted_prompt = self._get_prompt(prompt, **prompt_args)
            response = self.complete(formatted_prompt, formatted=True)
            output = response.text
        parsed_output = self._parse_output(output)
        dispatcher.event(LLMPredictEndEvent(output=parsed_output))
        return parsed_output

    @dispatcher.span
    def stream(
        self,
        prompt: BasePromptTemplate,
        **prompt_args: Any,
    ) -> TokenGen:
        """
        Stream predict for a given prompt.

        Args:
            prompt (BasePromptTemplate):
                The prompt to use for prediction.
            prompt_args (Any):
                Additional arguments to format the prompt with.

        Yields:
            str: Each streamed token.

        Raises:
            NotImplementedError: If an output parser is configured (output
                parsing is not supported for streaming).

        Examples:
            ```python
            from llama_index.core.prompts import PromptTemplate

            prompt = PromptTemplate("Please write a random name related to {topic}.")
            gen = llm.stream(prompt, topic="cats")
            for token in gen:
                print(token, end="", flush=True)
            ```

        """
        self._log_template_data(prompt, **prompt_args)

        dispatcher.event(
            LLMPredictStartEvent(template=prompt, template_args=prompt_args)
        )

        # Fail fast: output parsing is unsupported for streaming, so raise
        # before starting the (potentially expensive) LLM call rather than
        # after the stream has already been initiated.
        if prompt.output_parser is not None or self.output_parser is not None:
            raise NotImplementedError("Output parser is not supported for streaming.")

        if self.metadata.is_chat_model:
            messages = self._get_messages(prompt, **prompt_args)
            chat_response = self.stream_chat(messages)
            stream_tokens = stream_chat_response_to_tokens(chat_response)
        else:
            formatted_prompt = self._get_prompt(prompt, **prompt_args)
            stream_response = self.stream_complete(formatted_prompt, formatted=True)
            stream_tokens = stream_completion_response_to_tokens(stream_response)

        return stream_tokens

    @dispatcher.span
    async def apredict(
        self,
        prompt: BasePromptTemplate,
        **prompt_args: Any,
    ) -> str:
        """
        Async Predict for a given prompt.

        Args:
            prompt (BasePromptTemplate):
                The prompt to use for prediction.
            prompt_args (Any):
                Additional arguments to format the prompt with.

        Returns:
            str: The prediction output.

        Examples:
            ```python
            from llama_index.core.prompts import PromptTemplate

            prompt = PromptTemplate("Please write a random name related to {topic}.")
            output = await llm.apredict(prompt, topic="cats")
            print(output)
            ```

        """
        dispatcher.event(
            LLMPredictStartEvent(template=prompt, template_args=prompt_args)
        )
        self._log_template_data(prompt, **prompt_args)

        if self.metadata.is_chat_model:
            messages = self._get_messages(prompt, **prompt_args)
            chat_response = await self.achat(messages)
            output = chat_response.message.content or ""
        else:
            formatted_prompt = self._get_prompt(prompt, **prompt_args)
            response = await self.acomplete(formatted_prompt, formatted=True)
            output = response.text

        parsed_output = self._parse_output(output)
        dispatcher.event(LLMPredictEndEvent(output=parsed_output))
        return parsed_output

    @dispatcher.span
    async def astream(
        self,
        prompt: BasePromptTemplate,
        **prompt_args: Any,
    ) -> TokenAsyncGen:
        """
        Async stream predict for a given prompt.

        Args:
            prompt (BasePromptTemplate):
                The prompt to use for prediction.
            prompt_args (Any):
                Additional arguments to format the prompt with.

        Yields:
            str: An async generator that yields strings of tokens.

        Raises:
            NotImplementedError: If an output parser is configured (output
                parsing is not supported for streaming).

        Examples:
            ```python
            from llama_index.core.prompts import PromptTemplate

            prompt = PromptTemplate("Please write a random name related to {topic}.")
            gen = await llm.astream(prompt, topic="cats")
            async for token in gen:
                print(token, end="", flush=True)
            ```

        """
        self._log_template_data(prompt, **prompt_args)

        dispatcher.event(
            LLMPredictStartEvent(template=prompt, template_args=prompt_args)
        )

        # Fail fast: output parsing is unsupported for streaming, so raise
        # before starting the (potentially expensive) LLM call rather than
        # after the stream has already been initiated.
        if prompt.output_parser is not None or self.output_parser is not None:
            raise NotImplementedError("Output parser is not supported for streaming.")

        if self.metadata.is_chat_model:
            messages = self._get_messages(prompt, **prompt_args)
            chat_response = await self.astream_chat(messages)
            stream_tokens = await astream_chat_response_to_tokens(chat_response)
        else:
            formatted_prompt = self._get_prompt(prompt, **prompt_args)
            stream_response = await self.astream_complete(
                formatted_prompt, formatted=True
            )
            stream_tokens = await astream_completion_response_to_tokens(stream_response)

        return stream_tokens

    @dispatcher.span
    def predict_and_call(
        self,
        tools: List["BaseTool"],
        user_msg: Optional[Union[str, ChatMessage]] = None,
        chat_history: Optional[List[ChatMessage]] = None,
        verbose: bool = False,
        **kwargs: Any,
    ) -> "AgentChatResponse":
        """
        Predict and call the tool.

        By default uses a ReAct agent to do tool calling (through text prompting),
        but function calling LLMs will implement this differently.

        """
        from llama_index.core.agent.workflow import ReActAgent
        from llama_index.core.agent.workflow.agent_context import SimpleAgentContext
        from llama_index.core.chat_engine.types import AgentChatResponse
        from llama_index.core.memory import Memory
        from llama_index.core.tools import adapt_to_async_tool
        from llama_index.core.tools.calling import call_tool_with_selection

        agent = ReActAgent(
            tools=tools,
            llm=self,
            verbose=verbose,
            formatter=kwargs.get("react_chat_formatter"),
            output_parser=kwargs.get("output_parser"),
            tool_retriever=kwargs.get("tool_retriever"),
        )

        memory = kwargs.get("memory", Memory.from_defaults())

        # Normalize user_msg: plain strings become user ChatMessages;
        # ChatMessages with str content pass through unchanged.
        if isinstance(user_msg, ChatMessage) and isinstance(user_msg.content, str):
            pass
        elif isinstance(user_msg, str):
            user_msg = ChatMessage(content=user_msg, role=MessageRole.USER)

        llm_input = []
        if chat_history:
            llm_input.extend(chat_history)
        if user_msg:
            llm_input.append(user_msg)

        ctx = SimpleAgentContext()
        async_tools = [adapt_to_async_tool(t) for t in (tools or [])]

        try:
            resp = asyncio_run(
                agent.take_step(
                    ctx=ctx, llm_input=llm_input, tools=async_tools, memory=memory
                )
            )
            tool_outputs = []
            for tool_call in resp.tool_calls:
                tool_output = call_tool_with_selection(
                    tool_call=tool_call,
                    tools=tools or [],
                    verbose=verbose,
                )
                tool_outputs.append(tool_output)
            output_text = "\n\n".join(
                [tool_output.content for tool_output in tool_outputs]
            )
            return AgentChatResponse(
                response=output_text,
                sources=tool_outputs,
            )
        except Exception as e:
            # Best-effort: surface tool/agent failures as a response rather
            # than propagating, so callers always get an AgentChatResponse.
            output = AgentChatResponse(
                response="An error occurred while running the tool: " + str(e),
                sources=[],
            )

        return output

    @dispatcher.span
    async def apredict_and_call(
        self,
        tools: List["BaseTool"],
        user_msg: Optional[Union[str, ChatMessage]] = None,
        chat_history: Optional[List[ChatMessage]] = None,
        verbose: bool = False,
        **kwargs: Any,
    ) -> "AgentChatResponse":
        """Predict and call the tool."""
        from llama_index.core.agent.workflow import ReActAgent
        from llama_index.core.agent.workflow.agent_context import SimpleAgentContext
        from llama_index.core.chat_engine.types import AgentChatResponse
        from llama_index.core.memory import Memory
        from llama_index.core.tools import adapt_to_async_tool
        from llama_index.core.tools.calling import acall_tool_with_selection

        agent = ReActAgent(
            tools=tools,
            llm=self,
            verbose=verbose,
            formatter=kwargs.get("react_chat_formatter"),
            output_parser=kwargs.get("output_parser"),
            tool_retriever=kwargs.get("tool_retriever"),
        )

        memory = kwargs.get("memory", Memory.from_defaults())

        # Normalize user_msg: plain strings become user ChatMessages;
        # ChatMessages with str content pass through unchanged.
        if isinstance(user_msg, ChatMessage) and isinstance(user_msg.content, str):
            pass
        elif isinstance(user_msg, str):
            user_msg = ChatMessage(content=user_msg, role=MessageRole.USER)

        llm_input = []
        if chat_history:
            llm_input.extend(chat_history)
        if user_msg:
            llm_input.append(user_msg)

        ctx = SimpleAgentContext()
        async_tools = [adapt_to_async_tool(t) for t in (tools or [])]

        try:
            resp = await agent.take_step(
                ctx=ctx, llm_input=llm_input, tools=async_tools, memory=memory
            )
            tool_outputs = []
            for tool_call in resp.tool_calls:
                tool_output = await acall_tool_with_selection(
                    tool_call=tool_call,
                    tools=tools or [],
                    verbose=verbose,
                )
                tool_outputs.append(tool_output)

            output_text = "\n\n".join(
                [tool_output.content for tool_output in tool_outputs]
            )
            return AgentChatResponse(
                response=output_text,
                sources=tool_outputs,
            )
        except Exception as e:
            # Best-effort: surface tool/agent failures as a response rather
            # than propagating, so callers always get an AgentChatResponse.
            output = AgentChatResponse(
                response="An error occurred while running the tool: " + str(e),
                sources=[],
            )

        return output

    def as_structured_llm(
        self,
        output_cls: Type[BaseModel],
        **kwargs: Any,
    ) -> "StructuredLLM":
        """Return a structured LLM around a given object."""
        from llama_index.core.llms.structured_llm import StructuredLLM

        return StructuredLLM(llm=self, output_cls=output_cls, **kwargs)

structured_predict #

structured_predict(
    output_cls: Type[Model],
    prompt: PromptTemplate,
    llm_kwargs: Optional[Dict[str, Any]] = None,
    **prompt_args: Any
) -> Model

Structured predict.

Parameters:

Name Type Description Default
output_cls BaseModel

Output class to use for structured prediction.

required
prompt PromptTemplate

Prompt template to use for structured prediction.

required
llm_kwargs Optional[Dict[str, Any]]

Arguments that are passed down to the LLM invoked by the program.

None
prompt_args Any

Additional arguments to format the prompt with.

{}

Returns:

Name Type Description
BaseModel Model

The structured prediction output.

Examples:

from pydantic import BaseModel

class Test(BaseModel):
    \"\"\"My test class.\"\"\"
    name: str

from llama_index.core.prompts import PromptTemplate

prompt = PromptTemplate("Please predict a Test with a random name related to {topic}.")
output = llm.structured_predict(Test, prompt, topic="cats")
print(output.name)
Source code in llama-index-core/llama_index/core/llms/llm.py
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
@dispatcher.span
def structured_predict(
    self,
    output_cls: Type[Model],
    prompt: PromptTemplate,
    llm_kwargs: Optional[Dict[str, Any]] = None,
    **prompt_args: Any,
) -> Model:
    """
    Run structured prediction and return a validated model instance.

    Args:
        output_cls (BaseModel):
            Output class to use for structured prediction.
        prompt (PromptTemplate):
            Prompt template to use for structured prediction.
        llm_kwargs (Optional[Dict[str, Any]]):
            Arguments that are passed down to the LLM invoked by the program.
        prompt_args (Any):
            Additional arguments to format the prompt with.

    Returns:
        BaseModel: The structured prediction output.

    Raises:
        TypeError: If the program produced something other than a
            ``BaseModel`` instance.

    Examples:
        ```python
        from pydantic import BaseModel
        from llama_index.core.prompts import PromptTemplate

        class Test(BaseModel):
            name: str

        prompt = PromptTemplate("Please predict a Test with a random name related to {topic}.")
        output = llm.structured_predict(Test, prompt, topic="cats")
        print(output.name)
        ```

    """
    from llama_index.core.program.utils import get_program_for_llm

    # Notify observers before the (potentially slow) LLM invocation.
    dispatcher.event(
        LLMStructuredPredictStartEvent(
            output_cls=output_cls, template=prompt, template_args=prompt_args
        )
    )

    structured_program = get_program_for_llm(
        output_cls,
        prompt,
        self,
        pydantic_program_mode=self.pydantic_program_mode,
    )

    prediction = structured_program(llm_kwargs=llm_kwargs, **prompt_args)
    assert not isinstance(prediction, list)

    # Guard against malformed LLM output slipping through the program.
    if not isinstance(prediction, BaseModel):
        raise TypeError(
            f"structured_predict expected a {output_cls.__name__} instance "
            f"but got {type(prediction).__name__}: {prediction!r}. "
            f"The LLM failed to produce valid structured output."
        )

    dispatcher.event(LLMStructuredPredictEndEvent(output=prediction))
    return prediction

astructured_predict async #

astructured_predict(
    output_cls: Type[Model],
    prompt: PromptTemplate,
    llm_kwargs: Optional[Dict[str, Any]] = None,
    **prompt_args: Any
) -> Model

Async Structured predict.

Parameters:

Name Type Description Default
output_cls BaseModel

Output class to use for structured prediction.

required
prompt PromptTemplate

Prompt template to use for structured prediction.

required
llm_kwargs Optional[Dict[str, Any]]

Arguments that are passed down to the LLM invoked by the program.

None
prompt_args Any

Additional arguments to format the prompt with.

{}

Returns:

Name Type Description
BaseModel Model

The structured prediction output.

Examples:

from pydantic import BaseModel

class Test(BaseModel):
    \"\"\"My test class.\"\"\"
    name: str

from llama_index.core.prompts import PromptTemplate

prompt = PromptTemplate("Please predict a Test with a random name related to {topic}.")
output = await llm.astructured_predict(Test, prompt, topic="cats")
print(output.name)
Source code in llama-index-core/llama_index/core/llms/llm.py
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
@dispatcher.span
async def astructured_predict(
    self,
    output_cls: Type[Model],
    prompt: PromptTemplate,
    llm_kwargs: Optional[Dict[str, Any]] = None,
    **prompt_args: Any,
) -> Model:
    """
    Async structured prediction returning a validated model instance.

    Args:
        output_cls (BaseModel):
            Output class to use for structured prediction.
        prompt (PromptTemplate):
            Prompt template to use for structured prediction.
        llm_kwargs (Optional[Dict[str, Any]]):
            Arguments that are passed down to the LLM invoked by the program.
        prompt_args (Any):
            Additional arguments to format the prompt with.

    Returns:
        BaseModel: The structured prediction output.

    Raises:
        TypeError: If the program produced something other than a
            ``BaseModel`` instance.

    Examples:
        ```python
        from pydantic import BaseModel
        from llama_index.core.prompts import PromptTemplate

        class Test(BaseModel):
            name: str

        prompt = PromptTemplate("Please predict a Test with a random name related to {topic}.")
        output = await llm.astructured_predict(Test, prompt, topic="cats")
        print(output.name)
        ```

    """
    from llama_index.core.program.utils import get_program_for_llm

    # Notify observers before the (potentially slow) LLM invocation.
    dispatcher.event(
        LLMStructuredPredictStartEvent(
            output_cls=output_cls, template=prompt, template_args=prompt_args
        )
    )

    structured_program = get_program_for_llm(
        output_cls,
        prompt,
        self,
        pydantic_program_mode=self.pydantic_program_mode,
    )

    prediction = await structured_program.acall(llm_kwargs=llm_kwargs, **prompt_args)
    assert not isinstance(prediction, list)

    # Guard against malformed LLM output slipping through the program.
    if not isinstance(prediction, BaseModel):
        raise TypeError(
            f"astructured_predict expected a {output_cls.__name__} instance "
            f"but got {type(prediction).__name__}: {prediction!r}. "
            f"The LLM failed to produce valid structured output."
        )

    dispatcher.event(LLMStructuredPredictEndEvent(output=prediction))
    return prediction

stream_structured_predict #

stream_structured_predict(
    output_cls: Type[Model],
    prompt: PromptTemplate,
    llm_kwargs: Optional[Dict[str, Any]] = None,
    **prompt_args: Any
) -> Generator[Union[Model, FlexibleModel], None, None]

Stream Structured predict.

Parameters:

Name Type Description Default
output_cls BaseModel

Output class to use for structured prediction.

required
prompt PromptTemplate

Prompt template to use for structured prediction.

required
llm_kwargs Optional[Dict[str, Any]]

Arguments that are passed down to the LLM invoked by the program.

None
prompt_args Any

Additional arguments to format the prompt with.

{}

Returns:

Name Type Description
Generator Generator[Union[Model, FlexibleModel], None, None]

A generator returning partial copies of the model or list of models.

Examples:

from pydantic import BaseModel

class Test(BaseModel):
    \"\"\"My test class.\"\"\"
    name: str

from llama_index.core.prompts import PromptTemplate

prompt = PromptTemplate("Please predict a Test with a random name related to {topic}.")
stream_output = llm.stream_structured_predict(Test, prompt, topic="cats")
for partial_output in stream_output:
    # stream partial outputs until completion
    print(partial_output.name)
Source code in llama-index-core/llama_index/core/llms/llm.py
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
@dispatcher.span
def stream_structured_predict(
    self,
    output_cls: Type[Model],
    prompt: PromptTemplate,
    llm_kwargs: Optional[Dict[str, Any]] = None,
    **prompt_args: Any,
) -> Generator[Union[Model, "FlexibleModel"], None, None]:
    r"""
    Stream Structured predict.

    Args:
        output_cls (BaseModel):
            Output class to use for structured prediction.
        prompt (PromptTemplate):
            Prompt template to use for structured prediction.
        llm_kwargs (Optional[Dict[str, Any]]):
            Arguments that are passed down to the LLM invoked by the program.
        prompt_args (Any):
            Additional arguments to format the prompt with.

    Returns:
        Generator: A generator returning partial copies of the model or list of models.

    Examples:
        ```python
        from pydantic import BaseModel

        class Test(BaseModel):
            \"\"\"My test class.\"\"\"
            name: str

        from llama_index.core.prompts import PromptTemplate

        prompt = PromptTemplate("Please predict a Test with a random name related to {topic}.")
        stream_output = llm.stream_structured_predict(Test, prompt, topic="cats")
        for partial_output in stream_output:
            # stream partial outputs until completion
            print(partial_output.name)
        ```

    """
    dispatcher.event(
        LLMStructuredPredictStartEvent(
            output_cls=output_cls, template=prompt, template_args=prompt_args
        )
    )

    result = self._structured_stream_call(
        output_cls, prompt, llm_kwargs, **prompt_args
    )
    # Track the last partial so the end event can carry it. Referencing the
    # loop variable after the loop (as before) raises NameError when the
    # stream yields no items.
    last: Optional[Union[Model, "FlexibleModel"]] = None
    for partial in result:
        dispatcher.event(LLMStructuredPredictInProgressEvent(output=partial))
        assert not isinstance(partial, list)
        yield partial
        last = partial

    # Only emit the end event if at least one partial was produced.
    if last is not None:
        dispatcher.event(LLMStructuredPredictEndEvent(output=last))

astream_structured_predict async #

astream_structured_predict(
    output_cls: Type[Model],
    prompt: PromptTemplate,
    llm_kwargs: Optional[Dict[str, Any]] = None,
    **prompt_args: Any
) -> AsyncGenerator[Union[Model, FlexibleModel], None]

Async Stream Structured predict.

Parameters:

Name Type Description Default
output_cls BaseModel

Output class to use for structured prediction.

required
prompt PromptTemplate

Prompt template to use for structured prediction.

required
llm_kwargs Optional[Dict[str, Any]]

Arguments that are passed down to the LLM invoked by the program.

None
prompt_args Any

Additional arguments to format the prompt with.

{}

Returns:

Name Type Description
Generator AsyncGenerator[Union[Model, FlexibleModel], None]

A generator returning partial copies of the model or list of models.

Examples:

from pydantic import BaseModel

class Test(BaseModel):
    \"\"\"My test class.\"\"\"
    name: str

from llama_index.core.prompts import PromptTemplate

prompt = PromptTemplate("Please predict a Test with a random name related to {topic}.")
stream_output = await llm.astream_structured_predict(Test, prompt, topic="cats")
async for partial_output in stream_output:
    # stream partial outputs until completion
    print(partial_output.name)
Source code in llama-index-core/llama_index/core/llms/llm.py
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
@dispatcher.span
async def astream_structured_predict(
    self,
    output_cls: Type[Model],
    prompt: PromptTemplate,
    llm_kwargs: Optional[Dict[str, Any]] = None,
    **prompt_args: Any,
) -> AsyncGenerator[Union[Model, "FlexibleModel"], None]:
    r"""
    Async Stream Structured predict.

    Args:
        output_cls (BaseModel):
            Output class to use for structured prediction.
        prompt (PromptTemplate):
            Prompt template to use for structured prediction.
        llm_kwargs (Optional[Dict[str, Any]]):
            Arguments that are passed down to the LLM invoked by the program.
        prompt_args (Any):
            Additional arguments to format the prompt with.

    Returns:
        Generator: A generator returning partial copies of the model or list of models.

    Examples:
        ```python
        from pydantic import BaseModel

        class Test(BaseModel):
            \"\"\"My test class.\"\"\"
            name: str

        from llama_index.core.prompts import PromptTemplate

        prompt = PromptTemplate("Please predict a Test with a random name related to {topic}.")
        stream_output = await llm.astream_structured_predict(Test, prompt, topic="cats")
        async for partial_output in stream_output:
            # stream partial outputs until completion
            print(partial_output.name)
        ```

    """

    async def gen() -> AsyncGenerator[Union[Model, "FlexibleModel"], None]:
        dispatcher.event(
            LLMStructuredPredictStartEvent(
                output_cls=output_cls, template=prompt, template_args=prompt_args
            )
        )

        result = await self._structured_astream_call(
            output_cls, prompt, llm_kwargs, **prompt_args
        )
        # Track the last partial so the end event can carry it. Referencing
        # the loop variable after the loop (as before) raises NameError when
        # the stream yields no items.
        last: Optional[Union[Model, "FlexibleModel"]] = None
        async for partial in result:
            dispatcher.event(LLMStructuredPredictInProgressEvent(output=partial))
            assert not isinstance(partial, list)
            yield partial
            last = partial

        # Only emit the end event if at least one partial was produced.
        if last is not None:
            dispatcher.event(LLMStructuredPredictEndEvent(output=last))

    return gen()

predict #

predict(
    prompt: BasePromptTemplate, **prompt_args: Any
) -> str

Predict for a given prompt.

Parameters:

Name Type Description Default
prompt BasePromptTemplate

The prompt to use for prediction.

required
prompt_args Any

Additional arguments to format the prompt with.

{}

Returns:

Name Type Description
str str

The prediction output.

Examples:

from llama_index.core.prompts import PromptTemplate

prompt = PromptTemplate("Please write a random name related to {topic}.")
output = llm.predict(prompt, topic="cats")
print(output)
Source code in llama-index-core/llama_index/core/llms/llm.py
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
@dispatcher.span
def predict(
    self,
    prompt: BasePromptTemplate,
    **prompt_args: Any,
) -> str:
    """
    Format the prompt, run the LLM, and return the parsed text output.

    Args:
        prompt (BasePromptTemplate):
            The prompt to use for prediction.
        prompt_args (Any):
            Additional arguments to format the prompt with.

    Returns:
        str: The prediction output.

    Examples:
        ```python
        from llama_index.core.prompts import PromptTemplate

        prompt = PromptTemplate("Please write a random name related to {topic}.")
        output = llm.predict(prompt, topic="cats")
        print(output)
        ```

    """
    dispatcher.event(
        LLMPredictStartEvent(template=prompt, template_args=prompt_args)
    )
    self._log_template_data(prompt, **prompt_args)

    # Chat models take a message list; completion models take a flat string.
    if not self.metadata.is_chat_model:
        formatted = self._get_prompt(prompt, **prompt_args)
        raw_text = self.complete(formatted, formatted=True).text
    else:
        chat_result = self.chat(self._get_messages(prompt, **prompt_args))
        raw_text = chat_result.message.content or ""

    parsed = self._parse_output(raw_text)
    dispatcher.event(LLMPredictEndEvent(output=parsed))
    return parsed

stream #

stream(
    prompt: BasePromptTemplate, **prompt_args: Any
) -> TokenGen

Stream predict for a given prompt.

Parameters:

Name Type Description Default
prompt BasePromptTemplate

The prompt to use for prediction.

required
prompt_args Any

Additional arguments to format the prompt with.

{}

Yields:

Name Type Description
str TokenGen

Each streamed token.

Examples:

from llama_index.core.prompts import PromptTemplate

prompt = PromptTemplate("Please write a random name related to {topic}.")
gen = llm.stream(prompt, topic="cats")
for token in gen:
    print(token, end="", flush=True)
Source code in llama-index-core/llama_index/core/llms/llm.py
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
@dispatcher.span
def stream(
    self,
    prompt: BasePromptTemplate,
    **prompt_args: Any,
) -> TokenGen:
    """
    Stream predict for a given prompt.

    Args:
        prompt (BasePromptTemplate):
            The prompt to use for prediction.
        prompt_args (Any):
            Additional arguments to format the prompt with.

    Yields:
        str: Each streamed token.

    Raises:
        NotImplementedError: If the prompt or the LLM has an output parser
            attached; output parsers are not supported for streaming.

    Examples:
        ```python
        from llama_index.core.prompts import PromptTemplate

        prompt = PromptTemplate("Please write a random name related to {topic}.")
        gen = llm.stream(prompt, topic="cats")
        for token in gen:
            print(token, end="", flush=True)
        ```

    """
    # Output parsers are unsupported when streaming: fail fast, before
    # dispatching events or starting the (potentially expensive) LLM call.
    if prompt.output_parser is not None or self.output_parser is not None:
        raise NotImplementedError("Output parser is not supported for streaming.")

    self._log_template_data(prompt, **prompt_args)

    dispatcher.event(
        LLMPredictStartEvent(template=prompt, template_args=prompt_args)
    )
    if self.metadata.is_chat_model:
        messages = self._get_messages(prompt, **prompt_args)
        chat_response = self.stream_chat(messages)
        stream_tokens = stream_chat_response_to_tokens(chat_response)
    else:
        formatted_prompt = self._get_prompt(prompt, **prompt_args)
        stream_response = self.stream_complete(formatted_prompt, formatted=True)
        stream_tokens = stream_completion_response_to_tokens(stream_response)

    return stream_tokens

apredict async #

apredict(
    prompt: BasePromptTemplate, **prompt_args: Any
) -> str

Async Predict for a given prompt.

Parameters:

Name Type Description Default
prompt BasePromptTemplate

The prompt to use for prediction.

required
prompt_args Any

Additional arguments to format the prompt with.

{}

Returns:

Name Type Description
str str

The prediction output.

Examples:

from llama_index.core.prompts import PromptTemplate

prompt = PromptTemplate("Please write a random name related to {topic}.")
output = await llm.apredict(prompt, topic="cats")
print(output)
Source code in llama-index-core/llama_index/core/llms/llm.py
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
@dispatcher.span
async def apredict(
    self,
    prompt: BasePromptTemplate,
    **prompt_args: Any,
) -> str:
    """
    Async variant of ``predict``: format the prompt, run the LLM, parse.

    Args:
        prompt (BasePromptTemplate):
            The prompt to use for prediction.
        prompt_args (Any):
            Additional arguments to format the prompt with.

    Returns:
        str: The prediction output.

    Examples:
        ```python
        from llama_index.core.prompts import PromptTemplate

        prompt = PromptTemplate("Please write a random name related to {topic}.")
        output = await llm.apredict(prompt, topic="cats")
        print(output)
        ```

    """
    dispatcher.event(
        LLMPredictStartEvent(template=prompt, template_args=prompt_args)
    )
    self._log_template_data(prompt, **prompt_args)

    # Chat models take a message list; completion models take a flat string.
    if not self.metadata.is_chat_model:
        formatted = self._get_prompt(prompt, **prompt_args)
        completion = await self.acomplete(formatted, formatted=True)
        raw_text = completion.text
    else:
        chat_result = await self.achat(self._get_messages(prompt, **prompt_args))
        raw_text = chat_result.message.content or ""

    parsed = self._parse_output(raw_text)
    dispatcher.event(LLMPredictEndEvent(output=parsed))
    return parsed

astream async #

astream(
    prompt: BasePromptTemplate, **prompt_args: Any
) -> TokenAsyncGen

Async stream predict for a given prompt.

Parameters:

Name Type Description Default
prompt BasePromptTemplate

The prompt to use for prediction.

required
prompt_args Any

Additional arguments to format the prompt with.

{}

Yields:

Name Type Description
str TokenAsyncGen

An async generator that yields strings of tokens.

Examples:

from llama_index.core.prompts import PromptTemplate

prompt = PromptTemplate("Please write a random name related to {topic}.")
gen = await llm.astream(prompt, topic="cats")
async for token in gen:
    print(token, end="", flush=True)
Source code in llama-index-core/llama_index/core/llms/llm.py
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
@dispatcher.span
async def astream(
    self,
    prompt: BasePromptTemplate,
    **prompt_args: Any,
) -> TokenAsyncGen:
    """
    Async stream predict for a given prompt.

    Args:
        prompt (BasePromptTemplate):
            The prompt to use for prediction.
        prompt_args (Any):
            Additional arguments to format the prompt with.

    Yields:
        str: An async generator that yields strings of tokens.

    Raises:
        NotImplementedError: If the prompt or the LLM has an output parser
            attached; output parsers are not supported for streaming.

    Examples:
        ```python
        from llama_index.core.prompts import PromptTemplate

        prompt = PromptTemplate("Please write a random name related to {topic}.")
        gen = await llm.astream(prompt, topic="cats")
        async for token in gen:
            print(token, end="", flush=True)
        ```

    """
    # Output parsers are unsupported when streaming: fail fast, before
    # dispatching events or starting the (potentially expensive) LLM call.
    if prompt.output_parser is not None or self.output_parser is not None:
        raise NotImplementedError("Output parser is not supported for streaming.")

    self._log_template_data(prompt, **prompt_args)

    dispatcher.event(
        LLMPredictStartEvent(template=prompt, template_args=prompt_args)
    )
    if self.metadata.is_chat_model:
        messages = self._get_messages(prompt, **prompt_args)
        chat_response = await self.astream_chat(messages)
        stream_tokens = await astream_chat_response_to_tokens(chat_response)
    else:
        formatted_prompt = self._get_prompt(prompt, **prompt_args)
        stream_response = await self.astream_complete(
            formatted_prompt, formatted=True
        )
        stream_tokens = await astream_completion_response_to_tokens(stream_response)

    return stream_tokens

predict_and_call #

predict_and_call(
    tools: List[BaseTool],
    user_msg: Optional[Union[str, ChatMessage]] = None,
    chat_history: Optional[List[ChatMessage]] = None,
    verbose: bool = False,
    **kwargs: Any
) -> AgentChatResponse

Predict and call the tool.

By default uses a ReAct agent to do tool calling (through text prompting), but function calling LLMs will implement this differently.

Source code in llama-index-core/llama_index/core/llms/llm.py
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
@dispatcher.span
def predict_and_call(
    self,
    tools: List["BaseTool"],
    user_msg: Optional[Union[str, ChatMessage]] = None,
    chat_history: Optional[List[ChatMessage]] = None,
    verbose: bool = False,
    **kwargs: Any,
) -> "AgentChatResponse":
    """
    Predict and call the tool.

    By default uses a ReAct agent to do tool calling (through text prompting),
    but function calling LLMs will implement this differently.

    Args:
        tools (List[BaseTool]): Tools the agent may select and invoke.
        user_msg (Optional[Union[str, ChatMessage]]): Latest user message;
            a plain string is wrapped in a USER-role ChatMessage.
        chat_history (Optional[List[ChatMessage]]): Prior messages placed
            before the user message in the LLM input.
        verbose (bool): Forwarded to the agent and to each tool call.
        **kwargs: Optional overrides read here: ``react_chat_formatter``,
            ``output_parser``, ``tool_retriever``, ``memory``.

    Returns:
        AgentChatResponse: Tool outputs joined by blank lines, or an error
        message response if any step raised.

    """
    # Local imports — presumably to avoid circular imports; confirm.
    from llama_index.core.agent.workflow import ReActAgent
    from llama_index.core.agent.workflow.agent_context import SimpleAgentContext
    from llama_index.core.chat_engine.types import AgentChatResponse
    from llama_index.core.memory import Memory
    from llama_index.core.tools import adapt_to_async_tool
    from llama_index.core.tools.calling import call_tool_with_selection

    agent = ReActAgent(
        tools=tools,
        llm=self,
        verbose=verbose,
        formatter=kwargs.get("react_chat_formatter"),
        output_parser=kwargs.get("output_parser"),
        tool_retriever=kwargs.get("tool_retriever"),
    )

    # Fresh default memory unless the caller supplied one via kwargs.
    memory = kwargs.get("memory", Memory.from_defaults())

    # Normalize user_msg: a ChatMessage with string content passes through;
    # a plain string is wrapped as a USER-role message.
    if isinstance(user_msg, ChatMessage) and isinstance(user_msg.content, str):
        pass
    elif isinstance(user_msg, str):
        user_msg = ChatMessage(content=user_msg, role=MessageRole.USER)

    # LLM input is chat history followed by the (optional) user message.
    llm_input = []
    if chat_history:
        llm_input.extend(chat_history)
    if user_msg:
        llm_input.append(user_msg)

    ctx = SimpleAgentContext()
    # take_step expects async-capable tools; the tool calls below use the
    # sync path with the original tools.
    async_tools = [adapt_to_async_tool(t) for t in (tools or [])]

    try:
        # Drive the async agent step to completion synchronously.
        resp = asyncio_run(
            agent.take_step(
                ctx=ctx, llm_input=llm_input, tools=async_tools, memory=memory
            )
        )
        tool_outputs = []
        for tool_call in resp.tool_calls:
            tool_output = call_tool_with_selection(
                tool_call=tool_call,
                tools=tools or [],
                verbose=verbose,
            )
            tool_outputs.append(tool_output)
        output_text = "\n\n".join(
            [tool_output.content for tool_output in tool_outputs]
        )
        return AgentChatResponse(
            response=output_text,
            sources=tool_outputs,
        )
    except Exception as e:
        # Best-effort: surface the failure as a response instead of raising.
        output = AgentChatResponse(
            response="An error occurred while running the tool: " + str(e),
            sources=[],
        )

    return output

apredict_and_call async #

apredict_and_call(
    tools: List[BaseTool],
    user_msg: Optional[Union[str, ChatMessage]] = None,
    chat_history: Optional[List[ChatMessage]] = None,
    verbose: bool = False,
    **kwargs: Any
) -> AgentChatResponse

Predict and call the tool.

Source code in llama-index-core/llama_index/core/llms/llm.py
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
@dispatcher.span
async def apredict_and_call(
    self,
    tools: List["BaseTool"],
    user_msg: Optional[Union[str, ChatMessage]] = None,
    chat_history: Optional[List[ChatMessage]] = None,
    verbose: bool = False,
    **kwargs: Any,
) -> "AgentChatResponse":
    """
    Predict and call the tool.

    Async counterpart of ``predict_and_call``: runs the ReAct agent step and
    each selected tool through the async call path.

    Args:
        tools (List[BaseTool]): Tools the agent may select and invoke.
        user_msg (Optional[Union[str, ChatMessage]]): Latest user message;
            a plain string is wrapped in a USER-role ChatMessage.
        chat_history (Optional[List[ChatMessage]]): Prior messages placed
            before the user message in the LLM input.
        verbose (bool): Forwarded to the agent and to each tool call.
        **kwargs: Optional overrides read here: ``react_chat_formatter``,
            ``output_parser``, ``tool_retriever``, ``memory``.

    Returns:
        AgentChatResponse: Tool outputs joined by blank lines, or an error
        message response if any step raised.

    """
    # Local imports — presumably to avoid circular imports; confirm.
    from llama_index.core.agent.workflow import ReActAgent
    from llama_index.core.agent.workflow.agent_context import SimpleAgentContext
    from llama_index.core.chat_engine.types import AgentChatResponse
    from llama_index.core.memory import Memory
    from llama_index.core.tools import adapt_to_async_tool
    from llama_index.core.tools.calling import acall_tool_with_selection

    agent = ReActAgent(
        tools=tools,
        llm=self,
        verbose=verbose,
        formatter=kwargs.get("react_chat_formatter"),
        output_parser=kwargs.get("output_parser"),
        tool_retriever=kwargs.get("tool_retriever"),
    )

    # Fresh default memory unless the caller supplied one via kwargs.
    memory = kwargs.get("memory", Memory.from_defaults())

    # Normalize user_msg: a ChatMessage with string content passes through;
    # a plain string is wrapped as a USER-role message.
    if isinstance(user_msg, ChatMessage) and isinstance(user_msg.content, str):
        pass
    elif isinstance(user_msg, str):
        user_msg = ChatMessage(content=user_msg, role=MessageRole.USER)

    # LLM input is chat history followed by the (optional) user message.
    llm_input = []
    if chat_history:
        llm_input.extend(chat_history)
    if user_msg:
        llm_input.append(user_msg)

    ctx = SimpleAgentContext()
    async_tools = [adapt_to_async_tool(t) for t in (tools or [])]

    try:
        resp = await agent.take_step(
            ctx=ctx, llm_input=llm_input, tools=async_tools, memory=memory
        )
        tool_outputs = []
        for tool_call in resp.tool_calls:
            tool_output = await acall_tool_with_selection(
                tool_call=tool_call,
                tools=tools or [],
                verbose=verbose,
            )
            tool_outputs.append(tool_output)

        output_text = "\n\n".join(
            [tool_output.content for tool_output in tool_outputs]
        )
        return AgentChatResponse(
            response=output_text,
            sources=tool_outputs,
        )
    except Exception as e:
        # Best-effort: surface the failure as a response instead of raising.
        output = AgentChatResponse(
            response="An error occurred while running the tool: " + str(e),
            sources=[],
        )

    return output

as_structured_llm #

as_structured_llm(
    output_cls: Type[BaseModel], **kwargs: Any
) -> StructuredLLM

Return a structured LLM around a given object.

Source code in llama-index-core/llama_index/core/llms/llm.py
938
939
940
941
942
943
944
945
946
def as_structured_llm(
    self,
    output_cls: Type[BaseModel],
    **kwargs: Any,
) -> "StructuredLLM":
    """
    Return a structured LLM around a given object.

    Args:
        output_cls (Type[BaseModel]): Pydantic model class the wrapped LLM
            will produce.
        **kwargs: Extra keyword arguments forwarded to ``StructuredLLM``.

    Returns:
        StructuredLLM: A wrapper around this LLM constrained to ``output_cls``.

    """
    # Local import — presumably to avoid a circular import; confirm.
    from llama_index.core.llms.structured_llm import StructuredLLM

    return StructuredLLM(llm=self, output_cls=output_cls, **kwargs)

stream_completion_response_to_tokens #

stream_completion_response_to_tokens(
    completion_response_gen: CompletionResponseGen,
) -> TokenGen

Convert a stream completion response to a stream of tokens.

Source code in llama-index-core/llama_index/core/llms/llm.py
 99
100
101
102
103
104
105
106
107
108
def stream_completion_response_to_tokens(
    completion_response_gen: CompletionResponseGen,
) -> TokenGen:
    """Adapt a streaming completion response into a plain stream of token strings."""

    def token_iter() -> TokenGen:
        for chunk in completion_response_gen:
            # A missing/empty delta becomes an empty string token.
            delta = chunk.delta
            yield delta if delta else ""

    return token_iter()

stream_chat_response_to_tokens #

stream_chat_response_to_tokens(
    chat_response_gen: ChatResponseGen,
) -> TokenGen

Convert a stream completion response to a stream of tokens.

Source code in llama-index-core/llama_index/core/llms/llm.py
111
112
113
114
115
116
117
118
119
120
def stream_chat_response_to_tokens(
    chat_response_gen: ChatResponseGen,
) -> TokenGen:
    """Adapt a streaming chat response into a plain stream of token strings."""

    def token_iter() -> TokenGen:
        for chunk in chat_response_gen:
            # A missing/empty delta becomes an empty string token.
            delta = chunk.delta
            yield delta if delta else ""

    return token_iter()

astream_completion_response_to_tokens async #

astream_completion_response_to_tokens(
    completion_response_gen: CompletionResponseAsyncGen,
) -> TokenAsyncGen

Convert a stream completion response to a stream of tokens.

Source code in llama-index-core/llama_index/core/llms/llm.py
123
124
125
126
127
128
129
130
131
132
async def astream_completion_response_to_tokens(
    completion_response_gen: CompletionResponseAsyncGen,
) -> TokenAsyncGen:
    """Adapt an async streaming completion response into an async stream of token strings."""

    async def token_iter() -> TokenAsyncGen:
        async for chunk in completion_response_gen:
            # A missing/empty delta becomes an empty string token.
            delta = chunk.delta
            yield delta if delta else ""

    return token_iter()

astream_chat_response_to_tokens async #

astream_chat_response_to_tokens(
    chat_response_gen: ChatResponseAsyncGen,
) -> TokenAsyncGen

Convert a stream completion response to a stream of tokens.

Source code in llama-index-core/llama_index/core/llms/llm.py
135
136
137
138
139
140
141
142
143
144
async def astream_chat_response_to_tokens(
    chat_response_gen: ChatResponseAsyncGen,
) -> TokenAsyncGen:
    """Adapt an async streaming chat response into an async stream of token strings."""

    async def token_iter() -> TokenAsyncGen:
        async for chunk in chat_response_gen:
            # A missing/empty delta becomes an empty string token.
            delta = chunk.delta
            yield delta if delta else ""

    return token_iter()

options: members: - LLM show_source: false inherited_members: true

MessageRole #

Bases: str, Enum

Message role.

Source code in llama-index-core/llama_index/core/base/llms/types.py
52
53
54
55
56
57
58
59
60
61
62
class MessageRole(str, Enum):
    """Message role.

    Identifies the author of a chat message. Inherits from ``str`` so that
    members compare equal to — and serialize as — their plain string values.
    """

    SYSTEM = "system"
    DEVELOPER = "developer"
    USER = "user"
    ASSISTANT = "assistant"
    FUNCTION = "function"
    TOOL = "tool"
    CHATBOT = "chatbot"
    MODEL = "model"

BaseContentBlock #

Bases: ABC, BaseModel

Source code in llama-index-core/llama_index/core/base/llms/types.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
class BaseContentBlock(ABC, BaseModel):
    """
    Abstract base class for chat-message content blocks (text, image, audio, ...).

    Provides default no-op implementations of token estimation, splitting,
    merging and truncation — subclasses override the async ``a*`` variants,
    while the sync methods are thin ``asyncio_run`` wrappers — plus lightweight
    string templating over the attributes named in ``templatable_attributes``.
    """

    @classmethod
    async def amerge(
        cls, splits: List[Self], chunk_size: int, tokenizer: Any | None = None
    ) -> list[Self]:
        """
        Async merge smaller content blocks into larger blocks up to chunk_size tokens.
        Default implementation returns splits without merging, should be overridden by subclasses that support merging.
        """
        return splits

    @classmethod
    def merge(
        cls, splits: List[Self], chunk_size: int, tokenizer: Any | None = None
    ) -> list[Self]:
        """Merge smaller content blocks into larger blocks up to chunk_size tokens."""
        return asyncio_run(
            cls.amerge(splits=splits, chunk_size=chunk_size, tokenizer=tokenizer)
        )

    async def aestimate_tokens(self, tokenizer: Any | None = None) -> int:
        """
        Async estimate the number of tokens in this content block.

        Default implementation returns 0, should be overridden by subclasses to provide meaningful estimates.
        """
        return 0

    def estimate_tokens(self, tokenizer: Any | None = None) -> int:
        """Estimate the number of tokens in this content block."""
        return asyncio_run(self.aestimate_tokens(tokenizer=tokenizer))

    async def asplit(
        self, max_tokens: int, overlap: int = 0, tokenizer: Any | None = None
    ) -> List[Self]:
        """
        Async split the content block into smaller blocks with up to max_tokens tokens each.

        Default implementation returns self in a list, should be overridden by subclasses that support splitting.
        """
        return [self]

    def split(
        self, max_tokens: int, overlap: int = 0, tokenizer: Any | None = None
    ) -> List[Self]:
        """Split the content block into smaller blocks with up to max_tokens tokens each."""
        return asyncio_run(
            self.asplit(max_tokens=max_tokens, overlap=overlap, tokenizer=tokenizer)
        )

    async def atruncate(
        self, max_tokens: int, tokenizer: Any | None = None, reverse: bool = False
    ) -> Self:
        """
        Async truncate the content block to up to max_tokens tokens.

        Returns the block unchanged when it already fits; otherwise keeps
        only the first split (or the last one when ``reverse`` is True).
        """
        tknizer = tokenizer or get_tokenizer()
        estimated_tokens = await self.aestimate_tokens(tokenizer=tknizer)
        if estimated_tokens <= max_tokens:
            return self

        split_blocks = await self.asplit(max_tokens=max_tokens, tokenizer=tknizer)
        return split_blocks[0] if not reverse else split_blocks[-1]

    def truncate(
        self, max_tokens: int, tokenizer: Any | None = None, reverse: bool = False
    ) -> Self:
        """Truncate the content block to up to max_tokens tokens."""
        return asyncio_run(
            self.atruncate(max_tokens=max_tokens, tokenizer=tokenizer, reverse=reverse)
        )

    @property
    def templatable_attributes(self) -> List[str]:
        """
        List of attributes that can be templated.

        Can be overridden by subclasses.
        """
        return []

    @staticmethod
    def _get_template_str_from_attribute(attribute: Any) -> str | None:
        """
        Helper function to get template string from attribute.

        It primarily enables cases of template_vars in binary strings for non text types such as:
            - ImageBlock(image=b'{image_bytes}')
            - AudioBlock(audio=b'{audio_bytes}')
            - VideoBlock(video=b'{video_bytes}')
            - DocumentBlock(data=b'{document_bytes}')

        However, it could in theory also work with other attributes like:
            - ImageBlock(path=b'{image_path}')
            - AudioBlock(url=b'{audio_url}')

        For that to work, the validation on those fields would need to be updated though.
        """
        if attribute is None:
            return None
        if isinstance(attribute, str):
            return attribute
        elif isinstance(attribute, bytes):
            try:
                return resolve_binary(attribute).read().decode("utf-8")
            except UnicodeDecodeError:
                # Binary data that is not valid UTF-8 cannot hold a template.
                return None
        else:
            return str(attribute)

    def get_template_vars(self) -> list[str]:
        """
        Get template variables from the content block.

        Returns the variables of the first templatable attribute that yields
        a template string; later attributes are not inspected.
        """
        from llama_index.core.prompts.utils import get_template_vars

        for attribute_name in self.templatable_attributes:
            attribute = getattr(self, attribute_name, None)
            template_str = self._get_template_str_from_attribute(attribute)
            if template_str:
                return get_template_vars(template_str)
        return []

    def format_vars(self, **kwargs: Any) -> "BaseContentBlock":
        """
        Format the content block with the given keyword arguments.

        This function primarily enables formatting of template_vars in TextBlocks and in binary strings for non-text blocks:
            - ImageBlock(image=b'{image_bytes}')
            - AudioBlock(audio=b'{audio_bytes}')
            - VideoBlock(video=b'{video_bytes}')
            - DocumentBlock(data=b'{document_bytes}')

        However, it could in theory also work with other attributes like:
            - ImageBlock(path=b'{image_path}')
            - AudioBlock(url=b'{audio_url}')

        For that to work, the validation on those fields would need to be updated though.
        """
        from llama_index.core.prompts.utils import format_string

        # If a kwarg is a binary string, coerce it to base64 text once up front.
        # This does not depend on the attribute being formatted, so it is
        # hoisted out of the loop (the original recomputed it per attribute).
        formatted_kwargs = {
            k: resolve_binary(v, as_base64=True).read().decode()
            if isinstance(v, bytes)
            else v
            for k, v in kwargs.items()
        }

        formatted_attrs: Dict[str, Any] = {}
        for attribute_name in self.templatable_attributes:
            attribute = getattr(self, attribute_name, None)
            att_type = type(attribute)
            template_str = self._get_template_str_from_attribute(attribute)
            if template_str:
                formatted_str = format_string(template_str, **formatted_kwargs)
                if att_type is str:
                    formatted_attrs[attribute_name] = formatted_str
                elif att_type is bytes:
                    # Re-encode so the attribute keeps its original bytes type.
                    formatted_attrs[attribute_name] = formatted_str.encode()
                else:
                    try:
                        formatted_attrs[attribute_name] = att_type(formatted_str)  # type: ignore
                    except Exception as exc:
                        # Bug fix: the message was a plain string, so the
                        # {placeholders} were never expanded; use an f-string
                        # and chain the original cause for debuggability.
                        raise ValueError(
                            f"Could not format attribute {attribute_name} with value {template_str} to type {att_type}"
                        ) from exc
        return type(self).model_validate(self.model_copy(update=formatted_attrs))

    @staticmethod
    def mimetype_from_inline_url(url: str) -> filetype.Type | None:
        """Best-effort mimetype detection for ``data:`` inline URLs; None for other URLs."""
        if url.startswith("data:"):
            try:
                # Fast path: parse the declared mimetype out of the data URL header.
                mimetype = url.split(";base64,")[0].split("data:")[1]
                return filetype.get_type(mime=mimetype)
            except Exception:
                try:
                    # Fallback: sniff the decoded payload itself.
                    data = url.split(";base64,")[1]
                    decoded_data = base64.b64decode(data)
                    return filetype.guess(decoded_data)
                except Exception:
                    return None
        return None

templatable_attributes property #

templatable_attributes: List[str]

List of attributes that can be templated.

Can be overridden by subclasses.

amerge async classmethod #

amerge(
    splits: List[Self],
    chunk_size: int,
    tokenizer: Any | None = None,
) -> list[Self]

Async merge smaller content blocks into larger blocks up to chunk_size tokens. Default implementation returns splits without merging, should be overridden by subclasses that support merging.

Source code in llama-index-core/llama_index/core/base/llms/types.py
66
67
68
69
70
71
72
73
74
@classmethod
async def amerge(
    cls, splits: List[Self], chunk_size: int, tokenizer: Any | None = None
) -> list[Self]:
    """
    Async merge smaller content blocks into larger blocks up to chunk_size tokens.
    Default implementation returns splits without merging, should be overridden by subclasses that support merging.
    """
    # Base class behavior: no merging — the input list is returned unchanged.
    return splits

merge classmethod #

merge(
    splits: List[Self],
    chunk_size: int,
    tokenizer: Any | None = None,
) -> list[Self]

Merge smaller content blocks into larger blocks up to chunk_size tokens.

Source code in llama-index-core/llama_index/core/base/llms/types.py
76
77
78
79
80
81
82
83
@classmethod
def merge(
    cls, splits: List[Self], chunk_size: int, tokenizer: Any | None = None
) -> list[Self]:
    """Merge smaller content blocks into larger blocks up to chunk_size tokens."""
    # Sync wrapper: delegates to the async implementation via asyncio_run.
    return asyncio_run(
        cls.amerge(splits=splits, chunk_size=chunk_size, tokenizer=tokenizer)
    )

aestimate_tokens async #

aestimate_tokens(tokenizer: Any | None = None) -> int

Async estimate the number of tokens in this content block.

Default implementation returns 0, should be overridden by subclasses to provide meaningful estimates.

Source code in llama-index-core/llama_index/core/base/llms/types.py
85
86
87
88
89
90
91
async def aestimate_tokens(self, tokenizer: Any | None = None) -> int:
    """
    Async estimate the number of tokens in this content block.

    Default implementation returns 0, should be overridden by subclasses to provide meaningful estimates.
    """
    # Base class has no content to measure.
    return 0

estimate_tokens #

estimate_tokens(tokenizer: Any | None = None) -> int

Estimate the number of tokens in this content block.

Source code in llama-index-core/llama_index/core/base/llms/types.py
93
94
95
def estimate_tokens(self, tokenizer: Any | None = None) -> int:
    """Estimate the number of tokens in this content block."""
    # Sync wrapper: delegates to the async implementation via asyncio_run.
    return asyncio_run(self.aestimate_tokens(tokenizer=tokenizer))

asplit async #

asplit(
    max_tokens: int,
    overlap: int = 0,
    tokenizer: Any | None = None,
) -> List[Self]

Async split the content block into smaller blocks with up to max_tokens tokens each.

Default implementation returns self in a list, should be overridden by subclasses that support splitting.

Source code in llama-index-core/llama_index/core/base/llms/types.py
 97
 98
 99
100
101
102
103
104
105
async def asplit(
    self, max_tokens: int, overlap: int = 0, tokenizer: Any | None = None
) -> List[Self]:
    """
    Async split the content block into smaller blocks with up to max_tokens tokens each.

    Default implementation returns self in a list, should be overridden by subclasses that support splitting.
    """
    # Base class behavior: no splitting — the block is returned whole.
    return [self]

split #

split(
    max_tokens: int,
    overlap: int = 0,
    tokenizer: Any | None = None,
) -> List[Self]

Split the content block into smaller blocks with up to max_tokens tokens each.

Source code in llama-index-core/llama_index/core/base/llms/types.py
107
108
109
110
111
112
113
def split(
    self, max_tokens: int, overlap: int = 0, tokenizer: Any | None = None
) -> List[Self]:
    """Split the content block into smaller blocks with up to max_tokens tokens each."""
    # Sync wrapper: delegates to the async implementation via asyncio_run.
    return asyncio_run(
        self.asplit(max_tokens=max_tokens, overlap=overlap, tokenizer=tokenizer)
    )

atruncate async #

atruncate(
    max_tokens: int,
    tokenizer: Any | None = None,
    reverse: bool = False,
) -> Self

Async truncate the content block to up to max_tokens tokens.

Source code in llama-index-core/llama_index/core/base/llms/types.py
115
116
117
118
119
120
121
122
123
124
125
async def atruncate(
    self, max_tokens: int, tokenizer: Any | None = None, reverse: bool = False
) -> Self:
    """Async truncate the content block to up to max_tokens tokens."""
    tknizer = tokenizer or get_tokenizer()
    estimated_tokens = await self.aestimate_tokens(tokenizer=tknizer)
    # Already small enough: return the block unchanged.
    if estimated_tokens <= max_tokens:
        return self

    # Otherwise keep only the first split (or the last one when reverse=True).
    split_blocks = await self.asplit(max_tokens=max_tokens, tokenizer=tknizer)
    return split_blocks[0] if not reverse else split_blocks[-1]

truncate #

truncate(
    max_tokens: int,
    tokenizer: Any | None = None,
    reverse: bool = False,
) -> Self

Truncate the content block to up to max_tokens tokens.

Source code in llama-index-core/llama_index/core/base/llms/types.py
127
128
129
130
131
132
133
def truncate(
    self, max_tokens: int, tokenizer: Any | None = None, reverse: bool = False
) -> Self:
    """Truncate the content block to up to max_tokens tokens."""
    # Sync wrapper: delegates to the async implementation via asyncio_run.
    return asyncio_run(
        self.atruncate(max_tokens=max_tokens, tokenizer=tokenizer, reverse=reverse)
    )

get_template_vars #

get_template_vars() -> list[str]

Get template variables from the content block.

Source code in llama-index-core/llama_index/core/base/llms/types.py
173
174
175
176
177
178
179
180
181
182
183
184
def get_template_vars(self) -> list[str]:
    """
    Get template variables from the content block.

    Returns the variables of the first templatable attribute that yields a
    template string; later attributes are not inspected.
    """
    from llama_index.core.prompts.utils import get_template_vars

    for attribute_name in self.templatable_attributes:
        attribute = getattr(self, attribute_name, None)
        template_str = self._get_template_str_from_attribute(attribute)
        if template_str:
            return get_template_vars(template_str)
    return []

format_vars #

format_vars(**kwargs: Any) -> 'BaseContentBlock'

Format the content block with the given keyword arguments.

This function primarily enables formatting of template_vars in TextBlocks and in binary strings for non-text blocks:
  • ImageBlock(image=b'{image_bytes}')
  • AudioBlock(audio=b'{audio_bytes}')
  • VideoBlock(video=b'{video_bytes}')
  • DocumentBlock(data=b'{document_bytes}')

However, it could in theory also work with other attributes like:
  • ImageBlock(path=b'{image_path}')
  • AudioBlock(url=b'{audio_url}')

For that to work, the validation on those fields would need to be updated though.

Source code in llama-index-core/llama_index/core/base/llms/types.py
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
def format_vars(self, **kwargs: Any) -> "BaseContentBlock":
    """
    Format the content block with the given keyword arguments.

    This function primarily enables formatting of template_vars in TextBlocks and in binary strings for non-text blocks:
        - ImageBlock(image=b'{image_bytes}')
        - AudioBlock(audio=b'{audio_bytes}')
        - VideoBlock(video=b'{video_bytes}')
        - DocumentBlock(data=b'{document_bytes}')

    However, it could in theory also work with other attributes like:
        - ImageBlock(path=b'{image_path}')
        - AudioBlock(url=b'{audio_url}')

    For that to work, the validation on those fields would need to be updated though.
    """
    from llama_index.core.prompts.utils import format_string

    # If a kwarg is a binary string, coerce it to base64 text once up front.
    # This does not depend on the attribute being formatted, so it is
    # hoisted out of the loop (the original recomputed it per attribute).
    formatted_kwargs = {
        k: resolve_binary(v, as_base64=True).read().decode()
        if isinstance(v, bytes)
        else v
        for k, v in kwargs.items()
    }

    formatted_attrs: Dict[str, Any] = {}
    for attribute_name in self.templatable_attributes:
        attribute = getattr(self, attribute_name, None)
        att_type = type(attribute)
        template_str = self._get_template_str_from_attribute(attribute)
        if template_str:
            formatted_str = format_string(template_str, **formatted_kwargs)
            if att_type is str:
                formatted_attrs[attribute_name] = formatted_str
            elif att_type is bytes:
                # Re-encode so the attribute keeps its original bytes type.
                formatted_attrs[attribute_name] = formatted_str.encode()
            else:
                try:
                    formatted_attrs[attribute_name] = att_type(formatted_str)  # type: ignore
                except Exception as exc:
                    # Bug fix: the message was a plain string, so the
                    # {placeholders} were never expanded; use an f-string
                    # and chain the original cause for debuggability.
                    raise ValueError(
                        f"Could not format attribute {attribute_name} with value {template_str} to type {att_type}"
                    ) from exc
    return type(self).model_validate(self.model_copy(update=formatted_attrs))

TextBlock #

Bases: BaseContentBlock

A representation of text data to directly pass to/from the LLM.

Parameters:

Name Type Description Default
block_type Literal['text']
'text'
text str
required
Source code in llama-index-core/llama_index/core/base/llms/types.py
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
class TextBlock(BaseContentBlock):
    """A representation of text data to directly pass to/from the LLM."""

    block_type: Literal["text"] = "text"
    text: str

    @classmethod
    async def amerge(
        cls, splits: List["TextBlock"], chunk_size: int, tokenizer: Any | None = None
    ) -> list["TextBlock"]:
        """
        Greedily pack consecutive splits into blocks of at most chunk_size tokens.

        Splits are appended to the current block while the running token
        estimate stays within chunk_size; otherwise the current block is
        flushed and a new one is started.
        """
        merged_blocks = []
        current_block_texts = []
        current_block_tokens = 0

        # TODO: Think about separators when merging, since correctly joining them requires us to understand how they
        #  were previously split. For now, we just universally join with spaces.
        for split in splits:
            split_tokens = await split.aestimate_tokens(tokenizer=tokenizer)

            if current_block_tokens + split_tokens <= chunk_size:
                current_block_texts.append(split.text)
                current_block_tokens += split_tokens
            else:
                # Bug fix: only flush when there is accumulated text; previously
                # a first split larger than chunk_size emitted an empty TextBlock.
                if current_block_texts:
                    merged_blocks.append(TextBlock(text=" ".join(current_block_texts)))
                current_block_texts = [split.text]
                current_block_tokens = split_tokens

        if current_block_texts:
            merged_blocks.append(TextBlock(text=" ".join(current_block_texts)))

        return merged_blocks

    async def aestimate_tokens(self, tokenizer: Any | None = None) -> int:
        """Count the tokens in ``text`` using ``tokenizer`` (or the default tokenizer)."""
        tknizer = tokenizer or get_tokenizer()
        return len(tknizer(self.text))

    async def asplit(
        self, max_tokens: int, overlap: int = 0, tokenizer: Any | None = None
    ) -> List["TextBlock"]:
        """Split ``text`` into blocks of at most max_tokens tokens with the given overlap."""
        from llama_index.core.node_parser import TokenTextSplitter

        text_splitter = TokenTextSplitter(
            chunk_size=max_tokens, chunk_overlap=overlap, tokenizer=tokenizer
        )
        chunks = text_splitter.split_text(self.text)
        return [TextBlock(text=chunk) for chunk in chunks]

    @property
    def templatable_attributes(self) -> list[str]:
        """Only ``text`` supports templating."""
        return ["text"]

ImageBlock #

Bases: BaseContentBlock

A representation of image data to directly pass to/from the LLM.

Parameters:

Name Type Description Default
block_type Literal['image']
'image'
image bytes | IOBase | None
None
path Annotated[Path, PathType] | None
None
url AnyUrl | str | None
None
image_mimetype str | None
None
detail str | None
None
Source code in llama-index-core/llama_index/core/base/llms/types.py
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
class ImageBlock(BaseContentBlock):
    """A representation of image data to directly pass to/from the LLM."""

    block_type: Literal["image"] = "image"
    # Raw image payload; normalized to base64-encoded bytes by image_to_base64.
    image: bytes | IOBase | None = None
    # Local file path alternative to `image`.
    path: FilePath | None = None
    # Remote or inline (data:) URL alternative to `image`.
    url: AnyUrl | str | None = None
    image_mimetype: str | None = None
    # Detail hint forwarded to the LLM provider — semantics not defined here
    # (presumably a provider-specific image-detail setting; TODO confirm).
    detail: str | None = None

    # Needed so pydantic accepts the non-pydantic IOBase type on `image`.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    @field_validator("url", mode="after")
    @classmethod
    def urlstr_to_anyurl(cls, url: str | AnyUrl | None) -> AnyUrl | None:
        """Store the url as Anyurl."""
        if isinstance(url, (AnyUrl, NoneType)):
            return url

        return AnyUrl(url=url)

    @field_serializer("image")
    def serialize_image(self, image: bytes | IOBase | None) -> bytes | None:
        """Serialize the image field."""
        if isinstance(image, bytes):
            return image
        if isinstance(image, IOBase):
            # Rewind so the full stream content is serialized.
            image.seek(0)
            return image.read()
        return None

    @model_validator(mode="after")
    def image_to_base64(self) -> Self:
        """
        Store the image as base64 and guess the mimetype when possible.

        In case the model was built passing image data but without a mimetype,
        we try to guess it using the filetype library. To avoid resource-intense
        operations, we won't load the path or the URL to guess the mimetype.
        """
        if not self.image or not isinstance(self.image, bytes):
            # No raw bytes available: only try to infer the mimetype cheaply
            # from the path/URL extension or a data: URL header.
            if not self.image_mimetype:
                path = self.path or self.url
                if path:
                    suffix = Path(str(path)).suffix.replace(".", "") or None
                    mimetype = filetype.get_type(ext=suffix)
                    if not mimetype or not mimetype.mime:
                        mimetype = self.mimetype_from_inline_url(str(path))
                    # Only accept the guess when it is actually an image type.
                    if mimetype and str(mimetype.mime).startswith("image/"):
                        self.image_mimetype = str(mimetype.mime)

            return self

        # Raw bytes present: sniff the mimetype, then normalize to base64.
        self._guess_mimetype(resolve_binary(self.image).read())
        self.image = resolve_binary(self.image, as_base64=True).read()
        return self

    def _guess_mimetype(self, img_data: bytes) -> None:
        # Only sniff when no mimetype was provided; never overwrite an explicit one.
        if not self.image_mimetype:
            guess = filetype.guess(img_data)
            self.image_mimetype = guess.mime if guess else None

    def resolve_image(self, as_base64: bool = False) -> IOBase:
        """
        Resolve an image such that PIL can read it.

        Args:
            as_base64 (bool): whether the resolved image should be returned as base64-encoded bytes

        """
        # NOTE(review): unlike serialize_image, this reads an IOBase without
        # seek(0) first — if the stream was already consumed, raw_bytes may be
        # empty; confirm callers always pass fresh streams.
        data_buffer = (
            resolve_binary(
                raw_bytes=self.image.read(),
                path=self.path,
                url=str(self.url) if self.url else None,
                as_base64=as_base64,
            )
            if isinstance(self.image, IOBase)
            else resolve_binary(
                raw_bytes=self.image,
                path=self.path,
                url=str(self.url) if self.url else None,
                as_base64=as_base64,
            )
        )

        # Check size by seeking to end and getting position
        data_buffer.seek(0, 2)  # Seek to end
        size = data_buffer.tell()
        data_buffer.seek(0)  # Reset to beginning

        if size == 0:
            raise ValueError("resolve_image returned zero bytes")
        return data_buffer

    def inline_url(self) -> str:
        # Builds a "data:<mime>;base64,<payload>" URL from the resolved image.
        b64 = self.resolve_image(as_base64=True)
        b64_str = b64.read().decode("utf-8")
        return f"data:{self.image_mimetype};base64,{b64_str}"

    async def aestimate_tokens(self, *args: Any, **kwargs: Any) -> int:
        """
        Many APIs measure images differently. Here, we take a large estimate.

        This is based on a 2048 x 1536 image using OpenAI.

        TODO: In the future, LLMs should be able to count their own tokens.
        """
        try:
            self.resolve_image()
            return 2125
        except ValueError as e:
            # Null case
            if str(e) == "resolve_image returned zero bytes":
                return 0
            raise

    @property
    def templatable_attributes(self) -> list[str]:
        # Only the raw image bytes participate in templating.
        return ["image"]

urlstr_to_anyurl classmethod #

urlstr_to_anyurl(url: str | AnyUrl | None) -> AnyUrl | None

Store the url as Anyurl.

Source code in llama-index-core/llama_index/core/base/llms/types.py
312
313
314
315
316
317
318
319
@field_validator("url", mode="after")
@classmethod
def urlstr_to_anyurl(cls, url: str | AnyUrl | None) -> AnyUrl | None:
    """Store the url as Anyurl."""
    # Already normalized (or absent): pass through unchanged.
    if isinstance(url, (AnyUrl, NoneType)):
        return url

    return AnyUrl(url=url)

serialize_image #

serialize_image(
    image: bytes | IOBase | None,
) -> bytes | None

Serialize the image field.

Source code in llama-index-core/llama_index/core/base/llms/types.py
321
322
323
324
325
326
327
328
329
@field_serializer("image")
def serialize_image(self, image: bytes | IOBase | None) -> bytes | None:
    """Serialize the image field."""
    if isinstance(image, bytes):
        return image
    if isinstance(image, IOBase):
        # Rewind so the full stream content is serialized.
        image.seek(0)
        return image.read()
    return None

image_to_base64 #

image_to_base64() -> Self

Store the image as base64 and guess the mimetype when possible.

In case the model was built passing image data but without a mimetype, we try to guess it using the filetype library. To avoid resource-intense operations, we won't load the path or the URL to guess the mimetype.

Source code in llama-index-core/llama_index/core/base/llms/types.py
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
@model_validator(mode="after")
def image_to_base64(self) -> Self:
    """
    Store the image as base64 and guess the mimetype when possible.

    In case the model was built passing image data but without a mimetype,
    we try to guess it using the filetype library. To avoid resource-intense
    operations, we won't load the path or the URL to guess the mimetype.
    """
    if not self.image or not isinstance(self.image, bytes):
        # No raw bytes available: only try to infer the mimetype cheaply
        # from the path/URL extension or a data: URL header.
        if not self.image_mimetype:
            path = self.path or self.url
            if path:
                suffix = Path(str(path)).suffix.replace(".", "") or None
                mimetype = filetype.get_type(ext=suffix)
                if not mimetype or not mimetype.mime:
                    mimetype = self.mimetype_from_inline_url(str(path))
                # Only accept the guess when it is actually an image type.
                if mimetype and str(mimetype.mime).startswith("image/"):
                    self.image_mimetype = str(mimetype.mime)

        return self

    # Raw bytes present: sniff the mimetype, then normalize to base64.
    self._guess_mimetype(resolve_binary(self.image).read())
    self.image = resolve_binary(self.image, as_base64=True).read()
    return self

resolve_image #

resolve_image(as_base64: bool = False) -> IOBase

Resolve an image such that PIL can read it.

Parameters:

Name Type Description Default
as_base64 bool

whether the resolved image should be returned as base64-encoded bytes

False
Source code in llama-index-core/llama_index/core/base/llms/types.py
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
def resolve_image(self, as_base64: bool = False) -> IOBase:
    """
    Resolve an image such that PIL can read it.

    Args:
        as_base64 (bool): whether the resolved image should be returned as base64-encoded bytes

    """
    # NOTE(review): an IOBase image is read without seek(0) first — if the
    # stream was already consumed, raw_bytes may be empty; confirm callers
    # always pass fresh streams.
    data_buffer = (
        resolve_binary(
            raw_bytes=self.image.read(),
            path=self.path,
            url=str(self.url) if self.url else None,
            as_base64=as_base64,
        )
        if isinstance(self.image, IOBase)
        else resolve_binary(
            raw_bytes=self.image,
            path=self.path,
            url=str(self.url) if self.url else None,
            as_base64=as_base64,
        )
    )

    # Check size by seeking to end and getting position
    data_buffer.seek(0, 2)  # Seek to end
    size = data_buffer.tell()
    data_buffer.seek(0)  # Reset to beginning

    if size == 0:
        raise ValueError("resolve_image returned zero bytes")
    return data_buffer

aestimate_tokens async #

aestimate_tokens(*args: Any, **kwargs: Any) -> int

Many APIs measure images differently. Here, we take a large estimate.

This is based on a 2048 x 1536 image using OpenAI.

TODO: In the future, LLMs should be able to count their own tokens.

Source code in llama-index-core/llama_index/core/base/llms/types.py
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
async def aestimate_tokens(self, *args: Any, **kwargs: Any) -> int:
    """
    Many APIs measure images differently. Here, we take a large estimate.

    This is based on a 2048 x 1536 image using OpenAI.

    TODO: In the future, LLMs should be able to count their own tokens.
    """
    try:
        # If the image resolves to any data at all, charge the flat estimate.
        self.resolve_image()
        return 2125
    except ValueError as e:
        # Null case
        if str(e) == "resolve_image returned zero bytes":
            return 0
        raise

AudioBlock #

Bases: BaseContentBlock

A representation of audio data to directly pass to/from the LLM.

Parameters:

Name Type Description Default
block_type Literal['audio']
'audio'
audio bytes | IOBase | None
None
path Annotated[Path, PathType] | None
None
url AnyUrl | str | None
None
format str | None
None
Source code in llama-index-core/llama_index/core/base/llms/types.py
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
class AudioBlock(BaseContentBlock):
    """A representation of audio data to directly pass to/from the LLM."""

    block_type: Literal["audio"] = "audio"
    # In-memory audio payload; normalized to base64-encoded bytes by the validator below.
    audio: bytes | IOBase | None = None
    # Local file to read the audio from (alternative to `audio`).
    path: FilePath | None = None
    # Remote or inline (data:) URL for the audio.
    url: AnyUrl | str | None = None
    # Audio format/extension (e.g. "mp3"); guessed from the data when omitted.
    format: str | None = None

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @field_validator("url", mode="after")
    @classmethod
    def urlstr_to_anyurl(cls, url: str | AnyUrl | None) -> AnyUrl | None:
        """Store the url as AnyUrl."""
        if isinstance(url, (AnyUrl, NoneType)):
            return url
        return AnyUrl(url=url)

    @field_serializer("audio")
    def serialize_audio(self, audio: bytes | IOBase | None) -> bytes | None:
        """Serialize the audio field."""
        if isinstance(audio, bytes):
            return audio
        if isinstance(audio, IOBase):
            audio.seek(0)
            return audio.read()
        return None

    @model_validator(mode="after")
    def audio_to_base64(self) -> Self:
        """
        Store the audio as base64 and guess the mimetype when possible.

        In case the model was built passing audio data but without a format,
        we try to guess it using the filetype library. To avoid resource-intensive
        operations, we won't load the path or the URL to guess the format.
        """
        if not self.audio or not isinstance(self.audio, bytes):
            # No raw bytes available: only try to derive `format` cheaply from
            # the path/url suffix, never by fetching the content.
            if not self.format:
                path = self.path or self.url
                if path:
                    suffix = Path(str(path)).suffix.replace(".", "") or None
                    mimetype = filetype.get_type(ext=suffix)
                    if not mimetype or not mimetype.mime:
                        mimetype = self.mimetype_from_inline_url(str(path))
                    # Only accept audio/* mimetypes as a format source.
                    if mimetype and str(mimetype.mime).startswith("audio/"):
                        self.format = str(mimetype.extension)

            return self

        # Guess the format from the raw bytes, then base64-encode them in place.
        self._guess_format(resolve_binary(self.audio).read())
        self.audio = resolve_binary(self.audio, as_base64=True).read()
        return self

    def _guess_format(self, audio_data: bytes) -> None:
        # Only fill in `format` when the caller did not provide one.
        if not self.format:
            guess = filetype.guess(audio_data)
            self.format = guess.extension if guess else None

    def resolve_audio(self, as_base64: bool = False) -> IOBase:
        """
        Resolve the audio content into a readable binary buffer.

        Args:
            as_base64 (bool): whether the resolved audio should be returned as base64-encoded bytes

        Raises:
            ValueError: if the resolved audio contains zero bytes.

        """
        # NOTE(review): unlike serialize_audio, `self.audio.read()` is not
        # preceded by seek(0) — assumes an unread stream; confirm with callers.
        data_buffer = (
            resolve_binary(
                raw_bytes=self.audio.read(),
                path=self.path,
                url=str(self.url) if self.url else None,
                as_base64=as_base64,
            )
            if isinstance(self.audio, IOBase)
            else resolve_binary(
                raw_bytes=self.audio,
                path=self.path,
                url=str(self.url) if self.url else None,
                as_base64=as_base64,
            )
        )
        # Check size by seeking to end and getting position
        data_buffer.seek(0, 2)  # Seek to end
        size = data_buffer.tell()
        data_buffer.seek(0)  # Reset to beginning

        if size == 0:
            raise ValueError("resolve_audio returned zero bytes")
        return data_buffer

    def inline_url(self) -> str:
        """Return the audio as a base64 data: URL, with the mimetype when known."""
        b64 = self.resolve_audio(as_base64=True)
        b64_str = b64.read().decode("utf-8")
        if self.format:
            # get_type may return None for an unknown extension; guard before
            # dereferencing .mime (previously raised AttributeError).
            ftype = filetype.get_type(ext=self.format)
            if ftype and ftype.mime:
                return f"data:{ftype.mime};base64,{b64_str}"
        return f"data:audio;base64,{b64_str}"

    async def aestimate_tokens(self, *args: Any, **kwargs: Any) -> int:
        """
        Use TinyTag to estimate the duration of the audio file and convert to tokens.

        Gemini estimates 32 tokens per second of audio
        https://ai.google.dev/gemini-api/docs/tokens?lang=python

        OpenAI estimates 1 token per 0.1 second for user input and 1 token per 0.05 seconds for assistant output
        https://platform.openai.com/docs/guides/realtime-costs
        """
        try:
            # First try tinytag
            try:
                tag = TinyTag.get(file_obj=cast(BytesIO, self.resolve_audio()))
                if duration := tag.duration:
                    # We conservatively return the max estimate
                    return max((int(duration) + 1) * 32, int(duration / 0.05) + 1)
            except UnsupportedFormatError:
                # Fixed: this is the audio path (the message previously said "video").
                _logger.info(
                    "TinyTag does not support file type for audio token estimation."
                )
            return 256  # fallback
        except ValueError as e:
            # Null case
            if str(e) == "resolve_audio returned zero bytes":
                return 0
            raise

    @property
    def templatable_attributes(self) -> list[str]:
        """Return the attribute names holding this block's binary content."""
        return ["audio"]

urlstr_to_anyurl classmethod #

urlstr_to_anyurl(url: str | AnyUrl | None) -> AnyUrl | None

Store the url as AnyUrl.

Source code in llama-index-core/llama_index/core/base/llms/types.py
433
434
435
436
437
438
439
@field_validator("url", mode="after")
@classmethod
def urlstr_to_anyurl(cls, url: str | AnyUrl | None) -> AnyUrl | None:
    """Coerce a plain string into AnyUrl; pass AnyUrl/None through unchanged."""
    already_canonical = isinstance(url, (AnyUrl, NoneType))
    return url if already_canonical else AnyUrl(url=url)

serialize_audio #

serialize_audio(
    audio: bytes | IOBase | None,
) -> bytes | None

Serialize the audio field.

Source code in llama-index-core/llama_index/core/base/llms/types.py
441
442
443
444
445
446
447
448
449
@field_serializer("audio")
def serialize_audio(self, audio: bytes | IOBase | None) -> bytes | None:
    """Serialize the audio field to raw bytes for model dumping."""
    # File-like payloads are rewound and fully drained; raw bytes pass through.
    if isinstance(audio, IOBase):
        audio.seek(0)
        return audio.read()
    return audio if isinstance(audio, bytes) else None

audio_to_base64 #

audio_to_base64() -> Self

Store the audio as base64 and guess the mimetype when possible.

In case the model was built passing audio data but without a format, we try to guess it using the filetype library. To avoid resource-intensive operations, we won't load the path or the URL to guess the format.

Source code in llama-index-core/llama_index/core/base/llms/types.py
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
@model_validator(mode="after")
def audio_to_base64(self) -> Self:
    """
    Store the audio as base64 and guess the mimetype when possible.

    In case the model was built passing audio data but without a format,
    we try to guess it using the filetype library. To avoid resource-intensive
    operations, we won't load the path or the URL to guess the format.
    """
    # No raw bytes in memory: only try to derive `format` from the path/url
    # suffix (cheap); the content itself is never fetched here.
    if not self.audio or not isinstance(self.audio, bytes):
        if not self.format:
            path = self.path or self.url
            if path:
                # File extension without the leading dot; None when absent.
                suffix = Path(str(path)).suffix.replace(".", "") or None
                mimetype = filetype.get_type(ext=suffix)
                if not mimetype or not mimetype.mime:
                    # presumably parses an inline data: URL for its mimetype
                    # — confirm against the base-class helper.
                    mimetype = self.mimetype_from_inline_url(str(path))
                # Only accept audio/* mimetypes as a format source.
                if mimetype and str(mimetype.mime).startswith("audio/"):
                    self.format = str(mimetype.extension)

        return self

    # Guess the format from the raw bytes, then base64-encode them in place.
    self._guess_format(resolve_binary(self.audio).read())
    self.audio = resolve_binary(self.audio, as_base64=True).read()
    return self

resolve_audio #

resolve_audio(as_base64: bool = False) -> IOBase

Resolve the audio content into a readable binary buffer.

Parameters:

Name Type Description Default
as_base64 bool

whether the resolved audio should be returned as base64-encoded bytes

False
Source code in llama-index-core/llama_index/core/base/llms/types.py
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
def resolve_audio(self, as_base64: bool = False) -> IOBase:
    """
    Resolve the audio content into a readable binary buffer.

    Args:
        as_base64 (bool): whether the resolved audio should be returned as base64-encoded bytes

    """
    # Normalize the in-memory payload first: file-like objects are drained
    # to raw bytes so resolve_binary always receives bytes (or None).
    raw = self.audio.read() if isinstance(self.audio, IOBase) else self.audio
    data_buffer = resolve_binary(
        raw_bytes=raw,
        path=self.path,
        url=str(self.url) if self.url else None,
        as_base64=as_base64,
    )

    # Measure the buffer by seeking to its end, then rewind for the caller.
    data_buffer.seek(0, 2)
    size = data_buffer.tell()
    data_buffer.seek(0)

    if size == 0:
        raise ValueError("resolve_audio returned zero bytes")
    return data_buffer

aestimate_tokens async #

aestimate_tokens(*args: Any, **kwargs: Any) -> int

Use TinyTag to estimate the duration of the audio file and convert to tokens.

Gemini estimates 32 tokens per second of audio https://ai.google.dev/gemini-api/docs/tokens?lang=python

OpenAI estimates 1 token per 0.1 second for user input and 1 token per 0.05 seconds for assistant output https://platform.openai.com/docs/guides/realtime-costs

Source code in llama-index-core/llama_index/core/base/llms/types.py
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
async def aestimate_tokens(self, *args: Any, **kwargs: Any) -> int:
    """
    Use TinyTag to estimate the duration of the audio file and convert to tokens.

    Gemini estimates 32 tokens per second of audio
    https://ai.google.dev/gemini-api/docs/tokens?lang=python

    OpenAI estimates 1 token per 0.1 second for user input and 1 token per 0.05 seconds for assistant output
    https://platform.openai.com/docs/guides/realtime-costs
    """
    try:
        # First try tinytag
        try:
            tag = TinyTag.get(file_obj=cast(BytesIO, self.resolve_audio()))
            if duration := tag.duration:
                # We conservatively return the max estimate
                return max((int(duration) + 1) * 32, int(duration / 0.05) + 1)
        except UnsupportedFormatError:
            # Fixed: this is the audio path (the message previously said "video").
            _logger.info(
                "TinyTag does not support file type for audio token estimation."
            )
        return 256  # fallback
    except ValueError as e:
        # Null case
        if str(e) == "resolve_audio returned zero bytes":
            return 0
        raise

VideoBlock #

Bases: BaseContentBlock

A representation of video data to directly pass to/from the LLM.

Parameters:

Name Type Description Default
block_type Literal['video']
'video'
video bytes | IOBase | None
None
path Annotated[Path, PathType] | None
None
url AnyUrl | str | None
None
video_mimetype str | None
None
detail str | None
None
fps int | None
None
Source code in llama-index-core/llama_index/core/base/llms/types.py
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
class VideoBlock(BaseContentBlock):
    """A representation of video data to directly pass to/from the LLM."""

    block_type: Literal["video"] = "video"
    # In-memory video payload; normalized to base64-encoded bytes by the validator below.
    video: bytes | IOBase | None = None
    # Local file to read the video from (alternative to `video`).
    path: FilePath | None = None
    # Remote or inline (data:) URL for the video.
    url: AnyUrl | str | None = None
    # MIME type (e.g. "video/mp4"); guessed from the data when omitted.
    video_mimetype: str | None = None
    # Detail hint — not used inside this class; presumably consumed by providers.
    detail: str | None = None
    # Frames-per-second hint — not used inside this class; presumably consumed by providers.
    fps: int | None = None

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @field_validator("url", mode="after")
    @classmethod
    def urlstr_to_anyurl(cls, url: str | AnyUrl | None) -> AnyUrl | None:
        """Store the url as AnyUrl."""
        if isinstance(url, (AnyUrl, NoneType)):
            return url
        return AnyUrl(url=url)

    @field_serializer("video")
    def serialize_video(self, video: bytes | IOBase | None) -> bytes | None:
        """Serialize the video field."""
        if isinstance(video, bytes):
            return video
        if isinstance(video, IOBase):
            video.seek(0)
            return video.read()
        return None

    @model_validator(mode="after")
    def video_to_base64(self) -> Self:
        """
        Store the video as base64 and guess the mimetype when possible.

        If video data is passed but no mimetype is provided, try to infer it.
        """
        # Annotated with Self for consistency with AudioBlock.audio_to_base64.
        if not self.video or not isinstance(self.video, bytes):
            # No raw bytes: only derive the mimetype cheaply from the suffix.
            if not self.video_mimetype:
                path = self.path or self.url
                if path:
                    suffix = Path(str(path)).suffix.replace(".", "") or None
                    mimetype = filetype.get_type(ext=suffix)
                    if not mimetype or not mimetype.mime:
                        mimetype = self.mimetype_from_inline_url(str(path))
                    # Only accept video/* mimetypes.
                    if mimetype and str(mimetype.mime).startswith("video/"):
                        self.video_mimetype = str(mimetype.mime)
            return self

        # Guess the mimetype from the raw bytes, then base64-encode in place.
        self._guess_mimetype(resolve_binary(self.video).read())
        self.video = resolve_binary(self.video, as_base64=True).read()
        return self

    def _guess_mimetype(self, vid_data: bytes) -> None:
        # Only fill in the mimetype when absent, and only accept video/* guesses.
        if not self.video_mimetype:
            guess = filetype.guess(vid_data)
            if guess and guess.mime.startswith("video/"):
                self.video_mimetype = guess.mime

    def resolve_video(self, as_base64: bool = False) -> IOBase:
        """
        Resolve a video file to an IOBase buffer.

        Args:
            as_base64 (bool): whether to return the video as base64-encoded bytes

        Raises:
            ValueError: if the resolved video contains zero bytes.

        """
        data_buffer = (
            resolve_binary(
                raw_bytes=self.video.read(),
                path=self.path,
                url=str(self.url) if self.url else None,
                as_base64=as_base64,
            )
            if isinstance(self.video, IOBase)
            else resolve_binary(
                raw_bytes=self.video,
                path=self.path,
                url=str(self.url) if self.url else None,
                as_base64=as_base64,
            )
        )

        # Check size by seeking to end and getting position
        data_buffer.seek(0, 2)  # Seek to end
        size = data_buffer.tell()
        data_buffer.seek(0)  # Reset to beginning

        if size == 0:
            raise ValueError("resolve_video returned zero bytes")
        return data_buffer

    def inline_url(self) -> str:
        """Return the video as a base64 data: URL, with the mimetype when known."""
        b64 = self.resolve_video(as_base64=True)
        b64_str = b64.read().decode("utf-8")
        if self.video_mimetype:
            return f"data:{self.video_mimetype};base64,{b64_str}"
        return f"data:video;base64,{b64_str}"

    async def aestimate_tokens(self, *args: Any, **kwargs: Any) -> int:
        """
        Use TinyTag to estimate the duration of the video file and convert to tokens.

        Gemini estimates 263 tokens per second of video
        https://ai.google.dev/gemini-api/docs/tokens?lang=python
        """
        try:
            # First try tinytag
            try:
                tag = TinyTag.get(file_obj=cast(BytesIO, self.resolve_video()))
                if duration := tag.duration:
                    return (int(duration) + 1) * 263
            except UnsupportedFormatError:
                _logger.info(
                    "TinyTag does not support file type for video token estimation."
                )
            # fallback of roughly 8 times the fallback cost of audio (263 // 32; based on gemini pricing per sec)
            return 256 * 8
        except ValueError as e:
            # Null case
            if str(e) == "resolve_video returned zero bytes":
                return 0
            raise

    @property
    def templatable_attributes(self) -> list[str]:
        """Return the attribute names holding this block's binary content."""
        return ["video"]

urlstr_to_anyurl classmethod #

urlstr_to_anyurl(url: str | AnyUrl | None) -> AnyUrl | None

Store the url as AnyUrl.

Source code in llama-index-core/llama_index/core/base/llms/types.py
569
570
571
572
573
574
575
@field_validator("url", mode="after")
@classmethod
def urlstr_to_anyurl(cls, url: str | AnyUrl | None) -> AnyUrl | None:
    """Store the url as AnyUrl."""
    # AnyUrl instances and None are already canonical; pass them through.
    if isinstance(url, (AnyUrl, NoneType)):
        return url
    # Plain strings are validated and coerced by pydantic's AnyUrl.
    return AnyUrl(url=url)

serialize_video #

serialize_video(
    video: bytes | IOBase | None,
) -> bytes | None

Serialize the video field.

Source code in llama-index-core/llama_index/core/base/llms/types.py
577
578
579
580
581
582
583
584
585
@field_serializer("video")
def serialize_video(self, video: bytes | IOBase | None) -> bytes | None:
    """Serialize the video field."""
    # Raw bytes pass through unchanged.
    if isinstance(video, bytes):
        return video
    # File-like payloads are rewound and fully drained.
    if isinstance(video, IOBase):
        video.seek(0)
        return video.read()
    return None

video_to_base64 #

video_to_base64() -> 'VideoBlock'

Store the video as base64 and guess the mimetype when possible.

If video data is passed but no mimetype is provided, try to infer it.

Source code in llama-index-core/llama_index/core/base/llms/types.py
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
@model_validator(mode="after")
def video_to_base64(self) -> "VideoBlock":
    """
    Store the video as base64 and guess the mimetype when possible.

    If video data is passed but no mimetype is provided, try to infer it.
    """
    # No raw bytes in memory: only derive the mimetype cheaply from the
    # path/url suffix; the content itself is never fetched here.
    if not self.video or not isinstance(self.video, bytes):
        if not self.video_mimetype:
            path = self.path or self.url
            if path:
                # File extension without the leading dot; None when absent.
                suffix = Path(str(path)).suffix.replace(".", "") or None
                mimetype = filetype.get_type(ext=suffix)
                if not mimetype or not mimetype.mime:
                    # presumably parses an inline data: URL for its mimetype
                    # — confirm against the base-class helper.
                    mimetype = self.mimetype_from_inline_url(str(path))
                # Only accept video/* mimetypes.
                if mimetype and str(mimetype.mime).startswith("video/"):
                    self.video_mimetype = str(mimetype.mime)
        return self

    # Guess the mimetype from the raw bytes, then base64-encode in place.
    self._guess_mimetype(resolve_binary(self.video).read())
    self.video = resolve_binary(self.video, as_base64=True).read()
    return self

resolve_video #

resolve_video(as_base64: bool = False) -> IOBase

Resolve a video file to an IOBase buffer.

Parameters:

Name Type Description Default
as_base64 bool

whether to return the video as base64-encoded bytes

False
Source code in llama-index-core/llama_index/core/base/llms/types.py
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
def resolve_video(self, as_base64: bool = False) -> IOBase:
    """
    Resolve a video file to an IOBase buffer.

    Args:
        as_base64 (bool): whether to return the video as base64-encoded bytes

    Raises:
        ValueError: if the resolved video contains zero bytes.

    """
    # File-like payloads are drained to raw bytes so resolve_binary always
    # receives bytes; presumably it falls back to path/url when raw_bytes is
    # None — confirm in resolve_binary.
    # NOTE(review): `self.video.read()` is not preceded by seek(0) (unlike
    # serialize_video) — assumes an unread stream; confirm with callers.
    data_buffer = (
        resolve_binary(
            raw_bytes=self.video.read(),
            path=self.path,
            url=str(self.url) if self.url else None,
            as_base64=as_base64,
        )
        if isinstance(self.video, IOBase)
        else resolve_binary(
            raw_bytes=self.video,
            path=self.path,
            url=str(self.url) if self.url else None,
            as_base64=as_base64,
        )
    )

    # Check size by seeking to end and getting position
    data_buffer.seek(0, 2)  # Seek to end
    size = data_buffer.tell()
    data_buffer.seek(0)  # Reset to beginning

    if size == 0:
        raise ValueError("resolve_video returned zero bytes")
    return data_buffer

aestimate_tokens async #

aestimate_tokens(*args: Any, **kwargs: Any) -> int

Use TinyTag to estimate the duration of the video file and convert to tokens.

Gemini estimates 263 tokens per second of video https://ai.google.dev/gemini-api/docs/tokens?lang=python

Source code in llama-index-core/llama_index/core/base/llms/types.py
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
async def aestimate_tokens(self, *args: Any, **kwargs: Any) -> int:
    """
    Use TinyTag to estimate the duration of the video file and convert to tokens.

    Gemini estimates 263 tokens per second of video
    https://ai.google.dev/gemini-api/docs/tokens?lang=python
    """
    try:
        try:
            # Probe the resolved buffer with TinyTag for a duration reading.
            duration = TinyTag.get(
                file_obj=cast(BytesIO, self.resolve_video())
            ).duration
            if duration:
                return (int(duration) + 1) * 263
        except UnsupportedFormatError:
            _logger.info(
                "TinyTag does not support file type for video token estimation."
            )
        # fallback of roughly 8 times the fallback cost of audio (263 // 32; based on gemini pricing per sec)
        return 256 * 8
    except ValueError as exc:
        # Null case: an empty payload counts as zero tokens.
        if str(exc) == "resolve_video returned zero bytes":
            return 0
        raise

DocumentBlock #

Bases: BaseContentBlock

A representation of a document to directly pass to the LLM.

Parameters:

Name Type Description Default
block_type Literal['document']
'document'
data bytes | IOBase | None
None
path Annotated[Path, PathType] | str | None
None
url str | None
None
title str | None
None
document_mimetype str | None
None
Source code in llama-index-core/llama_index/core/base/llms/types.py
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
class DocumentBlock(BaseContentBlock):
    """A representation of a document to directly pass to the LLM."""

    block_type: Literal["document"] = "document"
    # In-memory document payload; bytes are base64-encoded by the validator below.
    data: bytes | IOBase | None = None
    # Fixed: was `Optional[Union[FilePath | str]]` — a redundant Union wrapper
    # around an already-union type; same runtime type, written in the file's
    # `X | None` convention.
    path: FilePath | str | None = None
    url: Optional[str] = None
    # Human-readable title; defaults to "input_document" when not given.
    title: Optional[str] = None
    # MIME type of the document; guessed from data or file suffix when omitted.
    document_mimetype: Optional[str] = None

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @model_validator(mode="after")
    def document_validation(self) -> Self:
        """Fill in mimetype/title defaults and base64-encode in-memory bytes."""
        self.document_mimetype = self.document_mimetype or self._guess_mimetype()

        if not self.title:
            self.title = "input_document"

        # skip data validation if no byte is provided
        if not self.data or not isinstance(self.data, bytes):
            return self

        self.data = resolve_binary(self.data, as_base64=True).read()
        return self

    @field_serializer("data")
    def serialize_data(self, data: bytes | IOBase | None) -> bytes | None:
        """Serialize the data field."""
        if isinstance(data, bytes):
            return data
        if isinstance(data, IOBase):
            data.seek(0)
            return data.read()
        return None

    def resolve_document(self) -> IOBase:
        """
        Resolve a document such that it is represented by an IOBase buffer.

        Raises:
            ValueError: if the resolved document contains zero bytes.

        """
        data_buffer = (
            self.data
            if isinstance(self.data, IOBase)
            else resolve_binary(
                raw_bytes=self.data,
                path=self.path,
                url=str(self.url) if self.url else None,
                as_base64=False,
            )
        )
        # Check size by seeking to end and getting position
        data_buffer.seek(0, 2)  # Seek to end
        size = data_buffer.tell()
        data_buffer.seek(0)  # Reset to beginning

        if size == 0:
            raise ValueError("resolve_document returned zero bytes")
        return data_buffer

    def _get_b64_bytes(self, data_buffer: IOBase) -> bytes:
        """
        Get base64-encoded bytes from an IOBase buffer.
        """
        return resolve_binary(data_buffer.read(), as_base64=True).read()

    def _get_b64_string(self, data_buffer: IOBase) -> str:
        """
        Get a base64-encoded string from an IOBase buffer.
        """
        return self._get_b64_bytes(data_buffer).decode("utf-8")

    def inline_url(self) -> str:
        """Return the document as a base64 data: URL, with the mimetype when known."""
        b64_str = self._get_b64_string(data_buffer=self.resolve_document())
        if self.document_mimetype:
            return f"data:{self.document_mimetype};base64,{b64_str}"
        return f"data:application;base64,{b64_str}"

    def guess_format(self) -> str | None:
        """Return the file extension (without the dot) from path/url, or None."""
        path = self.path or self.url
        if not path:
            return None

        return Path(str(path)).suffix.replace(".", "")

    def _guess_mimetype(self) -> str | None:
        # Preference order: explicit mimetype, content sniffing, file suffix.
        if self.document_mimetype:
            return self.document_mimetype

        if self.data:
            guess = filetype.guess(self.data)
            return str(guess.mime) if guess else None

        suffix = self.guess_format()
        if not suffix:
            return None

        guess = filetype.get_type(ext=suffix)
        return str(guess.mime) if guess else None

    async def aestimate_tokens(self, *args: Any, **kwargs: Any) -> int:
        """Return a flat fallback estimate (512) for non-empty documents, else 0."""
        try:
            self.resolve_document()
        except ValueError as e:
            # Null case
            if str(e) == "resolve_document returned zero bytes":
                return 0
            raise
        # We currently only use this fallback estimate for documents which are non zero bytes
        return 512

    @property
    def templatable_attributes(self) -> list[str]:
        """Return the attribute names holding this block's binary content."""
        return ["data"]

serialize_data #

serialize_data(data: bytes | IOBase | None) -> bytes | None

Serialize the data field.

Source code in llama-index-core/llama_index/core/base/llms/types.py
712
713
714
715
716
717
718
719
720
@field_serializer("data")
def serialize_data(self, data: bytes | IOBase | None) -> bytes | None:
    """Serialize the data field to raw bytes for model dumping."""
    # File-like payloads are rewound and fully drained; raw bytes pass through.
    if isinstance(data, IOBase):
        data.seek(0)
        return data.read()
    return data if isinstance(data, bytes) else None

resolve_document #

resolve_document() -> IOBase

Resolve a document such that it is represented by an IOBase buffer.

Source code in llama-index-core/llama_index/core/base/llms/types.py
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
def resolve_document(self) -> IOBase:
    """
    Resolve a document such that it is represented by an IOBase buffer.
    """
    if isinstance(self.data, IOBase):
        # Already file-like: hand back the same object (the caller reads it).
        data_buffer = self.data
    else:
        data_buffer = resolve_binary(
            raw_bytes=self.data,
            path=self.path,
            url=str(self.url) if self.url else None,
            as_base64=False,
        )

    # Measure the buffer by seeking to its end, then rewind for the caller.
    data_buffer.seek(0, 2)
    size = data_buffer.tell()
    data_buffer.seek(0)

    if size == 0:
        raise ValueError("resolve_document returned zero bytes")
    return data_buffer

CacheControl #

Bases: BaseContentBlock

Parameters:

Name Type Description Default
type str
required
ttl str
'5m'
Source code in llama-index-core/llama_index/core/base/llms/types.py
801
802
803
class CacheControl(BaseContentBlock):
    # Cache type identifier (required).
    type: str
    # Time-to-live for the cached content; "5m" by default.
    ttl: str = "5m"

CachePoint #

Bases: BaseContentBlock

Used to set the point to cache up to, if the LLM supports caching.

Parameters:

Name Type Description Default
block_type Literal['cache']
'cache'
cache_control CacheControl
required
Source code in llama-index-core/llama_index/core/base/llms/types.py
806
807
808
809
810
class CachePoint(BaseContentBlock):
    """Used to set the point to cache up to, if the LLM supports caching."""

    # Literal tag identifying this block type during (de)serialization.
    block_type: Literal["cache"] = "cache"
    # Required cache-control settings (type and ttl) for this cache point.
    cache_control: CacheControl

BaseRecursiveContentBlock #

Bases: BaseContentBlock

Base class for content blocks that can contain other content blocks.

Source code in llama-index-core/llama_index/core/base/llms/types.py
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
class BaseRecursiveContentBlock(BaseContentBlock):
    """Base class for content blocks that can contain other content blocks."""

    @classmethod
    def nested_blocks_field_name(cls) -> str:
        """
        Return the name of the field that contains nested content blocks.

        By default, this is "content", but subclasses can override this method
        """
        return "content"

    @property
    def nested_blocks(self) -> List[BaseContentBlock]:
        """Return the nested content blocks."""
        blocks = getattr(self, self.nested_blocks_field_name())
        if isinstance(blocks, str):
            blocks = TextBlock(text=blocks)
        return blocks if isinstance(blocks, list) else [blocks]

    def can_merge(self, other: Self) -> bool:
        """Check if this block can be merged with another block of the same type."""
        atts = {
            k: v
            for k, v in self.model_dump().items()
            if k != self.nested_blocks_field_name()
        }
        other_atts = {
            k: v
            for k, v in other.model_dump().items()
            if k != self.nested_blocks_field_name()
        }
        return atts == other_atts

    @staticmethod
    async def amerge_nested(
        nested_blocks: list[BaseContentBlock],
        chunk_size: int,
        tokenizer: Any | None = None,
    ) -> list[BaseContentBlock]:
        # make list of lists out of nested blocks of same type
        nested_blocks_by_type: list[list[BaseContentBlock]] = []
        for nb in nested_blocks:
            if not nested_blocks_by_type or type(
                nested_blocks_by_type[-1][0]
            ) is not type(nb):
                nested_blocks_by_type.append([nb])
            else:
                nested_blocks_by_type[-1].append(nb)

        new_nested_blocks = []
        # merge nested blocks of same type
        for nbs in nested_blocks_by_type:
            new_nested_blocks.extend(
                await type(nbs[0]).amerge(
                    nbs, chunk_size=chunk_size, tokenizer=tokenizer
                )
            )
        return new_nested_blocks

    @classmethod
    async def amerge(
        cls,
        splits: List["BaseRecursiveContentBlock"],
        chunk_size: int,
        tokenizer: Any | None = None,
    ) -> list["BaseRecursiveContentBlock"]:
        """
        First merge nested_blocks of consecutive BaseRecursiveContentBlock types based on token estimates

        Then, merge consecutive nested content blocks of the same type.
        """
        merged_blocks = []
        cur_blocks: list["BaseRecursiveContentBlock"] = []
        cur_block_tokens = 0

        for split in splits:
            split_tokens = await split.aestimate_tokens(tokenizer=tokenizer)
            can_merge = len(cur_blocks) == 0 or cur_blocks[-1].can_merge(split)
            if cur_block_tokens + split_tokens <= chunk_size and can_merge:
                cur_blocks.append(split)
                cur_block_tokens += split_tokens
            else:
                if cur_blocks:
                    attributes = cur_blocks[0].model_dump() | {
                        # Overwrite nested blocks
                        cls.nested_blocks_field_name(): await cls.amerge_nested(
                            nested_blocks=[
                                nested_block
                                for block in cur_blocks
                                for nested_block in block.nested_blocks
                            ],
                            chunk_size=chunk_size,
                            tokenizer=tokenizer,
                        )
                    }
                    merged_blocks.append(cls(**attributes))
                cur_blocks = [split]
                cur_block_tokens = split_tokens

        if cur_blocks:
            attributes = cur_blocks[0].model_dump() | {
                # Overwrite nested blocks attribute and merge nested blocks of the same type
                cls.nested_blocks_field_name(): await cls.amerge_nested(
                    nested_blocks=[
                        nested_block
                        for block in cur_blocks
                        for nested_block in block.nested_blocks
                    ],
                    chunk_size=chunk_size,
                    tokenizer=tokenizer,
                )
            }
            merged_blocks.append(cls(**attributes))

        return merged_blocks

    async def aestimate_tokens(self, *args: Any, **kwargs: Any) -> int:
        """Estimate the number of tokens in this content block."""
        return sum(
            [
                await block.aestimate_tokens(*args, **kwargs)
                for block in self.nested_blocks
            ]
        )

    async def asplit(
        self, max_tokens: int, overlap: int = 0, tokenizer: Any | None = None
    ) -> List["BaseRecursiveContentBlock"]:
        """Split the content block into smaller blocks with up to max_tokens tokens each."""
        splits = []

        cls = type(self)
        for block in self.nested_blocks:
            block_tokens = await block.aestimate_tokens(tokenizer=tokenizer)
            if block_tokens <= max_tokens:
                attributes = self.model_dump() | {
                    # Overwrite nested blocks
                    self.nested_blocks_field_name(): [block]
                }
                splits.append(cls(**attributes))
            else:
                split_blocks = await block.asplit(
                    max_tokens=max_tokens, tokenizer=tokenizer
                )
                for split_block in split_blocks:
                    attributes = self.model_dump() | {
                        # Overwrite nested blocks
                        self.nested_blocks_field_name(): [split_block]
                    }
                    splits.append(cls(**attributes))

        return splits

    async def atruncate(
        self, max_tokens: int, tokenizer: Any | None = None, reverse: bool = False
    ) -> "BaseRecursiveContentBlock":
        """Truncate the content block to have at most max_tokens tokens."""
        tknizer = tokenizer or get_tokenizer()
        current_tokens = 0
        truncated_blocks = []

        cls = type(self)
        for block in (
            self.nested_blocks if not reverse else reversed(self.nested_blocks)
        ):
            block_tokens = await block.aestimate_tokens(tokenizer=tknizer)
            if current_tokens + block_tokens <= max_tokens:
                if not reverse:
                    truncated_blocks.append(block)
                else:
                    truncated_blocks.insert(0, block)
                current_tokens += block_tokens
            else:
                remaining_tokens = max_tokens - current_tokens
                if remaining_tokens > 0:
                    truncated_block = await block.atruncate(
                        max_tokens=remaining_tokens, tokenizer=tknizer, reverse=reverse
                    )
                    # For some block types, truncate may return a block larger than requested
                    # However, we still want to include it if no other truncated blocks were added
                    # We leave it to the user to handle cases where even the truncated block exceeds max_tokens
                    if (
                        await truncated_block.aestimate_tokens(tokenizer=tknizer)
                        <= remaining_tokens
                        or not truncated_blocks
                    ):
                        if not reverse:
                            truncated_blocks.append(truncated_block)
                        else:
                            truncated_blocks.insert(0, truncated_block)
                break  # Stop after reaching max_tokens

        attributes = self.model_dump() | {
            # Overwrite nested blocks
            self.nested_blocks_field_name(): truncated_blocks
        }
        return cls(**attributes)

    @property
    def templatable_attributes(self) -> list[str]:
        return [self.nested_blocks_field_name()]

    def get_template_vars(self) -> list[str]:
        vars = []
        for block in self.nested_blocks:
            vars.extend(block.get_template_vars())
        return vars

    def format_vars(self, **kwargs: Any) -> Self:
        formatted_blocks = []
        for block in self.nested_blocks:
            relevant_kwargs = {
                k: v for k, v in kwargs.items() if k in block.get_template_vars()
            }
            formatted_blocks.append(block.format_vars(**relevant_kwargs))
        attributes = self.model_dump() | {
            # Overwrite nested blocks
            self.nested_blocks_field_name(): formatted_blocks
        }
        return type(self)(**attributes)

nested_blocks property #

nested_blocks: List[BaseContentBlock]

Return the nested content blocks.

nested_blocks_field_name classmethod #

nested_blocks_field_name() -> str

Return the name of the field that contains nested content blocks.

By default, this is "content", but subclasses can override this method

Source code in llama-index-core/llama_index/core/base/llms/types.py
816
817
818
819
820
821
822
823
@classmethod
def nested_blocks_field_name(cls) -> str:
    """
    Return the name of the field that contains nested content blocks.

    By default, this is "content", but subclasses can override this method
    """
    return "content"

can_merge #

can_merge(other: Self) -> bool

Check if this block can be merged with another block of the same type.

Source code in llama-index-core/llama_index/core/base/llms/types.py
833
834
835
836
837
838
839
840
841
842
843
844
845
def can_merge(self, other: Self) -> bool:
    """Check if this block can be merged with another block of the same type."""
    atts = {
        k: v
        for k, v in self.model_dump().items()
        if k != self.nested_blocks_field_name()
    }
    other_atts = {
        k: v
        for k, v in other.model_dump().items()
        if k != self.nested_blocks_field_name()
    }
    return atts == other_atts

amerge async classmethod #

amerge(
    splits: List["BaseRecursiveContentBlock"],
    chunk_size: int,
    tokenizer: Any | None = None,
) -> list["BaseRecursiveContentBlock"]

First merge nested_blocks of consecutive BaseRecursiveContentBlock types based on token estimates

Then, merge consecutive nested content blocks of the same type.

Source code in llama-index-core/llama_index/core/base/llms/types.py
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
@classmethod
async def amerge(
    cls,
    splits: List["BaseRecursiveContentBlock"],
    chunk_size: int,
    tokenizer: Any | None = None,
) -> list["BaseRecursiveContentBlock"]:
    """
    First merge nested_blocks of consecutive BaseRecursiveContentBlock types based on token estimates

    Then, merge consecutive nested content blocks of the same type.
    """
    merged_blocks = []
    cur_blocks: list["BaseRecursiveContentBlock"] = []
    cur_block_tokens = 0

    for split in splits:
        split_tokens = await split.aestimate_tokens(tokenizer=tokenizer)
        can_merge = len(cur_blocks) == 0 or cur_blocks[-1].can_merge(split)
        if cur_block_tokens + split_tokens <= chunk_size and can_merge:
            cur_blocks.append(split)
            cur_block_tokens += split_tokens
        else:
            if cur_blocks:
                attributes = cur_blocks[0].model_dump() | {
                    # Overwrite nested blocks
                    cls.nested_blocks_field_name(): await cls.amerge_nested(
                        nested_blocks=[
                            nested_block
                            for block in cur_blocks
                            for nested_block in block.nested_blocks
                        ],
                        chunk_size=chunk_size,
                        tokenizer=tokenizer,
                    )
                }
                merged_blocks.append(cls(**attributes))
            cur_blocks = [split]
            cur_block_tokens = split_tokens

    if cur_blocks:
        attributes = cur_blocks[0].model_dump() | {
            # Overwrite nested blocks attribute and merge nested blocks of the same type
            cls.nested_blocks_field_name(): await cls.amerge_nested(
                nested_blocks=[
                    nested_block
                    for block in cur_blocks
                    for nested_block in block.nested_blocks
                ],
                chunk_size=chunk_size,
                tokenizer=tokenizer,
            )
        }
        merged_blocks.append(cls(**attributes))

    return merged_blocks

aestimate_tokens async #

aestimate_tokens(*args: Any, **kwargs: Any) -> int

Estimate the number of tokens in this content block.

Source code in llama-index-core/llama_index/core/base/llms/types.py
930
931
932
933
934
935
936
937
async def aestimate_tokens(self, *args: Any, **kwargs: Any) -> int:
    """Estimate the number of tokens in this content block."""
    return sum(
        [
            await block.aestimate_tokens(*args, **kwargs)
            for block in self.nested_blocks
        ]
    )

asplit async #

asplit(
    max_tokens: int,
    overlap: int = 0,
    tokenizer: Any | None = None,
) -> List["BaseRecursiveContentBlock"]

Split the content block into smaller blocks with up to max_tokens tokens each.

Source code in llama-index-core/llama_index/core/base/llms/types.py
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
async def asplit(
    self, max_tokens: int, overlap: int = 0, tokenizer: Any | None = None
) -> List["BaseRecursiveContentBlock"]:
    """Split the content block into smaller blocks with up to max_tokens tokens each."""
    splits = []

    cls = type(self)
    for block in self.nested_blocks:
        block_tokens = await block.aestimate_tokens(tokenizer=tokenizer)
        if block_tokens <= max_tokens:
            attributes = self.model_dump() | {
                # Overwrite nested blocks
                self.nested_blocks_field_name(): [block]
            }
            splits.append(cls(**attributes))
        else:
            split_blocks = await block.asplit(
                max_tokens=max_tokens, tokenizer=tokenizer
            )
            for split_block in split_blocks:
                attributes = self.model_dump() | {
                    # Overwrite nested blocks
                    self.nested_blocks_field_name(): [split_block]
                }
                splits.append(cls(**attributes))

    return splits

atruncate async #

atruncate(
    max_tokens: int,
    tokenizer: Any | None = None,
    reverse: bool = False,
) -> "BaseRecursiveContentBlock"

Truncate the content block to have at most max_tokens tokens.

Source code in llama-index-core/llama_index/core/base/llms/types.py
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
async def atruncate(
    self, max_tokens: int, tokenizer: Any | None = None, reverse: bool = False
) -> "BaseRecursiveContentBlock":
    """Truncate the content block to have at most max_tokens tokens."""
    tknizer = tokenizer or get_tokenizer()
    current_tokens = 0
    truncated_blocks = []

    cls = type(self)
    for block in (
        self.nested_blocks if not reverse else reversed(self.nested_blocks)
    ):
        block_tokens = await block.aestimate_tokens(tokenizer=tknizer)
        if current_tokens + block_tokens <= max_tokens:
            if not reverse:
                truncated_blocks.append(block)
            else:
                truncated_blocks.insert(0, block)
            current_tokens += block_tokens
        else:
            remaining_tokens = max_tokens - current_tokens
            if remaining_tokens > 0:
                truncated_block = await block.atruncate(
                    max_tokens=remaining_tokens, tokenizer=tknizer, reverse=reverse
                )
                # For some block types, truncate may return a block larger than requested
                # However, we still want to include it if no other truncated blocks were added
                # We leave it to the user to handle cases where even the truncated block exceeds max_tokens
                if (
                    await truncated_block.aestimate_tokens(tokenizer=tknizer)
                    <= remaining_tokens
                    or not truncated_blocks
                ):
                    if not reverse:
                        truncated_blocks.append(truncated_block)
                    else:
                        truncated_blocks.insert(0, truncated_block)
            break  # Stop after reaching max_tokens

    attributes = self.model_dump() | {
        # Overwrite nested blocks
        self.nested_blocks_field_name(): truncated_blocks
    }
    return cls(**attributes)

CitableBlock #

Bases: BaseRecursiveContentBlock

Supports providing citable content to LLMs that have built-in citation support.

Parameters:

Name Type Description Default
block_type Literal['citable']
'citable'
title str
required
source str
required
content List[Annotated[TextBlock | ImageBlock | DocumentBlock, FieldInfo]]
required
Source code in llama-index-core/llama_index/core/base/llms/types.py
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
class CitableBlock(BaseRecursiveContentBlock):
    """Supports providing citable content to LLMs that have built-in citation support."""

    block_type: Literal["citable"] = "citable"
    title: str
    source: str
    # TODO: We could maybe expand the types here,
    # limiting for now to known use cases
    content: List[
        Annotated[
            Union[TextBlock, ImageBlock, DocumentBlock],
            Field(discriminator="block_type"),
        ]
    ]

    @field_validator("content", mode="before")
    @classmethod
    def validate_content(cls, v: Any) -> Any:
        if isinstance(v, str):
            return [TextBlock(text=v)]
        return v

CitationBlock #

Bases: BaseRecursiveContentBlock

A representation of cited content from past messages.

Parameters:

Name Type Description Default
block_type Literal['citation']
'citation'
cited_content TextBlock | ImageBlock
required
source str
required
title str
required
additional_location_info Dict[str, int]
required
Source code in llama-index-core/llama_index/core/base/llms/types.py
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
class CitationBlock(BaseRecursiveContentBlock):
    """A representation of cited content from past messages."""

    block_type: Literal["citation"] = "citation"
    cited_content: Annotated[
        Union[TextBlock, ImageBlock], Field(discriminator="block_type")
    ]
    source: str
    title: str
    additional_location_info: Dict[str, int]

    @field_validator("cited_content", mode="before")
    @classmethod
    def validate_cited_content(cls, v: Any) -> Any:
        if isinstance(v, str):
            return TextBlock(text=v)
        if isinstance(v, list):
            if len(v) != 1:
                raise ValueError(
                    "CitationBlock cited_content must contain exactly one block when provided as a list."
                )
            value = v[0]
            if isinstance(value, str):
                return TextBlock(text=value)
            else:
                return value
        return v

    @classmethod
    def nested_blocks_field_name(cls) -> str:
        return "cited_content"

    def can_merge(self, other: Self) -> bool:
        """Check if this block can be merged with another block of the same type."""
        # Only merge if cited_content is of the same type and is a TextBlock
        if type(self.cited_content) is type(other.cited_content) and isinstance(
            self.cited_content, TextBlock
        ):
            atts = {k: v for k, v in self.model_dump().items() if k != "cited_content"}
            other_atts = {
                k: v for k, v in other.model_dump().items() if k != "cited_content"
            }
            return atts == other_atts
        return False

can_merge #

can_merge(other: Self) -> bool

Check if this block can be merged with another block of the same type.

Source code in llama-index-core/llama_index/core/base/llms/types.py
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
def can_merge(self, other: Self) -> bool:
    """Check if this block can be merged with another block of the same type."""
    # Only merge if cited_content is of the same type and is a TextBlock
    if type(self.cited_content) is type(other.cited_content) and isinstance(
        self.cited_content, TextBlock
    ):
        atts = {k: v for k, v in self.model_dump().items() if k != "cited_content"}
        other_atts = {
            k: v for k, v in other.model_dump().items() if k != "cited_content"
        }
        return atts == other_atts
    return False

ThinkingBlock #

Bases: BaseContentBlock

A representation of the content streamed from reasoning/thinking processes by LLMs

Because of LLM providers' reliance on signatures for thinking processes, we do not support merging/splitting/truncating for this block, as we want to preserve the integrity of the content provided by the LLM.

For the same reason, they are also not templatable.

Parameters:

Name Type Description Default
block_type Literal['thinking']
'thinking'
content str | None

Content of the reasoning/thinking process, if available

None
num_tokens int | None

Number of tokens used for reasoning/thinking, if available

None
additional_information Dict[str, Any]

Additional information related to the thinking/reasoning process, if available

<class 'dict'>
Source code in llama-index-core/llama_index/core/base/llms/types.py
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
class ThinkingBlock(BaseContentBlock):
    """
    A representation of the content streamed from reasoning/thinking processes by LLMs

    Because of LLM providers' reliance on signatures for thinking processes,
    we do not support merging/splitting/truncating for this block, as we want to preserve the integrity of the content
    provided by the LLM.

    For the same reason, they are also not templatable.
    """

    block_type: Literal["thinking"] = "thinking"
    content: Optional[str] = Field(
        description="Content of the reasoning/thinking process, if available",
        default=None,
    )
    num_tokens: Optional[int] = Field(
        description="Number of tokens used for reasoning/thinking, if available",
        default=None,
    )
    additional_information: Dict[str, Any] = Field(
        description="Additional information related to the thinking/reasoning process, if available",
        default_factory=dict,
    )

    async def aestimate_tokens(self, tokenizer: Any | None = None) -> int:
        return self.num_tokens or await TextBlock(
            text=self.content or ""
        ).aestimate_tokens(tokenizer=tokenizer)

ToolCallBlock #

Bases: BaseContentBlock

Parameters:

Name Type Description Default
block_type Literal['tool_call']
'tool_call'
tool_call_id str | None

ID of the tool call, if provided

None
tool_name str

Name of the called tool

required
tool_kwargs dict[str, Any] | str

Arguments provided to the tool, if available

<class 'dict'>
Source code in llama-index-core/llama_index/core/base/llms/types.py
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
class ToolCallBlock(BaseContentBlock):
    block_type: Literal["tool_call"] = "tool_call"
    tool_call_id: Optional[str] = Field(
        default=None, description="ID of the tool call, if provided"
    )
    tool_name: str = Field(description="Name of the called tool")
    tool_kwargs: dict[str, Any] | str = Field(
        default_factory=dict,  # type: ignore
        description="Arguments provided to the tool, if available",
    )

    async def aestimate_tokens(self, *args: Any, **kwargs: Any) -> int:
        return await TextBlock(text=self.model_dump_json()).aestimate_tokens(
            *args, **kwargs
        )

ChatMessage #

Bases: BaseRecursiveContentBlock

Chat message.

Parameters:

Name Type Description Default
role MessageRole
<MessageRole.USER: 'user'>
additional_kwargs dict[str, Any]

dict() -> new empty dictionary dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs dict(iterable) -> new dictionary initialized as if via: d = {} for k, v in iterable: d[k] = v dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)

<class 'dict'>
blocks list[Annotated[TextBlock | ImageBlock | AudioBlock | VideoBlock | DocumentBlock | CachePoint | CitableBlock | CitationBlock | ThinkingBlock | ToolCallBlock, FieldInfo]]

Built-in mutable sequence.

If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.

<dynamic>
Source code in llama-index-core/llama_index/core/base/llms/types.py
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
class ChatMessage(BaseRecursiveContentBlock):
    """Chat message."""

    role: MessageRole = MessageRole.USER
    additional_kwargs: dict[str, Any] = Field(default_factory=dict)
    blocks: list[ContentBlock] = Field(default_factory=list)

    def __init__(self, /, content: Any | None = None, **data: Any) -> None:
        """
        Keeps backward compatibility with the old `content` field.

        If content was passed and contained text, store a single TextBlock.
        If content was passed and it was a list, assume it's a list of content blocks and store it.
        """
        if content is not None:
            if isinstance(content, str):
                data["blocks"] = [TextBlock(text=content)]
            elif isinstance(content, list):
                data["blocks"] = content

        super().__init__(**data)

    @model_validator(mode="after")
    def legacy_additional_kwargs_image(self) -> Self:
        """
        Provided for backward compatibility.

        If `additional_kwargs` contains an `images` key, assume the value is a list
        of ImageDocument and convert them into image blocks.
        """
        if documents := self.additional_kwargs.get("images"):
            documents = cast(list[ImageDocument], documents)
            for doc in documents:
                img_base64_bytes = doc.resolve_image(as_base64=True).read()
                self.blocks.append(ImageBlock(image=img_base64_bytes))
        return self

    @classmethod
    def nested_blocks_field_name(cls) -> str:
        return "blocks"

    @property
    def content(self) -> str | None:
        """
        Keeps backward compatibility with the old `content` field.

        Returns:
            The cumulative content of the TextBlock blocks, None if there are none.

        """
        content_strs = []
        for block in self.blocks:
            if isinstance(block, TextBlock):
                content_strs.append(block.text)

        ct = "\n".join(content_strs) or None
        if ct is None and len(content_strs) == 1:
            return ""
        return ct

    @content.setter
    def content(self, content: str) -> None:
        """
        Keeps backward compatibility with the old `content` field.

        Raises:
            ValueError: if blocks contains more than a block, or a block that's not TextBlock.

        """
        if not self.blocks:
            self.blocks = [TextBlock(text=content)]
        elif len(self.blocks) == 1 and isinstance(self.blocks[0], TextBlock):
            self.blocks = [TextBlock(text=content)]
        else:
            raise ValueError(
                "ChatMessage contains multiple blocks, use 'ChatMessage.blocks' instead."
            )

    def __str__(self) -> str:
        return f"{self.role.value}: {self.content}"

    @classmethod
    def from_str(
        cls,
        content: str,
        role: Union[MessageRole, str] = MessageRole.USER,
        **kwargs: Any,
    ) -> Self:
        if isinstance(role, str):
            role = MessageRole(role)
        return cls(role=role, blocks=[TextBlock(text=content)], **kwargs)

    def _recursive_serialization(self, value: Any) -> Any:
        if isinstance(value, BaseModel):
            value.model_rebuild()  # ensures all fields are initialized and serializable
            return value.model_dump()  # type: ignore
        if isinstance(value, dict):
            return {
                key: self._recursive_serialization(value)
                for key, value in value.items()
            }
        if isinstance(value, list):
            return [self._recursive_serialization(item) for item in value]

        if isinstance(value, bytes):
            return base64.b64encode(value).decode("utf-8")

        return value

    @field_serializer("additional_kwargs", check_fields=False)
    def serialize_additional_kwargs(self, value: Any, _info: Any) -> Any:
        return self._recursive_serialization(value)

content property writable #

content: str | None

Keeps backward compatibility with the old content field.

Returns:

Type Description
str | None

The cumulative content of the TextBlock blocks, None if there are none.

legacy_additional_kwargs_image #

legacy_additional_kwargs_image() -> Self

Provided for backward compatibility.

If additional_kwargs contains an images key, assume the value is a list of ImageDocument and convert them into image blocks.

Source code in llama-index-core/llama_index/core/base/llms/types.py
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
@model_validator(mode="after")
def legacy_additional_kwargs_image(self) -> Self:
    """
    Provided for backward compatibility.

    If `additional_kwargs` contains an `images` key, assume the value is a list
    of ImageDocument and convert them into image blocks.
    """
    if documents := self.additional_kwargs.get("images"):
        documents = cast(list[ImageDocument], documents)
        for doc in documents:
            img_base64_bytes = doc.resolve_image(as_base64=True).read()
            self.blocks.append(ImageBlock(image=img_base64_bytes))
    return self

LogProb #

Bases: BaseModel

LogProb of a token.

Parameters:

Name Type Description Default
token str

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.str() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.

<class 'str'>
logprob float

Convert a string or number to a floating-point number, if possible.

<dynamic>
bytes List[int]

Built-in mutable sequence.

If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.

<dynamic>
Source code in llama-index-core/llama_index/core/base/llms/types.py
1284
1285
1286
1287
1288
1289
class LogProb(BaseModel):
    """Log-probability information for a single generated token."""

    # The token's text; empty string when not provided.
    token: str = Field(default="")
    # Natural-log probability assigned to the token; 0.0 when not provided.
    logprob: float = Field(default=0.0)
    # Raw UTF-8 byte values of the token, as reported by the provider.
    bytes: List[int] = Field(default_factory=list)

ChatResponse #

Bases: BaseModel

Chat response.

Parameters:

Name Type Description Default
message ChatMessage
required
raw Any | None
None
delta str | None
None
logprobs List[List[LogProb]] | None
None
additional_kwargs dict

dict() -> new empty dictionary dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs dict(iterable) -> new dictionary initialized as if via: d = {} for k, v in iterable: d[k] = v dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)

<class 'dict'>
Source code in llama-index-core/llama_index/core/base/llms/types.py
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
class ChatResponse(BaseModel):
    """
    Chat response.

    Wraps the assistant `message` together with provider metadata: the raw
    provider payload, the incremental `delta` when streaming, per-token
    log-probabilities, and any extra keyword data.
    """

    message: ChatMessage
    raw: Optional[Any] = None
    delta: Optional[str] = None
    logprobs: Optional[List[List[LogProb]]] = None
    additional_kwargs: dict = Field(default_factory=dict)

    def __str__(self) -> str:
        # Render the response as its underlying chat message.
        return f"{self.message}"

CompletionResponse #

Bases: BaseModel

Completion response.

Fields

text: Text content of the response if not streaming, or if streaming, the current extent of streamed text. additional_kwargs: Additional information on the response (i.e. token counts, function calling information). raw: Optional raw JSON that was parsed to populate text, if relevant. delta: New text that just streamed in (only relevant when streaming).

Parameters:

Name Type Description Default
text str
required
additional_kwargs dict

dict() -> new empty dictionary dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs dict(iterable) -> new dictionary initialized as if via: d = {} for k, v in iterable: d[k] = v dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)

<class 'dict'>
raw Any | None
None
logprobs List[List[LogProb]] | None
None
delta str | None
None
Source code in llama-index-core/llama_index/core/base/llms/types.py
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
class CompletionResponse(BaseModel):
    """
    Completion response.

    Fields:
        text: Text content of the response if not streaming, or if streaming,
            the current extent of streamed text.
        additional_kwargs: Additional information on the response (i.e. token
            counts, function calling information).
        raw: Optional raw JSON that was parsed to populate text, if relevant.
        logprobs: Per-token log-probabilities, when the provider returns them.
        delta: New text that just streamed in (only relevant when streaming).
    """

    text: str
    additional_kwargs: dict = Field(default_factory=dict)
    raw: Optional[Any] = None
    logprobs: Optional[List[List[LogProb]]] = None
    delta: Optional[str] = None

    def __str__(self) -> str:
        # The string form of a completion is simply its accumulated text.
        return self.text

LLMMetadata #

Bases: BaseModel

Parameters:

Name Type Description Default
context_window int

Total number of tokens the model can be input and output for one response.

3900
num_output int

Number of tokens the model can output when generating a response.

256
is_chat_model bool

Set True if the model exposes a chat interface (i.e. can be passed a sequence of messages, rather than text), like OpenAI's /v1/chat/completions endpoint.

False
is_function_calling_model bool

Set True if the model supports function calling messages, similar to OpenAI's function calling API. For example, converting 'Email Anya to see if she wants to get coffee next Friday' to a function call like send_email(to: string, body: string).

False
model_name str

The model's name used for logging, testing, and sanity checking. For some models this can be automatically discerned. For other models, like locally loaded models, this must be manually specified.

'unknown'
system_role MessageRole

The role this specific LLM provider expects for system prompt. E.g. 'SYSTEM' for OpenAI, 'CHATBOT' for Cohere

<MessageRole.SYSTEM: 'system'>
Source code in llama-index-core/llama_index/core/base/llms/types.py
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
class LLMMetadata(BaseModel):
    """Static metadata describing an LLM's capabilities and limits."""

    model_config = ConfigDict(
        protected_namespaces=("pydantic_model_",), arbitrary_types_allowed=True
    )
    context_window: int = Field(
        default=DEFAULT_CONTEXT_WINDOW,
        description=(
            "Total number of tokens the model can be input and output for one response."
        ),
    )
    num_output: int = Field(
        default=DEFAULT_NUM_OUTPUTS,
        description="Number of tokens the model can output when generating a response.",
    )
    is_chat_model: bool = Field(
        default=False,
        description=(
            "Set True if the model exposes a chat interface (i.e. can be passed a"
            " sequence of messages, rather than text), like OpenAI's"
            " /v1/chat/completions endpoint."
        ),
    )
    is_function_calling_model: bool = Field(
        default=False,
        # SEE: https://openai.com/blog/function-calling-and-other-api-updates
        description=(
            "Set True if the model supports function calling messages, similar to"
            " OpenAI's function calling API. For example, converting 'Email Anya to"
            " see if she wants to get coffee next Friday' to a function call like"
            " `send_email(to: string, body: string)`."
        ),
    )
    model_name: str = Field(
        default="unknown",
        description=(
            "The model's name used for logging, testing, and sanity checking. For some"
            " models this can be automatically discerned. For other models, like"
            " locally loaded models, this must be manually specified."
        ),
    )
    system_role: MessageRole = Field(
        default=MessageRole.SYSTEM,
        # BUGFIX: the two concatenated fragments previously joined without a
        # space, rendering as "providerexpects" in the generated schema/docs.
        description=(
            "The role this specific LLM provider "
            "expects for system prompt. E.g. 'SYSTEM' for OpenAI, 'CHATBOT' for Cohere"
        ),
    )