Evaporate

BaseEvaporateProgram #

Bases: BasePydanticProgram, Generic[Model]

BaseEvaporate program.

You should provide the fields you want to extract. Then, when you call the program, you should pass in a list of training_data nodes and a list of infer_data nodes. The program will call the EvaporateExtractor to synthesize a Python function from the training data and then apply that function to the infer_data.

Source code in llama_index/program/evaporate/base.py

class BaseEvaporateProgram(BasePydanticProgram, Generic[Model]):
    """
    Base class for Evaporate programs.

    Callers supply the names of the fields they want extracted. Fitting
    (`fit` / `fit_fields`) synthesizes one python function string per field
    from a list of training nodes via the EvaporateExtractor; the function is
    then applied to the infer_data nodes passed when the program is called.
    """

    def __init__(
        self,
        extractor: EvaporateExtractor,
        fields_to_extract: Optional[List[str]] = None,
        fields_context: Optional[Dict[str, Any]] = None,
        nodes_to_fit: Optional[List[BaseNode]] = None,
        verbose: bool = False,
    ) -> None:
        """
        Store the configuration and, if training nodes are given, fit at once.

        Args:
            extractor: Extractor used to synthesize the per-field functions.
            fields_to_extract: Field names to extract; a falsy value becomes
                an empty list.
            fields_context: Optional per-field context, keyed by field name;
                a falsy value becomes an empty dict.
            nodes_to_fit: When not None, `fit_fields` is run on these nodes
                during construction.
            verbose: Stored on the instance as `_verbose`.
        """
        self._extractor = extractor
        self._verbose = verbose
        self._fields = fields_to_extract or []
        self._fields_context = fields_context or {}
        # NOTE: this will change with each call to `fit`
        self._field_fns: Dict[str, str] = {}

        # Fitting is opt-in: nothing more to do without training nodes.
        if nodes_to_fit is None:
            return
        self._field_fns = self.fit_fields(nodes_to_fit)

    @classmethod
    def from_defaults(
        cls,
        fields_to_extract: Optional[List[str]] = None,
        fields_context: Optional[Dict[str, Any]] = None,
        llm: Optional[LLM] = None,
        schema_id_prompt: Optional[SchemaIDPrompt] = None,
        fn_generate_prompt: Optional[FnGeneratePrompt] = None,
        field_extract_query_tmpl: str = DEFAULT_FIELD_EXTRACT_QUERY_TMPL,
        nodes_to_fit: Optional[List[BaseNode]] = None,
        verbose: bool = False,
    ) -> "BaseEvaporateProgram":
        """
        Build a program around a freshly constructed EvaporateExtractor.

        The `llm`, prompt and query-template arguments are forwarded to the
        extractor; the remaining arguments are forwarded to the constructor.
        """
        built_extractor = EvaporateExtractor(
            llm=llm,
            schema_id_prompt=schema_id_prompt,
            fn_generate_prompt=fn_generate_prompt,
            field_extract_query_tmpl=field_extract_query_tmpl,
        )
        program = cls(
            built_extractor,
            fields_to_extract=fields_to_extract,
            fields_context=fields_context,
            nodes_to_fit=nodes_to_fit,
            verbose=verbose,
        )
        return program

    @property
    def extractor(self) -> EvaporateExtractor:
        """Return the extractor this program was constructed with."""
        return self._extractor

    def get_function_str(self, field: str) -> str:
        """Return the function string stored for `field`."""
        fn_str = self._field_fns[field]
        return fn_str

    def set_fields_to_extract(self, fields: List[str]) -> None:
        """Replace the list of fields to extract."""
        self._fields = fields

    def fit_fields(
        self,
        nodes: List[BaseNode],
        inplace: bool = True,
    ) -> Dict[str, str]:
        """
        Call `fit` once for every configured field.

        Args:
            nodes: Nodes handed to each `fit` call.
            inplace: Forwarded unchanged to each `fit` call.

        Returns:
            Mapping from field name to the value returned by `fit`.

        Raises:
            ValueError: If no fields are configured.
        """
        if len(self._fields) == 0:
            raise ValueError("Must provide at least one field to extract.")

        # Fields without an entry in the context mapping get `None`.
        return {
            name: self.fit(
                nodes,
                name,
                field_context=self._fields_context.get(name),
                inplace=inplace,
            )
            for name in self._fields
        }

    @abstractmethod
    def fit(
        self,
        nodes: List[BaseNode],
        field: str,
        field_context: Optional[Any] = None,
        expected_output: Optional[Any] = None,
        inplace: bool = True,
    ) -> str:
        """Given the input Nodes and fields, synthesize the python code."""

extractor `property` #

extractor: EvaporateExtractor

Extractor.

from_defaults `classmethod` #

from_defaults(fields_to_extract: Optional[List[str]] = None, fields_context: Optional[Dict[str, Any]] = None, llm: Optional[LLM] = None, schema_id_prompt: Optional[SchemaIDPrompt] = None, fn_generate_prompt: Optional[FnGeneratePrompt] = None, field_extract_query_tmpl: str = DEFAULT_FIELD_EXTRACT_QUERY_TMPL, nodes_to_fit: Optional[List[BaseNode]] = None, verbose: bool = False) -> BaseEvaporateProgram

Evaporate program.

Source code in llama_index/program/evaporate/base.py

@classmethod
def from_defaults(
    cls,
    fields_to_extract: Optional[List[str]] = None,
    fields_context: Optional[Dict[str, Any]] = None,
    llm: Optional[LLM] = None,
    schema_id_prompt: Optional[SchemaIDPrompt] = None,
    fn_generate_prompt: Optional[FnGeneratePrompt] = None,
    field_extract_query_tmpl: str = DEFAULT_FIELD_EXTRACT_QUERY_TMPL,
    nodes_to_fit: Optional[List[BaseNode]] = None,
    verbose: bool = False,
) -> "BaseEvaporateProgram":
    """
    Build a program around a freshly constructed EvaporateExtractor.

    The `llm`, prompt and query-template arguments are forwarded to the
    extractor; the remaining arguments are forwarded to the constructor.
    """
    built_extractor = EvaporateExtractor(
        llm=llm,
        schema_id_prompt=schema_id_prompt,
        fn_generate_prompt=fn_generate_prompt,
        field_extract_query_tmpl=field_extract_query_tmpl,
    )
    program = cls(
        built_extractor,
        fields_to_extract=fields_to_extract,
        fields_context=fields_context,
        nodes_to_fit=nodes_to_fit,
        verbose=verbose,
    )
    return program

get_function_str #

get_function_str(field: str) -> str

Get function string.

Source code in llama_index/program/evaporate/base.py

def get_function_str(self, field: str) -> str:
    """Return the function string stored for `field`."""
    fn_str = self._field_fns[field]
    return fn_str

set_fields_to_extract #

set_fields_to_extract(fields: List[str]) -> None

Set fields to extract.

Source code in llama_index/program/evaporate/base.py

def set_fields_to_extract(self, fields: List[str]) -> None:
    """Replace the list of fields to extract with `fields`."""
    self._fields = fields

fit_fields #

fit_fields(nodes: List[BaseNode], inplace: bool = True) -> Dict[str, str]

Fit on all fields.

Source code in llama_index/program/evaporate/base.py

def fit_fields(
    self,
    nodes: List[BaseNode],
    inplace: bool = True,
) -> Dict[str, str]:
    """
    Call `fit` once for every configured field.

    Args:
        nodes: Nodes handed to each `fit` call.
        inplace: Forwarded unchanged to each `fit` call.

    Returns:
        Mapping from field name to the value returned by `fit`.

    Raises:
        ValueError: If no fields are configured.
    """
    if len(self._fields) == 0:
        raise ValueError("Must provide at least one field to extract.")

    # Fields without an entry in the context mapping get `None`.
    return {
        name: self.fit(
            nodes,
            name,
            field_context=self._fields_context.get(name),
            inplace=inplace,
        )
        for name in self._fields
    }

fit `abstractmethod` #

fit(nodes: List[BaseNode], field: str, field_context: Optional[Any] = None, expected_output: Optional[Any] = None, inplace: bool = True) -> str

Given the input Nodes and fields, synthesize the python code.

Source code in llama_index/program/evaporate/base.py

@abstractmethod
def fit(
    self,
    nodes: List[BaseNode],
    field: str,
    field_context: Optional[Any] = None,
    expected_output: Optional[Any] = None,
    inplace: bool = True,
) -> str:
    """
    Given the input Nodes and fields, synthesize the python code.

    This is an abstract declaration with no implementation; concrete
    programs must override it.

    Args:
        nodes: Nodes to synthesize the function from.
        field: Name of the field the function should extract.
        field_context: Optional context for the field.
        expected_output: Optional expected output; how it is used is up to
            the implementation (not visible here -- TODO confirm).
        inplace: Presumably whether the result is also stored on the
            program; confirm against the concrete implementation.

    Returns:
        The synthesized python code as a string.
    """

DFEvaporateProgram #

Bases: BaseEvaporateProgram[DataFrameRowsOnly]

Evaporate DF program.

Given a set of fields, extracts a dataframe from a set of nodes. Each node corresponds to a row in the dataframe - each value in the row corresponds to a field value.

Source code in llama_index/program/evaporate/base.py

class DFEvaporateProgram(BaseEvaporateProgram[DataFrameRowsOnly]):
    """
    Evaporate DF program.

    Given a set of fields, extracts a dataframe from a set of nodes.
    Each node corresponds to a row in the dataframe - each value in the row
    corresponds to a field value.

    """

    def fit(
        self,
        nodes: List[BaseNode],
        field: str,
        field_context: Optional[Any] = None,
        expected_output: Optional[Any] = None,
        inplace: bool = True,
    ) -> str:
        """
        Given the input Nodes and fields, synthesize the python code.

        Args:
            nodes: Nodes passed to the extractor.
            field: Name of the field to synthesize a function for.
            field_context: Accepted for interface compatibility; not used by
                this implementation.
            expected_output: Accepted for interface compatibility; not used
                by this implementation.
            inplace: When True, also store the function string on the
                program under `field`.

        Returns:
            The function string returned by the extractor.
        """
        # NOTE(review): field_context / expected_output are not forwarded to
        # the extractor -- confirm whether that is intentional.
        fn = self._extractor.extract_fn_from_nodes(nodes, field)
        # Lazy %-style args: the message is only built if DEBUG is enabled.
        logger.debug("Extracted function: %s", fn)
        if inplace:
            self._field_fns[field] = fn
        return fn

    def _inference(
        self, nodes: List[BaseNode], fn_str: str, field_name: str
    ) -> List[Any]:
        """Given the input, call the python code and return the result."""
        results = self._extractor.run_fn_on_nodes(nodes, fn_str, field_name)
        logger.debug("Results: %s", results)
        return results

    @property
    def output_cls(self) -> Type[DataFrameRowsOnly]:
        """Output class."""
        return DataFrameRowsOnly

    def __call__(self, *args: Any, **kwds: Any) -> DataFrameRowsOnly:
        """
        Call evaporate on inference data.

        Positional arguments are not used. Exactly one of the following
        keyword arguments is read (`nodes` wins if both are given):

        Args:
            nodes: Nodes to run the fitted functions on.
            texts: Strings; each is wrapped in a `TextNode`.

        Returns:
            One `DataFrameRow` per dataframe row, with values ordered like
            the configured fields.

        Raises:
            ValueError: If neither `nodes` nor `texts` is provided.
            KeyError: If a configured field has no stored function
                (presumably because it has not been fit yet).
        """
        # TODO: either specify `nodes` or `texts` in kwds
        if "nodes" in kwds:
            nodes = kwds["nodes"]
        elif "texts" in kwds:
            nodes = [TextNode(text=t) for t in kwds["texts"]]
        else:
            raise ValueError("Must provide either `nodes` or `texts`.")

        col_dict = {
            field: self._inference(nodes, self._field_fns[field], field)
            for field in self._fields
        }

        # The DataFrame is kept so that column-length mismatches still raise.
        df = pd.DataFrame(col_dict, columns=self._fields)

        # convert pd.DataFrame to DataFrameRowsOnly
        # Iterate with itertuples rather than `df.values`: `.values` collapses
        # the frame to one common dtype, so e.g. an int column sitting next to
        # a float column would come back as floats.
        df_row_objs = [
            DataFrameRow(row_values=list(row_tuple))
            for row_tuple in df.itertuples(index=False, name=None)
        ]
        return DataFrameRowsOnly(rows=df_row_objs)

output_cls `property` #

output_cls: Type[DataFrameRowsOnly]

Output class.

fit #

fit(nodes: List[BaseNode], field: str, field_context: Optional[Any] = None, expected_output: Optional[Any] = None, inplace: bool = True) -> str

Given the input Nodes and fields, synthesize the python code.

Source code in llama_index/program/evaporate/base.py

def fit(
    self,
    nodes: List[BaseNode],
    field: str,
    field_context: Optional[Any] = None,
    expected_output: Optional[Any] = None,
    inplace: bool = True,
) -> str:
    """
    Synthesize the python function string for one field.

    The extractor is called with `nodes` and `field` only; `field_context`
    and `expected_output` are not used by this implementation. When
    `inplace` is true the result is also stored on the program under
    `field`. The synthesized string is returned either way.
    """
    synthesized = self._extractor.extract_fn_from_nodes(nodes, field)
    logger.debug(f"Extracted function: {synthesized}")
    if not inplace:
        return synthesized
    self._field_fns[field] = synthesized
    return synthesized

options: members: - DFEvaporateProgram

Evaporate

BaseEvaporateProgram #

extractor property #

from_defaults classmethod #

get_function_str #

set_fields_to_extract #

fit_fields #

fit abstractmethod #

DFEvaporateProgram #

output_cls property #

fit #

extractor `property` #

from_defaults `classmethod` #

fit `abstractmethod` #

output_cls `property` #