Source code for tidy3d.plugins.design.result

"""Defines parameter sweeping utilities for tidy3d."""

from __future__ import annotations

from typing import Any, Dict, List, Tuple

import numpy as np
import pandas
import pydantic.v1 as pd

from ...components.base import Tidy3dBaseModel, cached_property

# NOTE: ``coords`` here are what the method and design code call ``args_dict``.
# The naming may be unified in the future.



[docs]
class Result(Tidy3dBaseModel):
    """Stores the result of a run over a ``DesignSpace``.
    Can be converted to ``pandas.DataFrame`` with ``Result.to_dataframe()`` for post processing.

    Each evaluated point is described by one entry of ``coords`` (one coordinate per name in
    ``dims``) and the matching entry of ``values``. The validators on ``coords`` enforce that
    every coordinate entry has ``len(dims)`` elements and that ``coords`` and ``values`` have
    the same number of entries.

    Example
    -------
    >>> import tidy3d.plugins.design as tdd
    >>> result = tdd.Result(
    ...     dims=('x', 'y', 'z'),
    ...     values=(1, 2, 3, 4),
    ...     coords=((0,1,2), (1,2,3), (2,3,4), (3,4,5)),
    ...     output_names=("output",),
    ... )
    >>> df = result.to_dataframe()
    >>> # df.head() # print out first 5 elements of data
    """

    # names of the design variables; also the order of the entries in each ``coords`` tuple
    dims: Tuple[str, ...] = pd.Field(
        (),
        title="Dimensions",
        description="The dimensions of the design variables (indexed by 'name').",
    )

    # one entry per evaluated point, in the same order as ``coords``
    values: Tuple[Any, ...] = pd.Field(
        (),
        title="Values",
        description="The return values from the design problem function.",
    )

    # NOTE(review): the validators below require ``len(coords) == len(values)`` and
    # ``len(coords[i]) == len(dims)``, i.e. N entries of length D, whereas the description
    # string says "(D, N)" -- confirm which wording is intended.
    coords: Tuple[Tuple[Any, ...], ...] = pd.Field(
        (),
        title="Coordinates",
        description="The values of the coordinates corresponding to each of the dims."
        "Note: shaped (D, N) where D is the ``len(dims)`` and N is the ``len(values)``",
    )

    # optional; compared for equality when two results are combined
    output_names: Tuple[str, ...] = pd.Field(
        None,
        title="Output Names",
        description="Names for each of the outputs stored in ``values``. If not specified, default "
        "values are assigned.",
    )

    # optional; compared for equality when two results are combined
    fn_source: str = pd.Field(
        None,
        title="Function Source Code",
        description="Source code for the function evaluated in the parameter sweep.",
    )

    # optional; ``None`` unless the sweep function was split into pre and post processing
    task_names: list = pd.Field(
        None,
        title="Task Names",
        description="Task name of every simulation run during ``DesignSpace.run``. Only available if "
        "the parameter sweep function is split into pre and post processing, otherwise is ``None``. "
        "Stored in the same format as the output of fn_pre i.e. if pre outputs a dict, this output is a dict with the keys preserved.",
    )

    # optional; ``None`` unless the sweep function was split into pre and post processing
    task_paths: list = pd.Field(
        None,
        title="Task Paths",
        description="Task paths of every simulation run during ``DesignSpace.run``. Useful for loading download ``SimulationData`` hdf5 files."
        "Only available if the parameter sweep function is split into pre and post processing, otherwise is ``None``. "
        "Stored in the same format as the output of fn_pre i.e. if pre outputs a dict, this output is a dict with the keys preserved.",
    )

    # optional; ``to_dataframe(include_aux=True)`` pairs these row-by-row with ``values``
    aux_values: Tuple[Any, ...] = pd.Field(
        None,
        title="Auxiliary values output from the user function",
        description="The auxiliary return values from the design problem function. This is the collection of objects returned "
        "alongside the float value used for the optimization. These weren't used to inform the optimizer, if one was used.",
    )

    # optional; opaque object whose type depends on the optimizer library that produced it
    optimizer: Any = pd.Field(
        None,
        title="Optimizer object",
        description="The optimizer returned at the end of an optimizer run. Can be used to analyze and plot how the optimization progressed. "
        "Attributes depend on the optimizer used; a full explaination of the optimizer can be found on associated library doc pages. Will be ``None`` for sampling based methods.",
    )

    @pd.validator("coords", always=True)
    def _coords_and_dims_shape(cls, val, values):
        """Check that every entry of ``coords`` holds one coordinate per dimension.

        Returns ``val`` unchanged when all entries have ``len(dims)`` elements, returns ``None``
        when either ``coords`` or ``dims`` is unavailable, and raises ``ValueError`` for the
        first entry whose length differs.
        """

        dims = values.get("dims")

        # nothing to compare against if either side is missing
        if dims is None or val is None:
            return None

        num_dims = len(dims)

        # locate the first coordinate entry (if any) with the wrong number of elements
        mismatch = next(
            ((i, _val) for i, _val in enumerate(val) if len(_val) != num_dims),
            None,
        )
        if mismatch is not None:
            i, _val = mismatch
            raise ValueError(
                f"Number of 'coords' at index '{i}' ({len(_val)}) "
                f"doesn't match the number of 'dims' ({num_dims})."
            )

        return val

    @pd.validator("coords", always=True)
    def _coords_and_values_shape(cls, val, values):
        """Check that ``coords`` and ``values`` contain the same number of entries.

        Returns ``val`` unchanged when the lengths agree, returns ``None`` when either
        ``coords`` or ``values`` is unavailable, and raises ``ValueError`` otherwise.
        """

        _values = values.get("values")

        # nothing to compare against if either side is missing
        if _values is None or val is None:
            return None

        num_values, num_coords = len(_values), len(val)

        if num_values == num_coords:
            return val

        raise ValueError(
            f"'coords' and 'values' must have same number of elements. "
            f"Have {num_coords} and {num_values} elements, respectively."
        )


[docs]
    def value_as_dict(self, value) -> Dict[str, Any]:
        """Express a single function output as a ``dict`` of labeled entries.

        A ``dict`` is returned as-is. Otherwise the labels come from ``default_value_keys``:
        when there is exactly one label the whole ``value`` is stored under it, and when
        there are several the labels are paired element-wise with ``value``.
        """
        if isinstance(value, dict):
            return value

        labels = self.default_value_keys(value)

        if len(labels) != 1:
            return {label: entry for label, entry in zip(labels, value)}

        # a single label maps to the value as a whole, without unpacking it
        (only_label,) = labels
        return {only_label: value}



[docs]
    @staticmethod
    def default_value_keys(value) -> Tuple[str, ...]:
        """Return the default labels used for the entries of a function output.

        A ``dict`` is labeled by its own keys, a ``tuple``/``list``/``numpy.ndarray`` gets one
        ``"output_{i}"`` label per element, and anything else gets the single label
        ``"output"``.
        """

        # mappings label themselves through their own keys
        if isinstance(value, dict):
            return tuple(value.keys())

        # anything that is not a recognized sequence counts as one scalar output
        sequence_types = (tuple, list, np.ndarray)
        if not isinstance(value, sequence_types):
            return ("output",)

        # the i-th entry of a sequence is labeled "output_{i}"
        labels = [f"output_{i}" for i in range(len(value))]
        return tuple(labels)



[docs]
    def items(self) -> Tuple[dict, Any]:
        """Yield ``(coord_dict, value)`` pairs, one per stored point.

        ``coord_dict`` maps each name in ``dims`` to the matching coordinate of the point.
        """

        for point, output in zip(self.coords, self.values):
            yield {name: coord for name, coord in zip(self.dims, point)}, output


    @cached_property
    def data(self) -> Dict[tuple, Any]:
        """Mapping from each coordinate tuple (ordered like ``dims``) to its value.

        If the same coordinate tuple occurs more than once, the last value wins.
        """

        return {
            tuple(point[name] for name in self.dims): output
            for point, output in self.items()
        }


[docs]
    def get_value(self, coords: tuple) -> Any:
        """Look up the stored value for a coordinate tuple ordered like ``dims``.

        Raises ``KeyError`` if the tuple is not a key of ``data``.
        """
        lookup = self.data
        return lookup[coords]



[docs]
    def sel(self, **kwargs) -> Any:
        """Look up the stored value for coordinates given as keyword arguments.

        One keyword per name in ``dims`` is read (a missing one raises ``KeyError``); the
        values are arranged in ``dims`` order and passed to ``get_value``.
        """
        ordered_coords = [kwargs[name] for name in self.dims]
        return self.get_value(tuple(ordered_coords))



[docs]
    def to_dataframe(self, include_aux: bool = False) -> pandas.DataFrame:
        """Data as a ``pandas.DataFrame``.

        Output a ``pandas.DataFrame`` of the ``Result`` with one row per stored point. The columns
        are the ``dims`` followed by the output labels produced by ``value_as_dict``. Can include
        auxiliary data if ``include_aux`` is ``True`` and auxiliary data is found in the ``Result``.
        If auxiliary data is in a dictionary the keys will be used as column names, otherwise they
        will be labeled ``aux_key_X`` for X auxiliary columns.

        The ``task_names``, ``output_names``, ``fn_source`` and ``dims`` of the ``Result`` are
        stored in the ``attrs`` of the returned dataframe.

        Parameters
        ----------
        include_aux: bool = False
            Toggle to include auxiliary values in the dataframe. Requires auxiliary values in the ``Result``.

        Returns
        -------
        pandas.DataFrame
            ``pandas.DataFrame`` corresponding to the ``Result``.

        Raises
        ------
        ValueError
            If ``include_aux`` is ``True`` but ``aux_values`` is ``None``.
        """

        # one row per point: coordinate values followed by the labeled output values
        data = [
            list(coord_dict.values()) + list(self.value_as_dict(val).values())
            for coord_dict, val in self.items()
        ]

        # the first value is assumed to be representative of the structure of all outputs
        val_keys = list(self.value_as_dict(self.values[0])) if self.values else [""]
        columns = list(self.dims) + val_keys

        if include_aux:
            if self.aux_values is None:
                raise ValueError(
                    "``include_aux`` is True but no ``aux_values`` were found in the ``Results``."
                )

            aux_entries = list(self.aux_values)

            # with no auxiliary entries there are no columns to add (and no first entry to inspect)
            if aux_entries:
                # Can use [0] for aux keys as the function is assumed producing the same
                # structure of output each run
                if all(isinstance(auxs, dict) for auxs in aux_entries):
                    aux_keys = list(aux_entries[0].keys())
                    aux_rows = [list(auxs.values()) for auxs in aux_entries]
                else:
                    # convert to list so tuple / array entries can be appended to the list rows
                    aux_rows = [list(auxs) for auxs in aux_entries]
                    aux_keys = [f"aux_key_{val}" for val in range(len(aux_rows[0]))]

                columns = columns + aux_keys
                data = [data_row + aux_row for data_row, aux_row in zip(data, aux_rows)]

        df = pandas.DataFrame(data=data, columns=columns)

        # metadata used by ``from_dataframe`` to rebuild the ``Result``
        df.attrs = dict(
            task_names=self.task_names,
            output_names=self.output_names,
            fn_source=self.fn_source,
            dims=self.dims,
        )
        return df



[docs]
    @classmethod
    def from_dataframe(cls, df: pandas.DataFrame, dims: List[str] = None) -> Result:
        """Load a result directly from a `pandas.DataFrame` object.

        Columns named in ``dims`` become the coordinates (arranged in ``dims`` order, regardless
        of the column order in the dataframe); every other column becomes part of the values.
        Each column is read on its own, so the element types of one column are not affected by
        the types of the others.

        Parameters
        ----------
        df : ``pandas.DataFrame``
            ``DataFrame`` object to load into a :class:`.Result`.
        dims : List[str] = None
            Set of dimensions corresponding to the function arguments.
            Not required if this dataframe was generated directly from a :class:`.Result`
            without modification. In that case, it contains the dims in its ``.attrs`` metadata.

        Returns
        -------
        :class:`.Result`
            Result loaded from this ``DataFrame``.

        Raises
        ------
        ValueError
            If ``dims`` is neither supplied nor present in ``df.attrs``, or if one of the
            ``dims`` is not a column of ``df``.
        """

        attrs = df.attrs

        # get dims either from kwarg or from attrs, error if dims not provided
        if dims is None:
            if "dims" not in attrs:
                raise ValueError(
                    "'dims' neither supplied or found in the 'DataFrame.attrs'. "
                    "Can't process 'DataFrame' into 'sweep.Results'."
                )
            dims = attrs["dims"]

        dims = tuple(dims)

        missing_dims = [dim for dim in dims if dim not in df.columns]
        if missing_dims:
            raise ValueError(f"'dims' {missing_dims} not found in the 'DataFrame' columns.")

        # value columns keep their dataframe order; coordinate columns follow the order of 'dims'
        value_keys = [key for key in df.columns if key not in dims]

        def rows_from_columns(keys) -> list:
            """Transpose the selected columns into a list of per-row lists."""
            columns = [df[key].tolist() for key in keys]
            return [list(row) for row in zip(*columns)]

        # store in Result along with the other attrs (if present)
        return cls(
            dims=dims,
            coords=rows_from_columns(dims),
            values=rows_from_columns(value_keys),
            output_names=attrs.get("output_names"),
            task_names=attrs.get("task_names"),
            fn_source=attrs.get("fn_source"),
        )



[docs]
    def combine(self, other: Result) -> Result:
        """Combine data from two results into a new result (also works with '+').

        The ``coords``, ``values`` and ``task_names`` of ``other`` are appended to those of this
        object. ``aux_values`` and ``task_paths`` are appended in the same way when both results
        provide them, and are left as ``None`` otherwise. The ``optimizer`` is not carried over.

        Parameters
        ----------
        other : :class:`.Result`
            Results to combine with this object.

        Returns
        -------
        :class:`.Result`
            Combined :class:`.Result`

        Raises
        ------
        ValueError
            If the ``fn_source``, ``output_names`` or ``dims`` of the two results differ, or if
            exactly one of the two results has ``task_names``, ``coords`` or ``values`` set to
            ``None``.
        """

        if self.fn_source != other.fn_source:
            raise ValueError("Can't combine results, function sources don't match.")

        if self.output_names != other.output_names:
            raise ValueError("Can't combine results, output names don't match.")

        if self.dims != other.dims:
            raise ValueError("Can't combine results, dimensions don't match.")

        def combine_tuples(tuple1: tuple, tuple2: tuple):
            """Combine two tuples together if not None."""
            if tuple1 is None and tuple2 is None:
                return None
            if (tuple1 is None) != (tuple2 is None):
                raise ValueError("Can't combine data where one only one field is `None`.")
            return list(tuple1) + list(tuple2)

        def combine_if_both(seq1, seq2):
            """Concatenate two optional sequences, giving ``None`` unless both are present."""
            if seq1 is None or seq2 is None:
                return None
            return list(seq1) + list(seq2)

        task_names = combine_tuples(self.task_names, other.task_names)
        coords = combine_tuples(self.coords, other.coords)
        values = combine_tuples(self.values, other.values)

        # kept lenient (no error on a one-sided ``None``) so that results which could be
        # combined before these fields were propagated can still be combined
        task_paths = combine_if_both(self.task_paths, other.task_paths)
        aux_values = combine_if_both(self.aux_values, other.aux_values)

        return Result(
            dims=self.dims,
            coords=coords,
            values=values,
            output_names=self.output_names,
            fn_source=self.fn_source,
            task_names=task_names,
            task_paths=task_paths,
            aux_values=aux_values,
        )



[docs]
    def __add__(self, other):
        """Support ``design_result1 + design_result2`` by delegating to ``combine``."""
        combined = self.combine(other)
        return combined



[docs]
    def get_index(self, fn_args: Dict[str, float]) -> int:
        """Return the position in ``coords`` of the point described by ``fn_args``.

        The arguments are arranged in ``dims`` order before the lookup. Raises ``KeyError`` if
        a dimension is missing from ``fn_args`` and ``ValueError`` if the point is not stored.
        """

        target = tuple(fn_args[name] for name in self.dims)
        stored_points = list(self.coords)
        return stored_points.index(target)



[docs]
    def delete(self, fn_args: Dict[str, float]) -> Result:
        """Delete a specific set of arguments from the result.

        The matching entry is removed from ``coords`` and ``values``. When ``aux_values`` is
        present and has one entry per value, its matching entry is removed as well so that the
        auxiliary data stays aligned with the remaining points.

        Parameters
        ----------
        fn_args : Dict[str, float]
            ``dict`` containing the function arguments one wishes to delete.

        Returns
        -------
        :class:`.Result`
            Copy of the result with that element removed.
        """

        index = self.get_index(fn_args)

        new_coords = list(self.coords)
        new_values = list(self.values)

        new_coords.pop(index)
        new_values.pop(index)

        # only touch the auxiliary data when it is row-aligned with the values
        new_aux_values = self.aux_values
        if new_aux_values is not None and len(new_aux_values) == len(self.values):
            new_aux_values = list(new_aux_values)
            new_aux_values.pop(index)

        # ParticleSwarm optimizer doesn't work with updated_copy
        # Creating new result with updated values and coords instead
        if self.optimizer is not None:
            new_result = Result(
                dims=self.dims,
                values=new_values,
                coords=new_coords,
                output_names=self.output_names,
                fn_source=self.fn_source,
                task_names=self.task_names,
                aux_values=new_aux_values,
                optimizer=self.optimizer,
            )
            return new_result

        updates = dict(values=new_values, coords=new_coords)
        if self.aux_values is not None:
            updates["aux_values"] = new_aux_values

        return self.updated_copy(**updates)



[docs]
    def add(self, fn_args: Dict[str, float], value: Any) -> Result:
        """Add a specific argument and value to the result.

        The new point is appended after the existing ``coords`` and ``values``; all other
        fields are carried over unchanged.

        Parameters
        ----------
        fn_args : Dict[str, float]
            ``dict`` containing the function arguments one wishes to add.
        value : Any
            Data point value corresponding to these arguments.

        Returns
        -------
        :class:`.Result`
            Copy of the result with that element added.
        """

        appended_point = tuple(fn_args[dim] for dim in self.dims)
        new_coords = [*self.coords, appended_point]
        new_values = [*self.values, value]

        if self.optimizer is None:
            return self.updated_copy(values=new_values, coords=new_coords)

        # ParticleSwarm optimizer doesn't work with updated_copy
        # Creating new result with updated values and coords instead
        return Result(
            dims=self.dims,
            values=new_values,
            coords=new_coords,
            output_names=self.output_names,
            fn_source=self.fn_source,
            task_names=self.task_names,
            aux_values=self.aux_values,
            optimizer=self.optimizer,
        )