Source code for tidy3d.plugins.design.result

"""Defines parameter sweeping utilities for tidy3d."""

from __future__ import annotations

from typing import Any, Dict, List, Tuple

import numpy as np
import pandas
import pydantic.v1 as pd

from ...components.base import Tidy3dBaseModel, cached_property

# NOTE: Coords are args_dict from method and design. This may be changed in future to unify naming


[docs] class Result(Tidy3dBaseModel): """Stores the result of a run over a ``DesignSpace``. Can be converted to ``pandas.DataFrame`` with ``Result.to_dataframe()`` for post processing. Example ------- >>> import tidy3d.plugins.design as tdd >>> result = tdd.Result( ... dims=('x', 'y', 'z'), ... values=(1, 2, 3, 4), ... coords=((0,1,2), (1,2,3), (2,3,4), (3,4,5)), ... output_names=("output",), ... ) >>> df = result.to_dataframe() >>> # df.head() # print out first 5 elements of data """ dims: Tuple[str, ...] = pd.Field( (), title="Dimensions", description="The dimensions of the design variables (indexed by 'name').", ) values: Tuple[Any, ...] = pd.Field( (), title="Values", description="The return values from the design problem function.", ) coords: Tuple[Tuple[Any, ...], ...] = pd.Field( (), title="Coordinates", description="The values of the coordinates corresponding to each of the dims." "Note: shaped (D, N) where D is the ``len(dims)`` and N is the ``len(values)``", ) output_names: Tuple[str, ...] = pd.Field( None, title="Output Names", description="Names for each of the outputs stored in ``values``. If not specified, default " "values are assigned.", ) fn_source: str = pd.Field( None, title="Function Source Code", description="Source code for the function evaluated in the parameter sweep.", ) task_names: list = pd.Field( None, title="Task Names", description="Task name of every simulation run during ``DesignSpace.run``. Only available if " "the parameter sweep function is split into pre and post processing, otherwise is ``None``. " "Stored in the same format as the output of fn_pre i.e. if pre outputs a dict, this output is a dict with the keys preserved.", ) task_paths: list = pd.Field( None, title="Task Paths", description="Task paths of every simulation run during ``DesignSpace.run``. Useful for loading download ``SimulationData`` hdf5 files." "Only available if the parameter sweep function is split into pre and post processing, otherwise is ``None``. " "Stored in the same format as the output of fn_pre i.e. if pre outputs a dict, this output is a dict with the keys preserved.", ) aux_values: Tuple[Any, ...] = pd.Field( None, title="Auxiliary values output from the user function", description="The auxiliary return values from the design problem function. This is the collection of objects returned " "alongside the float value used for the optimization. These weren't used to inform the optimizer, if one was used.", ) optimizer: Any = pd.Field( None, title="Optimizer object", description="The optimizer returned at the end of an optimizer run. Can be used to analyze and plot how the optimization progressed. " "Attributes depend on the optimizer used; a full explaination of the optimizer can be found on associated library doc pages. Will be ``None`` for sampling based methods.", ) @pd.validator("coords", always=True) def _coords_and_dims_shape(cls, val, values): """Make sure coords and dims have same size.""" dims = values.get("dims") if val is None or dims is None: return num_dims = len(dims) for i, _val in enumerate(val): if len(_val) != num_dims: raise ValueError( f"Number of 'coords' at index '{i}' ({len(_val)}) " f"doesn't match the number of 'dims' ({num_dims})." ) return val @pd.validator("coords", always=True) def _coords_and_values_shape(cls, val, values): """Make sure coords and values have same length.""" _values = values.get("values") if val is None or _values is None: return num_values = len(_values) num_coords = len(val) if num_values != num_coords: raise ValueError( f"'coords' and 'values' must have same number of elements. " f"Have {num_coords} and {num_values} elements, respectively." ) return val
[docs] def value_as_dict(self, value) -> Dict[str, Any]: """How to convert an output function value as a dictionary.""" if isinstance(value, dict): return value keys = self.default_value_keys(value) if len(keys) == 1: return {keys[0]: value} return dict(zip(keys, value))
[docs] @staticmethod def default_value_keys(value) -> Tuple[str, ...]: """The default keys for a given value.""" # if a dict already, just use the existing keys as labels if isinstance(value, dict): return tuple(value.keys()) # if array-like, ith output has key "output {i}" if isinstance(value, (tuple, list, np.ndarray)): return tuple(f"output_{i}" for i in range(len(value))) # if simply single value (float, int, bool, etc) just label "output" return ("output",)
[docs] def items(self) -> Tuple[dict, Any]: """Iterate through coordinates (args) and values (outputs) one by one.""" for coord_tuple, val in zip(self.coords, self.values): coord_dict = dict(zip(self.dims, coord_tuple)) yield coord_dict, val
@cached_property def data(self) -> Dict[tuple, Any]: """Dict mapping tuple of fn args to their value.""" result = {} for coord_dict, val in self.items(): coord_tuple = tuple(coord_dict[dim] for dim in self.dims) result[coord_tuple] = val return result
[docs] def get_value(self, coords: tuple) -> Any: """Get a data element indexing by function arg tuple.""" return self.data[coords]
[docs] def sel(self, **kwargs) -> Any: """Get a data element by function kwargs..""" coords_tuple = tuple(kwargs[dim] for dim in self.dims) return self.get_value(coords_tuple)
[docs] def to_dataframe(self, include_aux: bool = False) -> pandas.DataFrame: """Data as a ``pandas.DataFrame``. Output a ``pandas.DataFrame`` of the ``Result``. Can include auxiliary data if ``include_aux`` is ``True`` and auxiliary data is found in the ``Result``. If auxiliary data is in a dictionary the keys will be used as column names, otherwise they will be labeled ``aux_key_X`` for X auxiliary columns. Parameters ---------- include_aux: bool = False Toggle to include auxiliary values in the dataframe. Requires auxiliary values in the ``Result``. Returns ------- pandas.DataFrame ``pandas.DataFrame`` corresponding to the ``Result``. """ data = [] for coord_dict, val in self.items(): val_dict = self.value_as_dict(val) data_i = list(coord_dict.values()) + list(val_dict.values()) data.append(data_i) val_keys = list(self.value_as_dict(self.values[0])) if self.values else [""] columns = list(self.dims) + val_keys if include_aux: if self.aux_values is not None: # Can use [0] for aux keys as the function is assumed producing the same structure of output each run if all(isinstance(auxs, dict) for auxs in self.aux_values): expanded_data = [ data_row + list(auxs.values()) for data_row, auxs in zip(data, self.aux_values) ] aux_keys = list(self.aux_values[0].keys()) else: expanded_data = [ data_row + aux_row for data_row, aux_row in zip(data, self.aux_values) ] aux_keys = [f"aux_key_{val}" for val in range(len(self.aux_values[0]))] columns = columns + aux_keys data = expanded_data else: raise ValueError( "``include_aux`` is True but no ``aux_values`` were found in the ``Results``." ) df = pandas.DataFrame(data=data, columns=columns) attrs = dict( task_names=self.task_names, output_names=self.output_names, fn_source=self.fn_source, dims=self.dims, ) df.attrs = attrs return df
[docs] @classmethod def from_dataframe(cls, df: pandas.DataFrame, dims: List[str] = None) -> Result: """Load a result directly from a `pandas.DataFrame` object. Parameters ---------- df : ``pandas.DataFrame`` ```DataFrame`` object to load into a :class:`.Result`. dims : List[str] = None Set of dimensions corresponding to the function arguments. Not required if this dataframe was generated directly from a :class:`.Result` without modification. In that case, it contains the dims in its ``.attrs`` metadata. Returns ------- :class:`.Result` Result loaded from this ``DataFrame``. """ attrs = df.attrs # get dims either from kwarg or from attrs, error if dims not provided if dims is None: if "dims" in attrs: dims = attrs["dims"] else: raise ValueError( "'dims' neither supplied or found in the 'DataFrame.attrs'. " "Can't process 'DataFrame' into 'sweep.Results'." ) # grab the columns of the data, sorted into coords and values coords_list = [] values_list = [] for key in df.keys(): data_column = df[key] if key in dims: coords_list.append(data_column) else: values_list.append(data_column) # transpose the data and store in Result along with the other attrs (if present) coords = np.array(coords_list).T.tolist() values = np.array(values_list).T.tolist() return cls( dims=dims, coords=coords, values=values, output_names=attrs.get("output_names"), task_names=attrs.get("task_names"), fn_source=attrs.get("fn_source"), )
[docs] def combine(self, other: Result) -> Result: """Combine data from two results into a new result (also works with '+'). Parameters ---------- other : :class:`.Result` Results to combine with this object. Returns ------- :class:`.Result` Combined :class:`.Result` """ if self.fn_source != other.fn_source: raise ValueError("Can't combine results, function sources don't match.") if self.output_names != other.output_names: raise ValueError("Can't combine results, output names don't match.") if self.dims != other.dims: raise ValueError("Can't combine results, dimensions don't match.") def combine_tuples(tuple1: tuple, tuple2: tuple): """Combine two tuples together if not None.""" if tuple1 is None and tuple2 is None: return None if (tuple1 is None) != (tuple2 is None): raise ValueError("Can't combine data where one only one field is `None`.") return list(tuple1) + list(tuple2) task_names = combine_tuples(self.task_names, other.task_names) coords = combine_tuples(self.coords, other.coords) values = combine_tuples(self.values, other.values) return Result( dims=self.dims, coords=coords, values=values, output_names=self.output_names, fn_source=self.fn_source, task_names=task_names, )
[docs] def __add__(self, other): """Special syntax for design_result1 + design_result2.""" return self.combine(other)
[docs] def get_index(self, fn_args: Dict[str, float]) -> int: """Get index into the data for a specific set of arguments.""" key_list = list(self.coords) arg_key = tuple(fn_args[dim] for dim in self.dims) return key_list.index(arg_key)
[docs] def delete(self, fn_args: Dict[str, float]) -> Result: """Delete a specific set of arguments from the result. Parameters ---------- fn_args : Dict[str, float] ``dict`` containing the function arguments one wishes to delete. Returns ------- :class:`.Result` Copy of the result with that element removed. """ index = self.get_index(fn_args) new_coords = list(self.coords) new_values = list(self.values) new_coords.pop(index) new_values.pop(index) # ParticleSwarm optimizer doesn't work with updated_copy # Creating new result with updated values and coords instead if self.optimizer is not None: new_result = Result( dims=self.dims, values=new_values, coords=new_coords, output_names=self.output_names, fn_source=self.fn_source, task_names=self.task_names, aux_values=self.aux_values, optimizer=self.optimizer, ) return new_result return self.updated_copy(values=new_values, coords=new_coords)
[docs] def add(self, fn_args: Dict[str, float], value: Any) -> Result: """Add a specific argument and value the result. Parameters ---------- fn_args : Dict[str, float] ``dict`` containing the function arguments one wishes to add. value : Any Data point value corresponding to these arguments. Returns ------- :class:`.Result` Copy of the result with that element added. """ new_values = list(self.values) + [value] new_coords = list(self.coords) + [tuple(fn_args[dim] for dim in self.dims)] # ParticleSwarm optimizer doesn't work with updated_copy # Creating new result with updated values and coords instead if self.optimizer is not None: new_result = Result( dims=self.dims, values=new_values, coords=new_coords, output_names=self.output_names, fn_source=self.fn_source, task_names=self.task_names, aux_values=self.aux_values, optimizer=self.optimizer, ) return new_result return self.updated_copy(values=new_values, coords=new_coords)