Source code for jwst.datamodels.utils.flat_multispec

"""Utilities for re-organizing spectral products into a flat structure."""

import logging
from copy import deepcopy

import numpy as np
from asdf.tags.core.ndarray import asdf_datatype_to_numpy_dtype
from stdatamodels.jwst import datamodels

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Public API of this module: the names exported by a star-import.
__all__ = [
    "determine_vector_and_meta_columns",
    "make_empty_recarray",
    "populate_recarray",
    "set_schema_units",
    "copy_column_units",
    "copy_spec_metadata",
    "expand_table",
    "expand_flat_spec",
]


def determine_vector_and_meta_columns(input_datatype, output_datatype):
    """
    Split the output table's columns into vector-like and metadata columns.

    A column counts as vector-like when its name also appears in the input
    schema; columns that exist only in the output schema are metadata.
    Both arguments are typically read from a model schema, e.g.::

        datatype = schema["properties"]["spec_table"]["datatype"]

    Parameters
    ----------
    input_datatype : list[dict]
        Column definitions of the input model, as read from the schema.
        Only the "name" key of each entry is used here.
    output_datatype : list[dict]
        Column definitions of the output model, as read from the schema.
        The "name" and "datatype" keys of each entry are used.

    Returns
    -------
    columns : ndarray[tuple]
        One ``(name, numpy dtype)`` pair per output column, in the order
        given by ``output_datatype``.
    is_vector : ndarray[bool]
        Flags aligned with ``columns``; `True` where the column name is
        also present in ``input_datatype``.
    """
    input_names = np.array([entry["name"] for entry in input_datatype])

    # Walk the output schema once, collecting names and converted dtypes
    names = []
    dtypes = []
    for entry in output_datatype:
        names.append(entry["name"])
        dtypes.append(asdf_datatype_to_numpy_dtype(entry["datatype"]))
    output_names = np.array(names)
    output_dtypes = np.array(dtypes)

    columns = np.array(list(zip(output_names, output_dtypes, strict=True)))

    # A column is vector-like exactly when the input schema also defines it
    is_vector = np.array([name in input_names for name in output_names])
    return columns, is_vector
def make_empty_recarray(n_rows, n_spec, columns, is_vector, defaults=0):
    """
    Create an output table with one row per spectrum, filled with defaults.

    Parameters
    ----------
    n_rows : int
        Length of every vector-like column; this is the maximum number
        of data points for any spectrum in the exposure.
    n_spec : int
        The number of spectra, i.e. the number of rows in the output table.
    columns : ndarray[tuple]
        Array of tuples containing the column names and their dtypes.
    is_vector : ndarray[bool]
        Array of booleans indicating whether each column is vector-like.
        If `True`, the column is a 1D array of length ``n_rows`` (also when
        ``n_rows`` is 1).  Otherwise, the column is a scalar.
    defaults : list, ndarray, int, or float, optional
        Default values, one per column.  For a vector-like column the value
        is repeated to fill the array; for a scalar column it is used
        directly.  If a single number is given (Python or NumPy integer or
        floating-point scalar), the same value is used for all columns;
        string-type columns will be filled with the string representation
        of the value.

    Returns
    -------
    output_table : `~numpy.ndarray`
        Structured array of length ``n_spec`` with the requested fields,
        initialized to the default values.
    """
    # Build the structured dtype.  The sub-array shape is passed as a tuple:
    # a bare integer 1 is interpreted by numpy as a plain scalar field, which
    # would make the (n_spec, n_rows) fill below impossible to assign.
    fltdtype = []
    for i, (col, dtype) in enumerate(columns):
        if is_vector[i]:
            fltdtype.append((col, dtype, (n_rows,)))
        else:
            fltdtype.append((col, dtype))

    arr = np.empty(n_spec, dtype=fltdtype)

    # A single numeric default (including NumPy scalars such as np.int64,
    # which are not instances of int) is assigned to every field at once.
    if isinstance(defaults, (int, float, np.integer, np.floating)):
        arr[...] = defaults
        return arr

    # Otherwise fill each column with its own default value
    for i, (col, dtype) in enumerate(columns):
        shape = (n_spec, n_rows) if is_vector[i] else n_spec
        arr[col] = np.full(shape, defaults[i], dtype=dtype)
    return arr
def populate_recarray(output_table, input_spec, columns, is_vector, ignore_columns=None):
    """
    Fill one output table row in place from a single input spectrum.

    For every vector-like column, the values from the input ``spec_table``
    are written into the leading entries of the output column; any entries
    beyond the length of the input table are left as they were.  For every
    metadata column, the value is read from the attribute of the input
    spectrum with the same name in lower case.  Metadata attributes that
    are missing or `None` are reported together in one warning.

    Parameters
    ----------
    output_table : `~numpy.recarray`
        The output table to be populated with the spectral data.
    input_spec : `~stdatamodels.jwst.datamodels.SpecModel` or \
                 `~stdatamodels.jwst.datamodels.CombinedSpecModel`
        The input data model containing the spectral data.
    columns : ndarray[tuple]
        Array of tuples containing the column names and their dtypes.
    is_vector : ndarray[bool]
        Array of booleans indicating whether each column is vector-like.
    ignore_columns : list[str], optional
        Column names to leave untouched, for both data and metadata.
        Useful for columns that are required in the output table but are
        filled in separately by the calling code.
    """
    skipped = [] if ignore_columns is None else ignore_columns
    source_table = input_spec.spec_table
    vector_cols = columns[is_vector]
    scalar_cols = columns[~is_vector]

    # Spectral data: overwrite the leading entries of each vector column
    for name, _ in vector_cols:
        if name not in skipped:
            output_table[name][: source_table.shape[0]] = source_table[name]

    # Metadata: column names must match the spectrum's attribute names
    problems = []
    for name, _ in scalar_cols:
        if name in skipped:
            continue
        attr_name = name.lower()
        value = getattr(input_spec, attr_name, None)
        if value is None:
            problems.append(attr_name)
            continue
        output_table[name] = value

    if problems:
        log.warning(f"Metadata could not be determined from input spec_table: {problems}")
def set_schema_units(model):
    """
    Apply the units declared in the model schema to the spec_table columns.

    Works around stdatamodels not assigning units to the spec_table
    automatically.  Columns whose schema entry has no "unit" key are left
    untouched.  The model is modified in place.

    Parameters
    ----------
    model : `~stdatamodels.jwst.datamodels.JwstDataModel`
        Any model containing a spec_table attribute.
    """
    column_defs = model.schema["properties"]["spec_table"]["datatype"]
    for definition in column_defs:
        if "unit" not in definition:
            continue
        model.spec_table.columns[definition["name"]].unit = definition["unit"]
def copy_column_units(input_model, output_model):
    """
    Transfer column units from the input spec_table to the output spec_table.

    Only columns present in both tables are touched.  Spectral tables in
    both models must be in FITS record format.  The output model is
    updated in place.

    Parameters
    ----------
    input_model : `~stdatamodels.jwst.datamodels.SpecModel`
        Input spectral model containing vector columns in the
        ``spec_table`` attribute.
    output_model : `~stdatamodels.jwst.datamodels.JwstDataModel`
        Output spectral model containing a mix of vector columns and
        metadata columns in the ``spec_table`` attribute.
    """
    source_columns = input_model.spec_table.columns
    target_columns = output_model.spec_table.columns

    # Restrict to the column names the two tables have in common
    shared_names = [name for name in source_columns.names if name in target_columns.names]
    for name in shared_names:
        target_columns[name].unit = source_columns[name].unit
def copy_spec_metadata(input_model, output_model):
    """
    Copy per-spectrum attributes from the input model to the output model.

    The candidate attributes are the properties listed in the output
    model's schema, other than "meta" and "spec_table" (e.g. "source_id",
    "name", etc.).  An attribute is copied only when the input model has a
    value for it that is not `None`.

    Parameters
    ----------
    input_model : `~stdatamodels.jwst.datamodels.JwstDataModel` or ObjectNode
        A spectral model, such as `~stdatamodels.jwst.datamodels.SpecModel`
        or `~stdatamodels.jwst.datamodels.TSOSpecModel`.  If read in from a
        list of spectra, as in `~stdatamodels.jwst.datamodels.MultiSpecModel`,
        the input model may be an ObjectNode rather than a full
        `~stdatamodels.jwst.datamodels.JwstDataModel`.
    output_model : `~stdatamodels.jwst.datamodels.JwstDataModel`
        A spectral model, such as `~stdatamodels.jwst.datamodels.SpecModel`
        or `~stdatamodels.jwst.datamodels.TSOSpecModel`.  Updated in place
        with metadata from the input model.  Must be a full
        `~stdatamodels.jwst.datamodels.JwstDataModel`, not an ObjectNode,
        because its ``schema`` is read here.
    """
    excluded = ("meta", "spec_table")
    attribute_names = [
        name for name in output_model.schema["properties"] if name not in excluded
    ]

    for name in attribute_names:
        value = getattr(input_model, name, None)
        if value is not None:
            setattr(output_model, name, value)
def expand_table(spec):
    """
    Expand a table of spectra into a list of SpecModel objects.

    Each row of the input ``spec_table`` becomes one new
    `~stdatamodels.jwst.datamodels.SpecModel`.  Columns named in the
    SpecModel schema are copied into the new model's ``spec_table``,
    truncated to the row's ``N_ALONGDISP`` value.  All remaining columns
    except ``N_ALONGDISP`` are set as lower-case attributes on the new
    model.

    Parameters
    ----------
    spec : `~stdatamodels.jwst.datamodels.WFSSSpecModel`, \
           `~stdatamodels.jwst.datamodels.TSOMultiSpecModel`, ObjectNode
        Any model containing a spec_table to expand into multiple spectra.
        The table must contain an ``N_ALONGDISP`` column.

    Returns
    -------
    list[SpecModel]
        A list of `~stdatamodels.jwst.datamodels.SpecModel` objects, one for
        each spectrum in the input spec_table.
    """
    # Names of every column in the input table (vector and metadata alike)
    all_columns = np.array([str(x) for x in spec.spec_table.dtype.names])
    new_spec_list = []
    n_spectra = len(spec.spec_table)
    for i in range(n_spectra):
        # initialize a new SpecModel
        spec_row = spec.spec_table[i]
        # Number of leading elements to keep from each vector column
        n_elements = int(spec_row["N_ALONGDISP"])
        new_spec = datamodels.SpecModel()
        data_type = new_spec.schema["properties"]["spec_table"]["datatype"]
        columns_to_copy = np.array([col["name"] for col in data_type])

        # Copy over the vector columns from input spec_table to output spec_table
        spec_table = np.empty(n_elements, dtype=new_spec.spec_table.dtype)
        for col_name in columns_to_copy:
            spec_table[col_name] = spec_row[col_name][:n_elements]
        new_spec.spec_table = spec_table

        # Copy over the metadata columns from input spec_table to the spectrum's metadata
        # Metadata columns are those not defined in the SpecModel table schema;
        # N_ALONGDISP is dropped since it was only needed for the truncation above
        meta_columns = all_columns[~np.isin(all_columns, columns_to_copy)].tolist()
        meta_columns.remove("N_ALONGDISP")
        for meta_key in meta_columns:
            try:
                setattr(new_spec, meta_key.lower(), spec_row[meta_key])
            except KeyError:
                # Best effort: a column that raises KeyError here is skipped
                pass

        # Copy over relevant metadata from the input model to the output model
        if getattr(spec.meta, "wcs", None) is not None:
            new_spec.meta.wcs = deepcopy(spec.meta.wcs)
        # NOTE(review): group_id and filename are read from ``spec`` itself,
        # not ``spec.meta``, and default to "" - confirm this is intended
        new_spec.meta.group_id = getattr(spec, "group_id", "")
        new_spec.meta.filename = getattr(spec, "filename", "")
        copy_spec_metadata(spec, new_spec)
        copy_column_units(spec, new_spec)

        new_spec_list.append(new_spec)

    return new_spec_list
def expand_flat_spec(input_model):
    """
    Convert a flat spectral table model into a set of simple spectra.

    Every entry of ``input_model.spec`` is expanded with `expand_table`
    and the resulting spectra are appended, in order, to a new
    `~stdatamodels.jwst.datamodels.MultiSpecModel`.  The output is then
    updated from the input model with ``only="PRIMARY"``, and the
    ``int_times`` table is copied over when the input has one.

    Parameters
    ----------
    input_model : `~stdatamodels.jwst.datamodels.TSOMultiSpecModel`
        Spectral model containing spectra with a mix of vector columns
        and metadata columns in the ``spec_table`` attribute.

    Returns
    -------
    `~stdatamodels.jwst.datamodels.MultiSpecModel`
        A set of simple spectra, one per extension.
    """
    simple_spectra = datamodels.MultiSpecModel()

    # One flat table in, many single spectra out
    for flat_spec in input_model.spec:
        for single_spec in expand_table(flat_spec):
            simple_spectra.spec.append(single_spec)

    # Carry over the top-level metadata
    simple_spectra.update(input_model, only="PRIMARY")

    # Carry over the integration times table when the input provides one
    if (int_times := getattr(input_model, "int_times", None)) is not None:
        simple_spectra.int_times = int_times.copy()

    return simple_spectra