"""Subtract a template background reference file from NIRISS SOSS data."""

import logging
import warnings

import numpy as np
from scipy import ndimage
from stdatamodels.jwst import datamodels

log = logging.getLogger(__name__)

# These parameters were hard-coded in the algorithm provided by the NIRISS team - future
# updates could consider exposing them as step-level parameters.
BKG_DISCON_COLUMNS = [685, 725]  # The columns to check for a discontinuity in background level.
BACKGROUND_MASK_CUTOFF = 950  # Drop all pixels with col > cutoff for template matching of bkgd.
SUBSTRIP96_ROWSTART = 10  # Where to place the SUBSTRIP96 slice wrt. the SUBSTRIP256 array.

__all__ = ["find_discontinuity", "generate_background_masks", "subtract_soss_bkg"]


def find_discontinuity(image):
    """
    Determine the location of a discontinuity in the background levels.

    This function applies a Gaussian smoothing filter to the image, then
    searches for a discontinuity by locating the maximum gradient in the
    smoothed image. The search is restricted to columns where the
    discontinuity is expected to occur, set by the ``BKG_DISCON_COLUMNS``
    parameter.

    Parameters
    ----------
    image : ndarray
        The background template with a discontinuity.

    Returns
    -------
    ndarray
        An array of shape ``(nrows, 2)`` giving the derived ``[x, y]``
        location of the discontinuity in each row.
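
    Examples
    --------
    A minimal synthetic illustration; the template shape and the location
    of the step are arbitrary choices for this sketch, not values taken
    from a real reference file:

    >>> import numpy as np
    >>> template = np.ones((256, 2048))
    >>> template[:, 700:] *= 2.0  # synthetic jump inside BKG_DISCON_COLUMNS
    >>> xy = find_discontinuity(template)
    >>> xy.shape
    (256, 2)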
"""
smoothed = ndimage.gaussian_filter(image, 3)
gradient = np.gradient(smoothed, axis=1)
x_discontinuity = (
np.argmax(
gradient[:, BKG_DISCON_COLUMNS[0] : BKG_DISCON_COLUMNS[1]],
axis=1,
)
+ BKG_DISCON_COLUMNS[0]
)
return np.column_stack([x_discontinuity, np.arange(len(image))])


def generate_background_masks(background, n_repeats, for_fitting):
    """
    Generate background masks for the two distinct background regions.

    Using the discontinuity in the background template as a separation,
    build selection masks which flag the left and right background
    regions.

    When ``for_fitting`` is `True`, the mask right of the discontinuity is
    also truncated rightward of a cutoff column set by
    ``BACKGROUND_MASK_CUTOFF``: the NIRISS team found that template
    matching using pixels on the right side of the detector led to poorer
    fits due to source contamination.

    Parameters
    ----------
    background : ndarray
        The 2-D background template.
    n_repeats : int
        The number of integrations in the science data, used to
        broadcast the masks into 3-D arrays.
    for_fitting : bool
        If `True`, the right-hand mask is truncated to only use pixels
        left of the ``BACKGROUND_MASK_CUTOFF`` value (column 950).
        If `False`, every pixel belongs to either the left or the
        right mask.

    Returns
    -------
    bool ndarray, bool ndarray
        The left and right background masks.
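
    Examples
    --------
    A minimal synthetic illustration; the template shape and the position
    of the step are arbitrary choices for this sketch:

    >>> import numpy as np
    >>> template = np.ones((256, 2048))
    >>> template[:, 700:] *= 2.0
    >>> left, right = generate_background_masks(template, 3, for_fitting=True)
    >>> left.shape, right.shape
    ((3, 256, 2048), (3, 256, 2048))
    >>> bool(right[:, :, BACKGROUND_MASK_CUTOFF + 1 :].any())
    False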
"""
# First find discontinuity in background template
discontinuity = find_discontinuity(background)
# Use discontinuity to split base mask into left and right segments
__, xx = np.indices(background.shape)
mask_right = xx >= discontinuity[:, 0].reshape(-1, 1)
mask_left = ~mask_right
# Remove background pixels rightward of cutoff from mask for template-fitting purposes
if for_fitting:
mask_right[xx > BACKGROUND_MASK_CUTOFF] = False
# Broadcast mask shape to match data integrations
mask_left = np.repeat(mask_left[np.newaxis, :, :], n_repeats, axis=0)
mask_right = np.repeat(mask_right[np.newaxis, :, :], n_repeats, axis=0)
return mask_left, mask_right


def _rms_error(residuals):
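    """Compute the root-mean-square of an array of residuals."""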
    return np.sqrt(np.square(residuals).mean())


def subtract_soss_bkg(
    input_model,
    bkg_name,
    soss_source_percentile,
    soss_bkg_percentile,
):
    """
    Subtract a scaled template reference background from input SOSS data.

    Derive a separate scaling factor to the left and right of the background
    discontinuity, and select the best template from the library of templates
    in the reference file using the root-mean-square of the fit residuals.

    Parameters
    ----------
    input_model : `~stdatamodels.jwst.datamodels.CubeModel` or \
            `~stdatamodels.jwst.datamodels.ImageModel`
        The science data, typically a multi-integration
        `~stdatamodels.jwst.datamodels.CubeModel` but
        possibly an `~stdatamodels.jwst.datamodels.ImageModel`.
    bkg_name : str
        The name of the background reference file.
    soss_source_percentile : float
        The threshold percentile used as a cutoff: all pixels above the
        threshold are deemed source and are not used for background
        template matching.
    soss_bkg_percentile : list of float
        A 2-member list giving the lower and upper limits of the background
        flux percentiles to use for calculation of the scaling factor.

    Returns
    -------
    `~stdatamodels.jwst.datamodels.CubeModel` or `~stdatamodels.jwst.datamodels.ImageModel` or None
        The background-subtracted science datamodel, or `None` if the
        reference background shape does not match the data or template
        matching fails.
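
    Examples
    --------
    A sketch of typical usage; the science and reference file names below
    are hypothetical, and the percentile values are illustrative only:

    >>> from stdatamodels.jwst import datamodels
    >>> model = datamodels.open("my_niriss_soss_rateints.fits")  # doctest: +SKIP
    >>> result = subtract_soss_bkg(
    ...     model, "my_soss_bkg_template.fits", 99.0, [25.0, 50.0]
    ... )  # doctest: +SKIP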
"""
# Load background reference file into datamodel
bkg_refmodel = datamodels.SossBkgModel(bkg_name)
# Deal with array shape issues - either slice the arrays for SUBSTRIP96 or skip processing.
if bkg_refmodel.shape[-2] == 256 and input_model.data.shape[-2] == 96:
# Slice reference model for SUBSTRIP96 data
bkg_refmodel.data = bkg_refmodel.data[:, SUBSTRIP96_ROWSTART : SUBSTRIP96_ROWSTART + 96, :]
elif bkg_refmodel.shape[-2] != input_model.data.shape[-2]:
log.warning(
"Background reference file shape does not match SOSS subarray. "
"Skipping subtraction of reference background."
)
return None

    # Generate exclusion mask from data array flux threshold, then OR in pixels flagged
    # DO_NOT_USE (bit 0) in the DQ array.
    data_mask = input_model.data >= np.nanpercentile(input_model.data, soss_source_percentile)
    data_mask |= (input_model.dq & 1) > 0

    # Most SOSS data will be multi-integration - but if input data array is 2-D, cast into
    # 3-D array for ease of computation
    if len(input_model.data.shape) < 3:
        data = input_model.data[np.newaxis, :, :]
        data_mask = data_mask[np.newaxis, :, :]
    else:
        data = input_model.data

    # Initialize best-fit RMSE to large number
    best_rmse = np.inf
    best_fit_template_idx = -1
    best_scales = np.array([np.nan, np.nan])

    # Iterate over template backgrounds in reference file to find best-fit template
    for t_idx, template in enumerate(bkg_refmodel.data):
        # Generate masks for template matching
        masks = generate_background_masks(template, data.shape[-3], for_fitting=True)
        template = np.repeat(template[np.newaxis, :, :], data.shape[-3], axis=0)

        # Initialize some quantities that we'll check for each template
        scales = np.array([np.nan, np.nan])
        rmse = np.array([np.inf, np.inf])
        # For each background region, mask out source pixels, clip the data/template ratio
        # to the requested percentile range, and take the median as the scale factor
        for i, mask in enumerate(masks):
            mask[data_mask] = False
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", "divide by zero")
                ratio = data[mask] / template[mask]
            q1, q2 = np.nanpercentile(ratio, [soss_bkg_percentile[0], soss_bkg_percentile[1]])
            valid_pixels = (ratio > q1) & (ratio < q2)
            scales[i] = np.nanmedian(ratio[valid_pixels])
            rmse[i] = _rms_error(data[mask] - (template[mask] * scales[i]))

        if np.sum(rmse) < best_rmse:
            best_rmse = np.sum(rmse)
            best_fit_template_idx = t_idx
            best_scales = scales

    if best_fit_template_idx < 0:
        log.warning("Template matching failed for background subtraction. Skipping bkg_subtract.")
        return None

    # With template determined, regenerate masks for separate scaling
    masks = generate_background_masks(
        bkg_refmodel.data[best_fit_template_idx], data.shape[-3], for_fitting=False
    )

    # Special behavior for ImageModels
    if len(input_model.data.shape) < 3:
        masks = [mask[0] for mask in masks]
        template = bkg_refmodel.data[best_fit_template_idx]
    else:
        template = np.repeat(
            bkg_refmodel.data[best_fit_template_idx][np.newaxis, :, :], data.shape[-3], axis=0
        )
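
    # Subtract the scaled template separately in the left and right background regions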
    for i, mask in enumerate(masks):
        input_model.data -= template * mask * best_scales[i]

    return input_model