Source code for jwst.background.background_sub_soss

"""Subtract a template background reference file from NIRISS SOSS data."""

import logging
import warnings

import numpy as np
from scipy import ndimage
from stdatamodels.jwst import datamodels

log = logging.getLogger(__name__)

# These parameters were hard-coded in the algorithm derived by the NIRISS team - future
# updates could consider exposing them as step-level parameters.
BKG_DISCON_COLUMNS = [685, 725]  # The columns to check for a discontinuity in background level.
BACKGROUND_MASK_CUTOFF = 950  # Drop all pixels with col > cutoff for template matching of bkgd.
SUBSTRIP96_ROWSTART = 10  # Determine where to place SUBSTRIP96 slice wrt. SUBSTRIP256 array.

__all__ = ["find_discontinuity", "generate_background_masks", "subtract_soss_bkg"]


def find_discontinuity(image):
    """
    Determine the location of a discontinuity in the background levels.

    This function applies a Gaussian smoothing filter to the image, then searches for a
    discontinuity by locating the maximum gradient in the smoothed image. The search is
    restricted to columns where the discontinuity is expected to occur, set by the
    ``BKG_DISCON_COLUMNS`` parameter.

    Parameters
    ----------
    image : ndarray
        The background template with a discontinuity.

    Returns
    -------
    ndarray
        The derived location of the discontinuity in ``[x, y]``.
    """
    smoothed = ndimage.gaussian_filter(image, 3)
    gradient = np.gradient(smoothed, axis=1)
    x_discontinuity = (
        np.argmax(
            gradient[:, BKG_DISCON_COLUMNS[0] : BKG_DISCON_COLUMNS[1]],
            axis=1,
        )
        + BKG_DISCON_COLUMNS[0]
    )
    return np.column_stack([x_discontinuity, np.arange(len(image))])
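# Illustrative sketch (not part of the pipeline): applying find_discontinuity to a
# synthetic template with a jump near column 700. The array shape and jump location
# below are assumptions chosen only for demonstration.
#
#     import numpy as np
#     bkg = np.ones((256, 2048))
#     bkg[:, 700:] = 6.0  # artificial step inside BKG_DISCON_COLUMNS
#     discon = find_discontinuity(bkg)
#     # discon has shape (256, 2): column 0 holds the x-location of the maximum
#     # gradient (near 700 here) and column 1 holds the row index.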
def generate_background_masks(background, n_repeats, for_fitting):
    """
    Generate background masks for the two distinct background regions.

    Using the discontinuity in the background template as a separation, build selection
    masks which flag the left and right background regions. The mask right of the
    discontinuity is also truncated rightward of a cutoff value set by
    ``BACKGROUND_MASK_CUTOFF`` - the NIRISS team found that template matching using
    pixels on the right side of the detector led to poorer fits due to source
    contamination.

    Parameters
    ----------
    background : ndarray
        The 2-D background template.
    n_repeats : int
        The number of integrations in the science data, used to broadcast the mask
        into a 3-D array.
    for_fitting : bool
        If `True`, the right_mask will be truncated to only use pixels left of the
        ``BACKGROUND_MASK_CUTOFF`` value, by default column 950.
        If `False`, every pixel will belong to either left_mask or right_mask.

    Returns
    -------
    bool ndarray, bool ndarray
        The left and right background masks.
    """
    # First find discontinuity in background template
    discontinuity = find_discontinuity(background)

    # Use discontinuity to split base mask into left and right segments
    __, xx = np.indices(background.shape)
    mask_right = xx >= discontinuity[:, 0].reshape(-1, 1)
    mask_left = ~mask_right

    # Remove background pixels rightward of cutoff from mask for template-fitting purposes
    if for_fitting:
        mask_right[xx > BACKGROUND_MASK_CUTOFF] = False

    # Broadcast mask shape to match data integrations
    mask_left = np.repeat(mask_left[np.newaxis, :, :], n_repeats, axis=0)
    mask_right = np.repeat(mask_right[np.newaxis, :, :], n_repeats, axis=0)

    return mask_left, mask_right
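# Illustrative sketch of the mask generation (the array shapes are assumptions): with
# for_fitting=True the right-hand mask is additionally truncated at
# BACKGROUND_MASK_CUTOFF, so fewer pixels are used for template fitting.
#
#     left, right = generate_background_masks(bkg, n_repeats=3, for_fitting=True)
#     # left.shape == right.shape == (3, 256, 2048)
#     # right is False for all columns > BACKGROUND_MASK_CUTOFF (950)
#     left_full, right_full = generate_background_masks(bkg, n_repeats=3, for_fitting=False)
#     # here every pixel belongs to exactly one of left_full / right_full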
def _rms_error(residuals):
    return np.sqrt(np.square(residuals).mean())
def subtract_soss_bkg(
    input_model,
    bkg_name,
    soss_source_percentile,
    soss_bkg_percentile,
):
    """
    Subtract a scaled template reference background from input SOSS data.

    Derive a separate scaling factor left and right of the background discontinuity;
    select the best template from the library of templates in the reference file using
    the root-mean-square of the residuals.

    Parameters
    ----------
    input_model : `~stdatamodels.jwst.datamodels.CubeModel` or \
                  `~stdatamodels.jwst.datamodels.ImageModel`
        The science data, typically a multi-integration
        `~stdatamodels.jwst.datamodels.CubeModel` but possibly an
        `~stdatamodels.jwst.datamodels.ImageModel`.
    bkg_name : str
        The name of the background reference file.
    soss_source_percentile : float
        The threshold percentile used as a cutoff - all pixels above the threshold are
        deemed source and are not used for background template matching.
    soss_bkg_percentile : list of float
        A 2-member list describing the lower and upper limits of the background flux
        percentiles to use for calculation of the scaling factor.

    Returns
    -------
    `~stdatamodels.jwst.datamodels.CubeModel` or `~stdatamodels.jwst.datamodels.ImageModel`
        The background-subtracted science datamodel.
    """
    # Load background reference file into datamodel
    bkg_refmodel = datamodels.SossBkgModel(bkg_name)

    # Deal with array shape issues - either slice the arrays for SUBSTRIP96 or skip processing.
    if bkg_refmodel.shape[-2] == 256 and input_model.data.shape[-2] == 96:
        # Slice reference model for SUBSTRIP96 data
        bkg_refmodel.data = bkg_refmodel.data[:, SUBSTRIP96_ROWSTART : SUBSTRIP96_ROWSTART + 96, :]
    elif bkg_refmodel.shape[-2] != input_model.data.shape[-2]:
        log.warning(
            "Background reference file shape does not match SOSS subarray. "
            "Skipping subtraction of reference background."
        )
        return None

    # Generate exclusion mask from data array flux threshold, then OR in the DNU dq plane.
    data_mask = input_model.data >= np.nanpercentile(input_model.data, soss_source_percentile)
    data_mask |= input_model.dq & 1 > 0

    # Most SOSS data will be multi-integration - but if input data array is 2-D, cast into
    # 3-D array for ease of computation
    if len(input_model.data.shape) < 3:
        data = input_model.data[np.newaxis, :, :]
        data_mask = data_mask[np.newaxis, :, :]
    else:
        data = input_model.data

    # Initialize best-fit RMSE to large number
    best_rmse = np.inf
    best_fit_template_idx = -1
    best_scales = np.array([np.nan, np.nan])

    # Iterate over template backgrounds in reference file to find best-fit template
    for t_idx, template in enumerate(bkg_refmodel.data):
        # Generate masks for template matching
        masks = generate_background_masks(template, data.shape[-3], for_fitting=True)
        template = np.repeat(template[np.newaxis, :, :], data.shape[-3], axis=0)

        # Initialize some quantities that we'll check for each template
        scales = np.array([np.nan, np.nan])
        rmse = np.array([np.inf, np.inf])

        for i, mask in enumerate(masks):
            mask[data_mask] = False
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", "divide by zero")
                ratio = data[mask] / template[mask]

            # Keep ratios between the upper and lower background percentiles, then find median
            q1, q2 = np.nanpercentile(ratio, [soss_bkg_percentile[0], soss_bkg_percentile[1]])
            valid_pixels = (ratio > q1) & (ratio < q2)
            scales[i] = np.nanmedian(ratio[valid_pixels])
            rmse[i] = _rms_error(data[mask] - (template[mask] * scales[i]))

        if np.sum(rmse) < best_rmse:
            best_rmse = np.sum(rmse)
            best_fit_template_idx = t_idx
            best_scales = scales

    if best_fit_template_idx < 0:
        log.warning("Template matching failed for background subtraction. Skipping bkg_subtract.")
        return None

    # With template determined, regenerate masks for separate scaling
    masks = generate_background_masks(
        bkg_refmodel.data[best_fit_template_idx], data.shape[-3], for_fitting=False
    )

    # Special behavior for ImageModels
    if len(input_model.data.shape) < 3:
        masks = [mask[0] for mask in masks]
        template = bkg_refmodel.data[best_fit_template_idx]
    else:
        template = np.repeat(
            bkg_refmodel.data[best_fit_template_idx][np.newaxis, :, :], data.shape[-3], axis=0
        )

    for i, mask in enumerate(masks):
        input_model.data -= template * mask * best_scales[i]

    return input_model
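# Minimal usage sketch (the file names and percentile values below are assumptions,
# not pipeline defaults):
#
#     from stdatamodels.jwst import datamodels
#     model = datamodels.open("niriss_soss_rateints.fits")  # hypothetical input file
#     result = subtract_soss_bkg(
#         model,
#         "soss_bkg_ref.fits",  # hypothetical SossBkgModel reference file
#         soss_source_percentile=60.0,
#         soss_bkg_percentile=[25.0, 75.0],
#     )
#     # result is the background-subtracted datamodel, or None if the reference file
#     # shape check or the template matching failed.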