Source code for sofia_redux.instruments.hawc.steps.stepcheckhead

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""Header validation pipeline step."""

import os
import re

from astropy import log
import configobj

from sofia_redux.instruments.hawc.stepparent import StepParent

__all__ = ['StepCheckhead', 'HeaderValidationError']


# Define a custom error type so that wrapper scripts can catch
# it and abort with a readable message
class HeaderValidationError(RuntimeError):
    """
    Signal that a FITS header fails the data reduction requirements.

    Derives from `RuntimeError`, so callers that already trap
    `RuntimeError` also catch this error.
    """
class StepCheckhead(StepParent):
    """
    Validate headers for HAWC+ raw data files.

    This step checks the primary header of the input file for
    keywords required for data reduction.  It also reformats the
    filename stored in the DataFits object, to conform to SOFIA
    requirements.

    This step should be called before any other steps, on
    raw HAWC data.  Output from this step is the input object
    itself, with a new filename and the original file name recorded
    in the RAWFNAME keyword; it should not be saved to disk.
    """

    # Python type used to cast range limits read from the header
    # definition file; any dtype not listed here is treated as str.
    _DTYPE_CLASSES = {'bool': bool, 'int': int,
                      'long': int, 'float': float}

    # Type names accepted for a header value, per required dtype.
    # str and bool must match exactly; float accepts any number type;
    # int and long are interchangeable.  Dtypes not listed here are
    # not type-checked.
    _ACCEPTED_TYPES = {'str': ('str',),
                       'bool': ('bool',),
                       'float': ('float', 'int', 'long'),
                       'int': ('int', 'long'),
                       'long': ('int', 'long')}

    # Type names that min/max range limits apply to
    _NUMERIC_TYPES = ('int', 'long', 'float')

    def setup(self):
        """
        Set parameters and metadata for the pipeline step.

        Output files have PRODTYPE = 'checkhead', and are named with
        the step abbreviation 'CHK'.

        Parameters defined for this step are:

        abort : bool
            If set, this step will raise a `HeaderValidationError`
            if the input header does not meet requirements.  Otherwise,
            it will only issue warnings.
        headerdef : str
            Path to the header keyword definition file, usually
            stored in data/config/header_req_config.cfg.
        """
        # Name of the pipeline reduction step
        self.name = 'checkhead'
        self.description = 'Check Headers'

        # Identifier for saved file names.
        self.procname = 'chk'

        # Parameter list: [name, default, description]
        self.paramlist = [
            ['abort', True,
             'Abort pipeline if headers do not meet requirements'],
            ['headerdef', 'headerdef.txt',
             'Header keyword definition file'],
        ]

    def run(self):
        """
        Run the data reduction algorithm.

        Because this step is single-in, single-out (SISO),
        self.datain must be a DataFits object.  The output
        is also a DataFits object, stored in self.dataout.

        The process is:

        1. Read required keyword types and limits from the
           configuration file named by the 'headerdef' parameter.
        2. Check the primary header of the input file for compliance
           and output error messages if necessary.
        3. Rename the output file to SOFIA standard format.

        Raises
        ------
        IOError
            If the header definition file does not exist.
        configobj.ConfigObjError
            If the header definition file cannot be parsed.
        ValueError
            If a range limit in the header definition file cannot
            be cast to the keyword's declared data type.
        HeaderValidationError
            If any errors are found and the parameter 'abort' is True.
        """
        # Read input parameters
        abort = self.getarg('abort')
        deffile = os.path.expandvars(self.getarg('headerdef'))

        # Read header definition file into config object
        if not os.path.isfile(deffile):
            msg = '<%s> is invalid file name for header configuration' % \
                deffile
            log.error('HeaderCheck: ' + msg)
            raise IOError(msg)
        try:
            reqconf = configobj.ConfigObj(deffile)
        except configobj.ConfigObjError:
            msg = 'Error while loading header configuration file'
            log.error('HeaderCheck: ' + msg)
            raise

        # Check a few important mode keywords; add any that are
        # set to the requirement set ('*' is always required)
        req_set = ['*']
        mode_keys = (('CHOPPING', 'chopping'),
                     ('NODDING', 'nodding'),
                     ('DITHER', 'dithering'),
                     ('SCANNING', 'scanning'))
        for keyword, category in mode_keys:
            if self._getsafeval(keyword):
                req_set.append(category)

        # Problems are errors when aborting, warnings otherwise
        report = log.error if abort else log.warning

        # Loop through keywords, checking against requirements
        abort_flag = False
        for key, req in reqconf.dict().items():
            problem = self._check_keyword(key, req, req_set)
            if problem is not None:
                abort_flag = True
                report(problem)

        # Bail if requested, and requirements are not met
        if abort and abort_flag:
            msg = 'Header for <%s> does not meet requirements for ' \
                  'data processing' % \
                  os.path.basename(self.datain.filename)
            log.error('HeaderCheck: ' + msg)
            raise HeaderValidationError(msg)

        # Output is a reference to the input data, not a copy
        self.dataout = self.datain

        # Keep the original filename in the header, then rename
        # the output file for SOFIA convention
        dname, rawname = os.path.split(self.dataout.filename)
        self.dataout.setheadval('RAWFNAME', rawname, 'Raw filename')
        self.dataout.filename = os.path.join(dname,
                                             self._standard_filename())

    def _check_keyword(self, key, req, req_set):
        """
        Check one header keyword against its requirements.

        Parameters
        ----------
        key : str
            The header keyword to check.
        req : dict
            Requirements for the keyword, as read from the header
            definition file.  Recognized entries are 'requirement'
            (default '*'), 'dtype' (default 'str'), and 'drange'.
        req_set : list of str
            Requirement categories that apply to the current data.

        Returns
        -------
        str or None
            A message describing the first problem found, or None
            if the keyword is not required or meets requirements.

        Raises
        ------
        ValueError
            If a range limit in the header definition file cannot
            be cast to the keyword's declared data type.
        """
        # Check if key is required for this data type
        req_category = str(req.get('requirement', '*')).strip()
        if req_category not in req_set:
            return None

        req_dtype = str(req.get('dtype', 'str')).strip()
        req_drange = req.get('drange')

        # Retrieve value from header and/or config file
        val = self._getsafeval(key)
        if val is None:
            return 'Required keyword <%s> not found' % key
        stype = type(val).__name__

        # Check if key matches required type
        accepted = self._ACCEPTED_TYPES.get(req_dtype)
        if accepted is not None and stype not in accepted:
            return ('Required keyword <%s> has wrong '
                    'type <%s>; should be <%s>' %
                    (key, stype, req_dtype))

        # Check if value meets range requirements
        if req_drange is None:
            return None
        return self._check_range(key, val, stype, req_dtype, req_drange)

    def _check_range(self, key, val, stype, req_dtype, req_drange):
        """
        Check a header value against enum, min, and max limits.

        Parameters
        ----------
        key : str
            The header keyword being checked.
        val : str, bool, int, or float
            The header value.
        stype : str
            Type name of `val`.
        req_dtype : str
            Data type declared for the keyword in the header
            definition file.
        req_drange : dict
            Range specification; may contain 'enum', 'min', 'max'.

        Returns
        -------
        str or None
            A message describing the first violated limit, or None
            if all limits are met.

        Raises
        ------
        ValueError
            If a limit cannot be cast to the declared data type.
        """
        caster = self._DTYPE_CLASSES.get(req_dtype, str)
        cfg_error = 'HeaderCheck: Error in header configuration ' \
                    'file for key <%s>' % key

        # Check for enum first -- ignore any others if
        # present.  May be used for strings, bools, or numerical
        # equality.
        if 'enum' in req_drange:
            enum = req_drange['enum']

            # Make into list if enum is a single value
            if not isinstance(enum, list):
                enum = [enum]

            # Cast to data type; bool() on a non-empty string is
            # always True, so booleans are parsed from their text
            if req_dtype == 'bool':
                enum = [str(e).strip().lower() == 'true' for e in enum]
            else:
                try:
                    enum = [caster(e) for e in enum]
                except ValueError:
                    log.error(cfg_error)
                    raise

            # Case-insensitive comparison for strings
            if stype == 'str':
                enum = [str(e).upper() for e in enum]
                matched = val.upper() in enum
            else:
                matched = val in enum
            if not matched:
                return ('Required keyword <%s> has wrong '
                        'value <%s>; should be in %s' %
                        (key, val, enum))
            return None

        # Min and max limits apply to numerical values only
        if stype not in self._NUMERIC_TYPES:
            return None

        # Check for a minimum requirement
        # (numerical value must be >= minimum)
        if 'min' in req_drange:
            try:
                minval = caster(req_drange['min'])
            except ValueError:
                log.error(cfg_error)
                raise
            if val < minval:
                return ('Required keyword <%s> has wrong '
                        'value <%s>; should be >= %f' %
                        (key, val, minval))

        # Check for a maximum requirement
        # (numerical value must be <= maximum)
        if 'max' in req_drange:
            try:
                maxval = caster(req_drange['max'])
            except ValueError:
                log.error(cfg_error)
                raise
            if val > maxval:
                return ('Required keyword <%s> has wrong '
                        'value <%s>; should be <= %f' %
                        (key, val, maxval))
        return None

    def _standard_filename(self):
        """
        Compose the SOFIA-convention file name for the input data.

        The name has the form F<flight>_HA_<mode>_<aorid>_<spectels>
        _RAW_<filenum>.fits, with placeholders ('XXXX' or 'UNKNOWN')
        for any element that cannot be determined from the header.
        Reads self.dataout.filenum, so self.dataout must be set.

        Returns
        -------
        str
            The new base file name, without directory.
        """
        # Get flight number from mission ID
        mid = str(self._getsafeval('MISSN-ID'))
        match = re.search(r'F(\d{3,4})', mid)
        if match:
            fltnum = "%4.4d" % int(match.group(1))
        else:
            # if can't find, use HAWC flight number
            try:
                fltnum = "%4.4d" % self._getsafeval('FLGTNUM')
            except (TypeError, ValueError):
                fltnum = "XXXX"

        # Get spectels.  Header values are not guaranteed to be
        # strings, so coerce before using string operations.
        spec1 = self._getsafeval('SPECTEL1')
        spec2 = self._getsafeval('SPECTEL2')
        spec1 = 'UNKNOWN' if spec1 is None else str(spec1)
        spec2 = 'UNKNOWN' if spec2 is None else str(spec2)
        if spec1 == 'UNKNOWN' and spec2 == 'UNKNOWN':
            spec = 'UNKNOWN'
        else:
            spec2 = re.sub(r'^HAW_', '', spec2)
            spec = spec1.strip().replace('_', '') + \
                spec2.strip().replace('_', '')

        # Get AOR-ID
        aorid = self._getsafeval('AOR_ID')
        if aorid is None:
            aorid = 'UNKNOWN'
        else:
            aorid = str(aorid).strip().replace('_', '')

        # Get obsmode; a missing keyword stringifies to 'NONE'
        instcfg = str(self._getsafeval('INSTCFG')).upper()
        calmode = str(self._getsafeval('CALMODE')).upper()
        if calmode not in ('NONE', 'UNKNOWN'):
            obs = 'CAL'
        elif 'POL' in instcfg:
            obs = 'POL'
        else:
            obs = 'IMA'

        # Get file number from filename; must look like an integer
        fnum = self.dataout.filenum
        try:
            int(fnum)
        except (TypeError, ValueError):
            fnum = 'UNKNOWN'

        # Compose output file name
        return "F%s_HA_%s_%s_%s_RAW_%s.fits" % \
            (fltnum, obs, aorid, spec, fnum)

    def _getsafeval(self, key):
        """
        Helper function to quietly return None if a keyword isn't found.

        Log messages below CRITICAL are suppressed during the lookup.

        Parameters
        ----------
        key : str
            The keyword value to retrieve.

        Returns
        -------
        object or None
            The value returned by self.datain.getheadval, with
            surrounding whitespace stripped if it is a string;
            None if the lookup raises KeyError.
        """
        # This function assumes that self.datain contains exactly
        # one pre-loaded file

        # Set the log level to critical only
        old_level = log.level
        log.setLevel('CRITICAL')
        try:
            val = self.datain.getheadval(key)
        except KeyError:
            # Set to None if not found
            val = None
        finally:
            # Restore the old log level even if the lookup fails
            # unexpectedly; the logger is shared, so leaving it at
            # CRITICAL would silence all later messages
            log.setLevel(old_level)

        # Strip any spaces from string values
        if isinstance(val, str):
            val = val.strip()

        return val