Source code for sofia_redux.instruments.hawc.steps.stepcheckhead

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""Header validation pipeline step."""

import os
import re

from astropy import log
import configobj

from sofia_redux.instruments.hawc.stepparent import StepParent

__all__ = ['StepCheckhead', 'HeaderValidationError']


# Define a custom error value so that wrapper scripts can catch
# them and abort with a readable message

[docs]
class HeaderValidationError(RuntimeError):
    """Error raised when a FITS header does not meet requirements."""
    pass




[docs]
class StepCheckhead(StepParent):
    """
    Validate headers for HAWC+ raw data files.

    This step checks the primary header of the input file for
    keywords required for data reduction. It also reformats the
    filename stored in the DataFits object, to conform to SOFIA
    requirements.

    This step should be called before any other steps, on raw
    HAWC data. Output from this step is identical to the input
    except for the filename; it should not be saved to disk.
    """

[docs]
    def setup(self):
        """
        Set parameters and metadata for the pipeline step.

        Output files have PRODTYPE = 'checkhead', and are named with
        the step abbreviation 'CHK'.

        Parameters defined for this step are:

        abort : bool
            If set, this step will raise a `HeaderValidationError`
            if the input header does not meet requirements. Otherwise,
            it will only issue warnings.
        headerdef : str
            Path to the header keyword definition file, usually
            stored in data/config/header_req_config.cfg.
        """

        # Name of the pipeline reduction step
        self.name = 'checkhead'
        self.description = 'Check Headers'

        # Identifier for saved file names.
        self.procname = 'chk'

        # Clear Parameter list
        self.paramlist = []

        # Append parameters
        self.paramlist.append(['abort', True,
                               'Abort pipeline if headers do not meet '
                               'requirements'])
        self.paramlist.append(['headerdef', 'headerdef.txt',
                               'Header keyword definition file'])



[docs]
    def run(self):
        """
        Run the data reduction algorithm.

        Because this step is single-in, single-out (SISO),
        self.datain must be a DataFits object. The output
        is also a DataFits object, stored in self.dataout.

        The process is:

        1. Read required keyword types and limits from a configuration file
           (usually in pipeline/config/header_req_config.cfg).
        2. Check the primary header of the input file for compliance and
           output error messages if necessary.
        3. Rename the output file to SOFIA standard format.

        Raises
        ------
        HeaderValidationError
            If any errors are found and the parameter 'abort' is True.
        """

        # Read input parameters
        abort = self.getarg('abort')
        deffile = os.path.expandvars(self.getarg('headerdef'))

        # Read header definition file into config object
        if os.path.isfile(deffile):
            try:
                reqconf = configobj.ConfigObj(deffile)
            except configobj.ConfigObjError as error:
                msg = 'Error while loading header configuration file'
                log.error('HeaderCheck: ' + msg)
                raise error
        else:
            msg = '<%s> is invalid file name for header configuration' % \
                  deffile
            log.error('HeaderCheck: ' + msg)
            raise IOError(msg)

        # Check a few important mode keywords
        chopping = self._getsafeval('CHOPPING')
        nodding = self._getsafeval('NODDING')
        dithering = self._getsafeval('DITHER')
        scanning = self._getsafeval('SCANNING')

        # Add any that are True to the requirement set
        req_set = ['*']
        if chopping is not None and chopping:
            req_set.append('chopping')
        if nodding is not None and nodding:
            req_set.append('nodding')
        if dithering is not None and dithering:
            req_set.append('dithering')
        if scanning is not None and scanning:
            req_set.append('scanning')

        # Flag to throw error if requirements are not met
        abort_flag = False

        # Loop through keywords, checking against requirements
        reqdict = reqconf.dict()
        for key, req in reqdict.items():
            # Retrieve requirements
            try:
                req_category = str(req['requirement']).strip()
            except KeyError:
                req_category = '*'
            try:
                req_dtype = str(req['dtype']).strip()
            except KeyError:
                req_dtype = 'str'
            try:
                req_drange = req['drange']
            except KeyError:
                req_drange = None

            # Get type class corresponding to string
            if req_dtype == 'bool':
                req_dtype_class = bool
            elif req_dtype == 'int':
                req_dtype_class = int
            elif req_dtype == 'long':
                req_dtype_class = int
            elif req_dtype == 'float':
                req_dtype_class = float
            else:
                req_dtype_class = str

            # Check if key is required for this data type
            if req_category not in req_set:
                continue

            # Retrieve value from header and/or config file
            val = self._getsafeval(key)
            valtype = type(val)
            stype = valtype.__name__

            # Check if required key is present
            if val is None:
                abort_flag = True
                msg = 'Required keyword <%s> not found' % key
                if abort:
                    log.error(msg)
                else:
                    log.warning(msg)
                continue

            # Check if key matches required type
            if req_dtype == 'str' or req_dtype == 'bool':
                # Use exact type for str, bool
                if stype != req_dtype:
                    abort_flag = True
                    msg = 'Required keyword <%s> has wrong ' \
                          'type <%s>; should be <%s>' % \
                          (key, stype, req_dtype)
                    if abort:
                        log.error(msg)
                    else:
                        log.warning(msg)
                    continue
            elif req_dtype == 'float':
                # Allow any number type for float types
                if stype not in ['float', 'int', 'long']:
                    abort_flag = True
                    msg = 'Required keyword <%s> has wrong ' \
                          'type <%s>; should be <%s>' % \
                          (key, stype, req_dtype)
                    if abort:
                        log.error(msg)
                    else:
                        log.warning(msg)
                    continue
            elif req_dtype == 'int' or req_dtype == 'long':
                # Allow ints to be longs and vice versa
                if stype not in ['int', 'long']:
                    abort_flag = True
                    msg = 'Required keyword <%s> has wrong ' \
                          'type <%s>; should be <%s>' % \
                          (key, stype, req_dtype)
                    if abort:
                        log.error(msg)
                    else:
                        log.warning(msg)
                    continue

            # Check if value meets range requirements
            if req_drange is not None:

                # Check for enum first -- ignore any others if
                # present. May be used for strings, bools, or numerical
                # equality.
                if 'enum' in req_drange:
                    enum = req_drange['enum']

                    # Make into list if enum is a single value
                    if type(enum) is not list:
                        enum = [enum]

                    # Cast to data type
                    if req_dtype == 'bool':
                        enum = [True if str(e).strip().lower() == 'true'
                                else False
                                for e in enum]
                    else:
                        try:
                            enum = [req_dtype_class(e) for e in enum]
                        except ValueError as error:
                            msg = 'Error in header configuration file for ' \
                                  'key <%s>' % key
                            log.error('HeaderCheck: ' + msg)
                            raise error

                    # Case-insensitive comparison for strings
                    if stype == 'str':
                        enum = [str(e).upper() for e in enum]
                        if val.upper() not in enum:
                            abort_flag = True
                            msg = 'Required keyword <%s> has wrong ' \
                                  'value <%s>; should be in %s' % \
                                  (key, val, enum)
                            if abort:
                                log.error(msg)
                            else:
                                log.warning(msg)
                            continue
                    else:
                        if val not in enum:
                            abort_flag = True
                            msg = 'Required keyword <%s> has wrong ' \
                                  'value <%s>; should be in %s' % \
                                  (key, val, enum)
                            if abort:
                                log.error(msg)
                            else:
                                log.warning(msg)
                            continue

                # Check for a minimum requirement
                # (numerical value must be >= minimum)
                else:
                    if ('min' in req_drange
                            and stype in ['int', 'long', 'float']):
                        try:
                            minval = req_dtype_class(req_drange['min'])
                        except ValueError as error:
                            msg = 'Error in header configuration file for ' \
                                  'key <%s>' % key
                            log.error('HeaderCheck: ' + msg)
                            raise error
                        if val < minval:
                            abort_flag = True
                            msg = 'Required keyword <%s> has wrong ' \
                                  'value <%s>; should be >= %f' % \
                                  (key, val, minval)
                            if abort:
                                log.error(msg)
                            else:
                                log.warning(msg)
                            continue

                    # Check for a maximum requirement
                    # (numerical value must be <= maximum)
                    if ('max' in req_drange
                            and stype in ['int', 'long', 'float']):
                        try:
                            maxval = req_dtype_class(req_drange['max'])
                        except ValueError as error:
                            msg = 'Error in header configuration file for ' \
                                  'key <%s>' % key
                            log.error('HeaderCheck: ' + msg)
                            raise error
                        if val > maxval:
                            abort_flag = True
                            msg = 'Required keyword <%s> has wrong ' \
                                  'value <%s>; should be <= %f' % \
                                  (key, val, maxval)
                            if abort:
                                log.error(msg)
                            else:
                                log.warning(msg)
                            continue

        # Bail if requested, and requirements are not met
        if abort and abort_flag:
            msg = 'Header for <%s> does not meet requirements for ' \
                  'data processing' % \
                  os.path.basename(self.datain.filename)
            log.error('HeaderCheck: ' + msg)
            raise HeaderValidationError(msg)

        # Add a reference to the input data in dataout
        self.dataout = self.datain

        # Rename the output file for SOFIA convention

        # Keep the original filename
        (dname, rawname) = os.path.split(self.dataout.filename)
        self.dataout.setheadval('RAWFNAME', rawname, 'Raw filename')

        # Get flight number from mission
        mid = str(self._getsafeval('MISSN-ID'))
        match = re.search(r'F(\d{3,4})', mid)
        if match is not None and len(match.groups()) > 0:
            fltnum = "%4.4d" % int(match.group(1))
        else:
            # if can't find, use HAWC flight number
            try:
                fltnum = "%4.4d" % self._getsafeval('FLGTNUM')
            except (TypeError, ValueError):
                fltnum = "XXXX"

        # Get spectels
        spec1 = self._getsafeval('SPECTEL1')
        spec2 = self._getsafeval('SPECTEL2')
        if spec1 is None:
            spec1 = 'UNKNOWN'
        if spec2 is None:
            spec2 = 'UNKNOWN'
        if spec1 == 'UNKNOWN' and spec2 == 'UNKNOWN':
            spec = 'UNKNOWN'
        else:
            spec2 = re.sub('^HAW_', '', spec2)
            spec = re.sub('_', '', spec1.strip()) + \
                re.sub('_', '', spec2.strip())

        # Get AOR-ID
        aorid = self._getsafeval('AOR_ID')
        if aorid is None:
            aorid = 'UNKNOWN'
        else:
            aorid = re.sub('_', '', aorid.strip())

        # Get obsmode
        instcfg = str(self._getsafeval('INSTCFG')).upper()
        calmode = str(self._getsafeval('CALMODE')).upper()
        if calmode not in ['NONE', 'UNKNOWN']:
            obs = 'CAL'
        else:
            if 'POL' in instcfg:
                obs = 'POL'
            else:
                obs = 'IMA'

        # Get file number from filename
        fnum = self.dataout.filenum
        try:
            int(fnum)
        except (TypeError, ValueError):
            fnum = 'UNKNOWN'

        # Compose output file name
        outfilename = "F%s_HA_%s_%s_%s_RAW_%s.fits" % \
                      (fltnum, obs, aorid, spec, fnum)
        self.dataout.filename = os.path.join(dname, outfilename)


    def _getsafeval(self, key):
        """
        Helper function to quietly return None if a keyword isn't found.

        Parameters
        ----------
        key : str
            The keyword value to retrieve.
        """
        # This function assumes that self.datain contains exactly
        # one pre-loaded file

        # Set the log level to critical only
        old_level = log.level
        log.setLevel('CRITICAL')

        # Try to get the value from the datain header, allowing
        # the config file to override
        try:
            val = self.datain.getheadval(key)
        except KeyError:
            # Set to None if not found
            val = None

        # Strip any spaces from string values
        if type(val) is str:
            val = val.strip()

        # Restore the old log level
        log.setLevel(old_level)

        # Return the safe value
        return val
Navigation

Source code for sofia_redux.instruments.hawc.steps.stepcheckhead