# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# (C) British Crown Copyright 2017-2019 Met Office.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Module containing thresholding classes."""
import operator
import iris
import numpy as np
from cf_units import Unit
from improver import BasePlugin
from improver.utilities.cube_manipulation import enforce_coordinate_ordering
from improver.utilities.rescale import rescale
[docs]class BasicThreshold(BasePlugin):
"""Apply a threshold truth criterion to a cube.
Calculate the threshold truth values based on a linear membership function
around the threshold values provided. A cube will be returned with a new
threshold dimension coordinate.
Can operate on multiple time sequences within a cube.
"""
[docs] def __init__(self, thresholds, fuzzy_factor=None,
fuzzy_bounds=None, threshold_units=None,
comparison_operator='>'):
"""
Set up for processing an in-or-out of threshold field, including the
generation of fuzzy_bounds which are required to threshold an input
cube (through self.process(cube)). If fuzzy_factor is not None, fuzzy
bounds are calculated using the threshold value in the units in which
it is provided.
The usage of fuzzy_factor is exemplified as follows:
For a 6 mm/hr threshold with a 0.75 fuzzy factor, a range of 25%
around this threshold (between (6*0.75=) 4.5 and (6*(2-0.75)=) 7.5)
would be generated. The probabilities of exceeding values within this
range are scaled linearly, so that 4.5 mm/hr yields a thresholded value
of 0 and 7.5 mm/hr yields a thresholded value of 1. Therefore, in this
case, the thresholded exceedance probabilities between 4.5 mm/hr and
7.5 mm/hr would follow the pattern:
::
Data value | Probability
------------|-------------
4.5 | 0
5.0 | 0.167
5.5 | 0.333
6.0 | 0.5
6.5 | 0.667
7.0 | 0.833
7.5 | 1.0
Args:
thresholds (list of float or float):
The threshold points for 'significant' datapoints.
fuzzy_factor (float):
Specifies lower bound for fuzzy membership value when
multiplied by each threshold. Upper bound is equivalent linear
distance above threshold. If None, no fuzzy_factor is applied.
fuzzy_bounds (list of tuple):
Lower and upper bounds for fuzziness.
List should be of same length as thresholds.
Each entry in list should be a tuple of two floats
representing the lower and upper bounds respectively.
If None, no fuzzy_bounds are applied.
threshold_units (str):
Units of the threshold values. If not provided the units are
assumed to be the same as those of the input cube.
comparison_operator (str):
Indicates the comparison_operator to use with the threshold.
e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
evaluate data < threshold. When using fuzzy thresholds, there
is no difference between < and <= or > and >=.
Valid choices: > >= < <= gt ge lt le.
Raises:
ValueError: If a threshold of 0.0 is requested when using a fuzzy
factor.
ValueError: If the fuzzy_factor is not greater than 0 and less
than 1.
ValueError: If both fuzzy_factor and fuzzy_bounds are set
as this is ambiguous.
"""
# ensure threshold is a list, even if only a single value is provided
self.thresholds = thresholds
if np.isscalar(thresholds):
self.thresholds = [thresholds]
# if necessary, set threshold units
if threshold_units is None:
self.threshold_units = None
else:
self.threshold_units = Unit(threshold_units)
# initialise threshold coordinate name as None
self.threshold_coord_name = None
# read fuzzy factor or set (default) to 1 (no smoothing)
fuzzy_factor_loc = 1.
if fuzzy_factor is not None:
if fuzzy_bounds is not None:
raise ValueError(
"Invalid combination of keywords. Cannot specify "
"fuzzy_factor and fuzzy_bounds together")
if not 0 < fuzzy_factor < 1:
raise ValueError(
"Invalid fuzzy_factor: must be >0 and <1: {}".format(
fuzzy_factor))
if 0 in self.thresholds:
raise ValueError(
"Invalid threshold with fuzzy factor: cannot use a "
"multiplicative fuzzy factor with threshold == 0")
fuzzy_factor_loc = fuzzy_factor
# Set fuzzy-bounds. If neither fuzzy_factor nor fuzzy_bounds is set,
# both lower_thr and upper_thr default to the threshold value. A test
# of this equality is used later to determine whether to process with
# a sharp threshold or fuzzy bounds.
if fuzzy_bounds is None:
self.fuzzy_bounds = []
for thr in self.thresholds:
lower_thr = thr * fuzzy_factor_loc
upper_thr = thr * (2. - fuzzy_factor_loc)
if thr < 0:
lower_thr, upper_thr = upper_thr, lower_thr
self.fuzzy_bounds.append((lower_thr, upper_thr))
else:
self.fuzzy_bounds = fuzzy_bounds
# ensure fuzzy_bounds is a list of tuples
if isinstance(fuzzy_bounds, tuple):
self.fuzzy_bounds = [fuzzy_bounds]
# check that thresholds and fuzzy_bounds are self-consistent
for thr, bounds in zip(self.thresholds, self.fuzzy_bounds):
if len(bounds) != 2:
raise ValueError("Invalid bounds for one threshold: {}."
" Expected 2 floats.".format(bounds))
if bounds[0] > thr or bounds[1] < thr:
bounds_msg = ("Threshold must be within bounds: "
"!( {} <= {} <= {} )".format(bounds[0],
thr, bounds[1]))
raise ValueError(bounds_msg)
# Dict of known logical comparisons. Each key contains a dict of
# {'function': The operator function for this comparison_operator,
# 'spp_string': Comparison_Operator string for use in CF-convention
# meta-data}
self.comparison_operator_dict = {}
self.comparison_operator_dict.update(dict.fromkeys(
['ge', 'GE', '>='], {'function': operator.ge,
'spp_string': 'above'}))
self.comparison_operator_dict.update(dict.fromkeys(
['gt', 'GT', '>'], {'function': operator.gt,
'spp_string': 'above'}))
self.comparison_operator_dict.update(dict.fromkeys(
['le', 'LE', '<='], {'function': operator.le,
'spp_string': 'below'}))
self.comparison_operator_dict.update(dict.fromkeys(
['lt', 'LT', '<'], {'function': operator.lt,
'spp_string': 'below'}))
self.comparison_operator_string = comparison_operator
self._decode_comparison_operator_string()
def __repr__(self):
"""Represent the configured plugin instance as a string."""
return (
'<BasicThreshold: thresholds {}, ' +
'fuzzy_bounds {}, ' +
'method: data {} threshold>'
).format(self.thresholds, self.fuzzy_bounds,
self.comparison_operator_string)
[docs] def _add_threshold_coord(self, cube, threshold):
"""
Add a scalar threshold-type coordinate to a cube containing
thresholded data and promote the new coordinate to be the
leading dimension of the cube.
Args:
cube (iris.cube.Cube):
Cube containing thresholded data (1s and 0s)
threshold (float):
Value at which the data has been thresholded
Returns:
iris.cube.Cube:
With new "threshold" axis
"""
coord = iris.coords.DimCoord(np.array([threshold], dtype=np.float32),
units=cube.units)
coord.rename(self.threshold_coord_name)
coord.var_name = "threshold"
# Use an spp__relative_to_threshold attribute, as an extension to the
# CF-conventions.
coord.attributes.update({'spp__relative_to_threshold':
self.comparison_operator['spp_string']})
cube.add_aux_coord(coord)
return iris.util.new_axis(cube, coord)
[docs] def _decode_comparison_operator_string(self):
"""Sets self.comparison_operator based on
self.comparison_operator_string. This is a dict containing the keys
'function' and 'spp_string'.
Raises errors if invalid options are found.
Raises:
ValueError: If self.comparison_operator_string does not match a
defined method.
"""
try:
self.comparison_operator = self.comparison_operator_dict[
self.comparison_operator_string]
except KeyError:
msg = (f'String "{self.comparison_operator_string}" '
'does not match any known comparison_operator method')
raise ValueError(msg)
[docs] def process(self, input_cube):
"""Convert each point to a truth value based on provided threshold
values. The truth value may or may not be fuzzy depending upon if
fuzzy_bounds are supplied. If the plugin has a "threshold_units"
member, this is used to convert both thresholds and fuzzy bounds into
the units of the input cube.
Args:
input_cube (iris.cube.Cube):
Cube to threshold. The code is dimension-agnostic.
Returns:
iris.cube.Cube:
Cube after a threshold has been applied. The data within this
cube will contain values between 0 and 1 to indicate whether
a given threshold has been exceeded or not.
The cube meta-data will contain:
* Input_cube name prepended with
probability_of_X_above(or below)_threshold (where X is
the diagnostic under consideration)
* Threshold dimension coordinate with same units as input_cube
* Threshold attribute (above or below threshold)
* Cube units set to (1).
Raises:
ValueError: if a np.nan value is detected within the input cube.
"""
# Record input cube data type to ensure consistent output, though
# integer data must become float to enable fuzzy thresholding.
input_cube_dtype = input_cube.dtype
if input_cube.dtype.kind == 'i':
input_cube_dtype = np.float32
thresholded_cubes = iris.cube.CubeList()
if np.isnan(input_cube.data).any():
raise ValueError("Error: NaN detected in input cube data")
# if necessary, convert thresholds and fuzzy bounds into cube units
if self.threshold_units is not None:
self.thresholds = [self.threshold_units.convert(threshold,
input_cube.units)
for threshold in self.thresholds]
self.fuzzy_bounds = [tuple([
self.threshold_units.convert(threshold, input_cube.units)
for threshold in bounds]) for bounds in self.fuzzy_bounds]
# set name of threshold coordinate to match input diagnostic
self.threshold_coord_name = input_cube.name()
# apply fuzzy thresholding
for threshold, bounds in zip(self.thresholds, self.fuzzy_bounds):
cube = input_cube.copy()
# if upper and lower bounds are equal, set a deterministic 0/1
# probability based on exceedance of the threshold
if bounds[0] == bounds[1]:
truth_value = self.comparison_operator['function'](
cube.data, threshold)
# otherwise, scale exceedance probabilities linearly between 0/1
# at the min/max fuzzy bounds and 0.5 at the threshold value
else:
truth_value = np.where(
cube.data < threshold,
rescale(cube.data,
data_range=(bounds[0], threshold),
scale_range=(0., 0.5),
clip=True),
rescale(cube.data,
data_range=(threshold, bounds[1]),
scale_range=(0.5, 1.),
clip=True),
)
# if requirement is for probabilities below threshold (rather
# than above), invert the exceedance probability
if 'below' in self.comparison_operator['spp_string']:
truth_value = 1. - truth_value
truth_value = truth_value.astype(input_cube_dtype)
cube.data = truth_value
# Overwrite masked values that have been thresholded
# with the un-thresholded values from the input cube.
if np.ma.is_masked(cube.data):
cube.data[input_cube.data.mask] = (
input_cube.data[input_cube.data.mask])
cube = self._add_threshold_coord(cube, threshold)
thresholded_cubes.append(cube)
cube, = thresholded_cubes.concatenate()
cube.rename(
"probability_of_{}_{}_threshold".format(
cube.name(),
self.comparison_operator['spp_string']))
cube.units = Unit(1)
enforce_coordinate_ordering(
cube, ["realization", "percentile"])
return cube