Source code for improver.standardise
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# (C) British Crown Copyright 2017-2019 Met Office.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Plugin to regrid cube data and standardise metadata"""
import warnings
import iris
import numpy as np
from iris.analysis import Linear, Nearest
from iris.exceptions import CoordinateNotFoundError
from scipy.interpolate import griddata
from improver import BasePlugin
from improver.metadata.amend import amend_attributes
from improver.metadata.check_datatypes import (
check_cube_not_float64, check_time_coordinate_metadata)
from improver.metadata.constants.attributes import MANDATORY_ATTRIBUTE_DEFAULTS
from improver.metadata.constants.mo_attributes import MOSG_GRID_ATTRIBUTES
from improver.metadata.constants.time_types import (
TIME_COORD_NAMES, TIME_INTERVAL_DTYPE, TIME_INTERVAL_UNIT,
TIME_REFERENCE_DTYPE, TIME_REFERENCE_UNIT)
from improver.threshold import BasicThreshold
from improver.utilities.cube_checker import spatial_coords_match
from improver.utilities.spatial import OccurrenceWithinVicinity
[docs]def grid_contains_cutout(grid, cutout):
"""
Check that a spatial cutout is contained within a given grid
Args:
grid (iris.cube.Cube):
A cube defining a data grid
cutout (iris.cube.Cube):
The cutout to search for within the grid
Returns:
bool:
True if cutout is contained within grid, False otherwise
"""
if spatial_coords_match(grid, cutout):
return True
# check whether "cutout" coordinate points match a subset of "grid"
# points on both axes
for axis in ['x', 'y']:
grid_coord = grid.coord(axis=axis)
cutout_coord = cutout.coord(axis=axis)
# check coordinate metadata
if (cutout_coord.name() != grid_coord.name() or
cutout_coord.units != grid_coord.units or
cutout_coord.coord_system != grid_coord.coord_system):
return False
# search for cutout coordinate points in larger grid
cutout_start = cutout_coord.points[0]
find_start = [np.isclose(cutout_start, grid_point)
for grid_point in grid_coord.points]
if not np.any(find_start):
return False
start = find_start.index(True)
end = start + len(cutout_coord.points)
try:
if not np.allclose(cutout_coord.points,
grid_coord.points[start:end]):
return False
except ValueError:
# raised by np.allclose if "end" index overshoots edge of grid
# domain - slicing does not raise IndexError
return False
return True
[docs]class StandardiseGridAndMetadata(BasePlugin):
"""Plugin to regrid cube data and standardise metadata"""
REGRID_REQUIRES_LANDMASK = {"bilinear": False,
"nearest": False,
"nearest-with-mask": True}
[docs] def __init__(self, regrid_mode='bilinear', extrapolation_mode='nanmask',
landmask=None, landmask_vicinity=25000):
"""
Initialise regridding parameters
Args:
regrid_mode (str):
Mode of interpolation in regridding.
extrapolation_mode (str):
Mode to fill regions outside the domain in regridding.
landmask (iris.cube.Cube or None):
Land-sea mask ("land_binary_mask") on the input cube grid.
Required for "nearest-with-mask" regridding option.
landmask_vicinity (float):
Radius of vicinity to search for a coastline, in metres
Raises:
ValueError: If a landmask is required but not passed in
"""
if landmask is None and "nearest-with-mask" in regrid_mode:
msg = ("Regrid mode {} requires an input landmask cube")
raise ValueError(msg.format(regrid_mode))
self.regrid_mode = regrid_mode
self.extrapolation_mode = extrapolation_mode
self.landmask_source_grid = landmask
self.landmask_vicinity = (
None if landmask is None else landmask_vicinity)
self.landmask_name = "land_binary_mask"
[docs] def _adjust_landsea(self, cube, target_grid):
"""
Adjust regridded data using differences between the target landmask
and that obtained by regridding the source grid landmask, to ensure
that the "land" or "sea" nature of the points in the regridded cube
matches that of the target grid.
Args:
cube (iris.cube.Cube):
Cube after initial regridding
target_grid (iris.cube.Cube):
Cube containing landmask data on the target grid
Returns:
iris.cube.Cube: Adjusted cube
"""
if self.landmask_name not in self.landmask_source_grid.name():
msg = ("Expected {} in input_landmask cube but found {}".format(
self.landmask_name, repr(self.landmask_source_grid)))
warnings.warn(msg)
if self.landmask_name not in target_grid.name():
msg = ("Expected {} in target_grid cube but found {}".format(
self.landmask_name, repr(target_grid)))
warnings.warn(msg)
plugin = AdjustLandSeaPoints(vicinity_radius=self.landmask_vicinity)
return plugin.process(cube, self.landmask_source_grid, target_grid)
[docs] def _regrid_to_target(self, cube, target_grid, regridded_title):
"""
Regrid cube to target_grid, inherit grid attributes and update title
Args:
cube (iris.cube.Cube):
Cube to be regridded
target_grid (iris.cube.Cube):
Data on the target grid. If regridding with mask, this cube
should contain land-sea mask data to be used in adjusting land
and sea points after regridding.
regridded_title (str or None):
New value for the "title" attribute to be used after
regridding. If not set, a default value is used.
Returns:
iris.cube.Cube: Regridded cube with updated attributes
"""
regridder = Linear(extrapolation_mode=self.extrapolation_mode)
if "nearest" in self.regrid_mode:
regridder = Nearest(extrapolation_mode=self.extrapolation_mode)
cube = cube.regrid(target_grid, regridder)
if self.REGRID_REQUIRES_LANDMASK[self.regrid_mode]:
cube = self._adjust_landsea(cube, target_grid)
# identify grid-describing attributes on source cube that need updating
required_grid_attributes = [attr for attr in cube.attributes
if attr in MOSG_GRID_ATTRIBUTES]
# update attributes if available on target grid, otherwise remove
for key in required_grid_attributes:
if key in target_grid.attributes:
cube.attributes[key] = target_grid.attributes[key]
else:
cube.attributes.pop(key)
cube.attributes["title"] = (
MANDATORY_ATTRIBUTE_DEFAULTS["title"]
if regridded_title is None else regridded_title)
return cube
[docs] @staticmethod
def _standardise_time_coordinates(cube):
"""
If cube time-type coordinates do not conform to expected standards,
update units and datatypes in place.
"""
def _update_time_coordinate(coord):
"""Update a non-conforming time coordinate"""
if coord.units.is_time_reference():
coord.convert_units(TIME_REFERENCE_UNIT)
coord.points = coord.points.astype(TIME_REFERENCE_DTYPE)
if coord.bounds is not None:
coord.bounds = coord.bounds.astype(TIME_REFERENCE_DTYPE)
else:
coord.convert_units(TIME_INTERVAL_UNIT)
coord.points = coord.points.astype(TIME_INTERVAL_DTYPE)
if coord.bounds is not None:
coord.bounds = coord.bounds.astype(TIME_INTERVAL_DTYPE)
# check all time coordinates; if check fails, update them all
try:
check_time_coordinate_metadata(cube)
except ValueError:
for time_coord in TIME_COORD_NAMES:
try:
coord = cube.coord(time_coord)
except CoordinateNotFoundError:
pass
else:
_update_time_coordinate(coord)
[docs] @staticmethod
def _collapse_scalar_dimensions(cube):
"""
Demote any scalar dimensions (excluding "realization") on the input
cube to auxiliary coordinates.
Returns:
iris.cube.Cube
"""
coords_to_collapse = []
for coord in cube.coords(dim_coords=True):
if len(coord.points) == 1 and "realization" not in coord.name():
coords_to_collapse.append(coord)
for coord in coords_to_collapse:
cube = next(cube.slices_over(coord))
return cube
[docs] @staticmethod
def _remove_scalar_coords(cube, coords_to_remove):
"""Removes named coordinates from the input cube."""
for coord in coords_to_remove:
try:
cube.remove_coord(coord)
except CoordinateNotFoundError:
continue
[docs] def process(self, cube, target_grid=None, new_name=None, new_units=None,
regridded_title=None, coords_to_remove=None,
attributes_dict=None, fix_float64=False):
"""
Perform regridding and metadata adjustments
Args:
cube (iris.cube.Cube):
Input cube to be standardised
target_grid (iris.cube.Cube or None):
Cube on the required grid. For "nearest-with-mask" regridding,
this cube should contain a binary land-sea mask
("land_binary_mask"). If target_grid is None, no regridding is
performed.
new_name (str or None):
Optional rename for output cube
new_units (str or None):
Optional unit conversion for output cube
regridded_title (str or None):
New title attribute to be applied after regridding. If not set,
the title attribute is set to a default value if the field is
regridded, as "title" may contain grid information.
coords_to_remove (list of str or None):
Optional list of scalar coordinates to remove from output cube
attributes_dict (dict or None):
Optional dictionary of required attribute updates. Keys are
attribute names, and values are the required value or "remove".
fix_float64 (bool):
Flag to de-escalate float64 precision
Returns:
iris.cube.Cube
"""
# regridding
if target_grid:
# if regridding using a land-sea mask, check this covers the source
# grid in the required coordinates
if self.REGRID_REQUIRES_LANDMASK[self.regrid_mode]:
if not grid_contains_cutout(self.landmask_source_grid, cube):
raise ValueError(
"Source landmask does not match input grid")
cube = self._regrid_to_target(cube, target_grid, regridded_title)
# standard metadata updates
cube = self._collapse_scalar_dimensions(cube)
self._standardise_time_coordinates(cube)
# optional metadata updates
if new_name:
cube.rename(new_name)
if new_units:
cube.convert_units(new_units)
if coords_to_remove:
self._remove_scalar_coords(cube, coords_to_remove)
if attributes_dict:
amend_attributes(cube, attributes_dict)
check_cube_not_float64(cube, fix=fix_float64)
return cube
[docs]class AdjustLandSeaPoints:
"""
Replace data values at points where the nearest-regridding technique
selects a source grid-point with an opposite land-sea-mask value to the
target grid-point.
The replacement data values are selected from a vicinity of points on the
source-grid and the closest point of the correct mask is used.
Where no match is found within the vicinity, the data value is not changed.
"""
[docs] def __init__(self, extrapolation_mode="nanmask", vicinity_radius=25000.):
"""
Initialise class
Args:
extrapolation_mode (str):
Mode to use for extrapolating data into regions
beyond the limits of the source_data domain.
Available modes are documented in
`iris.analysis <https://scitools.org.uk/iris/docs/latest/iris/
iris/analysis.html#iris.analysis.Nearest>`_
Defaults to "nanmask".
vicinity_radius (float):
Distance in metres to search for a sea or land point.
"""
self.input_land = None
self.nearest_cube = None
self.output_land = None
self.output_cube = None
self.regridder = Nearest(extrapolation_mode=extrapolation_mode)
self.vicinity = OccurrenceWithinVicinity(vicinity_radius)
def __repr__(self):
"""
Print a human-readable representation of the instantiated object.
"""
return "<AdjustLandSeaPoints: regridder: {}; vicinity: {}>".format(
self.regridder, self.vicinity)
[docs] def correct_where_input_true(self, selector_val):
"""
Replace points in the output_cube where output_land matches the
selector_val and the input_land does not match, but has matching
points in the vicinity, with the nearest matching point in the
vicinity in the original nearest_cube.
Updates self.output_cube.data
Args:
selector_val (int):
Value of mask to replace if needed.
Intended to be 1 for filling land points near the coast
and 0 for filling sea points near the coast.
"""
# Find all points on output grid matching selector_val
use_points = np.where(self.input_land.data == selector_val)
# If there are no matching points on the input grid, no alteration can
# be made. This tests the size of the y-coordinate of use_points.
if use_points[0].size is 0:
return
# Get shape of output grid
ynum, xnum = self.output_land.shape
# Using only these points, extrapolate to fill domain using nearest
# neighbour. This will generate a grid where the non-selector_val
# points are filled with the nearest value in the same mask
# classification.
(y_points, x_points) = np.mgrid[0:ynum, 0:xnum]
selector_data = griddata(use_points,
self.nearest_cube.data[use_points],
(y_points, x_points), method="nearest")
# Identify nearby points on regridded input_land that match the
# selector_value
if selector_val > 0.5:
thresholder = BasicThreshold(0.5)
else:
thresholder = BasicThreshold(0.5, comparison_operator='<=')
in_vicinity = self.vicinity.process(
thresholder.process(self.input_land))
# Identify those points sourced from the opposite mask that are
# close to a source point of the correct mask
mismatch_points, = np.logical_and(
np.logical_and(self.output_land.data == selector_val,
self.input_land.data != selector_val),
in_vicinity.data > 0.5)
# Replace these points with the filled-domain data
self.output_cube.data[mismatch_points] = (
selector_data[mismatch_points])
[docs] def process(self, cube, input_land, output_land):
"""
Update cube.data so that output_land and sea points match an input_land
or sea point respectively so long as one is present within the
specified vicinity radius. Note that before calling this plugin the
input land mask MUST be checked against the source grid, to ensure
the grids match.
Args:
cube (iris.cube.Cube):
Cube of data to be updated (on same grid as output_land).
input_land (iris.cube.Cube):
Cube of land_binary_mask data on the grid from which "cube" has
been reprojected (it is expected that the iris.analysis.Nearest
method would have been used).
This is used to determine where the input model data is
representing land and sea points.
output_land (iris.cube.Cube):
Cube of land_binary_mask data on target grid.
"""
# Check cube and output_land are on the same grid:
if not spatial_coords_match(cube, output_land):
raise ValueError('X and Y coordinates do not match for cubes {}'
'and {}'.format(repr(cube), repr(output_land)))
self.output_land = output_land
# Regrid input_land to output_land grid.
self.input_land = input_land.regrid(self.output_land, self.regridder)
# Slice over x-y grids for multi-realization data.
result = iris.cube.CubeList()
for xyslice in cube.slices(
[cube.coord(axis='y'), cube.coord(axis='x')]):
# Store and copy cube ready for the output data
self.nearest_cube = xyslice
self.output_cube = self.nearest_cube.copy()
# Update sea points that were incorrectly sourced from land points
self.correct_where_input_true(0)
# Update land points that were incorrectly sourced from sea points
self.correct_where_input_true(1)
result.append(self.output_cube)
result = result.merge_cube()
return result