# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# (C) British Crown Copyright 2017-2019 Met Office.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Neighbour finding for the Improver site specific process chain."""
import warnings
import cartopy.crs as ccrs
import numpy as np
from scipy.spatial import cKDTree
from improver import BasePlugin
from improver.metadata.utilities import create_coordinate_hash
from improver.spotdata.build_spotdata_cube import build_spotdata_cube
from improver.utilities.cube_manipulation import enforce_coordinate_ordering
[docs]class NeighbourSelection(BasePlugin):
"""
For the selection of a grid point near an arbitrary coordinate, where the
selection may be the nearest point, or a point that fulfils other
imposed constraints.
Constraints available for determining the neighbours are:
1. land_constraint which requires the selected point to be on land.
2. minimum_dz which minimises the vertical displacement between the
given coordinate (when an altitude is provided) and the grid point
where its altitude is provided by the relevant model or high resolution
orography. Note that spot coordinates provided without an altitude are
given the altitude of the nearest grid point taken from the orography
cube.
3. A combination of the above, where the land constraint is primary and out
of available land points, the one with the minimal vertical displacement
is chosen.
"""
def __init__(self, land_constraint=False, minimum_dz=False,
             search_radius=1.0E4,
             site_coordinate_system=ccrs.PlateCarree(),
             site_x_coordinate='longitude', site_y_coordinate='latitude',
             node_limit=36):
    """
    Record the options that control how a grid point neighbour is chosen.

    Args:
        land_constraint (bool):
            When True, only land points (as given by a land mask) may be
            selected as neighbours.
        minimum_dz (bool):
            When True, the neighbour is chosen to minimise the vertical
            displacement between the grid point and the site altitude.
        search_radius (float):
            Radius in metres around a spot site within which a grid
            point neighbour is searched for.
        site_coordinate_system (cartopy coordinate system):
            Coordinate system in which the site list coordinates are
            expressed. Defaults to a latitude/longitude (PlateCarree)
            system.
        site_x_coordinate (str):
            Key under which each site dictionary stores its x
            coordinate. Defaults to longitude.
        site_y_coordinate (str):
            Key under which each site dictionary stores its y
            coordinate. Defaults to latitude.
        node_limit (int):
            Maximum number of nearest neighbours requested from the tree
            when candidates for the minimum_dz constraint are gathered.
    """
    # Constraint switches.
    self.land_constraint = land_constraint
    self.minimum_dz = minimum_dz

    # Limits applied when the KDTree is queried.
    self.search_radius = search_radius
    self.node_limit = node_limit

    # How to read positions out of the site dictionaries.
    self.site_coordinate_system = site_coordinate_system
    self.site_x_coordinate = site_x_coordinate
    self.site_y_coordinate = site_y_coordinate
    self.site_altitude = 'altitude'

    # Placeholder; process() overwrites this from the orography cube.
    self.global_coordinate_system = False
def __repr__(self):
"""Represent the configured plugin instance as a string."""
return ('<NeighbourSelection: land_constraint: {}, ' +
'minimum_dz: {}, search_radius: {}, site_coordinate_system'
': {}, site_x_coordinate:{}, site_y_coordinate: {}, '
'node_limit: {}>').format(
self.land_constraint, self.minimum_dz, self.search_radius,
self.site_coordinate_system.__class__,
self.site_x_coordinate, self.site_y_coordinate,
self.node_limit)
[docs] def neighbour_finding_method_name(self):
"""
Create a name to describe the neighbour method based on the constraints
provided.
Returns:
str:
A string that describes the neighbour finding method employed.
This is essentially a concatenation of the options.
"""
method_name = '{}{}{}'.format('nearest',
'_land' if self.land_constraint else '',
'_minimum_dz' if self.minimum_dz else '')
return method_name
[docs] def check_sites_are_within_domain(self, sites, site_coords, site_x_coords,
site_y_coords, cube):
"""
A function to remove sites from consideration if they fall outside the
domain of the provided model cube. A warning is raised and the details
of each rejected site are printed.
Args:
sites (list of dict):
A list of dictionaries defining the spot sites for which
neighbours are to be found. e.g.:
[{'altitude': 11.0, 'latitude': 57.867000579833984,
'longitude': -5.632999897003174, 'wmo_id': 3034}]
site_coords (numpy.ndarray):
An array of shape (n_sites, 2) that contains the spot site
coordinates in the coordinate system of the model cube.
site_x_coords (numpy.ndarray):
The x coordinates of the spot sites in their original
coordinate system, from which invalid sites must be removed.
site_y_coords (numpy.ndarray):
The y coordinates of the spot sites in their original
coordinate system, from which invalid sites must be removed.
cube (iris.cube.Cube):
A cube that is representative of the model/grid from which spot
data will be extracted.
Returns:
(tuple): tuple containing:
**sites** (numpy.ndarray):
The sites modified to filter out the sites falling outside
the grid domain of the cube.
**site_coords** (numpy.ndarray):
The site_coords modified to filter out the sites falling
outside the grid domain of the cube.
**site_x_coords** (numpy.ndarray):
The x_coords modified to filter out the sites falling
outside the grid domain of the cube.
**site_y_coords** (numpy.ndarray):
The y_coords modified to filter out the sites falling
outside the grid domain of the cube.
"""
# Get the grid domain limits
x_min = cube.coord(axis='x').bounds.min()
x_max = cube.coord(axis='x').bounds.max()
y_min = cube.coord(axis='y').bounds.min()
y_max = cube.coord(axis='y').bounds.max()
if self.global_coordinate_system:
domain_valid = np.where(
(site_coords[:, 1] >= y_min) & (site_coords[:, 1] <= y_max))
domain_invalid = np.where(
(site_coords[:, 1] < y_min) | (site_coords[:, 1] > y_max))
else:
domain_valid = np.where(
(site_coords[:, 0] >= x_min) & (site_coords[:, 0] <= x_max) &
(site_coords[:, 1] >= y_min) & (site_coords[:, 1] <= y_max))
domain_invalid = np.where(
(site_coords[:, 0] < x_min) | (site_coords[:, 0] > x_max) |
(site_coords[:, 1] < y_min) | (site_coords[:, 1] > y_max))
num_invalid = len(domain_invalid[0])
if num_invalid > 0:
msg = ("{} spot sites fall outside the grid domain and will not be"
" processed. These sites are:\n".format(num_invalid))
dyn_msg = '{}\n'
for site in np.array(sites)[domain_invalid]:
msg += dyn_msg.format(site)
warnings.warn(msg)
sites = np.array(sites)[domain_valid]
site_coords = site_coords[domain_valid]
site_x_coords = site_x_coords[domain_valid]
site_y_coords = site_y_coords[domain_valid]
return sites, site_coords, site_x_coords, site_y_coords
[docs] @staticmethod
def get_nearest_indices(site_coords, cube):
"""
Uses the iris cube method nearest_neighbour_index to find the nearest
grid points to a site.
Args:
site_coords (numpy.ndarray):
An array of shape (n_sites, 2) that contains the x and y
coordinates of the sites.
cube (iris.cube.Cube):
Cube containing a representative grid.
Returns:
numpy.ndarray:
A list of shape (n_sites, 2) that contains the x and y indices
of the nearest grid points to the sites.
"""
nearest_indices = np.zeros((len(site_coords), 2)).astype(np.int)
for index, (x_point, y_point) in enumerate(site_coords):
nearest_indices[index, 0] = (
cube.coord(axis='x').nearest_neighbour_index(x_point))
nearest_indices[index, 1] = (
cube.coord(axis='y').nearest_neighbour_index(y_point))
return nearest_indices
[docs] @staticmethod
def geocentric_cartesian(cube, x_coords, y_coords):
"""
A function to convert a global (lat/lon) coordinate system into a
geocentric (3D trignonometric) system. This function ignores orographic
height differences between coordinates, giving a 2D projected
neighbourhood akin to selecting a neighbourhood of grid points about a
point without considering their vertical displacement.
Args:
cube (iris.cube.Cube):
A cube from which is taken the globe for which the geocentric
coordinates are being calculated.
x_coords (numpy.ndarray):
An array of x coordinates that will represent one axis of the
mesh of coordinates to be transformed.
y_coords (numpy.ndarray):
An array of y coordinates that will represent one axis of the
mesh of coordinates to be transformed.
Returns:
numpy.ndarray:
An array of all the xyz combinations that describe the nodes of
the grid, now in 3D geocentric cartesian coordinates. The shape
of the array is (n_nodes, 3), order x[:, 0], y[:, 1], z[:, 2].
"""
coordinate_system = cube.coord_system().as_cartopy_crs()
cartesian_calculator = coordinate_system.as_geocentric()
z_coords = np.zeros_like(x_coords)
cartesian_nodes = cartesian_calculator.transform_points(
coordinate_system, x_coords, y_coords, z_coords)
return cartesian_nodes
[docs] def build_KDTree(self, land_mask):
"""
Build a KDTree for extracting the nearest point or points to a site.
The tree can be built with a constrained set of grid points, e.g. only
land points, if required.
Args:
land_mask (iris.cube.Cube):
A land mask cube for the model/grid from which grid point
neighbours are being selected.
Returns:
(tuple): tuple containing:
**scipy.spatial.ckdtree.cKDTree**:
A KDTree containing the required nodes, built using the
scipy cKDTree method.
**numpy.ndarray**:
An array of shape (n_nodes, 2) that contains the x and y
indices that correspond to the selected node,
e.g. node=100 --> x_coord_index=10, y_coord_index=300,
index_nodes[100] = [10, 300]
"""
if self.land_constraint:
included_points = np.nonzero(land_mask.data)
else:
included_points = np.where(np.isfinite(land_mask.data.data))
x_indices = included_points[0]
y_indices = included_points[1]
x_coords = land_mask.coord(axis='x').points[x_indices]
y_coords = land_mask.coord(axis='y').points[y_indices]
if self.global_coordinate_system:
nodes = self.geocentric_cartesian(land_mask, x_coords, y_coords)
else:
nodes = list(zip(x_coords, y_coords))
index_nodes = np.array(list(zip(x_indices, y_indices)))
return cKDTree(nodes), index_nodes
[docs] def select_minimum_dz(self, orography, site_altitude, index_nodes,
distance, indices):
"""
Given a selection of nearest neighbours to a given site, this function
calculates the absolute vertical displacement between the site and the
neighbours. It then returns grid indices of the neighbour with the
minimum vertical displacement (i.e. at the most similar altitude). The
number of neighbours to consider is a maximum of node_limit, but these
may be limited by the imposed search_radius, or this limit may be
insufficient to reach the search radius, in which case a warning is
raised.
Args:
orography (iris.cube.Cube):
A cube of orography, used to obtain the grid point altitudes.
site_altitude (float):
The altitude of the spot site being considered.
index_nodes (numpy.ndarray):
An array of shape (n_nodes, 2) that contains the x and y
indices that correspond to the selected node,
distance (numpy.ndarray):
An array that contains the distances from the spot site to each
grid point neighbour being considered. The number maybe np.inf
if the site is beyond the search_radius.
indices (numpy.ndarray):
An array of tree node indices identifying the neigbouring grid
points, the list corresponding to the array of distances.
Returns:
numpy.ndarray or None:
A 2-element array giving the x and y indices of the chosen grid
point neighbour. Returns None if no valid neighbours were found
in the tree query.
"""
# Values beyond the imposed search radius are set to inf,
# these need to be excluded.
valid_indices = np.where(np.isfinite(distance))
# If no valid neighbours are available in the tree, return None.
if valid_indices[0].shape[0] == 0:
return None
# If the last distance is finite the number of tree nodes may not be
# sufficient to fill the search radius, raise a warning.
if np.isfinite(distance[-1]):
msg = ('Limit on number of nearest neighbours to return, {}, may '
'not be sufficiently large to fill search_radius {}'.format(
self.node_limit, self.search_radius))
warnings.warn(msg)
indices = indices[valid_indices]
# Calculate the difference in height between the spot site
# and grid point.
grid_point_altitudes = orography.data[tuple(index_nodes[indices].T)]
vertical_displacements = abs(grid_point_altitudes - site_altitude)
# The tree returns an ordered array, the first element
# being the closest. We search the array for the first
# element that matches the minimum vertical displacement
# found, giving us the nearest such point.
index_of_minimum = (
np.argmax(vertical_displacements ==
vertical_displacements.min()))
grid_point = index_nodes[indices][index_of_minimum]
return grid_point
[docs] def process(self, sites, orography, land_mask):
"""
Using the constraints provided, find the nearest grid point neighbours
to the given spot sites for the model/grid given by the input cubes.
Returned is a cube that contains the defining characteristics of the
spot sites (e.g. x coordinate, y coordinate, altitude) and the indices
of the selected grid point neighbour.
Args:
sites (list of dict):
A list of dictionaries defining the spot sites for which
neighbours are to be found. e.g.:
[{'altitude': 11.0, 'latitude': 57.867000579833984,
'longitude': -5.632999897003174, 'wmo_id': 3034}]
orography (iris.cube.Cube):
A cube of orography, used to obtain the grid point altitudes.
land_mask (iris.cube.Cube):
A land mask cube for the model/grid from which grid point
neighbours are being selected.
Returns:
iris.cube.Cube:
A cube containing both the spot site information and for each
the grid point indices of its nearest neighbour as per the
imposed constraints.
"""
# Check if we are dealing with a global grid.
self.global_coordinate_system = orography.coord(axis='x').circular
# Exclude regional grids with spatial dimensions other than metres.
if not self.global_coordinate_system:
if not orography.coord(axis='x').units == 'metres':
msg = ('Cube spatial coordinates for regional grids must be'
'in metres to match the defined search_radius.')
raise ValueError(msg)
# Ensure land_mask and orography are on the same grid.
if not orography.dim_coords == land_mask.dim_coords:
msg = ('Orography and land_mask cubes are not on the same '
'grid.')
raise ValueError(msg)
# Enforce x-y coordinate order for input cubes.
enforce_coordinate_ordering(
orography, [orography.coord(axis='x').name(),
orography.coord(axis='y').name()])
enforce_coordinate_ordering(
land_mask, [land_mask.coord(axis='x').name(),
land_mask.coord(axis='y').name()])
# Remap site coordinates on to coordinate system of the model grid.
site_x_coords = np.array([site[self.site_x_coordinate]
for site in sites])
site_y_coords = np.array([site[self.site_y_coordinate]
for site in sites])
site_coords = self._transform_sites_coordinate_system(
site_x_coords, site_y_coords,
orography.coord_system().as_cartopy_crs())
# Exclude any sites falling outside the domain given by the cube and
# notify the user.
sites, site_coords, site_x_coords, site_y_coords = (
self.check_sites_are_within_domain(
sites, site_coords, site_x_coords, site_y_coords,
orography))
# Find nearest neighbour point using quick iris method.
nearest_indices = self.get_nearest_indices(site_coords, orography)
# Create an array containing site altitudes, using the nearest point
# orography height for any that are unset.
site_altitudes = np.array([site.get(self.site_altitude, None)
for site in sites])
site_altitudes = np.where(np.isnan(site_altitudes.astype(float)),
orography.data[tuple(nearest_indices.T)],
site_altitudes)
# If further constraints are being applied, build a KD Tree which
# includes points filtered by constraint.
if self.land_constraint or self.minimum_dz:
# Build the KDTree, an internal test for the land_constraint checks
# whether to exclude sea points from the tree.
tree, index_nodes = self.build_KDTree(land_mask)
# Site coordinates made cartesian for global coordinate system
if self.global_coordinate_system:
site_coords = self.geocentric_cartesian(
orography, site_coords[:, 0], site_coords[:, 1])
if not self.minimum_dz:
# Query the tree for the nearest neighbour, in this case a land
# neighbour is returned along with the distance to it.
distances, node_indices = tree.query([site_coords])
# Look up the grid coordinates that correspond to the tree node
land_neighbour_indices, = index_nodes[node_indices]
# Use the found land neighbour if it is within the
# search_radius, otherwise use the nearest neighbour.
distances = np.array([distances[0], distances[0]]).T
nearest_indices = np.where(distances < self.search_radius,
land_neighbour_indices,
nearest_indices)
else:
# Query the tree for self.node_limit nearby neighbours.
distances, node_indices = tree.query(
[site_coords], distance_upper_bound=self.search_radius,
k=self.node_limit)
# Loop over the sites and for each choose the returned
# neighbour with the minimum vertical displacement.
for index, (distance, indices) in enumerate(zip(
distances[0], node_indices[0])):
grid_point = self.select_minimum_dz(
orography, site_altitudes[index], index_nodes,
distance, indices)
# None is returned if the tree query returned no neighbours
# within the search radius.
if grid_point is not None:
nearest_indices[index] = grid_point
# Calculate the vertical displacements between the chosen grid point
# and the spot site.
vertical_displacements = (site_altitudes -
orography.data[tuple(nearest_indices.T)])
# Create a list of WMO IDs if available. These are stored as strings
# to accommodate the use of 'None' for unset IDs.
wmo_ids = [str(site.get('wmo_id', None)) for site in sites]
# Construct a name to describe the neighbour finding method employed
method_name = self.neighbour_finding_method_name()
# Create an array of indices and displacements to return
data = np.stack((nearest_indices[:, 0], nearest_indices[:, 1],
vertical_displacements), axis=1)
data = np.expand_dims(data, 1).astype(np.float32)
# Regardless of input sitelist coordinate system, the site coordinates
# are stored as latitudes and longitudes in the neighbour cube.
if self.site_coordinate_system != ccrs.PlateCarree():
lon_lats = self._transform_sites_coordinate_system(
site_x_coords, site_y_coords, ccrs.PlateCarree())
longitudes = lon_lats[:, 0]
latitudes = lon_lats[:, 1]
else:
longitudes = site_x_coords
latitudes = site_y_coords
# Create a cube of neighbours
neighbour_cube = build_spotdata_cube(
data, 'grid_neighbours', 1, site_altitudes.astype(np.float32),
latitudes.astype(np.float32), longitudes.astype(np.float32),
wmo_ids, neighbour_methods=[method_name],
grid_attributes=['x_index', 'y_index', 'vertical_displacement'])
# Add a hash attribute based on the model grid to ensure the neighbour
# cube is only used with a compatible grid.
grid_hash = create_coordinate_hash(orography)
neighbour_cube.attributes['model_grid_hash'] = grid_hash
return neighbour_cube