# Source code for raster_tools.io

import os
import urllib

import dask
import numpy as np
import rasterio as rio
import rioxarray as xrio
import xarray as xr
from affine import Affine
from dask.array.core import normalize_chunks as dask_chunks

from raster_tools.dtypes import F32, F64, I64, U8, is_bool, is_float, is_int
from raster_tools.exceptions import (
    AffineEncodingError,
    DimensionsError,
    RasterDataError,
    RasterIOError,
)
from raster_tools.masking import get_default_null_value
from raster_tools.utils import to_chunk_dict, validate_path


def _get_extension(path):
    return os.path.splitext(path)[-1].lower()


def _get_chunking_info_from_file(src_file):
    """Probe a raster file for info needed to compute dask chunks.

    Returns a ``(tile_shape, shape, dtype)`` tuple where ``tile_shape`` is
    the first band's block shape prefixed with a band dimension of 1 and
    ``shape`` is ``(bands, rows, cols)``.
    """
    with rio.open(src_file) as src:
        block_shape = src.block_shapes[0]
        nbands = src.count
        dtype = np.dtype(src.dtypes[0])
        return (1, *block_shape), (nbands, *src.shape), dtype


def _get_chunks(data=None, src_file=None):
    """Compute a 3D dask chunking spec of the form ``(1, "auto", "auto")``.

    When *data* and/or *src_file* are given, the chunking is normalized
    against their shape, dtype, and existing tiling so chunks align with
    the underlying storage layout.
    """
    target = (1, "auto", "auto")
    if data is None:
        # No in-memory data: fall back to the file, or the bare spec.
        if src_file is None:
            return target
        tile_shape, shape, dtype = _get_chunking_info_from_file(src_file)
    else:
        shape, dtype = data.shape, data.dtype
        if dask.is_dask_collection(data):
            # Reuse the existing dask chunking as the previous layout.
            tile_shape = data.chunks
        elif src_file is not None:
            _, tile_shape, _ = _get_chunking_info_from_file(src_file)
        else:
            tile_shape = None
    return dask_chunks(target, shape, dtype=dtype, previous_chunks=tile_shape)


def chunk(xrs, src_file=None):
    """Rechunk *xrs* using chunking derived from its data.

    For a Dataset the ``raster`` variable drives the chunk computation;
    *src_file*, when given, supplies the on-disk tiling to align with.
    """
    if isinstance(xrs, xr.Dataset):
        data = xrs.raster
    else:
        data = xrs
    chunks = to_chunk_dict(_get_chunks(data, src_file))
    return xrs.chunk(chunks)
# File extension groups recognized by the IO routines.
TIFF_EXTS = frozenset((".tif", ".tiff"))
NC_EXTS = frozenset((".cdf", ".nc", ".nc4"))
HDF_EXTS = frozenset((".hdf", ".h4", ".hdf4", ".he2", ".h5", ".hdf5", ".he5"))
GRIB_EXTS = frozenset((".grib", ".grib2", ".grb", ".grb2", ".gb", ".gb2"))
BATCH_EXTS = frozenset((".bch",))

# File extensions that can't be read in yet
READ_NOT_IMPLEMENTED_EXTS = NC_EXTS | HDF_EXTS | GRIB_EXTS
# File extensions that can't be written out yet
WRITE_NOT_IMPLEMENTED_EXTS = NC_EXTS | HDF_EXTS | GRIB_EXTS
def is_batch_file(path):
    """Return ``True`` if *path* ends with a batch (``.bch``) extension."""
    ext = _get_extension(path)
    return ext in BATCH_EXTS
# Default null value used by ESRI software for float32 rasters: the smallest
# finite float32 value. Used in normalize_null_value to recognize it.
ESRI_DEFAULT_F32_NV = np.finfo(F32).min
def normalize_null_value(nv, dtype):
    """Normalize a raster null value so its type agrees with *dtype*.

    Two corrections are applied: ESRI's default float32 nodata value is
    re-registered as an actual F32 value, and whole-number null values that
    were read in as floats on integer rasters are cast back to ``int`` to
    avoid accidental type promotion later in the pipeline.
    """
    if nv is not None and dtype == F32 and np.isclose(nv, ESRI_DEFAULT_F32_NV):
        # Make sure ESRI's default F32 null value is stored as F32.
        nv = F32.type(nv)
    if is_int(dtype) and is_float(nv) and float(nv).is_integer():
        # (u)int raster with a whole-number null value read as float.
        nv = int(nv)
    return nv
def open_raster_from_path_or_url(path):
    """Open a raster file path or URL as a chunked ``xarray.DataArray``.

    Parameters
    ----------
    path : str or os.PathLike
        A local path or URL pointing to raster data readable by GDAL.

    Returns
    -------
    xarray.DataArray
        Dask-backed raster data with its null value written via rioxarray.

    Raises
    ------
    RasterIOError
        If *path* is not a str/PathLike or cannot be opened as a raster.
    RasterDataError
        If the source contains more than one data variable.
    NotImplementedError
        For NetCDF, HDF, and GRIB inputs, which are not supported here.
    """
    from raster_tools.raster import (
        _try_to_get_null_value_xarray,
        normalize_xarray_data,
    )

    if isinstance(path, os.PathLike):
        # Lower-case for consistency with _get_extension below.
        ext = path.suffix.lower()
    elif isinstance(path, str):
        # BUG FIX: urlparse() returns a ParseResult object, which never
        # compares equal to "". Check the scheme component instead to
        # detect plain file paths.
        if urllib.parse.urlparse(path).scheme == "":
            # Assume file path
            validate_path(path)
            ext = _get_extension(path)
        else:
            # Could be a URL or path
            ext = ""
    else:
        raise RasterIOError(
            f"Could not resolve input to a raster path or URL: '{path}'"
        )

    # Try to let gdal open anything but NC, HDF, GRIB files
    if ext in READ_NOT_IMPLEMENTED_EXTS:
        raise NotImplementedError(
            "Reading of NetCDF, HDF, and GRIB files is not supported at this"
            " time. Try 'raster_tools.open_dataset'."
        )
    try:
        xrs = xrio.open_rasterio(
            path, chunks=to_chunk_dict(_get_chunks()), lock=False
        )
    except rio.errors.RasterioIOError as e:
        raise RasterIOError("Could not open given path as a raster.") from e

    if isinstance(xrs, xr.Dataset):
        raise RasterDataError("Too many data variables in input data")
    assert isinstance(
        xrs, xr.DataArray
    ), "Resulting data structure must be a DataArray"

    if not dask.is_dask_collection(xrs):
        xrs = chunk(xrs, path)
    xrs = normalize_xarray_data(xrs)
    nv = _try_to_get_null_value_xarray(xrs)
    nv = normalize_null_value(nv, xrs.dtype)
    xrs = xrs.rio.write_nodata(nv)
    return xrs
def write_raster(xrs, path, no_data_value, **rio_gdal_kwargs):
    """Write raster data to disk with rioxarray/GDAL.

    Parameters
    ----------
    xrs : xarray.DataArray
        The raster data to write.
    path : str
        Destination path. TIFF (or extension-less) outputs get dtype
        adjustments for GDAL compatibility; NetCDF/HDF/GRIB outputs are not
        implemented yet.
    no_data_value :
        Null value for the output. NOTE(review): not referenced in this
        body; presumably consumed by the caller or encoded on *xrs* —
        confirm.
    **rio_gdal_kwargs :
        Extra keyword arguments passed through to ``rio.to_raster``.
        ``blockheight``/``blockwidth`` are translated to GDAL's
        ``blockysize``/``blockxsize``.
    """
    ext = _get_extension(path)

    store_as_bits = False
    if not ext or ext in TIFF_EXTS:
        if xrs.dtype == I64:
            # GDAL, and thus rioxarray and rasterio, doesn't support I64 so
            # cast up to float. This avoids to_raster throwing a TypeError.
            xrs = xrs.astype(F64)
        elif is_bool(xrs.dtype):
            # GDAL doesn't support boolean dtype either so convert to uint8
            # 0-1 encoding.
            store_as_bits = True
            xrs = xrs.astype(U8)

    if ext and ext in WRITE_NOT_IMPLEMENTED_EXTS:
        # TODO: populate
        raise NotImplementedError()

    kwargs = {"lock": True, "compute": True, **rio_gdal_kwargs}
    # Translate the friendly block-size names into GDAL creation options.
    for alias, option in (
        ("blockheight", "blockysize"),
        ("blockwidth", "blockxsize"),
    ):
        if alias in kwargs:
            kwargs[option] = kwargs.pop(alias)
    if store_as_bits:
        # Store each entry using a single bit
        kwargs["nbits"] = 1
    xrs.rio.to_raster(path, **kwargs)
def _get_valid_variables(meta, ignore_too_many_dims):
    """Return the dataset variables that can be mapped to rasters.

    A variable is valid when it has 2 or 3 dimensions after squeezing.
    More than 3 raises DimensionsError unless *ignore_too_many_dims* is
    set, in which case the variable is skipped. Fewer than 2 always
    raises. Raises ValueError if no variables qualify.
    """
    data_vars = list(meta.data_vars)
    valid = []
    for v in data_vars:
        n = meta[v].squeeze().ndim
        if n > 3:
            if ignore_too_many_dims:
                continue
            else:
                raise DimensionsError(
                    f"Too many dimensions for variable {v!r} with "
                    f"{meta[v].ndim}."
                )
        elif n in (2, 3):
            valid.append(v)
        else:
            raise DimensionsError(
                f"Too few dimensions for variable {v!r} with {n}."
            )
    if not valid:
        raise ValueError("No valid raster variables found")
    return valid


def _build_raster(path, variable, affine, crs, xarray_kwargs):
    """Open one dataset variable and wrap it as a Raster.

    Datasets without a usable null value (``None`` or NaN) get the default
    null value for the resulting dtype.
    """
    from raster_tools.raster import data_to_raster

    if affine is None:
        # Fall back to a unit, north-up (negative y-scale) transform.
        # BUG FIX: Affine takes exactly 6 coefficients; the original call
        # passed 7 (a trailing extra 0), which always raised TypeError.
        affine = Affine(1, 0, 0, 0, -1, 0)
    kwargs = xarray_kwargs.copy()
    kwargs["chunks"] = "auto"
    var = xr.open_dataset(path, **kwargs)[variable].squeeze()
    x = var[var.rio.x_dim].to_numpy()
    y = var[var.rio.y_dim].to_numpy()
    # Prefer the explicit _FillValue attribute over rio's nodata.
    nv = var._FillValue if "_FillValue" in var.attrs else var.rio.nodata
    raster = data_to_raster(var.data, x=x, y=y, affine=affine, crs=crs, nv=nv)
    if nv is None or np.isnan(nv):
        raster = raster.set_null_value(get_default_null_value(raster.dtype))
    return raster


def _get_affine(ds):
    """Extract the affine transform from *ds*.

    Raises AffineEncodingError when the GeoTransform metadata is malformed.
    """
    try:
        affine = ds.rio.transform()
    except TypeError as err:
        # Some datasets like gridMET improperly encode the transform.
        raise AffineEncodingError(
            "Error reading GeoTransform data:"
            f"{ds.coords[ds.rio.grid_mapping].attrs['GeoTransform']!r}"
        ) from err
    return affine
def open_dataset(
    path,
    crs=None,
    ignore_extra_dim_errors=False,
    xarray_kwargs=None,
):
    """Open a netCDF or GRIB dataset.

    This function opens a netCDF or GRIB dataset file and returns a
    dictionary of Raster objects where each raster corresponds to a
    variable in the file. netCDF/GRIB files can be N-dimensional, while
    rasters only comprehend 2 to 3 dimensions (band, y, x), so it may not
    be possible to map all variables in a file to a raster. See the
    `ignore_extra_dim_errors` option below for more information.

    Parameters
    ----------
    path : str
        The path to the netCDF or GRIB dataset file.
    crs : str, rasterio.crs.CRS, optional
        A coordinate reference system definition to attach to the dataset.
        This can be an EPSG, PROJ, or WKT string. It can also be a
        `rasterio.crs.CRS` object. netCDF/GRIB files do not always encode a
        CRS. This option allows a CRS to be supplied, if known ahead of
        time. It can also be used to override the CRS encoded in the file.
    ignore_extra_dim_errors : bool, optional
        If ``True``, ignore dataset variables that cannot be mapped to a
        raster. An error is raised, otherwise. netCDF/GRIB files allow
        N-dimensional data. Rasters only comprehend 2 or 3 dimensional data
        so it is not always possible to map a variable to a raster. The
        default is ``False``.
    xarray_kwargs : dict, optional
        Keyword arguments to supply to `xarray.open_dataset` when opening
        the file.

    Raises
    ------
    raster_tools.io.AffineEncodingError
        Raised if the affine matrix is improperly encoded.

    Returns
    -------
    dataset : dict of Raster
        A ``dict`` of Raster objects. The keys are the variable names in
        the dataset file and the values are the corresponding variable
        data as a raster.

    """
    # BUG FIX: copy the caller's kwargs instead of mutating them in place.
    xarray_kwargs = {} if xarray_kwargs is None else dict(xarray_kwargs)
    xarray_kwargs["decode_coords"] = "all"
    tmp_ds = xr.open_dataset(path, **xarray_kwargs)
    data_vars = _get_valid_variables(tmp_ds, ignore_extra_dim_errors)
    crs = crs or tmp_ds.rio.crs
    affine = _get_affine(tmp_ds)
    # Drop the probing dataset before building the per-variable rasters.
    tmp_ds = None
    ds = {}
    for v in data_vars:
        ds[v] = _build_raster(path, v, affine, crs, xarray_kwargs)
    return ds