Source code for raster_tools.io

import os
import urllib
import urllib.parse
from pathlib import Path

import dask
import numpy as np
import rasterio as rio
import rioxarray as xrio
import xarray as xr
from affine import Affine
from dask.array.core import normalize_chunks as dask_chunks

from raster_tools.dtypes import F32, F64, I64, U8, is_bool, is_float, is_int
from raster_tools.exceptions import (
    AffineEncodingError,
    DimensionsError,
    RasterDataError,
    RasterIOError,
)
from raster_tools.utils import to_chunk_dict, validate_path


def _get_extension(path):
    return os.path.splitext(path)[-1].lower()


def _get_chunking_info_from_file(src_file):
    """Read chunking metadata from a raster file on disk.

    Returns a ``(tile_shape, shape, dtype)`` tuple where both shapes are
    3D ``(band, y, x)`` tuples derived from the file's first band.
    """
    with rio.open(src_file) as src:
        block_shape = src.block_shapes[0]
        return (
            (1, *block_shape),
            (src.count, *src.shape),
            np.dtype(src.dtypes[0]),
        )


def _get_chunks(data=None, src_file=None):
    chunks = (1, "auto", "auto")
    if data is None:
        if src_file is None:
            return chunks
        tile_shape, shape, dtype = _get_chunking_info_from_file(src_file)
    else:
        shape = data.shape
        dtype = data.dtype
        tile_shape = None
        if dask.is_dask_collection(data):
            tile_shape = data.chunks
        elif src_file is not None:
            _, tile_shape, _ = _get_chunking_info_from_file(src_file)
    return dask_chunks(chunks, shape, dtype=dtype, previous_chunks=tile_shape)


def chunk(xrs, src_file=None):
    """Rechunk *xrs* using chunk sizes derived from its data (and file).

    For a Dataset, chunking is computed from its ``raster`` variable;
    otherwise from the DataArray itself.
    """
    data = xrs.raster if isinstance(xrs, xr.Dataset) else xrs
    chunk_map = to_chunk_dict(_get_chunks(data, src_file))
    return xrs.chunk(chunk_map)
# Recognized file extensions, grouped by format family.
TIFF_EXTS = frozenset((".tif", ".tiff"))
NC_EXTS = frozenset((".cdf", ".nc", ".nc4"))
HDF_EXTS = frozenset((".hdf", ".h4", ".hdf4", ".he2", ".h5", ".hdf5", ".he5"))
GRIB_EXTS = frozenset((".grib", ".grib2", ".grb", ".grb2", ".gb", ".gb2"))
BATCH_EXTS = frozenset((".bch",))

# File extensions that can't be read in yet
READ_NOT_IMPLEMENTED_EXTS = NC_EXTS | HDF_EXTS | GRIB_EXTS
# File extensions that can't be written out yet
WRITE_NOT_IMPLEMENTED_EXTS = NC_EXTS | HDF_EXTS | GRIB_EXTS

# Input types that the I/O helpers understand as raster paths.
IO_UNDERSTOOD_TYPES = (str, Path)
def is_batch_file(path):
    """Return ``True`` if *path* has a batch-script (``.bch``) extension."""
    ext = _get_extension(path)
    return ext in BATCH_EXTS
# ESRI software uses the minimum finite float32 value as its default
# null/nodata value for F32 rasters.
ESRI_DEFAULT_F32_NV = np.finfo(F32).min
def normalize_null_value(nv, dtype):
    """Coerce a null value into a type consistent with *dtype*.

    ESRI's default F32 null value is re-registered as F32, and
    whole-number float null values paired with integer dtypes are cast
    to plain ``int``.
    """
    # Make sure that ESRI's default F32 null value is properly
    # registered as F32.
    is_esri_f32_default = (
        dtype == F32
        and nv is not None
        and np.isclose(nv, ESRI_DEFAULT_F32_NV)
    )
    if is_esri_f32_default:
        nv = F32.type(nv)
    # Some rasters have (u)int dtype and a whole-number null value that
    # gets read in as a float. That causes accidental type promotions
    # down the pipeline, so cast it back to int here.
    if is_int(dtype) and is_float(nv) and float(nv).is_integer():
        nv = int(nv)
    return nv
def open_raster_from_path_or_url(path):
    """Open a raster at a local path or URL as a ``(band, y, x)`` DataArray.

    Parameters
    ----------
    path : str, pathlib.Path
        Local file path or URL pointing at raster data.

    Returns
    -------
    xarray.DataArray
        Lazily chunked, normalized data with null-value metadata written.

    Raises
    ------
    RasterIOError
        If *path* is not a str/Path or cannot be opened as a raster.
    RasterDataError
        If the source contains more than one data variable.
    NotImplementedError
        If the file is a NetCDF, HDF, or GRIB file.
    """
    from raster_tools.raster import (
        _try_to_get_null_value_xarray,
        normalize_xarray_data,
    )

    # BUG FIX: the original used `type(path) in IO_UNDERSTOOD_TYPES`,
    # which rejects Path instances because Path() actually constructs a
    # PosixPath/WindowsPath subclass. isinstance accepts them.
    if isinstance(path, IO_UNDERSTOOD_TYPES):
        path = str(path)
    else:
        raise RasterIOError(
            f"Could not resolve input to a raster path or URL: '{path}'"
        )
    # BUG FIX: urlparse() returns a ParseResult, which never compares
    # equal to "". Check the scheme attribute to detect local paths;
    # otherwise every input was treated as a URL and never validated.
    if urllib.parse.urlparse(path).scheme == "":
        # Assume file path
        validate_path(path)
        ext = _get_extension(path)
    else:
        # URL: no extension-based dispatch.
        ext = ""
    xrs = None
    # Try to let gdal open anything but NC, HDF, GRIB files
    if not ext or ext not in READ_NOT_IMPLEMENTED_EXTS:
        try:
            xrs = xrio.open_rasterio(
                path, chunks=to_chunk_dict(_get_chunks()), lock=False
            )
        except rio.errors.RasterioIOError as e:
            raise RasterIOError(
                "Could not open given path as a raster."
            ) from e
    else:
        raise NotImplementedError(
            "Reading of NetCDF, HDF, and GRIB files is not supported at this"
            " time."
        )
    if isinstance(xrs, xr.Dataset):
        raise RasterDataError("Too many data variables in input data")
    assert isinstance(
        xrs, xr.DataArray
    ), "Resulting data structure must be a DataArray"

    if not dask.is_dask_collection(xrs):
        xrs = chunk(xrs, path)
    xrs = normalize_xarray_data(xrs)
    nv = _try_to_get_null_value_xarray(xrs)
    nv = normalize_null_value(nv, xrs.dtype)
    xrs = xrs.rio.write_nodata(nv)
    return xrs
def write_raster(xrs, path, no_data_value, **rio_gdal_kwargs):
    """Write *xrs* to *path* via rioxarray/GDAL.

    NOTE(review): ``no_data_value`` is accepted but never used here;
    kept for interface compatibility — confirm against callers.
    """
    ext = _get_extension(path)
    rio_is_bool = False
    if not ext or ext in TIFF_EXTS:
        if xrs.dtype == I64:
            # GDAL, and thus rioxarray and rasterio, doesn't support I64
            # so cast up to float. This avoids to_raster throwing a
            # TypeError.
            xrs = xrs.astype(F64)
        elif is_bool(xrs.dtype):
            # GDAL doesn't support boolean dtype either, so convert to a
            # uint8 0-1 encoding.
            rio_is_bool = True
            xrs = xrs.astype(U8)
    if ext and ext in WRITE_NOT_IMPLEMENTED_EXTS:
        # TODO: populate
        raise NotImplementedError()
    kwargs = {"lock": True, "compute": True, **rio_gdal_kwargs}
    # Translate convenience names to GDAL's creation option names.
    aliases = (("blockheight", "blockysize"), ("blockwidth", "blockxsize"))
    for alias, target in aliases:
        if alias in kwargs:
            kwargs[target] = kwargs.pop(alias)
    if rio_is_bool:
        # Store each entry using a single bit
        kwargs["nbits"] = 1
    xrs.rio.to_raster(path, **kwargs)
def _get_valid_variables(meta, ignore_too_many_dims): data_vars = list(meta.data_vars) valid = [] for v in data_vars: n = meta[v].squeeze().ndim if n > 3: if ignore_too_many_dims: continue else: raise DimensionsError( f"Too many dimensions for variable {v!r} with " f"{meta[v].ndim}." ) elif n in (2, 3): valid.append(v) else: raise DimensionsError( f"Too few dimensions for variable {v!r} with {n}." ) if not valid: raise ValueError("No valid raster variables found") return valid def _build_xr_raster(path, variable, affine, crs, xarray_kwargs): if affine is None: affine = Affine(1, 0, 0, 0, -1, 0, 0) kwargs = xarray_kwargs.copy() kwargs["chunks"] = "auto" var = xr.open_dataset(path, **kwargs)[variable].squeeze() var_data = var.data if var.ndim == 2: var_data = np.expand_dims(var_data, axis=0) var_data = var_data.rechunk((1, "auto", "auto")) band = np.array(list(range(var_data.shape[0]))) x = var[var.rio.x_dim].to_numpy() y = var[var.rio.y_dim].to_numpy() new_var = xr.DataArray( var_data, dims=["band", "y", "x"], coords=(band, y, x) ) new_var = new_var.rio.write_transform(affine) if crs is not None: new_var = new_var.rio.write_crs(crs) nv = var._FillValue if "_FillValue" in var.attrs else var.rio.nodata if nv is not None: new_var = new_var.rio.write_nodata(nv) return new_var def _get_affine(ds): try: affine = ds.rio.transform() except TypeError as err: # Some datasets like gridMET improperly encode the transform. raise AffineEncodingError( "Error reading GeoTransform data:" f"{ds.coords[ds.rio.grid_mapping].attrs['GeoTransform']!r}" ) from err return affine
def open_dataset(
    path,
    crs=None,
    ignore_extra_dim_errors=False,
    xarray_kwargs=None,
):
    """Open a netCDF or GRIB dataset.

    This function opens a netCDF or GRIB dataset file and returns a
    dictionary of Raster objects where each raster corresponds to a
    variable in the file. netCDF/GRIB files can be N-dimensional, while
    rasters only comprehend 2 to 3 dimensions (band, y, x), so it may
    not be possible to map all variables in a file to a raster. See the
    `ignore_extra_dim_errors` option below for more information.

    Parameters
    ----------
    path : str
        The path to the netCDF or GRIB dataset file.
    crs : str, rasterio.crs.CRS, optional
        A coordinate reference system definition to attach to the
        dataset. This can be an EPSG, PROJ, or WKT string. It can also
        be a `rasterio.crs.CRS` object. netCDF/GRIB files do not always
        encode a CRS. This option allows a CRS to be supplied, if known
        ahead of time. It can also be used to override the CRS encoded
        in the file.
    ignore_extra_dim_errors : bool, optional
        If ``True``, ignore dataset variables that cannot be mapped to
        a raster. An error is raised, otherwise. netCDF/GRIB files
        allow N-dimensional data. Rasters only comprehend 2 or 3
        dimensional data so it is not always possible to map a variable
        to a raster. The default is ``False``.
    xarray_kwargs : dict, optional
        Keyword arguments to supply to `xarray.open_dataset` when
        opening the file.

    Raises
    ------
    raster_tools.io.AffineEncodingError
        Raised if the affine matrix is improperly encoded.

    Returns
    -------
    dataset : dict of Raster
        A ``dict`` of Raster objects. The keys are the variable names
        in the dataset file and the values are the corresponding
        variable data as a raster.

    """
    from raster_tools.raster import Raster

    # BUG FIX: copy before modifying so the caller's dict is not
    # mutated in place by the decode_coords assignment below.
    xarray_kwargs = dict(xarray_kwargs) if xarray_kwargs else {}
    xarray_kwargs["decode_coords"] = "all"
    tmp_ds = xr.open_dataset(path, **xarray_kwargs)
    data_vars = _get_valid_variables(tmp_ds, ignore_extra_dim_errors)
    crs = crs or tmp_ds.rio.crs
    affine = _get_affine(tmp_ds)
    # Drop the exploratory dataset before reopening per variable.
    tmp_ds = None
    ds = {}
    for v in data_vars:
        var = _build_xr_raster(path, v, affine, crs, xarray_kwargs)
        ds[v] = Raster(var)
    return ds