Coverage for py_tools_ds/geo/raster/conversion.py: 91%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
3# py_tools_ds - A collection of geospatial data analysis tools that simplify standard
4# operations when handling geospatial raster and vector data as well as projections.
5#
6# Copyright (C) 2016-2021
7# - Daniel Scheffler (GFZ Potsdam, daniel.scheffler@gfz-potsdam.de)
8# - Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences Potsdam,
9# Germany (https://www.gfz-potsdam.de/)
10#
11# This software was developed within the context of the GeoMultiSens project funded
12# by the German Federal Ministry of Education and Research
13# (project grant code: 01 IS 14 010 A-C).
14#
15# Licensed under the Apache License, Version 2.0 (the "License");
16# you may not use this file except in compliance with the License.
17# You may obtain a copy of the License at
18#
19# http://www.apache.org/licenses/LICENSE-2.0
20#
21# Unless required by applicable law or agreed to in writing, software
22# distributed under the License is distributed on an "AS IS" BASIS,
23# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24# See the License for the specific language governing permissions and
25# limitations under the License.
27__author__ = "Daniel Scheffler"
29import warnings
30import json
31from typing import Union
33from shapely.geometry import shape, Polygon, MultiPolygon, GeometryCollection
34from osgeo import gdal, osr, ogr # noqa
35import numpy as np
37from ...io.raster.gdal import get_GDAL_ds_inmem
38from ...processing.progress_mon import ProgressBar, Timer
39from ..raster.reproject import warp_ndarray
40from ..vector.topology import get_bounds_polygon, polyVertices_outside_poly, get_overlap_polygon
43def raster2polygon(array: np.ndarray,
44 gt: Union[list, tuple],
45 prj: Union[str, int],
46 DN2extract: int = 1,
47 exact: bool = True,
48 maxfeatCount: int = None,
49 min_npx: int = 1,
50 timeout: float = None,
51 progress: bool = True,
52 q: bool = False
53 ) -> Union[Polygon, MultiPolygon]:
54 """Calculate a footprint polygon for the given array.
56 :param array: 2D numpy array
57 :param gt: GDAL GeoTransform
58 :param prj: projection as WKT string, 'EPSG:1234' or <EPSG_int>
59 :param DN2extract: <int, float> pixel value to create polygons for
60 :param exact: whether to compute the exact footprint polygon or a simplified one for speed
61 (exact=False downsamples large input datasets before polygonizing)
62 :param maxfeatCount: the maximum expected number of polygons. If more polygons are found, every further
63 processing is cancelled and a RunTimeError is raised.
64 :param min_npx: minmal polygon area to be included in the result (in numbers of pixels; default: 1)
65 :param timeout: breaks the process after a given time in seconds
66 :param progress: show progress bars (default: True)
67 :param q: quiet mode (default: False)
68 :return:
69 """
70 # TODO
71 if maxfeatCount is not None:
72 warnings.warn("'maxfeatCount' is deprecated and will be removed soon.", DeprecationWarning) # pragma: no cover
74 if not isinstance(array.dtype, np.integer):
75 array = array.astype(int)
77 array = (array == DN2extract).astype(np.uint8)
79 assert array.ndim == 2, "Only 2D arrays are supported. Got a %sD array." % array.ndim
80 gt_orig = gt
81 rows, cols = shape_orig = array.shape
83 # downsample input array in case it has more than 1e8 pixels to prevent crash
84 if not exact and array.size > 1e8: # 10000 x 10000 px
85 # downsample with nearest neighbour
86 zoom_factor = (8000 * 8000 / array.size) ** 0.5
87 array, gt, prj = warp_ndarray(array, gt, prj,
88 out_gsd=(gt[1] / zoom_factor,
89 gt[5] / zoom_factor),
90 rspAlg='near',
91 CPUs=None, # all CPUs
92 q=True)
94 # remove raster polygons smaller than min_npx
95 if array.size > min_npx > 1:
96 drv = gdal.GetDriverByName('GTiff')
97 path_tmp = '/vsimem/raster2polygon_gdalsieve.tif'
99 ds = drv.Create(path_tmp, cols, rows, 1, gdal.GDT_Byte)
100 band = ds.GetRasterBand(1)
101 band.WriteArray(array)
103 callback = gdal.TermProgress_nocb if array.size > 1e8 and progress else None
104 gdal.SieveFilter(srcBand=band, maskBand=None, dstBand=band, threshold=min_npx,
105 # connectedness=4 if exact else 8,
106 connectedness=4, # 4-connectedness is 30% faster
107 callback=callback)
109 array = ds.ReadAsArray()
110 del ds, band
111 gdal.Unlink(path_tmp)
113 src_ds = get_GDAL_ds_inmem(array, gt, prj)
114 src_band = src_ds.GetRasterBand(1)
116 # Create a GeoJSON OGR datasource to put results in.
117 srs = osr.SpatialReference()
118 srs.ImportFromWkt(prj)
120 drv = ogr.GetDriverByName("GeoJSON")
121 path_geojson = "/vsimem/polygonize_result.geojson"
122 dst_ds = drv.CreateDataSource(path_geojson)
123 dst_layer = dst_ds.CreateLayer('polygonize_result', srs=srs)
124 dst_layer.CreateField(ogr.FieldDefn("DN", 4))
126 # set callback
127 callback = \
128 ProgressBar(prefix='Polygonize progress ',
129 suffix='Complete',
130 barLength=50,
131 timeout=timeout,
132 use_as_callback=True) \
133 if progress and not q else Timer(timeout, use_as_callback=True) if timeout else None
135 # run the algorithm
136 status = gdal.Polygonize(src_band,
137 src_band, # .GetMaskBand(),
138 dst_layer,
139 0,
140 [], # uses 4-connectedness for exact output (["8CONNECTED=8"] is much slower below)
141 # callback=gdal.TermProgress_nocb)
142 callback=callback)
144 del dst_layer, dst_ds
146 # handle exit status other than 0 (fail)
147 if status != 0:
148 errMsg = gdal.GetLastErrorMsg()
150 # Catch the KeyboardInterrupt raised in case of a timeout within the callback.
151 # It seems like there is no other way to catch exceptions within callbacks.
152 if errMsg == 'User terminated':
153 raise TimeoutError('raster2polygon timed out!')
155 raise Exception(errMsg)
157 # read virtual GeoJSON file as text
158 f = gdal.VSIFOpenL(path_geojson, 'r')
159 gdal.VSIFSeekL(f, 0, 2) # seek to end
160 size = gdal.VSIFTellL(f)
161 gdal.VSIFSeekL(f, 0, 0) # seek to beginning
162 content_str = gdal.VSIFReadL(1, size, f)
163 gdal.VSIFCloseL(f)
165 gdal.Unlink(path_geojson)
167 # convert JSON string to dict
168 gjs = json.loads(content_str)
170 def get_valid_polys(val: Union[Polygon, MultiPolygon, GeometryCollection]):
171 if isinstance(val, Polygon):
172 val = val if val.is_valid else val.buffer(0)
173 if isinstance(val, Polygon):
174 return val
176 return [get_valid_polys(g) for g in val.geoms]
178 # extract polygons from GeoJSON dict
179 polys = []
180 for f in gjs['features']:
181 if f['properties']['DN'] == str(1):
182 geom = shape(f["geometry"])
183 geom = get_valid_polys(geom)
185 if isinstance(geom, list):
186 polys.extend(geom)
187 else:
188 polys.append(geom)
190 # drop polygons with an area below npx
191 if min_npx:
192 area_1px = gt[1] * abs(gt[5])
193 area_min = min_npx * area_1px
195 polys = [p for p in polys if p.area >= area_min]
197 poly = MultiPolygon(polys)
199 # the downsampling in case exact=False may cause vertices of poly to be outside the input array bounds
200 # -> clip poly with bounds_poly in that case
201 if not exact:
202 bounds_poly = get_bounds_polygon(gt_orig, *shape_orig)
204 if polyVertices_outside_poly(poly, bounds_poly, 1e-5):
205 poly = get_overlap_polygon(poly, bounds_poly)['overlap poly']
207 return poly