Coverage for py_tools_ds/geo/raster/conversion.py: 91%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

82 statements  

1# -*- coding: utf-8 -*- 

2 

3# py_tools_ds - A collection of geospatial data analysis tools that simplify standard 

4# operations when handling geospatial raster and vector data as well as projections. 

5# 

6# Copyright (C) 2016-2021 

7# - Daniel Scheffler (GFZ Potsdam, daniel.scheffler@gfz-potsdam.de) 

8# - Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences Potsdam, 

9# Germany (https://www.gfz-potsdam.de/) 

10# 

11# This software was developed within the context of the GeoMultiSens project funded 

12# by the German Federal Ministry of Education and Research 

13# (project grant code: 01 IS 14 010 A-C). 

14# 

15# Licensed under the Apache License, Version 2.0 (the "License"); 

16# you may not use this file except in compliance with the License. 

17# You may obtain a copy of the License at 

18# 

19# http://www.apache.org/licenses/LICENSE-2.0 

20# 

21# Unless required by applicable law or agreed to in writing, software 

22# distributed under the License is distributed on an "AS IS" BASIS, 

23# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

24# See the License for the specific language governing permissions and 

25# limitations under the License. 

26 

27__author__ = "Daniel Scheffler" 

28 

29import warnings 

30import json 

31from typing import Union 

32 

33from shapely.geometry import shape, Polygon, MultiPolygon, GeometryCollection 

34from osgeo import gdal, osr, ogr # noqa 

35import numpy as np 

36 

37from ...io.raster.gdal import get_GDAL_ds_inmem 

38from ...processing.progress_mon import ProgressBar, Timer 

39from ..raster.reproject import warp_ndarray 

40from ..vector.topology import get_bounds_polygon, polyVertices_outside_poly, get_overlap_polygon 

41 

42 

43def raster2polygon(array: np.ndarray, 

44 gt: Union[list, tuple], 

45 prj: Union[str, int], 

46 DN2extract: int = 1, 

47 exact: bool = True, 

48 maxfeatCount: int = None, 

49 min_npx: int = 1, 

50 timeout: float = None, 

51 progress: bool = True, 

52 q: bool = False 

53 ) -> Union[Polygon, MultiPolygon]: 

54 """Calculate a footprint polygon for the given array. 

55 

56 :param array: 2D numpy array 

57 :param gt: GDAL GeoTransform 

58 :param prj: projection as WKT string, 'EPSG:1234' or <EPSG_int> 

59 :param DN2extract: <int, float> pixel value to create polygons for 

60 :param exact: whether to compute the exact footprint polygon or a simplified one for speed 

61 (exact=False downsamples large input datasets before polygonizing) 

62 :param maxfeatCount: the maximum expected number of polygons. If more polygons are found, every further 

63 processing is cancelled and a RunTimeError is raised. 

64 :param min_npx: minmal polygon area to be included in the result (in numbers of pixels; default: 1) 

65 :param timeout: breaks the process after a given time in seconds 

66 :param progress: show progress bars (default: True) 

67 :param q: quiet mode (default: False) 

68 :return: 

69 """ 

70 # TODO 

71 if maxfeatCount is not None: 

72 warnings.warn("'maxfeatCount' is deprecated and will be removed soon.", DeprecationWarning) # pragma: no cover 

73 

74 if not isinstance(array.dtype, np.integer): 

75 array = array.astype(int) 

76 

77 array = (array == DN2extract).astype(np.uint8) 

78 

79 assert array.ndim == 2, "Only 2D arrays are supported. Got a %sD array." % array.ndim 

80 gt_orig = gt 

81 rows, cols = shape_orig = array.shape 

82 

83 # downsample input array in case it has more than 1e8 pixels to prevent crash 

84 if not exact and array.size > 1e8: # 10000 x 10000 px 

85 # downsample with nearest neighbour 

86 zoom_factor = (8000 * 8000 / array.size) ** 0.5 

87 array, gt, prj = warp_ndarray(array, gt, prj, 

88 out_gsd=(gt[1] / zoom_factor, 

89 gt[5] / zoom_factor), 

90 rspAlg='near', 

91 CPUs=None, # all CPUs 

92 q=True) 

93 

94 # remove raster polygons smaller than min_npx 

95 if array.size > min_npx > 1: 

96 drv = gdal.GetDriverByName('GTiff') 

97 path_tmp = '/vsimem/raster2polygon_gdalsieve.tif' 

98 

99 ds = drv.Create(path_tmp, cols, rows, 1, gdal.GDT_Byte) 

100 band = ds.GetRasterBand(1) 

101 band.WriteArray(array) 

102 

103 callback = gdal.TermProgress_nocb if array.size > 1e8 and progress else None 

104 gdal.SieveFilter(srcBand=band, maskBand=None, dstBand=band, threshold=min_npx, 

105 # connectedness=4 if exact else 8, 

106 connectedness=4, # 4-connectedness is 30% faster 

107 callback=callback) 

108 

109 array = ds.ReadAsArray() 

110 del ds, band 

111 gdal.Unlink(path_tmp) 

112 

113 src_ds = get_GDAL_ds_inmem(array, gt, prj) 

114 src_band = src_ds.GetRasterBand(1) 

115 

116 # Create a GeoJSON OGR datasource to put results in. 

117 srs = osr.SpatialReference() 

118 srs.ImportFromWkt(prj) 

119 

120 drv = ogr.GetDriverByName("GeoJSON") 

121 path_geojson = "/vsimem/polygonize_result.geojson" 

122 dst_ds = drv.CreateDataSource(path_geojson) 

123 dst_layer = dst_ds.CreateLayer('polygonize_result', srs=srs) 

124 dst_layer.CreateField(ogr.FieldDefn("DN", 4)) 

125 

126 # set callback 

127 callback = \ 

128 ProgressBar(prefix='Polygonize progress ', 

129 suffix='Complete', 

130 barLength=50, 

131 timeout=timeout, 

132 use_as_callback=True) \ 

133 if progress and not q else Timer(timeout, use_as_callback=True) if timeout else None 

134 

135 # run the algorithm 

136 status = gdal.Polygonize(src_band, 

137 src_band, # .GetMaskBand(), 

138 dst_layer, 

139 0, 

140 [], # uses 4-connectedness for exact output (["8CONNECTED=8"] is much slower below) 

141 # callback=gdal.TermProgress_nocb) 

142 callback=callback) 

143 

144 del dst_layer, dst_ds 

145 

146 # handle exit status other than 0 (fail) 

147 if status != 0: 

148 errMsg = gdal.GetLastErrorMsg() 

149 

150 # Catch the KeyboardInterrupt raised in case of a timeout within the callback. 

151 # It seems like there is no other way to catch exceptions within callbacks. 

152 if errMsg == 'User terminated': 

153 raise TimeoutError('raster2polygon timed out!') 

154 

155 raise Exception(errMsg) 

156 

157 # read virtual GeoJSON file as text 

158 f = gdal.VSIFOpenL(path_geojson, 'r') 

159 gdal.VSIFSeekL(f, 0, 2) # seek to end 

160 size = gdal.VSIFTellL(f) 

161 gdal.VSIFSeekL(f, 0, 0) # seek to beginning 

162 content_str = gdal.VSIFReadL(1, size, f) 

163 gdal.VSIFCloseL(f) 

164 

165 gdal.Unlink(path_geojson) 

166 

167 # convert JSON string to dict 

168 gjs = json.loads(content_str) 

169 

170 def get_valid_polys(val: Union[Polygon, MultiPolygon, GeometryCollection]): 

171 if isinstance(val, Polygon): 

172 val = val if val.is_valid else val.buffer(0) 

173 if isinstance(val, Polygon): 

174 return val 

175 

176 return [get_valid_polys(g) for g in val.geoms] 

177 

178 # extract polygons from GeoJSON dict 

179 polys = [] 

180 for f in gjs['features']: 

181 if f['properties']['DN'] == str(1): 

182 geom = shape(f["geometry"]) 

183 geom = get_valid_polys(geom) 

184 

185 if isinstance(geom, list): 

186 polys.extend(geom) 

187 else: 

188 polys.append(geom) 

189 

190 # drop polygons with an area below npx 

191 if min_npx: 

192 area_1px = gt[1] * abs(gt[5]) 

193 area_min = min_npx * area_1px 

194 

195 polys = [p for p in polys if p.area >= area_min] 

196 

197 poly = MultiPolygon(polys) 

198 

199 # the downsampling in case exact=False may cause vertices of poly to be outside the input array bounds 

200 # -> clip poly with bounds_poly in that case 

201 if not exact: 

202 bounds_poly = get_bounds_polygon(gt_orig, *shape_orig) 

203 

204 if polyVertices_outside_poly(poly, bounds_poly, 1e-5): 

205 poly = get_overlap_polygon(poly, bounds_poly)['overlap poly'] 

206 

207 return poly