Saturday, 15 April 2017

Issue Trying to create Zonal Statistics using Gdal and Python.


I am trying to create zonal statistics using Python and Gdal. I have a polygon shapefile and a raster file, and in order to do so, I am using a piece of code I found in StackExchange.


The raster and shapefile used can be found here


Here is the code I am using:


#!/usr/bin/python
#coding: utf-8

## Code : http://gis.stackexchange.com/questions/21888/how-to-overlay-shapefile-and-raster

## User: ustroetz

# Calculates statistics (mean) on values of a raster within the zones of an polygon shapefile

import gdal, ogr, osr, numpy

def zonal_stats(input_value_raster, input_zone_polygon):




# Open data
raster = gdal.Open(input_value_raster)
driver = ogr.GetDriverByName('ESRI Shapefile')
shp = driver.Open(input_zone_polygon)
lyr = shp.GetLayer()

# get raster georeference info
transform = raster.GetGeoTransform()
xOrigin = transform[0]
yOrigin = transform[3]

pixelWidth = transform[1]
pixelHeight = transform[5]

# reproject geometry to same projection as raster
sourceSR = lyr.GetSpatialRef()
targetSR = osr.SpatialReference()
targetSR.ImportFromWkt(raster.GetProjectionRef())
coordTrans = osr.CoordinateTransformation(sourceSR,targetSR)
feat = lyr.GetNextFeature()
geom = feat.GetGeometryRef()

geom.Transform(coordTrans)

# Get extent of geometry
ring = geom.GetGeometryRef(0)
numpoints = ring.GetPointCount()
pointsX = []; pointsY = []
for p in range(numpoints):
lon, lat, z = ring.GetPoint(p)
pointsX.append(lon)
pointsY.append(lat)

xmin = min(pointsX)
xmax = max(pointsX)
ymin = min(pointsY)
ymax = max(pointsY)

# Specify offset and rows and columns to read
xoff = int((xmin - xOrigin)/pixelWidth)
yoff = int((yOrigin - ymax)/pixelWidth)
xcount = int((xmax - xmin)/pixelWidth)+1
ycount = int((ymax - ymin)/pixelWidth)+1



# create memory target raster
target_ds = gdal.GetDriverByName('MEM').Create('', xcount, ycount, gdal.GDT_Byte)
target_ds.SetGeoTransform((
xmin, pixelWidth, 0,
ymax, 0, pixelHeight,
))

# create for target raster the same projection as for the value raster

raster_srs = osr.SpatialReference()
raster_srs.ImportFromWkt(raster.GetProjectionRef())
target_ds.SetProjection(raster_srs.ExportToWkt())

# rasterize zone polygon to raster
gdal.RasterizeLayer(target_ds, [1], lyr, burn_values=[1])

# read raster as arrays
banddataraster = raster.GetRasterBand(1)
dataraster = banddataraster.ReadAsArray(xoff, yoff, xcount, ycount).astype(numpy.float)


bandmask = target_ds.GetRasterBand(1)
datamask = bandmask.ReadAsArray(0, 0, xcount, ycount).astype(numpy.float)

# mask zone of raster
zoneraster = numpy.ma.masked_array(dataraster, numpy.logical_not(datamask))

# calculate mean of zonal raster
return numpy.mean(zoneraster)


However, when I implement it, the output (numpy.mean(zoneraster)) is just a number and not an array or a raster. What am I missing? Is there something wrong with the files imput? The code?



Answer



If you want to get zonal statistics for several features in one shapefile, you have to loop over the zonal_stats function. You can write the results of the loop for example to a dictionary. Below is the modified zonal_stats function together with a loop, looping over the input shapefile. As an output you get a Dictionary containing for each Feature ID the mean of the covered raster.


For your sample dataset the dictionary for the first five features looks like this


{0: 114.57909872798288, 1: 21.889622561136882, 2: 287.79623686237102, 3: 35.350804240486838, 4: 19.63043032511781}

The two functions:


import gdal, ogr, osr, numpy



def zonal_stats(feat, input_zone_polygon, input_value_raster):

# Open data
raster = gdal.Open(input_value_raster)
shp = ogr.Open(input_zone_polygon)
lyr = shp.GetLayer()

# Get raster georeference info
transform = raster.GetGeoTransform()
xOrigin = transform[0]

yOrigin = transform[3]
pixelWidth = transform[1]
pixelHeight = transform[5]

# Get extent of feat
geom = feat.GetGeometryRef()
if (geom.GetGeometryName() == 'MULTIPOLYGON'):
count = 0
pointsX = []; pointsY = []
for polygon in geom:

geomInner = geom.GetGeometryRef(count)
ring = geomInner.GetGeometryRef(0)
numpoints = ring.GetPointCount()
for p in range(numpoints):
lon, lat, z = ring.GetPoint(p)
pointsX.append(lon)
pointsY.append(lat)
count += 1
elif (geom.GetGeometryName() == 'POLYGON'):
ring = geom.GetGeometryRef(0)

numpoints = ring.GetPointCount()
pointsX = []; pointsY = []
for p in range(numpoints):
lon, lat, z = ring.GetPoint(p)
pointsX.append(lon)
pointsY.append(lat)

else:
sys.exit()


xmin = min(pointsX)
xmax = max(pointsX)
ymin = min(pointsY)
ymax = max(pointsY)

# Specify offset and rows and columns to read
xoff = int((xmin - xOrigin)/pixelWidth)
yoff = int((yOrigin - ymax)/pixelWidth)
xcount = int((xmax - xmin)/pixelWidth)+1
ycount = int((ymax - ymin)/pixelWidth)+1


# Create memory target raster
target_ds = gdal.GetDriverByName('MEM').Create('', xcount, ycount, gdal.GDT_Byte)
target_ds.SetGeoTransform((
xmin, pixelWidth, 0,
ymax, 0, pixelHeight,
))

# Create for target raster the same projection as for the value raster
raster_srs = osr.SpatialReference()

raster_srs.ImportFromWkt(raster.GetProjectionRef())
target_ds.SetProjection(raster_srs.ExportToWkt())

# Rasterize zone polygon to raster
gdal.RasterizeLayer(target_ds, [1], lyr, burn_values=[1])

# Read raster as arrays
banddataraster = raster.GetRasterBand(1)
dataraster = banddataraster.ReadAsArray(xoff, yoff, xcount, ycount).astype(numpy.float)


bandmask = target_ds.GetRasterBand(1)
datamask = bandmask.ReadAsArray(0, 0, xcount, ycount).astype(numpy.float)

# Mask zone of raster
zoneraster = numpy.ma.masked_array(dataraster, numpy.logical_not(datamask))

# Calculate statistics of zonal raster
return numpy.mean(zoneraster)



def loop_zonal_stats(input_zone_polygon, input_value_raster):

shp = ogr.Open(input_zone_polygon)
lyr = shp.GetLayer()
featList = range(lyr.GetFeatureCount())
statDict = {}

for FID in featList:
feat = lyr.GetFeature(FID)
meanValue = zonal_stats(feat, input_zone_polygon, input_value_raster)

statDict[FID] = meanValue
return statDict



# Raster dataset
input_value_raster = 'popc_0ADProj.tif'
# Vector dataset(zones)
input_zone_polygon = 'borders_tribes.shp'


print loop_zonal_stats(input_zone_polygon, input_value_raster)

No comments:

Post a Comment

arcpy - Changing output name when exporting data driven pages to JPG?

Is there a way to save the output JPG, changing the output file name to the page name, instead of page number? I mean changing the script fo...