Thursday 13 February 2020

arcgis 10.1 - Zonal statistics for millions of overlapping polygons in ArcPy?


I am running ArcGIS 10.1 with 64-bit GP on Windows 8.1 x64.


I want to calculate the zonal statistics of buffer polygons based on parcels and a kernel density raster. The 4.7 million polygons overlap and are stored in a single feature class, making the problem particularly difficult. I've tried to use the supplemental Spatial Analyst Tools developed by ESRI in various ways, but the script isn't fast enough. I've tried to compute the zonal statistics piece by piece, using small census areas (with several hundred buffer polygons each) and large ones (thousands of buffer polygons). None of these implementations perform adequately. I've included the following features in a Python script:



  • loading buffer polygons into the in_memory workspace before loading into the ZS tool

  • creating feature layers where needed

  • writing the results to a single text file



The code for performing ZS one polygon at a time is posted below, however the tool only completes about one parcel every minute. I'm wondering if anyone has any insight on how to improve zonal statistics performance or the script in general.


import arcpy, os, string, csv
from arcpy import env

# arcpy.ImportToolbox("D:\ClusterAnalysis_Deployment\SpatialAnalystSupplementalTools\Spatial Analyst Supplemental Tools.pyt")
arcpy.CheckOutExtension("Spatial")

# LOCATIONS #
# Local machine paths

gdb = r"D:\path\to\geodatabase.gdb"
results = r"\path\to\results"

# Virtual machine paths

# INPUT DATA #
b = gdb + "\\" + "ParcelBuffers"
kd = gdb + "\\" + "kdensity750"

env.workspace = gdb

env.overwriteOutput = True

# TEMPORARY DATA #
buffers = arcpy.MakeFeatureLayer_management(b, "bdata")
kdensity = arcpy.Raster(kd)

parcelList = []

cursor = arcpy.da.SearchCursor(buffers, ("PIN"))
for row in cursor:

parcelList.append(row[0])
del cursor

countDict = {}
countDict["Count:"] = 0

print "Script setup... done"

# GEOPROCESSING #
for PIN in parcelList:

parcel_ram = "in_memory" + "\\" + "parcel"
zs_table = results + "\\" + "zs_table_" + str(PIN) + ".dbf"
solution = results + "\\" + "ZS_solutions.txt"

arcpy.SelectLayerByAttribute_management(buffers, "NEW_SELECTION", "\"PIN\" = \'" + str(PIN) + "\'")
count = int(arcpy.GetCount_management(buffers).getOutput(0))
arcpy.CopyFeatures_management(buffers, parcel_ram)
arcpy.SelectLayerByAttribute_management(buffers, "CLEAR_SELECTION")
arcpy.gp.ZonalStatisticsAsTable_sa(parcel_ram, "PIN", kdensity, zs_table, "DATA", "ALL")


table = arcpy.gp.MakeTableView_management(zs_table, "zs_table_lyr")

fields = arcpy.ListFields(table)
field_names = map(lambda n:n.name, fields)
header = string.join(field_names, "\t")

with open(solution, 'w') as x:
x.write(header + "\n")
cursor = arcpy.SearchCursor(table)
for row in cursor:

values = string.join(map(lambda n: str(row.getValue(n)), field_names), "\t")
x.write(values + "\n")
del row, cursor

countDict["Count:"] = countDict["Count:"] + count
print "Zonal statistics computed and saved for parcel: " + str(countDict["Count:"])

arcpy.Delete_management(parcel_ram)
arcpy.Delete_management(zs_table)
arcpy.Delete_management(table)

del count

After some further testing of Felix's script, I think the way my data sets are set up is wrong, causing the spatial join feature query to pull up polygons that do overlap. Here are the data sets and their relevant attributes:



  • fishnet feature (ID'd by long integer "FnID")

  • minBound feature (ID'd by long integer "FnID")

  • parentLR (Join FID is FnID and Target FID is parID, where parID is long integer field created for buffer polygons)


The script:


##  Zonal statistics on a set of overlapping polygons.
##  Setup: environment, field names, input paths and intermediate data sets.

import arcpy, traceback, os, sys, time
from arcpy import env

arcpy.CheckOutExtension("Spatial")

# Raw strings so backslashes in Windows paths can never be interpreted
# as escape sequences (the originals only worked because \G, \i, \p
# happen not to be valid escapes).
env.workspace = r"C:\Geoprocessing\ClusterAnalysis"
env.overwriteOutput = True

# Field name of fishnet ID
parID = "FnID"
# Copy of the fishnet ID produced by the self spatial join
parID2 = "FnID_1"

# Location of input map document containing the minBound (minimum bounding geometry)
# layer and parentLR (spatial join of fishnet to polygons) layer
mapDocument = env.workspace + "\\" + "ClusterAnalysis.mxd"
raster = env.workspace + r"\ClusterAnalysis.gdb\intKD750"
rast = env.workspace + r"\ClusterAnalysis.gdb\parcelRaster"
kdensity = arcpy.Raster(raster)

dbf = "stat.dbf"    # output zonal statistics table
joinLR = "SD.shp"   # intermediate data set for determining non-overlapping polygons
subset = "subset"
##try:
def showPyMessage():
    """Emit a timestamped progress message to the geoprocessing window.

    NOTE(review): relies on a global ``message`` being assigned before the
    call — confirm callers always set it first.
    """
    timestamp = str(time.ctime())
    arcpy.AddMessage(timestamp + " - " + message)
def Get_V(aKey):
    """Return the zonal MEAN stored for aKey in the global smallDict.

    Returns -1 when the key has no computed statistic (e.g. the polygon
    produced no zonal-statistics row).  The original used a bare
    ``except:`` which silently swallowed every error, not just misses.
    """
    return smallDict.get(aKey, -1)
def pgonsCount(aLayer):
    """Return the number of features currently visible in aLayer as an int."""
    return int(arcpy.GetCount_management(aLayer).getOutput(0))

# Define variables based on layers in ArcMap document
mxd = arcpy.mapping.MapDocument(mapDocument)
layers = arcpy.mapping.ListLayers(mxd)
minBound, parentLR = layers[0], layers[1]

# Process one fishnet cell (minimum bounding geometry) at a time:
# within each cell, repeatedly pick a maximal NON-overlapping subset of
# polygons, run zonal statistics on it, and remove it, until every
# polygon in the cell has a statistic.
with arcpy.da.SearchCursor(minBound, ("SHAPE@", "FnID")) as clipper:
    for rcrd in clipper:
        feat = rcrd[0]
        env.extent = feat.extent  # restrict all geoprocessing to this cell
        fp = '"FnID"=' + str(rcrd[1])
        parentLR.definitionQuery = fp
        nF = pgonsCount(parentLR)
        arcpy.AddMessage("Processing subset %i containing %i polygons" % (rcrd[1], nF))

        # Self spatial join: one output row per overlapping
        # (polygon, neighbour) pair.
        arcpy.AddMessage("Defining neighbours...")
        arcpy.SpatialJoin_analysis(parentLR, parentLR, joinLR, "JOIN_ONE_TO_MANY")

        # dictFeatures maps each polygon id -> tuple of neighbour ids.
        arcpy.AddMessage("Creating empty dictionary")
        dictFeatures = {}
        with arcpy.da.SearchCursor(parentLR, parID) as cursor:
            for row in cursor:
                dictFeatures[row[0]] = ()

        arcpy.AddMessage("Assigning neighbours...")
        nF = pgonsCount(joinLR)
        arcpy.SetProgressor("step", "", 0, nF, 1)
        with arcpy.da.SearchCursor(joinLR, (parID, parID2)) as cursor:
            for row in cursor:
                dictFeatures[row[0]] += (row[1],)
                arcpy.SetProgressorPosition()

        arcpy.AddMessage("Defining non-overlapping subsets...")
        runNo = 0
        while True:
            parentLR.definitionQuery = fp
            # toShow stays a tuple because str(toShow) is spliced into the
            # SQL IN clause below; `shown`/`toHide` are sets so the
            # membership tests are O(1) instead of the original O(n)
            # tuple scans (quadratic over thousands of polygons).
            toShow = ()
            shown, toHide = set(), set()
            nF = len(dictFeatures)
            arcpy.SetProgressor("step", "", 0, nF, 1)
            for item in dictFeatures:
                if item not in shown and item not in toHide:
                    toShow += (item,)
                    shown.add(item)
                    toHide.update(dictFeatures[item])
                arcpy.SetProgressorPosition()
            m = len(toShow)
            quer = '"parID" IN ' + str(toShow) + " AND " + fp
            if m == 1:
                # str((x,)) would render as "(x,)" — invalid SQL.
                quer = '"parID" = ' + str(toShow[0]) + " AND " + fp
            parentLR.definitionQuery = quer
            runNo += 1
            arcpy.AddMessage("Run %i, %i polygon(s) found" % (runNo, m))

            arcpy.AddMessage("Running Statistics...")
            arcpy.gp.ZonalStatisticsAsTable_sa(parentLR, "PIN", kdensity, dbf, "DATA", "MEAN")

            # Copy the MEAN values from the stats table back onto the
            # currently shown polygons.
            arcpy.AddMessage("Data transfer...")
            smallDict = {}
            with arcpy.da.SearchCursor(dbf, (parID, "MEAN")) as cursor:
                for row in cursor:
                    smallDict[row[0]] = row[1]
            with arcpy.da.UpdateCursor(parentLR, (parID, "MEAN")) as cursor:
                for row in cursor:
                    row[1] = Get_V(row[0])
                    cursor.updateRow(row)

            # Drop the processed polygons and loop until none remain.
            for item in toShow:
                del dictFeatures[item]
            if not dictFeatures:
                break
            parentLR.definitionQuery = fp

del dictFeatures
parentLR.definitionQuery = ''

Produces the zonal statistics error: http://resources.arcgis.com/en/help/main/10.1/index.html#//00vq0000000s010423




No comments:

Post a Comment

arcpy - Changing output name when exporting data driven pages to JPG?

Is there a way to save the output JPG, changing the output file name to the page name, instead of page number? I mean changing the script fo...