I am running ArcGIS 10.1 with 64-bit GP on Windows 8.1 x64.
I want to calculate the zonal statistics of buffer polygons based on parcels and a kernel density raster. The 4.7 million polygons overlap and are stored in a single feature class, making the problem particularly difficult. I've tried to use the supplemental Spatial Analyst Tools developed by ESRI in various ways, but the script isn't fast enough. I've tried to compute the zonal statistics piece by piece, using small census areas (with several hundred buffer polygons each) and large ones (thousands of buffer polygons). None of these implementations perform adequately. I've included the following features in a Python script:
- loading buffer polygons into the in_memory workspace before loading into the ZS tool
- creating feature layers where needed
- writing the results to a single text file
The code for performing ZS one polygon at a time is posted below, however the tool only completes about one parcel every minute. I'm wondering if anyone has any insight on how to improve zonal statistics performance or the script in general.
import arcpy, os, string, csv
from arcpy import env
# arcpy.ImportToolbox("D:\ClusterAnalysis_Deployment\SpatialAnalystSupplementalTools\Spatial Analyst Supplemental Tools.pyt")
arcpy.CheckOutExtension("Spatial")
# LOCATIONS #
# Local machine paths
gdb = r"D:\path\to\geodatabase.gdb"
results = r"\path\to\results"
# Virtual machine paths
# INPUT DATA #
b = gdb + "\\" + "ParcelBuffers"
kd = gdb + "\\" + "kdensity750"
env.workspace = gdb
env.overwriteOutput = True
# TEMPORARY DATA #
buffers = arcpy.MakeFeatureLayer_management(b, "bdata")
kdensity = arcpy.Raster(kd)
parcelList = []
cursor = arcpy.da.SearchCursor(buffers, ("PIN"))
for row in cursor:
parcelList.append(row[0])
del cursor
countDict = {}
countDict["Count:"] = 0
print "Script setup... done"
# GEOPROCESSING #
for PIN in parcelList:
parcel_ram = "in_memory" + "\\" + "parcel"
zs_table = results + "\\" + "zs_table_" + str(PIN) + ".dbf"
solution = results + "\\" + "ZS_solutions.txt"
arcpy.SelectLayerByAttribute_management(buffers, "NEW_SELECTION", "\"PIN\" = \'" + str(PIN) + "\'")
count = int(arcpy.GetCount_management(buffers).getOutput(0))
arcpy.CopyFeatures_management(buffers, parcel_ram)
arcpy.SelectLayerByAttribute_management(buffers, "CLEAR_SELECTION")
arcpy.gp.ZonalStatisticsAsTable_sa(parcel_ram, "PIN", kdensity, zs_table, "DATA", "ALL")
table = arcpy.gp.MakeTableView_management(zs_table, "zs_table_lyr")
fields = arcpy.ListFields(table)
field_names = map(lambda n:n.name, fields)
header = string.join(field_names, "\t")
with open(solution, 'w') as x:
x.write(header + "\n")
cursor = arcpy.SearchCursor(table)
for row in cursor:
values = string.join(map(lambda n: str(row.getValue(n)), field_names), "\t")
x.write(values + "\n")
del row, cursor
countDict["Count:"] = countDict["Count:"] + count
print "Zonal statistics computed and saved for parcel: " + str(countDict["Count:"])
arcpy.Delete_management(parcel_ram)
arcpy.Delete_management(zs_table)
arcpy.Delete_management(table)
del count
After some further testing of Felix's script, I think the way my data sets are set up is wrong, leading the spatial join feature query to pull up polygons that do overlap. Here are the data sets and their relevant attributes:
- fishnet feature (ID'd by long integer "FnID")
- minBound feature (ID'd by long integer "FnID")
- parentLR (Join FID is FnID and Target FID is parID, where parID is long integer field created for buffer polygons)
The script:
## Zonal statistics on a set of overlapping
import arcpy, traceback, os, sys, time
from arcpy import env
arcpy.CheckOutExtension("Spatial")
# Raw strings throughout: the originals ("C:\Geoprocessing...",
# "...gdb\intKD750") only worked because \G, \C, \i and \p happen not to
# be recognised escape sequences — a path containing \t or \n would
# silently corrupt.  os.path.join keeps the separators correct too.
env.workspace = r"C:\Geoprocessing\ClusterAnalysis"
env.overwriteOutput = True
# Field names of the fishnet ID in parentLR and in the self spatial join.
parID = "FnID"
parID2 = "FnID_1"
# Input map document containing the minBound (minimum bounding geometry)
# layer and the parentLR (spatial join of fishnet to polygons) layer.
mapDocument = os.path.join(env.workspace, "ClusterAnalysis.mxd")
raster = os.path.join(env.workspace, "ClusterAnalysis.gdb", "intKD750")
rast = os.path.join(env.workspace, "ClusterAnalysis.gdb", "parcelRaster")
kdensity = arcpy.Raster(raster)
dbf = "stat.dbf"   # output zonal statistics table
joinLR = "SD.shp"  # intermediate data set for determining non-overlapping polygons
subset = "subset"
##try:
def showPyMessage(msg=None):
    """Write a timestamped message to the geoprocessing message stream.

    msg -- text to log; when omitted, falls back to the module-level
    ``message`` variable exactly as the original (zero-argument) version
    did.  The original always read that global, which is never assigned
    in this script and so raised NameError unless set elsewhere; passing
    the text explicitly avoids that trap while remaining call-compatible.
    """
    if msg is None:
        msg = message  # module-level global, as in the original
    arcpy.AddMessage(str(time.ctime()) + " - " + msg)
def Get_V(aKey):
    """Return the zonal MEAN cached in smallDict for aKey, or -1 if absent.

    The original wrapped the lookup in a bare ``except:`` which also
    swallowed genuine faults (e.g. NameError if smallDict was never
    built); dict.get handles only the missing-key case.
    """
    return smallDict.get(aKey, -1)
def pgonsCount(aLayer):
    """Return the number of features currently visible in aLayer."""
    return int(arcpy.GetCount_management(aLayer).getOutput(0))
# Define variables based on layers in ArcMap document.
# NOTE(review): assumes layer 0 is minBound and layer 1 is parentLR;
# reordering layers in the .mxd silently breaks this — confirm.
mxd = arcpy.mapping.MapDocument(mapDocument)
layers = arcpy.mapping.ListLayers(mxd)
minBound,parentLR=layers[0],layers[1]
# Query parentLR polygons based on fishnet grid of minBound polygon:
# process one minimum-bounding polygon (one census-area subset) at a time.
with arcpy.da.SearchCursor(minBound, ("SHAPE@","FnID")) as clipper:
    for rcrd in clipper:
        feat=rcrd[0]
        # Restrict the analysis extent to this subset to speed raster ops.
        env.extent=feat.extent
        fp='"FnID"='+str(rcrd[1])
        parentLR.definitionQuery=fp
        nF=pgonsCount(parentLR)
        arcpy.AddMessage("Processing subset %i containing %i polygons" %(rcrd[1],nF))
        arcpy.AddMessage("Defining neighbours...")
        # Self spatial join: each output row pairs a polygon with one
        # polygon it intersects (including itself).
        arcpy.SpatialJoin_analysis(parentLR, parentLR, joinLR, "JOIN_ONE_TO_MANY")
        arcpy.AddMessage("Creating empty dictionary")
        # dictFeatures: polygon id -> tuple of ids of polygons it overlaps.
        dictFeatures = {}
        with arcpy.da.SearchCursor(parentLR, parID) as cursor:
            for row in cursor:
                dictFeatures[row[0]]=()
        del row, cursor
        arcpy.AddMessage("Assigning neighbours...")
        nF=pgonsCount(joinLR)
        arcpy.SetProgressor("step", "", 0, nF,1)
        with arcpy.da.SearchCursor(joinLR, (parID,parID2)) as cursor:
            for row in cursor:
                aKey=row[0]
                aList=dictFeatures[aKey]
                aList+=(row[1],)
                dictFeatures[aKey]=aList
                arcpy.SetProgressorPosition()
        del row, cursor
        arcpy.AddMessage("Defining non-overlapping subsets...")
        runNo=0
        # Greedy pass: repeatedly pick a maximal set of mutually
        # non-overlapping polygons (toShow), run zonal statistics on just
        # that set, remove them, and repeat until every polygon is done.
        while (True):
            parentLR.definitionQuery=fp
            toShow,toHide=(),()
            nF=len(dictFeatures)
            arcpy.SetProgressor("step", "", 0, nF,1)
            for item in dictFeatures:
                # Keep item only if it neither is, nor overlaps, an
                # already-kept polygon.
                if item not in toShow and item not in toHide:
                    toShow+=(item,)
                    toHide+=(dictFeatures[item])
                arcpy.SetProgressorPosition()
            m=len(toShow)
            # NOTE(review): the field literal "parID" below does not match
            # the parID variable ("FnID") used to key dictFeatures.  If the
            # layer has no field actually named parID this query selects the
            # wrong records — likely the source of the reported zonal
            # statistics error; confirm the field name in parentLR.
            quer='"parID" IN '+str(toShow)+ " AND "+fp
            if m==1:
                # str() of a one-tuple yields "(x,)" — invalid SQL, so fall
                # back to an equality test.
                quer='"parID" = '+str(toShow[0])+ " AND "+fp
            parentLR.definitionQuery=quer
            runNo+=1
            arcpy.AddMessage("Run %i, %i polygon(s) found" % (runNo,m))
            arcpy.AddMessage("Running Statistics...")
            # NOTE(review): zone field is "PIN" here but the dbf is read
            # back by parID ("FnID") below — confirm both refer to the same
            # identifier or the transfer dictionary will be empty.
            arcpy.gp.ZonalStatisticsAsTable_sa(parentLR, "PIN", kdensity, dbf,"DATA", "MEAN")
            arcpy.AddMessage("Data transfer...")
            # Cache this run's MEAN values, then write them back onto the
            # currently visible (non-overlapping) polygons.
            smallDict={}
            with arcpy.da.SearchCursor(dbf, (parID,"MEAN")) as cursor:
                for row in cursor:
                    smallDict[row[0]]=row[1]
            del row, cursor
            with arcpy.da.UpdateCursor(parentLR, (parID,"MEAN")) as cursor:
                for row in cursor:
                    aKey=row[0]
                    row[1]=Get_V(aKey)
                    cursor.updateRow(row)
            del row, cursor
            # Drop the processed polygons; loop until none remain.
            for item in toShow:
                del dictFeatures[item]
            m=len(dictFeatures)
            if m==0:
                break
            parentLR.definitionQuery=fp
        del smallDict, dictFeatures
# Clear the definition query so the layer shows everything again.
parentLR.definitionQuery=''
Produces the zonal statistics error: http://resources.arcgis.com/en/help/main/10.1/index.html#//00vq0000000s010423