So I found this Python script on another post here and was trying to adjust it for my needs. I'm a very novice Python user, so I'm struggling with how to modify the script. I have a feature class stored in a feature dataset, and I want to search one of its fields for duplicate values and populate a new field with Y for a duplicate or N for none. The script below is what I found; it looks like it will work once I find a way to drill down into my file geodatabase.
import arcpy

# NOTE: pointsShapefile is a placeholder carried over from the original post; it
# must be bound to the dataset path before this script runs. For a feature class
# inside a feature dataset of a file geodatabase the path is presumably of the
# form r"C:\path\to\data.gdb\dataset_name\feature_class_name" -- confirm locally.
inShapefile = pointsShapefile
checkField = "xyCombine"
updateField = "dplicate"

# Values seen at least once / values seen at least twice (the duplicates).
# Sets keep each membership test O(1); the values must be hashable.
occursOnce = set()
occursTwice = set()

# Pass 1: read every value of checkField and work out which ones repeat.
with arcpy.da.SearchCursor(inShapefile, [checkField]) as cursor:
    for row in cursor:
        value = row[0]
        # Skip NULLs only; 0 and "" are real values and are still checked.
        if value is None:
            continue
        if value in occursOnce:
            occursTwice.add(value)
        else:
            occursOnce.add(value)

# Pass 2: write "Y" for duplicated values and "N" for unique ones.
with arcpy.da.UpdateCursor(inShapefile, [checkField, updateField]) as cursor:
    for row in cursor:
        # Rows whose check value is NULL are left untouched.
        if row[0] is None:
            continue
        row[1] = "Y" if row[0] in occursTwice else "N"
        cursor.updateRow(row)
Answer
Something like this should work:
from collections import Counter

import arcpy

# NOTE: pointsShapefile is a placeholder from the question; it must be bound to
# the feature class path before this script runs.
inShapefile = pointsShapefile
checkField = "xyCombine"
updateField = "dplicate"

# Count every value of checkField in one pass. NULLs (None) are counted like
# any other value, so repeated NULLs are flagged as duplicates too.
with arcpy.da.SearchCursor(inShapefile, [checkField]) as rows:
    counts = Counter(r[0] for r in rows)

# Map each distinct value to its flag: 'Y' if it occurs more than once.
d = {item: ('Y' if n > 1 else 'N') for item, n in counts.items()}

with arcpy.da.UpdateCursor(inShapefile, [checkField, updateField]) as rows:
    for row in rows:
        # Only rows whose value was seen during the read pass are updated.
        if row[0] in d:
            row[1] = d[row[0]]
            rows.updateRow(row)
And as @mr.adam suggested, the dictionary is not needed. Here is the cleaner version:
import arcpy
def findDupes(inShapefile, checkField, updateField):
    """Flag every row as a duplicate ('Y') or not ('N') based on one field.

    Reads all values of ``checkField`` and then writes ``'Y'`` into
    ``updateField`` for rows whose value occurs more than once, ``'N'``
    otherwise. NULL values are counted like any other value.

    Args:
        inShapefile: Dataset handed to the arcpy.da cursors (e.g. the path
            to a feature class).
        checkField: Name of the field searched for repeated values.
        updateField: Name of the existing field that receives 'Y' or 'N'.
    """
    # Local import so this function does not depend on extra module-level imports.
    from collections import Counter

    # One pass to count occurrences, instead of list.count() once per row.
    with arcpy.da.SearchCursor(inShapefile, [checkField]) as rows:
        counts = Counter(r[0] for r in rows)

    with arcpy.da.UpdateCursor(inShapefile, [checkField, updateField]) as rows:
        for row in rows:
            # Counter returns 0 for unseen values, which maps to 'N'.
            row[1] = 'Y' if counts[row[0]] > 1 else 'N'
            rows.updateRow(row)
if __name__ == '__main__':
    # Example run: flag repeated Project_Manager values in the test feature
    # class, writing the result into the 'duplicates' field.
    feature_class = r'C:\TEMP\crm_test.gdb\test'
    check_field = 'Project_Manager'
    flag_field = 'duplicates'
    findDupes(feature_class, check_field, flag_field)
No comments:
Post a Comment