Wednesday 11 September 2019

Iterate over ImageCollection returning pandas dataframe using earth engine & python


I'm trying to extract values from an ImageCollection at a series of points. My goal is to create a pandas dataframe that includes, for each point, the values from each image and band of the ImageCollection.


I'm using Google Earth Engine and Python in a Docker container, run on my local machine (using the set-up described here): https://developers.google.com/earth-engine/python_install-datalab-local



My approach is to:




  1. Create a FeatureCollection with a few points




  2. Import ImageCollection, bound by the above points




  3. Create an initial pandas dataframe





  4. Create a function (extract_point_values) that takes an image and a pandas dataframe as inputs. The values of the image are extracted at the points of the FeatureCollection and put into a new pandas dataframe, which is then appended to the dataframe that was passed into the function.




The issue is in the "extract_point_values" function and how I'm iterating over the imageCollection. Is it possible to return a pandas dataframe using the iterate function on an imageCollection? If so, any ideas as to what might be going wrong?


%%bash
# Install the system libraries and Python packages the notebook needs.
apt-get update -y

# All packages must be on one command line: a wrapped package name
# (e.g. "zlib1g-" / "dev") is not a valid apt package.
apt-get install -y -q python-dev python-pip libxml2-dev libxslt1-dev zlib1g-dev libffi-dev libssl-dev

pip install geopandas
pip install shapely

.


import ee, datetime
import pandas as pd
import geopandas as gpd
import matplotlib.dates as mdates

from IPython.display import Image
from matplotlib import dates
from shapely.geometry import shape
import skimage
ee.Initialize()
%matplotlib inline

# ==========================================================================
# Function to Convert Feature Classes to Pandas Dataframe
# Adapted from: https://events.hpc.grnet.gr/event/47/material/1/12.py

def fc2df(fc):
    """Convert an Earth Engine FeatureCollection into a GeoDataFrame.

    The collection is downloaded to the client with ``getInfo()``, so this
    helper performs a blocking request each time it is called.

    Args:
        fc: Object whose ``getInfo()`` returns a GeoJSON-like dict with a
            ``'features'`` list, where every feature has ``'properties'``
            and ``'geometry'`` entries (e.g. an ``ee.FeatureCollection``).

    Returns:
        A ``geopandas.GeoDataFrame`` with one row per feature: one column
        per feature property plus a ``geometry`` column holding shapely
        geometries. An empty collection yields an empty frame.
    """
    # Features is a list of dicts with the output.
    features = fc.getInfo()['features']

    records = []
    for feature in features:
        # Copy the attributes so the downloaded GeoJSON dict is not mutated.
        record = dict(feature['properties'])
        # Convert the GeoJSON geometry to a shapely object right away.
        # (Assigning ``map(...)`` to a column afterwards is lazy on
        # Python 3 and does not work on an empty frame.)
        record['geometry'] = shape(feature['geometry'])
        records.append(record)

    return gpd.GeoDataFrame(records)


# ==========================================================================
# Function to iterate over image collection, returning a pandas dataframe
# THIS FUNCTION ISN'T WORKING
def extract_point_values(image, df):
    """Extract ``image`` values at the global ``points`` and add them to ``df``.

    NOTE: this is a client-side function. It calls ``getInfo()`` (directly
    and through ``fc2df``), which needs a concrete image. When the function
    is handed to ``ImageCollection.iterate()``, ``image`` is only a
    server-side placeholder and ``getInfo()`` fails with the
    ``EEException`` about ``ArgumentRef``. Call it in a plain Python loop
    with real ``ee.Image`` objects instead.

    Args:
        image: ``ee.Image`` to sample; its metadata must contain a
            ``DATE_ACQUIRED`` property.
        df: DataFrame that the rows extracted from ``image`` are added to.

    Returns:
        A new frame made of ``df`` followed by one row per point, with the
        mean band values, the geometry, and a ``date`` column.
    """
    # Extract values of rasters to points.
    image_red = image.reduceRegions(collection=points,
                                    reducer=ee.Reducer.mean(),
                                    scale=30)

    # Convert output to pandas data frame.
    image_red_pd = fc2df(image_red)

    # Add date variable to data frame.
    image_red_pd['date'] = image.getInfo()['properties']['DATE_ACQUIRED']

    # DataFrame.append was removed in pandas 2.0; concat does the same job.
    return pd.concat([df, image_red_pd])


# ==========================================================================
#### Make Points
points = ee.FeatureCollection([
    ee.Feature(ee.Geometry.Point(14.742607, -17.494993)),
    ee.Feature(ee.Geometry.Point(14.715903, -17.450650)),
])

#### Load Raster
l8 = (ee.ImageCollection('LANDSAT/LC8_L1T')
      .filterDate('2015-01-01', '2015-12-31')
      .filterBounds(points))


#### Create Initial Pandas Dataframe
l8_singleImage = ee.Image('LANDSAT/LC8_L1T/LC80440342014077LGN00')

l8_singleImage_red = l8_singleImage.reduceRegions(collection=points,
                                                  reducer=ee.Reducer.mean(),
                                                  scale=30)

l8_singleImage_red_pd = fc2df(l8_singleImage_red)

l8_singleImage_red_pd['date'] = (
    l8_singleImage.getInfo()['properties']['DATE_ACQUIRED'])

# Drop both rows so only the column layout is kept as the starting frame.
l8_singleImage_red_pd = l8_singleImage_red_pd.drop([0, 1])

#### Iterate over image collection
# NOTE: this is the call that raises the EEException; extract_point_values
# calls getInfo(), which cannot run inside a server-side iterate().
pd_all = l8.iterate(extract_point_values, l8_singleImage_red_pd)

Here's the error that's returned


    EEExceptionTraceback (most recent call last)

in ()
66
67 #### Iterate over image collection
---> 68 pd_all = l8.iterate(extract_point_values, l8_singleImage_red_pd)

/src/earthengine-api/python/ee/collection.pyc in iterate(self, algorithm, first)
223 with_cast = lambda e, prev: algorithm(element_type(e), prev)
224 return apifunction.ApiFunction.call_(
--> 225 'Collection.iterate', self, with_cast, first)


/src/earthengine-api/python/ee/apifunction.pyc in call_(cls, name, *args,
**kwargs)
79 a recognized return type, the returned value will be cast to
that type.
80 """
---> 81 return cls.lookup(name).call(*args, **kwargs)
82
83 @classmethod

/src/earthengine-api/python/ee/function.pyc in call(self, *args, **kwargs)

65 to that type.
66 """
---> 67 return self.apply(self.nameArgs(args, kwargs))
68
69 def apply(self, named_args):

/src/earthengine-api/python/ee/function.pyc in apply(self, named_args)
78 to that type.
79 """
---> 80 result = computedobject.ComputedObject(self,

self.promoteArgs(named_args))
81 return Function._promoter(result, self.getReturnType())
82

/src/earthengine-api/python/ee/function.pyc in promoteArgs(self, args)
105 name = spec['name']
106 if name in args:
--> 107 promoted_args[name] = Function._promoter(args[name], spec['type'])
108 elif not spec.get('optional'):
109 raise ee_exception.EEException(


/src/earthengine-api/python/ee/__init__.pyc in _Promote(arg, klass)
208 # A native function that needs to be wrapped.
209 args_count = len(inspect.getargspec(arg).args)
--> 210 return CustomFunction.create(arg, 'Object', ['Object'] * args_count)
211 elif isinstance(arg, Encodable):
212 # An ee.Function or a computed function like the return value of

/src/earthengine-api/python/ee/customfunction.pyc in create(func, return_type,
arg_types)

99 'args': args
100 }
--> 101 return CustomFunction(signature, func)
102
103 @staticmethod

/src/earthengine-api/python/ee/customfunction.pyc in __init__(self, signature,
body)
36 # The signature of the function.
37 self._signature = CustomFunction._resolveNamelessArgs(

---> 38 signature, variables, body)
39
40 # The expression to evaluate.

/src/earthengine-api/python/ee/customfunction.pyc in
_resolveNamelessArgs(signature, variables, body)
143 count += CountFunctions(sub_expression)
144 return count
--> 145 serialized_body = serializer.encode(body(*variables))
146 base_name = '_MAPPING_VAR_%d_' % CountFunctions(serialized_body)

147

/src/earthengine-api/python/ee/collection.pyc in (e, prev)
221 """
222 element_type = self.elementType()
--> 223 with_cast = lambda e, prev: algorithm(element_type(e), prev)
224 return apifunction.ApiFunction.call_(
225 'Collection.iterate', self, with_cast, first)

in extract_point_values(image, df)

33
34 # Convert output to pandas data frame
---> 35 image_red_pd = fc2df(image_red)
36
37 # Add date variable to data frame

in fc2df(fc)
5 # Convert a FeatureCollection into a pandas DataFrame
6 # Features is a list of dict with the output
----> 7 features = fc.getInfo()['features']

8
9 dictarr = []

/src/earthengine-api/python/ee/collection.pyc in getInfo(self)
125 properties.
126 """
--> 127 return super(Collection, self).getInfo()
128
129 def limit(self, maximum, opt_property=None, opt_ascending=None):


/src/earthengine-api/python/ee/computedobject.pyc in getInfo(self)
93 The object can evaluate to anything.
94 """
---> 95 return data.getValue({'json': self.serialize()})
96
97 def encode(self, encoder):

/src/earthengine-api/python/ee/data.pyc in getValue(params)
253 """
254 params['json_format'] = 'v2'

--> 255 return send_('/value', params)
256
257

/src/earthengine-api/python/ee/data.pyc in send_(path, params, opt_method,
opt_raw)
795 raise ee_exception.EEException('Invalid JSON: %s' % content)
796 if 'error' in json_content:
--> 797 raise ee_exception.EEException(json_content['error']['message'])
798 if 'data' not in content:

799 raise ee_exception.EEException('Malformed response: ' +
str(content))

EEException: Failed to decode JSON.
Error: Field 'value' of object '{"type":"ArgumentRef","value":null}' is missing or null.
Object: {"type":"ArgumentRef","value":null}.

Answer



I found a solution: make a list of the scene IDs from the ImageCollection and iterate over that list. Inside the loop I load each image individually instead of mapping/iterating over the ImageCollection. There is probably a more efficient way to do this, but this gets the job done.



# ==========================================================================
# Function to Convert Feature Classes to Pandas Dataframe
# Adapted from: https://events.hpc.grnet.gr/event/47/material/1/12.py
def fc2df(fc):
    """Convert an Earth Engine FeatureCollection into a GeoDataFrame.

    The collection is downloaded to the client with ``getInfo()``, so this
    helper performs a blocking request each time it is called.

    Args:
        fc: Object whose ``getInfo()`` returns a GeoJSON-like dict with a
            ``'features'`` list, where every feature has ``'properties'``
            and ``'geometry'`` entries (e.g. an ``ee.FeatureCollection``).

    Returns:
        A ``geopandas.GeoDataFrame`` with one row per feature: one column
        per feature property plus a ``geometry`` column holding shapely
        geometries. An empty collection yields an empty frame.
    """
    # Features is a list of dicts with the output.
    features = fc.getInfo()['features']

    records = []
    for feature in features:
        # Copy the attributes so the downloaded GeoJSON dict is not mutated.
        record = dict(feature['properties'])
        # Convert the GeoJSON geometry to a shapely object right away.
        # (Assigning ``map(...)`` to a column afterwards is lazy on
        # Python 3 and does not work on an empty frame.)
        record['geometry'] = shape(feature['geometry'])
        records.append(record)

    return gpd.GeoDataFrame(records)

# ==========================================================================
# Function to iterate over image collection, returning a pandas dataframe
def extract_point_values(img_id, pts):
    """Return the mean band values of one image at a set of points.

    This is a client-side helper: it loads the image by ID and downloads
    results with ``getInfo()`` (directly and through ``fc2df``), so call it
    from an ordinary Python loop rather than from a server-side
    ``map()``/``iterate()``.

    Args:
        img_id: Asset ID passed to ``ee.Image``; the image metadata must
            contain a ``DATE_ACQUIRED`` property.
        pts: Collection of point features passed as the ``collection``
            argument of ``reduceRegions``.

    Returns:
        The frame produced by ``fc2df`` for the reduced features, with an
        extra ``date`` column holding the image's ``DATE_ACQUIRED`` value.
    """
    image = ee.Image(img_id)

    # Reduce the image over the points with a mean reducer.
    fc_image_red = image.reduceRegions(collection=pts,
                                       reducer=ee.Reducer.mean(),
                                       scale=30)

    # Convert to Pandas Dataframe.
    df_image_red = fc2df(fc_image_red)

    # Add Date as Variable.
    df_image_red['date'] = image.getInfo()['properties']['DATE_ACQUIRED']

    return df_image_red


# ==========================================================================
#### Make Points
points = ee.FeatureCollection([
    ee.Feature(ee.Geometry.Point(14.742607, -17.494993)),
    ee.Feature(ee.Geometry.Point(14.715903, -17.450650)),
])

#### Load Raster
l8 = (ee.ImageCollection('LANDSAT/LC8_L1T')
      .filterDate('2015-01-01', '2015-12-31')
      .filterBounds(points))


#### Make list of image IDs
# str() keeps the ID a text string on both Python 2 and 3; the former
# .encode('ascii', 'ignore') yields bytes on Python 3, which cannot be
# concatenated with the 'LANDSAT/LC8_L1T/' prefix.
l8_id = [
    'LANDSAT/LC8_L1T/' + str(f['properties']['LANDSAT_SCENE_ID'])
    for f in l8.getInfo()['features']
]

#### Iterate over all images
# Extract each image once and stack the per-image frames in a single
# concat. This avoids the extra request that was only used to build an
# empty seed frame, and DataFrame.append (removed in pandas 2.0).
frames = [extract_point_values(i, points) for i in l8_id]
# pd.concat raises on an empty list, so fall back to an empty frame.
df_all = pd.concat(frames) if frames else gpd.GeoDataFrame()

#### Display Results
df_all

No comments:

Post a Comment

arcpy - Changing output name when exporting data driven pages to JPG?

Is there a way to save the output JPG, changing the output file name to the page name, instead of page number? I mean changing the script fo...