# +
from pathlib import Path
from xml.sax.saxutils import escape, quoteattr

import pandas as pd
from exactextract import exact_extract
from joblib import Parallel, delayed
from osgeo import gdal, ogr


def _warped_vrt(vector, layer, target_wkt):
    """Return OGR VRT XML exposing `layer` of `vector` reprojected to `target_wkt`."""
    # Paths, layer names and WKT are interpolated into XML, so escape them to
    # keep the document well-formed if they contain '&', '<', '>' or quotes.
    return (
        "<OGRVRTDataSource>"
        "<OGRVRTWarpedLayer>"
        f"<OGRVRTLayer name={quoteattr(layer)}>"
        f"<SrcDataSource>{escape(vector)}</SrcDataSource>"
        f"<SrcLayer>{escape(layer)}</SrcLayer>"
        "</OGRVRTLayer>"
        f"<TargetSRS>{escape(target_wkt)}</TargetSRS>"
        "</OGRVRTWarpedLayer>"
        "</OGRVRTDataSource>"
    )


def extract(raster, vector, layer, output=None):
    """Compute the per-polygon mean of a raster and write it to a file.

    Args:
        raster: Path to the raster, opened with ``gdal.Open``.
        vector: Path to the vector data source holding the polygons.
        layer: Name of the layer inside ``vector`` to use.
        output: File name handed to exactextract's GDAL writer. Defaults to
            the raster's base name with a ``.csv`` suffix, i.e. a file in the
            current working directory.

    Returns:
        The output file name that was written.

    Raises:
        ValueError: If the raster has no spatial reference to reproject to.
    """
    if output is None:
        output = Path(raster).with_suffix('.csv').name

    gdal.UseExceptions()
    ogr.UseExceptions()

    rast = gdal.Open(raster)
    srs = rast.GetSpatialRef()
    if srs is None:
        raise ValueError(f"raster has no spatial reference: {raster}")

    # Read the polygons through a warped VRT so they are reprojected on the
    # fly into the raster's coordinate system before extraction.
    polys = ogr.Open(_warped_vrt(vector, layer, srs.ExportToWkt()))

    exact_extract(rast, polys, 'mean', output='gdal',
                  output_options={'filename': output})
    return output
# -


vector = "/home/rstudio/source.coop/cboettig/pad-us-3/PADUS3_0Geopackage.gpkg"
layer = "PADUS3_0Combined_DOD_TRIB_Fee_Designation_Easement"
rasters = [
    "/home/rstudio/boettiger-lab/us-pa-policy/hfp_2021_100m_v1-2_cog.tif",
    '/home/rstudio/source.coop/vizzuality/lg-land-carbon-data/deforest_carbon_100m_cog.tif',
    '/home/rstudio/source.coop/vizzuality/lg-land-carbon-data/natcrop_bii_100m_cog.tif',
    '/home/rstudio/source.coop/vizzuality/lg-land-carbon-data/natcrop_fii_100m_cog.tif',
    '/home/rstudio/source.coop/vizzuality/lg-land-carbon-data/natcrop_expansion_100m_cog.tif',
    '/home/rstudio/source.coop/vizzuality/lg-land-carbon-data/natcrop_reduction_100m_cog.tif',
    '/home/rstudio/source.coop/cboettig/carbon/cogs/irrecoverable_c_total_2018.tif',
    '/home/rstudio/source.coop/cboettig/carbon/cogs/manageable_c_total_2018.tif',
    '/home/rstudio/minio/shared-biodiversity/redlist/cog/combined_rwr_2022.tif',
    '/home/rstudio/minio/shared-biodiversity/redlist/cog/combined_sr_2022.tif',
    # '/home/rstudio/source.coop/cboettig/mobi/species-richness-all/SpeciesRichness_All.tif',
    # '/home/rstudio/source.coop/cboettig/mobi/range-size-rarity-all/RSR_All.tif',
]

# extract(rasters[0], vector, layer)  # just one

# +
# %%time
# extract all the raster layers in parallel
parallel_extract = delayed(extract)
runner = Parallel(n_jobs=-1)
# Keep the returned file names: they identify exactly the CSVs this run made.
outputs = runner(parallel_extract(i, vector, layer) for i in rasters)

# +
# bind columns into a single csv
# Use only the files produced above rather than globbing the working
# directory, so a previous "pad_raster_means.csv" or unrelated CSVs are
# neither read as inputs nor deleted afterwards.
csv_files = [Path(name) for name in outputs]

data = {file.stem: pd.read_csv(file)['mean'] for file in csv_files}
pd.DataFrame(data).to_csv("pad_raster_means.csv")
# -

# remove the per-raster intermediates now that they are merged
for file in csv_files:
    file.unlink()