# climate-policy-tracker/batch_scripts/caps_directory_reader.py
# Uploaded by umangchaudhry ("Upload 5 files", commit 53c9473, 754 bytes).
import os
import pandas as pd
import re
# Folder holding the plan PDFs, resolved relative to the current working directory.
caps_folder = "./CAPS"

# Expected file name shape: "<City>, <ST> <Plan Type> <YYYY>.pdf".
# Capture groups, in order: city, two-letter upper-case state code,
# plan type (at least three characters), four-digit year.
pattern = re.compile(r"^(.*?),\s([A-Z]{2})\s(.{3,}?)\s(\d{4})\.pdf$")


def parse_caps_filename(name):
    """Split one CAPS file name into ``[city, state, year, plan_type]``.

    Args:
        name: A bare file name (no directory part), e.g.
            ``"Austin, TX Climate Action Plan 2015.pdf"``.

    Returns:
        A four-item list of strings ordered to match the CSV columns
        (City, State, Year, Plan Type), or ``None`` when *name* does not
        match ``pattern``.
    """
    match = pattern.match(name)
    if match is None:
        return None
    # The regex captures plan type before year; the CSV wants year first.
    city, state, plan_type, year = match.groups()
    return [city.strip(), state, year, plan_type.strip()]


def main():
    """Scan ``caps_folder`` and write the parsed file names to ``./caps_plans.csv``.

    Raises:
        OSError: If ``caps_folder`` cannot be listed (for example, it does
            not exist) or the CSV cannot be written.
    """
    rows = []
    skipped = []
    # os.listdir returns names in arbitrary order; sort so the CSV is reproducible.
    for name in sorted(os.listdir(caps_folder)):
        row = parse_caps_filename(name)
        if row is None:
            skipped.append(name)
        else:
            rows.append(row)

    df = pd.DataFrame(rows, columns=["City", "State", "Year", "Plan Type"])
    df.to_csv("./caps_plans.csv", index=False)
    print("CSV file saved to: caps_plans.csv")

    # Surface files that were left out so naming mistakes do not go unnoticed.
    if skipped:
        print(f"Skipped {len(skipped)} file(s) not matching the naming pattern:")
        for name in skipped:
            print(f"  {name}")


if __name__ == "__main__":
    main()