import sys
sys.path.append("speckleUtils")
from speckleUtils import speckle_utils  # local helper module, made importable above

import os
import json
import pandas as pd
import copy
from functools import wraps
from specklepy.api.client import SpeckleClient
# Provides preprocess_dataFrame, align_dataframes, extract_distance_matrices,
# attraction_proNode_full_iter, production_proNode_total and computeTrips.
from tripGenerationFunc import *

import gradio as gr
import requests
from huggingface_hub import webhook_endpoint, WebhookPayload
from fastapi import Request
import datetime

current_directory = os.path.dirname(os.path.abspath(__file__))
# Path to the config.json file
config_file_path = os.path.join(current_directory, "config.json")

# Check if the config.json file exists
if os.path.exists(config_file_path):
    # Load the JSON data from config.json
    with open(config_file_path, 'r') as f:
        config = json.load(f)

    # Expose the config keys as module-level variables with the same names as
    # in the JSON. (At module scope locals() is globals(), so update() works
    # here; it would not inside a function.)
    globals().update(config)
    print("variables loaded from config.json")
    # Now you can access the variables directly
    print(STREAM_ID)
    print(BRANCH_NAME_LAND_USES)
    print(TARGET_TRIP_RATE)
    print(ALPHA_LOW)
    print(F_VALUES_MANUAL)
    print(distance_matrices_of_interest)
    print(redistributeTrips)
    print(DISTANCE_BRACKETS)
    print(XLS_FILE_PATH)
    print("==================")
else:
    # Fail fast: everything below depends on the config variables.
    raise FileNotFoundError("config.json not found in the current directory.")

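# For reference, a minimal config.json might look like this (illustrative
# values only; the real file must define every key used below):
# {
#     "STREAM_ID": "your-stream-id",
#     "BRANCH_NAME_LAND_USES": "land_uses",
#     "BRANCH_NAME_DISTANCE_MATRIX": "distance_matrix",
#     "BRANCH_NAME_METRIC_DIST_MATRIX": "metric_distance_matrix",
#     "TARGET_BRANCH_TM": "trip_matrices",
#     "TARGET_TRIP_RATE": 2.0,
#     "SCALING_FACTOR": 1.0,
#     "ALPHA_LOW": 0.1, "ALPHA_MED": 0.2, "ALPHA_HIGH": 0.4,
#     "ALPHA": 0.25, "ALPHA_UNIFORM": 0.3,
#     "F_VALUES_MANUAL": [1.0, 0.5, 0.25],
#     "distance_matrices_of_interest": ["dm+distance_matrix_pedestrian"],
#     "metric_matrices_of_interest": ["dm+metric_matrix_pedestrian"],
#     "distanceMatrixName": "dm+distance_matrix_pedestrian",
#     "redistributeTrips": true,
#     "DISTANCE_BRACKETS": [500, 1000, 2000],
#     "XLS_FILE_PATH": "tripRates.xlsx"
# }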

# Check the webhook payload and run the main code only if the webhook was
# triggered by the specified stream and one of the listened branches.
listenedStreams = [STREAM_ID]
listenedBranchNames = [BRANCH_NAME_LAND_USES, BRANCH_NAME_DISTANCE_MATRIX, BRANCH_NAME_METRIC_DIST_MATRIX]

@webhook_endpoint
async def update_streams(request: Request):
    # Initialize flag
    should_continue = False

    # Read the request body as JSON
    payload = await request.json()

    # Check if the payload structure matches the expected format
    if "event" in payload and "data" in payload["event"]:
        event_data = payload["event"]["data"]

        # Check if the event type is "commit_create"
        if "type" in event_data and event_data["type"] == "commit_create":
            # Check if the stream name matches the specified list
            if "stream" in event_data and event_data["stream"] in listenedStreams:
                # Check if the branch name matches the specified list
                if "commit" in event_data and "branchName" in event_data["commit"]:
                    if event_data["commit"]["branchName"] in listenedBranchNames:
                        should_continue = True
                else:
                    print("Branch name not found in payload.")
            else:
                print("Stream name not found or not in the specified list.")
        else:
            print("Event type is not 'commit_create'.")
    else:
        print("Payload structure does not match the expected format.")

    # If the flag is True, continue running the main part of the code
    if should_continue:
        # Your main code logic goes here
        runAll()
    else:
        print("Flag is False. Skipping further execution.")

    return "Webhook processing complete."

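# For reference, the handler above expects a Speckle webhook payload shaped
# roughly like this (illustrative, trimmed to the fields that are checked):
# {
#     "event": {
#         "data": {
#             "type": "commit_create",
#             "stream": "<stream id>",
#             "commit": {"branchName": "<branch name>"}
#         }
#     }
# }
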
def runAll():
    # Authenticate against Speckle with the token from the environment
    # instead of a hard-coded value.
    speckle_token = os.environ.get("SPECKLE_TOKEN")

    xls_file_path = os.path.join(current_directory, XLS_FILE_PATH)
    print("full path", xls_file_path)

    # fetch speckle data
    CLIENT = SpeckleClient(host="https://speckle.xyz/")
    CLIENT.authenticate_with_token(token=speckle_token)

    # get land use stream
    stream_land_use = speckle_utils.getSpeckleStream(STREAM_ID,
                                            BRANCH_NAME_LAND_USES,
                                            CLIENT,
                                            commit_id = "")
    # navigate to list with speckle objects of interest
    stream_data = stream_land_use["@Data"]["@{0}"]
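    # ("@Data" -> "@{0}" appears to be the first branch path of the data tree
    # in the commit object, i.e. how the sender structured the commit.)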

    # transform stream_data to dataframe (create a backup copy of this dataframe)
    df_speckle_lu = speckle_utils.get_dataframe(stream_data, return_original_df=False)
    df_main = df_speckle_lu.copy()

    # set index column
    df_main = df_main.set_index("ids", drop=False)


    # get distance matrix stream
    stream_distance_matrice = speckle_utils.getSpeckleStream(STREAM_ID,
                                            BRANCH_NAME_DISTANCE_MATRIX,
                                            CLIENT,
                                            commit_id = "")

    # Navigate to the list of Speckle objects of interest and collect every
    # attribute whose name matches "<prefix>+distance_matrix*".
    distance_matrices = {}
    for distM in stream_distance_matrice["@Data"]['@{0}']:
        for kk in distM.__dict__.keys():
            try:
                if kk.split("+")[1].startswith("distance_matrix"):
                    distance_matrix_dict = json.loads(distM[kk])
                    origin_ids = distance_matrix_dict["origin_uuid"]
                    destination_ids = distance_matrix_dict["destination_uuid"]
                    distance_matrix = distance_matrix_dict["matrix"]
                    # Convert the matrix to a DataFrame with the origin and
                    # destination uuids as index and column names.
                    df_distances = pd.DataFrame(distance_matrix, index=origin_ids, columns=destination_ids)
                    distance_matrices[kk] = df_distances
            except (AttributeError, IndexError, KeyError, TypeError, ValueError):
                # Attribute name has no "+" part or does not hold a matrix payload.
                pass

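    # For reference, each matrix attribute is assumed to hold a JSON string of
    # roughly this shape (illustrative):
    # {"origin_uuid": ["id_0", ...],
    #  "destination_uuid": ["id_0", ...],
    #  "matrix": [[0.0, 12.5, ...], ...]}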

    # get metric matrix stream
    stream_metric_matrice = speckle_utils.getSpeckleStream(STREAM_ID,
                                            BRANCH_NAME_METRIC_DIST_MATRIX,
                                            CLIENT,
                                            commit_id = "")


    # Same extraction for the metric matrices ("<prefix>+metric_matrix*").
    metric_matrices = {}
    for distM in stream_metric_matrice["@Data"]['@{0}']:
        for kk in distM.__dict__.keys():
            try:
                if kk.split("+")[1].startswith("metric_matrix"):
                    metric_matrix_dict = json.loads(distM[kk])
                    origin_ids = metric_matrix_dict["origin_uuid"]
                    destination_ids = metric_matrix_dict["destination_uuid"]
                    metric_matrix = metric_matrix_dict["matrix"]
                    # Convert the metric matrix to a DataFrame and scale by 10
                    # to work around a unit issue in the Speckle export.
                    df_metric_dist = pd.DataFrame(metric_matrix, index=origin_ids, columns=destination_ids)
                    metric_matrices[kk] = df_metric_dist * 10
            except (AttributeError, IndexError, KeyError, TypeError, ValueError):
                pass

    # NOTE: this overwrites the dict built above (without the x10 scaling);
    # the scaled version is rebuilt again right before computeTrips() below.
    metric_matrices = extract_distance_matrices(stream_metric_matrice, metric_matrices_of_interest)


    sourceCommits = {
        "landuseCommitID": stream_land_use.id,
        "distanceMatrixCommitID": stream_distance_matrice.id,
        "metricMatrixCommitID": stream_metric_matrice.id
    }


    # READ XLS FILE ======================================
    # Read the Excel workbook into pandas DataFrames, sheet by sheet.
    if os.path.exists(xls_file_path):
        # Production
        df_production = pd.read_excel(xls_file_path, sheet_name='Production')
        df_production_transposed = df_production.T
        df_production = preprocess_dataFrame(df_production, headerRow_idx=2, numRowsStart_idx=3)
        df_production_transposed = preprocess_dataFrame(df_production_transposed, headerRow_idx=0, numRowsStart_idx=4,
                                                    numColsStart_idx=4, rowNames_idx=2)

        # Attraction
        df_attraction = pd.read_excel(xls_file_path, sheet_name='Attraction')
        df_attraction = preprocess_dataFrame(df_attraction, headerRow_idx=0, numRowsStart_idx=2)

        # Distribution_Matrix
        df_distributionMatrix = pd.read_excel(xls_file_path, sheet_name='Distribution_Matrix')
        df_distributionMatrix = preprocess_dataFrame(df_distributionMatrix, headerRow_idx=0, numRowsStart_idx=2,
                                                    numRowsEnd_idx=None, numColsStart_idx=2, numColsEnd_idx=None,
                                                    rowNames_idx=0)

        # Alphas
        df_alphas = pd.read_excel(xls_file_path, sheet_name='Alphas')
        df_alphas.columns = df_alphas.iloc[1]
        df_alphas = df_alphas.iloc[0, 2:]

        # Land use
        df_lu = pd.read_excel(xls_file_path, sheet_name='Example_Land_Use')
        df_lu = preprocess_dataFrame(df_lu, headerRow_idx=0, numRowsStart_idx=1)
        df_lu["nameCombined"] = df_lu.iloc[:, 1].astype(str) + "+" + df_lu.iloc[:, 0].astype(str)

        # Distance Matrix
        df_distMatrix = pd.read_excel(xls_file_path, sheet_name='Example_Distance_Matrix')
        df_distMatrix = preprocess_dataFrame(df_distMatrix, headerRow_idx=0, numRowsStart_idx=1, numRowsEnd_idx=None,
                                            numColsStart_idx=1, numColsEnd_idx=None, rowNames_idx=0)
    else:
        # Fail fast: the frames read above are required below.
        print("Error: Excel file specified in config.json not found.")
        return



    # Land use structure =======
    # This is the distance matrix used for the rest of the pipeline.
    df_distances_aligned, df_lu_stream_aligned = align_dataframes(distance_matrices[distanceMatrixName], df_main, 'ids')

    # Create a df with land uses
    lu_cols = [col for col in df_lu_stream_aligned.columns if col.startswith("lu+")]
    df_lu_stream = df_lu_stream_aligned[lu_cols]

    # Remove the "lu+" prefix from the column names. (str.lstrip('lu+') would
    # strip any leading 'l', 'u' or '+' characters, not the literal prefix.)
    df_lu_stream.columns = df_lu_stream.columns.str.replace(r'^lu\+', '', regex=True)
    df_lu_stream = df_lu_stream.T

    df_lu_stream_t = df_lu_stream.T

    df_lu_stream_with_nameLu_column = df_lu_stream.reset_index(drop=False).rename(columns={'index': 'nameLu'})

    #---
    df_lu_names_xlsx = pd.concat([df_lu.iloc[:, 0:2], df_lu.iloc[:, -1]], axis=1)
    df_lu_names_xlsx.index = df_lu_names_xlsx.iloc[:, 1]
    column_names = ['nameTripType', 'nameLu', 'nameCombined']
    df_lu_names_xlsx.columns = column_names
    print(f"df_lu_names_xlsx shape: {df_lu_names_xlsx.shape}")
    df_lu_names_xlsx.head()

    #--

    # Merge DataFrames using an outer join
    merged_df = pd.merge(df_lu_stream_with_nameLu_column, df_lu_names_xlsx, on='nameLu', how='outer')

    # Get the unique names and their counts from df_lu_names_xlsx
    name_counts = df_lu_names_xlsx['nameLu'].value_counts()
    #print(name_counts)

    # Identify names in df_lu_stream_with_nameLu_column that are not in df_lu_names_xlsx
    missing_names = df_lu_stream_with_nameLu_column.loc[~df_lu_stream_with_nameLu_column['nameLu'].isin(df_lu_names_xlsx['nameLu'])]

    # Append missing rows to df_lu_stream_with_nameLu_column
    df_lu_stream_duplicated = pd.concat([merged_df, missing_names], ignore_index=True)


    #--
    # Find names in df_lu_names_xlsx that are not in df_lu_stream_with_nameLu_column
    missing_names = df_lu_names_xlsx.loc[~df_lu_names_xlsx['nameLu'].isin(df_lu_stream_with_nameLu_column['nameLu'])]

    #--
    # Sort both frames by land-use name so they can be compared side by side.
    df_lu_names_sorted = df_lu_names_xlsx.sort_values(by='nameLu')
    df_lu_stream_duplicated_sorted = df_lu_stream_duplicated.sort_values(by='nameLu')
    #--
    # Merge DataFrames to get the order of names
    merged_order = pd.merge(df_lu_names_xlsx[['nameCombined']], df_lu_stream_duplicated[['nameCombined']], on='nameCombined', how='inner')

    # Sort df_lu_stream_duplicated based on the order of names in df_lu_names_xlsx
    df_lu_stream_sorted = df_lu_stream_duplicated.sort_values(by='nameCombined', key=lambda x: pd.Categorical(x, categories=merged_order['nameCombined'], ordered=True))
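    # (pd.Categorical with ordered=True makes sort_values follow the order of
    # merged_order['nameCombined'] rather than alphabetical order.)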

    # Reorganize columns
    column_order = ['nameTripType', 'nameCombined'] + [col for col in df_lu_stream_sorted.columns if col not in ['nameTripType', 'nameCombined']]

    # Create a new DataFrame with the desired column order
    df_lu_stream_reordered = df_lu_stream_sorted[column_order]

    df_lu_stream_reordered_t = df_lu_stream_reordered.T

    #--
    df_lu_stream_with_index = df_lu_stream_reordered_t.reset_index(drop=False).rename(columns={'index': 'ids'})
    df_lu_stream_with_index.index = df_lu_stream_reordered_t.index

    df_lu_num_t_index = df_lu_stream_with_index.iloc[3:]

    df_distances_aligned_index = df_distances_aligned.reset_index(drop=False).rename(columns={'index': 'ids'})
    df_distances_aligned_index.index = df_distances_aligned.index

    df_lu_namesCombined = df_lu_stream_with_index.loc["nameCombined"].iloc[1:]

    # Sort df_lu_stream_with_index based on the 'ids' column in df_distances_aligned_index
    df_lu_stream_sorted = df_lu_stream_with_index.sort_values(by=['ids'], key=lambda x: pd.Categorical(x, categories=df_distances_aligned_index['ids'], ordered=True))


    df_lu_num = df_lu_stream_sorted.T.iloc[1:, :-3]
    df_lu_num.index = df_lu_namesCombined

    df_distMatrix_speckle = df_distances_aligned

    df_attraction_num = df_attraction.reset_index().iloc[:-1, 6:]

    # =============================================================================
    # TRIP GENERATION

    # ATTRACTION & PRODUCTION ======================================================
    """
    INPUTS
    df_attraction_num
    df_lu_num
    df_production
    df_lu
    df_production_transposed
    """

    df_attraction_proNode_sum_total = attraction_proNode_full_iter(df_attraction_num, df_lu_num, True)

    # Get the sqmProPerson
    df_sqmProPerson = df_production.iloc[0, 4:].reset_index()[3]

    # Get the trip rate
    df_tripRate = copy.deepcopy(df_production)  # deep copy so df_tripRate doesn't alias df_production
    df_tripRate.index = df_tripRate.iloc[:, 0]  # set the row names
    df_tripRate = df_tripRate.iloc[1:, 2]

    # Numerical df from production ==============================================
    df_production_num = df_production.iloc[1:, 4:]
    df_production_transposed1 = df_production_num.T

    df_total_trips_allNodes = production_proNode_total(df_lu,
                                                    df_sqmProPerson,
                                                    df_tripRate,
                                                    df_production_num,
                                                    df_production_transposed,
                                                    df_lu_num, printSteps=False)
    # Convert data types to float
    df_total_trips_allNodes = df_total_trips_allNodes.astype(float)
    df_tripRate = df_tripRate.astype(float)

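    # Dividing total trips by the trip rate (trips per person) recovers person
    # counts per node; summing those gives the population per category.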
    df_total_trips_allNodes_sumPerson = df_total_trips_allNodes.div(df_tripRate, axis=0).sum()
    df_total_trips_allNodes_sumPerson_proCat = df_total_trips_allNodes.div(df_tripRate, axis=0)
    df_total_trips_allNodes_sumPerson_proCat_t = df_total_trips_allNodes_sumPerson_proCat.T
    df_total_trips_allNodes_sumPerson_proCat_t_sum = df_total_trips_allNodes_sumPerson_proCat_t.sum()

    # get total population
    total_population = df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_Res"] + df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_tou"]



    # =============================================================================
    # Re-extract the matrices of interest; metric matrices are scaled x10 to
    # compensate for the unit issue in the Speckle export noted above.
    distance_matrices = extract_distance_matrices(stream_distance_matrice, distance_matrices_of_interest)
    metric_matrices_ = extract_distance_matrices(stream_metric_matrice, metric_matrices_of_interest)
    metric_matrices = {k: v * 10 for k, v in metric_matrices_.items()}

    logs = computeTrips(
        df_distributionMatrix,
        df_total_trips_allNodes,
        df_distMatrix_speckle,
        df_alphas,
        df_attraction_proNode_sum_total,
        df_distances_aligned,
        TARGET_TRIP_RATE,
        SCALING_FACTOR,
        total_population,
        df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_Res"],
        df_total_trips_allNodes_sumPerson_proCat_t_sum["Tot_tou"],
        distance_matrices,
        metric_matrices,
        redistributeTrips,
        DISTANCE_BRACKETS,
        ALPHA_LOW, ALPHA_MED, ALPHA_HIGH, ALPHA, ALPHA_UNIFORM, F_VALUES_MANUAL,
        CLIENT, 
        STREAM_ID, 
        TARGET_BRANCH_TM,
        sourceCommits
        )

    print(logs)