Upload 10 files

Browse files

Files changed (10) hide show

assets.json +1 -0
code/constants.py +31 -0
code/multimodal_serve.py +125 -0
code/utils.py +188 -0
config.yaml +249 -0
data_processors.pkl +3 -0
df_preprocessor.pkl +3 -0
events.out.tfevents.1706665637.algo-1.21.0 +3 -0
hparams.yaml +15 -0
model.ckpt +3 -0

assets.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"class_name": "MultiModalPredictor", "column_types": {"image": "image_path", "label": "categorical"}, "label_column": "label", "problem_type": "multiclass", "eval_metric_name": "accuracy", "validation_metric_name": "accuracy", "output_shape": 4, "classes": null, "save_path": "/opt/ml/model", "pretrained_path": null, "version": "0.6.1"}

code/constants.py ADDED Viewed

	@@ -0,0 +1,31 @@

+########################################################################################################################
+#                                                ENVIRONMENT VARIABLES                                                 #
+########################################################################################################################
+NUM_GPU = "NUM_GPU"  # name of the env var the serving script reads to override the predictor's GPU count
+SAGEMAKER_INFERENCE_OUTPUT = "SAGEMAKER_INFERENCE_OUTPUT"  # name of the env var selecting which outputs to return
+########################################################################################################################
+#                                                OUTPUT CONSTANTS                                                      #
+########################################################################################################################
+PROBABILITY = "probability"  # output selector / JSON key: highest class probability
+PROBABILITIES = "probabilities"  # output selector / JSON key: probabilities for every class
+PREDICTED_LABEL = "predicted_label"  # output selector / JSON key: predicted label (the default output)
+LABELS = "labels"  # output selector / JSON key: the model's class labels
+########################################################################################################################
+#                                                DATA FORMAT CONSTANTS                                                 #
+########################################################################################################################
+BYTE_ARRAY_FORMAT = "application/x-image"  # request content type
+JPEG_FORMAT = "image/jpeg"  # request content type
+PNG_FORMAT = "image/png"  # request content type
+JSON_FORMAT = "application/json"  # response content type
+CSV_FORMAT = "text/csv"  # response content type
+COMMA_DELIMITER = ","  # joins the per-output fields of a CSV response
+BRACKET_FORMATTER = '"{}"'  # wraps a list-valued prediction in double quotes for CSV output
+ALLOWED_INPUT_FORMATS = [BYTE_ARRAY_FORMAT, JPEG_FORMAT, PNG_FORMAT]  # accepted request content types
+ALLOWED_OUTPUT_FORMATS = [JSON_FORMAT, CSV_FORMAT]  # accepted response content types
+########################################################################################################################
+#                                                INFERENCE DATA CONSTANTS                                              #
+########################################################################################################################
+IMAGE_COLUMN_NAME = "image"  # DataFrame column that carries the request body at inference time

code/multimodal_serve.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import ast
+import json
+import os
+import numpy as np
+import pandas as pd
+from autogluon.multimodal import MultiModalPredictor
+from constants import (
+    ALLOWED_INPUT_FORMATS,
+    ALLOWED_OUTPUT_FORMATS,
+    BRACKET_FORMATTER,
+    COMMA_DELIMITER,
+    IMAGE_COLUMN_NAME,
+    JSON_FORMAT,
+    LABELS,
+    NUM_GPU,
+    PREDICTED_LABEL,
+    PROBABILITIES,
+    PROBABILITY,
+    SAGEMAKER_INFERENCE_OUTPUT,
+)
+from utils import infer_type_and_cast_value
+INFERENCE_OUTPUT = (  # requested output types parsed from the env var; only the predicted label when it is unset
+    infer_type_and_cast_value(os.getenv(SAGEMAKER_INFERENCE_OUTPUT))
+    if SAGEMAKER_INFERENCE_OUTPUT in os.environ
+    else [PREDICTED_LABEL]
+)
+NUM_GPUS = infer_type_and_cast_value(os.getenv(NUM_GPU))  # stays None when the env var is unset
+def generate_single_csv_line_inference_selection(data):
+    """Generate a single csv line response.
+    :param data: list of output generated from the model
+    :return: csv line for the predictions
+    """
+    contents: str
+    for single_prediction in data:
+        contents = (
+            BRACKET_FORMATTER.format(single_prediction)
+            if isinstance(single_prediction, list)
+            else str(single_prediction)
+        )
+    return contents
+def model_fn(model_dir):
+    """Load model from previously saved artifact.
+    :param model_dir: local path to the model directory
+    :return: loaded model
+    """
+    predictor = MultiModalPredictor.load(model_dir)
+    if NUM_GPUS is not None:
+        predictor._config.env.num_gpus = NUM_GPUS
+    return predictor
+def convert_to_json_compatible_type(value):
+    """Convert the input value to a JSON compatible type.
+    :param value: input value
+    :return: JSON compatible value
+    """
+    string_value = "{}".format(value)
+    try:
+        return ast.literal_eval(string_value)
+    except Exception:
+        return string_value
+def transform_fn(model, request_body, input_content_type, output_content_type):
+    """Transform function for serving inference requests.
+    If INFERENCE_OUTPUT is provided, then the predictions are generated in the requested format and concatenated in the
+    same order. Otherwise, prediction_labels are generated by default.
+    :param model: loaded model
+    :param request_body: request body
+    :param input_content_type: content type of the input
+    :param output_content_type: content type of the response
+    :return: prediction response
+    """
+    if input_content_type.lower() not in ALLOWED_INPUT_FORMATS:
+        raise Exception(
+            f"{input_content_type} input content type not supported. Supported formats are {ALLOWED_INPUT_FORMATS}"
+        )
+    if output_content_type.lower() not in ALLOWED_OUTPUT_FORMATS:
+        raise Exception(
+            f"{output_content_type} output content type not supported. Supported formats are {ALLOWED_OUTPUT_FORMATS}"
+        )
+    data = pd.DataFrame({IMAGE_COLUMN_NAME: [request_body]})
+    result_dict = dict()
+    result = []
+    inference_output_list = (
+        INFERENCE_OUTPUT if isinstance(INFERENCE_OUTPUT, list) else [INFERENCE_OUTPUT]
+    )
+    for output_type in inference_output_list:
+        if output_type == PREDICTED_LABEL:
+            prediction = model.predict(data)
+            result_dict[PREDICTED_LABEL] = convert_to_json_compatible_type(prediction.squeeze())
+        elif output_type == PROBABILITIES:
+            predict_probs = model.predict_proba(data)
+            prediction = predict_probs.to_numpy()
+            result_dict[PROBABILITIES] = predict_probs.squeeze().tolist()
+        elif output_type == LABELS:
+            labels = model.class_labels
+            prediction = np.array([labels]).astype("str")
+            result_dict[LABELS] = labels.tolist()
+        else:
+            predict_probabilities = model.predict_proba(data).to_numpy()
+            prediction = np.max(predict_probabilities, axis=1)
+            result_dict[PROBABILITY] = prediction.squeeze().tolist()
+        result.append(generate_single_csv_line_inference_selection(prediction.tolist()))
+    response = COMMA_DELIMITER.join(result)
+    if output_content_type == JSON_FORMAT:
+        response = json.dumps(result_dict)
+    return response, output_content_type

code/utils.py ADDED Viewed

	@@ -0,0 +1,188 @@

+import logging
+import os
+from typing import Optional
+def is_float(value: str):
+    """Check if the input value is float.
+    :param value: value
+    :return: True / False based on whether the value is float or not
+    """
+    try:
+        float(value)
+    except (TypeError, ValueError):
+        return False
+    else:
+        return True
+def is_int(value: str):
+    """Check if the input value is int.
+    :param value: value
+    :return: True / False based on whether the value is int or not
+    """
+    try:
+        float_value = float(value)
+        int_value = int(value)
+    except (TypeError, ValueError):
+        return False
+    else:
+        return float_value == int_value
+def is_list(value: str):
+    """Check if the input value is list.
+    Currently, we support list in the following format -
+    1. "a,b,c,d"
+    2. "1,2,3,4"
+    :param value: value
+    :return: True / False based on whether the value is list or not
+    """
+    return "," in value
+def is_boolean(value: str):
+    """Check if the input value is boolean.
+    :param value: value
+    :return: True / False based on whether the value is boolean or not
+    """
+    return value.lower() in ["true", "false"]
+def parse_boolean(value: str):
+    """Parse the boolean value.
+    :param value: value
+    :return: Parsed boolean values
+    """
+    return True if value.lower() == "true" else False
+def parse_list(value: str):
+    """Parse the list value.
+    Currently, we support list in the following format -
+    1. "a,b,c,d"
+    2. "1,2,3,4"
+    :param value: value
+    :return: Parsed list
+    """
+    values = value.split(",")
+    clean_values = [v.strip(" \"'") for v in values]
+    return [infer_type_and_cast_value(v) for v in clean_values]
+def infer_type_and_cast_value(value: Optional[str]):
+    """Infer the type of value and casts it accordingly.
+    :param value: value
+    :return: casted value
+    """
+    if value is None:
+        return value
+    elif is_int(value):
+        return int(value)
+    elif is_float(value):
+        return float(value)
+    elif is_boolean(value):
+        return parse_boolean(value)
+    elif is_list(value):
+        return parse_list(value)
+    else:
+        return value
+def __setup_fault_handler(file_path: str = None):
+    """Set up fault handler.
+    :param file_path: path to the error file
+    :return:
+    """
+    try:
+        import faulthandler
+        if not faulthandler.is_enabled():
+            if file_path is not None:
+                faulthandler.enable(os.open(file_path, os.O_APPEND), all_threads=True)
+            else:
+                faulthandler.enable()
+    except ImportError:
+        logging.warn("No faulthandler found")
+def get_error_logger():
+    """Return the logger from logging for id ERROR_LOGGER_ID ."""
+    return logging.getLogger("error")
+def setup_trusted_log(error_volume: str, error_file_path: str):
+    """Set up trusted logs for the script.
+    :param error_volume: volume where the errors should be written
+    :param error_file_path: path to the error_file
+    :return: trusted logger
+    """
+    trusted_log_formatter = logging.Formatter(
+        "[%(asctime)s %(levelname)s %(thread)d %(filename)s:%(lineno)d] %(message)s",
+        datefmt="%m/%d/%Y %H:%M:%S",
+    )
+    os.makedirs(error_volume, exist_ok=True)
+    trusted_log_handler = logging.FileHandler(error_file_path)
+    __setup_fault_handler(file_path=error_file_path)
+    trusted_log_handler.setFormatter(trusted_log_formatter)
+    trusted_log_handler.setLevel(logging.INFO)
+    error_logger = get_error_logger()
+    error_logger.addHandler(trusted_log_handler)
+    error_logger.propagate = False
+def write_trusted_log_info(private_info_message):
+    """Write private info message to the trusted log channel.
+    :param private_info_message: private trusted log message
+    :return:
+    """
+    trusted_logger = get_error_logger()
+    trusted_logger.info(private_info_message)
+def write_failure_reason(failure_reason_text, file_path):
+    """Write failure reason to failure file.
+    :param failure_reason_text: reason for failure
+    :param file_path: path to the failure file
+    :return:
+    """
+    if not os.path.exists(os.path.dirname(file_path)):
+        os.makedirs(os.path.dirname(file_path))
+    with open(file_path, "w") as f:
+        f.write(failure_reason_text)
+def write_trusted_log_exception(
+    error_message, caused_by, failure_file_path, failure_prefix="Algorithm Error"
+):
+    """Write private exception message to the trusted error channel.
+    :param error_message: error_message
+    :param caused_by: cause for the error
+    :param failure_file_path: failure file path. Usually /opt/ml/output/failure
+    :param failure_prefix: prefix to attach to the error message
+    :return:
+    """
+    message = "{}: {}".format(failure_prefix, error_message)
+    error_detail = "Caused by: {}".format(caused_by)
+    message += "\n\n{}".format(error_detail)
+    err_logger = get_error_logger()
+    err_logger.exception(message)
+    write_failure_reason(message, failure_file_path)
+    return message

config.yaml ADDED Viewed

	@@ -0,0 +1,249 @@

+model:
+  names:
+  - timm_image
+  categorical_transformer:
+    out_features: 192
+    d_token: 192
+    ffn_d_hidden: 192
+    num_trans_blocks: 0
+    num_attn_heads: 8
+    residual_dropout: 0.0
+    attention_dropout: 0.2
+    ffn_dropout: 0.1
+    normalization: layer_norm
+    ffn_activation: reglu
+    head_activation: relu
+    data_types:
+    - categorical
+    additive_attention: false
+    share_qv_weights: false
+  numerical_transformer:
+    out_features: 192
+    d_token: 192
+    ffn_d_hidden: 192
+    num_trans_blocks: 0
+    num_attn_heads: 8
+    residual_dropout: 0.0
+    attention_dropout: 0.2
+    ffn_dropout: 0.1
+    normalization: layer_norm
+    ffn_activation: reglu
+    head_activation: relu
+    data_types:
+    - numerical
+    embedding_arch:
+    - linear
+    - relu
+    merge: concat
+    additive_attention: false
+    share_qv_weights: false
+  ner_text:
+    checkpoint_name: bert-base-cased
+    max_text_len: 512
+    gradient_checkpointing: false
+    low_cpu_mem_usage: false
+    data_types:
+    - text
+    tokenizer_name: hf_auto
+    insert_sep: false
+    text_segment_num: 2
+    stochastic_chunk: false
+    special_tags:
+    - X
+    - O
+  t_few:
+    checkpoint_name: t5-small
+    gradient_checkpointing: false
+    data_types:
+    - text
+    tokenizer_name: hf_auto
+    length_norm: 1.0
+    unlikely_loss: 1.0
+    mc_loss: 1.0
+    max_text_len: 512
+    text_segment_num: 2
+    insert_sep: true
+    low_cpu_mem_usage: false
+    stochastic_chunk: false
+    text_aug_detect_length: 10
+    text_trivial_aug_maxscale: 0.0
+  timm_image:
+    checkpoint_name: swin_base_patch4_window7_224
+    mix_choice: all_logits
+    data_types:
+    - image
+    train_transform_types:
+    - resize_shorter_side
+    - center_crop
+    - trivial_augment
+    val_transform_types:
+    - resize_shorter_side
+    - center_crop
+    image_norm: imagenet
+    image_size: 224
+    max_img_num_per_col: 2
+  mmdet_image:
+    checkpoint_name: yolov3_mobilenetv2_320_300e_coco
+    data_types:
+    - image
+    train_transform_types:
+    - resize_shorter_side
+    - center_crop
+    - trivial_augment
+    val_transform_types:
+    - resize_shorter_side
+    - center_crop
+    image_norm: imagenet
+    image_size: 224
+    max_img_num_per_col: 2
+  mmocr_text_detection:
+    checkpoint_name: TextSnake
+    data_types:
+    - image
+    train_transform_types:
+    - resize_shorter_side
+    - center_crop
+    - trivial_augment
+    val_transform_types:
+    - resize_shorter_side
+    - center_crop
+    image_norm: imagenet
+    image_size: 224
+    max_img_num_per_col: 2
+  mmocr_text_recognition:
+    checkpoint_name: ABINet
+    data_types:
+    - image
+    train_transform_types:
+    - resize_shorter_side
+    - center_crop
+    - trivial_augment
+    val_transform_types:
+    - resize_shorter_side
+    - center_crop
+    image_norm: imagenet
+    image_size: 224
+    max_img_num_per_col: 2
+  clip:
+    checkpoint_name: openai/clip-vit-base-patch32
+    data_types:
+    - image
+    - text
+    train_transform_types:
+    - resize_shorter_side
+    - center_crop
+    - trivial_augment
+    val_transform_types:
+    - resize_shorter_side
+    - center_crop
+    image_norm: clip
+    image_size: 224
+    max_img_num_per_col: 2
+    tokenizer_name: clip
+    max_text_len: 77
+    insert_sep: false
+    text_segment_num: 1
+    stochastic_chunk: false
+    text_aug_detect_length: 10
+    text_trivial_aug_maxscale: 0.0
+    text_train_augment_types: null
+  fusion_transformer:
+    hidden_size: 192
+    n_blocks: 3
+    attention_n_heads: 8
+    adapt_in_features: max
+    attention_dropout: 0.2
+    residual_dropout: 0.0
+    ffn_dropout: 0.1
+    ffn_d_hidden: 192
+    normalization: layer_norm
+    ffn_activation: geglu
+    head_activation: relu
+    data_types: null
+    additive_attention: false
+    share_qv_weights: false
+data:
+  image:
+    missing_value_strategy: skip
+  text:
+    normalize_text: false
+  categorical:
+    minimum_cat_count: 100
+    maximum_num_cat: 20
+    convert_to_text: true
+  numerical:
+    convert_to_text: false
+    scaler_with_mean: true
+    scaler_with_std: true
+  label:
+    numerical_label_preprocessing: standardscaler
+  pos_label: null
+  mixup:
+    turn_on: false
+    mixup_alpha: 0.8
+    cutmix_alpha: 1.0
+    cutmix_minmax: null
+    prob: 1.0
+    switch_prob: 0.5
+    mode: batch
+    turn_off_epoch: 5
+    label_smoothing: 0.1
+  templates:
+    turn_on: false
+    num_templates: 30
+    template_length: 2048
+    preset_templates:
+    - super_glue
+    - rte
+    custom_templates: null
+optimization:
+  optim_type: adamw
+  learning_rate: 0.001
+  weight_decay: 0.001
+  lr_choice: layerwise_decay
+  lr_decay: 0.9
+  lr_schedule: cosine_decay
+  max_epochs: 10
+  max_steps: -1
+  warmup_steps: 0.1
+  end_lr: 0
+  lr_mult: 1
+  patience: 10
+  val_check_interval: 0.5
+  check_val_every_n_epoch: 1
+  gradient_clip_val: 1
+  gradient_clip_algorithm: norm
+  track_grad_norm: -1
+  log_every_n_steps: 10
+  val_metric: null
+  top_k: 3
+  top_k_average_method: best
+  efficient_finetune: null
+  lora:
+    module_filter: null
+    filter:
+    - query
+    - value
+    - ^q$
+    - ^v$
+    - ^k$
+    - ^o$
+    r: 8
+    alpha: 8
+  loss_function: auto
+env:
+  num_gpus: 4
+  num_nodes: 1
+  batch_size: 128
+  per_gpu_batch_size: 32
+  eval_batch_size_ratio: 4
+  per_gpu_batch_size_evaluation: null
+  precision: 16
+  num_workers: 2
+  num_workers_evaluation: 2
+  fast_dev_run: false
+  deterministic: false
+  auto_select_gpus: true
+  strategy: ddp
+  deepspeed_allgather_size: 1000000000.0
+  deepspeed_allreduce_size: 1000000000.0

data_processors.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4de012a54d1f0b1e4087f8d8fe96649416ce438a359da29b8aeb68a6eb7b7e03
+size 348790580

df_preprocessor.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9e5001bf2241ae6b6fdc72edb06ddef63842d8bec8f79b66f0ad58315b1f403
+size 22548

events.out.tfevents.1706665637.algo-1.21.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:61b5cf65a2950a863dfa5d46f2c253e4d9e7729ba00c9c6b22b5a12df4c03eda
+size 8085

hparams.yaml ADDED Viewed

	@@ -0,0 +1,15 @@

+optim_type: adamw
+lr_choice: layerwise_decay
+lr_schedule: cosine_decay
+lr: 0.001
+lr_decay: 0.9
+end_lr: 0
+lr_mult: 1
+weight_decay: 0.001
+warmup_steps: 0.1
+validation_metric_name: accuracy
+custom_metric_func: null
+efficient_finetune: null
+trainable_param_names: []
+mixup_fn: null
+mixup_off_epoch: 5

model.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e7891ddbec5fc09cc894c993664dab03244b780705c508fe04889e990948652
+size 348680105