Upload 3 files

Browse files

Files changed (3) hide show

README.md +55 -3
config.json +248 -0
preprocessor_config.json +28 -0

README.md CHANGED Viewed

@@ -1,3 +1,55 @@
----
-license: apache-2.0
----

+---
+license: apache-2.0
+---
+# InsectSAM: Insect Segmentation and Monitoring
+<p align="left">
+  <a href="" rel="noopener">
+ <img width="200" height="200" src="https://i.imgur.com/hjWgAN9.png" alt="Project logo"></a>
+</p>
+## Overview
+InsectSAM is an advanced machine learning model tailored for the [DIOPSIS](https://diopsis.eu) camera systems and the [ARISE](https://www.arise-biodiversity.nl/) project, and dedicated to insect biodiversity detection and monitoring in the Netherlands. Built on Meta AI's `segment-anything` model, InsectSAM is fine-tuned to segment insects from complex backgrounds, improving the accuracy and efficiency of biodiversity monitoring efforts.
+## Purpose
+This model has been meticulously trained to identify and segment insects against a variety of backgrounds that might otherwise confuse traditional algorithms. It is specifically designed to adapt to future changes in background environments, ensuring its long-term utility in the DIOPSIS / ARISE project.
+## Model Architecture
+InsectSAM utilizes the advanced capabilities of the `segment-anything` architecture, enhanced by our custom training on an insect-centric dataset. The model is further refined by integrating with GroundingDINO, improving its ability to distinguish fine details and subtle variations in insect appearances.
+## Quick Start
+### Prerequisites
+- Python
+- Hugging Face Transformers
+- PyTorch
+### Usage
+#### Install
+``` bash
+!pip install --upgrade -q git+https://github.com/huggingface/transformers
+!pip install torch
+```
+#### Load model directly via HF Transformers 🤗
+``` python
+from transformers import AutoProcessor, AutoModelForMaskGeneration
+processor = AutoProcessor.from_pretrained("martintmv/InsectSAM")
+model = AutoModelForMaskGeneration.from_pretrained("martintmv/InsectSAM")
+```
+### Notebooks
+Three Jupyter notebooks are provided to demonstrate the model's capabilities and its integration with GroundingDINO:
+- **InsectSAM.ipynb**: Covers the training process, from data preparation to model evaluation.
+- **InsectSAM_GroundingDINO.ipynb**: Demonstrates how InsectSAM is combined with GroundingDINO for enhanced segmentation performance.
+- **Run_InsectSAM_Inference_Transformers.ipynb**: Shows how to run InsectSAM inference using Transformers.
+Check out the notebooks on RB-IBDM's GitHub page - https://github.com/martintmv-git/RB-IBDM/tree/main/InsectSAM

config.json ADDED Viewed

	@@ -0,0 +1,248 @@

+{
+  "_commit_hash": null,
+  "architectures": [
+    "SamModel"
+  ],
+  "initializer_range": 0.02,
+  "mask_decoder_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_downsample_rate": 2,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "relu",
+    "hidden_size": 256,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "iou_head_depth": 3,
+    "iou_head_hidden_dim": 256,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-06,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "mlp_dim": 2048,
+    "model_type": "",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 8,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 2,
+    "num_multimask_outputs": 3,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.29.0.dev0",
+    "typical_p": 1.0,
+    "use_bfloat16": false
+  },
+  "model_type": "sam",
+  "prompt_encoder_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "gelu",
+    "hidden_size": 256,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_embedding_size": 64,
+    "image_size": 1024,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-06,
+    "length_penalty": 1.0,
+    "mask_input_channels": 16,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_point_embeddings": 4,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 16,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.29.0.dev0",
+    "typical_p": 1.0,
+    "use_bfloat16": false
+  },
+  "torch_dtype": "float32",
+  "transformers_version": null,
+  "vision_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.0,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "global_attn_indexes": [
+      2,
+      5,
+      8,
+      11
+    ],
+    "hidden_act": "gelu",
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": 1024,
+    "initializer_factor": 1.0,
+    "initializer_range": 1e-10,
+    "intermediate_size": 6144,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-06,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "mlp_dim": 3072,
+    "mlp_ratio": 4.0,
+    "model_type": "",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 12,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "num_pos_feats": 128,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_channels": 256,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 16,
+    "prefix": null,
+    "problem_type": null,
+    "projection_dim": 512,
+    "pruned_heads": {},
+    "qkv_bias": true,
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.29.0.dev0",
+    "typical_p": 1.0,
+    "use_abs_pos": true,
+    "use_bfloat16": false,
+    "use_rel_pos": true,
+    "window_size": 14
+  }
+}

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_pad": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_processor_type": "SamImageProcessor",
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "pad_size": {
+    "height": 1024,
+    "width": 1024
+  },
+  "processor_class": "SamProcessor",
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "longest_edge": 1024
+  }
+}