ydshieh
committed on
Commit
•
dbe1403
1
Parent(s):
68f6bad
update 2
Browse files
run_image_captioning_flax_reduced.py
CHANGED
@@ -516,57 +516,34 @@ def main():
|
|
516 |
decoder_dtype=getattr(jnp, model_args.dtype),
|
517 |
)
|
518 |
|
519 |
-
feature_extractor = None
|
520 |
if model_args.feature_extractor_name:
|
521 |
feature_extractor = AutoFeatureExtractor.from_pretrained(
|
522 |
model_args.feature_extractor_name,
|
523 |
cache_dir=model_args.cache_dir,
|
524 |
)
|
525 |
-
elif model_args.
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
534 |
-
if model_args.encoder_model_name_or_path:
|
535 |
-
feature_extractor = AutoFeatureExtractor.from_pretrained(
|
536 |
-
model_args.encoder_model_name_or_path, cache_dir=model_args.cache_dir
|
537 |
-
)
|
538 |
-
else:
|
539 |
-
raise ValueError(
|
540 |
-
"You are instantiating a new feature extractor from scratch. This is not supported by this script."
|
541 |
-
"You can do it from another script, save it, and load it from here, using --feature_extractor_name."
|
542 |
-
)
|
543 |
|
544 |
-
tokenizer = None
|
545 |
if model_args.tokenizer_name:
|
546 |
tokenizer = AutoTokenizer.from_pretrained(
|
547 |
model_args.tokenizer_name, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer
|
548 |
)
|
549 |
-
elif model_args.
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
if not tokenizer:
|
559 |
-
if model_args.decoder_model_name_or_path:
|
560 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
561 |
-
model_args.decoder_model_name_or_path,
|
562 |
-
cache_dir=model_args.cache_dir,
|
563 |
-
use_fast=model_args.use_fast_tokenizer,
|
564 |
-
)
|
565 |
-
else:
|
566 |
-
raise ValueError(
|
567 |
-
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
568 |
-
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
569 |
-
)
|
570 |
tokenizer.pad_token = tokenizer.convert_ids_to_tokens(config.pad_token_id)
|
571 |
|
572 |
# Preprocessing the datasets.
|
|
|
516 |
decoder_dtype=getattr(jnp, model_args.dtype),
|
517 |
)
|
518 |
|
|
|
519 |
if model_args.feature_extractor_name:
|
520 |
feature_extractor = AutoFeatureExtractor.from_pretrained(
|
521 |
model_args.feature_extractor_name,
|
522 |
cache_dir=model_args.cache_dir,
|
523 |
)
|
524 |
+
elif model_args.encoder_model_name_or_path:
|
525 |
+
feature_extractor = AutoFeatureExtractor.from_pretrained(
|
526 |
+
model_args.encoder_model_name_or_path, cache_dir=model_args.cache_dir
|
527 |
+
)
|
528 |
+
else:
|
529 |
+
raise ValueError(
|
530 |
+
"You are instantiating a new feature extractor from scratch. This is not supported by this script."
|
531 |
+
"You can do it from another script, save it, and load it from here, using --feature_extractor_name."
|
532 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
533 |
|
|
|
534 |
if model_args.tokenizer_name:
|
535 |
tokenizer = AutoTokenizer.from_pretrained(
|
536 |
model_args.tokenizer_name, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer
|
537 |
)
|
538 |
+
elif model_args.decoder_model_name_or_path:
|
539 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
540 |
+
model_args.decoder_model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer
|
541 |
+
)
|
542 |
+
else:
|
543 |
+
raise ValueError(
|
544 |
+
"You are instantiating a new tokenizer from scratch. This is not supported by this script."
|
545 |
+
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
|
546 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
547 |
tokenizer.pad_token = tokenizer.convert_ids_to_tokens(config.pad_token_id)
|
548 |
|
549 |
# Preprocessing the datasets.
|