Spaces:

BramVanroy
/

text-to-amr

Running

App Files Community

Bram Vanroy committed on Feb 13, 2023

Commit

51df785

•

1 Parent(s): 1184fa3

add CUDA support

Browse files

Files changed (1) hide show

utils.py +11 -5

utils.py CHANGED Viewed

@@ -2,8 +2,9 @@ from typing import Tuple
 import streamlit as st
 from torch.quantization import quantize_dynamic
-from torch import nn, qint8
 from torch.nn import Parameter
 from transformers import PreTrainedModel, PreTrainedTokenizer
 from optimum.bettertransformer import BetterTransformer
@@ -14,17 +15,19 @@ from transformers import MBartForConditionalGeneration
 st_hash_funcs = {PreTrainedModel: lambda model: model.name_or_path,
                  PreTrainedTokenizer: lambda tokenizer: tokenizer.name_or_path,
-                 Parameter: lambda param: param.data}
 @st.cache(show_spinner=False, hash_funcs=st_hash_funcs, allow_output_mutation=True)
-def get_resources(multilingual: bool, quantize: bool = True) -> Tuple[MBartForConditionalGeneration, AMRMBartTokenizer, AMRLogitsProcessor]:
     """Get the relevant model, tokenizer and logits_processor. The loaded model depends on whether the multilingual
     model is requested, or not. If not, an English-only model is loaded. The model can be optionally quantized
     for better performance.
     :param multilingual: whether or not to load the multilingual model. If not, loads the English-only model
     :param quantize: whether to quantize the model with PyTorch's 'quantize_dynamic'
     :return: the loaded model, tokenizer, and logits processor
     """
     if multilingual:
@@ -38,7 +41,9 @@ def get_resources(multilingual: bool, quantize: bool = True) -> Tuple[MBartForCo
     model = BetterTransformer.transform(model, keep_original_model=False)
     model.resize_token_embeddings(len(tokenizer))
-    if quantize:
         model = quantize_dynamic(model, {nn.Linear, nn.Dropout, nn.LayerNorm}, dtype=qint8)
     logits_processor = AMRLogitsProcessor(tokenizer, model.config.max_length)
@@ -60,7 +65,8 @@ def translate(text: str, src_lang: str, model: MBartForConditionalGeneration, to
     """
     tokenizer.src_lang = LANGUAGES[src_lang]
     encoded = tokenizer(text, return_tensors="pt")
-    generated = model.generate(**encoded, **gen_kwargs)
     return tokenizer.decode_and_fix(generated)[0]

 import streamlit as st
+import torch
 from torch.quantization import quantize_dynamic
+from torch import nn, qint8, Tensor
 from torch.nn import Parameter
 from transformers import PreTrainedModel, PreTrainedTokenizer
 from optimum.bettertransformer import BetterTransformer
 st_hash_funcs = {PreTrainedModel: lambda model: model.name_or_path,
                  PreTrainedTokenizer: lambda tokenizer: tokenizer.name_or_path,
+                 Parameter: lambda parameter: parameter.data,
+                 Tensor: lambda tensor: tensor.cpu()}
 @st.cache(show_spinner=False, hash_funcs=st_hash_funcs, allow_output_mutation=True)
+def get_resources(multilingual: bool, quantize: bool = True, no_cuda: bool = False) -> Tuple[MBartForConditionalGeneration, AMRMBartTokenizer, AMRLogitsProcessor]:
     """Get the relevant model, tokenizer and logits_processor. The loaded model depends on whether the multilingual
     model is requested, or not. If not, an English-only model is loaded. The model can be optionally quantized
     for better performance.
     :param multilingual: whether or not to load the multilingual model. If not, loads the English-only model
     :param quantize: whether to quantize the model with PyTorch's 'quantize_dynamic'
+    :param no_cuda: whether to disable CUDA, even if it is available
     :return: the loaded model, tokenizer, and logits processor
     """
     if multilingual:
     model = BetterTransformer.transform(model, keep_original_model=False)
     model.resize_token_embeddings(len(tokenizer))
+    if torch.cuda.is_available() and not no_cuda:
+        model = model.to("cuda")
+    elif quantize:  # Quantization not supported on CUDA
         model = quantize_dynamic(model, {nn.Linear, nn.Dropout, nn.LayerNorm}, dtype=qint8)
     logits_processor = AMRLogitsProcessor(tokenizer, model.config.max_length)
     """
     tokenizer.src_lang = LANGUAGES[src_lang]
     encoded = tokenizer(text, return_tensors="pt")
+    encoded = {k: v.to(model.device) for k, v in encoded.items()}
+    generated = model.generate(**encoded, **gen_kwargs).cpu()
     return tokenizer.decode_and_fix(generated)[0]