pierreguillou committed on
Commit
aac3ac3
1 Parent(s): 6f42898

Update files/functions.py

Browse files
Files changed (1) hide show
  1. files/functions.py +14 -7
files/functions.py CHANGED
@@ -52,15 +52,21 @@ import pytesseract
52
 
53
  ## model / feature extractor / tokenizer
54
 
55
- from transformers import LayoutLMv2ForTokenClassification # LayoutXLMTokenizerFast,
56
-
57
  import torch
58
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
59
 
60
- # model
61
- # tokenizer = LayoutXLMTokenizerFast.from_pretrained(model_id)
62
- model = LayoutLMv2ForTokenClassification.from_pretrained(model_id);
63
- model.to(device);
 
 
 
 
 
 
 
 
64
 
65
  # feature extractor
66
  from transformers import LayoutLMv2FeatureExtractor
@@ -68,7 +74,8 @@ feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
68
 
69
  # tokenizer
70
  from transformers import AutoTokenizer
71
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
 
72
 
73
  ## Key parameters
74
 
 
52
 
53
  ## model / feature extractor / tokenizer
54
 
 
 
55
  import torch
56
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
57
 
58
+ # model 1
59
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
60
+ model_id = "pierreguillou/lilt-xlm-roberta-base-finetuned-with-DocLayNet-base-at-linelevel-ml384"
61
+ tokenizer1 = AutoTokenizer.from_pretrained(model_id)
62
+ model1 = AutoModelForTokenClassification.from_pretrained(model_id);
63
+ model1.to(device);
64
+
65
+ from transformers import LayoutLMv2ForTokenClassification
66
+ # model 2
67
+ model_id = "pierreguillou/layout-xlm-base-finetuned-with-DocLayNet-base-at-linelevel-ml384"
68
+ model2 = LayoutLMv2ForTokenClassification.from_pretrained(model_id);
69
+ model2.to(device);
70
 
71
  # feature extractor
72
  from transformers import LayoutLMv2FeatureExtractor
 
74
 
75
  # tokenizer
76
  from transformers import AutoTokenizer
77
+ tokenizer_id = "xlm-roberta-base"
78
+ tokenizer2 = AutoTokenizer.from_pretrained(tokenizer_id)
79
 
80
  ## Key parameters
81