aaliyaan committed on
Commit
353af85
1 Parent(s): 1ec8383

Fix tokenizer issue by using slow tokenizer

Browse files
Files changed (2) hide show
  1. app.py +3 -3
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoTokenizer
3
  from PyPDF2 import PdfReader
4
 
5
  # Models and tokenizers setup
@@ -10,11 +10,11 @@ models = {
10
  },
11
  "PDF Summarizer (T5)": {
12
  "model": AutoModelForSeq2SeqLM.from_pretrained("t5-small"),
13
- "tokenizer": AutoTokenizer.from_pretrained("t5-small"),
14
  },
15
  "Broken Answer (T0pp)": {
16
  "model": AutoModelForSeq2SeqLM.from_pretrained("bigscience/T0pp"),
17
- "tokenizer": AutoTokenizer.from_pretrained("bigscience/T0pp"),
18
  },
19
  }
20
 
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
3
  from PyPDF2 import PdfReader
4
 
5
  # Models and tokenizers setup
 
10
  },
11
  "PDF Summarizer (T5)": {
12
  "model": AutoModelForSeq2SeqLM.from_pretrained("t5-small"),
13
+ "tokenizer": AutoTokenizer.from_pretrained("t5-small", use_fast=False), # Use the slow tokenizer
14
  },
15
  "Broken Answer (T0pp)": {
16
  "model": AutoModelForSeq2SeqLM.from_pretrained("bigscience/T0pp"),
17
+ "tokenizer": AutoTokenizer.from_pretrained("bigscience/T0pp", use_fast=False), # Use the slow tokenizer
18
  },
19
  }
20
 
requirements.txt CHANGED
@@ -2,3 +2,4 @@ torch
2
  gradio
3
  transformers
4
  PyPDF2
 
 
2
  gradio
3
  transformers
4
  PyPDF2
5
+ sentencepiece