mbochniak01 Claude Sonnet 4.6 committed on
Commit
14d263b
·
1 Parent(s): 7a72ab0

Load T5-small tokenizer for Vectara HHEM v2

Browse files

Vectara's model doesn't ship a tokenizer config — it reuses T5-small's
SentencePiece vocab. Load the tokenizer from t5-small explicitly instead of from the model repo.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. Dockerfile +1 -1
  2. backend/grader.py +1 -1
Dockerfile CHANGED
@@ -15,7 +15,7 @@ RUN python -c "\
15
  from sentence_transformers import SentenceTransformer; \
16
  from transformers import T5Tokenizer, pipeline; \
17
  SentenceTransformer('all-MiniLM-L6-v2'); \
18
- tok = T5Tokenizer.from_pretrained('vectara/hallucination_evaluation_model'); \
19
  pipeline('text-classification', model='vectara/hallucination_evaluation_model', tokenizer=tok, trust_remote_code=True)"
20
 
21
  COPY knowledge/ ./knowledge/
 
15
  from sentence_transformers import SentenceTransformer; \
16
  from transformers import T5Tokenizer, pipeline; \
17
  SentenceTransformer('all-MiniLM-L6-v2'); \
18
+ tok = T5Tokenizer.from_pretrained('t5-small'); \
19
  pipeline('text-classification', model='vectara/hallucination_evaluation_model', tokenizer=tok, trust_remote_code=True)"
20
 
21
  COPY knowledge/ ./knowledge/
backend/grader.py CHANGED
@@ -41,7 +41,7 @@ def get_nli_model() -> Any:
41
  """Return the shared Vectara faithfulness pipeline, loading it on first call."""
42
  global _nli_model
43
  if _nli_model is None:
44
- tokenizer = T5Tokenizer.from_pretrained(NLI_MODEL)
45
  _nli_model = hf_pipeline(
46
  "text-classification",
47
  model=NLI_MODEL,
 
41
  """Return the shared Vectara faithfulness pipeline, loading it on first call."""
42
  global _nli_model
43
  if _nli_model is None:
44
+ tokenizer = T5Tokenizer.from_pretrained("t5-small")
45
  _nli_model = hf_pipeline(
46
  "text-classification",
47
  model=NLI_MODEL,