Joblib
English
llm
human-feedback
weak supervision
data filtering
Inference Endpoints
Christopher Glaze committed on
Commit
cbc0f63
1 Parent(s): fbe1af4

Add nltk resource

Browse files
handler.py CHANGED
@@ -1,5 +1,6 @@
1
 
2
  from typing import Dict, List, Union, Optional
 
3
  from pathlib import Path
4
  import json
5
  import joblib
@@ -10,6 +11,9 @@ import torch
10
  import numpy as np
11
  from sklearn.base import TransformerMixin
12
 
 
 
 
13
  class SimcseGenerator(TransformerMixin):
14
  def __init__(
15
  self, batch_size: int =16, model_name: str = "princeton-nlp/unsup-simcse-bert-base-uncased"
@@ -57,7 +61,7 @@ class EndpointHandler():
57
  def __init__(self, path: str = ""):
58
 
59
  if len(path)==0:
60
- path = Path(__file__).parent
61
  else:
62
  path = Path(path)
63
 
 
1
 
2
  from typing import Dict, List, Union, Optional
3
+ import os
4
  from pathlib import Path
5
  import json
6
  import joblib
 
11
  import numpy as np
12
  from sklearn.base import TransformerMixin
13
 
14
+ LOCAL_PATH = Path(__file__).parent
15
+ nltk.data.path.append(str(LOCAL_PATH/"nltk_data"))
16
+
17
  class SimcseGenerator(TransformerMixin):
18
  def __init__(
19
  self, batch_size: int =16, model_name: str = "princeton-nlp/unsup-simcse-bert-base-uncased"
 
61
  def __init__(self, path: str = ""):
62
 
63
  if len(path)==0:
64
+ path = LOCAL_PATH
65
  else:
66
  path = Path(path)
67
 
nltk_data/tokenizers/punkt/PY3/english.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cad3758596392364e3be9803dbd7ebeda384b68937b488a01365f5551bb942c
3
+ size 406697
nltk_data/tokenizers/punkt/english.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dda37972ae88998a6fd3e3ec002697a6bd362b32d050fda7d7ca5276873092aa
3
+ size 433305