Princess3 committed on
Commit
3647dcd
·
verified ·
1 Parent(s): 791a487

Update 2.py

Browse files
Files changed (1) hide show
  1. 2.py +2 -2
2.py CHANGED
@@ -76,7 +76,7 @@ def create_model_from_folder(folder_path):
76
  s[os.path.basename(r).replace('.', '_')].extend(parse_xml(os.path.join(r, file)))
77
  return DM(dict(s))
78
 
79
- def create_embeddings_and_sentences(folder_path, model_name="sentence-transformers/all-MiniLM-L6-v2"):
80
  t, m, embeddings, ds = AutoTokenizer.from_pretrained(model_name), AutoModel.from_pretrained(model_name), [], []
81
  for r, d, f in os.walk(folder_path):
82
  for file in f:
@@ -91,7 +91,7 @@ def create_embeddings_and_sentences(folder_path, model_name="sentence-transforme
91
  ds.append(text)
92
  return np.vstack(embeddings), ds
93
 
94
- def query_vector_similarity(query, embeddings, ds, model_name="sentence-transformers/all-MiniLM-L6-v2"):
95
  t, m = AutoTokenizer.from_pretrained(model_name), AutoModel.from_pretrained(model_name)
96
  i = t(query, return_tensors="pt", truncation=True, padding=True)
97
  with torch.no_grad():
 
76
  s[os.path.basename(r).replace('.', '_')].extend(parse_xml(os.path.join(r, file)))
77
  return DM(dict(s))
78
 
79
+ def create_embeddings_and_sentences(folder_path, model_name="pile-of-law/legalbert-large-1.7M-1"):
80
  t, m, embeddings, ds = AutoTokenizer.from_pretrained(model_name), AutoModel.from_pretrained(model_name), [], []
81
  for r, d, f in os.walk(folder_path):
82
  for file in f:
 
91
  ds.append(text)
92
  return np.vstack(embeddings), ds
93
 
94
+ def query_vector_similarity(query, embeddings, ds, model_name="pile-of-law/legalbert-large-1.7M-2"):
95
  t, m = AutoTokenizer.from_pretrained(model_name), AutoModel.from_pretrained(model_name)
96
  i = t(query, return_tensors="pt", truncation=True, padding=True)
97
  with torch.no_grad():