heymenn committed on
Commit
0a6c112
1 Parent(s): a1f97d1

Update split_files_to_excel.py

Browse files
Files changed (1) hide show
  1. split_files_to_excel.py +6 -1
split_files_to_excel.py CHANGED
@@ -26,6 +26,7 @@ from pypdf import PdfReader
26
  import pandas as pd
27
 
28
 
 
29
  MODEL = "thenlper/gte-base"
30
  CHUNK_SIZE = 1000
31
  CHUNK_OVERLAP = 200
@@ -35,11 +36,15 @@ embeddings = HuggingFaceEmbeddings(
35
  cache_folder=os.getenv("SENTENCE_TRANSFORMERS_HOME")
36
  )
37
 
 
 
38
  model_id = "mistralai/Mistral-7B-Instruct-v0.1"
 
39
 
40
  tokenizer = AutoTokenizer.from_pretrained(
41
  model_id,
42
- padding_side="left"
 
43
  )
44
 
45
  text_splitter = CharacterTextSplitter(
 
26
  import pandas as pd
27
 
28
 
29
+
30
  MODEL = "thenlper/gte-base"
31
  CHUNK_SIZE = 1000
32
  CHUNK_OVERLAP = 200
 
36
  cache_folder=os.getenv("SENTENCE_TRANSFORMERS_HOME")
37
  )
38
 
39
+
40
+
41
  model_id = "mistralai/Mistral-7B-Instruct-v0.1"
42
+ acces_token = os.getenv("HUGGINGFACE_SPLITFILES_API_KEY")
43
 
44
  tokenizer = AutoTokenizer.from_pretrained(
45
  model_id,
46
+ padding_side="left",
47
+ token = access_token
48
  )
49
 
50
  text_splitter = CharacterTextSplitter(