JBHF committed on
Commit
97ed0a5
1 Parent(s): ac715c8

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +28 -1
rag.py CHANGED
@@ -1,5 +1,18 @@
1
  # rag.py
2
  # https://github.com/vndee/local-rag-example/blob/main/rag.py
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  from langchain.vectorstores import Chroma
5
  from langchain.chat_models import ChatOllama
@@ -11,6 +24,9 @@ from langchain.schema.runnable import RunnablePassthrough
11
  from langchain.prompts import PromptTemplate
12
  from langchain.vectorstores.utils import filter_complex_metadata
13
 
 
 
 
14
 
15
  class ChatPDF:
16
  vector_store = None
@@ -18,7 +34,18 @@ class ChatPDF:
18
  chain = None
19
 
20
  def __init__(self):
21
- self.model = ChatOllama(model="mistral")
 
 
 
 
 
 
 
 
 
 
 
22
  self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
23
  self.prompt = PromptTemplate.from_template(
24
  """
 
1
  # rag.py
2
  # https://github.com/vndee/local-rag-example/blob/main/rag.py
3
+ # ADAPTED TO USE HF LLM INSTEAD OF OLLAMA self.model = ChatOllama(model="mistral") BY J. BOURS 01-03-2024
4
+ # EVERNOTE:
5
+ # https://www.evernote.com/shard/s313/nl/41973486/282c6fc8-9ed5-a977-9895-1eb23941bb4c?title=REQUIREMENTS%20FOR%20A%20LITERATURE%20BASED%20RESEARCH%20LBR%20SYSTEM%20-%20FUNCTIONAL%20AND%20TECHNICAL%20REQUIREMENTS%20-%20ALEXANDER%20UNZICKER%20-%2026-02-2024
6
+ #
7
+ # mistralai/Mistral-7B-v0.1 · Hugging Face
8
+ # https://huggingface.co/mistralai/Mistral-7B-v0.1?library=true
9
+ #
10
+ # Load model directly
11
+ # from transformers import AutoTokenizer, AutoModelForCausalLM
12
+ #
13
+ # tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
14
+ # model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
15
+
16
 
17
  from langchain.vectorstores import Chroma
18
  from langchain.chat_models import ChatOllama
 
24
  from langchain.prompts import PromptTemplate
25
  from langchain.vectorstores.utils import filter_complex_metadata
26
 
27
+ from transformers import AutoTokenizer, AutoModelForCausalLM
28
+
29
+
30
 
31
  class ChatPDF:
32
  vector_store = None
 
34
  chain = None
35
 
36
  def __init__(self):
37
+ # self.model = ChatOllama(model="mistral") # ORIGINAL
38
+ # mistralai/Mistral-7B-v0.1 · Hugging Face
39
+ # https://huggingface.co/mistralai/Mistral-7B-v0.1?library=true
40
+ #
41
+ # Load model directly
42
+ # from transformers import AutoTokenizer, AutoModelForCausalLM
43
+ #
44
+ # tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
45
+ # model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
46
+ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
47
+ self.model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
48
+
49
  self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
50
  self.prompt = PromptTemplate.from_template(
51
  """