File size: 3,808 Bytes
dc54faf
 
 
1dbb9f0
c298b5c
 
7cc71c0
4c8a6f3
56abc69
 
 
 
dc54faf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aaf7aaf
 
 
 
dc54faf
 
4c8a6f3
2678939
56abc69
 
 
4c8a6f3
56abc69
9875a43
6e9cf31
 
 
 
 
 
 
 
 
 
4c8a6f3
a0ebadb
a3ad85c
 
 
 
 
 
 
 
 
 
 
 
 
9c9db31
 
 
 
 
a3ad85c
 
 
 
 
 
dc54faf
69c15f2
a925a65
49fdcd9
a3ad85c
6e9cf31
 
 
 
 
 
b26b237
 
6e9cf31
 
 
b26b237
 
 
 
6e9cf31
a925a65
2f706b1
dc54faf
 
56abc69
 
a925a65
 
56abc69
 
dc54faf
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
import faiss
#from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
import numpy as np
from pypdf import PdfReader
from gradio_pdf import PDF
from pdf2image import convert_from_path
from transformers import pipeline
from pathlib import Path

# Display metadata for the Space (heading plus Markdown blurb).
# NOTE(review): the gr.Interface call in this file is built without
# `title=` / `description=`, so these strings appear to be unused — confirm.
title = "Mistral-7B-Instruct-GGUF Run On CPU-Basic Free Hardware"

description = """
🔎 [Mistral AI's Mistral 7B Instruct v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) [GGUF format model](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF) , 4-bit quantization balanced quality gguf version, running on CPU. English Only (Also support other languages but the quality's not good). Using [GitHub - llama.cpp](https://github.com/ggerganov/llama.cpp) [GitHub - gpt4all](https://github.com/nomic-ai/gpt4all). 
🔨 Running on CPU-Basic free hardware. Suggest duplicating this space to run without a queue. 
Mistral does not support system prompt symbol (such as ```<<SYS>>```) now, input your system prompt in the first message if you need. Learn more: [Guardrailing Mistral 7B](https://docs.mistral.ai/usage/guardrailing). 
"""

# Reference links:
# [Model From TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
# [Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)

# Directory and file name of the quantized Mistral weights (GGUF format).
model_path = "models"
model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"

# Fetch the weights from the Hugging Face Hub into `model_path` so that
# GPT4All can load them from that directory below.
hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)

print("Start the model init process")
# The file was downloaded just above, hence allow_download=False.
model = GPT4All(model_name, model_path, allow_download=False, device="cpu")

# Mistral instruct prompt wrapper and a French system prompt.
# NOTE(review): "assitant" looks like a typo for "assistant"; it is left
# untouched here because it is runtime text sent to the model.
model.config["promptTemplate"] = "[INST] {0} [/INST]"
model.config["systemPrompt"] = "Tu es un assitant et tu dois répondre en français"
# NOTE(review): this sets a private gpt4all attribute; presumably it forces
# generate() to run outside a chat session — confirm against the installed
# gpt4all version.
model._is_chat_session_activated = False

# Passed as `max_tokens` to model.generate() when answering a question.
max_new_tokens = 2048

# Embedding model used for both the document chunks and the questions.
# No model name is given, so the library's default model is used.
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Length, in characters, of each text chunk cut from the document.
chunk_size = 2048

# Open the source PDF; the path is relative to the current working directory.
reader = PdfReader("./resource/NGAP 01042024.pdf")

# Extract the text of every page, in order, and merge it into one string with
# a single space between consecutive pages.
text = ' '.join(page.extract_text() for page in reader.pages)

# Cut the text into consecutive, non-overlapping windows of `chunk_size`
# characters; the last window may be shorter.
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]


def get_text_embedding(text):
    """Embed ``text`` with the module-level ``embeddings`` model.

    Returns whatever ``embeddings.embed_query`` produces for the given string.
    """
    vector = embeddings.embed_query(text)
    return vector

# Embed every chunk into one matrix (one row per chunk). FAISS operates on
# float32 data, so request that dtype explicitly rather than relying on an
# implicit conversion of NumPy's default float64 array.
text_embeddings = np.array(
    [get_text_embedding(chunk) for chunk in chunks],
    dtype=np.float32,
)

# With no chunks the array is not 2-D and has no embedding dimension to read;
# fail with a clear message instead of an IndexError on `shape[1]`.
if text_embeddings.ndim != 2:
    raise ValueError("No text chunks to index: the source document produced no text")

# Exact (brute-force) L2 index over the chunk embeddings.
d = text_embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(text_embeddings)

print("Finish the model init process")

def qa(question: str) -> str:
    """Answer ``question`` from the document chunks closest to it.

    The question is embedded, the two nearest chunks are looked up in the
    module-level FAISS ``index``, and they are inserted as context into a
    French instruction prompt that is sent to the module-level ``model``.

    Args:
        question: The user's question.

    Returns:
        The text generated by the model, or an empty string when the
        question is empty or whitespace-only (the model is not run then).
    """
    # Nothing to retrieve or answer for a blank question; skip the model call.
    if not question or not question.strip():
        return ""

    # FAISS expects a 2-D float32 matrix: one row holding the query vector.
    question_embeddings = np.array([get_text_embedding(question)], dtype=np.float32)

    _, neighbours = index.search(question_embeddings, k=2)  # distances, indices
    # FAISS pads missing neighbours with -1 when the index holds fewer than k
    # vectors; drop those so that -1 does not silently select chunks[-1].
    retrieved_chunk = [chunks[i] for i in neighbours[0].tolist() if i >= 0]

    # Insert the chunks as plain text; formatting the list itself would put
    # its Python repr (brackets, quotes, escaped newlines) into the prompt.
    context = "\n\n".join(retrieved_chunk)

    prompt = f"""<s>[INST]
            Les informations contextuelles sont ci-dessous.
            ---------------------
            {context}
            ---------------------
            [/INST]
            Compte tenu des informations contextuelles et non des connaissances préalables, répondez à la requête.  </s>
            [INST] Requête: {question} [/INST]
            Réponse: 
                """
    outputs = model.generate(prompt=prompt, temp=0.5, top_k=40, top_p=1, max_tokens=max_new_tokens)
    # join() leaves a plain string unchanged and would also concatenate an
    # iterable of string pieces.
    return "".join(outputs)


# Gradio UI: one "Question" textbox in, one textbox out, wired to qa().
demo = gr.Interface(
    fn=qa,
    inputs=[gr.Textbox(label="Question")],
    outputs=gr.Textbox(),
)

if __name__ == "__main__":
    # Enable the request queue with a maximum size of 3, then start the app.
    queued_demo = demo.queue(max_size=3)
    queued_demo.launch()