import gradio as gr
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download
import faiss
#from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
import numpy as np
from pypdf import PdfReader
from gradio_pdf import PDF
from pdf2image import convert_from_path
from transformers import pipeline
from pathlib import Path
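# Gradio demo: retrieval-augmented question answering over a local PDF, using
# Mistral-7B-Instruct (GGUF, CPU via gpt4all), sentence-transformer embeddings, and a FAISS index.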
title = "Mistral-7B-Instruct-GGUF Run On CPU-Basic Free Hardware"
description = """
🔎 [Mistral AI's Mistral 7B Instruct v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) in [GGUF format](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF), 4-bit quantized for a balance of quality and size, running on CPU. English only (other languages work, but with noticeably lower quality). Built with [GitHub - llama.cpp](https://github.com/ggerganov/llama.cpp) and [GitHub - gpt4all](https://github.com/nomic-ai/gpt4all).
🔨 Running on CPU-Basic free hardware. Consider duplicating this Space to run without a queue.
Mistral does not currently support a system prompt token (such as ```<<SYS>>```); if you need one, include your system prompt in the first message. Learn more: [Guardrailing Mistral 7B](https://docs.mistral.ai/usage/guardrailing).
"""
"""
[Model From TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
[Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)
"""
model_path = "models"
model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
print("Start the model init process")
model = GPT4All(model_name, model_path, allow_download=False, device="cpu")
model.config["promptTemplate"] = "[INST] {0} [/INST]"
# System prompt (French): "You are an assistant and you must answer in French."
model.config["systemPrompt"] = "Tu es un assistant et tu dois répondre en français"
# Disable gpt4all's built-in chat session so generate() receives the raw prompt built below
model._is_chat_session_activated = False
max_new_tokens = 2048
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
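# No model_name is passed, so HuggingFaceEmbeddings uses its default sentence-transformers model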
embeddings = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
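# Read the source PDF and split its text into fixed-size character chunks for retrieval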
chunk_size = 2048
# creating a pdf reader object
reader = PdfReader("./resource/NGAP 01042024.pdf")
text = []
for page in reader.pages:
    # extract the text of each page
    text.append(page.extract_text())
text = ' '.join(text)
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
def get_text_embedding(text):
    return embeddings.embed_query(text)
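# Embed every chunk and store the vectors in an exact-search FAISS index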
# FAISS works on float32 vectors, so cast the embedding matrix explicitly
text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks], dtype=np.float32)
d = text_embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(text_embeddings)
print("Finish the model init process")
def qa(question: str) -> str:
    question_embeddings = np.array([get_text_embedding(question)], dtype=np.float32)
    D, I = index.search(question_embeddings, k=2)  # distances, indices
    retrieved_chunks = [chunks[i] for i in I.tolist()[0]]
    context = "\n".join(retrieved_chunks)
    # French prompt, roughly: "Context information is below. Given the context information
    # and not prior knowledge, answer the query."
    prompt = f"""<s>[INST]
Les informations contextuelles sont ci-dessous.
---------------------
{context}
---------------------
[/INST]
Compte tenu des informations contextuelles et non des connaissances préalables, répondez à la requête. </s>
[INST] Requête: {question} [/INST]
Réponse:
"""
    outputs = model.generate(prompt=prompt, temp=0.5, top_k=40, top_p=1, max_tokens=max_new_tokens)
    return "".join(outputs)
demo = gr.Interface(
    qa,
    [gr.Textbox(label="Question")],  # , PDF(label="Document")
    gr.Textbox(),
)
if __name__ == "__main__":
    demo.queue(max_size=3).launch()