Spaces:
Sleeping
Sleeping
File size: 3,884 Bytes
b4e5268 b48a9c3 b4e5268 9774287 b4e5268 9774287 b4e5268 b48a9c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import os
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
BitsAndBytesConfig,
pipeline
)
from transformers import BitsAndBytesConfig
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
import transformers
import transformers
model_name='mistralai/Mistral-7B-Instruct-v0.1'
from huggingface_hub import login
login(token = 'HF_TOKEN')
model_config = transformers.AutoConfig.from_pretrained(
model_name,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
#################################################################
# bitsandbytes parameters
#################################################################
# Activate 4-bit precision base model loading
use_4bit = True
# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"
# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"
# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False
#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
load_in_4bit=use_4bit,
bnb_4bit_quant_type=bnb_4bit_quant_type,
bnb_4bit_compute_dtype=compute_dtype,
bnb_4bit_use_double_quant=use_nested_quant,
)
#############################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
model_name,
quantization_config=bnb_config,
)
# Connect query to FAISS index using a retriever
retriever = db.as_retriever(
search_type="mmr",
search_kwargs={'k': 1}
)
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
text_generation_pipeline = transformers.pipeline(
model=model,
tokenizer=tokenizer,
task="text-generation",
temperature=0.02,
repetition_penalty=1.1,
return_full_text=True,
max_new_tokens=512,
)
prompt_template = """
### [INST]
Instruction: You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided without using prior knowledge.You answer in FRENCH
Analyse carefully the context and provide a direct answer based on the context.
Answer in french only
{context}
Vous devez répondre aux questions en français.
### QUESTION:
{question}
[/INST]
Answer in french only
Vous devez répondre aux questions en français.
"""
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
# Create prompt from prompt template
prompt = PromptTemplate(
input_variables=["question"],
template=prompt_template,
)
# Create llm chain
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)
from langchain.chains import RetrievalQA
retriever.search_kwargs = {'k':1}
qa = RetrievalQA.from_chain_type(
llm=mistral_llm,
chain_type="stuff",
retriever=retriever,
chain_type_kwargs={"prompt": prompt},
)
import gradio as gr
def qna_chatbot(message, history):
res = qa(message)
answer = res["result"]
return answer
chat_interface = gr.ChatInterface(qna_chatbot)
if __name__ == "__main__":
chat_interface.launch(debug=True) |