import streamlit as st
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

# Authenticate with the Hugging Face Hub (the gated Mistral weights require a
# token, stored here in Streamlit secrets)
login(token=st.secrets["HF_TOKEN"])

# Load the instruction-tuned Mistral model and its tokenizer
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)
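# A 4-bit quantized load is an option on memory-constrained GPUs (the original
# imports of BitsAndBytesConfig and torch suggest this was intended); a minimal
# sketch, assuming bitsandbytes is installed and a CUDA device is available:
#
#   import torch
#   from transformers import BitsAndBytesConfig
#   bnb_config = BitsAndBytesConfig(
#       load_in_4bit=True,
#       bnb_4bit_compute_dtype=torch.float16,
#   )
#   model = AutoModelForCausalLM.from_pretrained(repo_id, quantization_config=bnb_config)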

# Load the source PDF
loader = PyPDFLoader("test-1.pdf")
data = loader.load()

# Split the documents into 512-character chunks on blank lines
text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=0, separator="\n\n")
texts = text_splitter.split_documents(data)

# Index the chunks in a FAISS vector store using a sentence-transformers embedding model
db = FAISS.from_documents(
    texts,
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L12-v2"),
)



retriever = db.as_retriever(
    search_type="mmr",
    search_kwargs={'k': 1}
)
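# MMR (maximal marginal relevance) re-ranks candidates to balance relevance and
# diversity; with k=1 only the single best-matching chunk is used as context.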



# Wrap the model in a text-generation pipeline
text_generation_pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    do_sample=True,  # sampling must be enabled for the temperature to take effect
    temperature=0.02,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=512,
)
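# Note: return_full_text=True makes the pipeline echo the prompt along with the
# generated answer; set it to False to return only the newly generated text.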

prompt_template = """
### [INST]
Instruction: You are a Q&A assistant. Answer the question as accurately as
possible, based only on the context provided below and without using prior
knowledge. Analyse the context carefully and give a direct answer, in French only.

{context}

### QUESTION:
{question}
[/INST]
"""

# Expose the pipeline as a LangChain-compatible LLM
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Create the prompt from the template; both variables are filled in by the chain
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)



# Build the retrieval QA chain: the retrieved chunk is "stuffed" into the prompt
qa = RetrievalQA.from_chain_type(
    llm=mistral_llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt},
)
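# Illustrative usage (hypothetical question):
#   answer = qa.invoke({"query": "De quoi parle le document ?"})["result"]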


# Streamlit interface
st.title("Chatbot Interface")

# Define function to handle user input and display chatbot response
def chatbot_response(user_input):
    response = qa.invoke({"query": user_input})["result"]
    return response

# Streamlit components
user_input = st.text_input("You:", "")
submit_button = st.button("Send")

# Handle user input
if submit_button:
    if user_input.strip() != "":
        bot_response = chatbot_response(user_input)
        st.text_area("Bot:", value=bot_response, height=200)
    else:
        st.warning("Please enter a message.")
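
# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py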