thinh111 committed
Commit 615a1d7 • 1 Parent(s): 7cba8ca
Files changed (7)
  1. README.md +2 -13
  2. app.py +42 -63
  3. data/rag_questionAndAnwser.txt +0 -0
  4. model.py +118 -0
  5. process-documents.py +64 -0
  6. requirements.txt +12 -1
  7. setup.py +18 -0
README.md CHANGED
@@ -1,13 +1,2 @@
- ---
- title: ResvuChatbox
- emoji: 💬
- colorFrom: yellow
- colorTo: purple
- sdk: gradio
- sdk_version: 4.36.1
- app_file: app.py
- pinned: false
- license: apache-2.0
- ---
-
- An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+ This is a Gradio-based chatbot interface for interacting with the Resvu API. Users can input messages and receive responses from the AI.
+
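Once this app is live on the Space, the same chat endpoint can also be called outside the browser. Below is a minimal sketch using gradio_client; the Space id "thinh111/ResvuChatbox" and the "/predict" endpoint name are assumptions based on the README title and the default gr.Interface route, not confirmed by this commit.

from gradio_client import Client

client = Client("thinh111/ResvuChatbox")  # hypothetical Space id
reply = client.predict("Hi, how are you", api_name="/predict")  # default route for a gr.Interface app
print(reply)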
app.py CHANGED
@@ -1,63 +1,42 @@
- import gradio as gr
- from huggingface_hub import InferenceClient
-
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
-
- if __name__ == "__main__":
-     demo.launch()
+ import subprocess
+ import sys
+ import gradio as gr
+ from utils import initialize_database
+ from model import llm_chain_response, get_response_value
+ from process_documents import create_db_from_files
+
+ def run_setup():
+     subprocess.check_call([sys.executable, "setup.py"])
+
+ # Run the setup script
+ run_setup()
+ llm_chain = llm_chain_response()
+
+ def chat_with_mistral(user_input):
+     if not user_input:
+         return "The message must not be empty."
+     response = llm_chain.invoke({"query": user_input})
+     print(response)
+
+     print("---------------Response--------------")
+     print(get_response_value(response["result"]))
+     return get_response_value(response["result"])
+
+ def main():
+     # Initialize the database
+     create_db_from_files()
+
+     # Set up and launch the Gradio interface
+     iface = gr.Interface(
+         fn=chat_with_mistral,
+         inputs=gr.components.Textbox(label="Enter Your Message"),
+         outputs=gr.components.Markdown(label="Chatbot Response"),
+         title="Resvu AI Chatbot",
+         description="Interact with the Resvu API via this chatbot. Enter a message and get a response.",
+         examples=["Hi, how are you", "Who are you?", "What services do you offer?", "How can I find out about upcoming community events?"],
+         allow_flagging="never"
+     )
+     iface.launch()
+
+ if __name__ == "__main__":
+     main()
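For a quick check of the RAG flow without launching the UI, the chain can be exercised directly. A minimal sketch, assuming the FAISS index under vectorstores/db_faiss has already been built by create_db_from_files() and enough GPU memory is available for the 4-bit Mistral model:

from model import llm_chain_response, get_response_value

llm_chain = llm_chain_response()  # loads the model, retriever and prompt once
raw = llm_chain.invoke({"query": "What services do you offer?"})
print(get_response_value(raw["result"]))  # keep only the text after "### Response:"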
data/rag_questionAndAnwser.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.py ADDED
@@ -0,0 +1,118 @@
+ from unsloth import FastLanguageModel
+ import torch
+ max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
+ dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+ load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
+
+ from langchain_community.llms import CTransformers
+ from langchain.chains import RetrievalQA
+ from langchain.prompts import PromptTemplate
+ from langchain_community.embeddings import GPT4AllEmbeddings
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.llms import HuggingFacePipeline
+ from langchain.callbacks.base import BaseCallbackHandler
+ from transformers import pipeline
+
+ # 4bit pre quantized models we support for 4x faster downloading + no OOMs.
+ fourbit_models = [
+     "unsloth/mistral-7b-bnb-4bit",
+     "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
+     "unsloth/llama-2-7b-bnb-4bit",
+     "unsloth/llama-2-13b-bnb-4bit",
+     "unsloth/codellama-34b-bnb-4bit",
+     "unsloth/tinyllama-bnb-4bit",
+     "unsloth/gemma-7b-bnb-4bit", # New Google 6 trillion tokens model 2.5x faster!
+     "unsloth/gemma-2b-bnb-4bit",
+ ] # More models at https://huggingface.co/unsloth
+
+ template = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+ ### Instruction:
+ You are ResVuAssist, a helpful bot who reads texts and answers questions about them.
+
+ ### Input:
+ {context}
+ QUESTION: {question}
+
+ ### Response:
+ """
+
+ # Configuration
+ vector_db_path = "vectorstores/db_faiss"
+
+
+ def initialModelAndTokenizer():
+     model, tokenizer = FastLanguageModel.from_pretrained(
+         model_name = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
+         max_seq_length = max_seq_length,
+         dtype = dtype,
+         load_in_4bit = load_in_4bit,
+         # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
+     )
+     model = FastLanguageModel.get_peft_model(
+         model,
+         r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+         target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
+                           "gate_proj", "up_proj", "down_proj",],
+         lora_alpha = 16,
+         lora_dropout = 0, # Supports any, but = 0 is optimized
+         bias = "none", # Supports any, but = "none" is optimized
+         # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
+         use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
+         random_state = 3407,
+         use_rslora = False, # We support rank stabilized LoRA
+         loftq_config = None, # And LoftQ
+     )
+     return model, tokenizer
+
+ def create_pipeline():
+     model, tokenizer = initialModelAndTokenizer()
+     pipe = pipeline(
+         "text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         max_new_tokens=512,
+         temperature=0.1,
+         top_p=0.95,
+         repetition_penalty=1.15
+     )
+     return pipe
+
+ # Create the prompt template
+ def creat_prompt(template):
+     prompt = PromptTemplate(template = template, input_variables=["context", "question"])
+     return prompt
+
+ # Create a simple retrieval QA chain
+ def create_qa_chain(prompt, llm, db):
+     llm_chain = RetrievalQA.from_chain_type(
+         llm = llm,
+         chain_type = "stuff",
+         # retriever = db.as_retriever(search_kwargs = {"k":8}, max_tokens_limit=1024),
+         retriever = db.as_retriever(search_kwargs = {"k": 15}, max_tokens_limit=4096),
+         return_source_documents = False,
+         chain_type_kwargs = {'prompt': prompt},
+     )
+     return llm_chain
+
+ # Read from the vector DB
+ def read_vectors_db():
+     # Embedding
+     embedding_model = GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf")
+     db = FAISS.load_local(vector_db_path, embedding_model, allow_dangerous_deserialization=True)
+     return db
+
+ def get_response_value(text):
+     start = text.find('### Response:')
+     if start != -1:
+         return text[start + len('### Response:'):].strip()
+     return None
+
+ def llm_chain_response():
+     pipe = create_pipeline()
+     db = read_vectors_db()
+     prompt = creat_prompt(template)
+     llm = HuggingFacePipeline(pipeline=pipe)
+
+     llm_chain = create_qa_chain(prompt, llm, db)
+     return llm_chain
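get_response_value exists because the text-generation pipeline typically returns the prompt together with the completion, so the useful answer is everything after the "### Response:" marker. A tiny illustration on a synthetic string (not actual model output):

sample = "### Instruction:\n...\n### Response:\nResvu offers community management services."
print(get_response_value(sample))  # -> "Resvu offers community management services."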
process-documents.py ADDED
@@ -0,0 +1,64 @@
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
+ from langchain_community.document_loaders import UnstructuredFileLoader, DirectoryLoader
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings import GPT4AllEmbeddings
+
+ # Declare variables
+ pdf_data_path = "/data"
+ vector_db_path = "vectorstores/db_faiss"
+
+ # Function 1: create a vector DB from a single text snippet
+ def create_db_from_text():
+     raw_text = """
+     Thinh created you who is a chatbox at Resvu,
+     """
+
+     # Split the text into chunks
+     text_splitter = CharacterTextSplitter(
+         separator="\n",
+         chunk_size=100,
+         chunk_overlap=20,
+         length_function=len
+     )
+
+     chunks = text_splitter.split_text(raw_text)
+
+     # Embedding
+     embedding_model = GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf")
+
+     # Store in the FAISS vector DB
+     db = FAISS.from_texts(texts=chunks, embedding=embedding_model)
+     db.save_local(vector_db_path)
+     return db
+
+ # Define the file types you want to load
+ file_types = ["*.pdf", "*.txt", "*.doc", "*.docx"]
+
+ def create_db_from_files():
+     # Declare a loader to scan the entire data directory
+     # loader = DirectoryLoader(pdf_data_path, glob=file_types, loader_cls = PyPDFLoader)
+     # documents = loader.load()
+
+     # Create a loader for each file type
+     loaders = []
+     for file_type in file_types:
+         loader = DirectoryLoader(
+             pdf_data_path,
+             glob=file_type,
+             loader_cls=UnstructuredFileLoader
+         )
+         loaders.append(loader)
+
+     # Load all documents
+     documents = []
+     for loader in loaders:
+         documents.extend(loader.load())
+
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
+     chunks = text_splitter.split_documents(documents)
+
+     # Embedding
+     embedding_model = GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf")
+     db = FAISS.from_documents(chunks, embedding_model)
+     db.save_local(vector_db_path)
+     return db
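As a sanity check after the index has been built, the saved FAISS store can be reloaded and queried directly. A short sketch, assuming the same GPT4All embedding model and the vectorstores/db_faiss path used above:

from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS

embedding_model = GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf")
db = FAISS.load_local("vectorstores/db_faiss", embedding_model, allow_dangerous_deserialization=True)
for doc in db.similarity_search("upcoming community events", k=3):
    print(doc.page_content[:120])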
requirements.txt CHANGED
@@ -1 +1,12 @@
- huggingface_hub==0.22.2
+ transformers
+ langchain
+ langchain-community
+ torch
+ pypdf
+ sentence-transformers
+ gpt4all
+ faiss-cpu
+ openai
+ gradio
+ unstructured
+ python-docx
setup.py ADDED
@@ -0,0 +1,18 @@
+ import subprocess
+ import sys
+
+ def install(*packages):
+     subprocess.check_call([sys.executable, "-m", "pip", "install", *packages])
+
+ def setup_environment():
+     print("Installing required packages...")
+     install('git+https://github.com/unslothai/unsloth.git#egg=unsloth[colab-new]')
+     install('--no-deps', 'xformers')
+     install('trl<0.9.0')
+     install('peft')
+     install('accelerate')
+     install('bitsandbytes')
+     print("Packages installed successfully.")
+
+ if __name__ == "__main__":
+     setup_environment()