Th3BossC committed on
Commit
23d152f
•
1 Parent(s): dbd97f0

initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .venv
2
+ **/__pycache__/
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image that installs the Python dependencies and runs app.py as a non-root user.
FROM python:3.11

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

# Install PyTorch and AutoGPTQ from their cu118 wheel indexes, then the
# remaining requirements. --no-cache-dir is used on every install so pip's
# download cache is not baked into the image layers.
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
RUN pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Create and switch to an unprivileged user (uid 1000).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy the application sources, owned by the unprivileged user.
COPY --chown=user . $HOME/app

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from chatBot import create_app
from threading import Thread
from flask import render_template

app = create_app()


@app.route('/home')
def index():
    """Render the base ``layout.html`` template at ``/home``."""
    return render_template('layout.html')


def run():
    """Start the Flask server on all interfaces, port 7860, with debug off."""
    app.run(host='0.0.0.0', port=7860, debug=False)


# Start the server only when this file is executed as a script
# (``python app.py``); importing ``app`` from another module no longer blocks
# on ``app.run``.
if __name__ == '__main__':
    run()
chatBot/__init__.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask
2
+ from flask_cors import CORS
3
+ from chatBot.config import Config
4
+ from flask_login import LoginManager
5
+ from flask_bcrypt import Bcrypt
6
+ from flask_sqlalchemy import SQLAlchemy
7
+
8
# Extension instances are created unbound at import time; create_app() binds
# them to the application through their init_app() methods.
db = SQLAlchemy()

bcrypt = Bcrypt()

login_manager = LoginManager()
# Endpoint of the login view (the ``login`` route of the ``update`` blueprint).
login_manager.login_view = 'update.login'
# Flash category assigned to the login manager's login message.
login_manager.login_message_category = 'info'
15
+
16
+
17
def create_app(config_class=Config):
    """Build and configure the Flask application.

    Args:
        config_class: Object handed to ``app.config.from_object``. Defaults to
            ``chatBot.config.Config``.

    Returns:
        The ``Flask`` app with CORS enabled, the ``db``, ``bcrypt`` and
        ``login_manager`` extensions initialised, and the ``resources`` and
        ``update`` blueprints registered.
    """
    app = Flask(__name__)
    # Load the configuration the caller asked for; previously the argument was
    # ignored and ``Config`` was always used.
    app.config.from_object(config_class)
    CORS(app)

    db.init_app(app)
    bcrypt.init_app(app)
    login_manager.init_app(app)

    # Blueprint modules are imported inside the factory because they import
    # names from this package (e.g. ``from chatBot import bcrypt, db``).
    from chatBot.resources.routes import resources
    app.register_blueprint(resources)
    from chatBot.update.routes import update
    app.register_blueprint(update)
    return app
chatBot/common/__init__.py ADDED
File without changes
chatBot/common/chatgpt.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from chatBot.common.pdfToText import loadLatestPdf

# The OpenAI credential must be supplied through the environment. A key used
# to be hard-coded at this point; any key that was ever committed must be
# treated as leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before starting the application."
    )

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
import pickle
import faiss
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.question_answering import load_qa_chain
from langchain import OpenAI

# NOTE(review): this list is not referenced anywhere else in this module.
urls = [
    'http://en.espn.co.uk/f1/motorsport/story/3836.html',
    'https://www.mercedes-amg-hpp.com/formula-1-engine-facts/#',
    'https://www.redbullracing.com/int-en/five-things-about-yas-marina',
    'https://www.redbull.com/gb-en/history-of-formula-1',
    'https://www.formula1.com/en/information.abu-dhabi-yas-marina-circuit-yas-island.4YtOtpaWvaxWvDBTItP7s6.html',
]


# Pages of the most recent PDF, as returned by loadLatestPdf().
data = loadLatestPdf()


# Split on newlines with chunk_size=1000 and chunk_overlap=200.
text_splitter = CharacterTextSplitter(separator='\n',
                                      chunk_size=1000,
                                      chunk_overlap=200)


docs = text_splitter.split_documents(data)


embeddings = OpenAIEmbeddings()

# Embed the chunks and index them in a FAISS vector store.
vectorStore1_openAI = FAISS.from_documents(docs, embeddings)

# Persist the store to disk, then read it back.
# NOTE: pickle.load is only acceptable here because the file was written by
# this process a moment ago; never unpickle a file from an untrusted source.
with open("faiss_store_openai.pkl", "wb") as f:
    pickle.dump(vectorStore1_openAI, f)

with open("faiss_store_openai.pkl", "rb") as f:
    VectorStore = pickle.load(f)


llm = OpenAI(temperature=0.8, verbose=True)

# Retrieval chain used by getAnswerGpt(); it is called with a 'question' key
# and its output is read through the 'answer' key.
gptModel = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=VectorStore.as_retriever())
chatBot/common/llama.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from auto_gptq import AutoGPTQForCausalLM
3
+ from langchain import HuggingFacePipeline, PromptTemplate
4
+ from langchain.chains import RetrievalQA
5
+ from langchain.document_loaders import PyPDFDirectoryLoader
6
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain.vectorstores import Chroma
9
+ from pdf2image import convert_from_path
10
+ from transformers import AutoTokenizer, TextStreamer, pipeline
11
+ from chatBot.common.pdfToText import loadLatestPdf
12
+ from transformers import LlamaTokenizer
13
+ from langchain.document_loaders import PyPDFLoader
14
+
15
# DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
# Inference is pinned to the CPU; the CUDA auto-detection above is disabled.
DEVICE = "cpu"
print(DEVICE)


# Pages of the most recent PDF, as returned by loadLatestPdf().
data = loadLatestPdf()

# Embedding model used to vectorise the document chunks, placed on DEVICE.
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large", model_kwargs={"device": DEVICE}
)

# Split the pages with chunk_size=1024 and chunk_overlap=64.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts = text_splitter.split_documents(data)

# Build the Chroma vector store from the chunks, with persist_directory "db".
db = Chroma.from_documents(texts, embeddings, persist_directory="db")

# Hugging Face repository and weight-file basename of the quantized model.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
model_basename = "model"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

# NOTE(review): both device_map="auto" and device=DEVICE are passed here;
# confirm which one auto_gptq honours when they disagree.
model = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    device_map="auto",
    revision="gptq-4bit-128g-actorder_True",
    model_basename=model_basename,
    use_safetensors=True,
    trust_remote_code=True,
    inject_fused_attention=False,
    device=DEVICE,
    quantize_config=None,
)
47
+
48
+
49
DEFAULT_SYSTEM_PROMPT = """
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
""".strip()


def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
    """Wrap *prompt* in the ``[INST]`` / ``<<SYS>>`` chat markup.

    Args:
        prompt: User-turn text placed before the closing ``[/INST]`` tag.
        system_prompt: Text placed between ``<<SYS>>`` and ``<</SYS>>``.

    Returns:
        The system section, a blank line, then the prompt, all enclosed in a
        single ``[INST] ... [/INST]`` pair.
    """
    system_section = f"<<SYS>>\n{system_prompt}\n<</SYS>>"
    return f"[INST] {system_section}\n\n{prompt} [/INST]"
64
+
65
# Streams generated text as it is produced, skipping the prompt and special tokens.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# NOTE(review): temperature=0 is passed together with top_p=0.95; confirm the
# installed transformers version accepts this combination as intended.
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    temperature=0,
    top_p=0.95,
    repetition_penalty=1.15,
    streamer=streamer,
)

# LangChain wrapper around the transformers pipeline.
llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})

# System instruction used for retrieval QA in place of DEFAULT_SYSTEM_PROMPT.
SYSTEM_PROMPT = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."

# Prompt text with {context} and {question} placeholders, wrapped in the chat
# markup by generate_prompt().
template = generate_prompt(
    """
{context}

Question: {question}
""",
    system_prompt=SYSTEM_PROMPT,
)

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

# Retrieval QA chain: fetches k=2 chunks from the Chroma store, passes them to
# the LLM with chain_type "stuff", and also returns the source documents.
llamaModel = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)
chatBot/common/pdfToText.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.document_loaders import PyPDFLoader
2
+ import os
3
+
4
+ pdfLocation = 'chatBot/static/pdfs/'
5
+
6
+ def loadLatestPdf():
7
+ pdfCount = len(os.listdir(pdfLocation))
8
+ loader = PyPDFLoader(f"{pdfLocation}/{pdfCount}.pdf")
9
+ pages = loader.load_and_split()
10
+ data=pages
11
+
12
+ return data
chatBot/common/utils.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from chatBot.common.chatgpt import gptModel
2
+ from chatBot.common.llama import llamaModel
3
+
4
+
5
def getAnswerLlama(question):
    """Answer *question* with the Llama retrieval chain.

    Args:
        question: The user's question text.

    Returns:
        The generated answer text only.
    """
    # llamaModel is built with return_source_documents=True, so its raw output
    # also carries Document objects, which cannot be serialised into the JSON
    # response. Return just the answer, as getAnswerGpt does.
    return llamaModel(question)['result']
7
+
8
def getAnswerGpt(question):
    """Answer *question* with the OpenAI retrieval chain.

    Args:
        question: The user's question text.

    Returns:
        The value stored under ``'answer'`` in the chain's outputs.
    """
    chain_outputs = gptModel({'question': question}, return_only_outputs=True)
    return chain_outputs['answer']
chatBot/config.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
import os


class Config:
    """Application settings loaded by ``create_app`` via ``from_object``."""

    # SECRET_KEY and PASSWORD can now be overridden through environment
    # variables of the same name.
    # NOTE: the fallback values are committed to the repository and should be
    # treated as compromised; set real values in the deployment environment.
    SECRET_KEY = os.environ.get('SECRET_KEY', '5791628bb0b13ce0c676dfde280ba245')
    PASSWORD = os.environ.get('PASSWORD', 'saaddiditallonhisown')
    # Directory where uploaded PDFs are stored.
    PDF_PATH = 'chatBot/static/pdfs/'
    SQLALCHEMY_DATABASE_URI = 'sqlite:///site.db'
6
+
chatBot/models.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import current_app
2
+ from flask_sqlalchemy import SQLAlchemy
3
+ from chatBot import login_manager, db
4
+ from flask_login import UserMixin
5
+
6
+
7
@login_manager.user_loader
def load_user(user_id):
    """Return the ``User`` whose primary key is ``int(user_id)``.

    Registered on ``login_manager`` as its ``user_loader`` callback.
    """
    return User.query.get(int(user_id))
10
+
11
+
12
class User(db.Model, UserMixin):
    """Account record holding a unique username and a password string."""

    # Integer primary key.
    id = db.Column(db.Integer, primary_key = True)
    # Required and unique, at most 20 characters.
    username = db.Column(db.String(20), unique = True, nullable = False)
    # Required, at most 60 characters.
    password = db.Column(db.String(60), nullable = False)

    def __repr__(self):
        """Return ``User('<username>')``."""
        return f"User('{self.username}')"
19
+
20
+
chatBot/resources/__init__.py ADDED
File without changes
chatBot/resources/routes.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Blueprint, request
2
+ from flask_restful import Api, Resource
3
+ from chatBot.common.utils import getAnswerLlama, getAnswerGpt
4
+
5
resources = Blueprint('resources', __name__)
api = Api(resources)

# Response returned when a request carries no question.
_MISSING_QUESTION = (
    {'message': "A 'question' must be supplied in the JSON body or the query string."},
    400,
)


def _question_from_request():
    """Return the ``question`` sent with the current request, or ``None``.

    The JSON body is checked first, then the query string, so clients that
    cannot attach a body to a GET request can call ``?question=...``. A
    missing or non-JSON body is tolerated instead of raising.
    """
    payload = request.get_json(silent=True)
    question = payload.get('question') if isinstance(payload, dict) else None
    if question is None:
        question = request.args.get('question')
    return question or None


class Gpt(Resource):
    """``/gpt``: answer a question with the OpenAI-backed chain."""

    def get(self):
        """Return ``{'answer': ...}``, or a 400 payload when no question is given."""
        question = _question_from_request()
        if question is None:
            return _MISSING_QUESTION
        return {'answer': getAnswerGpt(question)}

    def post(self):
        """Same as ``get``; POST is the usual verb for a JSON body."""
        return self.get()


api.add_resource(Gpt, '/gpt')


class Llama(Resource):
    """``/llama``: answer a question with the Llama-backed chain."""

    def get(self):
        """Return ``{'answer': ...}``, or a 400 payload when no question is given."""
        question = _question_from_request()
        if question is None:
            return _MISSING_QUESTION
        return {'answer': getAnswerLlama(question)}

    def post(self):
        """Same as ``get``; POST is the usual verb for a JSON body."""
        return self.get()


api.add_resource(Llama, '/llama')
chatBot/static/logo.jpg ADDED
chatBot/static/main.css ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Global defaults: border-box sizing and white text on every element. */
* {
    box-sizing: border-box;
    color: white;
}

/* Dark page laid out as a centred vertical flex column.
   The earlier 100px padding/margin declarations were dead code: the 0px
   values later in the same rule always overrode them. */
body {
    background-color: #222;
    padding: 0px;
    margin: 0px;
    font-family: Arial, Helvetica, sans-serif;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
    height: 100dvh;
    gap: 1rem;
    overflow-y: scroll;
}

/* Circular crop for elements with class "img" (used on the logo). */
.img {
    border-radius: 50%;
}

/* Half-width rounded card that holds the page content. */
.container {
    width: 50%;
    padding: 2rem;
    border-radius: 0.8rem;
    background-color: #333;
}

/* Stack the form controls vertically with even spacing. */
main > form > fieldset {
    display: flex;
    flex-direction: column;
    gap: 1rem;
}
chatBot/static/pdfs/1.pdf ADDED
Binary file (754 kB). View file
 
chatBot/static/pdfs/2.pdf ADDED
Binary file (754 kB). View file
 
chatBot/templates/layout.html ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
{# Base layout: loads Bootstrap and main.css, shows the logo, renders flashed
   messages, and exposes a "content" block for child templates. #}
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-T3c6CoIi6uLrA9TneNEoa7RxnatzjcDSCmG1MXxSR1GAsXEV/Dwwykc2MPK8M2HN" crossorigin="anonymous">
    {# "defer" and "src" were fused into one attribute name, so the script had
       no source and Bootstrap's JavaScript never loaded. #}
    <script defer src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/js/bootstrap.bundle.min.js" integrity="sha384-C6RzsynM9kWDrMNeT87bh95OGNyZPhcTNXj1NW7RuBCsyN/o0jlpcV8Qyq46cDfL" crossorigin="anonymous"></script>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='main.css') }}">
    <title>Document</title>
</head>
<body>
    <div class="image">
        <img src="{{ url_for('static', filename='logo.jpg') }}" alt="logo" class="img">
    </div>
    <div class="container">
        {# Each flashed message becomes a Bootstrap alert styled by its category. #}
        {% with messages = get_flashed_messages(with_categories = true) %}
            {% if messages %}
                {% for category, message in messages %}
                    <div class="alert alert-{{ category }}">
                        {{ message }}
                    </div>
                {% endfor %}
            {% endif %}
        {% endwith %}
        {% block content %}
        {% endblock %}
    </div>
</body>
</html>
chatBot/templates/login.html ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{% extends 'layout.html' %}
{% block content %}

{# Render one labelled form field. When the field has validation errors the
   input gets the "is-invalid" class and each error is listed beneath it. #}
{% macro labelled_input(field) %}
<div class="form-group">
    {{ field.label(class="form-control-label") }}
    {% if not field.errors %}
        {{ field(class="form-control form-control-lg") }}
    {% else %}
        {{ field(class="form-control form-control-lg is-invalid") }}
        <div class="invalid-feedback">
            {% for error in field.errors %}
                <span> {{ error }} </span>
            {% endfor %}
        </div>
    {% endif %}
</div>
{% endmacro %}

<main class="login">
    <form method="POST" action="">
        {{ form.hidden_tag() }}
        <fieldset class="form-group">
            <legend class="border-bottom mb-4">Login</legend>
            {{ labelled_input(form.username) }}

            {{ labelled_input(form.password) }}

            <div class="form-group">
                {{ form.submit(class="btn btn-info") }}
            </div>
        </fieldset>
    </form>
</main>

{% endblock %}
chatBot/templates/update.html ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{% extends 'layout.html' %}
{% block content %}

{# Upload page: a multipart form with a single file input and a submit button. #}
<main class="update">
    <form method="POST" action="" enctype="multipart/form-data">
        {{form.hidden_tag()}}
        <fieldset class="form-group">
            <legend class="border-bottom mb-4"> Upload new dataset </legend>
            <div class="form-group">
                {{ form.file.label(class="form-control-label", for="formFile") }}
                {{ form.file(class="form-control", id="formFile", type="file") }}
                {# List any validation errors reported for the file field. #}
                {% if form.file.errors %}
                    {% for error in form.file.errors %}
                        <span> {{ error }} </span>
                    {% endfor %}
                {% endif %}
            </div>

            <div class="form-group">
                {{form.submit(class="btn btn-info")}}
            </div>
        </fieldset>
    </form>
</main>

{% endblock %}
chatBot/update/__init__.py ADDED
File without changes
chatBot/update/forms.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask_wtf import FlaskForm
2
+ from wtforms import StringField, SubmitField, PasswordField
3
+ from flask_wtf.file import FileAllowed, FileField
4
+ from wtforms.validators import DataRequired
5
+
6
+
7
+ class LoginForm(FlaskForm):
8
+ username = StringField('Username',validators=[DataRequired()])
9
+ password = PasswordField('Password',validators=[DataRequired()])
10
+ submit = SubmitField('Login')
11
+
12
+
13
class UpdateForm(FlaskForm):
    """Upload form with a single mandatory file field limited to ``pdf``."""

    # NOTE(review): Flask-WTF also provides FileRequired for file fields;
    # consider it in place of DataRequired.
    file = FileField('Update File', validators=[DataRequired(), FileAllowed(['pdf'])])
    submit = SubmitField('Update')
16
+
17
+
chatBot/update/routes.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Blueprint, render_template, url_for, flash, redirect, request, current_app
2
+ from chatBot.update.forms import LoginForm, UpdateForm
3
+ from flask_login import login_user, current_user, logout_user, login_required
4
+ import os
5
+ from chatBot import bcrypt, db
6
+ from chatBot.models import User
7
+ update = Blueprint('update', __name__)
8
+
9
@update.route("/register", methods=['GET', 'POST'])
def register():
    """Create a new account from the submitted username and password.

    Authenticated users are redirected to the upload page. On a valid
    submission the password is stored as a bcrypt hash and the user is sent to
    the login page. A username that already exists is rejected with a flash
    message instead of failing on the database's unique constraint.

    Returns:
        A redirect response, or the rendered ``login.html`` form.
    """
    if current_user.is_authenticated:
        return redirect(url_for('update.upload'))
    form = LoginForm()

    if form.validate_on_submit():
        username_taken = User.query.filter_by(username=form.username.data).first() is not None
        if username_taken:
            # Inserting a duplicate would raise IntegrityError on commit and
            # surface as an HTTP 500.
            flash(message = "Username already taken", category = "danger")
        else:
            hashed_password = bcrypt.generate_password_hash(form.password.data).decode('utf-8')
            user = User(username = form.username.data, password = hashed_password)
            db.session.add(user)
            db.session.commit()
            flash(message = "Account created", category = "success")
            return redirect(url_for('update.login'))
    return render_template('login.html', title='login', form=form)
23
+
24
@update.route("/login", methods=['GET', 'POST'])
def login():
    """Authenticate the ``TechTeam`` user and start a session.

    Authenticated users are redirected to the upload page. On success the user
    is redirected to the ``next`` query parameter when it is a safe same-site
    path, otherwise to the upload page. On failure a flash message is shown
    and the form is rendered again.

    Returns:
        A redirect response, or the rendered ``login.html`` form.
    """
    if current_user.is_authenticated:
        return redirect(url_for('update.upload'))
    form = LoginForm()

    if form.validate_on_submit():
        user = User.query.filter_by(username=form.username.data).first()
        # Only the 'TechTeam' account may log in, and only with a matching hash.
        if user and form.username.data == 'TechTeam' and bcrypt.check_password_hash(pw_hash = user.password, password = form.password.data):
            login_user(user)
            next_page = request.args.get('next')
            flash(message = "Login successful", category = "success")
            # ``next`` is attacker-controlled; following it blindly is an open redirect.
            if next_page and _is_safe_redirect_target(next_page):
                return redirect(next_page)
            return redirect(url_for('update.upload'))
        flash(message = "Login unsuccessful", category = "danger")
    return render_template('login.html', title='Login', form=form)


def _is_safe_redirect_target(target):
    """Return True when *target* is a same-site absolute path such as ``/upload``."""
    from urllib.parse import urlparse

    # Reject protocol-relative URLs and backslash tricks that some browsers
    # treat as a host separator.
    if not target.startswith('/') or target.startswith('//') or '\\' in target:
        return False
    parts = urlparse(target)
    return not parts.scheme and not parts.netloc
39
+
40
@update.route("/upload", methods=['GET', 'POST'])
@login_required
def upload():
    """Store an uploaded PDF under ``PDF_PATH`` as the next numbered file.

    The new file is named ``<N>.pdf``, where N is one more than the largest
    existing numeric PDF name (1 when there is none). Counting directory
    entries instead could reuse an existing number and overwrite a file.

    Returns:
        The rendered ``update.html`` form.
    """
    form = UpdateForm()
    # validate_on_submit() already enforces that a file was supplied.
    if form.validate_on_submit():
        pdf_dir = current_app.config['PDF_PATH']
        existing_numbers = [
            int(stem)
            for stem, ext in map(os.path.splitext, os.listdir(pdf_dir))
            if ext == '.pdf' and stem.isdecimal()
        ]
        next_number = max(existing_numbers, default=0) + 1
        form.file.data.save(os.path.join(pdf_dir, f"{next_number}.pdf"))
        flash('Your file has been updated!', 'success')
    return render_template('update.html', title='Update', form=form)
requirements.txt ADDED
Binary file (4.61 kB). View file