|
import os |
|
import pickle |
|
import langchain |
|
|
|
import faiss |
|
from langchain import HuggingFaceHub |
|
from langchain.chains import ConversationalRetrievalChain |
|
from langchain.chat_models import ChatOpenAI |
|
from langchain.document_loaders import DirectoryLoader, TextLoader, UnstructuredHTMLLoader |
|
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings |
|
from langchain.memory import ConversationBufferWindowMemory |
|
from langchain.prompts.chat import ( |
|
ChatPromptTemplate, |
|
HumanMessagePromptTemplate, |
|
SystemMessagePromptTemplate, |
|
) |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.vectorstores.faiss import FAISS |
|
from langchain.cache import InMemoryCache |
|
|
|
# Enable LangChain's in-memory LLM cache for this process.
langchain.llm_cache = InMemoryCache()

# NOTE(review): a `global` statement at module scope has no effect, and
# `model_name` is never assigned in the code visible here.
global model_name

# Display names of the selectable models; these are the values compared
# against in set_model, set_embeddings and get_file_path.
models = ["GPT-3.5", "Flan UL2", "GPT-4", "Flan T5"]

# File-name suffixes and folder of the persisted vector store; they are
# combined with a provider prefix in get_file_path.
pickle_file = "_vs.pkl"
index_file = "_vs.index"
models_folder = "models/"

# Default LLM, created at import time; rebound by set_model.
llm = ChatOpenAI(model_name="gpt-4", temperature=0.1)

# Default embedding model, created at import time; rebound by set_embeddings.
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

# (question, answer) tuples handed to the chain as conversation history;
# rewritten by generate_answer and cleared by set_model_and_embeddings.
chat_history = []

# NOTE(review): not referenced anywhere else in the code visible here.
memory = ConversationBufferWindowMemory(memory_key="chat_history", k=10)

# Vector store read by generate_answer; populated by get_search_index.
vectorstore_index = None
|
|
|
# Maps the local path of each course document (the "source" metadata looked up
# in fetch_data_for_embeddings) to the public URL that is stored on the
# document as its "url" metadata and later reported as an answer source.
file_url_mapping = {
    'docs/3d-printing-applications/05_module-4-from-ideas-to-objects/03_design-thinking-in-action/02_multiply.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/1qm8i/multiply',
    'docs/3d-printing-applications/05_module-4-from-ideas-to-objects/03_design-thinking-in-action/01_utensil-grip-personalization.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/BYBmh/utensil-grip-personalization',
    'docs/3d-printing-applications/05_module-4-from-ideas-to-objects/03_design-thinking-in-action/03_city-x-project.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/0HDFr/city-x-project',
    'docs/3d-printing-applications/05_module-4-from-ideas-to-objects/01_module-4-information/02_an-intro-to-design-thinking.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/IdLQM/an-intro-to-design-thinking',
    'docs/3d-printing-applications/05_module-4-from-ideas-to-objects/02_an-introduction-to-design-thinking/01_what-is-design-thinking.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/npYkr/what-is-design-thinking',
    'docs/3d-printing-applications/05_module-4-from-ideas-to-objects/02_an-introduction-to-design-thinking/03_3d-printing-and-design-thinking.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/QJAGJ/3d-printing-and-design-thinking',
    'docs/3d-printing-applications/05_module-4-from-ideas-to-objects/02_an-introduction-to-design-thinking/02_understanding-user-needs.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/Jj03y/understanding-user-needs',
    'docs/3d-printing-applications/01_course-orientation/01_about-the-courses/01_welcome-to-3d-printing-applications.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/xYJax/welcome-to-3d-printing-applications',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/02_whats-different-about-3d-printing/01_complexity-is-free.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/lA0z9/complexity-is-free',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/02_whats-different-about-3d-printing/02_3d-printing-a-paradigm-shift.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/I3NUA/3d-printing-a-paradigm-shift',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/06_optional-content-webinars-with-experts/01_webinar-3d-printing-with-soft-materials-bonus.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/PbM8Z/webinar-3d-printing-with-soft-materials-bonus',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/03_3d-printing-industry-trends/02_3d-printing-use-cases.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/5dSO8/3d-printing-use-cases',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/03_3d-printing-industry-trends/04_a-view-from-the-trenches.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/FAINi/a-view-from-the-trenches',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/03_3d-printing-industry-trends/03_a-venture-capitalists-view-of-the-industry.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/kcSMh/a-venture-capitalists-view-of-the-industry',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/03_3d-printing-industry-trends/01_a-look-into-the-past-and-the-future.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/IWgMe/a-look-into-the-past-and-the-future',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/04_3d-printing-on-the-edge/01_cutting-edge-applications.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/Cy1Ef/cutting-edge-applications',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/04_3d-printing-on-the-edge/02_bioprinting.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/V6WNO/bioprinting',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/01_module-1-information/02_a-new-way-of-making.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/jPkoW/a-new-way-of-making',
    'docs/3d-printing-applications/06_course-wrap-up/01_course-wrap-up-whats-next/01_whats-next.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/ffuzj/whats-next',
    'docs/3d-printing-applications/06_course-wrap-up/01_course-wrap-up-whats-next/03_gies-online-programs.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/gPv5h/gies-online-programs',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/03_3d-printing-in-education/01_3d-printing-in-education.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/rrgbB/3d-printing-in-education',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/03_3d-printing-in-education/02_girls-in-stem-makergirls.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/8JKL4/girls-in-stem-makergirls',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/01_module-3-information/02_3d-printing-in-development-and-education.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/oncn3/3d-printing-in-development-and-education',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/02_3d-printing-for-development-3d4d/06_3d4d-with-techfortrade.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/aRmfA/3d4d-with-techfortrade',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/02_3d-printing-for-development-3d4d/03_from-trash-to-objects.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/C5Yaq/from-trash-to-objects',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/02_3d-printing-for-development-3d4d/04_enabling-the-future.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/PaC0r/enabling-the-future',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/02_3d-printing-for-development-3d4d/05_a-step-up-with-bionic-hands.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/94W3o/a-step-up-with-bionic-hands',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/02_3d-printing-for-development-3d4d/02_illinois-marketplace-and-maker-literacy-program.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/lE9Zj/illinois-marketplace-and-maker-literacy-program',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/02_3d-printing-for-development-3d4d/01_3d-printing-and-subsistence-marketplaces.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/MJPvM/3d-printing-and-subsistence-marketplaces',
    'docs/3d-printing-applications/03_module-2-3d-printing-on-demand-manufacturing/03_3d-printing-and-intellectual-property/01_3d-printing-and-the-future-or-demise-of-intellectual-property.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/6QRBO/3d-printing-and-the-future-or-demise-of-intellectual-property',
    'docs/3d-printing-applications/03_module-2-3d-printing-on-demand-manufacturing/01_module-2-information/02_on-demand-manufacturing.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/mwrCN/on-demand-manufacturing',
    'docs/3d-printing-applications/03_module-2-3d-printing-on-demand-manufacturing/02_everyone-can-be-a-maker/01_a-market-of-one-3d-hubs.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/SKKXa/a-market-of-one-3d-hubs',
    'docs/3d-printing-applications/03_module-2-3d-printing-on-demand-manufacturing/02_everyone-can-be-a-maker/06_careers-in-3d-printing.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/NBK4o/careers-in-3d-printing',
    'docs/3d-printing-applications/03_module-2-3d-printing-on-demand-manufacturing/02_everyone-can-be-a-maker/04_learning-by-making-and-making-for-fun-at-the-fab-lab.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/PuC0A/learning-by-making-and-making-for-fun-at-the-fab-lab',
    'docs/3d-printing-applications/03_module-2-3d-printing-on-demand-manufacturing/02_everyone-can-be-a-maker/05_for-businesses-and-entrepreneurs.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/I3WQW/for-businesses-and-entrepreneurs',
    'docs/3d-printing-applications/03_module-2-3d-printing-on-demand-manufacturing/02_everyone-can-be-a-maker/03_on-demand-and-local-techshop.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/EMRez/on-demand-and-local-techshop',
    'docs/3d-printing-applications/03_module-2-3d-printing-on-demand-manufacturing/02_everyone-can-be-a-maker/02_a-market-of-a-few.en.txt':'https://www.coursera.org/learn/3d-printing-applications/lecture/4dB6p/a-market-of-a-few',
    'docs/3d-printing-applications/05_module-4-from-ideas-to-objects/01_module-4-information/03_module-4-readings_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/hnOry/module-4-readings',
    'docs/3d-printing-applications/05_module-4-from-ideas-to-objects/01_module-4-information/01_module-4-overview_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/zq4e8/module-4-overview',
    'docs/3d-printing-applications/05_module-4-from-ideas-to-objects/04_module-4-assignments/01_module-4-graded-quiz_exam.html':'https://www.coursera.org/learn/3d-printing-applications/exam/nRxIo/module-4-graded-quiz',
    'docs/3d-printing-applications/05_module-4-from-ideas-to-objects/04_module-4-assignments/02_module-4-peer-review-assignment_peer_assignment_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/peer/EWHOn/module-4-peer-review-assignment',
    'docs/3d-printing-applications/07_Resources/04_3d-printing-news-sources/01__resources.html':'https://www.coursera.org/learn/3d-printing-applications/resources/LcUHI',
    'docs/3d-printing-applications/07_Resources/03_3d-printing-applications/01__resources.html':'https://www.coursera.org/learn/3d-printing-applications/resources/rLuyV',
    'docs/3d-printing-applications/07_Resources/03_3d-printing-applications/01__scientists-create-new-bio-ink-for-3d-printing-with-stem-cells-322296.html':'http://tech.firstpost.com/news-analysis/scientists-create-new-bio-ink-for-3d-printing-with-stem-cells-322296.html',
    'docs/3d-printing-applications/07_Resources/05_recommended-books/01__resources.html':'https://www.coursera.org/learn/3d-printing-applications/resources/8hxKz',
    'docs/3d-printing-applications/07_Resources/06_explore-the-imba/01__resources.html':'https://www.coursera.org/learn/3d-printing-applications/resources/49U0k',
    'docs/3d-printing-applications/07_Resources/01_about-our-team/01__resources.html':'https://www.coursera.org/learn/3d-printing-applications/resources/62fHX',
    'docs/3d-printing-applications/07_Resources/02_glossary/01__resources.html':'https://www.coursera.org/learn/3d-printing-applications/resources/dG0g4',
    'docs/3d-printing-applications/01_course-orientation/01_about-the-courses/04_glossary_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/QQpUl/glossary',
    'docs/3d-printing-applications/01_course-orientation/01_about-the-courses/03_about-the-discussion-forums_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/MRtAV/about-the-discussion-forums',
    'docs/3d-printing-applications/01_course-orientation/01_about-the-courses/02_syllabus_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/9DenU/syllabus',
    'docs/3d-printing-applications/01_course-orientation/02_about-your-classmates/01_updating-your-profile_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/DfAqY/updating-your-profile',
    'docs/3d-printing-applications/01_course-orientation/02_about-your-classmates/02_social-media_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/Qf1Am/social-media',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/06_optional-content-webinars-with-experts/04_webinar-3d-printing-comes-of-age_3d-printing-comes-of-age.html':'http://www.3dprintingprofs.com/2016/07/3d-printing-comes-of-age-webinar/',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/06_optional-content-webinars-with-experts/05_bonus-webinar-mymini-factory-ceo-interview_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/2XNdg/bonus-webinar-mymini-factory-ceo-interview',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/06_optional-content-webinars-with-experts/03_bonus-webinar-whats-all-the-hype-around-3d-printing_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/jKR1S/bonus-webinar-whats-all-the-hype-around-3d-printing',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/06_optional-content-webinars-with-experts/02_bonus-webinar-audio-whats-the-hype-around-3dprinting_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/Wh3Lh/bonus-webinar-audio-whats-the-hype-around-3dprinting',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/06_optional-content-webinars-with-experts/06_bonus-webinar-the-maker-movement-in-education_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/hHU9F/bonus-webinar-the-maker-movement-in-education',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/06_optional-content-webinars-with-experts/04_webinar-3d-printing-comes-of-age_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/zNdWR/webinar-3d-printing-comes-of-age',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/05_module-1-assignments/02_module-1-peer-review-assignment_peer_assignment_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/peer/wW35G/module-1-peer-review-assignment',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/05_module-1-assignments/01_module-1-graded-quiz_exam.html':'https://www.coursera.org/learn/3d-printing-applications/exam/Mf63o/module-1-graded-quiz',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/01_module-1-information/01_module-1-overview_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/wEptY/module-1-overview',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/01_module-1-information/03_module-1-readings_3d-printing-complexity-is-free-may-be-costly-for-some.html':'https://www2.deloitte.com/us/en/insights/focus/3d-opportunity/3d-printing-complexity-is-free-may-be-costly-for-some.html',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/01_module-1-information/03_module-1-readings_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/SpZ2p/module-1-readings',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/01_module-1-information/03_module-1-readings_3d-printing-comes-of-age.html':'http://www.pwc.com/us/en/industrial-products/3d-printing-comes-of-age.html',
    'docs/3d-printing-applications/02_module-1-3d-printing-a-new-way-of-making/01_module-1-information/03_module-1-readings_press71.html':'http://wohlersassociates.com/press71.html',
    'docs/3d-printing-applications/06_course-wrap-up/01_course-wrap-up-whats-next/02_congratulations_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/oDj5t/congratulations',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/03_3d-printing-in-education/03_resources-for-educators_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/sdDp2/resources-for-educators',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/01_module-3-information/03_module-3-readings_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/5kR9o/module-3-readings',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/01_module-3-information/01_module-3-overview_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/OChGO/module-3-overview',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/01_module-3-information/03_module-3-readings_book.html':'http://sdu.ictp.it/3D/book.html',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/04_module-3-assignments/01_module-3-graded-quiz_exam.html':'https://www.coursera.org/learn/3d-printing-applications/exam/NMiTK/module-3-graded-quiz',
    'docs/3d-printing-applications/04_module-3-3d-printing-for-development-and-education/04_module-3-assignments/02_module-3-peer-review-assignment_peer_assignment_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/peer/5xxgp/module-3-peer-review-assignment',
    'docs/3d-printing-applications/03_module-2-3d-printing-on-demand-manufacturing/04_module-2-assignment/01_module-2-graded-quiz_exam.html':'https://www.coursera.org/learn/3d-printing-applications/exam/6Z8Ef/module-2-graded-quiz',
    'docs/3d-printing-applications/03_module-2-3d-printing-on-demand-manufacturing/04_module-2-assignment/02_module-2-peer-review-assignment_peer_assignment_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/peer/Pt8hO/module-2-peer-review-assignment',
    'docs/3d-printing-applications/03_module-2-3d-printing-on-demand-manufacturing/01_module-2-information/01_module-2-overview_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/94W7x/module-2-overview',
    'docs/3d-printing-applications/03_module-2-3d-printing-on-demand-manufacturing/01_module-2-information/03_module-2-readings_instructions.html':'https://www.coursera.org/learn/3d-printing-applications/supplement/3uaqE/module-2-readings'
}
|
|
|
# System prompt for the answering step; {context} is the template variable
# that the chain fills in when it combines the retrieved documents.
system_template = """You are Coursera QA Bot. Have a conversation with a human, answering the following questions as best you can.
You are a teaching assistant for a Coursera Course: 3D Printing Applications and can answer any question about that using vectorstore or context.
Use the following pieces of context to answer the users question.
----------------
{context}"""

# Chat prompt layout: the system instructions followed by the user's question.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
# Handed to the chain in get_qa_chain through combine_docs_chain_kwargs.
CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)
|
|
|
|
|
def set_model_and_embeddings(model):
    """Activate ``model`` as the LLM and start a new conversation.

    Delegates to ``set_model`` and then empties the module-level
    ``chat_history``. Despite its name, this function does not call
    ``set_embeddings``; the module-level ``embeddings`` is left as it is.

    Args:
        model: Display name of the model to activate.
    """
    global chat_history

    set_model(model)

    # Reset the history so the next question starts from an empty transcript.
    chat_history = list()
|
|
|
|
|
def set_model(model):
    """Rebind the module-level ``llm`` to the backend named by ``model``.

    Recognised names are "GPT-3.5", "GPT-4", "Flan UL2" and "Flan T5".
    Any other value falls back to GPT-3.5.

    Args:
        model: Display name of the model to load.
    """
    global llm

    print("Setting model to " + str(model))
    if model == "GPT-3.5":
        print("Loading GPT-3.5")
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)
    elif model == "GPT-4":
        print("Loading GPT-4")
        llm = ChatOpenAI(model_name="gpt-4", temperature=0.1)
    elif model == "Flan UL2":
        print("Loading Flan-UL2")
        llm = HuggingFaceHub(repo_id="google/flan-ul2", model_kwargs={"temperature": 0.1, "max_new_tokens":500})
    elif model == "Flan T5":
        print("Loading Flan T5")
        llm = HuggingFaceHub(repo_id="google/flan-t5-base", model_kwargs={"temperature": 0.1})
    else:
        print("Loading GPT-3.5 from else")
        # The fallback used to request "text-davinci-002", a completions-only
        # model that cannot be served through the chat endpoint ChatOpenAI
        # talks to. Use the chat model that the log message announces.
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)
|
|
|
|
|
def set_embeddings(model):
    """Rebind the module-level ``embeddings`` to match ``model``'s provider.

    The GPT models select OpenAI embeddings and the Flan models select the
    Hugging Face Hub embeddings. For any other value nothing is changed.

    Args:
        model: Display name of the active model.
    """
    global embeddings

    if model in ("GPT-3.5", "GPT-4"):
        print("Loading OpenAI embeddings")
        embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
        return

    if model in ("Flan UL2", "Flan T5"):
        print("Loading Hugging Face embeddings")
        embeddings = HuggingFaceHubEmbeddings(repo_id="sentence-transformers/all-MiniLM-L6-v2")
|
|
|
|
|
def get_search_index(model):
    """Return the vector store for ``model``, loading it from disk if cached.

    The pickled store is reused when both the pickle and the FAISS index file
    exist and the pickle is non-empty; otherwise the store is rebuilt with
    ``create_index``. The result is also stored in the module-level
    ``vectorstore_index``.

    Args:
        model: Display name of the model; selects which cached files are used.

    Returns:
        The loaded or newly created vector store.
    """
    global vectorstore_index

    pickle_path = get_file_path(model, pickle_file)
    index_path = get_file_path(model, index_file)

    cache_usable = (
        os.path.isfile(pickle_path)
        and os.path.isfile(index_path)
        and os.path.getsize(pickle_path) > 0
    )

    if not cache_usable:
        search_index = create_index(model)
        print("Created index")
    else:
        # The pickle is a local cache written by create_index; unpickling
        # executes arbitrary code, so this file must never be untrusted.
        with open(pickle_path, "rb") as f:
            search_index = pickle.load(f)
        print("Loaded index")

    vectorstore_index = search_index
    return search_index
|
|
|
|
|
def create_index(model):
    """Build the vector store from the course documents and persist it.

    The documents are chunked with ``create_chunk_documents`` and embedded
    with ``search_index_from_docs``. The raw FAISS index and the pickled
    store are then written to the paths given by ``get_file_path``.

    NOTE(review): the vectors come from the module-level ``embeddings`` (via
    ``search_index_from_docs``) while the file names depend on ``model``;
    confirm that callers keep the two in sync.

    Args:
        model: Display name of the model; selects the output file names.

    Returns:
        The newly created vector store.
    """
    index_path = get_file_path(model, index_file)
    pickle_path = get_file_path(model, pickle_file)

    # Create the output folder before doing any embedding work: without it
    # both faiss.write_index and open() fail, and the computed embeddings
    # would be lost.
    for path in (index_path, pickle_path):
        folder = os.path.dirname(path)
        if folder:
            os.makedirs(folder, exist_ok=True)

    source_chunks = create_chunk_documents()
    search_index = search_index_from_docs(source_chunks)

    faiss.write_index(search_index.index, index_path)
    with open(pickle_path, "wb") as f:
        pickle.dump(search_index, f)
    return search_index
|
|
|
|
|
def get_file_path(model, file):
    """Return the path of a persisted vector-store file for ``model``.

    The path is ``models_folder`` followed by a provider prefix ("openai" for
    the GPT models, "hf" for everything else) and the ``file`` suffix.

    Args:
        model: Display name of the model.
        file: File-name suffix, e.g. ``pickle_file`` or ``index_file``.

    Returns:
        The concatenated path string.
    """
    provider = "openai" if model in ("GPT-3.5", "GPT-4") else "hf"
    return models_folder + provider + file
|
|
|
|
|
def search_index_from_docs(source_chunks):
    """Build a FAISS store from ``source_chunks``.

    The chunks are embedded with the module-level ``embeddings``.

    Args:
        source_chunks: Document chunks to index.

    Returns:
        The resulting FAISS vector store.
    """
    return FAISS.from_documents(source_chunks, embeddings)
|
|
|
|
|
def get_html_files():
    """Load every ``*.html`` file found recursively under ``docs``.

    Returns:
        The documents produced by the directory loader.
    """
    html_loader = DirectoryLoader(
        'docs',
        glob="**/*.html",
        loader_cls=UnstructuredHTMLLoader,
        recursive=True,
    )
    return html_loader.load()
|
|
|
|
|
def fetch_data_for_embeddings():
    """Load all course documents and tag each one with its public URL.

    Text files are loaded first, then HTML files. Each document receives a
    ``"url"`` metadata entry looked up in ``file_url_mapping`` by its
    ``"source"`` metadata; sources missing from the mapping get ``None``.

    Returns:
        The combined list of documents.
    """
    documents = get_text_files()
    documents.extend(get_html_files())

    for doc in documents:
        source_path = doc.metadata["source"]
        doc.metadata["url"] = file_url_mapping.get(source_path)

    print(f"document list: {len(documents)}")
    return documents
|
|
|
|
|
def get_text_files():
    """Load every ``*.txt`` file found recursively under ``docs``.

    Returns:
        The documents produced by the directory loader.
    """
    text_loader = DirectoryLoader(
        'docs',
        glob="**/*.txt",
        loader_cls=TextLoader,
        recursive=True,
    )
    return text_loader.load()
|
|
|
|
|
def create_chunk_documents():
    """Load the course documents and split them into chunks for embedding.

    Documents come from ``fetch_data_for_embeddings`` and are split on spaces
    with a chunk size of 800 and no overlap.

    Returns:
        The list of document chunks.
    """
    documents = fetch_data_for_embeddings()

    chunker = CharacterTextSplitter(separator=" ", chunk_size=800, chunk_overlap=0)
    chunks = chunker.split_documents(documents)

    print(f"chunks: {len(chunks)}")
    return chunks
|
|
|
|
|
def get_qa_chain(vectorstore_index):
    """Build a conversational retrieval chain over ``vectorstore_index``.

    The chain uses the module-level ``llm``, a similarity-score-threshold
    retriever (threshold 0.5), ``get_chat_history`` to render the history and
    ``CHAT_PROMPT`` for the answering step. Source documents are returned
    with each answer.

    Args:
        vectorstore_index: Vector store to retrieve from. The parameter
            shadows the module-level variable of the same name.

    Returns:
        The configured ``ConversationalRetrievalChain``.
    """
    # `llm` is only read here, so no `global` declaration is needed.
    print(llm)

    threshold_retriever = vectorstore_index.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": .5},
    )

    return ConversationalRetrievalChain.from_llm(
        llm,
        threshold_retriever,
        return_source_documents=True,
        verbose=True,
        get_chat_history=get_chat_history,
        combine_docs_chain_kwargs={"prompt": CHAT_PROMPT},
    )
|
|
|
|
|
def get_chat_history(inputs) -> str:
    """Render (human, ai) message pairs as a plain-text transcript.

    Args:
        inputs: Iterable of two-item pairs: the human message and the AI reply.

    Returns:
        One "Human:..." line and one "AI:..." line per pair, joined by
        newlines; an empty string when there are no pairs.
    """
    return "\n".join(f"Human:{human}\nAI:{ai}" for human, ai in inputs)
|
|
|
|
|
def generate_answer(question) -> str:
    """Answer ``question`` with the retrieval chain and list the sources used.

    A chain is built over the module-level ``vectorstore_index`` and called
    with the question and the current ``chat_history``. The history is then
    replaced by the latest (question, answer) pair.

    Args:
        question: The user's question.

    Returns:
        The answer text followed by a "SOURCES: " section containing the
        distinct URLs of the retrieved documents, in retrieval order.
    """
    global chat_history

    chain = get_qa_chain(vectorstore_index)

    result = chain(
        {"question": question, "chat_history": chat_history, "vectordbkwargs": {"search_distance": 0.6}})
    # NOTE(review): this keeps only the latest exchange, so the chain never
    # sees more than one previous turn; confirm that this is intended.
    chat_history = [(question, result["answer"])]
    print(result)

    sources = []
    for document in result['source_documents']:
        # Documents whose file is missing from file_url_mapping carry a None
        # (or absent) url; skip them instead of failing on string concatenation.
        url = document.metadata.get('url')
        if url:
            sources.append("\n" + url)
    print(sources)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # output is stable between runs (a set has no defined order).
    source = ',\n'.join(dict.fromkeys(sources))
    return result['answer'] + '\nSOURCES: ' + source
|
|