Annikaijak committed on
Commit
6d88f6f
1 Parent(s): 91190fb

Create app.py

Files changed (1)
  1. app.py +155 -0
app.py ADDED
@@ -0,0 +1,155 @@
+ # Cloning our GitHub repository so the PDF documents are available in the local file system
+ import os
+ os.system("git clone https://github.com/annikaijak/deeplearning_assignment_4")
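+ # Note: this assumes git is available in the runtime; on repeated runs, a guard
+ # such as os.path.exists("deeplearning_assignment_4") could skip the clone.
+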
+ from langchain.document_loaders import PyPDFDirectoryLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import Chroma
+ import torch
+ from langchain import HuggingFacePipeline
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
+ from langchain.chains import RetrievalQA
+ from langchain import PromptTemplate
+ from textwrap import fill
+ import gradio as gr
+ import time
+
+
+ # Loading the PDF files from the cloned repository
+ loader = PyPDFDirectoryLoader("/content/deeplearning_assignment_4/data/PDF_Documents")
+ docs = loader.load()
+
+ # Splitting the text into smaller chunks
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
+ texts = text_splitter.split_documents(docs)
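+ # With chunk_size=1024 and chunk_overlap=64, consecutive chunks share 64
+ # characters, so text cut at a chunk boundary still appears intact in at
+ # least one chunk and remains retrievable.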
+
+ # Creating embeddings
+ embeddings = HuggingFaceEmbeddings(
+     model_name="thenlper/gte-large",
+     model_kwargs={"device": "cuda"},
+     encode_kwargs={"normalize_embeddings": True},
+ )
+
+ query_result = embeddings.embed_query(texts[0].page_content)
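+ # (Sanity check: the embedding above is a plain list of floats; gte-large
+ # produces 1024-dimensional vectors, per its model card.)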
+
+ # Saving the embeddings in the Chroma database
+ db = Chroma.from_documents(texts, embeddings, persist_directory="db")
+ results = db.similarity_search("Transformer models", k=2)
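+ # Quick smoke test: `results` holds the two chunks most similar to the query;
+ # inspecting e.g. results[0].page_content confirms retrieval works. It is not
+ # used by the app below.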
+
+ # Loading the transformer model
+ MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
+
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
+
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_NAME, torch_dtype=torch.float16, trust_remote_code=True, device_map="auto"
+ )
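+ # torch.float16 halves memory use versus float32, and device_map="auto" lets
+ # accelerate place the weights on the available device(s); a GPU is assumed
+ # here, matching the "cuda" setting used for the embeddings above.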
+
+ # Create a text generation configuration based on the specified model name
+ generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
+
+ # Limit the generated output to at most 1024 new tokens
+ generation_config.max_new_tokens = 1024
+
+ # Temperature for sampling: lower values (e.g. 0.0001) make the output close to
+ # deterministic, while higher values make it more random
+ generation_config.temperature = 0.0001
+
+ # Top-p (nucleus) sampling: only the most likely words covering 95% of the
+ # probability mass are considered
+ generation_config.top_p = 0.95
+
+ # Enable sampling, so the model selects words according to their probabilities
+ generation_config.do_sample = True
+
+ # Repetition penalty: 1.15 discourages the model from repeating the same words
+ # or phrases too frequently
+ generation_config.repetition_penalty = 1.15
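+ # With temperature this close to 0, the distribution sharpens toward the most
+ # likely token, so sampling behaves almost like greedy decoding; do_sample=True
+ # is still required for top_p to take effect.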
+
+
+ # Create a text generation pipeline using the model, tokenizer and generation configuration
+ text_pipeline = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     generation_config=generation_config,
+ )
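+ # Optional quick check: text_pipeline("Hello")[0]["generated_text"] returns the
+ # prompt followed by the model's continuation (the default transformers
+ # text-generation output format).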
+
+ # Wrap the text generation pipeline as a LangChain-compatible LLM
+ llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})
+
+ template_3 = """
+ <s>[INST] <<SYS>>
+ Act as a student counselor at Aalborg University Business School and answer the question at the end.
+ The answer should be about the master's programs found in the provided documents ONLY.
+ The answer should be MAXIMUM 40 words.
+ Use the examples in {context} to generate the answer, without directly mentioning any of it.
+
+ <</SYS>>
+
+ {context}
+
+ N-shot prompting:
+ N-1
+ Q: How do I find out what masters degree I want to study
+ A: To determine which master's degree you would like to study, you should consider which business-related modules are within your interest; which modules from the bachelor's degree did you find interesting?
+
+ N-2
+ Q: I liked the modules [input] in the bachelor, what masters could be relevant for me?
+ A: Based on your interests in [input], it may be beneficial to consider studying [output].
+ The curriculum for this program includes several modules that align with your
+ interests.
+
+ ReAct prompting:
+ Q: "how do i find out what masters degree i want to study"
+ A: "To determine which master's degree you would like to study, you should consider which business-related modules are within your interest;
+ which modules from the bachelor's degree did you find interesting?"
+ Q: "I liked macro economics and organisation"
+ A: "Based on your interests in macroeconomics and organizations, it may be
+ beneficial to consider studying the Master of Science (MSc) in Economics and
+ Business Administration (Finance) program at Aalborg University Business School.
+ The curriculum for this program includes several modules that align with your
+ interests, such as "Network Theory and Analysis" and "Data-Driven Business
+ Modeling and Strategy". These modules cover topics related to macroeconomics and
+ organizational behavior, providing you with valuable insights and skills that
+ could help you achieve your career goals. Additionally, the program offers an
+ application-focused approach, allowing you to apply your knowledge to real-world
+ problems and develop practical solutions."
+ Feedback: The advice should focus on unique modules in the 1st and 2nd semester of each master's program, as the 3rd-semester modules are elective options for all masters.
+
+ {question} [/INST]
+ """
+
+ prompt_3 = PromptTemplate(template=template_3, input_variables=["context", "question"])
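+ # At query time, the chain fills {context} with the retrieved chunks and
+ # {question} with the user's message before passing the prompt to the model.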
+
+
+ qa_chain_3 = RetrievalQA.from_chain_type(
+     llm=llm,
+     chain_type="stuff",
+     retriever=db.as_retriever(search_kwargs={"k": 2}),
+     return_source_documents=True,
+     chain_type_kwargs={"prompt": prompt_3},
+ )
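+ # chain_type="stuff" concatenates ("stuffs") the k=2 retrieved chunks into the
+ # {context} slot of prompt_3; return_source_documents=True makes the chain's
+ # output dict include those chunks under "source_documents" alongside "result".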
+
+ # Standalone helper for querying the chain directly; the Gradio app below
+ # defines its own reply_bot callback for the chat interface
+ def reply_bot(txt):
+     bot_result = qa_chain_3(txt)
+     return bot_result["result"].strip()
+
+ bot_name = "Master Supervisor"
+
+ with gr.Blocks() as demo:
+     gr.Markdown("### Master's Degree Program Advisor")
+     gr.Markdown("I can help you find the master's degree program that's right for you. Ask me any question related to choosing a master's program.")
+
+     chatbot = gr.Chatbot()
+     msg = gr.Textbox()
+     clear = gr.ClearButton([msg, chatbot])
+
+     def reply_bot(message, chat_history):
+         bot_result = qa_chain_3(message)
+         chat_history.append((message, bot_result["result"].strip()))
+         time.sleep(2)
+         return "", chat_history
+
+     msg.submit(reply_bot, [msg, chatbot], [msg, chatbot])
+
+ demo.queue().launch(share=True)
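+ # share=True creates a temporary public gradio.live link when run locally or in
+ # a notebook; on Hugging Face Spaces the app is already public and Gradio
+ # ignores this flag with a warning.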