import gradio as gr
import os
from datasets import load_dataset

from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_ollama.llms import OllamaLLM

#from langchain import hub
from langchain_core.runnables import RunnableParallel
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_chroma import Chroma


"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
""" 

# LLM model
llm = HuggingFacePipeline.from_model_id(
    model_id="llmware/bling-phi-3-gguf",
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 100},
)
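# Note (assumption): loading a GGUF checkpoint such as "llmware/bling-phi-3-gguf"
# through a transformers pipeline requires a transformers build with GGUF support
# (and may need the exact .gguf filename). If this fails locally, the imported
# OllamaLLM is a drop-in alternative for `llm`, e.g. OllamaLLM(model="phi3")
# (hypothetical model tag, assuming an Ollama server is running).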

# Initialize embedding model "all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Load the existing ChromaDB database
vector_store = Chroma(persist_directory="./chroma_db", embedding_function=embedding_model)
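# Note (assumption): "./chroma_db" is expected to already hold a collection built
# with the same embedding model, e.g. by a separate ingestion step along the lines
# of (hypothetical `docs` list of LangChain Documents):
#
#   Chroma.from_documents(docs, embedding_model, persist_directory="./chroma_db")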

# Prompt adapted from https://smith.langchain.com/hub/rlm/rag-prompt,
# a solid baseline prompt for RAG, wrapped in a PromptTemplate so it can be
# composed into the chain below.
prompt = PromptTemplate.from_template(
    """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
Question: {question}
Context: {context}
Answer:"""
)

# Define a new chain to return both the answer and the sources
qa_chain_with_sources = (
    RunnableParallel(
        {
            "context": vector_store.as_retriever(),
            "question": RunnablePassthrough(),
        }
    )
    | {
        "answer": prompt | llm | StrOutputParser(),
        "sources": lambda x: [doc.metadata.get("source", "Unknown") for doc in x["context"]],
    }
)
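
# Note: the RunnableParallel step above feeds the same dict
# ({"context": retrieved Documents, "question": the raw query}) to both output
# branches, so "sources" can read metadata from the documents that "answer" used.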

# Run a RAG query through the chain and format the answer plus sources for the chat UI
def rag_query(query, history):
    # Invoke the chain
    r = qa_chain_with_sources.invoke(query)
    
    answer = r["answer"]
    unique_sources = list(set(r["sources"]))

    # Combine the answer with its de-duplicated sources into a single response string
    output = f"Answer: {answer}\n\nSources:\n" + "\n".join(unique_sources)

    return output
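
# Example (hypothetical query): rag_query("When is the final presentation due?", [])
# returns a single string of the form "Answer: ...\n\nSources:\n<source paths>".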


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""

demo = gr.ChatInterface(
    fn=rag_query,  # Function to call for generating responses
    title="WEHI Student Intern Chatbot Demo",
    type='messages',
    description="Ask questions related to your WEHI internship and get answers with sources.",
    examples=[
        "What flexibility is there for the internship?",
        "What are the key things to do before the weekly meetings?", 
        "How do I tackle complex and ambiguous projects?",
        "What happens over Easter break at WEHI?",
        "What are the tasks for the REDMANE Data Ingestion team?",
        "When is the final presentation due?",
        "What is Nectar?",
        "Is the internship remote or in person?"
    ],
)

demo.launch()