import logging
import os

import faiss
import nest_asyncio
import pinecone
import streamlit as st
from langchain import HuggingFaceHub  # used only by the commented-out HuggingFaceHub fallback below
from langchain.chains import ConversationChain, RetrievalQAWithSourcesChain
from langchain.chains.llm import LLMChain
from langchain.chains.router import MultiPromptChain
from langchain.chains.router.embedding_router import EmbeddingRouterChain
from langchain.chat_models.openai import ChatOpenAI
from langchain.docstore import InMemoryDocstore
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.retrievers.web_research import WebResearchRetriever
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.vectorstores import FAISS, Pinecone
from templates import (
    apologetics_template,
    commentary_template,
    greek_template,
    hebrew_template,
    history_template,
    theology_template,
    therapy_template,
)
st.title("BereaAI Bible Assistant")
st.write("BereaAI has expertise in Biblical Hebrew, Greek, Apologetics, Theology, Counselling and Church History. \
Though still in the early stages BereaAI shows promise in delivering nuanced and thorough explanations. \
The first answer takes a while to load but the consequent answers load much faster. \
")
#HUGGINGFACEHUB_API_TOKEN = st.secrets["HUGGINGFACEHUB_API_TOKEN"]
GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
GOOGLE_CSE_ID = st.secrets["GOOGLE_CSE_ID"]
OPENAI_API_BASE = st.secrets["OPENAI_API_BASE"]
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
PINECONE_API_KEY = st.secrets["PINECONE_API_KEY"]
#repo_id = "meta-llama/Llama-2-7b-chat-hf"
#st.header("Parameters")
#temperature = st.slider('Temperature', min_value=0.0, max_value=1.0, value=0.3, step=0.1)
#max_new_tokens = st.slider('Max New Tokens', min_value=100, max_value=2000, value=1024, step=50)
#top_p = st.slider('Top P', min_value=0.0, max_value=1.0, value=0.4, step=0.05)
#llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": temperature, "max_new_tokens": max_new_tokens, "top_p": top_p})
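# Primary LLM: 16k-context GPT-3.5 Turbo; streaming=True lets callbacks receive tokens as they are generated.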
llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k", openai_api_key=OPENAI_API_KEY, temperature=0.3, streaming=True)
# initialize pinecone
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment="us-west1-gcp",  # next to api key in console
)
index_name = "bereaai"
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-bert-large")
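# One destination prompt per area of expertise; the router below picks the best match for each question.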
prompt_infos = [
    {
        "name": "hebrew",
        "description": "Good for answering questions about Hebrew Old Testament Bible",
        "prompt_template": hebrew_template,
    },
    {
        "name": "greek",
        "description": "Good for answering questions about Greek New Testament Bible",
        "prompt_template": greek_template,
    },
    {
        "name": "apologetics",
        "description": "Good for answering questions directed against the Bible or Christianity",
        "prompt_template": apologetics_template,
    },
    {
        "name": "theology",
        "description": "Good for answering questions about biblical theology",
        "prompt_template": theology_template,
    },
    {
        "name": "therapy",
        "description": "Good for answering questions about mental health or personal issues",
        "prompt_template": therapy_template,
    },
    {
        "name": "history",
        "description": "Good for answering questions about church history",
        "prompt_template": history_template,
    },
    {
        "name": "commentary",
        "description": "Good for answering questions about verses, chapters or books of the Bible",
        "prompt_template": commentary_template,
    },
]
destination_chains = {}
for p_info in prompt_infos:
    name = p_info["name"]
    prompt_template = p_info["prompt_template"]
    prompt = PromptTemplate(template=prompt_template, input_variables=["input"])
    chain = LLMChain(llm=llm, prompt=prompt)
    destination_chains[name] = chain
default_chain = ConversationChain(llm=llm, output_key="text")
names_and_descriptions = [
    ("hebrew", ["for questions about hebrew"]),
    ("greek", ["for questions about greek"]),
    ("apologetics", ["for questions directed against the Bible or Christianity"]),
    ("theology", ["for questions about theology"]),
    ("therapy", ["for questions about mental health"]),
    ("history", ["for questions about history"]),
    ("commentary", ["for questions about verses, passages or books of the Bible"]),
]
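# The embedding router embeds each incoming question and routes it to the destination with the most similar description.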
router_chain = EmbeddingRouterChain.from_names_and_descriptions(
    names_and_descriptions, FAISS, embeddings, routing_keys=["input"]
)
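# Route the question to the matching expert chain, falling back to a plain conversation when nothing fits well.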
def generate_response(input_text):
    chain = MultiPromptChain(
        router_chain=router_chain,
        destination_chains=destination_chains,
        default_chain=default_chain,
        verbose=True,
    )
    return chain.run(input_text)
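# Build the web-research retriever: a scratch FAISS store (1024 dims, matching stsb-bert-large) plus Google search.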
def settings():
    embedding_size = 1024
    index = faiss.IndexFlatL2(embedding_size)
    vectorstore_public = FAISS(embeddings.embed_query, index, InMemoryDocstore({}), {})
    search = GoogleSearchAPIWrapper()
    # Initialize
    web_retriever = WebResearchRetriever.from_llm(
        vectorstore=vectorstore_public,
        llm=llm,
        search=search,
        num_search_results=3,
    )
    return web_retriever, llm
# Make retriever and llm
web_retriever, llm = settings()
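# WebResearchRetriever fetches pages asynchronously; nest_asyncio lets that run inside Streamlit's event loop.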
nest_asyncio.apply()
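# Answer the question from live web results via the web-research retriever, returning the answer plus source URLs.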
def web_search(question):
    logging.basicConfig()
    logging.getLogger("langchain.retrievers.web_research").setLevel(logging.INFO)
    qa_chain = RetrievalQAWithSourcesChain.from_chain_type(llm, retriever=web_retriever)
    # Get result
    result = qa_chain({"question": question})
    return result
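# Query the Pinecone commentary index and reduce source file paths to bare book titles.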
def vector_search(question):
    docsearch = Pinecone.from_existing_index(index_name, embeddings)
    logging.basicConfig()
    logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)
    qa_chain = RetrievalQAWithSourcesChain.from_chain_type(llm, retriever=docsearch.as_retriever())
    result = qa_chain({"question": question})
    answer = result['answer'] if result else ""
    cleaned_sources = ""
    if result:
        # Strip the Drive path prefix and file extension, leaving only the commentary titles.
        sources_list = result['sources'].split('/content/drive/MyDrive/Commentaries/')
        filenames_with_extension = [os.path.basename(source) for source in sources_list]
        filenames_without_extension = [os.path.splitext(source)[0] for source in filenames_with_extension]
        filtered_sources = [source for source in filenames_without_extension if source]
        cleaned_sources = "\n- ".join(filtered_sources)
    return answer, cleaned_sources
def combined_answer(text1, text2, text3, source, question):
    # text1: routed expert answer; text2: web-search result dict; source: cleaned book sources.
    # text3 (the Pinecone answer) and question are currently unused.
    books_ref = source
    web_ref = text2['sources']
    return text1, books_ref, web_ref
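# Main form: run the routed expert chain, web search and commentary search, then show the merged answer with both source lists.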
with st.form("my_form"):
    text = st.text_area("Enter text:", "Type question here")
    submitted = st.form_submit_button("Submit")
    if submitted and not text.strip():
        st.info("You forgot to type something")
    elif submitted:
        with st.spinner('Researching...'):
            text1 = generate_response(text)
            text2 = web_search(text)
            text3, source = vector_search(text)
            answer, source2, source3 = combined_answer(text1, text2, text3, source, text)
            st.info(answer)
            st.info(f"Web Sources:\n {source3}")
            st.info(f"Book Sources: \n\n {source2}")
st.markdown("## Examples")
example1 = "Give me a Hebrew word study of Psalm 23"
example2 = "Give me a Greek word study on John 17:1-5"
example3 = "What is the evidence Jesus actually rose from the dead?"
example4 = "I'm feeling really overwhelmed and overcome by anxiety and I don't know what to do"
example5 = "How and when was the canon of the Bible put together?"
example6 = "Explain the Trinity"
example7 = "Give me a commentary on Matthew 5:3-12"
user_input = None
for example in (example1, example2, example3, example4, example5, example6, example7):
    if st.button(example):
        user_input = example
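# Note: user_input is recorded but not yet wired into the form above. One possible
# approach (a sketch, not part of the original app) is to keep the question in
# st.session_state so a clicked example pre-fills the text area:
#
#     if st.button(example):
#         st.session_state["question"] = example
#     # and inside the form:
#     text = st.text_area("Enter text:", key="question")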