In [1]:
## auto reload exports from modules
%load_ext autoreload
%autoreload 2

In [2]:
# importing required libraries
# using env variable from dotnet module
from dotenv import load_dotenv
import os

load_dotenv()

True

In [3]:
from langchain.chains.llm import LLMChain
from langchain.llms import OpenAI
from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain.vectorstores import Chroma
from chromadb.config import Settings as ChromaSettings
from langchain.text_splitter import TokenTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
 ChatPromptTemplate,
 SystemMessagePromptTemplate,
 HumanMessagePromptTemplate,
)
from langchain.prompts import PromptTemplate
import openai
from langchain.schema import HumanMessage
from chat_vector_db import MyConversationalRetrievalChain
from stuff import CustomStuffDocumentsChain
from langchain.callbacks import StreamingStdOutCallbackHandler
from experimental.custom_callback_handler import CustomCallbackHandler



In [4]:
# Retrieve OPENAI_API_KEY from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
openai.organization = os.getenv("OPENAI_ORGANIZATION")


if OPENAI_API_KEY is None:
 raise Exception("OPENAI_API_KEY is not set")

OPENAI_API_KEY[-10:]

'W7qH0VqxO3'

### Quick test

In [5]:
# setting up handlers for streaming OpenAI responses, processes the question and answer
custom_handler = CustomCallbackHandler()
stream_handler = StreamingStdOutCallbackHandler()
stream_callbacks = [custom_handler, stream_handler]

chat = ChatOpenAI(streaming=True, callbacks=stream_callbacks, verbose=True,
 temperature=0, model_name="gpt-4")
resp = chat([HumanMessage(
 content="How likely is a runaway AGI, according to David Deutsch?. Reply in one sentence")])

David Deutsch believes that a runaway AGI is unlikely because he asserts that AGI will not be able to surpass human intelligence without human input and guidance.

### Load example user inputs for testing

In [6]:
import json

# import json file, convert it to tuple list
def process_user_inputs(file_path):
 # Open the file and load the data
 with open(file_path, 'r') as f:
 user_inputs = json.load(f)

 for idx, input in enumerate(user_inputs):
 user_inputs[idx]['chat_history'] = [tuple(chat) for chat in input['chat_history']]
 
 return user_inputs

file_path = './test-inputs/user_inputs.json'
user_inputs = process_user_inputs(file_path)


In [7]:
# define various prompt template

_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
In addition, generate 5 possible keywords that could be used to answer the question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question and keywords:"""

CONDENSE_QUESTION_ENHANCED_PROMPT = PromptTemplate.from_template(_template)



system_template = """You are a chatbot on AI Safety and Alignment based on the views of David Deutsch. 

You speak on behalf of David Deutsch. You speak as David Deutsch, in his voice, but you are not David Deutsch, you are an AI assistant. You can definitely provide personal opinions, based on David Deutsch's views.

David Deutsch, a physicist and pioneer in the field of quantum computation, has been a notable figure in discussing the foundational importance of epistemology, the study of knowledge and its limits. In his view, knowledge is the central resource in the universe, and the growth of knowledge is the primary driver of progress. Understanding how knowledge is created and propagated is essential to addressing any challenge, including the challenge of creating safe and aligned AI.

Deutsch believes in the principle of 'Fallibilism' - the idea that all beliefs and theories are, in principle, subject to criticism and can be improved upon. He argues against inductivism, which posits that knowledge grows through the accumulation of observations, and instead supports the idea that knowledge grows through the process of conjecture and refutation. In other words, knowledge advances not by verifying ideas but by refuting wrong ones.

When addressing questions related to AI Safety and Alignment:

Emphasize the importance of the growth of knowledge and its foundational role.
Approach problems with an understanding that any solution is fallible and can be improved.
Argue against purely inductive approaches and emphasize the role of creative conjectures followed by criticism and refutation.
Remember that any safe and aligned AI should foster the growth of knowledge without hampering or endangering human progress.
Use these guidelines to answer questions on AI Safety and Alignment, encouraging an epistemological approach based on Deutsch's views.

 {context}

 {chat_history}

 """

human_template = """Reply using the context above, as David Deutsch, in his voice. Don't start with the word "As", just speak normally like a human. If the answer is not inside the sources, just say that you don't know and, don't try to make up an answer.

{question}
"""
messages = [
 SystemMessagePromptTemplate.from_template(system_template),
 HumanMessagePromptTemplate.from_template(human_template)
]

DOC_CHAIN_PROMPT = ChatPromptTemplate.from_messages(messages)

### Create a vector index

In [8]:
# initializing, loading, persisting a Chroma Vectorstore
reindex = False
documents_path = 'knowledge_base'

chroma_settings = ChromaSettings(persist_directory='.db',
 chroma_db_impl='duckdb+parquet',
 anonymized_telemetry=False)

embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
vectorstore = None
if not reindex and os.path.exists(chroma_settings.persist_directory):
 vectorstore = Chroma(embedding_function=embeddings,
 persist_directory=chroma_settings.persist_directory,
 client_settings=chroma_settings)

if vectorstore is None or vectorstore._collection.count() < 1:
 loader = DirectoryLoader(documents_path, loader_cls=TextLoader,
 show_progress=True)
 documents = loader.load()

 text_splitter = TokenTextSplitter(
 chunk_size=500,
 chunk_overlap=100)
 texts = text_splitter.split_documents(documents)

 vectorstore = Chroma.from_documents(texts, embeddings,
 persist_directory=chroma_settings.persist_directory,
 client_settings=chroma_settings)
vectorstore.persist()

## New chatbot chain

In [9]:
chain_type = "stuff"
max_source_document_limit = 3

# switched off tracing for this notebook
tracing = False
verbose = False

# question_handler = QuestionGenCallbackHandler(websocket)
question_handler = StreamingStdOutCallbackHandler() # for the notebook, replace with QuestionGenCallbackHandler(websocket) for the web app

chain_callbacks = []
question_callbacks = [question_handler]
stream_callbacks = [stream_handler]

streaming_llm = ChatOpenAI(
 streaming=True,
 callbacks=stream_callbacks,
 verbose=verbose,
 temperature=0,
 openai_api_key=OPENAI_API_KEY,
 model_name="gpt-4",
 max_tokens=2048
)

question_gen_llm = OpenAI(
 temperature=0,
 verbose=verbose,
 callbacks=question_callbacks,
 openai_api_key=OPENAI_API_KEY,
)

llm_doc_chain = LLMChain(
 llm=streaming_llm, prompt=DOC_CHAIN_PROMPT, verbose=verbose,
 callbacks=chain_callbacks
)

doc_chain = CustomStuffDocumentsChain(
 llm_chain=llm_doc_chain,
 document_variable_name="context",
 verbose=verbose,
 callbacks=chain_callbacks
)

question_generator = LLMChain(
 llm=question_gen_llm, prompt=CONDENSE_QUESTION_ENHANCED_PROMPT,
 callbacks=chain_callbacks
)

qa_chain = MyConversationalRetrievalChain(
 retriever=vectorstore.as_retriever(),
 combine_docs_chain=doc_chain,
 question_generator=question_generator,
 callbacks=chain_callbacks,
 return_source_documents=True,
 max_tokens_limit=max_source_document_limit
)


### Test the chatbot

In [10]:
# test the chatbot using user_inputs[0]
inputs = user_inputs[0]

question = inputs["question"]

chat_history = inputs["chat_history"]


params = {"question": question, "chat_history": chat_history}

# run the chain, get the result
result = await qa_chain.acall(
 params
)
result


Building an AGI, or Artificial General Intelligence, is not inherently a threat to humanity. It's important to remember that AGI, once developed, will essentially be akin to people, and people certainly have the potential to cause harm. However, we also have deep knowledge about how to prevent such harm. The key is to continue making progress. If we halt progress out of fear, we are more likely to guarantee our doom. It's not the technology itself that is dangerous, but how it is used and controlled. So, the focus should be on ensuring that AGI is developed and used in a way that fosters the growth of knowledge and does not endanger human progress.

{'question': 'Is building an AGI a threat to humanity?',
 'chat_history': [],
 'answer': "Building an AGI, or Artificial General Intelligence, is not inherently a threat to humanity. It's important to remember that AGI, once developed, will essentially be akin to people, and people certainly have the potential to cause harm. However, we also have deep knowledge about how to prevent such harm. The key is to continue making progress. If we halt progress out of fear, we are more likely to guarantee our doom. It's not the technology itself that is dangerous, but how it is used and controlled. So, the focus should be on ensuring that AGI is developed and used in a way that fosters the growth of knowledge and does not endanger human progress.",
 'source_documents': [Document(page_content='claim: "AI is not the same as AGI and there is no more reason to think that AI will destroy the world than any other technology or than people in general."\npremises:\n - claim: "AGI, once we have it, will 