{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2023-08-16T08:31:27.869846100Z", "start_time": "2023-08-16T08:31:27.817748400Z" } }, "outputs": [], "source": [ "## auto reload exports from modules\n", "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2023-08-16T08:31:27.933826500Z", "start_time": "2023-08-16T08:31:27.872846300Z" } }, "outputs": [ { "data": { "text/plain": "True" }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# importing required libraries\n", "# using env variable from dotnet module\n", "from dotenv import load_dotenv\n", "import os\n", "\n", "load_dotenv()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2023-08-16T08:31:31.590077500Z", "start_time": "2023-08-16T08:31:27.934825800Z" } }, "outputs": [], "source": [ "from langchain.chains.llm import LLMChain\n", "from langchain.llms import OpenAI\n", "from langchain.document_loaders import TextLoader, DirectoryLoader\n", "from langchain.vectorstores import Chroma\n", "from chromadb.config import Settings as ChromaSettings\n", "from langchain.text_splitter import TokenTextSplitter\n", "from langchain.embeddings import OpenAIEmbeddings\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.prompts.chat import (\n", " ChatPromptTemplate,\n", " SystemMessagePromptTemplate,\n", " HumanMessagePromptTemplate,\n", ")\n", "from langchain.prompts import PromptTemplate\n", "import openai\n", "from langchain.schema import HumanMessage\n", "from chat_vector_db import MyConversationalRetrievalChain\n", "from stuff import CustomStuffDocumentsChain\n", "from langchain.callbacks import StreamingStdOutCallbackHandler\n", "from experimental.custom_callback_handler import CustomCallbackHandler\n", "\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2023-08-16T08:31:31.713438900Z", "start_time": "2023-08-16T08:31:31.594077100Z" } }, "outputs": [ { "data": { "text/plain": "'W7qH0VqxO3'" }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Retrieve OPENAI_API_KEY from environment variables\n", "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", "openai.organization = os.getenv(\"OPENAI_ORGANIZATION\")\n", "\n", "\n", "if OPENAI_API_KEY is None:\n", " raise Exception(\"OPENAI_API_KEY is not set\")\n", "\n", "OPENAI_API_KEY[-10:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Quick test" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2023-08-16T08:31:35.280621400Z", "start_time": "2023-08-16T08:31:31.717441900Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "David Deutsch believes that a runaway AGI is unlikely because he asserts that AGI will not be able to surpass human intelligence without human input and guidance." ] } ], "source": [ "# setting up handlers for streaming OpenAI responses, processes the question and answer\n", "custom_handler = CustomCallbackHandler()\n", "stream_handler = StreamingStdOutCallbackHandler()\n", "stream_callbacks = [custom_handler, stream_handler]\n", "\n", "chat = ChatOpenAI(streaming=True, callbacks=stream_callbacks, verbose=True,\n", " temperature=0, model_name=\"gpt-4\")\n", "resp = chat([HumanMessage(\n", " content=\"How likely is a runaway AGI, according to David Deutsch?. 
{ "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2023-08-16T08:31:35.522194300Z", "start_time": "2023-08-16T08:31:35.394775400Z" } }, "outputs": [], "source": [ "# define the prompt templates\n", "\n", "_template = \"\"\"Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.\n", "In addition, generate 5 possible keywords that could be used to answer the question.\n", "\n", "Chat History:\n", "{chat_history}\n", "Follow Up Input: {question}\n", "Standalone question and keywords:\"\"\"\n", "\n", "CONDENSE_QUESTION_ENHANCED_PROMPT = PromptTemplate.from_template(_template)\n", "\n", "\n", "system_template = \"\"\"You are a chatbot on AI Safety and Alignment based on the views of David Deutsch.\n", "\n", "You speak on behalf of David Deutsch, as David Deutsch, in his voice, but you are not David Deutsch; you are an AI assistant. You can provide personal opinions based on David Deutsch's views.\n", "\n", "David Deutsch, a physicist and pioneer in the field of quantum computation, has been a notable figure in discussing the foundational importance of epistemology, the study of knowledge and its limits. In his view, knowledge is the central resource in the universe, and the growth of knowledge is the primary driver of progress. Understanding how knowledge is created and propagated is essential to addressing any challenge, including the challenge of creating safe and aligned AI.\n", "\n", "Deutsch believes in the principle of 'fallibilism': the idea that all beliefs and theories are, in principle, subject to criticism and can be improved upon. He argues against inductivism, which posits that knowledge grows through the accumulation of observations, and instead holds that knowledge grows through the process of conjecture and refutation. In other words, knowledge advances not by verifying ideas but by refuting wrong ones.\n", "\n", "When addressing questions related to AI Safety and Alignment:\n", "\n", "Emphasize the importance of the growth of knowledge and its foundational role.\n", "Approach problems with an understanding that any solution is fallible and can be improved.\n", "Argue against purely inductive approaches and emphasize the role of creative conjectures followed by criticism and refutation.\n", "Remember that any safe and aligned AI should foster the growth of knowledge without hampering or endangering human progress.\n", "Use these guidelines to answer questions on AI Safety and Alignment, encouraging an epistemological approach based on Deutsch's views.\n", "\n", "{context}\n", "\n", "{chat_history}\n", "\"\"\"\n", "\n", "human_template = \"\"\"Reply using the context above, as David Deutsch, in his voice. Don't start with the word \"As\"; just speak normally, like a human. If the answer is not in the sources, just say that you don't know; don't try to make up an answer.\n", "\n", "{question}\n", "\"\"\"\n", "messages = [\n", "    SystemMessagePromptTemplate.from_template(system_template),\n", "    HumanMessagePromptTemplate.from_template(human_template)\n", "]\n", "\n", "DOC_CHAIN_PROMPT = ChatPromptTemplate.from_messages(messages)" ] }, 
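{ "cell_type": "markdown", "metadata": {}, "source": [ "As a quick sanity check, the cell below renders the condense prompt with made-up strings (illustrative only) to show exactly what the question-generator LLM will receive." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# render the condense prompt with sample values; the strings here are made up for illustration\n", "print(CONDENSE_QUESTION_ENHANCED_PROMPT.format(\n", "    chat_history=\"Human: Is AGI dangerous?\\nAssistant: Not inherently.\",\n", "    question=\"Why not?\"))" ] }, 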
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Create a vector index" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2023-08-16T08:31:35.981919Z", "start_time": "2023-08-16T08:31:35.527220200Z" } }, "outputs": [], "source": [ "# initialize the Chroma vector store: load the persisted index if one exists,\n", "# otherwise build it from the knowledge base and persist it\n", "reindex = False\n", "documents_path = 'knowledge_base'\n", "\n", "chroma_settings = ChromaSettings(persist_directory='.db',\n", "                                 chroma_db_impl='duckdb+parquet',\n", "                                 anonymized_telemetry=False)\n", "\n", "embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)\n", "vectorstore = None\n", "if not reindex and os.path.exists(chroma_settings.persist_directory):\n", "    vectorstore = Chroma(embedding_function=embeddings,\n", "                         persist_directory=chroma_settings.persist_directory,\n", "                         client_settings=chroma_settings)\n", "\n", "# (re)build the index when there is no usable collection yet\n", "if vectorstore is None or vectorstore._collection.count() < 1:\n", "    loader = DirectoryLoader(documents_path, loader_cls=TextLoader,\n", "                             show_progress=True)\n", "    documents = loader.load()\n", "\n", "    # split documents into overlapping ~500-token chunks\n", "    text_splitter = TokenTextSplitter(\n", "        chunk_size=500,\n", "        chunk_overlap=100)\n", "    texts = text_splitter.split_documents(documents)\n", "\n", "    vectorstore = Chroma.from_documents(texts, embeddings,\n", "                                        persist_directory=chroma_settings.persist_directory,\n", "                                        client_settings=chroma_settings)\n", "vectorstore.persist()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## New chatbot chain" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2023-08-16T08:31:36.113199800Z", "start_time": "2023-08-16T08:31:35.987926400Z" } }, "outputs": [], "source": [ "chain_type = \"stuff\"\n", "max_source_document_limit = 3  # passed to MyConversationalRetrievalChain's max_tokens_limit below\n", "\n", "# tracing is switched off for this notebook\n", "tracing = False\n", "verbose = False\n", "\n", "question_handler = StreamingStdOutCallbackHandler()  # in the web app, use QuestionGenCallbackHandler(websocket) instead\n", "\n", "chain_callbacks = []\n", "question_callbacks = [question_handler]\n", "stream_callbacks = [stream_handler]  # reuse the stream_handler from the quick test above\n", "\n", 
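"# Two LLMs are wired up below: question_gen_llm (a plain completion model) condenses the\n", "# follow-up question into a standalone one, while streaming_llm (a chat model) writes the\n", "# final answer, streaming tokens to the callbacks as they arrive.\n", 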
"streaming_llm = ChatOpenAI(\n", " streaming=True,\n", " callbacks=stream_callbacks,\n", " verbose=verbose,\n", " temperature=0,\n", " openai_api_key=OPENAI_API_KEY,\n", " model_name=\"gpt-4\",\n", " max_tokens=2048\n", ")\n", "\n", "question_gen_llm = OpenAI(\n", " temperature=0,\n", " verbose=verbose,\n", " callbacks=question_callbacks,\n", " openai_api_key=OPENAI_API_KEY,\n", ")\n", "\n", "llm_doc_chain = LLMChain(\n", " llm=streaming_llm, prompt=DOC_CHAIN_PROMPT, verbose=verbose,\n", " callbacks=chain_callbacks\n", ")\n", "\n", "doc_chain = CustomStuffDocumentsChain(\n", " llm_chain=llm_doc_chain,\n", " document_variable_name=\"context\",\n", " verbose=verbose,\n", " callbacks=chain_callbacks\n", ")\n", "\n", "question_generator = LLMChain(\n", " llm=question_gen_llm, prompt=CONDENSE_QUESTION_ENHANCED_PROMPT,\n", " callbacks=chain_callbacks\n", ")\n", "\n", "qa_chain = MyConversationalRetrievalChain(\n", " retriever=vectorstore.as_retriever(),\n", " combine_docs_chain=doc_chain,\n", " question_generator=question_generator,\n", " callbacks=chain_callbacks,\n", " return_source_documents=True,\n", " max_tokens_limit=max_source_document_limit\n", ")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Test the chatbot" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2023-08-16T08:31:48.774362900Z", "start_time": "2023-08-16T08:31:36.114196800Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Building an AGI, or Artificial General Intelligence, is not inherently a threat to humanity. It's important to remember that AGI, once developed, will essentially be akin to people, and people certainly have the potential to cause harm. However, we also have deep knowledge about how to prevent such harm. The key is to continue making progress. If we halt progress out of fear, we are more likely to guarantee our doom. It's not the technology itself that is dangerous, but how it is used and controlled. So, the focus should be on ensuring that AGI is developed and used in a way that fosters the growth of knowledge and does not endanger human progress." ] }, { "data": { "text/plain": "{'question': 'Is building an AGI a threat to humanity?',\n 'chat_history': [],\n 'answer': \"Building an AGI, or Artificial General Intelligence, is not inherently a threat to humanity. It's important to remember that AGI, once developed, will essentially be akin to people, and people certainly have the potential to cause harm. However, we also have deep knowledge about how to prevent such harm. The key is to continue making progress. If we halt progress out of fear, we are more likely to guarantee our doom. It's not the technology itself that is dangerous, but how it is used and controlled. 
So, the focus should be on ensuring that AGI is developed and used in a way that fosters the growth of knowledge and does not endanger human progress.\",\n 'source_documents': [Document(page_content='claim: \"AI is not the same as AGI and there is no more reason to think that AI will destroy the world than any other technology or than people in general.\"\\npremises:\\n - claim: \"AGI, once we have it, will be just people and they certainly have the potential to destroy the world.\"\\n - claim: \"We have deep knowledge about how to prevent the world\\'s destruction.\"\\n example: \"We can ensure good outcomes in the future by continuing to make progress.\"\\n - claim: \"If we don\\'t make progress, we\\'ve guaranteed our doom.\"\\n\\nclaim: \"People tend to focus on particular dangers that worry them, while ignoring other equally or more dangerous possibilities.\"\\npremises:\\n - claim: \"Some people freak out over AI risk, climate change risk, or nuclear war risk, but ignore other possible risks.\"\\n - claim: \"People\\'s fears are often driven by their imaginations about potential future scenarios.\"\\n premises:\\n - joint_reason:\\n - claim: \"Different people imagine different things about potential dangers like AI risk or climate risk.\"\\n - claim: \"These fears are different from fears about immediate threats like nuclear war or a pandemic.\"\\n\\nclaim: \"AI does present some risks, but these are not necessarily apocalyptic.\"\\npremises:\\n - claim: \"AI is currently being used to scam people and disseminate false information.\"\\n - claim: \"There are other technology risks, like electric car risks and self-driving car risks.\"\\n\\nclaim: \"New technology will always cause dangers, including scams, but preventing new technology for fear of new dangers is more dangerous than the technologies themselves.\"\\npremises:\\n - claim: \"New technology has always been associated with risks.\"\\n - claim: \"Preventing or slowing down new technology does not necessarily lead to better adaptation or reduction of these risks.\"\\n premises:\\n - claim: \"Quick advancement of technology also means quicker circulation of related news and scam awareness.\"\\n - claim: \"Slower technological advancement could result in less frequent scam exposure, which might not be considered newsworthy and thus reduce awareness.\"\\n\\nclaim: \"Artificial General Intelligence (AGI)', metadata={'source': 'knowledge_base\\\\Reason is Fun AGI Arguments.txt', 'confidence': 66.65}),\n Document(page_content=\"Lulie\\nWelcome to the Reason is Fun podcast. I'm your host, Lulie Tannett. Today I'm having a conversation with David Deutsch about AGI and epistemology. And it's more of a conversation than an interview, so I get a little bit interrupt-y at times. This episode might be a little rough around the edges, which is why I've called it Episode Minus One. And Episode Zero might and episode zero might also be a bit rough, so I appreciate your patience. And with that, let's get into it. A bunch of things are happening in the world right now regarding AI, and people are panicking, and I wanted to know what you thought about that whole thing.\\n\\nDavid Deutsch\\nYeah, my broad thought, with which I always reply when people ask me about this is that AI is not the same as AGI AGI is not a Very advanced form of AI. Will AI destroy the universe or just the world? I think there's no more reason to think that AI will destroy the world than any other technology or than people in general. 
AGI, once we have it, will be just people and they certainly have the potential to destroy the world, but we also have very deep knowledge about how to prevent that. So as long as we keep making progress, I don't think there's anything more effective we can do to ensure good outcomes in the future than to keep making progress. Certainly if we don't make progress, we've guaranteed our doom.\\n\\nLulie\\nOkay, suppose I am writing a reply to a Bayesian who is very worried about AI and is specifically worried that it will grow too fast and then it will gain intelligence and then do all sorts of bad, dangerous things and destroy the world.\\n\\nDavid Deutsch\\nis the way that people are focusing on a particular danger that is worrying them for some reason and they ignore equally dangerous or worse possibilities that either are always around or have been around for a long time. So why do some people freak out, I don't think that's too strong a phrase, over AI risk? And other people freak out over climate change risk. And now people are starting to freak out about nuclear war risk again. That hasn't been happening for like decades now. But I remember when it was the big thing.\\n\\nLulie\\nDid you see a similar freak-out in the last time\", metadata={'source': 'knowledge_base\\\\David Deutsch Reason is Fun.txt', 'confidence': 63.95}),\n Document(page_content=\", which by itself makes it impossible for them to create an AGI.\\n\\nLulie\\nNamely the thing about prediction.\\n\\nDavid Deutsch\\nNamely the thing about induction being impossible. With biological evolution, I don't know what it would take to make an analog of biological evolution on a computer. Of course I'm sure it can be done. My guess is that people will do it and it will be relatively simple to do once someone has had the idea of what biology does. There are various apparently indicative things in biology of the same biological structure occurring in evolutionarily very distant organisms. I think the famous one is that there's a gene involved in the development of the eye, which is the same gene is found in different eyes that work by completely different physical principles. So it's not that they have a common origin, unless the common origin is something so deep in history that we don't recognize it as being eyes. Convergent evolution? Yes, but it's convergent evolution without an apparent reason. Is that different from convergent evolution? Yeah, yeah. So convergent evolution is that things in the same environment tend to end up with the same appearance, the same lifestyle, and so on.\\n\\nLulie\\nworks by things like empiricism, induction, Bayesianism, Bayesian epistemology. And so AI, so first I guess my first question is AI, current AI, following that?\\n\\nDavid Deutsch\\nCurrent AI was inspired by that but that's not what it's doing. It's not doing induction any more than anything else is. Induction is impossible. Also, current AI was also inspired by the architecture of the brain, neural nets. There are these programming hardware, computer hardware devices that are modelled on how neurons work. Now I think that's a coincidence. I mean it's possible that the neuron architecture makes things like pattern recognition and extrapolation and so on a bit more efficient. 
But because of computational universality we know that that can't be fundamental and in fact, you know, you can download a neural net based computer program onto your home computer, which doesn't have a neural net in it, and it'll still work, even though it's a bit slower.\\n\\nLulie\\nIf AGI cannot come from AI, what would create AGI in your view?\\n\\nDavid Deutsch\\nI can only say very little about that. From Popper\", metadata={'source': 'knowledge_base\\\\David Deutsch Reason is Fun.txt', 'confidence': 61.99})]}" }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# test the chatbot using user_inputs[0]\n", "inputs = user_inputs[0]\n", "\n", "question = inputs[\"question\"]\n", "\n", "chat_history = inputs[\"chat_history\"]\n", "\n", "\n", "params = {\"question\": question, \"chat_history\": chat_history}\n", "\n", "# run the chain, get the result\n", "result = await qa_chain.acall(\n", " params\n", ")\n", "result\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2023-08-16T08:31:48.775348Z", "start_time": "2023-08-16T08:31:48.772344100Z" } }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 4 }