{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "23b388fd-2a24-48cf-9cf8-fd5cd19257d8", "metadata": {}, "outputs": [], "source": [ "import os\n", "import sqlite3\n", "\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": null, "id": "1edf4aeb-bcb3-42f6-b3f7-9f9543b5ab12", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "04969710-e7b7-4017-8eb7-fc50ee99df6f", "metadata": {}, "source": [ "### Parameters" ] }, { "cell_type": "code", "execution_count": null, "id": "7cf683dc-93fc-4497-9641-75f0a3c1ba12", "metadata": {}, "outputs": [], "source": [ "db_path = \"../database/mock_qna.sqlite\"\n", "nature_of_run = \"new\" if not os.path.exists(db_path) else \"existing\"\n", "\n", "qna_path = \"../database/mock_qna_source.csv\"" ] }, { "cell_type": "code", "execution_count": null, "id": "b6cca63e-021b-4950-ab9f-0e3170194c35", "metadata": {}, "outputs": [], "source": [ "print(f\"nature of run: `{nature_of_run}`\")" ] }, { "cell_type": "code", "execution_count": null, "id": "add28f2e-d695-42a5-97e5-3647dd768dce", "metadata": {}, "outputs": [], "source": [ "qna_data = pd.read_csv( qna_path )\n", "qna_cols = list(qna_data.columns)\n", "print(\"qna_data.shape\", qna_data.shape)\n", "\n", "qna_data = qna_data[ qna_data[\"question\"].notnull() ].reset_index(drop=True)\n", "print(\"qna_data.shape\", qna_data.shape)" ] }, { "cell_type": "code", "execution_count": null, "id": "26fa3a67-71d9-4410-b0ea-9c1e08ca2f51", "metadata": {}, "outputs": [], "source": [ "qna_data[:3]" ] }, { "cell_type": "code", "execution_count": null, "id": "2a20c4ee-ae53-4582-a660-54e40f8f1dd5", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "1167bb3a-97fd-48b1-a0a9-eab6e4d54245", "metadata": {}, "source": [ "### Initialize database connection & resources" ] }, { "cell_type": "code", "execution_count": null, "id": "095b8a2e-c3cb-4c09-b49d-ccb5df8467b0", "metadata": {}, "outputs": [], "source": [ "con = sqlite3.connect(db_path)" ] }, { "cell_type": "code", "execution_count": null, "id": "f2668a87-be3c-464d-a4ad-4e40590cbd0c", "metadata": {}, "outputs": [], "source": [ "cur = con.cursor()" ] }, { "cell_type": "code", "execution_count": null, "id": "4437d3cb-b92b-40ef-b030-b7fb4499d0e7", "metadata": {}, "outputs": [], "source": [ "if nature_of_run == \"new\":\n", " qna_cols_str = \", \".join(qna_cols)\n", " cur.execute(f\"\"\"CREATE TABLE qna_tbl (\n", " {qna_cols_str}\n", " )\n", " \"\"\")\n", " print(\"created table `qna_tbl`\")\n", " print(f\"columns for `qna_tbl` are {qna_cols_str}\")\n", "\n", " \n", " cur.execute(f\"\"\"CREATE TABLE answer_tbl (\n", " id, correct_answer, user_answer\n", " )\n", " \"\"\")\n", " print(\"created table `answer_tbl`\")" ] }, { "cell_type": "code", "execution_count": null, "id": "31b6fe49-f55d-44f0-90c1-248158eac96c", "metadata": {}, "outputs": [], "source": [ "if False:\n", " cur.execute(\"DELETE FROM answer_tbl\")\n", " con.commit()" ] }, { "cell_type": "code", "execution_count": null, "id": "a6153892-4d8b-487e-bd1d-05577ef1fcb5", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "cdc0a81b-fb0a-46fa-9646-1a78c2781f02", "metadata": {}, "source": [ "#### Test fetching empty table" ] }, { "cell_type": "code", "execution_count": null, "id": "dce53aec-680e-4f0f-b6eb-71efe902231a", "metadata": {}, "outputs": [], "source": [ "res = cur.execute(\"SELECT chapter, question FROM qna_tbl\")" ] }, { "cell_type": "code", "execution_count": null, "id": "506527e2-4d6d-4817-bdaf-9a31fec3b006", "metadata": {}, "outputs": [], "source": [ "res.fetchone()" ] }, { "cell_type": "code", "execution_count": null, "id": "69f74ed2-a1da-410a-b759-d334fcf37851", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "e82debcf-c3e4-4c93-8e59-2c73ead63adc", "metadata": {}, "source": [ "#### Test ingesting one record of data" ] }, { "cell_type": "code", "execution_count": null, "id": "e239f941-d19b-4400-acac-8a45b7b50fcc", "metadata": {}, "outputs": [], "source": [ "data = qna_data.values.tolist()\n", "q_mark_list = [\"?\"] * len(qna_cols)\n", "q_mark_str = \"(\" + \", \".join(q_mark_list) + \")\"" ] }, { "cell_type": "code", "execution_count": null, "id": "93b7130b-b007-4359-a0a2-bfe5fb7ddba2", "metadata": {}, "outputs": [], "source": [ "cur.executemany(f\"INSERT INTO qna_tbl VALUES {q_mark_str}\", data[:1])\n", "con.commit()" ] }, { "cell_type": "code", "execution_count": null, "id": "5f01dac9-c9f5-4536-85d4-667abd8f178d", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "bf8b1f1d-08fd-4a07-9489-58ef14b8439d", "metadata": {}, "source": [ "#### Test fetching one record of data" ] }, { "cell_type": "code", "execution_count": null, "id": "26206800-54c0-495e-bf8f-5958421eddca", "metadata": {}, "outputs": [], "source": [ "res = cur.execute(\"SELECT chapter, question FROM qna_tbl\")\n", "res.fetchone()" ] }, { "cell_type": "code", "execution_count": null, "id": "54722955-7e72-4723-88ca-a0dbee361934", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "54ec1451-fe61-4a92-9148-d4a3d05aeed8", "metadata": {}, "source": [ "#### Clean up and ingest full Q&A data" ] }, { "cell_type": "code", "execution_count": null, "id": "64131faf-b2e7-4e70-8547-762a09ed2ad2", "metadata": {}, "outputs": [], "source": [ "cur.execute(\"DELETE FROM qna_tbl\")\n", "con.commit()" ] }, { "cell_type": "code", "execution_count": null, "id": "06d55885-50b1-4c23-a364-1fb8fa4f4b36", "metadata": {}, "outputs": [], "source": [ "cur.executemany(f\"INSERT INTO qna_tbl VALUES {q_mark_str}\", data)\n", "con.commit()" ] }, { "cell_type": "code", "execution_count": null, "id": "9e2a3d06-a077-4b32-8fce-600b3577cad9", "metadata": {}, "outputs": [], "source": [ "res = cur.execute(\"SELECT COUNT(*) FROM qna_tbl\")\n", "res.fetchone()" ] }, { "cell_type": "code", "execution_count": null, "id": "9256ad33-f70a-482c-801e-01b5a52e8261", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.18" } }, "nbformat": 4, "nbformat_minor": 5 }