{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example notebook on how to extract a source from the database and save it in another format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from buster.docparser import read_documents, write_documents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Settings: the database file to read from and the source to pull out of it\n",
    "db_path = \"documents.db\"\n",
    "target = \"pytorch\"\n",
    "\n",
    "# Read the target source from the database\n",
    "documents_df = read_documents(db_path, target)\n",
    "\n",
    "# Save the extracted source in another format (here: tar.gz)\n",
    "archive_name = f'document_embeddings_{target}.tar.gz'\n",
    "archive_path = os.path.join('buster/data/', archive_name)\n",
    "write_documents(archive_path, target, documents_df)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "milabot",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.9"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "9db6f4b791ef587fd310257e87896b12053c9010399595f881592a25a8a29679"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}