{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(14,\n",
       " ['sukanta.txt',\n",
       "  'jatindramohan.txt',\n",
       "  'sukumar.txt',\n",
       "  'ishwarchandragupta.txt',\n",
       "  'test.txt',\n",
       "  'data_prep.ipynb',\n",
       "  'train.txt',\n",
       "  'madhusudan.txt',\n",
       "  'satyendranath.txt',\n",
       "  'rabindranath.txt',\n",
       "  'jibanananda.txt',\n",
       "  '.ipynb_checkpoints',\n",
       "  'nazrulislam.txt',\n",
       "  'kaminiroy.txt'])"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Snapshot of the working directory; os.listdir() gives no ordering guarantee.\n",
    "files = os.listdir(os.curdir)\n",
    "len(files), files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(['sukanta.txt',\n",
       "  'jatindramohan.txt',\n",
       "  'sukumar.txt',\n",
       "  'ishwarchandragupta.txt',\n",
       "  'madhusudan.txt',\n",
       "  'satyendranath.txt',\n",
       "  'rabindranath.txt',\n",
       "  'jibanananda.txt',\n",
       "  'nazrulislam.txt',\n",
       "  'kaminiroy.txt'],\n",
       " 10)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the per-poet corpora: every .txt file except the generated\n",
    "# train/test splits. Selecting by name rather than by list position keeps\n",
    "# this correct whatever order os.listdir() returned the entries in.\n",
    "generated_files = {'train.txt', 'test.txt'}\n",
    "poems_files = [\n",
    "    name for name in files\n",
    "    if name.endswith('.txt') and name not in generated_files\n",
    "]\n",
    "poems_files, len(poems_files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "def open_files(dr):\n",
    "    '''Lazily open, for reading, every entry of `dr` that is named in `poems_files`.\n",
    "\n",
    "    Args:\n",
    "        dr: directory to scan.\n",
    "\n",
    "    Returns:\n",
    "        A generator of text-mode file objects, in os.listdir(dr) order.\n",
    "        Nothing is closed here; the caller must close each file.\n",
    "    '''\n",
    "    # Join with `dr` so the files are found even when `dr` is not the\n",
    "    # current working directory.\n",
    "    return (\n",
    "        open(os.path.join(dr, name), 'r')\n",
    "        for name in os.listdir(dr)\n",
    "        if name in poems_files\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split every poet's corpus by line into train.txt (first 90%) and test.txt (rest).\n",
    "sukanta, jm, sukumar, ishwarg, msd, satyen, rabi, jiban, nazrul, kamini = open_files('.')\n",
    "poets = [sukanta, jm, sukumar, ishwarg, msd, satyen, rabi, jiban, nazrul, kamini]\n",
    "\n",
    "train_fraction = 0.9\n",
    "\n",
    "# Mode 'w' rather than 'a': re-running this cell rebuilds both splits instead of\n",
    "# appending duplicate lines. Each output file is opened once, not once per line.\n",
    "with open(r'train.txt', 'w') as train_, open(r'test.txt', 'w') as test_:\n",
    "    for poet in poets:\n",
    "        with poet:  # close the source file as soon as it has been read\n",
    "            lines = poet.readlines()\n",
    "        train_split_idx = int(len(lines) * train_fraction)\n",
    "        # Exactly train_split_idx lines go to train; the remainder goes to test.\n",
    "        train_.writelines(lines[:train_split_idx])\n",
    "        test_.writelines(lines[train_split_idx:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of lines in train file: 91263\n",
      "Number of lines in test file: 10136\n"
     ]
    }
   ],
   "source": [
    "# Report how many lines ended up in each split.\n",
    "with open(r'train.txt', 'r') as train:\n",
    "    train_line_count = sum(1 for _ in train)\n",
    "print(f'Number of lines in train file: {train_line_count}')\n",
    "\n",
    "with open(r'test.txt', 'r') as test:\n",
    "    test_line_count = sum(1 for _ in test)\n",
    "print(f'Number of lines in test file: {test_line_count}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}