{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "621c09fe",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-08T19:16:32.071268Z",
     "start_time": "2024-03-08T19:16:31.275321Z"
    }
   },
   "outputs": [],
   "source": [
    "from bs4 import BeautifulSoup\n",
    "import pandas as pd\n",
    "\n",
    "# Number of <tr> elements found in the table, header and empty rows included;\n",
    "# the next cell prints it.\n",
    "c = 0\n",
    "\n",
    "# Start from an empty frame that already has the derived columns, so the cells\n",
    "# below still receive a DataFrame (not an int) when nothing could be extracted.\n",
    "df = pd.DataFrame(columns=[\"Name\", \"Ticker\"])\n",
    "\n",
    "# Read HTML content from the file\n",
    "with open(\"index.html\", \"r\", encoding=\"utf-8\") as file:\n",
    "    html_content = file.read()\n",
    "\n",
    "# Parse the HTML content\n",
    "soup = BeautifulSoup(html_content, \"html.parser\")\n",
    "\n",
    "# Find the table\n",
    "table = soup.find(\"table\")\n",
    "\n",
    "# Extract table data\n",
    "if table is not None:\n",
    "    rows = table.find_all(\"tr\")\n",
    "    c = len(rows)\n",
    "\n",
    "    # Keep only rows that contain <td> cells; header (<th>) and empty rows have none.\n",
    "    data = []\n",
    "    for row in rows:\n",
    "        columns = row.find_all(\"td\")\n",
    "        if columns:\n",
    "            data.append([column.text.strip() for column in columns])\n",
    "\n",
    "    if data:\n",
    "        # Create DataFrame (columns are labelled 0, 1, 2, ... by position)\n",
    "        df = pd.DataFrame(data)\n",
    "\n",
    "        # Column 0 is split on whitespace: the last token becomes the ticker and\n",
    "        # everything before it, re-joined with single spaces, becomes the name.\n",
    "        tokens = df[0].str.split()\n",
    "        df[\"Name\"] = tokens.str[:-1].str.join(\" \")\n",
    "        df[\"Ticker\"] = tokens.str[-1]\n",
    "        df[[\"Name\", \"Ticker\"]].to_csv(\"CompanyList.csv\")\n",
    "    else:\n",
    "        # Without this guard df[0] would raise KeyError on an empty frame.\n",
    "        print(\"Table found, but it contains no data rows.\")\n",
    "else:\n",
    "    print(\"Table not found in the HTML content.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "38efd2c9",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-08T19:16:45.594430Z",
     "start_time": "2024-03-08T19:16:45.591931Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2063\n"
     ]
    }
   ],
   "source": [
    "print(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "6c185203",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-08T19:16:46.502190Z",
     "start_time": "2024-03-08T19:16:46.489733Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234NameTicker
count2062206220622062206220622062
unique206220442060205612520612062
top20 Microns Ltd. 20MICRONS₹1.14 -4.16%₹8.8/₹3.79₹128.05 CrsPharmaceuticalsGallantt Ispat Ltd.20MICRONS
freq142210521
\n", "
" ], "text/plain": [ " 0 1 2 3 \\\n", "count 2062 2062 2062 2062 \n", "unique 2062 2044 2060 2056 \n", "top 20 Microns Ltd. 20MICRONS ₹1.14 -4.16% ₹8.8/₹3.79 ₹128.05 Crs \n", "freq 1 4 2 2 \n", "\n", " 4 Name Ticker \n", "count 2062 2062 2062 \n", "unique 125 2061 2062 \n", "top Pharmaceuticals Gallantt Ispat Ltd. 20MICRONS \n", "freq 105 2 1 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Per-column summary (count / unique / top / freq) of the scraped frame.\n", "df.describe()" ] }, { "cell_type": "code", "execution_count": 5, "id": "ebfb97f3", "metadata": { "ExecuteTime": { "end_time": "2024-03-08T19:16:48.278807Z", "start_time": "2024-03-08T19:16:48.273680Z" } }, "outputs": [ { "data": { "text/plain": [ "2062" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of rows in the frame.\n", "len(df)" ] }, { "cell_type": "code", "execution_count": 6, "id": "70727a81", "metadata": { "ExecuteTime": { "end_time": "2024-03-08T19:16:49.269674Z", "start_time": "2024-03-08T19:16:49.267266Z" } }, "outputs": [], "source": [ "# Every row whose Name occurs more than once; keep=False flags all occurrences,\n", "# not just the repeats after the first.\n", "duplicates = df.loc[df[\"Name\"].duplicated(keep=False)]" ] }, { "cell_type": "code", "execution_count": 7, "id": "75f68012", "metadata": { "ExecuteTime": { "end_time": "2024-03-08T19:16:50.191351Z", "start_time": "2024-03-08T19:16:50.186194Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234NameTicker
610Gallantt Ispat Ltd.\\n ...₹64.15 -0.77%₹76/₹44.64₹1807.11 CrsIron & SteelGallantt Ispat Ltd.GALLISPAT
611Gallantt Ispat Ltd.\\n ...₹216.94 +1.33%₹236.4/₹49.54₹5235.8 CrsIron & SteelGallantt Ispat Ltd.GALLANTT
\n", "
" ], "text/plain": [ " 0 1 \\\n", "610 Gallantt Ispat Ltd.\\n ... ₹64.15 -0.77% \n", "611 Gallantt Ispat Ltd.\\n ... ₹216.94 +1.33% \n", "\n", " 2 3 4 Name Ticker \n", "610 ₹76/₹44.64 ₹1807.11 Crs Iron & Steel Gallantt Ispat Ltd. GALLISPAT \n", "611 ₹236.4/₹49.54 ₹5235.8 Crs Iron & Steel Gallantt Ispat Ltd. GALLANTT " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show the rows that share a Name with another row.\n", "duplicates" ] }, { "cell_type": "code", "execution_count": 8, "id": "a0f8f58d", "metadata": { "ExecuteTime": { "end_time": "2024-03-08T19:16:59.440812Z", "start_time": "2024-03-08T19:16:59.430280Z" } }, "outputs": [], "source": [ "# Writes every column of df (and its index) to the same path the first cell\n", "# used for the Name/Ticker subset, replacing that file.\n", "df.to_csv(path_or_buf=\"CompanyList.csv\")" ] }, { "cell_type": "code", "execution_count": null, "id": "e9274e3c3011e6fc", "metadata": { "collapsed": false }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }