Spaces:

sankhyikii
/

CapiPort

Sleeping

File size: 8,208 Bytes

e9fbea3
 
 
 
74dfb56
e9fbea3
74dfb56
 
 
 
 
 
e9fbea3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74dfb56
e9fbea3
 
 
 
 
 
74dfb56
e9fbea3
74dfb56
 
 
 
 
 
e9fbea3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74dfb56
e9fbea3
74dfb56
 
 
 
 
 
e9fbea3
 
 
74dfb56
 
e9fbea3
74dfb56
e9fbea3
 
 
 
 
 
 
 
 
 
74dfb56
e9fbea3
74dfb56
 
 
 
 
 
e9fbea3
 
 
74dfb56
e9fbea3
74dfb56
e9fbea3
 
 
 
 
 
 
 
 
 
74dfb56
e9fbea3
74dfb56
 
 
 
 
 
e9fbea3
 
 
 
 
 
 
74dfb56
e9fbea3
74dfb56
 
 
 
 
 
e9fbea3
 
 
74dfb56
 
e9fbea3
74dfb56
e9fbea3
 
 
 
 
 
 
 
 
 
74dfb56
e9fbea3
74dfb56
 
 
 
 
 
e9fbea3
74dfb56
 
 
 
 
 
 
 
 
 
 
 
e9fbea3

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "621c09fe",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-08T19:16:32.071268Z",
     "start_time": "2024-03-08T19:16:31.275321Z"
    }
   },
   "outputs": [],
   "source": [
    "from bs4 import BeautifulSoup\n",
    "import pandas as pd\n",
    "\n",
    "# Scrape the first <table> of a saved HTML page into `df` and export the\n",
    "# derived Name/Ticker columns to CompanyList.csv.\n",
    "\n",
    "# Count of all <tr> elements in the table, including rows without <td> cells;\n",
    "# a later cell prints it.\n",
    "c = 0\n",
    "\n",
    "# Fall back to an empty frame (not the integer 0) so the later cells that call\n",
    "# DataFrame methods on `df` still run when nothing could be parsed.\n",
    "df = pd.DataFrame(columns=[\"Name\", \"Ticker\"])\n",
    "\n",
    "# Read HTML content from the file\n",
    "with open(\"index.html\", \"r\", encoding=\"utf-8\") as file:\n",
    "    html_content = file.read()\n",
    "\n",
    "# Parse the HTML content\n",
    "soup = BeautifulSoup(html_content, \"html.parser\")\n",
    "\n",
    "# Find the table\n",
    "table = soup.find(\"table\")\n",
    "\n",
    "# Extract table data\n",
    "if table is not None:\n",
    "    rows = table.find_all(\"tr\")\n",
    "    c = len(rows)\n",
    "    # Keep only rows that have <td> cells and collapse the newlines and runs of\n",
    "    # spaces inside each cell to single spaces, so exported values stay on one line.\n",
    "    data = [\n",
    "        [\" \".join(column.text.split()) for column in columns]\n",
    "        for columns in (row.find_all(\"td\") for row in rows)\n",
    "        if columns\n",
    "    ]\n",
    "    if data:\n",
    "        # Create DataFrame\n",
    "        df = pd.DataFrame(data)\n",
    "\n",
    "        # Column 0 is \"<company name> <ticker>\": the last word is the ticker,\n",
    "        # everything before it is the name.\n",
    "        words = df[0].str.split()\n",
    "        df['Name'] = words.str[:-1].str.join(\" \")\n",
    "        df['Ticker'] = words.str[-1]\n",
    "        # NOTE(review): a later cell writes the full frame to this same path;\n",
    "        # confirm which export is the intended one.\n",
    "        df[['Name', 'Ticker']].to_csv(\"CompanyList.csv\")\n",
    "    else:\n",
    "        print(\"Table contains no data rows.\")\n",
    "else:\n",
    "    print(\"Table not found in the HTML content.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "38efd2c9",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-08T19:16:45.594430Z",
     "start_time": "2024-03-08T19:16:45.591931Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2063\n"
     ]
    }
   ],
   "source": [
    "print(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "6c185203",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-08T19:16:46.502190Z",
     "start_time": "2024-03-08T19:16:46.489733Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "                                0             1           2            3  \\\ncount                        2062          2062        2062         2062   \nunique                       2062          2044        2060         2056   \ntop     20 Microns Ltd. 20MICRONS  ₹1.14 -4.16%  ₹8.8/₹3.79  ₹128.05 Crs   \nfreq                            1             4           2            2   \n\n                      4                 Name     Ticker  \ncount              2062                 2062       2062  \nunique              125                 2061       2062  \ntop     Pharmaceuticals  Gallantt Ispat Ltd.  20MICRONS  \nfreq                105                    2          1  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n      <th>2</th>\n      <th>3</th>\n      <th>4</th>\n      <th>Name</th>\n      <th>Ticker</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>count</th>\n      <td>2062</td>\n      <td>2062</td>\n      <td>2062</td>\n      <td>2062</td>\n      <td>2062</td>\n      <td>2062</td>\n      <td>2062</td>\n    </tr>\n    <tr>\n      <th>unique</th>\n      <td>2062</td>\n      <td>2044</td>\n      <td>2060</td>\n      <td>2056</td>\n      <td>125</td>\n      <td>2061</td>\n      <td>2062</td>\n    </tr>\n    <tr>\n      <th>top</th>\n      <td>20 Microns Ltd. 20MICRONS</td>\n      <td>₹1.14 -4.16%</td>\n      <td>₹8.8/₹3.79</td>\n      <td>₹128.05 Crs</td>\n      <td>Pharmaceuticals</td>\n      <td>Gallantt Ispat Ltd.</td>\n      <td>20MICRONS</td>\n    </tr>\n    <tr>\n      <th>freq</th>\n      <td>1</td>\n      <td>4</td>\n      <td>2</td>\n      <td>2</td>\n      <td>105</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "ebfb97f3",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-08T19:16:48.278807Z",
     "start_time": "2024-03-08T19:16:48.273680Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "2062"
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "70727a81",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-08T19:16:49.269674Z",
     "start_time": "2024-03-08T19:16:49.267266Z"
    }
   },
   "outputs": [],
   "source": [
    "# Rows whose Name occurs more than once; keep=False marks every member of a\n",
    "# repeated group, so all colliding rows are returned.\n",
    "duplicates = df.loc[df.duplicated(subset='Name', keep=False)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "75f68012",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-08T19:16:50.191351Z",
     "start_time": "2024-03-08T19:16:50.186194Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": "                                                     0               1  \\\n610  Gallantt Ispat Ltd.\\n                         ...   ₹64.15 -0.77%   \n611  Gallantt Ispat Ltd.\\n                         ...  ₹216.94 +1.33%   \n\n                 2             3             4                 Name     Ticker  \n610     ₹76/₹44.64  ₹1807.11 Crs  Iron & Steel  Gallantt Ispat Ltd.  GALLISPAT  \n611  ₹236.4/₹49.54   ₹5235.8 Crs  Iron & Steel  Gallantt Ispat Ltd.   GALLANTT  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n      <th>2</th>\n      <th>3</th>\n      <th>4</th>\n      <th>Name</th>\n      <th>Ticker</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>610</th>\n      <td>Gallantt Ispat Ltd.\\n                         ...</td>\n      <td>₹64.15 -0.77%</td>\n      <td>₹76/₹44.64</td>\n      <td>₹1807.11 Crs</td>\n      <td>Iron &amp; Steel</td>\n      <td>Gallantt Ispat Ltd.</td>\n      <td>GALLISPAT</td>\n    </tr>\n    <tr>\n      <th>611</th>\n      <td>Gallantt Ispat Ltd.\\n                         ...</td>\n      <td>₹216.94 +1.33%</td>\n      <td>₹236.4/₹49.54</td>\n      <td>₹5235.8 Crs</td>\n      <td>Iron &amp; Steel</td>\n      <td>Gallantt Ispat Ltd.</td>\n      <td>GALLANTT</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "duplicates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a0f8f58d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-03-08T19:16:59.440812Z",
     "start_time": "2024-03-08T19:16:59.430280Z"
    }
   },
   "outputs": [],
   "source": [
    "df.to_csv(\"CompanyList.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   },
   "id": "e9274e3c3011e6fc"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}