{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "621c09fe",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-08T19:16:32.071268Z",
"start_time": "2024-03-08T19:16:31.275321Z"
}
},
"outputs": [],
"source": [
"from bs4 import BeautifulSoup\n",
"import pandas as pd\n",
"\n",
"c = 0\n",
"\n",
"df = 0\n",
"\n",
"# Read HTML content from the file\n",
"with open(\"index.html\", \"r\", encoding=\"utf-8\") as file:\n",
" html_content = file.read()\n",
"\n",
"# Parse the HTML content\n",
"soup = BeautifulSoup(html_content, \"html.parser\")\n",
"\n",
"# Find the table\n",
"table = soup.find(\"table\")\n",
"\n",
"# Extract table data\n",
"if table:\n",
" rows = table.find_all(\"tr\")\n",
" data = []\n",
" for row in rows:\n",
" columns = row.find_all(\"td\")\n",
" if columns: # Ensure it's not a header row or empty row\n",
" row_data = [column.text.strip() for column in columns]\n",
" data.append(row_data)\n",
" c += 1\n",
" # Create DataFrame\n",
" df = pd.DataFrame(data)\n",
" \n",
" # Extract first part and last word from the first column\n",
" df['Name'] = df[0].str.split().str[:-1].str.join(\" \")\n",
" df['Ticker'] = df[0].str.split().str[-1]\n",
" df[['Name', 'Ticker']].to_csv(\"CompanyList.csv\")\n",
"else:\n",
" print(\"Table not found in the HTML content.\")\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "38efd2c9",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-08T19:16:45.594430Z",
"start_time": "2024-03-08T19:16:45.591931Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2063\n"
]
}
],
"source": [
"print(c)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6c185203",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-08T19:16:46.502190Z",
"start_time": "2024-03-08T19:16:46.489733Z"
}
},
"outputs": [
{
"data": {
"text/plain": " 0 1 2 3 \\\ncount 2062 2062 2062 2062 \nunique 2062 2044 2060 2056 \ntop 20 Microns Ltd. 20MICRONS ₹1.14 -4.16% ₹8.8/₹3.79 ₹128.05 Crs \nfreq 1 4 2 2 \n\n 4 Name Ticker \ncount 2062 2062 2062 \nunique 125 2061 2062 \ntop Pharmaceuticals Gallantt Ispat Ltd. 20MICRONS \nfreq 105 2 1 ",
"text/html": "
\n\n
\n \n \n | \n 0 | \n 1 | \n 2 | \n 3 | \n 4 | \n Name | \n Ticker | \n
\n \n \n \n count | \n 2062 | \n 2062 | \n 2062 | \n 2062 | \n 2062 | \n 2062 | \n 2062 | \n
\n \n unique | \n 2062 | \n 2044 | \n 2060 | \n 2056 | \n 125 | \n 2061 | \n 2062 | \n
\n \n top | \n 20 Microns Ltd. 20MICRONS | \n ₹1.14 -4.16% | \n ₹8.8/₹3.79 | \n ₹128.05 Crs | \n Pharmaceuticals | \n Gallantt Ispat Ltd. | \n 20MICRONS | \n
\n \n freq | \n 1 | \n 4 | \n 2 | \n 2 | \n 105 | \n 2 | \n 1 | \n
\n \n
\n
"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ebfb97f3",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-08T19:16:48.278807Z",
"start_time": "2024-03-08T19:16:48.273680Z"
}
},
"outputs": [
{
"data": {
"text/plain": "2062"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "70727a81",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-08T19:16:49.269674Z",
"start_time": "2024-03-08T19:16:49.267266Z"
}
},
"outputs": [],
"source": [
"duplicates = df[df.duplicated(subset='Name', keep=False)]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "75f68012",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-08T19:16:50.191351Z",
"start_time": "2024-03-08T19:16:50.186194Z"
}
},
"outputs": [
{
"data": {
"text/plain": " 0 1 \\\n610 Gallantt Ispat Ltd.\\n ... ₹64.15 -0.77% \n611 Gallantt Ispat Ltd.\\n ... ₹216.94 +1.33% \n\n 2 3 4 Name Ticker \n610 ₹76/₹44.64 ₹1807.11 Crs Iron & Steel Gallantt Ispat Ltd. GALLISPAT \n611 ₹236.4/₹49.54 ₹5235.8 Crs Iron & Steel Gallantt Ispat Ltd. GALLANTT ",
"text/html": "\n\n
\n \n \n | \n 0 | \n 1 | \n 2 | \n 3 | \n 4 | \n Name | \n Ticker | \n
\n \n \n \n 610 | \n Gallantt Ispat Ltd.\\n ... | \n ₹64.15 -0.77% | \n ₹76/₹44.64 | \n ₹1807.11 Crs | \n Iron & Steel | \n Gallantt Ispat Ltd. | \n GALLISPAT | \n
\n \n 611 | \n Gallantt Ispat Ltd.\\n ... | \n ₹216.94 +1.33% | \n ₹236.4/₹49.54 | \n ₹5235.8 Crs | \n Iron & Steel | \n Gallantt Ispat Ltd. | \n GALLANTT | \n
\n \n
\n
"
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"duplicates"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a0f8f58d",
"metadata": {
"ExecuteTime": {
"end_time": "2024-03-08T19:16:59.440812Z",
"start_time": "2024-03-08T19:16:59.430280Z"
}
},
"outputs": [],
"source": [
"df.to_csv(\"CompanyList.csv\")"
]
},
{
"cell_type": "code",
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
},
"id": "e9274e3c3011e6fc"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}