# competitive-app/steps/parse_website.py
# Header captured from the repository file viewer (not part of the module):
#   hellorahulk - "Create parse_website.py" - revision 7ef4196 - 893 bytes
from abc import ABC, abstractmethod
import pandas as pd
class ParseWebsite(ABC):
    '''
    Abstract Class for parsing website

    Concrete parsers subclass this and implement ``get_table``.
    '''

    @abstractmethod
    def get_table(self, html: str) -> pd.DataFrame:
        '''
        Build a table of competition records from a page.

        ``self`` is part of the signature so that the abstract declaration
        matches the instance-method overrides in subclasses and so that
        ``super().get_table(html)`` is callable from them.

        Args:
            html: string
        Returns:
            pd.DataFrame: Competition records
        '''
class ParseCraft(ParseWebsite):
    '''
    Parse craft.co website for competitor intel
    '''

    def get_table(self, html: str) -> pd.DataFrame:
        '''
        Return the first table that ``pd.read_html`` finds in ``html``,
        using the table's first column as the index.

        Args:
            html: HTML markup. Input that does not start with ``<`` is
                handed to ``pd.read_html`` unchanged (presumably a URL or
                path in that case; confirm against callers).
        Returns:
            pd.DataFrame: the first parsed table.
        '''
        # Function-scope import so the block does not depend on a new
        # module-level import.
        from io import StringIO

        # pandas 2.1 deprecated passing literal HTML strings to read_html;
        # wrapping markup in a text buffer avoids the FutureWarning.
        looks_like_markup = isinstance(html, str) and html.lstrip().startswith("<")
        source = StringIO(html) if looks_like_markup else html

        tables = pd.read_html(source, index_col=0)
        return tables[0]
class ParseLink:
    '''
    Parses Website

    Selects a parser from ``website_type`` and applies it to ``html``.
    '''

    def __init__(self, website_type: str, html: str):
        '''
        Args:
            website_type: parser selector; only "craft" is handled by ``parse``.
            html: page content handed to the selected parser.
        '''
        self.website_type = website_type
        self.html = html

    def parse(self) -> pd.DataFrame:
        '''
        Parse ``self.html`` with the parser matching ``self.website_type``.

        Returns:
            pd.DataFrame: result of ``ParseCraft().get_table`` for "craft".
        Raises:
            ValueError: if ``website_type`` has no matching parser.
        '''
        if self.website_type == "craft":
            return ParseCraft().get_table(self.html)

        # Fail loudly instead of falling through with no result assigned.
        raise ValueError(f"Unsupported website type: {self.website_type!r}")