hellorahulk committed on
Commit
7ef4196
1 Parent(s): 08e59e6

Create parse_website.py

Browse files
Files changed (1) hide show
  1. steps/parse_website.py +41 -0
steps/parse_website.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ import pandas as pd
3
+
4
class ParseWebsite(ABC):
    '''
    Abstract base class for website parsers.

    Subclasses must implement ``get_table``; the class itself cannot be
    instantiated because ``get_table`` is declared abstract.
    '''

    @abstractmethod
    def get_table(self, html: str) -> pd.DataFrame:
        '''
        Build a table from the given HTML.

        Declared as an instance method (with ``self``) so that the
        signature matches the overrides in concrete subclasses and can be
        reached through ``super()``.

        Args:
            html: string
        Returns:
            pd.DataFrame: Competition records
        '''
18
+
19
+
20
class ParseCraft(ParseWebsite):
    '''
    Parse craft.co website for competitor intel
    '''

    def get_table(self, html: str) -> pd.DataFrame:
        '''
        Return the first table that pandas finds in ``html``.

        Args:
            html: input handed to ``pandas.read_html``. A string that
                starts with ``<`` is treated as literal markup; anything
                else is passed through unchanged.
        Returns:
            pd.DataFrame: the first parsed table, with its first column
                used as the index.
        '''
        # Local import keeps this block self-contained without touching
        # the file-level import list.
        from io import StringIO

        # pandas deprecated passing literal HTML strings to read_html
        # (since 2.1) and asks for them to be wrapped in StringIO. Only
        # wrap input that looks like markup so other inputs still reach
        # read_html exactly as before.
        if isinstance(html, str) and html.lstrip().startswith("<"):
            source = StringIO(html)
        else:
            source = html

        return pd.read_html(source, index_col=0)[0]
28
+
29
+
30
class ParseLink:
    '''
    Parses Website

    Holds a website type together with its HTML and dispatches to the
    parser class that handles that type.
    '''

    def __init__(self, website_type: str, html: str) -> None:
        '''
        Args:
            website_type: name of the site the HTML came from; ``"craft"``
                is the only value handled by ``parse``.
            html: input forwarded to the selected parser's ``get_table``.
        '''
        self.website_type = website_type
        self.html = html

    def parse(self) -> pd.DataFrame:
        '''
        Parse ``self.html`` with the parser matching ``self.website_type``.

        Returns:
            pd.DataFrame: the table produced by the selected parser.
        Raises:
            ValueError: if ``website_type`` is not a supported value.
        '''
        if self.website_type == "craft":
            return ParseCraft().get_table(self.html)

        # Fail with a clear message instead of leaving the result
        # undefined for website types that have no parser.
        raise ValueError(
            f"Unsupported website_type: {self.website_type!r}"
        )