hellorahulk committed on
Commit
08e59e6
1 Parent(s): 8894aa7

Create steps/Bots.py

Browse files
Files changed (1) hide show
  1. steps/Bots.py +58 -0
steps/Bots.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from selenium import webdriver
3
+ from selenium.webdriver.common.by import By
4
+ from selenium.webdriver.support.ui import WebDriverWait
5
+ from selenium.webdriver.support import expected_conditions as EC
6
+ from selenium.webdriver.common.keys import Keys
7
+
8
+ from abc import ABC,abstractmethod
9
+ import pandas as pd
10
+
11
+ from steps.utils import get_priority_link
12
+
13
+
14
class BOT(ABC):
    '''
    Abstract base class for selenium bots.

    Concrete bots subclass this and implement ``target_html``.
    '''

    @abstractmethod
    def target_html(self, company_name: str) -> str:
        '''
        Fetch the target page for ``company_name``.

        Declared as an instance method (with ``self`` and ``company_name``)
        so the abstract signature matches the way CompetitorBot defines it.

        NOTE(review): CompetitorBot.target_html returns a (website, html)
        pair rather than a bare string - confirm which contract is intended.
        '''
        pass
21
+
22
+
23
class CompetitorBot(BOT):
    '''
    Get competitor info by doing these steps in order:
    1> google search company_name competitors
    2> select most appropriate search result
    3> Go to website, scrape
    '''

    def target_html(self, company_name: str) -> tuple:
        '''
        Search Google for the company's craft.co competitors page and scrape it.

        Args:
            company_name: Name of the company; " craft.co competitors" is
                appended to it to form the search query.

        Returns:
            A ``(website, html)`` tuple. ``website`` is the first value
            returned by ``get_priority_link`` and ``html`` is the browser's
            page source after the selected search result has been clicked.
        '''
        browser = webdriver.Firefox()
        # try/finally guarantees the Firefox process is shut down even when a
        # lookup, click or the link selection raises part-way through.
        try:
            # Searching on FireFox
            browser.get('http://www.google.com')
            search = browser.find_element(By.NAME, "q")
            search.send_keys(company_name + " craft.co competitors")
            search.send_keys(Keys.RETURN)  # hit return after you enter search text
            time.sleep(5)  # fixed pause before reading the results page

            # Getting Search results: every anchor on the page and its href.
            # The two lists stay index-aligned so idx is valid for both.
            search_results = browser.find_elements(By.XPATH, "//a")
            links = [result.get_attribute("href") for result in search_results]

            # Getting Priority Link
            website, idx = get_priority_link(links)
            print(website, idx)
            print(links[idx])

            # go to the website
            search_results[idx].click()
            time.sleep(10)  # fixed pause before capturing the page source

            html = browser.page_source
            return website, html
        finally:
            browser.quit()