from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from time import sleep
#from tqdm.notebook import tqdm
#from selenium.webdriver.chrome.options import Options
import sys
import requests
import http.cookiejar
import re
import urllib.request
from lxml.html import fragment_fromstring
from collections import OrderedDict
import urllib.parse
import pandas as pd


def get_data(*args, **kwargs):
    class AppURLopener(urllib.request.FancyURLopener):
      version = "Mozilla/5.0"

    opener = AppURLopener()
    response = opener.open('http://httpbin.org/user-agent')

    url = 'http://www.fundamentus.com.br/resultado.php'
    cj = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))

    opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'),
                        ('Accept', 'text/html, text/plain, text/css, text/sgml, */*;q=0.01')                                                             
                        ]

    #opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201'),
    #                     ('Accept', 'text/html, text/plain, text/css, text/sgml, */*;q=0.01')]

    # Aqui estão os parâmetros de busca das ações
    # Estão em branco para que retorne todas as disponíveis
    data = {'pl_min':'','pl_max':'','pvp_min':'','pvp_max' :'','psr_min':'','psr_max':'','divy_min':'','divy_max':'',            'pativos_min':'','pativos_max':'','pcapgiro_min':'','pcapgiro_max':'','pebit_min':'','pebit_max':'', 'fgrah_min':'',
            'fgrah_max':'', 'firma_ebit_min':'', 'firma_ebit_max':'','margemebit_min':'','margemebit_max':'',            'margemliq_min':'','margemliq_max':'', 'liqcorr_min':'','liqcorr_max':'','roic_min':'','roic_max':'','roe_min':'',            'roe_max':'','liq_min':'','liq_max':'','patrim_min':'','patrim_max':'','divbruta_min':'','divbruta_max':'',         'tx_cresc_rec_min':'','tx_cresc_rec_max':'','setor':'','negociada':'ON','ordem':'1','x':'28','y':'16'}

    with opener.open(url, urllib.parse.urlencode(data).encode('UTF-8')) as link:
        content = link.read().decode('ISO-8859-1')

    pattern = re.compile('<table id="resultado".*</table>', re.DOTALL)
    reg = re.findall(pattern, content)[0]
    page = fragment_fromstring(reg)
    lista = OrderedDict()


    stocks = page.xpath('tbody')[0].findall("tr")

    todos = []
    for i in range(0, len(stocks)):
        lista[i] = {
            stocks[i].getchildren()[0][0].getchildren()[0].text: {
                'cotacao': stocks[i].getchildren()[1].text,
               'P/L': stocks[i].getchildren()[2].text,
               'P/VP': stocks[i].getchildren()[3].text,
               'PSR': stocks[i].getchildren()[4].text,
               'DY': stocks[i].getchildren()[5].text,
               'P/Ativo': stocks[i].getchildren()[6].text,
               'P/Cap.Giro': stocks[i].getchildren()[7].text,
               'P/EBIT': stocks[i].getchildren()[8].text,
               'P/Ativ.Circ.Liq.': stocks[i].getchildren()[9].text,
               'EV/EBIT': stocks[i].getchildren()[10].text,
               'EBITDA': stocks[i].getchildren()[11].text,
               'Mrg. Ebit': stocks[i].getchildren()[12].text,
               'Mrg.Liq.': stocks[i].getchildren()[13].text,
               'Liq.Corr.': stocks[i].getchildren()[14].text,
               'ROIC': stocks[i].getchildren()[15].text,
               'ROE': stocks[i].getchildren()[16].text,
               'Liq.2m.': stocks[i].getchildren()[17].text,
               'Pat.Liq': stocks[i].getchildren()[18].text,
               'Div.Brut/Pat.': stocks[i].getchildren()[19].text,
               'Cresc.5a': stocks[i].getchildren()[20].text
               }
            }

    return lista

def get_specific_data(stock):
    class AppURLopener(urllib.request.FancyURLopener):
      version = "Mozilla/5.0"

    opener = AppURLopener()
    response = opener.open('http://httpbin.org/user-agent')

    url = "http://www.fundamentus.com.br/detalhes.php?papel=" + stock
    cj = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
    
    opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'),
                        ('Accept', 'text/html, text/plain, text/css, text/sgml, */*;q=0.01')                                                             
                        ]
    
    
    #opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201'),
    #                     ('Accept', 'text/html, text/plain, text/css, text/sgml, */*;q=0.01')]
    
    # Get data from site
    link = opener.open(url, urllib.parse.urlencode({}).encode('UTF-8'))
    content = link.read().decode('ISO-8859-1')

    # Get all table instances
    pattern = re.compile('<table class="w728">.*</table>', re.DOTALL)
    reg = re.findall(pattern, content)[0]
    reg = "<div>" + reg + "</div>"
    page = fragment_fromstring(reg)
    all_data = {}

    # There is 5 tables with tr, I will get all trs
    all_trs = []
    all_tables = page.xpath("table")

    for i in range(0, len(all_tables)):
        all_trs = all_trs + all_tables[i].findall("tr")

    # Run through all the trs and get the label and the
    # data for each line
    for tr_index in range(0, len(all_trs)):
        tr = all_trs[tr_index]
        # Get into td
        all_tds = tr.getchildren()
        for td_index in range(0, len(all_tds)):
            td = all_tds[td_index]

            label = ""
            data = ""

            # The page has tds with contents and some 
            # other with not
            if (td.get("class").find("label") != -1):
                # We have a label
                for span in td.getchildren():
                    if (span.get("class").find("txt") != -1):
                        label = span.text

                # If we did find a label we have to look 
                # for a value 
                if (label and len(label) > 0):
                    next_td = all_tds[td_index + 1]

                    if (next_td.get("class").find("data") != -1):
                        # We have a data
                        for span in next_td.getchildren():
                            if (span.get("class").find("txt") != -1):
                                if (span.text):
                                    data = span.text
                                else:
                                    # If it is a link
                                    span_children = span.getchildren()
                                    if (span_children and len(span_children) > 0):
                                        data = span_children[0].text

                                # Include into dict
                                all_data[label] = data

                                # Erase it
                                label = ""
                                data = ""

    return all_data


def coletar_scrap():
    sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver')
    URL = 'https://statusinvest.com.br/acoes/busca-avancada'
    #output = 'busca-avancada.csv'

    #if path.exists(output):
    #    os.remove(output)

    #chrome_options = Options()
    #chrome_options.binary_location = GOOGLE_CHROME_BIN
    #chrome_options.add_argument('--disable-gpu')
    #chrome_options.add_argument('--no-sandbox')
    #driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH, chrome_options=chrome_options)

    #driver = webdriver.Chrome('chromedriver/chromedriver.exe')

    #chrome_options = webdriver.ChromeOptions()
    #chrome_options.add_argument('--headless')
    #chrome_options.add_argument('--no-sandbox')
    #chrome_options.add_argument('--disable-dev-shm-usage')
    #driver = webdriver.Chrome('chromedriver',chrome_options=chrome_options)

    gChromeOptions = webdriver.ChromeOptions()
    gChromeOptions.add_argument("window-size=1920x1480")
    gChromeOptions.add_argument("disable-dev-shm-usage")
    driver = webdriver.Chrome(
        chrome_options=gChromeOptions, executable_path=ChromeDriverManager().install()
    )
    
    #driver = webdriver.Chrome(ChromeDriverManager(chrome_type=ChromeType.GOOGLE).install())
    #driver.get(URL)
    driver.get(URL)
    sleep(5)

    button_buscar = driver.find_element_by_xpath('//div/button[contains(@class,"find")]')

    button_buscar.click()
    sleep(5)

    button_skip = driver.find_element_by_xpath('//div/button[contains(@class,"btn-close")]')

    button_skip.click()
    sleep(5)

    button_download = driver.find_element_by_xpath('//div/a[contains(@class, "btn-download")]')
    button_download.click()
    sleep(1)
    
    #if path.exists(output):
               

    df = pd.read_csv('busca-avancada.csv', sep=';', decimal=',', thousands='.')
    driver.close()
    return df


def scrap_fundamentus():
    url  = 'http://www.fundamentus.com.br/resultado.php'
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    result = requests.get(url, headers=headers)
    df = pd.read_html(result.content)[0]
    
    return df