#!/usr/bin/python
# -*- coding: utf-8 -*-

# Hive Appier Framework
# Copyright (c) 2008-2024 Hive Solutions Lda.
#
# This file is part of Hive Appier Framework.
#
# Hive Appier Framework is free software: you can redistribute it and/or modify
# it under the terms of the Apache License as published by the Apache
# Foundation, either version 2.0 of the License, or (at your option) any
# later version.
#
# Hive Appier Framework is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Apache License for more details.
#
# You should have received a copy of the Apache License along with
# Hive Appier Framework. If not, see <http://www.apache.org/licenses/>.

__author__ = "João Magalhães <joamag@hive.pt>"
""" The author(s) of the module """

__copyright__ = "Copyright (c) 2008-2024 Hive Solutions Lda."
""" The copyright for the module """

__license__ = "Apache License, Version 2.0"
""" The license for the module """

import re

INFINITY = float("inf")
""" Infinity value alternative to math module infinity
compatible with Python versions 2 and 3 """

ITERABLES = (list, tuple)
""" The tuple that defines the various base types
that are considered to be generally "iterable", may be
passed directly as the second argument of `isinstance()` """

# NOTE: the pattern is a raw string, so every regex escape must use
# a single backslash (eg: `\d`, `\/`, `\.`), a doubled backslash would
# instead require a literal backslash character in the user agent
MOBILE_REGEX = re.compile(
    r"(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino",
    re.I | re.M,
)  # @UndefinedVariable
""" The regular expression that is going to be used
to validate the complete user agent string for mobile,
matching is case insensitive and meant to be used as a
search over the complete user agent string """

TABLET_REGEX = re.compile(
    r"(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino|android|ipad|playbook|silk",
    re.I | re.M,
)  # @UndefinedVariable
""" The regular expression that is going to be used
to validate the complete user agent string for tablet,
on top of the mobile alternatives it also accepts the
plain `android`, `ipad`, `playbook` and `silk` tokens """

# NOTE: the pattern is a raw string, so the escaped `-`, `/` and `.`
# characters must be prefixed by a single backslash, a doubled one
# would require a literal backslash in the user agent prefix
MOBILE_PREFIX_REGEX = re.compile(
    r"1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-",
    re.I | re.M,
)  # @UndefinedVariable
""" The regular expression to test the base prefix
of the user agent string for mobile browser, every
alternative is a short (four character) device prefix """

BODY_REGEX = re.compile(r"<body[^<>]*?>(.*?)</body>", re.I)  # @UndefinedVariable
""" Regular expression that is going to be used in the matching
of the partial contents (child) nodes of a body HTML node, the
first group captures the contents and, as no "dot all" flag is
set, these contents are not allowed to contain newline characters """

TAG_REGEX = re.compile(r"<[^<]*?>")
""" The regular expression that is going to be used in the matching
of an html/xml based node in the HTML to plain text conversion """

EMAIL_REGEX = re.compile(
    r"((?P<name>^.+) \<(?P<email_a>[\w\d\._%+-]+@[\w\d\.\-]+)\>)|(?P<email_b>[\w\d\._%+-]+@[\w\d\.\-]+)$",
    flags=re.UNICODE,  # @UndefinedVariable
)
""" The regular expression that is going to be used
in the matching of email lines, it supports both the
extended (including name) and the simplified versions,
the extended version populates the `name` and `email_a`
groups while the simplified one populates `email_b` """

BROWSER_INFO = [
    dict(identity="Edge", sub_string="Edge"),
    dict(identity="Chrome", sub_string="Chrome"),
    dict(identity="Safari", sub_string="Safari"),
    # every entry must carry the `sub_string` to be searched for in
    # the user agent, otherwise consumers indexing that key would fail
    dict(identity="Opera", sub_string="Opera"),
    dict(identity="Firefox", sub_string="Firefox"),
    dict(identity="Explorer", sub_string="MSIE", version_search="MSIE "),
    dict(identity="AppleWebKit", sub_string="AppleWebKit"),
    dict(identity="Googlebot", sub_string="Googlebot", interactive=False, bot=True),
    dict(identity="Bingbot", sub_string="Bingbot", interactive=False, bot=True),
    dict(identity="DuckDuckBot", sub_string="DuckDuckBot", interactive=False, bot=True),
    dict(identity="netius", sub_string="netius", interactive=False),
]
""" List that contains the complete information used
for the parsing and identification of the browser information
from a typical user agent string, each entry provides the
`identity` and `sub_string` values and optionally the
`version_search`, `interactive` and `bot` ones """

OS_INFO = [
    dict(identity="Windows", sub_string="Win"),
    dict(identity="Mac", sub_string="Mac"),
    dict(identity="iPhone/iPod", sub_string="iPhone"),
    dict(sub_string="Linux", identity="Linux"),
]
""" List that contains the complete information used
for the parsing and identification of the os information
from a typical user agent string, each entry associates
the `sub_string` token with the `identity` of the system """

WINDOWS_LOCALE = dict(
    af="Afrikaans",
    sq="Albanian",
    am="Amharic",
    ar_DZ="Arabic_Algeria",
    ar_BH="Arabic_Bahrain",
    ar_EG="Arabic_Egypt",
    ar_IQ="Arabic_Iraq",
    ar_JO="Arabic_Jordan",
    ar_KW="Arabic_Kuwait",
    ar_LB="Arabic_Lebanon",
    ar_LY="Arabic_Libya",
    ar_MA="Arabic_Morocco",
    ar_OM="Arabic_Oman",
    ar_QA="Arabic_Qatar",
    ar_SA="Arabic_Saudi Arabia",
    ar_SY="Arabic_Syria",
    ar_TN="Arabic_Tunisia",
    ar_AE="Arabic_United Arab Emirates",
    ar_YE="Arabic_Yemen",
    hy="Armenian",
    az_AZ="Azeri_Cyrillic",
    eu="Basque",
    be="Belarusian",
    bn="Bengali_Bangladesh",
    bs="Bosnian",
    bg="Bulgarian",
    my="Burmese",
    ca="Catalan",
    zh_CN="Chinese_China",
    zh_HK="Chinese_Hong Kong SAR",
    zh_MO="Chinese_Macau SAR",
    zh_SG="Chinese_Singapore",
    zh_TW="Chinese_Taiwan",
    hr="Croatian",
    cs="Czech",
    da="Danish",
    dv="Divehi; Dhivehi; Maldivian",
    nl_BE="Dutch_Belgium",
    nl_NL="Dutch_Netherlands",
    en_AU="English_Australia",
    en_BZ="English_Belize",
    en_CA="English_Canada",
    en_CB="English_Caribbean",
    en_GB="English_Great Britain",
    en_IN="English_India",
    en_IE="English_Ireland",
    en_JM="English_Jamaica",
    en_NZ="English_New Zealand",
    en_PH="English_Phillippines",
    en_ZA="English_Southern Africa",
    en_TT="English_Trinidad",
    en_US="English_United States",
    et="Estonian",
    fo="Faroese",
    fa="Farsi_Persian",
    fi="Finnish",
    fr_BE="French_Belgium",
    fr_CA="French_Canada",
    fr_FR="French_France",
    fr_LU="French_Luxembourg",
    fr_CH="French_Switzerland",
    mk="FYRO Macedonia",
    gd_IE="Gaelic_Ireland",
    gd="Gaelic_Scotland",
    de_AT="German_Austria",
    de_DE="German_Germany",
    de_LI="German_Liechtenstein",
    de_LU="German_Luxembourg",
    de_CH="German_Switzerland",
    el="Greek",
    gn="Guarani_Paraguay",
    gu="Gujarati",
    he="Hebrew",
    hi="Hindi",
    hu="Hungarian",
    id="Indonesian",
    it_IT="Italian_Italy",
    it_CH="Italian_Switzerland",
    ja="Japanese",
    kn="Kannada",
    ks="Kashmiri",
    kk="Kazakh",
    km="Khmer",
    ko="Korean",
    lo="Lao",
    la="Latin",
    lv="Latvian",
    lt="Lithuanian",
    ms_BN="Malay_Brunei",
    ms_MY="Malay_Malaysia",
    ml="Malayalam",
    mt="Maltese",
    mi="Maori",
    mr="Marathi",
    mn="Mongolian",
    ne="Nepali",
    no_NO="Norwegian_Bokml",
    pl="Polish",
    pt_BR="Portuguese_Brazil",
    pt_PT="Portuguese_Portugal",
    pa="Punjabi",
    rm="Raeto-Romance",
    ro_MO="Romanian_Moldova",
    ro="Romanian_Romania",
    ru="Russian",
    ru_MO="Russian_Moldova",
    sa="Sanskrit",
    sr_SP="Serbian_Cyrillic",
    tn="Setsuana",
    sd="Sindhi",
    si="Sinhala; Sinhalese",
    sk="Slovak",
    sl="Slovenian",
    so="Somali",
    sb="Sorbian",
    es_AR="Spanish_Argentina",
    es_BO="Spanish_Bolivia",
    es_CL="Spanish_Chile",
    es_CO="Spanish_Colombia",
    es_CR="Spanish_Costa Rica",
    es_DO="Spanish_Dominican Republic",
    es_EC="Spanish_Ecuador",
    es_SV="Spanish_El Salvador",
    es_GT="Spanish_Guatemala",
    es_HN="Spanish_Honduras",
    es_MX="Spanish_Mexico",
    es_NI="Spanish_Nicaragua",
    es_PA="Spanish_Panama",
    es_PY="Spanish_Paraguay",
    es_PE="Spanish_Peru",
    es_PR="Spanish_Puerto Rico",
    es_ES="Spanish_Spain (Traditional)",
    es_UY="Spanish_Uruguay",
    es_VE="Spanish_Venezuela",
    sw="Swahili",
    sv_FI="Swedish_Finland",
    sv_SE="Swedish_Sweden",
    tg="Tajik",
    ta="Tamil",
    tt="Tatar",
    te="Telugu",
    th="Thai",
    bo="Tibetan",
    ts="Tsonga",
    tr="Turkish",
    tk="Turkmen",
    uk="Ukrainian",
    UTF_8="Unicode",
    ur="Urdu",
    uz_UZ="Uzbek_Cyrillic",
    vi="Vietnamese",
    cy="Welsh",
    xh="Xhosa",
    yi="Yiddish",
    zu="Zulu",
)
""" The map containing the associations between the
normalized version/descriptor of the locale and the
longer windows version of them so that it may be used
when setting locales for windows based operative systems,
keys use the underscore based form (eg: `pt_PT`) """

SLUG_PERMUTATIONS = (
    ("á", "a"),
    ("é", "e"),
    ("í", "i"),
    ("ó", "o"),
    ("ú", "u"),
    ("à", "a"),
    ("ã", "a"),
    ("õ", "o"),
    ("â", "a"),
    ("ê", "e"),
    ("ô", "o"),
    ("ç", "c"),
    ("ä", "a"),
    ("ë", "e"),
    ("ï", "i"),
    ("ö", "o"),
    ("ü", "u"),
    ("ÿ", "y"),
    ("æ", "ae"),
    ("œ", "oe"),
)
""" The complete set of permutation tuples to be used in
a slug based simplification process, each tuple associates
a (lower case) accented character or ligature with its plain
ASCII replacement, which may be longer than one character """