Update url_analyzer.py
url_analyzer.py  +43 -32
url_analyzer.py
CHANGED
@@ -1,26 +1,16 @@
 import requests
 import os
+import re

 SAFE_BROWSING_API_KEY = os.getenv("SAFE_BROWSING_API_KEY")
-
+OTX_API_KEY = os.getenv("OTX_API_KEY")

 def analyze_urls(urls):
     findings = []
+    score = 0

     for url in urls:
-
-        # --- 1. URLHaus ---
-        try:
-            res = requests.post("https://urlhaus-api.abuse.ch/v1/url/", data={"url": url})
-            data = res.json()
-            if data.get("query_status") == "ok":
-                findings.append(f"URL: {url} flagged as {data['url_status']} (URLHaus)")
-            else:
-                findings.append(f"URL: {url} not found in URLHaus")
-        except Exception as e:
-            findings.append(f"URL: {url} check failed (URLHaus) - {e}")
-
-        # --- 2. Google Safe Browsing ---
+        # --- 1. Google Safe Browsing ---
         if SAFE_BROWSING_API_KEY:
             try:
                 payload = {
@@ -39,30 +29,51 @@ def analyze_urls(urls):
                 data = res.json()
                 if "matches" in data:
                     findings.append(f"URL: {url} flagged by Google Safe Browsing")
+                    score += 40
                 else:
                     findings.append(f"URL: {url} not flagged (Google Safe Browsing)")
-            except
-                findings.append(f"URL: {url} check failed (Google Safe Browsing)
-        else:
-            findings.append("Google Safe Browsing API key not set.")
+            except:
+                findings.append(f"URL: {url} check failed (Google Safe Browsing)")

-        # ---
-        if
+        # --- 2. AlienVault OTX ---
+        if OTX_API_KEY:
             try:
-                headers = {"X-OTX-API-KEY":
+                headers = {"X-OTX-API-KEY": OTX_API_KEY}
                 res = requests.get(f"https://otx.alienvault.com/api/v1/indicators/url/{url}/general", headers=headers)
                 if res.status_code == 200:
                     data = res.json()
-
-
-
+                    if data.get("pulse_info", {}).get("count", 0) > 0:
+                        findings.append(f"URL: {url} flagged in AlienVault OTX")
+                        score += 30
                 else:
-                    findings.append(f"URL: {url} not
-
-
-
-
-
-
+                    findings.append(f"URL: {url} not found in AlienVault OTX")
+            except:
+                findings.append(f"URL: {url} check failed (AlienVault OTX)")
+
+        # --- 3. URLHaus ---
+        try:
+            res = requests.post("https://urlhaus-api.abuse.ch/v1/url/", data={"url": url})
+            data = res.json()
+            if data.get("query_status") == "ok":
+                findings.append(f"URL: {url} flagged as {data['url_status']} (URLHaus)")
+                score += 30
+            else:
+                findings.append(f"URL: {url} not found in URLHaus")
+        except:
+            findings.append(f"URL: {url} check failed (URLHaus)")
+
+        # --- 4. Heuristic Checks ---
+        domain_match = re.search(r"https?://([^/]+)/?", url)
+        if domain_match:
+            domain = domain_match.group(1)
+            if len(domain) > 25 or any(char.isdigit() for char in domain.split(".")[0]):
+                findings.append(f"URL: {url} has suspicious-looking domain")
+                score += 15
+        if "?" in url and len(url.split("?")[1]) > 50:
+            findings.append(f"URL: {url} has obfuscated query string")
+            score += 15
+
+    if not findings:
+        return ["No URLs found in email."], 0

-    return findings
+    return findings, score
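
The Safe Browsing request body itself (new lines 17-28) is collapsed in the diff view above. As a point of reference only, here is a minimal sketch of what that hidden section presumably builds, assuming the standard Safe Browsing v4 threatMatches:find lookup; the helper name, threat types, and client fields below are placeholders, not taken from the commit:

import os
import requests

SAFE_BROWSING_API_KEY = os.getenv("SAFE_BROWSING_API_KEY")

def safe_browsing_lookup(url):
    # Hypothetical helper, not part of the commit: the v4 Lookup API reports a hit
    # by including a "matches" key in the response, which is what the diff checks for.
    payload = {
        "client": {"clientId": "url-analyzer", "clientVersion": "1.0"},  # placeholder client info
        "threatInfo": {
            "threatTypes": ["MALWARE", "SOCIAL_ENGINEERING", "UNWANTED_SOFTWARE"],
            "platformTypes": ["ANY_PLATFORM"],
            "threatEntryTypes": ["URL"],
            "threatEntries": [{"url": url}],
        },
    }
    res = requests.post(
        "https://safebrowsing.googleapis.com/v4/threatMatches:find",
        params={"key": SAFE_BROWSING_API_KEY},
        json=payload,
        timeout=10,
    )
    return "matches" in res.json()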
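
With this change analyze_urls returns a (findings, score) tuple rather than a bare list, so existing callers have to unpack both values. A short usage sketch, assuming the file is importable as url_analyzer; the cutoff below is illustrative and not part of the commit (each URL can add up to 130 points: 40 from Safe Browsing, 30 each from OTX and URLHaus, and 15 per heuristic):

from url_analyzer import analyze_urls

findings, score = analyze_urls(["http://login-update.example-payments.com/verify?session=abc123def456"])
for line in findings:
    print(line)

if score >= 40:  # illustrative cutoff for this sketch
    print(f"Verdict: suspicious (score={score})")
else:
    print(f"Verdict: likely clean (score={score})")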