security improved to read api keys
Browse files- chat_database.db +2 -2
- scripts/format_response.py +123 -1
chat_database.db
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33b91dbbc8ccfd1c8c384924a5250449ae4092206ad840287cfb6953a2ad220b
|
3 |
+
size 622592
|
scripts/format_response.py
CHANGED
@@ -19,6 +19,122 @@ def stdoutIO(stdout=None):
|
|
19 |
yield stdout
|
20 |
sys.stdout = old
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
def clean_print_statements(code_block):
|
24 |
"""
|
@@ -107,6 +223,12 @@ def execute_code_from_markdown(code_str, dataframe=None):
|
|
107 |
import sys
|
108 |
from io import StringIO
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
context = {
|
111 |
'pd': pd,
|
112 |
'px': px,
|
@@ -124,7 +246,7 @@ def execute_code_from_markdown(code_str, dataframe=None):
|
|
124 |
modified_code = re.sub(
|
125 |
r'(\w*_?)fig(\w*)\.show\(\)',
|
126 |
r'json_outputs.append(plotly.io.to_json(\1fig\2, pretty=True))',
|
127 |
-
|
128 |
)
|
129 |
|
130 |
modified_code = re.sub(
|
|
|
19 |
yield stdout
|
20 |
sys.stdout = old
|
21 |
|
22 |
+
# Precompiled regex patterns used to screen generated code before it is run.
#
# NOTE(review): these are textual denylist checks, not a sandbox. Spellings
# that none of the patterns mention (for example ``importlib.import_module``
# or a comma-separated ``import pandas, os``) are not matched by anything
# below - confirm that the execution environment is restricted independently.

# Module names treated as sensitive. The alternation has no trailing word
# boundary, so it matches by prefix: ``import httpx`` or ``import socketserver``
# are matched as well as ``import http`` / ``import socket``.
SENSITIVE_MODULES = re.compile(r"(os|sys|subprocess|dotenv|requests|http|socket|smtplib|ftplib|telnetlib|paramiko)")

# ``import <module> ...`` / ``from <module> ...`` statements, matched through
# the end of the line. Group 1 is the sensitive module name; callers refer to
# it as ``\1`` in replacement templates, so keep it the first group.
IMPORT_PATTERN = re.compile(r"^\s*import\s+(" + SENSITIVE_MODULES.pattern + r").*?(\n|$)", re.MULTILINE)
FROM_IMPORT_PATTERN = re.compile(r"^\s*from\s+(" + SENSITIVE_MODULES.pattern + r").*?(\n|$)", re.MULTILINE)

# ``__import__("<module>")`` with a quoted sensitive module name.
DYNAMIC_IMPORT_PATTERN = re.compile(r"__import__\s*\(\s*['\"](" + SENSITIVE_MODULES.pattern + r")['\"].*?\)")

# Environment-variable access: os.getenv, os.environ, load_dotenv, or
# ``.__import__("os") ... .environ``.
ENV_ACCESS_PATTERN = re.compile(r"(os\.getenv|os\.environ|load_dotenv|\.__import__\s*\(\s*['\"]os['\"].*?\.environ)")

# File access call heads. These are plain substring matches, so any call whose
# name ends in ``read(`` / ``write(`` / ``open(`` is matched too.
FILE_ACCESS_PATTERN = re.compile(r"(open\(|read\(|write\(|file\(|with\s+open)")

# Enhanced API key detection patterns
API_KEY_PATTERNS = [
    # Direct key assignments
    re.compile(r"(?i)(api_?key|access_?token|secret_?key|auth_?token|password|credential|secret)s?\s*=\s*[\"\'][\w\-\+\/\=]{8,}[\"\']"),
    # Function calls with keys
    re.compile(r"(?i)\.set_api_key\(\s*[\"\'][\w\-\+\/\=]{8,}[\"\']"),
    # Dictionary assignments
    re.compile(r"(?i)['\"](?:api_?key|access_?token|secret_?key|auth_?token|password|credential|secret)['\"](?:\s*:\s*)[\"\'][\w\-\+\/\=]{8,}[\"\']"),
    # Common key formats (base64-like, hex)
    re.compile(r"[\"\'](?:[A-Za-z0-9\+\/\=]{32,}|[0-9a-fA-F]{32,})[\"\']"),
    # Bearer token pattern
    re.compile(r"[\"\'](Bearer\s+[\w\-\+\/\=]{8,})[\"\']"),
    # Inline URL with auth. The first alternative is the original token-only
    # form (``https://<token>@``); the second adds ``https://user:password@``,
    # which the original character class could never match because it
    # excluded ``:``. Requiring the ``@`` before any ``/`` keeps ordinary
    # ``host:port/path`` URLs from matching.
    re.compile(r"https?:\/\/(?:[\w\-\+\/\=]{8,}|[^\s\/:@'\"]+:[^\s\/@'\"]+)@"),
]

# Network request patterns. ``\.get\(`` / ``\.post\(`` / ``\.connect\(`` match
# any method call with that name, not only HTTP client calls.
NETWORK_REQUEST_PATTERNS = re.compile(r"(requests\.|urllib\.|http\.|\.post\(|\.get\(|\.connect\()")
|
48 |
+
|
49 |
+
def check_security_concerns(code_str):
    """Scan ``code_str`` with the module-level security patterns.

    Args:
        code_str: Source text to inspect.

    Returns:
        dict with the keys
          * ``"has_concern"`` - True when at least one check matched,
          * ``"messages"`` - one message per matched check, in check order,
          * ``"blocked_imports"``, ``"blocked_dynamic_imports"``,
            ``"blocked_env_access"``, ``"blocked_file_access"``,
            ``"blocked_api_keys"``, ``"blocked_network"`` - per-check flags.
    """
    # (flag key, message, patterns); a check fires when any of its patterns
    # matches. The order here fixes both the dict key order and message order.
    checks = (
        ("blocked_imports", "Sensitive module imports blocked",
         (IMPORT_PATTERN, FROM_IMPORT_PATTERN)),
        ("blocked_dynamic_imports", "Dynamic import of sensitive modules blocked",
         (DYNAMIC_IMPORT_PATTERN,)),
        ("blocked_env_access", "Environment variables access blocked",
         (ENV_ACCESS_PATTERN,)),
        ("blocked_file_access", "File operations blocked",
         (FILE_ACCESS_PATTERN,)),
        ("blocked_api_keys", "API key/token usage blocked",
         API_KEY_PATTERNS),
        ("blocked_network", "Network requests blocked",
         (NETWORK_REQUEST_PATTERNS,)),
    )

    report = {"has_concern": False, "messages": []}
    report.update((flag, False) for flag, _, _ in checks)

    for flag, message, patterns in checks:
        if any(pattern.search(code_str) for pattern in patterns):
            report[flag] = True
            report["messages"].append(message)

    # Every matched check contributes exactly one message.
    report["has_concern"] = bool(report["messages"])
    return report
|
101 |
+
|
102 |
+
def clean_code_for_security(code_str, security_concerns):
    """Rewrite ``code_str`` so that the flagged constructs are neutralised.

    Each ``blocked_*`` flag that is set in ``security_concerns`` triggers a
    regex substitution with the matching module-level pattern. When
    ``has_concern`` is set, a ``print(...)`` statement carrying the collected
    messages is prepended to the result.

    The substitutions are purely textual: a matched call head such as
    ``open(`` is replaced by a string literal while its arguments stay in
    place, so the rewritten code is not guaranteed to parse or run.

    Args:
        code_str: Source text to rewrite.
        security_concerns: Dict shaped like the result of
            ``check_security_concerns`` (``has_concern``, ``messages`` and the
            six ``blocked_*`` flags).

    Returns:
        The rewritten source text; ``code_str`` unchanged when no flag is set.
    """
    modified_code = code_str

    # Sensitive imports become comment lines; \1 is the module name captured
    # by the import patterns.
    if security_concerns["blocked_imports"]:
        modified_code = IMPORT_PATTERN.sub(r'# BLOCKED: import \1\n', modified_code)
        modified_code = FROM_IMPORT_PATTERN.sub(r'# BLOCKED: from \1\n', modified_code)

    if security_concerns["blocked_dynamic_imports"]:
        modified_code = DYNAMIC_IMPORT_PATTERN.sub(r'"BLOCKED_DYNAMIC_IMPORT"', modified_code)

    if security_concerns["blocked_env_access"]:
        modified_code = ENV_ACCESS_PATTERN.sub(r'"BLOCKED_ENV_ACCESS"', modified_code)

    if security_concerns["blocked_file_access"]:
        modified_code = FILE_ACCESS_PATTERN.sub(r'"BLOCKED_FILE_ACCESS"', modified_code)

    # Every key pattern is applied, not only the one that triggered the flag.
    if security_concerns["blocked_api_keys"]:
        for pattern in API_KEY_PATTERNS:
            modified_code = pattern.sub(r'"BLOCKED_API_KEY"', modified_code)

    if security_concerns["blocked_network"]:
        modified_code = NETWORK_REQUEST_PATTERNS.sub(r'"BLOCKED_NETWORK_REQUEST"', modified_code)

    if security_concerns["has_concern"]:
        security_message = "⚠️ SECURITY WARNING: " + ". ".join(security_concerns["messages"]) + "."
        # Emit the banner as a proper string literal via repr(): pasting the
        # text between hard-coded quotes would yield invalid (or injectable)
        # code as soon as a message contains a quote, backslash or newline.
        modified_code = f"print({security_message!r})\n\n" + modified_code

    return modified_code
|
138 |
|
139 |
def clean_print_statements(code_block):
|
140 |
"""
|
|
|
223 |
import sys
|
224 |
from io import StringIO
|
225 |
|
226 |
+
# Check for security concerns in the code
|
227 |
+
security_concerns = check_security_concerns(code_str)
|
228 |
+
|
229 |
+
# Apply security modifications to the code
|
230 |
+
modified_code = clean_code_for_security(code_str, security_concerns)
|
231 |
+
|
232 |
context = {
|
233 |
'pd': pd,
|
234 |
'px': px,
|
|
|
246 |
modified_code = re.sub(
|
247 |
r'(\w*_?)fig(\w*)\.show\(\)',
|
248 |
r'json_outputs.append(plotly.io.to_json(\1fig\2, pretty=True))',
|
249 |
+
modified_code
|
250 |
)
|
251 |
|
252 |
modified_code = re.sub(
|