Ashad001 committed on
Commit
4d25103
·
1 Parent(s): 498f4fe

Security improved to prevent reading API keys

Browse files
Files changed (2) hide show
  1. chat_database.db +2 -2
  2. scripts/format_response.py +123 -1
chat_database.db CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:744167cc151de902fa4fe1533063ee7e907b88992e2c1ba5a1792da6b27feb5a
3
- size 581632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33b91dbbc8ccfd1c8c384924a5250449ae4092206ad840287cfb6953a2ad220b
3
+ size 622592
scripts/format_response.py CHANGED
@@ -19,6 +19,122 @@ def stdoutIO(stdout=None):
19
  yield stdout
20
  sys.stdout = old
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  def clean_print_statements(code_block):
24
  """
@@ -107,6 +223,12 @@ def execute_code_from_markdown(code_str, dataframe=None):
107
  import sys
108
  from io import StringIO
109
 
 
 
 
 
 
 
110
  context = {
111
  'pd': pd,
112
  'px': px,
@@ -124,7 +246,7 @@ def execute_code_from_markdown(code_str, dataframe=None):
124
  modified_code = re.sub(
125
  r'(\w*_?)fig(\w*)\.show\(\)',
126
  r'json_outputs.append(plotly.io.to_json(\1fig\2, pretty=True))',
127
- code_str
128
  )
129
 
130
  modified_code = re.sub(
 
19
  yield stdout
20
  sys.stdout = old
21
 
22
# Precompiled regex patterns used by the security checks in this module.
#
# NOTE(review): module names are matched as prefixes (there is no word
# boundary after the alternation), so a line such as ``import osmnx`` is
# flagged as well -- confirm this is intended.
SENSITIVE_MODULES = re.compile(r"(os|sys|subprocess|dotenv|requests|http|socket|smtplib|ftplib|telnetlib|paramiko)")

# ``import <module>...`` / ``from <module>...`` lines. Each match consumes the
# rest of the line; group 1 holds the sensitive module name.
IMPORT_PATTERN = re.compile(r"^\s*import\s+(" + SENSITIVE_MODULES.pattern + r").*?(\n|$)", re.MULTILINE)
FROM_IMPORT_PATTERN = re.compile(r"^\s*from\s+(" + SENSITIVE_MODULES.pattern + r").*?(\n|$)", re.MULTILINE)

# ``__import__("<module>" ...)`` calls naming a sensitive module.
DYNAMIC_IMPORT_PATTERN = re.compile(r"__import__\s*\(\s*['\"](" + SENSITIVE_MODULES.pattern + r")['\"].*?\)")

# Environment variable access via ``os.getenv``, ``os.environ`` or ``load_dotenv``.
ENV_ACCESS_PATTERN = re.compile(r"(os\.getenv|os\.environ|load_dotenv|\.__import__\s*\(\s*['\"]os['\"].*?\.environ)")

# File access calls. These are plain substring matches, so any identifier
# ending in ``open(``, ``read(``, ``write(`` or ``file(`` also matches.
FILE_ACCESS_PATTERN = re.compile(r"(open\(|read\(|write\(|file\(|with\s+open)")

# Enhanced API key detection patterns
API_KEY_PATTERNS = [
    # Direct key assignments
    re.compile(r"(?i)(api_?key|access_?token|secret_?key|auth_?token|password|credential|secret)s?\s*=\s*[\"\'][\w\-\+\/\=]{8,}[\"\']"),
    # Function calls with keys
    re.compile(r"(?i)\.set_api_key\(\s*[\"\'][\w\-\+\/\=]{8,}[\"\']"),
    # Dictionary assignments
    re.compile(r"(?i)['\"](?:api_?key|access_?token|secret_?key|auth_?token|password|credential|secret)['\"](?:\s*:\s*)[\"\'][\w\-\+\/\=]{8,}[\"\']"),
    # Common key formats (base64-like, hex)
    re.compile(r"[\"\'](?:[A-Za-z0-9\+\/\=]{32,}|[0-9a-fA-F]{32,})[\"\']"),
    # Bearer token pattern
    re.compile(r"[\"\'](Bearer\s+[\w\-\+\/\=]{8,})[\"\']"),
    # Inline URL with auth. ":" is part of the character class so that the
    # usual "user:password@host" form is detected, not only a bare token
    # placed before the "@".
    re.compile(r"https?:\/\/[\w\-\+\/\=:]{8,}@"),
]

# Network request patterns. ``.get(`` / ``.post(`` / ``.connect(`` match on any
# object, not only HTTP clients.
NETWORK_REQUEST_PATTERNS = re.compile(r"(requests\.|urllib\.|http\.|\.post\(|\.get\(|\.connect\()")
48
+
49
def check_security_concerns(code_str):
    """Scan ``code_str`` for risky constructs and report what was found.

    Args:
        code_str: Source code to inspect with the module-level patterns.

    Returns:
        A dict with ``has_concern`` (True when any check matched),
        ``messages`` (one description per matched check, in check order) and
        one ``blocked_*`` boolean per category: imports, dynamic imports,
        environment access, file access, API keys and network requests.
    """
    report = {
        "has_concern": False,
        "messages": [],
        "blocked_imports": False,
        "blocked_dynamic_imports": False,
        "blocked_env_access": False,
        "blocked_file_access": False,
        "blocked_api_keys": False,
        "blocked_network": False,
    }

    # (flag to raise, message to record, patterns of which any may match)
    checks = (
        ("blocked_imports", "Sensitive module imports blocked",
         (IMPORT_PATTERN, FROM_IMPORT_PATTERN)),
        ("blocked_dynamic_imports", "Dynamic import of sensitive modules blocked",
         (DYNAMIC_IMPORT_PATTERN,)),
        ("blocked_env_access", "Environment variables access blocked",
         (ENV_ACCESS_PATTERN,)),
        ("blocked_file_access", "File operations blocked",
         (FILE_ACCESS_PATTERN,)),
        ("blocked_api_keys", "API key/token usage blocked",
         API_KEY_PATTERNS),
        ("blocked_network", "Network requests blocked",
         (NETWORK_REQUEST_PATTERNS,)),
    )

    for flag, message, patterns in checks:
        # any() stops at the first hit, so each category is reported once.
        if any(regex.search(code_str) for regex in patterns):
            report["has_concern"] = True
            report[flag] = True
            report["messages"].append(message)

    return report
101
+
102
def clean_code_for_security(code_str, security_concerns):
    """Rewrite ``code_str`` according to the concerns that were detected.

    Args:
        code_str: Source code to sanitise.
        security_concerns: Dict shaped like the result of
            ``check_security_concerns``: a ``has_concern`` flag, a
            ``messages`` list and one ``blocked_*`` boolean per category.

    Returns:
        The modified source. Each flagged category has its matches replaced
        by a placeholder, and when ``has_concern`` is set a ``print(...)``
        statement carrying the joined messages is prepended.

    NOTE(review): apart from imports, a match is replaced by a string literal
    spliced in where the match was (e.g. ``open(`` becomes
    ``"BLOCKED_FILE_ACCESS"``), so the result is not guaranteed to be valid
    Python -- confirm the caller handles a ``SyntaxError``.
    """
    modified_code = code_str

    # Sensitive imports are turned into comments; group 1 is the module name.
    if security_concerns["blocked_imports"]:
        modified_code = IMPORT_PATTERN.sub(r'# BLOCKED: import \1\n', modified_code)
        modified_code = FROM_IMPORT_PATTERN.sub(r'# BLOCKED: from \1\n', modified_code)

    # Dynamic imports of sensitive modules
    if security_concerns["blocked_dynamic_imports"]:
        modified_code = DYNAMIC_IMPORT_PATTERN.sub(r'"BLOCKED_DYNAMIC_IMPORT"', modified_code)

    # Environment variable access
    if security_concerns["blocked_env_access"]:
        modified_code = ENV_ACCESS_PATTERN.sub(r'"BLOCKED_ENV_ACCESS"', modified_code)

    # File operations
    if security_concerns["blocked_file_access"]:
        modified_code = FILE_ACCESS_PATTERN.sub(r'"BLOCKED_FILE_ACCESS"', modified_code)

    # API keys: every pattern is applied, not only the one that first matched.
    if security_concerns["blocked_api_keys"]:
        for pattern in API_KEY_PATTERNS:
            modified_code = pattern.sub(r'"BLOCKED_API_KEY"', modified_code)

    # Network requests
    if security_concerns["blocked_network"]:
        modified_code = NETWORK_REQUEST_PATTERNS.sub(r'"BLOCKED_NETWORK_REQUEST"', modified_code)

    # Warning banner. The message is embedded with !r so that quotes,
    # backslashes or newlines inside a message cannot break out of the
    # generated string literal.
    if security_concerns["has_concern"]:
        security_message = "⚠️ SECURITY WARNING: " + ". ".join(security_concerns["messages"]) + "."
        modified_code = f"print({security_message!r})\n\n" + modified_code

    return modified_code
138
 
139
  def clean_print_statements(code_block):
140
  """
 
223
  import sys
224
  from io import StringIO
225
 
226
+ # Check for security concerns in the code
227
+ security_concerns = check_security_concerns(code_str)
228
+
229
+ # Apply security modifications to the code
230
+ modified_code = clean_code_for_security(code_str, security_concerns)
231
+
232
  context = {
233
  'pd': pd,
234
  'px': px,
 
246
  modified_code = re.sub(
247
  r'(\w*_?)fig(\w*)\.show\(\)',
248
  r'json_outputs.append(plotly.io.to_json(\1fig\2, pretty=True))',
249
+ modified_code
250
  )
251
 
252
  modified_code = re.sub(