karimaloulou committed on
Commit
700aa07
·
verified ·
1 Parent(s): a039a4b

Update format_input.py

Browse files
Files changed (1) hide show
  1. format_input.py +14 -258
format_input.py CHANGED
@@ -1,259 +1,15 @@
1
- import re
2
- import sys
3
- import os
4
-
5
- # Add the project root to sys.path
6
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
7
-
8
- from location.IP_Checker import get_all_details
9
-
10
- def append_severity_definition(log_text):
11
- severity_definitions = {
12
- "Information": "The event provides useful context but is not critical.",
13
- "Notice": "Signifies noteworthy event that do not require immediate action.",
14
- "Warning": "Indicates potential issue that should be addressed and should be investigated before escalation.",
15
- "Error": "Indicates a problem that needs to be resolved and impacts system functionality but doesn't cause immediate service disruption.",
16
- "Critical": "Indicates a severe issue that causes significant loss of service and requires immediate attention.",
17
- }
18
-
19
- match = re.search(r"Severity:\s*(\S+)", log_text)
20
- if match:
21
- severity = match.group(1)
22
- definition = severity_definitions.get(severity)
23
- if definition:
24
- log_text = re.sub(
25
- r"(Severity:\s*" + re.escape(severity) + r")\n",
26
- r"\1\n - Severity Definition: " + definition + "\n",
27
- log_text,
28
- )
29
- return log_text
30
-
31
-
32
- def format_sophos_row(row):
33
- def get_value(key, default="Unknown"):
34
- return row.get(key, default)
35
-
36
- return f"""
37
- - Device Information
38
- - Device Name: {get_value("device_name")}
39
- - Device Model: {get_value("device_model")}
40
- - Device Serial ID: {get_value("device_serial_id")}
41
-
42
- - Connection Details
43
- - Source IP: {get_value("src_ip")}
44
- - Source Country: {get_value("src_country", "Unknown") if get_value("src_country") != 'R1' else "Unknown"}
45
- - Destination IP: {get_value("dst_ip")}
46
- - Destination Country: {get_value("dst_country", "Unknown") if get_value("dst_country") != 'R1' else "Unknown"}
47
- - Protocol: {get_value("protocol")}
48
- - Source Port: {get_value("src_port")}
49
- - Destination Port: {get_value("dst_port")}
50
- - Timestamp: {get_value("timestamp")}
51
-
52
- - Log Information
53
- - Log Type: {get_value("log_type")}
54
- - Log Component: {get_value("log_component")}
55
- - Log Subtype: {get_value("log_subtype")}
56
- - Severity: {get_value("severity")}
57
-
58
- - Additional Information
59
- - Heartbeat Status: {get_value("hb_status")}
60
- - Application Resolved By: {get_value("app_resolved_by")}
61
- - Application Is Cloud: {get_value("app_is_cloud")}
62
- """
63
-
64
-
65
- def format_azure_sign_in_logs(data_row):
66
- def get_value(key, default="Unknown"):
67
- return data_row.get(key, default)
68
-
69
- device_detail = (
70
- f"\n- Device Detail:"
71
- f"\n Device ID: {get_value('deviceDetail_deviceId')}"
72
- f"\n Display Name: {get_value('deviceDetail_displayName')}"
73
- f"\n Operating System: {get_value('deviceDetail_operatingSystem')}"
74
- f"\n Browser: {get_value('deviceDetail_browser')}"
75
- f"\n Compliance: {get_value('deviceDetail_isCompliant')}"
76
- f"\n Managed: {get_value('deviceDetail_isManaged')}"
77
- f"\n Trust Type: {get_value('deviceDetail_trustType')}"
78
- )
79
-
80
- location = (
81
- f"\n- Location:"
82
- f"\n City: {get_value('location_city')}"
83
- f"\n State: {get_value('location_state')}"
84
- f"\n Country/Region: {get_value('location_countryOrRegion')}"
85
- f"\n Latitude: {get_value('location_geoCoordinates_latitude')}"
86
- f"\n Longitude: {get_value('location_geoCoordinates_longitude')}"
87
- )
88
-
89
- return (
90
- f"Created Date: {get_value('createdDateTime')}\n"
91
- f"User: {get_value('userDisplayName')}\n"
92
- f"User Principal Name: {get_value('userPrincipalName')}\n"
93
- f"User ID: {get_value('userId')}\n"
94
- f"Application ID: {get_value('appId')}\n"
95
- f"Application Display Name: {get_value('appDisplayName')}\n"
96
- f"IP Address: {get_value('ipAddress')}\n"
97
- f"More information about Location:{get_all_details({get_value('ipAddress')})}"
98
- f"Client App Used: {get_value('clientAppUsed')}\n"
99
- f"Conditional Access Status: {get_value('conditionalAccessStatus')}\n"
100
- f"{device_detail}\n"
101
- f"{location}\n"
102
- f"- Status: {get_value('status_additionalDetails')}\n"
103
- )
104
-
105
-
106
- def format_palo_alto_logs(data_row):
107
- def get_value(key, default="Unknown"):
108
- return data_row.get(key, default)
109
-
110
- core_identifiers = (
111
- f"Source: {get_value('src')}\n"
112
- f"Destination: {get_value('dest')}\n"
113
- f"Source IP: {get_value('src_ip')}\n"
114
- f"Destination IP: {get_value('dest_ip')}\n"
115
- f"Source Port: {get_value('src_port')}\n"
116
- f"Destination Port: {get_value('dest_port')}\n"
117
- f"Protocol: {get_value('protocol')}\n"
118
- )
119
-
120
- traffic_volume = (
121
- f"Bytes: {get_value('bytes')}\n"
122
- f"Bytes In: {get_value('bytes_in')}\n"
123
- f"Bytes Out: {get_value('bytes_out')}\n"
124
- f"Packets: {get_value('packets')}\n"
125
- f"Packets In: {get_value('packets_in')}\n"
126
- f"Packets Out: {get_value('packets_out')}\n"
127
- )
128
-
129
- temporal_info = (
130
- f"Start Time: {get_value('start_time')}\n"
131
- f"Date: {get_value('date_year')}-{get_value('date_month')}-{get_value('date_mday')} "
132
- f"{get_value('date_hour')}:{get_value('date_minute')}:{get_value('date_second')}\n"
133
- f"Duration: {get_value('duration')}\n"
134
- )
135
-
136
- network_device_info = (
137
- f"Source Zone: {get_value('src_zone')}\n"
138
- f"Destination Zone: {get_value('dest_zone')}\n"
139
- f"Source Interface: {get_value('src_interface')}\n"
140
- f"Destination Interface: {get_value('dest_interface')}\n"
141
- f"Device ID: {get_value('dvc')}\n"
142
- f"Device Name: {get_value('dvc_name')}\n"
143
- )
144
-
145
- app_user_info = (
146
- f"Application: {get_value('application')}\n"
147
- f"User: {get_value('user')}\n"
148
- f"User Agent: {get_value('user_agent')}\n"
149
- )
150
-
151
- security_info = (
152
- f"Action: {get_value('action')}\n"
153
- f"Severity: {get_value('severity')}\n"
154
- f"Threat: {get_value('threat')}\n"
155
- f"Threat Category: {get_value('threat_category')}\n"
156
- f"Signature: {get_value('signature')}\n"
157
- f"Signature ID: {get_value('signature_id')}\n"
158
- )
159
-
160
- return (
161
- f"{core_identifiers}\n"
162
- f"{traffic_volume}\n"
163
- f"{temporal_info}\n"
164
- f"{network_device_info}\n"
165
- f"{app_user_info}\n"
166
- f"{security_info}\n"
167
- )
168
-
169
- def format_office365_logs(data_row):
170
- def get_value(key, default="Unknown"):
171
- return data_row.get(key, default)
172
-
173
- actor_info = (
174
- f"- Actor Information:\n"
175
- f" - User ID: {get_value('UserId')}\n"
176
- f" - User Key: {get_value('UserKey')}\n"
177
- f" - User Type: {get_value('UserType')}\n"
178
- f" - User Principal Name: {get_value('UserPrincipalName')}\n"
179
- f" - Actor IP Address: {get_value('ActorIpAddress')}\n"
180
- )
181
-
182
- device_info = (
183
- f"- Device Information:\n"
184
- f" - Client IP: {get_value('ClientIP')}\n"
185
- f" - Client App ID: {get_value('ClientAppId')}\n"
186
- f" - Client App Name: {get_value('AppAccessContext.ClientAppName')}\n"
187
- f" - Device ID: {get_value('DeviceId')}\n"
188
- f" - Device Name: {get_value('DeviceName')}\n"
189
- f" - Device Operating System: {get_value('DeviceOperatingSystem')}\n"
190
- )
191
-
192
- operation_info = (
193
- f"- Operation Information:\n"
194
- f" - Operation: {get_value('Operation')}\n"
195
- f" - Operation Properties: {get_value('OperationProperties')}\n"
196
- f" - Object ID: {get_value('ObjectId')}\n"
197
- f" - Object Type: {get_value('ObjectType')}\n"
198
- f" - Object Name: {get_value('ObjectName')}\n"
199
- )
200
-
201
- policy_info = (
202
- f"- Policy Information:\n"
203
- f" - Policy Details: {get_value('PolicyDetails')}\n"
204
- f" - Policy Identifier: {get_value('PolicyIdentifierString')}\n"
205
- f" - Policy Last Updated Time: {get_value('PolicyLastUpdatedTime')}\n"
206
- )
207
-
208
- event_info = (
209
- f"- Event Information:\n"
210
- f" - Creation Time: {get_value('CreationTime')}\n"
211
- f" - Result Status: {get_value('ResultStatus')}\n"
212
- f" - Record Type: {get_value('RecordType')}\n"
213
- f" - Request ID: {get_value('RequestId')}\n"
214
- f" - Organization ID: {get_value('OrganizationId')}\n"
215
- f" - Organization Name: {get_value('OrganizationName')}\n"
216
- f" - Tenant ID: {get_value('TenantId')}\n"
217
- )
218
-
219
- additional_info = (
220
- f"- Additional Information:\n"
221
- f" - App Display Name: {get_value('ApplicationDisplayName')}\n"
222
- f" - User Agent: {get_value('UserAgent')}\n"
223
- f" - Session ID: {get_value('SessionId')}\n"
224
- )
225
-
226
- return (
227
- f"{actor_info}\n"
228
- f"{device_info}\n"
229
- f"{operation_info}\n"
230
- f"{policy_info}\n"
231
- f"{event_info}\n"
232
- f"{additional_info}\n"
233
- )
234
-
235
-
236
- def flatten_json(json_data, parent_key="", separator="_"):
237
- def _flatten(obj, parent_key=""):
238
- items = {}
239
- for k, v in obj.items():
240
- new_key = f"{parent_key}{separator}{k}" if parent_key else k
241
- if isinstance(v, dict):
242
- items.update(_flatten(v, new_key))
243
- else:
244
- items[new_key] = v
245
- return items
246
-
247
- return _flatten(json_data, parent_key)
248
-
249
- def detect_log_type(log_data):
250
- if "device_name" in log_data and "src_ip" in log_data and "dst_ip" in log_data:
251
- return "sophos"
252
- elif "userPrincipalName" in log_data and "ipAddress" in log_data:
253
- return "azure-sign-in"
254
- elif "src" in log_data and "dest" in log_data and "severity" in log_data:
255
- return "palo-alto"
256
- elif "UserId" in log_data and "ClientIP" in log_data and "Operation" in log_data:
257
- return "office365"
258
- else:
259
  return "Unknown"
 
1
+ import re
2
+ import sys
3
+ import os
4
+
5
+ def detect_log_type(log_data):
6
+ if "device_name" in log_data and "src_ip" in log_data and "dst_ip" in log_data:
7
+ return "sophos"
8
+ elif "userPrincipalName" in log_data and "ipAddress" in log_data:
9
+ return "azure-sign-in"
10
+ elif "src" in log_data and "dest" in log_data and "severity" in log_data:
11
+ return "palo-alto"
12
+ elif "UserId" in log_data and "ClientIP" in log_data and "Operation" in log_data:
13
+ return "office365"
14
+ else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  return "Unknown"