1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
|
#!/usr/bin/env python3
# /opt/nifi/scripts/secure_processor.py
"""
Processor with security hardening and compliance
"""
import os
import hashlib
import hmac
import secrets
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
import base64
import json
from typing import Dict, Any, Optional
import re
from datetime import datetime, timedelta
class SecurityManager:
    """Security helper for ExecuteStreamCommand scripts.

    Bundles Fernet encryption of individual values, redaction of sensitive
    substrings in log messages, a regex-based screen for dangerous input,
    audit hashing and HMAC-SHA256 signature verification.
    """

    # Regexes whose matches are replaced by "[REDACTED]" in sanitize_logs().
    _SENSITIVE_PATTERNS = (
        r'\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b',  # Credit card
        r'\b\d{3}-\d{2}-\d{4}\b',  # SSN
        # The TLD class is [A-Za-z]; a "|" inside a character class is a
        # literal pipe, not an alternation, and must not be accepted here.
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',  # Email
        r'\b(?:\d{1,3}\.){3}\d{1,3}\b',  # IP addresses
    )

    # Regexes that cause validate_input_security() to reject the input.
    _DANGEROUS_PATTERNS = (
        r'<script[^>]*>.*?</script>',  # XSS
        r'(union|select|insert|update|delete|drop)\s+',  # SQL injection
        r'(eval|exec|system|shell_exec)\s*\(',  # Code injection
        r'\.\./',  # Path traversal
    )

    def __init__(self):
        """Derive the encryption key and build the Fernet cipher."""
        self.encryption_key = self._derive_encryption_key()
        self.cipher_suite = Fernet(self.encryption_key)
        # Per-instance copy so callers may extend the list without affecting
        # other instances.
        self.sensitive_patterns = list(self._SENSITIVE_PATTERNS)

    def _derive_encryption_key(self) -> bytes:
        """Derive the Fernet key from a password using PBKDF2-HMAC-SHA256.

        The password and salt are read from the ENCRYPTION_PASSWORD and
        ENCRYPTION_SALT environment variables.

        Returns:
            The 32-byte derived key, URL-safe base64 encoded.
        """
        # NOTE(review): both fallbacks are publicly known constants. Any
        # deployment that does not set these variables encrypts with a key
        # anyone can re-derive. Changing the fallbacks or the iteration count
        # would change the derived key and break decryption of existing data,
        # so they are left as they are here.
        password = os.getenv("ENCRYPTION_PASSWORD", "default-key-change-in-production").encode()
        salt = os.getenv("ENCRYPTION_SALT", "default-salt").encode()
        kdf = PBKDF2HMAC(
            algorithm=hashes.SHA256(),
            length=32,
            salt=salt,
            iterations=100000,
        )
        return base64.urlsafe_b64encode(kdf.derive(password))

    def encrypt_sensitive_data(self, data: str) -> str:
        """Encrypt a string and return it as URL-safe base64 text.

        Raises:
            SecurityException: if encoding or encryption fails; the original
                error is attached as ``__cause__``.
        """
        try:
            encrypted = self.cipher_suite.encrypt(data.encode())
            # The cipher output is base64-encoded once more here;
            # decrypt_sensitive_data() undoes exactly this extra layer, so the
            # two methods must stay in step.
            return base64.urlsafe_b64encode(encrypted).decode()
        except Exception as e:
            raise SecurityException(f"Encryption failed: {e}") from e

    def decrypt_sensitive_data(self, encrypted_data: str) -> str:
        """Decrypt a value produced by encrypt_sensitive_data().

        Raises:
            SecurityException: if decoding or decryption fails; the original
                error is attached as ``__cause__``.
        """
        try:
            decoded = base64.urlsafe_b64decode(encrypted_data.encode())
            decrypted = self.cipher_suite.decrypt(decoded)
            return decrypted.decode()
        except Exception as e:
            raise SecurityException(f"Decryption failed: {e}") from e

    def sanitize_logs(self, log_message: str) -> str:
        """Return *log_message* with every sensitive match replaced by ``[REDACTED]``."""
        sanitized = log_message
        for pattern in self.sensitive_patterns:
            sanitized = re.sub(pattern, '[REDACTED]', sanitized)
        return sanitized

    def validate_input_security(self, data: Any) -> bool:
        """Screen *data* for known dangerous patterns.

        Dicts and lists are serialized to JSON text before matching; any other
        value is converted with ``str()``.

        Returns:
            False if any dangerous pattern matches (case-insensitively),
            True otherwise.
        """
        if isinstance(data, (dict, list)):
            # default=str: the text is only scanned, never parsed back, so
            # non-JSON values are stringified instead of raising TypeError.
            data_str = json.dumps(data, default=str)
        else:
            data_str = str(data)

        # DOTALL lets ".*?" in the <script> pattern cross line breaks, which
        # occur as real newlines when a plain string is screened.
        flags = re.IGNORECASE | re.DOTALL
        return not any(
            re.search(pattern, data_str, flags)
            for pattern in self._DANGEROUS_PATTERNS
        )

    def generate_audit_hash(self, data: Any) -> str:
        """Return a SHA-256 hex digest of *data* combined with the current time.

        Dicts and lists are serialized with sorted keys. Because the current
        timestamp is part of the hashed content, hashing the same data at a
        different instant yields a different digest.
        """
        if isinstance(data, (dict, list)):
            data_str = json.dumps(data, sort_keys=True)
        else:
            data_str = str(data)
        timestamp = datetime.now().isoformat()
        content = f"{data_str}:{timestamp}"
        return hashlib.sha256(content.encode()).hexdigest()

    def verify_request_signature(self, data: str, signature: str, secret: str) -> bool:
        """Check *signature* against the HMAC-SHA256 hex digest of *data*.

        Returns:
            True only when the signature matches; False on mismatch or when
            any argument cannot be encoded (for example ``None``).
        """
        try:
            expected_signature = hmac.new(
                secret.encode(),
                data.encode(),
                hashlib.sha256,
            ).hexdigest()
            # Compare as bytes: compare_digest() rejects non-ASCII str, while
            # bytes of any content are compared in constant time.
            return hmac.compare_digest(signature.encode(), expected_signature.encode())
        except (AttributeError, TypeError, ValueError):
            return False
class SecurityException(Exception):
    """Raised when a security check or a cryptographic operation fails."""
class AuditLogger:
    """Append-only audit trail writer (one JSON object per line)."""

    def __init__(self):
        """Resolve the audit file path and make sure its directory exists.

        The path is read from the AUDIT_LOG_FILE environment variable and
        falls back to ``/opt/nifi/logs/audit.log``.
        """
        self.audit_file = os.getenv("AUDIT_LOG_FILE", "/opt/nifi/logs/audit.log")
        log_dir = os.path.dirname(self.audit_file)
        # A bare filename has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError; in that case the file lives in the current
        # directory and nothing has to be created.
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

    def log_processing_event(self, event_type: str, user_context: Dict, data_hash: str,
                             processing_details: Dict):
        """Append one processing event to the audit file.

        Args:
            event_type: label stored under ``event_type``.
            user_context: caller information, stored as given.
            data_hash: digest identifying the processed data.
            processing_details: extra event details, stored as given.

        Raises:
            TypeError: if the entry contains values ``json.dumps`` cannot
                serialize.
            OSError: if the audit file cannot be opened or written.
        """
        audit_entry = {
            "timestamp": datetime.now().isoformat(),
            "event_type": event_type,
            "user_context": user_context,
            "data_hash": data_hash,
            "processing_details": processing_details,
            # These flags are fixed constants; they are not derived from the
            # event being logged.
            "compliance_flags": {
                "gdpr_applicable": True,
                "data_retention_days": 365,
                "encryption_used": True,
            },
        }
        # Explicit encoding keeps the log independent of the process locale.
        with open(self.audit_file, 'a', encoding='utf-8') as f:
            f.write(json.dumps(audit_entry) + '\n')
class SecureProcessor(NiFiStreamProcessor):
    """Stream processor with integrated security checks and compliance metadata.

    NOTE(review): ``NiFiStreamProcessor`` is neither defined nor imported in
    the visible part of this file; ``self.logger`` and ``run()`` are presumably
    provided by it - confirm.
    """

    def __init__(self, log_level: str = "INFO"):
        """Set up the security helpers and read limits from the environment.

        COMPLIANCE_MODE (default ``"strict"``) controls whether sensitive
        fields are encrypted; MAX_DATA_SIZE_BYTES (default 10 MiB) caps the
        UTF-8 size of the input.
        """
        super().__init__(log_level)
        self.security_manager = SecurityManager()
        self.audit_logger = AuditLogger()
        self.compliance_mode = os.getenv("COMPLIANCE_MODE", "strict")
        self.max_data_size = int(os.getenv("MAX_DATA_SIZE_BYTES", "10485760"))  # 10MB

    @staticmethod
    def _count_records(payload: Any) -> int:
        """Return the number of records in *payload*: list length, else 1."""
        return len(payload) if isinstance(payload, list) else 1

    def process_data(self, input_data: str) -> Any:
        """Validate, process and audit one JSON payload.

        Args:
            input_data: JSON text holding one record (object) or a list of
                records.

        Returns:
            A dict with ``data``, ``compliance_metadata`` and
            ``processing_signature`` keys.

        Raises:
            SecurityException: if the input is too large, is not valid JSON,
                matches a dangerous pattern, or contains a record that is not
                a JSON object.
        """
        # Security validations
        if len(input_data.encode('utf-8')) > self.max_data_size:
            raise SecurityException(f"Input data exceeds maximum size: {self.max_data_size} bytes")

        try:
            data = json.loads(input_data)
        except json.JSONDecodeError as e:
            raise SecurityException("Invalid JSON input - potential injection attempt") from e

        # Validate input security
        if not self.security_manager.validate_input_security(data):
            raise SecurityException("Input contains potentially dangerous patterns")

        # Generate audit hash
        data_hash = self.security_manager.generate_audit_hash(data)

        # User context is taken from environment variables of this process.
        user_context = {
            "nifi_user": os.getenv("NIFI_USER", "system"),
            "source_ip": os.getenv("NIFI_SOURCE_IP", "unknown"),
            "execution_context": os.getenv("NIFI_EXECUTION_ID", "unknown")
        }

        # Log processing start
        self.audit_logger.log_processing_event(
            "processing_started",
            user_context,
            data_hash,
            {"record_count": self._count_records(data)}
        )

        try:
            # Actual processing
            processed_data = self._secure_process_records(data)

            # Compliance post-processing
            final_result = self._apply_compliance_rules(processed_data)

            # A single record arrives as a dict; len() of it would report the
            # number of fields, so count list items and treat the rest as 1.
            if isinstance(final_result, dict):
                records_processed = self._count_records(final_result.get("data", []))
            else:
                records_processed = 1

            # Log successful processing
            self.audit_logger.log_processing_event(
                "processing_completed",
                user_context,
                self.security_manager.generate_audit_hash(final_result),
                {
                    "records_processed": records_processed,
                    "security_level": "high",
                    "compliance_verified": True
                }
            )
            return final_result
        except Exception as e:
            # Record the failure in the audit trail, then let it propagate.
            self.audit_logger.log_processing_event(
                "processing_error",
                user_context,
                data_hash,
                {"error": str(e), "error_type": e.__class__.__name__}
            )
            raise

    def _secure_process_record(self, record: Any, index: int) -> Dict[str, Any]:
        """Sanitize one record, apply business logic, and encrypt it in strict mode."""
        sanitized_record = self._sanitize_record(record)
        processed_record = self._apply_business_logic(sanitized_record, index)
        if self.compliance_mode == "strict":
            processed_record = self._encrypt_sensitive_fields(processed_record)
        return processed_record

    def _secure_process_records(self, data: Any) -> Any:
        """Process a list of records, or a single record, securely.

        Returns:
            A list of processed records when *data* is a list, otherwise the
            single processed record (index 0).
        """
        if isinstance(data, list):
            return [
                self._secure_process_record(record, idx)
                for idx, record in enumerate(data)
            ]
        return self._secure_process_record(data, 0)

    def _sanitize_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Return a sanitized copy of *record*.

        Key characters outside ``[a-zA-Z0-9_]`` become ``_``. String values
        have ``<...>`` tags removed and are cut to 1000 characters. Other
        values, including nested containers, are passed through unchanged.

        Raises:
            SecurityException: if *record* is not a dict (for example a JSON
                scalar, or a list nested inside the top-level list).
        """
        if not isinstance(record, dict):
            raise SecurityException(
                f"Expected a JSON object record, got {type(record).__name__}"
            )

        sanitized = {}
        for key, value in record.items():
            # Sanitize key names
            # NOTE(review): distinct keys can collapse to the same safe key
            # (e.g. "a-b" and "a_b"); the later one silently wins.
            safe_key = re.sub(r'[^a-zA-Z0-9_]', '_', key)

            # Sanitize values
            if isinstance(value, str):
                # Remove potential script injection, then limit the length
                safe_value = re.sub(r'<[^>]*>', '', value)[:1000]
            else:
                safe_value = value

            sanitized[safe_key] = safe_value
        return sanitized

    def _apply_business_logic(self, record: Dict[str, Any], index: int) -> Dict[str, Any]:
        """Return a copy of *record* enriched with processing metadata.

        Adds ``processed_at``, ``security_level`` and ``record_index``. When
        the record has a ``user_id``, also adds its SHA-256 hex digest as
        ``user_id_hash``.
        """
        processed = record.copy()

        # Add processing metadata
        processed["processed_at"] = datetime.now().isoformat()
        processed["security_level"] = "verified"
        processed["record_index"] = index

        if "user_id" in record:
            processed["user_id_hash"] = hashlib.sha256(str(record["user_id"]).encode()).hexdigest()
        return processed

    def _encrypt_sensitive_fields(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of *record* with sensitive fields encrypted.

        Each truthy field named in the sensitive list is replaced by a
        ``<field>_encrypted`` entry holding the ciphertext.
        """
        sensitive_fields = ["ssn", "credit_card", "personal_data", "private_info"]
        encrypted_record = record.copy()

        for field in sensitive_fields:
            if field in record and record[field]:
                try:
                    encrypted_value = self.security_manager.encrypt_sensitive_data(str(record[field]))
                    encrypted_record[f"{field}_encrypted"] = encrypted_value
                    # Remove original sensitive data
                    del encrypted_record[field]
                except Exception as e:
                    # NOTE(review): best-effort - on failure the plaintext
                    # value stays in the output record. Confirm this is
                    # acceptable in strict compliance mode.
                    self.logger.warning(f"Failed to encrypt field {field}: {e}")
        return encrypted_record

    def _apply_compliance_rules(self, processed_data: Any) -> Dict[str, Any]:
        """Wrap *processed_data* with compliance metadata and a signature.

        Apart from the encrypted-field count, the timestamp and the
        SECURITY_OFFICER environment variable, the metadata values are fixed
        constants.
        """
        compliance_metadata = {
            "gdpr_compliance": {
                "data_processing_lawful_basis": "legitimate_interest",
                "data_subject_rights_respected": True,
                "retention_period_days": 365,
                "encrypted_fields_count": self._count_encrypted_fields(processed_data)
            },
            "security_measures": {
                "encryption_at_rest": True,
                "access_logging": True,
                "input_validation": True,
                "output_sanitization": True
            },
            "audit_trail": {
                "processing_timestamp": datetime.now().isoformat(),
                "compliance_version": "1.0",
                "security_officer": os.getenv("SECURITY_OFFICER", "system")
            }
        }
        return {
            "data": processed_data,
            "compliance_metadata": compliance_metadata,
            "processing_signature": self.security_manager.generate_audit_hash(processed_data)
        }

    def _count_encrypted_fields(self, data: Any) -> int:
        """Count keys ending in ``_encrypted`` anywhere inside *data*.

        Dicts and lists are walked recursively. The value of an
        ``*_encrypted`` key is not descended into.
        """
        if isinstance(data, dict):
            total = 0
            for key, value in data.items():
                if key.endswith('_encrypted'):
                    total += 1
                elif isinstance(value, (dict, list)):
                    total += self._count_encrypted_fields(value)
            return total
        if isinstance(data, list):
            return sum(self._count_encrypted_fields(item) for item in data)
        return 0
if __name__ == "__main__":
    # Script entry point: log verbosity comes from LOG_LEVEL (default "INFO").
    # run() is not defined in this file - presumably inherited from
    # NiFiStreamProcessor; confirm.
    processor = SecureProcessor(os.getenv("LOG_LEVEL", "INFO"))
    processor.run()
|