Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 79 additions & 13 deletions .buildkite/scripts/snyk/plugins-scan/generate-steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,23 +82,26 @@ def load_plugin_matrix() -> list:
def parse_plugin_entry(entry) -> list:
"""
Parse a plugin entry from the matrix.
Returns a list of (plugin_name, branch) tuples.
Returns a list of (plugin_name, branch, logstash_branch) tuples.
Entries can be either:
- A simple string: "logstash-filter-date" -> uses default branch
- A dict with branches as sibling key:
{"logstash-input-http": None, "branches": ["main", "3.x"]}
- If branches contains "use-release-branches", fetches branches from artifacts-api
- If logstash_branch is "match-with-plugin-branches", uses same branch as plugin
- If logstash_branch is a specific branch (e.g., "main"), uses that branch for Logstash
"""
if isinstance(entry, str):
return [(entry, DEFAULT_BRANCH)]
return [(entry, DEFAULT_BRANCH, None)]

if isinstance(entry, dict):
plugin_name = None
branches = entry.get('branches', [DEFAULT_BRANCH])
ignore_branches = entry.get('ignore_branches', [])
logstash_branch_config = entry.get('logstash_branch')

for key, value in entry.items():
if key not in ('branches', 'ignore_branches'):
if key not in ('branches', 'ignore_branches', 'logstash_branch'):
plugin_name = key
break

Expand All @@ -124,29 +127,82 @@ def parse_plugin_entry(entry) -> list:
if not should_ignore_branch(b, ignore_branches)
]

return [(plugin_name, branch) for branch in set(resolved_branches)]
# Determine logstash branch for each plugin branch
result = []
for branch in set(resolved_branches):
if logstash_branch_config == 'match-with-plugin-branches':
logstash_branch = branch
elif logstash_branch_config:
logstash_branch = str(logstash_branch_config)
else:
logstash_branch = None
result.append((plugin_name, branch, logstash_branch))
return result

return []


def generate_snyk_step(plugin_name: str, branch: str) -> dict:
def generate_snyk_step(plugin_name: str, branch: str, logstash_branch: str = None) -> dict:
"""Generate a Buildkite step for running snyk monitor on a plugin."""
step_key = slugify_bk_key(f"snyk-{plugin_name}-{branch}")
if plugin_name == 'logstash-filter-elastic_integration':
repo_url = f"https://github.com/elastic/{plugin_name}.git"
else:
repo_url = f"https://github.com/logstash-plugins/{plugin_name}.git"

work_dir = "/opt/buildkite-agent/ls-plugins-snyk-scan"

# Build logstash clone and bootstrap command if logstash_branch is specified
logstash_clone_cmd = ""
if logstash_branch:
logstash_clone_cmd = f"""
echo "--- Cloning logstash (branch: {logstash_branch})"
if ! git clone --depth 1 --branch {logstash_branch} https://github.com/elastic/logstash.git; then
echo "Branch {logstash_branch} not found in logstash, skipping..."
rm -rf {work_dir}
exit 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this exit non zero?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, thanks for catching it. I removed the `if ... then` wrapper: if `git clone` fails, the workflow now fails as well (tested in the past).

fi

echo "--- Building logstash"
cd logstash && ./gradlew clean bootstrap installDefaultGems && cd ..

export LOGSTASH_PATH="{work_dir}/logstash"

# Export Gradle property for plugins that need logstashCoreGemPath
export ORG_GRADLE_PROJECT_logstashCoreGemPath="{work_dir}/logstash/logstash-core"
"""

command = f"""#!/bin/bash
set -euo pipefail

echo "--- Downloading snyk..."
curl -sL --retry-max-time 60 --retry 3 --retry-delay 5 https://static.snyk.io/cli/latest/snyk-linux -o snyk
chmod +x ./snyk
source .buildkite/scripts/common/vm-agent-multi-jdk.sh
export SNYK_TOKEN=$(vault read -field=token secret/ci/elastic-logstash/snyk-creds)

# Use isolated directory to avoid settings.gradle conflicts
rm -rf {work_dir}
mkdir -p {work_dir}
cd {work_dir}
{logstash_clone_cmd}
echo "--- Cloning {plugin_name} (branch: {branch})"
git clone --depth 1 --branch {branch} https://github.com/logstash-plugins/{plugin_name}.git
if ! git clone --depth 1 --branch {branch} {repo_url}; then
echo "Branch {branch} not found in {plugin_name}, skipping..."
rm -rf {work_dir}
exit 0
fi
cd {plugin_name}

echo "--- Downloading snyk..."
curl -sL --retry-max-time 60 --retry 3 --retry-delay 5 https://static.snyk.io/cli/latest/snyk-linux -o snyk
chmod +x ./snyk

echo "--- Running Snyk monitor for {plugin_name} on branch {branch}"
./snyk monitor --gradle --package-manager=gradle --org=logstash --project-name={plugin_name} --target-reference={branch} || true
# LS core resolves the gems so Gemfile needs to be excluded
# .buildkite, .ci path may contain python/other projects not necessary to scan
# eventually using --all-projects is good because snyk may detect CVEs through other package managers like maven, gradle, (ruby excluded) etc..
./snyk monitor --all-projects --exclude=Gemfile,.buildkite,.ci,vendor.json --org=logstash --target-reference={branch}

# Cleanup
rm -rf {work_dir}
"""

return {
Expand All @@ -163,11 +219,21 @@ def generate_pipeline() -> dict:
steps = []
for entry in plugins:
plugin_branches = parse_plugin_entry(entry)
for plugin_name, branch in plugin_branches:
step = generate_snyk_step(plugin_name, branch)
for plugin_name, branch, logstash_branch in plugin_branches:
step = generate_snyk_step(plugin_name, branch, logstash_branch)
steps.append(step)

return {"steps": steps}

return {
"agents": {
"provider": "gcp",
"imageProject": "elastic-images-prod",
"image": "family/platform-ingest-logstash-multi-jdk-ubuntu-2204",
"machineType": "n2-standard-4",
"diskSizeGb": 32
},
"steps": steps
}


if __name__ == "__main__":
Expand Down
19 changes: 12 additions & 7 deletions .buildkite/scripts/snyk/plugins-scan/plugins-snyk-scan-matrix.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
plugins:
- logstash-filter-date
- logstash-filter-dissect
- logstash-filter-geoip
- logstash-filter-jdbc_static
- logstash-filter-date:
logstash_branch: 'main'
- logstash-filter-dissect:
logstash_branch: 'main'
- logstash-filter-geoip:
logstash_branch: 'main'
- logstash-filter-useragent
- logstash-input-azure_event_hubs
- logstash-input-dead_letter_queue
- logstash-input-dead_letter_queue:
logstash_branch: 'main'
- logstash-input-file
- logstash-integration-jdbc
- logstash-integration-kafka
- logstash-integration-kafka:
branches: [main, 11.x]
- logstash-integration-snmp
- logstash-integration-aws
- logstash-input-http:
Expand All @@ -19,4 +23,5 @@ plugins:
branches: [main, 6.x]
- logstash-filter-elastic_integration:
branches: [main, 'use-release-branches']
ignore_branches: [7.x]
ignore_branches: [7.x]
logstash_branch: 'match-with-plugin-branches'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit hesitant to introduce this "magic" match-with-plugin-branches name. Fundamentally, the only plugin that strives to keep its branch names in sync with Logstash is filter-elastic_integration. Clearly there is a case for mapping a target plugin branch to a Logstash branch, but I think we should make this explicit instead. Specifically, I think logstash_branch should be a map in which the keys are the branches listed in the branches section and the values are the corresponding Logstash branches. We should also add validation that branches is present whenever logstash_branch is present.

  - logstash-input-dead_letter_queue:
    branches: [main]
    logstash_branch: 
      main: main

I have the same sentiment about the "magic" use-release-branches name. I really don't think that having these special cases throughout the code is worth the overhead. I think that maintaining static lists is less of a cognitive burden and makes us more intentional about exactly what we are scanning.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, let's think about optimization. I have removed the magic use-release-branches entry 😁 and specified explicit plugin-branch/core-branch pairs instead.