Skip to content

Commit a25587c

Browse files
Copilotstreed
andcommitted
Implement secure Chrome configuration for website import
Co-authored-by: streed <805140+streed@users.noreply.github.com>
1 parent 7104a1d commit a25587c

2 files changed

Lines changed: 50 additions & 9 deletions

File tree

cmd/import.go

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"net/url"
7+
"os"
78
"strings"
89
"time"
910

@@ -151,18 +152,22 @@ func runImport(cmd *cobra.Command, args []string) error {
151152

152153
// extractPageContent uses chromedp to extract title and content from a webpage
153154
func extractPageContent(pageURL string) (title, content string, err error) {
154-
// Configure Chrome options for sandboxed environments
155+
// Configure Chrome options with security considerations
156+
// Start with default options that include necessary headless browser settings
155157
opts := append(chromedp.DefaultExecAllocatorOptions[:],
156-
chromedp.NoSandbox,
157-
chromedp.DisableGPU,
158-
chromedp.Flag("disable-dev-shm-usage", true),
159-
chromedp.Flag("disable-web-security", true),
160-
chromedp.Flag("ignore-certificate-errors", true),
161-
chromedp.Flag("ignore-ssl-errors", true),
162-
chromedp.Flag("ignore-certificate-errors-spki-list", true),
163-
chromedp.UserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"),
158+
// Only add minimal flags needed for CI/container environments
159+
// NoSandbox is only used if we detect we're in a restricted environment
160+
chromedp.DisableGPU, // Safe to disable GPU in headless mode
161+
// Use a realistic user agent for better compatibility
162+
chromedp.UserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"),
164163
)
165164

165+
// Only disable sandbox if we're in a restricted environment (CI/containers)
166+
// Detection is heuristic: CI environment variables, the /.dockerenv marker, and the AppArmor userns sysctl
167+
if isRestrictedEnvironment() {
168+
opts = append(opts, chromedp.NoSandbox)
169+
}
170+
166171
// Create allocator context
167172
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
168173
defer cancel()
@@ -293,4 +298,32 @@ func cleanMarkdownContent(content string) string {
293298
}
294299

295300
return strings.Join(cleanLines, "\n")
301+
}
302+
303+
// isRestrictedEnvironment reports whether we're running in a restricted
// environment where the Chrome sandbox needs to be disabled (CI, containers,
// etc.).
//
// It returns true when any of the following holds:
//   - one of the well-known CI environment variables is set to a non-empty value;
//   - the /.dockerenv marker file exists;
//   - the kernel.apparmor_restrict_unprivileged_userns sysctl holds a non-zero value.
//
// Otherwise it returns false and the sandbox is left enabled.
func isRestrictedEnvironment() bool {
	// Check for common CI environment variables
	ciEnvVars := []string{
		"CI", "CONTINUOUS_INTEGRATION", "BUILD_NUMBER", "GITHUB_ACTIONS",
		"GITLAB_CI", "JENKINS_URL", "TRAVIS", "CIRCLECI", "BUILDKITE",
	}

	for _, envVar := range ciEnvVars {
		if os.Getenv(envVar) != "" {
			return true
		}
	}

	// Check if we're running in a container
	if _, err := os.Stat("/.dockerenv"); err == nil {
		return true
	}

	// Check for AppArmor restrictions (common in Ubuntu 23.10+).
	// The sysctl file is present whenever the kernel supports the knob, even
	// if the restriction is switched off ("0"), so its mere existence is not
	// enough: read the value and only treat a non-zero setting as restricted.
	// A read failure (file missing, permission denied) is treated as
	// "not restricted" so the sandbox stays on by default.
	if data, err := os.ReadFile("/proc/sys/kernel/apparmor_restrict_unprivileged_userns"); err == nil {
		if v := strings.TrimSpace(string(data)); v != "" && v != "0" {
			return true
		}
	}

	return false
}

cmd/import_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,4 +95,12 @@ func TestCleanMarkdownContent(t *testing.T) {
9595
}
9696
})
9797
}
98+
}
99+
100+
func TestIsRestrictedEnvironment(t *testing.T) {
101+
// Since we're running in GitHub Actions, this should return true
102+
result := isRestrictedEnvironment()
103+
if !result {
104+
t.Errorf("isRestrictedEnvironment() = %v, want true (running in CI)", result)
105+
}
98106
}

0 commit comments

Comments
 (0)