44 "context"
55 "fmt"
66 "net/url"
7+ "os"
78 "strings"
89 "time"
910
@@ -151,18 +152,22 @@ func runImport(cmd *cobra.Command, args []string) error {
151152
152153// extractPageContent uses chromedp to extract title and content from a webpage
153154func extractPageContent (pageURL string ) (title , content string , err error ) {
154- // Configure Chrome options for sandboxed environments
155+ // Configure Chrome options with security considerations
156+ // Start with default options that include necessary headless browser settings
155157 opts := append (chromedp .DefaultExecAllocatorOptions [:],
156- chromedp .NoSandbox ,
157- chromedp .DisableGPU ,
158- chromedp .Flag ("disable-dev-shm-usage" , true ),
159- chromedp .Flag ("disable-web-security" , true ),
160- chromedp .Flag ("ignore-certificate-errors" , true ),
161- chromedp .Flag ("ignore-ssl-errors" , true ),
162- chromedp .Flag ("ignore-certificate-errors-spki-list" , true ),
163- chromedp .UserAgent ("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" ),
158+ // Only add minimal flags needed for CI/container environments
159+ // NoSandbox is only used if we detect we're in a restricted environment
160+ chromedp .DisableGPU , // Safe to disable GPU in headless mode
161+ // Use a realistic user agent for better compatibility
162+ chromedp .UserAgent ("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" ),
164163 )
165164
165+ // Only disable sandbox if we're in a restricted environment (CI/containers)
166+ // This is detected heuristically from CI environment variables, the Docker marker file, and the AppArmor user-namespace sysctl
167+ if isRestrictedEnvironment () {
168+ opts = append (opts , chromedp .NoSandbox )
169+ }
170+
166171 // Create allocator context
167172 allocCtx , cancel := chromedp .NewExecAllocator (context .Background (), opts ... )
168173 defer cancel ()
@@ -293,4 +298,32 @@ func cleanMarkdownContent(content string) string {
293298 }
294299
295300 return strings .Join (cleanLines , "\n " )
301+ }
302+
// isRestrictedEnvironment reports whether the process appears to be running
// somewhere Chrome's sandbox cannot start (CI runners, Docker containers, or
// hosts where AppArmor blocks unprivileged user namespaces), so the caller can
// fall back to launching Chrome without the sandbox.
//
// Detection is best-effort and relies only on environment variables and
// well-known marker files. Any failure to read a marker is treated as
// "not restricted" so that the sandbox stays enabled by default.
func isRestrictedEnvironment() bool {
	// Variables exported by common CI systems; any non-empty value counts.
	ciEnvVars := []string{
		"CI", "CONTINUOUS_INTEGRATION", "BUILD_NUMBER", "GITHUB_ACTIONS",
		"GITLAB_CI", "JENKINS_URL", "TRAVIS", "CIRCLECI", "BUILDKITE",
	}

	for _, envVar := range ciEnvVars {
		if os.Getenv(envVar) != "" {
			return true
		}
	}

	// Docker places this marker file at the root of the container filesystem.
	if _, err := os.Stat("/.dockerenv"); err == nil {
		return true
	}

	// The sysctl file is present on every kernel that carries the AppArmor
	// user-namespace patch, whether or not the restriction is switched on, so
	// its mere existence proves nothing. Only a value of "1" means unprivileged
	// user namespaces (which the Chrome sandbox relies on) are actually blocked.
	data, err := os.ReadFile("/proc/sys/kernel/apparmor_restrict_unprivileged_userns")
	if err != nil {
		return false
	}
	return strings.TrimSpace(string(data)) == "1"
}
0 commit comments