Skip to content

Commit bbb483c

Browse files
committed
✨ feat: making js rendered requests
1 parent f7b789d commit bbb483c

File tree

3 files changed

+45
-21
lines changed

3 files changed

+45
-21
lines changed

README.md

+26-9
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,30 @@ Usage:
4545
goclone <url> [flags]
4646
4747
Flags:
48-
-c, --cookie if set true, cookies won't send
49-
-h, --help help for goclone
50-
-o, --open automatically open project in default browser
51-
-p, --proxy_string string proxy connection string
52-
-r, --robots disable robots.txt checks
53-
-s, --serve serve the generated files using gofiber
54-
-P, --servePort int serve port number (default 8088)
55-
-u, --user_agent string custom User-Agent (default "goclone")
56-
-v, --version version for goclone
48+
-b, --browser_endpoint string chrome headless browser WS endpoint
49+
-c, --cookie if set true, cookies won't send
50+
-h, --help help for goclone
51+
-o, --open automatically open project in default browser
52+
-p, --proxy_string string proxy connection string
53+
-r, --robots disable robots.txt checks
54+
-s, --serve serve the generated files using gofiber
55+
-P, --servePort int serve port number (default 8088)
56+
-u, --user_agent string custom User-Agent (default "goclone")
57+
-v, --version version for goclone
5758
```
59+
60+
## Making JS Rendered Requests
61+
62+
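JS-rendered requests can be made using the `-b` flag, which takes the WebSocket endpoint of a headless Chrome browser. For example, start the `chromedp/headless-shell` Docker image: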
63+
64+
65+
``` bash
66+
docker run -d -p 9222:9222 --rm --name headless-shell chromedp/headless-shell
67+
```
68+
69+
Then run goclone, passing the browser's WebSocket endpoint with `-b`:
70+
71+
```bash
72+
goclone -b "ws://localhost:9222" https://domain.com
73+
```
74+

cmd/main.go

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ func main() {
5353
pf.StringVarP(&flags.UserAgent, "user_agent", "u", "goclone", "custom User-Agent")
5454
pf.BoolVarP(&flags.Cookies, "cookie", "c", false, "if set true, cookies won't send")
5555
pf.BoolVarP(&flags.Robots, "robots", "r", false, "disable robots.txt checks")
56+
pf.StringVarP(&flags.BrowserEndpoint, "browser_endpoint", "b", "", "chrome headless browser WS endpoint")
5657

5758
if err := rootCmd.Execute(); err != nil {
5859
log.Fatal(err)

pkg/crawler/crawler.go

+18-12
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,14 @@ import (
2121
)
2222

2323
type Flags struct {
24-
Open bool
25-
Serve bool
26-
ServePort int
27-
UserAgent string
28-
ProxyString string
29-
Cookies bool
30-
Robots bool
24+
Open bool
25+
Serve bool
26+
ServePort int
27+
UserAgent string
28+
ProxyString string
29+
Cookies bool
30+
Robots bool
31+
BrowserEndpoint string
3132
}
3233

3334
type filesBase struct {
@@ -72,6 +73,12 @@ func CloneSite(ctx context.Context, args []string, flag Flags) error {
7273
if flag.ProxyString != "" {
7374
geziyorOptions.ProxyFunc = client.RoundRobinProxy(flag.ProxyString)
7475
}
76+
if flag.BrowserEndpoint != "" {
77+
geziyorOptions.BrowserEndpoint = flag.BrowserEndpoint
78+
geziyorOptions.StartRequestsFunc = func(g *geziyor.Geziyor) {
79+
g.GetRendered(domain, g.Opt.ParseFunc)
80+
}
81+
}
7582

7683
geziyor.NewGeziyor(geziyorOptions).Start()
7784

@@ -119,8 +126,7 @@ func quotesParse(g *geziyor.Geziyor, r *client.Response) {
119126
fmt.Println("Css found", "-->", parsedURL)
120127
if !files.css.Contains(parsedURL.Path) {
121128
files.css = append(files.css, parsedURL.Path)
122-
netutil.Extractor(projectURL.String()+parsedURL.Path, projectPath)
123-
129+
go netutil.Extractor(projectURL.String()+parsedURL.Path, projectPath)
124130
g.Get(r.JoinURL(projectURL.String()+parsedURL.Path), parseCSS)
125131
}
126132

@@ -142,7 +148,7 @@ func quotesParse(g *geziyor.Geziyor, r *client.Response) {
142148
fmt.Println("Js found", "-->", parsedURL)
143149
if !files.js.Contains(parsedURL.Path) {
144150
files.js = append(files.js, parsedURL.Path)
145-
netutil.Extractor(projectURL.String()+parsedURL.Path, projectPath)
151+
go netutil.Extractor(projectURL.String()+parsedURL.Path, projectPath)
146152
}
147153

148154
body = strings.Replace(body, data, "/assets/js/"+filepath.Base(data), -1)
@@ -162,7 +168,7 @@ func quotesParse(g *geziyor.Geziyor, r *client.Response) {
162168
fmt.Println("Js found", "-->", parsedURL)
163169
if !files.js.Contains(parsedURL.Path) {
164170
files.js = append(files.js, parsedURL.Path)
165-
netutil.Extractor(projectURL.String()+parsedURL.Path, projectPath)
171+
go netutil.Extractor(projectURL.String()+parsedURL.Path, projectPath)
166172
}
167173

168174
body = strings.Replace(body, data, "/assets/js/"+filepath.Base(data), -1)
@@ -186,7 +192,7 @@ func quotesParse(g *geziyor.Geziyor, r *client.Response) {
186192
fmt.Println("Img found", "-->", parsedURL)
187193
if !files.img.Contains(parsedURL.Path) {
188194
files.img = append(files.img, parsedURL.Path)
189-
netutil.Extractor(projectURL.String()+parsedURL.Path, projectPath)
195+
go netutil.Extractor(projectURL.String()+parsedURL.Path, projectPath)
190196
}
191197

192198
body = strings.Replace(body, data, "/assets/img/"+filepath.Base(data), -1)

0 commit comments

Comments
 (0)