forked from PuerkitoBio/gocrawl
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoptions.go
More file actions
52 lines (47 loc) · 1.43 KB
/
options.go
File metadata and controls
52 lines (47 loc) · 1.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
package gocrawl
import (
"time"
"github.com/PuerkitoBio/purell"
)
// Package-level defaults. NewOptions copies each of these into the matching
// Options field.
const (
	// User agent strings.
	DefaultUserAgent      string = "Mozilla/5.0 (Windows NT 6.1; rv:15.0) gocrawl/0.4 Gecko/20120716 Firefox/15.0a2"
	DefaultRobotUserAgent string = "Googlebot (gocrawl v0.4)"

	// Buffer sizing.
	DefaultEnqueueChanBuffer int = 100
	DefaultHostBufferFactor  int = 10

	// Timing.
	DefaultCrawlDelay time.Duration = time.Second * 5
	DefaultIdleTTL    time.Duration = time.Second * 10

	// URL normalization (see the purell package for the flag definitions).
	DefaultNormalizationFlags purell.NormalizationFlags = purell.FlagsAllGreedy
)
// The Options available to control and customize the crawling process.
//
// Use NewOptions to obtain a value pre-populated with the package defaults.
// NOTE(review): any caller that builds Options with an unkeyed composite
// literal depends on the field order below; append new fields rather than
// reordering existing ones.
type Options struct {
	// UserAgent defaults to DefaultUserAgent.
	// Presumably sent with page requests; verify against the fetch code.
	UserAgent string
	// RobotUserAgent defaults to DefaultRobotUserAgent.
	// Presumably used for robots.txt handling; verify against the fetch code.
	RobotUserAgent string
	// MaxVisits is set to 0 by NewOptions.
	// NOTE(review): 0 looks like "no limit" — confirm in the crawler loop.
	MaxVisits int
	// EnqueueChanBuffer defaults to DefaultEnqueueChanBuffer.
	EnqueueChanBuffer int
	// HostBufferFactor defaults to DefaultHostBufferFactor.
	HostBufferFactor int
	// CrawlDelay defaults to DefaultCrawlDelay.
	CrawlDelay time.Duration // Applied per host
	// WorkerIdleTTL defaults to DefaultIdleTTL.
	WorkerIdleTTL time.Duration
	// SameHostOnly is set to true by NewOptions.
	SameHostOnly bool
	// HeadBeforeGet is set to false by NewOptions.
	HeadBeforeGet bool
	// URLNormalizationFlags holds purell normalization flags; defaults to
	// DefaultNormalizationFlags.
	URLNormalizationFlags purell.NormalizationFlags
	// LogFlags is set to LogError by NewOptions.
	LogFlags LogFlags
	// Extender is the value passed to NewOptions; it has no package default.
	Extender Extender
}
// NewOptions returns a pointer to a new Options value holding the package
// defaults, with Extender set to ext.
//
// The defaults are: DefaultUserAgent, DefaultRobotUserAgent, MaxVisits 0,
// DefaultEnqueueChanBuffer, DefaultHostBufferFactor, DefaultCrawlDelay,
// DefaultIdleTTL, SameHostOnly true, HeadBeforeGet false,
// DefaultNormalizationFlags and LogError.
func NewOptions(ext Extender) *Options {
	// Keyed fields keep every default bound to the intended field even if
	// Options later gains or reorders fields; a positional literal would
	// either stop compiling or silently assign values to the wrong field.
	return &Options{
		UserAgent:             DefaultUserAgent,
		RobotUserAgent:        DefaultRobotUserAgent,
		MaxVisits:             0,
		EnqueueChanBuffer:     DefaultEnqueueChanBuffer,
		HostBufferFactor:      DefaultHostBufferFactor,
		CrawlDelay:            DefaultCrawlDelay,
		WorkerIdleTTL:         DefaultIdleTTL,
		SameHostOnly:          true,
		HeadBeforeGet:         false,
		URLNormalizationFlags: DefaultNormalizationFlags,
		LogFlags:              LogError,
		Extender:              ext,
	}
}