Skip to content

Commit 4cc4c33

Browse files
committed
Merge remote-tracking branch 'origin/main' into fcrdns
2 parents c6869b3 + 3c1d95d commit 4cc4c33

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+1500
-467
lines changed

.github/actions/spelling/allow.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ github
22
https
33
ssh
44
ubuntu
5-
workarounds
5+
workarounds

.github/actions/spelling/excludes.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,9 @@
8383
^\Q.github/FUNDING.yml\E$
8484
^\Q.github/workflows/spelling.yml\E$
8585
^data/crawlers/
86+
^docs/blog/tags\.yml$
8687
^docs/manifest/.*$
8788
^docs/static/\.nojekyll$
89+
^lib/policy/config/testdata/bad/unparseable\.json$
8890
ignore$
89-
robots.txt
91+
robots.txt

.github/actions/spelling/expect.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ chall
4444
challengemozilla
4545
checkpath
4646
checkresult
47-
chen
4847
chibi
4948
cidranger
5049
ckie
@@ -61,7 +60,6 @@ DDOS
6160
Debian
6261
debrpm
6362
decaymap
64-
decompiling
6563
Diffbot
6664
discordapp
6765
discordbot
@@ -188,6 +186,7 @@ ogtags
188186
omgili
189187
omgilibot
190188
openai
189+
opengraph
191190
openrc
192191
pag
193192
palemoon
@@ -218,6 +217,7 @@ rawler
218217
rcvar
219218
redir
220219
redirectscheme
220+
refactors
221221
relayd
222222
reputational
223223
reqmeta
@@ -261,6 +261,7 @@ techarohq
261261
templ
262262
templruntime
263263
testarea
264+
Thancred
264265
thoth
265266
thothmock
266267
Tik
@@ -300,6 +301,7 @@ xess
300301
xff
301302
XForwarded
302303
XNG
304+
XOB
303305
XReal
304306
yae
305307
YAMLTo

.github/actions/spelling/line_forbidden.patterns

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -273,14 +273,6 @@
273273
# Most people only have two hands. Reword.
274274
\b(?i)on the third hand\b
275275

276-
# Should be `Open Graph`
277-
# unless talking about a specific Open Graph implementation:
278-
# - Java
279-
# - Node
280-
# - Py
281-
# - Ruby
282-
\bOpenGraph\b
283-
284276
# Should be `OpenShift`
285277
\bOpenshift\b
286278

.github/actions/spelling/patterns.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,4 +131,4 @@ go install(?:\s+[a-z]+\.[-@\w/.]+)+
131131

132132
# hit-count: 1 file-count: 1
133133
# microsoft
134-
\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*
134+
\b(?:https?://|)(?:(?:(?:blogs|download\.visualstudio|docs|msdn2?|research)\.|)microsoft|blogs\.msdn)\.co(?:m|\.\w\w)/[-_a-zA-Z0-9()=./%]*

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ assets: deps
1818

1919
build: assets
2020
$(GO) build -o ./var/anubis ./cmd/anubis
21+
$(GO) build -o ./var/robots2policy ./cmd/robots2policy
2122
@echo "Anubis is now built to ./var/anubis"
2223

2324
lint: assets
@@ -27,6 +28,7 @@ lint: assets
2728

2829
prebaked-build:
2930
$(GO) build -o ./var/anubis -ldflags "-X 'github.com/TecharoHQ/anubis.Version=$(VERSION)'" ./cmd/anubis
31+
$(GO) build -o ./var/robots2policy -ldflags "-X 'github.com/TecharoHQ/anubis.Version=$(VERSION)'" ./cmd/robots2policy
3032

3133
test: assets
3234
$(GO) test ./...

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.19.1
1+
1.20.0-pre1

cmd/anubis/main.go

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -333,24 +333,29 @@ func main() {
333333
slog.Warn("REDIRECT_DOMAINS is not set, Anubis will only redirect to the same domain a request is coming from, see https://anubis.techaro.lol/docs/admin/configuration/redirect-domains")
334334
}
335335

336-
s, err := libanubis.New(libanubis.Options{
337-
BasePrefix: *basePrefix,
338-
StripBasePrefix: *stripBasePrefix,
339-
Next: rp,
340-
Policy: policy,
341-
ServeRobotsTXT: *robotsTxt,
342-
PrivateKey: priv,
343-
CookieDomain: *cookieDomain,
344-
CookieExpiration: *cookieExpiration,
345-
CookiePartitioned: *cookiePartitioned,
346-
OGPassthrough: *ogPassthrough,
347-
OGTimeToLive: *ogTimeToLive,
348-
RedirectDomains: redirectDomainsList,
349-
Target: *target,
350-
WebmasterEmail: *webmasterEmail,
351-
OGCacheConsidersHost: *ogCacheConsiderHost,
352-
FCrDNS: fdns,
336+
// If OpenGraph configuration values are not set in the config file, use the
337+
// values from flags / envvars.
338+
if !policy.OpenGraph.Enabled {
339+
policy.OpenGraph.Enabled = *ogPassthrough
340+
policy.OpenGraph.ConsiderHost = *ogCacheConsiderHost
341+
policy.OpenGraph.TimeToLive = *ogTimeToLive
342+
policy.OpenGraph.Override = map[string]string{}
343+
}
353344

345+
s, err := libanubis.New(libanubis.Options{
346+
FCrDNS: fdns,
347+
BasePrefix: *basePrefix,
348+
StripBasePrefix: *stripBasePrefix,
349+
Next: rp,
350+
Policy: policy,
351+
ServeRobotsTXT: *robotsTxt,
352+
PrivateKey: priv,
353+
CookieDomain: *cookieDomain,
354+
CookieExpiration: *cookieExpiration,
355+
CookiePartitioned: *cookiePartitioned,
356+
RedirectDomains: redirectDomainsList,
357+
Target: *target,
358+
WebmasterEmail: *webmasterEmail,
354359
})
355360
if err != nil {
356361
log.Fatalf("can't construct libanubis.Server: %v", err)

data/botPolicies.yaml

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ bots:
5656
- name: countries-with-aggressive-scrapers
5757
action: WEIGH
5858
geoip:
59-
counties:
59+
countries:
6060
- BR
6161
- CN
6262
weight:
@@ -84,10 +84,88 @@ bots:
8484

8585
dnsbl: false
8686

87+
# Open Graph passthrough configuration, see here for more information:
88+
# https://anubis.techaro.lol/docs/admin/configuration/open-graph/
89+
openGraph:
90+
# Enables Open Graph passthrough
91+
enabled: false
92+
# Enables the use of the HTTP host in the cache key; this enables
93+
# caching metadata for multiple HTTP hosts at once.
94+
considerHost: false
95+
# How long cached Open Graph metadata should last in memory
96+
ttl: 24h
97+
# # If set, return these Open Graph values instead of looking them up with
98+
# # the target service.
99+
# #
100+
# # Correlates to properties in https://ogp.me/
101+
# override:
102+
# # og:title is required, it is the title of the website
103+
# "og:title": "Techaro Anubis"
104+
# "og:description": >-
105+
# Anubis is a Web AI Firewall Utility that helps you fight the bots
106+
# away so that you can maintain uptime at work!
107+
# "description": >-
108+
# Anubis is a Web AI Firewall Utility that helps you fight the bots
109+
# away so that you can maintain uptime at work!
110+
87111
# By default, send HTTP 200 back to clients that either get issued a challenge
88112
# or a denial. This seems weird, but this is load-bearing due to the fact that
89113
# the most aggressive scraper bots seem to really, really, want an HTTP 200 and
90114
# will stop sending requests once they get it.
91115
status_codes:
92116
CHALLENGE: 200
93117
DENY: 200
118+
119+
# The weight thresholds for when to trigger individual challenges. Any
120+
# CHALLENGE will take precedence over this.
121+
#
122+
# A threshold has four configuration options:
123+
#
124+
# - name: the name that is reported down the stack and used for metrics
125+
# - expression: A CEL expression with the request weight in the variable
126+
# weight
127+
# - action: the Anubis action to apply, similar to in a bot policy
128+
# - challenge: which challenge to send to the user, similar to in a bot policy
129+
#
130+
# See https://anubis.techaro.lol/docs/admin/configuration/thresholds for more
131+
# information.
132+
thresholds:
133+
# By default Anubis ships with the following thresholds:
134+
- name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
135+
expression: weight <= 0 # a feather weighs zero units
136+
action: ALLOW # Allow the traffic through
137+
# For clients that had some weight reduced through custom rules, give them a
138+
# lightweight challenge.
139+
- name: mild-suspicion
140+
expression:
141+
all:
142+
- weight > 0
143+
- weight < 10
144+
action: CHALLENGE
145+
challenge:
146+
# https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
147+
algorithm: metarefresh
148+
difficulty: 1
149+
report_as: 1
150+
# For clients that are browser-like but have either gained points from custom rules or
151+
# report as a standard browser.
152+
- name: moderate-suspicion
153+
expression:
154+
all:
155+
- weight >= 10
156+
- weight < 20
157+
action: CHALLENGE
158+
challenge:
159+
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
160+
algorithm: fast
161+
difficulty: 2 # two leading zeros, very fast for most clients
162+
report_as: 2
163+
# For clients that are browser-like and have gained many points from custom rules
164+
- name: extreme-suspicion
165+
expression: weight >= 20
166+
action: CHALLENGE
167+
challenge:
168+
# https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
169+
algorithm: fast
170+
difficulty: 4
171+
report_as: 4

data/bots/ai-robots-txt.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
# Note: Blocks human-directed/non-training user agents
33
- name: "ai-robots-txt"
44
user_agent_regex: >-
5-
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|Andibot|anthropic-ai|Applebot|Applebot-Extended|bedrockbot|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient.com|Perplexity-User|PerplexityBot|PetalBot|PhindBot|QualifiedBot|QuillBot|quillbot.com|SBIntuitionsBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot
5+
AI2Bot|Ai2Bot-Dolma|aiHitBot|Amazonbot|Andibot|anthropic-ai|Applebot|Applebot-Extended|bedrockbot|Brightbot 1.0|Bytespider|CCBot|ChatGPT-User|Claude-SearchBot|Claude-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Cotoyogi|Crawlspace|Diffbot|DuckAssistBot|EchoboxBot|FacebookBot|facebookexternalhit|Factset_spyderbot|FirecrawlAgent|FriendlyCrawler|Google-CloudVertexBot|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|meta-externalagent|Meta-ExternalAgent|meta-externalfetcher|Meta-ExternalFetcher|MistralAI-User/1.0|MyCentralAIScraperBot|NovaAct|OAI-SearchBot|omgili|omgilibot|Operator|PanguBot|Panscient|panscient.com|Perplexity-User|PerplexityBot|PetalBot|PhindBot|Poseidon Research Crawler|QualifiedBot|QuillBot|quillbot.com|SBIntuitionsBot|Scrapy|SemrushBot|SemrushBot-BA|SemrushBot-CT|SemrushBot-OCOB|SemrushBot-SI|SemrushBot-SWA|Sidetrade indexer bot|TikTokSpider|Timpibot|VelenPublicWebCrawler|Webzio-Extended|wpbot|YandexAdditional|YandexAdditionalBot|YouBot
66
action: DENY

0 commit comments

Comments
 (0)