Skip to content

Commit d34f3a2

Browse files
wolfogrelafriksdelvh
authored
Fix sitemap (#22272)
Fix #22270. Related to #18407. The old code treated both sitemap and sitemap index as the format like: ```xml ... <url> <loc>http://localhost:3000/explore/users/sitemap-1.xml</loc> </url> ... ``` Actually, it's incorrect for sitemap index, it should be: ```xml ... <sitemap> <loc>http://localhost:3000/explore/users/sitemap-1.xml</loc> </sitemap> ... ``` See https://www.sitemaps.org/protocol.html Co-authored-by: Lauris BH <[email protected]> Co-authored-by: delvh <[email protected]>
1 parent 9dcaf14 commit d34f3a2

File tree

2 files changed

+170
-66
lines changed

2 files changed

+170
-66
lines changed

modules/sitemap/sitemap.go

+29-15
Original file line numberDiff line numberDiff line change
@@ -11,48 +11,62 @@ import (
1111
"time"
1212
)
1313

14-
// sitemapFileLimit contains the maximum size of a sitemap file
15-
const sitemapFileLimit = 50 * 1024 * 1024
14+
const (
15+
sitemapFileLimit = 50 * 1024 * 1024 // the maximum size of a sitemap file
16+
urlsLimit = 50000
1617

17-
// Url represents a single sitemap entry
18+
schemaURL = "http://www.sitemaps.org/schemas/sitemap/0.9"
19+
urlsetName = "urlset"
20+
sitemapindexName = "sitemapindex"
21+
)
22+
23+
// URL represents a single sitemap entry
1824
type URL struct {
1925
URL string `xml:"loc"`
2026
LastMod *time.Time `xml:"lastmod,omitempty"`
2127
}
2228

23-
// SitemapUrl represents a sitemap
29+
// Sitemap represents a sitemap
2430
type Sitemap struct {
2531
XMLName xml.Name
2632
Namespace string `xml:"xmlns,attr"`
2733

28-
URLs []URL `xml:"url"`
34+
URLs []URL `xml:"url"`
35+
Sitemaps []URL `xml:"sitemap"`
2936
}
3037

3138
// NewSitemap creates a sitemap
3239
func NewSitemap() *Sitemap {
3340
return &Sitemap{
34-
XMLName: xml.Name{Local: "urlset"},
35-
Namespace: "http://www.sitemaps.org/schemas/sitemap/0.9",
41+
XMLName: xml.Name{Local: urlsetName},
42+
Namespace: schemaURL,
3643
}
3744
}
3845

39-
// NewSitemap creates a sitemap index.
46+
// NewSitemapIndex creates a sitemap index.
4047
func NewSitemapIndex() *Sitemap {
4148
return &Sitemap{
42-
XMLName: xml.Name{Local: "sitemapindex"},
43-
Namespace: "http://www.sitemaps.org/schemas/sitemap/0.9",
49+
XMLName: xml.Name{Local: sitemapindexName},
50+
Namespace: schemaURL,
4451
}
4552
}
4653

4754
// Add adds a URL to the sitemap
4855
func (s *Sitemap) Add(u URL) {
49-
s.URLs = append(s.URLs, u)
56+
if s.XMLName.Local == sitemapindexName {
57+
s.Sitemaps = append(s.Sitemaps, u)
58+
} else {
59+
s.URLs = append(s.URLs, u)
60+
}
5061
}
5162

52-
// Write writes the sitemap to a response
63+
// WriteTo writes the sitemap to a response
5364
func (s *Sitemap) WriteTo(w io.Writer) (int64, error) {
54-
if len(s.URLs) > 50000 {
55-
return 0, fmt.Errorf("The sitemap contains too many URLs: %d", len(s.URLs))
65+
if l := len(s.URLs); l > urlsLimit {
66+
return 0, fmt.Errorf("The sitemap contains %d URLs, but only %d are allowed", l, urlsLimit)
67+
}
68+
if l := len(s.Sitemaps); l > urlsLimit {
69+
return 0, fmt.Errorf("The sitemap contains %d sub-sitemaps, but only %d are allowed", l, urlsLimit)
5670
}
5771
buf := bytes.NewBufferString(xml.Header)
5872
if err := xml.NewEncoder(buf).Encode(s); err != nil {
@@ -62,7 +76,7 @@ func (s *Sitemap) WriteTo(w io.Writer) (int64, error) {
6276
return 0, err
6377
}
6478
if buf.Len() > sitemapFileLimit {
65-
return 0, fmt.Errorf("The sitemap is too big: %d", buf.Len())
79+
return 0, fmt.Errorf("The sitemap has %d bytes, but only %d are allowed", buf.Len(), sitemapFileLimit)
6680
}
6781
return buf.WriteTo(w)
6882
}

modules/sitemap/sitemap_test.go

+141-51
Original file line numberDiff line numberDiff line change
@@ -6,71 +6,161 @@ package sitemap
66
import (
77
"bytes"
88
"encoding/xml"
9-
"fmt"
109
"strings"
1110
"testing"
1211
"time"
1312

1413
"github.com/stretchr/testify/assert"
1514
)
1615

17-
func TestOk(t *testing.T) {
18-
testReal := func(s *Sitemap, name string, urls []URL, expected string) {
19-
for _, url := range urls {
20-
s.Add(url)
21-
}
22-
buf := &bytes.Buffer{}
23-
_, err := s.WriteTo(buf)
24-
assert.NoError(t, nil, err)
25-
assert.Equal(t, xml.Header+"<"+name+" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"+expected+"</"+name+">\n", buf.String())
16+
func TestNewSitemap(t *testing.T) {
17+
ts := time.Unix(1651322008, 0).UTC()
18+
19+
tests := []struct {
20+
name string
21+
urls []URL
22+
want string
23+
wantErr string
24+
}{
25+
{
26+
name: "empty",
27+
urls: []URL{},
28+
want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
29+
"" +
30+
"</urlset>\n",
31+
},
32+
{
33+
name: "regular",
34+
urls: []URL{
35+
{URL: "https://gitea.io/test1", LastMod: &ts},
36+
},
37+
want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
38+
"<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>" +
39+
"</urlset>\n",
40+
},
41+
{
42+
name: "without lastmod",
43+
urls: []URL{
44+
{URL: "https://gitea.io/test1"},
45+
},
46+
want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
47+
"<url><loc>https://gitea.io/test1</loc></url>" +
48+
"</urlset>\n",
49+
},
50+
{
51+
name: "multiple",
52+
urls: []URL{
53+
{URL: "https://gitea.io/test1", LastMod: &ts},
54+
{URL: "https://gitea.io/test2", LastMod: nil},
55+
},
56+
want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
57+
"<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>" +
58+
"<url><loc>https://gitea.io/test2</loc></url>" +
59+
"</urlset>\n",
60+
},
61+
{
62+
name: "too many urls",
63+
urls: make([]URL, 50001),
64+
wantErr: "The sitemap contains 50001 URLs, but only 50000 are allowed",
65+
},
66+
{
67+
name: "too big file",
68+
urls: []URL{
69+
{URL: strings.Repeat("b", 50*1024*1024+1)},
70+
},
71+
wantErr: "The sitemap has 52428932 bytes, but only 52428800 are allowed",
72+
},
2673
}
27-
test := func(urls []URL, expected string) {
28-
testReal(NewSitemap(), "urlset", urls, expected)
29-
testReal(NewSitemapIndex(), "sitemapindex", urls, expected)
74+
for _, tt := range tests {
75+
t.Run(tt.name, func(t *testing.T) {
76+
s := NewSitemap()
77+
for _, url := range tt.urls {
78+
s.Add(url)
79+
}
80+
buf := &bytes.Buffer{}
81+
_, err := s.WriteTo(buf)
82+
if tt.wantErr != "" {
83+
assert.EqualError(t, err, tt.wantErr)
84+
} else {
85+
assert.NoError(t, err)
86+
assert.Equalf(t, tt.want, buf.String(), "NewSitemap()")
87+
}
88+
})
3089
}
90+
}
3191

92+
func TestNewSitemapIndex(t *testing.T) {
3293
ts := time.Unix(1651322008, 0).UTC()
3394

34-
test(
35-
[]URL{},
36-
"",
37-
)
38-
test(
39-
[]URL{
40-
{URL: "https://gitea.io/test1", LastMod: &ts},
95+
tests := []struct {
96+
name string
97+
urls []URL
98+
want string
99+
wantErr string
100+
}{
101+
{
102+
name: "empty",
103+
urls: []URL{},
104+
want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
105+
"" +
106+
"</sitemapindex>\n",
41107
},
42-
"<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>",
43-
)
44-
test(
45-
[]URL{
46-
{URL: "https://gitea.io/test2", LastMod: nil},
108+
{
109+
name: "regular",
110+
urls: []URL{
111+
{URL: "https://gitea.io/test1", LastMod: &ts},
112+
},
113+
want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
114+
"<sitemap><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></sitemap>" +
115+
"</sitemapindex>\n",
47116
},
48-
"<url><loc>https://gitea.io/test2</loc></url>",
49-
)
50-
test(
51-
[]URL{
52-
{URL: "https://gitea.io/test1", LastMod: &ts},
53-
{URL: "https://gitea.io/test2", LastMod: nil},
117+
{
118+
name: "without lastmod",
119+
urls: []URL{
120+
{URL: "https://gitea.io/test1"},
121+
},
122+
want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
123+
"<sitemap><loc>https://gitea.io/test1</loc></sitemap>" +
124+
"</sitemapindex>\n",
125+
},
126+
{
127+
name: "multiple",
128+
urls: []URL{
129+
{URL: "https://gitea.io/test1", LastMod: &ts},
130+
{URL: "https://gitea.io/test2", LastMod: nil},
131+
},
132+
want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
133+
"<sitemap><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></sitemap>" +
134+
"<sitemap><loc>https://gitea.io/test2</loc></sitemap>" +
135+
"</sitemapindex>\n",
136+
},
137+
{
138+
name: "too many sitemaps",
139+
urls: make([]URL, 50001),
140+
wantErr: "The sitemap contains 50001 sub-sitemaps, but only 50000 are allowed",
141+
},
142+
{
143+
name: "too big file",
144+
urls: []URL{
145+
{URL: strings.Repeat("b", 50*1024*1024+1)},
146+
},
147+
wantErr: "The sitemap has 52428952 bytes, but only 52428800 are allowed",
54148
},
55-
"<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>"+
56-
"<url><loc>https://gitea.io/test2</loc></url>",
57-
)
58-
}
59-
60-
func TestTooManyURLs(t *testing.T) {
61-
s := NewSitemap()
62-
for i := 0; i < 50001; i++ {
63-
s.Add(URL{URL: fmt.Sprintf("https://gitea.io/test%d", i)})
64149
}
65-
buf := &bytes.Buffer{}
66-
_, err := s.WriteTo(buf)
67-
assert.EqualError(t, err, "The sitemap contains too many URLs: 50001")
68-
}
69-
70-
func TestSitemapTooBig(t *testing.T) {
71-
s := NewSitemap()
72-
s.Add(URL{URL: strings.Repeat("b", sitemapFileLimit)})
73-
buf := &bytes.Buffer{}
74-
_, err := s.WriteTo(buf)
75-
assert.EqualError(t, err, "The sitemap is too big: 52428931")
150+
for _, tt := range tests {
151+
t.Run(tt.name, func(t *testing.T) {
152+
s := NewSitemapIndex()
153+
for _, url := range tt.urls {
154+
s.Add(url)
155+
}
156+
buf := &bytes.Buffer{}
157+
_, err := s.WriteTo(buf)
158+
if tt.wantErr != "" {
159+
assert.EqualError(t, err, tt.wantErr)
160+
} else {
161+
assert.NoError(t, err)
162+
assert.Equalf(t, tt.want, buf.String(), "NewSitemapIndex()")
163+
}
164+
})
165+
}
76166
}

0 commit comments

Comments
 (0)