summary | refs | log | tree | commit | diff | stats
path: root/modules/sitemap
diff options
context:
space:
mode:
author Daniel Baumann <daniel@debian.org> 2024-10-18 20:33:49 +0200
committer Daniel Baumann <daniel@debian.org> 2024-10-18 20:33:49 +0200
commit dd136858f1ea40ad3c94191d647487fa4f31926c (patch)
tree 58fec94a7b2a12510c9664b21793f1ed560c6518 /modules/sitemap
parent Initial commit. (diff)
download forgejo-dd136858f1ea40ad3c94191d647487fa4f31926c.tar.xz
download forgejo-dd136858f1ea40ad3c94191d647487fa4f31926c.zip
Adding upstream version 9.0.0. (refs: HEAD, upstream/9.0.0, upstream, debian)
Signed-off-by: Daniel Baumann <daniel@debian.org>
Diffstat (limited to 'modules/sitemap')
-rw-r--r-- modules/sitemap/sitemap.go 82
-rw-r--r-- modules/sitemap/sitemap_test.go 167
2 files changed, 249 insertions, 0 deletions
diff --git a/modules/sitemap/sitemap.go b/modules/sitemap/sitemap.go
new file mode 100644
index 0000000..280ca1d
--- /dev/null
+++ b/modules/sitemap/sitemap.go
@@ -0,0 +1,82 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package sitemap
+
+import (
+ "bytes"
+ "encoding/xml"
+ "fmt"
+ "io"
+ "time"
+)
+
+// Limits enforced by WriteTo and the XML names used for the root element.
+const (
+ // sitemapFileLimit is the largest encoded document, in bytes, that
+ // WriteTo will emit; anything bigger is rejected with an error.
+ sitemapFileLimit = 50 * 1024 * 1024 // the maximum size of a sitemap file
+ // urlsLimit caps the number of entries allowed in either Sitemap.URLs
+ // or Sitemap.Sitemaps; WriteTo checks both slices against it.
+ urlsLimit = 50000
+
+ // schemaURL is the value written to the root element's xmlns attribute.
+ schemaURL = "http://www.sitemaps.org/schemas/sitemap/0.9"
+ // urlsetName is the root element name set by NewSitemap.
+ urlsetName = "urlset"
+ // sitemapindexName is the root element name set by NewSitemapIndex;
+ // Add compares against it to decide which slice receives an entry.
+ sitemapindexName = "sitemapindex"
+)
+
+// URL represents a single sitemap entry.
+//
+// The same type is used for entries of a regular sitemap and for the
+// sub-sitemap references of a sitemap index; the enclosing element name
+// comes from the Sitemap field the entry is stored in.
+type URL struct {
+ // URL is the entry's location, encoded as the <loc> child element.
+ URL string `xml:"loc"`
+ // LastMod is the optional modification time, encoded as <lastmod>.
+ // A nil pointer omits the element entirely.
+ LastMod *time.Time `xml:"lastmod,omitempty"`
+}
+
+// Sitemap represents a sitemap document: either a regular sitemap or a
+// sitemap index, depending on the root element name stored in XMLName.
+// Use NewSitemap or NewSitemapIndex to obtain a correctly initialised value.
+type Sitemap struct {
+ // XMLName holds the root element name ("urlset" or "sitemapindex").
+ XMLName xml.Name
+ // Namespace is emitted as the xmlns attribute of the root element.
+ Namespace string `xml:"xmlns,attr"`
+
+ // URLs are encoded as <url> elements; Add appends here unless the
+ // document is a sitemap index.
+ URLs []URL `xml:"url"`
+ // Sitemaps are encoded as <sitemap> elements; Add appends here when
+ // the document is a sitemap index.
+ Sitemaps []URL `xml:"sitemap"`
+}
+
+// NewSitemap returns an empty sitemap whose root element is <urlset> and
+// whose xmlns attribute is the sitemap schema URL.
+func NewSitemap() *Sitemap {
+	root := xml.Name{Local: urlsetName}
+	sm := Sitemap{XMLName: root, Namespace: schemaURL}
+	return &sm
+}
+
+// NewSitemapIndex returns an empty sitemap index whose root element is
+// <sitemapindex> and whose xmlns attribute is the sitemap schema URL.
+func NewSitemapIndex() *Sitemap {
+	root := xml.Name{Local: sitemapindexName}
+	idx := Sitemap{XMLName: root, Namespace: schemaURL}
+	return &idx
+}
+
+// Add appends u to the document. Entries go to Sitemaps when the receiver
+// is a sitemap index (root element "sitemapindex") and to URLs otherwise.
+func (s *Sitemap) Add(u URL) {
+	if s.XMLName.Local != sitemapindexName {
+		s.URLs = append(s.URLs, u)
+		return
+	}
+	s.Sitemaps = append(s.Sitemaps, u)
+}
+
+// WriteTo encodes the sitemap as XML (header, document, trailing newline)
+// and writes it to w, returning the number of bytes written.
+//
+// Nothing is written and an error is returned when either entry slice
+// holds more than urlsLimit items, when encoding fails, or when the encoded
+// document exceeds sitemapFileLimit bytes.
+func (s *Sitemap) WriteTo(w io.Writer) (int64, error) {
+	// Reject oversized documents before doing any encoding work.
+	switch {
+	case len(s.URLs) > urlsLimit:
+		return 0, fmt.Errorf("The sitemap contains %d URLs, but only %d are allowed", len(s.URLs), urlsLimit)
+	case len(s.Sitemaps) > urlsLimit:
+		return 0, fmt.Errorf("The sitemap contains %d sub-sitemaps, but only %d are allowed", len(s.Sitemaps), urlsLimit)
+	}
+
+	// Encode into memory first so the size limit can be checked before
+	// any byte reaches w.
+	doc := bytes.NewBufferString(xml.Header)
+	enc := xml.NewEncoder(doc)
+	if err := enc.Encode(s); err != nil {
+		return 0, err
+	}
+	if err := doc.WriteByte('\n'); err != nil {
+		return 0, err
+	}
+
+	if size := doc.Len(); size > sitemapFileLimit {
+		return 0, fmt.Errorf("The sitemap has %d bytes, but only %d are allowed", size, sitemapFileLimit)
+	}
+	return doc.WriteTo(w)
+}
diff --git a/modules/sitemap/sitemap_test.go b/modules/sitemap/sitemap_test.go
new file mode 100644
index 0000000..39a2178
--- /dev/null
+++ b/modules/sitemap/sitemap_test.go
@@ -0,0 +1,167 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package sitemap
+
+import (
+ "bytes"
+ "encoding/xml"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestNewSitemap(t *testing.T) {
+ ts := time.Unix(1651322008, 0).UTC()
+
+ tests := []struct {
+ name string
+ urls []URL
+ want string
+ wantErr string
+ }{
+ {
+ name: "empty",
+ urls: []URL{},
+ want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "" +
+ "</urlset>\n",
+ },
+ {
+ name: "regular",
+ urls: []URL{
+ {URL: "https://gitea.io/test1", LastMod: &ts},
+ },
+ want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>" +
+ "</urlset>\n",
+ },
+ {
+ name: "without lastmod",
+ urls: []URL{
+ {URL: "https://gitea.io/test1"},
+ },
+ want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "<url><loc>https://gitea.io/test1</loc></url>" +
+ "</urlset>\n",
+ },
+ {
+ name: "multiple",
+ urls: []URL{
+ {URL: "https://gitea.io/test1", LastMod: &ts},
+ {URL: "https://gitea.io/test2", LastMod: nil},
+ },
+ want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>" +
+ "<url><loc>https://gitea.io/test2</loc></url>" +
+ "</urlset>\n",
+ },
+ {
+ name: "too many urls",
+ urls: make([]URL, 50001),
+ wantErr: "The sitemap contains 50001 URLs, but only 50000 are allowed",
+ },
+ {
+ name: "too big file",
+ urls: []URL{
+ {URL: strings.Repeat("b", 50*1024*1024+1)},
+ },
+ wantErr: "The sitemap has 52428932 bytes, but only 52428800 are allowed",
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ s := NewSitemap()
+ for _, url := range tt.urls {
+ s.Add(url)
+ }
+ buf := &bytes.Buffer{}
+ _, err := s.WriteTo(buf)
+ if tt.wantErr != "" {
+ assert.EqualError(t, err, tt.wantErr)
+ } else {
+ require.NoError(t, err)
+ assert.Equalf(t, tt.want, buf.String(), "NewSitemap()")
+ }
+ })
+ }
+}
+
+func TestNewSitemapIndex(t *testing.T) {
+ ts := time.Unix(1651322008, 0).UTC()
+
+ tests := []struct {
+ name string
+ urls []URL
+ want string
+ wantErr string
+ }{
+ {
+ name: "empty",
+ urls: []URL{},
+ want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "" +
+ "</sitemapindex>\n",
+ },
+ {
+ name: "regular",
+ urls: []URL{
+ {URL: "https://gitea.io/test1", LastMod: &ts},
+ },
+ want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "<sitemap><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></sitemap>" +
+ "</sitemapindex>\n",
+ },
+ {
+ name: "without lastmod",
+ urls: []URL{
+ {URL: "https://gitea.io/test1"},
+ },
+ want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "<sitemap><loc>https://gitea.io/test1</loc></sitemap>" +
+ "</sitemapindex>\n",
+ },
+ {
+ name: "multiple",
+ urls: []URL{
+ {URL: "https://gitea.io/test1", LastMod: &ts},
+ {URL: "https://gitea.io/test2", LastMod: nil},
+ },
+ want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "<sitemap><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></sitemap>" +
+ "<sitemap><loc>https://gitea.io/test2</loc></sitemap>" +
+ "</sitemapindex>\n",
+ },
+ {
+ name: "too many sitemaps",
+ urls: make([]URL, 50001),
+ wantErr: "The sitemap contains 50001 sub-sitemaps, but only 50000 are allowed",
+ },
+ {
+ name: "too big file",
+ urls: []URL{
+ {URL: strings.Repeat("b", 50*1024*1024+1)},
+ },
+ wantErr: "The sitemap has 52428952 bytes, but only 52428800 are allowed",
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ s := NewSitemapIndex()
+ for _, url := range tt.urls {
+ s.Add(url)
+ }
+ buf := &bytes.Buffer{}
+ _, err := s.WriteTo(buf)
+ if tt.wantErr != "" {
+ assert.EqualError(t, err, tt.wantErr)
+ } else {
+ require.NoError(t, err)
+ assert.Equalf(t, tt.want, buf.String(), "NewSitemapIndex()")
+ }
+ })
+ }
+}