summaryrefslogtreecommitdiffstats
path: root/modules/charset/htmlstream.go
diff options
context:
space:
mode:
Diffstat (limited to 'modules/charset/htmlstream.go')
-rw-r--r--modules/charset/htmlstream.go200
1 files changed, 200 insertions, 0 deletions
diff --git a/modules/charset/htmlstream.go b/modules/charset/htmlstream.go
new file mode 100644
index 0000000..61f2912
--- /dev/null
+++ b/modules/charset/htmlstream.go
@@ -0,0 +1,200 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package charset
+
+import (
+ "fmt"
+ "io"
+
+ "golang.org/x/net/html"
+)
+
+// HTMLStreamer represents a SAX-like interface for HTML
+type HTMLStreamer interface {
+ Error(err error) error
+ Doctype(data string) error
+ Comment(data string) error
+ StartTag(data string, attrs ...html.Attribute) error
+ SelfClosingTag(data string, attrs ...html.Attribute) error
+ EndTag(data string) error
+ Text(data string) error
+}
+
+// PassthroughHTMLStreamer is a passthrough streamer
+type PassthroughHTMLStreamer struct {
+ next HTMLStreamer
+}
+
+func NewPassthroughStreamer(next HTMLStreamer) *PassthroughHTMLStreamer {
+ return &PassthroughHTMLStreamer{next: next}
+}
+
+var _ (HTMLStreamer) = &PassthroughHTMLStreamer{}
+
+// Error tells the next streamer in line that there is an error
+func (p *PassthroughHTMLStreamer) Error(err error) error {
+ return p.next.Error(err)
+}
+
+// Doctype tells the next streamer what the doctype is
+func (p *PassthroughHTMLStreamer) Doctype(data string) error {
+ return p.next.Doctype(data)
+}
+
+// Comment tells the next streamer there is a comment
+func (p *PassthroughHTMLStreamer) Comment(data string) error {
+ return p.next.Comment(data)
+}
+
+// StartTag tells the next streamer there is a starting tag
+func (p *PassthroughHTMLStreamer) StartTag(data string, attrs ...html.Attribute) error {
+ return p.next.StartTag(data, attrs...)
+}
+
+// SelfClosingTag tells the next streamer there is a self-closing tag
+func (p *PassthroughHTMLStreamer) SelfClosingTag(data string, attrs ...html.Attribute) error {
+ return p.next.SelfClosingTag(data, attrs...)
+}
+
+// EndTag tells the next streamer there is a end tag
+func (p *PassthroughHTMLStreamer) EndTag(data string) error {
+ return p.next.EndTag(data)
+}
+
+// Text tells the next streamer there is a text
+func (p *PassthroughHTMLStreamer) Text(data string) error {
+ return p.next.Text(data)
+}
+
+// HTMLStreamWriter acts as a writing sink
+type HTMLStreamerWriter struct {
+ io.Writer
+ err error
+}
+
+// Write implements io.Writer
+func (h *HTMLStreamerWriter) Write(data []byte) (int, error) {
+ if h.err != nil {
+ return 0, h.err
+ }
+ return h.Writer.Write(data)
+}
+
+// Write implements io.StringWriter
+func (h *HTMLStreamerWriter) WriteString(data string) (int, error) {
+ if h.err != nil {
+ return 0, h.err
+ }
+ return h.Writer.Write([]byte(data))
+}
+
+// Error tells the next streamer in line that there is an error
+func (h *HTMLStreamerWriter) Error(err error) error {
+ if h.err == nil {
+ h.err = err
+ }
+ return h.err
+}
+
+// Doctype tells the next streamer what the doctype is
+func (h *HTMLStreamerWriter) Doctype(data string) error {
+ _, h.err = h.WriteString("<!DOCTYPE " + data + ">")
+ return h.err
+}
+
+// Comment tells the next streamer there is a comment
+func (h *HTMLStreamerWriter) Comment(data string) error {
+ _, h.err = h.WriteString("<!--" + data + "-->")
+ return h.err
+}
+
+// StartTag tells the next streamer there is a starting tag
+func (h *HTMLStreamerWriter) StartTag(data string, attrs ...html.Attribute) error {
+ return h.startTag(data, attrs, false)
+}
+
+// SelfClosingTag tells the next streamer there is a self-closing tag
+func (h *HTMLStreamerWriter) SelfClosingTag(data string, attrs ...html.Attribute) error {
+ return h.startTag(data, attrs, true)
+}
+
+func (h *HTMLStreamerWriter) startTag(data string, attrs []html.Attribute, selfclosing bool) error {
+ if _, h.err = h.WriteString("<" + data); h.err != nil {
+ return h.err
+ }
+ for _, attr := range attrs {
+ if _, h.err = h.WriteString(" " + attr.Key + "=\"" + html.EscapeString(attr.Val) + "\""); h.err != nil {
+ return h.err
+ }
+ }
+ if selfclosing {
+ if _, h.err = h.WriteString("/>"); h.err != nil {
+ return h.err
+ }
+ } else {
+ if _, h.err = h.WriteString(">"); h.err != nil {
+ return h.err
+ }
+ }
+ return h.err
+}
+
+// EndTag tells the next streamer there is a end tag
+func (h *HTMLStreamerWriter) EndTag(data string) error {
+ _, h.err = h.WriteString("</" + data + ">")
+ return h.err
+}
+
+// Text tells the next streamer there is a text
+func (h *HTMLStreamerWriter) Text(data string) error {
+ _, h.err = h.WriteString(html.EscapeString(data))
+ return h.err
+}
+
+// StreamHTML streams an html to a provided streamer
+func StreamHTML(source io.Reader, streamer HTMLStreamer) error {
+ tokenizer := html.NewTokenizer(source)
+ for {
+ tt := tokenizer.Next()
+ switch tt {
+ case html.ErrorToken:
+ if tokenizer.Err() != io.EOF {
+ return tokenizer.Err()
+ }
+ return nil
+ case html.DoctypeToken:
+ token := tokenizer.Token()
+ if err := streamer.Doctype(token.Data); err != nil {
+ return err
+ }
+ case html.CommentToken:
+ token := tokenizer.Token()
+ if err := streamer.Comment(token.Data); err != nil {
+ return err
+ }
+ case html.StartTagToken:
+ token := tokenizer.Token()
+ if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
+ return err
+ }
+ case html.SelfClosingTagToken:
+ token := tokenizer.Token()
+ if err := streamer.StartTag(token.Data, token.Attr...); err != nil {
+ return err
+ }
+ case html.EndTagToken:
+ token := tokenizer.Token()
+ if err := streamer.EndTag(token.Data); err != nil {
+ return err
+ }
+ case html.TextToken:
+ token := tokenizer.Token()
+ if err := streamer.Text(token.Data); err != nil {
+ return err
+ }
+ default:
+ return fmt.Errorf("unknown type of token: %d", tt)
+ }
+ }
+}