From dd136858f1ea40ad3c94191d647487fa4f31926c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 18 Oct 2024 20:33:49 +0200 Subject: Adding upstream version 9.0.0. Signed-off-by: Daniel Baumann --- modules/charset/htmlstream.go | 200 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 modules/charset/htmlstream.go (limited to 'modules/charset/htmlstream.go') diff --git a/modules/charset/htmlstream.go b/modules/charset/htmlstream.go new file mode 100644 index 0000000..61f2912 --- /dev/null +++ b/modules/charset/htmlstream.go @@ -0,0 +1,200 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package charset + +import ( + "fmt" + "io" + + "golang.org/x/net/html" +) + +// HTMLStreamer represents a SAX-like interface for HTML +type HTMLStreamer interface { + Error(err error) error + Doctype(data string) error + Comment(data string) error + StartTag(data string, attrs ...html.Attribute) error + SelfClosingTag(data string, attrs ...html.Attribute) error + EndTag(data string) error + Text(data string) error +} + +// PassthroughHTMLStreamer is a passthrough streamer +type PassthroughHTMLStreamer struct { + next HTMLStreamer +} + +func NewPassthroughStreamer(next HTMLStreamer) *PassthroughHTMLStreamer { + return &PassthroughHTMLStreamer{next: next} +} + +var _ (HTMLStreamer) = &PassthroughHTMLStreamer{} + +// Error tells the next streamer in line that there is an error +func (p *PassthroughHTMLStreamer) Error(err error) error { + return p.next.Error(err) +} + +// Doctype tells the next streamer what the doctype is +func (p *PassthroughHTMLStreamer) Doctype(data string) error { + return p.next.Doctype(data) +} + +// Comment tells the next streamer there is a comment +func (p *PassthroughHTMLStreamer) Comment(data string) error { + return p.next.Comment(data) +} + +// StartTag tells the next streamer there is a starting tag +func (p *PassthroughHTMLStreamer) StartTag(data string, attrs ...html.Attribute) error { + return p.next.StartTag(data, attrs...) +} + +// SelfClosingTag tells the next streamer there is a self-closing tag +func (p *PassthroughHTMLStreamer) SelfClosingTag(data string, attrs ...html.Attribute) error { + return p.next.SelfClosingTag(data, attrs...) +} + +// EndTag tells the next streamer there is a end tag +func (p *PassthroughHTMLStreamer) EndTag(data string) error { + return p.next.EndTag(data) +} + +// Text tells the next streamer there is a text +func (p *PassthroughHTMLStreamer) Text(data string) error { + return p.next.Text(data) +} + +// HTMLStreamWriter acts as a writing sink +type HTMLStreamerWriter struct { + io.Writer + err error +} + +// Write implements io.Writer +func (h *HTMLStreamerWriter) Write(data []byte) (int, error) { + if h.err != nil { + return 0, h.err + } + return h.Writer.Write(data) +} + +// Write implements io.StringWriter +func (h *HTMLStreamerWriter) WriteString(data string) (int, error) { + if h.err != nil { + return 0, h.err + } + return h.Writer.Write([]byte(data)) +} + +// Error tells the next streamer in line that there is an error +func (h *HTMLStreamerWriter) Error(err error) error { + if h.err == nil { + h.err = err + } + return h.err +} + +// Doctype tells the next streamer what the doctype is +func (h *HTMLStreamerWriter) Doctype(data string) error { + _, h.err = h.WriteString("") + return h.err +} + +// Comment tells the next streamer there is a comment +func (h *HTMLStreamerWriter) Comment(data string) error { + _, h.err = h.WriteString("") + return h.err +} + +// StartTag tells the next streamer there is a starting tag +func (h *HTMLStreamerWriter) StartTag(data string, attrs ...html.Attribute) error { + return h.startTag(data, attrs, false) +} + +// SelfClosingTag tells the next streamer there is a self-closing tag +func (h *HTMLStreamerWriter) SelfClosingTag(data string, attrs ...html.Attribute) error { + return h.startTag(data, attrs, true) +} + +func (h *HTMLStreamerWriter) startTag(data string, attrs []html.Attribute, selfclosing bool) error { + if _, h.err = h.WriteString("<" + data); h.err != nil { + return h.err + } + for _, attr := range attrs { + if _, h.err = h.WriteString(" " + attr.Key + "=\"" + html.EscapeString(attr.Val) + "\""); h.err != nil { + return h.err + } + } + if selfclosing { + if _, h.err = h.WriteString("/>"); h.err != nil { + return h.err + } + } else { + if _, h.err = h.WriteString(">"); h.err != nil { + return h.err + } + } + return h.err +} + +// EndTag tells the next streamer there is a end tag +func (h *HTMLStreamerWriter) EndTag(data string) error { + _, h.err = h.WriteString("") + return h.err +} + +// Text tells the next streamer there is a text +func (h *HTMLStreamerWriter) Text(data string) error { + _, h.err = h.WriteString(html.EscapeString(data)) + return h.err +} + +// StreamHTML streams an html to a provided streamer +func StreamHTML(source io.Reader, streamer HTMLStreamer) error { + tokenizer := html.NewTokenizer(source) + for { + tt := tokenizer.Next() + switch tt { + case html.ErrorToken: + if tokenizer.Err() != io.EOF { + return tokenizer.Err() + } + return nil + case html.DoctypeToken: + token := tokenizer.Token() + if err := streamer.Doctype(token.Data); err != nil { + return err + } + case html.CommentToken: + token := tokenizer.Token() + if err := streamer.Comment(token.Data); err != nil { + return err + } + case html.StartTagToken: + token := tokenizer.Token() + if err := streamer.StartTag(token.Data, token.Attr...); err != nil { + return err + } + case html.SelfClosingTagToken: + token := tokenizer.Token() + if err := streamer.StartTag(token.Data, token.Attr...); err != nil { + return err + } + case html.EndTagToken: + token := tokenizer.Token() + if err := streamer.EndTag(token.Data); err != nil { + return err + } + case html.TextToken: + token := tokenizer.Token() + if err := streamer.Text(token.Data); err != nil { + return err + } + default: + return fmt.Errorf("unknown type of token: %d", tt) + } + } +} -- cgit v1.2.3