summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/dsnet/compress/bzip2/rle1.go
blob: 1d789f65f2f4561fb8ad057688c0e4f51acda71f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

package bzip2

import "github.com/dsnet/compress/internal/errors"

// rleDone is a special "error" to indicate that the RLE stage is done.
var rleDone = errorf(errors.Unknown, "RLE1 stage is completed")

// runLengthEncoding implements the first RLE stage of bzip2. Every sequence
// of 4..255 duplicated bytes is replaced by only the first 4 bytes, and a
// single byte representing the repeat length. Similar to the C bzip2
// implementation, the encoder will always terminate repeat sequences with a
// count (even if it is the end of the buffer), and it will also never produce
// run lengths of 256..259. The decoder can handle the latter case.
//
// For example, if the input was:
//	input:  "AAAAAAABBBBCCCD"
//
// Then the output will be:
//	output: "AAAA\x03BBBB\x00CCCD"
type runLengthEncoding struct {
	buf     []byte
	idx     int
	lastVal byte
	lastCnt int
}

func (rle *runLengthEncoding) Init(buf []byte) {
	*rle = runLengthEncoding{buf: buf}
}

func (rle *runLengthEncoding) Write(buf []byte) (int, error) {
	for i, b := range buf {
		if rle.lastVal != b {
			rle.lastCnt = 0
		}
		rle.lastCnt++
		switch {
		case rle.lastCnt < 4:
			if rle.idx >= len(rle.buf) {
				return i, rleDone
			}
			rle.buf[rle.idx] = b
			rle.idx++
		case rle.lastCnt == 4:
			if rle.idx+1 >= len(rle.buf) {
				return i, rleDone
			}
			rle.buf[rle.idx] = b
			rle.idx++
			rle.buf[rle.idx] = 0
			rle.idx++
		case rle.lastCnt < 256:
			rle.buf[rle.idx-1]++
		default:
			if rle.idx >= len(rle.buf) {
				return i, rleDone
			}
			rle.lastCnt = 1
			rle.buf[rle.idx] = b
			rle.idx++
		}
		rle.lastVal = b
	}
	return len(buf), nil
}

func (rle *runLengthEncoding) Read(buf []byte) (int, error) {
	for i := range buf {
		switch {
		case rle.lastCnt == -4:
			if rle.idx >= len(rle.buf) {
				return i, errorf(errors.Corrupted, "missing terminating run-length repeater")
			}
			rle.lastCnt = int(rle.buf[rle.idx])
			rle.idx++
			if rle.lastCnt > 0 {
				break // Break the switch
			}
			fallthrough // Count was zero, continue the work
		case rle.lastCnt <= 0:
			if rle.idx >= len(rle.buf) {
				return i, rleDone
			}
			b := rle.buf[rle.idx]
			rle.idx++
			if b != rle.lastVal {
				rle.lastCnt = 0
				rle.lastVal = b
			}
		}
		buf[i] = rle.lastVal
		rle.lastCnt--
	}
	return len(buf), nil
}

func (rle *runLengthEncoding) Bytes() []byte { return rle.buf[:rle.idx] }