summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/andybalholm/brotli/metablock_distance.go
blob: 95923127a6dcf8a79a952bce8ce57faf67efd783 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
package brotli

/* Copyright 2015 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Greedy block splitter for one block category (literal, command or distance).
 */
type blockSplitterDistance struct {
	alphabet_size_     uint
	min_block_size_    uint
	split_threshold_   float64
	num_blocks_        uint
	split_             *blockSplit
	histograms_        []histogramDistance
	histograms_size_   *uint
	target_block_size_ uint
	block_size_        uint
	curr_histogram_ix_ uint
	last_histogram_ix_ [2]uint
	last_entropy_      [2]float64
	merge_last_count_  uint
}

func initBlockSplitterDistance(self *blockSplitterDistance, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramDistance, histograms_size *uint) {
	var max_num_blocks uint = num_symbols/min_block_size + 1
	var max_num_types uint = brotli_min_size_t(max_num_blocks, maxNumberOfBlockTypes+1)
	/* We have to allocate one more histogram than the maximum number of block
	   types for the current histogram when the meta-block is too big. */
	self.alphabet_size_ = alphabet_size

	self.min_block_size_ = min_block_size
	self.split_threshold_ = split_threshold
	self.num_blocks_ = 0
	self.split_ = split
	self.histograms_size_ = histograms_size
	self.target_block_size_ = min_block_size
	self.block_size_ = 0
	self.curr_histogram_ix_ = 0
	self.merge_last_count_ = 0
	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks)
	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks)
	self.split_.num_blocks = max_num_blocks
	assert(*histograms == nil)
	*histograms_size = max_num_types
	*histograms = make([]histogramDistance, (*histograms_size))
	self.histograms_ = *histograms

	/* Clear only current histogram. */
	histogramClearDistance(&self.histograms_[0])

	self.last_histogram_ix_[1] = 0
	self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
}

/* Does either of three things:
   (1) emits the current block with a new block type;
   (2) emits the current block with the type of the second last block;
   (3) merges the current block with the last block. */
func blockSplitterFinishBlockDistance(self *blockSplitterDistance, is_final bool) {
	var split *blockSplit = self.split_
	var last_entropy []float64 = self.last_entropy_[:]
	var histograms []histogramDistance = self.histograms_
	self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)
	if self.num_blocks_ == 0 {
		/* Create first block. */
		split.lengths[0] = uint32(self.block_size_)

		split.types[0] = 0
		last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
		last_entropy[1] = last_entropy[0]
		self.num_blocks_++
		split.num_types++
		self.curr_histogram_ix_++
		if self.curr_histogram_ix_ < *self.histograms_size_ {
			histogramClearDistance(&histograms[self.curr_histogram_ix_])
		}
		self.block_size_ = 0
	} else if self.block_size_ > 0 {
		var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_)
		var combined_histo [2]histogramDistance
		var combined_entropy [2]float64
		var diff [2]float64
		var j uint
		for j = 0; j < 2; j++ {
			var last_histogram_ix uint = self.last_histogram_ix_[j]
			combined_histo[j] = histograms[self.curr_histogram_ix_]
			histogramAddHistogramDistance(&combined_histo[j], &histograms[last_histogram_ix])
			combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_)
			diff[j] = combined_entropy[j] - entropy - last_entropy[j]
		}

		if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
			/* Create new block. */
			split.lengths[self.num_blocks_] = uint32(self.block_size_)

			split.types[self.num_blocks_] = byte(split.num_types)
			self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
			self.last_histogram_ix_[0] = uint(byte(split.num_types))
			last_entropy[1] = last_entropy[0]
			last_entropy[0] = entropy
			self.num_blocks_++
			split.num_types++
			self.curr_histogram_ix_++
			if self.curr_histogram_ix_ < *self.histograms_size_ {
				histogramClearDistance(&histograms[self.curr_histogram_ix_])
			}
			self.block_size_ = 0
			self.merge_last_count_ = 0
			self.target_block_size_ = self.min_block_size_
		} else if diff[1] < diff[0]-20.0 {
			split.lengths[self.num_blocks_] = uint32(self.block_size_)
			split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
			/* Combine this block with second last block. */

			var tmp uint = self.last_histogram_ix_[0]
			self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
			self.last_histogram_ix_[1] = tmp
			histograms[self.last_histogram_ix_[0]] = combined_histo[1]
			last_entropy[1] = last_entropy[0]
			last_entropy[0] = combined_entropy[1]
			self.num_blocks_++
			self.block_size_ = 0
			histogramClearDistance(&histograms[self.curr_histogram_ix_])
			self.merge_last_count_ = 0
			self.target_block_size_ = self.min_block_size_
		} else {
			/* Combine this block with last block. */
			split.lengths[self.num_blocks_-1] += uint32(self.block_size_)

			histograms[self.last_histogram_ix_[0]] = combined_histo[0]
			last_entropy[0] = combined_entropy[0]
			if split.num_types == 1 {
				last_entropy[1] = last_entropy[0]
			}

			self.block_size_ = 0
			histogramClearDistance(&histograms[self.curr_histogram_ix_])
			self.merge_last_count_++
			if self.merge_last_count_ > 1 {
				self.target_block_size_ += self.min_block_size_
			}
		}
	}

	if is_final {
		*self.histograms_size_ = split.num_types
		split.num_blocks = self.num_blocks_
	}
}

/* Adds the next symbol to the current histogram. When the current histogram
   reaches the target size, decides on merging the block. */
func blockSplitterAddSymbolDistance(self *blockSplitterDistance, symbol uint) {
	histogramAddDistance(&self.histograms_[self.curr_histogram_ix_], symbol)
	self.block_size_++
	if self.block_size_ == self.target_block_size_ {
		blockSplitterFinishBlockDistance(self, false) /* is_final = */
	}
}