审查视图

vendor/github.com/andybalholm/brotli/metablock_distance.go 6.0 KB
tangxvhui authored
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
package brotli

/* Copyright 2015 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Greedy block splitter for one block category (literal, command or distance).
 */
type blockSplitterDistance struct {
	alphabet_size_     uint
	min_block_size_    uint
	split_threshold_   float64
	num_blocks_        uint
	split_             *blockSplit
	histograms_        []histogramDistance
	histograms_size_   *uint
	target_block_size_ uint
	block_size_        uint
	curr_histogram_ix_ uint
	last_histogram_ix_ [2]uint
	last_entropy_      [2]float64
	merge_last_count_  uint
}

func initBlockSplitterDistance(self *blockSplitterDistance, alphabet_size uint, min_block_size uint, split_threshold float64, num_symbols uint, split *blockSplit, histograms *[]histogramDistance, histograms_size *uint) {
	var max_num_blocks uint = num_symbols/min_block_size + 1
	var max_num_types uint = brotli_min_size_t(max_num_blocks, maxNumberOfBlockTypes+1)
	/* We have to allocate one more histogram than the maximum number of block
	   types for the current histogram when the meta-block is too big. */
	self.alphabet_size_ = alphabet_size

	self.min_block_size_ = min_block_size
	self.split_threshold_ = split_threshold
	self.num_blocks_ = 0
	self.split_ = split
	self.histograms_size_ = histograms_size
	self.target_block_size_ = min_block_size
	self.block_size_ = 0
	self.curr_histogram_ix_ = 0
	self.merge_last_count_ = 0
	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, max_num_blocks)
	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, max_num_blocks)
	self.split_.num_blocks = max_num_blocks
	assert(*histograms == nil)
	*histograms_size = max_num_types
	*histograms = make([]histogramDistance, (*histograms_size))
	self.histograms_ = *histograms

	/* Clear only current histogram. */
	histogramClearDistance(&self.histograms_[0])

	self.last_histogram_ix_[1] = 0
	self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
}

/* Does either of three things:
   (1) emits the current block with a new block type;
   (2) emits the current block with the type of the second last block;
   (3) merges the current block with the last block. */
func blockSplitterFinishBlockDistance(self *blockSplitterDistance, is_final bool) {
	var split *blockSplit = self.split_
	var last_entropy []float64 = self.last_entropy_[:]
	var histograms []histogramDistance = self.histograms_
	self.block_size_ = brotli_max_size_t(self.block_size_, self.min_block_size_)
	if self.num_blocks_ == 0 {
		/* Create first block. */
		split.lengths[0] = uint32(self.block_size_)

		split.types[0] = 0
		last_entropy[0] = bitsEntropy(histograms[0].data_[:], self.alphabet_size_)
		last_entropy[1] = last_entropy[0]
		self.num_blocks_++
		split.num_types++
		self.curr_histogram_ix_++
		if self.curr_histogram_ix_ < *self.histograms_size_ {
			histogramClearDistance(&histograms[self.curr_histogram_ix_])
		}
		self.block_size_ = 0
	} else if self.block_size_ > 0 {
		var entropy float64 = bitsEntropy(histograms[self.curr_histogram_ix_].data_[:], self.alphabet_size_)
		var combined_histo [2]histogramDistance
		var combined_entropy [2]float64
		var diff [2]float64
		var j uint
		for j = 0; j < 2; j++ {
			var last_histogram_ix uint = self.last_histogram_ix_[j]
			combined_histo[j] = histograms[self.curr_histogram_ix_]
			histogramAddHistogramDistance(&combined_histo[j], &histograms[last_histogram_ix])
			combined_entropy[j] = bitsEntropy(combined_histo[j].data_[0:], self.alphabet_size_)
			diff[j] = combined_entropy[j] - entropy - last_entropy[j]
		}

		if split.num_types < maxNumberOfBlockTypes && diff[0] > self.split_threshold_ && diff[1] > self.split_threshold_ {
			/* Create new block. */
			split.lengths[self.num_blocks_] = uint32(self.block_size_)

			split.types[self.num_blocks_] = byte(split.num_types)
			self.last_histogram_ix_[1] = self.last_histogram_ix_[0]
			self.last_histogram_ix_[0] = uint(byte(split.num_types))
			last_entropy[1] = last_entropy[0]
			last_entropy[0] = entropy
			self.num_blocks_++
			split.num_types++
			self.curr_histogram_ix_++
			if self.curr_histogram_ix_ < *self.histograms_size_ {
				histogramClearDistance(&histograms[self.curr_histogram_ix_])
			}
			self.block_size_ = 0
			self.merge_last_count_ = 0
			self.target_block_size_ = self.min_block_size_
		} else if diff[1] < diff[0]-20.0 {
			split.lengths[self.num_blocks_] = uint32(self.block_size_)
			split.types[self.num_blocks_] = split.types[self.num_blocks_-2]
			/* Combine this block with second last block. */

			var tmp uint = self.last_histogram_ix_[0]
			self.last_histogram_ix_[0] = self.last_histogram_ix_[1]
			self.last_histogram_ix_[1] = tmp
			histograms[self.last_histogram_ix_[0]] = combined_histo[1]
			last_entropy[1] = last_entropy[0]
			last_entropy[0] = combined_entropy[1]
			self.num_blocks_++
			self.block_size_ = 0
			histogramClearDistance(&histograms[self.curr_histogram_ix_])
			self.merge_last_count_ = 0
			self.target_block_size_ = self.min_block_size_
		} else {
			/* Combine this block with last block. */
			split.lengths[self.num_blocks_-1] += uint32(self.block_size_)

			histograms[self.last_histogram_ix_[0]] = combined_histo[0]
			last_entropy[0] = combined_entropy[0]
			if split.num_types == 1 {
				last_entropy[1] = last_entropy[0]
			}

			self.block_size_ = 0
			histogramClearDistance(&histograms[self.curr_histogram_ix_])
			self.merge_last_count_++
			if self.merge_last_count_ > 1 {
				self.target_block_size_ += self.min_block_size_
			}
		}
	}

	if is_final {
		*self.histograms_size_ = split.num_types
		split.num_blocks = self.num_blocks_
	}
}

/* Adds the next symbol to the current histogram. When the current histogram
   reaches the target size, decides on merging the block. */
func blockSplitterAddSymbolDistance(self *blockSplitterDistance, symbol uint) {
	histogramAddDistance(&self.histograms_[self.curr_histogram_ix_], symbol)
	self.block_size_++
	if self.block_size_ == self.target_block_size_ {
		blockSplitterFinishBlockDistance(self, false) /* is_final = */
	}
}