作者 唐旭辉

更新

要显示太多修改。

为保证性能只显示 24 of 24+ 个文件。

@@ -4,6 +4,7 @@ go 1.14 @@ -4,6 +4,7 @@ go 1.14
4 4
5 require ( 5 require (
6 github.com/GeeTeam/gt3-golang-sdk v0.0.0-20200116043922-446ca8a507d2 6 github.com/GeeTeam/gt3-golang-sdk v0.0.0-20200116043922-446ca8a507d2
  7 + github.com/Shopify/sarama v1.23.1
7 github.com/ajg/form v1.5.1 // indirect 8 github.com/ajg/form v1.5.1 // indirect
8 github.com/astaxie/beego v1.12.2 9 github.com/astaxie/beego v1.12.2
9 github.com/dgrijalva/jwt-go v3.2.0+incompatible 10 github.com/dgrijalva/jwt-go v3.2.0+incompatible
1 package main 1 package main
2 2
3 import ( 3 import (
  4 + "context"
  5 + "fmt"
  6 + "os"
  7 + "os/signal"
  8 + "syscall"
  9 +
4 "github.com/astaxie/beego" 10 "github.com/astaxie/beego"
5 "github.com/astaxie/beego/logs" 11 "github.com/astaxie/beego/logs"
6 _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/pg" 12 _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/pg"
7 _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/log" 13 _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/log"
8 _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/port/beego" 14 _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/port/beego"
  15 + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/port/consumer"
9 ) 16 )
10 17
11 func main() { 18 func main() {
  19 + sigs := make(chan os.Signal, 1)
  20 + signal.Notify(sigs, os.Interrupt, os.Kill, syscall.SIGINT, syscall.SIGTERM)
  21 + ctx, cancel := context.WithCancel(context.Background())
  22 + closeConsumer, err := consumer.StartConsumer(ctx)
  23 + if err != nil {
  24 + fmt.Printf("启动kafka消息消费者失败 err%s \n", err)
  25 + logs.Error("启动kafka消息消费者失败:%s", err)
  26 + return
  27 + }
  28 + go func() {
  29 + select {
  30 + case <-sigs:
  31 + cancel()
  32 + closeConsumer()
  33 + }
  34 + }()
  35 +
12 logs.Info("应用启动") 36 logs.Info("应用启动")
13 beego.Run() 37 beego.Run()
14 } 38 }
@@ -21,6 +21,14 @@ func CreateOrderBaseDao(options map[string]interface{}) (*dao.OrderBaseDao, erro @@ -21,6 +21,14 @@ func CreateOrderBaseDao(options map[string]interface{}) (*dao.OrderBaseDao, erro
21 return dao.NewOrderBaseDao(transactionContext) 21 return dao.NewOrderBaseDao(transactionContext)
22 } 22 }
23 23
  24 +func CreateOrderBestshopDao(options map[string]interface{}) (*dao.OrderBestshopDao, error) {
  25 + var transactionContext *transaction.TransactionContext
  26 + if value, ok := options["transactionContext"]; ok {
  27 + transactionContext = value.(*transaction.TransactionContext)
  28 + }
  29 + return dao.NewOrderBestshopDao(transactionContext)
  30 +}
  31 +
24 func CreateUsersDao(options map[string]interface{}) (*dao.UsersDao, error) { 32 func CreateUsersDao(options map[string]interface{}) (*dao.UsersDao, error) {
25 var transactionContext *transaction.TransactionContext 33 var transactionContext *transaction.TransactionContext
26 if value, ok := options["transactionContext"]; ok { 34 if value, ok := options["transactionContext"]; ok {
@@ -8,6 +8,8 @@ type CreateOrderFromBestshop struct { @@ -8,6 +8,8 @@ type CreateOrderFromBestshop struct {
8 OrderCode string `json:"orderCode"` 8 OrderCode string `json:"orderCode"`
9 //下单时间 9 //下单时间
10 OrderTime string `json:"orderTime"` 10 OrderTime string `json:"orderTime"`
  11 + //公司id
  12 + CompanyId int64 `json:"companyId"`
11 //订单状态 13 //订单状态
12 OrderState int8 `json:"orderState"` 14 OrderState int8 `json:"orderState"`
13 //发货状态 15 //发货状态
@@ -4,6 +4,8 @@ import ( @@ -4,6 +4,8 @@ import (
4 "fmt" 4 "fmt"
5 "time" 5 "time"
6 6
  7 + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/dao"
  8 +
7 "github.com/astaxie/beego/logs" 9 "github.com/astaxie/beego/logs"
8 10
9 "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/application/factory" 11 "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/application/factory"
@@ -37,6 +39,25 @@ func (s SyncOrderService) SyncOrderFromBestshop(cmd command.CreateOrderFromBests @@ -37,6 +39,25 @@ func (s SyncOrderService) SyncOrderFromBestshop(cmd command.CreateOrderFromBests
37 defer func() { 39 defer func() {
38 transactionContext.RollbackTransaction() 40 transactionContext.RollbackTransaction()
39 }() 41 }()
  42 +
  43 + //检查账号是否存在
  44 + var (
  45 + orderBestshopDao *dao.OrderBestshopDao
  46 + )
  47 + if orderBestshopDao, err = factory.CreateOrderBestshopDao(map[string]interface{}{
  48 + "transactionContext": transactionContext,
  49 + }); err != nil {
  50 + return lib.ThrowError(lib.TRANSACTION_ERROR, err.Error())
  51 + }
  52 + ok, err := orderBestshopDao.OrderExist(cmd.OrderCode)
  53 + if err != nil {
  54 + return lib.ThrowError(lib.TRANSACTION_ERROR, err.Error())
  55 + }
  56 + if ok {
  57 + logs.Info("订单已存在,order_code=%s", cmd.OrderCode)
  58 + return nil
  59 + }
  60 +
40 var ( 61 var (
41 orderBestshopRepository domain.OrderBestshopRepository 62 orderBestshopRepository domain.OrderBestshopRepository
42 orderGoodBestshopRepository domain.OrderGoodBestshopRepository 63 orderGoodBestshopRepository domain.OrderGoodBestshopRepository
@@ -173,11 +194,25 @@ func (s SyncOrderService) copyOrderBestshopToOrderBase(orderBestshop *domain.Ord @@ -173,11 +194,25 @@ func (s SyncOrderService) copyOrderBestshopToOrderBase(orderBestshop *domain.Ord
173 ordergoods []domain.OrderGood 194 ordergoods []domain.OrderGood
174 ) 195 )
175 //TODO 添加orderBase 196 //TODO 添加orderBase
  197 + orderBestshop.CopyToOrderBase(&orderbase)
  198 + orderbase.CompanyId = companyData.Id
  199 + for i := range orderBestshop.Goods {
  200 + good := domain.NewOrderGood()
  201 + orderBestshop.Goods[i].CopyToOrderGood(&good)
  202 + good.OrderId = orderbase.Id
  203 + good.Compute()
  204 + ordergoods = append(ordergoods, good)
  205 + }
  206 + orderbase.Goods = ordergoods
  207 + orderbase.Compute()
176 err = orderBaseRepository.Save(&orderbase) 208 err = orderBaseRepository.Save(&orderbase)
177 if err != nil { 209 if err != nil {
178 e := fmt.Sprintf("添加order_base数据失败%s", err) 210 e := fmt.Sprintf("添加order_base数据失败%s", err)
179 return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, e) 211 return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, e)
180 } 212 }
  213 + for i := range ordergoods {
  214 + ordergoods[i].OrderId = orderbase.Id
  215 + }
181 //TODO 添加goods 216 //TODO 添加goods
182 err = orderGoodRepository.Save(ordergoods) 217 err = orderGoodRepository.Save(ordergoods)
183 if err != nil { 218 if err != nil {
@@ -185,6 +220,7 @@ func (s SyncOrderService) copyOrderBestshopToOrderBase(orderBestshop *domain.Ord @@ -185,6 +220,7 @@ func (s SyncOrderService) copyOrderBestshopToOrderBase(orderBestshop *domain.Ord
185 return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, e) 220 return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, e)
186 } 221 }
187 //TODO 更新isCopy 222 //TODO 更新isCopy
  223 + orderBestshop.IsCopy = true
188 err = orderBestshopRepository.Edit(orderBestshop) 224 err = orderBestshopRepository.Edit(orderBestshop)
189 if err != nil { 225 if err != nil {
190 return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, err.Error()) 226 return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, err.Error())
  1 +package dao
  2 +
  3 +import (
  4 + "fmt"
  5 +
  6 + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/pg/models"
  7 + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/pg/transaction"
  8 +)
  9 +
  10 +type OrderBestshopDao struct {
  11 + transactionContext *transaction.TransactionContext
  12 +}
  13 +
  14 +func NewOrderBestshopDao(transactionContext *transaction.TransactionContext) (*OrderBestshopDao, error) {
  15 + if transactionContext == nil {
  16 + return nil, fmt.Errorf("transactionContext参数不能为nil")
  17 + } else {
  18 + return &OrderBestshopDao{
  19 + transactionContext: transactionContext,
  20 + }, nil
  21 + }
  22 +}
  23 +
  24 +func (dao OrderBestshopDao) OrderExist(orderCode string) (bool, error) {
  25 + tx := dao.transactionContext.GetDB()
  26 + m := models.OrderBestshop{}
  27 + query := tx.Model(m).Where("order_code=?", orderCode)
  28 + ok, err := query.Exists()
  29 + return ok, err
  30 +}
  1 +package configs
  2 +
  3 +type MqConfig struct {
  4 + Servers []string `json:"servers"`
  5 + ConsumerId string `json:"consumerGroup"`
  6 +}
  7 +
  8 +var Cfg MqConfig
  9 +
  10 +func init() {
  11 + Cfg = MqConfig{
  12 + Servers: []string{"192.168.190.136:9092"},
  13 + ConsumerId: "partnermg",
  14 + }
  15 +}
  16 +
  17 +// "",
  18 +// "106.52.15.41:9092"
  1 +package consumer
  2 +
  3 +import (
  4 + "context"
  5 + "errors"
  6 + "sync"
  7 +
  8 + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/port/consumer/configs"
  9 +
  10 + "github.com/Shopify/sarama"
  11 + "github.com/astaxie/beego/logs"
  12 +)
  13 +
  14 +//MessageConsumer 消息消费者
  15 +type MessageConsumer struct {
  16 + ready chan bool
  17 + kafkaHosts []string
  18 + groupId string
  19 + topics []string
  20 + topicsHandles map[string]TopicHandle
  21 +}
  22 +
  23 +func NewMessageConsumer() *MessageConsumer {
  24 + topics := []string{}
  25 + for key := range TopicHandleRouters {
  26 + topics = append(topics, key)
  27 + }
  28 + return &MessageConsumer{
  29 + ready: make(chan bool),
  30 + kafkaHosts: configs.Cfg.Servers,
  31 + groupId: configs.Cfg.ConsumerId,
  32 + topicsHandles: TopicHandleRouters,
  33 + topics: topics,
  34 + }
  35 +}
  36 +
  37 +//实现对应的接口
  38 +var _ sarama.ConsumerGroupHandler = (*MessageConsumer)(nil)
  39 +
  40 +func (c *MessageConsumer) Setup(groupSession sarama.ConsumerGroupSession) error {
  41 + close(c.ready)
  42 + return nil
  43 +}
  44 +
  45 +func (c *MessageConsumer) Cleanup(groupSession sarama.ConsumerGroupSession) error {
  46 + return nil
  47 +}
  48 +
  49 +func (c *MessageConsumer) ConsumeClaim(groupSession sarama.ConsumerGroupSession,
  50 + groupClaim sarama.ConsumerGroupClaim) error {
  51 + var (
  52 + topicHandle TopicHandle
  53 + err error
  54 + )
  55 + for message := range groupClaim.Messages() {
  56 + if topicHandle, err = c.FindTopichandle(groupClaim.Topic()); err != nil {
  57 + logs.Error("FindTopichandle err:%s \n", err)
  58 + continue
  59 + }
  60 + if err = topicHandle(message); err != nil {
  61 + logs.Error("Message claimed: kafka消息处理错误 topic =", message.Topic, message.Offset, err)
  62 + }
  63 + groupSession.MarkMessage(message, "")
  64 + }
  65 + return nil
  66 +}
  67 +
  68 +func (c *MessageConsumer) FindTopichandle(topic string) (TopicHandle, error) {
  69 + if v, ok := c.topicsHandles[topic]; ok {
  70 + return v, nil
  71 + }
  72 + return nil, errors.New("TopicHandle not found")
  73 +}
  74 +
  75 +//StartConsumer 启动
  76 +//返回 Consumer关闭方法 和 error
  77 +func StartConsumer(ctx context.Context) (func(), error) {
  78 + consumer := NewMessageConsumer()
  79 + config := sarama.NewConfig()
  80 + config.Consumer.Group.Rebalance.Strategy = sarama.BalanceStrategyRoundRobin
  81 + config.Consumer.Offsets.Initial = sarama.OffsetNewest
  82 + config.Version = sarama.V0_11_0_2
  83 + consumerGroup, err := sarama.NewConsumerGroup(consumer.kafkaHosts, consumer.groupId, config)
  84 + if err != nil {
  85 + return nil, err
  86 + }
  87 + wg := &sync.WaitGroup{}
  88 + wg.Add(1)
  89 + go func() {
  90 + defer wg.Done()
  91 + for {
  92 + if err := ctx.Err(); err != nil {
  93 + logs.Error("ctx err:%s \n", err)
  94 + return
  95 + }
  96 + if err := consumerGroup.Consume(ctx, consumer.topics, consumer); err != nil {
  97 + logs.Error("consumerGroup err:%s \n", err)
  98 + }
  99 + }
  100 + }()
  101 + //等待 consumerGroup 设置完成
  102 + <-consumer.ready
  103 + logs.Error("Sarama consumer up and running!...")
  104 + return func() {
  105 + wg.Wait()
  106 + if err := consumerGroup.Close(); err != nil {
  107 + logs.Error("consumerGroup.Close err %s", err)
  108 + }
  109 + logs.Info("consumerGroup.Close")
  110 + }, nil
  111 +}
  1 +package consumer
  2 +
  3 +import (
  4 + "fmt"
  5 +
  6 + "github.com/Shopify/sarama"
  7 +)
  8 +
  9 +//TopicHandle 处理kafka中得消息
  10 +type TopicHandle func(*sarama.ConsumerMessage) error
  11 +
  12 +//TopicHandleRouters 根据topic区分消息并进行处理
  13 +var TopicHandleRouters = map[string]TopicHandle{
  14 + "topic_test": func(message *sarama.ConsumerMessage) error {
  15 + fmt.Printf("Done Message claimed: timestamp = %v, topic = %s offset = %v value = %v \n",
  16 + message.Timestamp, message.Topic, message.Offset, string(message.Value))
  17 + return nil
  18 + },
  19 + "bestshop_order": SyncBestshopOrder,
  20 +}
  1 +package consumer
  2 +
  3 +import (
  4 + "encoding/json"
  5 + "fmt"
  6 +
  7 + "github.com/Shopify/sarama"
  8 + "github.com/astaxie/beego/logs"
  9 + syncOrderCmd "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/application/syncOrder/command"
  10 + syncOrderSrv "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/application/syncOrder/service"
  11 +)
  12 +
  13 +//SyncBestshopOrder 同步
  14 +func SyncBestshopOrder(message *sarama.ConsumerMessage) error {
  15 + logs.Info("Done Message claimed: timestamp = %v, topic = %s offset = %v value = %v \n",
  16 + message.Timestamp, message.Topic, message.Offset, string(message.Value))
  17 + var (
  18 + cmd syncOrderCmd.CreateOrderFromBestshop
  19 + err error
  20 + )
  21 + err = json.Unmarshal(message.Value, &cmd)
  22 + if err != nil {
  23 + return fmt.Errorf("[SyncBestshopOrder] 解析kafka数据失败;%s", err)
  24 + }
  25 + if cmd.PartnerId <= 0 {
  26 + logs.Info("[SyncBestshopOrder] PartnerId<=0 ,不处理消息")
  27 + return nil
  28 + }
  29 + srv := syncOrderSrv.NewOrderInfoService(nil)
  30 + err = srv.SyncOrderFromBestshop(cmd)
  31 + return err
  32 +}
  1 +dist: xenial
  2 +language: go
  3 +
  4 +go:
  5 + - 1.10.x
  6 + - 1.11.x
  7 + - 1.12.x
  8 +
  9 +os:
  10 + - linux
  11 + - osx
  12 +
  13 +matrix:
  14 + include:
  15 + name: "Go 1.11.x CentOS 32bits"
  16 + language: go
  17 + go: 1.11.x
  18 + os: linux
  19 + services:
  20 + - docker
  21 + script:
  22 + # Please update Go version in travis_test_32 as needed
  23 + - "docker run -i -v \"${PWD}:/zstd\" toopher/centos-i386:centos6 /bin/bash -c \"linux32 --32bit i386 /zstd/travis_test_32.sh\""
  24 +
  25 +install:
  26 + - "wget https://github.com/DataDog/zstd/files/2246767/mr.zip"
  27 + - "unzip mr.zip"
  28 +script:
  29 + - "go build"
  30 + - "PAYLOAD=`pwd`/mr go test -v"
  31 + - "PAYLOAD=`pwd`/mr go test -bench ."
  1 +Simplified BSD License
  2 +
  3 +Copyright (c) 2016, Datadog <info@datadoghq.com>
  4 +All rights reserved.
  5 +
  6 +Redistribution and use in source and binary forms, with or without
  7 +modification, are permitted provided that the following conditions are met:
  8 +
  9 + * Redistributions of source code must retain the above copyright notice,
  10 + this list of conditions and the following disclaimer.
  11 + * Redistributions in binary form must reproduce the above copyright notice,
  12 + this list of conditions and the following disclaimer in the documentation
  13 + and/or other materials provided with the distribution.
  14 + * Neither the name of the copyright holder nor the names of its contributors
  15 + may be used to endorse or promote products derived from this software
  16 + without specific prior written permission.
  17 +
  18 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19 +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20 +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  21 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  22 +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  23 +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  24 +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  25 +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  26 +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27 +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  1 +# Zstd Go Wrapper
  2 +
  3 +[C Zstd Homepage](https://github.com/Cyan4973/zstd)
  4 +
  5 +The current headers and C files are from *v1.3.8* (Commit
  6 +[470344d](https://github.com/facebook/zstd/releases/tag/v1.3.8)).
  7 +
  8 +## Usage
  9 +
  10 +There are two main APIs:
  11 +
  12 +* simple Compress/Decompress
  13 +* streaming API (io.Reader/io.Writer)
  14 +
  15 +The compress/decompress APIs mirror that of lz4, while the streaming API was
  16 +designed to be a drop-in replacement for zlib.
  17 +
  18 +### Simple `Compress/Decompress`
  19 +
  20 +
  21 +```go
  22 +// Compress compresses the byte array given in src and writes it to dst.
  23 +// If you already have a buffer allocated, you can pass it to prevent allocation
  24 +// If not, you can pass nil as dst.
  25 +// If the buffer is too small, it will be reallocated, resized, and returned bu the function
  26 +// If dst is nil, this will allocate the worst case size (CompressBound(src))
  27 +Compress(dst, src []byte) ([]byte, error)
  28 +```
  29 +
  30 +```go
  31 +// CompressLevel is the same as Compress but you can pass another compression level
  32 +CompressLevel(dst, src []byte, level int) ([]byte, error)
  33 +```
  34 +
  35 +```go
  36 +// Decompress will decompress your payload into dst.
  37 +// If you already have a buffer allocated, you can pass it to prevent allocation
  38 +// If not, you can pass nil as dst (allocates a 4*src size as default).
  39 +// If the buffer is too small, it will retry 3 times by doubling the dst size
  40 +// After max retries, it will switch to the slower stream API to be sure to be able
  41 +// to decompress. Currently switches if compression ratio > 4*2**3=32.
  42 +Decompress(dst, src []byte) ([]byte, error)
  43 +```
  44 +
  45 +### Stream API
  46 +
  47 +```go
  48 +// NewWriter creates a new object that can optionally be initialized with
  49 +// a precomputed dictionary. If dict is nil, compress without a dictionary.
  50 +// The dictionary array should not be changed during the use of this object.
  51 +// You MUST CALL Close() to write the last bytes of a zstd stream and free C objects.
  52 +NewWriter(w io.Writer) *Writer
  53 +NewWriterLevel(w io.Writer, level int) *Writer
  54 +NewWriterLevelDict(w io.Writer, level int, dict []byte) *Writer
  55 +
  56 +// Write compresses the input data and write it to the underlying writer
  57 +(w *Writer) Write(p []byte) (int, error)
  58 +
  59 +// Close flushes the buffer and frees C zstd objects
  60 +(w *Writer) Close() error
  61 +```
  62 +
  63 +```go
  64 +// NewReader returns a new io.ReadCloser that will decompress data from the
  65 +// underlying reader. If a dictionary is provided to NewReaderDict, it must
  66 +// not be modified until Close is called. It is the caller's responsibility
  67 +// to call Close, which frees up C objects.
  68 +NewReader(r io.Reader) io.ReadCloser
  69 +NewReaderDict(r io.Reader, dict []byte) io.ReadCloser
  70 +```
  71 +
  72 +### Benchmarks (benchmarked with v0.5.0)
  73 +
  74 +The author of Zstd also wrote lz4. Zstd is intended to occupy a speed/ratio
  75 +level similar to what zlib currently provides. In our tests, the can always
  76 +be made to be better than zlib by chosing an appropriate level while still
  77 +keeping compression and decompression time faster than zlib.
  78 +
  79 +You can run the benchmarks against your own payloads by using the Go benchmarks tool.
  80 +Just export your payload filepath as the `PAYLOAD` environment variable and run the benchmarks:
  81 +
  82 +```go
  83 +go test -bench .
  84 +```
  85 +
  86 +Compression of a 7Mb pdf zstd (this wrapper) vs [czlib](https://github.com/DataDog/czlib):
  87 +```
  88 +BenchmarkCompression 5 221056624 ns/op 67.34 MB/s
  89 +BenchmarkDecompression 100 18370416 ns/op 810.32 MB/s
  90 +
  91 +BenchmarkFzlibCompress 2 610156603 ns/op 24.40 MB/s
  92 +BenchmarkFzlibDecompress 20 81195246 ns/op 183.33 MB/s
  93 +```
  94 +
  95 +Ratio is also better by a margin of ~20%.
  96 +Compression speed is always better than zlib on all the payloads we tested;
  97 +However, [czlib](https://github.com/DataDog/czlib) has optimisations that make it
  98 +faster at decompressiong small payloads:
  99 +
  100 +```
  101 +Testing with size: 11... czlib: 8.97 MB/s, zstd: 3.26 MB/s
  102 +Testing with size: 27... czlib: 23.3 MB/s, zstd: 8.22 MB/s
  103 +Testing with size: 62... czlib: 31.6 MB/s, zstd: 19.49 MB/s
  104 +Testing with size: 141... czlib: 74.54 MB/s, zstd: 42.55 MB/s
  105 +Testing with size: 323... czlib: 155.14 MB/s, zstd: 99.39 MB/s
  106 +Testing with size: 739... czlib: 235.9 MB/s, zstd: 216.45 MB/s
  107 +Testing with size: 1689... czlib: 116.45 MB/s, zstd: 345.64 MB/s
  108 +Testing with size: 3858... czlib: 176.39 MB/s, zstd: 617.56 MB/s
  109 +Testing with size: 8811... czlib: 254.11 MB/s, zstd: 824.34 MB/s
  110 +Testing with size: 20121... czlib: 197.43 MB/s, zstd: 1339.11 MB/s
  111 +Testing with size: 45951... czlib: 201.62 MB/s, zstd: 1951.57 MB/s
  112 +```
  113 +
  114 +zstd starts to shine with payloads > 1KB
  115 +
  116 +### Stability - Current state: STABLE
  117 +
  118 +The C library seems to be pretty stable and according to the author has been tested and fuzzed.
  119 +
  120 +For the Go wrapper, the test cover most usual cases and we have succesfully tested it on all staging and prod data.
  1 +BSD License
  2 +
  3 +For Zstandard software
  4 +
  5 +Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
  6 +
  7 +Redistribution and use in source and binary forms, with or without modification,
  8 +are permitted provided that the following conditions are met:
  9 +
  10 + * Redistributions of source code must retain the above copyright notice, this
  11 + list of conditions and the following disclaimer.
  12 +
  13 + * Redistributions in binary form must reproduce the above copyright notice,
  14 + this list of conditions and the following disclaimer in the documentation
  15 + and/or other materials provided with the distribution.
  16 +
  17 + * Neither the name Facebook nor the names of its contributors may be used to
  18 + endorse or promote products derived from this software without specific
  19 + prior written permission.
  20 +
  21 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  22 +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  23 +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  24 +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
  25 +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  26 +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  27 +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  28 +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29 +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  30 +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  1 +/* ******************************************************************
  2 + bitstream
  3 + Part of FSE library
  4 + Copyright (C) 2013-present, Yann Collet.
  5 +
  6 + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  7 +
  8 + Redistribution and use in source and binary forms, with or without
  9 + modification, are permitted provided that the following conditions are
  10 + met:
  11 +
  12 + * Redistributions of source code must retain the above copyright
  13 + notice, this list of conditions and the following disclaimer.
  14 + * Redistributions in binary form must reproduce the above
  15 + copyright notice, this list of conditions and the following disclaimer
  16 + in the documentation and/or other materials provided with the
  17 + distribution.
  18 +
  19 + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20 + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21 + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22 + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23 + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24 + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25 + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26 + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27 + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29 + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30 +
  31 + You can contact the author at :
  32 + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
  33 +****************************************************************** */
  34 +#ifndef BITSTREAM_H_MODULE
  35 +#define BITSTREAM_H_MODULE
  36 +
  37 +#if defined (__cplusplus)
  38 +extern "C" {
  39 +#endif
  40 +
  41 +/*
  42 +* This API consists of small unitary functions, which must be inlined for best performance.
  43 +* Since link-time-optimization is not available for all compilers,
  44 +* these functions are defined into a .h to be included.
  45 +*/
  46 +
  47 +/*-****************************************
  48 +* Dependencies
  49 +******************************************/
  50 +#include "mem.h" /* unaligned access routines */
  51 +#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
  52 +#include "error_private.h" /* error codes and messages */
  53 +
  54 +
  55 +/*=========================================
  56 +* Target specific
  57 +=========================================*/
  58 +#if defined(__BMI__) && defined(__GNUC__)
  59 +# include <immintrin.h> /* support for bextr (experimental) */
  60 +#endif
  61 +
  62 +#define STREAM_ACCUMULATOR_MIN_32 25
  63 +#define STREAM_ACCUMULATOR_MIN_64 57
  64 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
  65 +
  66 +
  67 +/*-******************************************
  68 +* bitStream encoding API (write forward)
  69 +********************************************/
  70 +/* bitStream can mix input from multiple sources.
  71 + * A critical property of these streams is that they encode and decode in **reverse** direction.
  72 + * So the first bit sequence you add will be the last to be read, like a LIFO stack.
  73 + */
  74 +typedef struct {
  75 + size_t bitContainer;
  76 + unsigned bitPos;
  77 + char* startPtr;
  78 + char* ptr;
  79 + char* endPtr;
  80 +} BIT_CStream_t;
  81 +
  82 +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
  83 +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
  84 +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
  85 +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
  86 +
  87 +/* Start with initCStream, providing the size of buffer to write into.
  88 +* bitStream will never write outside of this buffer.
  89 +* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
  90 +*
  91 +* bits are first added to a local register.
  92 +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
  93 +* Writing data into memory is an explicit operation, performed by the flushBits function.
  94 +* Hence keep track how many bits are potentially stored into local register to avoid register overflow.
  95 +* After a flushBits, a maximum of 7 bits might still be stored into local register.
  96 +*
  97 +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
  98 +*
  99 +* Last operation is to close the bitStream.
  100 +* The function returns the final size of CStream in bytes.
  101 +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
  102 +*/
  103 +
  104 +
  105 +/*-********************************************
  106 +* bitStream decoding API (read backward)
  107 +**********************************************/
  108 +typedef struct {
  109 + size_t bitContainer;
  110 + unsigned bitsConsumed;
  111 + const char* ptr;
  112 + const char* start;
  113 + const char* limitPtr;
  114 +} BIT_DStream_t;
  115 +
  116 +typedef enum { BIT_DStream_unfinished = 0,
  117 + BIT_DStream_endOfBuffer = 1,
  118 + BIT_DStream_completed = 2,
  119 + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
  120 + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
  121 +
  122 +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
  123 +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
  124 +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
  125 +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
  126 +
  127 +
  128 +/* Start by invoking BIT_initDStream().
  129 +* A chunk of the bitStream is then stored into a local register.
  130 +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
  131 +* You can then retrieve bitFields stored into the local register, **in reverse order**.
  132 +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
  133 +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
  134 +* Otherwise, it can be less than that, so proceed accordingly.
  135 +* Checking if DStream has reached its end can be performed with BIT_endOfDStream().
  136 +*/
  137 +
  138 +
  139 +/*-****************************************
  140 +* unsafe API
  141 +******************************************/
  142 +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
  143 +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
  144 +
  145 +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
  146 +/* unsafe version; does not check buffer overflow */
  147 +
  148 +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
  149 +/* faster, but works only if nbBits >= 1 */
  150 +
  151 +
  152 +
  153 +/*-**************************************************************
  154 +* Internal functions
  155 +****************************************************************/
  156 +MEM_STATIC unsigned BIT_highbit32 (U32 val)
  157 +{
  158 + assert(val != 0);
  159 + {
  160 +# if defined(_MSC_VER) /* Visual */
  161 + unsigned long r=0;
  162 + _BitScanReverse ( &r, val );
  163 + return (unsigned) r;
  164 +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
  165 + return 31 - __builtin_clz (val);
  166 +# else /* Software version */
  167 + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
  168 + 11, 14, 16, 18, 22, 25, 3, 30,
  169 + 8, 12, 20, 28, 15, 17, 24, 7,
  170 + 19, 27, 23, 6, 26, 5, 4, 31 };
  171 + U32 v = val;
  172 + v |= v >> 1;
  173 + v |= v >> 2;
  174 + v |= v >> 4;
  175 + v |= v >> 8;
  176 + v |= v >> 16;
  177 + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
  178 +# endif
  179 + }
  180 +}
  181 +
  182 +/*===== Local Constants =====*/
  183 +static const unsigned BIT_mask[] = {
  184 + 0, 1, 3, 7, 0xF, 0x1F,
  185 + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
  186 + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,
  187 + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
  188 + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
  189 + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
  190 +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
  191 +
  192 +/*-**************************************************************
  193 +* bitStream encoding
  194 +****************************************************************/
  195 +/*! BIT_initCStream() :
  196 + * `dstCapacity` must be > sizeof(size_t)
  197 + * @return : 0 if success,
  198 + * otherwise an error code (can be tested using ERR_isError()) */
  199 +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
  200 + void* startPtr, size_t dstCapacity)
  201 +{
  202 + bitC->bitContainer = 0;
  203 + bitC->bitPos = 0;
  204 + bitC->startPtr = (char*)startPtr;
  205 + bitC->ptr = bitC->startPtr;
  206 + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
  207 + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
  208 + return 0;
  209 +}
  210 +
  211 +/*! BIT_addBits() :
  212 + * can add up to 31 bits into `bitC`.
  213 + * Note : does not check for register overflow ! */
  214 +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
  215 + size_t value, unsigned nbBits)
  216 +{
  217 + MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
  218 + assert(nbBits < BIT_MASK_SIZE);
  219 + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
  220 + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
  221 + bitC->bitPos += nbBits;
  222 +}
  223 +
  224 +/*! BIT_addBitsFast() :
  225 + * works only if `value` is _clean_,
  226 + * meaning all high bits above nbBits are 0 */
  227 +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
  228 + size_t value, unsigned nbBits)
  229 +{
  230 + assert((value>>nbBits) == 0);
  231 + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
  232 + bitC->bitContainer |= value << bitC->bitPos;
  233 + bitC->bitPos += nbBits;
  234 +}
  235 +
  236 +/*! BIT_flushBitsFast() :
  237 + * assumption : bitContainer has not overflowed
  238 + * unsafe version; does not check buffer overflow */
  239 +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
  240 +{
  241 + size_t const nbBytes = bitC->bitPos >> 3;
  242 + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
  243 + MEM_writeLEST(bitC->ptr, bitC->bitContainer);
  244 + bitC->ptr += nbBytes;
  245 + assert(bitC->ptr <= bitC->endPtr);
  246 + bitC->bitPos &= 7;
  247 + bitC->bitContainer >>= nbBytes*8;
  248 +}
  249 +
  250 +/*! BIT_flushBits() :
  251 + * assumption : bitContainer has not overflowed
  252 + * safe version; check for buffer overflow, and prevents it.
  253 + * note : does not signal buffer overflow.
  254 + * overflow will be revealed later on using BIT_closeCStream() */
  255 +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
  256 +{
  257 + size_t const nbBytes = bitC->bitPos >> 3;
  258 + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
  259 + MEM_writeLEST(bitC->ptr, bitC->bitContainer);
  260 + bitC->ptr += nbBytes;
  261 + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
  262 + bitC->bitPos &= 7;
  263 + bitC->bitContainer >>= nbBytes*8;
  264 +}
  265 +
  266 +/*! BIT_closeCStream() :
  267 + * @return : size of CStream, in bytes,
  268 + * or 0 if it could not fit into dstBuffer */
  269 +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
  270 +{
  271 + BIT_addBitsFast(bitC, 1, 1); /* endMark */
  272 + BIT_flushBits(bitC);
  273 + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
  274 + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
  275 +}
  276 +
  277 +
  278 +/*-********************************************************
  279 +* bitStream decoding
  280 +**********************************************************/
  281 +/*! BIT_initDStream() :
  282 + * Initialize a BIT_DStream_t.
  283 + * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
  284 + * `srcSize` must be the *exact* size of the bitStream, in bytes.
  285 + * @return : size of stream (== srcSize), or an errorCode if a problem is detected
  286 + */
  287 +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
  288 +{
  289 + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
  290 +
  291 + bitD->start = (const char*)srcBuffer;
  292 + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
  293 +
  294 + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
  295 + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
  296 + bitD->bitContainer = MEM_readLEST(bitD->ptr);
  297 + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
  298 + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
  299 + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
  300 + } else {
  301 + bitD->ptr = bitD->start;
  302 + bitD->bitContainer = *(const BYTE*)(bitD->start);
  303 + switch(srcSize)
  304 + {
  305 + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
  306 + /* fall-through */
  307 +
  308 + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
  309 + /* fall-through */
  310 +
  311 + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
  312 + /* fall-through */
  313 +
  314 + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
  315 + /* fall-through */
  316 +
  317 + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
  318 + /* fall-through */
  319 +
  320 + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
  321 + /* fall-through */
  322 +
  323 + default: break;
  324 + }
  325 + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
  326 + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
  327 + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
  328 + }
  329 + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
  330 + }
  331 +
  332 + return srcSize;
  333 +}
  334 +
  335 +MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
  336 +{
  337 + return bitContainer >> start;
  338 +}
  339 +
  340 +MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
  341 +{
  342 + U32 const regMask = sizeof(bitContainer)*8 - 1;
  343 + /* if start > regMask, bitstream is corrupted, and result is undefined */
  344 + assert(nbBits < BIT_MASK_SIZE);
  345 + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
  346 +}
  347 +
  348 +MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
  349 +{
  350 + assert(nbBits < BIT_MASK_SIZE);
  351 + return bitContainer & BIT_mask[nbBits];
  352 +}
  353 +
  354 +/*! BIT_lookBits() :
  355 + * Provides next n bits from local register.
  356 + * local register is not modified.
  357 + * On 32-bits, maxNbBits==24.
  358 + * On 64-bits, maxNbBits==56.
  359 + * @return : value extracted */
  360 +MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
  361 +{
  362 + /* arbitrate between double-shift and shift+mask */
  363 +#if 1
  364 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
  365 + * bitstream is likely corrupted, and result is undefined */
  366 + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
  367 +#else
  368 + /* this code path is slower on my os-x laptop */
  369 + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
  370 + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
  371 +#endif
  372 +}
  373 +
  374 +/*! BIT_lookBitsFast() :
  375 + * unsafe version; only works if nbBits >= 1 */
  376 +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
  377 +{
  378 + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
  379 + assert(nbBits >= 1);
  380 + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
  381 +}
  382 +
  383 +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
  384 +{
  385 + bitD->bitsConsumed += nbBits;
  386 +}
  387 +
  388 +/*! BIT_readBits() :
  389 + * Read (consume) next n bits from local register and update.
  390 + * Pay attention to not read more than nbBits contained into local register.
  391 + * @return : extracted value. */
  392 +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
  393 +{
  394 + size_t const value = BIT_lookBits(bitD, nbBits);
  395 + BIT_skipBits(bitD, nbBits);
  396 + return value;
  397 +}
  398 +
  399 +/*! BIT_readBitsFast() :
  400 + * unsafe version; only works only if nbBits >= 1 */
  401 +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
  402 +{
  403 + size_t const value = BIT_lookBitsFast(bitD, nbBits);
  404 + assert(nbBits >= 1);
  405 + BIT_skipBits(bitD, nbBits);
  406 + return value;
  407 +}
  408 +
  409 +/*! BIT_reloadDStream() :
  410 + * Refill `bitD` from buffer previously set in BIT_initDStream() .
  411 + * This function is safe, it guarantees it will not read beyond src buffer.
  412 + * @return : status of `BIT_DStream_t` internal register.
  413 + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
  414 +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
  415 +{
  416 + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
  417 + return BIT_DStream_overflow;
  418 +
  419 + if (bitD->ptr >= bitD->limitPtr) {
  420 + bitD->ptr -= bitD->bitsConsumed >> 3;
  421 + bitD->bitsConsumed &= 7;
  422 + bitD->bitContainer = MEM_readLEST(bitD->ptr);
  423 + return BIT_DStream_unfinished;
  424 + }
  425 + if (bitD->ptr == bitD->start) {
  426 + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
  427 + return BIT_DStream_completed;
  428 + }
  429 + /* start < ptr < limitPtr */
  430 + { U32 nbBytes = bitD->bitsConsumed >> 3;
  431 + BIT_DStream_status result = BIT_DStream_unfinished;
  432 + if (bitD->ptr - nbBytes < bitD->start) {
  433 + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
  434 + result = BIT_DStream_endOfBuffer;
  435 + }
  436 + bitD->ptr -= nbBytes;
  437 + bitD->bitsConsumed -= nbBytes*8;
  438 + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
  439 + return result;
  440 + }
  441 +}
  442 +
  443 +/*! BIT_endOfDStream() :
  444 + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
  445 + */
  446 +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
  447 +{
  448 + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
  449 +}
  450 +
  451 +#if defined (__cplusplus)
  452 +}
  453 +#endif
  454 +
  455 +#endif /* BITSTREAM_H_MODULE */
  1 +/*
  2 + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  3 + * All rights reserved.
  4 + *
  5 + * This source code is licensed under both the BSD-style license (found in the
  6 + * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7 + * in the COPYING file in the root directory of this source tree).
  8 + * You may select, at your option, one of the above-listed licenses.
  9 + */
  10 +
  11 +#ifndef ZSTD_COMPILER_H
  12 +#define ZSTD_COMPILER_H
  13 +
  14 +/*-*******************************************************
  15 +* Compiler specifics
  16 +*********************************************************/
  17 +/* force inlining */
  18 +
  19 +#if !defined(ZSTD_NO_INLINE)
  20 +#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
  21 +# define INLINE_KEYWORD inline
  22 +#else
  23 +# define INLINE_KEYWORD
  24 +#endif
  25 +
  26 +#if defined(__GNUC__)
  27 +# define FORCE_INLINE_ATTR __attribute__((always_inline))
  28 +#elif defined(_MSC_VER)
  29 +# define FORCE_INLINE_ATTR __forceinline
  30 +#else
  31 +# define FORCE_INLINE_ATTR
  32 +#endif
  33 +
  34 +#else
  35 +
  36 +#define INLINE_KEYWORD
  37 +#define FORCE_INLINE_ATTR
  38 +
  39 +#endif
  40 +
  41 +/**
  42 + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
  43 + * parameters. They must be inlined for the compiler to elimininate the constant
  44 + * branches.
  45 + */
  46 +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
  47 +/**
  48 + * HINT_INLINE is used to help the compiler generate better code. It is *not*
  49 + * used for "templates", so it can be tweaked based on the compilers
  50 + * performance.
  51 + *
  52 + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
  53 + * always_inline attribute.
  54 + *
  55 + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
  56 + * attribute.
  57 + */
  58 +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
  59 +# define HINT_INLINE static INLINE_KEYWORD
  60 +#else
  61 +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
  62 +#endif
  63 +
  64 +/* force no inlining */
  65 +#ifdef _MSC_VER
  66 +# define FORCE_NOINLINE static __declspec(noinline)
  67 +#else
  68 +# ifdef __GNUC__
  69 +# define FORCE_NOINLINE static __attribute__((__noinline__))
  70 +# else
  71 +# define FORCE_NOINLINE static
  72 +# endif
  73 +#endif
  74 +
  75 +/* target attribute */
  76 +#ifndef __has_attribute
  77 + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
  78 +#endif
  79 +#if defined(__GNUC__)
  80 +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
  81 +#else
  82 +# define TARGET_ATTRIBUTE(target)
  83 +#endif
  84 +
  85 +/* Enable runtime BMI2 dispatch based on the CPU.
  86 + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
  87 + */
  88 +#ifndef DYNAMIC_BMI2
  89 + #if ((defined(__clang__) && __has_attribute(__target__)) \
  90 + || (defined(__GNUC__) \
  91 + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
  92 + && (defined(__x86_64__) || defined(_M_X86)) \
  93 + && !defined(__BMI2__)
  94 + # define DYNAMIC_BMI2 1
  95 + #else
  96 + # define DYNAMIC_BMI2 0
  97 + #endif
  98 +#endif
  99 +
  100 +/* prefetch
  101 + * can be disabled, by declaring NO_PREFETCH build macro */
  102 +#if defined(NO_PREFETCH)
  103 +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
  104 +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
  105 +#else
  106 +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
  107 +# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
  108 +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
  109 +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
  110 +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
  111 +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
  112 +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
  113 +# else
  114 +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
  115 +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
  116 +# endif
  117 +#endif /* NO_PREFETCH */
  118 +
  119 +#define CACHELINE_SIZE 64
  120 +
  121 +#define PREFETCH_AREA(p, s) { \
  122 + const char* const _ptr = (const char*)(p); \
  123 + size_t const _size = (size_t)(s); \
  124 + size_t _pos; \
  125 + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
  126 + PREFETCH_L2(_ptr + _pos); \
  127 + } \
  128 +}
  129 +
  130 +/* disable warnings */
  131 +#ifdef _MSC_VER /* Visual Studio */
  132 +# include <intrin.h> /* For Visual 2005 */
  133 +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
  134 +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
  135 +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
  136 +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
  137 +# pragma warning(disable : 4324) /* disable: C4324: padded structure */
  138 +#endif
  139 +
  140 +#endif /* ZSTD_COMPILER_H */
  1 +/*
  2 + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  3 + * All rights reserved.
  4 + *
  5 + * This source code is licensed under both the BSD-style license (found in the
  6 + * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7 + * in the COPYING file in the root directory of this source tree).
  8 + * You may select, at your option, one of the above-listed licenses.
  9 + */
  10 +
  11 +/* *****************************************************************************
  12 + * Constructs a dictionary using a heuristic based on the following paper:
  13 + *
  14 + * Liao, Petri, Moffat, Wirth
  15 + * Effective Construction of Relative Lempel-Ziv Dictionaries
  16 + * Published in WWW 2016.
  17 + *
  18 + * Adapted from code originally written by @ot (Giuseppe Ottaviano).
  19 + ******************************************************************************/
  20 +
  21 +/*-*************************************
  22 +* Dependencies
  23 +***************************************/
  24 +#include <stdio.h> /* fprintf */
  25 +#include <stdlib.h> /* malloc, free, qsort */
  26 +#include <string.h> /* memset */
  27 +#include <time.h> /* clock */
  28 +
  29 +#include "mem.h" /* read */
  30 +#include "pool.h"
  31 +#include "threading.h"
  32 +#include "cover.h"
  33 +#include "zstd_internal.h" /* includes zstd.h */
  34 +#ifndef ZDICT_STATIC_LINKING_ONLY
  35 +#define ZDICT_STATIC_LINKING_ONLY
  36 +#endif
  37 +#include "zdict.h"
  38 +
  39 +/*-*************************************
  40 +* Constants
  41 +***************************************/
  42 +#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
  43 +#define DEFAULT_SPLITPOINT 1.0
  44 +
  45 +/*-*************************************
  46 +* Console display
  47 +***************************************/
  48 +static int g_displayLevel = 2;
  49 +#define DISPLAY(...) \
  50 + { \
  51 + fprintf(stderr, __VA_ARGS__); \
  52 + fflush(stderr); \
  53 + }
  54 +#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
  55 + if (displayLevel >= l) { \
  56 + DISPLAY(__VA_ARGS__); \
  57 + } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
  58 +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
  59 +
  60 +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
  61 + if (displayLevel >= l) { \
  62 + if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
  63 + g_time = clock(); \
  64 + DISPLAY(__VA_ARGS__); \
  65 + } \
  66 + }
  67 +#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
  68 +static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
  69 +static clock_t g_time = 0;
  70 +
  71 +/*-*************************************
  72 +* Hash table
  73 +***************************************
  74 +* A small specialized hash map for storing activeDmers.
  75 +* The map does not resize, so if it becomes full it will loop forever.
  76 +* Thus, the map must be large enough to store every value.
  77 +* The map implements linear probing and keeps its load less than 0.5.
  78 +*/
  79 +
  80 +#define MAP_EMPTY_VALUE ((U32)-1)
  81 +typedef struct COVER_map_pair_t_s {
  82 + U32 key;
  83 + U32 value;
  84 +} COVER_map_pair_t;
  85 +
  86 +typedef struct COVER_map_s {
  87 + COVER_map_pair_t *data;
  88 + U32 sizeLog;
  89 + U32 size;
  90 + U32 sizeMask;
  91 +} COVER_map_t;
  92 +
  93 +/**
  94 + * Clear the map.
  95 + */
  96 +static void COVER_map_clear(COVER_map_t *map) {
  97 + memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t));
  98 +}
  99 +
  100 +/**
  101 + * Initializes a map of the given size.
  102 + * Returns 1 on success and 0 on failure.
  103 + * The map must be destroyed with COVER_map_destroy().
  104 + * The map is only guaranteed to be large enough to hold size elements.
  105 + */
  106 +static int COVER_map_init(COVER_map_t *map, U32 size) {
  107 + map->sizeLog = ZSTD_highbit32(size) + 2;
  108 + map->size = (U32)1 << map->sizeLog;
  109 + map->sizeMask = map->size - 1;
  110 + map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t));
  111 + if (!map->data) {
  112 + map->sizeLog = 0;
  113 + map->size = 0;
  114 + return 0;
  115 + }
  116 + COVER_map_clear(map);
  117 + return 1;
  118 +}
  119 +
  120 +/**
  121 + * Internal hash function
  122 + */
  123 +static const U32 prime4bytes = 2654435761U;
  124 +static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
  125 + return (key * prime4bytes) >> (32 - map->sizeLog);
  126 +}
  127 +
  128 +/**
  129 + * Helper function that returns the index that a key should be placed into.
  130 + */
  131 +static U32 COVER_map_index(COVER_map_t *map, U32 key) {
  132 + const U32 hash = COVER_map_hash(map, key);
  133 + U32 i;
  134 + for (i = hash;; i = (i + 1) & map->sizeMask) {
  135 + COVER_map_pair_t *pos = &map->data[i];
  136 + if (pos->value == MAP_EMPTY_VALUE) {
  137 + return i;
  138 + }
  139 + if (pos->key == key) {
  140 + return i;
  141 + }
  142 + }
  143 +}
  144 +
  145 +/**
  146 + * Returns the pointer to the value for key.
  147 + * If key is not in the map, it is inserted and the value is set to 0.
  148 + * The map must not be full.
  149 + */
  150 +static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
  151 + COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)];
  152 + if (pos->value == MAP_EMPTY_VALUE) {
  153 + pos->key = key;
  154 + pos->value = 0;
  155 + }
  156 + return &pos->value;
  157 +}
  158 +
  159 +/**
  160 + * Deletes key from the map if present.
  161 + */
  162 +static void COVER_map_remove(COVER_map_t *map, U32 key) {
  163 + U32 i = COVER_map_index(map, key);
  164 + COVER_map_pair_t *del = &map->data[i];
  165 + U32 shift = 1;
  166 + if (del->value == MAP_EMPTY_VALUE) {
  167 + return;
  168 + }
  169 + for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
  170 + COVER_map_pair_t *const pos = &map->data[i];
  171 + /* If the position is empty we are done */
  172 + if (pos->value == MAP_EMPTY_VALUE) {
  173 + del->value = MAP_EMPTY_VALUE;
  174 + return;
  175 + }
  176 + /* If pos can be moved to del do so */
  177 + if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
  178 + del->key = pos->key;
  179 + del->value = pos->value;
  180 + del = pos;
  181 + shift = 1;
  182 + } else {
  183 + ++shift;
  184 + }
  185 + }
  186 +}
  187 +
  188 +/**
  189 + * Destroys a map that is inited with COVER_map_init().
  190 + */
  191 +static void COVER_map_destroy(COVER_map_t *map) {
  192 + if (map->data) {
  193 + free(map->data);
  194 + }
  195 + map->data = NULL;
  196 + map->size = 0;
  197 +}
  198 +
  199 +/*-*************************************
  200 +* Context
  201 +***************************************/
  202 +
  203 +typedef struct {
  204 + const BYTE *samples;
  205 + size_t *offsets;
  206 + const size_t *samplesSizes;
  207 + size_t nbSamples;
  208 + size_t nbTrainSamples;
  209 + size_t nbTestSamples;
  210 + U32 *suffix;
  211 + size_t suffixSize;
  212 + U32 *freqs;
  213 + U32 *dmerAt;
  214 + unsigned d;
  215 +} COVER_ctx_t;
  216 +
  217 +/* We need a global context for qsort... */
  218 +static COVER_ctx_t *g_ctx = NULL;
  219 +
  220 +/*-*************************************
  221 +* Helper functions
  222 +***************************************/
  223 +
  224 +/**
  225 + * Returns the sum of the sample sizes.
  226 + */
  227 +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
  228 + size_t sum = 0;
  229 + unsigned i;
  230 + for (i = 0; i < nbSamples; ++i) {
  231 + sum += samplesSizes[i];
  232 + }
  233 + return sum;
  234 +}
  235 +
  236 +/**
  237 + * Returns -1 if the dmer at lp is less than the dmer at rp.
  238 + * Return 0 if the dmers at lp and rp are equal.
  239 + * Returns 1 if the dmer at lp is greater than the dmer at rp.
  240 + */
  241 +static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
  242 + U32 const lhs = *(U32 const *)lp;
  243 + U32 const rhs = *(U32 const *)rp;
  244 + return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
  245 +}
  246 +/**
  247 + * Faster version for d <= 8.
  248 + */
  249 +static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
  250 + U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
  251 + U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
  252 + U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
  253 + if (lhs < rhs) {
  254 + return -1;
  255 + }
  256 + return (lhs > rhs);
  257 +}
  258 +
  259 +/**
  260 + * Same as COVER_cmp() except ties are broken by pointer value
  261 + * NOTE: g_ctx must be set to call this function. A global is required because
  262 + * qsort doesn't take an opaque pointer.
  263 + */
  264 +static int COVER_strict_cmp(const void *lp, const void *rp) {
  265 + int result = COVER_cmp(g_ctx, lp, rp);
  266 + if (result == 0) {
  267 + result = lp < rp ? -1 : 1;
  268 + }
  269 + return result;
  270 +}
  271 +/**
  272 + * Faster version for d <= 8.
  273 + */
  274 +static int COVER_strict_cmp8(const void *lp, const void *rp) {
  275 + int result = COVER_cmp8(g_ctx, lp, rp);
  276 + if (result == 0) {
  277 + result = lp < rp ? -1 : 1;
  278 + }
  279 + return result;
  280 +}
  281 +
  282 +/**
  283 + * Returns the first pointer in [first, last) whose element does not compare
  284 + * less than value. If no such element exists it returns last.
  285 + */
  286 +static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
  287 + size_t value) {
  288 + size_t count = last - first;
  289 + while (count != 0) {
  290 + size_t step = count / 2;
  291 + const size_t *ptr = first;
  292 + ptr += step;
  293 + if (*ptr < value) {
  294 + first = ++ptr;
  295 + count -= step + 1;
  296 + } else {
  297 + count = step;
  298 + }
  299 + }
  300 + return first;
  301 +}
  302 +
  303 +/**
  304 + * Generic groupBy function.
  305 + * Groups an array sorted by cmp into groups with equivalent values.
  306 + * Calls grp for each group.
  307 + */
  308 +static void
  309 +COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
  310 + int (*cmp)(COVER_ctx_t *, const void *, const void *),
  311 + void (*grp)(COVER_ctx_t *, const void *, const void *)) {
  312 + const BYTE *ptr = (const BYTE *)data;
  313 + size_t num = 0;
  314 + while (num < count) {
  315 + const BYTE *grpEnd = ptr + size;
  316 + ++num;
  317 + while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
  318 + grpEnd += size;
  319 + ++num;
  320 + }
  321 + grp(ctx, ptr, grpEnd);
  322 + ptr = grpEnd;
  323 + }
  324 +}
  325 +
  326 +/*-*************************************
  327 +* Cover functions
  328 +***************************************/
  329 +
  330 +/**
  331 + * Called on each group of positions with the same dmer.
  332 + * Counts the frequency of each dmer and saves it in the suffix array.
  333 + * Fills `ctx->dmerAt`.
  334 + */
  335 +static void COVER_group(COVER_ctx_t *ctx, const void *group,
  336 + const void *groupEnd) {
  337 + /* The group consists of all the positions with the same first d bytes. */
  338 + const U32 *grpPtr = (const U32 *)group;
  339 + const U32 *grpEnd = (const U32 *)groupEnd;
  340 + /* The dmerId is how we will reference this dmer.
  341 + * This allows us to map the whole dmer space to a much smaller space, the
  342 + * size of the suffix array.
  343 + */
  344 + const U32 dmerId = (U32)(grpPtr - ctx->suffix);
  345 + /* Count the number of samples this dmer shows up in */
  346 + U32 freq = 0;
  347 + /* Details */
  348 + const size_t *curOffsetPtr = ctx->offsets;
  349 + const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
  350 + /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
  351 + * different sample than the last.
  352 + */
  353 + size_t curSampleEnd = ctx->offsets[0];
  354 + for (; grpPtr != grpEnd; ++grpPtr) {
  355 + /* Save the dmerId for this position so we can get back to it. */
  356 + ctx->dmerAt[*grpPtr] = dmerId;
  357 + /* Dictionaries only help for the first reference to the dmer.
  358 + * After that zstd can reference the match from the previous reference.
  359 + * So only count each dmer once for each sample it is in.
  360 + */
  361 + if (*grpPtr < curSampleEnd) {
  362 + continue;
  363 + }
  364 + freq += 1;
  365 + /* Binary search to find the end of the sample *grpPtr is in.
  366 + * In the common case that grpPtr + 1 == grpEnd we can skip the binary
  367 + * search because the loop is over.
  368 + */
  369 + if (grpPtr + 1 != grpEnd) {
  370 + const size_t *sampleEndPtr =
  371 + COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
  372 + curSampleEnd = *sampleEndPtr;
  373 + curOffsetPtr = sampleEndPtr + 1;
  374 + }
  375 + }
  376 + /* At this point we are never going to look at this segment of the suffix
  377 + * array again. We take advantage of this fact to save memory.
  378 + * We store the frequency of the dmer in the first position of the group,
  379 + * which is dmerId.
  380 + */
  381 + ctx->suffix[dmerId] = freq;
  382 +}
  383 +
  384 +
  385 +/**
  386 + * Selects the best segment in an epoch.
  387 + * Segments of are scored according to the function:
  388 + *
  389 + * Let F(d) be the frequency of dmer d.
  390 + * Let S_i be the dmer at position i of segment S which has length k.
  391 + *
  392 + * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
  393 + *
  394 + * Once the dmer d is in the dictionay we set F(d) = 0.
  395 + */
  396 +static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
  397 + COVER_map_t *activeDmers, U32 begin,
  398 + U32 end,
  399 + ZDICT_cover_params_t parameters) {
  400 + /* Constants */
  401 + const U32 k = parameters.k;
  402 + const U32 d = parameters.d;
  403 + const U32 dmersInK = k - d + 1;
  404 + /* Try each segment (activeSegment) and save the best (bestSegment) */
  405 + COVER_segment_t bestSegment = {0, 0, 0};
  406 + COVER_segment_t activeSegment;
  407 + /* Reset the activeDmers in the segment */
  408 + COVER_map_clear(activeDmers);
  409 + /* The activeSegment starts at the beginning of the epoch. */
  410 + activeSegment.begin = begin;
  411 + activeSegment.end = begin;
  412 + activeSegment.score = 0;
  413 + /* Slide the activeSegment through the whole epoch.
  414 + * Save the best segment in bestSegment.
  415 + */
  416 + while (activeSegment.end < end) {
  417 + /* The dmerId for the dmer at the next position */
  418 + U32 newDmer = ctx->dmerAt[activeSegment.end];
  419 + /* The entry in activeDmers for this dmerId */
  420 + U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
  421 + /* If the dmer isn't already present in the segment add its score. */
  422 + if (*newDmerOcc == 0) {
  423 + /* The paper suggest using the L-0.5 norm, but experiments show that it
  424 + * doesn't help.
  425 + */
  426 + activeSegment.score += freqs[newDmer];
  427 + }
  428 + /* Add the dmer to the segment */
  429 + activeSegment.end += 1;
  430 + *newDmerOcc += 1;
  431 +
  432 + /* If the window is now too large, drop the first position */
  433 + if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
  434 + U32 delDmer = ctx->dmerAt[activeSegment.begin];
  435 + U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
  436 + activeSegment.begin += 1;
  437 + *delDmerOcc -= 1;
  438 + /* If this is the last occurence of the dmer, subtract its score */
  439 + if (*delDmerOcc == 0) {
  440 + COVER_map_remove(activeDmers, delDmer);
  441 + activeSegment.score -= freqs[delDmer];
  442 + }
  443 + }
  444 +
  445 + /* If this segment is the best so far save it */
  446 + if (activeSegment.score > bestSegment.score) {
  447 + bestSegment = activeSegment;
  448 + }
  449 + }
  450 + {
  451 + /* Trim off the zero frequency head and tail from the segment. */
  452 + U32 newBegin = bestSegment.end;
  453 + U32 newEnd = bestSegment.begin;
  454 + U32 pos;
  455 + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
  456 + U32 freq = freqs[ctx->dmerAt[pos]];
  457 + if (freq != 0) {
  458 + newBegin = MIN(newBegin, pos);
  459 + newEnd = pos + 1;
  460 + }
  461 + }
  462 + bestSegment.begin = newBegin;
  463 + bestSegment.end = newEnd;
  464 + }
  465 + {
  466 + /* Zero out the frequency of each dmer covered by the chosen segment. */
  467 + U32 pos;
  468 + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
  469 + freqs[ctx->dmerAt[pos]] = 0;
  470 + }
  471 + }
  472 + return bestSegment;
  473 +}
  474 +
  475 +/**
  476 + * Check the validity of the parameters.
  477 + * Returns non-zero if the parameters are valid and 0 otherwise.
  478 + */
  479 +static int COVER_checkParameters(ZDICT_cover_params_t parameters,
  480 + size_t maxDictSize) {
  481 + /* k and d are required parameters */
  482 + if (parameters.d == 0 || parameters.k == 0) {
  483 + return 0;
  484 + }
  485 + /* k <= maxDictSize */
  486 + if (parameters.k > maxDictSize) {
  487 + return 0;
  488 + }
  489 + /* d <= k */
  490 + if (parameters.d > parameters.k) {
  491 + return 0;
  492 + }
  493 + /* 0 < splitPoint <= 1 */
  494 + if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){
  495 + return 0;
  496 + }
  497 + return 1;
  498 +}
  499 +
  500 +/**
  501 + * Clean up a context initialized with `COVER_ctx_init()`.
  502 + */
  503 +static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
  504 + if (!ctx) {
  505 + return;
  506 + }
  507 + if (ctx->suffix) {
  508 + free(ctx->suffix);
  509 + ctx->suffix = NULL;
  510 + }
  511 + if (ctx->freqs) {
  512 + free(ctx->freqs);
  513 + ctx->freqs = NULL;
  514 + }
  515 + if (ctx->dmerAt) {
  516 + free(ctx->dmerAt);
  517 + ctx->dmerAt = NULL;
  518 + }
  519 + if (ctx->offsets) {
  520 + free(ctx->offsets);
  521 + ctx->offsets = NULL;
  522 + }
  523 +}
  524 +
  525 +/**
  526 + * Prepare a context for dictionary building.
  527 + * The context is only dependent on the parameter `d` and can used multiple
  528 + * times.
  529 + * Returns 1 on success or zero on error.
  530 + * The context must be destroyed with `COVER_ctx_destroy()`.
  531 + */
  532 +static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
  533 + const size_t *samplesSizes, unsigned nbSamples,
  534 + unsigned d, double splitPoint) {
  535 + const BYTE *const samples = (const BYTE *)samplesBuffer;
  536 + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
  537 + /* Split samples into testing and training sets */
  538 + const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
  539 + const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
  540 + const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
  541 + const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
  542 + /* Checks */
  543 + if (totalSamplesSize < MAX(d, sizeof(U64)) ||
  544 + totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
  545 + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
  546 + (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
  547 + return 0;
  548 + }
  549 + /* Check if there are at least 5 training samples */
  550 + if (nbTrainSamples < 5) {
  551 + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
  552 + return 0;
  553 + }
  554 + /* Check if there's testing sample */
  555 + if (nbTestSamples < 1) {
  556 + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
  557 + return 0;
  558 + }
  559 + /* Zero the context */
  560 + memset(ctx, 0, sizeof(*ctx));
  561 + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
  562 + (unsigned)trainingSamplesSize);
  563 + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
  564 + (unsigned)testSamplesSize);
  565 + ctx->samples = samples;
  566 + ctx->samplesSizes = samplesSizes;
  567 + ctx->nbSamples = nbSamples;
  568 + ctx->nbTrainSamples = nbTrainSamples;
  569 + ctx->nbTestSamples = nbTestSamples;
  570 + /* Partial suffix array */
  571 + ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
  572 + ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  573 + /* Maps index to the dmerID */
  574 + ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  575 + /* The offsets of each file */
  576 + ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
  577 + if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
  578 + DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
  579 + COVER_ctx_destroy(ctx);
  580 + return 0;
  581 + }
  582 + ctx->freqs = NULL;
  583 + ctx->d = d;
  584 +
  585 + /* Fill offsets from the samplesSizes */
  586 + {
  587 + U32 i;
  588 + ctx->offsets[0] = 0;
  589 + for (i = 1; i <= nbSamples; ++i) {
  590 + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
  591 + }
  592 + }
  593 + DISPLAYLEVEL(2, "Constructing partial suffix array\n");
  594 + {
  595 + /* suffix is a partial suffix array.
  596 + * It only sorts suffixes by their first parameters.d bytes.
  597 + * The sort is stable, so each dmer group is sorted by position in input.
  598 + */
  599 + U32 i;
  600 + for (i = 0; i < ctx->suffixSize; ++i) {
  601 + ctx->suffix[i] = i;
  602 + }
  603 + /* qsort doesn't take an opaque pointer, so pass as a global.
  604 + * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
  605 + */
  606 + g_ctx = ctx;
  607 +#if defined(__OpenBSD__)
  608 + mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
  609 + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
  610 +#else
  611 + qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
  612 + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
  613 +#endif
  614 + }
  615 + DISPLAYLEVEL(2, "Computing frequencies\n");
  616 + /* For each dmer group (group of positions with the same first d bytes):
  617 + * 1. For each position we set dmerAt[position] = dmerID. The dmerID is
  618 + * (groupBeginPtr - suffix). This allows us to go from position to
  619 + * dmerID so we can look up values in freq.
  620 + * 2. We calculate how many samples the dmer occurs in and save it in
  621 + * freqs[dmerId].
  622 + */
  623 + COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
  624 + (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
  625 + ctx->freqs = ctx->suffix;
  626 + ctx->suffix = NULL;
  627 + return 1;
  628 +}
  629 +
  630 +/**
  631 + * Given the prepared context build the dictionary.
  632 + */
  633 +static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
  634 + COVER_map_t *activeDmers, void *dictBuffer,
  635 + size_t dictBufferCapacity,
  636 + ZDICT_cover_params_t parameters) {
  637 + BYTE *const dict = (BYTE *)dictBuffer;
  638 + size_t tail = dictBufferCapacity;
  639 + /* Divide the data up into epochs of equal size.
  640 + * We will select at least one segment from each epoch.
  641 + */
  642 + const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
  643 + const unsigned epochSize = (U32)(ctx->suffixSize / epochs);
  644 + size_t epoch;
  645 + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
  646 + epochs, epochSize);
  647 + /* Loop through the epochs until there are no more segments or the dictionary
  648 + * is full.
  649 + */
  650 + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
  651 + const U32 epochBegin = (U32)(epoch * epochSize);
  652 + const U32 epochEnd = epochBegin + epochSize;
  653 + size_t segmentSize;
  654 + /* Select a segment */
  655 + COVER_segment_t segment = COVER_selectSegment(
  656 + ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
  657 + /* If the segment covers no dmers, then we are out of content */
  658 + if (segment.score == 0) {
  659 + break;
  660 + }
  661 + /* Trim the segment if necessary and if it is too small then we are done */
  662 + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
  663 + if (segmentSize < parameters.d) {
  664 + break;
  665 + }
  666 + /* We fill the dictionary from the back to allow the best segments to be
  667 + * referenced with the smallest offsets.
  668 + */
  669 + tail -= segmentSize;
  670 + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
  671 + DISPLAYUPDATE(
  672 + 2, "\r%u%% ",
  673 + (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
  674 + }
  675 + DISPLAYLEVEL(2, "\r%79s\r", "");
  676 + return tail;
  677 +}
  678 +
  679 +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
  680 + void *dictBuffer, size_t dictBufferCapacity,
  681 + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
  682 + ZDICT_cover_params_t parameters)
  683 +{
  684 + BYTE* const dict = (BYTE*)dictBuffer;
  685 + COVER_ctx_t ctx;
  686 + COVER_map_t activeDmers;
  687 + parameters.splitPoint = 1.0;
  688 + /* Initialize global data */
  689 + g_displayLevel = parameters.zParams.notificationLevel;
  690 + /* Checks */
  691 + if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
  692 + DISPLAYLEVEL(1, "Cover parameters incorrect\n");
  693 + return ERROR(GENERIC);
  694 + }
  695 + if (nbSamples == 0) {
  696 + DISPLAYLEVEL(1, "Cover must have at least one input file\n");
  697 + return ERROR(GENERIC);
  698 + }
  699 + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
  700 + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
  701 + ZDICT_DICTSIZE_MIN);
  702 + return ERROR(dstSize_tooSmall);
  703 + }
  704 + /* Initialize context and activeDmers */
  705 + if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
  706 + parameters.d, parameters.splitPoint)) {
  707 + return ERROR(GENERIC);
  708 + }
  709 + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
  710 + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
  711 + COVER_ctx_destroy(&ctx);
  712 + return ERROR(GENERIC);
  713 + }
  714 +
  715 + DISPLAYLEVEL(2, "Building dictionary\n");
  716 + {
  717 + const size_t tail =
  718 + COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
  719 + dictBufferCapacity, parameters);
  720 + const size_t dictionarySize = ZDICT_finalizeDictionary(
  721 + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
  722 + samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
  723 + if (!ZSTD_isError(dictionarySize)) {
  724 + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
  725 + (unsigned)dictionarySize);
  726 + }
  727 + COVER_ctx_destroy(&ctx);
  728 + COVER_map_destroy(&activeDmers);
  729 + return dictionarySize;
  730 + }
  731 +}
  732 +
  733 +
  734 +
  735 +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
  736 + const size_t *samplesSizes, const BYTE *samples,
  737 + size_t *offsets,
  738 + size_t nbTrainSamples, size_t nbSamples,
  739 + BYTE *const dict, size_t dictBufferCapacity) {
  740 + size_t totalCompressedSize = ERROR(GENERIC);
  741 + /* Pointers */
  742 + ZSTD_CCtx *cctx;
  743 + ZSTD_CDict *cdict;
  744 + void *dst;
  745 + /* Local variables */
  746 + size_t dstCapacity;
  747 + size_t i;
  748 + /* Allocate dst with enough space to compress the maximum sized sample */
  749 + {
  750 + size_t maxSampleSize = 0;
  751 + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
  752 + for (; i < nbSamples; ++i) {
  753 + maxSampleSize = MAX(samplesSizes[i], maxSampleSize);
  754 + }
  755 + dstCapacity = ZSTD_compressBound(maxSampleSize);
  756 + dst = malloc(dstCapacity);
  757 + }
  758 + /* Create the cctx and cdict */
  759 + cctx = ZSTD_createCCtx();
  760 + cdict = ZSTD_createCDict(dict, dictBufferCapacity,
  761 + parameters.zParams.compressionLevel);
  762 + if (!dst || !cctx || !cdict) {
  763 + goto _compressCleanup;
  764 + }
  765 + /* Compress each sample and sum their sizes (or error) */
  766 + totalCompressedSize = dictBufferCapacity;
  767 + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
  768 + for (; i < nbSamples; ++i) {
  769 + const size_t size = ZSTD_compress_usingCDict(
  770 + cctx, dst, dstCapacity, samples + offsets[i],
  771 + samplesSizes[i], cdict);
  772 + if (ZSTD_isError(size)) {
  773 + totalCompressedSize = ERROR(GENERIC);
  774 + goto _compressCleanup;
  775 + }
  776 + totalCompressedSize += size;
  777 + }
  778 +_compressCleanup:
  779 + ZSTD_freeCCtx(cctx);
  780 + ZSTD_freeCDict(cdict);
  781 + if (dst) {
  782 + free(dst);
  783 + }
  784 + return totalCompressedSize;
  785 +}
  786 +
  787 +
  788 +/**
  789 + * Initialize the `COVER_best_t`.
  790 + */
  791 +void COVER_best_init(COVER_best_t *best) {
  792 + if (best==NULL) return; /* compatible with init on NULL */
  793 + (void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
  794 + (void)ZSTD_pthread_cond_init(&best->cond, NULL);
  795 + best->liveJobs = 0;
  796 + best->dict = NULL;
  797 + best->dictSize = 0;
  798 + best->compressedSize = (size_t)-1;
  799 + memset(&best->parameters, 0, sizeof(best->parameters));
  800 +}
  801 +
  802 +/**
  803 + * Wait until liveJobs == 0.
  804 + */
  805 +void COVER_best_wait(COVER_best_t *best) {
  806 + if (!best) {
  807 + return;
  808 + }
  809 + ZSTD_pthread_mutex_lock(&best->mutex);
  810 + while (best->liveJobs != 0) {
  811 + ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
  812 + }
  813 + ZSTD_pthread_mutex_unlock(&best->mutex);
  814 +}
  815 +
  816 +/**
  817 + * Call COVER_best_wait() and then destroy the COVER_best_t.
  818 + */
  819 +void COVER_best_destroy(COVER_best_t *best) {
  820 + if (!best) {
  821 + return;
  822 + }
  823 + COVER_best_wait(best);
  824 + if (best->dict) {
  825 + free(best->dict);
  826 + }
  827 + ZSTD_pthread_mutex_destroy(&best->mutex);
  828 + ZSTD_pthread_cond_destroy(&best->cond);
  829 +}
  830 +
  831 +/**
  832 + * Called when a thread is about to be launched.
  833 + * Increments liveJobs.
  834 + */
  835 +void COVER_best_start(COVER_best_t *best) {
  836 + if (!best) {
  837 + return;
  838 + }
  839 + ZSTD_pthread_mutex_lock(&best->mutex);
  840 + ++best->liveJobs;
  841 + ZSTD_pthread_mutex_unlock(&best->mutex);
  842 +}
  843 +
  844 +/**
  845 + * Called when a thread finishes executing, both on error or success.
  846 + * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
  847 + * If this dictionary is the best so far save it and its parameters.
  848 + */
  849 +void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
  850 + ZDICT_cover_params_t parameters, void *dict,
  851 + size_t dictSize) {
  852 + if (!best) {
  853 + return;
  854 + }
  855 + {
  856 + size_t liveJobs;
  857 + ZSTD_pthread_mutex_lock(&best->mutex);
  858 + --best->liveJobs;
  859 + liveJobs = best->liveJobs;
  860 + /* If the new dictionary is better */
  861 + if (compressedSize < best->compressedSize) {
  862 + /* Allocate space if necessary */
  863 + if (!best->dict || best->dictSize < dictSize) {
  864 + if (best->dict) {
  865 + free(best->dict);
  866 + }
  867 + best->dict = malloc(dictSize);
  868 + if (!best->dict) {
  869 + best->compressedSize = ERROR(GENERIC);
  870 + best->dictSize = 0;
  871 + ZSTD_pthread_cond_signal(&best->cond);
  872 + ZSTD_pthread_mutex_unlock(&best->mutex);
  873 + return;
  874 + }
  875 + }
  876 + /* Save the dictionary, parameters, and size */
  877 + memcpy(best->dict, dict, dictSize);
  878 + best->dictSize = dictSize;
  879 + best->parameters = parameters;
  880 + best->compressedSize = compressedSize;
  881 + }
  882 + if (liveJobs == 0) {
  883 + ZSTD_pthread_cond_broadcast(&best->cond);
  884 + }
  885 + ZSTD_pthread_mutex_unlock(&best->mutex);
  886 + }
  887 +}
  888 +
  889 +/**
  890 + * Parameters for COVER_tryParameters().
  891 + */
  892 +typedef struct COVER_tryParameters_data_s {
  893 + const COVER_ctx_t *ctx;
  894 + COVER_best_t *best;
  895 + size_t dictBufferCapacity;
  896 + ZDICT_cover_params_t parameters;
  897 +} COVER_tryParameters_data_t;
  898 +
  899 +/**
  900 + * Tries a set of parameters and updates the COVER_best_t with the results.
  901 + * This function is thread safe if zstd is compiled with multithreaded support.
  902 + * It takes its parameters as an *OWNING* opaque pointer to support threading.
  903 + */
  904 +static void COVER_tryParameters(void *opaque) {
  905 + /* Save parameters as local variables */
  906 + COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
  907 + const COVER_ctx_t *const ctx = data->ctx;
  908 + const ZDICT_cover_params_t parameters = data->parameters;
  909 + size_t dictBufferCapacity = data->dictBufferCapacity;
  910 + size_t totalCompressedSize = ERROR(GENERIC);
  911 + /* Allocate space for hash table, dict, and freqs */
  912 + COVER_map_t activeDmers;
  913 + BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
  914 + U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  915 + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
  916 + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
  917 + goto _cleanup;
  918 + }
  919 + if (!dict || !freqs) {
  920 + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
  921 + goto _cleanup;
  922 + }
  923 + /* Copy the frequencies because we need to modify them */
  924 + memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
  925 + /* Build the dictionary */
  926 + {
  927 + const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
  928 + dictBufferCapacity, parameters);
  929 + dictBufferCapacity = ZDICT_finalizeDictionary(
  930 + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
  931 + ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
  932 + parameters.zParams);
  933 + if (ZDICT_isError(dictBufferCapacity)) {
  934 + DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
  935 + goto _cleanup;
  936 + }
  937 + }
  938 + /* Check total compressed size */
  939 + totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
  940 + ctx->samples, ctx->offsets,
  941 + ctx->nbTrainSamples, ctx->nbSamples,
  942 + dict, dictBufferCapacity);
  943 +
  944 +_cleanup:
  945 + COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
  946 + dictBufferCapacity);
  947 + free(data);
  948 + COVER_map_destroy(&activeDmers);
  949 + if (dict) {
  950 + free(dict);
  951 + }
  952 + if (freqs) {
  953 + free(freqs);
  954 + }
  955 +}
  956 +
  957 +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
  958 + void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
  959 + const size_t *samplesSizes, unsigned nbSamples,
  960 + ZDICT_cover_params_t *parameters) {
  961 + /* constants */
  962 + const unsigned nbThreads = parameters->nbThreads;
  963 + const double splitPoint =
  964 + parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
  965 + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
  966 + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
  967 + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
  968 + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
  969 + const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
  970 + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
  971 + const unsigned kIterations =
  972 + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
  973 + /* Local variables */
  974 + const int displayLevel = parameters->zParams.notificationLevel;
  975 + unsigned iteration = 1;
  976 + unsigned d;
  977 + unsigned k;
  978 + COVER_best_t best;
  979 + POOL_ctx *pool = NULL;
  980 +
  981 + /* Checks */
  982 + if (splitPoint <= 0 || splitPoint > 1) {
  983 + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
  984 + return ERROR(GENERIC);
  985 + }
  986 + if (kMinK < kMaxD || kMaxK < kMinK) {
  987 + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
  988 + return ERROR(GENERIC);
  989 + }
  990 + if (nbSamples == 0) {
  991 + DISPLAYLEVEL(1, "Cover must have at least one input file\n");
  992 + return ERROR(GENERIC);
  993 + }
  994 + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
  995 + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
  996 + ZDICT_DICTSIZE_MIN);
  997 + return ERROR(dstSize_tooSmall);
  998 + }
  999 + if (nbThreads > 1) {
  1000 + pool = POOL_create(nbThreads, 1);
  1001 + if (!pool) {
  1002 + return ERROR(memory_allocation);
  1003 + }
  1004 + }
  1005 + /* Initialization */
  1006 + COVER_best_init(&best);
  1007 + /* Turn down global display level to clean up display at level 2 and below */
  1008 + g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
  1009 + /* Loop through d first because each new value needs a new context */
  1010 + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
  1011 + kIterations);
  1012 + for (d = kMinD; d <= kMaxD; d += 2) {
  1013 + /* Initialize the context for this value of d */
  1014 + COVER_ctx_t ctx;
  1015 + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
  1016 + if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
  1017 + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
  1018 + COVER_best_destroy(&best);
  1019 + POOL_free(pool);
  1020 + return ERROR(GENERIC);
  1021 + }
  1022 + /* Loop through k reusing the same context */
  1023 + for (k = kMinK; k <= kMaxK; k += kStepSize) {
  1024 + /* Prepare the arguments */
  1025 + COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
  1026 + sizeof(COVER_tryParameters_data_t));
  1027 + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
  1028 + if (!data) {
  1029 + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
  1030 + COVER_best_destroy(&best);
  1031 + COVER_ctx_destroy(&ctx);
  1032 + POOL_free(pool);
  1033 + return ERROR(GENERIC);
  1034 + }
  1035 + data->ctx = &ctx;
  1036 + data->best = &best;
  1037 + data->dictBufferCapacity = dictBufferCapacity;
  1038 + data->parameters = *parameters;
  1039 + data->parameters.k = k;
  1040 + data->parameters.d = d;
  1041 + data->parameters.splitPoint = splitPoint;
  1042 + data->parameters.steps = kSteps;
  1043 + data->parameters.zParams.notificationLevel = g_displayLevel;
  1044 + /* Check the parameters */
  1045 + if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
  1046 + DISPLAYLEVEL(1, "Cover parameters incorrect\n");
  1047 + free(data);
  1048 + continue;
  1049 + }
  1050 + /* Call the function and pass ownership of data to it */
  1051 + COVER_best_start(&best);
  1052 + if (pool) {
  1053 + POOL_add(pool, &COVER_tryParameters, data);
  1054 + } else {
  1055 + COVER_tryParameters(data);
  1056 + }
  1057 + /* Print status */
  1058 + LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
  1059 + (unsigned)((iteration * 100) / kIterations));
  1060 + ++iteration;
  1061 + }
  1062 + COVER_best_wait(&best);
  1063 + COVER_ctx_destroy(&ctx);
  1064 + }
  1065 + LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
  1066 + /* Fill the output buffer and parameters with output of the best parameters */
  1067 + {
  1068 + const size_t dictSize = best.dictSize;
  1069 + if (ZSTD_isError(best.compressedSize)) {
  1070 + const size_t compressedSize = best.compressedSize;
  1071 + COVER_best_destroy(&best);
  1072 + POOL_free(pool);
  1073 + return compressedSize;
  1074 + }
  1075 + *parameters = best.parameters;
  1076 + memcpy(dictBuffer, best.dict, dictSize);
  1077 + COVER_best_destroy(&best);
  1078 + POOL_free(pool);
  1079 + return dictSize;
  1080 + }
  1081 +}
  1 +#include <stdio.h> /* fprintf */
  2 +#include <stdlib.h> /* malloc, free, qsort */
  3 +#include <string.h> /* memset */
  4 +#include <time.h> /* clock */
  5 +#include "mem.h" /* read */
  6 +#include "pool.h"
  7 +#include "threading.h"
  8 +#include "zstd_internal.h" /* includes zstd.h */
  9 +#ifndef ZDICT_STATIC_LINKING_ONLY
  10 +#define ZDICT_STATIC_LINKING_ONLY
  11 +#endif
  12 +#include "zdict.h"
  13 +
  14 +/**
  15 + * COVER_best_t is used for two purposes:
  16 + * 1. Synchronizing threads.
  17 + * 2. Saving the best parameters and dictionary.
  18 + *
  19 + * All of the methods except COVER_best_init() are thread safe if zstd is
  20 + * compiled with multithreaded support.
  21 + */
  22 +typedef struct COVER_best_s {
  23 + ZSTD_pthread_mutex_t mutex;
  24 + ZSTD_pthread_cond_t cond;
  25 + size_t liveJobs;
  26 + void *dict;
  27 + size_t dictSize;
  28 + ZDICT_cover_params_t parameters;
  29 + size_t compressedSize;
  30 +} COVER_best_t;
  31 +
  32 +/**
  33 + * A segment is a range in the source as well as the score of the segment.
  34 + */
  35 +typedef struct {
  36 + U32 begin;
  37 + U32 end;
  38 + U32 score;
  39 +} COVER_segment_t;
  40 +
  41 +/**
  42 + * Checks total compressed size of a dictionary
  43 + */
  44 +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
  45 + const size_t *samplesSizes, const BYTE *samples,
  46 + size_t *offsets,
  47 + size_t nbTrainSamples, size_t nbSamples,
  48 + BYTE *const dict, size_t dictBufferCapacity);
  49 +
  50 +/**
  51 + * Returns the sum of the sample sizes.
  52 + */
  53 +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ;
  54 +
  55 +/**
  56 + * Initialize the `COVER_best_t`.
  57 + */
  58 +void COVER_best_init(COVER_best_t *best);
  59 +
  60 +/**
  61 + * Wait until liveJobs == 0.
  62 + */
  63 +void COVER_best_wait(COVER_best_t *best);
  64 +
  65 +/**
  66 + * Call COVER_best_wait() and then destroy the COVER_best_t.
  67 + */
  68 +void COVER_best_destroy(COVER_best_t *best);
  69 +
  70 +/**
  71 + * Called when a thread is about to be launched.
  72 + * Increments liveJobs.
  73 + */
  74 +void COVER_best_start(COVER_best_t *best);
  75 +
  76 +/**
  77 + * Called when a thread finishes executing, both on error or success.
  78 + * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
  79 + * If this dictionary is the best so far save it and its parameters.
  80 + */
  81 +void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
  82 + ZDICT_cover_params_t parameters, void *dict,
  83 + size_t dictSize);
  1 +/*
  2 + * Copyright (c) 2018-present, Facebook, Inc.
  3 + * All rights reserved.
  4 + *
  5 + * This source code is licensed under both the BSD-style license (found in the
  6 + * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7 + * in the COPYING file in the root directory of this source tree).
  8 + * You may select, at your option, one of the above-listed licenses.
  9 + */
  10 +
  11 +#ifndef ZSTD_COMMON_CPU_H
  12 +#define ZSTD_COMMON_CPU_H
  13 +
  14 +/**
  15 + * Implementation taken from folly/CpuId.h
  16 + * https://github.com/facebook/folly/blob/master/folly/CpuId.h
  17 + */
  18 +
  19 +#include <string.h>
  20 +
  21 +#include "mem.h"
  22 +
  23 +#ifdef _MSC_VER
  24 +#include <intrin.h>
  25 +#endif
  26 +
  27 +typedef struct {
  28 + U32 f1c;
  29 + U32 f1d;
  30 + U32 f7b;
  31 + U32 f7c;
  32 +} ZSTD_cpuid_t;
  33 +
  34 +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
  35 + U32 f1c = 0;
  36 + U32 f1d = 0;
  37 + U32 f7b = 0;
  38 + U32 f7c = 0;
  39 +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
  40 + int reg[4];
  41 + __cpuid((int*)reg, 0);
  42 + {
  43 + int const n = reg[0];
  44 + if (n >= 1) {
  45 + __cpuid((int*)reg, 1);
  46 + f1c = (U32)reg[2];
  47 + f1d = (U32)reg[3];
  48 + }
  49 + if (n >= 7) {
  50 + __cpuidex((int*)reg, 7, 0);
  51 + f7b = (U32)reg[1];
  52 + f7c = (U32)reg[2];
  53 + }
  54 + }
  55 +#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
  56 + /* The following block like the normal cpuid branch below, but gcc
  57 + * reserves ebx for use of its pic register so we must specially
  58 + * handle the save and restore to avoid clobbering the register
  59 + */
  60 + U32 n;
  61 + __asm__(
  62 + "pushl %%ebx\n\t"
  63 + "cpuid\n\t"
  64 + "popl %%ebx\n\t"
  65 + : "=a"(n)
  66 + : "a"(0)
  67 + : "ecx", "edx");
  68 + if (n >= 1) {
  69 + U32 f1a;
  70 + __asm__(
  71 + "pushl %%ebx\n\t"
  72 + "cpuid\n\t"
  73 + "popl %%ebx\n\t"
  74 + : "=a"(f1a), "=c"(f1c), "=d"(f1d)
  75 + : "a"(1));
  76 + }
  77 + if (n >= 7) {
  78 + __asm__(
  79 + "pushl %%ebx\n\t"
  80 + "cpuid\n\t"
  81 + "movl %%ebx, %%eax\n\t"
  82 + "popl %%ebx"
  83 + : "=a"(f7b), "=c"(f7c)
  84 + : "a"(7), "c"(0)
  85 + : "edx");
  86 + }
  87 +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
  88 + U32 n;
  89 + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
  90 + if (n >= 1) {
  91 + U32 f1a;
  92 + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
  93 + }
  94 + if (n >= 7) {
  95 + U32 f7a;
  96 + __asm__("cpuid"
  97 + : "=a"(f7a), "=b"(f7b), "=c"(f7c)
  98 + : "a"(7), "c"(0)
  99 + : "edx");
  100 + }
  101 +#endif
  102 + {
  103 + ZSTD_cpuid_t cpuid;
  104 + cpuid.f1c = f1c;
  105 + cpuid.f1d = f1d;
  106 + cpuid.f7b = f7b;
  107 + cpuid.f7c = f7c;
  108 + return cpuid;
  109 + }
  110 +}
  111 +
  112 +#define X(name, r, bit) \
  113 + MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
  114 + return ((cpuid.r) & (1U << bit)) != 0; \
  115 + }
  116 +
  117 +/* cpuid(1): Processor Info and Feature Bits. */
  118 +#define C(name, bit) X(name, f1c, bit)
  119 + C(sse3, 0)
  120 + C(pclmuldq, 1)
  121 + C(dtes64, 2)
  122 + C(monitor, 3)
  123 + C(dscpl, 4)
  124 + C(vmx, 5)
  125 + C(smx, 6)
  126 + C(eist, 7)
  127 + C(tm2, 8)
  128 + C(ssse3, 9)
  129 + C(cnxtid, 10)
  130 + C(fma, 12)
  131 + C(cx16, 13)
  132 + C(xtpr, 14)
  133 + C(pdcm, 15)
  134 + C(pcid, 17)
  135 + C(dca, 18)
  136 + C(sse41, 19)
  137 + C(sse42, 20)
  138 + C(x2apic, 21)
  139 + C(movbe, 22)
  140 + C(popcnt, 23)
  141 + C(tscdeadline, 24)
  142 + C(aes, 25)
  143 + C(xsave, 26)
  144 + C(osxsave, 27)
  145 + C(avx, 28)
  146 + C(f16c, 29)
  147 + C(rdrand, 30)
  148 +#undef C
  149 +#define D(name, bit) X(name, f1d, bit)
  150 + D(fpu, 0)
  151 + D(vme, 1)
  152 + D(de, 2)
  153 + D(pse, 3)
  154 + D(tsc, 4)
  155 + D(msr, 5)
  156 + D(pae, 6)
  157 + D(mce, 7)
  158 + D(cx8, 8)
  159 + D(apic, 9)
  160 + D(sep, 11)
  161 + D(mtrr, 12)
  162 + D(pge, 13)
  163 + D(mca, 14)
  164 + D(cmov, 15)
  165 + D(pat, 16)
  166 + D(pse36, 17)
  167 + D(psn, 18)
  168 + D(clfsh, 19)
  169 + D(ds, 21)
  170 + D(acpi, 22)
  171 + D(mmx, 23)
  172 + D(fxsr, 24)
  173 + D(sse, 25)
  174 + D(sse2, 26)
  175 + D(ss, 27)
  176 + D(htt, 28)
  177 + D(tm, 29)
  178 + D(pbe, 31)
  179 +#undef D
  180 +
  181 +/* cpuid(7): Extended Features. */
  182 +#define B(name, bit) X(name, f7b, bit)
  183 + B(bmi1, 3)
  184 + B(hle, 4)
  185 + B(avx2, 5)
  186 + B(smep, 7)
  187 + B(bmi2, 8)
  188 + B(erms, 9)
  189 + B(invpcid, 10)
  190 + B(rtm, 11)
  191 + B(mpx, 14)
  192 + B(avx512f, 16)
  193 + B(avx512dq, 17)
  194 + B(rdseed, 18)
  195 + B(adx, 19)
  196 + B(smap, 20)
  197 + B(avx512ifma, 21)
  198 + B(pcommit, 22)
  199 + B(clflushopt, 23)
  200 + B(clwb, 24)
  201 + B(avx512pf, 26)
  202 + B(avx512er, 27)
  203 + B(avx512cd, 28)
  204 + B(sha, 29)
  205 + B(avx512bw, 30)
  206 + B(avx512vl, 31)
  207 +#undef B
  208 +#define C(name, bit) X(name, f7c, bit)
  209 + C(prefetchwt1, 0)
  210 + C(avx512vbmi, 1)
  211 +#undef C
  212 +
  213 +#undef X
  214 +
  215 +#endif /* ZSTD_COMMON_CPU_H */
  1 +/* ******************************************************************
  2 + debug
  3 + Part of FSE library
  4 + Copyright (C) 2013-present, Yann Collet.
  5 +
  6 + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  7 +
  8 + Redistribution and use in source and binary forms, with or without
  9 + modification, are permitted provided that the following conditions are
  10 + met:
  11 +
  12 + * Redistributions of source code must retain the above copyright
  13 + notice, this list of conditions and the following disclaimer.
  14 + * Redistributions in binary form must reproduce the above
  15 + copyright notice, this list of conditions and the following disclaimer
  16 + in the documentation and/or other materials provided with the
  17 + distribution.
  18 +
  19 + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20 + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21 + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22 + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23 + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24 + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25 + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26 + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27 + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29 + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30 +
  31 + You can contact the author at :
  32 + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
  33 +****************************************************************** */
  34 +
  35 +
  36 +/*
  37 + * This module only hosts one global variable
  38 + * which can be used to dynamically influence the verbosity of traces,
  39 + * such as DEBUGLOG and RAWLOG
  40 + */
  41 +
  42 +#include "debug.h"
  43 +
  44 +int g_debuglevel = DEBUGLEVEL;
  1 +/* ******************************************************************
  2 + debug
  3 + Part of FSE library
  4 + Copyright (C) 2013-present, Yann Collet.
  5 +
  6 + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  7 +
  8 + Redistribution and use in source and binary forms, with or without
  9 + modification, are permitted provided that the following conditions are
  10 + met:
  11 +
  12 + * Redistributions of source code must retain the above copyright
  13 + notice, this list of conditions and the following disclaimer.
  14 + * Redistributions in binary form must reproduce the above
  15 + copyright notice, this list of conditions and the following disclaimer
  16 + in the documentation and/or other materials provided with the
  17 + distribution.
  18 +
  19 + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20 + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21 + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22 + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23 + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24 + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25 + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26 + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27 + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29 + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30 +
  31 + You can contact the author at :
  32 + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
  33 +****************************************************************** */
  34 +
  35 +
  36 +/*
  37 + * The purpose of this header is to enable debug functions.
  38 + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
  39 + * and DEBUG_STATIC_ASSERT() for compile-time.
  40 + *
  41 + * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
  42 + *
  43 + * Level 1 enables assert() only.
  44 + * Starting level 2, traces can be generated and pushed to stderr.
  45 + * The higher the level, the more verbose the traces.
  46 + *
  47 + * It's possible to dynamically adjust level using variable g_debug_level,
  48 + * which is only declared if DEBUGLEVEL>=2,
  49 + * and is a global variable, not multi-thread protected (use with care)
  50 + */
  51 +
  52 +#ifndef DEBUG_H_12987983217
  53 +#define DEBUG_H_12987983217
  54 +
  55 +#if defined (__cplusplus)
  56 +extern "C" {
  57 +#endif
  58 +
  59 +
  60 +/* static assert is triggered at compile time, leaving no runtime artefact.
  61 + * static assert only works with compile-time constants.
  62 + * Also, this variant can only be used inside a function. */
  63 +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
  64 +
  65 +
  66 +/* DEBUGLEVEL is expected to be defined externally,
  67 + * typically through compiler command line.
  68 + * Value must be a number. */
  69 +#ifndef DEBUGLEVEL
  70 +# define DEBUGLEVEL 0
  71 +#endif
  72 +
  73 +
  74 +/* DEBUGFILE can be defined externally,
  75 + * typically through compiler command line.
  76 + * note : currently useless.
  77 + * Value must be stderr or stdout */
  78 +#ifndef DEBUGFILE
  79 +# define DEBUGFILE stderr
  80 +#endif
  81 +
  82 +
  83 +/* recommended values for DEBUGLEVEL :
  84 + * 0 : release mode, no debug, all run-time checks disabled
  85 + * 1 : enables assert() only, no display
  86 + * 2 : reserved, for currently active debug path
  87 + * 3 : events once per object lifetime (CCtx, CDict, etc.)
  88 + * 4 : events once per frame
  89 + * 5 : events once per block
  90 + * 6 : events once per sequence (verbose)
  91 + * 7+: events at every position (*very* verbose)
  92 + *
  93 + * It's generally inconvenient to output traces > 5.
  94 + * In which case, it's possible to selectively trigger high verbosity levels
  95 + * by modifying g_debug_level.
  96 + */
  97 +
  98 +#if (DEBUGLEVEL>=1)
  99 +# include <assert.h>
  100 +#else
  101 +# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
  102 +# define assert(condition) ((void)0) /* disable assert (default) */
  103 +# endif
  104 +#endif
  105 +
  106 +#if (DEBUGLEVEL>=2)
  107 +# include <stdio.h>
  108 +extern int g_debuglevel; /* the variable is only declared,
  109 + it actually lives in debug.c,
  110 + and is shared by the whole process.
  111 + It's not thread-safe.
  112 + It's useful when enabling very verbose levels
  113 + on selective conditions (such as position in src) */
  114 +
  115 +# define RAWLOG(l, ...) { \
  116 + if (l<=g_debuglevel) { \
  117 + fprintf(stderr, __VA_ARGS__); \
  118 + } }
  119 +# define DEBUGLOG(l, ...) { \
  120 + if (l<=g_debuglevel) { \
  121 + fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
  122 + fprintf(stderr, " \n"); \
  123 + } }
  124 +#else
  125 +# define RAWLOG(l, ...) {} /* disabled */
  126 +# define DEBUGLOG(l, ...) {} /* disabled */
  127 +#endif
  128 +
  129 +
  130 +#if defined (__cplusplus)
  131 +}
  132 +#endif
  133 +
  134 +#endif /* DEBUG_H_12987983217 */
  1 +/*
  2 + * divsufsort.c for libdivsufsort-lite
  3 + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
  4 + *
  5 + * Permission is hereby granted, free of charge, to any person
  6 + * obtaining a copy of this software and associated documentation
  7 + * files (the "Software"), to deal in the Software without
  8 + * restriction, including without limitation the rights to use,
  9 + * copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 + * copies of the Software, and to permit persons to whom the
  11 + * Software is furnished to do so, subject to the following
  12 + * conditions:
  13 + *
  14 + * The above copyright notice and this permission notice shall be
  15 + * included in all copies or substantial portions of the Software.
  16 + *
  17 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  19 + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  20 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  21 + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  22 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24 + * OTHER DEALINGS IN THE SOFTWARE.
  25 + */
  26 +
  27 +/*- Compiler specifics -*/
  28 +#ifdef __clang__
  29 +#pragma clang diagnostic ignored "-Wshorten-64-to-32"
  30 +#endif
  31 +
  32 +#if defined(_MSC_VER)
  33 +# pragma warning(disable : 4244)
  34 +# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */
  35 +#endif
  36 +
  37 +
  38 +/*- Dependencies -*/
  39 +#include <assert.h>
  40 +#include <stdio.h>
  41 +#include <stdlib.h>
  42 +
  43 +#include "divsufsort.h"
  44 +
  45 +/*- Constants -*/
  46 +#if defined(INLINE)
  47 +# undef INLINE
  48 +#endif
  49 +#if !defined(INLINE)
  50 +# define INLINE __inline
  51 +#endif
  52 +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
  53 +# undef ALPHABET_SIZE
  54 +#endif
  55 +#if !defined(ALPHABET_SIZE)
  56 +# define ALPHABET_SIZE (256)
  57 +#endif
  58 +#define BUCKET_A_SIZE (ALPHABET_SIZE)
  59 +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
  60 +#if defined(SS_INSERTIONSORT_THRESHOLD)
  61 +# if SS_INSERTIONSORT_THRESHOLD < 1
  62 +# undef SS_INSERTIONSORT_THRESHOLD
  63 +# define SS_INSERTIONSORT_THRESHOLD (1)
  64 +# endif
  65 +#else
  66 +# define SS_INSERTIONSORT_THRESHOLD (8)
  67 +#endif
  68 +#if defined(SS_BLOCKSIZE)
  69 +# if SS_BLOCKSIZE < 0
  70 +# undef SS_BLOCKSIZE
  71 +# define SS_BLOCKSIZE (0)
  72 +# elif 32768 <= SS_BLOCKSIZE
  73 +# undef SS_BLOCKSIZE
  74 +# define SS_BLOCKSIZE (32767)
  75 +# endif
  76 +#else
  77 +# define SS_BLOCKSIZE (1024)
  78 +#endif
  79 +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
  80 +#if SS_BLOCKSIZE == 0
  81 +# define SS_MISORT_STACKSIZE (96)
  82 +#elif SS_BLOCKSIZE <= 4096
  83 +# define SS_MISORT_STACKSIZE (16)
  84 +#else
  85 +# define SS_MISORT_STACKSIZE (24)
  86 +#endif
  87 +#define SS_SMERGE_STACKSIZE (32)
  88 +#define TR_INSERTIONSORT_THRESHOLD (8)
  89 +#define TR_STACKSIZE (64)
  90 +
  91 +
  92 +/*- Macros -*/
  93 +#ifndef SWAP
  94 +# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
  95 +#endif /* SWAP */
  96 +#ifndef MIN
  97 +# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
  98 +#endif /* MIN */
  99 +#ifndef MAX
  100 +# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
  101 +#endif /* MAX */
  102 +#define STACK_PUSH(_a, _b, _c, _d)\
  103 + do {\
  104 + assert(ssize < STACK_SIZE);\
  105 + stack[ssize].a = (_a), stack[ssize].b = (_b),\
  106 + stack[ssize].c = (_c), stack[ssize++].d = (_d);\
  107 + } while(0)
  108 +#define STACK_PUSH5(_a, _b, _c, _d, _e)\
  109 + do {\
  110 + assert(ssize < STACK_SIZE);\
  111 + stack[ssize].a = (_a), stack[ssize].b = (_b),\
  112 + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
  113 + } while(0)
  114 +#define STACK_POP(_a, _b, _c, _d)\
  115 + do {\
  116 + assert(0 <= ssize);\
  117 + if(ssize == 0) { return; }\
  118 + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
  119 + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
  120 + } while(0)
  121 +#define STACK_POP5(_a, _b, _c, _d, _e)\
  122 + do {\
  123 + assert(0 <= ssize);\
  124 + if(ssize == 0) { return; }\
  125 + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
  126 + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
  127 + } while(0)
  128 +#define BUCKET_A(_c0) bucket_A[(_c0)]
  129 +#if ALPHABET_SIZE == 256
  130 +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
  131 +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
  132 +#else
  133 +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
  134 +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
  135 +#endif
  136 +
  137 +
  138 +/*- Private Functions -*/
  139 +
  140 +static const int lg_table[256]= {
  141 + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  142 + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  143 + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  144 + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  145 + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  146 + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  147 + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  148 + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
  149 +};
  150 +
  151 +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
  152 +
  153 +static INLINE
  154 +int
  155 +ss_ilg(int n) {
  156 +#if SS_BLOCKSIZE == 0
  157 + return (n & 0xffff0000) ?
  158 + ((n & 0xff000000) ?
  159 + 24 + lg_table[(n >> 24) & 0xff] :
  160 + 16 + lg_table[(n >> 16) & 0xff]) :
  161 + ((n & 0x0000ff00) ?
  162 + 8 + lg_table[(n >> 8) & 0xff] :
  163 + 0 + lg_table[(n >> 0) & 0xff]);
  164 +#elif SS_BLOCKSIZE < 256
  165 + return lg_table[n];
  166 +#else
  167 + return (n & 0xff00) ?
  168 + 8 + lg_table[(n >> 8) & 0xff] :
  169 + 0 + lg_table[(n >> 0) & 0xff];
  170 +#endif
  171 +}
  172 +
  173 +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
  174 +
  175 +#if SS_BLOCKSIZE != 0
  176 +
  177 +static const int sqq_table[256] = {
  178 + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
  179 + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
  180 + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
  181 +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
  182 +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
  183 +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
  184 +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
  185 +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
  186 +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
  187 +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
  188 +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
  189 +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
  190 +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
  191 +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
  192 +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
  193 +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
  194 +};
  195 +
  196 +static INLINE
  197 +int
  198 +ss_isqrt(int x) {
  199 + int y, e;
  200 +
  201 + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
  202 + e = (x & 0xffff0000) ?
  203 + ((x & 0xff000000) ?
  204 + 24 + lg_table[(x >> 24) & 0xff] :
  205 + 16 + lg_table[(x >> 16) & 0xff]) :
  206 + ((x & 0x0000ff00) ?
  207 + 8 + lg_table[(x >> 8) & 0xff] :
  208 + 0 + lg_table[(x >> 0) & 0xff]);
  209 +
  210 + if(e >= 16) {
  211 + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
  212 + if(e >= 24) { y = (y + 1 + x / y) >> 1; }
  213 + y = (y + 1 + x / y) >> 1;
  214 + } else if(e >= 8) {
  215 + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
  216 + } else {
  217 + return sqq_table[x] >> 4;
  218 + }
  219 +
  220 + return (x < (y * y)) ? y - 1 : y;
  221 +}
  222 +
  223 +#endif /* SS_BLOCKSIZE != 0 */
  224 +
  225 +
  226 +/*---------------------------------------------------------------------------*/
  227 +
  228 +/* Compares two suffixes. */
  229 +static INLINE
  230 +int
  231 +ss_compare(const unsigned char *T,
  232 + const int *p1, const int *p2,
  233 + int depth) {
  234 + const unsigned char *U1, *U2, *U1n, *U2n;
  235 +
  236 + for(U1 = T + depth + *p1,
  237 + U2 = T + depth + *p2,
  238 + U1n = T + *(p1 + 1) + 2,
  239 + U2n = T + *(p2 + 1) + 2;
  240 + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
  241 + ++U1, ++U2) {
  242 + }
  243 +
  244 + return U1 < U1n ?
  245 + (U2 < U2n ? *U1 - *U2 : 1) :
  246 + (U2 < U2n ? -1 : 0);
  247 +}
  248 +
  249 +
  250 +/*---------------------------------------------------------------------------*/
  251 +
  252 +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
  253 +
  254 +/* Insertionsort for small size groups */
  255 +static
  256 +void
  257 +ss_insertionsort(const unsigned char *T, const int *PA,
  258 + int *first, int *last, int depth) {
  259 + int *i, *j;
  260 + int t;
  261 + int r;
  262 +
  263 + for(i = last - 2; first <= i; --i) {
  264 + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
  265 + do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
  266 + if(last <= j) { break; }
  267 + }
  268 + if(r == 0) { *j = ~*j; }
  269 + *(j - 1) = t;
  270 + }
  271 +}
  272 +
  273 +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
  274 +
  275 +
  276 +/*---------------------------------------------------------------------------*/
  277 +
  278 +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
  279 +
  280 +static INLINE
  281 +void
  282 +ss_fixdown(const unsigned char *Td, const int *PA,
  283 + int *SA, int i, int size) {
  284 + int j, k;
  285 + int v;
  286 + int c, d, e;
  287 +
  288 + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
  289 + d = Td[PA[SA[k = j++]]];
  290 + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
  291 + if(d <= c) { break; }
  292 + }
  293 + SA[i] = v;
  294 +}
  295 +
  296 +/* Simple top-down heapsort. */
  297 +static
  298 +void
  299 +ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
  300 + int i, m;
  301 + int t;
  302 +
  303 + m = size;
  304 + if((size % 2) == 0) {
  305 + m--;
  306 + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
  307 + }
  308 +
  309 + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
  310 + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
  311 + for(i = m - 1; 0 < i; --i) {
  312 + t = SA[0], SA[0] = SA[i];
  313 + ss_fixdown(Td, PA, SA, 0, i);
  314 + SA[i] = t;
  315 + }
  316 +}
  317 +
  318 +
  319 +/*---------------------------------------------------------------------------*/
  320 +
  321 +/* Returns the median of three elements. */
  322 +static INLINE
  323 +int *
  324 +ss_median3(const unsigned char *Td, const int *PA,
  325 + int *v1, int *v2, int *v3) {
  326 + int *t;
  327 + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
  328 + if(Td[PA[*v2]] > Td[PA[*v3]]) {
  329 + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
  330 + else { return v3; }
  331 + }
  332 + return v2;
  333 +}
  334 +
  335 +/* Returns the median of five elements. */
  336 +static INLINE
  337 +int *
  338 +ss_median5(const unsigned char *Td, const int *PA,
  339 + int *v1, int *v2, int *v3, int *v4, int *v5) {
  340 + int *t;
  341 + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
  342 + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
  343 + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
  344 + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
  345 + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
  346 + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
  347 + return v3;
  348 +}
  349 +
  350 +/* Returns the pivot element. */
  351 +static INLINE
  352 +int *
  353 +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
  354 + int *middle;
  355 + int t;
  356 +
  357 + t = last - first;
  358 + middle = first + t / 2;
  359 +
  360 + if(t <= 512) {
  361 + if(t <= 32) {
  362 + return ss_median3(Td, PA, first, middle, last - 1);
  363 + } else {
  364 + t >>= 2;
  365 + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
  366 + }
  367 + }
  368 + t >>= 3;
  369 + first = ss_median3(Td, PA, first, first + t, first + (t << 1));
  370 + middle = ss_median3(Td, PA, middle - t, middle, middle + t);
  371 + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
  372 + return ss_median3(Td, PA, first, middle, last);
  373 +}
  374 +
  375 +
  376 +/*---------------------------------------------------------------------------*/
  377 +
  378 +/* Binary partition for substrings. */
  379 +static INLINE
  380 +int *
  381 +ss_partition(const int *PA,
  382 + int *first, int *last, int depth) {
  383 + int *a, *b;
  384 + int t;
  385 + for(a = first - 1, b = last;;) {
  386 + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
  387 + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
  388 + if(b <= a) { break; }
  389 + t = ~*b;
  390 + *b = *a;
  391 + *a = t;
  392 + }
  393 + if(first < a) { *first = ~*first; }
  394 + return a;
  395 +}
  396 +
  397 +/* Multikey introsort for medium size groups. */
  398 +static
  399 +void
  400 +ss_mintrosort(const unsigned char *T, const int *PA,
  401 + int *first, int *last,
  402 + int depth) {
  403 +#define STACK_SIZE SS_MISORT_STACKSIZE
  404 + struct { int *a, *b, c; int d; } stack[STACK_SIZE];
  405 + const unsigned char *Td;
  406 + int *a, *b, *c, *d, *e, *f;
  407 + int s, t;
  408 + int ssize;
  409 + int limit;
  410 + int v, x = 0;
  411 +
  412 + for(ssize = 0, limit = ss_ilg(last - first);;) {
  413 +
  414 + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
  415 +#if 1 < SS_INSERTIONSORT_THRESHOLD
  416 + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
  417 +#endif
  418 + STACK_POP(first, last, depth, limit);
  419 + continue;
  420 + }
  421 +
  422 + Td = T + depth;
  423 + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
  424 + if(limit < 0) {
  425 + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
  426 + if((x = Td[PA[*a]]) != v) {
  427 + if(1 < (a - first)) { break; }
  428 + v = x;
  429 + first = a;
  430 + }
  431 + }
  432 + if(Td[PA[*first] - 1] < v) {
  433 + first = ss_partition(PA, first, a, depth);
  434 + }
  435 + if((a - first) <= (last - a)) {
  436 + if(1 < (a - first)) {
  437 + STACK_PUSH(a, last, depth, -1);
  438 + last = a, depth += 1, limit = ss_ilg(a - first);
  439 + } else {
  440 + first = a, limit = -1;
  441 + }
  442 + } else {
  443 + if(1 < (last - a)) {
  444 + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
  445 + first = a, limit = -1;
  446 + } else {
  447 + last = a, depth += 1, limit = ss_ilg(a - first);
  448 + }
  449 + }
  450 + continue;
  451 + }
  452 +
  453 + /* choose pivot */
  454 + a = ss_pivot(Td, PA, first, last);
  455 + v = Td[PA[*a]];
  456 + SWAP(*first, *a);
  457 +
  458 + /* partition */
  459 + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
  460 + if(((a = b) < last) && (x < v)) {
  461 + for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
  462 + if(x == v) { SWAP(*b, *a); ++a; }
  463 + }
  464 + }
  465 + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
  466 + if((b < (d = c)) && (x > v)) {
  467 + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
  468 + if(x == v) { SWAP(*c, *d); --d; }
  469 + }
  470 + }
  471 + for(; b < c;) {
  472 + SWAP(*b, *c);
  473 + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
  474 + if(x == v) { SWAP(*b, *a); ++a; }
  475 + }
  476 + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
  477 + if(x == v) { SWAP(*c, *d); --d; }
  478 + }
  479 + }
  480 +
  481 + if(a <= d) {
  482 + c = b - 1;
  483 +
  484 + if((s = a - first) > (t = b - a)) { s = t; }
  485 + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
  486 + if((s = d - c) > (t = last - d - 1)) { s = t; }
  487 + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
  488 +
  489 + a = first + (b - a), c = last - (d - c);
  490 + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
  491 +
  492 + if((a - first) <= (last - c)) {
  493 + if((last - c) <= (c - b)) {
  494 + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
  495 + STACK_PUSH(c, last, depth, limit);
  496 + last = a;
  497 + } else if((a - first) <= (c - b)) {
  498 + STACK_PUSH(c, last, depth, limit);
  499 + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
  500 + last = a;
  501 + } else {
  502 + STACK_PUSH(c, last, depth, limit);
  503 + STACK_PUSH(first, a, depth, limit);
  504 + first = b, last = c, depth += 1, limit = ss_ilg(c - b);
  505 + }
  506 + } else {
  507 + if((a - first) <= (c - b)) {
  508 + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
  509 + STACK_PUSH(first, a, depth, limit);
  510 + first = c;
  511 + } else if((last - c) <= (c - b)) {
  512 + STACK_PUSH(first, a, depth, limit);
  513 + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
  514 + first = c;
  515 + } else {
  516 + STACK_PUSH(first, a, depth, limit);
  517 + STACK_PUSH(c, last, depth, limit);
  518 + first = b, last = c, depth += 1, limit = ss_ilg(c - b);
  519 + }
  520 + }
  521 + } else {
  522 + limit += 1;
  523 + if(Td[PA[*first] - 1] < v) {
  524 + first = ss_partition(PA, first, last, depth);
  525 + limit = ss_ilg(last - first);
  526 + }
  527 + depth += 1;
  528 + }
  529 + }
  530 +#undef STACK_SIZE
  531 +}
  532 +
  533 +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
  534 +
  535 +
  536 +/*---------------------------------------------------------------------------*/
  537 +
  538 +#if SS_BLOCKSIZE != 0
  539 +
  540 +static INLINE
  541 +void
  542 +ss_blockswap(int *a, int *b, int n) {
  543 + int t;
  544 + for(; 0 < n; --n, ++a, ++b) {
  545 + t = *a, *a = *b, *b = t;
  546 + }
  547 +}
  548 +
  549 +static INLINE
  550 +void
  551 +ss_rotate(int *first, int *middle, int *last) {
  552 + int *a, *b, t;
  553 + int l, r;
  554 + l = middle - first, r = last - middle;
  555 + for(; (0 < l) && (0 < r);) {
  556 + if(l == r) { ss_blockswap(first, middle, l); break; }
  557 + if(l < r) {
  558 + a = last - 1, b = middle - 1;
  559 + t = *a;
  560 + do {
  561 + *a-- = *b, *b-- = *a;
  562 + if(b < first) {
  563 + *a = t;
  564 + last = a;
  565 + if((r -= l + 1) <= l) { break; }
  566 + a -= 1, b = middle - 1;
  567 + t = *a;
  568 + }
  569 + } while(1);
  570 + } else {
  571 + a = first, b = middle;
  572 + t = *a;
  573 + do {
  574 + *a++ = *b, *b++ = *a;
  575 + if(last <= b) {
  576 + *a = t;
  577 + first = a + 1;
  578 + if((l -= r + 1) <= r) { break; }
  579 + a += 1, b = middle;
  580 + t = *a;
  581 + }
  582 + } while(1);
  583 + }
  584 + }
  585 +}
  586 +
  587 +
  588 +/*---------------------------------------------------------------------------*/
  589 +
  590 +static
  591 +void
  592 +ss_inplacemerge(const unsigned char *T, const int *PA,
  593 + int *first, int *middle, int *last,
  594 + int depth) {
  595 + const int *p;
  596 + int *a, *b;
  597 + int len, half;
  598 + int q, r;
  599 + int x;
  600 +
  601 + for(;;) {
  602 + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
  603 + else { x = 0; p = PA + *(last - 1); }
  604 + for(a = first, len = middle - first, half = len >> 1, r = -1;
  605 + 0 < len;
  606 + len = half, half >>= 1) {
  607 + b = a + half;
  608 + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
  609 + if(q < 0) {
  610 + a = b + 1;
  611 + half -= (len & 1) ^ 1;
  612 + } else {
  613 + r = q;
  614 + }
  615 + }
  616 + if(a < middle) {
  617 + if(r == 0) { *a = ~*a; }
  618 + ss_rotate(a, middle, last);
  619 + last -= middle - a;
  620 + middle = a;
  621 + if(first == middle) { break; }
  622 + }
  623 + --last;
  624 + if(x != 0) { while(*--last < 0) { } }
  625 + if(middle == last) { break; }
  626 + }
  627 +}
  628 +
  629 +
  630 +/*---------------------------------------------------------------------------*/
  631 +
  632 +/* Merge-forward with internal buffer. */
  633 +static
  634 +void
  635 +ss_mergeforward(const unsigned char *T, const int *PA,
  636 + int *first, int *middle, int *last,
  637 + int *buf, int depth) {
  638 + int *a, *b, *c, *bufend;
  639 + int t;
  640 + int r;
  641 +
  642 + bufend = buf + (middle - first) - 1;
  643 + ss_blockswap(buf, first, middle - first);
  644 +
  645 + for(t = *(a = first), b = buf, c = middle;;) {
  646 + r = ss_compare(T, PA + *b, PA + *c, depth);
  647 + if(r < 0) {
  648 + do {
  649 + *a++ = *b;
  650 + if(bufend <= b) { *bufend = t; return; }
  651 + *b++ = *a;
  652 + } while(*b < 0);
  653 + } else if(r > 0) {
  654 + do {
  655 + *a++ = *c, *c++ = *a;
  656 + if(last <= c) {
  657 + while(b < bufend) { *a++ = *b, *b++ = *a; }
  658 + *a = *b, *b = t;
  659 + return;
  660 + }
  661 + } while(*c < 0);
  662 + } else {
  663 + *c = ~*c;
  664 + do {
  665 + *a++ = *b;
  666 + if(bufend <= b) { *bufend = t; return; }
  667 + *b++ = *a;
  668 + } while(*b < 0);
  669 +
  670 + do {
  671 + *a++ = *c, *c++ = *a;
  672 + if(last <= c) {
  673 + while(b < bufend) { *a++ = *b, *b++ = *a; }
  674 + *a = *b, *b = t;
  675 + return;
  676 + }
  677 + } while(*c < 0);
  678 + }
  679 + }
  680 +}
  681 +
  682 +/* Merge-backward with internal buffer. */
  683 +static
  684 +void
  685 +ss_mergebackward(const unsigned char *T, const int *PA,
  686 + int *first, int *middle, int *last,
  687 + int *buf, int depth) {
  688 + const int *p1, *p2;
  689 + int *a, *b, *c, *bufend;
  690 + int t;
  691 + int r;
  692 + int x;
  693 +
  694 + bufend = buf + (last - middle) - 1;
  695 + ss_blockswap(buf, middle, last - middle);
  696 +
  697 + x = 0;
  698 + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
  699 + else { p1 = PA + *bufend; }
  700 + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
  701 + else { p2 = PA + *(middle - 1); }
  702 + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
  703 + r = ss_compare(T, p1, p2, depth);
  704 + if(0 < r) {
  705 + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
  706 + *a-- = *b;
  707 + if(b <= buf) { *buf = t; break; }
  708 + *b-- = *a;
  709 + if(*b < 0) { p1 = PA + ~*b; x |= 1; }
  710 + else { p1 = PA + *b; }
  711 + } else if(r < 0) {
  712 + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
  713 + *a-- = *c, *c-- = *a;
  714 + if(c < first) {
  715 + while(buf < b) { *a-- = *b, *b-- = *a; }
  716 + *a = *b, *b = t;
  717 + break;
  718 + }
  719 + if(*c < 0) { p2 = PA + ~*c; x |= 2; }
  720 + else { p2 = PA + *c; }
  721 + } else {
  722 + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
  723 + *a-- = ~*b;
  724 + if(b <= buf) { *buf = t; break; }
  725 + *b-- = *a;
  726 + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
  727 + *a-- = *c, *c-- = *a;
  728 + if(c < first) {
  729 + while(buf < b) { *a-- = *b, *b-- = *a; }
  730 + *a = *b, *b = t;
  731 + break;
  732 + }
  733 + if(*b < 0) { p1 = PA + ~*b; x |= 1; }
  734 + else { p1 = PA + *b; }
  735 + if(*c < 0) { p2 = PA + ~*c; x |= 2; }
  736 + else { p2 = PA + *c; }
  737 + }
  738 + }
  739 +}
  740 +
  741 +/* D&C based merge. */
  742 +static
  743 +void
  744 +ss_swapmerge(const unsigned char *T, const int *PA,
  745 + int *first, int *middle, int *last,
  746 + int *buf, int bufsize, int depth) {
  747 +#define STACK_SIZE SS_SMERGE_STACKSIZE
  748 +#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
  749 +#define MERGE_CHECK(a, b, c)\
  750 + do {\
  751 + if(((c) & 1) ||\
  752 + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
  753 + *(a) = ~*(a);\
  754 + }\
  755 + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
  756 + *(b) = ~*(b);\
  757 + }\
  758 + } while(0)
  759 + struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
  760 + int *l, *r, *lm, *rm;
  761 + int m, len, half;
  762 + int ssize;
  763 + int check, next;
  764 +
  765 + for(check = 0, ssize = 0;;) {
  766 + if((last - middle) <= bufsize) {
  767 + if((first < middle) && (middle < last)) {
  768 + ss_mergebackward(T, PA, first, middle, last, buf, depth);
  769 + }
  770 + MERGE_CHECK(first, last, check);
  771 + STACK_POP(first, middle, last, check);
  772 + continue;
  773 + }
  774 +
  775 + if((middle - first) <= bufsize) {
  776 + if(first < middle) {
  777 + ss_mergeforward(T, PA, first, middle, last, buf, depth);
  778 + }
  779 + MERGE_CHECK(first, last, check);
  780 + STACK_POP(first, middle, last, check);
  781 + continue;
  782 + }
  783 +
  784 + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
  785 + 0 < len;
  786 + len = half, half >>= 1) {
  787 + if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
  788 + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
  789 + m += half + 1;
  790 + half -= (len & 1) ^ 1;
  791 + }
  792 + }
  793 +
  794 + if(0 < m) {
  795 + lm = middle - m, rm = middle + m;
  796 + ss_blockswap(lm, middle, m);
  797 + l = r = middle, next = 0;
  798 + if(rm < last) {
  799 + if(*rm < 0) {
  800 + *rm = ~*rm;
  801 + if(first < lm) { for(; *--l < 0;) { } next |= 4; }
  802 + next |= 1;
  803 + } else if(first < lm) {
  804 + for(; *r < 0; ++r) { }
  805 + next |= 2;
  806 + }
  807 + }
  808 +
  809 + if((l - first) <= (last - r)) {
  810 + STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
  811 + middle = lm, last = l, check = (check & 3) | (next & 4);
  812 + } else {
  813 + if((next & 2) && (r == middle)) { next ^= 6; }
  814 + STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
  815 + first = r, middle = rm, check = (next & 3) | (check & 4);
  816 + }
  817 + } else {
  818 + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
  819 + *middle = ~*middle;
  820 + }
  821 + MERGE_CHECK(first, last, check);
  822 + STACK_POP(first, middle, last, check);
  823 + }
  824 + }
  825 +#undef STACK_SIZE
  826 +}
  827 +
  828 +#endif /* SS_BLOCKSIZE != 0 */
  829 +
  830 +
  831 +/*---------------------------------------------------------------------------*/
  832 +
  833 +/* Substring sort */
  834 +static
  835 +void
  836 +sssort(const unsigned char *T, const int *PA,
  837 + int *first, int *last,
  838 + int *buf, int bufsize,
  839 + int depth, int n, int lastsuffix) {
  840 + int *a;
  841 +#if SS_BLOCKSIZE != 0
  842 + int *b, *middle, *curbuf;
  843 + int j, k, curbufsize, limit;
  844 +#endif
  845 + int i;
  846 +
  847 + if(lastsuffix != 0) { ++first; }
  848 +
  849 +#if SS_BLOCKSIZE == 0
  850 + ss_mintrosort(T, PA, first, last, depth);
  851 +#else
  852 + if((bufsize < SS_BLOCKSIZE) &&
  853 + (bufsize < (last - first)) &&
  854 + (bufsize < (limit = ss_isqrt(last - first)))) {
  855 + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
  856 + buf = middle = last - limit, bufsize = limit;
  857 + } else {
  858 + middle = last, limit = 0;
  859 + }
  860 + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
  861 +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  862 + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
  863 +#elif 1 < SS_BLOCKSIZE
  864 + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
  865 +#endif
  866 + curbufsize = last - (a + SS_BLOCKSIZE);
  867 + curbuf = a + SS_BLOCKSIZE;
  868 + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
  869 + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
  870 + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
  871 + }
  872 + }
  873 +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  874 + ss_mintrosort(T, PA, a, middle, depth);
  875 +#elif 1 < SS_BLOCKSIZE
  876 + ss_insertionsort(T, PA, a, middle, depth);
  877 +#endif
  878 + for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
  879 + if(i & 1) {
  880 + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
  881 + a -= k;
  882 + }
  883 + }
  884 + if(limit != 0) {
  885 +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  886 + ss_mintrosort(T, PA, middle, last, depth);
  887 +#elif 1 < SS_BLOCKSIZE
  888 + ss_insertionsort(T, PA, middle, last, depth);
  889 +#endif
  890 + ss_inplacemerge(T, PA, first, middle, last, depth);
  891 + }
  892 +#endif
  893 +
  894 + if(lastsuffix != 0) {
  895 + /* Insert last type B* suffix. */
  896 + int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
  897 + for(a = first, i = *(first - 1);
  898 + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
  899 + ++a) {
  900 + *(a - 1) = *a;
  901 + }
  902 + *(a - 1) = i;
  903 + }
  904 +}
  905 +
  906 +
  907 +/*---------------------------------------------------------------------------*/
  908 +
  909 +static INLINE
  910 +int
  911 +tr_ilg(int n) {
  912 + return (n & 0xffff0000) ?
  913 + ((n & 0xff000000) ?
  914 + 24 + lg_table[(n >> 24) & 0xff] :
  915 + 16 + lg_table[(n >> 16) & 0xff]) :
  916 + ((n & 0x0000ff00) ?
  917 + 8 + lg_table[(n >> 8) & 0xff] :
  918 + 0 + lg_table[(n >> 0) & 0xff]);
  919 +}
  920 +
  921 +
  922 +/*---------------------------------------------------------------------------*/
  923 +
  924 +/* Simple insertionsort for small size groups. */
  925 +static
  926 +void
  927 +tr_insertionsort(const int *ISAd, int *first, int *last) {
  928 + int *a, *b;
  929 + int t, r;
  930 +
  931 + for(a = first + 1; a < last; ++a) {
  932 + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
  933 + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
  934 + if(b < first) { break; }
  935 + }
  936 + if(r == 0) { *b = ~*b; }
  937 + *(b + 1) = t;
  938 + }
  939 +}
  940 +
  941 +
  942 +/*---------------------------------------------------------------------------*/
  943 +
  944 +static INLINE
  945 +void
  946 +tr_fixdown(const int *ISAd, int *SA, int i, int size) {
  947 + int j, k;
  948 + int v;
  949 + int c, d, e;
  950 +
  951 + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
  952 + d = ISAd[SA[k = j++]];
  953 + if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
  954 + if(d <= c) { break; }
  955 + }
  956 + SA[i] = v;
  957 +}
  958 +
  959 +/* Simple top-down heapsort. */
  960 +static
  961 +void
  962 +tr_heapsort(const int *ISAd, int *SA, int size) {
  963 + int i, m;
  964 + int t;
  965 +
  966 + m = size;
  967 + if((size % 2) == 0) {
  968 + m--;
  969 + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
  970 + }
  971 +
  972 + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
  973 + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
  974 + for(i = m - 1; 0 < i; --i) {
  975 + t = SA[0], SA[0] = SA[i];
  976 + tr_fixdown(ISAd, SA, 0, i);
  977 + SA[i] = t;
  978 + }
  979 +}
  980 +
  981 +
  982 +/*---------------------------------------------------------------------------*/
  983 +
  984 +/* Returns the median of three elements. */
  985 +static INLINE
  986 +int *
  987 +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
  988 + int *t;
  989 + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
  990 + if(ISAd[*v2] > ISAd[*v3]) {
  991 + if(ISAd[*v1] > ISAd[*v3]) { return v1; }
  992 + else { return v3; }
  993 + }
  994 + return v2;
  995 +}
  996 +
  997 +/* Returns the median of five elements. */
  998 +static INLINE
  999 +int *
  1000 +tr_median5(const int *ISAd,
  1001 + int *v1, int *v2, int *v3, int *v4, int *v5) {
  1002 + int *t;
  1003 + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
  1004 + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
  1005 + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
  1006 + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
  1007 + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
  1008 + if(ISAd[*v3] > ISAd[*v4]) { return v4; }
  1009 + return v3;
  1010 +}
  1011 +
  1012 +/* Returns the pivot element. */
  1013 +static INLINE
  1014 +int *
  1015 +tr_pivot(const int *ISAd, int *first, int *last) {
  1016 + int *middle;
  1017 + int t;
  1018 +
  1019 + t = last - first;
  1020 + middle = first + t / 2;
  1021 +
  1022 + if(t <= 512) {
  1023 + if(t <= 32) {
  1024 + return tr_median3(ISAd, first, middle, last - 1);
  1025 + } else {
  1026 + t >>= 2;
  1027 + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
  1028 + }
  1029 + }
  1030 + t >>= 3;
  1031 + first = tr_median3(ISAd, first, first + t, first + (t << 1));
  1032 + middle = tr_median3(ISAd, middle - t, middle, middle + t);
  1033 + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
  1034 + return tr_median3(ISAd, first, middle, last);
  1035 +}
  1036 +
  1037 +
  1038 +/*---------------------------------------------------------------------------*/
  1039 +
  1040 +typedef struct _trbudget_t trbudget_t;
  1041 +struct _trbudget_t {
  1042 + int chance;
  1043 + int remain;
  1044 + int incval;
  1045 + int count;
  1046 +};
  1047 +
  1048 +static INLINE
  1049 +void
  1050 +trbudget_init(trbudget_t *budget, int chance, int incval) {
  1051 + budget->chance = chance;
  1052 + budget->remain = budget->incval = incval;
  1053 +}
  1054 +
  1055 +static INLINE
  1056 +int
  1057 +trbudget_check(trbudget_t *budget, int size) {
  1058 + if(size <= budget->remain) { budget->remain -= size; return 1; }
  1059 + if(budget->chance == 0) { budget->count += size; return 0; }
  1060 + budget->remain += budget->incval - size;
  1061 + budget->chance -= 1;
  1062 + return 1;
  1063 +}
  1064 +
  1065 +
  1066 +/*---------------------------------------------------------------------------*/
  1067 +
  1068 +static INLINE
  1069 +void
  1070 +tr_partition(const int *ISAd,
  1071 + int *first, int *middle, int *last,
  1072 + int **pa, int **pb, int v) {
  1073 + int *a, *b, *c, *d, *e, *f;
  1074 + int t, s;
  1075 + int x = 0;
  1076 +
  1077 + for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
  1078 + if(((a = b) < last) && (x < v)) {
  1079 + for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
  1080 + if(x == v) { SWAP(*b, *a); ++a; }
  1081 + }
  1082 + }
  1083 + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
  1084 + if((b < (d = c)) && (x > v)) {
  1085 + for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
  1086 + if(x == v) { SWAP(*c, *d); --d; }
  1087 + }
  1088 + }
  1089 + for(; b < c;) {
  1090 + SWAP(*b, *c);
  1091 + for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
  1092 + if(x == v) { SWAP(*b, *a); ++a; }
  1093 + }
  1094 + for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
  1095 + if(x == v) { SWAP(*c, *d); --d; }
  1096 + }
  1097 + }
  1098 +
  1099 + if(a <= d) {
  1100 + c = b - 1;
  1101 + if((s = a - first) > (t = b - a)) { s = t; }
  1102 + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
  1103 + if((s = d - c) > (t = last - d - 1)) { s = t; }
  1104 + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
  1105 + first += (b - a), last -= (d - c);
  1106 + }
  1107 + *pa = first, *pb = last;
  1108 +}
  1109 +
  1110 +static
  1111 +void
  1112 +tr_copy(int *ISA, const int *SA,
  1113 + int *first, int *a, int *b, int *last,
  1114 + int depth) {
  1115 + /* sort suffixes of middle partition
  1116 + by using sorted order of suffixes of left and right partition. */
  1117 + int *c, *d, *e;
  1118 + int s, v;
  1119 +
  1120 + v = b - SA - 1;
  1121 + for(c = first, d = a - 1; c <= d; ++c) {
  1122 + if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
  1123 + *++d = s;
  1124 + ISA[s] = d - SA;
  1125 + }
  1126 + }
  1127 + for(c = last - 1, e = d + 1, d = b; e < d; --c) {
  1128 + if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
  1129 + *--d = s;
  1130 + ISA[s] = d - SA;
  1131 + }
  1132 + }
  1133 +}
  1134 +
  1135 +static
  1136 +void
  1137 +tr_partialcopy(int *ISA, const int *SA,
  1138 + int *first, int *a, int *b, int *last,
  1139 + int depth) {
  1140 + int *c, *d, *e;
  1141 + int s, v;
  1142 + int rank, lastrank, newrank = -1;
  1143 +
  1144 + v = b - SA - 1;
  1145 + lastrank = -1;
  1146 + for(c = first, d = a - 1; c <= d; ++c) {
  1147 + if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
  1148 + *++d = s;
  1149 + rank = ISA[s + depth];
  1150 + if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
  1151 + ISA[s] = newrank;
  1152 + }
  1153 + }
  1154 +
  1155 + lastrank = -1;
  1156 + for(e = d; first <= e; --e) {
  1157 + rank = ISA[*e];
  1158 + if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
  1159 + if(newrank != rank) { ISA[*e] = newrank; }
  1160 + }
  1161 +
  1162 + lastrank = -1;
  1163 + for(c = last - 1, e = d + 1, d = b; e < d; --c) {
  1164 + if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
  1165 + *--d = s;
  1166 + rank = ISA[s + depth];
  1167 + if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
  1168 + ISA[s] = newrank;
  1169 + }
  1170 + }
  1171 +}
  1172 +
  1173 +static
  1174 +void
  1175 +tr_introsort(int *ISA, const int *ISAd,
  1176 + int *SA, int *first, int *last,
  1177 + trbudget_t *budget) {
  1178 +#define STACK_SIZE TR_STACKSIZE
  1179 + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
  1180 + int *a, *b, *c;
  1181 + int t;
  1182 + int v, x = 0;
  1183 + int incr = ISAd - ISA;
  1184 + int limit, next;
  1185 + int ssize, trlink = -1;
  1186 +
  1187 + for(ssize = 0, limit = tr_ilg(last - first);;) {
  1188 +
  1189 + if(limit < 0) {
  1190 + if(limit == -1) {
  1191 + /* tandem repeat partition */
  1192 + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
  1193 +
  1194 + /* update ranks */
  1195 + if(a < last) {
  1196 + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
  1197 + }
  1198 + if(b < last) {
  1199 + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
  1200 + }
  1201 +
  1202 + /* push */
  1203 + if(1 < (b - a)) {
  1204 + STACK_PUSH5(NULL, a, b, 0, 0);
  1205 + STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
  1206 + trlink = ssize - 2;
  1207 + }
  1208 + if((a - first) <= (last - b)) {
  1209 + if(1 < (a - first)) {
  1210 + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
  1211 + last = a, limit = tr_ilg(a - first);
  1212 + } else if(1 < (last - b)) {
  1213 + first = b, limit = tr_ilg(last - b);
  1214 + } else {
  1215 + STACK_POP5(ISAd, first, last, limit, trlink);
  1216 + }
  1217 + } else {
  1218 + if(1 < (last - b)) {
  1219 + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
  1220 + first = b, limit = tr_ilg(last - b);
  1221 + } else if(1 < (a - first)) {
  1222 + last = a, limit = tr_ilg(a - first);
  1223 + } else {
  1224 + STACK_POP5(ISAd, first, last, limit, trlink);
  1225 + }
  1226 + }
  1227 + } else if(limit == -2) {
  1228 + /* tandem repeat copy */
  1229 + a = stack[--ssize].b, b = stack[ssize].c;
  1230 + if(stack[ssize].d == 0) {
  1231 + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
  1232 + } else {
  1233 + if(0 <= trlink) { stack[trlink].d = -1; }
  1234 + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
  1235 + }
  1236 + STACK_POP5(ISAd, first, last, limit, trlink);
  1237 + } else {
  1238 + /* sorted partition */
  1239 + if(0 <= *first) {
  1240 + a = first;
  1241 + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
  1242 + first = a;
  1243 + }
  1244 + if(first < last) {
  1245 + a = first; do { *a = ~*a; } while(*++a < 0);
  1246 + next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
  1247 + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
  1248 +
  1249 + /* push */
  1250 + if(trbudget_check(budget, a - first)) {
  1251 + if((a - first) <= (last - a)) {
  1252 + STACK_PUSH5(ISAd, a, last, -3, trlink);
  1253 + ISAd += incr, last = a, limit = next;
  1254 + } else {
  1255 + if(1 < (last - a)) {
  1256 + STACK_PUSH5(ISAd + incr, first, a, next, trlink);
  1257 + first = a, limit = -3;
  1258 + } else {
  1259 + ISAd += incr, last = a, limit = next;
  1260 + }
  1261 + }
  1262 + } else {
  1263 + if(0 <= trlink) { stack[trlink].d = -1; }
  1264 + if(1 < (last - a)) {
  1265 + first = a, limit = -3;
  1266 + } else {
  1267 + STACK_POP5(ISAd, first, last, limit, trlink);
  1268 + }
  1269 + }
  1270 + } else {
  1271 + STACK_POP5(ISAd, first, last, limit, trlink);
  1272 + }
  1273 + }
  1274 + continue;
  1275 + }
  1276 +
  1277 + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
  1278 + tr_insertionsort(ISAd, first, last);
  1279 + limit = -3;
  1280 + continue;
  1281 + }
  1282 +
  1283 + if(limit-- == 0) {
  1284 + tr_heapsort(ISAd, first, last - first);
  1285 + for(a = last - 1; first < a; a = b) {
  1286 + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
  1287 + }
  1288 + limit = -3;
  1289 + continue;
  1290 + }
  1291 +
  1292 + /* choose pivot */
  1293 + a = tr_pivot(ISAd, first, last);
  1294 + SWAP(*first, *a);
  1295 + v = ISAd[*first];
  1296 +
  1297 + /* partition */
  1298 + tr_partition(ISAd, first, first + 1, last, &a, &b, v);
  1299 + if((last - first) != (b - a)) {
  1300 + next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
  1301 +
  1302 + /* update ranks */
  1303 + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
  1304 + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
  1305 +
  1306 + /* push */
  1307 + if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
  1308 + if((a - first) <= (last - b)) {
  1309 + if((last - b) <= (b - a)) {
  1310 + if(1 < (a - first)) {
  1311 + STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1312 + STACK_PUSH5(ISAd, b, last, limit, trlink);
  1313 + last = a;
  1314 + } else if(1 < (last - b)) {
  1315 + STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1316 + first = b;
  1317 + } else {
  1318 + ISAd += incr, first = a, last = b, limit = next;
  1319 + }
  1320 + } else if((a - first) <= (b - a)) {
  1321 + if(1 < (a - first)) {
  1322 + STACK_PUSH5(ISAd, b, last, limit, trlink);
  1323 + STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1324 + last = a;
  1325 + } else {
  1326 + STACK_PUSH5(ISAd, b, last, limit, trlink);
  1327 + ISAd += incr, first = a, last = b, limit = next;
  1328 + }
  1329 + } else {
  1330 + STACK_PUSH5(ISAd, b, last, limit, trlink);
  1331 + STACK_PUSH5(ISAd, first, a, limit, trlink);
  1332 + ISAd += incr, first = a, last = b, limit = next;
  1333 + }
  1334 + } else {
  1335 + if((a - first) <= (b - a)) {
  1336 + if(1 < (last - b)) {
  1337 + STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1338 + STACK_PUSH5(ISAd, first, a, limit, trlink);
  1339 + first = b;
  1340 + } else if(1 < (a - first)) {
  1341 + STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1342 + last = a;
  1343 + } else {
  1344 + ISAd += incr, first = a, last = b, limit = next;
  1345 + }
  1346 + } else if((last - b) <= (b - a)) {
  1347 + if(1 < (last - b)) {
  1348 + STACK_PUSH5(ISAd, first, a, limit, trlink);
  1349 + STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1350 + first = b;
  1351 + } else {
  1352 + STACK_PUSH5(ISAd, first, a, limit, trlink);
  1353 + ISAd += incr, first = a, last = b, limit = next;
  1354 + }
  1355 + } else {
  1356 + STACK_PUSH5(ISAd, first, a, limit, trlink);
  1357 + STACK_PUSH5(ISAd, b, last, limit, trlink);
  1358 + ISAd += incr, first = a, last = b, limit = next;
  1359 + }
  1360 + }
  1361 + } else {
  1362 + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
  1363 + if((a - first) <= (last - b)) {
  1364 + if(1 < (a - first)) {
  1365 + STACK_PUSH5(ISAd, b, last, limit, trlink);
  1366 + last = a;
  1367 + } else if(1 < (last - b)) {
  1368 + first = b;
  1369 + } else {
  1370 + STACK_POP5(ISAd, first, last, limit, trlink);
  1371 + }
  1372 + } else {
  1373 + if(1 < (last - b)) {
  1374 + STACK_PUSH5(ISAd, first, a, limit, trlink);
  1375 + first = b;
  1376 + } else if(1 < (a - first)) {
  1377 + last = a;
  1378 + } else {
  1379 + STACK_POP5(ISAd, first, last, limit, trlink);
  1380 + }
  1381 + }
  1382 + }
  1383 + } else {
  1384 + if(trbudget_check(budget, last - first)) {
  1385 + limit = tr_ilg(last - first), ISAd += incr;
  1386 + } else {
  1387 + if(0 <= trlink) { stack[trlink].d = -1; }
  1388 + STACK_POP5(ISAd, first, last, limit, trlink);
  1389 + }
  1390 + }
  1391 + }
  1392 +#undef STACK_SIZE
  1393 +}
  1394 +
  1395 +
  1396 +
  1397 +/*---------------------------------------------------------------------------*/
  1398 +
  1399 +/* Tandem repeat sort */
  1400 +static
  1401 +void
  1402 +trsort(int *ISA, int *SA, int n, int depth) {
  1403 + int *ISAd;
  1404 + int *first, *last;
  1405 + trbudget_t budget;
  1406 + int t, skip, unsorted;
  1407 +
  1408 + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
  1409 +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
  1410 + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
  1411 + first = SA;
  1412 + skip = 0;
  1413 + unsorted = 0;
  1414 + do {
  1415 + if((t = *first) < 0) { first -= t; skip += t; }
  1416 + else {
  1417 + if(skip != 0) { *(first + skip) = skip; skip = 0; }
  1418 + last = SA + ISA[t] + 1;
  1419 + if(1 < (last - first)) {
  1420 + budget.count = 0;
  1421 + tr_introsort(ISA, ISAd, SA, first, last, &budget);
  1422 + if(budget.count != 0) { unsorted += budget.count; }
  1423 + else { skip = first - last; }
  1424 + } else if((last - first) == 1) {
  1425 + skip = -1;
  1426 + }
  1427 + first = last;
  1428 + }
  1429 + } while(first < (SA + n));
  1430 + if(skip != 0) { *(first + skip) = skip; }
  1431 + if(unsorted == 0) { break; }
  1432 + }
  1433 +}
  1434 +
  1435 +
  1436 +/*---------------------------------------------------------------------------*/
  1437 +
  1438 +/* Sorts suffixes of type B*. */
  1439 +static
  1440 +int
  1441 +sort_typeBstar(const unsigned char *T, int *SA,
  1442 + int *bucket_A, int *bucket_B,
  1443 + int n, int openMP) {
  1444 + int *PAb, *ISAb, *buf;
  1445 +#ifdef LIBBSC_OPENMP
  1446 + int *curbuf;
  1447 + int l;
  1448 +#endif
  1449 + int i, j, k, t, m, bufsize;
  1450 + int c0, c1;
  1451 +#ifdef LIBBSC_OPENMP
  1452 + int d0, d1;
  1453 +#endif
  1454 + (void)openMP;
  1455 +
  1456 + /* Initialize bucket arrays. */
  1457 + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
  1458 + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
  1459 +
  1460 + /* Count the number of occurrences of the first one or two characters of each
  1461 + type A, B and B* suffix. Moreover, store the beginning position of all
  1462 + type B* suffixes into the array SA. */
  1463 + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
  1464 + /* type A suffix. */
  1465 + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
  1466 + if(0 <= i) {
  1467 + /* type B* suffix. */
  1468 + ++BUCKET_BSTAR(c0, c1);
  1469 + SA[--m] = i;
  1470 + /* type B suffix. */
  1471 + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
  1472 + ++BUCKET_B(c0, c1);
  1473 + }
  1474 + }
  1475 + }
  1476 + m = n - m;
  1477 +/*
  1478 +note:
  1479 + A type B* suffix is lexicographically smaller than a type B suffix that
  1480 + begins with the same first two characters.
  1481 +*/
  1482 +
  1483 + /* Calculate the index of start/end point of each bucket. */
  1484 + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
  1485 + t = i + BUCKET_A(c0);
  1486 + BUCKET_A(c0) = i + j; /* start point */
  1487 + i = t + BUCKET_B(c0, c0);
  1488 + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
  1489 + j += BUCKET_BSTAR(c0, c1);
  1490 + BUCKET_BSTAR(c0, c1) = j; /* end point */
  1491 + i += BUCKET_B(c0, c1);
  1492 + }
  1493 + }
  1494 +
  1495 + if(0 < m) {
  1496 + /* Sort the type B* suffixes by their first two characters. */
  1497 + PAb = SA + n - m; ISAb = SA + m;
  1498 + for(i = m - 2; 0 <= i; --i) {
  1499 + t = PAb[i], c0 = T[t], c1 = T[t + 1];
  1500 + SA[--BUCKET_BSTAR(c0, c1)] = i;
  1501 + }
  1502 + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
  1503 + SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
  1504 +
  1505 + /* Sort the type B* substrings using sssort. */
  1506 +#ifdef LIBBSC_OPENMP
  1507 + if (openMP)
  1508 + {
  1509 + buf = SA + m;
  1510 + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
  1511 +#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
  1512 + {
  1513 + bufsize = (n - (2 * m)) / omp_get_num_threads();
  1514 + curbuf = buf + omp_get_thread_num() * bufsize;
  1515 + k = 0;
  1516 + for(;;) {
  1517 + #pragma omp critical(sssort_lock)
  1518 + {
  1519 + if(0 < (l = j)) {
  1520 + d0 = c0, d1 = c1;
  1521 + do {
  1522 + k = BUCKET_BSTAR(d0, d1);
  1523 + if(--d1 <= d0) {
  1524 + d1 = ALPHABET_SIZE - 1;
  1525 + if(--d0 < 0) { break; }
  1526 + }
  1527 + } while(((l - k) <= 1) && (0 < (l = k)));
  1528 + c0 = d0, c1 = d1, j = k;
  1529 + }
  1530 + }
  1531 + if(l == 0) { break; }
  1532 + sssort(T, PAb, SA + k, SA + l,
  1533 + curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
  1534 + }
  1535 + }
  1536 + }
  1537 + else
  1538 + {
  1539 + buf = SA + m, bufsize = n - (2 * m);
  1540 + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
  1541 + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
  1542 + i = BUCKET_BSTAR(c0, c1);
  1543 + if(1 < (j - i)) {
  1544 + sssort(T, PAb, SA + i, SA + j,
  1545 + buf, bufsize, 2, n, *(SA + i) == (m - 1));
  1546 + }
  1547 + }
  1548 + }
  1549 + }
  1550 +#else
  1551 + buf = SA + m, bufsize = n - (2 * m);
  1552 + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
  1553 + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
  1554 + i = BUCKET_BSTAR(c0, c1);
  1555 + if(1 < (j - i)) {
  1556 + sssort(T, PAb, SA + i, SA + j,
  1557 + buf, bufsize, 2, n, *(SA + i) == (m - 1));
  1558 + }
  1559 + }
  1560 + }
  1561 +#endif
  1562 +
  1563 + /* Compute ranks of type B* substrings. */
  1564 + for(i = m - 1; 0 <= i; --i) {
  1565 + if(0 <= SA[i]) {
  1566 + j = i;
  1567 + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
  1568 + SA[i + 1] = i - j;
  1569 + if(i <= 0) { break; }
  1570 + }
  1571 + j = i;
  1572 + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
  1573 + ISAb[SA[i]] = j;
  1574 + }
  1575 +
  1576 + /* Construct the inverse suffix array of type B* suffixes using trsort. */
  1577 + trsort(ISAb, SA, m, 1);
  1578 +
  1579 + /* Set the sorted order of tyoe B* suffixes. */
  1580 + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
  1581 + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
  1582 + if(0 <= i) {
  1583 + t = i;
  1584 + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
  1585 + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
  1586 + }
  1587 + }
  1588 +
  1589 + /* Calculate the index of start/end point of each bucket. */
  1590 + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
  1591 + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
  1592 + i = BUCKET_A(c0 + 1) - 1;
  1593 + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
  1594 + t = i - BUCKET_B(c0, c1);
  1595 + BUCKET_B(c0, c1) = i; /* end point */
  1596 +
  1597 + /* Move all type B* suffixes to the correct position. */
  1598 + for(i = t, j = BUCKET_BSTAR(c0, c1);
  1599 + j <= k;
  1600 + --i, --k) { SA[i] = SA[k]; }
  1601 + }
  1602 + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
  1603 + BUCKET_B(c0, c0) = i; /* end point */
  1604 + }
  1605 + }
  1606 +
  1607 + return m;
  1608 +}
  1609 +
  1610 +/* Constructs the suffix array by using the sorted order of type B* suffixes. */
  1611 +static
  1612 +void
  1613 +construct_SA(const unsigned char *T, int *SA,
  1614 + int *bucket_A, int *bucket_B,
  1615 + int n, int m) {
  1616 + int *i, *j, *k;
  1617 + int s;
  1618 + int c0, c1, c2;
  1619 +
  1620 + if(0 < m) {
  1621 + /* Construct the sorted order of type B suffixes by using
  1622 + the sorted order of type B* suffixes. */
  1623 + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
  1624 + /* Scan the suffix array from right to left. */
  1625 + for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
  1626 + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
  1627 + i <= j;
  1628 + --j) {
  1629 + if(0 < (s = *j)) {
  1630 + assert(T[s] == c1);
  1631 + assert(((s + 1) < n) && (T[s] <= T[s + 1]));
  1632 + assert(T[s - 1] <= T[s]);
  1633 + *j = ~s;
  1634 + c0 = T[--s];
  1635 + if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
  1636 + if(c0 != c2) {
  1637 + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
  1638 + k = SA + BUCKET_B(c2 = c0, c1);
  1639 + }
  1640 + assert(k < j); assert(k != NULL);
  1641 + *k-- = s;
  1642 + } else {
  1643 + assert(((s == 0) && (T[s] == c1)) || (s < 0));
  1644 + *j = ~s;
  1645 + }
  1646 + }
  1647 + }
  1648 + }
  1649 +
  1650 + /* Construct the suffix array by using
  1651 + the sorted order of type B suffixes. */
  1652 + k = SA + BUCKET_A(c2 = T[n - 1]);
  1653 + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
  1654 + /* Scan the suffix array from left to right. */
  1655 + for(i = SA, j = SA + n; i < j; ++i) {
  1656 + if(0 < (s = *i)) {
  1657 + assert(T[s - 1] >= T[s]);
  1658 + c0 = T[--s];
  1659 + if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
  1660 + if(c0 != c2) {
  1661 + BUCKET_A(c2) = k - SA;
  1662 + k = SA + BUCKET_A(c2 = c0);
  1663 + }
  1664 + assert(i < k);
  1665 + *k++ = s;
  1666 + } else {
  1667 + assert(s < 0);
  1668 + *i = ~s;
  1669 + }
  1670 + }
  1671 +}
  1672 +
  1673 +/* Constructs the burrows-wheeler transformed string directly
  1674 + by using the sorted order of type B* suffixes. */
  1675 +static
  1676 +int
  1677 +construct_BWT(const unsigned char *T, int *SA,
  1678 + int *bucket_A, int *bucket_B,
  1679 + int n, int m) {
  1680 + int *i, *j, *k, *orig;
  1681 + int s;
  1682 + int c0, c1, c2;
  1683 +
  1684 + if(0 < m) {
  1685 + /* Construct the sorted order of type B suffixes by using
  1686 + the sorted order of type B* suffixes. */
  1687 + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
  1688 + /* Scan the suffix array from right to left. */
  1689 + for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
  1690 + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
  1691 + i <= j;
  1692 + --j) {
  1693 + if(0 < (s = *j)) {
  1694 + assert(T[s] == c1);
  1695 + assert(((s + 1) < n) && (T[s] <= T[s + 1]));
  1696 + assert(T[s - 1] <= T[s]);
  1697 + c0 = T[--s];
  1698 + *j = ~((int)c0);
  1699 + if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
  1700 + if(c0 != c2) {
  1701 + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
  1702 + k = SA + BUCKET_B(c2 = c0, c1);
  1703 + }
  1704 + assert(k < j); assert(k != NULL);
  1705 + *k-- = s;
  1706 + } else if(s != 0) {
  1707 + *j = ~s;
  1708 +#ifndef NDEBUG
  1709 + } else {
  1710 + assert(T[s] == c1);
  1711 +#endif
  1712 + }
  1713 + }
  1714 + }
  1715 + }
  1716 +
  1717 + /* Construct the BWTed string by using
  1718 + the sorted order of type B suffixes. */
  1719 + k = SA + BUCKET_A(c2 = T[n - 1]);
  1720 + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
  1721 + /* Scan the suffix array from left to right. */
  1722 + for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
  1723 + if(0 < (s = *i)) {
  1724 + assert(T[s - 1] >= T[s]);
  1725 + c0 = T[--s];
  1726 + *i = c0;
  1727 + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
  1728 + if(c0 != c2) {
  1729 + BUCKET_A(c2) = k - SA;
  1730 + k = SA + BUCKET_A(c2 = c0);
  1731 + }
  1732 + assert(i < k);
  1733 + *k++ = s;
  1734 + } else if(s != 0) {
  1735 + *i = ~s;
  1736 + } else {
  1737 + orig = i;
  1738 + }
  1739 + }
  1740 +
  1741 + return orig - SA;
  1742 +}
  1743 +
  1744 +/* Constructs the burrows-wheeler transformed string directly
  1745 + by using the sorted order of type B* suffixes. */
  1746 +static
  1747 +int
  1748 +construct_BWT_indexes(const unsigned char *T, int *SA,
  1749 + int *bucket_A, int *bucket_B,
  1750 + int n, int m,
  1751 + unsigned char * num_indexes, int * indexes) {
  1752 + int *i, *j, *k, *orig;
  1753 + int s;
  1754 + int c0, c1, c2;
  1755 +
  1756 + int mod = n / 8;
  1757 + {
  1758 + mod |= mod >> 1; mod |= mod >> 2;
  1759 + mod |= mod >> 4; mod |= mod >> 8;
  1760 + mod |= mod >> 16; mod >>= 1;
  1761 +
  1762 + *num_indexes = (unsigned char)((n - 1) / (mod + 1));
  1763 + }
  1764 +
  1765 + if(0 < m) {
  1766 + /* Construct the sorted order of type B suffixes by using
  1767 + the sorted order of type B* suffixes. */
  1768 + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
  1769 + /* Scan the suffix array from right to left. */
  1770 + for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
  1771 + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
  1772 + i <= j;
  1773 + --j) {
  1774 + if(0 < (s = *j)) {
  1775 + assert(T[s] == c1);
  1776 + assert(((s + 1) < n) && (T[s] <= T[s + 1]));
  1777 + assert(T[s - 1] <= T[s]);
  1778 +
  1779 + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
  1780 +
  1781 + c0 = T[--s];
  1782 + *j = ~((int)c0);
  1783 + if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
  1784 + if(c0 != c2) {
  1785 + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
  1786 + k = SA + BUCKET_B(c2 = c0, c1);
  1787 + }
  1788 + assert(k < j); assert(k != NULL);
  1789 + *k-- = s;
  1790 + } else if(s != 0) {
  1791 + *j = ~s;
  1792 +#ifndef NDEBUG
  1793 + } else {
  1794 + assert(T[s] == c1);
  1795 +#endif
  1796 + }
  1797 + }
  1798 + }
  1799 + }
  1800 +
  1801 + /* Construct the BWTed string by using
  1802 + the sorted order of type B suffixes. */
  1803 + k = SA + BUCKET_A(c2 = T[n - 1]);
  1804 + if (T[n - 2] < c2) {
  1805 + if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
  1806 + *k++ = ~((int)T[n - 2]);
  1807 + }
  1808 + else {
  1809 + *k++ = n - 1;
  1810 + }
  1811 +
  1812 + /* Scan the suffix array from left to right. */
  1813 + for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
  1814 + if(0 < (s = *i)) {
  1815 + assert(T[s - 1] >= T[s]);
  1816 +
  1817 + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
  1818 +
  1819 + c0 = T[--s];
  1820 + *i = c0;
  1821 + if(c0 != c2) {
  1822 + BUCKET_A(c2) = k - SA;
  1823 + k = SA + BUCKET_A(c2 = c0);
  1824 + }
  1825 + assert(i < k);
  1826 + if((0 < s) && (T[s - 1] < c0)) {
  1827 + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
  1828 + *k++ = ~((int)T[s - 1]);
  1829 + } else
  1830 + *k++ = s;
  1831 + } else if(s != 0) {
  1832 + *i = ~s;
  1833 + } else {
  1834 + orig = i;
  1835 + }
  1836 + }
  1837 +
  1838 + return orig - SA;
  1839 +}
  1840 +
  1841 +
  1842 +/*---------------------------------------------------------------------------*/
  1843 +
  1844 +/*- Function -*/
  1845 +
  1846 +int
  1847 +divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
  1848 + int *bucket_A, *bucket_B;
  1849 + int m;
  1850 + int err = 0;
  1851 +
  1852 + /* Check arguments. */
  1853 + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
  1854 + else if(n == 0) { return 0; }
  1855 + else if(n == 1) { SA[0] = 0; return 0; }
  1856 + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
  1857 +
  1858 + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
  1859 + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
  1860 +
  1861 + /* Suffixsort. */
  1862 + if((bucket_A != NULL) && (bucket_B != NULL)) {
  1863 + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
  1864 + construct_SA(T, SA, bucket_A, bucket_B, n, m);
  1865 + } else {
  1866 + err = -2;
  1867 + }
  1868 +
  1869 + free(bucket_B);
  1870 + free(bucket_A);
  1871 +
  1872 + return err;
  1873 +}
  1874 +
  1875 +int
  1876 +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
  1877 + int *B;
  1878 + int *bucket_A, *bucket_B;
  1879 + int m, pidx, i;
  1880 +
  1881 + /* Check arguments. */
  1882 + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
  1883 + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
  1884 +
  1885 + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
  1886 + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
  1887 + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
  1888 +
  1889 + /* Burrows-Wheeler Transform. */
  1890 + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
  1891 + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
  1892 +
  1893 + if (num_indexes == NULL || indexes == NULL) {
  1894 + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
  1895 + } else {
  1896 + pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes);
  1897 + }
  1898 +
  1899 + /* Copy to output string. */
  1900 + U[0] = T[n - 1];
  1901 + for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
  1902 + for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
  1903 + pidx += 1;
  1904 + } else {
  1905 + pidx = -2;
  1906 + }
  1907 +
  1908 + free(bucket_B);
  1909 + free(bucket_A);
  1910 + if(A == NULL) { free(B); }
  1911 +
  1912 + return pidx;
  1913 +}
  1 +/*
  2 + * divsufsort.h for libdivsufsort-lite
  3 + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
  4 + *
  5 + * Permission is hereby granted, free of charge, to any person
  6 + * obtaining a copy of this software and associated documentation
  7 + * files (the "Software"), to deal in the Software without
  8 + * restriction, including without limitation the rights to use,
  9 + * copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 + * copies of the Software, and to permit persons to whom the
  11 + * Software is furnished to do so, subject to the following
  12 + * conditions:
  13 + *
  14 + * The above copyright notice and this permission notice shall be
  15 + * included in all copies or substantial portions of the Software.
  16 + *
  17 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  19 + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  20 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  21 + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  22 + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24 + * OTHER DEALINGS IN THE SOFTWARE.
  25 + */
  26 +
  27 +#ifndef _DIVSUFSORT_H
  28 +#define _DIVSUFSORT_H 1
  29 +
  30 +#ifdef __cplusplus
  31 +extern "C" {
  32 +#endif /* __cplusplus */
  33 +
  34 +
  35 +/*- Prototypes -*/
  36 +
  37 +/**
  38 + * Constructs the suffix array of a given string.
  39 + * @param T [0..n-1] The input string.
  40 + * @param SA [0..n-1] The output array of suffixes.
  41 + * @param n The length of the given string.
  42 + * @param openMP enables OpenMP optimization.
  43 + * @return 0 if no error occurred, -1 or -2 otherwise.
  44 + */
  45 +int
  46 +divsufsort(const unsigned char *T, int *SA, int n, int openMP);
  47 +
  48 +/**
  49 + * Constructs the burrows-wheeler transformed string of a given string.
  50 + * @param T [0..n-1] The input string.
  51 + * @param U [0..n-1] The output string. (can be T)
  52 + * @param A [0..n-1] The temporary array. (can be NULL)
  53 + * @param n The length of the given string.
  54 + * @param num_indexes The length of secondary indexes array. (can be NULL)
  55 + * @param indexes The secondary indexes array. (can be NULL)
  56 + * @param openMP enables OpenMP optimization.
  57 + * @return The primary index if no error occurred, -1 or -2 otherwise.
  58 + */
  59 +int
  60 +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP);
  61 +
  62 +
  63 +#ifdef __cplusplus
  64 +} /* extern "C" */
  65 +#endif /* __cplusplus */
  66 +
  67 +#endif /* _DIVSUFSORT_H */
  1 +/*
  2 + Common functions of New Generation Entropy library
  3 + Copyright (C) 2016, Yann Collet.
  4 +
  5 + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  6 +
  7 + Redistribution and use in source and binary forms, with or without
  8 + modification, are permitted provided that the following conditions are
  9 + met:
  10 +
  11 + * Redistributions of source code must retain the above copyright
  12 + notice, this list of conditions and the following disclaimer.
  13 + * Redistributions in binary form must reproduce the above
  14 + copyright notice, this list of conditions and the following disclaimer
  15 + in the documentation and/or other materials provided with the
  16 + distribution.
  17 +
  18 + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19 + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20 + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21 + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22 + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23 + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24 + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25 + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26 + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27 + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28 + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29 +
  30 + You can contact the author at :
  31 + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
  32 + - Public forum : https://groups.google.com/forum/#!forum/lz4c
  33 +*************************************************************************** */
  34 +
  35 +/* *************************************
  36 +* Dependencies
  37 +***************************************/
  38 +#include "mem.h"
  39 +#include "error_private.h" /* ERR_*, ERROR */
  40 +#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
  41 +#include "fse.h"
  42 +#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
  43 +#include "huf.h"
  44 +
  45 +
  46 +/*=== Version ===*/
  47 +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
  48 +
  49 +
  50 +/*=== Error Management ===*/
  51 +unsigned FSE_isError(size_t code) { return ERR_isError(code); }
  52 +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
  53 +
  54 +unsigned HUF_isError(size_t code) { return ERR_isError(code); }
  55 +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
  56 +
  57 +
  58 +/*-**************************************************************
  59 +* FSE NCount encoding-decoding
  60 +****************************************************************/
  61 +size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
  62 + const void* headerBuffer, size_t hbSize)
  63 +{
  64 + const BYTE* const istart = (const BYTE*) headerBuffer;
  65 + const BYTE* const iend = istart + hbSize;
  66 + const BYTE* ip = istart;
  67 + int nbBits;
  68 + int remaining;
  69 + int threshold;
  70 + U32 bitStream;
  71 + int bitCount;
  72 + unsigned charnum = 0;
  73 + int previous0 = 0;
  74 +
  75 + if (hbSize < 4) {
  76 + /* This function only works when hbSize >= 4 */
  77 + char buffer[4];
  78 + memset(buffer, 0, sizeof(buffer));
  79 + memcpy(buffer, headerBuffer, hbSize);
  80 + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
  81 + buffer, sizeof(buffer));
  82 + if (FSE_isError(countSize)) return countSize;
  83 + if (countSize > hbSize) return ERROR(corruption_detected);
  84 + return countSize;
  85 + } }
  86 + assert(hbSize >= 4);
  87 +
  88 + /* init */
  89 + memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
  90 + bitStream = MEM_readLE32(ip);
  91 + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
  92 + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
  93 + bitStream >>= 4;
  94 + bitCount = 4;
  95 + *tableLogPtr = nbBits;
  96 + remaining = (1<<nbBits)+1;
  97 + threshold = 1<<nbBits;
  98 + nbBits++;
  99 +
  100 + while ((remaining>1) & (charnum<=*maxSVPtr)) {
  101 + if (previous0) {
  102 + unsigned n0 = charnum;
  103 + while ((bitStream & 0xFFFF) == 0xFFFF) {
  104 + n0 += 24;
  105 + if (ip < iend-5) {
  106 + ip += 2;
  107 + bitStream = MEM_readLE32(ip) >> bitCount;
  108 + } else {
  109 + bitStream >>= 16;
  110 + bitCount += 16;
  111 + } }
  112 + while ((bitStream & 3) == 3) {
  113 + n0 += 3;
  114 + bitStream >>= 2;
  115 + bitCount += 2;
  116 + }
  117 + n0 += bitStream & 3;
  118 + bitCount += 2;
  119 + if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
  120 + while (charnum < n0) normalizedCounter[charnum++] = 0;
  121 + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
  122 + assert((bitCount >> 3) <= 3); /* For first condition to work */
  123 + ip += bitCount>>3;
  124 + bitCount &= 7;
  125 + bitStream = MEM_readLE32(ip) >> bitCount;
  126 + } else {
  127 + bitStream >>= 2;
  128 + } }
  129 + { int const max = (2*threshold-1) - remaining;
  130 + int count;
  131 +
  132 + if ((bitStream & (threshold-1)) < (U32)max) {
  133 + count = bitStream & (threshold-1);
  134 + bitCount += nbBits-1;
  135 + } else {
  136 + count = bitStream & (2*threshold-1);
  137 + if (count >= threshold) count -= max;
  138 + bitCount += nbBits;
  139 + }
  140 +
  141 + count--; /* extra accuracy */
  142 + remaining -= count < 0 ? -count : count; /* -1 means +1 */
  143 + normalizedCounter[charnum++] = (short)count;
  144 + previous0 = !count;
  145 + while (remaining < threshold) {
  146 + nbBits--;
  147 + threshold >>= 1;
  148 + }
  149 +
  150 + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
  151 + ip += bitCount>>3;
  152 + bitCount &= 7;
  153 + } else {
  154 + bitCount -= (int)(8 * (iend - 4 - ip));
  155 + ip = iend - 4;
  156 + }
  157 + bitStream = MEM_readLE32(ip) >> (bitCount & 31);
  158 + } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
  159 + if (remaining != 1) return ERROR(corruption_detected);
  160 + if (bitCount > 32) return ERROR(corruption_detected);
  161 + *maxSVPtr = charnum-1;
  162 +
  163 + ip += (bitCount+7)>>3;
  164 + return ip-istart;
  165 +}
  166 +
  167 +
  168 +/*! HUF_readStats() :
  169 + Read compact Huffman tree, saved by HUF_writeCTable().
  170 + `huffWeight` is destination buffer.
  171 + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
  172 + @return : size read from `src` , or an error Code .
  173 + Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
  174 +*/
  175 +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
  176 + U32* nbSymbolsPtr, U32* tableLogPtr,
  177 + const void* src, size_t srcSize)
  178 +{
  179 + U32 weightTotal;
  180 + const BYTE* ip = (const BYTE*) src;
  181 + size_t iSize;
  182 + size_t oSize;
  183 +
  184 + if (!srcSize) return ERROR(srcSize_wrong);
  185 + iSize = ip[0];
  186 + /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
  187 +
  188 + if (iSize >= 128) { /* special header */
  189 + oSize = iSize - 127;
  190 + iSize = ((oSize+1)/2);
  191 + if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
  192 + if (oSize >= hwSize) return ERROR(corruption_detected);
  193 + ip += 1;
  194 + { U32 n;
  195 + for (n=0; n<oSize; n+=2) {
  196 + huffWeight[n] = ip[n/2] >> 4;
  197 + huffWeight[n+1] = ip[n/2] & 15;
  198 + } } }
  199 + else { /* header compressed with FSE (normal case) */
  200 + FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
  201 + if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
  202 + oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
  203 + if (FSE_isError(oSize)) return oSize;
  204 + }
  205 +
  206 + /* collect weight stats */
  207 + memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
  208 + weightTotal = 0;
  209 + { U32 n; for (n=0; n<oSize; n++) {
  210 + if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
  211 + rankStats[huffWeight[n]]++;
  212 + weightTotal += (1 << huffWeight[n]) >> 1;
  213 + } }
  214 + if (weightTotal == 0) return ERROR(corruption_detected);
  215 +
  216 + /* get last non-null symbol weight (implied, total must be 2^n) */
  217 + { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
  218 + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
  219 + *tableLogPtr = tableLog;
  220 + /* determine last weight */
  221 + { U32 const total = 1 << tableLog;
  222 + U32 const rest = total - weightTotal;
  223 + U32 const verif = 1 << BIT_highbit32(rest);
  224 + U32 const lastWeight = BIT_highbit32(rest) + 1;
  225 + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
  226 + huffWeight[oSize] = (BYTE)lastWeight;
  227 + rankStats[lastWeight]++;
  228 + } }
  229 +
  230 + /* check tree construction validity */
  231 + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
  232 +
  233 + /* results */
  234 + *nbSymbolsPtr = (U32)(oSize+1);
  235 + return iSize+1;
  236 +}