正在显示
24 个修改的文件
包含
4858 行增加
和
0 行删除
| @@ -4,6 +4,7 @@ go 1.14 | @@ -4,6 +4,7 @@ go 1.14 | ||
| 4 | 4 | ||
| 5 | require ( | 5 | require ( |
| 6 | github.com/GeeTeam/gt3-golang-sdk v0.0.0-20200116043922-446ca8a507d2 | 6 | github.com/GeeTeam/gt3-golang-sdk v0.0.0-20200116043922-446ca8a507d2 |
| 7 | + github.com/Shopify/sarama v1.23.1 | ||
| 7 | github.com/ajg/form v1.5.1 // indirect | 8 | github.com/ajg/form v1.5.1 // indirect |
| 8 | github.com/astaxie/beego v1.12.2 | 9 | github.com/astaxie/beego v1.12.2 |
| 9 | github.com/dgrijalva/jwt-go v3.2.0+incompatible | 10 | github.com/dgrijalva/jwt-go v3.2.0+incompatible |
| 1 | package main | 1 | package main |
| 2 | 2 | ||
| 3 | import ( | 3 | import ( |
| 4 | + "context" | ||
| 5 | + "fmt" | ||
| 6 | + "os" | ||
| 7 | + "os/signal" | ||
| 8 | + "syscall" | ||
| 9 | + | ||
| 4 | "github.com/astaxie/beego" | 10 | "github.com/astaxie/beego" |
| 5 | "github.com/astaxie/beego/logs" | 11 | "github.com/astaxie/beego/logs" |
| 6 | _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/pg" | 12 | _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/pg" |
| 7 | _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/log" | 13 | _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/log" |
| 8 | _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/port/beego" | 14 | _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/port/beego" |
| 15 | + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/port/consumer" | ||
| 9 | ) | 16 | ) |
| 10 | 17 | ||
| 11 | func main() { | 18 | func main() { |
| 19 | + sigs := make(chan os.Signal, 1) | ||
| 20 | + signal.Notify(sigs, os.Interrupt, os.Kill, syscall.SIGINT, syscall.SIGTERM) | ||
| 21 | + ctx, cancel := context.WithCancel(context.Background()) | ||
| 22 | + closeConsumer, err := consumer.StartConsumer(ctx) | ||
| 23 | + if err != nil { | ||
| 24 | + fmt.Printf("启动kafka消息消费者失败 err%s \n", err) | ||
| 25 | + logs.Error("启动kafka消息消费者失败:%s", err) | ||
| 26 | + return | ||
| 27 | + } | ||
| 28 | + go func() { | ||
| 29 | + select { | ||
| 30 | + case <-sigs: | ||
| 31 | + cancel() | ||
| 32 | + closeConsumer() | ||
| 33 | + } | ||
| 34 | + }() | ||
| 35 | + | ||
| 12 | logs.Info("应用启动") | 36 | logs.Info("应用启动") |
| 13 | beego.Run() | 37 | beego.Run() |
| 14 | } | 38 | } |
| @@ -21,6 +21,14 @@ func CreateOrderBaseDao(options map[string]interface{}) (*dao.OrderBaseDao, erro | @@ -21,6 +21,14 @@ func CreateOrderBaseDao(options map[string]interface{}) (*dao.OrderBaseDao, erro | ||
| 21 | return dao.NewOrderBaseDao(transactionContext) | 21 | return dao.NewOrderBaseDao(transactionContext) |
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | +func CreateOrderBestshopDao(options map[string]interface{}) (*dao.OrderBestshopDao, error) { | ||
| 25 | + var transactionContext *transaction.TransactionContext | ||
| 26 | + if value, ok := options["transactionContext"]; ok { | ||
| 27 | + transactionContext = value.(*transaction.TransactionContext) | ||
| 28 | + } | ||
| 29 | + return dao.NewOrderBestshopDao(transactionContext) | ||
| 30 | +} | ||
| 31 | + | ||
| 24 | func CreateUsersDao(options map[string]interface{}) (*dao.UsersDao, error) { | 32 | func CreateUsersDao(options map[string]interface{}) (*dao.UsersDao, error) { |
| 25 | var transactionContext *transaction.TransactionContext | 33 | var transactionContext *transaction.TransactionContext |
| 26 | if value, ok := options["transactionContext"]; ok { | 34 | if value, ok := options["transactionContext"]; ok { |
| @@ -8,6 +8,8 @@ type CreateOrderFromBestshop struct { | @@ -8,6 +8,8 @@ type CreateOrderFromBestshop struct { | ||
| 8 | OrderCode string `json:"orderCode"` | 8 | OrderCode string `json:"orderCode"` |
| 9 | //下单时间 | 9 | //下单时间 |
| 10 | OrderTime string `json:"orderTime"` | 10 | OrderTime string `json:"orderTime"` |
| 11 | + //公司id | ||
| 12 | + CompanyId int64 `json:"companyId"` | ||
| 11 | //订单状态 | 13 | //订单状态 |
| 12 | OrderState int8 `json:"orderState"` | 14 | OrderState int8 `json:"orderState"` |
| 13 | //发货状态 | 15 | //发货状态 |
| @@ -4,6 +4,8 @@ import ( | @@ -4,6 +4,8 @@ import ( | ||
| 4 | "fmt" | 4 | "fmt" |
| 5 | "time" | 5 | "time" |
| 6 | 6 | ||
| 7 | + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/dao" | ||
| 8 | + | ||
| 7 | "github.com/astaxie/beego/logs" | 9 | "github.com/astaxie/beego/logs" |
| 8 | 10 | ||
| 9 | "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/application/factory" | 11 | "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/application/factory" |
| @@ -37,6 +39,25 @@ func (s SyncOrderService) SyncOrderFromBestshop(cmd command.CreateOrderFromBests | @@ -37,6 +39,25 @@ func (s SyncOrderService) SyncOrderFromBestshop(cmd command.CreateOrderFromBests | ||
| 37 | defer func() { | 39 | defer func() { |
| 38 | transactionContext.RollbackTransaction() | 40 | transactionContext.RollbackTransaction() |
| 39 | }() | 41 | }() |
| 42 | + | ||
| 43 | + //检查账号是否存在 | ||
| 44 | + var ( | ||
| 45 | + orderBestshopDao *dao.OrderBestshopDao | ||
| 46 | + ) | ||
| 47 | + if orderBestshopDao, err = factory.CreateOrderBestshopDao(map[string]interface{}{ | ||
| 48 | + "transactionContext": transactionContext, | ||
| 49 | + }); err != nil { | ||
| 50 | + return lib.ThrowError(lib.TRANSACTION_ERROR, err.Error()) | ||
| 51 | + } | ||
| 52 | + ok, err := orderBestshopDao.OrderExist(cmd.OrderCode) | ||
| 53 | + if err != nil { | ||
| 54 | + return lib.ThrowError(lib.TRANSACTION_ERROR, err.Error()) | ||
| 55 | + } | ||
| 56 | + if ok { | ||
| 57 | + logs.Info("订单已存在,order_code=%s", cmd.OrderCode) | ||
| 58 | + return nil | ||
| 59 | + } | ||
| 60 | + | ||
| 40 | var ( | 61 | var ( |
| 41 | orderBestshopRepository domain.OrderBestshopRepository | 62 | orderBestshopRepository domain.OrderBestshopRepository |
| 42 | orderGoodBestshopRepository domain.OrderGoodBestshopRepository | 63 | orderGoodBestshopRepository domain.OrderGoodBestshopRepository |
| @@ -173,11 +194,25 @@ func (s SyncOrderService) copyOrderBestshopToOrderBase(orderBestshop *domain.Ord | @@ -173,11 +194,25 @@ func (s SyncOrderService) copyOrderBestshopToOrderBase(orderBestshop *domain.Ord | ||
| 173 | ordergoods []domain.OrderGood | 194 | ordergoods []domain.OrderGood |
| 174 | ) | 195 | ) |
| 175 | //TODO 添加orderBase | 196 | //TODO 添加orderBase |
| 197 | + orderBestshop.CopyToOrderBase(&orderbase) | ||
| 198 | + orderbase.CompanyId = companyData.Id | ||
| 199 | + for i := range orderBestshop.Goods { | ||
| 200 | + good := domain.NewOrderGood() | ||
| 201 | + orderBestshop.Goods[i].CopyToOrderGood(&good) | ||
| 202 | + good.OrderId = orderbase.Id | ||
| 203 | + good.Compute() | ||
| 204 | + ordergoods = append(ordergoods, good) | ||
| 205 | + } | ||
| 206 | + orderbase.Goods = ordergoods | ||
| 207 | + orderbase.Compute() | ||
| 176 | err = orderBaseRepository.Save(&orderbase) | 208 | err = orderBaseRepository.Save(&orderbase) |
| 177 | if err != nil { | 209 | if err != nil { |
| 178 | e := fmt.Sprintf("添加order_base数据失败%s", err) | 210 | e := fmt.Sprintf("添加order_base数据失败%s", err) |
| 179 | return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, e) | 211 | return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, e) |
| 180 | } | 212 | } |
| 213 | + for i := range ordergoods { | ||
| 214 | + ordergoods[i].OrderId = orderbase.Id | ||
| 215 | + } | ||
| 181 | //TODO 添加goods | 216 | //TODO 添加goods |
| 182 | err = orderGoodRepository.Save(ordergoods) | 217 | err = orderGoodRepository.Save(ordergoods) |
| 183 | if err != nil { | 218 | if err != nil { |
| @@ -185,6 +220,7 @@ func (s SyncOrderService) copyOrderBestshopToOrderBase(orderBestshop *domain.Ord | @@ -185,6 +220,7 @@ func (s SyncOrderService) copyOrderBestshopToOrderBase(orderBestshop *domain.Ord | ||
| 185 | return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, e) | 220 | return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, e) |
| 186 | } | 221 | } |
| 187 | //TODO 更新isCopy | 222 | //TODO 更新isCopy |
| 223 | + orderBestshop.IsCopy = true | ||
| 188 | err = orderBestshopRepository.Edit(orderBestshop) | 224 | err = orderBestshopRepository.Edit(orderBestshop) |
| 189 | if err != nil { | 225 | if err != nil { |
| 190 | return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, err.Error()) | 226 | return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, err.Error()) |
pkg/infrastructure/dao/pg_order_bestshop.go
0 → 100644
| 1 | +package dao | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + "fmt" | ||
| 5 | + | ||
| 6 | + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/pg/models" | ||
| 7 | + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/pg/transaction" | ||
| 8 | +) | ||
| 9 | + | ||
| 10 | +type OrderBestshopDao struct { | ||
| 11 | + transactionContext *transaction.TransactionContext | ||
| 12 | +} | ||
| 13 | + | ||
| 14 | +func NewOrderBestshopDao(transactionContext *transaction.TransactionContext) (*OrderBestshopDao, error) { | ||
| 15 | + if transactionContext == nil { | ||
| 16 | + return nil, fmt.Errorf("transactionContext参数不能为nil") | ||
| 17 | + } else { | ||
| 18 | + return &OrderBestshopDao{ | ||
| 19 | + transactionContext: transactionContext, | ||
| 20 | + }, nil | ||
| 21 | + } | ||
| 22 | +} | ||
| 23 | + | ||
| 24 | +func (dao OrderBestshopDao) OrderExist(orderCode string) (bool, error) { | ||
| 25 | + tx := dao.transactionContext.GetDB() | ||
| 26 | + m := models.OrderBestshop{} | ||
| 27 | + query := tx.Model(m).Where("order_code=?", orderCode) | ||
| 28 | + ok, err := query.Exists() | ||
| 29 | + return ok, err | ||
| 30 | +} |
pkg/port/consumer/configs/config.go
0 → 100644
| 1 | +package configs | ||
| 2 | + | ||
// MqConfig holds the message-queue settings: the broker addresses and the
// consumer group identifier.
type MqConfig struct {
	Servers    []string `json:"servers"`
	ConsumerId string   `json:"consumerGroup"`
}

// Cfg is the package-wide message-queue configuration, filled in by init.
var Cfg MqConfig

// init populates Cfg with the built-in broker address and group id.
func init() {
	Cfg.Servers = []string{"192.168.190.136:9092"}
	Cfg.ConsumerId = "partnermg"
}
| 16 | + | ||
| 17 | +// "", | ||
| 18 | +// "106.52.15.41:9092" |
pkg/port/consumer/consumer.go
0 → 100644
| 1 | +package consumer | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + "context" | ||
| 5 | + "errors" | ||
| 6 | + "sync" | ||
| 7 | + | ||
| 8 | + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/port/consumer/configs" | ||
| 9 | + | ||
| 10 | + "github.com/Shopify/sarama" | ||
| 11 | + "github.com/astaxie/beego/logs" | ||
| 12 | +) | ||
| 13 | + | ||
| 14 | +//MessageConsumer 消息消费者 | ||
| 15 | +type MessageConsumer struct { | ||
| 16 | + ready chan bool | ||
| 17 | + kafkaHosts []string | ||
| 18 | + groupId string | ||
| 19 | + topics []string | ||
| 20 | + topicsHandles map[string]TopicHandle | ||
| 21 | +} | ||
| 22 | + | ||
| 23 | +func NewMessageConsumer() *MessageConsumer { | ||
| 24 | + topics := []string{} | ||
| 25 | + for key := range TopicHandleRouters { | ||
| 26 | + topics = append(topics, key) | ||
| 27 | + } | ||
| 28 | + return &MessageConsumer{ | ||
| 29 | + ready: make(chan bool), | ||
| 30 | + kafkaHosts: configs.Cfg.Servers, | ||
| 31 | + groupId: configs.Cfg.ConsumerId, | ||
| 32 | + topicsHandles: TopicHandleRouters, | ||
| 33 | + topics: topics, | ||
| 34 | + } | ||
| 35 | +} | ||
| 36 | + | ||
| 37 | +//实现对应的接口 | ||
| 38 | +var _ sarama.ConsumerGroupHandler = (*MessageConsumer)(nil) | ||
| 39 | + | ||
| 40 | +func (c *MessageConsumer) Setup(groupSession sarama.ConsumerGroupSession) error { | ||
| 41 | + close(c.ready) | ||
| 42 | + return nil | ||
| 43 | +} | ||
| 44 | + | ||
| 45 | +func (c *MessageConsumer) Cleanup(groupSession sarama.ConsumerGroupSession) error { | ||
| 46 | + return nil | ||
| 47 | +} | ||
| 48 | + | ||
| 49 | +func (c *MessageConsumer) ConsumeClaim(groupSession sarama.ConsumerGroupSession, | ||
| 50 | + groupClaim sarama.ConsumerGroupClaim) error { | ||
| 51 | + var ( | ||
| 52 | + topicHandle TopicHandle | ||
| 53 | + err error | ||
| 54 | + ) | ||
| 55 | + for message := range groupClaim.Messages() { | ||
| 56 | + if topicHandle, err = c.FindTopichandle(groupClaim.Topic()); err != nil { | ||
| 57 | + logs.Error("FindTopichandle err:%s \n", err) | ||
| 58 | + continue | ||
| 59 | + } | ||
| 60 | + if err = topicHandle(message); err != nil { | ||
| 61 | + logs.Error("Message claimed: kafka消息处理错误 topic =", message.Topic, message.Offset, err) | ||
| 62 | + } | ||
| 63 | + groupSession.MarkMessage(message, "") | ||
| 64 | + } | ||
| 65 | + return nil | ||
| 66 | +} | ||
| 67 | + | ||
| 68 | +func (c *MessageConsumer) FindTopichandle(topic string) (TopicHandle, error) { | ||
| 69 | + if v, ok := c.topicsHandles[topic]; ok { | ||
| 70 | + return v, nil | ||
| 71 | + } | ||
| 72 | + return nil, errors.New("TopicHandle not found") | ||
| 73 | +} | ||
| 74 | + | ||
| 75 | +//StartConsumer 启动 | ||
| 76 | +//返回 Consumer关闭方法 和 error | ||
| 77 | +func StartConsumer(ctx context.Context) (func(), error) { | ||
| 78 | + consumer := NewMessageConsumer() | ||
| 79 | + config := sarama.NewConfig() | ||
| 80 | + config.Consumer.Group.Rebalance.Strategy = sarama.BalanceStrategyRoundRobin | ||
| 81 | + config.Consumer.Offsets.Initial = sarama.OffsetNewest | ||
| 82 | + config.Version = sarama.V0_11_0_2 | ||
| 83 | + consumerGroup, err := sarama.NewConsumerGroup(consumer.kafkaHosts, consumer.groupId, config) | ||
| 84 | + if err != nil { | ||
| 85 | + return nil, err | ||
| 86 | + } | ||
| 87 | + wg := &sync.WaitGroup{} | ||
| 88 | + wg.Add(1) | ||
| 89 | + go func() { | ||
| 90 | + defer wg.Done() | ||
| 91 | + for { | ||
| 92 | + if err := ctx.Err(); err != nil { | ||
| 93 | + logs.Error("ctx err:%s \n", err) | ||
| 94 | + return | ||
| 95 | + } | ||
| 96 | + if err := consumerGroup.Consume(ctx, consumer.topics, consumer); err != nil { | ||
| 97 | + logs.Error("consumerGroup err:%s \n", err) | ||
| 98 | + } | ||
| 99 | + } | ||
| 100 | + }() | ||
| 101 | + //等待 consumerGroup 设置完成 | ||
| 102 | + <-consumer.ready | ||
| 103 | + logs.Error("Sarama consumer up and running!...") | ||
| 104 | + return func() { | ||
| 105 | + wg.Wait() | ||
| 106 | + if err := consumerGroup.Close(); err != nil { | ||
| 107 | + logs.Error("consumerGroup.Close err %s", err) | ||
| 108 | + } | ||
| 109 | + logs.Info("consumerGroup.Close") | ||
| 110 | + }, nil | ||
| 111 | +} |
pkg/port/consumer/topic_handle_router.go
0 → 100644
| 1 | +package consumer | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + "fmt" | ||
| 5 | + | ||
| 6 | + "github.com/Shopify/sarama" | ||
| 7 | +) | ||
| 8 | + | ||
| 9 | +//TopicHandle 处理kafka中得消息 | ||
| 10 | +type TopicHandle func(*sarama.ConsumerMessage) error | ||
| 11 | + | ||
| 12 | +//TopicHandleRouters 根据topic区分消息并进行处理 | ||
| 13 | +var TopicHandleRouters = map[string]TopicHandle{ | ||
| 14 | + "topic_test": func(message *sarama.ConsumerMessage) error { | ||
| 15 | + fmt.Printf("Done Message claimed: timestamp = %v, topic = %s offset = %v value = %v \n", | ||
| 16 | + message.Timestamp, message.Topic, message.Offset, string(message.Value)) | ||
| 17 | + return nil | ||
| 18 | + }, | ||
| 19 | + "bestshop_order": SyncBestshopOrder, | ||
| 20 | +} |
pkg/port/consumer/topic_handles.go
0 → 100644
| 1 | +package consumer | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + "encoding/json" | ||
| 5 | + "fmt" | ||
| 6 | + | ||
| 7 | + "github.com/Shopify/sarama" | ||
| 8 | + "github.com/astaxie/beego/logs" | ||
| 9 | + syncOrderCmd "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/application/syncOrder/command" | ||
| 10 | + syncOrderSrv "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/application/syncOrder/service" | ||
| 11 | +) | ||
| 12 | + | ||
| 13 | +//SyncBestshopOrder 同步 | ||
| 14 | +func SyncBestshopOrder(message *sarama.ConsumerMessage) error { | ||
| 15 | + logs.Info("Done Message claimed: timestamp = %v, topic = %s offset = %v value = %v \n", | ||
| 16 | + message.Timestamp, message.Topic, message.Offset, string(message.Value)) | ||
| 17 | + var ( | ||
| 18 | + cmd syncOrderCmd.CreateOrderFromBestshop | ||
| 19 | + err error | ||
| 20 | + ) | ||
| 21 | + err = json.Unmarshal(message.Value, &cmd) | ||
| 22 | + if err != nil { | ||
| 23 | + return fmt.Errorf("[SyncBestshopOrder] 解析kafka数据失败;%s", err) | ||
| 24 | + } | ||
| 25 | + if cmd.PartnerId <= 0 { | ||
| 26 | + logs.Info("[SyncBestshopOrder] PartnerId<=0 ,不处理消息") | ||
| 27 | + return nil | ||
| 28 | + } | ||
| 29 | + srv := syncOrderSrv.NewOrderInfoService(nil) | ||
| 30 | + err = srv.SyncOrderFromBestshop(cmd) | ||
| 31 | + return err | ||
| 32 | +} |
vendor/github.com/DataDog/zstd/.travis.yml
0 → 100644
| 1 | +dist: xenial | ||
| 2 | +language: go | ||
| 3 | + | ||
| 4 | +go: | ||
| 5 | + - 1.10.x | ||
| 6 | + - 1.11.x | ||
| 7 | + - 1.12.x | ||
| 8 | + | ||
| 9 | +os: | ||
| 10 | + - linux | ||
| 11 | + - osx | ||
| 12 | + | ||
| 13 | +matrix: | ||
| 14 | + include: | ||
| 15 | + name: "Go 1.11.x CentOS 32bits" | ||
| 16 | + language: go | ||
| 17 | + go: 1.11.x | ||
| 18 | + os: linux | ||
| 19 | + services: | ||
| 20 | + - docker | ||
| 21 | + script: | ||
| 22 | + # Please update Go version in travis_test_32 as needed | ||
| 23 | + - "docker run -i -v \"${PWD}:/zstd\" toopher/centos-i386:centos6 /bin/bash -c \"linux32 --32bit i386 /zstd/travis_test_32.sh\"" | ||
| 24 | + | ||
| 25 | +install: | ||
| 26 | + - "wget https://github.com/DataDog/zstd/files/2246767/mr.zip" | ||
| 27 | + - "unzip mr.zip" | ||
| 28 | +script: | ||
| 29 | + - "go build" | ||
| 30 | + - "PAYLOAD=`pwd`/mr go test -v" | ||
| 31 | + - "PAYLOAD=`pwd`/mr go test -bench ." |
vendor/github.com/DataDog/zstd/LICENSE
0 → 100644
| 1 | +Simplified BSD License | ||
| 2 | + | ||
| 3 | +Copyright (c) 2016, Datadog <info@datadoghq.com> | ||
| 4 | +All rights reserved. | ||
| 5 | + | ||
| 6 | +Redistribution and use in source and binary forms, with or without | ||
| 7 | +modification, are permitted provided that the following conditions are met: | ||
| 8 | + | ||
| 9 | + * Redistributions of source code must retain the above copyright notice, | ||
| 10 | + this list of conditions and the following disclaimer. | ||
| 11 | + * Redistributions in binary form must reproduce the above copyright notice, | ||
| 12 | + this list of conditions and the following disclaimer in the documentation | ||
| 13 | + and/or other materials provided with the distribution. | ||
| 14 | + * Neither the name of the copyright holder nor the names of its contributors | ||
| 15 | + may be used to endorse or promote products derived from this software | ||
| 16 | + without specific prior written permission. | ||
| 17 | + | ||
| 18 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
| 19 | +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
| 20 | +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 21 | +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | ||
| 22 | +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
| 23 | +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
| 24 | +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
| 25 | +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
| 26 | +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 27 | +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
vendor/github.com/DataDog/zstd/README.md
0 → 100644
| 1 | +# Zstd Go Wrapper | ||
| 2 | + | ||
| 3 | +[C Zstd Homepage](https://github.com/Cyan4973/zstd) | ||
| 4 | + | ||
| 5 | +The current headers and C files are from *v1.3.8* (Commit | ||
| 6 | +[470344d](https://github.com/facebook/zstd/releases/tag/v1.3.8)). | ||
| 7 | + | ||
| 8 | +## Usage | ||
| 9 | + | ||
| 10 | +There are two main APIs: | ||
| 11 | + | ||
| 12 | +* simple Compress/Decompress | ||
| 13 | +* streaming API (io.Reader/io.Writer) | ||
| 14 | + | ||
| 15 | +The compress/decompress APIs mirror that of lz4, while the streaming API was | ||
| 16 | +designed to be a drop-in replacement for zlib. | ||
| 17 | + | ||
| 18 | +### Simple `Compress/Decompress` | ||
| 19 | + | ||
| 20 | + | ||
| 21 | +```go | ||
| 22 | +// Compress compresses the byte array given in src and writes it to dst. | ||
| 23 | +// If you already have a buffer allocated, you can pass it to prevent allocation | ||
| 24 | +// If not, you can pass nil as dst. | ||
| 25 | +// If the buffer is too small, it will be reallocated, resized, and returned by the function | ||
| 26 | +// If dst is nil, this will allocate the worst case size (CompressBound(src)) | ||
| 27 | +Compress(dst, src []byte) ([]byte, error) | ||
| 28 | +``` | ||
| 29 | + | ||
| 30 | +```go | ||
| 31 | +// CompressLevel is the same as Compress but you can pass another compression level | ||
| 32 | +CompressLevel(dst, src []byte, level int) ([]byte, error) | ||
| 33 | +``` | ||
| 34 | + | ||
| 35 | +```go | ||
| 36 | +// Decompress will decompress your payload into dst. | ||
| 37 | +// If you already have a buffer allocated, you can pass it to prevent allocation | ||
| 38 | +// If not, you can pass nil as dst (allocates a 4*src size as default). | ||
| 39 | +// If the buffer is too small, it will retry 3 times by doubling the dst size | ||
| 40 | +// After max retries, it will switch to the slower stream API to be sure to be able | ||
| 41 | +// to decompress. Currently switches if compression ratio > 4*2**3=32. | ||
| 42 | +Decompress(dst, src []byte) ([]byte, error) | ||
| 43 | +``` | ||
| 44 | + | ||
| 45 | +### Stream API | ||
| 46 | + | ||
| 47 | +```go | ||
| 48 | +// NewWriter creates a new object that can optionally be initialized with | ||
| 49 | +// a precomputed dictionary. If dict is nil, compress without a dictionary. | ||
| 50 | +// The dictionary array should not be changed during the use of this object. | ||
| 51 | +// You MUST CALL Close() to write the last bytes of a zstd stream and free C objects. | ||
| 52 | +NewWriter(w io.Writer) *Writer | ||
| 53 | +NewWriterLevel(w io.Writer, level int) *Writer | ||
| 54 | +NewWriterLevelDict(w io.Writer, level int, dict []byte) *Writer | ||
| 55 | + | ||
| 56 | +// Write compresses the input data and write it to the underlying writer | ||
| 57 | +(w *Writer) Write(p []byte) (int, error) | ||
| 58 | + | ||
| 59 | +// Close flushes the buffer and frees C zstd objects | ||
| 60 | +(w *Writer) Close() error | ||
| 61 | +``` | ||
| 62 | + | ||
| 63 | +```go | ||
| 64 | +// NewReader returns a new io.ReadCloser that will decompress data from the | ||
| 65 | +// underlying reader. If a dictionary is provided to NewReaderDict, it must | ||
| 66 | +// not be modified until Close is called. It is the caller's responsibility | ||
| 67 | +// to call Close, which frees up C objects. | ||
| 68 | +NewReader(r io.Reader) io.ReadCloser | ||
| 69 | +NewReaderDict(r io.Reader, dict []byte) io.ReadCloser | ||
| 70 | +``` | ||
| 71 | + | ||
| 72 | +### Benchmarks (benchmarked with v0.5.0) | ||
| 73 | + | ||
| 74 | +The author of Zstd also wrote lz4. Zstd is intended to occupy a speed/ratio | ||
| 75 | +level similar to what zlib currently provides. In our tests, it can always | ||
| 76 | +be made to be better than zlib by choosing an appropriate level while still | ||
| 77 | +keeping compression and decompression time faster than zlib. | ||
| 78 | + | ||
| 79 | +You can run the benchmarks against your own payloads by using the Go benchmarks tool. | ||
| 80 | +Just export your payload filepath as the `PAYLOAD` environment variable and run the benchmarks: | ||
| 81 | + | ||
| 82 | +```go | ||
| 83 | +go test -bench . | ||
| 84 | +``` | ||
| 85 | + | ||
| 86 | +Compression of a 7Mb pdf zstd (this wrapper) vs [czlib](https://github.com/DataDog/czlib): | ||
| 87 | +``` | ||
| 88 | +BenchmarkCompression 5 221056624 ns/op 67.34 MB/s | ||
| 89 | +BenchmarkDecompression 100 18370416 ns/op 810.32 MB/s | ||
| 90 | + | ||
| 91 | +BenchmarkFzlibCompress 2 610156603 ns/op 24.40 MB/s | ||
| 92 | +BenchmarkFzlibDecompress 20 81195246 ns/op 183.33 MB/s | ||
| 93 | +``` | ||
| 94 | + | ||
| 95 | +Ratio is also better by a margin of ~20%. | ||
| 96 | +Compression speed is always better than zlib on all the payloads we tested; | ||
| 97 | +However, [czlib](https://github.com/DataDog/czlib) has optimisations that make it | ||
| 98 | +faster at decompressing small payloads: | ||
| 99 | + | ||
| 100 | +``` | ||
| 101 | +Testing with size: 11... czlib: 8.97 MB/s, zstd: 3.26 MB/s | ||
| 102 | +Testing with size: 27... czlib: 23.3 MB/s, zstd: 8.22 MB/s | ||
| 103 | +Testing with size: 62... czlib: 31.6 MB/s, zstd: 19.49 MB/s | ||
| 104 | +Testing with size: 141... czlib: 74.54 MB/s, zstd: 42.55 MB/s | ||
| 105 | +Testing with size: 323... czlib: 155.14 MB/s, zstd: 99.39 MB/s | ||
| 106 | +Testing with size: 739... czlib: 235.9 MB/s, zstd: 216.45 MB/s | ||
| 107 | +Testing with size: 1689... czlib: 116.45 MB/s, zstd: 345.64 MB/s | ||
| 108 | +Testing with size: 3858... czlib: 176.39 MB/s, zstd: 617.56 MB/s | ||
| 109 | +Testing with size: 8811... czlib: 254.11 MB/s, zstd: 824.34 MB/s | ||
| 110 | +Testing with size: 20121... czlib: 197.43 MB/s, zstd: 1339.11 MB/s | ||
| 111 | +Testing with size: 45951... czlib: 201.62 MB/s, zstd: 1951.57 MB/s | ||
| 112 | +``` | ||
| 113 | + | ||
| 114 | +zstd starts to shine with payloads > 1KB | ||
| 115 | + | ||
| 116 | +### Stability - Current state: STABLE | ||
| 117 | + | ||
| 118 | +The C library seems to be pretty stable and according to the author has been tested and fuzzed. | ||
| 119 | + | ||
| 120 | +For the Go wrapper, the tests cover most usual cases and we have successfully tested it on all staging and prod data. | ||
vendor/github.com/DataDog/zstd/ZSTD_LICENSE
0 → 100644
| 1 | +BSD License | ||
| 2 | + | ||
| 3 | +For Zstandard software | ||
| 4 | + | ||
| 5 | +Copyright (c) 2016-present, Facebook, Inc. All rights reserved. | ||
| 6 | + | ||
| 7 | +Redistribution and use in source and binary forms, with or without modification, | ||
| 8 | +are permitted provided that the following conditions are met: | ||
| 9 | + | ||
| 10 | + * Redistributions of source code must retain the above copyright notice, this | ||
| 11 | + list of conditions and the following disclaimer. | ||
| 12 | + | ||
| 13 | + * Redistributions in binary form must reproduce the above copyright notice, | ||
| 14 | + this list of conditions and the following disclaimer in the documentation | ||
| 15 | + and/or other materials provided with the distribution. | ||
| 16 | + | ||
| 17 | + * Neither the name Facebook nor the names of its contributors may be used to | ||
| 18 | + endorse or promote products derived from this software without specific | ||
| 19 | + prior written permission. | ||
| 20 | + | ||
| 21 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
| 22 | +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
| 23 | +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 24 | +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR | ||
| 25 | +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
| 26 | +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
| 27 | +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | ||
| 28 | +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 29 | +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 30 | +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
vendor/github.com/DataDog/zstd/bitstream.h
0 → 100644
| 1 | +/* ****************************************************************** | ||
| 2 | + bitstream | ||
| 3 | + Part of FSE library | ||
| 4 | + Copyright (C) 2013-present, Yann Collet. | ||
| 5 | + | ||
| 6 | + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | ||
| 7 | + | ||
| 8 | + Redistribution and use in source and binary forms, with or without | ||
| 9 | + modification, are permitted provided that the following conditions are | ||
| 10 | + met: | ||
| 11 | + | ||
| 12 | + * Redistributions of source code must retain the above copyright | ||
| 13 | + notice, this list of conditions and the following disclaimer. | ||
| 14 | + * Redistributions in binary form must reproduce the above | ||
| 15 | + copyright notice, this list of conditions and the following disclaimer | ||
| 16 | + in the documentation and/or other materials provided with the | ||
| 17 | + distribution. | ||
| 18 | + | ||
| 19 | + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 20 | + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 21 | + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 22 | + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 23 | + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 24 | + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 25 | + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 26 | + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 27 | + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 28 | + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 29 | + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 30 | + | ||
| 31 | + You can contact the author at : | ||
| 32 | + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | ||
| 33 | +****************************************************************** */ | ||
| 34 | +#ifndef BITSTREAM_H_MODULE | ||
| 35 | +#define BITSTREAM_H_MODULE | ||
| 36 | + | ||
| 37 | +#if defined (__cplusplus) | ||
| 38 | +extern "C" { | ||
| 39 | +#endif | ||
| 40 | + | ||
| 41 | +/* | ||
| 42 | +* This API consists of small unitary functions, which must be inlined for best performance. | ||
| 43 | +* Since link-time-optimization is not available for all compilers, | ||
| 44 | +* these functions are defined into a .h to be included. | ||
| 45 | +*/ | ||
| 46 | + | ||
| 47 | +/*-**************************************** | ||
| 48 | +* Dependencies | ||
| 49 | +******************************************/ | ||
| 50 | +#include "mem.h" /* unaligned access routines */ | ||
| 51 | +#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ | ||
| 52 | +#include "error_private.h" /* error codes and messages */ | ||
| 53 | + | ||
| 54 | + | ||
| 55 | +/*========================================= | ||
| 56 | +* Target specific | ||
| 57 | +=========================================*/ | ||
| 58 | +#if defined(__BMI__) && defined(__GNUC__) | ||
| 59 | +# include <immintrin.h> /* support for bextr (experimental) */ | ||
| 60 | +#endif | ||
| 61 | + | ||
| 62 | +#define STREAM_ACCUMULATOR_MIN_32 25 | ||
| 63 | +#define STREAM_ACCUMULATOR_MIN_64 57 | ||
| 64 | +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) | ||
| 65 | + | ||
| 66 | + | ||
| 67 | +/*-****************************************** | ||
| 68 | +* bitStream encoding API (write forward) | ||
| 69 | +********************************************/ | ||
| 70 | +/* bitStream can mix input from multiple sources. | ||
| 71 | + * A critical property of these streams is that they encode and decode in **reverse** direction. | ||
| 72 | + * So the first bit sequence you add will be the last to be read, like a LIFO stack. | ||
| 73 | + */ | ||
| 74 | +typedef struct { | ||
| 75 | + size_t bitContainer; | ||
| 76 | + unsigned bitPos; | ||
| 77 | + char* startPtr; | ||
| 78 | + char* ptr; | ||
| 79 | + char* endPtr; | ||
| 80 | +} BIT_CStream_t; | ||
| 81 | + | ||
| 82 | +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); | ||
| 83 | +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); | ||
| 84 | +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); | ||
| 85 | +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); | ||
| 86 | + | ||
| 87 | +/* Start with initCStream, providing the size of buffer to write into. | ||
| 88 | +* bitStream will never write outside of this buffer. | ||
| 89 | +* `dstCapacity` must be > sizeof(bitC->bitContainer), otherwise @return will be an error code. | ||
| 90 | +* | ||
| 91 | +* bits are first added to a local register. | ||
| 92 | +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. | ||
| 93 | +* Writing data into memory is an explicit operation, performed by the flushBits function. | ||
| 94 | +* Hence keep track of how many bits are potentially stored into local register to avoid register overflow. | ||
| 95 | +* After a flushBits, a maximum of 7 bits might still be stored into local register. | ||
| 96 | +* | ||
| 97 | +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. | ||
| 98 | +* | ||
| 99 | +* Last operation is to close the bitStream. | ||
| 100 | +* The function returns the final size of CStream in bytes. | ||
| 101 | +* If data couldn't fit into `dstBuffer`, it returns 0 ( == not storable) | ||
| 102 | +*/ | ||
| 103 | + | ||
| 104 | + | ||
| 105 | +/*-******************************************** | ||
| 106 | +* bitStream decoding API (read backward) | ||
| 107 | +**********************************************/ | ||
| 108 | +typedef struct { | ||
| 109 | + size_t bitContainer; /* local register : chunk of the bitStream currently loaded */ | ||
| 110 | + unsigned bitsConsumed; /* number of bits of bitContainer already read or skipped */ | ||
| 111 | + const char* ptr; /* address bitContainer was last loaded from */ | ||
| 112 | + const char* start; /* first byte of the source buffer */ | ||
| 113 | + const char* limitPtr; /* start + sizeof(bitContainer) : while ptr >= limitPtr, BIT_reloadDStream() performs a full refill */ | ||
| 114 | +} BIT_DStream_t; | ||
| 115 | + | ||
| 116 | +typedef enum { BIT_DStream_unfinished = 0, /* register was refilled after dropping every fully consumed byte */ | ||
| 117 | + BIT_DStream_endOfBuffer = 1, /* register now starts at the beginning of the buffer; unread bits may remain */ | ||
| 118 | + BIT_DStream_completed = 2, /* register is at the beginning of the buffer and all of its bits are consumed */ | ||
| 119 | + BIT_DStream_overflow = 3 } BIT_DStream_status; /* overflow : more bits consumed than the register holds. This enum is the result of BIT_reloadDStream() */ | ||
| 120 | + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ | ||
| 121 | + | ||
| 122 | +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); | ||
| 123 | +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); | ||
| 124 | +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); | ||
| 125 | +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); | ||
| 126 | + | ||
| 127 | + | ||
| 128 | +/* Start by invoking BIT_initDStream(). | ||
| 129 | +* A chunk of the bitStream is then stored into a local register. | ||
| 130 | +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). | ||
| 131 | +* You can then retrieve bitFields stored into the local register, **in reverse order**. | ||
| 132 | +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. | ||
| 133 | +* A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. | ||
| 134 | +* Otherwise, it can be less than that, so proceed accordingly. | ||
| 135 | +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). | ||
| 136 | +*/ | ||
| 137 | + | ||
| 138 | + | ||
| 139 | +/*-**************************************** | ||
| 140 | +* unsafe API | ||
| 141 | +******************************************/ | ||
| 142 | +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); | ||
| 143 | +/* faster than BIT_addBits() because `value` is not masked; works only if value is "clean", meaning all high bits above nbBits are 0 */ | ||
| 144 | + | ||
| 145 | +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); | ||
| 146 | +/* unsafe version of BIT_flushBits(); does not check buffer overflow, so the caller must guarantee there is room in the destination */ | ||
| 147 | + | ||
| 148 | +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); | ||
| 149 | +/* faster than BIT_readBits(), but works only if nbBits >= 1 */ | ||
| 150 | + | ||
| 151 | + | ||
| 152 | + | ||
| 153 | +/*-************************************************************** | ||
| 154 | +* Internal functions | ||
| 155 | +****************************************************************/ | ||
| 156 | +MEM_STATIC unsigned BIT_highbit32 (U32 val) /* @return : index (0..31) of the highest set bit of `val`; `val` must be non-zero */ | ||
| 157 | +{ | ||
| 158 | + assert(val != 0); /* every branch below is only meaningful for a non-zero input */ | ||
| 159 | + { | ||
| 160 | +# if defined(_MSC_VER) /* Visual */ | ||
| 161 | + unsigned long r=0; | ||
| 162 | + _BitScanReverse ( &r, val ); /* stores the position of the highest set bit into r */ | ||
| 163 | + return (unsigned) r; | ||
| 164 | +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ | ||
| 165 | + return 31 - __builtin_clz (val); /* leading-zero count converted into a bit index */ | ||
| 166 | +# else /* Software version */ | ||
| 167 | + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, | ||
| 168 | + 11, 14, 16, 18, 22, 25, 3, 30, | ||
| 169 | + 8, 12, 20, 28, 15, 17, 24, 7, | ||
| 170 | + 19, 27, 23, 6, 26, 5, 4, 31 }; | ||
| 171 | + U32 v = val; | ||
| 172 | + v |= v >> 1; /* these five steps copy the highest set bit into every lower position */ | ||
| 173 | + v |= v >> 2; | ||
| 174 | + v |= v >> 4; | ||
| 175 | + v |= v >> 8; | ||
| 176 | + v |= v >> 16; | ||
| 177 | + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; /* top 5 bits of the product index the lookup table */ | ||
| 178 | +# endif | ||
| 179 | + } | ||
| 180 | +} | ||
| 181 | + | ||
| 182 | +/*===== Local Constants : BIT_mask[n] has its n lowest bits set, for n = 0..31 =====*/ | ||
| 183 | +static const unsigned BIT_mask[] = { | ||
| 184 | + 0, 1, 3, 7, 0xF, 0x1F, | ||
| 185 | + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, | ||
| 186 | + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, | ||
| 187 | + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, | ||
| 188 | + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, | ||
| 189 | + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ | ||
| 190 | +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) /* number of entries in BIT_mask; BIT_addBits() statically asserts it is 32 */ | ||
| 191 | + | ||
| 192 | +/*-************************************************************** | ||
| 193 | +* bitStream encoding | ||
| 194 | +****************************************************************/ | ||
| 195 | +/*! BIT_initCStream() : | ||
| 196 | + * Resets `bitC` to an empty register and points it at the buffer `startPtr` of `dstCapacity` bytes. `dstCapacity` must be > sizeof(size_t) | ||
| 197 | + * @return : 0 if success, | ||
| 198 | + * otherwise an error code (can be tested using ERR_isError()) */ | ||
| 199 | +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, | ||
| 200 | + void* startPtr, size_t dstCapacity) | ||
| 201 | +{ | ||
| 202 | + bitC->bitContainer = 0; | ||
| 203 | + bitC->bitPos = 0; | ||
| 204 | + bitC->startPtr = (char*)startPtr; | ||
| 205 | + bitC->ptr = bitC->startPtr; | ||
| 206 | + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); /* computed before the capacity check below; not meaningful when that check fails */ | ||
| 207 | + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); /* buffer must be strictly larger than one register */ | ||
| 208 | + return 0; | ||
| 209 | +} | ||
| 210 | + | ||
| 211 | +/*! BIT_addBits() : | ||
| 212 | + * can add up to 31 bits into `bitC`. Only the `nbBits` lowest bits of `value` are kept; nothing is written to memory here. | ||
| 213 | + * Note : register overflow is only caught by assert(), it is not otherwise checked ! */ | ||
| 214 | +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, | ||
| 215 | + size_t value, unsigned nbBits) | ||
| 216 | +{ | ||
| 217 | + MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); | ||
| 218 | + assert(nbBits < BIT_MASK_SIZE); /* nbBits is used as an index into BIT_mask */ | ||
| 219 | + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); /* the new bits must fit in the register */ | ||
| 220 | + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; /* masked value is placed just above the bits already stored */ | ||
| 221 | + bitC->bitPos += nbBits; | ||
| 222 | +} | ||
| 223 | + | ||
| 224 | +/*! BIT_addBitsFast() : | ||
| 225 | + * same as BIT_addBits() but without masking `value`; works only if `value` is _clean_, | ||
| 226 | + * meaning all high bits above nbBits are 0 */ | ||
| 227 | +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, | ||
| 228 | + size_t value, unsigned nbBits) | ||
| 229 | +{ | ||
| 230 | + assert((value>>nbBits) == 0); /* checks the "clean value" precondition */ | ||
| 231 | + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); /* the new bits must fit in the register */ | ||
| 232 | + bitC->bitContainer |= value << bitC->bitPos; | ||
| 233 | + bitC->bitPos += nbBits; | ||
| 234 | +} | ||
| 235 | + | ||
| 236 | +/*! BIT_flushBitsFast() : | ||
| 237 | + * Writes the register to memory and advances by the number of complete bytes it holds. assumption : bitContainer has not overflowed | ||
| 238 | + * unsafe version; does not check buffer overflow (only an assert() guards the write pointer) */ | ||
| 239 | +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) | ||
| 240 | +{ | ||
| 241 | + size_t const nbBytes = bitC->bitPos >> 3; /* number of complete bytes held in the register */ | ||
| 242 | + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); | ||
| 243 | + MEM_writeLEST(bitC->ptr, bitC->bitContainer); /* stores the whole register at ptr; only nbBytes of it are committed by the advance below */ | ||
| 244 | + bitC->ptr += nbBytes; | ||
| 245 | + assert(bitC->ptr <= bitC->endPtr); | ||
| 246 | + bitC->bitPos &= 7; /* at most 7 bits stay in the register */ | ||
| 247 | + bitC->bitContainer >>= nbBytes*8; /* drop the bytes that were committed */ | ||
| 248 | +} | ||
| 249 | + | ||
| 250 | +/*! BIT_flushBits() : | ||
| 251 | + * Writes the register to memory and advances by the number of complete bytes it holds. assumption : bitContainer has not overflowed | ||
| 252 | + * safe version; check for buffer overflow, and prevents it by clamping the write pointer to endPtr. | ||
| 253 | + * note : does not signal buffer overflow. | ||
| 254 | + * overflow will be revealed later on using BIT_closeCStream() */ | ||
| 255 | +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) | ||
| 256 | +{ | ||
| 257 | + size_t const nbBytes = bitC->bitPos >> 3; /* number of complete bytes held in the register */ | ||
| 258 | + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); | ||
| 259 | + MEM_writeLEST(bitC->ptr, bitC->bitContainer); /* stores the whole register at ptr; only nbBytes of it are committed by the advance below */ | ||
| 260 | + bitC->ptr += nbBytes; | ||
| 261 | + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; /* clamp : later writes stay at endPtr instead of running past it */ | ||
| 262 | + bitC->bitPos &= 7; /* at most 7 bits stay in the register */ | ||
| 263 | + bitC->bitContainer >>= nbBytes*8; /* drop the bytes that were committed */ | ||
| 264 | +} | ||
| 265 | + | ||
| 266 | +/*! BIT_closeCStream() : | ||
| 267 | + * Appends the 1-bit end mark, flushes, and reports the stream size. @return : size of CStream, in bytes, | ||
| 268 | + * or 0 if it could not fit into dstBuffer */ | ||
| 269 | +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) | ||
| 270 | +{ | ||
| 271 | + BIT_addBitsFast(bitC, 1, 1); /* endMark */ | ||
| 272 | + BIT_flushBits(bitC); | ||
| 273 | + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected : BIT_flushBits() clamps ptr to endPtr, so reaching it is reported as "does not fit" */ | ||
| 274 | + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); /* committed bytes, plus one byte if bits remain in the register */ | ||
| 275 | +} | ||
| 276 | + | ||
| 277 | + | ||
| 278 | +/*-******************************************************** | ||
| 279 | +* bitStream decoding | ||
| 280 | +**********************************************************/ | ||
| 281 | +/*! BIT_initDStream() : | ||
| 282 | + * Initialize a BIT_DStream_t : loads the last bytes of the buffer into the register and locates the end mark in the final byte. | ||
| 283 | + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. | ||
| 284 | + * `srcSize` must be the *exact* size of the bitStream, in bytes. | ||
| 285 | + * @return : size of stream (== srcSize), or an errorCode if a problem is detected (srcSize == 0, or last byte == 0) | ||
| 286 | + */ | ||
| 287 | +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) | ||
| 288 | +{ | ||
| 289 | + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } /* empty input : zero the state before reporting the error */ | ||
| 290 | + | ||
| 291 | + bitD->start = (const char*)srcBuffer; | ||
| 292 | + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); | ||
| 293 | + | ||
| 294 | + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case : a full register can be read from the end of the buffer */ | ||
| 295 | + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); | ||
| 296 | + bitD->bitContainer = MEM_readLEST(bitD->ptr); | ||
| 297 | + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; | ||
| 298 | + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set; the end mark and the bits above it count as consumed */ | ||
| 299 | + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present. NOTE(review): the short-input branch below returns corruption_detected for this same condition - confirm which is intended */ } | ||
| 300 | + } else { | ||
| 301 | + bitD->ptr = bitD->start; | ||
| 302 | + bitD->bitContainer = *(const BYTE*)(bitD->start); /* short input : the register is assembled byte by byte, starting with byte 0 */ | ||
| 303 | + switch(srcSize) | ||
| 304 | + { | ||
| 305 | + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); | ||
| 306 | + /* fall-through */ | ||
| 307 | + | ||
| 308 | + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); | ||
| 309 | + /* fall-through */ | ||
| 310 | + | ||
| 311 | + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); | ||
| 312 | + /* fall-through */ | ||
| 313 | + | ||
| 314 | + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; | ||
| 315 | + /* fall-through */ | ||
| 316 | + | ||
| 317 | + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; | ||
| 318 | + /* fall-through */ | ||
| 319 | + | ||
| 320 | + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; | ||
| 321 | + /* fall-through */ | ||
| 322 | + | ||
| 323 | + default: break; | ||
| 324 | + } | ||
| 325 | + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; | ||
| 326 | + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; | ||
| 327 | + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ | ||
| 328 | + } | ||
| 329 | + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; /* register bytes beyond srcSize hold no data, so they count as already consumed */ | ||
| 330 | + } | ||
| 331 | + | ||
| 332 | + return srcSize; | ||
| 333 | +} | ||
| 334 | + | ||
| 335 | +MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) /* @return : `bitContainer` with its `start` lowest bits shifted out */ | ||
| 336 | +{ | ||
| 337 | + return bitContainer >> start; | ||
| 338 | +} | ||
| 339 | + | ||
| 340 | +MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) /* @return : the `nbBits` bits of `bitContainer` that begin at bit position `start` */ | ||
| 341 | +{ | ||
| 342 | + U32 const regMask = sizeof(bitContainer)*8 - 1; | ||
| 343 | + /* if start > regMask, bitstream is corrupted, and result is undefined */ | ||
| 344 | + assert(nbBits < BIT_MASK_SIZE); /* nbBits is used as an index into BIT_mask */ | ||
| 345 | + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; /* masking `start` keeps the shift count below the register width */ | ||
| 346 | +} | ||
| 347 | + | ||
| 348 | +MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) /* @return : the `nbBits` lowest bits of `bitContainer` */ | ||
| 349 | +{ | ||
| 350 | + assert(nbBits < BIT_MASK_SIZE); /* nbBits is used as an index into BIT_mask */ | ||
| 351 | + return bitContainer & BIT_mask[nbBits]; | ||
| 352 | +} | ||
| 353 | + | ||
| 354 | +/*! BIT_lookBits() : | ||
| 355 | + * Provides next n bits from local register, i.e. the `nbBits` bits located just below the bits already consumed. | ||
| 356 | + * local register is not modified. | ||
| 357 | + * On 32-bits, maxNbBits==24. | ||
| 358 | + * On 64-bits, maxNbBits==56. | ||
| 359 | + * @return : value extracted */ | ||
| 360 | +MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) | ||
| 361 | +{ | ||
| 362 | + /* arbitrate between double-shift and shift+mask */ | ||
| 363 | +#if 1 | ||
| 364 | + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, | ||
| 365 | + * bitstream is likely corrupted, and result is undefined */ | ||
| 366 | + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); | ||
| 367 | +#else | ||
| 368 | + /* this code path is slower on my os-x laptop (disabled by the #if 1 above) */ | ||
| 369 | + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; | ||
| 370 | + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); | ||
| 371 | +#endif | ||
| 372 | +} | ||
| 373 | + | ||
| 374 | +/*! BIT_lookBitsFast() : | ||
| 375 | + * unsafe version; only works if nbBits >= 1 (with nbBits == 0 the masked right-shift count becomes 0 and nothing is shifted out) */ | ||
| 376 | +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) | ||
| 377 | +{ | ||
| 378 | + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; | ||
| 379 | + assert(nbBits >= 1); | ||
| 380 | + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); /* left shift discards consumed bits, right shift keeps the top nbBits */ | ||
| 381 | +} | ||
| 382 | + | ||
| 383 | +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) /* marks `nbBits` more bits as consumed; the register content is untouched */ | ||
| 384 | +{ | ||
| 385 | + bitD->bitsConsumed += nbBits; | ||
| 386 | +} | ||
| 387 | + | ||
| 388 | +/*! BIT_readBits() : | ||
| 389 | + * Read (consume) next n bits from local register and update : BIT_lookBits() followed by BIT_skipBits(). | ||
| 390 | + * Pay attention to not read more than nbBits contained into local register. | ||
| 391 | + * @return : extracted value. */ | ||
| 392 | +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) | ||
| 393 | +{ | ||
| 394 | + size_t const value = BIT_lookBits(bitD, nbBits); | ||
| 395 | + BIT_skipBits(bitD, nbBits); | ||
| 396 | + return value; | ||
| 397 | +} | ||
| 398 | + | ||
| 399 | +/*! BIT_readBitsFast() : | ||
| 400 | + * unsafe version of BIT_readBits(), built on BIT_lookBitsFast(); works only if nbBits >= 1 */ | ||
| 401 | +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) | ||
| 402 | +{ | ||
| 403 | + size_t const value = BIT_lookBitsFast(bitD, nbBits); | ||
| 404 | + assert(nbBits >= 1); | ||
| 405 | + BIT_skipBits(bitD, nbBits); | ||
| 406 | + return value; | ||
| 407 | +} | ||
| 408 | + | ||
| 409 | +/*! BIT_reloadDStream() : | ||
| 410 | + * Refill `bitD` from buffer previously set in BIT_initDStream() . | ||
| 411 | + * This function is safe, it guarantees it will not read beyond src buffer. | ||
| 412 | + * @return : status of `BIT_DStream_t` internal register. | ||
| 413 | + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ | ||
| 414 | +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) | ||
| 415 | +{ | ||
| 416 | + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ | ||
| 417 | + return BIT_DStream_overflow; | ||
| 418 | + | ||
| 419 | + if (bitD->ptr >= bitD->limitPtr) { /* at least one register width lies between start and ptr : step back by every fully consumed byte */ | ||
| 420 | + bitD->ptr -= bitD->bitsConsumed >> 3; | ||
| 421 | + bitD->bitsConsumed &= 7; | ||
| 422 | + bitD->bitContainer = MEM_readLEST(bitD->ptr); | ||
| 423 | + return BIT_DStream_unfinished; | ||
| 424 | + } | ||
| 425 | + if (bitD->ptr == bitD->start) { /* already at the beginning of the buffer : nothing more can be loaded */ | ||
| 426 | + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; | ||
| 427 | + return BIT_DStream_completed; | ||
| 428 | + } | ||
| 429 | + /* start < ptr < limitPtr */ | ||
| 430 | + { U32 nbBytes = bitD->bitsConsumed >> 3; | ||
| 431 | + BIT_DStream_status result = BIT_DStream_unfinished; | ||
| 432 | + if (bitD->ptr - nbBytes < bitD->start) { /* stepping back by nbBytes would pass start : go back only as far as start */ | ||
| 433 | + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ | ||
| 434 | + result = BIT_DStream_endOfBuffer; | ||
| 435 | + } | ||
| 436 | + bitD->ptr -= nbBytes; | ||
| 437 | + bitD->bitsConsumed -= nbBytes*8; | ||
| 438 | + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ | ||
| 439 | + return result; | ||
| 440 | + } | ||
| 441 | +} | ||
| 442 | + | ||
| 443 | +/*! BIT_endOfDStream() : | ||
| 444 | + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed), i.e. ptr is back at start and bitsConsumed equals the register width; 0 otherwise. | ||
| 445 | + */ | ||
| 446 | +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) | ||
| 447 | +{ | ||
| 448 | + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); | ||
| 449 | +} | ||
| 450 | + | ||
| 451 | +#if defined (__cplusplus) | ||
| 452 | +} | ||
| 453 | +#endif | ||
| 454 | + | ||
| 455 | +#endif /* BITSTREAM_H_MODULE */ |
vendor/github.com/DataDog/zstd/compiler.h
0 → 100644
| 1 | +/* | ||
| 2 | + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | ||
| 3 | + * All rights reserved. | ||
| 4 | + * | ||
| 5 | + * This source code is licensed under both the BSD-style license (found in the | ||
| 6 | + * LICENSE file in the root directory of this source tree) and the GPLv2 (found | ||
| 7 | + * in the COPYING file in the root directory of this source tree). | ||
| 8 | + * You may select, at your option, one of the above-listed licenses. | ||
| 9 | + */ | ||
| 10 | + | ||
| 11 | +#ifndef ZSTD_COMPILER_H | ||
| 12 | +#define ZSTD_COMPILER_H | ||
| 13 | + | ||
| 14 | +/*-******************************************************* | ||
| 15 | +* Compiler specifics | ||
| 16 | +*********************************************************/ | ||
| 17 | +/* force inlining */ | ||
| 18 | + | ||
| 19 | +#if !defined(ZSTD_NO_INLINE) | ||
| 20 | +#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ | ||
| 21 | +# define INLINE_KEYWORD inline | ||
| 22 | +#else | ||
| 23 | +# define INLINE_KEYWORD | ||
| 24 | +#endif | ||
| 25 | + | ||
| 26 | +#if defined(__GNUC__) | ||
| 27 | +# define FORCE_INLINE_ATTR __attribute__((always_inline)) | ||
| 28 | +#elif defined(_MSC_VER) | ||
| 29 | +# define FORCE_INLINE_ATTR __forceinline | ||
| 30 | +#else | ||
| 31 | +# define FORCE_INLINE_ATTR | ||
| 32 | +#endif | ||
| 33 | + | ||
| 34 | +#else | ||
| 35 | + | ||
| 36 | +#define INLINE_KEYWORD | ||
| 37 | +#define FORCE_INLINE_ATTR | ||
| 38 | + | ||
| 39 | +#endif | ||
| 40 | + | ||
| 41 | +/** | ||
| 42 | + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant | ||
| 43 | + * parameters. They must be inlined for the compiler to eliminate the constant | ||
| 44 | + * branches. | ||
| 45 | + */ | ||
| 46 | +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR | ||
| 47 | +/** | ||
| 48 | + * HINT_INLINE is used to help the compiler generate better code. It is *not* | ||
| 49 | + * used for "templates", so it can be tweaked based on the compiler's | ||
| 50 | + * performance. | ||
| 51 | + * | ||
| 52 | + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the | ||
| 53 | + * always_inline attribute. | ||
| 54 | + * | ||
| 55 | + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline | ||
| 56 | + * attribute. | ||
| 57 | + */ | ||
| 58 | +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 | ||
| 59 | +# define HINT_INLINE static INLINE_KEYWORD | ||
| 60 | +#else | ||
| 61 | +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR | ||
| 62 | +#endif | ||
| 63 | + | ||
| 64 | +/* force no inlining */ | ||
| 65 | +#ifdef _MSC_VER | ||
| 66 | +# define FORCE_NOINLINE static __declspec(noinline) | ||
| 67 | +#else | ||
| 68 | +# ifdef __GNUC__ | ||
| 69 | +# define FORCE_NOINLINE static __attribute__((__noinline__)) | ||
| 70 | +# else | ||
| 71 | +# define FORCE_NOINLINE static | ||
| 72 | +# endif | ||
| 73 | +#endif | ||
| 74 | + | ||
| 75 | +/* target attribute */ | ||
| 76 | +#ifndef __has_attribute | ||
| 77 | + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ | ||
| 78 | +#endif | ||
| 79 | +#if defined(__GNUC__) | ||
| 80 | +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) | ||
| 81 | +#else | ||
| 82 | +# define TARGET_ATTRIBUTE(target) | ||
| 83 | +#endif | ||
| 84 | + | ||
| 85 | +/* Enable runtime BMI2 dispatch based on the CPU. | ||
| 86 | + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. Can be overridden by defining DYNAMIC_BMI2 before this point. | ||
| 87 | + * NOTE(review): the test below uses _M_X86; MSVC documents _M_IX86 and _M_X64 as its x86 architecture macros - confirm which one was intended. */ | ||
| 88 | +#ifndef DYNAMIC_BMI2 | ||
| 89 | + #if ((defined(__clang__) && __has_attribute(__target__)) \ | ||
| 90 | + || (defined(__GNUC__) \ | ||
| 91 | + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ | ||
| 92 | + && (defined(__x86_64__) || defined(_M_X86)) \ | ||
| 93 | + && !defined(__BMI2__) | ||
| 94 | + # define DYNAMIC_BMI2 1 | ||
| 95 | + #else | ||
| 96 | + # define DYNAMIC_BMI2 0 | ||
| 97 | + #endif | ||
| 98 | +#endif | ||
| 99 | + | ||
| 100 | +/* prefetch : PREFETCH_L1(ptr) / PREFETCH_L2(ptr) issue a read prefetch hint, or only evaluate (void)(ptr) when no intrinsic is selected. | ||
| 101 | + * can be disabled, by declaring NO_PREFETCH build macro. NOTE(review): the MSVC test uses _M_I86; MSVC documents _M_IX86 for 32-bit x86 - confirm intent. */ | ||
| 102 | +#if defined(NO_PREFETCH) | ||
| 103 | +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ | ||
| 104 | +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ | ||
| 105 | +#else | ||
| 106 | +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ | ||
| 107 | +# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ | ||
| 108 | +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) | ||
| 109 | +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) | ||
| 110 | +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) | ||
| 111 | +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) | ||
| 112 | +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) | ||
| 113 | +# else | ||
| 114 | +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ | ||
| 115 | +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ | ||
| 116 | +# endif | ||
| 117 | +#endif /* NO_PREFETCH */ | ||
| 118 | + | ||
| 119 | +#define CACHELINE_SIZE 64 | ||
| 120 | + | ||
| 121 | +#define PREFETCH_AREA(p, s) { \ | ||
| 122 | + const char* const _ptr = (const char*)(p); \ | ||
| 123 | + size_t const _size = (size_t)(s); \ | ||
| 124 | + size_t _pos; \ | ||
| 125 | + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ | ||
| 126 | + PREFETCH_L2(_ptr + _pos); \ | ||
| 127 | + } \ | ||
| 128 | +} | ||
| 129 | + | ||
| 130 | +/* disable warnings */ | ||
| 131 | +#ifdef _MSC_VER /* Visual Studio */ | ||
| 132 | +# include <intrin.h> /* For Visual 2005 */ | ||
| 133 | +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ | ||
| 134 | +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ | ||
| 135 | +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ | ||
| 136 | +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ | ||
| 137 | +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ | ||
| 138 | +#endif | ||
| 139 | + | ||
| 140 | +#endif /* ZSTD_COMPILER_H */ |
vendor/github.com/DataDog/zstd/cover.c
0 → 100644
| 1 | +/* | ||
| 2 | + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | ||
| 3 | + * All rights reserved. | ||
| 4 | + * | ||
| 5 | + * This source code is licensed under both the BSD-style license (found in the | ||
| 6 | + * LICENSE file in the root directory of this source tree) and the GPLv2 (found | ||
| 7 | + * in the COPYING file in the root directory of this source tree). | ||
| 8 | + * You may select, at your option, one of the above-listed licenses. | ||
| 9 | + */ | ||
| 10 | + | ||
| 11 | +/* ***************************************************************************** | ||
| 12 | + * Constructs a dictionary using a heuristic based on the following paper: | ||
| 13 | + * | ||
| 14 | + * Liao, Petri, Moffat, Wirth | ||
| 15 | + * Effective Construction of Relative Lempel-Ziv Dictionaries | ||
| 16 | + * Published in WWW 2016. | ||
| 17 | + * | ||
| 18 | + * Adapted from code originally written by @ot (Giuseppe Ottaviano). | ||
| 19 | + ******************************************************************************/ | ||
| 20 | + | ||
| 21 | +/*-************************************* | ||
| 22 | +* Dependencies | ||
| 23 | +***************************************/ | ||
| 24 | +#include <stdio.h> /* fprintf */ | ||
| 25 | +#include <stdlib.h> /* malloc, free, qsort */ | ||
| 26 | +#include <string.h> /* memset */ | ||
| 27 | +#include <time.h> /* clock */ | ||
| 28 | + | ||
| 29 | +#include "mem.h" /* read */ | ||
| 30 | +#include "pool.h" | ||
| 31 | +#include "threading.h" | ||
| 32 | +#include "cover.h" | ||
| 33 | +#include "zstd_internal.h" /* includes zstd.h */ | ||
| 34 | +#ifndef ZDICT_STATIC_LINKING_ONLY | ||
| 35 | +#define ZDICT_STATIC_LINKING_ONLY | ||
| 36 | +#endif | ||
| 37 | +#include "zdict.h" | ||
| 38 | + | ||
| 39 | +/*-************************************* | ||
| 40 | +* Constants | ||
| 41 | +***************************************/ | ||
| 42 | +#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) | ||
| 43 | +#define DEFAULT_SPLITPOINT 1.0 | ||
| 44 | + | ||
| 45 | +/*-************************************* | ||
| 46 | +* Console display | ||
| 47 | +***************************************/ | ||
| 48 | +static int g_displayLevel = 2; /* verbosity used by DISPLAYLEVEL() and DISPLAYUPDATE() */ | ||
| 49 | +#define DISPLAY(...) \ | ||
| 50 | + { \ | ||
| 51 | + fprintf(stderr, __VA_ARGS__); \ | ||
| 52 | + fflush(stderr); \ | ||
| 53 | + } | ||
| 54 | +#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ | ||
| 55 | + if (displayLevel >= l) { \ | ||
| 56 | + DISPLAY(__VA_ARGS__); \ | ||
| 57 | + } /* prints only when displayLevel >= l. Levels : 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ | ||
| 58 | +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) /* same, using the file-wide g_displayLevel */ | ||
| 59 | + | ||
| 60 | +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ | ||
| 61 | + if (displayLevel >= l) { \ | ||
| 62 | + if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \ | ||
| 63 | + g_time = clock(); \ | ||
| 64 | + DISPLAY(__VA_ARGS__); \ | ||
| 65 | + } \ | ||
| 66 | + } | ||
| 67 | +#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) /* rate-limited display : skipped until refreshRate ticks have passed, unless level >= 4 */ | ||
| 68 | +static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; /* minimum clock() ticks between two rate-limited updates (15/100 of a second) */ | ||
| 69 | +static clock_t g_time = 0; /* clock() value recorded at the last rate-limited update */ | ||
| 70 | + | ||
| 71 | +/*-************************************* | ||
| 72 | +* Hash table | ||
| 73 | +*************************************** | ||
| 74 | +* A small specialized hash map for storing activeDmers. | ||
| 75 | +* The map does not resize, so if it becomes full it will loop forever. | ||
| 76 | +* Thus, the map must be large enough to store every value. | ||
| 77 | +* The map implements linear probing and keeps its load less than 0.5. | ||
| 78 | +*/ | ||
| 79 | + | ||
| 80 | +#define MAP_EMPTY_VALUE ((U32)-1) /* value stored in a slot that holds no entry */ | ||
| 81 | +typedef struct COVER_map_pair_t_s { | ||
| 82 | + U32 key; | ||
| 83 | + U32 value; /* MAP_EMPTY_VALUE marks the slot as unused */ | ||
| 84 | +} COVER_map_pair_t; | ||
| 85 | + | ||
| 86 | +typedef struct COVER_map_s { | ||
| 87 | + COVER_map_pair_t *data; /* array of `size` slots, allocated by COVER_map_init() */ | ||
| 88 | + U32 sizeLog; /* log2 of the slot count */ | ||
| 89 | + U32 size; /* slot count, 1 << sizeLog */ | ||
| 90 | + U32 sizeMask; /* size - 1, used to wrap slot indexes */ | ||
| 91 | +} COVER_map_t; | ||
| 92 | + | ||
| 93 | +/** | ||
| 94 | + * Clear the map : every slot is reset so that its value reads as MAP_EMPTY_VALUE. | ||
| 95 | + */ | ||
| 96 | +static void COVER_map_clear(COVER_map_t *map) { | ||
| 97 | + memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t)); /* memset() writes the low byte (0xFF) everywhere, so each U32 becomes (U32)-1 */ | ||
| 98 | +} | ||
| 99 | + | ||
| 100 | +/** | ||
| 101 | + * Initializes a map of the given size : allocates a power-of-two number of slots and clears them. | ||
| 102 | + * Returns 1 on success and 0 on failure (allocation failed; sizeLog and size are then reset to 0). | ||
| 103 | + * The map must be destroyed with COVER_map_destroy(). | ||
| 104 | + * The map is only guaranteed to be large enough to hold size elements. | ||
| 105 | + */ | ||
| 106 | +static int COVER_map_init(COVER_map_t *map, U32 size) { | ||
| 107 | + map->sizeLog = ZSTD_highbit32(size) + 2; /* presumably highest-set-bit index + 2, giving more than 2x `size` slots - confirm against ZSTD_highbit32() */ | ||
| 108 | + map->size = (U32)1 << map->sizeLog; | ||
| 109 | + map->sizeMask = map->size - 1; | ||
| 110 | + map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t)); | ||
| 111 | + if (!map->data) { | ||
| 112 | + map->sizeLog = 0; | ||
| 113 | + map->size = 0; | ||
| 114 | + return 0; | ||
| 115 | + } | ||
| 116 | + COVER_map_clear(map); | ||
| 117 | + return 1; | ||
| 118 | +} | ||
| 119 | + | ||
| 120 | +/** | ||
| 121 | + * Internal hash function : multiplies the key by prime4bytes and keeps the top sizeLog bits as the slot index. | ||
| 122 | + */ | ||
| 123 | +static const U32 prime4bytes = 2654435761U; | ||
| 124 | +static U32 COVER_map_hash(COVER_map_t *map, U32 key) { | ||
| 125 | + return (key * prime4bytes) >> (32 - map->sizeLog); | ||
| 126 | +} | ||
| 127 | + | ||
| 128 | +/** | ||
| 129 | + * Helper function that returns the index that a key should be placed into : the first slot, probing forward from the hash, that is empty or already holds `key`. Never returns if the map is full and `key` is absent. | ||
| 130 | + */ | ||
| 131 | +static U32 COVER_map_index(COVER_map_t *map, U32 key) { | ||
| 132 | + const U32 hash = COVER_map_hash(map, key); | ||
| 133 | + U32 i; | ||
| 134 | + for (i = hash;; i = (i + 1) & map->sizeMask) { /* step to the next slot, wrapping at the end of the table */ | ||
| 135 | + COVER_map_pair_t *pos = &map->data[i]; | ||
| 136 | + if (pos->value == MAP_EMPTY_VALUE) { | ||
| 137 | + return i; | ||
| 138 | + } | ||
| 139 | + if (pos->key == key) { | ||
| 140 | + return i; | ||
| 141 | + } | ||
| 142 | + } | ||
| 143 | +} | ||
| 144 | + | ||
| 145 | +/** | ||
| 146 | + * Returns the pointer to the value for key. The pointer refers to the slot inside map->data. | ||
| 147 | + * If key is not in the map, it is inserted and the value is set to 0. | ||
| 148 | + * The map must not be full. | ||
| 149 | + */ | ||
| 150 | +static U32 *COVER_map_at(COVER_map_t *map, U32 key) { | ||
| 151 | + COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)]; | ||
| 152 | + if (pos->value == MAP_EMPTY_VALUE) { /* empty slot : claim it for `key` */ | ||
| 153 | + pos->key = key; | ||
| 154 | + pos->value = 0; | ||
| 155 | + } | ||
| 156 | + return &pos->value; | ||
| 157 | +} | ||
| 158 | + | ||
| 159 | +/** | ||
| 160 | + * Deletes key from the map if present, then moves later entries of the same probe run back so that lookups still reach them. | ||
| 161 | + */ | ||
| 162 | +static void COVER_map_remove(COVER_map_t *map, U32 key) { | ||
| 163 | + U32 i = COVER_map_index(map, key); | ||
| 164 | + COVER_map_pair_t *del = &map->data[i]; /* slot to be vacated */ | ||
| 165 | + U32 shift = 1; /* distance, in slots, between `del` and the slot being examined */ | ||
| 166 | + if (del->value == MAP_EMPTY_VALUE) { /* key is not in the map */ | ||
| 167 | + return; | ||
| 168 | + } | ||
| 169 | + for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) { | ||
| 170 | + COVER_map_pair_t *const pos = &map->data[i]; | ||
| 171 | + /* If the position is empty we are done */ | ||
| 172 | + if (pos->value == MAP_EMPTY_VALUE) { | ||
| 173 | + del->value = MAP_EMPTY_VALUE; | ||
| 174 | + return; | ||
| 175 | + } | ||
| 176 | + /* If pos can be moved to del do so : its distance from its hashed slot must be at least `shift` */ | ||
| 177 | + if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) { | ||
| 178 | + del->key = pos->key; | ||
| 179 | + del->value = pos->value; | ||
| 180 | + del = pos; | ||
| 181 | + shift = 1; | ||
| 182 | + } else { | ||
| 183 | + ++shift; | ||
| 184 | + } | ||
| 185 | + } | ||
| 186 | +} | ||
| 187 | + | ||
| 188 | +/** | ||
| 189 | + * Destroys a map that is inited with COVER_map_init(). | ||
| 190 | + */ | ||
| 191 | +static void COVER_map_destroy(COVER_map_t *map) { | ||
| 192 | + if (map->data) { | ||
| 193 | + free(map->data); | ||
| 194 | + } | ||
| 195 | + map->data = NULL; | ||
| 196 | + map->size = 0; | ||
| 197 | +} | ||
| 198 | + | ||
| 199 | +/*-************************************* | ||
| 200 | +* Context | ||
| 201 | +***************************************/ | ||
| 202 | + | ||
/**
 * State for one cover training run.
 * Filled in by COVER_ctx_init() and released by COVER_ctx_destroy().
 */
typedef struct {
  const BYTE *samples;        /* Concatenated sample bytes (caller's buffer; not freed by COVER_ctx_destroy()) */
  size_t *offsets;            /* nbSamples + 1 running sums of samplesSizes; offsets[0] == 0 */
  const size_t *samplesSizes; /* Caller's array of per-sample sizes */
  size_t nbSamples;           /* Total number of samples */
  size_t nbTrainSamples;      /* Number of samples used for training */
  size_t nbTestSamples;       /* Number of samples used for testing */
  U32 *suffix;                /* Partial suffix array; set to NULL once its buffer is handed over to freqs */
  size_t suffixSize;          /* Number of entries in suffix / dmerAt / freqs */
  U32 *freqs;                 /* freqs[dmerId]: number of samples the dmer occurs in (reuses the suffix buffer) */
  U32 *dmerAt;                /* dmerAt[position]: dmerId of the dmer starting at that position */
  unsigned d;                 /* Dmer length in bytes */
} COVER_ctx_t;

/* We need a global context for qsort, whose comparator takes no user pointer.
 * It is assigned in COVER_ctx_init() right before sorting.
 */
static COVER_ctx_t *g_ctx = NULL;
| 219 | + | ||
| 220 | +/*-************************************* | ||
| 221 | +* Helper functions | ||
| 222 | +***************************************/ | ||
| 223 | + | ||
/**
 * Adds up the first `nbSamples` entries of `samplesSizes` and returns the
 * total.
 */
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
  size_t total = 0;
  unsigned idx = 0;
  while (idx != nbSamples) {
    total += samplesSizes[idx++];
  }
  return total;
}
| 235 | + | ||
| 236 | +/** | ||
| 237 | + * Returns -1 if the dmer at lp is less than the dmer at rp. | ||
| 238 | + * Return 0 if the dmers at lp and rp are equal. | ||
| 239 | + * Returns 1 if the dmer at lp is greater than the dmer at rp. | ||
| 240 | + */ | ||
| 241 | +static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) { | ||
| 242 | + U32 const lhs = *(U32 const *)lp; | ||
| 243 | + U32 const rhs = *(U32 const *)rp; | ||
| 244 | + return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d); | ||
| 245 | +} | ||
| 246 | +/** | ||
| 247 | + * Faster version for d <= 8. | ||
| 248 | + */ | ||
| 249 | +static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { | ||
| 250 | + U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1); | ||
| 251 | + U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask; | ||
| 252 | + U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask; | ||
| 253 | + if (lhs < rhs) { | ||
| 254 | + return -1; | ||
| 255 | + } | ||
| 256 | + return (lhs > rhs); | ||
| 257 | +} | ||
| 258 | + | ||
| 259 | +/** | ||
| 260 | + * Same as COVER_cmp() except ties are broken by pointer value | ||
| 261 | + * NOTE: g_ctx must be set to call this function. A global is required because | ||
| 262 | + * qsort doesn't take an opaque pointer. | ||
| 263 | + */ | ||
| 264 | +static int COVER_strict_cmp(const void *lp, const void *rp) { | ||
| 265 | + int result = COVER_cmp(g_ctx, lp, rp); | ||
| 266 | + if (result == 0) { | ||
| 267 | + result = lp < rp ? -1 : 1; | ||
| 268 | + } | ||
| 269 | + return result; | ||
| 270 | +} | ||
| 271 | +/** | ||
| 272 | + * Faster version for d <= 8. | ||
| 273 | + */ | ||
| 274 | +static int COVER_strict_cmp8(const void *lp, const void *rp) { | ||
| 275 | + int result = COVER_cmp8(g_ctx, lp, rp); | ||
| 276 | + if (result == 0) { | ||
| 277 | + result = lp < rp ? -1 : 1; | ||
| 278 | + } | ||
| 279 | + return result; | ||
| 280 | +} | ||
| 281 | + | ||
/**
 * Binary search over the sorted range [first, last).
 * Returns a pointer to the first element that is not less than `value`,
 * or `last` when every element is less than `value`.
 */
static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
                                       size_t value) {
  size_t remaining = last - first;
  while (remaining > 0) {
    const size_t half = remaining / 2;
    const size_t *const mid = first + half;
    if (*mid < value) {
      /* Answer lies strictly after mid */
      first = mid + 1;
      remaining -= half + 1;
    } else {
      /* mid itself may be the answer; search the left half */
      remaining = half;
    }
  }
  return first;
}
| 302 | + | ||
| 303 | +/** | ||
| 304 | + * Generic groupBy function. | ||
| 305 | + * Groups an array sorted by cmp into groups with equivalent values. | ||
| 306 | + * Calls grp for each group. | ||
| 307 | + */ | ||
| 308 | +static void | ||
| 309 | +COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx, | ||
| 310 | + int (*cmp)(COVER_ctx_t *, const void *, const void *), | ||
| 311 | + void (*grp)(COVER_ctx_t *, const void *, const void *)) { | ||
| 312 | + const BYTE *ptr = (const BYTE *)data; | ||
| 313 | + size_t num = 0; | ||
| 314 | + while (num < count) { | ||
| 315 | + const BYTE *grpEnd = ptr + size; | ||
| 316 | + ++num; | ||
| 317 | + while (num < count && cmp(ctx, ptr, grpEnd) == 0) { | ||
| 318 | + grpEnd += size; | ||
| 319 | + ++num; | ||
| 320 | + } | ||
| 321 | + grp(ctx, ptr, grpEnd); | ||
| 322 | + ptr = grpEnd; | ||
| 323 | + } | ||
| 324 | +} | ||
| 325 | + | ||
| 326 | +/*-************************************* | ||
| 327 | +* Cover functions | ||
| 328 | +***************************************/ | ||
| 329 | + | ||
/**
 * Called on each group of positions with the same dmer.
 * Counts the frequency of each dmer and saves it in the suffix array.
 * Fills `ctx->dmerAt`.
 *
 * `group` and `groupEnd` delimit a run of U32 positions inside `ctx->suffix`.
 */
static void COVER_group(COVER_ctx_t *ctx, const void *group,
                        const void *groupEnd) {
  /* The group consists of all the positions with the same first d bytes. */
  const U32 *grpPtr = (const U32 *)group;
  const U32 *grpEnd = (const U32 *)groupEnd;
  /* The dmerId is how we will reference this dmer.
   * This allows us to map the whole dmer space to a much smaller space, the
   * size of the suffix array.
   */
  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
  /* Count the number of samples this dmer shows up in */
  U32 freq = 0;
  /* Search window into ctx->offsets; it only moves forward as the group is
   * scanned.
   */
  const size_t *curOffsetPtr = ctx->offsets;
  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
   * different sample than the last.
   */
  size_t curSampleEnd = ctx->offsets[0];
  for (; grpPtr != grpEnd; ++grpPtr) {
    /* Save the dmerId for this position so we can get back to it. */
    ctx->dmerAt[*grpPtr] = dmerId;
    /* Dictionaries only help for the first reference to the dmer.
     * After that zstd can reference the match from the previous reference.
     * So only count each dmer once for each sample it is in.
     */
    if (*grpPtr < curSampleEnd) {
      continue;
    }
    freq += 1;
    /* Binary search to find the end of the sample *grpPtr is in.
     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
     * search because the loop is over.
     */
    if (grpPtr + 1 != grpEnd) {
      const size_t *sampleEndPtr =
          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
      curSampleEnd = *sampleEndPtr;
      curOffsetPtr = sampleEndPtr + 1;
    }
  }
  /* At this point we are never going to look at this segment of the suffix
   * array again. We take advantage of this fact to save memory.
   * We store the frequency of the dmer in the first position of the group,
   * which is dmerId.
   */
  ctx->suffix[dmerId] = freq;
}
| 383 | + | ||
| 384 | + | ||
/**
 * Selects the best segment in an epoch.
 * Segments are scored according to the function:
 *
 * Let F(d) be the frequency of dmer d.
 * Let S_i be the dmer at position i of segment S which has length k.
 *
 *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
 *
 * Once the dmer d is in the dictionary we set F(d) = 0.
 *
 * The epoch is the position range [begin, end). `activeDmers` is scratch
 * space that is cleared on entry; `freqs` is modified (the dmers covered by
 * the returned segment are zeroed).
 */
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
                                           COVER_map_t *activeDmers, U32 begin,
                                           U32 end,
                                           ZDICT_cover_params_t parameters) {
  /* Constants */
  const U32 k = parameters.k;
  const U32 d = parameters.d;
  /* Number of dmers contained in a segment of k bytes */
  const U32 dmersInK = k - d + 1;
  /* Try each segment (activeSegment) and save the best (bestSegment) */
  COVER_segment_t bestSegment = {0, 0, 0};
  COVER_segment_t activeSegment;
  /* Reset the activeDmers in the segment */
  COVER_map_clear(activeDmers);
  /* The activeSegment starts at the beginning of the epoch. */
  activeSegment.begin = begin;
  activeSegment.end = begin;
  activeSegment.score = 0;
  /* Slide the activeSegment through the whole epoch.
   * Save the best segment in bestSegment.
   */
  while (activeSegment.end < end) {
    /* The dmerId for the dmer at the next position */
    U32 newDmer = ctx->dmerAt[activeSegment.end];
    /* The entry in activeDmers for this dmerId */
    U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
    /* If the dmer isn't already present in the segment add its score. */
    if (*newDmerOcc == 0) {
      /* The paper suggests using the L-0.5 norm, but experiments show that it
       * doesn't help.
       */
      activeSegment.score += freqs[newDmer];
    }
    /* Add the dmer to the segment */
    activeSegment.end += 1;
    *newDmerOcc += 1;

    /* If the window is now too large, drop the first position */
    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
      U32 delDmer = ctx->dmerAt[activeSegment.begin];
      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
      activeSegment.begin += 1;
      *delDmerOcc -= 1;
      /* If this is the last occurrence of the dmer, subtract its score */
      if (*delDmerOcc == 0) {
        COVER_map_remove(activeDmers, delDmer);
        activeSegment.score -= freqs[delDmer];
      }
    }

    /* If this segment is the best so far save it */
    if (activeSegment.score > bestSegment.score) {
      bestSegment = activeSegment;
    }
  }
  {
    /* Trim off the zero frequency head and tail from the segment. */
    U32 newBegin = bestSegment.end;
    U32 newEnd = bestSegment.begin;
    U32 pos;
    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
      U32 freq = freqs[ctx->dmerAt[pos]];
      if (freq != 0) {
        newBegin = MIN(newBegin, pos);
        newEnd = pos + 1;
      }
    }
    bestSegment.begin = newBegin;
    bestSegment.end = newEnd;
  }
  {
    /* Zero out the frequency of each dmer covered by the chosen segment. */
    U32 pos;
    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
      freqs[ctx->dmerAt[pos]] = 0;
    }
  }
  return bestSegment;
}
| 474 | + | ||
| 475 | +/** | ||
| 476 | + * Check the validity of the parameters. | ||
| 477 | + * Returns non-zero if the parameters are valid and 0 otherwise. | ||
| 478 | + */ | ||
| 479 | +static int COVER_checkParameters(ZDICT_cover_params_t parameters, | ||
| 480 | + size_t maxDictSize) { | ||
| 481 | + /* k and d are required parameters */ | ||
| 482 | + if (parameters.d == 0 || parameters.k == 0) { | ||
| 483 | + return 0; | ||
| 484 | + } | ||
| 485 | + /* k <= maxDictSize */ | ||
| 486 | + if (parameters.k > maxDictSize) { | ||
| 487 | + return 0; | ||
| 488 | + } | ||
| 489 | + /* d <= k */ | ||
| 490 | + if (parameters.d > parameters.k) { | ||
| 491 | + return 0; | ||
| 492 | + } | ||
| 493 | + /* 0 < splitPoint <= 1 */ | ||
| 494 | + if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){ | ||
| 495 | + return 0; | ||
| 496 | + } | ||
| 497 | + return 1; | ||
| 498 | +} | ||
| 499 | + | ||
| 500 | +/** | ||
| 501 | + * Clean up a context initialized with `COVER_ctx_init()`. | ||
| 502 | + */ | ||
| 503 | +static void COVER_ctx_destroy(COVER_ctx_t *ctx) { | ||
| 504 | + if (!ctx) { | ||
| 505 | + return; | ||
| 506 | + } | ||
| 507 | + if (ctx->suffix) { | ||
| 508 | + free(ctx->suffix); | ||
| 509 | + ctx->suffix = NULL; | ||
| 510 | + } | ||
| 511 | + if (ctx->freqs) { | ||
| 512 | + free(ctx->freqs); | ||
| 513 | + ctx->freqs = NULL; | ||
| 514 | + } | ||
| 515 | + if (ctx->dmerAt) { | ||
| 516 | + free(ctx->dmerAt); | ||
| 517 | + ctx->dmerAt = NULL; | ||
| 518 | + } | ||
| 519 | + if (ctx->offsets) { | ||
| 520 | + free(ctx->offsets); | ||
| 521 | + ctx->offsets = NULL; | ||
| 522 | + } | ||
| 523 | +} | ||
| 524 | + | ||
| 525 | +/** | ||
| 526 | + * Prepare a context for dictionary building. | ||
| 527 | + * The context is only dependent on the parameter `d` and can used multiple | ||
| 528 | + * times. | ||
| 529 | + * Returns 1 on success or zero on error. | ||
| 530 | + * The context must be destroyed with `COVER_ctx_destroy()`. | ||
| 531 | + */ | ||
| 532 | +static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, | ||
| 533 | + const size_t *samplesSizes, unsigned nbSamples, | ||
| 534 | + unsigned d, double splitPoint) { | ||
| 535 | + const BYTE *const samples = (const BYTE *)samplesBuffer; | ||
| 536 | + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); | ||
| 537 | + /* Split samples into testing and training sets */ | ||
| 538 | + const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; | ||
| 539 | + const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; | ||
| 540 | + const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; | ||
| 541 | + const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; | ||
| 542 | + /* Checks */ | ||
| 543 | + if (totalSamplesSize < MAX(d, sizeof(U64)) || | ||
| 544 | + totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { | ||
| 545 | + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", | ||
| 546 | + (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); | ||
| 547 | + return 0; | ||
| 548 | + } | ||
| 549 | + /* Check if there are at least 5 training samples */ | ||
| 550 | + if (nbTrainSamples < 5) { | ||
| 551 | + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); | ||
| 552 | + return 0; | ||
| 553 | + } | ||
| 554 | + /* Check if there's testing sample */ | ||
| 555 | + if (nbTestSamples < 1) { | ||
| 556 | + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); | ||
| 557 | + return 0; | ||
| 558 | + } | ||
| 559 | + /* Zero the context */ | ||
| 560 | + memset(ctx, 0, sizeof(*ctx)); | ||
| 561 | + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, | ||
| 562 | + (unsigned)trainingSamplesSize); | ||
| 563 | + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, | ||
| 564 | + (unsigned)testSamplesSize); | ||
| 565 | + ctx->samples = samples; | ||
| 566 | + ctx->samplesSizes = samplesSizes; | ||
| 567 | + ctx->nbSamples = nbSamples; | ||
| 568 | + ctx->nbTrainSamples = nbTrainSamples; | ||
| 569 | + ctx->nbTestSamples = nbTestSamples; | ||
| 570 | + /* Partial suffix array */ | ||
| 571 | + ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; | ||
| 572 | + ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); | ||
| 573 | + /* Maps index to the dmerID */ | ||
| 574 | + ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); | ||
| 575 | + /* The offsets of each file */ | ||
| 576 | + ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); | ||
| 577 | + if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { | ||
| 578 | + DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); | ||
| 579 | + COVER_ctx_destroy(ctx); | ||
| 580 | + return 0; | ||
| 581 | + } | ||
| 582 | + ctx->freqs = NULL; | ||
| 583 | + ctx->d = d; | ||
| 584 | + | ||
| 585 | + /* Fill offsets from the samplesSizes */ | ||
| 586 | + { | ||
| 587 | + U32 i; | ||
| 588 | + ctx->offsets[0] = 0; | ||
| 589 | + for (i = 1; i <= nbSamples; ++i) { | ||
| 590 | + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; | ||
| 591 | + } | ||
| 592 | + } | ||
| 593 | + DISPLAYLEVEL(2, "Constructing partial suffix array\n"); | ||
| 594 | + { | ||
| 595 | + /* suffix is a partial suffix array. | ||
| 596 | + * It only sorts suffixes by their first parameters.d bytes. | ||
| 597 | + * The sort is stable, so each dmer group is sorted by position in input. | ||
| 598 | + */ | ||
| 599 | + U32 i; | ||
| 600 | + for (i = 0; i < ctx->suffixSize; ++i) { | ||
| 601 | + ctx->suffix[i] = i; | ||
| 602 | + } | ||
| 603 | + /* qsort doesn't take an opaque pointer, so pass as a global. | ||
| 604 | + * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is. | ||
| 605 | + */ | ||
| 606 | + g_ctx = ctx; | ||
| 607 | +#if defined(__OpenBSD__) | ||
| 608 | + mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32), | ||
| 609 | + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); | ||
| 610 | +#else | ||
| 611 | + qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), | ||
| 612 | + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); | ||
| 613 | +#endif | ||
| 614 | + } | ||
| 615 | + DISPLAYLEVEL(2, "Computing frequencies\n"); | ||
| 616 | + /* For each dmer group (group of positions with the same first d bytes): | ||
| 617 | + * 1. For each position we set dmerAt[position] = dmerID. The dmerID is | ||
| 618 | + * (groupBeginPtr - suffix). This allows us to go from position to | ||
| 619 | + * dmerID so we can look up values in freq. | ||
| 620 | + * 2. We calculate how many samples the dmer occurs in and save it in | ||
| 621 | + * freqs[dmerId]. | ||
| 622 | + */ | ||
| 623 | + COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, | ||
| 624 | + (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); | ||
| 625 | + ctx->freqs = ctx->suffix; | ||
| 626 | + ctx->suffix = NULL; | ||
| 627 | + return 1; | ||
| 628 | +} | ||
| 629 | + | ||
| 630 | +/** | ||
| 631 | + * Given the prepared context build the dictionary. | ||
| 632 | + */ | ||
| 633 | +static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, | ||
| 634 | + COVER_map_t *activeDmers, void *dictBuffer, | ||
| 635 | + size_t dictBufferCapacity, | ||
| 636 | + ZDICT_cover_params_t parameters) { | ||
| 637 | + BYTE *const dict = (BYTE *)dictBuffer; | ||
| 638 | + size_t tail = dictBufferCapacity; | ||
| 639 | + /* Divide the data up into epochs of equal size. | ||
| 640 | + * We will select at least one segment from each epoch. | ||
| 641 | + */ | ||
| 642 | + const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4)); | ||
| 643 | + const unsigned epochSize = (U32)(ctx->suffixSize / epochs); | ||
| 644 | + size_t epoch; | ||
| 645 | + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", | ||
| 646 | + epochs, epochSize); | ||
| 647 | + /* Loop through the epochs until there are no more segments or the dictionary | ||
| 648 | + * is full. | ||
| 649 | + */ | ||
| 650 | + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { | ||
| 651 | + const U32 epochBegin = (U32)(epoch * epochSize); | ||
| 652 | + const U32 epochEnd = epochBegin + epochSize; | ||
| 653 | + size_t segmentSize; | ||
| 654 | + /* Select a segment */ | ||
| 655 | + COVER_segment_t segment = COVER_selectSegment( | ||
| 656 | + ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); | ||
| 657 | + /* If the segment covers no dmers, then we are out of content */ | ||
| 658 | + if (segment.score == 0) { | ||
| 659 | + break; | ||
| 660 | + } | ||
| 661 | + /* Trim the segment if necessary and if it is too small then we are done */ | ||
| 662 | + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); | ||
| 663 | + if (segmentSize < parameters.d) { | ||
| 664 | + break; | ||
| 665 | + } | ||
| 666 | + /* We fill the dictionary from the back to allow the best segments to be | ||
| 667 | + * referenced with the smallest offsets. | ||
| 668 | + */ | ||
| 669 | + tail -= segmentSize; | ||
| 670 | + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); | ||
| 671 | + DISPLAYUPDATE( | ||
| 672 | + 2, "\r%u%% ", | ||
| 673 | + (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); | ||
| 674 | + } | ||
| 675 | + DISPLAYLEVEL(2, "\r%79s\r", ""); | ||
| 676 | + return tail; | ||
| 677 | +} | ||
| 678 | + | ||
| 679 | +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( | ||
| 680 | + void *dictBuffer, size_t dictBufferCapacity, | ||
| 681 | + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, | ||
| 682 | + ZDICT_cover_params_t parameters) | ||
| 683 | +{ | ||
| 684 | + BYTE* const dict = (BYTE*)dictBuffer; | ||
| 685 | + COVER_ctx_t ctx; | ||
| 686 | + COVER_map_t activeDmers; | ||
| 687 | + parameters.splitPoint = 1.0; | ||
| 688 | + /* Initialize global data */ | ||
| 689 | + g_displayLevel = parameters.zParams.notificationLevel; | ||
| 690 | + /* Checks */ | ||
| 691 | + if (!COVER_checkParameters(parameters, dictBufferCapacity)) { | ||
| 692 | + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); | ||
| 693 | + return ERROR(GENERIC); | ||
| 694 | + } | ||
| 695 | + if (nbSamples == 0) { | ||
| 696 | + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); | ||
| 697 | + return ERROR(GENERIC); | ||
| 698 | + } | ||
| 699 | + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { | ||
| 700 | + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", | ||
| 701 | + ZDICT_DICTSIZE_MIN); | ||
| 702 | + return ERROR(dstSize_tooSmall); | ||
| 703 | + } | ||
| 704 | + /* Initialize context and activeDmers */ | ||
| 705 | + if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, | ||
| 706 | + parameters.d, parameters.splitPoint)) { | ||
| 707 | + return ERROR(GENERIC); | ||
| 708 | + } | ||
| 709 | + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { | ||
| 710 | + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); | ||
| 711 | + COVER_ctx_destroy(&ctx); | ||
| 712 | + return ERROR(GENERIC); | ||
| 713 | + } | ||
| 714 | + | ||
| 715 | + DISPLAYLEVEL(2, "Building dictionary\n"); | ||
| 716 | + { | ||
| 717 | + const size_t tail = | ||
| 718 | + COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer, | ||
| 719 | + dictBufferCapacity, parameters); | ||
| 720 | + const size_t dictionarySize = ZDICT_finalizeDictionary( | ||
| 721 | + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, | ||
| 722 | + samplesBuffer, samplesSizes, nbSamples, parameters.zParams); | ||
| 723 | + if (!ZSTD_isError(dictionarySize)) { | ||
| 724 | + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", | ||
| 725 | + (unsigned)dictionarySize); | ||
| 726 | + } | ||
| 727 | + COVER_ctx_destroy(&ctx); | ||
| 728 | + COVER_map_destroy(&activeDmers); | ||
| 729 | + return dictionarySize; | ||
| 730 | + } | ||
| 731 | +} | ||
| 732 | + | ||
| 733 | + | ||
| 734 | + | ||
| 735 | +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, | ||
| 736 | + const size_t *samplesSizes, const BYTE *samples, | ||
| 737 | + size_t *offsets, | ||
| 738 | + size_t nbTrainSamples, size_t nbSamples, | ||
| 739 | + BYTE *const dict, size_t dictBufferCapacity) { | ||
| 740 | + size_t totalCompressedSize = ERROR(GENERIC); | ||
| 741 | + /* Pointers */ | ||
| 742 | + ZSTD_CCtx *cctx; | ||
| 743 | + ZSTD_CDict *cdict; | ||
| 744 | + void *dst; | ||
| 745 | + /* Local variables */ | ||
| 746 | + size_t dstCapacity; | ||
| 747 | + size_t i; | ||
| 748 | + /* Allocate dst with enough space to compress the maximum sized sample */ | ||
| 749 | + { | ||
| 750 | + size_t maxSampleSize = 0; | ||
| 751 | + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; | ||
| 752 | + for (; i < nbSamples; ++i) { | ||
| 753 | + maxSampleSize = MAX(samplesSizes[i], maxSampleSize); | ||
| 754 | + } | ||
| 755 | + dstCapacity = ZSTD_compressBound(maxSampleSize); | ||
| 756 | + dst = malloc(dstCapacity); | ||
| 757 | + } | ||
| 758 | + /* Create the cctx and cdict */ | ||
| 759 | + cctx = ZSTD_createCCtx(); | ||
| 760 | + cdict = ZSTD_createCDict(dict, dictBufferCapacity, | ||
| 761 | + parameters.zParams.compressionLevel); | ||
| 762 | + if (!dst || !cctx || !cdict) { | ||
| 763 | + goto _compressCleanup; | ||
| 764 | + } | ||
| 765 | + /* Compress each sample and sum their sizes (or error) */ | ||
| 766 | + totalCompressedSize = dictBufferCapacity; | ||
| 767 | + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; | ||
| 768 | + for (; i < nbSamples; ++i) { | ||
| 769 | + const size_t size = ZSTD_compress_usingCDict( | ||
| 770 | + cctx, dst, dstCapacity, samples + offsets[i], | ||
| 771 | + samplesSizes[i], cdict); | ||
| 772 | + if (ZSTD_isError(size)) { | ||
| 773 | + totalCompressedSize = ERROR(GENERIC); | ||
| 774 | + goto _compressCleanup; | ||
| 775 | + } | ||
| 776 | + totalCompressedSize += size; | ||
| 777 | + } | ||
| 778 | +_compressCleanup: | ||
| 779 | + ZSTD_freeCCtx(cctx); | ||
| 780 | + ZSTD_freeCDict(cdict); | ||
| 781 | + if (dst) { | ||
| 782 | + free(dst); | ||
| 783 | + } | ||
| 784 | + return totalCompressedSize; | ||
| 785 | +} | ||
| 786 | + | ||
| 787 | + | ||
| 788 | +/** | ||
| 789 | + * Initialize the `COVER_best_t`. | ||
| 790 | + */ | ||
| 791 | +void COVER_best_init(COVER_best_t *best) { | ||
| 792 | + if (best==NULL) return; /* compatible with init on NULL */ | ||
| 793 | + (void)ZSTD_pthread_mutex_init(&best->mutex, NULL); | ||
| 794 | + (void)ZSTD_pthread_cond_init(&best->cond, NULL); | ||
| 795 | + best->liveJobs = 0; | ||
| 796 | + best->dict = NULL; | ||
| 797 | + best->dictSize = 0; | ||
| 798 | + best->compressedSize = (size_t)-1; | ||
| 799 | + memset(&best->parameters, 0, sizeof(best->parameters)); | ||
| 800 | +} | ||
| 801 | + | ||
| 802 | +/** | ||
| 803 | + * Wait until liveJobs == 0. | ||
| 804 | + */ | ||
| 805 | +void COVER_best_wait(COVER_best_t *best) { | ||
| 806 | + if (!best) { | ||
| 807 | + return; | ||
| 808 | + } | ||
| 809 | + ZSTD_pthread_mutex_lock(&best->mutex); | ||
| 810 | + while (best->liveJobs != 0) { | ||
| 811 | + ZSTD_pthread_cond_wait(&best->cond, &best->mutex); | ||
| 812 | + } | ||
| 813 | + ZSTD_pthread_mutex_unlock(&best->mutex); | ||
| 814 | +} | ||
| 815 | + | ||
| 816 | +/** | ||
| 817 | + * Call COVER_best_wait() and then destroy the COVER_best_t. | ||
| 818 | + */ | ||
| 819 | +void COVER_best_destroy(COVER_best_t *best) { | ||
| 820 | + if (!best) { | ||
| 821 | + return; | ||
| 822 | + } | ||
| 823 | + COVER_best_wait(best); | ||
| 824 | + if (best->dict) { | ||
| 825 | + free(best->dict); | ||
| 826 | + } | ||
| 827 | + ZSTD_pthread_mutex_destroy(&best->mutex); | ||
| 828 | + ZSTD_pthread_cond_destroy(&best->cond); | ||
| 829 | +} | ||
| 830 | + | ||
| 831 | +/** | ||
| 832 | + * Called when a thread is about to be launched. | ||
| 833 | + * Increments liveJobs. | ||
| 834 | + */ | ||
| 835 | +void COVER_best_start(COVER_best_t *best) { | ||
| 836 | + if (!best) { | ||
| 837 | + return; | ||
| 838 | + } | ||
| 839 | + ZSTD_pthread_mutex_lock(&best->mutex); | ||
| 840 | + ++best->liveJobs; | ||
| 841 | + ZSTD_pthread_mutex_unlock(&best->mutex); | ||
| 842 | +} | ||
| 843 | + | ||
| 844 | +/** | ||
| 845 | + * Called when a thread finishes executing, both on error or success. | ||
| 846 | + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. | ||
| 847 | + * If this dictionary is the best so far save it and its parameters. | ||
| 848 | + */ | ||
| 849 | +void COVER_best_finish(COVER_best_t *best, size_t compressedSize, | ||
| 850 | + ZDICT_cover_params_t parameters, void *dict, | ||
| 851 | + size_t dictSize) { | ||
| 852 | + if (!best) { | ||
| 853 | + return; | ||
| 854 | + } | ||
| 855 | + { | ||
| 856 | + size_t liveJobs; | ||
| 857 | + ZSTD_pthread_mutex_lock(&best->mutex); | ||
| 858 | + --best->liveJobs; | ||
| 859 | + liveJobs = best->liveJobs; | ||
| 860 | + /* If the new dictionary is better */ | ||
| 861 | + if (compressedSize < best->compressedSize) { | ||
| 862 | + /* Allocate space if necessary */ | ||
| 863 | + if (!best->dict || best->dictSize < dictSize) { | ||
| 864 | + if (best->dict) { | ||
| 865 | + free(best->dict); | ||
| 866 | + } | ||
| 867 | + best->dict = malloc(dictSize); | ||
| 868 | + if (!best->dict) { | ||
| 869 | + best->compressedSize = ERROR(GENERIC); | ||
| 870 | + best->dictSize = 0; | ||
| 871 | + ZSTD_pthread_cond_signal(&best->cond); | ||
| 872 | + ZSTD_pthread_mutex_unlock(&best->mutex); | ||
| 873 | + return; | ||
| 874 | + } | ||
| 875 | + } | ||
| 876 | + /* Save the dictionary, parameters, and size */ | ||
| 877 | + memcpy(best->dict, dict, dictSize); | ||
| 878 | + best->dictSize = dictSize; | ||
| 879 | + best->parameters = parameters; | ||
| 880 | + best->compressedSize = compressedSize; | ||
| 881 | + } | ||
| 882 | + if (liveJobs == 0) { | ||
| 883 | + ZSTD_pthread_cond_broadcast(&best->cond); | ||
| 884 | + } | ||
| 885 | + ZSTD_pthread_mutex_unlock(&best->mutex); | ||
| 886 | + } | ||
| 887 | +} | ||
| 888 | + | ||
/**
 * Parameters for COVER_tryParameters().
 * Passed to it as an opaque pointer.
 */
typedef struct COVER_tryParameters_data_s {
  const COVER_ctx_t *ctx;          /* Prepared context; its freqs are copied, not modified */
  COVER_best_t *best;              /* Presumably where the result is reported - usage is outside this view */
  size_t dictBufferCapacity;       /* Size of the dictionary buffer to allocate and fill */
  ZDICT_cover_params_t parameters; /* Parameter set to try */
} COVER_tryParameters_data_t;
| 898 | + | ||
| 899 | +/** | ||
| 900 | + * Builds a dictionary for one set of parameters, measures the total compressed size it gives, and reports the result to the COVER_best_t through COVER_best_finish(). | ||
| 901 | + * This function is thread safe if zstd is compiled with multithreaded support. | ||
| 902 | + * It takes its parameters as an *OWNING* opaque pointer to support threading: `opaque` is a COVER_tryParameters_data_t* that is freed before returning. | ||
| 903 | + */ | ||
| 904 | +static void COVER_tryParameters(void *opaque) { | ||
| 905 | + /* Save parameters as local variables */ | ||
| 906 | + COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque; | ||
| 907 | + const COVER_ctx_t *const ctx = data->ctx; | ||
| 908 | + const ZDICT_cover_params_t parameters = data->parameters; | ||
| 909 | + size_t dictBufferCapacity = data->dictBufferCapacity; | ||
| 910 | + size_t totalCompressedSize = ERROR(GENERIC); /* stays an error code unless every step below succeeds */ | ||
| 911 | + /* Allocate space for hash table, dict, and freqs */ | ||
| 912 | + COVER_map_t activeDmers; | ||
| 913 | + BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); | ||
| 914 | + U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); | ||
| 915 | + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { | ||
| 916 | + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); | ||
| 917 | + goto _cleanup; | ||
| 918 | + } | ||
| 919 | + if (!dict || !freqs) { | ||
| 920 | + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); | ||
| 921 | + goto _cleanup; | ||
| 922 | + } | ||
| 923 | + /* Copy the frequencies because we need to modify them */ | ||
| 924 | + memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32)); | ||
| 925 | + /* Build the dictionary: COVER_buildDictionary() returns `tail`, the content starting at dict + tail is handed to ZDICT_finalizeDictionary(), and dictBufferCapacity is overwritten with that call's return value (checked with ZDICT_isError()) */ | ||
| 926 | + { | ||
| 927 | + const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, | ||
| 928 | + dictBufferCapacity, parameters); | ||
| 929 | + dictBufferCapacity = ZDICT_finalizeDictionary( | ||
| 930 | + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, | ||
| 931 | + ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, | ||
| 932 | + parameters.zParams); | ||
| 933 | + if (ZDICT_isError(dictBufferCapacity)) { | ||
| 934 | + DISPLAYLEVEL(1, "Failed to finalize dictionary\n"); | ||
| 935 | + goto _cleanup; | ||
| 936 | + } | ||
| 937 | + } | ||
| 938 | + /* Check total compressed size obtained with this dictionary */ | ||
| 939 | + totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes, | ||
| 940 | + ctx->samples, ctx->offsets, | ||
| 941 | + ctx->nbTrainSamples, ctx->nbSamples, | ||
| 942 | + dict, dictBufferCapacity); | ||
| 943 | + | ||
| 944 | +_cleanup: /* reached on success and on every failure path above */ | ||
| 945 | + COVER_best_finish(data->best, totalCompressedSize, parameters, dict, | ||
| 946 | + dictBufferCapacity); /* always called so the job is counted as finished, even on error */ | ||
| 947 | + free(data); | ||
| 948 | + COVER_map_destroy(&activeDmers); | ||
| 949 | + if (dict) { | ||
| 950 | + free(dict); | ||
| 951 | + } | ||
| 952 | + if (freqs) { | ||
| 953 | + free(freqs); | ||
| 954 | + } | ||
| 955 | +} | ||
| 956 | + | ||
| 957 | +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( | ||
| 958 | + void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, | ||
| 959 | + const size_t *samplesSizes, unsigned nbSamples, | ||
| 960 | + ZDICT_cover_params_t *parameters) { /* Tries every (d, k) candidate, copies the best dictionary into dictBuffer and the winning parameters into *parameters; returns the dictionary size or an error code */ | ||
| 961 | + /* constants: a zero d, k or steps selects the default search range (d from 6 to 8, k from 50 to 2000, 40 steps) */ | ||
| 962 | + const unsigned nbThreads = parameters->nbThreads; | ||
| 963 | + const double splitPoint = | ||
| 964 | + parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint; | ||
| 965 | + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; | ||
| 966 | + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; | ||
| 967 | + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; | ||
| 968 | + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; | ||
| 969 | + const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; | ||
| 970 | + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); | ||
| 971 | + const unsigned kIterations = | ||
| 972 | + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); | ||
| 973 | + /* Local variables */ | ||
| 974 | + const int displayLevel = parameters->zParams.notificationLevel; | ||
| 975 | + unsigned iteration = 1; | ||
| 976 | + unsigned d; | ||
| 977 | + unsigned k; | ||
| 978 | + COVER_best_t best; | ||
| 979 | + POOL_ctx *pool = NULL; | ||
| 980 | + | ||
| 981 | + /* Checks: all of them return before the pool and `best` are set up, so no cleanup is needed */ | ||
| 982 | + if (splitPoint <= 0 || splitPoint > 1) { | ||
| 983 | + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); | ||
| 984 | + return ERROR(GENERIC); | ||
| 985 | + } | ||
| 986 | + if (kMinK < kMaxD || kMaxK < kMinK) { | ||
| 987 | + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); | ||
| 988 | + return ERROR(GENERIC); | ||
| 989 | + } | ||
| 990 | + if (nbSamples == 0) { | ||
| 991 | + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); | ||
| 992 | + return ERROR(GENERIC); | ||
| 993 | + } | ||
| 994 | + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { | ||
| 995 | + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", | ||
| 996 | + ZDICT_DICTSIZE_MIN); | ||
| 997 | + return ERROR(dstSize_tooSmall); | ||
| 998 | + } | ||
| 999 | + if (nbThreads > 1) { | ||
| 1000 | + pool = POOL_create(nbThreads, 1); | ||
| 1001 | + if (!pool) { | ||
| 1002 | + return ERROR(memory_allocation); | ||
| 1003 | + } | ||
| 1004 | + } | ||
| 1005 | + /* Initialization */ | ||
| 1006 | + COVER_best_init(&best); | ||
| 1007 | + /* Turn down global display level to clean up display at level 2 and below */ | ||
| 1008 | + g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; | ||
| 1009 | + /* Loop through d first because each new value needs a new context */ | ||
| 1010 | + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", | ||
| 1011 | + kIterations); | ||
| 1012 | + for (d = kMinD; d <= kMaxD; d += 2) { | ||
| 1013 | + /* Initialize the context for this value of d */ | ||
| 1014 | + COVER_ctx_t ctx; | ||
| 1015 | + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); | ||
| 1016 | + if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) { | ||
| 1017 | + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); | ||
| 1018 | + COVER_best_destroy(&best); | ||
| 1019 | + POOL_free(pool); | ||
| 1020 | + return ERROR(GENERIC); | ||
| 1021 | + } | ||
| 1022 | + /* Loop through k reusing the same context */ | ||
| 1023 | + for (k = kMinK; k <= kMaxK; k += kStepSize) { | ||
| 1024 | + /* Prepare the arguments */ | ||
| 1025 | + COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc( | ||
| 1026 | + sizeof(COVER_tryParameters_data_t)); | ||
| 1027 | + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); | ||
| 1028 | + if (!data) { | ||
| 1029 | + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); | ||
| 1030 | + COVER_best_destroy(&best); | ||
| 1031 | + COVER_ctx_destroy(&ctx); | ||
| 1032 | + POOL_free(pool); | ||
| 1033 | + return ERROR(GENERIC); | ||
| 1034 | + } | ||
| 1035 | + data->ctx = &ctx; | ||
| 1036 | + data->best = &best; | ||
| 1037 | + data->dictBufferCapacity = dictBufferCapacity; | ||
| 1038 | + data->parameters = *parameters; | ||
| 1039 | + data->parameters.k = k; | ||
| 1040 | + data->parameters.d = d; | ||
| 1041 | + data->parameters.splitPoint = splitPoint; | ||
| 1042 | + data->parameters.steps = kSteps; | ||
| 1043 | + data->parameters.zParams.notificationLevel = g_displayLevel; | ||
| 1044 | + /* Check the parameters; an invalid candidate is skipped rather than treated as a fatal error */ | ||
| 1045 | + if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { | ||
| 1046 | + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); | ||
| 1047 | + free(data); | ||
| 1048 | + continue; | ||
| 1049 | + } | ||
| 1050 | + /* Call the function and pass ownership of data to it */ | ||
| 1051 | + COVER_best_start(&best); | ||
| 1052 | + if (pool) { | ||
| 1053 | + POOL_add(pool, &COVER_tryParameters, data); | ||
| 1054 | + } else { | ||
| 1055 | + COVER_tryParameters(data); | ||
| 1056 | + } | ||
| 1057 | + /* Print status */ | ||
| 1058 | + LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", | ||
| 1059 | + (unsigned)((iteration * 100) / kIterations)); | ||
| 1060 | + ++iteration; | ||
| 1061 | + } | ||
| 1062 | + COVER_best_wait(&best); /* every queued job points at ctx, so wait for them before destroying it */ | ||
| 1063 | + COVER_ctx_destroy(&ctx); | ||
| 1064 | + } | ||
| 1065 | + LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); | ||
| 1066 | + /* Fill the output buffer and parameters with output of the best parameters */ | ||
| 1067 | + { | ||
| 1068 | + const size_t dictSize = best.dictSize; | ||
| 1069 | + if (ZSTD_isError(best.compressedSize)) { | ||
| 1070 | + const size_t compressedSize = best.compressedSize; | ||
| 1071 | + COVER_best_destroy(&best); | ||
| 1072 | + POOL_free(pool); | ||
| 1073 | + return compressedSize; | ||
| 1074 | + } | ||
| 1075 | + *parameters = best.parameters; | ||
| 1076 | + memcpy(dictBuffer, best.dict, dictSize); | ||
| 1077 | + COVER_best_destroy(&best); | ||
| 1078 | + POOL_free(pool); | ||
| 1079 | + return dictSize; | ||
| 1080 | + } | ||
| 1081 | +} |
vendor/github.com/DataDog/zstd/cover.h
0 → 100644
| 1 | +#include <stdio.h> /* fprintf */ | ||
| 2 | +#include <stdlib.h> /* malloc, free, qsort */ | ||
| 3 | +#include <string.h> /* memset */ | ||
| 4 | +#include <time.h> /* clock */ | ||
| 5 | +#include "mem.h" /* read */ | ||
| 6 | +#include "pool.h" | ||
| 7 | +#include "threading.h" | ||
| 8 | +#include "zstd_internal.h" /* includes zstd.h */ | ||
| 9 | +#ifndef ZDICT_STATIC_LINKING_ONLY | ||
| 10 | +#define ZDICT_STATIC_LINKING_ONLY | ||
| 11 | +#endif | ||
| 12 | +#include "zdict.h" | ||
| 13 | + | ||
| 14 | +/** | ||
| 15 | + * COVER_best_t is used for two purposes: | ||
| 16 | + * 1. Synchronizing threads. | ||
| 17 | + * 2. Saving the best parameters and dictionary. | ||
| 18 | + * | ||
| 19 | + * All of the methods except COVER_best_init() are thread safe if zstd is | ||
| 20 | + * compiled with multithreaded support. | ||
| 21 | + */ | ||
| 22 | +typedef struct COVER_best_s { | ||
| 23 | + ZSTD_pthread_mutex_t mutex; /* held by COVER_best_finish() while it updates the fields below */ | ||
| 24 | + ZSTD_pthread_cond_t cond; /* signalled by COVER_best_finish() for threads waiting on liveJobs */ | ||
| 25 | + size_t liveJobs; /* jobs started with COVER_best_start() and not yet finished */ | ||
| 26 | + void *dict; /* malloc'ed copy of the best dictionary so far */ | ||
| 27 | + size_t dictSize; /* size of the dictionary stored in dict */ | ||
| 28 | + ZDICT_cover_params_t parameters; /* parameters that produced dict */ | ||
| 29 | + size_t compressedSize; /* total compressed size obtained with dict, or an error code */ | ||
| 30 | +} COVER_best_t; | ||
| 31 | + | ||
| 32 | +/** | ||
| 33 | + * A segment is a range in the source as well as the score of the segment. | ||
| 34 | + */ | ||
| 35 | +typedef struct { | ||
| 36 | + U32 begin; | ||
| 37 | + U32 end; | ||
| 38 | + U32 score; | ||
| 39 | +} COVER_segment_t; | ||
| 40 | + | ||
| 41 | +/** | ||
| 42 | + * Checks total compressed size of a dictionary | ||
| 43 | + */ | ||
| 44 | +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, | ||
| 45 | + const size_t *samplesSizes, const BYTE *samples, | ||
| 46 | + size_t *offsets, | ||
| 47 | + size_t nbTrainSamples, size_t nbSamples, | ||
| 48 | + BYTE *const dict, size_t dictBufferCapacity); | ||
| 49 | + | ||
| 50 | +/** | ||
| 51 | + * Returns the sum of the sample sizes. | ||
| 52 | + */ | ||
| 53 | +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ; | ||
| 54 | + | ||
| 55 | +/** | ||
| 56 | + * Initialize the `COVER_best_t`. | ||
| 57 | + */ | ||
| 58 | +void COVER_best_init(COVER_best_t *best); | ||
| 59 | + | ||
| 60 | +/** | ||
| 61 | + * Wait until liveJobs == 0. | ||
| 62 | + */ | ||
| 63 | +void COVER_best_wait(COVER_best_t *best); | ||
| 64 | + | ||
| 65 | +/** | ||
| 66 | + * Call COVER_best_wait() and then destroy the COVER_best_t. | ||
| 67 | + */ | ||
| 68 | +void COVER_best_destroy(COVER_best_t *best); | ||
| 69 | + | ||
| 70 | +/** | ||
| 71 | + * Called when a thread is about to be launched. | ||
| 72 | + * Increments liveJobs. | ||
| 73 | + */ | ||
| 74 | +void COVER_best_start(COVER_best_t *best); | ||
| 75 | + | ||
| 76 | +/** | ||
| 77 | + * Called when a thread finishes executing, both on error or success. | ||
| 78 | + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. | ||
| 79 | + * If this dictionary is the best so far save it and its parameters. | ||
| 80 | + */ | ||
| 81 | +void COVER_best_finish(COVER_best_t *best, size_t compressedSize, | ||
| 82 | + ZDICT_cover_params_t parameters, void *dict, | ||
| 83 | + size_t dictSize); |
vendor/github.com/DataDog/zstd/cpu.h
0 → 100644
| 1 | +/* | ||
| 2 | + * Copyright (c) 2018-present, Facebook, Inc. | ||
| 3 | + * All rights reserved. | ||
| 4 | + * | ||
| 5 | + * This source code is licensed under both the BSD-style license (found in the | ||
| 6 | + * LICENSE file in the root directory of this source tree) and the GPLv2 (found | ||
| 7 | + * in the COPYING file in the root directory of this source tree). | ||
| 8 | + * You may select, at your option, one of the above-listed licenses. | ||
| 9 | + */ | ||
| 10 | + | ||
| 11 | +#ifndef ZSTD_COMMON_CPU_H | ||
| 12 | +#define ZSTD_COMMON_CPU_H | ||
| 13 | + | ||
| 14 | +/** | ||
| 15 | + * Implementation taken from folly/CpuId.h | ||
| 16 | + * https://github.com/facebook/folly/blob/master/folly/CpuId.h | ||
| 17 | + */ | ||
| 18 | + | ||
| 19 | +#include <string.h> | ||
| 20 | + | ||
| 21 | +#include "mem.h" | ||
| 22 | + | ||
| 23 | +#ifdef _MSC_VER | ||
| 24 | +#include <intrin.h> | ||
| 25 | +#endif | ||
| 26 | + | ||
| 27 | +typedef struct { | ||
| 28 | + U32 f1c; /* cpuid leaf 1, ecx */ | ||
| 29 | + U32 f1d; /* cpuid leaf 1, edx */ | ||
| 30 | + U32 f7b; /* cpuid leaf 7 (sub-leaf 0), ebx */ | ||
| 31 | + U32 f7c; /* cpuid leaf 7 (sub-leaf 0), ecx */ | ||
| 32 | +} ZSTD_cpuid_t; | ||
| 33 | + | ||
| 34 | +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { /* Reads cpuid leaves 1 and 7 when available and returns the raw feature words; every word stays 0 when none of the branches below is compiled in */ | ||
| 35 | + U32 f1c = 0; | ||
| 36 | + U32 f1d = 0; | ||
| 37 | + U32 f7b = 0; | ||
| 38 | + U32 f7c = 0; | ||
| 39 | +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) | ||
| 40 | + int reg[4]; | ||
| 41 | + __cpuid((int*)reg, 0); | ||
| 42 | + { | ||
| 43 | + int const n = reg[0]; /* used below as the highest leaf that may be queried */ | ||
| 44 | + if (n >= 1) { | ||
| 45 | + __cpuid((int*)reg, 1); | ||
| 46 | + f1c = (U32)reg[2]; | ||
| 47 | + f1d = (U32)reg[3]; | ||
| 48 | + } | ||
| 49 | + if (n >= 7) { | ||
| 50 | + __cpuidex((int*)reg, 7, 0); | ||
| 51 | + f7b = (U32)reg[1]; | ||
| 52 | + f7c = (U32)reg[2]; | ||
| 53 | + } | ||
| 54 | + } | ||
| 55 | +#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) | ||
| 56 | + /* The following block is like the normal cpuid branch below, but gcc | ||
| 57 | + * reserves ebx for use as its PIC register, so we must specially | ||
| 58 | + * handle the save and restore to avoid clobbering the register | ||
| 59 | + */ | ||
| 60 | + U32 n; | ||
| 61 | + __asm__( | ||
| 62 | + "pushl %%ebx\n\t" | ||
| 63 | + "cpuid\n\t" | ||
| 64 | + "popl %%ebx\n\t" | ||
| 65 | + : "=a"(n) | ||
| 66 | + : "a"(0) | ||
| 67 | + : "ecx", "edx"); | ||
| 68 | + if (n >= 1) { | ||
| 69 | + U32 f1a; | ||
| 70 | + __asm__( | ||
| 71 | + "pushl %%ebx\n\t" | ||
| 72 | + "cpuid\n\t" | ||
| 73 | + "popl %%ebx\n\t" | ||
| 74 | + : "=a"(f1a), "=c"(f1c), "=d"(f1d) | ||
| 75 | + : "a"(1)); | ||
| 76 | + } | ||
| 77 | + if (n >= 7) { | ||
| 78 | + __asm__( | ||
| 79 | + "pushl %%ebx\n\t" | ||
| 80 | + "cpuid\n\t" | ||
| 81 | + "movl %%ebx, %%eax\n\t" | ||
| 82 | + "popl %%ebx" | ||
| 83 | + : "=a"(f7b), "=c"(f7c) | ||
| 84 | + : "a"(7), "c"(0) | ||
| 85 | + : "edx"); | ||
| 86 | + } | ||
| 87 | +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) | ||
| 88 | + U32 n; | ||
| 89 | + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); | ||
| 90 | + if (n >= 1) { | ||
| 91 | + U32 f1a; | ||
| 92 | + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); | ||
| 93 | + } | ||
| 94 | + if (n >= 7) { | ||
| 95 | + U32 f7a; | ||
| 96 | + __asm__("cpuid" | ||
| 97 | + : "=a"(f7a), "=b"(f7b), "=c"(f7c) | ||
| 98 | + : "a"(7), "c"(0) | ||
| 99 | + : "edx"); | ||
| 100 | + } | ||
| 101 | +#endif | ||
| 102 | + { | ||
| 103 | + ZSTD_cpuid_t cpuid; | ||
| 104 | + cpuid.f1c = f1c; | ||
| 105 | + cpuid.f1d = f1d; | ||
| 106 | + cpuid.f7b = f7b; | ||
| 107 | + cpuid.f7c = f7c; | ||
| 108 | + return cpuid; | ||
| 109 | + } | ||
| 110 | +} | ||
| 111 | + | ||
| 112 | +#define X(name, r, bit) \ | ||
| 113 | + MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ | ||
| 114 | + return ((cpuid.r) & (1U << bit)) != 0; \ | ||
| 115 | + } | ||
| 116 | + | ||
| 117 | +/* cpuid(1): Processor Info and Feature Bits (C entries test the f1c word, D entries the f1d word). */ | ||
| 118 | +#define C(name, bit) X(name, f1c, bit) | ||
| 119 | + C(sse3, 0) | ||
| 120 | + C(pclmuldq, 1) | ||
| 121 | + C(dtes64, 2) | ||
| 122 | + C(monitor, 3) | ||
| 123 | + C(dscpl, 4) | ||
| 124 | + C(vmx, 5) | ||
| 125 | + C(smx, 6) | ||
| 126 | + C(eist, 7) | ||
| 127 | + C(tm2, 8) | ||
| 128 | + C(ssse3, 9) | ||
| 129 | + C(cnxtid, 10) | ||
| 130 | + C(fma, 12) | ||
| 131 | + C(cx16, 13) | ||
| 132 | + C(xtpr, 14) | ||
| 133 | + C(pdcm, 15) | ||
| 134 | + C(pcid, 17) | ||
| 135 | + C(dca, 18) | ||
| 136 | + C(sse41, 19) | ||
| 137 | + C(sse42, 20) | ||
| 138 | + C(x2apic, 21) | ||
| 139 | + C(movbe, 22) | ||
| 140 | + C(popcnt, 23) | ||
| 141 | + C(tscdeadline, 24) | ||
| 142 | + C(aes, 25) | ||
| 143 | + C(xsave, 26) | ||
| 144 | + C(osxsave, 27) | ||
| 145 | + C(avx, 28) | ||
| 146 | + C(f16c, 29) | ||
| 147 | + C(rdrand, 30) | ||
| 148 | +#undef C | ||
| 149 | +#define D(name, bit) X(name, f1d, bit) | ||
| 150 | + D(fpu, 0) | ||
| 151 | + D(vme, 1) | ||
| 152 | + D(de, 2) | ||
| 153 | + D(pse, 3) | ||
| 154 | + D(tsc, 4) | ||
| 155 | + D(msr, 5) | ||
| 156 | + D(pae, 6) | ||
| 157 | + D(mce, 7) | ||
| 158 | + D(cx8, 8) | ||
| 159 | + D(apic, 9) | ||
| 160 | + D(sep, 11) | ||
| 161 | + D(mtrr, 12) | ||
| 162 | + D(pge, 13) | ||
| 163 | + D(mca, 14) | ||
| 164 | + D(cmov, 15) | ||
| 165 | + D(pat, 16) | ||
| 166 | + D(pse36, 17) | ||
| 167 | + D(psn, 18) | ||
| 168 | + D(clfsh, 19) | ||
| 169 | + D(ds, 21) | ||
| 170 | + D(acpi, 22) | ||
| 171 | + D(mmx, 23) | ||
| 172 | + D(fxsr, 24) | ||
| 173 | + D(sse, 25) | ||
| 174 | + D(sse2, 26) | ||
| 175 | + D(ss, 27) | ||
| 176 | + D(htt, 28) | ||
| 177 | + D(tm, 29) | ||
| 178 | + D(pbe, 31) | ||
| 179 | +#undef D | ||
| 180 | + | ||
| 181 | +/* cpuid(7): Extended Features. */ | ||
| 182 | +#define B(name, bit) X(name, f7b, bit) | ||
| 183 | + B(bmi1, 3) | ||
| 184 | + B(hle, 4) | ||
| 185 | + B(avx2, 5) | ||
| 186 | + B(smep, 7) | ||
| 187 | + B(bmi2, 8) | ||
| 188 | + B(erms, 9) | ||
| 189 | + B(invpcid, 10) | ||
| 190 | + B(rtm, 11) | ||
| 191 | + B(mpx, 14) | ||
| 192 | + B(avx512f, 16) | ||
| 193 | + B(avx512dq, 17) | ||
| 194 | + B(rdseed, 18) | ||
| 195 | + B(adx, 19) | ||
| 196 | + B(smap, 20) | ||
| 197 | + B(avx512ifma, 21) | ||
| 198 | + B(pcommit, 22) | ||
| 199 | + B(clflushopt, 23) | ||
| 200 | + B(clwb, 24) | ||
| 201 | + B(avx512pf, 26) | ||
| 202 | + B(avx512er, 27) | ||
| 203 | + B(avx512cd, 28) | ||
| 204 | + B(sha, 29) | ||
| 205 | + B(avx512bw, 30) | ||
| 206 | + B(avx512vl, 31) | ||
| 207 | +#undef B | ||
| 208 | +#define C(name, bit) X(name, f7c, bit) | ||
| 209 | + C(prefetchwt1, 0) | ||
| 210 | + C(avx512vbmi, 1) | ||
| 211 | +#undef C | ||
| 212 | + | ||
| 213 | +#undef X | ||
| 214 | + | ||
| 215 | +#endif /* ZSTD_COMMON_CPU_H */ |
vendor/github.com/DataDog/zstd/debug.c
0 → 100644
| 1 | +/* ****************************************************************** | ||
| 2 | + debug | ||
| 3 | + Part of FSE library | ||
| 4 | + Copyright (C) 2013-present, Yann Collet. | ||
| 5 | + | ||
| 6 | + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | ||
| 7 | + | ||
| 8 | + Redistribution and use in source and binary forms, with or without | ||
| 9 | + modification, are permitted provided that the following conditions are | ||
| 10 | + met: | ||
| 11 | + | ||
| 12 | + * Redistributions of source code must retain the above copyright | ||
| 13 | + notice, this list of conditions and the following disclaimer. | ||
| 14 | + * Redistributions in binary form must reproduce the above | ||
| 15 | + copyright notice, this list of conditions and the following disclaimer | ||
| 16 | + in the documentation and/or other materials provided with the | ||
| 17 | + distribution. | ||
| 18 | + | ||
| 19 | + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 20 | + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 21 | + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 22 | + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 23 | + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 24 | + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 25 | + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 26 | + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 27 | + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 28 | + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 29 | + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 30 | + | ||
| 31 | + You can contact the author at : | ||
| 32 | + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | ||
| 33 | +****************************************************************** */ | ||
| 34 | + | ||
| 35 | + | ||
| 36 | +/* | ||
| 37 | + * This module only hosts one global variable | ||
| 38 | + * which can be used to dynamically influence the verbosity of traces, | ||
| 39 | + * such as DEBUGLOG and RAWLOG | ||
| 40 | + */ | ||
| 41 | + | ||
| 42 | +#include "debug.h" | ||
| 43 | + | ||
| 44 | +int g_debuglevel = DEBUGLEVEL; |
vendor/github.com/DataDog/zstd/debug.h
0 → 100644
| 1 | +/* ****************************************************************** | ||
| 2 | + debug | ||
| 3 | + Part of FSE library | ||
| 4 | + Copyright (C) 2013-present, Yann Collet. | ||
| 5 | + | ||
| 6 | + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | ||
| 7 | + | ||
| 8 | + Redistribution and use in source and binary forms, with or without | ||
| 9 | + modification, are permitted provided that the following conditions are | ||
| 10 | + met: | ||
| 11 | + | ||
| 12 | + * Redistributions of source code must retain the above copyright | ||
| 13 | + notice, this list of conditions and the following disclaimer. | ||
| 14 | + * Redistributions in binary form must reproduce the above | ||
| 15 | + copyright notice, this list of conditions and the following disclaimer | ||
| 16 | + in the documentation and/or other materials provided with the | ||
| 17 | + distribution. | ||
| 18 | + | ||
| 19 | + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 20 | + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 21 | + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 22 | + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 23 | + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 24 | + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 25 | + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 26 | + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 27 | + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 28 | + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 29 | + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 30 | + | ||
| 31 | + You can contact the author at : | ||
| 32 | + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | ||
| 33 | +****************************************************************** */ | ||
| 34 | + | ||
| 35 | + | ||
| 36 | +/* | ||
| 37 | + * The purpose of this header is to enable debug functions. | ||
| 38 | + * It groups together assert(), DEBUGLOG() and RAWLOG() for run-time, | ||
| 39 | + * and DEBUG_STATIC_ASSERT() for compile-time. | ||
| 40 | + * | ||
| 41 | + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. | ||
| 42 | + * | ||
| 43 | + * Level 1 enables assert() only. | ||
| 44 | + * Starting level 2, traces can be generated and pushed to stderr. | ||
| 45 | + * The higher the level, the more verbose the traces. | ||
| 46 | + * | ||
| 47 | + * It's possible to dynamically adjust level using variable g_debuglevel, | ||
| 48 | + * which is only declared if DEBUGLEVEL>=2, | ||
| 49 | + * and is a global variable, not multi-thread protected (use with care) | ||
| 50 | + */ | ||
| 51 | + | ||
| 52 | +#ifndef DEBUG_H_12987983217 | ||
| 53 | +#define DEBUG_H_12987983217 | ||
| 54 | + | ||
| 55 | +#if defined (__cplusplus) | ||
| 56 | +extern "C" { | ||
| 57 | +#endif | ||
| 58 | + | ||
| 59 | + | ||
| 60 | +/* static assert is triggered at compile time, leaving no runtime artefact. | ||
| 61 | + * static assert only works with compile-time constants. | ||
| 62 | + * Also, this variant can only be used inside a function. */ | ||
| 63 | +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) | ||
| 64 | + | ||
| 65 | + | ||
| 66 | +/* DEBUGLEVEL is expected to be defined externally, | ||
| 67 | + * typically through compiler command line. | ||
| 68 | + * Value must be a number. */ | ||
| 69 | +#ifndef DEBUGLEVEL | ||
| 70 | +# define DEBUGLEVEL 0 | ||
| 71 | +#endif | ||
| 72 | + | ||
| 73 | + | ||
| 74 | +/* DEBUGFILE can be defined externally, | ||
| 75 | + * typically through compiler command line. | ||
| 76 | + * note : currently unused, RAWLOG() and DEBUGLOG() write to stderr directly. | ||
| 77 | + * Value must be stderr or stdout */ | ||
| 78 | +#ifndef DEBUGFILE | ||
| 79 | +# define DEBUGFILE stderr | ||
| 80 | +#endif | ||
| 81 | + | ||
| 82 | + | ||
| 83 | +/* recommended values for DEBUGLEVEL : | ||
| 84 | + * 0 : release mode, no debug, all run-time checks disabled | ||
| 85 | + * 1 : enables assert() only, no display | ||
| 86 | + * 2 : reserved, for currently active debug path | ||
| 87 | + * 3 : events once per object lifetime (CCtx, CDict, etc.) | ||
| 88 | + * 4 : events once per frame | ||
| 89 | + * 5 : events once per block | ||
| 90 | + * 6 : events once per sequence (verbose) | ||
| 91 | + * 7+: events at every position (*very* verbose) | ||
| 92 | + * | ||
| 93 | + * It's generally inconvenient to output traces > 5. | ||
| 94 | + * In which case, it's possible to selectively trigger high verbosity levels | ||
| 95 | + * by modifying g_debuglevel. | ||
| 96 | + */ | ||
| 97 | + | ||
| 98 | +#if (DEBUGLEVEL>=1) | ||
| 99 | +# include <assert.h> | ||
| 100 | +#else | ||
| 101 | +# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */ | ||
| 102 | +# define assert(condition) ((void)0) /* disable assert (default) */ | ||
| 103 | +# endif | ||
| 104 | +#endif | ||
| 105 | + | ||
| 106 | +#if (DEBUGLEVEL>=2) | ||
| 107 | +# include <stdio.h> | ||
| 108 | +extern int g_debuglevel; /* the variable is only declared, | ||
| 109 | + it actually lives in debug.c, | ||
| 110 | + and is shared by the whole process. | ||
| 111 | + It's not thread-safe. | ||
| 112 | + It's useful when enabling very verbose levels | ||
| 113 | + on selective conditions (such as position in src) */ | ||
| 114 | + | ||
| 115 | +# define RAWLOG(l, ...) { \ | ||
| 116 | + if (l<=g_debuglevel) { \ | ||
| 117 | + fprintf(stderr, __VA_ARGS__); \ | ||
| 118 | + } } | ||
| 119 | +# define DEBUGLOG(l, ...) { \ | ||
| 120 | + if (l<=g_debuglevel) { \ | ||
| 121 | + fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ | ||
| 122 | + fprintf(stderr, " \n"); \ | ||
| 123 | + } } | ||
| 124 | +#else | ||
| 125 | +# define RAWLOG(l, ...) {} /* disabled */ | ||
| 126 | +# define DEBUGLOG(l, ...) {} /* disabled */ | ||
| 127 | +#endif | ||
| 128 | + | ||
| 129 | + | ||
| 130 | +#if defined (__cplusplus) | ||
| 131 | +} | ||
| 132 | +#endif | ||
| 133 | + | ||
| 134 | +#endif /* DEBUG_H_12987983217 */ |
vendor/github.com/DataDog/zstd/divsufsort.c
0 → 100644
| 1 | +/* | ||
| 2 | + * divsufsort.c for libdivsufsort-lite | ||
| 3 | + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. | ||
| 4 | + * | ||
| 5 | + * Permission is hereby granted, free of charge, to any person | ||
| 6 | + * obtaining a copy of this software and associated documentation | ||
| 7 | + * files (the "Software"), to deal in the Software without | ||
| 8 | + * restriction, including without limitation the rights to use, | ||
| 9 | + * copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| 10 | + * copies of the Software, and to permit persons to whom the | ||
| 11 | + * Software is furnished to do so, subject to the following | ||
| 12 | + * conditions: | ||
| 13 | + * | ||
| 14 | + * The above copyright notice and this permission notice shall be | ||
| 15 | + * included in all copies or substantial portions of the Software. | ||
| 16 | + * | ||
| 17 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
| 18 | + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | ||
| 19 | + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| 20 | + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | ||
| 21 | + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
| 22 | + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
| 23 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 24 | + * OTHER DEALINGS IN THE SOFTWARE. | ||
| 25 | + */ | ||
| 26 | + | ||
| 27 | +/*- Compiler specifics -*/ | ||
| 28 | +#ifdef __clang__ | ||
| 29 | +#pragma clang diagnostic ignored "-Wshorten-64-to-32" | ||
| 30 | +#endif | ||
| 31 | + | ||
| 32 | +#if defined(_MSC_VER) | ||
| 33 | +# pragma warning(disable : 4244) | ||
| 34 | +# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */ | ||
| 35 | +#endif | ||
| 36 | + | ||
| 37 | + | ||
| 38 | +/*- Dependencies -*/ | ||
| 39 | +#include <assert.h> | ||
| 40 | +#include <stdio.h> | ||
| 41 | +#include <stdlib.h> | ||
| 42 | + | ||
| 43 | +#include "divsufsort.h" | ||
| 44 | + | ||
| 45 | +/*- Constants -*/ | ||
| 46 | +#if defined(INLINE) | ||
| 47 | +# undef INLINE | ||
| 48 | +#endif | ||
| 49 | +#if !defined(INLINE) | ||
| 50 | +# define INLINE __inline | ||
| 51 | +#endif | ||
| 52 | +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) | ||
| 53 | +# undef ALPHABET_SIZE | ||
| 54 | +#endif | ||
| 55 | +#if !defined(ALPHABET_SIZE) | ||
| 56 | +# define ALPHABET_SIZE (256) | ||
| 57 | +#endif | ||
| 58 | +#define BUCKET_A_SIZE (ALPHABET_SIZE) | ||
| 59 | +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) | ||
| 60 | +#if defined(SS_INSERTIONSORT_THRESHOLD) | ||
| 61 | +# if SS_INSERTIONSORT_THRESHOLD < 1 | ||
| 62 | +# undef SS_INSERTIONSORT_THRESHOLD | ||
| 63 | +# define SS_INSERTIONSORT_THRESHOLD (1) | ||
| 64 | +# endif | ||
| 65 | +#else | ||
| 66 | +# define SS_INSERTIONSORT_THRESHOLD (8) | ||
| 67 | +#endif | ||
| 68 | +#if defined(SS_BLOCKSIZE) | ||
| 69 | +# if SS_BLOCKSIZE < 0 | ||
| 70 | +# undef SS_BLOCKSIZE | ||
| 71 | +# define SS_BLOCKSIZE (0) | ||
| 72 | +# elif 32768 <= SS_BLOCKSIZE | ||
| 73 | +# undef SS_BLOCKSIZE | ||
| 74 | +# define SS_BLOCKSIZE (32767) | ||
| 75 | +# endif | ||
| 76 | +#else | ||
| 77 | +# define SS_BLOCKSIZE (1024) | ||
| 78 | +#endif | ||
| 79 | +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ | ||
| 80 | +#if SS_BLOCKSIZE == 0 | ||
| 81 | +# define SS_MISORT_STACKSIZE (96) | ||
| 82 | +#elif SS_BLOCKSIZE <= 4096 | ||
| 83 | +# define SS_MISORT_STACKSIZE (16) | ||
| 84 | +#else | ||
| 85 | +# define SS_MISORT_STACKSIZE (24) | ||
| 86 | +#endif | ||
| 87 | +#define SS_SMERGE_STACKSIZE (32) | ||
| 88 | +#define TR_INSERTIONSORT_THRESHOLD (8) | ||
| 89 | +#define TR_STACKSIZE (64) | ||
| 90 | + | ||
| 91 | + | ||
/*- Macros -*/
/* SWAP exchanges two lvalues through a variable named `t` that the caller
   must have declared with a compatible type. */
#ifndef SWAP
# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
#endif /* SWAP */
/* MIN and MAX evaluate the selected argument twice; avoid side effects. */
#ifndef MIN
# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
#endif /* MIN */
#ifndef MAX
# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
#endif /* MAX */
/* Explicit-stack helpers. They operate on an array `stack` with fields
   a, b, c, d (and e for the *5 variants), an int `ssize` holding the current
   depth, and a STACK_SIZE macro, all supplied by the enclosing function.
   Overflow is only checked with assert(). */
#define STACK_PUSH(_a, _b, _c, _d)\
  do {\
    assert(ssize < STACK_SIZE);\
    stack[ssize].a = (_a), stack[ssize].b = (_b),\
    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
  } while(0)
#define STACK_PUSH5(_a, _b, _c, _d, _e)\
  do {\
    assert(ssize < STACK_SIZE);\
    stack[ssize].a = (_a), stack[ssize].b = (_b),\
    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
  } while(0)
/* The POP variants execute a bare `return;` in the enclosing function when
   the stack is empty, so they can only be used inside functions returning
   void; an empty stack is how those functions terminate. */
#define STACK_POP(_a, _b, _c, _d)\
  do {\
    assert(0 <= ssize);\
    if(ssize == 0) { return; }\
    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
  } while(0)
#define STACK_POP5(_a, _b, _c, _d, _e)\
  do {\
    assert(0 <= ssize);\
    if(ssize == 0) { return; }\
    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
  } while(0)
/* Bucket accessors; `bucket_A` and `bucket_B` must be in scope at the point
   of use. BUCKET_B and BUCKET_BSTAR share the same array but index it with
   the roles of _c0 and _c1 exchanged. */
#define BUCKET_A(_c0) bucket_A[(_c0)]
#if ALPHABET_SIZE == 256
#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
#else
#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
#endif
| 136 | + | ||
| 137 | + | ||
| 138 | +/*- Private Functions -*/ | ||
| 139 | + | ||
/* lg_table[i] is floor(log2(i)) for 1 <= i <= 255, and -1 for i == 0.
   The *_ilg helpers and ss_isqrt() apply it to the most significant
   non-zero byte of their argument. */
static const int lg_table[256]= {
 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
| 150 | + | ||
| 151 | +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) | ||
| 152 | + | ||
| 153 | +static INLINE | ||
| 154 | +int | ||
| 155 | +ss_ilg(int n) { | ||
| 156 | +#if SS_BLOCKSIZE == 0 | ||
| 157 | + return (n & 0xffff0000) ? | ||
| 158 | + ((n & 0xff000000) ? | ||
| 159 | + 24 + lg_table[(n >> 24) & 0xff] : | ||
| 160 | + 16 + lg_table[(n >> 16) & 0xff]) : | ||
| 161 | + ((n & 0x0000ff00) ? | ||
| 162 | + 8 + lg_table[(n >> 8) & 0xff] : | ||
| 163 | + 0 + lg_table[(n >> 0) & 0xff]); | ||
| 164 | +#elif SS_BLOCKSIZE < 256 | ||
| 165 | + return lg_table[n]; | ||
| 166 | +#else | ||
| 167 | + return (n & 0xff00) ? | ||
| 168 | + 8 + lg_table[(n >> 8) & 0xff] : | ||
| 169 | + 0 + lg_table[(n >> 0) & 0xff]; | ||
| 170 | +#endif | ||
| 171 | +} | ||
| 172 | + | ||
| 173 | +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ | ||
| 174 | + | ||
| 175 | +#if SS_BLOCKSIZE != 0 | ||
| 176 | + | ||
/* Seed table for ss_isqrt(). The entries spot-checked (0, 1, 2, 3, 4, 5, 16,
   17, 32, 63, 64, 65, 255) equal floor(16 * sqrt(i)); presumably the whole
   table follows that rule -- confirm before regenerating it. */
static const int sqq_table[256] = {
  0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
 64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
 90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
};
| 195 | + | ||
| 196 | +static INLINE | ||
| 197 | +int | ||
| 198 | +ss_isqrt(int x) { | ||
| 199 | + int y, e; | ||
| 200 | + | ||
| 201 | + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } | ||
| 202 | + e = (x & 0xffff0000) ? | ||
| 203 | + ((x & 0xff000000) ? | ||
| 204 | + 24 + lg_table[(x >> 24) & 0xff] : | ||
| 205 | + 16 + lg_table[(x >> 16) & 0xff]) : | ||
| 206 | + ((x & 0x0000ff00) ? | ||
| 207 | + 8 + lg_table[(x >> 8) & 0xff] : | ||
| 208 | + 0 + lg_table[(x >> 0) & 0xff]); | ||
| 209 | + | ||
| 210 | + if(e >= 16) { | ||
| 211 | + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); | ||
| 212 | + if(e >= 24) { y = (y + 1 + x / y) >> 1; } | ||
| 213 | + y = (y + 1 + x / y) >> 1; | ||
| 214 | + } else if(e >= 8) { | ||
| 215 | + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; | ||
| 216 | + } else { | ||
| 217 | + return sqq_table[x] >> 4; | ||
| 218 | + } | ||
| 219 | + | ||
| 220 | + return (x < (y * y)) ? y - 1 : y; | ||
| 221 | +} | ||
| 222 | + | ||
| 223 | +#endif /* SS_BLOCKSIZE != 0 */ | ||
| 224 | + | ||
| 225 | + | ||
| 226 | +/*---------------------------------------------------------------------------*/ | ||
| 227 | + | ||
| 228 | +/* Compares two suffixes. */ | ||
| 229 | +static INLINE | ||
| 230 | +int | ||
| 231 | +ss_compare(const unsigned char *T, | ||
| 232 | + const int *p1, const int *p2, | ||
| 233 | + int depth) { | ||
| 234 | + const unsigned char *U1, *U2, *U1n, *U2n; | ||
| 235 | + | ||
| 236 | + for(U1 = T + depth + *p1, | ||
| 237 | + U2 = T + depth + *p2, | ||
| 238 | + U1n = T + *(p1 + 1) + 2, | ||
| 239 | + U2n = T + *(p2 + 1) + 2; | ||
| 240 | + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); | ||
| 241 | + ++U1, ++U2) { | ||
| 242 | + } | ||
| 243 | + | ||
| 244 | + return U1 < U1n ? | ||
| 245 | + (U2 < U2n ? *U1 - *U2 : 1) : | ||
| 246 | + (U2 < U2n ? -1 : 0); | ||
| 247 | +} | ||
| 248 | + | ||
| 249 | + | ||
| 250 | +/*---------------------------------------------------------------------------*/ | ||
| 251 | + | ||
| 252 | +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) | ||
| 253 | + | ||
/* Insertionsort for small size groups.
   Sorts the entries of [first, last) in place, ordering entry x by the
   substring that ss_compare() derives from PA + x at offset `depth`.
   The range is built up from the right: each entry *i is inserted into the
   already sorted tail (i, last). When the inserted entry compares equal to
   the entry it stops in front of, that entry is stored bitwise-complemented
   (negative), and negative entries are always shifted together with the
   entry before them. */
static
void
ss_insertionsort(const unsigned char *T, const int *PA,
                 int *first, int *last, int depth) {
  int *i, *j;
  int t;
  int r;

  for(i = last - 2; first <= i; --i) {
    for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
      /* Shift *j left by one, dragging along any negative (tied) followers. */
      do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
      if(last <= j) { break; }
    }
    /* r == 0 is only reachable with j < last: flag *j as tied with t. */
    if(r == 0) { *j = ~*j; }
    *(j - 1) = t;
  }
}
| 272 | + | ||
| 273 | +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ | ||
| 274 | + | ||
| 275 | + | ||
| 276 | +/*---------------------------------------------------------------------------*/ | ||
| 277 | + | ||
| 278 | +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) | ||
| 279 | + | ||
| 280 | +static INLINE | ||
| 281 | +void | ||
| 282 | +ss_fixdown(const unsigned char *Td, const int *PA, | ||
| 283 | + int *SA, int i, int size) { | ||
| 284 | + int j, k; | ||
| 285 | + int v; | ||
| 286 | + int c, d, e; | ||
| 287 | + | ||
| 288 | + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { | ||
| 289 | + d = Td[PA[SA[k = j++]]]; | ||
| 290 | + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } | ||
| 291 | + if(d <= c) { break; } | ||
| 292 | + } | ||
| 293 | + SA[i] = v; | ||
| 294 | +} | ||
| 295 | + | ||
/* Simple top-down heapsort.
   Sorts SA[0..size) in ascending order of Td[PA[SA[.]]]. The heap is always
   built over an odd number of entries; for an even `size` the last entry is
   handled separately before and after heap construction. */
static
void
ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
  int i, m;
  int t;
  int even;

  even = ((size % 2) == 0);
  m = size;
  if(even) {
    m--;
    if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) {
      t = SA[m]; SA[m] = SA[m / 2]; SA[m / 2] = t;
    }
  }

  /* Build the heap over the first m entries. */
  i = m / 2 - 1;
  while(0 <= i) {
    ss_fixdown(Td, PA, SA, i, m);
    --i;
  }

  if(even) {
    t = SA[0]; SA[0] = SA[m]; SA[m] = t;
    ss_fixdown(Td, PA, SA, 0, m);
  }

  /* Repeatedly move the current maximum to the end of the shrinking heap. */
  i = m - 1;
  while(0 < i) {
    t = SA[0];
    SA[0] = SA[i];
    ss_fixdown(Td, PA, SA, 0, i);
    SA[i] = t;
    --i;
  }
}
| 317 | + | ||
| 318 | + | ||
| 319 | +/*---------------------------------------------------------------------------*/ | ||
| 320 | + | ||
| 321 | +/* Returns the median of three elements. */ | ||
| 322 | +static INLINE | ||
| 323 | +int * | ||
| 324 | +ss_median3(const unsigned char *Td, const int *PA, | ||
| 325 | + int *v1, int *v2, int *v3) { | ||
| 326 | + int *t; | ||
| 327 | + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } | ||
| 328 | + if(Td[PA[*v2]] > Td[PA[*v3]]) { | ||
| 329 | + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } | ||
| 330 | + else { return v3; } | ||
| 331 | + } | ||
| 332 | + return v2; | ||
| 333 | +} | ||
| 334 | + | ||
| 335 | +/* Returns the median of five elements. */ | ||
| 336 | +static INLINE | ||
| 337 | +int * | ||
| 338 | +ss_median5(const unsigned char *Td, const int *PA, | ||
| 339 | + int *v1, int *v2, int *v3, int *v4, int *v5) { | ||
| 340 | + int *t; | ||
| 341 | + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } | ||
| 342 | + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } | ||
| 343 | + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } | ||
| 344 | + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } | ||
| 345 | + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } | ||
| 346 | + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } | ||
| 347 | + return v3; | ||
| 348 | +} | ||
| 349 | + | ||
| 350 | +/* Returns the pivot element. */ | ||
| 351 | +static INLINE | ||
| 352 | +int * | ||
| 353 | +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { | ||
| 354 | + int *middle; | ||
| 355 | + int t; | ||
| 356 | + | ||
| 357 | + t = last - first; | ||
| 358 | + middle = first + t / 2; | ||
| 359 | + | ||
| 360 | + if(t <= 512) { | ||
| 361 | + if(t <= 32) { | ||
| 362 | + return ss_median3(Td, PA, first, middle, last - 1); | ||
| 363 | + } else { | ||
| 364 | + t >>= 2; | ||
| 365 | + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); | ||
| 366 | + } | ||
| 367 | + } | ||
| 368 | + t >>= 3; | ||
| 369 | + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); | ||
| 370 | + middle = ss_median3(Td, PA, middle - t, middle, middle + t); | ||
| 371 | + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); | ||
| 372 | + return ss_median3(Td, PA, first, middle, last); | ||
| 373 | +} | ||
| 374 | + | ||
| 375 | + | ||
| 376 | +/*---------------------------------------------------------------------------*/ | ||
| 377 | + | ||
/* Binary partition for substrings.
   Splits [first, last) by the predicate
       PA[x] + depth >= PA[x + 1] + 1
   evaluated on each entry x. Entries satisfying it are gathered at the front
   and stored bitwise-complemented; the remaining entries follow unchanged.
   If the front part is non-empty its first entry is complemented back, so
   only the entries after it stay negative. Returns a pointer to the first
   entry of the second part. */
static INLINE
int *
ss_partition(const int *PA,
             int *first, int *last, int depth) {
  int *a, *b;
  int t;
  for(a = first - 1, b = last;;) {
    /* Advance a over entries that satisfy the predicate, complementing them. */
    for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
    /* Retreat b over entries that do not satisfy it. */
    for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
    if(b <= a) { break; }
    /* *a fails and *b satisfies the predicate: exchange, complementing *b. */
    t = ~*b;
    *b = *a;
    *a = t;
  }
  if(first < a) { *first = ~*first; }
  return a;
}
| 396 | + | ||
/* Multikey introsort for medium size groups.
   Sorts the entries of [first, last) by the substrings they index through
   PA, starting at character offset `depth`. Recursion is replaced by an
   explicit stack of (first, last, depth, limit) records; the function
   returns when STACK_POP finds the stack empty.

   `limit` is the remaining partitioning budget for the current range,
   initialised from ss_ilg(range length). When it reaches 0 the range is
   heap-sorted on the single character at `depth`; a negative limit marks a
   range that is already ordered on that character and only needs to be cut
   into runs of equal characters. */
static
void
ss_mintrosort(const unsigned char *T, const int *PA,
              int *first, int *last,
              int depth) {
#define STACK_SIZE SS_MISORT_STACKSIZE
  struct { int *a, *b, c; int d; } stack[STACK_SIZE];
  const unsigned char *Td;
  int *a, *b, *c, *d, *e, *f;
  int s, t;
  int ssize;
  int limit;
  int v, x = 0;

  for(ssize = 0, limit = ss_ilg(last - first);;) {

    /* Small range: finish it with insertion sort and fetch the next one. */
    if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
#if 1 < SS_INSERTIONSORT_THRESHOLD
      if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
#endif
      STACK_POP(first, last, depth, limit);
      continue;
    }

    Td = T + depth;
    if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
    if(limit < 0) {
      /* Range is ordered on Td[]: find the first run [first, a) of equal
         characters that has more than one entry (single-entry runs are
         skipped by advancing first). */
      for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
        if((x = Td[PA[*a]]) != v) {
          if(1 < (a - first)) { break; }
          v = x;
          first = a;
        }
      }
      if(Td[PA[*first] - 1] < v) {
        first = ss_partition(PA, first, a, depth);
      }
      /* Continue with the smaller of [first, a) at depth + 1 and [a, last)
         at the same depth (limit -1); push the other if it has more than
         one entry. */
      if((a - first) <= (last - a)) {
        if(1 < (a - first)) {
          STACK_PUSH(a, last, depth, -1);
          last = a, depth += 1, limit = ss_ilg(a - first);
        } else {
          first = a, limit = -1;
        }
      } else {
        if(1 < (last - a)) {
          STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
          first = a, limit = -1;
        } else {
          last = a, depth += 1, limit = ss_ilg(a - first);
        }
      }
      continue;
    }

    /* choose pivot */
    a = ss_pivot(Td, PA, first, last);
    v = Td[PA[*a]];
    SWAP(*first, *a);

    /* partition */
    /* Three-way split on the character at `depth`: entries equal to v are
       collected at the left end [first, a) and the right end (d, last),
       smaller entries in [a, b) and larger entries in (c, d]. */
    for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
    if(((a = b) < last) && (x < v)) {
      for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
        if(x == v) { SWAP(*b, *a); ++a; }
      }
    }
    for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
    if((b < (d = c)) && (x > v)) {
      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
        if(x == v) { SWAP(*c, *d); --d; }
      }
    }
    for(; b < c;) {
      SWAP(*b, *c);
      for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
        if(x == v) { SWAP(*b, *a); ++a; }
      }
      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
        if(x == v) { SWAP(*c, *d); --d; }
      }
    }

    if(a <= d) {
      /* At least one entry differs from v: swap the equal-to-v entries from
         both ends into the middle, giving
         [first, a) < v, [a, c) == v, [c, last) > v. */
      c = b - 1;

      if((s = a - first) > (t = b - a)) { s = t; }
      for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
      if((s = d - c) > (t = last - d - 1)) { s = t; }
      for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }

      a = first + (b - a), c = last - (d - c);
      b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);

      /* Three sub-ranges remain: [first, a) and [c, last) at the same depth,
         and [b, c) at depth + 1. Continue with the smallest; push the other
         two with the larger one pushed first. */
      if((a - first) <= (last - c)) {
        if((last - c) <= (c - b)) {
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          STACK_PUSH(c, last, depth, limit);
          last = a;
        } else if((a - first) <= (c - b)) {
          STACK_PUSH(c, last, depth, limit);
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          last = a;
        } else {
          STACK_PUSH(c, last, depth, limit);
          STACK_PUSH(first, a, depth, limit);
          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
        }
      } else {
        if((a - first) <= (c - b)) {
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          STACK_PUSH(first, a, depth, limit);
          first = c;
        } else if((last - c) <= (c - b)) {
          STACK_PUSH(first, a, depth, limit);
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          first = c;
        } else {
          STACK_PUSH(first, a, depth, limit);
          STACK_PUSH(c, last, depth, limit);
          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
        }
      }
    } else {
      /* Every entry has character v at this depth: give back the budget
         that was consumed above and move on to the next character. */
      limit += 1;
      if(Td[PA[*first] - 1] < v) {
        first = ss_partition(PA, first, last, depth);
        limit = ss_ilg(last - first);
      }
      depth += 1;
    }
  }
#undef STACK_SIZE
}
| 532 | + | ||
| 533 | +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ | ||
| 534 | + | ||
| 535 | + | ||
| 536 | +/*---------------------------------------------------------------------------*/ | ||
| 537 | + | ||
| 538 | +#if SS_BLOCKSIZE != 0 | ||
| 539 | + | ||
| 540 | +static INLINE | ||
| 541 | +void | ||
| 542 | +ss_blockswap(int *a, int *b, int n) { | ||
| 543 | + int t; | ||
| 544 | + for(; 0 < n; --n, ++a, ++b) { | ||
| 545 | + t = *a, *a = *b, *b = t; | ||
| 546 | + } | ||
| 547 | +} | ||
| 548 | + | ||
/* In-place exchange of the adjacent ranges [first, middle) and
   [middle, last). With equal lengths this is a plain block swap; otherwise
   elements are cycled through the temporary t, shrinking the problem from
   the side of the longer range until one side is exhausted or both sides
   have equal length.
   NOTE(review): the net effect appears to be a rotation that puts the
   contents of [middle, last) in front of the contents of [first, middle) --
   confirm against the callers. */
static INLINE
void
ss_rotate(int *first, int *middle, int *last) {
  int *a, *b, t;
  int l, r;
  l = middle - first, r = last - middle;  /* lengths of the two ranges */
  for(; (0 < l) && (0 < r);) {
    if(l == r) { ss_blockswap(first, middle, l); break; }
    if(l < r) {
      /* Right range is longer: work backwards from the end. */
      a = last - 1, b = middle - 1;
      t = *a;
      do {
        *a-- = *b, *b-- = *a;
        if(b < first) {
          *a = t;
          last = a;
          if((r -= l + 1) <= l) { break; }
          a -= 1, b = middle - 1;
          t = *a;
        }
      } while(1);
    } else {
      /* Left range is longer: work forwards from the start. */
      a = first, b = middle;
      t = *a;
      do {
        *a++ = *b, *b++ = *a;
        if(last <= b) {
          *a = t;
          first = a + 1;
          if((l -= r + 1) <= r) { break; }
          a += 1, b = middle;
          t = *a;
        }
      } while(1);
    }
  }
}
| 586 | + | ||
| 587 | + | ||
| 588 | +/*---------------------------------------------------------------------------*/ | ||
| 589 | + | ||
/* Merges the ranges [first, middle) and [middle, last) in place, without a
   work buffer. Entries may be stored bitwise-complemented (negative); such
   entries are decoded with ~ before being compared.
   Each pass takes the last group of the right range (its final entry plus,
   if that entry is negative, the entries before it back to the nearest
   non-negative one), binary-searches the left range with ss_compare() for
   its insertion point, and rotates the tail of the left range behind it. */
static
void
ss_inplacemerge(const unsigned char *T, const int *PA,
                int *first, int *middle, int *last,
                int depth) {
  const int *p;
  int *a, *b;
  int len, half;
  int q, r;
  int x;

  for(;;) {
    /* p: PA slot of the last entry; x records whether it was complemented. */
    if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
    else                { x = 0; p = PA +  *(last - 1); }
    /* Binary search in [first, middle); r keeps the result of the last
       comparison that did not move a forward (-1 if there was none). */
    for(a = first, len = middle - first, half = len >> 1, r = -1;
        0 < len;
        len = half, half >>= 1) {
      b = a + half;
      q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
      if(q < 0) {
        a = b + 1;
        half -= (len & 1) ^ 1;
      } else {
        r = q;
      }
    }
    if(a < middle) {
      /* r == 0: the entry at the insertion point compared equal; flip it. */
      if(r == 0) { *a = ~*a; }
      ss_rotate(a, middle, last);
      last -= middle - a;
      middle = a;
      if(first == middle) { break; }
    }
    --last;
    /* Step back over the rest of a complemented group. */
    if(x != 0) { while(*--last < 0) { } }
    if(middle == last) { break; }
  }
}
| 628 | + | ||
| 629 | + | ||
| 630 | +/*---------------------------------------------------------------------------*/ | ||
| 631 | + | ||
/* Merge-forward with internal buffer.
   Merges [first, middle) with [middle, last) from left to right. The left
   range is first swapped into buf, which must therefore hold at least
   (middle - first) entries. Every write is an exchange: the value that
   buf held originally is written back as the merge proceeds, with t carrying
   the one value that is temporarily displaced.
   Negative (complemented) entries are copied together with the entry that
   precedes them and are never passed to ss_compare(). When the two heads
   compare equal, the right-hand head is complemented before both groups are
   copied. */
static
void
ss_mergeforward(const unsigned char *T, const int *PA,
                int *first, int *middle, int *last,
                int *buf, int depth) {
  int *a, *b, *c, *bufend;
  int t;
  int r;

  bufend = buf + (middle - first) - 1;
  ss_blockswap(buf, first, middle - first);

  /* a: output position, b: head of the buffered left range,
     c: head of the right range. */
  for(t = *(a = first), b = buf, c = middle;;) {
    r = ss_compare(T, PA + *b, PA + *c, depth);
    if(r < 0) {
      /* Left head is smaller: emit its group. */
      do {
        *a++ = *b;
        if(bufend <= b) { *bufend = t; return; }
        *b++ = *a;
      } while(*b < 0);
    } else if(r > 0) {
      /* Right head is smaller: emit its group. */
      do {
        *a++ = *c, *c++ = *a;
        if(last <= c) {
          /* Right range exhausted: flush the rest of the buffer. */
          while(b < bufend) { *a++ = *b, *b++ = *a; }
          *a = *b, *b = t;
          return;
        }
      } while(*c < 0);
    } else {
      /* Equal heads: flag the right one, then emit left group, right group. */
      *c = ~*c;
      do {
        *a++ = *b;
        if(bufend <= b) { *bufend = t; return; }
        *b++ = *a;
      } while(*b < 0);

      do {
        *a++ = *c, *c++ = *a;
        if(last <= c) {
          while(b < bufend) { *a++ = *b, *b++ = *a; }
          *a = *b, *b = t;
          return;
        }
      } while(*c < 0);
    }
  }
}
| 681 | + | ||
/* Merge-backward with internal buffer.
   Merges [first, middle) with [middle, last) from right to left. The right
   range is first swapped into buf, which must therefore hold at least
   (last - middle) entries; as in ss_mergeforward() every write is an
   exchange, with t carrying the temporarily displaced value.
   p1 / p2 are the PA slots of the current tails of the buffered right range
   and of the left range. Bit 0 / bit 1 of x record that the respective tail
   entry is complemented, in which case the complemented entries are copied
   before the non-negative entry in front of them. */
static
void
ss_mergebackward(const unsigned char *T, const int *PA,
                 int *first, int *middle, int *last,
                 int *buf, int depth) {
  const int *p1, *p2;
  int *a, *b, *c, *bufend;
  int t;
  int r;
  int x;

  bufend = buf + (last - middle) - 1;
  ss_blockswap(buf, middle, last - middle);

  x = 0;
  if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
  else                  { p1 = PA +  *bufend; }
  if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
  else                  { p2 = PA +  *(middle - 1); }
  /* a: output position, b: tail of the buffer, c: tail of the left range. */
  for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
    r = ss_compare(T, p1, p2, depth);
    if(0 < r) {
      /* Buffered (right) tail is larger: emit its group. */
      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
      *a-- = *b;
      if(b <= buf) { *buf = t; break; }
      *b-- = *a;
      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
      else       { p1 = PA +  *b; }
    } else if(r < 0) {
      /* Left tail is larger: emit its group. */
      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
      *a-- = *c, *c-- = *a;
      if(c < first) {
        /* Left range exhausted: flush the rest of the buffer. */
        while(buf < b) { *a-- = *b, *b-- = *a; }
        *a = *b, *b = t;
        break;
      }
      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
      else       { p2 = PA +  *c; }
    } else {
      /* Equal tails: emit the buffered group with its leading entry
         complemented, then the left group. */
      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
      *a-- = ~*b;
      if(b <= buf) { *buf = t; break; }
      *b-- = *a;
      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
      *a-- = *c, *c-- = *a;
      if(c < first) {
        while(buf < b) { *a-- = *b, *b-- = *a; }
        *a = *b, *b = t;
        break;
      }
      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
      else       { p1 = PA +  *b; }
      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
      else       { p2 = PA +  *c; }
    }
  }
}
| 740 | + | ||
/* D&C based merge.
   Merges [first, middle) with [middle, last) using a buffer of `bufsize`
   entries. When either side fits into the buffer it is merged directly with
   ss_mergebackward() / ss_mergeforward(). Otherwise a binary search finds
   the largest m such that the last m entries of the left range belong after
   the first m entries of the right range; those two blocks are swapped and
   the two resulting sub-problems are processed, one immediately and one via
   the explicit stack. The function returns when STACK_POP finds the stack
   empty.

   `check` (and `next` for the sub-problems) is a bit set consumed by
   MERGE_CHECK once a sub-merge is complete:
     bit 0 - complement *first unconditionally;
     bit 1 - complement *first if it compares equal to the entry before it;
     bit 2 - complement *last  if it compares equal to the entry before it.
   GETIDX decodes an entry that may be stored complemented.
   Note that GETIDX and MERGE_CHECK are not #undef'd at the end of the
   function; only STACK_SIZE is. */
static
void
ss_swapmerge(const unsigned char *T, const int *PA,
             int *first, int *middle, int *last,
             int *buf, int bufsize, int depth) {
#define STACK_SIZE SS_SMERGE_STACKSIZE
#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
#define MERGE_CHECK(a, b, c)\
  do {\
    if(((c) & 1) ||\
       (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
      *(a) = ~*(a);\
    }\
    if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
      *(b) = ~*(b);\
    }\
  } while(0)
  struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
  int *l, *r, *lm, *rm;
  int m, len, half;
  int ssize;
  int check, next;

  for(check = 0, ssize = 0;;) {
    /* Right side fits into the buffer. */
    if((last - middle) <= bufsize) {
      if((first < middle) && (middle < last)) {
        ss_mergebackward(T, PA, first, middle, last, buf, depth);
      }
      MERGE_CHECK(first, last, check);
      STACK_POP(first, middle, last, check);
      continue;
    }

    /* Left side fits into the buffer. */
    if((middle - first) <= bufsize) {
      if(first < middle) {
        ss_mergeforward(T, PA, first, middle, last, buf, depth);
      }
      MERGE_CHECK(first, last, check);
      STACK_POP(first, middle, last, check);
      continue;
    }

    /* Binary search for the block length m to exchange around middle. */
    for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
        0 < len;
        len = half, half >>= 1) {
      if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
                       PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
        m += half + 1;
        half -= (len & 1) ^ 1;
      }
    }

    if(0 < m) {
      lm = middle - m, rm = middle + m;
      ss_blockswap(lm, middle, m);
      l = r = middle, next = 0;
      if(rm < last) {
        if(*rm < 0) {
          /* The swap split a complemented group at rm: clear the mark and
             record in `next` what has to be re-checked afterwards. */
          *rm = ~*rm;
          if(first < lm) { for(; *--l < 0;) { } next |= 4; }
          next |= 1;
        } else if(first < lm) {
          for(; *r < 0; ++r) { }
          next |= 2;
        }
      }

      /* Continue with the smaller sub-problem, push the larger one. */
      if((l - first) <= (last - r)) {
        STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
        middle = lm, last = l, check = (check & 3) | (next & 4);
      } else {
        if((next & 2) && (r == middle)) { next ^= 6; }
        STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
        first = r, middle = rm, check = (next & 3) | (check & 4);
      }
    } else {
      /* Nothing to exchange: the two sides are already in order. Flag the
         boundary entry if it ties with its predecessor. */
      if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
        *middle = ~*middle;
      }
      MERGE_CHECK(first, last, check);
      STACK_POP(first, middle, last, check);
    }
  }
#undef STACK_SIZE
}
| 827 | + | ||
| 828 | +#endif /* SS_BLOCKSIZE != 0 */ | ||
| 829 | + | ||
| 830 | + | ||
| 831 | +/*---------------------------------------------------------------------------*/ | ||
| 832 | + | ||
/* Substring sort.
   Sorts the entries of [first, last) by the substrings they index through
   PA, compared from character offset `depth`.

   T, PA       text and index table handed to the ss_* helpers.
   buf,bufsize caller-provided work buffer for merging. If it is smaller than
               SS_BLOCKSIZE, than the range and than ss_isqrt(range length),
               the last `limit` entries of the range are used as buffer
               instead and are sorted and merged in at the end.
   n           only used to build the temporary PA pair for the last suffix
               (second element n - 2).
   lastsuffix  when non-zero, *first is excluded from the sort and inserted
               into the sorted range afterwards.

   With SS_BLOCKSIZE == 0 the whole range is sorted by one ss_mintrosort()
   call. Otherwise the range is sorted in blocks of SS_BLOCKSIZE entries
   which are merged pairwise with ss_swapmerge(). */
static
void
sssort(const unsigned char *T, const int *PA,
       int *first, int *last,
       int *buf, int bufsize,
       int depth, int n, int lastsuffix) {
  int *a;
#if SS_BLOCKSIZE != 0
  int *b, *middle, *curbuf;
  int j, k, curbufsize, limit;
#endif
  int i;

  if(lastsuffix != 0) { ++first; }

#if SS_BLOCKSIZE == 0
  ss_mintrosort(T, PA, first, last, depth);
#else
  /* Decide whether to carve the merge buffer out of the tail of the range. */
  if((bufsize < SS_BLOCKSIZE) &&
      (bufsize < (last - first)) &&
      (bufsize < (limit = ss_isqrt(last - first)))) {
    if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
    buf = middle = last - limit, bufsize = limit;
  } else {
    middle = last, limit = 0;
  }
  /* Sort full blocks; i counts finished blocks. After each block, every set
     low bit of i triggers a merge with the equally sized run before it. */
  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
    ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
#elif 1 < SS_BLOCKSIZE
    ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
#endif
    /* Use the still unsorted remainder of the range as merge buffer when it
       is larger than the provided buffer. */
    curbufsize = last - (a + SS_BLOCKSIZE);
    curbuf = a + SS_BLOCKSIZE;
    if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
    for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
      ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
    }
  }
  /* Sort the final, possibly partial block [a, middle). */
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  ss_mintrosort(T, PA, a, middle, depth);
#elif 1 < SS_BLOCKSIZE
  ss_insertionsort(T, PA, a, middle, depth);
#endif
  /* Merge the remaining runs, whose sizes follow the set bits of i. */
  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
    if(i & 1) {
      ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
      a -= k;
    }
  }
  /* The tail served as buffer: sort it and merge it in without a buffer. */
  if(limit != 0) {
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
    ss_mintrosort(T, PA, middle, last, depth);
#elif 1 < SS_BLOCKSIZE
    ss_insertionsort(T, PA, middle, last, depth);
#endif
    ss_inplacemerge(T, PA, first, middle, last, depth);
  }
#endif

  if(lastsuffix != 0) {
    /* Insert last type B* suffix. */
    /* PAi stands in for the two PA slots ss_compare() reads. Sorted entries
       are shifted one slot to the left until the insertion point is found;
       negative entries are shifted without being compared. */
    int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
    for(a = first, i = *(first - 1);
        (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
        ++a) {
      *(a - 1) = *a;
    }
    *(a - 1) = i;
  }
}
| 905 | + | ||
| 906 | + | ||
| 907 | +/*---------------------------------------------------------------------------*/ | ||
| 908 | + | ||
| 909 | +static INLINE | ||
| 910 | +int | ||
| 911 | +tr_ilg(int n) { | ||
| 912 | + return (n & 0xffff0000) ? | ||
| 913 | + ((n & 0xff000000) ? | ||
| 914 | + 24 + lg_table[(n >> 24) & 0xff] : | ||
| 915 | + 16 + lg_table[(n >> 16) & 0xff]) : | ||
| 916 | + ((n & 0x0000ff00) ? | ||
| 917 | + 8 + lg_table[(n >> 8) & 0xff] : | ||
| 918 | + 0 + lg_table[(n >> 0) & 0xff]); | ||
| 919 | +} | ||
| 920 | + | ||
| 921 | + | ||
| 922 | +/*---------------------------------------------------------------------------*/ | ||
| 923 | + | ||
| 924 | +/* Simple insertionsort for small size groups: orders [first, last) by ascending key ISAd[*p]. When an inserted element has the same key as the element it lands directly after, that earlier element is bitwise-complemented to record the tie. */ | ||
| 925 | +static | ||
| 926 | +void | ||
| 927 | +tr_insertionsort(const int *ISAd, int *first, int *last) { | ||
| 928 | + int *a, *b; | ||
| 929 | + int t, r; | ||
| 930 | + | ||
| 931 | + for(a = first + 1; a < last; ++a) { | ||
| 932 | + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { /* r < 0: t sorts before *b */ | ||
| 933 | + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); /* shift right; negative (already complemented) entries are moved along without being compared */ | ||
| 934 | + if(b < first) { break; } /* ran off the left edge: t goes to the front */ | ||
| 935 | + } | ||
| 936 | + if(r == 0) { *b = ~*b; } /* equal keys: flag the predecessor by complementing it */ | ||
| 937 | + *(b + 1) = t; | ||
| 938 | + } | ||
| 939 | +} | ||
| 940 | + | ||
| 941 | + | ||
| 942 | +/*---------------------------------------------------------------------------*/ | ||
| 943 | + | ||
| 944 | +static INLINE /* Sift-down step: moves the element at SA[i] down a max-heap of `size` entries ordered by key ISAd[SA[.]]. */ | ||
| 945 | +void | ||
| 946 | +tr_fixdown(const int *ISAd, int *SA, int i, int size) { | ||
| 947 | + int j, k; | ||
| 948 | + int v; | ||
| 949 | + int c, d, e; | ||
| 950 | + | ||
| 951 | + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { /* j = left child; the chosen child k is pulled up at the end of each step */ | ||
| 952 | + d = ISAd[SA[k = j++]]; /* key of the left child; j now indexes the right child */ | ||
| 953 | + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } /* no j < size check: when j == size this reads SA[size], so the caller must keep that slot readable */ | ||
| 954 | + if(d <= c) { break; } /* no child key exceeds v's key: v belongs at i */ | ||
| 955 | + } | ||
| 956 | + SA[i] = v; | ||
| 957 | +} | ||
| 958 | + | ||
| 959 | +/* Simple top-down heapsort: sorts SA[0..size) into ascending order of key ISAd[SA[.]]. */ | ||
| 960 | +static | ||
| 961 | +void | ||
| 962 | +tr_heapsort(const int *ISAd, int *SA, int size) { | ||
| 963 | + int i, m; | ||
| 964 | + int t; /* NOTE(review): also presumably the scratch variable used by the SWAP macro, which is defined outside this block -- confirm */ | ||
| 965 | + | ||
| 966 | + m = size; | ||
| 967 | + if((size % 2) == 0) { /* even size: build the heap over the first size-1 entries so that m is odd */ | ||
| 968 | + m--; | ||
| 969 | + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } /* keep the larger key of the two inside the heap range */ | ||
| 970 | + } | ||
| 971 | + | ||
| 972 | + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } /* heapify the first m entries */ | ||
| 973 | + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } /* move the heap maximum into the last slot, then restore the heap */ | ||
| 974 | + for(i = m - 1; 0 < i; --i) { /* repeatedly extract the maximum into SA[i] */ | ||
| 975 | + t = SA[0], SA[0] = SA[i]; | ||
| 976 | + tr_fixdown(ISAd, SA, 0, i); /* SA[i] still holds the value just copied to SA[0] while this runs */ | ||
| 977 | + SA[i] = t; | ||
| 978 | + } | ||
| 979 | +} | ||
| 980 | + | ||
| 981 | + | ||
| 982 | +/*---------------------------------------------------------------------------*/ | ||
| 983 | + | ||
| 984 | +/* Returns the median of three elements, comparing by key ISAd[*v]; the result is one of the three pointers passed in. */ | ||
| 985 | +static INLINE | ||
| 986 | +int * | ||
| 987 | +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { | ||
| 988 | + int *t; /* NOTE(review): not referenced directly here; presumably the scratch variable of the SWAP macro (defined outside this block) -- confirm */ | ||
| 989 | + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } /* now key(v1) <= key(v2); only the local pointers are exchanged */ | ||
| 990 | + if(ISAd[*v2] > ISAd[*v3]) { | ||
| 991 | + if(ISAd[*v1] > ISAd[*v3]) { return v1; } /* v3 < v1 <= v2 */ | ||
| 992 | + else { return v3; } /* v1 <= v3 < v2 */ | ||
| 993 | + } | ||
| 994 | + return v2; /* v1 <= v2 <= v3 */ | ||
| 995 | +} | ||
| 996 | + | ||
| 997 | +/* Returns the median of five elements, comparing by key ISAd[*v]; the result is one of the five pointers passed in. */ | ||
| 998 | +static INLINE | ||
| 999 | +int * | ||
| 1000 | +tr_median5(const int *ISAd, | ||
| 1001 | + int *v1, int *v2, int *v3, int *v4, int *v5) { | ||
| 1002 | + int *t; /* NOTE(review): not referenced directly here; presumably the scratch variable of the SWAP macro (defined outside this block) -- confirm */ | ||
| 1003 | + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } /* order the pair (v2, v3) */ | ||
| 1004 | + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } /* order the pair (v4, v5) */ | ||
| 1005 | + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } /* exchange the pairs so v2 has the smaller low element */ | ||
| 1006 | + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } | ||
| 1007 | + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } | ||
| 1008 | + if(ISAd[*v3] > ISAd[*v4]) { return v4; } | ||
| 1009 | + return v3; | ||
| 1010 | +} | ||
| 1011 | + | ||
| 1012 | +/* Returns the pivot element for [first, last), chosen by key ISAd[*p]: median of 3 for at most 32 elements, median of 5 for at most 512, otherwise the median of three medians-of-3 sampled near the start, middle and end. */ | ||
| 1013 | +static INLINE | ||
| 1014 | +int * | ||
| 1015 | +tr_pivot(const int *ISAd, int *first, int *last) { | ||
| 1016 | + int *middle; | ||
| 1017 | + int t; | ||
| 1018 | + | ||
| 1019 | + t = last - first; /* number of elements in the range */ | ||
| 1020 | + middle = first + t / 2; | ||
| 1021 | + | ||
| 1022 | + if(t <= 512) { | ||
| 1023 | + if(t <= 32) { | ||
| 1024 | + return tr_median3(ISAd, first, middle, last - 1); | ||
| 1025 | + } else { | ||
| 1026 | + t >>= 2; /* quarter of the range: spacing of the two inner samples from the ends */ | ||
| 1027 | + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); | ||
| 1028 | + } | ||
| 1029 | + } | ||
| 1030 | + t >>= 3; /* eighth of the range: spacing between the samples of each triple */ | ||
| 1031 | + first = tr_median3(ISAd, first, first + t, first + (t << 1)); | ||
| 1032 | + middle = tr_median3(ISAd, middle - t, middle, middle + t); | ||
| 1033 | + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); | ||
| 1034 | + return tr_median3(ISAd, first, middle, last); /* median of the three local medians */ | ||
| 1035 | +} | ||
| 1036 | + | ||
| 1037 | + | ||
| 1038 | +/*---------------------------------------------------------------------------*/ | ||
| 1039 | + | ||
| 1040 | +typedef struct _trbudget_t trbudget_t; /* work budget consulted through trbudget_check */ | ||
| 1041 | +struct _trbudget_t { | ||
| 1042 | + int chance; /* number of refills still allowed; decremented on each refill */ | ||
| 1043 | + int remain; /* budget left before the next refill is needed */ | ||
| 1044 | + int incval; /* amount added to remain on each refill */ | ||
| 1045 | + int count; /* running total of the sizes that trbudget_check refused */ | ||
| 1046 | +}; | ||
| 1047 | + | ||
| 1048 | +static INLINE /* Initialises a budget with `chance` refills and a starting/refill amount of `incval`. */ | ||
| 1049 | +void | ||
| 1050 | +trbudget_init(trbudget_t *budget, int chance, int incval) { | ||
| 1051 | + budget->chance = chance; | ||
| 1052 | + budget->remain = budget->incval = incval; /* count is not set here; trsort zeroes it before each tr_introsort call */ | ||
| 1053 | +} | ||
| 1054 | + | ||
| 1055 | +static INLINE /* Charges `size` against the budget. Returns 1 when the work may proceed, 0 when the budget and all refills are exhausted. */ | ||
| 1056 | +int | ||
| 1057 | +trbudget_check(trbudget_t *budget, int size) { | ||
| 1058 | + if(size <= budget->remain) { budget->remain -= size; return 1; } /* enough budget left: just consume it */ | ||
| 1059 | + if(budget->chance == 0) { budget->count += size; return 0; } /* no refills left: record the refused size and decline */ | ||
| 1060 | + budget->remain += budget->incval - size; /* refill by incval, then charge size */ | ||
| 1061 | + budget->chance -= 1; | ||
| 1062 | + return 1; | ||
| 1063 | +} | ||
| 1064 | + | ||
| 1065 | + | ||
| 1066 | +/*---------------------------------------------------------------------------*/ | ||
| 1067 | + | ||
| 1068 | +static INLINE /* Three-way partition of [first, last) around value v, comparing by key ISAd[*p]. On return [first, *pa) holds keys < v, [*pa, *pb) keys == v and [*pb, last) keys > v. Scanning starts at middle: elements in [first, middle) are not examined and are treated as equal to v. */ | ||
| 1069 | +void | ||
| 1070 | +tr_partition(const int *ISAd, | ||
| 1071 | + int *first, int *middle, int *last, | ||
| 1072 | + int **pa, int **pb, int v) { | ||
| 1073 | + int *a, *b, *c, *d, *e, *f; | ||
| 1074 | + int t, s; /* NOTE(review): t is presumably also the scratch variable of the SWAP macro (defined outside this block) -- confirm */ | ||
| 1075 | + int x = 0; | ||
| 1076 | + | ||
| 1077 | + for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } /* skip the leading run of keys equal to v */ | ||
| 1078 | + if(((a = b) < last) && (x < v)) { /* a marks the end of the equal keys stashed on the left */ | ||
| 1079 | + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { | ||
| 1080 | + if(x == v) { SWAP(*b, *a); ++a; } /* stash equal keys at the left end */ | ||
| 1081 | + } | ||
| 1082 | + } | ||
| 1083 | + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } /* skip the trailing run of keys equal to v */ | ||
| 1084 | + if((b < (d = c)) && (x > v)) { /* d marks the start of the equal keys stashed on the right */ | ||
| 1085 | + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { | ||
| 1086 | + if(x == v) { SWAP(*c, *d); --d; } /* stash equal keys at the right end */ | ||
| 1087 | + } | ||
| 1088 | + } | ||
| 1089 | + for(; b < c;) { /* *b is > v and *c is < v here: exchange them and keep scanning inward */ | ||
| 1090 | + SWAP(*b, *c); | ||
| 1091 | + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { | ||
| 1092 | + if(x == v) { SWAP(*b, *a); ++a; } | ||
| 1093 | + } | ||
| 1094 | + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { | ||
| 1095 | + if(x == v) { SWAP(*c, *d); --d; } | ||
| 1096 | + } | ||
| 1097 | + } | ||
| 1098 | + | ||
| 1099 | + if(a <= d) { /* skipped when every examined key equals v; the whole range is then reported as the equal part */ | ||
| 1100 | + c = b - 1; | ||
| 1101 | + if((s = a - first) > (t = b - a)) { s = t; } /* s = min(#equal on the left, #less) */ | ||
| 1102 | + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } /* move the left stash of equal keys next to the middle */ | ||
| 1103 | + if((s = d - c) > (t = last - d - 1)) { s = t; } /* s = min(#greater, #equal on the right) */ | ||
| 1104 | + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } /* move the right stash of equal keys next to the middle */ | ||
| 1105 | + first += (b - a), last -= (d - c); /* advance past the < v part and back over the > v part */ | ||
| 1106 | + } | ||
| 1107 | + *pa = first, *pb = last; | ||
| 1108 | +} | ||
| 1109 | + | ||
| 1110 | +static /* Fills the middle partition [a, b) and gives each placed suffix its final rank in ISA. */ | ||
| 1111 | +void | ||
| 1112 | +tr_copy(int *ISA, const int *SA, | ||
| 1113 | + int *first, int *a, int *b, int *last, | ||
| 1114 | + int depth) { | ||
| 1115 | + /* sort suffixes of middle partition | ||
| 1116 | + by using sorted order of suffixes of left and right partition. */ | ||
| 1117 | + int *c, *d, *e; | ||
| 1118 | + int s, v; | ||
| 1119 | + | ||
| 1120 | + v = b - SA - 1; /* index of the last slot of the middle partition; ISA[s] == v selects the suffixes to place */ | ||
| 1121 | + for(c = first, d = a - 1; c <= d; ++c) { /* left-to-right pass: d grows as entries are appended, and the scan continues over them */ | ||
| 1122 | + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { | ||
| 1123 | + *++d = s; | ||
| 1124 | + ISA[s] = d - SA; /* rank = position just written */ | ||
| 1125 | + } | ||
| 1126 | + } | ||
| 1127 | + for(c = last - 1, e = d + 1, d = b; e < d; --c) { /* right-to-left pass fills the remaining slots [e, b) from the top down */ | ||
| 1128 | + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { | ||
| 1129 | + *--d = s; | ||
| 1130 | + ISA[s] = d - SA; | ||
| 1131 | + } | ||
| 1132 | + } | ||
| 1133 | +} | ||
| 1134 | + | ||
| 1135 | +static /* Like tr_copy, but placed suffixes that share the same ISA[s + depth] value receive one common rank instead of individual ranks. */ | ||
| 1136 | +void | ||
| 1137 | +tr_partialcopy(int *ISA, const int *SA, | ||
| 1138 | + int *first, int *a, int *b, int *last, | ||
| 1139 | + int depth) { | ||
| 1140 | + int *c, *d, *e; | ||
| 1141 | + int s, v; | ||
| 1142 | + int rank, lastrank, newrank = -1; | ||
| 1143 | + | ||
| 1144 | + v = b - SA - 1; /* index of the last slot of the middle partition; ISA[s] == v selects the suffixes to place */ | ||
| 1145 | + lastrank = -1; | ||
| 1146 | + for(c = first, d = a - 1; c <= d; ++c) { /* left-to-right pass, appending after a - 1 */ | ||
| 1147 | + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { | ||
| 1148 | + *++d = s; | ||
| 1149 | + rank = ISA[s + depth]; | ||
| 1150 | + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } /* a new run starts: its rank is the position of its first member */ | ||
| 1151 | + ISA[s] = newrank; | ||
| 1152 | + } | ||
| 1153 | + } | ||
| 1154 | + | ||
| 1155 | + lastrank = -1; | ||
| 1156 | + for(e = d; first <= e; --e) { /* backward pass over [first, d]: re-rank each run to the position where the backward scan first meets it */ | ||
| 1157 | + rank = ISA[*e]; | ||
| 1158 | + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } | ||
| 1159 | + if(newrank != rank) { ISA[*e] = newrank; } | ||
| 1160 | + } | ||
| 1161 | + | ||
| 1162 | + lastrank = -1; | ||
| 1163 | + for(c = last - 1, e = d + 1, d = b; e < d; --c) { /* right-to-left pass fills the remaining slots [e, b) from the top down */ | ||
| 1164 | + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { | ||
| 1165 | + *--d = s; | ||
| 1166 | + rank = ISA[s + depth]; | ||
| 1167 | + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } | ||
| 1168 | + ISA[s] = newrank; | ||
| 1169 | + } | ||
| 1170 | + } | ||
| 1171 | +} | ||
| 1172 | + | ||
| 1173 | +static /* Sorts the group [first, last) by key ISAd[*p] using an explicit stack instead of recursion. `limit` selects the mode: >= 0 is the remaining quicksort depth, -1 a tandem repeat partition, -2 a tandem repeat copy, any other negative value post-processing of a sorted partition. STACK_PUSH5/STACK_POP5 are macros defined outside this block. */ | ||
| 1174 | +void | ||
| 1175 | +tr_introsort(int *ISA, const int *ISAd, | ||
| 1176 | + int *SA, int *first, int *last, | ||
| 1177 | + trbudget_t *budget) { | ||
| 1178 | +#define STACK_SIZE TR_STACKSIZE | ||
| 1179 | + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; | ||
| 1180 | + int *a, *b, *c; | ||
| 1181 | + int t; | ||
| 1182 | + int v, x = 0; | ||
| 1183 | + int incr = ISAd - ISA; /* distance between ISA and ISAd; ISAd advances by this amount per level */ | ||
| 1184 | + int limit, next; | ||
| 1185 | + int ssize, trlink = -1; /* ssize: current stack height; trlink: stack index of the enclosing tandem-repeat entry, or -1 */ | ||
| 1186 | + | ||
| 1187 | + for(ssize = 0, limit = tr_ilg(last - first);;) { | ||
| 1188 | + | ||
| 1189 | + if(limit < 0) { | ||
| 1190 | + if(limit == -1) { | ||
| 1191 | + /* tandem repeat partition */ | ||
| 1192 | + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); | ||
| 1193 | + | ||
| 1194 | + /* update ranks */ | ||
| 1195 | + if(a < last) { | ||
| 1196 | + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } | ||
| 1197 | + } | ||
| 1198 | + if(b < last) { | ||
| 1199 | + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } | ||
| 1200 | + } | ||
| 1201 | + | ||
| 1202 | + /* push */ | ||
| 1203 | + if(1 < (b - a)) { | ||
| 1204 | + STACK_PUSH5(NULL, a, b, 0, 0); | ||
| 1205 | + STACK_PUSH5(ISAd - incr, first, last, -2, trlink); | ||
| 1206 | + trlink = ssize - 2; /* index of the (NULL, a, b, 0, 0) entry pushed above */ | ||
| 1207 | + } | ||
| 1208 | + if((a - first) <= (last - b)) { /* continue with the smaller side, defer the larger one */ | ||
| 1209 | + if(1 < (a - first)) { | ||
| 1210 | + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); | ||
| 1211 | + last = a, limit = tr_ilg(a - first); | ||
| 1212 | + } else if(1 < (last - b)) { | ||
| 1213 | + first = b, limit = tr_ilg(last - b); | ||
| 1214 | + } else { | ||
| 1215 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
| 1216 | + } | ||
| 1217 | + } else { | ||
| 1218 | + if(1 < (last - b)) { | ||
| 1219 | + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); | ||
| 1220 | + first = b, limit = tr_ilg(last - b); | ||
| 1221 | + } else if(1 < (a - first)) { | ||
| 1222 | + last = a, limit = tr_ilg(a - first); | ||
| 1223 | + } else { | ||
| 1224 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
| 1225 | + } | ||
| 1226 | + } | ||
| 1227 | + } else if(limit == -2) { | ||
| 1228 | + /* tandem repeat copy */ | ||
| 1229 | + a = stack[--ssize].b, b = stack[ssize].c; /* pops the companion entry holding the middle partition bounds */ | ||
| 1230 | + if(stack[ssize].d == 0) { | ||
| 1231 | + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); | ||
| 1232 | + } else { /* d was set to -1 because a nested sort was cut short by the budget */ | ||
| 1233 | + if(0 <= trlink) { stack[trlink].d = -1; } | ||
| 1234 | + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); | ||
| 1235 | + } | ||
| 1236 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
| 1237 | + } else { | ||
| 1238 | + /* sorted partition */ | ||
| 1239 | + if(0 <= *first) { | ||
| 1240 | + a = first; | ||
| 1241 | + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); /* leading non-negative entries get their own position as rank */ | ||
| 1242 | + first = a; | ||
| 1243 | + } | ||
| 1244 | + if(first < last) { | ||
| 1245 | + a = first; do { *a = ~*a; } while(*++a < 0); /* undo the complement marks on a run of equal keys */ | ||
| 1246 | + next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1; | ||
| 1247 | + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } | ||
| 1248 | + | ||
| 1249 | + /* push */ | ||
| 1250 | + if(trbudget_check(budget, a - first)) { | ||
| 1251 | + if((a - first) <= (last - a)) { | ||
| 1252 | + STACK_PUSH5(ISAd, a, last, -3, trlink); | ||
| 1253 | + ISAd += incr, last = a, limit = next; | ||
| 1254 | + } else { | ||
| 1255 | + if(1 < (last - a)) { | ||
| 1256 | + STACK_PUSH5(ISAd + incr, first, a, next, trlink); | ||
| 1257 | + first = a, limit = -3; | ||
| 1258 | + } else { | ||
| 1259 | + ISAd += incr, last = a, limit = next; | ||
| 1260 | + } | ||
| 1261 | + } | ||
| 1262 | + } else { /* budget exhausted: leave this run unsorted and flag the enclosing tandem-repeat entry */ | ||
| 1263 | + if(0 <= trlink) { stack[trlink].d = -1; } | ||
| 1264 | + if(1 < (last - a)) { | ||
| 1265 | + first = a, limit = -3; | ||
| 1266 | + } else { | ||
| 1267 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
| 1268 | + } | ||
| 1269 | + } | ||
| 1270 | + } else { | ||
| 1271 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
| 1272 | + } | ||
| 1273 | + } | ||
| 1274 | + continue; | ||
| 1275 | + } | ||
| 1276 | + | ||
| 1277 | + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { | ||
| 1278 | + tr_insertionsort(ISAd, first, last); | ||
| 1279 | + limit = -3; /* next iteration handles this range as a sorted partition */ | ||
| 1280 | + continue; | ||
| 1281 | + } | ||
| 1282 | + | ||
| 1283 | + if(limit-- == 0) { /* depth limit reached: fall back to heapsort */ | ||
| 1284 | + tr_heapsort(ISAd, first, last - first); | ||
| 1285 | + for(a = last - 1; first < a; a = b) { | ||
| 1286 | + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } /* complement all but the last member of each run of equal keys */ | ||
| 1287 | + } | ||
| 1288 | + limit = -3; | ||
| 1289 | + continue; | ||
| 1290 | + } | ||
| 1291 | + | ||
| 1292 | + /* choose pivot */ | ||
| 1293 | + a = tr_pivot(ISAd, first, last); | ||
| 1294 | + SWAP(*first, *a); | ||
| 1295 | + v = ISAd[*first]; | ||
| 1296 | + | ||
| 1297 | + /* partition */ | ||
| 1298 | + tr_partition(ISAd, first, first + 1, last, &a, &b, v); | ||
| 1299 | + if((last - first) != (b - a)) { /* at least one key differs from the pivot key */ | ||
| 1300 | + next = (ISA[*a] != v) ? tr_ilg(b - a) : -1; | ||
| 1301 | + | ||
| 1302 | + /* update ranks */ | ||
| 1303 | + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } | ||
| 1304 | + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } | ||
| 1305 | + | ||
| 1306 | + /* push */ | ||
| 1307 | + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { /* the equal part [a, b) is refined one level deeper (ISAd + incr); the cases below only choose the processing order by size */ | ||
| 1308 | + if((a - first) <= (last - b)) { | ||
| 1309 | + if((last - b) <= (b - a)) { | ||
| 1310 | + if(1 < (a - first)) { | ||
| 1311 | + STACK_PUSH5(ISAd + incr, a, b, next, trlink); | ||
| 1312 | + STACK_PUSH5(ISAd, b, last, limit, trlink); | ||
| 1313 | + last = a; | ||
| 1314 | + } else if(1 < (last - b)) { | ||
| 1315 | + STACK_PUSH5(ISAd + incr, a, b, next, trlink); | ||
| 1316 | + first = b; | ||
| 1317 | + } else { | ||
| 1318 | + ISAd += incr, first = a, last = b, limit = next; | ||
| 1319 | + } | ||
| 1320 | + } else if((a - first) <= (b - a)) { | ||
| 1321 | + if(1 < (a - first)) { | ||
| 1322 | + STACK_PUSH5(ISAd, b, last, limit, trlink); | ||
| 1323 | + STACK_PUSH5(ISAd + incr, a, b, next, trlink); | ||
| 1324 | + last = a; | ||
| 1325 | + } else { | ||
| 1326 | + STACK_PUSH5(ISAd, b, last, limit, trlink); | ||
| 1327 | + ISAd += incr, first = a, last = b, limit = next; | ||
| 1328 | + } | ||
| 1329 | + } else { | ||
| 1330 | + STACK_PUSH5(ISAd, b, last, limit, trlink); | ||
| 1331 | + STACK_PUSH5(ISAd, first, a, limit, trlink); | ||
| 1332 | + ISAd += incr, first = a, last = b, limit = next; | ||
| 1333 | + } | ||
| 1334 | + } else { | ||
| 1335 | + if((a - first) <= (b - a)) { | ||
| 1336 | + if(1 < (last - b)) { | ||
| 1337 | + STACK_PUSH5(ISAd + incr, a, b, next, trlink); | ||
| 1338 | + STACK_PUSH5(ISAd, first, a, limit, trlink); | ||
| 1339 | + first = b; | ||
| 1340 | + } else if(1 < (a - first)) { | ||
| 1341 | + STACK_PUSH5(ISAd + incr, a, b, next, trlink); | ||
| 1342 | + last = a; | ||
| 1343 | + } else { | ||
| 1344 | + ISAd += incr, first = a, last = b, limit = next; | ||
| 1345 | + } | ||
| 1346 | + } else if((last - b) <= (b - a)) { | ||
| 1347 | + if(1 < (last - b)) { | ||
| 1348 | + STACK_PUSH5(ISAd, first, a, limit, trlink); | ||
| 1349 | + STACK_PUSH5(ISAd + incr, a, b, next, trlink); | ||
| 1350 | + first = b; | ||
| 1351 | + } else { | ||
| 1352 | + STACK_PUSH5(ISAd, first, a, limit, trlink); | ||
| 1353 | + ISAd += incr, first = a, last = b, limit = next; | ||
| 1354 | + } | ||
| 1355 | + } else { | ||
| 1356 | + STACK_PUSH5(ISAd, first, a, limit, trlink); | ||
| 1357 | + STACK_PUSH5(ISAd, b, last, limit, trlink); | ||
| 1358 | + ISAd += incr, first = a, last = b, limit = next; | ||
| 1359 | + } | ||
| 1360 | + } | ||
| 1361 | + } else { /* equal part is trivial or the budget refused it: only the < and > parts are processed further */ | ||
| 1362 | + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } | ||
| 1363 | + if((a - first) <= (last - b)) { | ||
| 1364 | + if(1 < (a - first)) { | ||
| 1365 | + STACK_PUSH5(ISAd, b, last, limit, trlink); | ||
| 1366 | + last = a; | ||
| 1367 | + } else if(1 < (last - b)) { | ||
| 1368 | + first = b; | ||
| 1369 | + } else { | ||
| 1370 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
| 1371 | + } | ||
| 1372 | + } else { | ||
| 1373 | + if(1 < (last - b)) { | ||
| 1374 | + STACK_PUSH5(ISAd, first, a, limit, trlink); | ||
| 1375 | + first = b; | ||
| 1376 | + } else if(1 < (a - first)) { | ||
| 1377 | + last = a; | ||
| 1378 | + } else { | ||
| 1379 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
| 1380 | + } | ||
| 1381 | + } | ||
| 1382 | + } | ||
| 1383 | + } else { /* every key equals the pivot key: retry the whole range one level deeper if the budget allows */ | ||
| 1384 | + if(trbudget_check(budget, last - first)) { | ||
| 1385 | + limit = tr_ilg(last - first), ISAd += incr; | ||
| 1386 | + } else { | ||
| 1387 | + if(0 <= trlink) { stack[trlink].d = -1; } | ||
| 1388 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
| 1389 | + } | ||
| 1390 | + } | ||
| 1391 | + } | ||
| 1392 | +#undef STACK_SIZE | ||
| 1393 | +} | ||
| 1394 | + | ||
| 1395 | + | ||
| 1396 | + | ||
| 1397 | +/*---------------------------------------------------------------------------*/ | ||
| 1398 | + | ||
| 1399 | +/* Tandem repeat sort: repeatedly runs tr_introsort over every unsorted group of SA, doubling the ISAd offset each pass, until a pass leaves nothing unsorted or *SA reaches -n. */ | ||
| 1400 | +static | ||
| 1401 | +void | ||
| 1402 | +trsort(int *ISA, int *SA, int n, int depth) { | ||
| 1403 | + int *ISAd; | ||
| 1404 | + int *first, *last; | ||
| 1405 | + trbudget_t budget; | ||
| 1406 | + int t, skip, unsorted; | ||
| 1407 | + | ||
| 1408 | + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); | ||
| 1409 | +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ | ||
| 1410 | + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { /* *SA == -n means one sorted run spans all n entries */ | ||
| 1411 | + first = SA; | ||
| 1412 | + skip = 0; | ||
| 1413 | + unsorted = 0; | ||
| 1414 | + do { | ||
| 1415 | + if((t = *first) < 0) { first -= t; skip += t; } /* negative entry: length of an already sorted run; jump over it and accumulate it */ | ||
| 1416 | + else { | ||
| 1417 | + if(skip != 0) { *(first + skip) = skip; skip = 0; } /* store the merged run length at the start of the run */ | ||
| 1418 | + last = SA + ISA[t] + 1; /* ISA[t] is the index of the last member of t's group */ | ||
| 1419 | + if(1 < (last - first)) { | ||
| 1420 | + budget.count = 0; | ||
| 1421 | + tr_introsort(ISA, ISAd, SA, first, last, &budget); | ||
| 1422 | + if(budget.count != 0) { unsorted += budget.count; } /* part of the group was refused by the budget */ | ||
| 1423 | + else { skip = first - last; } /* group fully sorted: start a new sorted run */ | ||
| 1424 | + } else if((last - first) == 1) { | ||
| 1425 | + skip = -1; | ||
| 1426 | + } | ||
| 1427 | + first = last; | ||
| 1428 | + } | ||
| 1429 | + } while(first < (SA + n)); | ||
| 1430 | + if(skip != 0) { *(first + skip) = skip; } | ||
| 1431 | + if(unsorted == 0) { break; } | ||
| 1432 | + } | ||
| 1433 | +} | ||
| 1434 | + | ||
| 1435 | + | ||
| 1436 | +/*---------------------------------------------------------------------------*/ | ||
| 1437 | + | ||
| 1438 | +/* Sorts suffixes of type B*. Returns m, the number of type B* suffixes found in T[0..n). BUCKET_A, BUCKET_B and BUCKET_BSTAR are macros defined outside this block. */ | ||
| 1439 | +static | ||
| 1440 | +int | ||
| 1441 | +sort_typeBstar(const unsigned char *T, int *SA, | ||
| 1442 | + int *bucket_A, int *bucket_B, | ||
| 1443 | + int n, int openMP) { | ||
| 1444 | + int *PAb, *ISAb, *buf; | ||
| 1445 | +#ifdef LIBBSC_OPENMP | ||
| 1446 | + int *curbuf; | ||
| 1447 | + int l; | ||
| 1448 | +#endif | ||
| 1449 | + int i, j, k, t, m, bufsize; | ||
| 1450 | + int c0, c1; | ||
| 1451 | +#ifdef LIBBSC_OPENMP | ||
| 1452 | + int d0, d1; | ||
| 1453 | +#endif | ||
| 1454 | + (void)openMP; /* silences the unused-parameter warning when LIBBSC_OPENMP is not defined */ | ||
| 1455 | + | ||
| 1456 | + /* Initialize bucket arrays. */ | ||
| 1457 | + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } | ||
| 1458 | + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } | ||
| 1459 | + | ||
| 1460 | + /* Count the number of occurrences of the first one or two characters of each | ||
| 1461 | + type A, B and B* suffix. Moreover, store the beginning position of all | ||
| 1462 | + type B* suffixes into the array SA. */ | ||
| 1463 | + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { | ||
| 1464 | + /* type A suffix. */ | ||
| 1465 | + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); | ||
| 1466 | + if(0 <= i) { | ||
| 1467 | + /* type B* suffix. */ | ||
| 1468 | + ++BUCKET_BSTAR(c0, c1); | ||
| 1469 | + SA[--m] = i; | ||
| 1470 | + /* type B suffix. */ | ||
| 1471 | + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { | ||
| 1472 | + ++BUCKET_B(c0, c1); | ||
| 1473 | + } | ||
| 1474 | + } | ||
| 1475 | + } | ||
| 1476 | + m = n - m; /* m now holds the number of type B* suffixes stored at SA[n - m .. n) */ | ||
| 1477 | +/* | ||
| 1478 | +note: | ||
| 1479 | + A type B* suffix is lexicographically smaller than a type B suffix that | ||
| 1480 | + begins with the same first two characters. | ||
| 1481 | +*/ | ||
| 1482 | + | ||
| 1483 | + /* Calculate the index of start/end point of each bucket. */ | ||
| 1484 | + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { | ||
| 1485 | + t = i + BUCKET_A(c0); | ||
| 1486 | + BUCKET_A(c0) = i + j; /* start point */ | ||
| 1487 | + i = t + BUCKET_B(c0, c0); | ||
| 1488 | + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { | ||
| 1489 | + j += BUCKET_BSTAR(c0, c1); | ||
| 1490 | + BUCKET_BSTAR(c0, c1) = j; /* end point */ | ||
| 1491 | + i += BUCKET_B(c0, c1); | ||
| 1492 | + } | ||
| 1493 | + } | ||
| 1494 | + | ||
| 1495 | + if(0 < m) { | ||
| 1496 | + /* Sort the type B* suffixes by their first two characters. */ | ||
| 1497 | + PAb = SA + n - m; ISAb = SA + m; /* PAb: the B* positions collected above; ISAb: area that receives their ranks */ | ||
| 1498 | + for(i = m - 2; 0 <= i; --i) { | ||
| 1499 | + t = PAb[i], c0 = T[t], c1 = T[t + 1]; | ||
| 1500 | + SA[--BUCKET_BSTAR(c0, c1)] = i; | ||
| 1501 | + } | ||
| 1502 | + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; | ||
| 1503 | + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; /* the last B* suffix is inserted after all others, so it lands at the front of its bucket */ | ||
| 1504 | + | ||
| 1505 | + /* Sort the type B* substrings using sssort. */ | ||
| 1506 | +#ifdef LIBBSC_OPENMP | ||
| 1507 | + if (openMP) | ||
| 1508 | + { | ||
| 1509 | + buf = SA + m; | ||
| 1510 | + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; | ||
| 1511 | +#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1) | ||
| 1512 | + { | ||
| 1513 | + bufsize = (n - (2 * m)) / omp_get_num_threads(); /* each thread gets an equal slice of the free area as merge buffer */ | ||
| 1514 | + curbuf = buf + omp_get_thread_num() * bufsize; | ||
| 1515 | + k = 0; | ||
| 1516 | + for(;;) { | ||
| 1517 | + #pragma omp critical(sssort_lock) | ||
| 1518 | + { | ||
| 1519 | + if(0 < (l = j)) { /* claim the next bucket [k, l) holding more than one entry, under the lock */ | ||
| 1520 | + d0 = c0, d1 = c1; | ||
| 1521 | + do { | ||
| 1522 | + k = BUCKET_BSTAR(d0, d1); | ||
| 1523 | + if(--d1 <= d0) { | ||
| 1524 | + d1 = ALPHABET_SIZE - 1; | ||
| 1525 | + if(--d0 < 0) { break; } | ||
| 1526 | + } | ||
| 1527 | + } while(((l - k) <= 1) && (0 < (l = k))); | ||
| 1528 | + c0 = d0, c1 = d1, j = k; | ||
| 1529 | + } | ||
| 1530 | + } | ||
| 1531 | + if(l == 0) { break; } | ||
| 1532 | + sssort(T, PAb, SA + k, SA + l, | ||
| 1533 | + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); | ||
| 1534 | + } | ||
| 1535 | + } | ||
| 1536 | + } | ||
| 1537 | + else | ||
| 1538 | + { | ||
| 1539 | + buf = SA + m, bufsize = n - (2 * m); | ||
| 1540 | + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { | ||
| 1541 | + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { | ||
| 1542 | + i = BUCKET_BSTAR(c0, c1); | ||
| 1543 | + if(1 < (j - i)) { | ||
| 1544 | + sssort(T, PAb, SA + i, SA + j, | ||
| 1545 | + buf, bufsize, 2, n, *(SA + i) == (m - 1)); | ||
| 1546 | + } | ||
| 1547 | + } | ||
| 1548 | + } | ||
| 1549 | + } | ||
| 1550 | +#else | ||
| 1551 | + buf = SA + m, bufsize = n - (2 * m); /* the area between the bucket indices and PAb serves as sssort's scratch buffer */ | ||
| 1552 | + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { | ||
| 1553 | + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { | ||
| 1554 | + i = BUCKET_BSTAR(c0, c1); | ||
| 1555 | + if(1 < (j - i)) { /* only buckets with more than one entry need sorting */ | ||
| 1556 | + sssort(T, PAb, SA + i, SA + j, | ||
| 1557 | + buf, bufsize, 2, n, *(SA + i) == (m - 1)); | ||
| 1558 | + } | ||
| 1559 | + } | ||
| 1560 | + } | ||
| 1561 | +#endif | ||
| 1562 | + | ||
| 1563 | + /* Compute ranks of type B* substrings. */ | ||
| 1564 | + for(i = m - 1; 0 <= i; --i) { | ||
| 1565 | + if(0 <= SA[i]) { | ||
| 1566 | + j = i; | ||
| 1567 | + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); /* run of non-negative entries: each gets its own position as rank */ | ||
| 1568 | + SA[i + 1] = i - j; /* store the negated run length at the start of the run */ | ||
| 1569 | + if(i <= 0) { break; } | ||
| 1570 | + } | ||
| 1571 | + j = i; | ||
| 1572 | + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); /* run of complemented entries: restore them and give all the shared rank j */ | ||
| 1573 | + ISAb[SA[i]] = j; | ||
| 1574 | + } | ||
| 1575 | + | ||
| 1576 | + /* Construct the inverse suffix array of type B* suffixes using trsort. */ | ||
| 1577 | + trsort(ISAb, SA, m, 1); | ||
| 1578 | + | ||
| 1579 | + /* Set the sorted order of type B* suffixes. */ | ||
| 1580 | + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { | ||
| 1581 | + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } | ||
| 1582 | + if(0 <= i) { | ||
| 1583 | + t = i; | ||
| 1584 | + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } | ||
| 1585 | + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; /* stored complemented when the inner scan stopped right at t - 1 (and t != 0) */ | ||
| 1586 | + } | ||
| 1587 | + } | ||
| 1588 | + | ||
| 1589 | + /* Calculate the index of start/end point of each bucket. */ | ||
| 1590 | + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ | ||
| 1591 | + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { | ||
| 1592 | + i = BUCKET_A(c0 + 1) - 1; | ||
| 1593 | + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { | ||
| 1594 | + t = i - BUCKET_B(c0, c1); | ||
| 1595 | + BUCKET_B(c0, c1) = i; /* end point */ | ||
| 1596 | + | ||
| 1597 | + /* Move all type B* suffixes to the correct position. */ | ||
| 1598 | + for(i = t, j = BUCKET_BSTAR(c0, c1); | ||
| 1599 | + j <= k; | ||
| 1600 | + --i, --k) { SA[i] = SA[k]; } | ||
| 1601 | + } | ||
| 1602 | + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ | ||
| 1603 | + BUCKET_B(c0, c0) = i; /* end point */ | ||
| 1604 | + } | ||
| 1605 | + } | ||
| 1606 | + | ||
| 1607 | + return m; | ||
| 1608 | +} | ||
| 1609 | + | ||
| 1610 | +/* Constructs the suffix array by using the sorted order of type B* suffixes. Runs two scans over SA: right-to-left to place the type B suffixes (only when m > 0), then left-to-right to place the rest. NOTE(review): reads T[n - 2], so presumably callers guarantee n >= 2 -- confirm. */ | ||
| 1611 | +static | ||
| 1612 | +void | ||
| 1613 | +construct_SA(const unsigned char *T, int *SA, | ||
| 1614 | + int *bucket_A, int *bucket_B, | ||
| 1615 | + int n, int m) { | ||
| 1616 | + int *i, *j, *k; | ||
| 1617 | + int s; | ||
| 1618 | + int c0, c1, c2; | ||
| 1619 | + | ||
| 1620 | + if(0 < m) { | ||
| 1621 | + /* Construct the sorted order of type B suffixes by using | ||
| 1622 | + the sorted order of type B* suffixes. */ | ||
| 1623 | + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { | ||
| 1624 | + /* Scan the suffix array from right to left. */ | ||
| 1625 | + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), | ||
| 1626 | + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; | ||
| 1627 | + i <= j; | ||
| 1628 | + --j) { | ||
| 1629 | + if(0 < (s = *j)) { | ||
| 1630 | + assert(T[s] == c1); | ||
| 1631 | + assert(((s + 1) < n) && (T[s] <= T[s + 1])); | ||
| 1632 | + assert(T[s - 1] <= T[s]); | ||
| 1633 | + *j = ~s; /* complement the processed entry; the left-to-right scan below restores it */ | ||
| 1634 | + c0 = T[--s]; /* s now refers to the preceding suffix, c0 is its first character */ | ||
| 1635 | + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } /* store complemented when the character before it is larger */ | ||
| 1636 | + if(c0 != c2) { /* switching to another (c0, c1) bucket: save the old write cursor, load the new one */ | ||
| 1637 | + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } | ||
| 1638 | + k = SA + BUCKET_B(c2 = c0, c1); | ||
| 1639 | + } | ||
| 1640 | + assert(k < j); assert(k != NULL); | ||
| 1641 | + *k-- = s; | ||
| 1642 | + } else { | ||
| 1643 | + assert(((s == 0) && (T[s] == c1)) || (s < 0)); | ||
| 1644 | + *j = ~s; | ||
| 1645 | + } | ||
| 1646 | + } | ||
| 1647 | + } | ||
| 1648 | + } | ||
| 1649 | + | ||
| 1650 | + /* Construct the suffix array by using | ||
| 1651 | + the sorted order of type B suffixes. */ | ||
| 1652 | + k = SA + BUCKET_A(c2 = T[n - 1]); | ||
| 1653 | + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); /* seed the scan with the last suffix, complemented when its predecessor starts with a smaller character */ | ||
| 1654 | + /* Scan the suffix array from left to right. */ | ||
| 1655 | + for(i = SA, j = SA + n; i < j; ++i) { | ||
| 1656 | + if(0 < (s = *i)) { | ||
| 1657 | + assert(T[s - 1] >= T[s]); | ||
| 1658 | + c0 = T[--s]; | ||
| 1659 | + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } | ||
| 1660 | + if(c0 != c2) { /* switching to another bucket: save the old write cursor, load the new one */ | ||
| 1661 | + BUCKET_A(c2) = k - SA; | ||
| 1662 | + k = SA + BUCKET_A(c2 = c0); | ||
| 1663 | + } | ||
| 1664 | + assert(i < k); | ||
| 1665 | + *k++ = s; | ||
| 1666 | + } else { | ||
| 1667 | + assert(s < 0); | ||
| 1668 | + *i = ~s; /* restore a complemented entry to its real suffix index */ | ||
| 1669 | + } | ||
| 1670 | + } | ||
| 1671 | +} | ||
| 1672 | + | ||
| 1673 | +/* Constructs the burrows-wheeler transformed string directly | ||
| 1674 | + by using the sorted order of type B* suffixes. The output characters are written into SA in place of suffix indices. Returns orig - SA, the offset of the slot where the left-to-right scan met a zero entry (presumably the BWT primary index -- confirm). */ | ||
| 1675 | +static | ||
| 1676 | +int | ||
| 1677 | +construct_BWT(const unsigned char *T, int *SA, | ||
| 1678 | + int *bucket_A, int *bucket_B, | ||
| 1679 | + int n, int m) { | ||
| 1680 | + int *i, *j, *k, *orig; | ||
| 1681 | + int s; | ||
| 1682 | + int c0, c1, c2; | ||
| 1683 | + | ||
| 1684 | + if(0 < m) { | ||
| 1685 | + /* Construct the sorted order of type B suffixes by using | ||
| 1686 | + the sorted order of type B* suffixes. */ | ||
| 1687 | + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { | ||
| 1688 | + /* Scan the suffix array from right to left. */ | ||
| 1689 | + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), | ||
| 1690 | + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; | ||
| 1691 | + i <= j; | ||
| 1692 | + --j) { | ||
| 1693 | + if(0 < (s = *j)) { | ||
| 1694 | + assert(T[s] == c1); | ||
| 1695 | + assert(((s + 1) < n) && (T[s] <= T[s + 1])); | ||
| 1696 | + assert(T[s - 1] <= T[s]); | ||
| 1697 | + c0 = T[--s]; | ||
| 1698 | + *j = ~((int)c0); /* replace the suffix index with its complemented preceding character; the scan below turns it back into c0 */ | ||
| 1699 | + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } | ||
| 1700 | + if(c0 != c2) { /* switching to another (c0, c1) bucket: save the old write cursor, load the new one */ | ||
| 1701 | + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } | ||
| 1702 | + k = SA + BUCKET_B(c2 = c0, c1); | ||
| 1703 | + } | ||
| 1704 | + assert(k < j); assert(k != NULL); | ||
| 1705 | + *k-- = s; | ||
| 1706 | + } else if(s != 0) { | ||
| 1707 | + *j = ~s; | ||
| 1708 | +#ifndef NDEBUG | ||
| 1709 | + } else { | ||
| 1710 | + assert(T[s] == c1); | ||
| 1711 | +#endif | ||
| 1712 | + } | ||
| 1713 | + } | ||
| 1714 | + } | ||
| 1715 | + } | ||
| 1716 | + | ||
| 1717 | + /* Construct the BWTed string by using | ||
| 1718 | + the sorted order of type B suffixes. */ | ||
| 1719 | + k = SA + BUCKET_A(c2 = T[n - 1]); | ||
| 1720 | + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); /* seed with the last suffix: either its complemented preceding character or its index for further scanning */ | ||
| 1721 | + /* Scan the suffix array from left to right. */ | ||
| 1722 | + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { | ||
| 1723 | + if(0 < (s = *i)) { | ||
| 1724 | + assert(T[s - 1] >= T[s]); | ||
| 1725 | + c0 = T[--s]; | ||
| 1726 | + *i = c0; /* output character for this slot */ | ||
| 1727 | + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } /* pre-compute the output character for the induced entry */ | ||
| 1728 | + if(c0 != c2) { /* switching to another bucket: save the old write cursor, load the new one */ | ||
| 1729 | + BUCKET_A(c2) = k - SA; | ||
| 1730 | + k = SA + BUCKET_A(c2 = c0); | ||
| 1731 | + } | ||
| 1732 | + assert(i < k); | ||
| 1733 | + *k++ = s; | ||
| 1734 | + } else if(s != 0) { | ||
| 1735 | + *i = ~s; /* complemented character: restore it */ | ||
| 1736 | + } else { | ||
| 1737 | + orig = i; /* zero entry: remember this slot for the return value */ | ||
| 1738 | + } | ||
| 1739 | + } | ||
| 1740 | + | ||
| 1741 | + return orig - SA; | ||
| 1742 | +} | ||
| 1743 | + | ||
| 1744 | +/* Constructs the burrows-wheeler transformed string directly | ||
| 1745 | + by using the sorted order of type B* suffixes. While scanning, it also stores sampled SA positions into `indexes` and writes their count to `*num_indexes`. Returns the offset inside SA of the slot whose value is 0 (orig - SA). */ | ||
| 1746 | +static | ||
| 1747 | +int | ||
| 1748 | +construct_BWT_indexes(const unsigned char *T, int *SA, | ||
| 1749 | + int *bucket_A, int *bucket_B, | ||
| 1750 | + int n, int m, | ||
| 1751 | + unsigned char * num_indexes, int * indexes) { | ||
| 1752 | + int *i, *j, *k, *orig; | ||
| 1753 | + int s; | ||
| 1754 | + int c0, c1, c2; | ||
| 1755 | + | ||
| 1756 | + int mod = n / 8; /* seed for the sampling mask built below */ | ||
| 1757 | + { | ||
| 1758 | + mod |= mod >> 1; mod |= mod >> 2; /* smear the highest set bit of n / 8 into every lower bit ... */ | ||
| 1759 | + mod |= mod >> 4; mod |= mod >> 8; | ||
| 1760 | + mod |= mod >> 16; mod >>= 1; /* ... then halve: mod is now 0 or an all-ones mask, so (mod + 1) is a power of two */ | ||
| 1761 | + | ||
| 1762 | + *num_indexes = (unsigned char)((n - 1) / (mod + 1)); /* number of sample slots; truncated to unsigned char */ | ||
| 1763 | + } | ||
| 1764 | + | ||
| 1765 | + if(0 < m) { | ||
| 1766 | + /* Construct the sorted order of type B suffixes by using | ||
| 1767 | + the sorted order of type B* suffixes. */ | ||
| 1768 | + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { | ||
| 1769 | + /* Scan the suffix array from right to left. */ | ||
| 1770 | + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), | ||
| 1771 | + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; | ||
| 1772 | + i <= j; | ||
| 1773 | + --j) { | ||
| 1774 | + if(0 < (s = *j)) { | ||
| 1775 | + assert(T[s] == c1); | ||
| 1776 | + assert(((s + 1) < n) && (T[s] <= T[s + 1])); | ||
| 1777 | + assert(T[s - 1] <= T[s]); | ||
| 1778 | + | ||
| 1779 | + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA; /* s is a multiple of (mod + 1): record this SA offset as a sample */ | ||
| 1780 | + | ||
| 1781 | + c0 = T[--s]; /* c0 = character preceding the suffix; s now names the preceding suffix */ | ||
| 1782 | + *j = ~((int)c0); /* replace the entry by the complemented preceding character */ | ||
| 1783 | + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } | ||
| 1784 | + if(c0 != c2) { | ||
| 1785 | + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } /* save the write cursor of the bucket being left */ | ||
| 1786 | + k = SA + BUCKET_B(c2 = c0, c1); /* switch the write cursor to the (c0, c1) bucket */ | ||
| 1787 | + } | ||
| 1788 | + assert(k < j); assert(k != NULL); | ||
| 1789 | + *k-- = s; | ||
| 1790 | + } else if(s != 0) { | ||
| 1791 | + *j = ~s; /* negative entry: store its complement back */ | ||
| 1792 | +#ifndef NDEBUG | ||
| 1793 | + } else { | ||
| 1794 | + assert(T[s] == c1); | ||
| 1795 | +#endif | ||
| 1796 | + } | ||
| 1797 | + } | ||
| 1798 | + } | ||
| 1799 | + } | ||
| 1800 | + | ||
| 1801 | + /* Construct the BWTed string by using | ||
| 1802 | + the sorted order of type B suffixes. */ | ||
| 1803 | + k = SA + BUCKET_A(c2 = T[n - 1]); | ||
| 1804 | + if (T[n - 2] < c2) { | ||
| 1805 | + if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA; /* same sampling rule, applied to suffix n - 1 */ | ||
| 1806 | + *k++ = ~((int)T[n - 2]); | ||
| 1807 | + } | ||
| 1808 | + else { | ||
| 1809 | + *k++ = n - 1; | ||
| 1810 | + } | ||
| 1811 | + | ||
| 1812 | + /* Scan the suffix array from left to right. */ | ||
| 1813 | + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { | ||
| 1814 | + if(0 < (s = *i)) { | ||
| 1815 | + assert(T[s - 1] >= T[s]); | ||
| 1816 | + | ||
| 1817 | + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA; /* sample: s is a multiple of (mod + 1) */ | ||
| 1818 | + | ||
| 1819 | + c0 = T[--s]; | ||
| 1820 | + *i = c0; /* the slot now holds the preceding character */ | ||
| 1821 | + if(c0 != c2) { | ||
| 1822 | + BUCKET_A(c2) = k - SA; /* save the write cursor of the bucket being left */ | ||
| 1823 | + k = SA + BUCKET_A(c2 = c0); | ||
| 1824 | + } | ||
| 1825 | + assert(i < k); | ||
| 1826 | + if((0 < s) && (T[s - 1] < c0)) { | ||
| 1827 | + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA; /* this suffix is stored complemented below, so the later scan will not sample it: sample it here */ | ||
| 1828 | + *k++ = ~((int)T[s - 1]); | ||
| 1829 | + } else | ||
| 1830 | + *k++ = s; | ||
| 1831 | + } else if(s != 0) { | ||
| 1832 | + *i = ~s; /* negative entry: store its complement back */ | ||
| 1833 | + } else { | ||
| 1834 | + orig = i; /* remember the slot whose value is 0 */ | ||
| 1835 | + } | ||
| 1836 | + } | ||
| 1837 | + | ||
| 1838 | + return orig - SA; | ||
| 1839 | +} | ||
| 1840 | + | ||
| 1841 | + | ||
| 1842 | +/*---------------------------------------------------------------------------*/ | ||
| 1843 | + | ||
| 1844 | +/*- Function -*/ | ||
| 1845 | + | ||
| 1846 | +int | ||
| 1847 | +divsufsort(const unsigned char *T, int *SA, int n, int openMP) { /* Builds the suffix array of T[0..n-1] into SA. Returns 0 on success, -1 on invalid arguments, -2 when a bucket table cannot be allocated. */ | ||
| 1848 | + int *bucket_A, *bucket_B; | ||
| 1849 | + int m; | ||
| 1850 | + int err = 0; | ||
| 1851 | + | ||
| 1852 | + /* Check arguments; inputs of length 0, 1 and 2 are answered directly without allocating. */ | ||
| 1853 | + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } | ||
| 1854 | + else if(n == 0) { return 0; } | ||
| 1855 | + else if(n == 1) { SA[0] = 0; return 0; } | ||
| 1856 | + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } /* suffix 0 is placed first only when T[0] < T[1] */ | ||
| 1857 | + | ||
| 1858 | + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); | ||
| 1859 | + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); | ||
| 1860 | + | ||
| 1861 | + /* Suffixsort: sort_typeBstar returns m, which construct_SA then uses to fill in the rest of SA. */ | ||
| 1862 | + if((bucket_A != NULL) && (bucket_B != NULL)) { | ||
| 1863 | + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP); | ||
| 1864 | + construct_SA(T, SA, bucket_A, bucket_B, n, m); | ||
| 1865 | + } else { | ||
| 1866 | + err = -2; /* at least one bucket table could not be allocated */ | ||
| 1867 | + } | ||
| 1868 | + | ||
| 1869 | + free(bucket_B); /* free(NULL) is a no-op, so a partial allocation failure needs no special case */ | ||
| 1870 | + free(bucket_A); | ||
| 1871 | + | ||
| 1872 | + return err; | ||
| 1873 | +} | ||
| 1874 | + | ||
| 1875 | +int | ||
| 1876 | +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) { /* Writes the Burrows-Wheeler transform of T[0..n-1] into U. Returns the primary index (n itself when n <= 1), -1 on invalid arguments, -2 on allocation failure. */ | ||
| 1877 | + int *B; | ||
| 1878 | + int *bucket_A, *bucket_B; | ||
| 1879 | + int m, pidx, i; | ||
| 1880 | + | ||
| 1881 | + /* Check arguments; inputs of length 0 and 1 are answered directly. */ | ||
| 1882 | + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } | ||
| 1883 | + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } | ||
| 1884 | + | ||
| 1885 | + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } /* no caller-supplied temporary array: allocate n + 1 ints, released at the end */ | ||
| 1886 | + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); | ||
| 1887 | + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); | ||
| 1888 | + | ||
| 1889 | + /* Burrows-Wheeler Transform. */ | ||
| 1890 | + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { | ||
| 1891 | + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP); | ||
| 1892 | + | ||
| 1893 | + if (num_indexes == NULL || indexes == NULL) { /* secondary indexes are produced only when both output pointers are supplied */ | ||
| 1894 | + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); | ||
| 1895 | + } else { | ||
| 1896 | + pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes); | ||
| 1897 | + } | ||
| 1898 | + | ||
| 1899 | + /* Copy to output string: U[0] is the last input byte, B[0..pidx-1] lands in U[1..pidx], B[pidx] is skipped, and B[pidx+1..n-1] keeps its position. */ | ||
| 1900 | + U[0] = T[n - 1]; | ||
| 1901 | + for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; } | ||
| 1902 | + for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; } | ||
| 1903 | + pidx += 1; /* the value returned is one past the offset reported by construct_BWT* */ | ||
| 1904 | + } else { | ||
| 1905 | + pidx = -2; /* a working buffer could not be allocated */ | ||
| 1906 | + } | ||
| 1907 | + | ||
| 1908 | + free(bucket_B); | ||
| 1909 | + free(bucket_A); | ||
| 1910 | + if(A == NULL) { free(B); } /* only release B when it was allocated here, never the caller's array */ | ||
| 1911 | + | ||
| 1912 | + return pidx; | ||
| 1913 | +} |
vendor/github.com/DataDog/zstd/divsufsort.h
0 → 100644
| 1 | +/* | ||
| 2 | + * divsufsort.h for libdivsufsort-lite | ||
| 3 | + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. | ||
| 4 | + * | ||
| 5 | + * Permission is hereby granted, free of charge, to any person | ||
| 6 | + * obtaining a copy of this software and associated documentation | ||
| 7 | + * files (the "Software"), to deal in the Software without | ||
| 8 | + * restriction, including without limitation the rights to use, | ||
| 9 | + * copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
| 10 | + * copies of the Software, and to permit persons to whom the | ||
| 11 | + * Software is furnished to do so, subject to the following | ||
| 12 | + * conditions: | ||
| 13 | + * | ||
| 14 | + * The above copyright notice and this permission notice shall be | ||
| 15 | + * included in all copies or substantial portions of the Software. | ||
| 16 | + * | ||
| 17 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
| 18 | + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | ||
| 19 | + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| 20 | + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | ||
| 21 | + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
| 22 | + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
| 23 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
| 24 | + * OTHER DEALINGS IN THE SOFTWARE. | ||
| 25 | + */ | ||
| 26 | + | ||
| 27 | +#ifndef _DIVSUFSORT_H | ||
| 28 | +#define _DIVSUFSORT_H 1 | ||
| 29 | + | ||
| 30 | +#ifdef __cplusplus | ||
| 31 | +extern "C" { | ||
| 32 | +#endif /* __cplusplus */ | ||
| 33 | + | ||
| 34 | + | ||
| 35 | +/*- Prototypes -*/ | ||
| 36 | + | ||
| 37 | +/** | ||
| 38 | + * Constructs the suffix array of a given string. | ||
| 39 | + * @param T [0..n-1] The input string. | ||
| 40 | + * @param SA [0..n-1] The output array of suffixes. | ||
| 41 | + * @param n The length of the given string. | ||
| 42 | + * @param openMP enables OpenMP optimization. | ||
| 43 | + * @return 0 if no error occurred, -1 if an argument is invalid (NULL pointer or negative n), -2 if a working buffer could not be allocated. | ||
| 44 | + */ | ||
| 45 | +int | ||
| 46 | +divsufsort(const unsigned char *T, int *SA, int n, int openMP); | ||
| 47 | + | ||
| 48 | +/** | ||
| 49 | + * Constructs the burrows-wheeler transformed string of a given string. | ||
| 50 | + * @param T [0..n-1] The input string. | ||
| 51 | + * @param U [0..n-1] The output string. (can be T) | ||
| 52 | + * @param A [0..n-1] The temporary array. (can be NULL, in which case one is allocated internally) | ||
| 53 | + * @param n The length of the given string. | ||
| 54 | + * @param num_indexes Output: receives the number of secondary indexes. (can be NULL) | ||
| 55 | + * @param indexes Output: receives the secondary indexes. (can be NULL; they are produced only when both num_indexes and indexes are non-NULL) | ||
| 56 | + * @param openMP enables OpenMP optimization. | ||
| 57 | + * @return The primary index if no error occurred (n itself when n <= 1), -1 if an argument is invalid, -2 if a working buffer could not be allocated. | ||
| 58 | + */ | ||
| 59 | +int | ||
| 60 | +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP); | ||
| 61 | + | ||
| 62 | + | ||
| 63 | +#ifdef __cplusplus | ||
| 64 | +} /* extern "C" */ | ||
| 65 | +#endif /* __cplusplus */ | ||
| 66 | + | ||
| 67 | +#endif /* _DIVSUFSORT_H */ |
| 1 | +/* | ||
| 2 | + Common functions of New Generation Entropy library | ||
| 3 | + Copyright (C) 2016, Yann Collet. | ||
| 4 | + | ||
| 5 | + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | ||
| 6 | + | ||
| 7 | + Redistribution and use in source and binary forms, with or without | ||
| 8 | + modification, are permitted provided that the following conditions are | ||
| 9 | + met: | ||
| 10 | + | ||
| 11 | + * Redistributions of source code must retain the above copyright | ||
| 12 | + notice, this list of conditions and the following disclaimer. | ||
| 13 | + * Redistributions in binary form must reproduce the above | ||
| 14 | + copyright notice, this list of conditions and the following disclaimer | ||
| 15 | + in the documentation and/or other materials provided with the | ||
| 16 | + distribution. | ||
| 17 | + | ||
| 18 | + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 19 | + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 20 | + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 21 | + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 22 | + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 23 | + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 24 | + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 25 | + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 26 | + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 27 | + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 28 | + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 29 | + | ||
| 30 | + You can contact the author at : | ||
| 31 | + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy | ||
| 32 | + - Public forum : https://groups.google.com/forum/#!forum/lz4c | ||
| 33 | +*************************************************************************** */ | ||
| 34 | + | ||
| 35 | +/* ************************************* | ||
| 36 | +* Dependencies | ||
| 37 | +***************************************/ | ||
| 38 | +#include "mem.h" | ||
| 39 | +#include "error_private.h" /* ERR_*, ERROR */ | ||
| 40 | +#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ | ||
| 41 | +#include "fse.h" | ||
| 42 | +#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ | ||
| 43 | +#include "huf.h" | ||
| 44 | + | ||
| 45 | + | ||
| 46 | +/*=== Version : returns the compile-time FSE_VERSION_NUMBER constant ===*/ | ||
| 47 | +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } | ||
| 48 | + | ||
| 49 | + | ||
| 50 | +/*=== Error Management : the FSE_* and HUF_* entry points below forward unchanged to ERR_isError / ERR_getErrorName ===*/ | ||
| 51 | +unsigned FSE_isError(size_t code) { return ERR_isError(code); } | ||
| 52 | +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } | ||
| 53 | + | ||
| 54 | +unsigned HUF_isError(size_t code) { return ERR_isError(code); } | ||
| 55 | +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } | ||
| 56 | + | ||
| 57 | + | ||
| 58 | +/*-************************************************************** | ||
| 59 | +* FSE NCount encoding-decoding | ||
| 60 | +****************************************************************/ | ||
| 61 | +size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, | ||
| 62 | + const void* headerBuffer, size_t hbSize) | ||
| 63 | +{ /* Decodes a normalized-count header from headerBuffer into normalizedCounter[]. On entry *maxSVPtr is the largest symbol value the caller accepts; on success *maxSVPtr and *tableLogPtr are updated and the number of bytes consumed is returned. On failure an error code is returned (test it with FSE_isError). */ | ||
| 64 | + const BYTE* const istart = (const BYTE*) headerBuffer; | ||
| 65 | + const BYTE* const iend = istart + hbSize; | ||
| 66 | + const BYTE* ip = istart; | ||
| 67 | + int nbBits; | ||
| 68 | + int remaining; | ||
| 69 | + int threshold; | ||
| 70 | + U32 bitStream; | ||
| 71 | + int bitCount; | ||
| 72 | + unsigned charnum = 0; | ||
| 73 | + int previous0 = 0; | ||
| 74 | + | ||
| 75 | + if (hbSize < 4) { | ||
| 76 | + /* This function only works when hbSize >= 4, so copy the short header into a zero-padded 4-byte buffer and parse that instead */ | ||
| 77 | + char buffer[4]; | ||
| 78 | + memset(buffer, 0, sizeof(buffer)); | ||
| 79 | + memcpy(buffer, headerBuffer, hbSize); | ||
| 80 | + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, | ||
| 81 | + buffer, sizeof(buffer)); | ||
| 82 | + if (FSE_isError(countSize)) return countSize; | ||
| 83 | + if (countSize > hbSize) return ERROR(corruption_detected); /* the parse consumed padding bytes beyond the real header */ | ||
| 84 | + return countSize; | ||
| 85 | + } } | ||
| 86 | + assert(hbSize >= 4); | ||
| 87 | + | ||
| 88 | + /* init */ | ||
| 89 | + memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ | ||
| 90 | + bitStream = MEM_readLE32(ip); | ||
| 91 | + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ | ||
| 92 | + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); | ||
| 93 | + bitStream >>= 4; | ||
| 94 | + bitCount = 4; | ||
| 95 | + *tableLogPtr = nbBits; | ||
| 96 | + remaining = (1<<nbBits)+1; /* budget left to distribute, kept one above the true value: decoding is complete when it reaches 1 */ | ||
| 97 | + threshold = 1<<nbBits; | ||
| 98 | + nbBits++; | ||
| 99 | + | ||
| 100 | + while ((remaining>1) & (charnum<=*maxSVPtr)) { | ||
| 101 | + if (previous0) { /* the previous count was 0: a run of further zero-count symbols is encoded next */ | ||
| 102 | + unsigned n0 = charnum; | ||
| 103 | + while ((bitStream & 0xFFFF) == 0xFFFF) { /* each all-ones 16-bit group adds 24 zero-count symbols */ | ||
| 104 | + n0 += 24; | ||
| 105 | + if (ip < iend-5) { | ||
| 106 | + ip += 2; | ||
| 107 | + bitStream = MEM_readLE32(ip) >> bitCount; | ||
| 108 | + } else { | ||
| 109 | + bitStream >>= 16; /* too close to the end to advance ip: consume from the bits already loaded */ | ||
| 110 | + bitCount += 16; | ||
| 111 | + } } | ||
| 112 | + while ((bitStream & 3) == 3) { /* each 2-bit field equal to 3 adds 3 more and continues the run */ | ||
| 113 | + n0 += 3; | ||
| 114 | + bitStream >>= 2; | ||
| 115 | + bitCount += 2; | ||
| 116 | + } | ||
| 117 | + n0 += bitStream & 3; /* final 2-bit field (0..2) ends the run */ | ||
| 118 | + bitCount += 2; | ||
| 119 | + if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); | ||
| 120 | + while (charnum < n0) normalizedCounter[charnum++] = 0; | ||
| 121 | + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { | ||
| 122 | + assert((bitCount >> 3) <= 3); /* For first condition to work */ | ||
| 123 | + ip += bitCount>>3; | ||
| 124 | + bitCount &= 7; | ||
| 125 | + bitStream = MEM_readLE32(ip) >> bitCount; | ||
| 126 | + } else { | ||
| 127 | + bitStream >>= 2; | ||
| 128 | + } } | ||
| 129 | + { int const max = (2*threshold-1) - remaining; | ||
| 130 | + int count; | ||
| 131 | + | ||
| 132 | + if ((bitStream & (threshold-1)) < (U32)max) { /* small values are coded on one bit less (nbBits-1) */ | ||
| 133 | + count = bitStream & (threshold-1); | ||
| 134 | + bitCount += nbBits-1; | ||
| 135 | + } else { | ||
| 136 | + count = bitStream & (2*threshold-1); | ||
| 137 | + if (count >= threshold) count -= max; | ||
| 138 | + bitCount += nbBits; | ||
| 139 | + } | ||
| 140 | + | ||
| 141 | + count--; /* extra accuracy */ | ||
| 142 | + remaining -= count < 0 ? -count : count; /* -1 means +1 */ | ||
| 143 | + normalizedCounter[charnum++] = (short)count; | ||
| 144 | + previous0 = !count; | ||
| 145 | + while (remaining < threshold) { /* less budget remains: following values need fewer bits */ | ||
| 146 | + nbBits--; | ||
| 147 | + threshold >>= 1; | ||
| 148 | + } | ||
| 149 | + | ||
| 150 | + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { | ||
| 151 | + ip += bitCount>>3; | ||
| 152 | + bitCount &= 7; | ||
| 153 | + } else { | ||
| 154 | + bitCount -= (int)(8 * (iend - 4 - ip)); /* near the end of input: pin ip to the last 4 bytes and keep the excess in bitCount */ | ||
| 155 | + ip = iend - 4; | ||
| 156 | + } | ||
| 157 | + bitStream = MEM_readLE32(ip) >> (bitCount & 31); | ||
| 158 | + } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ | ||
| 159 | + if (remaining != 1) return ERROR(corruption_detected); /* the counts did not use up the budget exactly */ | ||
| 160 | + if (bitCount > 32) return ERROR(corruption_detected); /* more bits were consumed than the final 4-byte window holds */ | ||
| 161 | + *maxSVPtr = charnum-1; | ||
| 162 | + | ||
| 163 | + ip += (bitCount+7)>>3; /* round the consumed bits up to whole bytes */ | ||
| 164 | + return ip-istart; | ||
| 165 | +} | ||
| 166 | + | ||
| 167 | + | ||
| 168 | +/*! HUF_readStats() : | ||
| 169 | + Read compact Huffman tree, saved by HUF_writeCTable(). | ||
| 170 | + `huffWeight` is destination buffer, with room for `hwSize` weights. | ||
| 171 | + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX + 1 U32 (that many entries are cleared and indexed below). | ||
| 172 | + @return : size read from `src` , or an error Code . | ||
| 173 | + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . | ||
| 174 | +*/ | ||
| 175 | +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, | ||
| 176 | + U32* nbSymbolsPtr, U32* tableLogPtr, | ||
| 177 | + const void* src, size_t srcSize) | ||
| 178 | +{ | ||
| 179 | + U32 weightTotal; | ||
| 180 | + const BYTE* ip = (const BYTE*) src; | ||
| 181 | + size_t iSize; | ||
| 182 | + size_t oSize; | ||
| 183 | + | ||
| 184 | + if (!srcSize) return ERROR(srcSize_wrong); | ||
| 185 | + iSize = ip[0]; /* the first byte selects the encoding and carries its size */ | ||
| 186 | + /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ | ||
| 187 | + | ||
| 188 | + if (iSize >= 128) { /* special header : weights are stored directly, two 4-bit values per byte */ | ||
| 189 | + oSize = iSize - 127; /* number of weights stored */ | ||
| 190 | + iSize = ((oSize+1)/2); /* bytes needed to hold them */ | ||
| 191 | + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); | ||
| 192 | + if (oSize >= hwSize) return ERROR(corruption_detected); /* huffWeight[oSize] is written below, so oSize must stay below hwSize */ | ||
| 193 | + ip += 1; | ||
| 194 | + { U32 n; | ||
| 195 | + for (n=0; n<oSize; n+=2) { | ||
| 196 | + huffWeight[n] = ip[n/2] >> 4; /* high nibble first */ | ||
| 197 | + huffWeight[n+1] = ip[n/2] & 15; | ||
| 198 | + } } } | ||
| 199 | + else { /* header compressed with FSE (normal case) */ | ||
| 200 | + FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ | ||
| 201 | + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); | ||
| 202 | + oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ | ||
| 203 | + if (FSE_isError(oSize)) return oSize; | ||
| 204 | + } | ||
| 205 | + | ||
| 206 | + /* collect weight stats */ | ||
| 207 | + memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); | ||
| 208 | + weightTotal = 0; | ||
| 209 | + { U32 n; for (n=0; n<oSize; n++) { | ||
| 210 | + if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected); | ||
| 211 | + rankStats[huffWeight[n]]++; | ||
| 212 | + weightTotal += (1 << huffWeight[n]) >> 1; /* weight w contributes 2^(w-1); weight 0 contributes nothing */ | ||
| 213 | + } } | ||
| 214 | + if (weightTotal == 0) return ERROR(corruption_detected); | ||
| 215 | + | ||
| 216 | + /* get last non-null symbol weight (implied, total must be 2^n) */ | ||
| 217 | + { U32 const tableLog = BIT_highbit32(weightTotal) + 1; | ||
| 218 | + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); | ||
| 219 | + *tableLogPtr = tableLog; | ||
| 220 | + /* determine last weight */ | ||
| 221 | + { U32 const total = 1 << tableLog; | ||
| 222 | + U32 const rest = total - weightTotal; /* share left for the implied last symbol */ | ||
| 223 | + U32 const verif = 1 << BIT_highbit32(rest); | ||
| 224 | + U32 const lastWeight = BIT_highbit32(rest) + 1; | ||
| 225 | + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ | ||
| 226 | + huffWeight[oSize] = (BYTE)lastWeight; | ||
| 227 | + rankStats[lastWeight]++; | ||
| 228 | + } } | ||
| 229 | + | ||
| 230 | + /* check tree construction validity */ | ||
| 231 | + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ | ||
| 232 | + | ||
| 233 | + /* results */ | ||
| 234 | + *nbSymbolsPtr = (U32)(oSize+1); /* decoded weights plus the implied last one */ | ||
| 235 | + return iSize+1; /* payload bytes plus the leading size byte */ | ||
| 236 | +} |
-
请 注册 或 登录 后发表评论