正在显示
24 个修改的文件
包含
4858 行增加
和
0 行删除
@@ -4,6 +4,7 @@ go 1.14 | @@ -4,6 +4,7 @@ go 1.14 | ||
4 | 4 | ||
5 | require ( | 5 | require ( |
6 | github.com/GeeTeam/gt3-golang-sdk v0.0.0-20200116043922-446ca8a507d2 | 6 | github.com/GeeTeam/gt3-golang-sdk v0.0.0-20200116043922-446ca8a507d2 |
7 | + github.com/Shopify/sarama v1.23.1 | ||
7 | github.com/ajg/form v1.5.1 // indirect | 8 | github.com/ajg/form v1.5.1 // indirect |
8 | github.com/astaxie/beego v1.12.2 | 9 | github.com/astaxie/beego v1.12.2 |
9 | github.com/dgrijalva/jwt-go v3.2.0+incompatible | 10 | github.com/dgrijalva/jwt-go v3.2.0+incompatible |
1 | package main | 1 | package main |
2 | 2 | ||
3 | import ( | 3 | import ( |
4 | + "context" | ||
5 | + "fmt" | ||
6 | + "os" | ||
7 | + "os/signal" | ||
8 | + "syscall" | ||
9 | + | ||
4 | "github.com/astaxie/beego" | 10 | "github.com/astaxie/beego" |
5 | "github.com/astaxie/beego/logs" | 11 | "github.com/astaxie/beego/logs" |
6 | _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/pg" | 12 | _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/pg" |
7 | _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/log" | 13 | _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/log" |
8 | _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/port/beego" | 14 | _ "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/port/beego" |
15 | + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/port/consumer" | ||
9 | ) | 16 | ) |
10 | 17 | ||
11 | func main() { | 18 | func main() { |
19 | + sigs := make(chan os.Signal, 1) | ||
20 | + signal.Notify(sigs, os.Interrupt, os.Kill, syscall.SIGINT, syscall.SIGTERM) | ||
21 | + ctx, cancel := context.WithCancel(context.Background()) | ||
22 | + closeConsumer, err := consumer.StartConsumer(ctx) | ||
23 | + if err != nil { | ||
24 | + fmt.Printf("启动kafka消息消费者失败 err%s \n", err) | ||
25 | + logs.Error("启动kafka消息消费者失败:%s", err) | ||
26 | + return | ||
27 | + } | ||
28 | + go func() { | ||
29 | + select { | ||
30 | + case <-sigs: | ||
31 | + cancel() | ||
32 | + closeConsumer() | ||
33 | + } | ||
34 | + }() | ||
35 | + | ||
12 | logs.Info("应用启动") | 36 | logs.Info("应用启动") |
13 | beego.Run() | 37 | beego.Run() |
14 | } | 38 | } |
@@ -21,6 +21,14 @@ func CreateOrderBaseDao(options map[string]interface{}) (*dao.OrderBaseDao, erro | @@ -21,6 +21,14 @@ func CreateOrderBaseDao(options map[string]interface{}) (*dao.OrderBaseDao, erro | ||
21 | return dao.NewOrderBaseDao(transactionContext) | 21 | return dao.NewOrderBaseDao(transactionContext) |
22 | } | 22 | } |
23 | 23 | ||
24 | +func CreateOrderBestshopDao(options map[string]interface{}) (*dao.OrderBestshopDao, error) { | ||
25 | + var transactionContext *transaction.TransactionContext | ||
26 | + if value, ok := options["transactionContext"]; ok { | ||
27 | + transactionContext = value.(*transaction.TransactionContext) | ||
28 | + } | ||
29 | + return dao.NewOrderBestshopDao(transactionContext) | ||
30 | +} | ||
31 | + | ||
24 | func CreateUsersDao(options map[string]interface{}) (*dao.UsersDao, error) { | 32 | func CreateUsersDao(options map[string]interface{}) (*dao.UsersDao, error) { |
25 | var transactionContext *transaction.TransactionContext | 33 | var transactionContext *transaction.TransactionContext |
26 | if value, ok := options["transactionContext"]; ok { | 34 | if value, ok := options["transactionContext"]; ok { |
@@ -8,6 +8,8 @@ type CreateOrderFromBestshop struct { | @@ -8,6 +8,8 @@ type CreateOrderFromBestshop struct { | ||
8 | OrderCode string `json:"orderCode"` | 8 | OrderCode string `json:"orderCode"` |
9 | //下单时间 | 9 | //下单时间 |
10 | OrderTime string `json:"orderTime"` | 10 | OrderTime string `json:"orderTime"` |
11 | + //公司id | ||
12 | + CompanyId int64 `json:"companyId"` | ||
11 | //订单状态 | 13 | //订单状态 |
12 | OrderState int8 `json:"orderState"` | 14 | OrderState int8 `json:"orderState"` |
13 | //发货状态 | 15 | //发货状态 |
@@ -4,6 +4,8 @@ import ( | @@ -4,6 +4,8 @@ import ( | ||
4 | "fmt" | 4 | "fmt" |
5 | "time" | 5 | "time" |
6 | 6 | ||
7 | + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/dao" | ||
8 | + | ||
7 | "github.com/astaxie/beego/logs" | 9 | "github.com/astaxie/beego/logs" |
8 | 10 | ||
9 | "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/application/factory" | 11 | "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/application/factory" |
@@ -37,6 +39,25 @@ func (s SyncOrderService) SyncOrderFromBestshop(cmd command.CreateOrderFromBests | @@ -37,6 +39,25 @@ func (s SyncOrderService) SyncOrderFromBestshop(cmd command.CreateOrderFromBests | ||
37 | defer func() { | 39 | defer func() { |
38 | transactionContext.RollbackTransaction() | 40 | transactionContext.RollbackTransaction() |
39 | }() | 41 | }() |
42 | + | ||
43 | + //检查账号是否存在 | ||
44 | + var ( | ||
45 | + orderBestshopDao *dao.OrderBestshopDao | ||
46 | + ) | ||
47 | + if orderBestshopDao, err = factory.CreateOrderBestshopDao(map[string]interface{}{ | ||
48 | + "transactionContext": transactionContext, | ||
49 | + }); err != nil { | ||
50 | + return lib.ThrowError(lib.TRANSACTION_ERROR, err.Error()) | ||
51 | + } | ||
52 | + ok, err := orderBestshopDao.OrderExist(cmd.OrderCode) | ||
53 | + if err != nil { | ||
54 | + return lib.ThrowError(lib.TRANSACTION_ERROR, err.Error()) | ||
55 | + } | ||
56 | + if ok { | ||
57 | + logs.Info("订单已存在,order_code=%s", cmd.OrderCode) | ||
58 | + return nil | ||
59 | + } | ||
60 | + | ||
40 | var ( | 61 | var ( |
41 | orderBestshopRepository domain.OrderBestshopRepository | 62 | orderBestshopRepository domain.OrderBestshopRepository |
42 | orderGoodBestshopRepository domain.OrderGoodBestshopRepository | 63 | orderGoodBestshopRepository domain.OrderGoodBestshopRepository |
@@ -173,11 +194,25 @@ func (s SyncOrderService) copyOrderBestshopToOrderBase(orderBestshop *domain.Ord | @@ -173,11 +194,25 @@ func (s SyncOrderService) copyOrderBestshopToOrderBase(orderBestshop *domain.Ord | ||
173 | ordergoods []domain.OrderGood | 194 | ordergoods []domain.OrderGood |
174 | ) | 195 | ) |
175 | //TODO 添加orderBase | 196 | //TODO 添加orderBase |
197 | + orderBestshop.CopyToOrderBase(&orderbase) | ||
198 | + orderbase.CompanyId = companyData.Id | ||
199 | + for i := range orderBestshop.Goods { | ||
200 | + good := domain.NewOrderGood() | ||
201 | + orderBestshop.Goods[i].CopyToOrderGood(&good) | ||
202 | + good.OrderId = orderbase.Id | ||
203 | + good.Compute() | ||
204 | + ordergoods = append(ordergoods, good) | ||
205 | + } | ||
206 | + orderbase.Goods = ordergoods | ||
207 | + orderbase.Compute() | ||
176 | err = orderBaseRepository.Save(&orderbase) | 208 | err = orderBaseRepository.Save(&orderbase) |
177 | if err != nil { | 209 | if err != nil { |
178 | e := fmt.Sprintf("添加order_base数据失败%s", err) | 210 | e := fmt.Sprintf("添加order_base数据失败%s", err) |
179 | return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, e) | 211 | return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, e) |
180 | } | 212 | } |
213 | + for i := range ordergoods { | ||
214 | + ordergoods[i].OrderId = orderbase.Id | ||
215 | + } | ||
181 | //TODO 添加goods | 216 | //TODO 添加goods |
182 | err = orderGoodRepository.Save(ordergoods) | 217 | err = orderGoodRepository.Save(ordergoods) |
183 | if err != nil { | 218 | if err != nil { |
@@ -185,6 +220,7 @@ func (s SyncOrderService) copyOrderBestshopToOrderBase(orderBestshop *domain.Ord | @@ -185,6 +220,7 @@ func (s SyncOrderService) copyOrderBestshopToOrderBase(orderBestshop *domain.Ord | ||
185 | return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, e) | 220 | return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, e) |
186 | } | 221 | } |
187 | //TODO 更新isCopy | 222 | //TODO 更新isCopy |
223 | + orderBestshop.IsCopy = true | ||
188 | err = orderBestshopRepository.Edit(orderBestshop) | 224 | err = orderBestshopRepository.Edit(orderBestshop) |
189 | if err != nil { | 225 | if err != nil { |
190 | return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, err.Error()) | 226 | return lib.ThrowError(lib.INTERNAL_SERVER_ERROR, err.Error()) |
pkg/infrastructure/dao/pg_order_bestshop.go
0 → 100644
1 | +package dao | ||
2 | + | ||
3 | +import ( | ||
4 | + "fmt" | ||
5 | + | ||
6 | + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/pg/models" | ||
7 | + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/infrastructure/pg/transaction" | ||
8 | +) | ||
9 | + | ||
10 | +type OrderBestshopDao struct { | ||
11 | + transactionContext *transaction.TransactionContext | ||
12 | +} | ||
13 | + | ||
14 | +func NewOrderBestshopDao(transactionContext *transaction.TransactionContext) (*OrderBestshopDao, error) { | ||
15 | + if transactionContext == nil { | ||
16 | + return nil, fmt.Errorf("transactionContext参数不能为nil") | ||
17 | + } else { | ||
18 | + return &OrderBestshopDao{ | ||
19 | + transactionContext: transactionContext, | ||
20 | + }, nil | ||
21 | + } | ||
22 | +} | ||
23 | + | ||
24 | +func (dao OrderBestshopDao) OrderExist(orderCode string) (bool, error) { | ||
25 | + tx := dao.transactionContext.GetDB() | ||
26 | + m := models.OrderBestshop{} | ||
27 | + query := tx.Model(m).Where("order_code=?", orderCode) | ||
28 | + ok, err := query.Exists() | ||
29 | + return ok, err | ||
30 | +} |
pkg/port/consumer/configs/config.go
0 → 100644
1 | +package configs | ||
2 | + | ||
3 | +type MqConfig struct { | ||
4 | + Servers []string `json:"servers"` | ||
5 | + ConsumerId string `json:"consumerGroup"` | ||
6 | +} | ||
7 | + | ||
8 | +var Cfg MqConfig | ||
9 | + | ||
10 | +func init() { | ||
11 | + Cfg = MqConfig{ | ||
12 | + Servers: []string{"192.168.190.136:9092"}, | ||
13 | + ConsumerId: "partnermg", | ||
14 | + } | ||
15 | +} | ||
16 | + | ||
17 | +// "", | ||
18 | +// "106.52.15.41:9092" |
pkg/port/consumer/consumer.go
0 → 100644
1 | +package consumer | ||
2 | + | ||
3 | +import ( | ||
4 | + "context" | ||
5 | + "errors" | ||
6 | + "sync" | ||
7 | + | ||
8 | + "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/port/consumer/configs" | ||
9 | + | ||
10 | + "github.com/Shopify/sarama" | ||
11 | + "github.com/astaxie/beego/logs" | ||
12 | +) | ||
13 | + | ||
14 | +//MessageConsumer 消息消费者 | ||
15 | +type MessageConsumer struct { | ||
16 | + ready chan bool | ||
17 | + kafkaHosts []string | ||
18 | + groupId string | ||
19 | + topics []string | ||
20 | + topicsHandles map[string]TopicHandle | ||
21 | +} | ||
22 | + | ||
23 | +func NewMessageConsumer() *MessageConsumer { | ||
24 | + topics := []string{} | ||
25 | + for key := range TopicHandleRouters { | ||
26 | + topics = append(topics, key) | ||
27 | + } | ||
28 | + return &MessageConsumer{ | ||
29 | + ready: make(chan bool), | ||
30 | + kafkaHosts: configs.Cfg.Servers, | ||
31 | + groupId: configs.Cfg.ConsumerId, | ||
32 | + topicsHandles: TopicHandleRouters, | ||
33 | + topics: topics, | ||
34 | + } | ||
35 | +} | ||
36 | + | ||
37 | +//实现对应的接口 | ||
38 | +var _ sarama.ConsumerGroupHandler = (*MessageConsumer)(nil) | ||
39 | + | ||
40 | +func (c *MessageConsumer) Setup(groupSession sarama.ConsumerGroupSession) error { | ||
41 | + close(c.ready) | ||
42 | + return nil | ||
43 | +} | ||
44 | + | ||
45 | +func (c *MessageConsumer) Cleanup(groupSession sarama.ConsumerGroupSession) error { | ||
46 | + return nil | ||
47 | +} | ||
48 | + | ||
49 | +func (c *MessageConsumer) ConsumeClaim(groupSession sarama.ConsumerGroupSession, | ||
50 | + groupClaim sarama.ConsumerGroupClaim) error { | ||
51 | + var ( | ||
52 | + topicHandle TopicHandle | ||
53 | + err error | ||
54 | + ) | ||
55 | + for message := range groupClaim.Messages() { | ||
56 | + if topicHandle, err = c.FindTopichandle(groupClaim.Topic()); err != nil { | ||
57 | + logs.Error("FindTopichandle err:%s \n", err) | ||
58 | + continue | ||
59 | + } | ||
60 | + if err = topicHandle(message); err != nil { | ||
61 | + logs.Error("Message claimed: kafka消息处理错误 topic =", message.Topic, message.Offset, err) | ||
62 | + } | ||
63 | + groupSession.MarkMessage(message, "") | ||
64 | + } | ||
65 | + return nil | ||
66 | +} | ||
67 | + | ||
68 | +func (c *MessageConsumer) FindTopichandle(topic string) (TopicHandle, error) { | ||
69 | + if v, ok := c.topicsHandles[topic]; ok { | ||
70 | + return v, nil | ||
71 | + } | ||
72 | + return nil, errors.New("TopicHandle not found") | ||
73 | +} | ||
74 | + | ||
75 | +//StartConsumer 启动 | ||
76 | +//返回 Consumer关闭方法 和 error | ||
77 | +func StartConsumer(ctx context.Context) (func(), error) { | ||
78 | + consumer := NewMessageConsumer() | ||
79 | + config := sarama.NewConfig() | ||
80 | + config.Consumer.Group.Rebalance.Strategy = sarama.BalanceStrategyRoundRobin | ||
81 | + config.Consumer.Offsets.Initial = sarama.OffsetNewest | ||
82 | + config.Version = sarama.V0_11_0_2 | ||
83 | + consumerGroup, err := sarama.NewConsumerGroup(consumer.kafkaHosts, consumer.groupId, config) | ||
84 | + if err != nil { | ||
85 | + return nil, err | ||
86 | + } | ||
87 | + wg := &sync.WaitGroup{} | ||
88 | + wg.Add(1) | ||
89 | + go func() { | ||
90 | + defer wg.Done() | ||
91 | + for { | ||
92 | + if err := ctx.Err(); err != nil { | ||
93 | + logs.Error("ctx err:%s \n", err) | ||
94 | + return | ||
95 | + } | ||
96 | + if err := consumerGroup.Consume(ctx, consumer.topics, consumer); err != nil { | ||
97 | + logs.Error("consumerGroup err:%s \n", err) | ||
98 | + } | ||
99 | + } | ||
100 | + }() | ||
101 | + //等待 consumerGroup 设置完成 | ||
102 | + <-consumer.ready | ||
103 | + logs.Error("Sarama consumer up and running!...") | ||
104 | + return func() { | ||
105 | + wg.Wait() | ||
106 | + if err := consumerGroup.Close(); err != nil { | ||
107 | + logs.Error("consumerGroup.Close err %s", err) | ||
108 | + } | ||
109 | + logs.Info("consumerGroup.Close") | ||
110 | + }, nil | ||
111 | +} |
pkg/port/consumer/topic_handle_router.go
0 → 100644
1 | +package consumer | ||
2 | + | ||
3 | +import ( | ||
4 | + "fmt" | ||
5 | + | ||
6 | + "github.com/Shopify/sarama" | ||
7 | +) | ||
8 | + | ||
9 | +//TopicHandle 处理kafka中得消息 | ||
10 | +type TopicHandle func(*sarama.ConsumerMessage) error | ||
11 | + | ||
12 | +//TopicHandleRouters 根据topic区分消息并进行处理 | ||
13 | +var TopicHandleRouters = map[string]TopicHandle{ | ||
14 | + "topic_test": func(message *sarama.ConsumerMessage) error { | ||
15 | + fmt.Printf("Done Message claimed: timestamp = %v, topic = %s offset = %v value = %v \n", | ||
16 | + message.Timestamp, message.Topic, message.Offset, string(message.Value)) | ||
17 | + return nil | ||
18 | + }, | ||
19 | + "bestshop_order": SyncBestshopOrder, | ||
20 | +} |
pkg/port/consumer/topic_handles.go
0 → 100644
1 | +package consumer | ||
2 | + | ||
3 | +import ( | ||
4 | + "encoding/json" | ||
5 | + "fmt" | ||
6 | + | ||
7 | + "github.com/Shopify/sarama" | ||
8 | + "github.com/astaxie/beego/logs" | ||
9 | + syncOrderCmd "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/application/syncOrder/command" | ||
10 | + syncOrderSrv "gitlab.fjmaimaimai.com/mmm-go/partnermg/pkg/application/syncOrder/service" | ||
11 | +) | ||
12 | + | ||
13 | +//SyncBestshopOrder 同步 | ||
14 | +func SyncBestshopOrder(message *sarama.ConsumerMessage) error { | ||
15 | + logs.Info("Done Message claimed: timestamp = %v, topic = %s offset = %v value = %v \n", | ||
16 | + message.Timestamp, message.Topic, message.Offset, string(message.Value)) | ||
17 | + var ( | ||
18 | + cmd syncOrderCmd.CreateOrderFromBestshop | ||
19 | + err error | ||
20 | + ) | ||
21 | + err = json.Unmarshal(message.Value, &cmd) | ||
22 | + if err != nil { | ||
23 | + return fmt.Errorf("[SyncBestshopOrder] 解析kafka数据失败;%s", err) | ||
24 | + } | ||
25 | + if cmd.PartnerId <= 0 { | ||
26 | + logs.Info("[SyncBestshopOrder] PartnerId<=0 ,不处理消息") | ||
27 | + return nil | ||
28 | + } | ||
29 | + srv := syncOrderSrv.NewOrderInfoService(nil) | ||
30 | + err = srv.SyncOrderFromBestshop(cmd) | ||
31 | + return err | ||
32 | +} |
vendor/github.com/DataDog/zstd/.travis.yml
0 → 100644
1 | +dist: xenial | ||
2 | +language: go | ||
3 | + | ||
4 | +go: | ||
5 | + - 1.10.x | ||
6 | + - 1.11.x | ||
7 | + - 1.12.x | ||
8 | + | ||
9 | +os: | ||
10 | + - linux | ||
11 | + - osx | ||
12 | + | ||
13 | +matrix: | ||
14 | + include: | ||
15 | + name: "Go 1.11.x CentOS 32bits" | ||
16 | + language: go | ||
17 | + go: 1.11.x | ||
18 | + os: linux | ||
19 | + services: | ||
20 | + - docker | ||
21 | + script: | ||
22 | + # Please update Go version in travis_test_32 as needed | ||
23 | + - "docker run -i -v \"${PWD}:/zstd\" toopher/centos-i386:centos6 /bin/bash -c \"linux32 --32bit i386 /zstd/travis_test_32.sh\"" | ||
24 | + | ||
25 | +install: | ||
26 | + - "wget https://github.com/DataDog/zstd/files/2246767/mr.zip" | ||
27 | + - "unzip mr.zip" | ||
28 | +script: | ||
29 | + - "go build" | ||
30 | + - "PAYLOAD=`pwd`/mr go test -v" | ||
31 | + - "PAYLOAD=`pwd`/mr go test -bench ." |
vendor/github.com/DataDog/zstd/LICENSE
0 → 100644
1 | +Simplified BSD License | ||
2 | + | ||
3 | +Copyright (c) 2016, Datadog <info@datadoghq.com> | ||
4 | +All rights reserved. | ||
5 | + | ||
6 | +Redistribution and use in source and binary forms, with or without | ||
7 | +modification, are permitted provided that the following conditions are met: | ||
8 | + | ||
9 | + * Redistributions of source code must retain the above copyright notice, | ||
10 | + this list of conditions and the following disclaimer. | ||
11 | + * Redistributions in binary form must reproduce the above copyright notice, | ||
12 | + this list of conditions and the following disclaimer in the documentation | ||
13 | + and/or other materials provided with the distribution. | ||
14 | + * Neither the name of the copyright holder nor the names of its contributors | ||
15 | + may be used to endorse or promote products derived from this software | ||
16 | + without specific prior written permission. | ||
17 | + | ||
18 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
19 | +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
20 | +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
21 | +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | ||
22 | +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
23 | +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
24 | +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
25 | +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
26 | +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
27 | +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
vendor/github.com/DataDog/zstd/README.md
0 → 100644
1 | +# Zstd Go Wrapper | ||
2 | + | ||
3 | +[C Zstd Homepage](https://github.com/Cyan4973/zstd) | ||
4 | + | ||
5 | +The current headers and C files are from *v1.3.8* (Commit | ||
6 | +[470344d](https://github.com/facebook/zstd/releases/tag/v1.3.8)). | ||
7 | + | ||
8 | +## Usage | ||
9 | + | ||
10 | +There are two main APIs: | ||
11 | + | ||
12 | +* simple Compress/Decompress | ||
13 | +* streaming API (io.Reader/io.Writer) | ||
14 | + | ||
15 | +The compress/decompress APIs mirror that of lz4, while the streaming API was | ||
16 | +designed to be a drop-in replacement for zlib. | ||
17 | + | ||
18 | +### Simple `Compress/Decompress` | ||
19 | + | ||
20 | + | ||
21 | +```go | ||
22 | +// Compress compresses the byte array given in src and writes it to dst. | ||
23 | +// If you already have a buffer allocated, you can pass it to prevent allocation | ||
24 | +// If not, you can pass nil as dst. | ||
25 | +// If the buffer is too small, it will be reallocated, resized, and returned bu the function | ||
26 | +// If dst is nil, this will allocate the worst case size (CompressBound(src)) | ||
27 | +Compress(dst, src []byte) ([]byte, error) | ||
28 | +``` | ||
29 | + | ||
30 | +```go | ||
31 | +// CompressLevel is the same as Compress but you can pass another compression level | ||
32 | +CompressLevel(dst, src []byte, level int) ([]byte, error) | ||
33 | +``` | ||
34 | + | ||
35 | +```go | ||
36 | +// Decompress will decompress your payload into dst. | ||
37 | +// If you already have a buffer allocated, you can pass it to prevent allocation | ||
38 | +// If not, you can pass nil as dst (allocates a 4*src size as default). | ||
39 | +// If the buffer is too small, it will retry 3 times by doubling the dst size | ||
40 | +// After max retries, it will switch to the slower stream API to be sure to be able | ||
41 | +// to decompress. Currently switches if compression ratio > 4*2**3=32. | ||
42 | +Decompress(dst, src []byte) ([]byte, error) | ||
43 | +``` | ||
44 | + | ||
45 | +### Stream API | ||
46 | + | ||
47 | +```go | ||
48 | +// NewWriter creates a new object that can optionally be initialized with | ||
49 | +// a precomputed dictionary. If dict is nil, compress without a dictionary. | ||
50 | +// The dictionary array should not be changed during the use of this object. | ||
51 | +// You MUST CALL Close() to write the last bytes of a zstd stream and free C objects. | ||
52 | +NewWriter(w io.Writer) *Writer | ||
53 | +NewWriterLevel(w io.Writer, level int) *Writer | ||
54 | +NewWriterLevelDict(w io.Writer, level int, dict []byte) *Writer | ||
55 | + | ||
56 | +// Write compresses the input data and write it to the underlying writer | ||
57 | +(w *Writer) Write(p []byte) (int, error) | ||
58 | + | ||
59 | +// Close flushes the buffer and frees C zstd objects | ||
60 | +(w *Writer) Close() error | ||
61 | +``` | ||
62 | + | ||
63 | +```go | ||
64 | +// NewReader returns a new io.ReadCloser that will decompress data from the | ||
65 | +// underlying reader. If a dictionary is provided to NewReaderDict, it must | ||
66 | +// not be modified until Close is called. It is the caller's responsibility | ||
67 | +// to call Close, which frees up C objects. | ||
68 | +NewReader(r io.Reader) io.ReadCloser | ||
69 | +NewReaderDict(r io.Reader, dict []byte) io.ReadCloser | ||
70 | +``` | ||
71 | + | ||
72 | +### Benchmarks (benchmarked with v0.5.0) | ||
73 | + | ||
74 | +The author of Zstd also wrote lz4. Zstd is intended to occupy a speed/ratio | ||
75 | +level similar to what zlib currently provides. In our tests, the can always | ||
76 | +be made to be better than zlib by chosing an appropriate level while still | ||
77 | +keeping compression and decompression time faster than zlib. | ||
78 | + | ||
79 | +You can run the benchmarks against your own payloads by using the Go benchmarks tool. | ||
80 | +Just export your payload filepath as the `PAYLOAD` environment variable and run the benchmarks: | ||
81 | + | ||
82 | +```go | ||
83 | +go test -bench . | ||
84 | +``` | ||
85 | + | ||
86 | +Compression of a 7Mb pdf zstd (this wrapper) vs [czlib](https://github.com/DataDog/czlib): | ||
87 | +``` | ||
88 | +BenchmarkCompression 5 221056624 ns/op 67.34 MB/s | ||
89 | +BenchmarkDecompression 100 18370416 ns/op 810.32 MB/s | ||
90 | + | ||
91 | +BenchmarkFzlibCompress 2 610156603 ns/op 24.40 MB/s | ||
92 | +BenchmarkFzlibDecompress 20 81195246 ns/op 183.33 MB/s | ||
93 | +``` | ||
94 | + | ||
95 | +Ratio is also better by a margin of ~20%. | ||
96 | +Compression speed is always better than zlib on all the payloads we tested; | ||
97 | +However, [czlib](https://github.com/DataDog/czlib) has optimisations that make it | ||
98 | +faster at decompressiong small payloads: | ||
99 | + | ||
100 | +``` | ||
101 | +Testing with size: 11... czlib: 8.97 MB/s, zstd: 3.26 MB/s | ||
102 | +Testing with size: 27... czlib: 23.3 MB/s, zstd: 8.22 MB/s | ||
103 | +Testing with size: 62... czlib: 31.6 MB/s, zstd: 19.49 MB/s | ||
104 | +Testing with size: 141... czlib: 74.54 MB/s, zstd: 42.55 MB/s | ||
105 | +Testing with size: 323... czlib: 155.14 MB/s, zstd: 99.39 MB/s | ||
106 | +Testing with size: 739... czlib: 235.9 MB/s, zstd: 216.45 MB/s | ||
107 | +Testing with size: 1689... czlib: 116.45 MB/s, zstd: 345.64 MB/s | ||
108 | +Testing with size: 3858... czlib: 176.39 MB/s, zstd: 617.56 MB/s | ||
109 | +Testing with size: 8811... czlib: 254.11 MB/s, zstd: 824.34 MB/s | ||
110 | +Testing with size: 20121... czlib: 197.43 MB/s, zstd: 1339.11 MB/s | ||
111 | +Testing with size: 45951... czlib: 201.62 MB/s, zstd: 1951.57 MB/s | ||
112 | +``` | ||
113 | + | ||
114 | +zstd starts to shine with payloads > 1KB | ||
115 | + | ||
116 | +### Stability - Current state: STABLE | ||
117 | + | ||
118 | +The C library seems to be pretty stable and according to the author has been tested and fuzzed. | ||
119 | + | ||
120 | +For the Go wrapper, the test cover most usual cases and we have succesfully tested it on all staging and prod data. |
vendor/github.com/DataDog/zstd/ZSTD_LICENSE
0 → 100644
1 | +BSD License | ||
2 | + | ||
3 | +For Zstandard software | ||
4 | + | ||
5 | +Copyright (c) 2016-present, Facebook, Inc. All rights reserved. | ||
6 | + | ||
7 | +Redistribution and use in source and binary forms, with or without modification, | ||
8 | +are permitted provided that the following conditions are met: | ||
9 | + | ||
10 | + * Redistributions of source code must retain the above copyright notice, this | ||
11 | + list of conditions and the following disclaimer. | ||
12 | + | ||
13 | + * Redistributions in binary form must reproduce the above copyright notice, | ||
14 | + this list of conditions and the following disclaimer in the documentation | ||
15 | + and/or other materials provided with the distribution. | ||
16 | + | ||
17 | + * Neither the name Facebook nor the names of its contributors may be used to | ||
18 | + endorse or promote products derived from this software without specific | ||
19 | + prior written permission. | ||
20 | + | ||
21 | +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
22 | +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
23 | +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
24 | +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR | ||
25 | +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
26 | +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
27 | +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | ||
28 | +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
29 | +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
30 | +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
vendor/github.com/DataDog/zstd/bitstream.h
0 → 100644
1 | +/* ****************************************************************** | ||
2 | + bitstream | ||
3 | + Part of FSE library | ||
4 | + Copyright (C) 2013-present, Yann Collet. | ||
5 | + | ||
6 | + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | ||
7 | + | ||
8 | + Redistribution and use in source and binary forms, with or without | ||
9 | + modification, are permitted provided that the following conditions are | ||
10 | + met: | ||
11 | + | ||
12 | + * Redistributions of source code must retain the above copyright | ||
13 | + notice, this list of conditions and the following disclaimer. | ||
14 | + * Redistributions in binary form must reproduce the above | ||
15 | + copyright notice, this list of conditions and the following disclaimer | ||
16 | + in the documentation and/or other materials provided with the | ||
17 | + distribution. | ||
18 | + | ||
19 | + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
20 | + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
21 | + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
22 | + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
23 | + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
24 | + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
25 | + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
26 | + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
27 | + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
28 | + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
29 | + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
30 | + | ||
31 | + You can contact the author at : | ||
32 | + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | ||
33 | +****************************************************************** */ | ||
34 | +#ifndef BITSTREAM_H_MODULE | ||
35 | +#define BITSTREAM_H_MODULE | ||
36 | + | ||
37 | +#if defined (__cplusplus) | ||
38 | +extern "C" { | ||
39 | +#endif | ||
40 | + | ||
41 | +/* | ||
42 | +* This API consists of small unitary functions, which must be inlined for best performance. | ||
43 | +* Since link-time-optimization is not available for all compilers, | ||
44 | +* these functions are defined into a .h to be included. | ||
45 | +*/ | ||
46 | + | ||
47 | +/*-**************************************** | ||
48 | +* Dependencies | ||
49 | +******************************************/ | ||
50 | +#include "mem.h" /* unaligned access routines */ | ||
51 | +#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ | ||
52 | +#include "error_private.h" /* error codes and messages */ | ||
53 | + | ||
54 | + | ||
55 | +/*========================================= | ||
56 | +* Target specific | ||
57 | +=========================================*/ | ||
58 | +#if defined(__BMI__) && defined(__GNUC__) | ||
59 | +# include <immintrin.h> /* support for bextr (experimental) */ | ||
60 | +#endif | ||
61 | + | ||
62 | +#define STREAM_ACCUMULATOR_MIN_32 25 | ||
63 | +#define STREAM_ACCUMULATOR_MIN_64 57 | ||
64 | +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) | ||
65 | + | ||
66 | + | ||
67 | +/*-****************************************** | ||
68 | +* bitStream encoding API (write forward) | ||
69 | +********************************************/ | ||
70 | +/* bitStream can mix input from multiple sources. | ||
71 | + * A critical property of these streams is that they encode and decode in **reverse** direction. | ||
72 | + * So the first bit sequence you add will be the last to be read, like a LIFO stack. | ||
73 | + */ | ||
74 | +typedef struct { | ||
75 | + size_t bitContainer; | ||
76 | + unsigned bitPos; | ||
77 | + char* startPtr; | ||
78 | + char* ptr; | ||
79 | + char* endPtr; | ||
80 | +} BIT_CStream_t; | ||
81 | + | ||
82 | +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); | ||
83 | +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); | ||
84 | +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); | ||
85 | +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); | ||
86 | + | ||
87 | +/* Start with initCStream, providing the size of buffer to write into. | ||
88 | +* bitStream will never write outside of this buffer. | ||
89 | +* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. | ||
90 | +* | ||
91 | +* bits are first added to a local register. | ||
92 | +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. | ||
93 | +* Writing data into memory is an explicit operation, performed by the flushBits function. | ||
94 | +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. | ||
95 | +* After a flushBits, a maximum of 7 bits might still be stored into local register. | ||
96 | +* | ||
97 | +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. | ||
98 | +* | ||
99 | +* Last operation is to close the bitStream. | ||
100 | +* The function returns the final size of CStream in bytes. | ||
101 | +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) | ||
102 | +*/ | ||
103 | + | ||
104 | + | ||
105 | +/*-******************************************** | ||
106 | +* bitStream decoding API (read backward) | ||
107 | +**********************************************/ | ||
108 | +typedef struct { | ||
109 | + size_t bitContainer; | ||
110 | + unsigned bitsConsumed; | ||
111 | + const char* ptr; | ||
112 | + const char* start; | ||
113 | + const char* limitPtr; | ||
114 | +} BIT_DStream_t; | ||
115 | + | ||
116 | +typedef enum { BIT_DStream_unfinished = 0, | ||
117 | + BIT_DStream_endOfBuffer = 1, | ||
118 | + BIT_DStream_completed = 2, | ||
119 | + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ | ||
120 | + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ | ||
121 | + | ||
122 | +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); | ||
123 | +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); | ||
124 | +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); | ||
125 | +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); | ||
126 | + | ||
127 | + | ||
128 | +/* Start by invoking BIT_initDStream(). | ||
129 | +* A chunk of the bitStream is then stored into a local register. | ||
130 | +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). | ||
131 | +* You can then retrieve bitFields stored into the local register, **in reverse order**. | ||
132 | +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. | ||
133 | +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. | ||
134 | +* Otherwise, it can be less than that, so proceed accordingly. | ||
135 | +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). | ||
136 | +*/ | ||
137 | + | ||
138 | + | ||
139 | +/*-**************************************** | ||
140 | +* unsafe API | ||
141 | +******************************************/ | ||
142 | +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); | ||
143 | +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ | ||
144 | + | ||
145 | +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); | ||
146 | +/* unsafe version; does not check buffer overflow */ | ||
147 | + | ||
148 | +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); | ||
149 | +/* faster, but works only if nbBits >= 1 */ | ||
150 | + | ||
151 | + | ||
152 | + | ||
153 | +/*-************************************************************** | ||
154 | +* Internal functions | ||
155 | +****************************************************************/ | ||
156 | +MEM_STATIC unsigned BIT_highbit32 (U32 val) | ||
157 | +{ | ||
158 | + assert(val != 0); | ||
159 | + { | ||
160 | +# if defined(_MSC_VER) /* Visual */ | ||
161 | + unsigned long r=0; | ||
162 | + _BitScanReverse ( &r, val ); | ||
163 | + return (unsigned) r; | ||
164 | +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ | ||
165 | + return 31 - __builtin_clz (val); | ||
166 | +# else /* Software version */ | ||
167 | + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, | ||
168 | + 11, 14, 16, 18, 22, 25, 3, 30, | ||
169 | + 8, 12, 20, 28, 15, 17, 24, 7, | ||
170 | + 19, 27, 23, 6, 26, 5, 4, 31 }; | ||
171 | + U32 v = val; | ||
172 | + v |= v >> 1; | ||
173 | + v |= v >> 2; | ||
174 | + v |= v >> 4; | ||
175 | + v |= v >> 8; | ||
176 | + v |= v >> 16; | ||
177 | + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; | ||
178 | +# endif | ||
179 | + } | ||
180 | +} | ||
181 | + | ||
182 | +/*===== Local Constants =====*/ | ||
183 | +static const unsigned BIT_mask[] = { | ||
184 | + 0, 1, 3, 7, 0xF, 0x1F, | ||
185 | + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, | ||
186 | + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, | ||
187 | + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, | ||
188 | + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, | ||
189 | + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ | ||
190 | +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) | ||
191 | + | ||
192 | +/*-************************************************************** | ||
193 | +* bitStream encoding | ||
194 | +****************************************************************/ | ||
195 | +/*! BIT_initCStream() : | ||
196 | + * `dstCapacity` must be > sizeof(size_t) | ||
197 | + * @return : 0 if success, | ||
198 | + * otherwise an error code (can be tested using ERR_isError()) */ | ||
199 | +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, | ||
200 | + void* startPtr, size_t dstCapacity) | ||
201 | +{ | ||
202 | + bitC->bitContainer = 0; | ||
203 | + bitC->bitPos = 0; | ||
204 | + bitC->startPtr = (char*)startPtr; | ||
205 | + bitC->ptr = bitC->startPtr; | ||
206 | + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); | ||
207 | + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); | ||
208 | + return 0; | ||
209 | +} | ||
210 | + | ||
211 | +/*! BIT_addBits() : | ||
212 | + * can add up to 31 bits into `bitC`. | ||
213 | + * Note : does not check for register overflow ! */ | ||
214 | +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, | ||
215 | + size_t value, unsigned nbBits) | ||
216 | +{ | ||
217 | + MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); | ||
218 | + assert(nbBits < BIT_MASK_SIZE); | ||
219 | + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); | ||
220 | + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; | ||
221 | + bitC->bitPos += nbBits; | ||
222 | +} | ||
223 | + | ||
224 | +/*! BIT_addBitsFast() : | ||
225 | + * works only if `value` is _clean_, | ||
226 | + * meaning all high bits above nbBits are 0 */ | ||
227 | +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, | ||
228 | + size_t value, unsigned nbBits) | ||
229 | +{ | ||
230 | + assert((value>>nbBits) == 0); | ||
231 | + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); | ||
232 | + bitC->bitContainer |= value << bitC->bitPos; | ||
233 | + bitC->bitPos += nbBits; | ||
234 | +} | ||
235 | + | ||
236 | +/*! BIT_flushBitsFast() : | ||
237 | + * assumption : bitContainer has not overflowed | ||
238 | + * unsafe version; does not check buffer overflow */ | ||
239 | +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) | ||
240 | +{ | ||
241 | + size_t const nbBytes = bitC->bitPos >> 3; | ||
242 | + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); | ||
243 | + MEM_writeLEST(bitC->ptr, bitC->bitContainer); | ||
244 | + bitC->ptr += nbBytes; | ||
245 | + assert(bitC->ptr <= bitC->endPtr); | ||
246 | + bitC->bitPos &= 7; | ||
247 | + bitC->bitContainer >>= nbBytes*8; | ||
248 | +} | ||
249 | + | ||
250 | +/*! BIT_flushBits() : | ||
251 | + * assumption : bitContainer has not overflowed | ||
252 | + * safe version; check for buffer overflow, and prevents it. | ||
253 | + * note : does not signal buffer overflow. | ||
254 | + * overflow will be revealed later on using BIT_closeCStream() */ | ||
255 | +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) | ||
256 | +{ | ||
257 | + size_t const nbBytes = bitC->bitPos >> 3; | ||
258 | + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); | ||
259 | + MEM_writeLEST(bitC->ptr, bitC->bitContainer); | ||
260 | + bitC->ptr += nbBytes; | ||
261 | + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; | ||
262 | + bitC->bitPos &= 7; | ||
263 | + bitC->bitContainer >>= nbBytes*8; | ||
264 | +} | ||
265 | + | ||
266 | +/*! BIT_closeCStream() : | ||
267 | + * @return : size of CStream, in bytes, | ||
268 | + * or 0 if it could not fit into dstBuffer */ | ||
269 | +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) | ||
270 | +{ | ||
271 | + BIT_addBitsFast(bitC, 1, 1); /* endMark */ | ||
272 | + BIT_flushBits(bitC); | ||
273 | + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ | ||
274 | + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); | ||
275 | +} | ||
276 | + | ||
277 | + | ||
278 | +/*-******************************************************** | ||
279 | +* bitStream decoding | ||
280 | +**********************************************************/ | ||
281 | +/*! BIT_initDStream() : | ||
282 | + * Initialize a BIT_DStream_t. | ||
283 | + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. | ||
284 | + * `srcSize` must be the *exact* size of the bitStream, in bytes. | ||
285 | + * @return : size of stream (== srcSize), or an errorCode if a problem is detected | ||
286 | + */ | ||
287 | +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) | ||
288 | +{ | ||
289 | + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } | ||
290 | + | ||
291 | + bitD->start = (const char*)srcBuffer; | ||
292 | + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); | ||
293 | + | ||
294 | + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ | ||
295 | + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); | ||
296 | + bitD->bitContainer = MEM_readLEST(bitD->ptr); | ||
297 | + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; | ||
298 | + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ | ||
299 | + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } | ||
300 | + } else { | ||
301 | + bitD->ptr = bitD->start; | ||
302 | + bitD->bitContainer = *(const BYTE*)(bitD->start); | ||
303 | + switch(srcSize) | ||
304 | + { | ||
305 | + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); | ||
306 | + /* fall-through */ | ||
307 | + | ||
308 | + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); | ||
309 | + /* fall-through */ | ||
310 | + | ||
311 | + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); | ||
312 | + /* fall-through */ | ||
313 | + | ||
314 | + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; | ||
315 | + /* fall-through */ | ||
316 | + | ||
317 | + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; | ||
318 | + /* fall-through */ | ||
319 | + | ||
320 | + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; | ||
321 | + /* fall-through */ | ||
322 | + | ||
323 | + default: break; | ||
324 | + } | ||
325 | + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; | ||
326 | + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; | ||
327 | + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ | ||
328 | + } | ||
329 | + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; | ||
330 | + } | ||
331 | + | ||
332 | + return srcSize; | ||
333 | +} | ||
334 | + | ||
335 | +MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) | ||
336 | +{ | ||
337 | + return bitContainer >> start; | ||
338 | +} | ||
339 | + | ||
340 | +MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) | ||
341 | +{ | ||
342 | + U32 const regMask = sizeof(bitContainer)*8 - 1; | ||
343 | + /* if start > regMask, bitstream is corrupted, and result is undefined */ | ||
344 | + assert(nbBits < BIT_MASK_SIZE); | ||
345 | + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; | ||
346 | +} | ||
347 | + | ||
348 | +MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) | ||
349 | +{ | ||
350 | + assert(nbBits < BIT_MASK_SIZE); | ||
351 | + return bitContainer & BIT_mask[nbBits]; | ||
352 | +} | ||
353 | + | ||
354 | +/*! BIT_lookBits() : | ||
355 | + * Provides next n bits from local register. | ||
356 | + * local register is not modified. | ||
357 | + * On 32-bits, maxNbBits==24. | ||
358 | + * On 64-bits, maxNbBits==56. | ||
359 | + * @return : value extracted */ | ||
360 | +MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) | ||
361 | +{ | ||
362 | + /* arbitrate between double-shift and shift+mask */ | ||
363 | +#if 1 | ||
364 | + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, | ||
365 | + * bitstream is likely corrupted, and result is undefined */ | ||
366 | + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); | ||
367 | +#else | ||
368 | + /* this code path is slower on my os-x laptop */ | ||
369 | + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; | ||
370 | + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); | ||
371 | +#endif | ||
372 | +} | ||
373 | + | ||
374 | +/*! BIT_lookBitsFast() : | ||
375 | + * unsafe version; only works if nbBits >= 1 */ | ||
376 | +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) | ||
377 | +{ | ||
378 | + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; | ||
379 | + assert(nbBits >= 1); | ||
380 | + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); | ||
381 | +} | ||
382 | + | ||
383 | +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) | ||
384 | +{ | ||
385 | + bitD->bitsConsumed += nbBits; | ||
386 | +} | ||
387 | + | ||
388 | +/*! BIT_readBits() : | ||
389 | + * Read (consume) next n bits from local register and update. | ||
390 | + * Pay attention to not read more than nbBits contained into local register. | ||
391 | + * @return : extracted value. */ | ||
392 | +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) | ||
393 | +{ | ||
394 | + size_t const value = BIT_lookBits(bitD, nbBits); | ||
395 | + BIT_skipBits(bitD, nbBits); | ||
396 | + return value; | ||
397 | +} | ||
398 | + | ||
399 | +/*! BIT_readBitsFast() : | ||
400 | + * unsafe version; only works only if nbBits >= 1 */ | ||
401 | +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) | ||
402 | +{ | ||
403 | + size_t const value = BIT_lookBitsFast(bitD, nbBits); | ||
404 | + assert(nbBits >= 1); | ||
405 | + BIT_skipBits(bitD, nbBits); | ||
406 | + return value; | ||
407 | +} | ||
408 | + | ||
409 | +/*! BIT_reloadDStream() : | ||
410 | + * Refill `bitD` from buffer previously set in BIT_initDStream() . | ||
411 | + * This function is safe, it guarantees it will not read beyond src buffer. | ||
412 | + * @return : status of `BIT_DStream_t` internal register. | ||
413 | + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ | ||
414 | +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) | ||
415 | +{ | ||
416 | + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ | ||
417 | + return BIT_DStream_overflow; | ||
418 | + | ||
419 | + if (bitD->ptr >= bitD->limitPtr) { | ||
420 | + bitD->ptr -= bitD->bitsConsumed >> 3; | ||
421 | + bitD->bitsConsumed &= 7; | ||
422 | + bitD->bitContainer = MEM_readLEST(bitD->ptr); | ||
423 | + return BIT_DStream_unfinished; | ||
424 | + } | ||
425 | + if (bitD->ptr == bitD->start) { | ||
426 | + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; | ||
427 | + return BIT_DStream_completed; | ||
428 | + } | ||
429 | + /* start < ptr < limitPtr */ | ||
430 | + { U32 nbBytes = bitD->bitsConsumed >> 3; | ||
431 | + BIT_DStream_status result = BIT_DStream_unfinished; | ||
432 | + if (bitD->ptr - nbBytes < bitD->start) { | ||
433 | + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ | ||
434 | + result = BIT_DStream_endOfBuffer; | ||
435 | + } | ||
436 | + bitD->ptr -= nbBytes; | ||
437 | + bitD->bitsConsumed -= nbBytes*8; | ||
438 | + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ | ||
439 | + return result; | ||
440 | + } | ||
441 | +} | ||
442 | + | ||
443 | +/*! BIT_endOfDStream() : | ||
444 | + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). | ||
445 | + */ | ||
446 | +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) | ||
447 | +{ | ||
448 | + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); | ||
449 | +} | ||
450 | + | ||
451 | +#if defined (__cplusplus) | ||
452 | +} | ||
453 | +#endif | ||
454 | + | ||
455 | +#endif /* BITSTREAM_H_MODULE */ |
vendor/github.com/DataDog/zstd/compiler.h
0 → 100644
1 | +/* | ||
2 | + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | ||
3 | + * All rights reserved. | ||
4 | + * | ||
5 | + * This source code is licensed under both the BSD-style license (found in the | ||
6 | + * LICENSE file in the root directory of this source tree) and the GPLv2 (found | ||
7 | + * in the COPYING file in the root directory of this source tree). | ||
8 | + * You may select, at your option, one of the above-listed licenses. | ||
9 | + */ | ||
10 | + | ||
11 | +#ifndef ZSTD_COMPILER_H | ||
12 | +#define ZSTD_COMPILER_H | ||
13 | + | ||
14 | +/*-******************************************************* | ||
15 | +* Compiler specifics | ||
16 | +*********************************************************/ | ||
17 | +/* force inlining */ | ||
18 | + | ||
19 | +#if !defined(ZSTD_NO_INLINE) | ||
20 | +#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ | ||
21 | +# define INLINE_KEYWORD inline | ||
22 | +#else | ||
23 | +# define INLINE_KEYWORD | ||
24 | +#endif | ||
25 | + | ||
26 | +#if defined(__GNUC__) | ||
27 | +# define FORCE_INLINE_ATTR __attribute__((always_inline)) | ||
28 | +#elif defined(_MSC_VER) | ||
29 | +# define FORCE_INLINE_ATTR __forceinline | ||
30 | +#else | ||
31 | +# define FORCE_INLINE_ATTR | ||
32 | +#endif | ||
33 | + | ||
34 | +#else | ||
35 | + | ||
36 | +#define INLINE_KEYWORD | ||
37 | +#define FORCE_INLINE_ATTR | ||
38 | + | ||
39 | +#endif | ||
40 | + | ||
41 | +/** | ||
42 | + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant | ||
43 | + * parameters. They must be inlined for the compiler to elimininate the constant | ||
44 | + * branches. | ||
45 | + */ | ||
46 | +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR | ||
47 | +/** | ||
48 | + * HINT_INLINE is used to help the compiler generate better code. It is *not* | ||
49 | + * used for "templates", so it can be tweaked based on the compilers | ||
50 | + * performance. | ||
51 | + * | ||
52 | + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the | ||
53 | + * always_inline attribute. | ||
54 | + * | ||
55 | + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline | ||
56 | + * attribute. | ||
57 | + */ | ||
58 | +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 | ||
59 | +# define HINT_INLINE static INLINE_KEYWORD | ||
60 | +#else | ||
61 | +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR | ||
62 | +#endif | ||
63 | + | ||
64 | +/* force no inlining */ | ||
65 | +#ifdef _MSC_VER | ||
66 | +# define FORCE_NOINLINE static __declspec(noinline) | ||
67 | +#else | ||
68 | +# ifdef __GNUC__ | ||
69 | +# define FORCE_NOINLINE static __attribute__((__noinline__)) | ||
70 | +# else | ||
71 | +# define FORCE_NOINLINE static | ||
72 | +# endif | ||
73 | +#endif | ||
74 | + | ||
75 | +/* target attribute */ | ||
76 | +#ifndef __has_attribute | ||
77 | + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ | ||
78 | +#endif | ||
79 | +#if defined(__GNUC__) | ||
80 | +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) | ||
81 | +#else | ||
82 | +# define TARGET_ATTRIBUTE(target) | ||
83 | +#endif | ||
84 | + | ||
85 | +/* Enable runtime BMI2 dispatch based on the CPU. | ||
86 | + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. | ||
87 | + */ | ||
88 | +#ifndef DYNAMIC_BMI2 | ||
89 | + #if ((defined(__clang__) && __has_attribute(__target__)) \ | ||
90 | + || (defined(__GNUC__) \ | ||
91 | + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ | ||
92 | + && (defined(__x86_64__) || defined(_M_X86)) \ | ||
93 | + && !defined(__BMI2__) | ||
94 | + # define DYNAMIC_BMI2 1 | ||
95 | + #else | ||
96 | + # define DYNAMIC_BMI2 0 | ||
97 | + #endif | ||
98 | +#endif | ||
99 | + | ||
100 | +/* prefetch | ||
101 | + * can be disabled, by declaring NO_PREFETCH build macro */ | ||
102 | +#if defined(NO_PREFETCH) | ||
103 | +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ | ||
104 | +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ | ||
105 | +#else | ||
106 | +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ | ||
107 | +# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ | ||
108 | +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) | ||
109 | +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) | ||
110 | +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) | ||
111 | +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) | ||
112 | +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) | ||
113 | +# else | ||
114 | +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ | ||
115 | +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ | ||
116 | +# endif | ||
117 | +#endif /* NO_PREFETCH */ | ||
118 | + | ||
119 | +#define CACHELINE_SIZE 64 | ||
120 | + | ||
121 | +#define PREFETCH_AREA(p, s) { \ | ||
122 | + const char* const _ptr = (const char*)(p); \ | ||
123 | + size_t const _size = (size_t)(s); \ | ||
124 | + size_t _pos; \ | ||
125 | + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ | ||
126 | + PREFETCH_L2(_ptr + _pos); \ | ||
127 | + } \ | ||
128 | +} | ||
129 | + | ||
130 | +/* disable warnings */ | ||
131 | +#ifdef _MSC_VER /* Visual Studio */ | ||
132 | +# include <intrin.h> /* For Visual 2005 */ | ||
133 | +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ | ||
134 | +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ | ||
135 | +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ | ||
136 | +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ | ||
137 | +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ | ||
138 | +#endif | ||
139 | + | ||
140 | +#endif /* ZSTD_COMPILER_H */ |
vendor/github.com/DataDog/zstd/cover.c
0 → 100644
1 | +/* | ||
2 | + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | ||
3 | + * All rights reserved. | ||
4 | + * | ||
5 | + * This source code is licensed under both the BSD-style license (found in the | ||
6 | + * LICENSE file in the root directory of this source tree) and the GPLv2 (found | ||
7 | + * in the COPYING file in the root directory of this source tree). | ||
8 | + * You may select, at your option, one of the above-listed licenses. | ||
9 | + */ | ||
10 | + | ||
11 | +/* ***************************************************************************** | ||
12 | + * Constructs a dictionary using a heuristic based on the following paper: | ||
13 | + * | ||
14 | + * Liao, Petri, Moffat, Wirth | ||
15 | + * Effective Construction of Relative Lempel-Ziv Dictionaries | ||
16 | + * Published in WWW 2016. | ||
17 | + * | ||
18 | + * Adapted from code originally written by @ot (Giuseppe Ottaviano). | ||
19 | + ******************************************************************************/ | ||
20 | + | ||
21 | +/*-************************************* | ||
22 | +* Dependencies | ||
23 | +***************************************/ | ||
24 | +#include <stdio.h> /* fprintf */ | ||
25 | +#include <stdlib.h> /* malloc, free, qsort */ | ||
26 | +#include <string.h> /* memset */ | ||
27 | +#include <time.h> /* clock */ | ||
28 | + | ||
29 | +#include "mem.h" /* read */ | ||
30 | +#include "pool.h" | ||
31 | +#include "threading.h" | ||
32 | +#include "cover.h" | ||
33 | +#include "zstd_internal.h" /* includes zstd.h */ | ||
34 | +#ifndef ZDICT_STATIC_LINKING_ONLY | ||
35 | +#define ZDICT_STATIC_LINKING_ONLY | ||
36 | +#endif | ||
37 | +#include "zdict.h" | ||
38 | + | ||
39 | +/*-************************************* | ||
40 | +* Constants | ||
41 | +***************************************/ | ||
42 | +#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) | ||
43 | +#define DEFAULT_SPLITPOINT 1.0 | ||
44 | + | ||
45 | +/*-************************************* | ||
46 | +* Console display | ||
47 | +***************************************/ | ||
48 | +static int g_displayLevel = 2; | ||
49 | +#define DISPLAY(...) \ | ||
50 | + { \ | ||
51 | + fprintf(stderr, __VA_ARGS__); \ | ||
52 | + fflush(stderr); \ | ||
53 | + } | ||
54 | +#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ | ||
55 | + if (displayLevel >= l) { \ | ||
56 | + DISPLAY(__VA_ARGS__); \ | ||
57 | + } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ | ||
58 | +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) | ||
59 | + | ||
60 | +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ | ||
61 | + if (displayLevel >= l) { \ | ||
62 | + if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \ | ||
63 | + g_time = clock(); \ | ||
64 | + DISPLAY(__VA_ARGS__); \ | ||
65 | + } \ | ||
66 | + } | ||
67 | +#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) | ||
68 | +static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; | ||
69 | +static clock_t g_time = 0; | ||
70 | + | ||
71 | +/*-************************************* | ||
72 | +* Hash table | ||
73 | +*************************************** | ||
74 | +* A small specialized hash map for storing activeDmers. | ||
75 | +* The map does not resize, so if it becomes full it will loop forever. | ||
76 | +* Thus, the map must be large enough to store every value. | ||
77 | +* The map implements linear probing and keeps its load less than 0.5. | ||
78 | +*/ | ||
79 | + | ||
80 | +#define MAP_EMPTY_VALUE ((U32)-1) | ||
81 | +typedef struct COVER_map_pair_t_s { | ||
82 | + U32 key; | ||
83 | + U32 value; | ||
84 | +} COVER_map_pair_t; | ||
85 | + | ||
86 | +typedef struct COVER_map_s { | ||
87 | + COVER_map_pair_t *data; | ||
88 | + U32 sizeLog; | ||
89 | + U32 size; | ||
90 | + U32 sizeMask; | ||
91 | +} COVER_map_t; | ||
92 | + | ||
93 | +/** | ||
94 | + * Clear the map. | ||
95 | + */ | ||
96 | +static void COVER_map_clear(COVER_map_t *map) { | ||
97 | + memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t)); | ||
98 | +} | ||
99 | + | ||
100 | +/** | ||
101 | + * Initializes a map of the given size. | ||
102 | + * Returns 1 on success and 0 on failure. | ||
103 | + * The map must be destroyed with COVER_map_destroy(). | ||
104 | + * The map is only guaranteed to be large enough to hold size elements. | ||
105 | + */ | ||
106 | +static int COVER_map_init(COVER_map_t *map, U32 size) { | ||
107 | + map->sizeLog = ZSTD_highbit32(size) + 2; | ||
108 | + map->size = (U32)1 << map->sizeLog; | ||
109 | + map->sizeMask = map->size - 1; | ||
110 | + map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t)); | ||
111 | + if (!map->data) { | ||
112 | + map->sizeLog = 0; | ||
113 | + map->size = 0; | ||
114 | + return 0; | ||
115 | + } | ||
116 | + COVER_map_clear(map); | ||
117 | + return 1; | ||
118 | +} | ||
119 | + | ||
120 | +/** | ||
121 | + * Internal hash function | ||
122 | + */ | ||
123 | +static const U32 prime4bytes = 2654435761U; | ||
124 | +static U32 COVER_map_hash(COVER_map_t *map, U32 key) { | ||
125 | + return (key * prime4bytes) >> (32 - map->sizeLog); | ||
126 | +} | ||
127 | + | ||
128 | +/** | ||
129 | + * Helper function that returns the index that a key should be placed into. | ||
130 | + */ | ||
131 | +static U32 COVER_map_index(COVER_map_t *map, U32 key) { | ||
132 | + const U32 hash = COVER_map_hash(map, key); | ||
133 | + U32 i; | ||
134 | + for (i = hash;; i = (i + 1) & map->sizeMask) { | ||
135 | + COVER_map_pair_t *pos = &map->data[i]; | ||
136 | + if (pos->value == MAP_EMPTY_VALUE) { | ||
137 | + return i; | ||
138 | + } | ||
139 | + if (pos->key == key) { | ||
140 | + return i; | ||
141 | + } | ||
142 | + } | ||
143 | +} | ||
144 | + | ||
145 | +/** | ||
146 | + * Returns the pointer to the value for key. | ||
147 | + * If key is not in the map, it is inserted and the value is set to 0. | ||
148 | + * The map must not be full. | ||
149 | + */ | ||
150 | +static U32 *COVER_map_at(COVER_map_t *map, U32 key) { | ||
151 | + COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)]; | ||
152 | + if (pos->value == MAP_EMPTY_VALUE) { | ||
153 | + pos->key = key; | ||
154 | + pos->value = 0; | ||
155 | + } | ||
156 | + return &pos->value; | ||
157 | +} | ||
158 | + | ||
159 | +/** | ||
160 | + * Deletes key from the map if present. | ||
161 | + */ | ||
162 | +static void COVER_map_remove(COVER_map_t *map, U32 key) { | ||
163 | + U32 i = COVER_map_index(map, key); | ||
164 | + COVER_map_pair_t *del = &map->data[i]; | ||
165 | + U32 shift = 1; | ||
166 | + if (del->value == MAP_EMPTY_VALUE) { | ||
167 | + return; | ||
168 | + } | ||
169 | + for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) { | ||
170 | + COVER_map_pair_t *const pos = &map->data[i]; | ||
171 | + /* If the position is empty we are done */ | ||
172 | + if (pos->value == MAP_EMPTY_VALUE) { | ||
173 | + del->value = MAP_EMPTY_VALUE; | ||
174 | + return; | ||
175 | + } | ||
176 | + /* If pos can be moved to del do so */ | ||
177 | + if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) { | ||
178 | + del->key = pos->key; | ||
179 | + del->value = pos->value; | ||
180 | + del = pos; | ||
181 | + shift = 1; | ||
182 | + } else { | ||
183 | + ++shift; | ||
184 | + } | ||
185 | + } | ||
186 | +} | ||
187 | + | ||
188 | +/** | ||
189 | + * Destroys a map that is inited with COVER_map_init(). | ||
190 | + */ | ||
191 | +static void COVER_map_destroy(COVER_map_t *map) { | ||
192 | + if (map->data) { | ||
193 | + free(map->data); | ||
194 | + } | ||
195 | + map->data = NULL; | ||
196 | + map->size = 0; | ||
197 | +} | ||
198 | + | ||
199 | +/*-************************************* | ||
200 | +* Context | ||
201 | +***************************************/ | ||
202 | + | ||
203 | +typedef struct { | ||
204 | + const BYTE *samples; | ||
205 | + size_t *offsets; | ||
206 | + const size_t *samplesSizes; | ||
207 | + size_t nbSamples; | ||
208 | + size_t nbTrainSamples; | ||
209 | + size_t nbTestSamples; | ||
210 | + U32 *suffix; | ||
211 | + size_t suffixSize; | ||
212 | + U32 *freqs; | ||
213 | + U32 *dmerAt; | ||
214 | + unsigned d; | ||
215 | +} COVER_ctx_t; | ||
216 | + | ||
217 | +/* We need a global context for qsort... */ | ||
218 | +static COVER_ctx_t *g_ctx = NULL; | ||
219 | + | ||
220 | +/*-************************************* | ||
221 | +* Helper functions | ||
222 | +***************************************/ | ||
223 | + | ||
224 | +/** | ||
225 | + * Returns the sum of the sample sizes. | ||
226 | + */ | ||
227 | +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) { | ||
228 | + size_t sum = 0; | ||
229 | + unsigned i; | ||
230 | + for (i = 0; i < nbSamples; ++i) { | ||
231 | + sum += samplesSizes[i]; | ||
232 | + } | ||
233 | + return sum; | ||
234 | +} | ||
235 | + | ||
236 | +/** | ||
237 | + * Returns -1 if the dmer at lp is less than the dmer at rp. | ||
238 | + * Return 0 if the dmers at lp and rp are equal. | ||
239 | + * Returns 1 if the dmer at lp is greater than the dmer at rp. | ||
240 | + */ | ||
241 | +static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) { | ||
242 | + U32 const lhs = *(U32 const *)lp; | ||
243 | + U32 const rhs = *(U32 const *)rp; | ||
244 | + return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d); | ||
245 | +} | ||
246 | +/** | ||
247 | + * Faster version for d <= 8. | ||
248 | + */ | ||
249 | +static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { | ||
250 | + U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1); | ||
251 | + U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask; | ||
252 | + U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask; | ||
253 | + if (lhs < rhs) { | ||
254 | + return -1; | ||
255 | + } | ||
256 | + return (lhs > rhs); | ||
257 | +} | ||
258 | + | ||
259 | +/** | ||
260 | + * Same as COVER_cmp() except ties are broken by pointer value | ||
261 | + * NOTE: g_ctx must be set to call this function. A global is required because | ||
262 | + * qsort doesn't take an opaque pointer. | ||
263 | + */ | ||
264 | +static int COVER_strict_cmp(const void *lp, const void *rp) { | ||
265 | + int result = COVER_cmp(g_ctx, lp, rp); | ||
266 | + if (result == 0) { | ||
267 | + result = lp < rp ? -1 : 1; | ||
268 | + } | ||
269 | + return result; | ||
270 | +} | ||
271 | +/** | ||
272 | + * Faster version for d <= 8. | ||
273 | + */ | ||
274 | +static int COVER_strict_cmp8(const void *lp, const void *rp) { | ||
275 | + int result = COVER_cmp8(g_ctx, lp, rp); | ||
276 | + if (result == 0) { | ||
277 | + result = lp < rp ? -1 : 1; | ||
278 | + } | ||
279 | + return result; | ||
280 | +} | ||
281 | + | ||
282 | +/** | ||
283 | + * Returns the first pointer in [first, last) whose element does not compare | ||
284 | + * less than value. If no such element exists it returns last. | ||
285 | + */ | ||
286 | +static const size_t *COVER_lower_bound(const size_t *first, const size_t *last, | ||
287 | + size_t value) { | ||
288 | + size_t count = last - first; | ||
289 | + while (count != 0) { | ||
290 | + size_t step = count / 2; | ||
291 | + const size_t *ptr = first; | ||
292 | + ptr += step; | ||
293 | + if (*ptr < value) { | ||
294 | + first = ++ptr; | ||
295 | + count -= step + 1; | ||
296 | + } else { | ||
297 | + count = step; | ||
298 | + } | ||
299 | + } | ||
300 | + return first; | ||
301 | +} | ||
302 | + | ||
303 | +/** | ||
304 | + * Generic groupBy function. | ||
305 | + * Groups an array sorted by cmp into groups with equivalent values. | ||
306 | + * Calls grp for each group. | ||
307 | + */ | ||
308 | +static void | ||
309 | +COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx, | ||
310 | + int (*cmp)(COVER_ctx_t *, const void *, const void *), | ||
311 | + void (*grp)(COVER_ctx_t *, const void *, const void *)) { | ||
312 | + const BYTE *ptr = (const BYTE *)data; | ||
313 | + size_t num = 0; | ||
314 | + while (num < count) { | ||
315 | + const BYTE *grpEnd = ptr + size; | ||
316 | + ++num; | ||
317 | + while (num < count && cmp(ctx, ptr, grpEnd) == 0) { | ||
318 | + grpEnd += size; | ||
319 | + ++num; | ||
320 | + } | ||
321 | + grp(ctx, ptr, grpEnd); | ||
322 | + ptr = grpEnd; | ||
323 | + } | ||
324 | +} | ||
325 | + | ||
326 | +/*-************************************* | ||
327 | +* Cover functions | ||
328 | +***************************************/ | ||
329 | + | ||
330 | +/** | ||
331 | + * Called on each group of positions with the same dmer. | ||
332 | + * Counts the frequency of each dmer and saves it in the suffix array. | ||
333 | + * Fills `ctx->dmerAt`. | ||
334 | + */ | ||
335 | +static void COVER_group(COVER_ctx_t *ctx, const void *group, | ||
336 | + const void *groupEnd) { | ||
337 | + /* The group consists of all the positions with the same first d bytes. */ | ||
338 | + const U32 *grpPtr = (const U32 *)group; | ||
339 | + const U32 *grpEnd = (const U32 *)groupEnd; | ||
340 | + /* The dmerId is how we will reference this dmer. | ||
341 | + * This allows us to map the whole dmer space to a much smaller space, the | ||
342 | + * size of the suffix array. | ||
343 | + */ | ||
344 | + const U32 dmerId = (U32)(grpPtr - ctx->suffix); | ||
345 | + /* Count the number of samples this dmer shows up in */ | ||
346 | + U32 freq = 0; | ||
347 | + /* Details */ | ||
348 | + const size_t *curOffsetPtr = ctx->offsets; | ||
349 | + const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples; | ||
350 | + /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a | ||
351 | + * different sample than the last. | ||
352 | + */ | ||
353 | + size_t curSampleEnd = ctx->offsets[0]; | ||
354 | + for (; grpPtr != grpEnd; ++grpPtr) { | ||
355 | + /* Save the dmerId for this position so we can get back to it. */ | ||
356 | + ctx->dmerAt[*grpPtr] = dmerId; | ||
357 | + /* Dictionaries only help for the first reference to the dmer. | ||
358 | + * After that zstd can reference the match from the previous reference. | ||
359 | + * So only count each dmer once for each sample it is in. | ||
360 | + */ | ||
361 | + if (*grpPtr < curSampleEnd) { | ||
362 | + continue; | ||
363 | + } | ||
364 | + freq += 1; | ||
365 | + /* Binary search to find the end of the sample *grpPtr is in. | ||
366 | + * In the common case that grpPtr + 1 == grpEnd we can skip the binary | ||
367 | + * search because the loop is over. | ||
368 | + */ | ||
369 | + if (grpPtr + 1 != grpEnd) { | ||
370 | + const size_t *sampleEndPtr = | ||
371 | + COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr); | ||
372 | + curSampleEnd = *sampleEndPtr; | ||
373 | + curOffsetPtr = sampleEndPtr + 1; | ||
374 | + } | ||
375 | + } | ||
376 | + /* At this point we are never going to look at this segment of the suffix | ||
377 | + * array again. We take advantage of this fact to save memory. | ||
378 | + * We store the frequency of the dmer in the first position of the group, | ||
379 | + * which is dmerId. | ||
380 | + */ | ||
381 | + ctx->suffix[dmerId] = freq; | ||
382 | +} | ||
383 | + | ||
384 | + | ||
385 | +/** | ||
386 | + * Selects the best segment in an epoch. | ||
387 | + * Segments of are scored according to the function: | ||
388 | + * | ||
389 | + * Let F(d) be the frequency of dmer d. | ||
390 | + * Let S_i be the dmer at position i of segment S which has length k. | ||
391 | + * | ||
392 | + * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) | ||
393 | + * | ||
394 | + * Once the dmer d is in the dictionay we set F(d) = 0. | ||
395 | + */ | ||
396 | +static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, | ||
397 | + COVER_map_t *activeDmers, U32 begin, | ||
398 | + U32 end, | ||
399 | + ZDICT_cover_params_t parameters) { | ||
400 | + /* Constants */ | ||
401 | + const U32 k = parameters.k; | ||
402 | + const U32 d = parameters.d; | ||
403 | + const U32 dmersInK = k - d + 1; | ||
404 | + /* Try each segment (activeSegment) and save the best (bestSegment) */ | ||
405 | + COVER_segment_t bestSegment = {0, 0, 0}; | ||
406 | + COVER_segment_t activeSegment; | ||
407 | + /* Reset the activeDmers in the segment */ | ||
408 | + COVER_map_clear(activeDmers); | ||
409 | + /* The activeSegment starts at the beginning of the epoch. */ | ||
410 | + activeSegment.begin = begin; | ||
411 | + activeSegment.end = begin; | ||
412 | + activeSegment.score = 0; | ||
413 | + /* Slide the activeSegment through the whole epoch. | ||
414 | + * Save the best segment in bestSegment. | ||
415 | + */ | ||
416 | + while (activeSegment.end < end) { | ||
417 | + /* The dmerId for the dmer at the next position */ | ||
418 | + U32 newDmer = ctx->dmerAt[activeSegment.end]; | ||
419 | + /* The entry in activeDmers for this dmerId */ | ||
420 | + U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer); | ||
421 | + /* If the dmer isn't already present in the segment add its score. */ | ||
422 | + if (*newDmerOcc == 0) { | ||
423 | + /* The paper suggest using the L-0.5 norm, but experiments show that it | ||
424 | + * doesn't help. | ||
425 | + */ | ||
426 | + activeSegment.score += freqs[newDmer]; | ||
427 | + } | ||
428 | + /* Add the dmer to the segment */ | ||
429 | + activeSegment.end += 1; | ||
430 | + *newDmerOcc += 1; | ||
431 | + | ||
432 | + /* If the window is now too large, drop the first position */ | ||
433 | + if (activeSegment.end - activeSegment.begin == dmersInK + 1) { | ||
434 | + U32 delDmer = ctx->dmerAt[activeSegment.begin]; | ||
435 | + U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer); | ||
436 | + activeSegment.begin += 1; | ||
437 | + *delDmerOcc -= 1; | ||
438 | + /* If this is the last occurence of the dmer, subtract its score */ | ||
439 | + if (*delDmerOcc == 0) { | ||
440 | + COVER_map_remove(activeDmers, delDmer); | ||
441 | + activeSegment.score -= freqs[delDmer]; | ||
442 | + } | ||
443 | + } | ||
444 | + | ||
445 | + /* If this segment is the best so far save it */ | ||
446 | + if (activeSegment.score > bestSegment.score) { | ||
447 | + bestSegment = activeSegment; | ||
448 | + } | ||
449 | + } | ||
450 | + { | ||
451 | + /* Trim off the zero frequency head and tail from the segment. */ | ||
452 | + U32 newBegin = bestSegment.end; | ||
453 | + U32 newEnd = bestSegment.begin; | ||
454 | + U32 pos; | ||
455 | + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { | ||
456 | + U32 freq = freqs[ctx->dmerAt[pos]]; | ||
457 | + if (freq != 0) { | ||
458 | + newBegin = MIN(newBegin, pos); | ||
459 | + newEnd = pos + 1; | ||
460 | + } | ||
461 | + } | ||
462 | + bestSegment.begin = newBegin; | ||
463 | + bestSegment.end = newEnd; | ||
464 | + } | ||
465 | + { | ||
466 | + /* Zero out the frequency of each dmer covered by the chosen segment. */ | ||
467 | + U32 pos; | ||
468 | + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { | ||
469 | + freqs[ctx->dmerAt[pos]] = 0; | ||
470 | + } | ||
471 | + } | ||
472 | + return bestSegment; | ||
473 | +} | ||
474 | + | ||
475 | +/** | ||
476 | + * Check the validity of the parameters. | ||
477 | + * Returns non-zero if the parameters are valid and 0 otherwise. | ||
478 | + */ | ||
479 | +static int COVER_checkParameters(ZDICT_cover_params_t parameters, | ||
480 | + size_t maxDictSize) { | ||
481 | + /* k and d are required parameters */ | ||
482 | + if (parameters.d == 0 || parameters.k == 0) { | ||
483 | + return 0; | ||
484 | + } | ||
485 | + /* k <= maxDictSize */ | ||
486 | + if (parameters.k > maxDictSize) { | ||
487 | + return 0; | ||
488 | + } | ||
489 | + /* d <= k */ | ||
490 | + if (parameters.d > parameters.k) { | ||
491 | + return 0; | ||
492 | + } | ||
493 | + /* 0 < splitPoint <= 1 */ | ||
494 | + if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){ | ||
495 | + return 0; | ||
496 | + } | ||
497 | + return 1; | ||
498 | +} | ||
499 | + | ||
500 | +/** | ||
501 | + * Clean up a context initialized with `COVER_ctx_init()`. | ||
502 | + */ | ||
503 | +static void COVER_ctx_destroy(COVER_ctx_t *ctx) { | ||
504 | + if (!ctx) { | ||
505 | + return; | ||
506 | + } | ||
507 | + if (ctx->suffix) { | ||
508 | + free(ctx->suffix); | ||
509 | + ctx->suffix = NULL; | ||
510 | + } | ||
511 | + if (ctx->freqs) { | ||
512 | + free(ctx->freqs); | ||
513 | + ctx->freqs = NULL; | ||
514 | + } | ||
515 | + if (ctx->dmerAt) { | ||
516 | + free(ctx->dmerAt); | ||
517 | + ctx->dmerAt = NULL; | ||
518 | + } | ||
519 | + if (ctx->offsets) { | ||
520 | + free(ctx->offsets); | ||
521 | + ctx->offsets = NULL; | ||
522 | + } | ||
523 | +} | ||
524 | + | ||
525 | +/** | ||
526 | + * Prepare a context for dictionary building. | ||
527 | + * The context is only dependent on the parameter `d` and can used multiple | ||
528 | + * times. | ||
529 | + * Returns 1 on success or zero on error. | ||
530 | + * The context must be destroyed with `COVER_ctx_destroy()`. | ||
531 | + */ | ||
532 | +static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, | ||
533 | + const size_t *samplesSizes, unsigned nbSamples, | ||
534 | + unsigned d, double splitPoint) { | ||
535 | + const BYTE *const samples = (const BYTE *)samplesBuffer; | ||
536 | + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); | ||
537 | + /* Split samples into testing and training sets */ | ||
538 | + const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; | ||
539 | + const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; | ||
540 | + const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; | ||
541 | + const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; | ||
542 | + /* Checks */ | ||
543 | + if (totalSamplesSize < MAX(d, sizeof(U64)) || | ||
544 | + totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { | ||
545 | + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", | ||
546 | + (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); | ||
547 | + return 0; | ||
548 | + } | ||
549 | + /* Check if there are at least 5 training samples */ | ||
550 | + if (nbTrainSamples < 5) { | ||
551 | + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); | ||
552 | + return 0; | ||
553 | + } | ||
554 | + /* Check if there's testing sample */ | ||
555 | + if (nbTestSamples < 1) { | ||
556 | + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); | ||
557 | + return 0; | ||
558 | + } | ||
559 | + /* Zero the context */ | ||
560 | + memset(ctx, 0, sizeof(*ctx)); | ||
561 | + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, | ||
562 | + (unsigned)trainingSamplesSize); | ||
563 | + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, | ||
564 | + (unsigned)testSamplesSize); | ||
565 | + ctx->samples = samples; | ||
566 | + ctx->samplesSizes = samplesSizes; | ||
567 | + ctx->nbSamples = nbSamples; | ||
568 | + ctx->nbTrainSamples = nbTrainSamples; | ||
569 | + ctx->nbTestSamples = nbTestSamples; | ||
570 | + /* Partial suffix array */ | ||
571 | + ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; | ||
572 | + ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); | ||
573 | + /* Maps index to the dmerID */ | ||
574 | + ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); | ||
575 | + /* The offsets of each file */ | ||
576 | + ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); | ||
577 | + if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { | ||
578 | + DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); | ||
579 | + COVER_ctx_destroy(ctx); | ||
580 | + return 0; | ||
581 | + } | ||
582 | + ctx->freqs = NULL; | ||
583 | + ctx->d = d; | ||
584 | + | ||
585 | + /* Fill offsets from the samplesSizes */ | ||
586 | + { | ||
587 | + U32 i; | ||
588 | + ctx->offsets[0] = 0; | ||
589 | + for (i = 1; i <= nbSamples; ++i) { | ||
590 | + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; | ||
591 | + } | ||
592 | + } | ||
593 | + DISPLAYLEVEL(2, "Constructing partial suffix array\n"); | ||
594 | + { | ||
595 | + /* suffix is a partial suffix array. | ||
596 | + * It only sorts suffixes by their first parameters.d bytes. | ||
597 | + * The sort is stable, so each dmer group is sorted by position in input. | ||
598 | + */ | ||
599 | + U32 i; | ||
600 | + for (i = 0; i < ctx->suffixSize; ++i) { | ||
601 | + ctx->suffix[i] = i; | ||
602 | + } | ||
603 | + /* qsort doesn't take an opaque pointer, so pass as a global. | ||
604 | + * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is. | ||
605 | + */ | ||
606 | + g_ctx = ctx; | ||
607 | +#if defined(__OpenBSD__) | ||
608 | + mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32), | ||
609 | + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); | ||
610 | +#else | ||
611 | + qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), | ||
612 | + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); | ||
613 | +#endif | ||
614 | + } | ||
615 | + DISPLAYLEVEL(2, "Computing frequencies\n"); | ||
616 | + /* For each dmer group (group of positions with the same first d bytes): | ||
617 | + * 1. For each position we set dmerAt[position] = dmerID. The dmerID is | ||
618 | + * (groupBeginPtr - suffix). This allows us to go from position to | ||
619 | + * dmerID so we can look up values in freq. | ||
620 | + * 2. We calculate how many samples the dmer occurs in and save it in | ||
621 | + * freqs[dmerId]. | ||
622 | + */ | ||
623 | + COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, | ||
624 | + (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); | ||
625 | + ctx->freqs = ctx->suffix; | ||
626 | + ctx->suffix = NULL; | ||
627 | + return 1; | ||
628 | +} | ||
629 | + | ||
630 | +/** | ||
631 | + * Given the prepared context build the dictionary. | ||
632 | + */ | ||
633 | +static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, | ||
634 | + COVER_map_t *activeDmers, void *dictBuffer, | ||
635 | + size_t dictBufferCapacity, | ||
636 | + ZDICT_cover_params_t parameters) { | ||
637 | + BYTE *const dict = (BYTE *)dictBuffer; | ||
638 | + size_t tail = dictBufferCapacity; | ||
639 | + /* Divide the data up into epochs of equal size. | ||
640 | + * We will select at least one segment from each epoch. | ||
641 | + */ | ||
642 | + const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4)); | ||
643 | + const unsigned epochSize = (U32)(ctx->suffixSize / epochs); | ||
644 | + size_t epoch; | ||
645 | + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", | ||
646 | + epochs, epochSize); | ||
647 | + /* Loop through the epochs until there are no more segments or the dictionary | ||
648 | + * is full. | ||
649 | + */ | ||
650 | + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { | ||
651 | + const U32 epochBegin = (U32)(epoch * epochSize); | ||
652 | + const U32 epochEnd = epochBegin + epochSize; | ||
653 | + size_t segmentSize; | ||
654 | + /* Select a segment */ | ||
655 | + COVER_segment_t segment = COVER_selectSegment( | ||
656 | + ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); | ||
657 | + /* If the segment covers no dmers, then we are out of content */ | ||
658 | + if (segment.score == 0) { | ||
659 | + break; | ||
660 | + } | ||
661 | + /* Trim the segment if necessary and if it is too small then we are done */ | ||
662 | + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); | ||
663 | + if (segmentSize < parameters.d) { | ||
664 | + break; | ||
665 | + } | ||
666 | + /* We fill the dictionary from the back to allow the best segments to be | ||
667 | + * referenced with the smallest offsets. | ||
668 | + */ | ||
669 | + tail -= segmentSize; | ||
670 | + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); | ||
671 | + DISPLAYUPDATE( | ||
672 | + 2, "\r%u%% ", | ||
673 | + (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); | ||
674 | + } | ||
675 | + DISPLAYLEVEL(2, "\r%79s\r", ""); | ||
676 | + return tail; | ||
677 | +} | ||
678 | + | ||
679 | +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( | ||
680 | + void *dictBuffer, size_t dictBufferCapacity, | ||
681 | + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, | ||
682 | + ZDICT_cover_params_t parameters) | ||
683 | +{ | ||
684 | + BYTE* const dict = (BYTE*)dictBuffer; | ||
685 | + COVER_ctx_t ctx; | ||
686 | + COVER_map_t activeDmers; | ||
687 | + parameters.splitPoint = 1.0; | ||
688 | + /* Initialize global data */ | ||
689 | + g_displayLevel = parameters.zParams.notificationLevel; | ||
690 | + /* Checks */ | ||
691 | + if (!COVER_checkParameters(parameters, dictBufferCapacity)) { | ||
692 | + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); | ||
693 | + return ERROR(GENERIC); | ||
694 | + } | ||
695 | + if (nbSamples == 0) { | ||
696 | + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); | ||
697 | + return ERROR(GENERIC); | ||
698 | + } | ||
699 | + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { | ||
700 | + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", | ||
701 | + ZDICT_DICTSIZE_MIN); | ||
702 | + return ERROR(dstSize_tooSmall); | ||
703 | + } | ||
704 | + /* Initialize context and activeDmers */ | ||
705 | + if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, | ||
706 | + parameters.d, parameters.splitPoint)) { | ||
707 | + return ERROR(GENERIC); | ||
708 | + } | ||
709 | + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { | ||
710 | + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); | ||
711 | + COVER_ctx_destroy(&ctx); | ||
712 | + return ERROR(GENERIC); | ||
713 | + } | ||
714 | + | ||
715 | + DISPLAYLEVEL(2, "Building dictionary\n"); | ||
716 | + { | ||
717 | + const size_t tail = | ||
718 | + COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer, | ||
719 | + dictBufferCapacity, parameters); | ||
720 | + const size_t dictionarySize = ZDICT_finalizeDictionary( | ||
721 | + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, | ||
722 | + samplesBuffer, samplesSizes, nbSamples, parameters.zParams); | ||
723 | + if (!ZSTD_isError(dictionarySize)) { | ||
724 | + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", | ||
725 | + (unsigned)dictionarySize); | ||
726 | + } | ||
727 | + COVER_ctx_destroy(&ctx); | ||
728 | + COVER_map_destroy(&activeDmers); | ||
729 | + return dictionarySize; | ||
730 | + } | ||
731 | +} | ||
732 | + | ||
733 | + | ||
734 | + | ||
735 | +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, | ||
736 | + const size_t *samplesSizes, const BYTE *samples, | ||
737 | + size_t *offsets, | ||
738 | + size_t nbTrainSamples, size_t nbSamples, | ||
739 | + BYTE *const dict, size_t dictBufferCapacity) { | ||
740 | + size_t totalCompressedSize = ERROR(GENERIC); | ||
741 | + /* Pointers */ | ||
742 | + ZSTD_CCtx *cctx; | ||
743 | + ZSTD_CDict *cdict; | ||
744 | + void *dst; | ||
745 | + /* Local variables */ | ||
746 | + size_t dstCapacity; | ||
747 | + size_t i; | ||
748 | + /* Allocate dst with enough space to compress the maximum sized sample */ | ||
749 | + { | ||
750 | + size_t maxSampleSize = 0; | ||
751 | + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; | ||
752 | + for (; i < nbSamples; ++i) { | ||
753 | + maxSampleSize = MAX(samplesSizes[i], maxSampleSize); | ||
754 | + } | ||
755 | + dstCapacity = ZSTD_compressBound(maxSampleSize); | ||
756 | + dst = malloc(dstCapacity); | ||
757 | + } | ||
758 | + /* Create the cctx and cdict */ | ||
759 | + cctx = ZSTD_createCCtx(); | ||
760 | + cdict = ZSTD_createCDict(dict, dictBufferCapacity, | ||
761 | + parameters.zParams.compressionLevel); | ||
762 | + if (!dst || !cctx || !cdict) { | ||
763 | + goto _compressCleanup; | ||
764 | + } | ||
765 | + /* Compress each sample and sum their sizes (or error) */ | ||
766 | + totalCompressedSize = dictBufferCapacity; | ||
767 | + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; | ||
768 | + for (; i < nbSamples; ++i) { | ||
769 | + const size_t size = ZSTD_compress_usingCDict( | ||
770 | + cctx, dst, dstCapacity, samples + offsets[i], | ||
771 | + samplesSizes[i], cdict); | ||
772 | + if (ZSTD_isError(size)) { | ||
773 | + totalCompressedSize = ERROR(GENERIC); | ||
774 | + goto _compressCleanup; | ||
775 | + } | ||
776 | + totalCompressedSize += size; | ||
777 | + } | ||
778 | +_compressCleanup: | ||
779 | + ZSTD_freeCCtx(cctx); | ||
780 | + ZSTD_freeCDict(cdict); | ||
781 | + if (dst) { | ||
782 | + free(dst); | ||
783 | + } | ||
784 | + return totalCompressedSize; | ||
785 | +} | ||
786 | + | ||
787 | + | ||
788 | +/** | ||
789 | + * Initialize the `COVER_best_t`. | ||
790 | + */ | ||
791 | +void COVER_best_init(COVER_best_t *best) { | ||
792 | + if (best==NULL) return; /* compatible with init on NULL */ | ||
793 | + (void)ZSTD_pthread_mutex_init(&best->mutex, NULL); | ||
794 | + (void)ZSTD_pthread_cond_init(&best->cond, NULL); | ||
795 | + best->liveJobs = 0; | ||
796 | + best->dict = NULL; | ||
797 | + best->dictSize = 0; | ||
798 | + best->compressedSize = (size_t)-1; | ||
799 | + memset(&best->parameters, 0, sizeof(best->parameters)); | ||
800 | +} | ||
801 | + | ||
802 | +/** | ||
803 | + * Wait until liveJobs == 0. | ||
804 | + */ | ||
805 | +void COVER_best_wait(COVER_best_t *best) { | ||
806 | + if (!best) { | ||
807 | + return; | ||
808 | + } | ||
809 | + ZSTD_pthread_mutex_lock(&best->mutex); | ||
810 | + while (best->liveJobs != 0) { | ||
811 | + ZSTD_pthread_cond_wait(&best->cond, &best->mutex); | ||
812 | + } | ||
813 | + ZSTD_pthread_mutex_unlock(&best->mutex); | ||
814 | +} | ||
815 | + | ||
816 | +/** | ||
817 | + * Call COVER_best_wait() and then destroy the COVER_best_t. | ||
818 | + */ | ||
819 | +void COVER_best_destroy(COVER_best_t *best) { | ||
820 | + if (!best) { | ||
821 | + return; | ||
822 | + } | ||
823 | + COVER_best_wait(best); | ||
824 | + if (best->dict) { | ||
825 | + free(best->dict); | ||
826 | + } | ||
827 | + ZSTD_pthread_mutex_destroy(&best->mutex); | ||
828 | + ZSTD_pthread_cond_destroy(&best->cond); | ||
829 | +} | ||
830 | + | ||
831 | +/** | ||
832 | + * Called when a thread is about to be launched. | ||
833 | + * Increments liveJobs. | ||
834 | + */ | ||
835 | +void COVER_best_start(COVER_best_t *best) { | ||
836 | + if (!best) { | ||
837 | + return; | ||
838 | + } | ||
839 | + ZSTD_pthread_mutex_lock(&best->mutex); | ||
840 | + ++best->liveJobs; | ||
841 | + ZSTD_pthread_mutex_unlock(&best->mutex); | ||
842 | +} | ||
843 | + | ||
844 | +/** | ||
845 | + * Called when a thread finishes executing, both on error or success. | ||
846 | + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. | ||
847 | + * If this dictionary is the best so far save it and its parameters. | ||
848 | + */ | ||
849 | +void COVER_best_finish(COVER_best_t *best, size_t compressedSize, | ||
850 | + ZDICT_cover_params_t parameters, void *dict, | ||
851 | + size_t dictSize) { | ||
852 | + if (!best) { | ||
853 | + return; | ||
854 | + } | ||
855 | + { | ||
856 | + size_t liveJobs; | ||
857 | + ZSTD_pthread_mutex_lock(&best->mutex); | ||
858 | + --best->liveJobs; | ||
859 | + liveJobs = best->liveJobs; | ||
860 | + /* If the new dictionary is better */ | ||
861 | + if (compressedSize < best->compressedSize) { | ||
862 | + /* Allocate space if necessary */ | ||
863 | + if (!best->dict || best->dictSize < dictSize) { | ||
864 | + if (best->dict) { | ||
865 | + free(best->dict); | ||
866 | + } | ||
867 | + best->dict = malloc(dictSize); | ||
868 | + if (!best->dict) { | ||
869 | + best->compressedSize = ERROR(GENERIC); | ||
870 | + best->dictSize = 0; | ||
871 | + ZSTD_pthread_cond_signal(&best->cond); | ||
872 | + ZSTD_pthread_mutex_unlock(&best->mutex); | ||
873 | + return; | ||
874 | + } | ||
875 | + } | ||
876 | + /* Save the dictionary, parameters, and size */ | ||
877 | + memcpy(best->dict, dict, dictSize); | ||
878 | + best->dictSize = dictSize; | ||
879 | + best->parameters = parameters; | ||
880 | + best->compressedSize = compressedSize; | ||
881 | + } | ||
882 | + if (liveJobs == 0) { | ||
883 | + ZSTD_pthread_cond_broadcast(&best->cond); | ||
884 | + } | ||
885 | + ZSTD_pthread_mutex_unlock(&best->mutex); | ||
886 | + } | ||
887 | +} | ||
888 | + | ||
889 | +/** | ||
890 | + * Parameters for COVER_tryParameters(). | ||
891 | + */ | ||
892 | +typedef struct COVER_tryParameters_data_s { | ||
893 | + const COVER_ctx_t *ctx; | ||
894 | + COVER_best_t *best; | ||
895 | + size_t dictBufferCapacity; | ||
896 | + ZDICT_cover_params_t parameters; | ||
897 | +} COVER_tryParameters_data_t; | ||
898 | + | ||
899 | +/** | ||
900 | + * Tries a set of parameters and updates the COVER_best_t with the results. | ||
901 | + * This function is thread safe if zstd is compiled with multithreaded support. | ||
902 | + * It takes its parameters as an *OWNING* opaque pointer to support threading. | ||
903 | + */ | ||
904 | +static void COVER_tryParameters(void *opaque) { | ||
905 | + /* Save parameters as local variables */ | ||
906 | + COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque; | ||
907 | + const COVER_ctx_t *const ctx = data->ctx; | ||
908 | + const ZDICT_cover_params_t parameters = data->parameters; | ||
909 | + size_t dictBufferCapacity = data->dictBufferCapacity; | ||
910 | + size_t totalCompressedSize = ERROR(GENERIC); | ||
911 | + /* Allocate space for hash table, dict, and freqs */ | ||
912 | + COVER_map_t activeDmers; | ||
913 | + BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); | ||
914 | + U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); | ||
915 | + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { | ||
916 | + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); | ||
917 | + goto _cleanup; | ||
918 | + } | ||
919 | + if (!dict || !freqs) { | ||
920 | + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); | ||
921 | + goto _cleanup; | ||
922 | + } | ||
923 | + /* Copy the frequencies because we need to modify them */ | ||
924 | + memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32)); | ||
925 | + /* Build the dictionary */ | ||
926 | + { | ||
927 | + const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, | ||
928 | + dictBufferCapacity, parameters); | ||
929 | + dictBufferCapacity = ZDICT_finalizeDictionary( | ||
930 | + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, | ||
931 | + ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, | ||
932 | + parameters.zParams); | ||
933 | + if (ZDICT_isError(dictBufferCapacity)) { | ||
934 | + DISPLAYLEVEL(1, "Failed to finalize dictionary\n"); | ||
935 | + goto _cleanup; | ||
936 | + } | ||
937 | + } | ||
938 | + /* Check total compressed size */ | ||
939 | + totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes, | ||
940 | + ctx->samples, ctx->offsets, | ||
941 | + ctx->nbTrainSamples, ctx->nbSamples, | ||
942 | + dict, dictBufferCapacity); | ||
943 | + | ||
944 | +_cleanup: | ||
945 | + COVER_best_finish(data->best, totalCompressedSize, parameters, dict, | ||
946 | + dictBufferCapacity); | ||
947 | + free(data); | ||
948 | + COVER_map_destroy(&activeDmers); | ||
949 | + if (dict) { | ||
950 | + free(dict); | ||
951 | + } | ||
952 | + if (freqs) { | ||
953 | + free(freqs); | ||
954 | + } | ||
955 | +} | ||
956 | + | ||
957 | +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( | ||
958 | + void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, | ||
959 | + const size_t *samplesSizes, unsigned nbSamples, | ||
960 | + ZDICT_cover_params_t *parameters) { | ||
961 | + /* constants */ | ||
962 | + const unsigned nbThreads = parameters->nbThreads; | ||
963 | + const double splitPoint = | ||
964 | + parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint; | ||
965 | + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; | ||
966 | + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; | ||
967 | + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; | ||
968 | + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; | ||
969 | + const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; | ||
970 | + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); | ||
971 | + const unsigned kIterations = | ||
972 | + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); | ||
973 | + /* Local variables */ | ||
974 | + const int displayLevel = parameters->zParams.notificationLevel; | ||
975 | + unsigned iteration = 1; | ||
976 | + unsigned d; | ||
977 | + unsigned k; | ||
978 | + COVER_best_t best; | ||
979 | + POOL_ctx *pool = NULL; | ||
980 | + | ||
981 | + /* Checks */ | ||
982 | + if (splitPoint <= 0 || splitPoint > 1) { | ||
983 | + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); | ||
984 | + return ERROR(GENERIC); | ||
985 | + } | ||
986 | + if (kMinK < kMaxD || kMaxK < kMinK) { | ||
987 | + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); | ||
988 | + return ERROR(GENERIC); | ||
989 | + } | ||
990 | + if (nbSamples == 0) { | ||
991 | + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); | ||
992 | + return ERROR(GENERIC); | ||
993 | + } | ||
994 | + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { | ||
995 | + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", | ||
996 | + ZDICT_DICTSIZE_MIN); | ||
997 | + return ERROR(dstSize_tooSmall); | ||
998 | + } | ||
999 | + if (nbThreads > 1) { | ||
1000 | + pool = POOL_create(nbThreads, 1); | ||
1001 | + if (!pool) { | ||
1002 | + return ERROR(memory_allocation); | ||
1003 | + } | ||
1004 | + } | ||
1005 | + /* Initialization */ | ||
1006 | + COVER_best_init(&best); | ||
1007 | + /* Turn down global display level to clean up display at level 2 and below */ | ||
1008 | + g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; | ||
1009 | + /* Loop through d first because each new value needs a new context */ | ||
1010 | + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", | ||
1011 | + kIterations); | ||
1012 | + for (d = kMinD; d <= kMaxD; d += 2) { | ||
1013 | + /* Initialize the context for this value of d */ | ||
1014 | + COVER_ctx_t ctx; | ||
1015 | + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); | ||
1016 | + if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) { | ||
1017 | + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); | ||
1018 | + COVER_best_destroy(&best); | ||
1019 | + POOL_free(pool); | ||
1020 | + return ERROR(GENERIC); | ||
1021 | + } | ||
1022 | + /* Loop through k reusing the same context */ | ||
1023 | + for (k = kMinK; k <= kMaxK; k += kStepSize) { | ||
1024 | + /* Prepare the arguments */ | ||
1025 | + COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc( | ||
1026 | + sizeof(COVER_tryParameters_data_t)); | ||
1027 | + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); | ||
1028 | + if (!data) { | ||
1029 | + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); | ||
1030 | + COVER_best_destroy(&best); | ||
1031 | + COVER_ctx_destroy(&ctx); | ||
1032 | + POOL_free(pool); | ||
1033 | + return ERROR(GENERIC); | ||
1034 | + } | ||
1035 | + data->ctx = &ctx; | ||
1036 | + data->best = &best; | ||
1037 | + data->dictBufferCapacity = dictBufferCapacity; | ||
1038 | + data->parameters = *parameters; | ||
1039 | + data->parameters.k = k; | ||
1040 | + data->parameters.d = d; | ||
1041 | + data->parameters.splitPoint = splitPoint; | ||
1042 | + data->parameters.steps = kSteps; | ||
1043 | + data->parameters.zParams.notificationLevel = g_displayLevel; | ||
1044 | + /* Check the parameters */ | ||
1045 | + if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { | ||
1046 | + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); | ||
1047 | + free(data); | ||
1048 | + continue; | ||
1049 | + } | ||
1050 | + /* Call the function and pass ownership of data to it */ | ||
1051 | + COVER_best_start(&best); | ||
1052 | + if (pool) { | ||
1053 | + POOL_add(pool, &COVER_tryParameters, data); | ||
1054 | + } else { | ||
1055 | + COVER_tryParameters(data); | ||
1056 | + } | ||
1057 | + /* Print status */ | ||
1058 | + LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", | ||
1059 | + (unsigned)((iteration * 100) / kIterations)); | ||
1060 | + ++iteration; | ||
1061 | + } | ||
1062 | + COVER_best_wait(&best); | ||
1063 | + COVER_ctx_destroy(&ctx); | ||
1064 | + } | ||
1065 | + LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); | ||
1066 | + /* Fill the output buffer and parameters with output of the best parameters */ | ||
1067 | + { | ||
1068 | + const size_t dictSize = best.dictSize; | ||
1069 | + if (ZSTD_isError(best.compressedSize)) { | ||
1070 | + const size_t compressedSize = best.compressedSize; | ||
1071 | + COVER_best_destroy(&best); | ||
1072 | + POOL_free(pool); | ||
1073 | + return compressedSize; | ||
1074 | + } | ||
1075 | + *parameters = best.parameters; | ||
1076 | + memcpy(dictBuffer, best.dict, dictSize); | ||
1077 | + COVER_best_destroy(&best); | ||
1078 | + POOL_free(pool); | ||
1079 | + return dictSize; | ||
1080 | + } | ||
1081 | +} |
vendor/github.com/DataDog/zstd/cover.h
0 → 100644
1 | +#include <stdio.h> /* fprintf */ | ||
2 | +#include <stdlib.h> /* malloc, free, qsort */ | ||
3 | +#include <string.h> /* memset */ | ||
4 | +#include <time.h> /* clock */ | ||
5 | +#include "mem.h" /* read */ | ||
6 | +#include "pool.h" | ||
7 | +#include "threading.h" | ||
8 | +#include "zstd_internal.h" /* includes zstd.h */ | ||
9 | +#ifndef ZDICT_STATIC_LINKING_ONLY | ||
10 | +#define ZDICT_STATIC_LINKING_ONLY | ||
11 | +#endif | ||
12 | +#include "zdict.h" | ||
13 | + | ||
14 | +/** | ||
15 | + * COVER_best_t is used for two purposes: | ||
16 | + * 1. Synchronizing threads. | ||
17 | + * 2. Saving the best parameters and dictionary. | ||
18 | + * | ||
19 | + * All of the methods except COVER_best_init() are thread safe if zstd is | ||
20 | + * compiled with multithreaded support. | ||
21 | + */ | ||
22 | +typedef struct COVER_best_s { | ||
23 | + ZSTD_pthread_mutex_t mutex; | ||
24 | + ZSTD_pthread_cond_t cond; | ||
25 | + size_t liveJobs; | ||
26 | + void *dict; | ||
27 | + size_t dictSize; | ||
28 | + ZDICT_cover_params_t parameters; | ||
29 | + size_t compressedSize; | ||
30 | +} COVER_best_t; | ||
31 | + | ||
32 | +/** | ||
33 | + * A segment is a range in the source as well as the score of the segment. | ||
34 | + */ | ||
35 | +typedef struct { | ||
36 | + U32 begin; | ||
37 | + U32 end; | ||
38 | + U32 score; | ||
39 | +} COVER_segment_t; | ||
40 | + | ||
41 | +/** | ||
42 | + * Checks total compressed size of a dictionary | ||
43 | + */ | ||
44 | +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, | ||
45 | + const size_t *samplesSizes, const BYTE *samples, | ||
46 | + size_t *offsets, | ||
47 | + size_t nbTrainSamples, size_t nbSamples, | ||
48 | + BYTE *const dict, size_t dictBufferCapacity); | ||
49 | + | ||
50 | +/** | ||
51 | + * Returns the sum of the sample sizes. | ||
52 | + */ | ||
53 | +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ; | ||
54 | + | ||
55 | +/** | ||
56 | + * Initialize the `COVER_best_t`. | ||
57 | + */ | ||
58 | +void COVER_best_init(COVER_best_t *best); | ||
59 | + | ||
60 | +/** | ||
61 | + * Wait until liveJobs == 0. | ||
62 | + */ | ||
63 | +void COVER_best_wait(COVER_best_t *best); | ||
64 | + | ||
65 | +/** | ||
66 | + * Call COVER_best_wait() and then destroy the COVER_best_t. | ||
67 | + */ | ||
68 | +void COVER_best_destroy(COVER_best_t *best); | ||
69 | + | ||
70 | +/** | ||
71 | + * Called when a thread is about to be launched. | ||
72 | + * Increments liveJobs. | ||
73 | + */ | ||
74 | +void COVER_best_start(COVER_best_t *best); | ||
75 | + | ||
76 | +/** | ||
77 | + * Called when a thread finishes executing, both on error or success. | ||
78 | + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. | ||
79 | + * If this dictionary is the best so far save it and its parameters. | ||
80 | + */ | ||
81 | +void COVER_best_finish(COVER_best_t *best, size_t compressedSize, | ||
82 | + ZDICT_cover_params_t parameters, void *dict, | ||
83 | + size_t dictSize); |
vendor/github.com/DataDog/zstd/cpu.h
0 → 100644
1 | +/* | ||
2 | + * Copyright (c) 2018-present, Facebook, Inc. | ||
3 | + * All rights reserved. | ||
4 | + * | ||
5 | + * This source code is licensed under both the BSD-style license (found in the | ||
6 | + * LICENSE file in the root directory of this source tree) and the GPLv2 (found | ||
7 | + * in the COPYING file in the root directory of this source tree). | ||
8 | + * You may select, at your option, one of the above-listed licenses. | ||
9 | + */ | ||
10 | + | ||
11 | +#ifndef ZSTD_COMMON_CPU_H | ||
12 | +#define ZSTD_COMMON_CPU_H | ||
13 | + | ||
14 | +/** | ||
15 | + * Implementation taken from folly/CpuId.h | ||
16 | + * https://github.com/facebook/folly/blob/master/folly/CpuId.h | ||
17 | + */ | ||
18 | + | ||
19 | +#include <string.h> | ||
20 | + | ||
21 | +#include "mem.h" | ||
22 | + | ||
23 | +#ifdef _MSC_VER | ||
24 | +#include <intrin.h> | ||
25 | +#endif | ||
26 | + | ||
27 | +typedef struct { | ||
28 | + U32 f1c; | ||
29 | + U32 f1d; | ||
30 | + U32 f7b; | ||
31 | + U32 f7c; | ||
32 | +} ZSTD_cpuid_t; | ||
33 | + | ||
34 | +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { | ||
35 | + U32 f1c = 0; | ||
36 | + U32 f1d = 0; | ||
37 | + U32 f7b = 0; | ||
38 | + U32 f7c = 0; | ||
39 | +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) | ||
40 | + int reg[4]; | ||
41 | + __cpuid((int*)reg, 0); | ||
42 | + { | ||
43 | + int const n = reg[0]; | ||
44 | + if (n >= 1) { | ||
45 | + __cpuid((int*)reg, 1); | ||
46 | + f1c = (U32)reg[2]; | ||
47 | + f1d = (U32)reg[3]; | ||
48 | + } | ||
49 | + if (n >= 7) { | ||
50 | + __cpuidex((int*)reg, 7, 0); | ||
51 | + f7b = (U32)reg[1]; | ||
52 | + f7c = (U32)reg[2]; | ||
53 | + } | ||
54 | + } | ||
55 | +#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) | ||
56 | + /* The following block like the normal cpuid branch below, but gcc | ||
57 | + * reserves ebx for use of its pic register so we must specially | ||
58 | + * handle the save and restore to avoid clobbering the register | ||
59 | + */ | ||
60 | + U32 n; | ||
61 | + __asm__( | ||
62 | + "pushl %%ebx\n\t" | ||
63 | + "cpuid\n\t" | ||
64 | + "popl %%ebx\n\t" | ||
65 | + : "=a"(n) | ||
66 | + : "a"(0) | ||
67 | + : "ecx", "edx"); | ||
68 | + if (n >= 1) { | ||
69 | + U32 f1a; | ||
70 | + __asm__( | ||
71 | + "pushl %%ebx\n\t" | ||
72 | + "cpuid\n\t" | ||
73 | + "popl %%ebx\n\t" | ||
74 | + : "=a"(f1a), "=c"(f1c), "=d"(f1d) | ||
75 | + : "a"(1)); | ||
76 | + } | ||
77 | + if (n >= 7) { | ||
78 | + __asm__( | ||
79 | + "pushl %%ebx\n\t" | ||
80 | + "cpuid\n\t" | ||
81 | + "movl %%ebx, %%eax\n\t" | ||
82 | + "popl %%ebx" | ||
83 | + : "=a"(f7b), "=c"(f7c) | ||
84 | + : "a"(7), "c"(0) | ||
85 | + : "edx"); | ||
86 | + } | ||
87 | +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) | ||
88 | + U32 n; | ||
89 | + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); | ||
90 | + if (n >= 1) { | ||
91 | + U32 f1a; | ||
92 | + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); | ||
93 | + } | ||
94 | + if (n >= 7) { | ||
95 | + U32 f7a; | ||
96 | + __asm__("cpuid" | ||
97 | + : "=a"(f7a), "=b"(f7b), "=c"(f7c) | ||
98 | + : "a"(7), "c"(0) | ||
99 | + : "edx"); | ||
100 | + } | ||
101 | +#endif | ||
102 | + { | ||
103 | + ZSTD_cpuid_t cpuid; | ||
104 | + cpuid.f1c = f1c; | ||
105 | + cpuid.f1d = f1d; | ||
106 | + cpuid.f7b = f7b; | ||
107 | + cpuid.f7c = f7c; | ||
108 | + return cpuid; | ||
109 | + } | ||
110 | +} | ||
111 | + | ||
112 | +#define X(name, r, bit) \ | ||
113 | + MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ | ||
114 | + return ((cpuid.r) & (1U << bit)) != 0; \ | ||
115 | + } | ||
116 | + | ||
117 | +/* cpuid(1): Processor Info and Feature Bits. */ | ||
118 | +#define C(name, bit) X(name, f1c, bit) | ||
119 | + C(sse3, 0) | ||
120 | + C(pclmuldq, 1) | ||
121 | + C(dtes64, 2) | ||
122 | + C(monitor, 3) | ||
123 | + C(dscpl, 4) | ||
124 | + C(vmx, 5) | ||
125 | + C(smx, 6) | ||
126 | + C(eist, 7) | ||
127 | + C(tm2, 8) | ||
128 | + C(ssse3, 9) | ||
129 | + C(cnxtid, 10) | ||
130 | + C(fma, 12) | ||
131 | + C(cx16, 13) | ||
132 | + C(xtpr, 14) | ||
133 | + C(pdcm, 15) | ||
134 | + C(pcid, 17) | ||
135 | + C(dca, 18) | ||
136 | + C(sse41, 19) | ||
137 | + C(sse42, 20) | ||
138 | + C(x2apic, 21) | ||
139 | + C(movbe, 22) | ||
140 | + C(popcnt, 23) | ||
141 | + C(tscdeadline, 24) | ||
142 | + C(aes, 25) | ||
143 | + C(xsave, 26) | ||
144 | + C(osxsave, 27) | ||
145 | + C(avx, 28) | ||
146 | + C(f16c, 29) | ||
147 | + C(rdrand, 30) | ||
148 | +#undef C | ||
149 | +#define D(name, bit) X(name, f1d, bit) | ||
150 | + D(fpu, 0) | ||
151 | + D(vme, 1) | ||
152 | + D(de, 2) | ||
153 | + D(pse, 3) | ||
154 | + D(tsc, 4) | ||
155 | + D(msr, 5) | ||
156 | + D(pae, 6) | ||
157 | + D(mce, 7) | ||
158 | + D(cx8, 8) | ||
159 | + D(apic, 9) | ||
160 | + D(sep, 11) | ||
161 | + D(mtrr, 12) | ||
162 | + D(pge, 13) | ||
163 | + D(mca, 14) | ||
164 | + D(cmov, 15) | ||
165 | + D(pat, 16) | ||
166 | + D(pse36, 17) | ||
167 | + D(psn, 18) | ||
168 | + D(clfsh, 19) | ||
169 | + D(ds, 21) | ||
170 | + D(acpi, 22) | ||
171 | + D(mmx, 23) | ||
172 | + D(fxsr, 24) | ||
173 | + D(sse, 25) | ||
174 | + D(sse2, 26) | ||
175 | + D(ss, 27) | ||
176 | + D(htt, 28) | ||
177 | + D(tm, 29) | ||
178 | + D(pbe, 31) | ||
179 | +#undef D | ||
180 | + | ||
181 | +/* cpuid(7): Extended Features. */ | ||
182 | +#define B(name, bit) X(name, f7b, bit) | ||
183 | + B(bmi1, 3) | ||
184 | + B(hle, 4) | ||
185 | + B(avx2, 5) | ||
186 | + B(smep, 7) | ||
187 | + B(bmi2, 8) | ||
188 | + B(erms, 9) | ||
189 | + B(invpcid, 10) | ||
190 | + B(rtm, 11) | ||
191 | + B(mpx, 14) | ||
192 | + B(avx512f, 16) | ||
193 | + B(avx512dq, 17) | ||
194 | + B(rdseed, 18) | ||
195 | + B(adx, 19) | ||
196 | + B(smap, 20) | ||
197 | + B(avx512ifma, 21) | ||
198 | + B(pcommit, 22) | ||
199 | + B(clflushopt, 23) | ||
200 | + B(clwb, 24) | ||
201 | + B(avx512pf, 26) | ||
202 | + B(avx512er, 27) | ||
203 | + B(avx512cd, 28) | ||
204 | + B(sha, 29) | ||
205 | + B(avx512bw, 30) | ||
206 | + B(avx512vl, 31) | ||
207 | +#undef B | ||
208 | +#define C(name, bit) X(name, f7c, bit) | ||
209 | + C(prefetchwt1, 0) | ||
210 | + C(avx512vbmi, 1) | ||
211 | +#undef C | ||
212 | + | ||
213 | +#undef X | ||
214 | + | ||
215 | +#endif /* ZSTD_COMMON_CPU_H */ |
vendor/github.com/DataDog/zstd/debug.c
0 → 100644
1 | +/* ****************************************************************** | ||
2 | + debug | ||
3 | + Part of FSE library | ||
4 | + Copyright (C) 2013-present, Yann Collet. | ||
5 | + | ||
6 | + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | ||
7 | + | ||
8 | + Redistribution and use in source and binary forms, with or without | ||
9 | + modification, are permitted provided that the following conditions are | ||
10 | + met: | ||
11 | + | ||
12 | + * Redistributions of source code must retain the above copyright | ||
13 | + notice, this list of conditions and the following disclaimer. | ||
14 | + * Redistributions in binary form must reproduce the above | ||
15 | + copyright notice, this list of conditions and the following disclaimer | ||
16 | + in the documentation and/or other materials provided with the | ||
17 | + distribution. | ||
18 | + | ||
19 | + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
20 | + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
21 | + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
22 | + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
23 | + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
24 | + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
25 | + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
26 | + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
27 | + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
28 | + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
29 | + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
30 | + | ||
31 | + You can contact the author at : | ||
32 | + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | ||
33 | +****************************************************************** */ | ||
34 | + | ||
35 | + | ||
36 | +/* | ||
37 | + * This module only hosts one global variable | ||
38 | + * which can be used to dynamically influence the verbosity of traces, | ||
39 | + * such as DEBUGLOG and RAWLOG | ||
40 | + */ | ||
41 | + | ||
42 | +#include "debug.h" | ||
43 | + | ||
44 | +int g_debuglevel = DEBUGLEVEL; |
vendor/github.com/DataDog/zstd/debug.h
0 → 100644
1 | +/* ****************************************************************** | ||
2 | + debug | ||
3 | + Part of FSE library | ||
4 | + Copyright (C) 2013-present, Yann Collet. | ||
5 | + | ||
6 | + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | ||
7 | + | ||
8 | + Redistribution and use in source and binary forms, with or without | ||
9 | + modification, are permitted provided that the following conditions are | ||
10 | + met: | ||
11 | + | ||
12 | + * Redistributions of source code must retain the above copyright | ||
13 | + notice, this list of conditions and the following disclaimer. | ||
14 | + * Redistributions in binary form must reproduce the above | ||
15 | + copyright notice, this list of conditions and the following disclaimer | ||
16 | + in the documentation and/or other materials provided with the | ||
17 | + distribution. | ||
18 | + | ||
19 | + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
20 | + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
21 | + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
22 | + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
23 | + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
24 | + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
25 | + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
26 | + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
27 | + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
28 | + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
29 | + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
30 | + | ||
31 | + You can contact the author at : | ||
32 | + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy | ||
33 | +****************************************************************** */ | ||
34 | + | ||
35 | + | ||
36 | +/* | ||
37 | + * The purpose of this header is to enable debug functions. | ||
38 | + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, | ||
39 | + * and DEBUG_STATIC_ASSERT() for compile-time. | ||
40 | + * | ||
41 | + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. | ||
42 | + * | ||
43 | + * Level 1 enables assert() only. | ||
44 | + * Starting level 2, traces can be generated and pushed to stderr. | ||
45 | + * The higher the level, the more verbose the traces. | ||
46 | + * | ||
47 | + * It's possible to dynamically adjust level using variable g_debug_level, | ||
48 | + * which is only declared if DEBUGLEVEL>=2, | ||
49 | + * and is a global variable, not multi-thread protected (use with care) | ||
50 | + */ | ||
51 | + | ||
52 | +#ifndef DEBUG_H_12987983217 | ||
53 | +#define DEBUG_H_12987983217 | ||
54 | + | ||
55 | +#if defined (__cplusplus) | ||
56 | +extern "C" { | ||
57 | +#endif | ||
58 | + | ||
59 | + | ||
60 | +/* static assert is triggered at compile time, leaving no runtime artefact. | ||
61 | + * static assert only works with compile-time constants. | ||
62 | + * Also, this variant can only be used inside a function. */ | ||
63 | +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) | ||
64 | + | ||
65 | + | ||
66 | +/* DEBUGLEVEL is expected to be defined externally, | ||
67 | + * typically through compiler command line. | ||
68 | + * Value must be a number. */ | ||
69 | +#ifndef DEBUGLEVEL | ||
70 | +# define DEBUGLEVEL 0 | ||
71 | +#endif | ||
72 | + | ||
73 | + | ||
74 | +/* DEBUGFILE can be defined externally, | ||
75 | + * typically through compiler command line. | ||
76 | + * note : currently useless. | ||
77 | + * Value must be stderr or stdout */ | ||
78 | +#ifndef DEBUGFILE | ||
79 | +# define DEBUGFILE stderr | ||
80 | +#endif | ||
81 | + | ||
82 | + | ||
83 | +/* recommended values for DEBUGLEVEL : | ||
84 | + * 0 : release mode, no debug, all run-time checks disabled | ||
85 | + * 1 : enables assert() only, no display | ||
86 | + * 2 : reserved, for currently active debug path | ||
87 | + * 3 : events once per object lifetime (CCtx, CDict, etc.) | ||
88 | + * 4 : events once per frame | ||
89 | + * 5 : events once per block | ||
90 | + * 6 : events once per sequence (verbose) | ||
91 | + * 7+: events at every position (*very* verbose) | ||
92 | + * | ||
93 | + * It's generally inconvenient to output traces > 5. | ||
94 | + * In which case, it's possible to selectively trigger high verbosity levels | ||
95 | + * by modifying g_debug_level. | ||
96 | + */ | ||
97 | + | ||
98 | +#if (DEBUGLEVEL>=1) | ||
99 | +# include <assert.h> | ||
100 | +#else | ||
101 | +# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */ | ||
102 | +# define assert(condition) ((void)0) /* disable assert (default) */ | ||
103 | +# endif | ||
104 | +#endif | ||
105 | + | ||
106 | +#if (DEBUGLEVEL>=2) | ||
107 | +# include <stdio.h> | ||
108 | +extern int g_debuglevel; /* the variable is only declared, | ||
109 | + it actually lives in debug.c, | ||
110 | + and is shared by the whole process. | ||
111 | + It's not thread-safe. | ||
112 | + It's useful when enabling very verbose levels | ||
113 | + on selective conditions (such as position in src) */ | ||
114 | + | ||
115 | +# define RAWLOG(l, ...) { \ | ||
116 | + if (l<=g_debuglevel) { \ | ||
117 | + fprintf(stderr, __VA_ARGS__); \ | ||
118 | + } } | ||
119 | +# define DEBUGLOG(l, ...) { \ | ||
120 | + if (l<=g_debuglevel) { \ | ||
121 | + fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ | ||
122 | + fprintf(stderr, " \n"); \ | ||
123 | + } } | ||
124 | +#else | ||
125 | +# define RAWLOG(l, ...) {} /* disabled */ | ||
126 | +# define DEBUGLOG(l, ...) {} /* disabled */ | ||
127 | +#endif | ||
128 | + | ||
129 | + | ||
130 | +#if defined (__cplusplus) | ||
131 | +} | ||
132 | +#endif | ||
133 | + | ||
134 | +#endif /* DEBUG_H_12987983217 */ |
vendor/github.com/DataDog/zstd/divsufsort.c
0 → 100644
1 | +/* | ||
2 | + * divsufsort.c for libdivsufsort-lite | ||
3 | + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. | ||
4 | + * | ||
5 | + * Permission is hereby granted, free of charge, to any person | ||
6 | + * obtaining a copy of this software and associated documentation | ||
7 | + * files (the "Software"), to deal in the Software without | ||
8 | + * restriction, including without limitation the rights to use, | ||
9 | + * copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
10 | + * copies of the Software, and to permit persons to whom the | ||
11 | + * Software is furnished to do so, subject to the following | ||
12 | + * conditions: | ||
13 | + * | ||
14 | + * The above copyright notice and this permission notice shall be | ||
15 | + * included in all copies or substantial portions of the Software. | ||
16 | + * | ||
17 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
18 | + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | ||
19 | + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
20 | + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | ||
21 | + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
22 | + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
23 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
24 | + * OTHER DEALINGS IN THE SOFTWARE. | ||
25 | + */ | ||
26 | + | ||
27 | +/*- Compiler specifics -*/ | ||
28 | +#ifdef __clang__ | ||
29 | +#pragma clang diagnostic ignored "-Wshorten-64-to-32" | ||
30 | +#endif | ||
31 | + | ||
32 | +#if defined(_MSC_VER) | ||
33 | +# pragma warning(disable : 4244) | ||
34 | +# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */ | ||
35 | +#endif | ||
36 | + | ||
37 | + | ||
38 | +/*- Dependencies -*/ | ||
39 | +#include <assert.h> | ||
40 | +#include <stdio.h> | ||
41 | +#include <stdlib.h> | ||
42 | + | ||
43 | +#include "divsufsort.h" | ||
44 | + | ||
45 | +/*- Constants -*/ | ||
46 | +#if defined(INLINE) | ||
47 | +# undef INLINE | ||
48 | +#endif | ||
49 | +#if !defined(INLINE) | ||
50 | +# define INLINE __inline | ||
51 | +#endif | ||
52 | +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) | ||
53 | +# undef ALPHABET_SIZE | ||
54 | +#endif | ||
55 | +#if !defined(ALPHABET_SIZE) | ||
56 | +# define ALPHABET_SIZE (256) | ||
57 | +#endif | ||
58 | +#define BUCKET_A_SIZE (ALPHABET_SIZE) | ||
59 | +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) | ||
60 | +#if defined(SS_INSERTIONSORT_THRESHOLD) | ||
61 | +# if SS_INSERTIONSORT_THRESHOLD < 1 | ||
62 | +# undef SS_INSERTIONSORT_THRESHOLD | ||
63 | +# define SS_INSERTIONSORT_THRESHOLD (1) | ||
64 | +# endif | ||
65 | +#else | ||
66 | +# define SS_INSERTIONSORT_THRESHOLD (8) | ||
67 | +#endif | ||
68 | +#if defined(SS_BLOCKSIZE) | ||
69 | +# if SS_BLOCKSIZE < 0 | ||
70 | +# undef SS_BLOCKSIZE | ||
71 | +# define SS_BLOCKSIZE (0) | ||
72 | +# elif 32768 <= SS_BLOCKSIZE | ||
73 | +# undef SS_BLOCKSIZE | ||
74 | +# define SS_BLOCKSIZE (32767) | ||
75 | +# endif | ||
76 | +#else | ||
77 | +# define SS_BLOCKSIZE (1024) | ||
78 | +#endif | ||
79 | +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ | ||
80 | +#if SS_BLOCKSIZE == 0 | ||
81 | +# define SS_MISORT_STACKSIZE (96) | ||
82 | +#elif SS_BLOCKSIZE <= 4096 | ||
83 | +# define SS_MISORT_STACKSIZE (16) | ||
84 | +#else | ||
85 | +# define SS_MISORT_STACKSIZE (24) | ||
86 | +#endif | ||
87 | +#define SS_SMERGE_STACKSIZE (32) | ||
88 | +#define TR_INSERTIONSORT_THRESHOLD (8) | ||
89 | +#define TR_STACKSIZE (64) | ||
90 | + | ||
91 | + | ||
92 | +/*- Macros -*/ | ||
93 | +#ifndef SWAP | ||
94 | +# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) | ||
95 | +#endif /* SWAP */ | ||
96 | +#ifndef MIN | ||
97 | +# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) | ||
98 | +#endif /* MIN */ | ||
99 | +#ifndef MAX | ||
100 | +# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b)) | ||
101 | +#endif /* MAX */ | ||
102 | +#define STACK_PUSH(_a, _b, _c, _d)\ | ||
103 | + do {\ | ||
104 | + assert(ssize < STACK_SIZE);\ | ||
105 | + stack[ssize].a = (_a), stack[ssize].b = (_b),\ | ||
106 | + stack[ssize].c = (_c), stack[ssize++].d = (_d);\ | ||
107 | + } while(0) | ||
108 | +#define STACK_PUSH5(_a, _b, _c, _d, _e)\ | ||
109 | + do {\ | ||
110 | + assert(ssize < STACK_SIZE);\ | ||
111 | + stack[ssize].a = (_a), stack[ssize].b = (_b),\ | ||
112 | + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ | ||
113 | + } while(0) | ||
114 | +#define STACK_POP(_a, _b, _c, _d)\ | ||
115 | + do {\ | ||
116 | + assert(0 <= ssize);\ | ||
117 | + if(ssize == 0) { return; }\ | ||
118 | + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ | ||
119 | + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ | ||
120 | + } while(0) | ||
121 | +#define STACK_POP5(_a, _b, _c, _d, _e)\ | ||
122 | + do {\ | ||
123 | + assert(0 <= ssize);\ | ||
124 | + if(ssize == 0) { return; }\ | ||
125 | + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ | ||
126 | + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ | ||
127 | + } while(0) | ||
128 | +#define BUCKET_A(_c0) bucket_A[(_c0)] | ||
129 | +#if ALPHABET_SIZE == 256 | ||
130 | +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) | ||
131 | +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) | ||
132 | +#else | ||
133 | +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) | ||
134 | +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) | ||
135 | +#endif | ||
136 | + | ||
137 | + | ||
138 | +/*- Private Functions -*/ | ||
139 | + | ||
140 | +static const int lg_table[256]= { | ||
141 | + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, | ||
142 | + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, | ||
143 | + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, | ||
144 | + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, | ||
145 | + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, | ||
146 | + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, | ||
147 | + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, | ||
148 | + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 | ||
149 | +}; | ||
150 | + | ||
151 | +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) | ||
152 | + | ||
153 | +static INLINE | ||
154 | +int | ||
155 | +ss_ilg(int n) { | ||
156 | +#if SS_BLOCKSIZE == 0 | ||
157 | + return (n & 0xffff0000) ? | ||
158 | + ((n & 0xff000000) ? | ||
159 | + 24 + lg_table[(n >> 24) & 0xff] : | ||
160 | + 16 + lg_table[(n >> 16) & 0xff]) : | ||
161 | + ((n & 0x0000ff00) ? | ||
162 | + 8 + lg_table[(n >> 8) & 0xff] : | ||
163 | + 0 + lg_table[(n >> 0) & 0xff]); | ||
164 | +#elif SS_BLOCKSIZE < 256 | ||
165 | + return lg_table[n]; | ||
166 | +#else | ||
167 | + return (n & 0xff00) ? | ||
168 | + 8 + lg_table[(n >> 8) & 0xff] : | ||
169 | + 0 + lg_table[(n >> 0) & 0xff]; | ||
170 | +#endif | ||
171 | +} | ||
172 | + | ||
173 | +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ | ||
174 | + | ||
175 | +#if SS_BLOCKSIZE != 0 | ||
176 | + | ||
177 | +static const int sqq_table[256] = { | ||
178 | + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, | ||
179 | + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, | ||
180 | + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, | ||
181 | +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, | ||
182 | +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, | ||
183 | +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, | ||
184 | +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, | ||
185 | +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, | ||
186 | +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, | ||
187 | +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, | ||
188 | +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, | ||
189 | +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, | ||
190 | +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, | ||
191 | +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, | ||
192 | +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, | ||
193 | +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 | ||
194 | +}; | ||
195 | + | ||
196 | +static INLINE | ||
197 | +int | ||
198 | +ss_isqrt(int x) { | ||
199 | + int y, e; | ||
200 | + | ||
201 | + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } | ||
202 | + e = (x & 0xffff0000) ? | ||
203 | + ((x & 0xff000000) ? | ||
204 | + 24 + lg_table[(x >> 24) & 0xff] : | ||
205 | + 16 + lg_table[(x >> 16) & 0xff]) : | ||
206 | + ((x & 0x0000ff00) ? | ||
207 | + 8 + lg_table[(x >> 8) & 0xff] : | ||
208 | + 0 + lg_table[(x >> 0) & 0xff]); | ||
209 | + | ||
210 | + if(e >= 16) { | ||
211 | + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); | ||
212 | + if(e >= 24) { y = (y + 1 + x / y) >> 1; } | ||
213 | + y = (y + 1 + x / y) >> 1; | ||
214 | + } else if(e >= 8) { | ||
215 | + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; | ||
216 | + } else { | ||
217 | + return sqq_table[x] >> 4; | ||
218 | + } | ||
219 | + | ||
220 | + return (x < (y * y)) ? y - 1 : y; | ||
221 | +} | ||
222 | + | ||
223 | +#endif /* SS_BLOCKSIZE != 0 */ | ||
224 | + | ||
225 | + | ||
226 | +/*---------------------------------------------------------------------------*/ | ||
227 | + | ||
228 | +/* Compares two suffixes. */ | ||
229 | +static INLINE | ||
230 | +int | ||
231 | +ss_compare(const unsigned char *T, | ||
232 | + const int *p1, const int *p2, | ||
233 | + int depth) { | ||
234 | + const unsigned char *U1, *U2, *U1n, *U2n; | ||
235 | + | ||
236 | + for(U1 = T + depth + *p1, | ||
237 | + U2 = T + depth + *p2, | ||
238 | + U1n = T + *(p1 + 1) + 2, | ||
239 | + U2n = T + *(p2 + 1) + 2; | ||
240 | + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); | ||
241 | + ++U1, ++U2) { | ||
242 | + } | ||
243 | + | ||
244 | + return U1 < U1n ? | ||
245 | + (U2 < U2n ? *U1 - *U2 : 1) : | ||
246 | + (U2 < U2n ? -1 : 0); | ||
247 | +} | ||
248 | + | ||
249 | + | ||
250 | +/*---------------------------------------------------------------------------*/ | ||
251 | + | ||
252 | +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) | ||
253 | + | ||
254 | +/* Insertionsort for small size groups */ | ||
255 | +static | ||
256 | +void | ||
257 | +ss_insertionsort(const unsigned char *T, const int *PA, | ||
258 | + int *first, int *last, int depth) { | ||
259 | + int *i, *j; | ||
260 | + int t; | ||
261 | + int r; | ||
262 | + | ||
263 | + for(i = last - 2; first <= i; --i) { | ||
264 | + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { | ||
265 | + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); | ||
266 | + if(last <= j) { break; } | ||
267 | + } | ||
268 | + if(r == 0) { *j = ~*j; } | ||
269 | + *(j - 1) = t; | ||
270 | + } | ||
271 | +} | ||
272 | + | ||
273 | +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ | ||
274 | + | ||
275 | + | ||
276 | +/*---------------------------------------------------------------------------*/ | ||
277 | + | ||
278 | +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) | ||
279 | + | ||
280 | +static INLINE | ||
281 | +void | ||
282 | +ss_fixdown(const unsigned char *Td, const int *PA, | ||
283 | + int *SA, int i, int size) { | ||
284 | + int j, k; | ||
285 | + int v; | ||
286 | + int c, d, e; | ||
287 | + | ||
288 | + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { | ||
289 | + d = Td[PA[SA[k = j++]]]; | ||
290 | + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } | ||
291 | + if(d <= c) { break; } | ||
292 | + } | ||
293 | + SA[i] = v; | ||
294 | +} | ||
295 | + | ||
296 | +/* Simple top-down heapsort. */ | ||
297 | +static | ||
298 | +void | ||
299 | +ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { | ||
300 | + int i, m; | ||
301 | + int t; | ||
302 | + | ||
303 | + m = size; | ||
304 | + if((size % 2) == 0) { | ||
305 | + m--; | ||
306 | + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } | ||
307 | + } | ||
308 | + | ||
309 | + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } | ||
310 | + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } | ||
311 | + for(i = m - 1; 0 < i; --i) { | ||
312 | + t = SA[0], SA[0] = SA[i]; | ||
313 | + ss_fixdown(Td, PA, SA, 0, i); | ||
314 | + SA[i] = t; | ||
315 | + } | ||
316 | +} | ||
317 | + | ||
318 | + | ||
319 | +/*---------------------------------------------------------------------------*/ | ||
320 | + | ||
321 | +/* Returns the median of three elements. */ | ||
322 | +static INLINE | ||
323 | +int * | ||
324 | +ss_median3(const unsigned char *Td, const int *PA, | ||
325 | + int *v1, int *v2, int *v3) { | ||
326 | + int *t; | ||
327 | + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } | ||
328 | + if(Td[PA[*v2]] > Td[PA[*v3]]) { | ||
329 | + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } | ||
330 | + else { return v3; } | ||
331 | + } | ||
332 | + return v2; | ||
333 | +} | ||
334 | + | ||
335 | +/* Returns the median of five elements. */ | ||
336 | +static INLINE | ||
337 | +int * | ||
338 | +ss_median5(const unsigned char *Td, const int *PA, | ||
339 | + int *v1, int *v2, int *v3, int *v4, int *v5) { | ||
340 | + int *t; | ||
341 | + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } | ||
342 | + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } | ||
343 | + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } | ||
344 | + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } | ||
345 | + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } | ||
346 | + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } | ||
347 | + return v3; | ||
348 | +} | ||
349 | + | ||
350 | +/* Returns the pivot element. */ | ||
351 | +static INLINE | ||
352 | +int * | ||
353 | +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { | ||
354 | + int *middle; | ||
355 | + int t; | ||
356 | + | ||
357 | + t = last - first; | ||
358 | + middle = first + t / 2; | ||
359 | + | ||
360 | + if(t <= 512) { | ||
361 | + if(t <= 32) { | ||
362 | + return ss_median3(Td, PA, first, middle, last - 1); | ||
363 | + } else { | ||
364 | + t >>= 2; | ||
365 | + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); | ||
366 | + } | ||
367 | + } | ||
368 | + t >>= 3; | ||
369 | + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); | ||
370 | + middle = ss_median3(Td, PA, middle - t, middle, middle + t); | ||
371 | + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); | ||
372 | + return ss_median3(Td, PA, first, middle, last); | ||
373 | +} | ||
374 | + | ||
375 | + | ||
376 | +/*---------------------------------------------------------------------------*/ | ||
377 | + | ||
378 | +/* Binary partition for substrings. */ | ||
379 | +static INLINE | ||
380 | +int * | ||
381 | +ss_partition(const int *PA, | ||
382 | + int *first, int *last, int depth) { | ||
383 | + int *a, *b; | ||
384 | + int t; | ||
385 | + for(a = first - 1, b = last;;) { | ||
386 | + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } | ||
387 | + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } | ||
388 | + if(b <= a) { break; } | ||
389 | + t = ~*b; | ||
390 | + *b = *a; | ||
391 | + *a = t; | ||
392 | + } | ||
393 | + if(first < a) { *first = ~*first; } | ||
394 | + return a; | ||
395 | +} | ||
396 | + | ||
397 | +/* Multikey introsort for medium size groups. */ | ||
398 | +static | ||
399 | +void | ||
400 | +ss_mintrosort(const unsigned char *T, const int *PA, | ||
401 | + int *first, int *last, | ||
402 | + int depth) { | ||
403 | +#define STACK_SIZE SS_MISORT_STACKSIZE | ||
404 | + struct { int *a, *b, c; int d; } stack[STACK_SIZE]; | ||
405 | + const unsigned char *Td; | ||
406 | + int *a, *b, *c, *d, *e, *f; | ||
407 | + int s, t; | ||
408 | + int ssize; | ||
409 | + int limit; | ||
410 | + int v, x = 0; | ||
411 | + | ||
412 | + for(ssize = 0, limit = ss_ilg(last - first);;) { | ||
413 | + | ||
414 | + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { | ||
415 | +#if 1 < SS_INSERTIONSORT_THRESHOLD | ||
416 | + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } | ||
417 | +#endif | ||
418 | + STACK_POP(first, last, depth, limit); | ||
419 | + continue; | ||
420 | + } | ||
421 | + | ||
422 | + Td = T + depth; | ||
423 | + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } | ||
424 | + if(limit < 0) { | ||
425 | + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { | ||
426 | + if((x = Td[PA[*a]]) != v) { | ||
427 | + if(1 < (a - first)) { break; } | ||
428 | + v = x; | ||
429 | + first = a; | ||
430 | + } | ||
431 | + } | ||
432 | + if(Td[PA[*first] - 1] < v) { | ||
433 | + first = ss_partition(PA, first, a, depth); | ||
434 | + } | ||
435 | + if((a - first) <= (last - a)) { | ||
436 | + if(1 < (a - first)) { | ||
437 | + STACK_PUSH(a, last, depth, -1); | ||
438 | + last = a, depth += 1, limit = ss_ilg(a - first); | ||
439 | + } else { | ||
440 | + first = a, limit = -1; | ||
441 | + } | ||
442 | + } else { | ||
443 | + if(1 < (last - a)) { | ||
444 | + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); | ||
445 | + first = a, limit = -1; | ||
446 | + } else { | ||
447 | + last = a, depth += 1, limit = ss_ilg(a - first); | ||
448 | + } | ||
449 | + } | ||
450 | + continue; | ||
451 | + } | ||
452 | + | ||
453 | + /* choose pivot */ | ||
454 | + a = ss_pivot(Td, PA, first, last); | ||
455 | + v = Td[PA[*a]]; | ||
456 | + SWAP(*first, *a); | ||
457 | + | ||
458 | + /* partition */ | ||
459 | + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } | ||
460 | + if(((a = b) < last) && (x < v)) { | ||
461 | + for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { | ||
462 | + if(x == v) { SWAP(*b, *a); ++a; } | ||
463 | + } | ||
464 | + } | ||
465 | + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } | ||
466 | + if((b < (d = c)) && (x > v)) { | ||
467 | + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { | ||
468 | + if(x == v) { SWAP(*c, *d); --d; } | ||
469 | + } | ||
470 | + } | ||
471 | + for(; b < c;) { | ||
472 | + SWAP(*b, *c); | ||
473 | + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { | ||
474 | + if(x == v) { SWAP(*b, *a); ++a; } | ||
475 | + } | ||
476 | + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { | ||
477 | + if(x == v) { SWAP(*c, *d); --d; } | ||
478 | + } | ||
479 | + } | ||
480 | + | ||
481 | + if(a <= d) { | ||
482 | + c = b - 1; | ||
483 | + | ||
484 | + if((s = a - first) > (t = b - a)) { s = t; } | ||
485 | + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } | ||
486 | + if((s = d - c) > (t = last - d - 1)) { s = t; } | ||
487 | + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } | ||
488 | + | ||
489 | + a = first + (b - a), c = last - (d - c); | ||
490 | + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); | ||
491 | + | ||
492 | + if((a - first) <= (last - c)) { | ||
493 | + if((last - c) <= (c - b)) { | ||
494 | + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); | ||
495 | + STACK_PUSH(c, last, depth, limit); | ||
496 | + last = a; | ||
497 | + } else if((a - first) <= (c - b)) { | ||
498 | + STACK_PUSH(c, last, depth, limit); | ||
499 | + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); | ||
500 | + last = a; | ||
501 | + } else { | ||
502 | + STACK_PUSH(c, last, depth, limit); | ||
503 | + STACK_PUSH(first, a, depth, limit); | ||
504 | + first = b, last = c, depth += 1, limit = ss_ilg(c - b); | ||
505 | + } | ||
506 | + } else { | ||
507 | + if((a - first) <= (c - b)) { | ||
508 | + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); | ||
509 | + STACK_PUSH(first, a, depth, limit); | ||
510 | + first = c; | ||
511 | + } else if((last - c) <= (c - b)) { | ||
512 | + STACK_PUSH(first, a, depth, limit); | ||
513 | + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); | ||
514 | + first = c; | ||
515 | + } else { | ||
516 | + STACK_PUSH(first, a, depth, limit); | ||
517 | + STACK_PUSH(c, last, depth, limit); | ||
518 | + first = b, last = c, depth += 1, limit = ss_ilg(c - b); | ||
519 | + } | ||
520 | + } | ||
521 | + } else { | ||
522 | + limit += 1; | ||
523 | + if(Td[PA[*first] - 1] < v) { | ||
524 | + first = ss_partition(PA, first, last, depth); | ||
525 | + limit = ss_ilg(last - first); | ||
526 | + } | ||
527 | + depth += 1; | ||
528 | + } | ||
529 | + } | ||
530 | +#undef STACK_SIZE | ||
531 | +} | ||
532 | + | ||
533 | +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ | ||
534 | + | ||
535 | + | ||
536 | +/*---------------------------------------------------------------------------*/ | ||
537 | + | ||
538 | +#if SS_BLOCKSIZE != 0 | ||
539 | + | ||
540 | +static INLINE | ||
541 | +void | ||
542 | +ss_blockswap(int *a, int *b, int n) { | ||
543 | + int t; | ||
544 | + for(; 0 < n; --n, ++a, ++b) { | ||
545 | + t = *a, *a = *b, *b = t; | ||
546 | + } | ||
547 | +} | ||
548 | + | ||
549 | +static INLINE | ||
550 | +void | ||
551 | +ss_rotate(int *first, int *middle, int *last) { | ||
552 | + int *a, *b, t; | ||
553 | + int l, r; | ||
554 | + l = middle - first, r = last - middle; | ||
555 | + for(; (0 < l) && (0 < r);) { | ||
556 | + if(l == r) { ss_blockswap(first, middle, l); break; } | ||
557 | + if(l < r) { | ||
558 | + a = last - 1, b = middle - 1; | ||
559 | + t = *a; | ||
560 | + do { | ||
561 | + *a-- = *b, *b-- = *a; | ||
562 | + if(b < first) { | ||
563 | + *a = t; | ||
564 | + last = a; | ||
565 | + if((r -= l + 1) <= l) { break; } | ||
566 | + a -= 1, b = middle - 1; | ||
567 | + t = *a; | ||
568 | + } | ||
569 | + } while(1); | ||
570 | + } else { | ||
571 | + a = first, b = middle; | ||
572 | + t = *a; | ||
573 | + do { | ||
574 | + *a++ = *b, *b++ = *a; | ||
575 | + if(last <= b) { | ||
576 | + *a = t; | ||
577 | + first = a + 1; | ||
578 | + if((l -= r + 1) <= r) { break; } | ||
579 | + a += 1, b = middle; | ||
580 | + t = *a; | ||
581 | + } | ||
582 | + } while(1); | ||
583 | + } | ||
584 | + } | ||
585 | +} | ||
586 | + | ||
587 | + | ||
588 | +/*---------------------------------------------------------------------------*/ | ||
589 | + | ||
590 | +static | ||
591 | +void | ||
592 | +ss_inplacemerge(const unsigned char *T, const int *PA, | ||
593 | + int *first, int *middle, int *last, | ||
594 | + int depth) { | ||
595 | + const int *p; | ||
596 | + int *a, *b; | ||
597 | + int len, half; | ||
598 | + int q, r; | ||
599 | + int x; | ||
600 | + | ||
601 | + for(;;) { | ||
602 | + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } | ||
603 | + else { x = 0; p = PA + *(last - 1); } | ||
604 | + for(a = first, len = middle - first, half = len >> 1, r = -1; | ||
605 | + 0 < len; | ||
606 | + len = half, half >>= 1) { | ||
607 | + b = a + half; | ||
608 | + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); | ||
609 | + if(q < 0) { | ||
610 | + a = b + 1; | ||
611 | + half -= (len & 1) ^ 1; | ||
612 | + } else { | ||
613 | + r = q; | ||
614 | + } | ||
615 | + } | ||
616 | + if(a < middle) { | ||
617 | + if(r == 0) { *a = ~*a; } | ||
618 | + ss_rotate(a, middle, last); | ||
619 | + last -= middle - a; | ||
620 | + middle = a; | ||
621 | + if(first == middle) { break; } | ||
622 | + } | ||
623 | + --last; | ||
624 | + if(x != 0) { while(*--last < 0) { } } | ||
625 | + if(middle == last) { break; } | ||
626 | + } | ||
627 | +} | ||
628 | + | ||
629 | + | ||
630 | +/*---------------------------------------------------------------------------*/ | ||
631 | + | ||
632 | +/* Merge-forward with internal buffer. */ | ||
633 | +static | ||
634 | +void | ||
635 | +ss_mergeforward(const unsigned char *T, const int *PA, | ||
636 | + int *first, int *middle, int *last, | ||
637 | + int *buf, int depth) { | ||
638 | + int *a, *b, *c, *bufend; | ||
639 | + int t; | ||
640 | + int r; | ||
641 | + | ||
642 | + bufend = buf + (middle - first) - 1; | ||
643 | + ss_blockswap(buf, first, middle - first); | ||
644 | + | ||
645 | + for(t = *(a = first), b = buf, c = middle;;) { | ||
646 | + r = ss_compare(T, PA + *b, PA + *c, depth); | ||
647 | + if(r < 0) { | ||
648 | + do { | ||
649 | + *a++ = *b; | ||
650 | + if(bufend <= b) { *bufend = t; return; } | ||
651 | + *b++ = *a; | ||
652 | + } while(*b < 0); | ||
653 | + } else if(r > 0) { | ||
654 | + do { | ||
655 | + *a++ = *c, *c++ = *a; | ||
656 | + if(last <= c) { | ||
657 | + while(b < bufend) { *a++ = *b, *b++ = *a; } | ||
658 | + *a = *b, *b = t; | ||
659 | + return; | ||
660 | + } | ||
661 | + } while(*c < 0); | ||
662 | + } else { | ||
663 | + *c = ~*c; | ||
664 | + do { | ||
665 | + *a++ = *b; | ||
666 | + if(bufend <= b) { *bufend = t; return; } | ||
667 | + *b++ = *a; | ||
668 | + } while(*b < 0); | ||
669 | + | ||
670 | + do { | ||
671 | + *a++ = *c, *c++ = *a; | ||
672 | + if(last <= c) { | ||
673 | + while(b < bufend) { *a++ = *b, *b++ = *a; } | ||
674 | + *a = *b, *b = t; | ||
675 | + return; | ||
676 | + } | ||
677 | + } while(*c < 0); | ||
678 | + } | ||
679 | + } | ||
680 | +} | ||
681 | + | ||
682 | +/* Merge-backward with internal buffer. */ | ||
683 | +static | ||
684 | +void | ||
685 | +ss_mergebackward(const unsigned char *T, const int *PA, | ||
686 | + int *first, int *middle, int *last, | ||
687 | + int *buf, int depth) { | ||
688 | + const int *p1, *p2; | ||
689 | + int *a, *b, *c, *bufend; | ||
690 | + int t; | ||
691 | + int r; | ||
692 | + int x; | ||
693 | + | ||
694 | + bufend = buf + (last - middle) - 1; | ||
695 | + ss_blockswap(buf, middle, last - middle); | ||
696 | + | ||
697 | + x = 0; | ||
698 | + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } | ||
699 | + else { p1 = PA + *bufend; } | ||
700 | + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } | ||
701 | + else { p2 = PA + *(middle - 1); } | ||
702 | + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { | ||
703 | + r = ss_compare(T, p1, p2, depth); | ||
704 | + if(0 < r) { | ||
705 | + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } | ||
706 | + *a-- = *b; | ||
707 | + if(b <= buf) { *buf = t; break; } | ||
708 | + *b-- = *a; | ||
709 | + if(*b < 0) { p1 = PA + ~*b; x |= 1; } | ||
710 | + else { p1 = PA + *b; } | ||
711 | + } else if(r < 0) { | ||
712 | + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } | ||
713 | + *a-- = *c, *c-- = *a; | ||
714 | + if(c < first) { | ||
715 | + while(buf < b) { *a-- = *b, *b-- = *a; } | ||
716 | + *a = *b, *b = t; | ||
717 | + break; | ||
718 | + } | ||
719 | + if(*c < 0) { p2 = PA + ~*c; x |= 2; } | ||
720 | + else { p2 = PA + *c; } | ||
721 | + } else { | ||
722 | + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } | ||
723 | + *a-- = ~*b; | ||
724 | + if(b <= buf) { *buf = t; break; } | ||
725 | + *b-- = *a; | ||
726 | + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } | ||
727 | + *a-- = *c, *c-- = *a; | ||
728 | + if(c < first) { | ||
729 | + while(buf < b) { *a-- = *b, *b-- = *a; } | ||
730 | + *a = *b, *b = t; | ||
731 | + break; | ||
732 | + } | ||
733 | + if(*b < 0) { p1 = PA + ~*b; x |= 1; } | ||
734 | + else { p1 = PA + *b; } | ||
735 | + if(*c < 0) { p2 = PA + ~*c; x |= 2; } | ||
736 | + else { p2 = PA + *c; } | ||
737 | + } | ||
738 | + } | ||
739 | +} | ||
740 | + | ||
741 | +/* D&C based merge. */ | ||
742 | +static | ||
743 | +void | ||
744 | +ss_swapmerge(const unsigned char *T, const int *PA, | ||
745 | + int *first, int *middle, int *last, | ||
746 | + int *buf, int bufsize, int depth) { | ||
747 | +#define STACK_SIZE SS_SMERGE_STACKSIZE | ||
748 | +#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) | ||
749 | +#define MERGE_CHECK(a, b, c)\ | ||
750 | + do {\ | ||
751 | + if(((c) & 1) ||\ | ||
752 | + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ | ||
753 | + *(a) = ~*(a);\ | ||
754 | + }\ | ||
755 | + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ | ||
756 | + *(b) = ~*(b);\ | ||
757 | + }\ | ||
758 | + } while(0) | ||
759 | + struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; | ||
760 | + int *l, *r, *lm, *rm; | ||
761 | + int m, len, half; | ||
762 | + int ssize; | ||
763 | + int check, next; | ||
764 | + | ||
765 | + for(check = 0, ssize = 0;;) { | ||
766 | + if((last - middle) <= bufsize) { | ||
767 | + if((first < middle) && (middle < last)) { | ||
768 | + ss_mergebackward(T, PA, first, middle, last, buf, depth); | ||
769 | + } | ||
770 | + MERGE_CHECK(first, last, check); | ||
771 | + STACK_POP(first, middle, last, check); | ||
772 | + continue; | ||
773 | + } | ||
774 | + | ||
775 | + if((middle - first) <= bufsize) { | ||
776 | + if(first < middle) { | ||
777 | + ss_mergeforward(T, PA, first, middle, last, buf, depth); | ||
778 | + } | ||
779 | + MERGE_CHECK(first, last, check); | ||
780 | + STACK_POP(first, middle, last, check); | ||
781 | + continue; | ||
782 | + } | ||
783 | + | ||
784 | + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; | ||
785 | + 0 < len; | ||
786 | + len = half, half >>= 1) { | ||
787 | + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), | ||
788 | + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { | ||
789 | + m += half + 1; | ||
790 | + half -= (len & 1) ^ 1; | ||
791 | + } | ||
792 | + } | ||
793 | + | ||
794 | + if(0 < m) { | ||
795 | + lm = middle - m, rm = middle + m; | ||
796 | + ss_blockswap(lm, middle, m); | ||
797 | + l = r = middle, next = 0; | ||
798 | + if(rm < last) { | ||
799 | + if(*rm < 0) { | ||
800 | + *rm = ~*rm; | ||
801 | + if(first < lm) { for(; *--l < 0;) { } next |= 4; } | ||
802 | + next |= 1; | ||
803 | + } else if(first < lm) { | ||
804 | + for(; *r < 0; ++r) { } | ||
805 | + next |= 2; | ||
806 | + } | ||
807 | + } | ||
808 | + | ||
809 | + if((l - first) <= (last - r)) { | ||
810 | + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); | ||
811 | + middle = lm, last = l, check = (check & 3) | (next & 4); | ||
812 | + } else { | ||
813 | + if((next & 2) && (r == middle)) { next ^= 6; } | ||
814 | + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); | ||
815 | + first = r, middle = rm, check = (next & 3) | (check & 4); | ||
816 | + } | ||
817 | + } else { | ||
818 | + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { | ||
819 | + *middle = ~*middle; | ||
820 | + } | ||
821 | + MERGE_CHECK(first, last, check); | ||
822 | + STACK_POP(first, middle, last, check); | ||
823 | + } | ||
824 | + } | ||
825 | +#undef STACK_SIZE | ||
826 | +} | ||
827 | + | ||
828 | +#endif /* SS_BLOCKSIZE != 0 */ | ||
829 | + | ||
830 | + | ||
831 | +/*---------------------------------------------------------------------------*/ | ||
832 | + | ||
833 | +/* Substring sort */ | ||
834 | +static | ||
835 | +void | ||
836 | +sssort(const unsigned char *T, const int *PA, | ||
837 | + int *first, int *last, | ||
838 | + int *buf, int bufsize, | ||
839 | + int depth, int n, int lastsuffix) { | ||
840 | + int *a; | ||
841 | +#if SS_BLOCKSIZE != 0 | ||
842 | + int *b, *middle, *curbuf; | ||
843 | + int j, k, curbufsize, limit; | ||
844 | +#endif | ||
845 | + int i; | ||
846 | + | ||
847 | + if(lastsuffix != 0) { ++first; } | ||
848 | + | ||
849 | +#if SS_BLOCKSIZE == 0 | ||
850 | + ss_mintrosort(T, PA, first, last, depth); | ||
851 | +#else | ||
852 | + if((bufsize < SS_BLOCKSIZE) && | ||
853 | + (bufsize < (last - first)) && | ||
854 | + (bufsize < (limit = ss_isqrt(last - first)))) { | ||
855 | + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } | ||
856 | + buf = middle = last - limit, bufsize = limit; | ||
857 | + } else { | ||
858 | + middle = last, limit = 0; | ||
859 | + } | ||
860 | + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { | ||
861 | +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE | ||
862 | + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); | ||
863 | +#elif 1 < SS_BLOCKSIZE | ||
864 | + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); | ||
865 | +#endif | ||
866 | + curbufsize = last - (a + SS_BLOCKSIZE); | ||
867 | + curbuf = a + SS_BLOCKSIZE; | ||
868 | + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } | ||
869 | + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { | ||
870 | + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); | ||
871 | + } | ||
872 | + } | ||
873 | +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE | ||
874 | + ss_mintrosort(T, PA, a, middle, depth); | ||
875 | +#elif 1 < SS_BLOCKSIZE | ||
876 | + ss_insertionsort(T, PA, a, middle, depth); | ||
877 | +#endif | ||
878 | + for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { | ||
879 | + if(i & 1) { | ||
880 | + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); | ||
881 | + a -= k; | ||
882 | + } | ||
883 | + } | ||
884 | + if(limit != 0) { | ||
885 | +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE | ||
886 | + ss_mintrosort(T, PA, middle, last, depth); | ||
887 | +#elif 1 < SS_BLOCKSIZE | ||
888 | + ss_insertionsort(T, PA, middle, last, depth); | ||
889 | +#endif | ||
890 | + ss_inplacemerge(T, PA, first, middle, last, depth); | ||
891 | + } | ||
892 | +#endif | ||
893 | + | ||
894 | + if(lastsuffix != 0) { | ||
895 | + /* Insert last type B* suffix. */ | ||
896 | + int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; | ||
897 | + for(a = first, i = *(first - 1); | ||
898 | + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); | ||
899 | + ++a) { | ||
900 | + *(a - 1) = *a; | ||
901 | + } | ||
902 | + *(a - 1) = i; | ||
903 | + } | ||
904 | +} | ||
905 | + | ||
906 | + | ||
907 | +/*---------------------------------------------------------------------------*/ | ||
908 | + | ||
909 | +static INLINE | ||
910 | +int | ||
911 | +tr_ilg(int n) { | ||
912 | + return (n & 0xffff0000) ? | ||
913 | + ((n & 0xff000000) ? | ||
914 | + 24 + lg_table[(n >> 24) & 0xff] : | ||
915 | + 16 + lg_table[(n >> 16) & 0xff]) : | ||
916 | + ((n & 0x0000ff00) ? | ||
917 | + 8 + lg_table[(n >> 8) & 0xff] : | ||
918 | + 0 + lg_table[(n >> 0) & 0xff]); | ||
919 | +} | ||
920 | + | ||
921 | + | ||
922 | +/*---------------------------------------------------------------------------*/ | ||
923 | + | ||
924 | +/* Simple insertionsort for small size groups. */ | ||
925 | +static | ||
926 | +void | ||
927 | +tr_insertionsort(const int *ISAd, int *first, int *last) { | ||
928 | + int *a, *b; | ||
929 | + int t, r; | ||
930 | + | ||
931 | + for(a = first + 1; a < last; ++a) { | ||
932 | + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { | ||
933 | + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); | ||
934 | + if(b < first) { break; } | ||
935 | + } | ||
936 | + if(r == 0) { *b = ~*b; } | ||
937 | + *(b + 1) = t; | ||
938 | + } | ||
939 | +} | ||
940 | + | ||
941 | + | ||
942 | +/*---------------------------------------------------------------------------*/ | ||
943 | + | ||
944 | +static INLINE | ||
945 | +void | ||
946 | +tr_fixdown(const int *ISAd, int *SA, int i, int size) { | ||
947 | + int j, k; | ||
948 | + int v; | ||
949 | + int c, d, e; | ||
950 | + | ||
951 | + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { | ||
952 | + d = ISAd[SA[k = j++]]; | ||
953 | + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } | ||
954 | + if(d <= c) { break; } | ||
955 | + } | ||
956 | + SA[i] = v; | ||
957 | +} | ||
958 | + | ||
959 | +/* Simple top-down heapsort. */ | ||
960 | +static | ||
961 | +void | ||
962 | +tr_heapsort(const int *ISAd, int *SA, int size) { | ||
963 | + int i, m; | ||
964 | + int t; | ||
965 | + | ||
966 | + m = size; | ||
967 | + if((size % 2) == 0) { | ||
968 | + m--; | ||
969 | + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } | ||
970 | + } | ||
971 | + | ||
972 | + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } | ||
973 | + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } | ||
974 | + for(i = m - 1; 0 < i; --i) { | ||
975 | + t = SA[0], SA[0] = SA[i]; | ||
976 | + tr_fixdown(ISAd, SA, 0, i); | ||
977 | + SA[i] = t; | ||
978 | + } | ||
979 | +} | ||
980 | + | ||
981 | + | ||
982 | +/*---------------------------------------------------------------------------*/ | ||
983 | + | ||
984 | +/* Returns the median of three elements. */ | ||
985 | +static INLINE | ||
986 | +int * | ||
987 | +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { | ||
988 | + int *t; | ||
989 | + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } | ||
990 | + if(ISAd[*v2] > ISAd[*v3]) { | ||
991 | + if(ISAd[*v1] > ISAd[*v3]) { return v1; } | ||
992 | + else { return v3; } | ||
993 | + } | ||
994 | + return v2; | ||
995 | +} | ||
996 | + | ||
997 | +/* Returns the median of five elements. */ | ||
998 | +static INLINE | ||
999 | +int * | ||
1000 | +tr_median5(const int *ISAd, | ||
1001 | + int *v1, int *v2, int *v3, int *v4, int *v5) { | ||
1002 | + int *t; | ||
1003 | + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } | ||
1004 | + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } | ||
1005 | + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } | ||
1006 | + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } | ||
1007 | + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } | ||
1008 | + if(ISAd[*v3] > ISAd[*v4]) { return v4; } | ||
1009 | + return v3; | ||
1010 | +} | ||
1011 | + | ||
1012 | +/* Returns the pivot element. */ | ||
1013 | +static INLINE | ||
1014 | +int * | ||
1015 | +tr_pivot(const int *ISAd, int *first, int *last) { | ||
1016 | + int *middle; | ||
1017 | + int t; | ||
1018 | + | ||
1019 | + t = last - first; | ||
1020 | + middle = first + t / 2; | ||
1021 | + | ||
1022 | + if(t <= 512) { | ||
1023 | + if(t <= 32) { | ||
1024 | + return tr_median3(ISAd, first, middle, last - 1); | ||
1025 | + } else { | ||
1026 | + t >>= 2; | ||
1027 | + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); | ||
1028 | + } | ||
1029 | + } | ||
1030 | + t >>= 3; | ||
1031 | + first = tr_median3(ISAd, first, first + t, first + (t << 1)); | ||
1032 | + middle = tr_median3(ISAd, middle - t, middle, middle + t); | ||
1033 | + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); | ||
1034 | + return tr_median3(ISAd, first, middle, last); | ||
1035 | +} | ||
1036 | + | ||
1037 | + | ||
1038 | +/*---------------------------------------------------------------------------*/ | ||
1039 | + | ||
1040 | +typedef struct _trbudget_t trbudget_t; | ||
1041 | +struct _trbudget_t { | ||
1042 | + int chance; | ||
1043 | + int remain; | ||
1044 | + int incval; | ||
1045 | + int count; | ||
1046 | +}; | ||
1047 | + | ||
1048 | +static INLINE | ||
1049 | +void | ||
1050 | +trbudget_init(trbudget_t *budget, int chance, int incval) { | ||
1051 | + budget->chance = chance; | ||
1052 | + budget->remain = budget->incval = incval; | ||
1053 | +} | ||
1054 | + | ||
1055 | +static INLINE | ||
1056 | +int | ||
1057 | +trbudget_check(trbudget_t *budget, int size) { | ||
1058 | + if(size <= budget->remain) { budget->remain -= size; return 1; } | ||
1059 | + if(budget->chance == 0) { budget->count += size; return 0; } | ||
1060 | + budget->remain += budget->incval - size; | ||
1061 | + budget->chance -= 1; | ||
1062 | + return 1; | ||
1063 | +} | ||
1064 | + | ||
1065 | + | ||
1066 | +/*---------------------------------------------------------------------------*/ | ||
1067 | + | ||
1068 | +static INLINE | ||
1069 | +void | ||
1070 | +tr_partition(const int *ISAd, | ||
1071 | + int *first, int *middle, int *last, | ||
1072 | + int **pa, int **pb, int v) { | ||
1073 | + int *a, *b, *c, *d, *e, *f; | ||
1074 | + int t, s; | ||
1075 | + int x = 0; | ||
1076 | + | ||
1077 | + for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } | ||
1078 | + if(((a = b) < last) && (x < v)) { | ||
1079 | + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { | ||
1080 | + if(x == v) { SWAP(*b, *a); ++a; } | ||
1081 | + } | ||
1082 | + } | ||
1083 | + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } | ||
1084 | + if((b < (d = c)) && (x > v)) { | ||
1085 | + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { | ||
1086 | + if(x == v) { SWAP(*c, *d); --d; } | ||
1087 | + } | ||
1088 | + } | ||
1089 | + for(; b < c;) { | ||
1090 | + SWAP(*b, *c); | ||
1091 | + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { | ||
1092 | + if(x == v) { SWAP(*b, *a); ++a; } | ||
1093 | + } | ||
1094 | + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { | ||
1095 | + if(x == v) { SWAP(*c, *d); --d; } | ||
1096 | + } | ||
1097 | + } | ||
1098 | + | ||
1099 | + if(a <= d) { | ||
1100 | + c = b - 1; | ||
1101 | + if((s = a - first) > (t = b - a)) { s = t; } | ||
1102 | + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } | ||
1103 | + if((s = d - c) > (t = last - d - 1)) { s = t; } | ||
1104 | + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } | ||
1105 | + first += (b - a), last -= (d - c); | ||
1106 | + } | ||
1107 | + *pa = first, *pb = last; | ||
1108 | +} | ||
1109 | + | ||
1110 | +static | ||
1111 | +void | ||
1112 | +tr_copy(int *ISA, const int *SA, | ||
1113 | + int *first, int *a, int *b, int *last, | ||
1114 | + int depth) { | ||
1115 | + /* sort suffixes of middle partition | ||
1116 | + by using sorted order of suffixes of left and right partition. */ | ||
1117 | + int *c, *d, *e; | ||
1118 | + int s, v; | ||
1119 | + | ||
1120 | + v = b - SA - 1; | ||
1121 | + for(c = first, d = a - 1; c <= d; ++c) { | ||
1122 | + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { | ||
1123 | + *++d = s; | ||
1124 | + ISA[s] = d - SA; | ||
1125 | + } | ||
1126 | + } | ||
1127 | + for(c = last - 1, e = d + 1, d = b; e < d; --c) { | ||
1128 | + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { | ||
1129 | + *--d = s; | ||
1130 | + ISA[s] = d - SA; | ||
1131 | + } | ||
1132 | + } | ||
1133 | +} | ||
1134 | + | ||
1135 | +static | ||
1136 | +void | ||
1137 | +tr_partialcopy(int *ISA, const int *SA, | ||
1138 | + int *first, int *a, int *b, int *last, | ||
1139 | + int depth) { | ||
1140 | + int *c, *d, *e; | ||
1141 | + int s, v; | ||
1142 | + int rank, lastrank, newrank = -1; | ||
1143 | + | ||
1144 | + v = b - SA - 1; | ||
1145 | + lastrank = -1; | ||
1146 | + for(c = first, d = a - 1; c <= d; ++c) { | ||
1147 | + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { | ||
1148 | + *++d = s; | ||
1149 | + rank = ISA[s + depth]; | ||
1150 | + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } | ||
1151 | + ISA[s] = newrank; | ||
1152 | + } | ||
1153 | + } | ||
1154 | + | ||
1155 | + lastrank = -1; | ||
1156 | + for(e = d; first <= e; --e) { | ||
1157 | + rank = ISA[*e]; | ||
1158 | + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } | ||
1159 | + if(newrank != rank) { ISA[*e] = newrank; } | ||
1160 | + } | ||
1161 | + | ||
1162 | + lastrank = -1; | ||
1163 | + for(c = last - 1, e = d + 1, d = b; e < d; --c) { | ||
1164 | + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { | ||
1165 | + *--d = s; | ||
1166 | + rank = ISA[s + depth]; | ||
1167 | + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } | ||
1168 | + ISA[s] = newrank; | ||
1169 | + } | ||
1170 | + } | ||
1171 | +} | ||
1172 | + | ||
1173 | +static | ||
1174 | +void | ||
1175 | +tr_introsort(int *ISA, const int *ISAd, | ||
1176 | + int *SA, int *first, int *last, | ||
1177 | + trbudget_t *budget) { | ||
1178 | +#define STACK_SIZE TR_STACKSIZE | ||
1179 | + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; | ||
1180 | + int *a, *b, *c; | ||
1181 | + int t; | ||
1182 | + int v, x = 0; | ||
1183 | + int incr = ISAd - ISA; | ||
1184 | + int limit, next; | ||
1185 | + int ssize, trlink = -1; | ||
1186 | + | ||
1187 | + for(ssize = 0, limit = tr_ilg(last - first);;) { | ||
1188 | + | ||
1189 | + if(limit < 0) { | ||
1190 | + if(limit == -1) { | ||
1191 | + /* tandem repeat partition */ | ||
1192 | + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); | ||
1193 | + | ||
1194 | + /* update ranks */ | ||
1195 | + if(a < last) { | ||
1196 | + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } | ||
1197 | + } | ||
1198 | + if(b < last) { | ||
1199 | + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } | ||
1200 | + } | ||
1201 | + | ||
1202 | + /* push */ | ||
1203 | + if(1 < (b - a)) { | ||
1204 | + STACK_PUSH5(NULL, a, b, 0, 0); | ||
1205 | + STACK_PUSH5(ISAd - incr, first, last, -2, trlink); | ||
1206 | + trlink = ssize - 2; | ||
1207 | + } | ||
1208 | + if((a - first) <= (last - b)) { | ||
1209 | + if(1 < (a - first)) { | ||
1210 | + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); | ||
1211 | + last = a, limit = tr_ilg(a - first); | ||
1212 | + } else if(1 < (last - b)) { | ||
1213 | + first = b, limit = tr_ilg(last - b); | ||
1214 | + } else { | ||
1215 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
1216 | + } | ||
1217 | + } else { | ||
1218 | + if(1 < (last - b)) { | ||
1219 | + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); | ||
1220 | + first = b, limit = tr_ilg(last - b); | ||
1221 | + } else if(1 < (a - first)) { | ||
1222 | + last = a, limit = tr_ilg(a - first); | ||
1223 | + } else { | ||
1224 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
1225 | + } | ||
1226 | + } | ||
1227 | + } else if(limit == -2) { | ||
1228 | + /* tandem repeat copy */ | ||
1229 | + a = stack[--ssize].b, b = stack[ssize].c; | ||
1230 | + if(stack[ssize].d == 0) { | ||
1231 | + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); | ||
1232 | + } else { | ||
1233 | + if(0 <= trlink) { stack[trlink].d = -1; } | ||
1234 | + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); | ||
1235 | + } | ||
1236 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
1237 | + } else { | ||
1238 | + /* sorted partition */ | ||
1239 | + if(0 <= *first) { | ||
1240 | + a = first; | ||
1241 | + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); | ||
1242 | + first = a; | ||
1243 | + } | ||
1244 | + if(first < last) { | ||
1245 | + a = first; do { *a = ~*a; } while(*++a < 0); | ||
1246 | + next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1; | ||
1247 | + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } | ||
1248 | + | ||
1249 | + /* push */ | ||
1250 | + if(trbudget_check(budget, a - first)) { | ||
1251 | + if((a - first) <= (last - a)) { | ||
1252 | + STACK_PUSH5(ISAd, a, last, -3, trlink); | ||
1253 | + ISAd += incr, last = a, limit = next; | ||
1254 | + } else { | ||
1255 | + if(1 < (last - a)) { | ||
1256 | + STACK_PUSH5(ISAd + incr, first, a, next, trlink); | ||
1257 | + first = a, limit = -3; | ||
1258 | + } else { | ||
1259 | + ISAd += incr, last = a, limit = next; | ||
1260 | + } | ||
1261 | + } | ||
1262 | + } else { | ||
1263 | + if(0 <= trlink) { stack[trlink].d = -1; } | ||
1264 | + if(1 < (last - a)) { | ||
1265 | + first = a, limit = -3; | ||
1266 | + } else { | ||
1267 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
1268 | + } | ||
1269 | + } | ||
1270 | + } else { | ||
1271 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
1272 | + } | ||
1273 | + } | ||
1274 | + continue; | ||
1275 | + } | ||
1276 | + | ||
1277 | + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { | ||
1278 | + tr_insertionsort(ISAd, first, last); | ||
1279 | + limit = -3; | ||
1280 | + continue; | ||
1281 | + } | ||
1282 | + | ||
1283 | + if(limit-- == 0) { | ||
1284 | + tr_heapsort(ISAd, first, last - first); | ||
1285 | + for(a = last - 1; first < a; a = b) { | ||
1286 | + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } | ||
1287 | + } | ||
1288 | + limit = -3; | ||
1289 | + continue; | ||
1290 | + } | ||
1291 | + | ||
1292 | + /* choose pivot */ | ||
1293 | + a = tr_pivot(ISAd, first, last); | ||
1294 | + SWAP(*first, *a); | ||
1295 | + v = ISAd[*first]; | ||
1296 | + | ||
1297 | + /* partition */ | ||
1298 | + tr_partition(ISAd, first, first + 1, last, &a, &b, v); | ||
1299 | + if((last - first) != (b - a)) { | ||
1300 | + next = (ISA[*a] != v) ? tr_ilg(b - a) : -1; | ||
1301 | + | ||
1302 | + /* update ranks */ | ||
1303 | + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } | ||
1304 | + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } | ||
1305 | + | ||
1306 | + /* push */ | ||
1307 | + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { | ||
1308 | + if((a - first) <= (last - b)) { | ||
1309 | + if((last - b) <= (b - a)) { | ||
1310 | + if(1 < (a - first)) { | ||
1311 | + STACK_PUSH5(ISAd + incr, a, b, next, trlink); | ||
1312 | + STACK_PUSH5(ISAd, b, last, limit, trlink); | ||
1313 | + last = a; | ||
1314 | + } else if(1 < (last - b)) { | ||
1315 | + STACK_PUSH5(ISAd + incr, a, b, next, trlink); | ||
1316 | + first = b; | ||
1317 | + } else { | ||
1318 | + ISAd += incr, first = a, last = b, limit = next; | ||
1319 | + } | ||
1320 | + } else if((a - first) <= (b - a)) { | ||
1321 | + if(1 < (a - first)) { | ||
1322 | + STACK_PUSH5(ISAd, b, last, limit, trlink); | ||
1323 | + STACK_PUSH5(ISAd + incr, a, b, next, trlink); | ||
1324 | + last = a; | ||
1325 | + } else { | ||
1326 | + STACK_PUSH5(ISAd, b, last, limit, trlink); | ||
1327 | + ISAd += incr, first = a, last = b, limit = next; | ||
1328 | + } | ||
1329 | + } else { | ||
1330 | + STACK_PUSH5(ISAd, b, last, limit, trlink); | ||
1331 | + STACK_PUSH5(ISAd, first, a, limit, trlink); | ||
1332 | + ISAd += incr, first = a, last = b, limit = next; | ||
1333 | + } | ||
1334 | + } else { | ||
1335 | + if((a - first) <= (b - a)) { | ||
1336 | + if(1 < (last - b)) { | ||
1337 | + STACK_PUSH5(ISAd + incr, a, b, next, trlink); | ||
1338 | + STACK_PUSH5(ISAd, first, a, limit, trlink); | ||
1339 | + first = b; | ||
1340 | + } else if(1 < (a - first)) { | ||
1341 | + STACK_PUSH5(ISAd + incr, a, b, next, trlink); | ||
1342 | + last = a; | ||
1343 | + } else { | ||
1344 | + ISAd += incr, first = a, last = b, limit = next; | ||
1345 | + } | ||
1346 | + } else if((last - b) <= (b - a)) { | ||
1347 | + if(1 < (last - b)) { | ||
1348 | + STACK_PUSH5(ISAd, first, a, limit, trlink); | ||
1349 | + STACK_PUSH5(ISAd + incr, a, b, next, trlink); | ||
1350 | + first = b; | ||
1351 | + } else { | ||
1352 | + STACK_PUSH5(ISAd, first, a, limit, trlink); | ||
1353 | + ISAd += incr, first = a, last = b, limit = next; | ||
1354 | + } | ||
1355 | + } else { | ||
1356 | + STACK_PUSH5(ISAd, first, a, limit, trlink); | ||
1357 | + STACK_PUSH5(ISAd, b, last, limit, trlink); | ||
1358 | + ISAd += incr, first = a, last = b, limit = next; | ||
1359 | + } | ||
1360 | + } | ||
1361 | + } else { | ||
1362 | + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } | ||
1363 | + if((a - first) <= (last - b)) { | ||
1364 | + if(1 < (a - first)) { | ||
1365 | + STACK_PUSH5(ISAd, b, last, limit, trlink); | ||
1366 | + last = a; | ||
1367 | + } else if(1 < (last - b)) { | ||
1368 | + first = b; | ||
1369 | + } else { | ||
1370 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
1371 | + } | ||
1372 | + } else { | ||
1373 | + if(1 < (last - b)) { | ||
1374 | + STACK_PUSH5(ISAd, first, a, limit, trlink); | ||
1375 | + first = b; | ||
1376 | + } else if(1 < (a - first)) { | ||
1377 | + last = a; | ||
1378 | + } else { | ||
1379 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
1380 | + } | ||
1381 | + } | ||
1382 | + } | ||
1383 | + } else { | ||
1384 | + if(trbudget_check(budget, last - first)) { | ||
1385 | + limit = tr_ilg(last - first), ISAd += incr; | ||
1386 | + } else { | ||
1387 | + if(0 <= trlink) { stack[trlink].d = -1; } | ||
1388 | + STACK_POP5(ISAd, first, last, limit, trlink); | ||
1389 | + } | ||
1390 | + } | ||
1391 | + } | ||
1392 | +#undef STACK_SIZE | ||
1393 | +} | ||
1394 | + | ||
1395 | + | ||
1396 | + | ||
1397 | +/*---------------------------------------------------------------------------*/ | ||
1398 | + | ||
1399 | +/* Tandem repeat sort */ | ||
1400 | +static | ||
1401 | +void | ||
1402 | +trsort(int *ISA, int *SA, int n, int depth) { | ||
1403 | + int *ISAd; | ||
1404 | + int *first, *last; | ||
1405 | + trbudget_t budget; | ||
1406 | + int t, skip, unsorted; | ||
1407 | + | ||
1408 | + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); | ||
1409 | +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ | ||
1410 | + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { | ||
1411 | + first = SA; | ||
1412 | + skip = 0; | ||
1413 | + unsorted = 0; | ||
1414 | + do { | ||
1415 | + if((t = *first) < 0) { first -= t; skip += t; } | ||
1416 | + else { | ||
1417 | + if(skip != 0) { *(first + skip) = skip; skip = 0; } | ||
1418 | + last = SA + ISA[t] + 1; | ||
1419 | + if(1 < (last - first)) { | ||
1420 | + budget.count = 0; | ||
1421 | + tr_introsort(ISA, ISAd, SA, first, last, &budget); | ||
1422 | + if(budget.count != 0) { unsorted += budget.count; } | ||
1423 | + else { skip = first - last; } | ||
1424 | + } else if((last - first) == 1) { | ||
1425 | + skip = -1; | ||
1426 | + } | ||
1427 | + first = last; | ||
1428 | + } | ||
1429 | + } while(first < (SA + n)); | ||
1430 | + if(skip != 0) { *(first + skip) = skip; } | ||
1431 | + if(unsorted == 0) { break; } | ||
1432 | + } | ||
1433 | +} | ||
1434 | + | ||
1435 | + | ||
1436 | +/*---------------------------------------------------------------------------*/ | ||
1437 | + | ||
1438 | +/* Sorts suffixes of type B*. */ | ||
1439 | +static | ||
1440 | +int | ||
1441 | +sort_typeBstar(const unsigned char *T, int *SA, | ||
1442 | + int *bucket_A, int *bucket_B, | ||
1443 | + int n, int openMP) { | ||
1444 | + int *PAb, *ISAb, *buf; | ||
1445 | +#ifdef LIBBSC_OPENMP | ||
1446 | + int *curbuf; | ||
1447 | + int l; | ||
1448 | +#endif | ||
1449 | + int i, j, k, t, m, bufsize; | ||
1450 | + int c0, c1; | ||
1451 | +#ifdef LIBBSC_OPENMP | ||
1452 | + int d0, d1; | ||
1453 | +#endif | ||
1454 | + (void)openMP; | ||
1455 | + | ||
1456 | + /* Initialize bucket arrays. */ | ||
1457 | + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } | ||
1458 | + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } | ||
1459 | + | ||
1460 | + /* Count the number of occurrences of the first one or two characters of each | ||
1461 | + type A, B and B* suffix. Moreover, store the beginning position of all | ||
1462 | + type B* suffixes into the array SA. */ | ||
1463 | + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { | ||
1464 | + /* type A suffix. */ | ||
1465 | + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); | ||
1466 | + if(0 <= i) { | ||
1467 | + /* type B* suffix. */ | ||
1468 | + ++BUCKET_BSTAR(c0, c1); | ||
1469 | + SA[--m] = i; | ||
1470 | + /* type B suffix. */ | ||
1471 | + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { | ||
1472 | + ++BUCKET_B(c0, c1); | ||
1473 | + } | ||
1474 | + } | ||
1475 | + } | ||
1476 | + m = n - m; | ||
1477 | +/* | ||
1478 | +note: | ||
1479 | + A type B* suffix is lexicographically smaller than a type B suffix that | ||
1480 | + begins with the same first two characters. | ||
1481 | +*/ | ||
1482 | + | ||
1483 | + /* Calculate the index of start/end point of each bucket. */ | ||
1484 | + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { | ||
1485 | + t = i + BUCKET_A(c0); | ||
1486 | + BUCKET_A(c0) = i + j; /* start point */ | ||
1487 | + i = t + BUCKET_B(c0, c0); | ||
1488 | + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { | ||
1489 | + j += BUCKET_BSTAR(c0, c1); | ||
1490 | + BUCKET_BSTAR(c0, c1) = j; /* end point */ | ||
1491 | + i += BUCKET_B(c0, c1); | ||
1492 | + } | ||
1493 | + } | ||
1494 | + | ||
1495 | + if(0 < m) { | ||
1496 | + /* Sort the type B* suffixes by their first two characters. */ | ||
1497 | + PAb = SA + n - m; ISAb = SA + m; | ||
1498 | + for(i = m - 2; 0 <= i; --i) { | ||
1499 | + t = PAb[i], c0 = T[t], c1 = T[t + 1]; | ||
1500 | + SA[--BUCKET_BSTAR(c0, c1)] = i; | ||
1501 | + } | ||
1502 | + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; | ||
1503 | + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; | ||
1504 | + | ||
1505 | + /* Sort the type B* substrings using sssort. */ | ||
1506 | +#ifdef LIBBSC_OPENMP | ||
1507 | + if (openMP) | ||
1508 | + { | ||
1509 | + buf = SA + m; | ||
1510 | + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; | ||
1511 | +#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1) | ||
1512 | + { | ||
1513 | + bufsize = (n - (2 * m)) / omp_get_num_threads(); | ||
1514 | + curbuf = buf + omp_get_thread_num() * bufsize; | ||
1515 | + k = 0; | ||
1516 | + for(;;) { | ||
1517 | + #pragma omp critical(sssort_lock) | ||
1518 | + { | ||
1519 | + if(0 < (l = j)) { | ||
1520 | + d0 = c0, d1 = c1; | ||
1521 | + do { | ||
1522 | + k = BUCKET_BSTAR(d0, d1); | ||
1523 | + if(--d1 <= d0) { | ||
1524 | + d1 = ALPHABET_SIZE - 1; | ||
1525 | + if(--d0 < 0) { break; } | ||
1526 | + } | ||
1527 | + } while(((l - k) <= 1) && (0 < (l = k))); | ||
1528 | + c0 = d0, c1 = d1, j = k; | ||
1529 | + } | ||
1530 | + } | ||
1531 | + if(l == 0) { break; } | ||
1532 | + sssort(T, PAb, SA + k, SA + l, | ||
1533 | + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); | ||
1534 | + } | ||
1535 | + } | ||
1536 | + } | ||
1537 | + else | ||
1538 | + { | ||
1539 | + buf = SA + m, bufsize = n - (2 * m); | ||
1540 | + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { | ||
1541 | + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { | ||
1542 | + i = BUCKET_BSTAR(c0, c1); | ||
1543 | + if(1 < (j - i)) { | ||
1544 | + sssort(T, PAb, SA + i, SA + j, | ||
1545 | + buf, bufsize, 2, n, *(SA + i) == (m - 1)); | ||
1546 | + } | ||
1547 | + } | ||
1548 | + } | ||
1549 | + } | ||
1550 | +#else | ||
1551 | + buf = SA + m, bufsize = n - (2 * m); | ||
1552 | + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { | ||
1553 | + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { | ||
1554 | + i = BUCKET_BSTAR(c0, c1); | ||
1555 | + if(1 < (j - i)) { | ||
1556 | + sssort(T, PAb, SA + i, SA + j, | ||
1557 | + buf, bufsize, 2, n, *(SA + i) == (m - 1)); | ||
1558 | + } | ||
1559 | + } | ||
1560 | + } | ||
1561 | +#endif | ||
1562 | + | ||
1563 | + /* Compute ranks of type B* substrings. */ | ||
1564 | + for(i = m - 1; 0 <= i; --i) { | ||
1565 | + if(0 <= SA[i]) { | ||
1566 | + j = i; | ||
1567 | + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); | ||
1568 | + SA[i + 1] = i - j; | ||
1569 | + if(i <= 0) { break; } | ||
1570 | + } | ||
1571 | + j = i; | ||
1572 | + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); | ||
1573 | + ISAb[SA[i]] = j; | ||
1574 | + } | ||
1575 | + | ||
1576 | + /* Construct the inverse suffix array of type B* suffixes using trsort. */ | ||
1577 | + trsort(ISAb, SA, m, 1); | ||
1578 | + | ||
1579 | + /* Set the sorted order of tyoe B* suffixes. */ | ||
1580 | + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { | ||
1581 | + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } | ||
1582 | + if(0 <= i) { | ||
1583 | + t = i; | ||
1584 | + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } | ||
1585 | + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; | ||
1586 | + } | ||
1587 | + } | ||
1588 | + | ||
1589 | + /* Calculate the index of start/end point of each bucket. */ | ||
1590 | + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ | ||
1591 | + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { | ||
1592 | + i = BUCKET_A(c0 + 1) - 1; | ||
1593 | + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { | ||
1594 | + t = i - BUCKET_B(c0, c1); | ||
1595 | + BUCKET_B(c0, c1) = i; /* end point */ | ||
1596 | + | ||
1597 | + /* Move all type B* suffixes to the correct position. */ | ||
1598 | + for(i = t, j = BUCKET_BSTAR(c0, c1); | ||
1599 | + j <= k; | ||
1600 | + --i, --k) { SA[i] = SA[k]; } | ||
1601 | + } | ||
1602 | + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ | ||
1603 | + BUCKET_B(c0, c0) = i; /* end point */ | ||
1604 | + } | ||
1605 | + } | ||
1606 | + | ||
1607 | + return m; | ||
1608 | +} | ||
1609 | + | ||
1610 | +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ | ||
1611 | +static | ||
1612 | +void | ||
1613 | +construct_SA(const unsigned char *T, int *SA, | ||
1614 | + int *bucket_A, int *bucket_B, | ||
1615 | + int n, int m) { | ||
1616 | + int *i, *j, *k; | ||
1617 | + int s; | ||
1618 | + int c0, c1, c2; | ||
1619 | + | ||
1620 | + if(0 < m) { | ||
1621 | + /* Construct the sorted order of type B suffixes by using | ||
1622 | + the sorted order of type B* suffixes. */ | ||
1623 | + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { | ||
1624 | + /* Scan the suffix array from right to left. */ | ||
1625 | + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), | ||
1626 | + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; | ||
1627 | + i <= j; | ||
1628 | + --j) { | ||
1629 | + if(0 < (s = *j)) { | ||
1630 | + assert(T[s] == c1); | ||
1631 | + assert(((s + 1) < n) && (T[s] <= T[s + 1])); | ||
1632 | + assert(T[s - 1] <= T[s]); | ||
1633 | + *j = ~s; | ||
1634 | + c0 = T[--s]; | ||
1635 | + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } | ||
1636 | + if(c0 != c2) { | ||
1637 | + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } | ||
1638 | + k = SA + BUCKET_B(c2 = c0, c1); | ||
1639 | + } | ||
1640 | + assert(k < j); assert(k != NULL); | ||
1641 | + *k-- = s; | ||
1642 | + } else { | ||
1643 | + assert(((s == 0) && (T[s] == c1)) || (s < 0)); | ||
1644 | + *j = ~s; | ||
1645 | + } | ||
1646 | + } | ||
1647 | + } | ||
1648 | + } | ||
1649 | + | ||
1650 | + /* Construct the suffix array by using | ||
1651 | + the sorted order of type B suffixes. */ | ||
1652 | + k = SA + BUCKET_A(c2 = T[n - 1]); | ||
1653 | + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); | ||
1654 | + /* Scan the suffix array from left to right. */ | ||
1655 | + for(i = SA, j = SA + n; i < j; ++i) { | ||
1656 | + if(0 < (s = *i)) { | ||
1657 | + assert(T[s - 1] >= T[s]); | ||
1658 | + c0 = T[--s]; | ||
1659 | + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } | ||
1660 | + if(c0 != c2) { | ||
1661 | + BUCKET_A(c2) = k - SA; | ||
1662 | + k = SA + BUCKET_A(c2 = c0); | ||
1663 | + } | ||
1664 | + assert(i < k); | ||
1665 | + *k++ = s; | ||
1666 | + } else { | ||
1667 | + assert(s < 0); | ||
1668 | + *i = ~s; | ||
1669 | + } | ||
1670 | + } | ||
1671 | +} | ||
1672 | + | ||
1673 | +/* Constructs the burrows-wheeler transformed string directly | ||
1674 | + by using the sorted order of type B* suffixes. */ | ||
1675 | +static | ||
1676 | +int | ||
1677 | +construct_BWT(const unsigned char *T, int *SA, | ||
1678 | + int *bucket_A, int *bucket_B, | ||
1679 | + int n, int m) { | ||
1680 | + int *i, *j, *k, *orig; | ||
1681 | + int s; | ||
1682 | + int c0, c1, c2; | ||
1683 | + | ||
1684 | + if(0 < m) { | ||
1685 | + /* Construct the sorted order of type B suffixes by using | ||
1686 | + the sorted order of type B* suffixes. */ | ||
1687 | + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { | ||
1688 | + /* Scan the suffix array from right to left. */ | ||
1689 | + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), | ||
1690 | + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; | ||
1691 | + i <= j; | ||
1692 | + --j) { | ||
1693 | + if(0 < (s = *j)) { | ||
1694 | + assert(T[s] == c1); | ||
1695 | + assert(((s + 1) < n) && (T[s] <= T[s + 1])); | ||
1696 | + assert(T[s - 1] <= T[s]); | ||
1697 | + c0 = T[--s]; | ||
1698 | + *j = ~((int)c0); | ||
1699 | + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } | ||
1700 | + if(c0 != c2) { | ||
1701 | + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } | ||
1702 | + k = SA + BUCKET_B(c2 = c0, c1); | ||
1703 | + } | ||
1704 | + assert(k < j); assert(k != NULL); | ||
1705 | + *k-- = s; | ||
1706 | + } else if(s != 0) { | ||
1707 | + *j = ~s; | ||
1708 | +#ifndef NDEBUG | ||
1709 | + } else { | ||
1710 | + assert(T[s] == c1); | ||
1711 | +#endif | ||
1712 | + } | ||
1713 | + } | ||
1714 | + } | ||
1715 | + } | ||
1716 | + | ||
1717 | + /* Construct the BWTed string by using | ||
1718 | + the sorted order of type B suffixes. */ | ||
1719 | + k = SA + BUCKET_A(c2 = T[n - 1]); | ||
1720 | + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); | ||
1721 | + /* Scan the suffix array from left to right. */ | ||
1722 | + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { | ||
1723 | + if(0 < (s = *i)) { | ||
1724 | + assert(T[s - 1] >= T[s]); | ||
1725 | + c0 = T[--s]; | ||
1726 | + *i = c0; | ||
1727 | + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } | ||
1728 | + if(c0 != c2) { | ||
1729 | + BUCKET_A(c2) = k - SA; | ||
1730 | + k = SA + BUCKET_A(c2 = c0); | ||
1731 | + } | ||
1732 | + assert(i < k); | ||
1733 | + *k++ = s; | ||
1734 | + } else if(s != 0) { | ||
1735 | + *i = ~s; | ||
1736 | + } else { | ||
1737 | + orig = i; | ||
1738 | + } | ||
1739 | + } | ||
1740 | + | ||
1741 | + return orig - SA; | ||
1742 | +} | ||
1743 | + | ||
1744 | +/* Constructs the burrows-wheeler transformed string directly | ||
1745 | + by using the sorted order of type B* suffixes. */ | ||
1746 | +static | ||
1747 | +int | ||
1748 | +construct_BWT_indexes(const unsigned char *T, int *SA, | ||
1749 | + int *bucket_A, int *bucket_B, | ||
1750 | + int n, int m, | ||
1751 | + unsigned char * num_indexes, int * indexes) { | ||
1752 | + int *i, *j, *k, *orig; | ||
1753 | + int s; | ||
1754 | + int c0, c1, c2; | ||
1755 | + | ||
1756 | + int mod = n / 8; | ||
1757 | + { | ||
1758 | + mod |= mod >> 1; mod |= mod >> 2; | ||
1759 | + mod |= mod >> 4; mod |= mod >> 8; | ||
1760 | + mod |= mod >> 16; mod >>= 1; | ||
1761 | + | ||
1762 | + *num_indexes = (unsigned char)((n - 1) / (mod + 1)); | ||
1763 | + } | ||
1764 | + | ||
1765 | + if(0 < m) { | ||
1766 | + /* Construct the sorted order of type B suffixes by using | ||
1767 | + the sorted order of type B* suffixes. */ | ||
1768 | + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { | ||
1769 | + /* Scan the suffix array from right to left. */ | ||
1770 | + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), | ||
1771 | + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; | ||
1772 | + i <= j; | ||
1773 | + --j) { | ||
1774 | + if(0 < (s = *j)) { | ||
1775 | + assert(T[s] == c1); | ||
1776 | + assert(((s + 1) < n) && (T[s] <= T[s + 1])); | ||
1777 | + assert(T[s - 1] <= T[s]); | ||
1778 | + | ||
1779 | + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA; | ||
1780 | + | ||
1781 | + c0 = T[--s]; | ||
1782 | + *j = ~((int)c0); | ||
1783 | + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } | ||
1784 | + if(c0 != c2) { | ||
1785 | + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } | ||
1786 | + k = SA + BUCKET_B(c2 = c0, c1); | ||
1787 | + } | ||
1788 | + assert(k < j); assert(k != NULL); | ||
1789 | + *k-- = s; | ||
1790 | + } else if(s != 0) { | ||
1791 | + *j = ~s; | ||
1792 | +#ifndef NDEBUG | ||
1793 | + } else { | ||
1794 | + assert(T[s] == c1); | ||
1795 | +#endif | ||
1796 | + } | ||
1797 | + } | ||
1798 | + } | ||
1799 | + } | ||
1800 | + | ||
1801 | + /* Construct the BWTed string by using | ||
1802 | + the sorted order of type B suffixes. */ | ||
1803 | + k = SA + BUCKET_A(c2 = T[n - 1]); | ||
1804 | + if (T[n - 2] < c2) { | ||
1805 | + if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA; | ||
1806 | + *k++ = ~((int)T[n - 2]); | ||
1807 | + } | ||
1808 | + else { | ||
1809 | + *k++ = n - 1; | ||
1810 | + } | ||
1811 | + | ||
1812 | + /* Scan the suffix array from left to right. */ | ||
1813 | + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { | ||
1814 | + if(0 < (s = *i)) { | ||
1815 | + assert(T[s - 1] >= T[s]); | ||
1816 | + | ||
1817 | + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA; | ||
1818 | + | ||
1819 | + c0 = T[--s]; | ||
1820 | + *i = c0; | ||
1821 | + if(c0 != c2) { | ||
1822 | + BUCKET_A(c2) = k - SA; | ||
1823 | + k = SA + BUCKET_A(c2 = c0); | ||
1824 | + } | ||
1825 | + assert(i < k); | ||
1826 | + if((0 < s) && (T[s - 1] < c0)) { | ||
1827 | + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA; | ||
1828 | + *k++ = ~((int)T[s - 1]); | ||
1829 | + } else | ||
1830 | + *k++ = s; | ||
1831 | + } else if(s != 0) { | ||
1832 | + *i = ~s; | ||
1833 | + } else { | ||
1834 | + orig = i; | ||
1835 | + } | ||
1836 | + } | ||
1837 | + | ||
1838 | + return orig - SA; | ||
1839 | +} | ||
1840 | + | ||
1841 | + | ||
1842 | +/*---------------------------------------------------------------------------*/ | ||
1843 | + | ||
1844 | +/*- Function -*/ | ||
1845 | + | ||
1846 | +int | ||
1847 | +divsufsort(const unsigned char *T, int *SA, int n, int openMP) { | ||
1848 | + int *bucket_A, *bucket_B; | ||
1849 | + int m; | ||
1850 | + int err = 0; | ||
1851 | + | ||
1852 | + /* Check arguments. */ | ||
1853 | + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } | ||
1854 | + else if(n == 0) { return 0; } | ||
1855 | + else if(n == 1) { SA[0] = 0; return 0; } | ||
1856 | + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } | ||
1857 | + | ||
1858 | + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); | ||
1859 | + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); | ||
1860 | + | ||
1861 | + /* Suffixsort. */ | ||
1862 | + if((bucket_A != NULL) && (bucket_B != NULL)) { | ||
1863 | + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP); | ||
1864 | + construct_SA(T, SA, bucket_A, bucket_B, n, m); | ||
1865 | + } else { | ||
1866 | + err = -2; | ||
1867 | + } | ||
1868 | + | ||
1869 | + free(bucket_B); | ||
1870 | + free(bucket_A); | ||
1871 | + | ||
1872 | + return err; | ||
1873 | +} | ||
1874 | + | ||
1875 | +int | ||
1876 | +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) { | ||
1877 | + int *B; | ||
1878 | + int *bucket_A, *bucket_B; | ||
1879 | + int m, pidx, i; | ||
1880 | + | ||
1881 | + /* Check arguments. */ | ||
1882 | + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } | ||
1883 | + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } | ||
1884 | + | ||
1885 | + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } | ||
1886 | + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); | ||
1887 | + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); | ||
1888 | + | ||
1889 | + /* Burrows-Wheeler Transform. */ | ||
1890 | + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { | ||
1891 | + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP); | ||
1892 | + | ||
1893 | + if (num_indexes == NULL || indexes == NULL) { | ||
1894 | + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); | ||
1895 | + } else { | ||
1896 | + pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes); | ||
1897 | + } | ||
1898 | + | ||
1899 | + /* Copy to output string. */ | ||
1900 | + U[0] = T[n - 1]; | ||
1901 | + for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; } | ||
1902 | + for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; } | ||
1903 | + pidx += 1; | ||
1904 | + } else { | ||
1905 | + pidx = -2; | ||
1906 | + } | ||
1907 | + | ||
1908 | + free(bucket_B); | ||
1909 | + free(bucket_A); | ||
1910 | + if(A == NULL) { free(B); } | ||
1911 | + | ||
1912 | + return pidx; | ||
1913 | +} |
vendor/github.com/DataDog/zstd/divsufsort.h
0 → 100644
1 | +/* | ||
2 | + * divsufsort.h for libdivsufsort-lite | ||
3 | + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. | ||
4 | + * | ||
5 | + * Permission is hereby granted, free of charge, to any person | ||
6 | + * obtaining a copy of this software and associated documentation | ||
7 | + * files (the "Software"), to deal in the Software without | ||
8 | + * restriction, including without limitation the rights to use, | ||
9 | + * copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
10 | + * copies of the Software, and to permit persons to whom the | ||
11 | + * Software is furnished to do so, subject to the following | ||
12 | + * conditions: | ||
13 | + * | ||
14 | + * The above copyright notice and this permission notice shall be | ||
15 | + * included in all copies or substantial portions of the Software. | ||
16 | + * | ||
17 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
18 | + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | ||
19 | + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
20 | + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | ||
21 | + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | ||
22 | + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
23 | + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | ||
24 | + * OTHER DEALINGS IN THE SOFTWARE. | ||
25 | + */ | ||
26 | + | ||
27 | +#ifndef _DIVSUFSORT_H | ||
28 | +#define _DIVSUFSORT_H 1 | ||
29 | + | ||
30 | +#ifdef __cplusplus | ||
31 | +extern "C" { | ||
32 | +#endif /* __cplusplus */ | ||
33 | + | ||
34 | + | ||
35 | +/*- Prototypes -*/ | ||
36 | + | ||
37 | +/** | ||
38 | + * Constructs the suffix array of a given string. | ||
39 | + * @param T [0..n-1] The input string. | ||
40 | + * @param SA [0..n-1] The output array of suffixes. | ||
41 | + * @param n The length of the given string. | ||
42 | + * @param openMP enables OpenMP optimization. | ||
43 | + * @return 0 if no error occurred, -1 or -2 otherwise. | ||
44 | + */ | ||
45 | +int | ||
46 | +divsufsort(const unsigned char *T, int *SA, int n, int openMP); | ||
47 | + | ||
48 | +/** | ||
49 | + * Constructs the burrows-wheeler transformed string of a given string. | ||
50 | + * @param T [0..n-1] The input string. | ||
51 | + * @param U [0..n-1] The output string. (can be T) | ||
52 | + * @param A [0..n-1] The temporary array. (can be NULL) | ||
53 | + * @param n The length of the given string. | ||
54 | + * @param num_indexes The length of secondary indexes array. (can be NULL) | ||
55 | + * @param indexes The secondary indexes array. (can be NULL) | ||
56 | + * @param openMP enables OpenMP optimization. | ||
57 | + * @return The primary index if no error occurred, -1 or -2 otherwise. | ||
58 | + */ | ||
59 | +int | ||
60 | +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP); | ||
61 | + | ||
62 | + | ||
63 | +#ifdef __cplusplus | ||
64 | +} /* extern "C" */ | ||
65 | +#endif /* __cplusplus */ | ||
66 | + | ||
67 | +#endif /* _DIVSUFSORT_H */ |
1 | +/* | ||
2 | + Common functions of New Generation Entropy library | ||
3 | + Copyright (C) 2016, Yann Collet. | ||
4 | + | ||
5 | + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | ||
6 | + | ||
7 | + Redistribution and use in source and binary forms, with or without | ||
8 | + modification, are permitted provided that the following conditions are | ||
9 | + met: | ||
10 | + | ||
11 | + * Redistributions of source code must retain the above copyright | ||
12 | + notice, this list of conditions and the following disclaimer. | ||
13 | + * Redistributions in binary form must reproduce the above | ||
14 | + copyright notice, this list of conditions and the following disclaimer | ||
15 | + in the documentation and/or other materials provided with the | ||
16 | + distribution. | ||
17 | + | ||
18 | + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
19 | + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
20 | + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
21 | + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
22 | + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
23 | + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
24 | + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
25 | + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
26 | + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
27 | + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
28 | + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
29 | + | ||
30 | + You can contact the author at : | ||
31 | + - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy | ||
32 | + - Public forum : https://groups.google.com/forum/#!forum/lz4c | ||
33 | +*************************************************************************** */ | ||
34 | + | ||
35 | +/* ************************************* | ||
36 | +* Dependencies | ||
37 | +***************************************/ | ||
38 | +#include "mem.h" | ||
39 | +#include "error_private.h" /* ERR_*, ERROR */ | ||
40 | +#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ | ||
41 | +#include "fse.h" | ||
42 | +#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ | ||
43 | +#include "huf.h" | ||
44 | + | ||
45 | + | ||
46 | +/*=== Version ===*/ | ||
47 | +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } | ||
48 | + | ||
49 | + | ||
50 | +/*=== Error Management ===*/ | ||
51 | +unsigned FSE_isError(size_t code) { return ERR_isError(code); } | ||
52 | +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } | ||
53 | + | ||
54 | +unsigned HUF_isError(size_t code) { return ERR_isError(code); } | ||
55 | +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } | ||
56 | + | ||
57 | + | ||
58 | +/*-************************************************************** | ||
59 | +* FSE NCount encoding-decoding | ||
60 | +****************************************************************/ | ||
61 | +size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, | ||
62 | + const void* headerBuffer, size_t hbSize) | ||
63 | +{ | ||
64 | + const BYTE* const istart = (const BYTE*) headerBuffer; | ||
65 | + const BYTE* const iend = istart + hbSize; | ||
66 | + const BYTE* ip = istart; | ||
67 | + int nbBits; | ||
68 | + int remaining; | ||
69 | + int threshold; | ||
70 | + U32 bitStream; | ||
71 | + int bitCount; | ||
72 | + unsigned charnum = 0; | ||
73 | + int previous0 = 0; | ||
74 | + | ||
75 | + if (hbSize < 4) { | ||
76 | + /* This function only works when hbSize >= 4 */ | ||
77 | + char buffer[4]; | ||
78 | + memset(buffer, 0, sizeof(buffer)); | ||
79 | + memcpy(buffer, headerBuffer, hbSize); | ||
80 | + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, | ||
81 | + buffer, sizeof(buffer)); | ||
82 | + if (FSE_isError(countSize)) return countSize; | ||
83 | + if (countSize > hbSize) return ERROR(corruption_detected); | ||
84 | + return countSize; | ||
85 | + } } | ||
86 | + assert(hbSize >= 4); | ||
87 | + | ||
88 | + /* init */ | ||
89 | + memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ | ||
90 | + bitStream = MEM_readLE32(ip); | ||
91 | + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ | ||
92 | + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); | ||
93 | + bitStream >>= 4; | ||
94 | + bitCount = 4; | ||
95 | + *tableLogPtr = nbBits; | ||
96 | + remaining = (1<<nbBits)+1; | ||
97 | + threshold = 1<<nbBits; | ||
98 | + nbBits++; | ||
99 | + | ||
100 | + while ((remaining>1) & (charnum<=*maxSVPtr)) { | ||
101 | + if (previous0) { | ||
102 | + unsigned n0 = charnum; | ||
103 | + while ((bitStream & 0xFFFF) == 0xFFFF) { | ||
104 | + n0 += 24; | ||
105 | + if (ip < iend-5) { | ||
106 | + ip += 2; | ||
107 | + bitStream = MEM_readLE32(ip) >> bitCount; | ||
108 | + } else { | ||
109 | + bitStream >>= 16; | ||
110 | + bitCount += 16; | ||
111 | + } } | ||
112 | + while ((bitStream & 3) == 3) { | ||
113 | + n0 += 3; | ||
114 | + bitStream >>= 2; | ||
115 | + bitCount += 2; | ||
116 | + } | ||
117 | + n0 += bitStream & 3; | ||
118 | + bitCount += 2; | ||
119 | + if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); | ||
120 | + while (charnum < n0) normalizedCounter[charnum++] = 0; | ||
121 | + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { | ||
122 | + assert((bitCount >> 3) <= 3); /* For first condition to work */ | ||
123 | + ip += bitCount>>3; | ||
124 | + bitCount &= 7; | ||
125 | + bitStream = MEM_readLE32(ip) >> bitCount; | ||
126 | + } else { | ||
127 | + bitStream >>= 2; | ||
128 | + } } | ||
129 | + { int const max = (2*threshold-1) - remaining; | ||
130 | + int count; | ||
131 | + | ||
132 | + if ((bitStream & (threshold-1)) < (U32)max) { | ||
133 | + count = bitStream & (threshold-1); | ||
134 | + bitCount += nbBits-1; | ||
135 | + } else { | ||
136 | + count = bitStream & (2*threshold-1); | ||
137 | + if (count >= threshold) count -= max; | ||
138 | + bitCount += nbBits; | ||
139 | + } | ||
140 | + | ||
141 | + count--; /* extra accuracy */ | ||
142 | + remaining -= count < 0 ? -count : count; /* -1 means +1 */ | ||
143 | + normalizedCounter[charnum++] = (short)count; | ||
144 | + previous0 = !count; | ||
145 | + while (remaining < threshold) { | ||
146 | + nbBits--; | ||
147 | + threshold >>= 1; | ||
148 | + } | ||
149 | + | ||
150 | + if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { | ||
151 | + ip += bitCount>>3; | ||
152 | + bitCount &= 7; | ||
153 | + } else { | ||
154 | + bitCount -= (int)(8 * (iend - 4 - ip)); | ||
155 | + ip = iend - 4; | ||
156 | + } | ||
157 | + bitStream = MEM_readLE32(ip) >> (bitCount & 31); | ||
158 | + } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ | ||
159 | + if (remaining != 1) return ERROR(corruption_detected); | ||
160 | + if (bitCount > 32) return ERROR(corruption_detected); | ||
161 | + *maxSVPtr = charnum-1; | ||
162 | + | ||
163 | + ip += (bitCount+7)>>3; | ||
164 | + return ip-istart; | ||
165 | +} | ||
166 | + | ||
167 | + | ||
168 | +/*! HUF_readStats() : | ||
169 | + Read compact Huffman tree, saved by HUF_writeCTable(). | ||
170 | + `huffWeight` is destination buffer. | ||
171 | + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. | ||
172 | + @return : size read from `src` , or an error Code . | ||
173 | + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . | ||
174 | +*/ | ||
175 | +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, | ||
176 | + U32* nbSymbolsPtr, U32* tableLogPtr, | ||
177 | + const void* src, size_t srcSize) | ||
178 | +{ | ||
179 | + U32 weightTotal; | ||
180 | + const BYTE* ip = (const BYTE*) src; | ||
181 | + size_t iSize; | ||
182 | + size_t oSize; | ||
183 | + | ||
184 | + if (!srcSize) return ERROR(srcSize_wrong); | ||
185 | + iSize = ip[0]; | ||
186 | + /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ | ||
187 | + | ||
188 | + if (iSize >= 128) { /* special header */ | ||
189 | + oSize = iSize - 127; | ||
190 | + iSize = ((oSize+1)/2); | ||
191 | + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); | ||
192 | + if (oSize >= hwSize) return ERROR(corruption_detected); | ||
193 | + ip += 1; | ||
194 | + { U32 n; | ||
195 | + for (n=0; n<oSize; n+=2) { | ||
196 | + huffWeight[n] = ip[n/2] >> 4; | ||
197 | + huffWeight[n+1] = ip[n/2] & 15; | ||
198 | + } } } | ||
199 | + else { /* header compressed with FSE (normal case) */ | ||
200 | + FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ | ||
201 | + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); | ||
202 | + oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ | ||
203 | + if (FSE_isError(oSize)) return oSize; | ||
204 | + } | ||
205 | + | ||
206 | + /* collect weight stats */ | ||
207 | + memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); | ||
208 | + weightTotal = 0; | ||
209 | + { U32 n; for (n=0; n<oSize; n++) { | ||
210 | + if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected); | ||
211 | + rankStats[huffWeight[n]]++; | ||
212 | + weightTotal += (1 << huffWeight[n]) >> 1; | ||
213 | + } } | ||
214 | + if (weightTotal == 0) return ERROR(corruption_detected); | ||
215 | + | ||
216 | + /* get last non-null symbol weight (implied, total must be 2^n) */ | ||
217 | + { U32 const tableLog = BIT_highbit32(weightTotal) + 1; | ||
218 | + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); | ||
219 | + *tableLogPtr = tableLog; | ||
220 | + /* determine last weight */ | ||
221 | + { U32 const total = 1 << tableLog; | ||
222 | + U32 const rest = total - weightTotal; | ||
223 | + U32 const verif = 1 << BIT_highbit32(rest); | ||
224 | + U32 const lastWeight = BIT_highbit32(rest) + 1; | ||
225 | + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ | ||
226 | + huffWeight[oSize] = (BYTE)lastWeight; | ||
227 | + rankStats[lastWeight]++; | ||
228 | + } } | ||
229 | + | ||
230 | + /* check tree construction validity */ | ||
231 | + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ | ||
232 | + | ||
233 | + /* results */ | ||
234 | + *nbSymbolsPtr = (U32)(oSize+1); | ||
235 | + return iSize+1; | ||
236 | +} |
-
请 注册 或 登录 后发表评论