[feature] add per-uri dereferencer locks (#2291)

This commit is contained in:
kim
2023-10-31 11:12:22 +00:00
committed by GitHub
parent 51d0a0bba5
commit ce71a5a790
54 changed files with 2432 additions and 2719 deletions

View File

@ -106,6 +106,25 @@ func MakeDict(data []byte, searchStart []byte) *Dict {
return &d
}
// MakeDictManual builds a dictionary from data using a caller-supplied
// first repeat index instead of searching for one.
//
// It returns nil unless MinDictSize <= len(data) <= MaxDictSize and
// firstIdx < len(data)-8.
//
// When data already has at least 16 bytes of spare capacity the returned
// Dict references data directly; otherwise the bytes are copied into a new
// buffer that has that spare capacity.
func MakeDictManual(data []byte, firstIdx uint16) *Dict {
	size := len(data)
	switch {
	case size < MinDictSize, size > MaxDictSize:
		// Dictionary length is outside the accepted range.
		return nil
	case int(firstIdx) >= size-8:
		// Repeat index is too close to (or beyond) the end of the data.
		return nil
	}

	buf := data
	if need := size + 16; cap(buf) < need {
		// Move the contents to a backing array with 16 spare bytes.
		buf = append(make([]byte, 0, need), data...)
	}

	return &Dict{
		dict:   buf,
		repeat: int(firstIdx),
	}
}
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.

View File

@ -57,7 +57,7 @@ func Encode(dst, src []byte) []byte {
// The function returns -1 if no improvement could be achieved.
// Using actual compression will most often produce better compression than the estimate.
func EstimateBlockSize(src []byte) (d int) {
if len(src) < 6 || int64(len(src)) > 0xffffffff {
if len(src) <= inputMargin || int64(len(src)) > 0xffffffff {
return -1
}
if len(src) <= 1024 {

View File

@ -157,6 +157,9 @@ func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
return m
}
matchDict := func(candidate, s int, first uint32, rep bool) match {
if s >= MaxDictSrcOffset {
return match{offset: candidate, s: s}
}
// Calculate offset as if in continuous array with s
offset := -len(dict.dict) + candidate
if best.length != 0 && best.s-best.offset == s-offset && !rep {

View File

@ -316,6 +316,7 @@ func matchLen(a []byte, b []byte) int {
return len(a) + checked
}
// calcBlockSize requires len(src) > inputMargin.
func calcBlockSize(src []byte) (d int) {
// Initialize the hash table.
const (
@ -501,6 +502,7 @@ emitRemainder:
return d
}
// calcBlockSizeSmall requires len(src) > inputMargin.
func calcBlockSizeSmall(src []byte) (d int) {
// Initialize the hash table.
const (

File diff suppressed because it is too large Load Diff

View File

@ -511,24 +511,22 @@ func IndexStream(r io.Reader) ([]byte, error) {
// JSON returns the index as JSON text.
func (i *Index) JSON() []byte {
type offset struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
}
x := struct {
TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
Offsets []struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
} `json:"offsets"`
EstBlockUncomp int64 `json:"est_block_uncompressed"`
TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
Offsets []offset `json:"offsets"`
EstBlockUncomp int64 `json:"est_block_uncompressed"`
}{
TotalUncompressed: i.TotalUncompressed,
TotalCompressed: i.TotalCompressed,
EstBlockUncomp: i.estBlockUncomp,
}
for _, v := range i.info {
x.Offsets = append(x.Offsets, struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
}{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
x.Offsets = append(x.Offsets, offset{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
}
b, _ := json.MarshalIndent(x, "", " ")
return b