add vendoring with go dep

2025-12-19 13:39:21 +00:00 · 2017-10-25 20:52:40 +00:00
parent 704f4d20d1
commit a59409f16b
1627 changed files with 489673 additions and 0 deletions
--- a/vendor/github.com/influxdata/influxdb/tsdb/engine/tsm1/timestamp.go
+++ b/vendor/github.com/influxdata/influxdb/tsdb/engine/tsm1/timestamp.go
@@ -0,0 +1,414 @@
+package tsm1
+
+// Timestamp encoding is adaptive and based on structure of the timestamps that are encoded.  It
+// uses a combination of delta encoding, scaling and compression using simple8b, run length encoding
+// as well as falling back to no compression if needed.
+//
+// Timestamp values to be encoded should be sorted before encoding.  When encoded, the values are
+// first delta-encoded.  The first value is the starting timestamp, subsequent values are the difference
+// from the prior value.
+//
+// Timestamp resolution can also be in the nanosecond.  Many timestamps are monotonically increasing
+// and fall on even boundaries of time such as every 10s.  When the timestamps have this structure,
+// they are scaled by the largest common divisor that is also a factor of 10.  This has the effect
+// of converting very large integer deltas into very small one that can be reversed by multiplying them
+// by the scaling factor.
+//
+// Using these adjusted values, if all the deltas are the same, the time range is stored using run
+// length encoding.  If run length encoding is not possible and all values are less than 1 << 60 - 1
+// (~36.5 yrs in nanosecond resolution), then the timestamps are encoded using simple8b encoding.  If
+// any value exceeds the maximum values, the deltas are stored uncompressed using 8b each.
+//
+// Each compressed byte slice has a 1 byte header indicating the compression type.  The 4 high bits
+// indicate the encoding type.  The 4 low bits are used by the encoding type.
+//
+// For run-length encoding, the 4 low bits store the log10 of the scaling factor.  The next 8 bytes are
+// the starting timestamp, next 1-10 bytes is the delta value using variable-length encoding, finally the
+// next 1-10 bytes is the count of values.
+//
+// For simple8b encoding, the 4 low bits store the log10 of the scaling factor.  The next 8 bytes is the
+// first delta value stored uncompressed, the remaining bytes are 64bit words containg compressed delta
+// values.
+//
+// For uncompressed encoding, the delta values are stored using 8 bytes each.
+
+import (
+	"encoding/binary"
+	"fmt"
+	"math"
+
+	"github.com/jwilder/encoding/simple8b"
+)
+
+const (
+	// timeUncompressed is a an uncompressed format using 8 bytes per timestamp
+	timeUncompressed = 0
+	// timeCompressedPackedSimple is a bit-packed format using simple8b encoding
+	timeCompressedPackedSimple = 1
+	// timeCompressedRLE is a run-length encoding format
+	timeCompressedRLE = 2
+)
+
+// TimeEncoder encodes time.Time to byte slices.
+type TimeEncoder interface {
+	Write(t int64)
+	Bytes() ([]byte, error)
+	Reset()
+}
+
+type encoder struct {
+	ts    []uint64
+	bytes []byte
+	enc   *simple8b.Encoder
+}
+
+// NewTimeEncoder returns a TimeEncoder with an initial buffer ready to hold sz bytes.
+func NewTimeEncoder(sz int) TimeEncoder {
+	return &encoder{
+		ts:  make([]uint64, 0, sz),
+		enc: simple8b.NewEncoder(),
+	}
+}
+
+// Reset sets the encoder back to its initial state.
+func (e *encoder) Reset() {
+	e.ts = e.ts[:0]
+	e.bytes = e.bytes[:0]
+	e.enc.Reset()
+}
+
+// Write adds a timestamp to the compressed stream.
+func (e *encoder) Write(t int64) {
+	e.ts = append(e.ts, uint64(t))
+}
+
+func (e *encoder) reduce() (max, divisor uint64, rle bool, deltas []uint64) {
+	// Compute the deltas in place to avoid allocating another slice
+	deltas = e.ts
+	// Starting values for a max and divisor
+	max, divisor = 0, 1e12
+
+	// Indicates whether the the deltas can be run-length encoded
+	rle = true
+
+	// Iterate in reverse so we can apply deltas in place
+	for i := len(deltas) - 1; i > 0; i-- {
+
+		// First differential encode the values
+		deltas[i] = deltas[i] - deltas[i-1]
+
+		// We also need to keep track of the max value and largest common divisor
+		v := deltas[i]
+
+		if v > max {
+			max = v
+		}
+
+		// If our value is divisible by 10, break.  Otherwise, try the next smallest divisor.
+		for divisor > 1 && v%divisor != 0 {
+			divisor /= 10
+		}
+
+		// Skip the first value || see if prev = curr.  The deltas can be RLE if the are all equal.
+		rle = i == len(deltas)-1 || rle && (deltas[i+1] == deltas[i])
+	}
+	return
+}
+
+// Bytes returns the encoded bytes of all written times.
+func (e *encoder) Bytes() ([]byte, error) {
+	if len(e.ts) == 0 {
+		return e.bytes[:0], nil
+	}
+
+	// Maximum and largest common divisor.  rle is true if dts (the delta timestamps),
+	// are all the same.
+	max, div, rle, dts := e.reduce()
+
+	// The deltas are all the same, so we can run-length encode them
+	if rle && len(e.ts) > 1 {
+		return e.encodeRLE(e.ts[0], e.ts[1], div, len(e.ts))
+	}
+
+	// We can't compress this time-range, the deltas exceed 1 << 60
+	if max > simple8b.MaxValue {
+		return e.encodeRaw()
+	}
+
+	return e.encodePacked(div, dts)
+}
+
+func (e *encoder) encodePacked(div uint64, dts []uint64) ([]byte, error) {
+	// Only apply the divisor if it's greater than 1 since division is expensive.
+	if div > 1 {
+		for _, v := range dts[1:] {
+			if err := e.enc.Write(v / div); err != nil {
+				return nil, err
+			}
+		}
+	} else {
+		for _, v := range dts[1:] {
+			if err := e.enc.Write(v); err != nil {
+				return nil, err
+			}
+		}
+	}
+
+	// The compressed deltas
+	deltas, err := e.enc.Bytes()
+	if err != nil {
+		return nil, err
+	}
+
+	sz := 8 + 1 + len(deltas)
+	if cap(e.bytes) < sz {
+		e.bytes = make([]byte, sz)
+	}
+	b := e.bytes[:sz]
+
+	// 4 high bits used for the encoding type
+	b[0] = byte(timeCompressedPackedSimple) << 4
+	// 4 low bits are the log10 divisor
+	b[0] |= byte(math.Log10(float64(div)))
+
+	// The first delta value
+	binary.BigEndian.PutUint64(b[1:9], uint64(dts[0]))
+
+	copy(b[9:], deltas)
+	return b[:9+len(deltas)], nil
+}
+
+func (e *encoder) encodeRaw() ([]byte, error) {
+	sz := 1 + len(e.ts)*8
+	if cap(e.bytes) < sz {
+		e.bytes = make([]byte, sz)
+	}
+	b := e.bytes[:sz]
+	b[0] = byte(timeUncompressed) << 4
+	for i, v := range e.ts {
+		binary.BigEndian.PutUint64(b[1+i*8:1+i*8+8], uint64(v))
+	}
+	return b, nil
+}
+
+func (e *encoder) encodeRLE(first, delta, div uint64, n int) ([]byte, error) {
+	// Large varints can take up to 10 bytes, we're encoding 3 + 1 byte type
+	sz := 31
+	if cap(e.bytes) < sz {
+		e.bytes = make([]byte, sz)
+	}
+	b := e.bytes[:sz]
+	// 4 high bits used for the encoding type
+	b[0] = byte(timeCompressedRLE) << 4
+	// 4 low bits are the log10 divisor
+	b[0] |= byte(math.Log10(float64(div)))
+
+	i := 1
+	// The first timestamp
+	binary.BigEndian.PutUint64(b[i:], uint64(first))
+	i += 8
+	// The first delta
+	i += binary.PutUvarint(b[i:], uint64(delta/div))
+	// The number of times the delta is repeated
+	i += binary.PutUvarint(b[i:], uint64(n))
+
+	return b[:i], nil
+}
+
+// TimeDecoder decodes a byte slice into timestamps.
+type TimeDecoder struct {
+	v    int64
+	i, n int
+	ts   []uint64
+	dec  simple8b.Decoder
+	err  error
+
+	// The delta value for a run-length encoded byte slice
+	rleDelta int64
+
+	encoding byte
+}
+
+// Init initializes the decoder with bytes to read from.
+func (d *TimeDecoder) Init(b []byte) {
+	d.v = 0
+	d.i = 0
+	d.ts = d.ts[:0]
+	d.err = nil
+	if len(b) > 0 {
+		// Encoding type is stored in the 4 high bits of the first byte
+		d.encoding = b[0] >> 4
+	}
+	d.decode(b)
+}
+
+// Next returns true if there are any timestamps remaining to be decoded.
+func (d *TimeDecoder) Next() bool {
+	if d.err != nil {
+		return false
+	}
+
+	if d.encoding == timeCompressedRLE {
+		if d.i >= d.n {
+			return false
+		}
+		d.i++
+		d.v += d.rleDelta
+		return d.i < d.n
+	}
+
+	if d.i >= len(d.ts) {
+		return false
+	}
+	d.v = int64(d.ts[d.i])
+	d.i++
+	return true
+}
+
+// Read returns the next timestamp from the decoder.
+func (d *TimeDecoder) Read() int64 {
+	return d.v
+}
+
+// Error returns the last error encountered by the decoder.
+func (d *TimeDecoder) Error() error {
+	return d.err
+}
+
+func (d *TimeDecoder) decode(b []byte) {
+	if len(b) == 0 {
+		return
+	}
+
+	switch d.encoding {
+	case timeUncompressed:
+		d.decodeRaw(b[1:])
+	case timeCompressedRLE:
+		d.decodeRLE(b)
+	case timeCompressedPackedSimple:
+		d.decodePacked(b)
+	default:
+		d.err = fmt.Errorf("unknown encoding: %v", d.encoding)
+	}
+}
+
+func (d *TimeDecoder) decodePacked(b []byte) {
+	if len(b) < 9 {
+		d.err = fmt.Errorf("TimeDecoder: not enough data to decode packed timestamps")
+		return
+	}
+	div := uint64(math.Pow10(int(b[0] & 0xF)))
+	first := uint64(binary.BigEndian.Uint64(b[1:9]))
+
+	d.dec.SetBytes(b[9:])
+
+	d.i = 0
+	deltas := d.ts[:0]
+	deltas = append(deltas, first)
+
+	for d.dec.Next() {
+		deltas = append(deltas, d.dec.Read())
+	}
+
+	// Compute the prefix sum and scale the deltas back up
+	last := deltas[0]
+	if div > 1 {
+		for i := 1; i < len(deltas); i++ {
+			dgap := deltas[i] * div
+			deltas[i] = last + dgap
+			last = deltas[i]
+		}
+	} else {
+		for i := 1; i < len(deltas); i++ {
+			deltas[i] += last
+			last = deltas[i]
+		}
+	}
+
+	d.i = 0
+	d.ts = deltas
+}
+
+func (d *TimeDecoder) decodeRLE(b []byte) {
+	if len(b) < 9 {
+		d.err = fmt.Errorf("TimeDecoder: not enough data for initial RLE timestamp")
+		return
+	}
+
+	var i, n int
+
+	// Lower 4 bits hold the 10 based exponent so we can scale the values back up
+	mod := int64(math.Pow10(int(b[i] & 0xF)))
+	i++
+
+	// Next 8 bytes is the starting timestamp
+	first := binary.BigEndian.Uint64(b[i : i+8])
+	i += 8
+
+	// Next 1-10 bytes is our (scaled down by factor of 10) run length values
+	value, n := binary.Uvarint(b[i:])
+	if n <= 0 {
+		d.err = fmt.Errorf("TimeDecoder: invalid run length in decodeRLE")
+		return
+	}
+
+	// Scale the value back up
+	value *= uint64(mod)
+	i += n
+
+	// Last 1-10 bytes is how many times the value repeats
+	count, n := binary.Uvarint(b[i:])
+	if n <= 0 {
+		d.err = fmt.Errorf("TimeDecoder: invalid repeat value in decodeRLE")
+		return
+	}
+
+	d.v = int64(first - value)
+	d.rleDelta = int64(value)
+
+	d.i = -1
+	d.n = int(count)
+}
+
+func (d *TimeDecoder) decodeRaw(b []byte) {
+	d.i = 0
+	d.ts = make([]uint64, len(b)/8)
+	for i := range d.ts {
+		d.ts[i] = binary.BigEndian.Uint64(b[i*8 : i*8+8])
+
+		delta := d.ts[i]
+		// Compute the prefix sum and scale the deltas back up
+		if i > 0 {
+			d.ts[i] = d.ts[i-1] + delta
+		}
+	}
+}
+
+func CountTimestamps(b []byte) int {
+	if len(b) == 0 {
+		return 0
+	}
+
+	// Encoding type is stored in the 4 high bits of the first byte
+	encoding := b[0] >> 4
+	switch encoding {
+	case timeUncompressed:
+		// Uncompressed timestamps are just 8 bytes each
+		return len(b[1:]) / 8
+	case timeCompressedRLE:
+		// First 9 bytes are the starting timestamp and scaling factor, skip over them
+		i := 9
+		// Next 1-10 bytes is our (scaled down by factor of 10) run length values
+		_, n := binary.Uvarint(b[9:])
+		i += n
+		// Last 1-10 bytes is how many times the value repeats
+		count, _ := binary.Uvarint(b[i:])
+		return int(count)
+	case timeCompressedPackedSimple:
+		// First 9 bytes are the starting timestamp and scaling factor, skip over them
+		count, _ := simple8b.CountBytes(b[9:])
+		return count + 1 // +1 is for the first uncompressed timestamp, starting timestamep in b[1:9]
+	default:
+		return 0
+	}
+}