add vendoring with go dep

Adrian Todorov
2017-10-25 20:52:40 +00:00
parent 704f4d20d1
commit a59409f16b
1627 changed files with 489673 additions and 0 deletions

@@ -0,0 +1,107 @@
# `influx_inspect`
## Ways to run
### `influx_inspect`
Will print usage for the tool.
### `influx_inspect report`
Displays series metadata for all shards. Default location: [$HOME/.influxdb].
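For example, to report with detailed cardinality estimates on a single shard's TSM directory (the shard path below is illustrative):
```
influx_inspect report --detailed $HOME/.influxdb/data/mydb/autogen/1
```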
### `influx_inspect dumptsm`
Dumps low-level details about tsm1 files.
#### Flags
##### `-index` bool
Dump raw index data.
`default` = false
##### `-blocks` bool
Dump raw block data.
`default` = false
##### `-all` bool
Dump all data. Caution: This may print a lot of information.
`default` = false
##### `-filter-key` string
Only display index and block data that match this key substring.
`default` = ""
### `influx_inspect export`
Exports all tsm files to line protocol. This output file can be imported via the [influx](https://github.com/influxdata/influxdb/tree/master/importer#running-the-import-command) command.
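For example, assuming a compressed export was written to the default output path, it could be re-imported with:
```
influx -import -path=$HOME/.influxdb/export -compressed
```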
#### `-datadir` string
Data storage path.
`default` = "$HOME/.influxdb/data"
#### `-waldir` string
WAL storage path.
`default` = "$HOME/.influxdb/wal"
#### `-out` string
Destination file to export to.
`default` = "$HOME/.influxdb/export"
#### `-database` string (optional)
Database to export.
`default` = ""
#### `-retention` string (optional)
Retention policy to export.
`default` = ""
#### `-start` string (optional)
The start of the time range to export (RFC3339 format).
#### `-end` string (optional)
The end of the time range to export (RFC3339 format).
#### `-compress` bool (optional)
Compress the output.
`default` = false
#### Sample Commands
Export entire database and compress output:
```
influx_inspect export --compress
```
Export specific retention policy:
```
influx_inspect export --database mydb --retention autogen
```
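Export a specific time range (the `-start` and `-end` values must be RFC3339 timestamps; the range below is illustrative):
```
influx_inspect export --database mydb --start 2017-01-01T00:00:00Z --end 2017-01-31T00:00:00Z
```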
##### Sample Data
This is a sample of what the output will look like.
```
# DDL
CREATE DATABASE MY_DB_NAME
CREATE RETENTION POLICY autogen ON MY_DB_NAME DURATION inf REPLICATION 1
# DML
# CONTEXT-DATABASE:MY_DB_NAME
# CONTEXT-RETENTION-POLICY:autogen
randset value=97.9296104805 1439856000000000000
randset value=25.3849066842 1439856100000000000
```
# Caveats
The system does not have access to the meta store when exporting TSM shards. As such, it always creates the retention policy with infinite duration and replication factor of 1.
End users may want to change this prior to re-importing if they are importing to a cluster or want a different retention duration.
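For example, to target a cluster that should keep 30 days of data with a replication factor of 3, the generated retention policy line could be edited before import to something like (values illustrative):
```
CREATE RETENTION POLICY autogen ON MY_DB_NAME DURATION 30d REPLICATION 3
```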

@@ -0,0 +1,474 @@
// Package dumptsi inspects low-level details about tsi1 files.
package dumptsi
import (
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"text/tabwriter"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/tsdb/index/tsi1"
)
// Command represents the program execution for "influxd dumptsi".
type Command struct {
// Standard input/output, overridden for testing.
Stderr io.Writer
Stdout io.Writer
paths []string
showSeries bool
showMeasurements bool
showTagKeys bool
showTagValues bool
showTagValueSeries bool
measurementFilter *regexp.Regexp
tagKeyFilter *regexp.Regexp
tagValueFilter *regexp.Regexp
}
// NewCommand returns a new instance of Command.
func NewCommand() *Command {
return &Command{
Stderr: os.Stderr,
Stdout: os.Stdout,
}
}
// Run executes the command.
func (cmd *Command) Run(args ...string) error {
var measurementFilter, tagKeyFilter, tagValueFilter string
fs := flag.NewFlagSet("dumptsi", flag.ExitOnError)
fs.BoolVar(&cmd.showSeries, "series", false, "Show raw series data")
fs.BoolVar(&cmd.showMeasurements, "measurements", false, "Show raw measurement data")
fs.BoolVar(&cmd.showTagKeys, "tag-keys", false, "Show raw tag key data")
fs.BoolVar(&cmd.showTagValues, "tag-values", false, "Show raw tag value data")
fs.BoolVar(&cmd.showTagValueSeries, "tag-value-series", false, "Show raw series data for each value")
fs.StringVar(&measurementFilter, "measurement-filter", "", "Regex measurement filter")
fs.StringVar(&tagKeyFilter, "tag-key-filter", "", "Regex tag key filter")
fs.StringVar(&tagValueFilter, "tag-value-filter", "", "Regex tag value filter")
fs.SetOutput(cmd.Stdout)
fs.Usage = cmd.printUsage
if err := fs.Parse(args); err != nil {
return err
}
// Parse filters.
if measurementFilter != "" {
re, err := regexp.Compile(measurementFilter)
if err != nil {
return err
}
cmd.measurementFilter = re
}
if tagKeyFilter != "" {
re, err := regexp.Compile(tagKeyFilter)
if err != nil {
return err
}
cmd.tagKeyFilter = re
}
if tagValueFilter != "" {
re, err := regexp.Compile(tagValueFilter)
if err != nil {
return err
}
cmd.tagValueFilter = re
}
cmd.paths = fs.Args()
if len(cmd.paths) == 0 {
fmt.Printf("at least one path required\n\n")
fs.Usage()
return nil
}
// Some flags imply other flags.
if cmd.showTagValueSeries {
cmd.showTagValues = true
}
if cmd.showTagValues {
cmd.showTagKeys = true
}
if cmd.showTagKeys {
cmd.showMeasurements = true
}
return cmd.run()
}
func (cmd *Command) run() error {
// Build a file set from the paths on the command line.
idx, fs, err := cmd.readFileSet()
if err != nil {
return err
}
if idx != nil {
defer idx.Close()
} else {
defer fs.Close()
}
defer fs.Release()
// Show either raw data or summary stats.
if cmd.showSeries || cmd.showMeasurements {
if err := cmd.printMerged(fs); err != nil {
return err
}
} else {
if err := cmd.printFileSummaries(fs); err != nil {
return err
}
}
return nil
}
func (cmd *Command) readFileSet() (*tsi1.Index, *tsi1.FileSet, error) {
// If only one path exists and it's a directory then open as an index.
if len(cmd.paths) == 1 {
fi, err := os.Stat(cmd.paths[0])
if err != nil {
return nil, nil, err
} else if fi.IsDir() {
idx := tsi1.NewIndex()
idx.Path = cmd.paths[0]
idx.CompactionEnabled = false
if err := idx.Open(); err != nil {
return nil, nil, err
}
return idx, idx.RetainFileSet(), nil
}
}
// Open each file and group into a fileset.
var files []tsi1.File
for _, path := range cmd.paths {
switch ext := filepath.Ext(path); ext {
case tsi1.LogFileExt:
f := tsi1.NewLogFile(path)
if err := f.Open(); err != nil {
return nil, nil, err
}
files = append(files, f)
case tsi1.IndexFileExt:
f := tsi1.NewIndexFile()
f.SetPath(path)
if err := f.Open(); err != nil {
return nil, nil, err
}
files = append(files, f)
default:
return nil, nil, fmt.Errorf("unexpected file extension: %s", ext)
}
}
fs, err := tsi1.NewFileSet(nil, files)
if err != nil {
return nil, nil, err
}
fs.Retain()
return nil, fs, nil
}
func (cmd *Command) printMerged(fs *tsi1.FileSet) error {
if err := cmd.printSeries(fs); err != nil {
return err
} else if err := cmd.printMeasurements(fs); err != nil {
return err
}
return nil
}
func (cmd *Command) printSeries(fs *tsi1.FileSet) error {
if !cmd.showSeries {
return nil
}
// Print header.
tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
fmt.Fprintln(tw, "Series\t")
// Iterate over each series.
itr := fs.SeriesIterator()
for e := itr.Next(); e != nil; e = itr.Next() {
name, tags := e.Name(), e.Tags()
if !cmd.matchSeries(e.Name(), e.Tags()) {
continue
}
fmt.Fprintf(tw, "%s%s\t%v\n", name, tags.HashKey(), deletedString(e.Deleted()))
}
// Flush & write footer spacing.
if err := tw.Flush(); err != nil {
return err
}
fmt.Fprint(cmd.Stdout, "\n\n")
return nil
}
func (cmd *Command) printMeasurements(fs *tsi1.FileSet) error {
if !cmd.showMeasurements {
return nil
}
tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
fmt.Fprintln(tw, "Measurement\t")
// Iterate over each series.
itr := fs.MeasurementIterator()
for e := itr.Next(); e != nil; e = itr.Next() {
if cmd.measurementFilter != nil && !cmd.measurementFilter.Match(e.Name()) {
continue
}
fmt.Fprintf(tw, "%s\t%v\n", e.Name(), deletedString(e.Deleted()))
if err := tw.Flush(); err != nil {
return err
}
if err := cmd.printTagKeys(fs, e.Name()); err != nil {
return err
}
}
fmt.Fprint(cmd.Stdout, "\n\n")
return nil
}
func (cmd *Command) printTagKeys(fs *tsi1.FileSet, name []byte) error {
if !cmd.showTagKeys {
return nil
}
// Iterate over each key.
tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
itr := fs.TagKeyIterator(name)
for e := itr.Next(); e != nil; e = itr.Next() {
if cmd.tagKeyFilter != nil && !cmd.tagKeyFilter.Match(e.Key()) {
continue
}
fmt.Fprintf(tw, " %s\t%v\n", e.Key(), deletedString(e.Deleted()))
if err := tw.Flush(); err != nil {
return err
}
if err := cmd.printTagValues(fs, name, e.Key()); err != nil {
return err
}
}
fmt.Fprint(cmd.Stdout, "\n")
return nil
}
func (cmd *Command) printTagValues(fs *tsi1.FileSet, name, key []byte) error {
if !cmd.showTagValues {
return nil
}
// Iterate over each value.
tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
itr := fs.TagValueIterator(name, key)
for e := itr.Next(); e != nil; e = itr.Next() {
if cmd.tagValueFilter != nil && !cmd.tagValueFilter.Match(e.Value()) {
continue
}
fmt.Fprintf(tw, " %s\t%v\n", e.Value(), deletedString(e.Deleted()))
if err := tw.Flush(); err != nil {
return err
}
if err := cmd.printTagValueSeries(fs, name, key, e.Value()); err != nil {
return err
}
}
fmt.Fprint(cmd.Stdout, "\n")
return nil
}
func (cmd *Command) printTagValueSeries(fs *tsi1.FileSet, name, key, value []byte) error {
if !cmd.showTagValueSeries {
return nil
}
// Iterate over each series.
tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
itr := fs.TagValueSeriesIterator(name, key, value)
for e := itr.Next(); e != nil; e = itr.Next() {
if !cmd.matchSeries(e.Name(), e.Tags()) {
continue
}
fmt.Fprintf(tw, " %s%s\n", e.Name(), e.Tags().HashKey())
if err := tw.Flush(); err != nil {
return err
}
}
fmt.Fprint(cmd.Stdout, "\n")
return nil
}
func (cmd *Command) printFileSummaries(fs *tsi1.FileSet) error {
for _, f := range fs.Files() {
switch f := f.(type) {
case *tsi1.LogFile:
if err := cmd.printLogFileSummary(f); err != nil {
return err
}
case *tsi1.IndexFile:
if err := cmd.printIndexFileSummary(f); err != nil {
return err
}
default:
panic("unreachable")
}
fmt.Fprintln(cmd.Stdout, "")
}
return nil
}
func (cmd *Command) printLogFileSummary(f *tsi1.LogFile) error {
fmt.Fprintf(cmd.Stdout, "[LOG FILE] %s\n", filepath.Base(f.Path()))
tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
fmt.Fprintf(tw, "Series:\t%d\n", f.SeriesN())
fmt.Fprintf(tw, "Measurements:\t%d\n", f.MeasurementN())
fmt.Fprintf(tw, "Tag Keys:\t%d\n", f.TagKeyN())
fmt.Fprintf(tw, "Tag Values:\t%d\n", f.TagValueN())
return tw.Flush()
}
func (cmd *Command) printIndexFileSummary(f *tsi1.IndexFile) error {
fmt.Fprintf(cmd.Stdout, "[INDEX FILE] %s\n", filepath.Base(f.Path()))
// Calculate summary stats.
seriesN := f.SeriesN()
var measurementN, measurementSeriesN, measurementSeriesSize uint64
var keyN uint64
var valueN, valueSeriesN, valueSeriesSize uint64
mitr := f.MeasurementIterator()
for me, _ := mitr.Next().(*tsi1.MeasurementBlockElem); me != nil; me, _ = mitr.Next().(*tsi1.MeasurementBlockElem) {
kitr := f.TagKeyIterator(me.Name())
for ke, _ := kitr.Next().(*tsi1.TagBlockKeyElem); ke != nil; ke, _ = kitr.Next().(*tsi1.TagBlockKeyElem) {
vitr := f.TagValueIterator(me.Name(), ke.Key())
for ve, _ := vitr.Next().(*tsi1.TagBlockValueElem); ve != nil; ve, _ = vitr.Next().(*tsi1.TagBlockValueElem) {
valueN++
valueSeriesN += uint64(ve.SeriesN())
valueSeriesSize += uint64(len(ve.SeriesData()))
}
keyN++
}
measurementN++
measurementSeriesN += uint64(me.SeriesN())
measurementSeriesSize += uint64(len(me.SeriesData()))
}
// Write stats.
tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
fmt.Fprintf(tw, "Series:\t%d\n", seriesN)
fmt.Fprintf(tw, "Measurements:\t%d\n", measurementN)
fmt.Fprintf(tw, " Series data size:\t%d (%s)\n", measurementSeriesSize, formatSize(measurementSeriesSize))
fmt.Fprintf(tw, " Bytes per series:\t%.01fb\n", float64(measurementSeriesSize)/float64(measurementSeriesN))
fmt.Fprintf(tw, "Tag Keys:\t%d\n", keyN)
fmt.Fprintf(tw, "Tag Values:\t%d\n", valueN)
fmt.Fprintf(tw, " Series:\t%d\n", valueSeriesN)
fmt.Fprintf(tw, " Series data size:\t%d (%s)\n", valueSeriesSize, formatSize(valueSeriesSize))
fmt.Fprintf(tw, " Bytes per series:\t%.01fb\n", float64(valueSeriesSize)/float64(valueSeriesN))
fmt.Fprintf(tw, "Avg tags per series:\t%.01f\n", float64(valueSeriesN)/float64(seriesN))
if err := tw.Flush(); err != nil {
return err
}
return nil
}
// matchSeries returns true if the command's filters match the series.
func (cmd *Command) matchSeries(name []byte, tags models.Tags) bool {
// Filter by measurement.
if cmd.measurementFilter != nil && !cmd.measurementFilter.Match(name) {
return false
}
// Filter by tag key/value.
if cmd.tagKeyFilter != nil || cmd.tagValueFilter != nil {
var matched bool
for _, tag := range tags {
if (cmd.tagKeyFilter == nil || cmd.tagKeyFilter.Match(tag.Key)) && (cmd.tagValueFilter == nil || cmd.tagValueFilter.Match(tag.Value)) {
matched = true
break
}
}
if !matched {
return false
}
}
return true
}
// printUsage prints the usage message to STDOUT.
func (cmd *Command) printUsage() {
usage := `Dumps low-level details about tsi1 files.
Usage: influx_inspect dumptsi [flags] path...
-series
Dump raw series data
-measurements
Dump raw measurement data
-tag-keys
Dump raw tag keys
-tag-values
Dump raw tag values
-tag-value-series
Dump raw series for each tag value
-measurement-filter REGEXP
Filters data by measurement regular expression
-tag-key-filter REGEXP
Filters data by tag key regular expression
-tag-value-filter REGEXP
Filters data by tag value regular expression
If no flags are specified then summary stats are provided for each file.
`
fmt.Fprint(cmd.Stdout, usage)
}
// deletedString returns "(deleted)" if v is true.
func deletedString(v bool) string {
if v {
return "(deleted)"
}
return ""
}
func formatSize(v uint64) string {
denom := uint64(1)
var uom string
for _, uom = range []string{"b", "kb", "mb", "gb", "tb"} {
if denom*1024 > v {
break
}
denom *= 1024
}
return fmt.Sprintf("%0.01f%s", float64(v)/float64(denom), uom)
}

@@ -0,0 +1,332 @@
// Package dumptsm inspects low-level details about tsm1 files.
package dumptsm
import (
"encoding/binary"
"flag"
"fmt"
"io"
"os"
"strconv"
"strings"
"text/tabwriter"
"time"
"github.com/influxdata/influxdb/tsdb/engine/tsm1"
)
// Command represents the program execution for "influxd dumptsm".
type Command struct {
// Standard input/output, overridden for testing.
Stderr io.Writer
Stdout io.Writer
dumpIndex bool
dumpBlocks bool
dumpAll bool
filterKey string
path string
}
// NewCommand returns a new instance of Command.
func NewCommand() *Command {
return &Command{
Stderr: os.Stderr,
Stdout: os.Stdout,
}
}
// Run executes the command.
func (cmd *Command) Run(args ...string) error {
fs := flag.NewFlagSet("file", flag.ExitOnError)
fs.BoolVar(&cmd.dumpIndex, "index", false, "Dump raw index data")
fs.BoolVar(&cmd.dumpBlocks, "blocks", false, "Dump raw block data")
fs.BoolVar(&cmd.dumpAll, "all", false, "Dump all data. Caution: This may print a lot of information")
fs.StringVar(&cmd.filterKey, "filter-key", "", "Only display index and block data match this key substring")
fs.SetOutput(cmd.Stdout)
fs.Usage = cmd.printUsage
if err := fs.Parse(args); err != nil {
return err
}
if fs.Arg(0) == "" {
fmt.Printf("TSM file not specified\n\n")
fs.Usage()
return nil
}
cmd.path = fs.Args()[0]
cmd.dumpBlocks = cmd.dumpBlocks || cmd.dumpAll || cmd.filterKey != ""
cmd.dumpIndex = cmd.dumpIndex || cmd.dumpAll || cmd.filterKey != ""
return cmd.dump()
}
func (cmd *Command) dump() error {
var errors []error
f, err := os.Open(cmd.path)
if err != nil {
return err
}
// Get the file size
stat, err := f.Stat()
if err != nil {
return err
}
b := make([]byte, 8)
r, err := tsm1.NewTSMReader(f)
if err != nil {
return fmt.Errorf("Error opening TSM files: %s", err.Error())
}
defer r.Close()
minTime, maxTime := r.TimeRange()
keyCount := r.KeyCount()
blockStats := &blockStats{}
println("Summary:")
fmt.Printf(" File: %s\n", cmd.path)
fmt.Printf(" Time Range: %s - %s\n",
time.Unix(0, minTime).UTC().Format(time.RFC3339Nano),
time.Unix(0, maxTime).UTC().Format(time.RFC3339Nano),
)
fmt.Printf(" Duration: %s ", time.Unix(0, maxTime).Sub(time.Unix(0, minTime)))
fmt.Printf(" Series: %d ", keyCount)
fmt.Printf(" File Size: %d\n", stat.Size())
println()
tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
if cmd.dumpIndex {
println("Index:")
tw.Flush()
println()
fmt.Fprintln(tw, " "+strings.Join([]string{"Pos", "Min Time", "Max Time", "Ofs", "Size", "Key", "Field"}, "\t"))
var pos int
for i := 0; i < keyCount; i++ {
key, _ := r.KeyAt(i)
for _, e := range r.Entries(string(key)) {
pos++
split := strings.Split(string(key), "#!~#")
// Possible corruption? Read what we can and leave the rest blank rather than panicking.
var measurement, field string
if len(split) >= 2 {
measurement, field = split[0], split[1]
}
if cmd.filterKey != "" && !strings.Contains(string(key), cmd.filterKey) {
continue
}
fmt.Fprintln(tw, " "+strings.Join([]string{
strconv.FormatInt(int64(pos), 10),
time.Unix(0, e.MinTime).UTC().Format(time.RFC3339Nano),
time.Unix(0, e.MaxTime).UTC().Format(time.RFC3339Nano),
strconv.FormatInt(int64(e.Offset), 10),
strconv.FormatInt(int64(e.Size), 10),
measurement,
field,
}, "\t"))
tw.Flush()
}
}
}
tw = tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
fmt.Fprintln(tw, " "+strings.Join([]string{"Blk", "Chk", "Ofs", "Len", "Type", "Min Time", "Points", "Enc [T/V]", "Len [T/V]"}, "\t"))
// Starting at 5 because the magic number is 4 bytes + 1 byte version
i := int64(5)
var blockCount, pointCount, blockSize int64
indexSize := r.IndexSize()
// Start at the beginning and read every block
for j := 0; j < keyCount; j++ {
key, _ := r.KeyAt(j)
for _, e := range r.Entries(string(key)) {
f.Seek(int64(e.Offset), 0)
f.Read(b[:4])
chksum := binary.BigEndian.Uint32(b[:4])
buf := make([]byte, e.Size-4)
f.Read(buf)
blockSize += int64(e.Size)
if cmd.filterKey != "" && !strings.Contains(string(key), cmd.filterKey) {
i += int64(e.Size)
blockCount++
continue
}
blockType := buf[0]
encoded := buf[1:]
var v []tsm1.Value
v, err := tsm1.DecodeBlock(buf, v)
if err != nil {
// Record the error and skip this block so the rest of the file can still be inspected.
errors = append(errors, fmt.Errorf("block %d: %v", blockCount, err))
i += int64(e.Size)
blockCount++
continue
}
startTime := time.Unix(0, v[0].UnixNano())
pointCount += int64(len(v))
// Length of the timestamp block
tsLen, j := binary.Uvarint(encoded)
// Unpack the timestamp bytes
ts := encoded[int(j) : int(j)+int(tsLen)]
// Unpack the value bytes
values := encoded[int(j)+int(tsLen):]
tsEncoding := timeEnc[int(ts[0]>>4)]
vEncoding := encDescs[int(blockType+1)][values[0]>>4]
typeDesc := blockTypes[blockType]
blockStats.inc(0, ts[0]>>4)
blockStats.inc(int(blockType+1), values[0]>>4)
blockStats.size(len(buf))
if cmd.dumpBlocks {
fmt.Fprintln(tw, " "+strings.Join([]string{
strconv.FormatInt(blockCount, 10),
strconv.FormatUint(uint64(chksum), 10),
strconv.FormatInt(i, 10),
strconv.FormatInt(int64(len(buf)), 10),
typeDesc,
startTime.UTC().Format(time.RFC3339Nano),
strconv.FormatInt(int64(len(v)), 10),
fmt.Sprintf("%s/%s", tsEncoding, vEncoding),
fmt.Sprintf("%d/%d", len(ts), len(values)),
}, "\t"))
}
i += int64(e.Size)
blockCount++
}
}
if cmd.dumpBlocks {
println("Blocks:")
tw.Flush()
println()
}
var blockSizeAvg int64
if blockCount > 0 {
blockSizeAvg = blockSize / blockCount
}
fmt.Printf("Statistics\n")
fmt.Printf(" Blocks:\n")
fmt.Printf(" Total: %d Size: %d Min: %d Max: %d Avg: %d\n",
blockCount, blockSize, blockStats.min, blockStats.max, blockSizeAvg)
fmt.Printf(" Index:\n")
fmt.Printf(" Total: %d Size: %d\n", blockCount, indexSize)
fmt.Printf(" Points:\n")
fmt.Printf(" Total: %d", pointCount)
println()
println(" Encoding:")
for i, counts := range blockStats.counts {
if len(counts) == 0 {
continue
}
fmt.Printf(" %s: ", strings.Title(fieldType[i]))
for j, v := range counts {
fmt.Printf("\t%s: %d (%d%%) ", encDescs[i][j], v, int(float64(v)/float64(blockCount)*100))
}
println()
}
fmt.Printf(" Compression:\n")
fmt.Printf(" Per block: %0.2f bytes/point\n", float64(blockSize)/float64(pointCount))
fmt.Printf(" Total: %0.2f bytes/point\n", float64(stat.Size())/float64(pointCount))
if len(errors) > 0 {
println()
fmt.Printf("Errors (%d):\n", len(errors))
for _, err := range errors {
fmt.Printf(" * %v\n", err)
}
println()
return fmt.Errorf("error count %d", len(errors))
}
return nil
}
// printUsage prints the usage message to STDOUT.
func (cmd *Command) printUsage() {
usage := `Dumps low-level details about tsm1 files.
Usage: influx_inspect dumptsm [flags] <path>
-index
Dump raw index data
-blocks
Dump raw block data
-all
Dump all data. Caution: This may print a lot of information
-filter-key <name>
Only display index and block data that match this key substring
`
fmt.Fprint(cmd.Stdout, usage)
}
var (
fieldType = []string{
"timestamp", "float", "int", "bool", "string",
}
blockTypes = []string{
"float64", "int64", "bool", "string",
}
timeEnc = []string{
"none", "s8b", "rle",
}
floatEnc = []string{
"none", "gor",
}
intEnc = []string{
"none", "s8b", "rle",
}
boolEnc = []string{
"none", "bp",
}
stringEnc = []string{
"none", "snpy",
}
encDescs = [][]string{
timeEnc, floatEnc, intEnc, boolEnc, stringEnc,
}
)
type blockStats struct {
min, max int
counts [][]int
}
func (b *blockStats) inc(typ int, enc byte) {
for len(b.counts) <= typ {
b.counts = append(b.counts, []int{})
}
for len(b.counts[typ]) <= int(enc) {
b.counts[typ] = append(b.counts[typ], 0)
}
b.counts[typ][enc]++
}
func (b *blockStats) size(sz int) {
if b.min == 0 || sz < b.min {
b.min = sz
}
if b.max == 0 || sz > b.max {
b.max = sz
}
}

@@ -0,0 +1,3 @@
package dumptsm_test
// TODO: write some tests

@@ -0,0 +1,408 @@
// Package export exports TSM files into InfluxDB line protocol format.
package export
import (
"bufio"
"compress/gzip"
"flag"
"fmt"
"io"
"math"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"time"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/pkg/escape"
"github.com/influxdata/influxdb/tsdb/engine/tsm1"
)
// Command represents the program execution for "influx_inspect export".
type Command struct {
// Standard input/output, overridden for testing.
Stderr io.Writer
Stdout io.Writer
dataDir string
walDir string
out string
database string
retentionPolicy string
startTime int64
endTime int64
compress bool
manifest map[string]struct{}
tsmFiles map[string][]string
walFiles map[string][]string
}
// NewCommand returns a new instance of Command.
func NewCommand() *Command {
return &Command{
Stderr: os.Stderr,
Stdout: os.Stdout,
manifest: make(map[string]struct{}),
tsmFiles: make(map[string][]string),
walFiles: make(map[string][]string),
}
}
// Run executes the command.
func (cmd *Command) Run(args ...string) error {
var start, end string
fs := flag.NewFlagSet("export", flag.ExitOnError)
fs.StringVar(&cmd.dataDir, "datadir", os.Getenv("HOME")+"/.influxdb/data", "Data storage path")
fs.StringVar(&cmd.walDir, "waldir", os.Getenv("HOME")+"/.influxdb/wal", "WAL storage path")
fs.StringVar(&cmd.out, "out", os.Getenv("HOME")+"/.influxdb/export", "Destination file to export to")
fs.StringVar(&cmd.database, "database", "", "Optional: the database to export")
fs.StringVar(&cmd.retentionPolicy, "retention", "", "Optional: the retention policy to export (requires -database)")
fs.StringVar(&start, "start", "", "Optional: the start time to export (RFC3339 format)")
fs.StringVar(&end, "end", "", "Optional: the end time to export (RFC3339 format)")
fs.BoolVar(&cmd.compress, "compress", false, "Compress the output")
fs.SetOutput(cmd.Stdout)
fs.Usage = func() {
fmt.Fprintf(cmd.Stdout, "Exports TSM files into InfluxDB line protocol format.\n\n")
fmt.Fprintf(cmd.Stdout, "Usage: %s export [flags]\n\n", filepath.Base(os.Args[0]))
fs.PrintDefaults()
}
if err := fs.Parse(args); err != nil {
return err
}
// set defaults
if start != "" {
s, err := time.Parse(time.RFC3339, start)
if err != nil {
return err
}
cmd.startTime = s.UnixNano()
} else {
cmd.startTime = math.MinInt64
}
if end != "" {
e, err := time.Parse(time.RFC3339, end)
if err != nil {
return err
}
cmd.endTime = e.UnixNano()
} else {
// set end time to max if it is not set.
cmd.endTime = math.MaxInt64
}
if err := cmd.validate(); err != nil {
return err
}
return cmd.export()
}
func (cmd *Command) validate() error {
if cmd.retentionPolicy != "" && cmd.database == "" {
return fmt.Errorf("must specify a db")
}
if cmd.endTime < cmd.startTime {
return fmt.Errorf("end time before start time")
}
return nil
}
func (cmd *Command) export() error {
if err := cmd.walkTSMFiles(); err != nil {
return err
}
if err := cmd.walkWALFiles(); err != nil {
return err
}
return cmd.write()
}
func (cmd *Command) walkTSMFiles() error {
return filepath.Walk(cmd.dataDir, func(path string, f os.FileInfo, err error) error {
if err != nil {
return err
}
// check to see if this is a tsm file
if filepath.Ext(path) != "."+tsm1.TSMFileExtension {
return nil
}
relPath, err := filepath.Rel(cmd.dataDir, path)
if err != nil {
return err
}
dirs := strings.Split(relPath, string(byte(os.PathSeparator)))
if len(dirs) < 2 {
return fmt.Errorf("invalid directory structure for %s", path)
}
if dirs[0] == cmd.database || cmd.database == "" {
if dirs[1] == cmd.retentionPolicy || cmd.retentionPolicy == "" {
key := filepath.Join(dirs[0], dirs[1])
cmd.manifest[key] = struct{}{}
cmd.tsmFiles[key] = append(cmd.tsmFiles[key], path)
}
}
return nil
})
}
func (cmd *Command) walkWALFiles() error {
return filepath.Walk(cmd.walDir, func(path string, f os.FileInfo, err error) error {
if err != nil {
return err
}
// check to see if this is a wal file
fileName := filepath.Base(path)
if filepath.Ext(path) != "."+tsm1.WALFileExtension || !strings.HasPrefix(fileName, tsm1.WALFilePrefix) {
return nil
}
relPath, err := filepath.Rel(cmd.walDir, path)
if err != nil {
return err
}
dirs := strings.Split(relPath, string(byte(os.PathSeparator)))
if len(dirs) < 2 {
return fmt.Errorf("invalid directory structure for %s", path)
}
if dirs[0] == cmd.database || cmd.database == "" {
if dirs[1] == cmd.retentionPolicy || cmd.retentionPolicy == "" {
key := filepath.Join(dirs[0], dirs[1])
cmd.manifest[key] = struct{}{}
cmd.walFiles[key] = append(cmd.walFiles[key], path)
}
}
return nil
})
}
func (cmd *Command) write() error {
// open our output file and create an output buffer
f, err := os.Create(cmd.out)
if err != nil {
return err
}
defer f.Close()
// Because calling (*os.File).Write is relatively expensive,
// and we don't *need* every exported line to hit the file immediately,
// use a sized buffered writer so that we only write to the file about once per megabyte.
bw := bufio.NewWriterSize(f, 1024*1024)
defer bw.Flush()
var w io.Writer = bw
if cmd.compress {
gzw := gzip.NewWriter(w)
defer gzw.Close()
w = gzw
}
s, e := time.Unix(0, cmd.startTime).Format(time.RFC3339), time.Unix(0, cmd.endTime).Format(time.RFC3339)
fmt.Fprintf(w, "# INFLUXDB EXPORT: %s - %s\n", s, e)
// Write out all the DDL
fmt.Fprintln(w, "# DDL")
for key := range cmd.manifest {
keys := strings.Split(key, string(os.PathSeparator))
db, rp := influxql.QuoteIdent(keys[0]), influxql.QuoteIdent(keys[1])
fmt.Fprintf(w, "CREATE DATABASE %s WITH NAME %s\n", db, rp)
}
fmt.Fprintln(w, "# DML")
for key := range cmd.manifest {
keys := strings.Split(key, string(os.PathSeparator))
fmt.Fprintf(w, "# CONTEXT-DATABASE:%s\n", keys[0])
fmt.Fprintf(w, "# CONTEXT-RETENTION-POLICY:%s\n", keys[1])
if files, ok := cmd.tsmFiles[key]; ok {
fmt.Fprintf(cmd.Stdout, "writing out tsm file data for %s...", key)
if err := cmd.writeTsmFiles(w, files); err != nil {
return err
}
fmt.Fprintln(cmd.Stdout, "complete.")
}
if _, ok := cmd.walFiles[key]; ok {
fmt.Fprintf(cmd.Stdout, "writing out wal file data for %s...", key)
if err := cmd.writeWALFiles(w, cmd.walFiles[key], key); err != nil {
return err
}
fmt.Fprintln(cmd.Stdout, "complete.")
}
}
return nil
}
func (cmd *Command) writeTsmFiles(w io.Writer, files []string) error {
fmt.Fprintln(w, "# writing tsm data")
// we need to make sure we write the same order that the files were written
sort.Strings(files)
for _, f := range files {
if err := cmd.exportTSMFile(f, w); err != nil {
return err
}
}
return nil
}
func (cmd *Command) exportTSMFile(tsmFilePath string, w io.Writer) error {
f, err := os.Open(tsmFilePath)
if err != nil {
return err
}
defer f.Close()
r, err := tsm1.NewTSMReader(f)
if err != nil {
fmt.Fprintf(cmd.Stderr, "unable to read %s, skipping: %s\n", tsmFilePath, err.Error())
return nil
}
defer r.Close()
if sgStart, sgEnd := r.TimeRange(); sgStart > cmd.endTime || sgEnd < cmd.startTime {
return nil
}
for i := 0; i < r.KeyCount(); i++ {
key, _ := r.KeyAt(i)
values, err := r.ReadAll(string(key))
if err != nil {
fmt.Fprintf(cmd.Stderr, "unable to read key %q in %s, skipping: %s\n", string(key), tsmFilePath, err.Error())
continue
}
measurement, field := tsm1.SeriesAndFieldFromCompositeKey(key)
field = escape.Bytes(field)
if err := cmd.writeValues(w, measurement, string(field), values); err != nil {
// An error from writeValues indicates an IO error, which should be returned.
return err
}
}
return nil
}
func (cmd *Command) writeWALFiles(w io.Writer, files []string, key string) error {
fmt.Fprintln(w, "# writing wal data")
// we need to make sure we write the same order that the wal received the data
sort.Strings(files)
var once sync.Once
warnDelete := func() {
once.Do(func() {
msg := fmt.Sprintf(`WARNING: detected deletes in wal file.
Some series for %q may be brought back by replaying this data.
To resolve, you can either let the shard snapshot prior to exporting the data,
or manually edit the exported file.
`, key)
fmt.Fprintln(cmd.Stderr, msg)
})
}
for _, f := range files {
if err := cmd.exportWALFile(f, w, warnDelete); err != nil {
return err
}
}
return nil
}
// exportWALFile reads every WAL entry from the file at walFilePath and exports it to w.
func (cmd *Command) exportWALFile(walFilePath string, w io.Writer, warnDelete func()) error {
f, err := os.Open(walFilePath)
if err != nil {
return err
}
defer f.Close()
r := tsm1.NewWALSegmentReader(f)
defer r.Close()
for r.Next() {
entry, err := r.Read()
if err != nil {
n := r.Count()
fmt.Fprintf(cmd.Stderr, "file %s corrupt at position %d", walFilePath, n)
break
}
switch t := entry.(type) {
case *tsm1.DeleteWALEntry, *tsm1.DeleteRangeWALEntry:
warnDelete()
continue
case *tsm1.WriteWALEntry:
for key, values := range t.Values {
measurement, field := tsm1.SeriesAndFieldFromCompositeKey([]byte(key))
// measurements are stored escaped, field names are not
field = escape.Bytes(field)
if err := cmd.writeValues(w, measurement, string(field), values); err != nil {
// An error from writeValues indicates an IO error, which should be returned.
return err
}
}
}
}
return nil
}
// writeValues writes every value in values to w, using the given series key and field name.
// If any call to w.Write fails, that error is returned.
func (cmd *Command) writeValues(w io.Writer, seriesKey []byte, field string, values []tsm1.Value) error {
buf := []byte(string(seriesKey) + " " + field + "=")
prefixLen := len(buf)
for _, value := range values {
ts := value.UnixNano()
if (ts < cmd.startTime) || (ts > cmd.endTime) {
continue
}
// Re-slice buf to be "<series_key> <field>=".
buf = buf[:prefixLen]
// Append the correct representation of the value.
switch v := value.Value().(type) {
case float64:
buf = strconv.AppendFloat(buf, v, 'g', -1, 64)
case int64:
buf = strconv.AppendInt(buf, v, 10)
buf = append(buf, 'i')
case bool:
buf = strconv.AppendBool(buf, v)
case string:
buf = append(buf, '"')
buf = append(buf, models.EscapeStringField(v)...)
buf = append(buf, '"')
default:
// This shouldn't be possible, but we'll format it anyway.
buf = append(buf, fmt.Sprintf("%v", v)...)
}
// Now buf has "<series_key> <field>=<value>".
// Append the timestamp and a newline, then write it.
buf = append(buf, ' ')
buf = strconv.AppendInt(buf, ts, 10)
buf = append(buf, '\n')
if _, err := w.Write(buf); err != nil {
// Underlying IO error needs to be returned.
return err
}
}
return nil
}

@@ -0,0 +1,340 @@
package export
import (
"bytes"
"fmt"
"io/ioutil"
"math"
"math/rand"
"os"
"sort"
"strconv"
"strings"
"testing"
"github.com/golang/snappy"
"github.com/influxdata/influxdb/tsdb/engine/tsm1"
)
type corpus map[string][]tsm1.Value
var (
basicCorpus = corpus{
tsm1.SeriesFieldKey("floats,k=f", "f"): []tsm1.Value{
tsm1.NewValue(1, float64(1.5)),
tsm1.NewValue(2, float64(3)),
},
tsm1.SeriesFieldKey("ints,k=i", "i"): []tsm1.Value{
tsm1.NewValue(10, int64(15)),
tsm1.NewValue(20, int64(30)),
},
tsm1.SeriesFieldKey("bools,k=b", "b"): []tsm1.Value{
tsm1.NewValue(100, true),
tsm1.NewValue(200, false),
},
tsm1.SeriesFieldKey("strings,k=s", "s"): []tsm1.Value{
tsm1.NewValue(1000, "1k"),
tsm1.NewValue(2000, "2k"),
},
}
basicCorpusExpLines = []string{
"floats,k=f f=1.5 1",
"floats,k=f f=3 2",
"ints,k=i i=15i 10",
"ints,k=i i=30i 20",
"bools,k=b b=true 100",
"bools,k=b b=false 200",
`strings,k=s s="1k" 1000`,
`strings,k=s s="2k" 2000`,
}
escapeStringCorpus = corpus{
tsm1.SeriesFieldKey("t", "s"): []tsm1.Value{
tsm1.NewValue(1, `1. "quotes"`),
tsm1.NewValue(2, `2. back\slash`),
tsm1.NewValue(3, `3. bs\q"`),
},
}
escCorpusExpLines = []string{
`t s="1. \"quotes\"" 1`,
`t s="2. back\\slash" 2`,
`t s="3. bs\\q\"" 3`,
}
)
func Test_exportWALFile(t *testing.T) {
for _, c := range []struct {
corpus corpus
lines []string
}{
{corpus: basicCorpus, lines: basicCorpusExpLines},
{corpus: escapeStringCorpus, lines: escCorpusExpLines},
} {
walFile := writeCorpusToWALFile(c.corpus)
defer os.Remove(walFile.Name())
var out bytes.Buffer
if err := newCommand().exportWALFile(walFile.Name(), &out, func() {}); err != nil {
t.Fatal(err)
}
lines := strings.Split(out.String(), "\n")
for _, exp := range c.lines {
found := false
for _, l := range lines {
if exp == l {
found = true
break
}
}
if !found {
t.Fatalf("expected line %q to be in exported output:\n%s", exp, out.String())
}
}
}
}
func Test_exportTSMFile(t *testing.T) {
for _, c := range []struct {
corpus corpus
lines []string
}{
{corpus: basicCorpus, lines: basicCorpusExpLines},
{corpus: escapeStringCorpus, lines: escCorpusExpLines},
} {
tsmFile := writeCorpusToTSMFile(c.corpus)
defer os.Remove(tsmFile.Name())
var out bytes.Buffer
if err := newCommand().exportTSMFile(tsmFile.Name(), &out); err != nil {
t.Fatal(err)
}
lines := strings.Split(out.String(), "\n")
for _, exp := range c.lines {
found := false
for _, l := range lines {
if exp == l {
found = true
break
}
}
if !found {
t.Fatalf("expected line %q to be in exported output:\n%s", exp, out.String())
}
}
}
}
var sink interface{}
func benchmarkExportTSM(c corpus, b *testing.B) {
// Garbage collection is relatively likely to happen during export, so track allocations.
b.ReportAllocs()
f := writeCorpusToTSMFile(c)
defer os.Remove(f.Name())
cmd := newCommand()
var out bytes.Buffer
b.ResetTimer()
b.StartTimer()
for i := 0; i < b.N; i++ {
if err := cmd.exportTSMFile(f.Name(), &out); err != nil {
b.Fatal(err)
}
sink = out.Bytes()
out.Reset()
}
}
func BenchmarkExportTSMFloats_100s_250vps(b *testing.B) {
benchmarkExportTSM(makeFloatsCorpus(100, 250), b)
}
func BenchmarkExportTSMInts_100s_250vps(b *testing.B) {
benchmarkExportTSM(makeIntsCorpus(100, 250), b)
}
func BenchmarkExportTSMBools_100s_250vps(b *testing.B) {
benchmarkExportTSM(makeBoolsCorpus(100, 250), b)
}
func BenchmarkExportTSMStrings_100s_250vps(b *testing.B) {
benchmarkExportTSM(makeStringsCorpus(100, 250), b)
}
func benchmarkExportWAL(c corpus, b *testing.B) {
// Garbage collection is relatively likely to happen during export, so track allocations.
b.ReportAllocs()
f := writeCorpusToWALFile(c)
defer os.Remove(f.Name())
cmd := newCommand()
var out bytes.Buffer
b.ResetTimer()
b.StartTimer()
for i := 0; i < b.N; i++ {
if err := cmd.exportWALFile(f.Name(), &out, func() {}); err != nil {
b.Fatal(err)
}
sink = out.Bytes()
out.Reset()
}
}
func BenchmarkExportWALFloats_100s_250vps(b *testing.B) {
benchmarkExportWAL(makeFloatsCorpus(100, 250), b)
}
func BenchmarkExportWALInts_100s_250vps(b *testing.B) {
benchmarkExportWAL(makeIntsCorpus(100, 250), b)
}
func BenchmarkExportWALBools_100s_250vps(b *testing.B) {
benchmarkExportWAL(makeBoolsCorpus(100, 250), b)
}
func BenchmarkExportWALStrings_100s_250vps(b *testing.B) {
benchmarkExportWAL(makeStringsCorpus(100, 250), b)
}
// newCommand returns a command that discards its output and that accepts all timestamps.
func newCommand() *Command {
return &Command{
Stderr: ioutil.Discard,
Stdout: ioutil.Discard,
startTime: math.MinInt64,
endTime: math.MaxInt64,
}
}
// makeCorpus returns a new corpus filled with values generated by fn.
// The RNG passed to fn is seeded with numSeries * numValuesPerSeries, for predictable output.
func makeCorpus(numSeries, numValuesPerSeries int, fn func(*rand.Rand) interface{}) corpus {
rng := rand.New(rand.NewSource(int64(numSeries) * int64(numValuesPerSeries)))
var unixNano int64
corpus := make(corpus, numSeries)
for i := 0; i < numSeries; i++ {
vals := make([]tsm1.Value, numValuesPerSeries)
for j := 0; j < numValuesPerSeries; j++ {
vals[j] = tsm1.NewValue(unixNano, fn(rng))
unixNano++
}
k := fmt.Sprintf("m,t=%d", i)
corpus[tsm1.SeriesFieldKey(k, "x")] = vals
}
return corpus
}
func makeFloatsCorpus(numSeries, numFloatsPerSeries int) corpus {
return makeCorpus(numSeries, numFloatsPerSeries, func(rng *rand.Rand) interface{} {
return rng.Float64()
})
}
func makeIntsCorpus(numSeries, numIntsPerSeries int) corpus {
return makeCorpus(numSeries, numIntsPerSeries, func(rng *rand.Rand) interface{} {
// This will only return positive integers. That's probably okay.
return rng.Int63()
})
}
func makeBoolsCorpus(numSeries, numBoolsPerSeries int) corpus {
return makeCorpus(numSeries, numBoolsPerSeries, func(rng *rand.Rand) interface{} {
return rng.Int63n(2) == 1
})
}
func makeStringsCorpus(numSeries, numStringsPerSeries int) corpus {
return makeCorpus(numSeries, numStringsPerSeries, func(rng *rand.Rand) interface{} {
// The string will randomly have 2-6 parts
parts := make([]string, rng.Intn(4)+2)
for i := range parts {
// Each part is a random base36-encoded number
parts[i] = strconv.FormatInt(rng.Int63(), 36)
}
// Join the individual parts with underscores.
return strings.Join(parts, "_")
})
}
// writeCorpusToWALFile writes the given corpus as a WAL file, and returns a handle to that file.
// It is the caller's responsibility to remove the returned temp file.
// writeCorpusToWALFile will panic on any error that occurs.
func writeCorpusToWALFile(c corpus) *os.File {
walFile, err := ioutil.TempFile("", "export_test_corpus_wal")
if err != nil {
panic(err)
}
e := &tsm1.WriteWALEntry{Values: c}
b, err := e.Encode(nil)
if err != nil {
panic(err)
}
w := tsm1.NewWALSegmentWriter(walFile)
if err := w.Write(e.Type(), snappy.Encode(nil, b)); err != nil {
panic(err)
}
if err := w.Flush(); err != nil {
panic(err)
}
// (*tsm1.WALSegmentWriter).sync isn't exported, but it only Syncs the file anyway.
if err := walFile.Sync(); err != nil {
panic(err)
}
return walFile
}
// writeCorpusToTSMFile writes the given corpus as a TSM file, and returns a handle to that file.
// It is the caller's responsibility to remove the returned temp file.
// writeCorpusToTSMFile will panic on any error that occurs.
func writeCorpusToTSMFile(c corpus) *os.File {
tsmFile, err := ioutil.TempFile("", "export_test_corpus_tsm")
if err != nil {
panic(err)
}
w, err := tsm1.NewTSMWriter(tsmFile)
if err != nil {
panic(err)
}
// Write the series in alphabetical order so that each test run is comparable,
// given an identical corpus.
keys := make([]string, 0, len(c))
for k := range c {
keys = append(keys, k)
}
sort.Strings(keys)
for _, k := range keys {
if err := w.Write(k, c[k]); err != nil {
panic(err)
}
}
if err := w.WriteIndex(); err != nil {
panic(err)
}
if err := w.Close(); err != nil {
panic(err)
}
return tsmFile
}

@@ -0,0 +1,43 @@
// Package help contains the help for the influx_inspect command.
package help
import (
"fmt"
"io"
"os"
"strings"
)
// Command displays help for command-line sub-commands.
type Command struct {
Stdout io.Writer
}
// NewCommand returns a new instance of Command.
func NewCommand() *Command {
return &Command{
Stdout: os.Stdout,
}
}
// Run executes the command.
func (cmd *Command) Run(args ...string) error {
fmt.Fprintln(cmd.Stdout, strings.TrimSpace(usage))
return nil
}
const usage = `
Usage: influx_inspect [[command] [arguments]]
The commands are:
dumptsi dumps low-level details about tsi1 files
dumptsm dumps low-level details about tsm1 files
export exports raw data from a shard to line protocol
help display this help message
report displays a shard level report
verify verifies integrity of TSM files
"help" is the default command.
Use "influx_inspect [command] -help" for more information about a command.
`

@@ -0,0 +1,3 @@
package help_test
// TODO: write some tests

@@ -0,0 +1,90 @@
// The influx_inspect command displays detailed information about InfluxDB data files.
package main
import (
"fmt"
"io"
"log"
"os"
"github.com/influxdata/influxdb/cmd"
"github.com/influxdata/influxdb/cmd/influx_inspect/dumptsi"
"github.com/influxdata/influxdb/cmd/influx_inspect/dumptsm"
"github.com/influxdata/influxdb/cmd/influx_inspect/export"
"github.com/influxdata/influxdb/cmd/influx_inspect/help"
"github.com/influxdata/influxdb/cmd/influx_inspect/report"
"github.com/influxdata/influxdb/cmd/influx_inspect/verify"
_ "github.com/influxdata/influxdb/tsdb/engine"
)
func main() {
m := NewMain()
if err := m.Run(os.Args[1:]...); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}
// Main represents the program execution.
type Main struct {
Logger *log.Logger
Stdin io.Reader
Stdout io.Writer
Stderr io.Writer
}
// NewMain returns a new instance of Main.
func NewMain() *Main {
return &Main{
Logger: log.New(os.Stderr, "[influx_inspect] ", log.LstdFlags),
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
}
}
// Run determines and runs the command specified by the CLI args.
func (m *Main) Run(args ...string) error {
// Extract the sub-command name from args.
name, args := cmd.ParseCommandName(args)
switch name {
case "", "help":
if err := help.NewCommand().Run(args...); err != nil {
return fmt.Errorf("help: %s", err)
}
case "dumptsi":
c := dumptsi.NewCommand()
if err := c.Run(args...); err != nil {
return fmt.Errorf("dumptsi: %s", err)
}
case "dumptsmdev":
fmt.Fprintf(m.Stderr, "warning: dumptsmdev is deprecated, use dumptsm instead.\n")
fallthrough
case "dumptsm":
c := dumptsm.NewCommand()
if err := c.Run(args...); err != nil {
return fmt.Errorf("dumptsm: %s", err)
}
case "export":
c := export.NewCommand()
if err := c.Run(args...); err != nil {
return fmt.Errorf("export: %s", err)
}
case "report":
c := report.NewCommand()
if err := c.Run(args...); err != nil {
return fmt.Errorf("report: %s", err)
}
case "verify":
c := verify.NewCommand()
if err := c.Run(args...); err != nil {
return fmt.Errorf("verify: %s", err)
}
default:
return fmt.Errorf(`unknown command "%s"`+"\n"+`Run 'influx_inspect help' for usage`+"\n\n", name)
}
return nil
}

@@ -0,0 +1,192 @@
// Package report reports statistics about TSM files.
package report
import (
"flag"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"text/tabwriter"
"time"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/tsdb/engine/tsm1"
"github.com/retailnext/hllpp"
)
// Command represents the program execution for "influxd report".
type Command struct {
Stderr io.Writer
Stdout io.Writer
dir string
pattern string
detailed bool
}
// NewCommand returns a new instance of Command.
func NewCommand() *Command {
return &Command{
Stderr: os.Stderr,
Stdout: os.Stdout,
}
}
// Run executes the command.
func (cmd *Command) Run(args ...string) error {
fs := flag.NewFlagSet("report", flag.ExitOnError)
fs.StringVar(&cmd.pattern, "pattern", "", "Include only files matching a pattern")
fs.BoolVar(&cmd.detailed, "detailed", false, "Report detailed cardinality estimates")
fs.SetOutput(cmd.Stdout)
fs.Usage = cmd.printUsage
if err := fs.Parse(args); err != nil {
return err
}
cmd.dir = fs.Arg(0)
start := time.Now()
files, err := filepath.Glob(filepath.Join(cmd.dir, fmt.Sprintf("*.%s", tsm1.TSMFileExtension)))
if err != nil {
return err
}
var filtered []string
if cmd.pattern != "" {
for _, f := range files {
if strings.Contains(f, cmd.pattern) {
filtered = append(filtered, f)
}
}
files = filtered
}
if len(files) == 0 {
return fmt.Errorf("no tsm files at %v", cmd.dir)
}
tw := tabwriter.NewWriter(cmd.Stdout, 8, 8, 1, '\t', 0)
fmt.Fprintln(tw, strings.Join([]string{"File", "Series", "Load Time"}, "\t"))
totalSeries := hllpp.New()
tagCardinalities := map[string]*hllpp.HLLPP{}
measCardinalities := map[string]*hllpp.HLLPP{}
fieldCardinalities := map[string]*hllpp.HLLPP{}
for _, f := range files {
file, err := os.OpenFile(f, os.O_RDONLY, 0600)
if err != nil {
fmt.Fprintf(cmd.Stderr, "error: %s: %v. Skipping.\n", f, err)
continue
}
loadStart := time.Now()
reader, err := tsm1.NewTSMReader(file)
if err != nil {
fmt.Fprintf(cmd.Stderr, "error: %s: %v. Skipping.\n", file.Name(), err)
file.Close()
continue
}
loadTime := time.Since(loadStart)
seriesCount := reader.KeyCount()
for i := 0; i < seriesCount; i++ {
key, _ := reader.KeyAt(i)
totalSeries.Add([]byte(key))
if cmd.detailed {
sep := strings.Index(string(key), "#!~#")
seriesKey, field := key[:sep], key[sep+4:]
measurement, tags := models.ParseKey(seriesKey)
measCount, ok := measCardinalities[measurement]
if !ok {
measCount = hllpp.New()
measCardinalities[measurement] = measCount
}
measCount.Add([]byte(key))
fieldCount, ok := fieldCardinalities[measurement]
if !ok {
fieldCount = hllpp.New()
fieldCardinalities[measurement] = fieldCount
}
fieldCount.Add([]byte(field))
for _, t := range tags {
tagCount, ok := tagCardinalities[string(t.Key)]
if !ok {
tagCount = hllpp.New()
tagCardinalities[string(t.Key)] = tagCount
}
tagCount.Add(t.Value)
}
}
}
reader.Close()
fmt.Fprintln(tw, strings.Join([]string{
filepath.Base(file.Name()),
strconv.FormatInt(int64(seriesCount), 10),
loadTime.String(),
}, "\t"))
tw.Flush()
}
tw.Flush()
println()
fmt.Printf("Statistics\n")
fmt.Printf("\tSeries:\n")
fmt.Printf("\t\tTotal (est): %d\n", totalSeries.Count())
if cmd.detailed {
fmt.Printf("\tMeasurements (est):\n")
for _, t := range sortKeys(measCardinalities) {
fmt.Printf("\t\t%v: %d (%d%%)\n", t, measCardinalities[t].Count(), int((float64(measCardinalities[t].Count())/float64(totalSeries.Count()))*100))
}
fmt.Printf("\tFields (est):\n")
for _, t := range sortKeys(fieldCardinalities) {
fmt.Printf("\t\t%v: %d\n", t, fieldCardinalities[t].Count())
}
fmt.Printf("\tTags (est):\n")
for _, t := range sortKeys(tagCardinalities) {
fmt.Printf("\t\t%v: %d\n", t, tagCardinalities[t].Count())
}
}
fmt.Printf("Completed in %s\n", time.Since(start))
return nil
}
// sortKeys is a quick helper to return the sorted set of a map's keys
func sortKeys(vals map[string]*hllpp.HLLPP) (keys []string) {
for k := range vals {
keys = append(keys, k)
}
sort.Strings(keys)
return keys
}
// printUsage prints the usage message to STDOUT.
func (cmd *Command) printUsage() {
usage := `Displays shard level report.
Usage: influx_inspect report [flags]
-pattern <pattern>
Include only files matching a pattern.
-detailed
Report detailed cardinality estimates.
Defaults to "false".
`
fmt.Fprint(cmd.Stdout, usage)
}

@@ -0,0 +1,3 @@
package report_test
// TODO: write some tests

@@ -0,0 +1,120 @@
// Package verify verifies integrity of TSM files.
package verify
import (
"flag"
"fmt"
"hash/crc32"
"io"
"os"
"path/filepath"
"text/tabwriter"
"time"
"github.com/influxdata/influxdb/tsdb/engine/tsm1"
)
// Command represents the program execution for "influx_inspect verify".
type Command struct {
Stderr io.Writer
Stdout io.Writer
}
// NewCommand returns a new instance of Command.
func NewCommand() *Command {
return &Command{
Stderr: os.Stderr,
Stdout: os.Stdout,
}
}
// Run executes the command.
func (cmd *Command) Run(args ...string) error {
var path string
fs := flag.NewFlagSet("verify", flag.ExitOnError)
fs.StringVar(&path, "dir", os.Getenv("HOME")+"/.influxdb", "Root storage path. [$HOME/.influxdb]")
fs.SetOutput(cmd.Stdout)
fs.Usage = cmd.printUsage
if err := fs.Parse(args); err != nil {
return err
}
start := time.Now()
dataPath := filepath.Join(path, "data")
brokenBlocks := 0
totalBlocks := 0
// Compute the TSM file extension once, outside the walk below.
ext := fmt.Sprintf(".%s", tsm1.TSMFileExtension)
// Get all TSM files by walking through the data dir
files := []string{}
err := filepath.Walk(dataPath, func(path string, f os.FileInfo, err error) error {
if err != nil {
return err
}
if filepath.Ext(path) == ext {
files = append(files, path)
}
return nil
})
if err != nil {
return err
}
tw := tabwriter.NewWriter(cmd.Stdout, 16, 8, 0, '\t', 0)
// Verify the checksums of every block in every file
for _, f := range files {
file, err := os.OpenFile(f, os.O_RDONLY, 0600)
if err != nil {
return err
}
reader, err := tsm1.NewTSMReader(file)
if err != nil {
return err
}
blockItr := reader.BlockIterator()
brokenFileBlocks := 0
count := 0
for blockItr.Next() {
totalBlocks++
key, _, _, _, checksum, buf, err := blockItr.Read()
if err != nil {
brokenBlocks++
brokenFileBlocks++
fmt.Fprintf(tw, "%s: could not get checksum for key %v block %d due to error: %q\n", f, key, count, err)
} else if expected := crc32.ChecksumIEEE(buf); checksum != expected {
brokenBlocks++
brokenFileBlocks++
fmt.Fprintf(tw, "%s: got %d but expected %d for key %v, block %d\n", f, checksum, expected, key, count)
}
count++
}
if brokenFileBlocks == 0 {
fmt.Fprintf(tw, "%s: healthy\n", f)
}
reader.Close()
}
fmt.Fprintf(tw, "Broken Blocks: %d / %d, in %vs\n", brokenBlocks, totalBlocks, time.Since(start).Seconds())
tw.Flush()
return nil
}
// printUsage prints the usage message to STDOUT.
func (cmd *Command) printUsage() {
usage := fmt.Sprintf(`Verifies the integrity of TSM files.
Usage: influx_inspect verify [flags]
-dir <path>
Root storage path
Defaults to "%[1]s/.influxdb".
`, os.Getenv("HOME"))
fmt.Fprint(cmd.Stdout, usage)
}

@@ -0,0 +1,3 @@
package verify_test
// TODO: write some tests