add vendoring with go dep

This commit is contained in:
Adrian Todorov
2017-10-25 20:52:40 +00:00
parent 704f4d20d1
commit a59409f16b
1627 changed files with 489673 additions and 0 deletions


@@ -0,0 +1,68 @@
package continuous_querier
import (
"errors"
"time"
"github.com/influxdata/influxdb/monitor/diagnostics"
"github.com/influxdata/influxdb/toml"
)
// Default values for aspects of interval computation.
const (
// The default value of how often to check whether any CQs need to be run.
DefaultRunInterval = time.Second
)
// Config represents a configuration for the continuous query service.
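// In the InfluxDB configuration file this service is typically configured in a
// [continuous_queries] section; an assumed minimal layout that maps onto the
// fields below looks like:
//
//	[continuous_queries]
//	enabled = true
//	log-enabled = true
//	run-interval = "1s"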
type Config struct {
// Enables logging in the CQ service to display when CQs are processed and how many points were written.
LogEnabled bool `toml:"log-enabled"`
// If this flag is set to false, both the brokers and data nodes should ignore any CQ processing.
Enabled bool `toml:"enabled"`
// Run interval for checking continuous queries. This should be set to the least common factor
// of the interval for running continuous queries. If you only aggregate continuous queries
// every minute, this should be set to 1 minute. The default is set to '1s' so the interval
// is compatible with most aggregations.
RunInterval toml.Duration `toml:"run-interval"`
}
// NewConfig returns a new instance of Config with defaults.
func NewConfig() Config {
return Config{
LogEnabled: true,
Enabled: true,
RunInterval: toml.Duration(DefaultRunInterval),
}
}
// Validate returns an error if the Config is invalid.
func (c Config) Validate() error {
if !c.Enabled {
return nil
}
// TODO: Should we enforce a minimum interval?
// Polling every nanosecond, for instance, will greatly impact performance.
if c.RunInterval <= 0 {
return errors.New("run-interval must be positive")
}
return nil
}
// Diagnostics returns a diagnostics representation of a subset of the Config.
func (c Config) Diagnostics() (*diagnostics.Diagnostics, error) {
if !c.Enabled {
return diagnostics.RowFromMap(map[string]interface{}{
"enabled": false,
}), nil
}
return diagnostics.RowFromMap(map[string]interface{}{
"enabled": true,
"run-interval": c.RunInterval,
}), nil
}


@@ -0,0 +1,46 @@
package continuous_querier_test
import (
"testing"
"time"
"github.com/BurntSushi/toml"
"github.com/influxdata/influxdb/services/continuous_querier"
)
func TestConfig_Parse(t *testing.T) {
// Parse configuration.
var c continuous_querier.Config
if _, err := toml.Decode(`
run-interval = "1m"
enabled = true
`, &c); err != nil {
t.Fatal(err)
}
// Validate configuration.
if time.Duration(c.RunInterval) != time.Minute {
t.Fatalf("unexpected run interval: %v", c.RunInterval)
} else if c.Enabled != true {
t.Fatalf("unexpected enabled: %v", c.Enabled)
}
}
func TestConfig_Validate(t *testing.T) {
c := continuous_querier.NewConfig()
if err := c.Validate(); err != nil {
t.Fatalf("unexpected validation fail from NewConfig: %s", err)
}
c = continuous_querier.NewConfig()
c.RunInterval = 0
if err := c.Validate(); err == nil {
t.Fatal("expected error for run-interval = 0, got nil")
}
c = continuous_querier.NewConfig()
c.RunInterval *= -1
if err := c.Validate(); err == nil {
t.Fatal("expected error for negative run-interval, got nil")
}
}


@@ -0,0 +1,235 @@
# Continuous Queries
This document lays out continuous queries and a proposed architecture for how they'll work within an InfluxDB cluster.
## Definition of Continuous Queries
Continuous queries serve two purposes in InfluxDB:
1. Combining many series into a single series (i.e. removing 1 or more tag dimensions to make queries more efficient)
2. Aggregating and downsampling series
The purpose of both types of continuous query is to duplicate or downsample data automatically in the background, to make querying their results fast and efficient. Think of them as another way to create indexes on data.
Generally, there are continuous queries that create copies of data into another measurement or tagset, and queries that downsample and aggregate data. The only difference between the two types is whether the query has a `GROUP BY time` clause.
Before we get to the continuous query examples, we need to define the `INTO` syntax of queries.
### INTO
`INTO` is a method for running a query and having it output into either another measurement name, retention policy, or database. The syntax looks like this:
```sql
SELECT *
INTO [<retention policy>.]<measurement> [ON <database>]
FROM <measurement>
[WHERE ...]
[GROUP BY ...]
```
The syntax states that the retention policy, database, where clause, and group by clause are all optional. If a retention policy isn't specified, the database's default retention policy will be written into. If the database isn't specified, the database the query is running from will be written into.
By selecting specific fields, `INTO` can merge many series into one that will go into either a new measurement, retention policy, or database. For example:
```sql
SELECT mean(value) as value, region
INTO "1h.cpu_load"
FROM cpu_load
GROUP BY time(1h), region
```
That will give 1h summaries of the mean value of `cpu_load` for each `region`. Specifying `region` in the `GROUP BY` clause is unnecessary, since having it in the `SELECT` clause forces it to be grouped by that tag; we've included it in the example only for clarity.
With `SELECT ... INTO`, fields will be written as fields and tags will be written as tags.
### Continuous Query Syntax
The `INTO` queries run once. Continuous queries will turn `INTO` queries into something that runs in the background in the cluster. They're kind of like triggers in SQL.
```sql
CREATE CONTINUOUS QUERY "1h_cpu_load"
ON database_name
BEGIN
SELECT mean(value) as value, region
INTO "1h.cpu_load"
FROM cpu_load
GROUP BY time(1h), region
END
```
Or chain them together:
```sql
CREATE CONTINUOUS QUERY "10m_event_count"
ON database_name
BEGIN
SELECT count(value)
INTO "10m.events"
FROM events
GROUP BY time(10m)
END
-- this selects from the output of one continuous query and outputs to another series
CREATE CONTINUOUS QUERY "1h_event_count"
ON database_name
BEGIN
SELECT sum(count) as count
INTO "1h.events"
FROM events
GROUP BY time(1h)
END
```
Or multiple aggregations from all series in a measurement. This example assumes you have a retention policy named `1h`.
```sql
CREATE CONTINUOUS QUERY "1h_cpu_load"
ON database_name
BEGIN
SELECT mean(value), percentile(80, value) as percentile_80, percentile(95, value) as percentile_95
INTO "1h.cpu_load"
FROM cpu_load
GROUP BY time(1h), *
END
```
The `GROUP BY *` indicates that we want to group by the tagset of the points written in. The same tags will be written to the output series. The multiple aggregates in the `SELECT` clause (percentile, mean) will be written in as fields to the resulting series.
Showing what continuous queries we have:
```sql
SHOW CONTINUOUS QUERIES
```
Dropping continuous queries:
```sql
DROP CONTINUOUS QUERY <name> ON <database>
```
### Security
To create or drop a continuous query, the user must be an admin.
### Limitations
In order to prevent cycles and endless copying of data, the following limitation is enforced on continuous queries at create time:
*The output of a continuous query must go to either a different measurement or to a different retention policy.*
In theory, users could still create a cycle with multiple continuous queries. We should check for these cycles and disallow them.
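As a rough illustration of the create-time guard (using a hypothetical `seriesTarget` type, not the actual metastore structures), the check could look like:

```go
package main

import (
	"errors"
	"fmt"
)

// seriesTarget identifies the measurement and retention policy on one side of
// a continuous query. Illustrative only.
type seriesTarget struct {
	RetentionPolicy string
	Measurement     string
}

// validateCQTarget enforces the limitation above: the output must differ from
// the input in measurement or retention policy. Cycles that span several CQs
// would still need a separate graph check.
func validateCQTarget(from, into seriesTarget) error {
	if from.Measurement == into.Measurement && from.RetentionPolicy == into.RetentionPolicy {
		return errors.New("continuous query must output to a different measurement or retention policy")
	}
	return nil
}

func main() {
	err := validateCQTarget(
		seriesTarget{RetentionPolicy: "raw", Measurement: "cpu"},
		seriesTarget{RetentionPolicy: "raw", Measurement: "cpu"},
	)
	fmt.Println(err)
}
```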
## Proposed Architecture
Continuous queries should be stored in the metastore cluster wide. That is, they amount to a database schema that should be stored in every server in a cluster.
Continuous queries will have to be handled in a different way for two different use cases: those that simply copy data (CQs without a group by time) and those that aggregate and downsample data (those with a group by time).
### No GROUP BY time
For CQs that have no `GROUP BY time` clause, they should be evaluated at the data node as part of the write. The single write should create any other writes for the CQ and submit those in the same request to the brokers to ensure that all writes succeed (both the original and the new CQ writes) or none do.
I imagine the process going something like this:
1. Convert the data point into its compact form `<series id><time><values>`
2. For each CQ on the measurement and retention policy without a `GROUP BY time`:
2.1. Run the data point through a special query engine that will output 0 or 1 data point.
2.2. GOTO 1. for each newly generated data point
2.3. Write all the data points in a single call to the brokers
2.4. Return success to the user
Note that for the generated data points, we need to go through and run this process against them as well, since they can feed into different retention policies, measurements, and new tag-sets. In 2.1 I mention that the output will be either a data point or nothing. That's because of `WHERE` clauses on the query. However, it will never be more than a single data point.
I mention that we'll need a special query engine for these types of queries. In this case, they never have an aggregate function. Any query with an aggregate function also has a group by time, and these queries by definition don't have that.
The only thing we have to worry about is which fields are being selected and what the where clause looks like. We should be able to put the raw data point through a simple transform function that either outputs another raw point or doesn't.
I think this transform function should be something separate from the regular query planner and engine. It can live in `influxql`, but it should be fairly simple, since the only purpose of these types of queries is to either filter some data out and output to a new series or transform into a new series by dropping tags.
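As a rough sketch of that transform, under assumed types (`point`, the `where` predicate standing in for the `WHERE` clause, and the tag-drop list are all illustrative, not the engine's API):

```go
package main

import (
	"fmt"
	"time"
)

// point is a simplified raw data point used only for this sketch.
type point struct {
	Measurement string
	Tags        map[string]string
	Fields      map[string]interface{}
	Time        time.Time
}

// transform applies a non-aggregate CQ to one incoming point: filter with the
// WHERE predicate, then rewrite the target measurement and drop tags. It
// returns zero or one point, matching the behavior described above.
func transform(p point, where func(point) bool, into string, dropTags []string) (point, bool) {
	if !where(p) {
		return point{}, false
	}
	out := point{Measurement: into, Tags: map[string]string{}, Fields: p.Fields, Time: p.Time}
	for k, v := range p.Tags {
		out.Tags[k] = v
	}
	for _, tag := range dropTags {
		delete(out.Tags, tag)
	}
	return out, true
}

func main() {
	in := point{
		Measurement: "cpu_load",
		Tags:        map[string]string{"host": "serverA", "region": "uswest"},
		Fields:      map[string]interface{}{"value": 0.64},
		Time:        time.Now(),
	}
	// Copy every point into cpu_load_by_region, dropping the host tag.
	out, ok := transform(in, func(_ point) bool { return true }, "cpu_load_by_region", []string{"host"})
	fmt.Println(ok, out.Measurement, out.Tags)
}
```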
### Has GROUP BY time
CQs that have a `GROUP BY time` (or aggregate CQs) will need to be handled differently.
One key point on continuous queries with a `GROUP BY time` is that all their writes should always be `overwrite = true`. That is, they should only have a single data point for each timestamp. This distinction means that continuous queries for previous blocks of time can be safely run multiple times without duplicating data (i.e. they're idempotent).
There are two different ideas I have for how CQs with group by time could be handled. The first is through periodic updates handled by the Raft Leader. The second would be to expand out writes for each CQ and handle them on the data node.
#### Periodic Updates
In this approach the management of how CQs run in a cluster will be centrally located on the Raft Leader. It will be responsible for orchestrating which data nodes run CQs and when.
The naive approach would be to have the leader hand out each CQ for a block of time periodically. The leader could also rerun CQs for periods of time that have recently passed. This would be an easy way to handle the "lagging data" problem, but it's not precise.
Unfortunately, there's no easy way to tell cluster-wide whether there were data points written in an already-passed window of time for a CQ. We might be able to add this at the data nodes and have them track it, but it would be quite a bit more work.
The easy way would be to have CQs re-execute for periods that recently passed, with some user-configurable window of time after which they stop checking. Then we could give the user the ability to recalculate CQs over ranges of time if they need to correct for some problem that occurred or for the loading of a bunch of historical data.
With this approach, we'd have the metadata in the database store the last time each CQ was run. Whenever the Raft leader sent out a command to a data node to handle a CQ, the data node would use this metadata to determine which windows of time it should compute.
This approach is like what exists in 0.8, with the exception that it will automatically catch data that is lagged behind in a small window of time and give the user the ability to force recalculation.
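A small sketch of the per-CQ catch-up calculation under this approach, assuming the stored last-run time and a user-configurable catch-up window (all names here are illustrative):

```go
package main

import (
	"fmt"
	"time"
)

// windowsToRun returns the [start, end) buckets a CQ should (re)compute, given
// when it last ran, its GROUP BY interval, and how far back we are willing to
// catch up on lagging data. Anything older than the catch-up horizon is skipped.
func windowsToRun(lastRun, now time.Time, interval, catchUp time.Duration) [][2]time.Time {
	start := lastRun
	if horizon := now.Add(-catchUp); start.Before(horizon) {
		start = horizon
	}
	start = start.Truncate(interval)

	var windows [][2]time.Time
	for end := start.Add(interval); !end.After(now); start, end = end, end.Add(interval) {
		windows = append(windows, [2]time.Time{start, end})
	}
	return windows
}

func main() {
	now := time.Date(2015, 3, 1, 12, 30, 0, 0, time.UTC)
	lastRun := now.Add(-25 * time.Minute)
	for _, w := range windowsToRun(lastRun, now, 10*time.Minute, time.Hour) {
		fmt.Println(w[0].Format("15:04"), "->", w[1].Format("15:04"))
	}
	// Prints 12:00 -> 12:10, 12:10 -> 12:20, 12:20 -> 12:30.
}
```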
#### Expanding writes
When a write comes into a data node, we could have it evaluated against the group by CQs in addition to the non-group by ones. It would then create writes that go through the brokers. When the CQ writes arrive at the data nodes, they would have to handle each write differently depending on whether it was a write to a raw series or a CQ write.
Let's lay out a concrete example.
```sql
CREATE CONTINUOUS QUERY "10m_cpu_by_region"
ON foo
BEGIN
SELECT mean(value)
INTO cpu_by_region
FROM cpu
GROUP BY time(10m), region
END
```
In this example we write values into `cpu` with the tags `region` and `host`.
Here's another example CQ:
```sql
CREATE CONTINUOUS QUERY "1h_cpu"
ON foo
BEGIN
SELECT mean(value)
INTO "1h.cpu"
FROM raw.cpu
GROUP BY time(10m), *
END
```
That would output one series into the `1h` retention policy for the `cpu` measurement for every series from the `raw` retention policy and the `cpu` measurement.
Both of these examples would be handled the same way despite one being a big merge of a bunch of series into one and the other being an aggregation of series in a 1-to-1 mapping.
Say we're collecting data for two hosts in a single region. Then we'd have two distinct series like this:
```
1 - cpu host=serverA region=uswest
2 - cpu host=serverB region=uswest
```
Whenever a write came into a server, we'd look at the continuous queries and see if we needed to create new writes. If we had the two CQ examples above, we'd have to expand a single write into two more writes (one for each CQ).
The first CQ would have to create a new series:
```
3 - cpu_by_region region=uswest
```
The second CQ would use the same series id as the write, but would send it to another retention policy (and thus shard).
We'd need to keep track of which series + retention policy combinations were the result of a CQ. When the data nodes get writes replicated downward, they would have to handle them like this:
1. If write is normal, write through
2. If write is CQ write, compute based on existing values, write to DB
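A compact sketch of that dispatch on the data node; the merge step is left as a callback because it depends on the aggregate, and every name here is illustrative rather than InfluxDB's actual write path:

```go
package main

import "fmt"

// write is a simplified replicated write used only for this sketch.
type write struct {
	SeriesKey string
	Timestamp int64
	Value     float64
	FromCQ    bool // true when the write was generated by a continuous query
}

// store is a stand-in for the underlying storage engine.
type store map[string]map[int64]float64

// apply handles a replicated write: normal writes go straight through, CQ
// writes are recomputed against the existing value and then overwrite it.
func (s store) apply(w write, merge func(existing, incoming float64, hadExisting bool) float64) {
	series, ok := s[w.SeriesKey]
	if !ok {
		series = map[int64]float64{}
		s[w.SeriesKey] = series
	}
	if !w.FromCQ {
		series[w.Timestamp] = w.Value
		return
	}
	existing, had := series[w.Timestamp]
	series[w.Timestamp] = merge(existing, w.Value, had)
}

func main() {
	s := store{}
	sum := func(existing, incoming float64, had bool) float64 {
		if !had {
			return incoming
		}
		return existing + incoming
	}
	s.apply(write{SeriesKey: "10m.events", Timestamp: 0, Value: 3, FromCQ: true}, sum)
	s.apply(write{SeriesKey: "10m.events", Timestamp: 0, Value: 2, FromCQ: true}, sum)
	fmt.Println(s["10m.events"][0]) // 5
}
```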
#### Approach tradeoffs
The first approach of periodically running queries would almost certainly be the easiest to implement quickly. It also has the added advantage of not putting additional load on the brokers by ballooning up the number of writes that go through the system.
The second approach is appealing because it would be accurate regardless of when writes come in. However, it would take more work, and it would multiply the number of writes going through the brokers by the number of continuous queries, which might not scale to where we need it to.
Also, if the data nodes write for every single update, the load on the underlying storage engine would go up significantly as well.


@@ -0,0 +1,529 @@
// Package continuous_querier provides the continuous query service.
package continuous_querier // import "github.com/influxdata/influxdb/services/continuous_querier"
import (
"errors"
"fmt"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/models"
"github.com/influxdata/influxdb/services/meta"
"github.com/uber-go/zap"
)
const (
// NoChunkingSize specifies when not to chunk results. When planning
// a select statement, passing zero tells it not to chunk results.
// Only applies to raw queries.
NoChunkingSize = 0
// idDelimiter is used as a delimiter when creating a unique name for a
// Continuous Query.
idDelimiter = string(rune(31)) // unit separator
)
// Statistics for the CQ service.
const (
statQueryOK = "queryOk"
statQueryFail = "queryFail"
)
// ContinuousQuerier represents a service that executes continuous queries.
type ContinuousQuerier interface {
// Run executes the named query in the named database. Blank database or name matches all.
Run(database, name string, t time.Time) error
}
// metaClient is an internal interface to make testing easier.
type metaClient interface {
AcquireLease(name string) (l *meta.Lease, err error)
Databases() []meta.DatabaseInfo
Database(name string) *meta.DatabaseInfo
}
// RunRequest is a request to run one or more CQs.
type RunRequest struct {
// Now tells the CQ service what the current time is.
Now time.Time
// CQs tells the CQ service which queries to run.
// If nil, all queries will be run.
CQs []string
}
// matches returns true if the CQ matches one of the requested CQs.
func (rr *RunRequest) matches(cq *meta.ContinuousQueryInfo) bool {
if rr.CQs == nil {
return true
}
for _, q := range rr.CQs {
if q == cq.Name {
return true
}
}
return false
}
// Service manages continuous query execution.
type Service struct {
MetaClient metaClient
QueryExecutor *influxql.QueryExecutor
Config *Config
RunInterval time.Duration
// RunCh can be used by clients to signal service to run CQs.
RunCh chan *RunRequest
Logger zap.Logger
loggingEnabled bool
stats *Statistics
// lastRuns maps CQ name to last time it was run.
mu sync.RWMutex
lastRuns map[string]time.Time
stop chan struct{}
wg *sync.WaitGroup
}
// NewService returns a new instance of Service.
func NewService(c Config) *Service {
s := &Service{
Config: &c,
RunInterval: time.Duration(c.RunInterval),
RunCh: make(chan *RunRequest),
loggingEnabled: c.LogEnabled,
Logger: zap.New(zap.NullEncoder()),
stats: &Statistics{},
lastRuns: map[string]time.Time{},
}
return s
}
// Open starts the service.
func (s *Service) Open() error {
s.Logger.Info("Starting continuous query service")
if s.stop != nil {
return nil
}
assert(s.MetaClient != nil, "MetaClient is nil")
assert(s.QueryExecutor != nil, "QueryExecutor is nil")
s.stop = make(chan struct{})
s.wg = &sync.WaitGroup{}
s.wg.Add(1)
go s.backgroundLoop()
return nil
}
// Close stops the service.
func (s *Service) Close() error {
if s.stop == nil {
return nil
}
close(s.stop)
s.wg.Wait()
s.wg = nil
s.stop = nil
return nil
}
// WithLogger sets the logger on the service.
func (s *Service) WithLogger(log zap.Logger) {
s.Logger = log.With(zap.String("service", "continuous_querier"))
}
// Statistics maintains the statistics for the continuous query service.
type Statistics struct {
QueryOK int64
QueryFail int64
}
// Statistics returns statistics for periodic monitoring.
func (s *Service) Statistics(tags map[string]string) []models.Statistic {
return []models.Statistic{{
Name: "cq",
Tags: tags,
Values: map[string]interface{}{
statQueryOK: atomic.LoadInt64(&s.stats.QueryOK),
statQueryFail: atomic.LoadInt64(&s.stats.QueryFail),
},
}}
}
// Run runs the specified continuous query, or all CQs if none is specified.
func (s *Service) Run(database, name string, t time.Time) error {
var dbs []meta.DatabaseInfo
if database != "" {
// Find the requested database.
db := s.MetaClient.Database(database)
if db == nil {
return influxql.ErrDatabaseNotFound(database)
}
dbs = append(dbs, *db)
} else {
// Get all databases.
dbs = s.MetaClient.Databases()
}
// Loop through databases.
s.mu.Lock()
defer s.mu.Unlock()
for _, db := range dbs {
// Loop through CQs in each DB executing the ones that match name.
for _, cq := range db.ContinuousQueries {
if name == "" || cq.Name == name {
// Remove the last run time for the CQ
id := fmt.Sprintf("%s%s%s", db.Name, idDelimiter, cq.Name)
if _, ok := s.lastRuns[id]; ok {
delete(s.lastRuns, id)
}
}
}
}
// Signal the background routine to run CQs.
s.RunCh <- &RunRequest{Now: t}
return nil
}
// backgroundLoop runs on a goroutine and periodically executes CQs.
func (s *Service) backgroundLoop() {
leaseName := "continuous_querier"
t := time.NewTimer(s.RunInterval)
defer t.Stop()
defer s.wg.Done()
for {
select {
case <-s.stop:
s.Logger.Info("continuous query service terminating")
return
case req := <-s.RunCh:
if !s.hasContinuousQueries() {
continue
}
if _, err := s.MetaClient.AcquireLease(leaseName); err == nil {
s.Logger.Info(fmt.Sprintf("running continuous queries by request for time: %v", req.Now))
s.runContinuousQueries(req)
}
case <-t.C:
if !s.hasContinuousQueries() {
t.Reset(s.RunInterval)
continue
}
if _, err := s.MetaClient.AcquireLease(leaseName); err == nil {
s.runContinuousQueries(&RunRequest{Now: time.Now()})
}
t.Reset(s.RunInterval)
}
}
}
// hasContinuousQueries returns true if any CQs exist.
func (s *Service) hasContinuousQueries() bool {
// Get list of all databases.
dbs := s.MetaClient.Databases()
// Loop through all databases executing CQs.
for _, db := range dbs {
if len(db.ContinuousQueries) > 0 {
return true
}
}
return false
}
// runContinuousQueries gets CQs from the meta store and runs them.
func (s *Service) runContinuousQueries(req *RunRequest) {
// Get list of all databases.
dbs := s.MetaClient.Databases()
// Loop through all databases executing CQs.
for _, db := range dbs {
// TODO: distribute across nodes
for _, cq := range db.ContinuousQueries {
if !req.matches(&cq) {
continue
}
if ok, err := s.ExecuteContinuousQuery(&db, &cq, req.Now); err != nil {
s.Logger.Info(fmt.Sprintf("error executing query: %s: err = %s", cq.Query, err))
atomic.AddInt64(&s.stats.QueryFail, 1)
} else if ok {
atomic.AddInt64(&s.stats.QueryOK, 1)
}
}
}
}
// ExecuteContinuousQuery may execute a single CQ. It returns false, with no error, if the CQ did not need to run.
func (s *Service) ExecuteContinuousQuery(dbi *meta.DatabaseInfo, cqi *meta.ContinuousQueryInfo, now time.Time) (bool, error) {
// TODO: re-enable stats
//s.stats.Inc("continuousQueryExecuted")
// Local wrapper / helper.
cq, err := NewContinuousQuery(dbi.Name, cqi)
if err != nil {
return false, err
}
// Set the time zone on the now time if the CQ has one. Otherwise, force UTC.
now = now.UTC()
if cq.q.Location != nil {
now = now.In(cq.q.Location)
}
// Get the last time this CQ was run from the service's cache.
s.mu.Lock()
defer s.mu.Unlock()
id := fmt.Sprintf("%s%s%s", dbi.Name, idDelimiter, cqi.Name)
cq.LastRun, cq.HasRun = s.lastRuns[id]
// Set the retention policy to default if it wasn't specified in the query.
if cq.intoRP() == "" {
cq.setIntoRP(dbi.DefaultRetentionPolicy)
}
// Get the group by interval.
interval, err := cq.q.GroupByInterval()
if err != nil {
return false, err
} else if interval == 0 {
return false, nil
}
// Get the group by offset.
offset, err := cq.q.GroupByOffset()
if err != nil {
return false, err
}
// See if this query needs to be run.
run, nextRun, err := cq.shouldRunContinuousQuery(now, interval)
if err != nil {
return false, err
} else if !run {
return false, nil
}
resampleEvery := interval
if cq.Resample.Every != 0 {
resampleEvery = cq.Resample.Every
}
// We're about to run the query so store the current time closest to the nearest interval.
// If all is going well, this time should be the same as nextRun.
cq.LastRun = truncate(now.Add(-offset), resampleEvery).Add(offset)
s.lastRuns[id] = cq.LastRun
// Retrieve the oldest interval we should calculate based on the next time
// interval. We do this instead of using the current time just in case any
// time intervals were missed. The start time of the oldest interval is what
// we use as the start time.
resampleFor := interval
if cq.Resample.For != 0 {
resampleFor = cq.Resample.For
} else if interval < resampleEvery {
resampleFor = resampleEvery
}
// If the resample interval is greater than the interval of the query, use the
// query interval instead.
if interval < resampleEvery {
resampleEvery = interval
}
// Calculate and set the time range for the query.
startTime := truncate(nextRun.Add(interval-resampleFor-offset-1), interval).Add(offset)
endTime := truncate(now.Add(interval-resampleEvery-offset), interval).Add(offset)
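	// For instance, with the defaults (no RESAMPLE clause, zero GROUP BY offset)
	// and a CQ using GROUP BY time(1m) that runs right on the 10:05:00 boundary,
	// startTime works out to 10:04:00 and endTime to 10:05:00, i.e. the most
	// recently completed bucket.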
if !endTime.After(startTime) {
// Exit early since there is no time interval.
return false, nil
}
if err := cq.q.SetTimeRange(startTime, endTime); err != nil {
s.Logger.Info(fmt.Sprintf("error setting time range: %s\n", err))
return false, err
}
var start time.Time
if s.loggingEnabled {
s.Logger.Info(fmt.Sprintf("executing continuous query %s (%v to %v)", cq.Info.Name, startTime, endTime))
start = time.Now()
}
// Do the actual processing of the query & writing of results.
if err := s.runContinuousQueryAndWriteResult(cq); err != nil {
s.Logger.Info(fmt.Sprintf("error: %s. running: %s\n", err, cq.q.String()))
return false, err
}
if s.loggingEnabled {
s.Logger.Info(fmt.Sprintf("finished continuous query %s (%v to %v) in %s", cq.Info.Name, startTime, endTime, time.Since(start)))
}
return true, nil
}
// runContinuousQueryAndWriteResult will run the query against the cluster and write the results back.
func (s *Service) runContinuousQueryAndWriteResult(cq *ContinuousQuery) error {
// Wrap the CQ's inner SELECT statement in a Query for the QueryExecutor.
q := &influxql.Query{
Statements: influxql.Statements([]influxql.Statement{cq.q}),
}
closing := make(chan struct{})
defer close(closing)
// Execute the SELECT.
ch := s.QueryExecutor.ExecuteQuery(q, influxql.ExecutionOptions{
Database: cq.Database,
}, closing)
// There is only one statement, so we will only ever receive one result
res, ok := <-ch
if !ok {
panic("result channel was closed")
}
if res.Err != nil {
return res.Err
}
return nil
}
// ContinuousQuery is a local wrapper / helper around continuous queries.
type ContinuousQuery struct {
Database string
Info *meta.ContinuousQueryInfo
HasRun bool
LastRun time.Time
Resample ResampleOptions
q *influxql.SelectStatement
}
func (cq *ContinuousQuery) intoRP() string { return cq.q.Target.Measurement.RetentionPolicy }
func (cq *ContinuousQuery) setIntoRP(rp string) { cq.q.Target.Measurement.RetentionPolicy = rp }
// ResampleOptions controls the resampling intervals and duration of this continuous query.
type ResampleOptions struct {
// The query will be resampled at this time interval. The first query will be
// performed at this time interval. If this option is not given, the resample
// interval is set to the group by interval.
Every time.Duration
// The query will continue being resampled for this time duration. If this
// option is not given, the resample duration is the same as the group by
// interval. A bucket's time is calculated based on the bucket's start time,
// so a 40m resample duration with a group by interval of 10m will resample
// the bucket 4 times (using the default time interval).
For time.Duration
}
// NewContinuousQuery returns a ContinuousQuery object with a parsed influxql.CreateContinuousQueryStatement.
func NewContinuousQuery(database string, cqi *meta.ContinuousQueryInfo) (*ContinuousQuery, error) {
stmt, err := influxql.NewParser(strings.NewReader(cqi.Query)).ParseStatement()
if err != nil {
return nil, err
}
q, ok := stmt.(*influxql.CreateContinuousQueryStatement)
if !ok || q.Source.Target == nil || q.Source.Target.Measurement == nil {
return nil, errors.New("query isn't a valid continuous query")
}
cquery := &ContinuousQuery{
Database: database,
Info: cqi,
Resample: ResampleOptions{
Every: q.ResampleEvery,
For: q.ResampleFor,
},
q: q.Source,
}
return cquery, nil
}
// shouldRunContinuousQuery returns true if the CQ should be scheduled to run. It will use the
// lastRunTime of the CQ and the rules for when to run set through the query to determine
// if this CQ should be run.
func (cq *ContinuousQuery) shouldRunContinuousQuery(now time.Time, interval time.Duration) (bool, time.Time, error) {
// If it's not aggregated, do not run the query.
if cq.q.IsRawQuery {
return false, cq.LastRun, errors.New("continuous queries must be aggregate queries")
}
// Override the query's default run interval with the resample options.
resampleEvery := interval
if cq.Resample.Every != 0 {
resampleEvery = cq.Resample.Every
}
// Determine if we should run the continuous query based on the last time it ran.
// If the query never ran, execute it using the current time.
if cq.HasRun {
// Retrieve the zone offset for the previous window.
_, startOffset := cq.LastRun.Add(-1).Zone()
nextRun := cq.LastRun.Add(resampleEvery)
// Retrieve the end zone offset for the end of the current interval.
if _, endOffset := nextRun.Add(-1).Zone(); startOffset != endOffset {
diff := int64(startOffset-endOffset) * int64(time.Second)
if abs(diff) < int64(resampleEvery) {
nextRun = nextRun.Add(time.Duration(diff))
}
}
if nextRun.UnixNano() <= now.UnixNano() {
return true, nextRun, nil
}
} else {
// Retrieve the location from the CQ.
loc := cq.q.Location
if loc == nil {
loc = time.UTC
}
return true, now.In(loc), nil
}
return false, cq.LastRun, nil
}
// assert will panic with a given formatted message if the given condition is false.
func assert(condition bool, msg string, v ...interface{}) {
if !condition {
panic(fmt.Sprintf("assert failed: "+msg, v...))
}
}
// truncate truncates the time based on the unix timestamp instead of the
// Go time library. The Go time library has the start of the week on Monday
// while the start of the week for the unix timestamp is a Thursday.
func truncate(ts time.Time, d time.Duration) time.Time {
t := ts.UnixNano()
offset := zone(ts)
dt := (t + offset) % int64(d)
if dt < 0 {
// Negative modulo rounds up instead of down, so offset
// with the duration.
dt += int64(d)
}
ts = time.Unix(0, t-dt).In(ts.Location())
if adjustedOffset := zone(ts); adjustedOffset != offset {
diff := offset - adjustedOffset
if abs(diff) < int64(d) {
ts = ts.Add(time.Duration(diff))
}
}
return ts
}
func zone(ts time.Time) int64 {
_, offset := ts.Zone()
return int64(offset) * int64(time.Second)
}
func abs(v int64) int64 {
if v < 0 {
return -v
}
return v
}


@@ -0,0 +1,768 @@
package continuous_querier
import (
"errors"
"fmt"
"os"
"sync"
"testing"
"time"
"github.com/influxdata/influxdb/influxql"
"github.com/influxdata/influxdb/services/meta"
"github.com/uber-go/zap"
)
var (
errExpected = errors.New("expected error")
errUnexpected = errors.New("unexpected error")
)
// Test closing never opened, open, open already open, close, and close already closed.
func TestOpenAndClose(t *testing.T) {
s := NewTestService(t)
if err := s.Close(); err != nil {
t.Error(err)
} else if err = s.Open(); err != nil {
t.Error(err)
} else if err = s.Open(); err != nil {
t.Error(err)
} else if err = s.Close(); err != nil {
t.Error(err)
} else if err = s.Close(); err != nil {
t.Error(err)
}
}
// Test Run method.
func TestContinuousQueryService_Run(t *testing.T) {
s := NewTestService(t)
// Set RunInterval high so we can trigger using Run method.
s.RunInterval = 10 * time.Minute
done := make(chan struct{})
expectCallCnt := 3
callCnt := 0
// Set a callback for ExecuteStatement.
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
callCnt++
if callCnt >= expectCallCnt {
done <- struct{}{}
}
ctx.Results <- &influxql.Result{}
return nil
},
}
// Use a custom "now" time since the internals of last run care about
// what the actual time is. Truncate to 10 minutes so we are starting on an interval.
now := time.Now().Truncate(10 * time.Minute)
s.Open()
// Trigger service to run all CQs.
s.Run("", "", now)
// Shouldn't time out.
if err := wait(done, 100*time.Millisecond); err != nil {
t.Error(err)
}
// This time it should time out because ExecuteQuery should not get called again.
if err := wait(done, 100*time.Millisecond); err == nil {
t.Error("too many queries executed")
}
s.Close()
// Now test just one query.
expectCallCnt = 1
callCnt = 0
s.Open()
s.Run("db", "cq", now)
// Shouldn't time out.
if err := wait(done, 100*time.Millisecond); err != nil {
t.Error(err)
}
// This time it should time out because ExecuteQuery should not get called again.
if err := wait(done, 100*time.Millisecond); err == nil {
t.Error("too many queries executed")
}
s.Close()
}
func TestContinuousQueryService_ResampleOptions(t *testing.T) {
s := NewTestService(t)
mc := NewMetaClient(t)
mc.CreateDatabase("db", "")
mc.CreateContinuousQuery("db", "cq", `CREATE CONTINUOUS QUERY cq ON db RESAMPLE EVERY 10s FOR 2m BEGIN SELECT mean(value) INTO cpu_mean FROM cpu GROUP BY time(1m) END`)
s.MetaClient = mc
db := s.MetaClient.Database("db")
cq, err := NewContinuousQuery(db.Name, &db.ContinuousQueries[0])
if err != nil {
t.Fatal(err)
} else if cq.Resample.Every != 10*time.Second {
t.Errorf("expected resample every to be 10s, got %s", influxql.FormatDuration(cq.Resample.Every))
} else if cq.Resample.For != 2*time.Minute {
t.Errorf("expected resample for 2m, got %s", influxql.FormatDuration(cq.Resample.For))
}
// Set RunInterval high so we can trigger using Run method.
s.RunInterval = 10 * time.Minute
done := make(chan struct{})
var expected struct {
min time.Time
max time.Time
}
// Set a callback for ExecuteStatement.
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
s := stmt.(*influxql.SelectStatement)
min, max, err := influxql.TimeRange(s.Condition, s.Location)
if err != nil {
t.Errorf("unexpected error parsing time range: %s", err)
} else if !expected.min.Equal(min) || !expected.max.Equal(max) {
t.Errorf("mismatched time range: got=(%s, %s) exp=(%s, %s)", min, max, expected.min, expected.max)
}
done <- struct{}{}
ctx.Results <- &influxql.Result{}
return nil
},
}
s.Open()
defer s.Close()
// Set the 'now' time to the start of a 10 minute interval. Then trigger a run.
// This should trigger two queries (one for the current time interval, one for the previous).
now := time.Now().UTC().Truncate(10 * time.Minute)
expected.min = now.Add(-2 * time.Minute)
expected.max = now.Add(-1)
s.RunCh <- &RunRequest{Now: now}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
// Trigger another run 10 seconds later. Another two queries should happen,
// but it will be a different two queries.
expected.min = expected.min.Add(time.Minute)
expected.max = expected.max.Add(time.Minute)
s.RunCh <- &RunRequest{Now: now.Add(10 * time.Second)}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
// Reset the time period and send the initial request at 5 seconds after the
// 10 minute mark. There should be exactly one call since the current interval is too
// young and only one interval matches the FOR duration.
expected.min = now.Add(-time.Minute)
expected.max = now.Add(-1)
s.Run("", "", now.Add(5*time.Second))
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
// Send a message 10 minutes later and ensure that the system plays catchup.
expected.max = now.Add(10*time.Minute - 1)
s.RunCh <- &RunRequest{Now: now.Add(10 * time.Minute)}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
// No overflow should be sent.
if err := wait(done, 100*time.Millisecond); err == nil {
t.Error("too many queries executed")
}
}
func TestContinuousQueryService_EveryHigherThanInterval(t *testing.T) {
s := NewTestService(t)
ms := NewMetaClient(t)
ms.CreateDatabase("db", "")
ms.CreateContinuousQuery("db", "cq", `CREATE CONTINUOUS QUERY cq ON db RESAMPLE EVERY 1m BEGIN SELECT mean(value) INTO cpu_mean FROM cpu GROUP BY time(30s) END`)
s.MetaClient = ms
// Set RunInterval high so we can trigger using Run method.
s.RunInterval = 10 * time.Minute
done := make(chan struct{})
var expected struct {
min time.Time
max time.Time
}
// Set a callback for ExecuteQuery.
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
s := stmt.(*influxql.SelectStatement)
min, max, err := influxql.TimeRange(s.Condition, s.Location)
if err != nil {
t.Errorf("unexpected error parsing time range: %s", err)
} else if !expected.min.Equal(min) || !expected.max.Equal(max) {
t.Errorf("mismatched time range: got=(%s, %s) exp=(%s, %s)", min, max, expected.min, expected.max)
}
done <- struct{}{}
ctx.Results <- &influxql.Result{}
return nil
},
}
s.Open()
defer s.Close()
// Set the 'now' time to the start of a 10 minute interval. Then trigger a run.
// This should trigger two queries (one for the current time interval, one for the previous)
// since the default FOR interval should be EVERY, not the GROUP BY interval.
now := time.Now().Truncate(10 * time.Minute)
expected.min = now.Add(-time.Minute)
expected.max = now.Add(-1)
s.RunCh <- &RunRequest{Now: now}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
// Trigger 30 seconds later. Nothing should run.
s.RunCh <- &RunRequest{Now: now.Add(30 * time.Second)}
if err := wait(done, 100*time.Millisecond); err == nil {
t.Fatal("too many queries")
}
// Run again 1 minute later. Another two queries should run.
expected.min = now
expected.max = now.Add(time.Minute - 1)
s.RunCh <- &RunRequest{Now: now.Add(time.Minute)}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
// No overflow should be sent.
if err := wait(done, 100*time.Millisecond); err == nil {
t.Error("too many queries executed")
}
}
func TestContinuousQueryService_GroupByOffset(t *testing.T) {
s := NewTestService(t)
mc := NewMetaClient(t)
mc.CreateDatabase("db", "")
mc.CreateContinuousQuery("db", "cq", `CREATE CONTINUOUS QUERY cq ON db BEGIN SELECT mean(value) INTO cpu_mean FROM cpu GROUP BY time(1m, 30s) END`)
s.MetaClient = mc
// Set RunInterval high so we can trigger using Run method.
s.RunInterval = 10 * time.Minute
done := make(chan struct{})
var expected struct {
min time.Time
max time.Time
}
// Set a callback for ExecuteStatement.
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
s := stmt.(*influxql.SelectStatement)
min, max, err := influxql.TimeRange(s.Condition, s.Location)
if err != nil {
t.Errorf("unexpected error parsing time range: %s", err)
} else if !expected.min.Equal(min) || !expected.max.Equal(max) {
t.Errorf("mismatched time range: got=(%s, %s) exp=(%s, %s)", min, max, expected.min, expected.max)
}
done <- struct{}{}
ctx.Results <- &influxql.Result{}
return nil
},
}
s.Open()
defer s.Close()
// Set the 'now' time to the start of a 10 minute interval with a 30 second offset.
// Then trigger a run. This should trigger two queries (one for the current time
// interval, one for the previous).
now := time.Now().UTC().Truncate(10 * time.Minute).Add(30 * time.Second)
expected.min = now.Add(-time.Minute)
expected.max = now.Add(-1)
s.RunCh <- &RunRequest{Now: now}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
}
// Test service when not the cluster leader (CQs shouldn't run).
func TestContinuousQueryService_NotLeader(t *testing.T) {
s := NewTestService(t)
// Set RunInterval high so we can test triggering with the RunCh below.
s.RunInterval = 10 * time.Second
s.MetaClient.(*MetaClient).Leader = false
done := make(chan struct{})
// Set a callback for ExecuteStatement. Shouldn't get called because we're not the leader.
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
done <- struct{}{}
ctx.Results <- &influxql.Result{Err: errUnexpected}
return nil
},
}
s.Open()
// Trigger service to run CQs.
s.RunCh <- &RunRequest{Now: time.Now()}
// Expect timeout error because ExecuteQuery callback wasn't called.
if err := wait(done, 100*time.Millisecond); err == nil {
t.Error("expected timeout, but a query was executed")
}
s.Close()
}
// Test ExecuteContinuousQuery with invalid queries.
func TestExecuteContinuousQuery_InvalidQueries(t *testing.T) {
s := NewTestService(t)
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
return errUnexpected
},
}
dbis := s.MetaClient.Databases()
dbi := dbis[0]
cqi := dbi.ContinuousQueries[0]
cqi.Query = `this is not a query`
if _, err := s.ExecuteContinuousQuery(&dbi, &cqi, time.Now()); err == nil {
t.Error("expected error but got nil")
}
// Valid query but invalid continuous query.
cqi.Query = `SELECT * FROM cpu`
if _, err := s.ExecuteContinuousQuery(&dbi, &cqi, time.Now()); err == nil {
t.Error("expected error but got nil")
}
// Group by requires aggregate.
cqi.Query = `SELECT value INTO other_value FROM cpu WHERE time > now() - 1h GROUP BY time(1s)`
if _, err := s.ExecuteContinuousQuery(&dbi, &cqi, time.Now()); err == nil {
t.Error("expected error but got nil")
}
}
// Test the time range for different CQ durations.
func TestExecuteContinuousQuery_TimeRange(t *testing.T) {
// Choose a start date that is not on an interval border for anyone.
now := mustParseTime(t, "2000-01-01T00:00:00Z")
for _, tt := range []struct {
d string
start, end time.Time
}{
{
d: "10s",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T00:00:10Z"),
},
{
d: "1m",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T00:01:00Z"),
},
{
d: "10m",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T00:10:00Z"),
},
{
d: "30m",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T00:30:00Z"),
},
{
d: "1h",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T01:00:00Z"),
},
{
d: "2h",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T02:00:00Z"),
},
{
d: "12h",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-01T12:00:00Z"),
},
{
d: "1d",
start: mustParseTime(t, "2000-01-01T00:00:00Z"),
end: mustParseTime(t, "2000-01-02T00:00:00Z"),
},
{
d: "1w",
start: mustParseTime(t, "1999-12-30T00:00:00Z"),
end: mustParseTime(t, "2000-01-06T00:00:00Z"),
},
} {
t.Run(tt.d, func(t *testing.T) {
d, err := influxql.ParseDuration(tt.d)
if err != nil {
t.Fatalf("unable to parse duration: %s", err)
}
s := NewTestService(t)
mc := NewMetaClient(t)
mc.CreateDatabase("db", "")
mc.CreateContinuousQuery("db", "cq",
fmt.Sprintf(`CREATE CONTINUOUS QUERY cq ON db BEGIN SELECT mean(value) INTO cpu_mean FROM cpu GROUP BY time(%s) END`, tt.d))
s.MetaClient = mc
// Set RunInterval high so we can trigger using Run method.
s.RunInterval = 10 * time.Minute
done := make(chan struct{})
// Set a callback for ExecuteStatement.
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
s := stmt.(*influxql.SelectStatement)
min, max, err := influxql.TimeRange(s.Condition, s.Location)
max = max.Add(time.Nanosecond)
if err != nil {
t.Errorf("unexpected error parsing time range: %s", err)
} else if !tt.start.Equal(min) || !tt.end.Equal(max) {
t.Errorf("mismatched time range: got=(%s, %s) exp=(%s, %s)", min, max, tt.start, tt.end)
}
done <- struct{}{}
ctx.Results <- &influxql.Result{}
return nil
},
}
s.Open()
defer s.Close()
// Send an initial run request one nanosecond after the start to
// prime the last CQ map.
s.RunCh <- &RunRequest{Now: now.Add(time.Nanosecond)}
// Execute the real request after the time interval.
s.RunCh <- &RunRequest{Now: now.Add(d)}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(err)
}
})
}
}
// Test the time range for different CQ durations.
func TestExecuteContinuousQuery_TimeZone(t *testing.T) {
type test struct {
now time.Time
start, end time.Time
}
// Choose a start date that is not on an interval border for anyone.
for _, tt := range []struct {
name string
d string
options string
initial time.Time
tests []test
}{
{
name: "DaylightSavingsStart/1d",
d: "1d",
initial: mustParseTime(t, "2000-04-02T00:00:00-05:00"),
tests: []test{
{
start: mustParseTime(t, "2000-04-02T00:00:00-05:00"),
end: mustParseTime(t, "2000-04-03T00:00:00-04:00"),
},
},
},
{
name: "DaylightSavingsStart/2h",
d: "2h",
initial: mustParseTime(t, "2000-04-02T00:00:00-05:00"),
tests: []test{
{
start: mustParseTime(t, "2000-04-02T00:00:00-05:00"),
end: mustParseTime(t, "2000-04-02T03:00:00-04:00"),
},
{
start: mustParseTime(t, "2000-04-02T03:00:00-04:00"),
end: mustParseTime(t, "2000-04-02T04:00:00-04:00"),
},
},
},
{
name: "DaylightSavingsEnd/1d",
d: "1d",
initial: mustParseTime(t, "2000-10-29T00:00:00-04:00"),
tests: []test{
{
start: mustParseTime(t, "2000-10-29T00:00:00-04:00"),
end: mustParseTime(t, "2000-10-30T00:00:00-05:00"),
},
},
},
{
name: "DaylightSavingsEnd/2h",
d: "2h",
initial: mustParseTime(t, "2000-10-29T00:00:00-04:00"),
tests: []test{
{
start: mustParseTime(t, "2000-10-29T00:00:00-04:00"),
end: mustParseTime(t, "2000-10-29T02:00:00-05:00"),
},
{
start: mustParseTime(t, "2000-10-29T02:00:00-05:00"),
end: mustParseTime(t, "2000-10-29T04:00:00-05:00"),
},
},
},
} {
t.Run(tt.name, func(t *testing.T) {
s := NewTestService(t)
mc := NewMetaClient(t)
mc.CreateDatabase("db", "")
mc.CreateContinuousQuery("db", "cq",
fmt.Sprintf(`CREATE CONTINUOUS QUERY cq ON db %s BEGIN SELECT mean(value) INTO cpu_mean FROM cpu GROUP BY time(%s) TZ('America/New_York') END`, tt.options, tt.d))
s.MetaClient = mc
// Set RunInterval high so we can trigger using Run method.
s.RunInterval = 10 * time.Minute
done := make(chan struct{})
// Set a callback for ExecuteStatement.
tests := make(chan test, 1)
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
test := <-tests
s := stmt.(*influxql.SelectStatement)
min, max, err := influxql.TimeRange(s.Condition, s.Location)
max = max.Add(time.Nanosecond)
if err != nil {
t.Errorf("unexpected error parsing time range: %s", err)
} else if !test.start.Equal(min) || !test.end.Equal(max) {
t.Errorf("mismatched time range: got=(%s, %s) exp=(%s, %s)", min, max, test.start, test.end)
}
done <- struct{}{}
ctx.Results <- &influxql.Result{}
return nil
},
}
s.Open()
defer s.Close()
// Send an initial run request one nanosecond after the start to
// prime the last CQ map.
s.RunCh <- &RunRequest{Now: tt.initial.Add(time.Nanosecond)}
// Execute each of the tests and ensure the times are correct.
for i, test := range tt.tests {
tests <- test
now := test.now
if now.IsZero() {
now = test.end
}
s.RunCh <- &RunRequest{Now: now}
if err := wait(done, 100*time.Millisecond); err != nil {
t.Fatal(fmt.Errorf("%d. %s", i+1, err))
}
}
})
}
}
// Test ExecuteContinuousQuery when QueryExecutor returns an error.
func TestExecuteContinuousQuery_QueryExecutor_Error(t *testing.T) {
s := NewTestService(t)
s.QueryExecutor.StatementExecutor = &StatementExecutor{
ExecuteStatementFn: func(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
return errExpected
},
}
dbis := s.MetaClient.Databases()
dbi := dbis[0]
cqi := dbi.ContinuousQueries[0]
now := time.Now().Truncate(10 * time.Minute)
if _, err := s.ExecuteContinuousQuery(&dbi, &cqi, now); err != errExpected {
t.Errorf("exp = %s, got = %v", errExpected, err)
}
}
// NewTestService returns a new *Service with default mock object members.
func NewTestService(t *testing.T) *Service {
s := NewService(NewConfig())
ms := NewMetaClient(t)
s.MetaClient = ms
s.QueryExecutor = influxql.NewQueryExecutor()
s.RunInterval = time.Millisecond
// The service's default logger discards output; when running verbosely, log to stderr instead.
if testing.Verbose() {
s.WithLogger(zap.New(
zap.NewTextEncoder(),
zap.Output(os.Stderr),
))
}
// Add a couple test databases and CQs.
ms.CreateDatabase("db", "rp")
ms.CreateContinuousQuery("db", "cq", `CREATE CONTINUOUS QUERY cq ON db BEGIN SELECT count(cpu) INTO cpu_count FROM cpu WHERE time > now() - 1h GROUP BY time(1s) END`)
ms.CreateDatabase("db2", "default")
ms.CreateContinuousQuery("db2", "cq2", `CREATE CONTINUOUS QUERY cq2 ON db2 BEGIN SELECT mean(value) INTO cpu_mean FROM cpu WHERE time > now() - 10m GROUP BY time(1m) END`)
ms.CreateDatabase("db3", "default")
ms.CreateContinuousQuery("db3", "cq3", `CREATE CONTINUOUS QUERY cq3 ON db3 BEGIN SELECT mean(value) INTO "1hAverages".:MEASUREMENT FROM /cpu[0-9]?/ GROUP BY time(10s) END`)
return s
}
// MetaClient is a mock meta store.
type MetaClient struct {
mu sync.RWMutex
Leader bool
AllowLease bool
DatabaseInfos []meta.DatabaseInfo
Err error
t *testing.T
nodeID uint64
}
// NewMetaClient returns a *MetaClient.
func NewMetaClient(t *testing.T) *MetaClient {
return &MetaClient{
Leader: true,
AllowLease: true,
t: t,
nodeID: 1,
}
}
// NodeID returns the client's node ID.
func (ms *MetaClient) NodeID() uint64 { return ms.nodeID }
// AcquireLease attempts to acquire the specified lease.
func (ms *MetaClient) AcquireLease(name string) (l *meta.Lease, err error) {
if ms.Leader {
if ms.AllowLease {
return &meta.Lease{Name: name}, nil
}
return nil, errors.New("another node owns the lease")
}
return nil, meta.ErrServiceUnavailable
}
// Databases returns a list of database info about each database in the coordinator.
func (ms *MetaClient) Databases() []meta.DatabaseInfo {
ms.mu.RLock()
defer ms.mu.RUnlock()
return ms.DatabaseInfos
}
// Database returns a single database by name.
func (ms *MetaClient) Database(name string) *meta.DatabaseInfo {
ms.mu.RLock()
defer ms.mu.RUnlock()
return ms.database(name)
}
func (ms *MetaClient) database(name string) *meta.DatabaseInfo {
if ms.Err != nil {
return nil
}
for i := range ms.DatabaseInfos {
if ms.DatabaseInfos[i].Name == name {
return &ms.DatabaseInfos[i]
}
}
return nil
}
// CreateDatabase adds a new database to the meta store.
func (ms *MetaClient) CreateDatabase(name, defaultRetentionPolicy string) error {
ms.mu.Lock()
defer ms.mu.Unlock()
if ms.Err != nil {
return ms.Err
}
// See if the database already exists.
for _, dbi := range ms.DatabaseInfos {
if dbi.Name == name {
return fmt.Errorf("database already exists: %s", name)
}
}
// Create database.
ms.DatabaseInfos = append(ms.DatabaseInfos, meta.DatabaseInfo{
Name: name,
DefaultRetentionPolicy: defaultRetentionPolicy,
})
return nil
}
// CreateContinuousQuery adds a CQ to the meta store.
func (ms *MetaClient) CreateContinuousQuery(database, name, query string) error {
ms.mu.Lock()
defer ms.mu.Unlock()
if ms.Err != nil {
return ms.Err
}
dbi := ms.database(database)
if dbi == nil {
return fmt.Errorf("database not found: %s", database)
}
// See if CQ already exists.
for _, cqi := range dbi.ContinuousQueries {
if cqi.Name == name {
return fmt.Errorf("continuous query already exists: %s", name)
}
}
// Create a new CQ and store it.
dbi.ContinuousQueries = append(dbi.ContinuousQueries, meta.ContinuousQueryInfo{
Name: name,
Query: query,
})
return nil
}
// StatementExecutor is a mock statement executor.
type StatementExecutor struct {
ExecuteStatementFn func(stmt influxql.Statement, ctx influxql.ExecutionContext) error
}
func (e *StatementExecutor) ExecuteStatement(stmt influxql.Statement, ctx influxql.ExecutionContext) error {
return e.ExecuteStatementFn(stmt, ctx)
}
func wait(c chan struct{}, d time.Duration) (err error) {
select {
case <-c:
case <-time.After(d):
err = errors.New("timed out")
}
return
}
func mustParseTime(t *testing.T, value string) time.Time {
ts, err := time.Parse(time.RFC3339, value)
if err != nil {
t.Fatalf("unable to parse time: %s", err)
}
return ts
}