Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 18 additions & 41 deletions sei-db/db_engine/litt/littbuilder/build_utils.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
package littbuilder

import (
"context"
"fmt"
"log/slog"
"net/http"
"os"
"path"
"strings"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
commonmetrics "github.com/sei-protocol/sei-chain/sei-db/common/metrics"
"github.com/sei-protocol/sei-chain/sei-db/db_engine/litt"
"github.com/sei-protocol/sei-chain/sei-db/db_engine/litt/dbcache"
"github.com/sei-protocol/sei-chain/sei-db/db_engine/litt/disktable"
Expand Down Expand Up @@ -243,45 +239,26 @@ func buildLogger(config *litt.Config) *slog.Logger {
return slog.Default()
}

// buildMetrics creates a new metrics object based on the configuration. If the returned server is not nil,
// then it is the responsibility of the caller to eventually call server.Shutdown().
func buildMetrics(config *litt.Config, logger *slog.Logger) (*metrics.LittDBMetrics, *http.Server) {
// buildMetrics creates a new metrics object backed by the global OTel
// MeterProvider. When MetricsEnabled is true, this configures the global
// provider with a Prometheus exporter and starts an HTTP server on
// MetricsPort that serves /metrics. The returned shutdown function flushes
// the provider; it is the responsibility of the caller to invoke it during
// teardown.
func buildMetrics(config *litt.Config, logger *slog.Logger) (*metrics.LittDBMetrics, func(context.Context) error) {
if !config.MetricsEnabled {
return nil, nil
}

var registry *prometheus.Registry
var server *http.Server

if config.MetricsEnabled {
if config.MetricsRegistry == nil {
registry = prometheus.NewRegistry()
registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
registry.MustRegister(collectors.NewGoCollector())

logger.Info("Starting metrics server", "port", config.MetricsPort)
addr := fmt.Sprintf(":%d", config.MetricsPort)
mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.HandlerFor(
registry,
promhttp.HandlerOpts{},
))
server = &http.Server{
Addr: addr,
Handler: mux,
ReadHeaderTimeout: 10 * time.Second,
}

go func() {
err := server.ListenAndServe()
if err != nil && !strings.Contains(err.Error(), "http: Server closed") {
logger.Error("metrics server error", "error", err)
}
}()
} else {
registry = config.MetricsRegistry
}
reg, shutdown, err := commonmetrics.SetupOtelPrometheus()
if err != nil {
logger.Error("failed to set up OTel Prometheus exporter", "error", err)
return nil, nil
}

return metrics.NewLittDBMetrics(registry, config.MetricsNamespace), server
addr := fmt.Sprintf(":%d", config.MetricsPort)
logger.Info("Starting metrics server", "port", config.MetricsPort)
commonmetrics.StartMetricsServer(config.CTX, reg, addr)

return metrics.NewLittDBMetrics(), shutdown
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Metrics HTTP server leaks after DB close

Medium Severity

The metrics HTTP server started by commonmetrics.StartMetricsServer is never shut down when db.Close() is called. The old code deferred d.metricsServer.Close() inside gatherMetrics, but the new code only defers d.metricsShutdown (the OTel provider.Shutdown), which flushes the meter provider without stopping the HTTP listener. The server only terminates when config.CTX is cancelled, and the default context is context.Background(), which is never cancelled. This leaks a goroutine and a bound TCP port after every Close() call.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit be9328d. Configure here.

}
37 changes: 19 additions & 18 deletions sei-db/db_engine/litt/littbuilder/db_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"fmt"
"log/slog"
"net/http"
"sync"
"sync/atomic"
"time"
Expand Down Expand Up @@ -53,8 +52,8 @@ type db struct {
// Metrics for the database.
metrics *metrics.LittDBMetrics

// The HTTP server for metrics. nil if metrics are disabled or if an external party is managing the server.
metricsServer *http.Server
// Shuts down the OTel MeterProvider configured by buildMetrics. nil if metrics are disabled.
metricsShutdown func(context.Context) error

// A function that releases file locks.
releaseLocks func()
Expand Down Expand Up @@ -125,9 +124,9 @@ func NewDBUnsafe(config *litt.Config, tableBuilder TableBuilderFunc) (litt.DB, e
}

var dbMetrics *metrics.LittDBMetrics
var metricsServer *http.Server
var metricsShutdown func(context.Context) error
if config.MetricsEnabled {
dbMetrics, metricsServer = buildMetrics(config, config.Logger)
dbMetrics, metricsShutdown = buildMetrics(config, config.Logger)
}

if config.SnapshotDirectory != "" {
Expand All @@ -136,16 +135,16 @@ func NewDBUnsafe(config *litt.Config, tableBuilder TableBuilderFunc) (litt.DB, e
}

database := &db{
ctx: config.CTX,
logger: config.Logger,
clock: config.Clock,
ttl: config.TTL,
gcPeriod: config.GCPeriod,
tableBuilder: tableBuilder,
tables: make(map[string]litt.ManagedTable),
metrics: dbMetrics,
metricsServer: metricsServer,
releaseLocks: releaseLocks,
ctx: config.CTX,
logger: config.Logger,
clock: config.Clock,
ttl: config.TTL,
gcPeriod: config.GCPeriod,
tableBuilder: tableBuilder,
tables: make(map[string]litt.ManagedTable),
metrics: dbMetrics,
metricsShutdown: metricsShutdown,
releaseLocks: releaseLocks,
}

if config.MetricsEnabled {
Expand Down Expand Up @@ -281,11 +280,13 @@ func (d *db) Destroy() error {

// gatherMetrics is a method that periodically collects metrics.
func (d *db) gatherMetrics(interval time.Duration) {
if d.metricsServer != nil {
if d.metricsShutdown != nil {
defer func() {
err := d.metricsServer.Close()
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
err := d.metricsShutdown(shutdownCtx)
if err != nil {
d.logger.Error("error closing metrics server", "error", err)
d.logger.Error("error shutting down metrics provider", "error", err)
}
}()
}
Expand Down
17 changes: 5 additions & 12 deletions sei-db/db_engine/litt/littdb_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"math"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/sei-protocol/sei-chain/sei-db/common/unit"
"github.com/sei-protocol/sei-chain/sei-db/db_engine/litt/disktable/keymap"
"github.com/sei-protocol/sei-chain/sei-db/db_engine/litt/util"
Expand Down Expand Up @@ -117,17 +116,12 @@ type Config struct {
// than keymap.MemKeymapType, performing this check may be very expensive. By default, this is false.
DoubleWriteProtection bool

// If enabled, collect DB metrics and export them to prometheus. By default, this is false.
// If enabled, collect DB metrics and export them via the global OTel MeterProvider. By default, this is false.
// When enabled, the database configures a Prometheus exporter on the global provider and serves /metrics on
// MetricsPort.
MetricsEnabled bool

// The namespace to use for metrics. If empty, the default namespace "litt" is used.
MetricsNamespace string

// The prometheus registry to use for metrics. If nil and metrics are enabled, a new registry is created.
MetricsRegistry *prometheus.Registry

// The port to use for the metrics server. Ignored if MetricsEnabled is false or MetricsRegistry is not nil.
// The default is 9101.
// The port to use for the metrics server. Ignored if MetricsEnabled is false. The default is 9101.
MetricsPort int

// The interval at which various DB metrics are updated. The default is 1 second.
Expand Down Expand Up @@ -194,7 +188,6 @@ func DefaultConfigNoPaths() *Config {
Fsync: true,
DoubleWriteProtection: false,
MetricsEnabled: false,
MetricsNamespace: "litt",
MetricsPort: 9101,
MetricsUpdateInterval: time.Second,
PurgeLocks: false,
Expand Down Expand Up @@ -258,7 +251,7 @@ func (c *Config) SanityCheck() error {
if c.GCPeriod == 0 {
return fmt.Errorf("gc period must be at least 1")
}
if (c.MetricsEnabled || c.MetricsRegistry != nil) && c.MetricsUpdateInterval == 0 {
if c.MetricsEnabled && c.MetricsUpdateInterval == 0 {
return fmt.Errorf("metrics update interval must be at least 1 if metrics are enabled")
}

Expand Down
Loading
Loading