1
0
mirror of https://github.com/prometheus/prometheus synced 2025-04-01 22:59:03 +00:00

Merge pull request from prometheus/fabxc-vendor

Update vendoring
This commit is contained in:
Fabian Reinartz 2016-07-04 13:21:50 +02:00 committed by GitHub
commit 3c1e15087d
102 changed files with 2923 additions and 1581 deletions
vendor
github.com
golang.org/x

20
vendor/github.com/beorn7/perks/LICENSE generated vendored Normal file
View File

@ -0,0 +1,20 @@
Copyright (C) 2013 Blake Mizerany
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -15,7 +15,6 @@ package prometheus
import (
"errors"
"hash/fnv"
)
// Counter is a Metric that represents a single numerical value that only ever
@ -97,7 +96,6 @@ func NewCounterVec(opts CounterOpts, labelNames []string) *CounterVec {
MetricVec: MetricVec{
children: map[uint64]Metric{},
desc: desc,
hash: fnv.New64a(),
newMetric: func(lvs ...string) Metric {
result := &counter{value: value{
desc: desc,

View File

@ -1,10 +1,8 @@
package prometheus
import (
"bytes"
"errors"
"fmt"
"hash/fnv"
"regexp"
"sort"
"strings"
@ -131,31 +129,24 @@ func NewDesc(fqName, help string, variableLabels []string, constLabels Labels) *
d.err = errors.New("duplicate label names")
return d
}
h := fnv.New64a()
var b bytes.Buffer // To copy string contents into, avoiding []byte allocations.
vh := hashNew()
for _, val := range labelValues {
b.Reset()
b.WriteString(val)
b.WriteByte(separatorByte)
h.Write(b.Bytes())
vh = hashAdd(vh, val)
vh = hashAddByte(vh, separatorByte)
}
d.id = h.Sum64()
d.id = vh
// Sort labelNames so that order doesn't matter for the hash.
sort.Strings(labelNames)
// Now hash together (in this order) the help string and the sorted
// label names.
h.Reset()
b.Reset()
b.WriteString(help)
b.WriteByte(separatorByte)
h.Write(b.Bytes())
lh := hashNew()
lh = hashAdd(lh, help)
lh = hashAddByte(lh, separatorByte)
for _, labelName := range labelNames {
b.Reset()
b.WriteString(labelName)
b.WriteByte(separatorByte)
h.Write(b.Bytes())
lh = hashAdd(lh, labelName)
lh = hashAddByte(lh, separatorByte)
}
d.dimHash = h.Sum64()
d.dimHash = lh
d.constLabelPairs = make([]*dto.LabelPair, 0, len(constLabels))
for n, v := range constLabels {

View File

@ -61,7 +61,9 @@
// It also exports some stats about the HTTP usage of the /metrics
// endpoint. (See the Handler function for more detail.)
//
// Two more advanced metric types are the Summary and Histogram.
// Two more advanced metric types are the Summary and Histogram. A more
// thorough description of metric types can be found in the prometheus docs:
// https://prometheus.io/docs/concepts/metric_types/
//
// In addition to the fundamental metric types Gauge, Counter, Summary, and
// Histogram, a very important part of the Prometheus data model is the

View File

@ -0,0 +1,29 @@
package prometheus
// Inline and byte-free variant of hash/fnv's fnv64a.
const (
// offset64 is the starting value of a hash; hashNew returns it unchanged.
offset64 = 14695981039346656037
// prime64 is the factor the hash is multiplied by after each byte has been
// XORed into it (see hashAdd and hashAddByte).
prime64 = 1099511628211
)
// hashNew initializes a new fnv64a hash value. The returned value is the
// constant offset64; extend it with hashAdd and hashAddByte.
func hashNew() uint64 {
return offset64
}
// hashAdd folds every byte of s, in order, into the fnv64a hash value h and
// returns the resulting hash. An empty string leaves h unchanged.
func hashAdd(h uint64, s string) uint64 {
	// Consume the string one byte at a time from the front; slicing a string
	// does not copy it, so no allocation takes place.
	for ; len(s) > 0; s = s[1:] {
		h = (h ^ uint64(s[0])) * prime64
	}
	return h
}
// hashAddByte folds the single byte b into the fnv64a hash value h and returns
// the resulting hash: b is XORed into h and the result is multiplied by
// prime64.
func hashAddByte(h uint64, b byte) uint64 {
	return (h ^ uint64(b)) * prime64
}

View File

@ -13,8 +13,6 @@
package prometheus
import "hash/fnv"
// Gauge is a Metric that represents a single numerical value that can
// arbitrarily go up and down.
//
@ -77,7 +75,6 @@ func NewGaugeVec(opts GaugeOpts, labelNames []string) *GaugeVec {
MetricVec: MetricVec{
children: map[uint64]Metric{},
desc: desc,
hash: fnv.New64a(),
newMetric: func(lvs ...string) Metric {
return newValue(desc, GaugeValue, 0, lvs...)
},

View File

@ -211,7 +211,7 @@ func NewGoCollector() *goCollector {
"Number of seconds since 1970 of last garbage collection.",
nil, nil,
),
eval: func(ms *runtime.MemStats) float64 { return float64(ms.LastGC*10 ^ 9) },
eval: func(ms *runtime.MemStats) float64 { return float64(ms.LastGC) / 1e9 },
valType: GaugeValue,
},
},

View File

@ -15,7 +15,6 @@ package prometheus
import (
"fmt"
"hash/fnv"
"math"
"sort"
"sync/atomic"
@ -305,7 +304,6 @@ func NewHistogramVec(opts HistogramOpts, labelNames []string) *HistogramVec {
MetricVec: MetricVec{
children: map[uint64]Metric{},
desc: desc,
hash: fnv.New64a(),
newMetric: func(lvs ...string) Metric {
return newHistogram(desc, opts, lvs...)
},

View File

@ -57,12 +57,31 @@ func nowSeries(t ...time.Time) nower {
// has a constant label named "handler" with the provided handlerName as
// value. http_requests_total is a metric vector partitioned by HTTP method
// (label name "method") and HTTP status code (label name "code").
//
// Note that InstrumentHandler has several issues:
//
// - It uses Summaries rather than Histograms. Summaries are not useful if
// aggregation across multiple instances is required.
//
// - It uses microseconds as unit, which is deprecated and should be replaced by
// seconds.
//
// - The size of the request is calculated in a separate goroutine. Since this
// calculator requires access to the request header, it creates a race with
// any writes to the header performed during request handling.
// httputil.ReverseProxy is a prominent example for a handler
// performing such writes.
//
// Upcoming versions of this package will provide ways of instrumenting HTTP
// handlers that are more flexible and have fewer issues. Consider this function
// DEPRECATED and prefer direct instrumentation in the meantime.
func InstrumentHandler(handlerName string, handler http.Handler) http.HandlerFunc {
return InstrumentHandlerFunc(handlerName, handler.ServeHTTP)
}
// InstrumentHandlerFunc wraps the given function for instrumentation. It
// otherwise works in the same way as InstrumentHandler.
// otherwise works in the same way as InstrumentHandler (and shares the same
// issues).
func InstrumentHandlerFunc(handlerName string, handlerFunc func(http.ResponseWriter, *http.Request)) http.HandlerFunc {
return InstrumentHandlerFuncWithOpts(
SummaryOpts{
@ -73,13 +92,13 @@ func InstrumentHandlerFunc(handlerName string, handlerFunc func(http.ResponseWri
)
}
// InstrumentHandlerWithOpts works like InstrumentHandler but provides more
// flexibility (at the cost of a more complex call syntax). As
// InstrumentHandler, this function registers four metric collectors, but it
// uses the provided SummaryOpts to create them. However, the fields "Name" and
// "Help" in the SummaryOpts are ignored. "Name" is replaced by
// "requests_total", "request_duration_microseconds", "request_size_bytes", and
// "response_size_bytes", respectively. "Help" is replaced by an appropriate
// InstrumentHandlerWithOpts works like InstrumentHandler (and shares the same
// issues) but provides more flexibility (at the cost of a more complex call
// syntax). As InstrumentHandler, this function registers four metric
// collectors, but it uses the provided SummaryOpts to create them. However, the
// fields "Name" and "Help" in the SummaryOpts are ignored. "Name" is replaced
// by "requests_total", "request_duration_microseconds", "request_size_bytes",
// and "response_size_bytes", respectively. "Help" is replaced by an appropriate
// help string. The names of the variable labels of the http_requests_total
// CounterVec are "method" (get, post, etc.), and "code" (HTTP status code).
//
@ -102,9 +121,10 @@ func InstrumentHandlerWithOpts(opts SummaryOpts, handler http.Handler) http.Hand
return InstrumentHandlerFuncWithOpts(opts, handler.ServeHTTP)
}
// InstrumentHandlerFuncWithOpts works like InstrumentHandlerFunc but provides
// more flexibility (at the cost of a more complex call syntax). See
// InstrumentHandlerWithOpts for details how the provided SummaryOpts are used.
// InstrumentHandlerFuncWithOpts works like InstrumentHandlerFunc (and shares
// the same issues) but provides more flexibility (at the cost of a more complex
// call syntax). See InstrumentHandlerWithOpts for details how the provided
// SummaryOpts are used.
func InstrumentHandlerFuncWithOpts(opts SummaryOpts, handlerFunc func(http.ResponseWriter, *http.Request)) http.HandlerFunc {
reqCnt := NewCounterVec(
CounterOpts{

View File

@ -20,7 +20,7 @@
package prometheus
// Push triggers a metric collection by the default registry and pushes all
// collected metrics to the Pushgateway specified by addr. See the Pushgateway
// collected metrics to the Pushgateway specified by url. See the Pushgateway
// documentation for detailed implications of the job and instance
// parameter. instance can be left empty. You can use just host:port or ip:port
// as url, in which case 'http://' is added automatically. You can also include

View File

@ -24,7 +24,6 @@ import (
"compress/gzip"
"errors"
"fmt"
"hash/fnv"
"io"
"net/http"
"net/url"
@ -85,6 +84,9 @@ const (
// Handler returns the HTTP handler for the global Prometheus registry. It is
// already instrumented with InstrumentHandler (using "prometheus" as handler
// name). Usually the handler is used to handle the "/metrics" endpoint.
//
// Please note the issues described in the doc comment of InstrumentHandler. You
// might want to consider using UninstrumentedHandler instead.
func Handler() http.Handler {
return InstrumentHandler("prometheus", defRegistry)
}
@ -337,6 +339,9 @@ func (r *registry) Push(job, instance, pushURL, method string) error {
if !strings.Contains(pushURL, "://") {
pushURL = "http://" + pushURL
}
if strings.HasSuffix(pushURL, "/") {
pushURL = pushURL[:len(pushURL)-1]
}
pushURL = fmt.Sprintf("%s/metrics/jobs/%s", pushURL, url.QueryEscape(job))
if instance != "" {
pushURL += "/instances/" + url.QueryEscape(instance)
@ -528,30 +533,25 @@ func (r *registry) checkConsistency(metricFamily *dto.MetricFamily, dtoMetric *d
}
// Is the metric unique (i.e. no other metric with the same name and the same label values)?
h := fnv.New64a()
var buf bytes.Buffer
buf.WriteString(metricFamily.GetName())
buf.WriteByte(separatorByte)
h.Write(buf.Bytes())
h := hashNew()
h = hashAdd(h, metricFamily.GetName())
h = hashAddByte(h, separatorByte)
// Make sure label pairs are sorted. We depend on it for the consistency
// check. Label pairs must be sorted by contract. But the point of this
// method is to check for contract violations. So we better do the sort
// now.
sort.Sort(LabelPairSorter(dtoMetric.Label))
for _, lp := range dtoMetric.Label {
buf.Reset()
buf.WriteString(lp.GetValue())
buf.WriteByte(separatorByte)
h.Write(buf.Bytes())
h = hashAdd(h, lp.GetValue())
h = hashAddByte(h, separatorByte)
}
metricHash := h.Sum64()
if _, exists := metricHashes[metricHash]; exists {
if _, exists := metricHashes[h]; exists {
return fmt.Errorf(
"collected metric %s %s was collected before with the same name and label values",
metricFamily.GetName(), dtoMetric,
)
}
metricHashes[metricHash] = struct{}{}
metricHashes[h] = struct{}{}
if desc == nil {
return nil // Nothing left to check if we have no desc.
@ -722,5 +722,18 @@ func (s metricSorter) Less(i, j int) bool {
return vi < vj
}
}
return true
// We should never arrive here. Multiple metrics with the same
// label set in the same scrape will lead to undefined ingestion
// behavior. However, as above, we have to provide stable sorting
// here, even for inconsistent metrics. So sort equal metrics
// by their timestamp, with missing timestamps (implying "now")
// coming last.
if s[i].TimestampMs == nil {
return false
}
if s[j].TimestampMs == nil {
return true
}
return s[i].GetTimestampMs() < s[j].GetTimestampMs()
}

View File

@ -15,7 +15,6 @@ package prometheus
import (
"fmt"
"hash/fnv"
"math"
"sort"
"sync"
@ -408,7 +407,6 @@ func NewSummaryVec(opts SummaryOpts, labelNames []string) *SummaryVec {
MetricVec: MetricVec{
children: map[uint64]Metric{},
desc: desc,
hash: fnv.New64a(),
newMetric: func(lvs ...string) Metric {
return newSummary(desc, opts, lvs...)
},

View File

@ -13,8 +13,6 @@
package prometheus
import "hash/fnv"
// Untyped is a Metric that represents a single numerical value that can
// arbitrarily go up and down.
//
@ -75,7 +73,6 @@ func NewUntypedVec(opts UntypedOpts, labelNames []string) *UntypedVec {
MetricVec: MetricVec{
children: map[uint64]Metric{},
desc: desc,
hash: fnv.New64a(),
newMetric: func(lvs ...string) Metric {
return newValue(desc, UntypedValue, 0, lvs...)
},

View File

@ -14,9 +14,7 @@
package prometheus
import (
"bytes"
"fmt"
"hash"
"sync"
)
@ -26,16 +24,10 @@ import (
// type. GaugeVec, CounterVec, SummaryVec, and UntypedVec are examples already
// provided in this package.
type MetricVec struct {
mtx sync.RWMutex // Protects not only children, but also hash and buf.
mtx sync.RWMutex // Protects the children.
children map[uint64]Metric
desc *Desc
// hash is our own hash instance to avoid repeated allocations.
hash hash.Hash64
// buf is used to copy string contents into it for hashing,
// again to avoid allocations.
buf bytes.Buffer
newMetric func(labelValues ...string) Metric
}
@ -80,13 +72,20 @@ func (m *MetricVec) Collect(ch chan<- Metric) {
// with a performance overhead (for creating and processing the Labels map).
// See also the GaugeVec example.
func (m *MetricVec) GetMetricWithLabelValues(lvs ...string) (Metric, error) {
m.mtx.Lock()
defer m.mtx.Unlock()
h, err := m.hashLabelValues(lvs)
if err != nil {
return nil, err
}
m.mtx.RLock()
metric, ok := m.children[h]
m.mtx.RUnlock()
if ok {
return metric, nil
}
m.mtx.Lock()
defer m.mtx.Unlock()
return m.getOrCreateMetric(h, lvs...), nil
}
@ -103,17 +102,24 @@ func (m *MetricVec) GetMetricWithLabelValues(lvs ...string) (Metric, error) {
// GetMetricWithLabelValues(...string). See there for pros and cons of the two
// methods.
func (m *MetricVec) GetMetricWith(labels Labels) (Metric, error) {
m.mtx.Lock()
defer m.mtx.Unlock()
h, err := m.hashLabels(labels)
if err != nil {
return nil, err
}
m.mtx.RLock()
metric, ok := m.children[h]
m.mtx.RUnlock()
if ok {
return metric, nil
}
lvs := make([]string, len(labels))
for i, label := range m.desc.variableLabels {
lvs[i] = labels[label]
}
m.mtx.Lock()
defer m.mtx.Unlock()
return m.getOrCreateMetric(h, lvs...), nil
}
@ -162,7 +168,7 @@ func (m *MetricVec) DeleteLabelValues(lvs ...string) bool {
if err != nil {
return false
}
if _, has := m.children[h]; !has {
if _, ok := m.children[h]; !ok {
return false
}
delete(m.children, h)
@ -187,7 +193,7 @@ func (m *MetricVec) Delete(labels Labels) bool {
if err != nil {
return false
}
if _, has := m.children[h]; !has {
if _, ok := m.children[h]; !ok {
return false
}
delete(m.children, h)
@ -208,30 +214,26 @@ func (m *MetricVec) hashLabelValues(vals []string) (uint64, error) {
if len(vals) != len(m.desc.variableLabels) {
return 0, errInconsistentCardinality
}
m.hash.Reset()
h := hashNew()
for _, val := range vals {
m.buf.Reset()
m.buf.WriteString(val)
m.hash.Write(m.buf.Bytes())
h = hashAdd(h, val)
}
return m.hash.Sum64(), nil
return h, nil
}
func (m *MetricVec) hashLabels(labels Labels) (uint64, error) {
if len(labels) != len(m.desc.variableLabels) {
return 0, errInconsistentCardinality
}
m.hash.Reset()
h := hashNew()
for _, label := range m.desc.variableLabels {
val, ok := labels[label]
if !ok {
return 0, fmt.Errorf("label name %q missing in label map", label)
}
m.buf.Reset()
m.buf.WriteString(val)
m.hash.Write(m.buf.Bytes())
h = hashAdd(h, val)
}
return m.hash.Sum64(), nil
return h, nil
}
func (m *MetricVec) getOrCreateMetric(hash uint64, labelValues ...string) Metric {

View File

@ -12,5 +12,5 @@
// limitations under the License.
// Package model contains common data structures that are shared across
// Prometheus componenets and libraries.
// Prometheus components and libraries.
package model

View File

@ -8,5 +8,13 @@ Maintainers of this repository:
The following individuals have contributed code to this repository
(listed in alphabetical order):
* Armen Baghumian <abaghumian@noggin.com.au>
* Bjoern Rabenstein <beorn@soundcloud.com>
* David Cournapeau <cournape@gmail.com>
* Ji-Hoon, Seol <jihoon.seol@gmail.com>
* Jonas Große Sundrup <cherti@letopolis.de>
* Julius Volz <julius.volz@gmail.com>
* Matthias Rampke <mr@soundcloud.com>
* Nicky Gerritsen <nicky@streamone.nl>
* Rémi Audebert <contact@halfr.net>
* Tobias Schmidt <tobidt@gmail.com>

6
vendor/github.com/prometheus/procfs/Makefile generated vendored Normal file
View File

@ -0,0 +1,6 @@
# ci runs the project's checks in order: it fails if gofmt lists any *.go file
# as unformatted, then runs go vet and the tests, and finally fetches and runs
# golint.
ci:
! gofmt -l *.go | read nothing
go vet
go test -v ./...
go get github.com/golang/lint/golint
golint *.go

View File

@ -27,14 +27,7 @@ func NewFS(mountPoint string) (FS, error) {
return FS(mountPoint), nil
}
func (fs FS) stat(p string) (os.FileInfo, error) {
return os.Stat(path.Join(string(fs), p))
}
func (fs FS) open(p string) (*os.File, error) {
return os.Open(path.Join(string(fs), p))
}
func (fs FS) readlink(p string) (string, error) {
return os.Readlink(path.Join(string(fs), p))
// Path returns the path of the given subsystem relative to the procfs root.
func (fs FS) Path(p ...string) string {
return path.Join(append([]string{string(fs)}, p...)...)
}

View File

@ -8,6 +8,7 @@ import (
"io"
"io/ioutil"
"net"
"os"
"strconv"
"strings"
)
@ -58,7 +59,7 @@ func NewIPVSStats() (IPVSStats, error) {
// NewIPVSStats reads the IPVS statistics from the specified `proc` filesystem.
func (fs FS) NewIPVSStats() (IPVSStats, error) {
file, err := fs.open("net/ip_vs_stats")
file, err := os.Open(fs.Path("net/ip_vs_stats"))
if err != nil {
return IPVSStats{}, err
}
@ -127,7 +128,7 @@ func NewIPVSBackendStatus() ([]IPVSBackendStatus, error) {
// NewIPVSBackendStatus reads and returns the status of all (virtual,real) server pairs from the specified `proc` filesystem.
func (fs FS) NewIPVSBackendStatus() ([]IPVSBackendStatus, error) {
file, err := fs.open("net/ip_vs")
file, err := os.Open(fs.Path("net/ip_vs"))
if err != nil {
return nil, err
}

138
vendor/github.com/prometheus/procfs/mdstat.go generated vendored Normal file
View File

@ -0,0 +1,138 @@
package procfs
import (
"fmt"
"io/ioutil"
"regexp"
"strconv"
"strings"
)
var (
// statuslineRE matches the status line of an md device. Its three capture
// groups are the block count and the two numbers of the "[n/m]" counter;
// evalStatusline reads them as size, total disks and active disks.
statuslineRE = regexp.MustCompile(`(\d+) blocks .*\[(\d+)/(\d+)\] \[[U_]+\]`)
// buildlineRE captures the first number of a parenthesized "(x/y)" counter;
// evalBuildline reads it as the number of synced blocks.
buildlineRE = regexp.MustCompile(`\((\d+)/\d+\)`)
)
// MDStat holds the information parsed from /proc/mdstat for a single md
// device.
type MDStat struct {
// Name of the device (the first space-separated field of its mdstat line).
Name string
// Activity state of the device (the third space-separated field of its
// mdstat line).
ActivityState string
// Number of active disks.
DisksActive int64
// Total number of disks the device consists of.
DisksTotal int64
// Number of blocks the device holds.
BlocksTotal int64
// Number of blocks on the device that are in sync. ParseMDStat sets this to
// BlocksTotal unless the device reports a recovery or resync in progress.
BlocksSynced int64
}
// ParseMDStat reads the mdstat file below the fs root and returns one MDStat
// entry per md device found in it. If the file cannot be read, an empty slice
// and an error are returned; if a device cannot be parsed, the devices parsed
// so far are returned together with the error.
func (fs FS) ParseMDStat() (mdstates []MDStat, err error) {
	statPath := fs.Path("mdstat")
	raw, err := ioutil.ReadFile(statPath)
	if err != nil {
		return []MDStat{}, fmt.Errorf("error parsing %s: %s", statPath, err)
	}

	devices := []MDStat{}
	lines := strings.Split(string(raw), "\n")
	for idx, line := range lines {
		// Only device lines are of interest: skip blank lines, indented
		// continuation lines, and the "Personalities"/"unused" lines.
		switch {
		case len(line) == 0 || line[0] == ' ':
			continue
		case strings.HasPrefix(line, "Personalities") || strings.HasPrefix(line, "unused"):
			continue
		}

		fields := strings.Split(line, " ")
		if len(fields) < 3 {
			return devices, fmt.Errorf("error parsing mdline: %s", line)
		}
		name, state := fields[0], fields[2]

		// Up to three lines following the device line are inspected below.
		if idx+3 >= len(lines) {
			return devices, fmt.Errorf(
				"error parsing %s: too few lines for md device %s",
				statPath,
				name,
			)
		}

		active, total, size, err := evalStatusline(lines[idx+1])
		if err != nil {
			return devices, fmt.Errorf("error parsing %s: %s", statPath, err)
		}

		// The sync progress line follows the status line, unless a bitmap
		// line sits in between.
		syncLine := lines[idx+2]
		if strings.Contains(syncLine, "bitmap") {
			syncLine = lines[idx+3]
		}

		// A device that is not syncing is treated as fully synced.
		synced := size
		if strings.Contains(syncLine, "recovery") || strings.Contains(syncLine, "resync") {
			if synced, err = evalBuildline(syncLine); err != nil {
				return devices, fmt.Errorf("error parsing %s: %s", statPath, err)
			}
		}

		devices = append(devices, MDStat{
			Name:          name,
			ActivityState: state,
			DisksActive:   active,
			DisksTotal:    total,
			BlocksTotal:   size,
			BlocksSynced:  synced,
		})
	}

	return devices, nil
}
// evalStatusline extracts the number of active disks, the total number of
// disks and the device size in blocks from the status line of an md device.
// It returns an error (and zero values) if the line does not match
// statuslineRE or one of the captured numbers does not fit into an int64.
func evalStatusline(statusline string) (active, total, size int64, err error) {
	groups := statuslineRE.FindStringSubmatch(statusline)
	if len(groups) != 4 {
		return 0, 0, 0, fmt.Errorf("unexpected statusline: %s", statusline)
	}

	// The capture groups hold, in this order, the block count and the two
	// numbers of the "[n/m]" counter (total disks, then active disks).
	for n, dst := range []*int64{&size, &total, &active} {
		if *dst, err = strconv.ParseInt(groups[n+1], 10, 64); err != nil {
			return 0, 0, 0, fmt.Errorf("unexpected statusline %s: %s", statusline, err)
		}
	}

	return active, total, size, nil
}
// evalBuildline extracts the number of already synced blocks from the progress
// line of an md device, i.e. the first number of its "(x/y)" counter. It
// returns an error (and zero) if the line does not match buildlineRE or the
// number does not fit into an int64.
func evalBuildline(buildline string) (syncedBlocks int64, err error) {
	groups := buildlineRE.FindStringSubmatch(buildline)
	if len(groups) != 2 {
		return 0, fmt.Errorf("unexpected buildline: %s", buildline)
	}

	n, parseErr := strconv.ParseInt(groups[1], 10, 64)
	if parseErr != nil {
		return 0, fmt.Errorf("%s in buildline: %s", parseErr, buildline)
	}

	return n, nil
}

View File

@ -4,7 +4,6 @@ import (
"fmt"
"io/ioutil"
"os"
"path"
"strconv"
"strings"
)
@ -24,9 +23,13 @@ func (p Procs) Len() int { return len(p) }
func (p Procs) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func (p Procs) Less(i, j int) bool { return p[i].PID < p[j].PID }
// Self returns a process for the current process.
// Self returns a process for the current process read via /proc/self.
func Self() (Proc, error) {
return NewProc(os.Getpid())
fs, err := NewFS(DefaultMountPoint)
if err != nil {
return Proc{}, err
}
return fs.Self()
}
// NewProc returns a process for the given pid under /proc.
@ -35,32 +38,42 @@ func NewProc(pid int) (Proc, error) {
if err != nil {
return Proc{}, err
}
return fs.NewProc(pid)
}
// AllProcs returns a list of all currently avaible processes under /proc.
// AllProcs returns a list of all currently available processes under /proc.
func AllProcs() (Procs, error) {
fs, err := NewFS(DefaultMountPoint)
if err != nil {
return Procs{}, err
}
return fs.AllProcs()
}
// Self returns a process for the current process. The PID is taken from the
// target of the "self" symlink below the fs root.
func (fs FS) Self() (Proc, error) {
	target, err := os.Readlink(fs.Path("self"))
	if err != nil {
		return Proc{}, err
	}

	// Remove every occurrence of the mount point from the link target; what
	// remains has to be the numeric PID.
	pidText := strings.Replace(target, string(fs), "", -1)
	pid, err := strconv.Atoi(pidText)
	if err != nil {
		return Proc{}, err
	}

	return fs.NewProc(pid)
}
// NewProc returns a process for the given pid.
func (fs FS) NewProc(pid int) (Proc, error) {
if _, err := fs.stat(strconv.Itoa(pid)); err != nil {
if _, err := os.Stat(fs.Path(strconv.Itoa(pid))); err != nil {
return Proc{}, err
}
return Proc{PID: pid, fs: fs}, nil
}
// AllProcs returns a list of all currently avaible processes.
// AllProcs returns a list of all currently available processes.
func (fs FS) AllProcs() (Procs, error) {
d, err := fs.open("")
d, err := os.Open(fs.Path())
if err != nil {
return Procs{}, err
}
@ -85,7 +98,7 @@ func (fs FS) AllProcs() (Procs, error) {
// CmdLine returns the command line of a process.
func (p Proc) CmdLine() ([]string, error) {
f, err := p.open("cmdline")
f, err := os.Open(p.path("cmdline"))
if err != nil {
return nil, err
}
@ -103,10 +116,25 @@ func (p Proc) CmdLine() ([]string, error) {
return strings.Split(string(data[:len(data)-1]), string(byte(0))), nil
}
// Comm returns the command name of a process, i.e. the content of the
// process's "comm" file with surrounding whitespace removed.
func (p Proc) Comm() (string, error) {
	raw, err := ioutil.ReadFile(p.path("comm"))
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(raw)), nil
}
// Executable returns the absolute path of the executable command of a process.
func (p Proc) Executable() (string, error) {
exe, err := p.readlink("exe")
exe, err := os.Readlink(p.path("exe"))
if os.IsNotExist(err) {
return "", nil
}
@ -144,7 +172,7 @@ func (p Proc) FileDescriptorTargets() ([]string, error) {
targets := make([]string, len(names))
for i, name := range names {
target, err := p.readlink("fd/" + name)
target, err := os.Readlink(p.path("fd", name))
if err == nil {
targets[i] = target
}
@ -165,7 +193,7 @@ func (p Proc) FileDescriptorsLen() (int, error) {
}
func (p Proc) fileDescriptors() ([]string, error) {
d, err := p.open("fd")
d, err := os.Open(p.path("fd"))
if err != nil {
return nil, err
}
@ -179,10 +207,6 @@ func (p Proc) fileDescriptors() ([]string, error) {
return names, nil
}
func (p Proc) open(pa string) (*os.File, error) {
return p.fs.open(path.Join(strconv.Itoa(p.PID), pa))
}
func (p Proc) readlink(pa string) (string, error) {
return p.fs.readlink(path.Join(strconv.Itoa(p.PID), pa))
func (p Proc) path(pa ...string) string {
return p.fs.Path(append([]string{strconv.Itoa(p.PID)}, pa...)...)
}

View File

@ -3,6 +3,7 @@ package procfs
import (
"fmt"
"io/ioutil"
"os"
)
// ProcIO models the content of /proc/<pid>/io.
@ -29,7 +30,7 @@ type ProcIO struct {
func (p Proc) NewIO() (ProcIO, error) {
pio := ProcIO{}
f, err := p.open("io")
f, err := os.Open(p.path("io"))
if err != nil {
return pio, err
}

View File

@ -3,29 +3,56 @@ package procfs
import (
"bufio"
"fmt"
"os"
"regexp"
"strconv"
)
// ProcLimits represents the soft limits for each of the process's resource
// limits.
// limits. For more information see getrlimit(2):
// http://man7.org/linux/man-pages/man2/getrlimit.2.html.
type ProcLimits struct {
CPUTime int
FileSize int
DataSize int
StackSize int
CoreFileSize int
ResidentSet int
Processes int
OpenFiles int
LockedMemory int
AddressSpace int
FileLocks int
PendingSignals int
MsqqueueSize int
NicePriority int
// CPU time limit in seconds.
CPUTime int
// Maximum size of files that the process may create.
FileSize int
// Maximum size of the process's data segment (initialized data,
// uninitialized data, and heap).
DataSize int
// Maximum size of the process stack in bytes.
StackSize int
// Maximum size of a core file.
CoreFileSize int
// Limit of the process's resident set in pages.
ResidentSet int
// Maximum number of processes that can be created for the real user ID of
// the calling process.
Processes int
// Value one greater than the maximum file descriptor number that can be
// opened by this process.
OpenFiles int
// Maximum number of bytes of memory that may be locked into RAM.
LockedMemory int
// Maximum size of the process's virtual memory address space in bytes.
AddressSpace int
// Limit on the combined number of flock(2) locks and fcntl(2) leases that
// this process may establish.
FileLocks int
// Limit of signals that may be queued for the real user ID of the calling
// process.
PendingSignals int
// Limit on the number of bytes that can be allocated for POSIX message
// queues for the real user ID of the calling process.
MsqqueueSize int
// Limit of the nice priority set using setpriority(2) or nice(2).
NicePriority int
// Limit of the real-time priority set using sched_setscheduler(2) or
// sched_setparam(2).
RealtimePriority int
RealtimeTimeout int
// Limit (in microseconds) on the amount of CPU time that a process
// scheduled under a real-time scheduling policy may consume without making
// a blocking system call.
RealtimeTimeout int
}
const (
@ -39,7 +66,7 @@ var (
// NewLimits returns the current soft limits of the process.
func (p Proc) NewLimits() (ProcLimits, error) {
f, err := p.open("limits")
f, err := os.Open(p.path("limits"))
if err != nil {
return ProcLimits{}, err
}
@ -60,7 +87,7 @@ func (p Proc) NewLimits() (ProcLimits, error) {
case "Max cpu time":
l.CPUTime, err = parseInt(fields[1])
case "Max file size":
l.FileLocks, err = parseInt(fields[1])
l.FileSize, err = parseInt(fields[1])
case "Max data size":
l.DataSize, err = parseInt(fields[1])
case "Max stack size":
@ -90,7 +117,6 @@ func (p Proc) NewLimits() (ProcLimits, error) {
case "Max realtime timeout":
l.RealtimeTimeout, err = parseInt(fields[1])
}
if err != nil {
return ProcLimits{}, err
}

View File

@ -7,15 +7,15 @@ import (
"os"
)
// Originally, this USER_HZ value was dynamically retrieved via a sysconf call which
// required cgo. However, that caused a lot of problems regarding
// Originally, this USER_HZ value was dynamically retrieved via a sysconf call
// which required cgo. However, that caused a lot of problems regarding
// cross-compilation. Alternatives such as running a binary to determine the
// value, or trying to derive it in some other way were all problematic.
// After much research it was determined that USER_HZ is actually hardcoded to
// 100 on all Go-supported platforms as of the time of this writing. This is
// why we decided to hardcode it here as well. It is not impossible that there
// could be systems with exceptions, but they should be very exotic edge cases,
// and in that case, the worst outcome will be two misreported metrics.
// value, or trying to derive it in some other way were all problematic. After
// much research it was determined that USER_HZ is actually hardcoded to 100 on
// all Go-supported platforms as of the time of this writing. This is why we
// decided to hardcode it here as well. It is not impossible that there could
// be systems with exceptions, but they should be very exotic edge cases, and
// in that case, the worst outcome will be two misreported metrics.
//
// See also the following discussions:
//
@ -91,7 +91,7 @@ type ProcStat struct {
// NewStat returns the current status information of the process.
func (p Proc) NewStat() (ProcStat, error) {
f, err := p.open("stat")
f, err := os.Open(p.path("stat"))
if err != nil {
return ProcStat{}, err
}

View File

@ -3,6 +3,7 @@ package procfs
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
@ -25,7 +26,7 @@ func NewStat() (Stat, error) {
// NewStat returns information about current kernel/system statistics.
func (fs FS) NewStat() (Stat, error) {
f, err := fs.open("stat")
f, err := os.Open(fs.Path("stat"))
if err != nil {
return Stat{}, err
}

View File

@ -12,8 +12,10 @@ import (
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/memdb"
"github.com/syndtr/goleveldb/leveldb/storage"
)
// ErrBatchCorrupted records reason of batch corruption.
type ErrBatchCorrupted struct {
Reason string
}
@ -23,7 +25,7 @@ func (e *ErrBatchCorrupted) Error() string {
}
func newErrBatchCorrupted(reason string) error {
return errors.NewErrCorrupted(nil, &ErrBatchCorrupted{reason})
return errors.NewErrCorrupted(storage.FileDesc{}, &ErrBatchCorrupted{reason})
}
const (
@ -31,6 +33,7 @@ const (
batchGrowRec = 3000
)
// BatchReplay wraps basic batch operations.
type BatchReplay interface {
Put(key, value []byte)
Delete(key []byte)
@ -67,20 +70,20 @@ func (b *Batch) grow(n int) {
}
}
func (b *Batch) appendRec(kt kType, key, value []byte) {
func (b *Batch) appendRec(kt keyType, key, value []byte) {
n := 1 + binary.MaxVarintLen32 + len(key)
if kt == ktVal {
if kt == keyTypeVal {
n += binary.MaxVarintLen32 + len(value)
}
b.grow(n)
off := len(b.data)
data := b.data[:off+n]
data[off] = byte(kt)
off += 1
off++
off += binary.PutUvarint(data[off:], uint64(len(key)))
copy(data[off:], key)
off += len(key)
if kt == ktVal {
if kt == keyTypeVal {
off += binary.PutUvarint(data[off:], uint64(len(value)))
copy(data[off:], value)
off += len(value)
@ -94,13 +97,13 @@ func (b *Batch) appendRec(kt kType, key, value []byte) {
// Put appends 'put operation' of the given key/value pair to the batch.
// It is safe to modify the contents of the argument after Put returns.
func (b *Batch) Put(key, value []byte) {
b.appendRec(ktVal, key, value)
b.appendRec(keyTypeVal, key, value)
}
// Delete appends 'delete operation' of the given key to the batch.
// It is safe to modify the contents of the argument after Delete returns.
func (b *Batch) Delete(key []byte) {
b.appendRec(ktDel, key, nil)
b.appendRec(keyTypeDel, key, nil)
}
// Dump dumps batch contents. The returned slice can be loaded into the
@ -121,13 +124,14 @@ func (b *Batch) Load(data []byte) error {
// Replay replays batch contents.
func (b *Batch) Replay(r BatchReplay) error {
return b.decodeRec(func(i int, kt kType, key, value []byte) {
return b.decodeRec(func(i int, kt keyType, key, value []byte) error {
switch kt {
case ktVal:
case keyTypeVal:
r.Put(key, value)
case ktDel:
case keyTypeDel:
r.Delete(key)
}
return nil
})
}
@ -154,6 +158,7 @@ func (b *Batch) append(p *Batch) {
b.grow(len(p.data) - batchHdrLen)
b.data = append(b.data, p.data[batchHdrLen:]...)
b.rLen += p.rLen
b.bLen += p.bLen
}
if p.sync {
b.sync = true
@ -193,18 +198,19 @@ func (b *Batch) decode(prevSeq uint64, data []byte) error {
return nil
}
func (b *Batch) decodeRec(f func(i int, kt kType, key, value []byte)) (err error) {
func (b *Batch) decodeRec(f func(i int, kt keyType, key, value []byte) error) error {
off := batchHdrLen
for i := 0; i < b.rLen; i++ {
if off >= len(b.data) {
return newErrBatchCorrupted("invalid records length")
}
kt := kType(b.data[off])
if kt > ktVal {
kt := keyType(b.data[off])
if kt > keyTypeVal {
panic(kt)
return newErrBatchCorrupted("bad record: invalid type")
}
off += 1
off++
x, n := binary.Uvarint(b.data[off:])
off += n
@ -214,7 +220,7 @@ func (b *Batch) decodeRec(f func(i int, kt kType, key, value []byte)) (err error
key := b.data[off : off+int(x)]
off += int(x)
var value []byte
if kt == ktVal {
if kt == keyTypeVal {
x, n := binary.Uvarint(b.data[off:])
off += n
if n <= 0 || off+int(x) > len(b.data) {
@ -224,16 +230,19 @@ func (b *Batch) decodeRec(f func(i int, kt kType, key, value []byte)) (err error
off += int(x)
}
f(i, kt, key, value)
if err := f(i, kt, key, value); err != nil {
return err
}
}
return nil
}
func (b *Batch) memReplay(to *memdb.DB) error {
return b.decodeRec(func(i int, kt kType, key, value []byte) {
ikey := newIkey(key, b.seq+uint64(i), kt)
to.Put(ikey, value)
var ikScratch []byte
return b.decodeRec(func(i int, kt keyType, key, value []byte) error {
ikScratch = makeInternalKey(ikScratch, key, b.seq+uint64(i), kt)
return to.Put(ikScratch, value)
})
}
@ -245,8 +254,9 @@ func (b *Batch) memDecodeAndReplay(prevSeq uint64, data []byte, to *memdb.DB) er
}
func (b *Batch) revertMemReplay(to *memdb.DB) error {
return b.decodeRec(func(i int, kt kType, key, value []byte) {
ikey := newIkey(key, b.seq+uint64(i), kt)
to.Delete(ikey)
var ikScratch []byte
return b.decodeRec(func(i int, kt keyType, key, value []byte) error {
ikScratch := makeInternalKey(ikScratch, key, b.seq+uint64(i), kt)
return to.Delete(ikScratch)
})
}

View File

@ -47,17 +47,21 @@ type Cacher interface {
// so the Release method will be called once the object is released.
type Value interface{}
type CacheGetter struct {
// NamespaceGetter provides convenient wrapper for namespace.
type NamespaceGetter struct {
Cache *Cache
NS uint64
}
func (g *CacheGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle {
// Get simply calls Cache.Get() method.
func (g *NamespaceGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle {
return g.Cache.Get(g.NS, key, setFunc)
}
// The hash tables implementation is based on:
// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu, Kunlong Zhang, and Michael Spear. ACM Symposium on Principles of Distributed Computing, Jul 2014.
// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu,
// Kunlong Zhang, and Michael Spear.
// ACM Symposium on Principles of Distributed Computing, Jul 2014.
const (
mInitialSize = 1 << 4
@ -610,10 +614,12 @@ func (n *Node) unrefLocked() {
}
}
// Handle is a 'cache handle' of a 'cache node'.
type Handle struct {
n unsafe.Pointer // *Node
}
// Value returns the value of the 'cache node'.
func (h *Handle) Value() Value {
n := (*Node)(atomic.LoadPointer(&h.n))
if n != nil {
@ -622,6 +628,8 @@ func (h *Handle) Value() Value {
return nil
}
// Release releases this 'cache handle'.
// It is safe to call release multiple times.
func (h *Handle) Release() {
nPtr := atomic.LoadPointer(&h.n)
if nPtr != nil && atomic.CompareAndSwapPointer(&h.n, nPtr, nil) {

View File

@ -33,9 +33,9 @@ func (icmp *iComparer) Name() string {
}
func (icmp *iComparer) Compare(a, b []byte) int {
x := icmp.ucmp.Compare(iKey(a).ukey(), iKey(b).ukey())
x := icmp.ucmp.Compare(internalKey(a).ukey(), internalKey(b).ukey())
if x == 0 {
if m, n := iKey(a).num(), iKey(b).num(); m > n {
if m, n := internalKey(a).num(), internalKey(b).num(); m > n {
x = -1
} else if m < n {
x = 1
@ -45,13 +45,13 @@ func (icmp *iComparer) Compare(a, b []byte) int {
}
func (icmp *iComparer) Separator(dst, a, b []byte) []byte {
ua, ub := iKey(a).ukey(), iKey(b).ukey()
ua, ub := internalKey(a).ukey(), internalKey(b).ukey()
dst = icmp.ucmp.Separator(dst, ua, ub)
if dst == nil {
return nil
}
if len(dst) < len(ua) && icmp.uCompare(ua, dst) < 0 {
dst = append(dst, kMaxNumBytes...)
dst = append(dst, keyMaxNumBytes...)
} else {
// Did not close possibilities that n maybe longer than len(ub).
dst = append(dst, a[len(a)-8:]...)
@ -60,13 +60,13 @@ func (icmp *iComparer) Separator(dst, a, b []byte) []byte {
}
func (icmp *iComparer) Successor(dst, b []byte) []byte {
ub := iKey(b).ukey()
ub := internalKey(b).ukey()
dst = icmp.ucmp.Successor(dst, ub)
if dst == nil {
return nil
}
if len(dst) < len(ub) && icmp.uCompare(ub, dst) < 0 {
dst = append(dst, kMaxNumBytes...)
dst = append(dst, keyMaxNumBytes...)
} else {
// Did not close possibilities that n maybe longer than len(ub).
dst = append(dst, b[len(b)-8:]...)

View File

@ -36,14 +36,14 @@ type DB struct {
s *session
// MemDB.
memMu sync.RWMutex
memPool chan *memdb.DB
mem, frozenMem *memDB
journal *journal.Writer
journalWriter storage.Writer
journalFile storage.File
frozenJournalFile storage.File
frozenSeq uint64
memMu sync.RWMutex
memPool chan *memdb.DB
mem, frozenMem *memDB
journal *journal.Writer
journalWriter storage.Writer
journalFd storage.FileDesc
frozenJournalFd storage.FileDesc
frozenSeq uint64
// Snapshot.
snapsMu sync.Mutex
@ -61,8 +61,10 @@ type DB struct {
writeDelayN int
journalC chan *Batch
journalAckC chan error
tr *Transaction
// Compaction.
compCommitLk sync.Mutex
tcompCmdC chan cCmd
tcompPauseC chan chan<- struct{}
mcompCmdC chan cCmd
@ -70,7 +72,8 @@ type DB struct {
compPerErrC chan error
compErrSetC chan error
compWriteLocking bool
compStats []cStats
compStats cStats
memdbMaxLevel int // For testing.
// Close.
closeW sync.WaitGroup
@ -104,7 +107,6 @@ func openDB(s *session) (*DB, error) {
compErrC: make(chan error),
compPerErrC: make(chan error),
compErrSetC: make(chan error),
compStats: make([]cStats, s.o.GetNumLevel()),
// Close
closeC: make(chan struct{}),
}
@ -209,7 +211,7 @@ func Open(stor storage.Storage, o *opt.Options) (db *DB, err error) {
// The returned DB instance is goroutine-safe.
// The DB must be closed after use, by calling Close method.
func OpenFile(path string, o *opt.Options) (db *DB, err error) {
stor, err := storage.OpenFile(path)
stor, err := storage.OpenFile(path, o.GetReadOnly())
if err != nil {
return
}
@ -259,7 +261,7 @@ func Recover(stor storage.Storage, o *opt.Options) (db *DB, err error) {
// The returned DB instance is goroutine-safe.
// The DB must be closed after use, by calling Close method.
func RecoverFile(path string, o *opt.Options) (db *DB, err error) {
stor, err := storage.OpenFile(path)
stor, err := storage.OpenFile(path, false)
if err != nil {
return
}
@ -278,12 +280,11 @@ func recoverTable(s *session, o *opt.Options) error {
o.Strict &= ^opt.StrictReader
// Get all tables and sort it by file number.
tableFiles_, err := s.getFiles(storage.TypeTable)
fds, err := s.stor.List(storage.TypeTable)
if err != nil {
return err
}
tableFiles := files(tableFiles_)
tableFiles.sort()
sortFds(fds)
var (
maxSeq uint64
@ -296,17 +297,17 @@ func recoverTable(s *session, o *opt.Options) error {
rec = &sessionRecord{}
bpool = util.NewBufferPool(o.GetBlockSize() + 5)
)
buildTable := func(iter iterator.Iterator) (tmp storage.File, size int64, err error) {
tmp = s.newTemp()
writer, err := tmp.Create()
buildTable := func(iter iterator.Iterator) (tmpFd storage.FileDesc, size int64, err error) {
tmpFd = s.newTemp()
writer, err := s.stor.Create(tmpFd)
if err != nil {
return
}
defer func() {
writer.Close()
if err != nil {
tmp.Remove()
tmp = nil
s.stor.Remove(tmpFd)
tmpFd = storage.FileDesc{}
}
}()
@ -314,7 +315,7 @@ func recoverTable(s *session, o *opt.Options) error {
tw := table.NewWriter(writer, o)
for iter.Next() {
key := iter.Key()
if validIkey(key) {
if validInternalKey(key) {
err = tw.Append(key, iter.Value())
if err != nil {
return
@ -338,9 +339,9 @@ func recoverTable(s *session, o *opt.Options) error {
size = int64(tw.BytesLen())
return
}
recoverTable := func(file storage.File) error {
s.logf("table@recovery recovering @%d", file.Num())
reader, err := file.Open()
recoverTable := func(fd storage.FileDesc) error {
s.logf("table@recovery recovering @%d", fd.Num)
reader, err := s.stor.Open(fd)
if err != nil {
return err
}
@ -362,7 +363,7 @@ func recoverTable(s *session, o *opt.Options) error {
tgoodKey, tcorruptedKey, tcorruptedBlock int
imin, imax []byte
)
tr, err := table.NewReader(reader, size, storage.NewFileInfo(file), nil, bpool, o)
tr, err := table.NewReader(reader, size, fd, nil, bpool, o)
if err != nil {
return err
}
@ -370,7 +371,7 @@ func recoverTable(s *session, o *opt.Options) error {
if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok {
itererr.SetErrorCallback(func(err error) {
if errors.IsCorrupted(err) {
s.logf("table@recovery block corruption @%d %q", file.Num(), err)
s.logf("table@recovery block corruption @%d %q", fd.Num, err)
tcorruptedBlock++
}
})
@ -379,7 +380,7 @@ func recoverTable(s *session, o *opt.Options) error {
// Scan the table.
for iter.Next() {
key := iter.Key()
_, seq, _, kerr := parseIkey(key)
_, seq, _, kerr := parseInternalKey(key)
if kerr != nil {
tcorruptedKey++
continue
@ -405,23 +406,23 @@ func recoverTable(s *session, o *opt.Options) error {
if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
droppedTable++
s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
return nil
}
if tgoodKey > 0 {
if tcorruptedKey > 0 || tcorruptedBlock > 0 {
// Rebuild the table.
s.logf("table@recovery rebuilding @%d", file.Num())
s.logf("table@recovery rebuilding @%d", fd.Num)
iter := tr.NewIterator(nil, nil)
tmp, newSize, err := buildTable(iter)
tmpFd, newSize, err := buildTable(iter)
iter.Release()
if err != nil {
return err
}
closed = true
reader.Close()
if err := file.Replace(tmp); err != nil {
if err := s.stor.Rename(tmpFd, fd); err != nil {
return err
}
size = newSize
@ -431,30 +432,30 @@ func recoverTable(s *session, o *opt.Options) error {
}
recoveredKey += tgoodKey
// Add table to level 0.
rec.addTable(0, file.Num(), uint64(size), imin, imax)
s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
rec.addTable(0, fd.Num, size, imin, imax)
s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
} else {
droppedTable++
s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", file.Num(), tcorruptedKey, tcorruptedBlock, size)
s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", fd.Num, tcorruptedKey, tcorruptedBlock, size)
}
return nil
}
// Recover all tables.
if len(tableFiles) > 0 {
s.logf("table@recovery F·%d", len(tableFiles))
if len(fds) > 0 {
s.logf("table@recovery F·%d", len(fds))
// Mark file number as used.
s.markFileNum(tableFiles[len(tableFiles)-1].Num())
s.markFileNum(fds[len(fds)-1].Num)
for _, file := range tableFiles {
if err := recoverTable(file); err != nil {
for _, fd := range fds {
if err := recoverTable(fd); err != nil {
return err
}
}
s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(tableFiles), recoveredKey, goodKey, corruptedKey, maxSeq)
s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(fds), recoveredKey, goodKey, corruptedKey, maxSeq)
}
// Set sequence number.
@ -471,31 +472,31 @@ func recoverTable(s *session, o *opt.Options) error {
func (db *DB) recoverJournal() error {
// Get all journals and sort it by file number.
allJournalFiles, err := db.s.getFiles(storage.TypeJournal)
rawFds, err := db.s.stor.List(storage.TypeJournal)
if err != nil {
return err
}
files(allJournalFiles).sort()
sortFds(rawFds)
// Journals that will be recovered.
var recJournalFiles []storage.File
for _, jf := range allJournalFiles {
if jf.Num() >= db.s.stJournalNum || jf.Num() == db.s.stPrevJournalNum {
recJournalFiles = append(recJournalFiles, jf)
var fds []storage.FileDesc
for _, fd := range rawFds {
if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
fds = append(fds, fd)
}
}
var (
of storage.File // Obsolete file.
ofd storage.FileDesc // Obsolete file.
rec = &sessionRecord{}
)
// Recover journals.
if len(recJournalFiles) > 0 {
db.logf("journal@recovery F·%d", len(recJournalFiles))
if len(fds) > 0 {
db.logf("journal@recovery F·%d", len(fds))
// Mark file number as used.
db.s.markFileNum(recJournalFiles[len(recJournalFiles)-1].Num())
db.s.markFileNum(fds[len(fds)-1].Num)
var (
// Options.
@ -509,31 +510,31 @@ func (db *DB) recoverJournal() error {
batch = &Batch{}
)
for _, jf := range recJournalFiles {
db.logf("journal@recovery recovering @%d", jf.Num())
for _, fd := range fds {
db.logf("journal@recovery recovering @%d", fd.Num)
fr, err := jf.Open()
fr, err := db.s.stor.Open(fd)
if err != nil {
return err
}
// Create or reset journal reader instance.
if jr == nil {
jr = journal.NewReader(fr, dropper{db.s, jf}, strict, checksum)
jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
} else {
jr.Reset(fr, dropper{db.s, jf}, strict, checksum)
jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
}
// Flush memdb and remove obsolete journal file.
if of != nil {
if !ofd.Nil() {
if mdb.Len() > 0 {
if _, err := db.s.flushMemdb(rec, mdb, -1); err != nil {
if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
fr.Close()
return err
}
}
rec.setJournalNum(jf.Num())
rec.setJournalNum(fd.Num)
rec.setSeqNum(db.seq)
if err := db.s.commit(rec); err != nil {
fr.Close()
@ -541,8 +542,8 @@ func (db *DB) recoverJournal() error {
}
rec.resetAddedTables()
of.Remove()
of = nil
db.s.stor.Remove(ofd)
ofd = storage.FileDesc{}
}
// Replay journal to memdb.
@ -555,7 +556,7 @@ func (db *DB) recoverJournal() error {
}
fr.Close()
return errors.SetFile(err, jf)
return errors.SetFd(err, fd)
}
buf.Reset()
@ -566,7 +567,7 @@ func (db *DB) recoverJournal() error {
}
fr.Close()
return errors.SetFile(err, jf)
return errors.SetFd(err, fd)
}
if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mdb); err != nil {
if !strict && errors.IsCorrupted(err) {
@ -576,7 +577,7 @@ func (db *DB) recoverJournal() error {
}
fr.Close()
return errors.SetFile(err, jf)
return errors.SetFd(err, fd)
}
// Save sequence number.
@ -594,7 +595,7 @@ func (db *DB) recoverJournal() error {
}
fr.Close()
of = jf
ofd = fd
}
// Flush the last memdb.
@ -611,7 +612,7 @@ func (db *DB) recoverJournal() error {
}
// Commit.
rec.setJournalNum(db.journalFile.Num())
rec.setJournalNum(db.journalFd.Num)
rec.setSeqNum(db.seq)
if err := db.s.commit(rec); err != nil {
// Close journal on error.
@ -623,8 +624,8 @@ func (db *DB) recoverJournal() error {
}
// Remove the last obsolete journal file.
if of != nil {
of.Remove()
if !ofd.Nil() {
db.s.stor.Remove(ofd)
}
return nil
@ -632,17 +633,17 @@ func (db *DB) recoverJournal() error {
func (db *DB) recoverJournalRO() error {
// Get all journals and sort it by file number.
allJournalFiles, err := db.s.getFiles(storage.TypeJournal)
rawFds, err := db.s.stor.List(storage.TypeJournal)
if err != nil {
return err
}
files(allJournalFiles).sort()
sortFds(rawFds)
// Journals that will be recovered.
var recJournalFiles []storage.File
for _, jf := range allJournalFiles {
if jf.Num() >= db.s.stJournalNum || jf.Num() == db.s.stPrevJournalNum {
recJournalFiles = append(recJournalFiles, jf)
var fds []storage.FileDesc
for _, fd := range rawFds {
if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
fds = append(fds, fd)
}
}
@ -656,8 +657,8 @@ func (db *DB) recoverJournalRO() error {
)
// Recover journals.
if len(recJournalFiles) > 0 {
db.logf("journal@recovery RO·Mode F·%d", len(recJournalFiles))
if len(fds) > 0 {
db.logf("journal@recovery RO·Mode F·%d", len(fds))
var (
jr *journal.Reader
@ -665,19 +666,19 @@ func (db *DB) recoverJournalRO() error {
batch = &Batch{}
)
for _, jf := range recJournalFiles {
db.logf("journal@recovery recovering @%d", jf.Num())
for _, fd := range fds {
db.logf("journal@recovery recovering @%d", fd.Num)
fr, err := jf.Open()
fr, err := db.s.stor.Open(fd)
if err != nil {
return err
}
// Create or reset journal reader instance.
if jr == nil {
jr = journal.NewReader(fr, dropper{db.s, jf}, strict, checksum)
jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
} else {
jr.Reset(fr, dropper{db.s, jf}, strict, checksum)
jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
}
// Replay journal to memdb.
@ -689,7 +690,7 @@ func (db *DB) recoverJournalRO() error {
}
fr.Close()
return errors.SetFile(err, jf)
return errors.SetFd(err, fd)
}
buf.Reset()
@ -700,7 +701,7 @@ func (db *DB) recoverJournalRO() error {
}
fr.Close()
return errors.SetFile(err, jf)
return errors.SetFd(err, fd)
}
if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mdb); err != nil {
if !strict && errors.IsCorrupted(err) {
@ -710,7 +711,7 @@ func (db *DB) recoverJournalRO() error {
}
fr.Close()
return errors.SetFile(err, jf)
return errors.SetFd(err, fd)
}
// Save sequence number.
@ -727,46 +728,35 @@ func (db *DB) recoverJournalRO() error {
return nil
}
func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
ikey := newIkey(key, seq, ktSeek)
em, fm := db.getMems()
for _, m := range [...]*memDB{em, fm} {
if m == nil {
continue
func memGet(mdb *memdb.DB, ikey internalKey, icmp *iComparer) (ok bool, mv []byte, err error) {
mk, mv, err := mdb.Find(ikey)
if err == nil {
ukey, _, kt, kerr := parseInternalKey(mk)
if kerr != nil {
// This should never happen.
panic(kerr)
}
defer m.decref()
if icmp.uCompare(ukey, ikey.ukey()) == 0 {
if kt == keyTypeDel {
return true, nil, ErrNotFound
}
return true, mv, nil
mk, mv, me := m.Find(ikey)
if me == nil {
ukey, _, kt, kerr := parseIkey(mk)
if kerr != nil {
// Shouldn't have had happen.
panic(kerr)
}
if db.s.icmp.uCompare(ukey, key) == 0 {
if kt == ktDel {
return nil, ErrNotFound
}
return append([]byte{}, mv...), nil
}
} else if me != ErrNotFound {
return nil, me
}
}
v := db.s.version()
value, cSched, err := v.get(ikey, ro, false)
v.release()
if cSched {
// Trigger table compaction.
db.compSendTrigger(db.tcompCmdC)
} else if err != ErrNotFound {
return true, nil, err
}
return
}
func (db *DB) has(key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) {
ikey := newIkey(key, seq, ktSeek)
func (db *DB) get(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
ikey := makeInternalKey(nil, key, seq, keyTypeSeek)
if auxm != nil {
if ok, mv, me := memGet(auxm, ikey, db.s.icmp); ok {
return append([]byte{}, mv...), me
}
}
em, fm := db.getMems()
for _, m := range [...]*memDB{em, fm} {
@ -775,30 +765,55 @@ func (db *DB) has(key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err er
}
defer m.decref()
mk, _, me := m.Find(ikey)
if me == nil {
ukey, _, kt, kerr := parseIkey(mk)
if kerr != nil {
// Shouldn't have had happen.
panic(kerr)
}
if db.s.icmp.uCompare(ukey, key) == 0 {
if kt == ktDel {
return false, nil
}
return true, nil
}
} else if me != ErrNotFound {
return false, me
if ok, mv, me := memGet(m.DB, ikey, db.s.icmp); ok {
return append([]byte{}, mv...), me
}
}
v := db.s.version()
_, cSched, err := v.get(ikey, ro, true)
value, cSched, err := v.get(auxt, ikey, ro, false)
v.release()
if cSched {
// Trigger table compaction.
db.compSendTrigger(db.tcompCmdC)
db.compTrigger(db.tcompCmdC)
}
return
}
// nilIfNotFound translates ErrNotFound into nil and hands every other
// error value (nil included) back to the caller untouched.
func nilIfNotFound(err error) error {
	if err != ErrNotFound {
		return err
	}
	return nil
}
func (db *DB) has(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) {
ikey := makeInternalKey(nil, key, seq, keyTypeSeek)
if auxm != nil {
if ok, _, me := memGet(auxm, ikey, db.s.icmp); ok {
return me == nil, nilIfNotFound(me)
}
}
em, fm := db.getMems()
for _, m := range [...]*memDB{em, fm} {
if m == nil {
continue
}
defer m.decref()
if ok, _, me := memGet(m.DB, ikey, db.s.icmp); ok {
return me == nil, nilIfNotFound(me)
}
}
v := db.s.version()
_, cSched, err := v.get(auxt, ikey, ro, true)
v.release()
if cSched {
// Trigger table compaction.
db.compTrigger(db.tcompCmdC)
}
if err == nil {
ret = true
@ -822,7 +837,7 @@ func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
se := db.acquireSnapshot()
defer db.releaseSnapshot(se)
return db.get(key, se.seq, ro)
return db.get(nil, nil, key, se.seq, ro)
}
// Has returns true if the DB contains the given key.
@ -836,11 +851,11 @@ func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
se := db.acquireSnapshot()
defer db.releaseSnapshot(se)
return db.has(key, se.seq, ro)
return db.has(nil, nil, key, se.seq, ro)
}
// NewIterator returns an iterator for the latest snapshot of the
// uderlying DB.
// underlying DB.
// The returned iterator is not goroutine-safe, but it is safe to use
// multiple iterators concurrently, with each in a dedicated goroutine.
// It is also safe to use an iterator concurrently with modifying its
@ -864,7 +879,7 @@ func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Itera
defer db.releaseSnapshot(se)
// Iterator holds 'version' lock, 'version' is immutable so snapshot
// can be released after iterator created.
return db.newIterator(se.seq, slice, ro)
return db.newIterator(nil, nil, se.seq, slice, ro)
}
// GetSnapshot returns a latest snapshot of the underlying DB. A snapshot
@ -920,7 +935,7 @@ func (db *DB) GetProperty(name string) (value string, err error) {
var level uint
var rest string
n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
if n != 1 || int(level) >= db.s.o.GetNumLevel() {
if n != 1 {
err = ErrNotFound
} else {
value = fmt.Sprint(v.tLen(int(level)))
@ -929,8 +944,8 @@ func (db *DB) GetProperty(name string) (value string, err error) {
value = "Compactions\n" +
" Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)\n" +
"-------+------------+---------------+---------------+---------------+---------------\n"
for level, tables := range v.tables {
duration, read, write := db.compStats[level].get()
for level, tables := range v.levels {
duration, read, write := db.compStats.getStat(level)
if len(tables) == 0 && duration == 0 {
continue
}
@ -939,10 +954,10 @@ func (db *DB) GetProperty(name string) (value string, err error) {
float64(read)/1048576.0, float64(write)/1048576.0)
}
case p == "sstables":
for level, tables := range v.tables {
for level, tables := range v.levels {
value += fmt.Sprintf("--- level %d ---\n", level)
for _, t := range tables {
value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.file.Num(), t.size, t.imin, t.imax)
value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.fd.Num, t.size, t.imin, t.imax)
}
}
case p == "blockpool":
@ -982,8 +997,8 @@ func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) {
sizes := make(Sizes, 0, len(ranges))
for _, r := range ranges {
imin := newIkey(r.Start, kMaxSeq, ktSeek)
imax := newIkey(r.Limit, kMaxSeq, ktSeek)
imin := makeInternalKey(nil, r.Start, keyMaxSeq, keyTypeSeek)
imax := makeInternalKey(nil, r.Limit, keyMaxSeq, keyTypeSeek)
start, err := v.offsetOf(imin)
if err != nil {
return nil, err
@ -992,7 +1007,7 @@ func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) {
if err != nil {
return nil, err
}
var size uint64
var size int64
if limit >= start {
size = limit - start
}
@ -1002,8 +1017,8 @@ func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) {
return sizes, nil
}
// Close closes the DB. This will also releases any outstanding snapshot and
// abort any in-flight compaction.
// Close closes the DB. This will also release any outstanding snapshot,
// abort any in-flight compaction and discard any open transaction.
//
// It is not safe to close a DB until all outstanding iterators are released.
// It is valid to call Close multiple times. Other methods should not be
@ -1032,11 +1047,18 @@ func (db *DB) Close() error {
// Signal all goroutines.
close(db.closeC)
// Discard open transaction.
if db.tr != nil {
db.tr.Discard()
}
// Acquire writer lock.
db.writeLockC <- struct{}{}
// Wait for all goroutines to exit.
db.closeW.Wait()
// Lock writer and closes journal.
db.writeLockC <- struct{}{}
// Closes journal.
if db.journal != nil {
db.journal.Close()
db.journalWriter.Close()
@ -1063,8 +1085,6 @@ func (db *DB) Close() error {
db.frozenMem = nil
db.journal = nil
db.journalWriter = nil
db.journalFile = nil
db.frozenJournalFile = nil
db.closer = nil
return err

View File

@ -12,55 +12,76 @@ import (
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/storage"
)
var (
errCompactionTransactExiting = errors.New("leveldb: compaction transact exiting")
)
type cStats struct {
sync.Mutex
type cStat struct {
duration time.Duration
read uint64
write uint64
read int64
write int64
}
func (p *cStats) add(n *cStatsStaging) {
p.Lock()
func (p *cStat) add(n *cStatStaging) {
p.duration += n.duration
p.read += n.read
p.write += n.write
p.Unlock()
}
func (p *cStats) get() (duration time.Duration, read, write uint64) {
p.Lock()
defer p.Unlock()
func (p *cStat) get() (duration time.Duration, read, write int64) {
return p.duration, p.read, p.write
}
type cStatsStaging struct {
type cStatStaging struct {
start time.Time
duration time.Duration
on bool
read uint64
write uint64
read int64
write int64
}
func (p *cStatsStaging) startTimer() {
func (p *cStatStaging) startTimer() {
if !p.on {
p.start = time.Now()
p.on = true
}
}
func (p *cStatsStaging) stopTimer() {
func (p *cStatStaging) stopTimer() {
if p.on {
p.duration += time.Since(p.start)
p.on = false
}
}
// cStats collects compaction statistics per level. The stats slice is
// indexed by level and grows on demand; lk serializes access to it.
type cStats struct {
	lk    sync.Mutex
	stats []cStat
}

// addStat folds the staged counters n into the entry for level, extending
// the slice with zero-valued entries first when level is past its end.
func (p *cStats) addStat(level int, n *cStatStaging) {
	p.lk.Lock()
	if missing := level + 1 - len(p.stats); missing > 0 {
		p.stats = append(p.stats, make([]cStat, missing)...)
	}
	p.stats[level].add(n)
	p.lk.Unlock()
}

// getStat reports the accumulated duration, bytes read and bytes written
// for level. A level that has no entry yet yields all zero values.
func (p *cStats) getStat(level int) (duration time.Duration, read, write int64) {
	p.lk.Lock()
	defer p.lk.Unlock()
	if level >= len(p.stats) {
		return 0, 0, 0
	}
	return p.stats[level].get()
}
func (db *DB) compactionError() {
var err error
noerr:
@ -151,7 +172,7 @@ func (db *DB) compactionTransact(name string, t compactionTransactInterface) {
disableBackoff = db.s.o.GetDisableCompactionBackoff()
)
for n := 0; ; n++ {
// Check wether the DB is closed.
// Check whether the DB is closed.
if db.isClosed() {
db.logf("%s exiting", name)
db.compactionExitTransact()
@ -235,6 +256,14 @@ func (db *DB) compactionExitTransact() {
panic(errCompactionTransactExiting)
}
// compactionCommit commits the session record rec while holding
// db.compCommitLk. The commit itself runs through compactionTransactFunc
// under the transaction name name+"@commit"; nil is passed as the last
// argument (presumably the revert callback, as in the "memdb@flush"
// transaction — confirm against compactionTransactFunc).
func (db *DB) compactionCommit(name string, rec *sessionRecord) {
db.compCommitLk.Lock()
// NOTE(review): the defer presumably matters because the transaction can
// leave via panic (compactionExitTransact panics with
// errCompactionTransactExiting), so the lock has to be released while
// unwinding — confirm.
defer db.compCommitLk.Unlock() // Defer is necessary.
db.compactionTransactFunc(name+"@commit", func(cnt *compactionTransactCounter) error {
return db.s.commit(rec)
}, nil)
}
func (db *DB) memCompaction() {
mdb := db.getFrozenMem()
if mdb == nil {
@ -265,41 +294,40 @@ func (db *DB) memCompaction() {
var (
rec = &sessionRecord{}
stats = &cStatsStaging{}
stats = &cStatStaging{}
flushLevel int
)
// Generate tables.
db.compactionTransactFunc("memdb@flush", func(cnt *compactionTransactCounter) (err error) {
stats.startTimer()
flushLevel, err = db.s.flushMemdb(rec, mdb.DB, -1)
flushLevel, err = db.s.flushMemdb(rec, mdb.DB, db.memdbMaxLevel)
stats.stopTimer()
return
}, func() error {
for _, r := range rec.addedTables {
db.logf("memdb@flush revert @%d", r.num)
f := db.s.getTableFile(r.num)
if err := f.Remove(); err != nil {
if err := db.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: r.num}); err != nil {
return err
}
}
return nil
})
db.compactionTransactFunc("memdb@commit", func(cnt *compactionTransactCounter) (err error) {
stats.startTimer()
rec.setJournalNum(db.journalFile.Num())
rec.setSeqNum(db.frozenSeq)
err = db.s.commit(rec)
stats.stopTimer()
return
}, nil)
rec.setJournalNum(db.journalFd.Num)
rec.setSeqNum(db.frozenSeq)
// Commit.
stats.startTimer()
db.compactionCommit("memdb", rec)
stats.stopTimer()
db.logf("memdb@flush committed F·%d T·%v", len(rec.addedTables), stats.duration)
for _, r := range rec.addedTables {
stats.write += r.size
}
db.compStats[flushLevel].add(stats)
db.compStats.addStat(flushLevel, stats)
// Drop frozen memdb.
db.dropFrozenMem()
@ -315,7 +343,7 @@ func (db *DB) memCompaction() {
}
// Trigger table compaction.
db.compSendTrigger(db.tcompCmdC)
db.compTrigger(db.tcompCmdC)
}
type tableCompactionBuilder struct {
@ -323,7 +351,7 @@ type tableCompactionBuilder struct {
s *session
c *compaction
rec *sessionRecord
stat0, stat1 *cStatsStaging
stat0, stat1 *cStatStaging
snapHasLastUkey bool
snapLastUkey []byte
@ -377,9 +405,9 @@ func (b *tableCompactionBuilder) flush() error {
if err != nil {
return err
}
b.rec.addTableFile(b.c.level+1, t)
b.rec.addTableFile(b.c.sourceLevel+1, t)
b.stat1.write += t.size
b.s.logf("table@build created L%d@%d N·%d S·%s %q:%q", b.c.level+1, t.file.Num(), b.tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax)
b.s.logf("table@build created L%d@%d N·%d S·%s %q:%q", b.c.sourceLevel+1, t.fd.Num, b.tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax)
b.tw = nil
return nil
}
@ -424,7 +452,7 @@ func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error {
}
ikey := iter.Key()
ukey, seq, kt, kerr := parseIkey(ikey)
ukey, seq, kt, kerr := parseInternalKey(ikey)
if kerr == nil {
shouldStop := !resumed && b.c.shouldStopBefore(ikey)
@ -450,14 +478,14 @@ func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error {
hasLastUkey = true
lastUkey = append(lastUkey[:0], ukey...)
lastSeq = kMaxSeq
lastSeq = keyMaxSeq
}
switch {
case lastSeq <= b.minSeq:
// Dropped because newer entry for same user key exist
fallthrough // (A)
case kt == ktDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey):
case kt == keyTypeDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey):
// For this user key:
// (1) there is no data in higher levels
// (2) data in lower levels will have larger seq numbers
@ -479,7 +507,7 @@ func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error {
// Don't drop corrupted keys.
hasLastUkey = false
lastUkey = lastUkey[:0]
lastSeq = kMaxSeq
lastSeq = keyMaxSeq
b.kerrCnt++
}
@ -502,8 +530,7 @@ func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error {
func (b *tableCompactionBuilder) revert() error {
for _, at := range b.rec.addedTables {
b.s.logf("table@build revert @%d", at.num)
f := b.s.getTableFile(at.num)
if err := f.Remove(); err != nil {
if err := b.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: at.num}); err != nil {
return err
}
}
@ -514,30 +541,28 @@ func (db *DB) tableCompaction(c *compaction, noTrivial bool) {
defer c.release()
rec := &sessionRecord{}
rec.addCompPtr(c.level, c.imax)
rec.addCompPtr(c.sourceLevel, c.imax)
if !noTrivial && c.trivial() {
t := c.tables[0][0]
db.logf("table@move L%d@%d -> L%d", c.level, t.file.Num(), c.level+1)
rec.delTable(c.level, t.file.Num())
rec.addTableFile(c.level+1, t)
db.compactionTransactFunc("table@move", func(cnt *compactionTransactCounter) (err error) {
return db.s.commit(rec)
}, nil)
t := c.levels[0][0]
db.logf("table@move L%d@%d -> L%d", c.sourceLevel, t.fd.Num, c.sourceLevel+1)
rec.delTable(c.sourceLevel, t.fd.Num)
rec.addTableFile(c.sourceLevel+1, t)
db.compactionCommit("table-move", rec)
return
}
var stats [2]cStatsStaging
for i, tables := range c.tables {
var stats [2]cStatStaging
for i, tables := range c.levels {
for _, t := range tables {
stats[i].read += t.size
// Insert deleted tables into record
rec.delTable(c.level+i, t.file.Num())
rec.delTable(c.sourceLevel+i, t.fd.Num)
}
}
sourceSize := int(stats[0].read + stats[1].read)
minSeq := db.minSeq()
db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.level, len(c.tables[0]), c.level+1, len(c.tables[1]), shortenb(sourceSize), minSeq)
db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.sourceLevel, len(c.levels[0]), c.sourceLevel+1, len(c.levels[1]), shortenb(sourceSize), minSeq)
b := &tableCompactionBuilder{
db: db,
@ -547,49 +572,60 @@ func (db *DB) tableCompaction(c *compaction, noTrivial bool) {
stat1: &stats[1],
minSeq: minSeq,
strict: db.s.o.GetStrict(opt.StrictCompaction),
tableSize: db.s.o.GetCompactionTableSize(c.level + 1),
tableSize: db.s.o.GetCompactionTableSize(c.sourceLevel + 1),
}
db.compactionTransact("table@build", b)
// Commit changes
db.compactionTransactFunc("table@commit", func(cnt *compactionTransactCounter) (err error) {
stats[1].startTimer()
defer stats[1].stopTimer()
return db.s.commit(rec)
}, nil)
// Commit.
stats[1].startTimer()
db.compactionCommit("table", rec)
stats[1].stopTimer()
resultSize := int(stats[1].write)
db.logf("table@compaction committed F%s S%s Ke·%d D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), b.kerrCnt, b.dropCnt, stats[1].duration)
// Save compaction stats
for i := range stats {
db.compStats[c.level+1].add(&stats[i])
db.compStats.addStat(c.sourceLevel+1, &stats[i])
}
}
func (db *DB) tableRangeCompaction(level int, umin, umax []byte) {
func (db *DB) tableRangeCompaction(level int, umin, umax []byte) error {
db.logf("table@compaction range L%d %q:%q", level, umin, umax)
if level >= 0 {
if c := db.s.getCompactionRange(level, umin, umax); c != nil {
if c := db.s.getCompactionRange(level, umin, umax, true); c != nil {
db.tableCompaction(c, true)
}
} else {
v := db.s.version()
m := 1
for i, t := range v.tables[1:] {
if t.overlaps(db.s.icmp, umin, umax, false) {
m = i + 1
}
}
v.release()
// Retry until nothing to compact.
for {
compacted := false
for level := 0; level < m; level++ {
if c := db.s.getCompactionRange(level, umin, umax); c != nil {
db.tableCompaction(c, true)
// Scan for maximum level with overlapped tables.
v := db.s.version()
m := 1
for i := m; i < len(v.levels); i++ {
tables := v.levels[i]
if tables.overlaps(db.s.icmp, umin, umax, false) {
m = i
}
}
v.release()
for level := 0; level < m; level++ {
if c := db.s.getCompactionRange(level, umin, umax, false); c != nil {
db.tableCompaction(c, true)
compacted = true
}
}
if !compacted {
break
}
}
}
return nil
}
func (db *DB) tableAutoCompaction() {
@ -616,11 +652,11 @@ type cCmd interface {
ack(err error)
}
type cIdle struct {
type cAuto struct {
ackC chan<- error
}
func (r cIdle) ack(err error) {
func (r cAuto) ack(err error) {
if r.ackC != nil {
defer func() {
recover()
@ -644,13 +680,21 @@ func (r cRange) ack(err error) {
}
}
// This will trigger auto compation and/or wait for all compaction to be done.
func (db *DB) compSendIdle(compC chan<- cCmd) (err error) {
// compTrigger requests an auto compaction by sending an ack-less cAuto
// command on compC. The send is non-blocking: if compC is not ready to
// receive, the request is dropped and the function returns immediately.
// It never waits for the compaction itself.
func (db *DB) compTrigger(compC chan<- cCmd) {
select {
case compC <- cAuto{}:
default:
}
}
// This will trigger auto compaction and/or wait for all compaction to be done.
func (db *DB) compTriggerWait(compC chan<- cCmd) (err error) {
ch := make(chan error)
defer close(ch)
// Send cmd.
select {
case compC <- cIdle{ch}:
case compC <- cAuto{ch}:
case err = <-db.compErrC:
return
case _, _ = <-db.closeC:
@ -666,16 +710,8 @@ func (db *DB) compSendIdle(compC chan<- cCmd) (err error) {
return err
}
// This will trigger auto compaction but will not wait for it.
func (db *DB) compSendTrigger(compC chan<- cCmd) {
select {
case compC <- cIdle{}:
default:
}
}
// Send range compaction request.
func (db *DB) compSendRange(compC chan<- cCmd, level int, min, max []byte) (err error) {
func (db *DB) compTriggerRange(compC chan<- cCmd, level int, min, max []byte) (err error) {
ch := make(chan error)
defer close(ch)
// Send cmd.
@ -715,7 +751,7 @@ func (db *DB) mCompaction() {
select {
case x = <-db.mcompCmdC:
switch x.(type) {
case cIdle:
case cAuto:
db.memCompaction()
x.ack(nil)
x = nil
@ -776,11 +812,10 @@ func (db *DB) tCompaction() {
}
if x != nil {
switch cmd := x.(type) {
case cIdle:
case cAuto:
ackQ = append(ackQ, x)
case cRange:
db.tableRangeCompaction(cmd.level, cmd.min, cmd.max)
x.ack(nil)
x.ack(db.tableRangeCompaction(cmd.level, cmd.min, cmd.max))
default:
panic("leveldb: unknown command")
}

View File

@ -19,7 +19,7 @@ import (
)
var (
errInvalidIkey = errors.New("leveldb: Iterator: invalid internal key")
errInvalidInternalKey = errors.New("leveldb: Iterator: invalid internal key")
)
type memdbReleaser struct {
@ -33,40 +33,50 @@ func (mr *memdbReleaser) Release() {
})
}
func (db *DB) newRawIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
func (db *DB) newRawIterator(auxm *memDB, auxt tFiles, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader)
em, fm := db.getMems()
v := db.s.version()
ti := v.getIterators(slice, ro)
n := len(ti) + 2
i := make([]iterator.Iterator, 0, n)
tableIts := v.getIterators(slice, ro)
n := len(tableIts) + len(auxt) + 3
its := make([]iterator.Iterator, 0, n)
if auxm != nil {
ami := auxm.NewIterator(slice)
ami.SetReleaser(&memdbReleaser{m: auxm})
its = append(its, ami)
}
for _, t := range auxt {
its = append(its, v.s.tops.newIterator(t, slice, ro))
}
emi := em.NewIterator(slice)
emi.SetReleaser(&memdbReleaser{m: em})
i = append(i, emi)
its = append(its, emi)
if fm != nil {
fmi := fm.NewIterator(slice)
fmi.SetReleaser(&memdbReleaser{m: fm})
i = append(i, fmi)
its = append(its, fmi)
}
i = append(i, ti...)
strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader)
mi := iterator.NewMergedIterator(i, db.s.icmp, strict)
its = append(its, tableIts...)
mi := iterator.NewMergedIterator(its, db.s.icmp, strict)
mi.SetReleaser(&versionReleaser{v: v})
return mi
}
func (db *DB) newIterator(seq uint64, slice *util.Range, ro *opt.ReadOptions) *dbIter {
func (db *DB) newIterator(auxm *memDB, auxt tFiles, seq uint64, slice *util.Range, ro *opt.ReadOptions) *dbIter {
var islice *util.Range
if slice != nil {
islice = &util.Range{}
if slice.Start != nil {
islice.Start = newIkey(slice.Start, kMaxSeq, ktSeek)
islice.Start = makeInternalKey(nil, slice.Start, keyMaxSeq, keyTypeSeek)
}
if slice.Limit != nil {
islice.Limit = newIkey(slice.Limit, kMaxSeq, ktSeek)
islice.Limit = makeInternalKey(nil, slice.Limit, keyMaxSeq, keyTypeSeek)
}
}
rawIter := db.newRawIterator(islice, ro)
rawIter := db.newRawIterator(auxm, auxt, islice, ro)
iter := &dbIter{
db: db,
icmp: db.s.icmp,
@ -177,7 +187,7 @@ func (i *dbIter) Seek(key []byte) bool {
return false
}
ikey := newIkey(key, i.seq, ktSeek)
ikey := makeInternalKey(nil, key, i.seq, keyTypeSeek)
if i.iter.Seek(ikey) {
i.dir = dirSOI
return i.next()
@ -189,15 +199,15 @@ func (i *dbIter) Seek(key []byte) bool {
func (i *dbIter) next() bool {
for {
if ukey, seq, kt, kerr := parseIkey(i.iter.Key()); kerr == nil {
if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
i.sampleSeek()
if seq <= i.seq {
switch kt {
case ktDel:
case keyTypeDel:
// Skip deleted key.
i.key = append(i.key[:0], ukey...)
i.dir = dirForward
case ktVal:
case keyTypeVal:
if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 {
i.key = append(i.key[:0], ukey...)
i.value = append(i.value[:0], i.iter.Value()...)
@ -240,13 +250,13 @@ func (i *dbIter) prev() bool {
del := true
if i.iter.Valid() {
for {
if ukey, seq, kt, kerr := parseIkey(i.iter.Key()); kerr == nil {
if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
i.sampleSeek()
if seq <= i.seq {
if !del && i.icmp.uCompare(ukey, i.key) < 0 {
return true
}
del = (kt == ktDel)
del = (kt == keyTypeDel)
if !del {
i.key = append(i.key[:0], ukey...)
i.value = append(i.value[:0], i.iter.Value()...)
@ -282,7 +292,7 @@ func (i *dbIter) Prev() bool {
return i.Last()
case dirForward:
for i.iter.Prev() {
if ukey, _, _, kerr := parseIkey(i.iter.Key()); kerr == nil {
if ukey, _, _, kerr := parseInternalKey(i.iter.Key()); kerr == nil {
i.sampleSeek()
if i.icmp.uCompare(ukey, i.key) < 0 {
goto cont

View File

@ -110,7 +110,7 @@ func (snap *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err er
err = ErrSnapshotReleased
return
}
return snap.db.get(key, snap.elem.seq, ro)
return snap.db.get(nil, nil, key, snap.elem.seq, ro)
}
// Has returns true if the DB contains the given key.
@ -127,10 +127,10 @@ func (snap *Snapshot) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error)
err = ErrSnapshotReleased
return
}
return snap.db.has(key, snap.elem.seq, ro)
return snap.db.has(nil, nil, key, snap.elem.seq, ro)
}
// NewIterator returns an iterator for the snapshot of the uderlying DB.
// NewIterator returns an iterator for the snapshot of the underlying DB.
// The returned iterator is not goroutine-safe, but it is safe to use
// multiple iterators concurrently, with each in a dedicated goroutine.
// It is also safe to use an iterator concurrently with modifying its
@ -158,7 +158,7 @@ func (snap *Snapshot) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterat
}
// Since iterator already hold version ref, it doesn't need to
// hold snapshot ref.
return snap.db.newIterator(snap.elem.seq, slice, ro)
return snap.db.newIterator(nil, nil, snap.elem.seq, slice, ro)
}
// Release releases the snapshot. This will not release any returned

View File

@ -12,6 +12,7 @@ import (
"github.com/syndtr/goleveldb/leveldb/journal"
"github.com/syndtr/goleveldb/leveldb/memdb"
"github.com/syndtr/goleveldb/leveldb/storage"
)
type memDB struct {
@ -20,6 +21,10 @@ type memDB struct {
ref int32
}
// getref returns the current value of the reference counter, loaded
// atomically.
func (m *memDB) getref() int32 {
return atomic.LoadInt32(&m.ref)
}
func (m *memDB) incref() {
atomic.AddInt32(&m.ref, 1)
}
@ -48,11 +53,15 @@ func (db *DB) addSeq(delta uint64) {
atomic.AddUint64(&db.seq, delta)
}
func (db *DB) sampleSeek(ikey iKey) {
// setSeq atomically overwrites the DB sequence number with seq.
func (db *DB) setSeq(seq uint64) {
atomic.StoreUint64(&db.seq, seq)
}
func (db *DB) sampleSeek(ikey internalKey) {
v := db.s.version()
if v.sampleSeek(ikey) {
// Trigger table compaction.
db.compSendTrigger(db.tcompCmdC)
db.compTrigger(db.tcompCmdC)
}
v.release()
}
@ -67,12 +76,18 @@ func (db *DB) mpoolPut(mem *memdb.DB) {
}
}
func (db *DB) mpoolGet() *memdb.DB {
func (db *DB) mpoolGet(n int) *memDB {
var mdb *memdb.DB
select {
case mem := <-db.memPool:
return mem
case mdb = <-db.memPool:
default:
return nil
}
if mdb == nil || mdb.Capacity() < n {
mdb = memdb.New(db.s.icmp, maxInt(db.s.o.GetWriteBuffer(), n))
}
return &memDB{
db: db,
DB: mdb,
}
}
@ -95,11 +110,10 @@ func (db *DB) mpoolDrain() {
// Create new memdb and freeze the old one; needs external synchronization.
// newMem is only called synchronously by the writer.
func (db *DB) newMem(n int) (mem *memDB, err error) {
num := db.s.allocFileNum()
file := db.s.getJournalFile(num)
w, err := file.Create()
fd := storage.FileDesc{Type: storage.TypeJournal, Num: db.s.allocFileNum()}
w, err := db.s.stor.Create(fd)
if err != nil {
db.s.reuseFileNum(num)
db.s.reuseFileNum(fd.Num)
return
}
@ -115,20 +129,14 @@ func (db *DB) newMem(n int) (mem *memDB, err error) {
} else {
db.journal.Reset(w)
db.journalWriter.Close()
db.frozenJournalFile = db.journalFile
db.frozenJournalFd = db.journalFd
}
db.journalWriter = w
db.journalFile = file
db.journalFd = fd
db.frozenMem = db.mem
mdb := db.mpoolGet()
if mdb == nil || mdb.Capacity() < n {
mdb = memdb.New(db.s.icmp, maxInt(db.s.o.GetWriteBuffer(), n))
}
mem = &memDB{
db: db,
DB: mdb,
ref: 2,
}
mem = db.mpoolGet(n)
mem.incref() // for self
mem.incref() // for caller
db.mem = mem
// The seq only incremented by the writer. And whoever called newMem
// should hold write lock, so no need additional synchronization here.
@ -181,12 +189,12 @@ func (db *DB) getFrozenMem() *memDB {
// Drop frozen memdb; assume that frozen memdb isn't nil.
func (db *DB) dropFrozenMem() {
db.memMu.Lock()
if err := db.frozenJournalFile.Remove(); err != nil {
db.logf("journal@remove removing @%d %q", db.frozenJournalFile.Num(), err)
if err := db.s.stor.Remove(db.frozenJournalFd); err != nil {
db.logf("journal@remove removing @%d %q", db.frozenJournalFd.Num, err)
} else {
db.logf("journal@remove removed @%d", db.frozenJournalFile.Num())
db.logf("journal@remove removed @%d", db.frozenJournalFd.Num)
}
db.frozenJournalFile = nil
db.frozenJournalFd = storage.FileDesc{}
db.frozenMem.decref()
db.frozenMem = nil
db.memMu.Unlock()

View File

@ -0,0 +1,289 @@
// Copyright (c) 2016, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package leveldb
import (
"errors"
"sync"
"time"
"github.com/syndtr/goleveldb/leveldb/iterator"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/util"
)
var errTransactionDone = errors.New("leveldb: transaction already closed")
// Transaction is the transaction handle. It buffers writes in its own
// memdb and table files, separate from the DB's, until the transaction
// is committed or discarded.
type Transaction struct {
// db is the database this transaction was opened on.
db *DB
// lk guards the transaction state: readers (Get, Has, NewIterator) take
// the read lock, writers and Commit/Discard take the write lock.
lk sync.RWMutex
// seq is the sequence number of the last write made through the
// transaction; it starts at the DB's sequence number and is advanced
// by one for every key written.
seq uint64
// mem holds the writes that have not been flushed to a table yet.
mem *memDB
// tables are the table files created by flushing mem.
tables tFiles
// ikScratch is a reusable buffer for building internal keys.
ikScratch []byte
// rec accumulates the added table files (and the final sequence number)
// that are handed to the session on Commit.
rec sessionRecord
// stats records flush timing and the number of bytes written.
stats cStatStaging
// closed is set once the transaction has been committed or discarded.
closed bool
}
// Get looks up key within the transaction and returns its value, or
// ErrNotFound if the key is not present. Calling Get on a closed
// transaction fails with errTransactionDone.
//
// The returned slice is the caller's own copy and may be modified freely.
// The key argument may be modified once Get has returned.
func (tr *Transaction) Get(key []byte, ro *opt.ReadOptions) ([]byte, error) {
	tr.lk.RLock()
	defer tr.lk.RUnlock()

	if !tr.closed {
		// Consult the transaction's own memdb and tables in addition to
		// the DB state.
		return tr.db.get(tr.mem.DB, tr.tables, key, tr.seq, ro)
	}
	return nil, errTransactionDone
}
// Has reports whether key is present as seen from within the transaction.
// Calling Has on a closed transaction fails with errTransactionDone.
//
// The key argument may be modified once Has has returned.
func (tr *Transaction) Has(key []byte, ro *opt.ReadOptions) (bool, error) {
	tr.lk.RLock()
	defer tr.lk.RUnlock()

	if !tr.closed {
		return tr.db.has(tr.mem.DB, tr.tables, key, tr.seq, ro)
	}
	return false, errTransactionDone
}
// NewIterator returns an iterator over the latest snapshot of the
// transaction. A single iterator is not goroutine-safe, but several
// iterators may be used concurrently as long as each one stays in its own
// goroutine. Iterating while the transaction is being written to is also
// safe; the key/value pairs produced are guaranteed to be consistent.
//
// slice restricts the iterator to keys within the given range. A nil
// Range.Start is treated as a key before all keys in the DB, and a nil
// Range.Limit as a key after all keys in the DB.
//
// The iterator must be released after use by calling its Release method.
// On a closed transaction an empty iterator carrying errTransactionDone is
// returned.
//
// Also read the Iterator documentation of the leveldb/iterator package.
func (tr *Transaction) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
	tr.lk.RLock()
	defer tr.lk.RUnlock()

	if tr.closed {
		return iterator.NewEmptyIterator(errTransactionDone)
	}

	// Take an extra reference on the memdb on behalf of the iterator
	// before handing it over.
	mem := tr.mem
	mem.incref()
	return tr.db.newIterator(mem, tr.tables, tr.seq, slice, ro)
}
// flush writes the contents of the transaction memdb out as a new level-0
// table and records it in the transaction. It does nothing when the memdb
// is empty. On a table creation error the memdb is left untouched.
func (tr *Transaction) flush() error {
	if tr.mem.Len() == 0 {
		// Nothing buffered.
		return nil
	}

	// Build the table, timing only the build itself.
	tr.stats.startTimer()
	it := tr.mem.NewIterator(nil)
	tf, entries, err := tr.db.s.tops.createFrom(it)
	it.Release()
	tr.stats.stopTimer()
	if err != nil {
		return err
	}

	switch tr.mem.getref() {
	case 1:
		// Ours is the only reference; reuse the memdb in place.
		tr.mem.Reset()
	default:
		// Something else still references the memdb: drop our reference
		// and continue with a fresh one from the pool.
		tr.mem.decref()
		tr.mem = tr.db.mpoolGet(0)
		tr.mem.incref()
	}

	tr.tables = append(tr.tables, tf)
	tr.rec.addTableFile(0, tf)
	tr.stats.write += tf.size
	tr.db.logf("transaction@flush created L0@%d N·%d S·%s %q:%q", tf.fd.Num, entries, shortenb(int(tf.size)), tf.imin, tf.imax)
	return nil
}
// put stores a single record of type kt in the transaction memdb under the
// next sequence number, flushing the memdb first when it lacks room for
// the record. The transaction sequence number only advances once the
// record has been stored.
func (tr *Transaction) put(kt keyType, key, value []byte) error {
	// Build the internal key in the reusable scratch buffer.
	tr.ikScratch = makeInternalKey(tr.ikScratch, key, tr.seq+1, kt)

	needed := len(tr.ikScratch) + len(value)
	if tr.mem.Free() < needed {
		if err := tr.flush(); err != nil {
			return err
		}
	}

	err := tr.mem.Put(tr.ikScratch, value)
	if err != nil {
		return err
	}
	tr.seq++
	return nil
}
// Put sets the value for the given key, overwriting any previous value;
// a DB is not a multi-map.
// Note that a transaction is not compacted until it is committed: writing
// the same key ten times keeps ten entries in the transaction.
//
// wo is currently not consulted. Calling Put on a closed transaction fails
// with errTransactionDone.
//
// The arguments may be modified once Put has returned.
func (tr *Transaction) Put(key, value []byte, wo *opt.WriteOptions) error {
	tr.lk.Lock()
	defer tr.lk.Unlock()

	if !tr.closed {
		return tr.put(keyTypeVal, key, value)
	}
	return errTransactionDone
}
// Delete removes the value for the given key by recording a deletion
// entry in the transaction.
// Note that a transaction is not compacted until it is committed: deleting
// or writing the same key ten times keeps ten entries in the transaction.
//
// wo is currently not consulted. Calling Delete on a closed transaction
// fails with errTransactionDone.
//
// The arguments may be modified once Delete has returned.
func (tr *Transaction) Delete(key []byte, wo *opt.WriteOptions) error {
	tr.lk.Lock()
	defer tr.lk.Unlock()

	if !tr.closed {
		return tr.put(keyTypeDel, key, nil)
	}
	return errTransactionDone
}
// Write applies the given batch to the transaction, one record at a time
// and in batch order. A nil or empty batch is a no-op, even on a closed
// transaction.
// Note that a transaction is not compacted until it is committed: writing
// the same key ten times keeps ten entries in the transaction.
//
// wo is currently not consulted. Calling Write with a non-empty batch on a
// closed transaction fails with errTransactionDone.
//
// The arguments may be modified once Write has returned.
func (tr *Transaction) Write(b *Batch, wo *opt.WriteOptions) error {
	if b == nil {
		return nil
	}
	if b.Len() == 0 {
		return nil
	}

	tr.lk.Lock()
	defer tr.lk.Unlock()
	if tr.closed {
		return errTransactionDone
	}

	// Replay every batch record through put; the first error aborts.
	apply := func(i int, kt keyType, key, value []byte) error {
		return tr.put(kt, key, value)
	}
	return b.decodeRec(apply)
}
// setDone marks the transaction as closed, detaches it from the DB, drops
// the transaction's reference on its memdb and finally releases the DB
// write lock. Both callers (Commit and Discard) invoke it while holding
// tr.lk.
func (tr *Transaction) setDone() {
tr.closed = true
tr.db.tr = nil
tr.mem.decref()
// Receiving from writeLockC releases the write lock that OpenTransaction
// acquired by sending on it.
<-tr.db.writeLockC
}
// Commit commits the transaction.
//
// Any data still buffered in memory is flushed to a table, then the
// accumulated session record is committed, retrying up to three times with
// a one second pause between attempts. On success the DB sequence number
// is advanced to the transaction's and a table auto-compaction is
// triggered.
//
// Commit returns the DB's error if the DB is not usable,
// errTransactionDone if the transaction was already closed, the flush
// error if flushing fails (the transaction's tables are discarded in that
// case), or the last commit error if every commit attempt fails or the DB
// is closed while retrying. Once Commit has got past the closed check the
// transaction is closed whatever the outcome.
//
// Other methods should not be called after transaction has been committed.
func (tr *Transaction) Commit() error {
	if err := tr.db.ok(); err != nil {
		return err
	}

	tr.lk.Lock()
	defer tr.lk.Unlock()
	if tr.closed {
		return errTransactionDone
	}
	defer tr.setDone()

	if err := tr.flush(); err != nil {
		tr.discard()
		return err
	}
	if len(tr.tables) == 0 {
		// Nothing was written; there is nothing to commit.
		return nil
	}

	// Committing transaction.
	tr.rec.setSeqNum(tr.seq)
	tr.db.compCommitLk.Lock()
	defer tr.db.compCommitLk.Unlock()

	var cerr error
	for retry := 0; retry < 3; retry++ {
		if cerr = tr.db.s.commit(&tr.rec); cerr == nil {
			// Success. Set db.seq.
			tr.db.setSeq(tr.seq)
			break
		}
		tr.db.logf("transaction@commit error R·%d %q", retry, cerr)
		select {
		case <-time.After(time.Second):
		case <-tr.db.closeC:
			tr.db.logf("transaction@commit exiting")
			return cerr
		}
	}
	if cerr != nil {
		// Every attempt failed. Report it instead of pretending the
		// transaction was committed. The table files are deliberately left
		// in place: the state of the manifest after a failed commit is
		// unknown here, so removing them is not provably safe.
		return cerr
	}

	// Trigger table auto-compaction.
	tr.db.compTrigger(tr.db.tcompCmdC)
	return nil
}
// discard removes every table file created by the transaction. Removal is
// best-effort: a file that cannot be removed is skipped without reporting
// the error, and only the file numbers of successfully removed tables are
// offered back to the session for reuse.
func (tr *Transaction) discard() {
	for _, tf := range tr.tables {
		tr.db.logf("transaction@discard @%d", tf.fd.Num)
		if err := tr.db.s.stor.Remove(tf.fd); err != nil {
			// Deliberately ignored; see the function comment.
			continue
		}
		tr.db.s.reuseFileNum(tf.fd.Num)
	}
}
// Discard discards the transaction, removing the tables it created and
// closing it. Discarding an already closed transaction is a no-op.
//
// Other methods should not be called after transaction has been discarded.
func (tr *Transaction) Discard() {
	tr.lk.Lock()
	defer tr.lk.Unlock()

	if tr.closed {
		return
	}
	tr.discard()
	tr.setDone()
}
// OpenTransaction opens an atomic DB transaction. Only one transaction can
// be opened at a time. Writes to the DB are blocked until the transaction
// is committed or discarded.
// The returned transaction handle is goroutine-safe.
//
// The transaction must be closed once done, either by committing or
// discarding the transaction.
// Closing the DB will discard open transaction.
//
// OpenTransaction returns the DB's error if the DB is not usable, a
// persistent compaction error if one is pending, ErrClosed if the DB is
// closed while waiting for the write lock, or the error from flushing the
// current memdb. In every error case the write lock is not held on return.
func (db *DB) OpenTransaction() (*Transaction, error) {
	if err := db.ok(); err != nil {
		return nil, err
	}

	// The write happen synchronously: acquire the write lock, which stays
	// held until the transaction is done.
	select {
	case db.writeLockC <- struct{}{}:
	case err := <-db.compPerErrC:
		return nil, err
	case <-db.closeC:
		return nil, ErrClosed
	}

	if db.tr != nil {
		panic("leveldb: has open transaction")
	}

	// Flush current memdb.
	if db.mem != nil && db.mem.Len() != 0 {
		if _, err := db.rotateMem(0, true); err != nil {
			// No transaction is created, so nothing would ever release the
			// write lock; release it here or all later writers block.
			<-db.writeLockC
			return nil, err
		}
	}

	tr := &Transaction{
		db:  db,
		seq: db.seq,
		mem: db.mpoolGet(0),
	}
	// Reference held by the transaction itself; dropped when it is done.
	tr.mem.incref()
	db.tr = tr
	return tr, nil
}

View File

@ -21,14 +21,16 @@ type Reader interface {
NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator
}
type Sizes []uint64
// Sizes is list of size.
type Sizes []int64
// Sum returns sum of the sizes.
func (p Sizes) Sum() (n uint64) {
for _, s := range p {
n += s
func (sizes Sizes) Sum() int64 {
var sum int64
for _, size := range sizes {
sum += size
}
return n
return sum
}
// Logging.
@ -40,59 +42,59 @@ func (db *DB) checkAndCleanFiles() error {
v := db.s.version()
defer v.release()
tablesMap := make(map[uint64]bool)
for _, tables := range v.tables {
tmap := make(map[int64]bool)
for _, tables := range v.levels {
for _, t := range tables {
tablesMap[t.file.Num()] = false
tmap[t.fd.Num] = false
}
}
files, err := db.s.getFiles(storage.TypeAll)
fds, err := db.s.stor.List(storage.TypeAll)
if err != nil {
return err
}
var nTables int
var rem []storage.File
for _, f := range files {
var nt int
var rem []storage.FileDesc
for _, fd := range fds {
keep := true
switch f.Type() {
switch fd.Type {
case storage.TypeManifest:
keep = f.Num() >= db.s.manifestFile.Num()
keep = fd.Num >= db.s.manifestFd.Num
case storage.TypeJournal:
if db.frozenJournalFile != nil {
keep = f.Num() >= db.frozenJournalFile.Num()
if !db.frozenJournalFd.Nil() {
keep = fd.Num >= db.frozenJournalFd.Num
} else {
keep = f.Num() >= db.journalFile.Num()
keep = fd.Num >= db.journalFd.Num
}
case storage.TypeTable:
_, keep = tablesMap[f.Num()]
_, keep = tmap[fd.Num]
if keep {
tablesMap[f.Num()] = true
nTables++
tmap[fd.Num] = true
nt++
}
}
if !keep {
rem = append(rem, f)
rem = append(rem, fd)
}
}
if nTables != len(tablesMap) {
var missing []*storage.FileInfo
for num, present := range tablesMap {
if nt != len(tmap) {
var mfds []storage.FileDesc
for num, present := range tmap {
if !present {
missing = append(missing, &storage.FileInfo{Type: storage.TypeTable, Num: num})
mfds = append(mfds, storage.FileDesc{storage.TypeTable, num})
db.logf("db@janitor table missing @%d", num)
}
}
return errors.NewErrCorrupted(nil, &errors.ErrMissingFiles{Files: missing})
return errors.NewErrCorrupted(storage.FileDesc{}, &errors.ErrMissingFiles{Fds: mfds})
}
db.logf("db@janitor F·%d G·%d", len(files), len(rem))
for _, f := range rem {
db.logf("db@janitor removing %s-%d", f.Type(), f.Num())
if err := f.Remove(); err != nil {
db.logf("db@janitor F·%d G·%d", len(fds), len(rem))
for _, fd := range rem {
db.logf("db@janitor removing %s-%d", fd.Type, fd.Num)
if err := db.s.stor.Remove(fd); err != nil {
return err
}
}

View File

@ -45,9 +45,9 @@ func (db *DB) jWriter() {
}
}
func (db *DB) rotateMem(n int) (mem *memDB, err error) {
func (db *DB) rotateMem(n int, wait bool) (mem *memDB, err error) {
// Wait for pending memdb compaction.
err = db.compSendIdle(db.mcompCmdC)
err = db.compTriggerWait(db.mcompCmdC)
if err != nil {
return
}
@ -59,7 +59,11 @@ func (db *DB) rotateMem(n int) (mem *memDB, err error) {
}
// Schedule memdb compaction.
db.compSendTrigger(db.mcompCmdC)
if wait {
err = db.compTriggerWait(db.mcompCmdC)
} else {
db.compTrigger(db.mcompCmdC)
}
return
}
@ -84,7 +88,7 @@ func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) {
return false
case v.tLen(0) >= db.s.o.GetWriteL0PauseTrigger():
delayed = true
err = db.compSendIdle(db.tcompCmdC)
err = db.compTriggerWait(db.tcompCmdC)
if err != nil {
return false
}
@ -94,7 +98,7 @@ func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) {
mdbFree = n
} else {
mdb.decref()
mdb, err = db.rotateMem(n)
mdb, err = db.rotateMem(n, false)
if err == nil {
mdbFree = mdb.Free()
} else {
@ -131,12 +135,27 @@ func (db *DB) Write(b *Batch, wo *opt.WriteOptions) (err error) {
b.init(wo.GetSync() && !db.s.o.GetNoSync())
if b.size() > db.s.o.GetWriteBuffer() && !db.s.o.GetDisableLargeBatchTransaction() {
// Writes using transaction.
tr, err1 := db.OpenTransaction()
if err1 != nil {
return err1
}
if err1 := tr.Write(b, wo); err1 != nil {
tr.Discard()
return err1
}
return tr.Commit()
}
// The write happen synchronously.
select {
case db.writeC <- b:
if <-db.writeMergedC {
return <-db.writeAckC
}
// Continue, the write lock already acquired by previous writer
// and handed out to us.
case db.writeLockC <- struct{}{}:
case err = <-db.compPerErrC:
return
@ -147,14 +166,15 @@ func (db *DB) Write(b *Batch, wo *opt.WriteOptions) (err error) {
merged := 0
danglingMerge := false
defer func() {
for i := 0; i < merged; i++ {
db.writeAckC <- err
}
if danglingMerge {
// Only one dangling merge at most, so this is safe.
db.writeMergedC <- false
} else {
<-db.writeLockC
}
for i := 0; i < merged; i++ {
db.writeAckC <- err
}
}()
mdb, mdbFree, err := db.flush(b.size())
@ -234,7 +254,7 @@ drain:
db.addSeq(uint64(b.Len()))
if b.size() >= mdbFree {
db.rotateMem(0)
db.rotateMem(0, false)
}
return
}
@ -261,8 +281,8 @@ func (db *DB) Delete(key []byte, wo *opt.WriteOptions) error {
func isMemOverlaps(icmp *iComparer, mem *memdb.DB, min, max []byte) bool {
iter := mem.NewIterator(nil)
defer iter.Release()
return (max == nil || (iter.First() && icmp.uCompare(max, iKey(iter.Key()).ukey()) >= 0)) &&
(min == nil || (iter.Last() && icmp.uCompare(min, iKey(iter.Key()).ukey()) <= 0))
return (max == nil || (iter.First() && icmp.uCompare(max, internalKey(iter.Key()).ukey()) >= 0)) &&
(min == nil || (iter.Last() && icmp.uCompare(min, internalKey(iter.Key()).ukey()) <= 0))
}
// CompactRange compacts the underlying DB for the given key range.
@ -293,12 +313,12 @@ func (db *DB) CompactRange(r util.Range) error {
defer mdb.decref()
if isMemOverlaps(db.s.icmp, mdb.DB, r.Start, r.Limit) {
// Memdb compaction.
if _, err := db.rotateMem(0); err != nil {
if _, err := db.rotateMem(0, false); err != nil {
<-db.writeLockC
return err
}
<-db.writeLockC
if err := db.compSendIdle(db.mcompCmdC); err != nil {
if err := db.compTriggerWait(db.mcompCmdC); err != nil {
return err
}
} else {
@ -306,7 +326,7 @@ func (db *DB) CompactRange(r util.Range) error {
}
// Table compaction.
return db.compSendRange(db.tcompCmdC, -1, r.Start, r.Limit)
return db.compTriggerRange(db.tcompCmdC, -1, r.Start, r.Limit)
}
// SetReadOnly makes DB read-only. It will stay read-only until reopened.

View File

@ -29,21 +29,21 @@ func New(text string) error {
// ErrCorrupted is the type that wraps errors that indicate corruption in
// the database.
type ErrCorrupted struct {
File *storage.FileInfo
Err error
Fd storage.FileDesc
Err error
}
func (e *ErrCorrupted) Error() string {
if e.File != nil {
return fmt.Sprintf("%v [file=%v]", e.Err, e.File)
if !e.Fd.Nil() {
return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd)
} else {
return e.Err.Error()
}
}
// NewErrCorrupted creates new ErrCorrupted error.
func NewErrCorrupted(f storage.File, err error) error {
return &ErrCorrupted{storage.NewFileInfo(f), err}
func NewErrCorrupted(fd storage.FileDesc, err error) error {
return &ErrCorrupted{fd, err}
}
// IsCorrupted returns a boolean indicating whether the error is indicating
@ -61,17 +61,17 @@ func IsCorrupted(err error) bool {
// ErrMissingFiles is the type that indicates a corruption due to missing
// files. ErrMissingFiles is always wrapped with ErrCorrupted.
type ErrMissingFiles struct {
Files []*storage.FileInfo
Fds []storage.FileDesc
}
func (e *ErrMissingFiles) Error() string { return "file missing" }
// SetFile sets 'file info' of the given error with the given file.
// SetFd sets 'file info' of the given error with the given file.
// Currently only ErrCorrupted is supported, otherwise will do nothing.
func SetFile(err error, f storage.File) error {
func SetFd(err error, fd storage.FileDesc) error {
switch x := err.(type) {
case *ErrCorrupted:
x.File = storage.NewFileInfo(f)
x.Fd = fd
return x
}
return err

View File

@ -15,7 +15,7 @@ type iFilter struct {
}
func (f iFilter) Contains(filter, key []byte) bool {
return f.Filter.Contains(filter, iKey(key).ukey())
return f.Filter.Contains(filter, internalKey(key).ukey())
}
func (f iFilter) NewGenerator() filter.FilterGenerator {
@ -27,5 +27,5 @@ type iFilterGenerator struct {
}
func (g iFilterGenerator) Add(key []byte) {
g.FilterGenerator.Add(iKey(key).ukey())
g.FilterGenerator.Add(internalKey(key).ukey())
}

View File

@ -83,6 +83,7 @@ import (
"io"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/storage"
"github.com/syndtr/goleveldb/leveldb/util"
)
@ -165,7 +166,7 @@ func (r *Reader) corrupt(n int, reason string, skip bool) error {
r.dropper.Drop(&ErrCorrupted{n, reason})
}
if r.strict && !skip {
r.err = errors.NewErrCorrupted(nil, &ErrCorrupted{n, reason})
r.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrCorrupted{n, reason})
return r.err
}
return errSkip

View File

@ -11,28 +11,30 @@ import (
"fmt"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/storage"
)
type ErrIkeyCorrupted struct {
// ErrInternalKeyCorrupted records internal key corruption.
type ErrInternalKeyCorrupted struct {
Ikey []byte
Reason string
}
func (e *ErrIkeyCorrupted) Error() string {
return fmt.Sprintf("leveldb: iKey %q corrupted: %s", e.Ikey, e.Reason)
func (e *ErrInternalKeyCorrupted) Error() string {
return fmt.Sprintf("leveldb: internal key %q corrupted: %s", e.Ikey, e.Reason)
}
func newErrIkeyCorrupted(ikey []byte, reason string) error {
return errors.NewErrCorrupted(nil, &ErrIkeyCorrupted{append([]byte{}, ikey...), reason})
func newErrInternalKeyCorrupted(ikey []byte, reason string) error {
return errors.NewErrCorrupted(storage.FileDesc{}, &ErrInternalKeyCorrupted{append([]byte{}, ikey...), reason})
}
type kType int
type keyType uint
func (kt kType) String() string {
func (kt keyType) String() string {
switch kt {
case ktDel:
case keyTypeDel:
return "d"
case ktVal:
case keyTypeVal:
return "v"
}
return "x"
@ -41,102 +43,105 @@ func (kt kType) String() string {
// Value types encoded as the last component of internal keys.
// Don't modify; this value are saved to disk.
const (
ktDel kType = iota
ktVal
keyTypeDel keyType = iota
keyTypeVal
)
// ktSeek defines the kType that should be passed when constructing an
// keyTypeSeek defines the keyType that should be passed when constructing an
// internal key for seeking to a particular sequence number (since we
// sort sequence numbers in decreasing order and the value type is
// embedded as the low 8 bits in the sequence number in internal keys,
// we need to use the highest-numbered ValueType, not the lowest).
const ktSeek = ktVal
const keyTypeSeek = keyTypeVal
const (
// Maximum value possible for sequence number; the 8-bits are
// used by value type, so its can packed together in single
// 64-bit integer.
kMaxSeq uint64 = (uint64(1) << 56) - 1
keyMaxSeq = (uint64(1) << 56) - 1
// Maximum value possible for packed sequence number and type.
kMaxNum uint64 = (kMaxSeq << 8) | uint64(ktSeek)
keyMaxNum = (keyMaxSeq << 8) | uint64(keyTypeSeek)
)
// Maximum number encoded in bytes.
var kMaxNumBytes = make([]byte, 8)
var keyMaxNumBytes = make([]byte, 8)
func init() {
binary.LittleEndian.PutUint64(kMaxNumBytes, kMaxNum)
binary.LittleEndian.PutUint64(keyMaxNumBytes, keyMaxNum)
}
type iKey []byte
type internalKey []byte
func newIkey(ukey []byte, seq uint64, kt kType) iKey {
if seq > kMaxSeq {
func makeInternalKey(dst, ukey []byte, seq uint64, kt keyType) internalKey {
if seq > keyMaxSeq {
panic("leveldb: invalid sequence number")
} else if kt > ktVal {
} else if kt > keyTypeVal {
panic("leveldb: invalid type")
}
ik := make(iKey, len(ukey)+8)
copy(ik, ukey)
binary.LittleEndian.PutUint64(ik[len(ukey):], (seq<<8)|uint64(kt))
return ik
if n := len(ukey) + 8; cap(dst) < n {
dst = make([]byte, n)
} else {
dst = dst[:n]
}
copy(dst, ukey)
binary.LittleEndian.PutUint64(dst[len(ukey):], (seq<<8)|uint64(kt))
return internalKey(dst)
}
func parseIkey(ik []byte) (ukey []byte, seq uint64, kt kType, err error) {
func parseInternalKey(ik []byte) (ukey []byte, seq uint64, kt keyType, err error) {
if len(ik) < 8 {
return nil, 0, 0, newErrIkeyCorrupted(ik, "invalid length")
return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid length")
}
num := binary.LittleEndian.Uint64(ik[len(ik)-8:])
seq, kt = uint64(num>>8), kType(num&0xff)
if kt > ktVal {
return nil, 0, 0, newErrIkeyCorrupted(ik, "invalid type")
seq, kt = uint64(num>>8), keyType(num&0xff)
if kt > keyTypeVal {
return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid type")
}
ukey = ik[:len(ik)-8]
return
}
func validIkey(ik []byte) bool {
_, _, _, err := parseIkey(ik)
func validInternalKey(ik []byte) bool {
_, _, _, err := parseInternalKey(ik)
return err == nil
}
func (ik iKey) assert() {
func (ik internalKey) assert() {
if ik == nil {
panic("leveldb: nil iKey")
panic("leveldb: nil internalKey")
}
if len(ik) < 8 {
panic(fmt.Sprintf("leveldb: iKey %q, len=%d: invalid length", []byte(ik), len(ik)))
panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid length", []byte(ik), len(ik)))
}
}
func (ik iKey) ukey() []byte {
func (ik internalKey) ukey() []byte {
ik.assert()
return ik[:len(ik)-8]
}
func (ik iKey) num() uint64 {
func (ik internalKey) num() uint64 {
ik.assert()
return binary.LittleEndian.Uint64(ik[len(ik)-8:])
}
func (ik iKey) parseNum() (seq uint64, kt kType) {
func (ik internalKey) parseNum() (seq uint64, kt keyType) {
num := ik.num()
seq, kt = uint64(num>>8), kType(num&0xff)
if kt > ktVal {
panic(fmt.Sprintf("leveldb: iKey %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt))
seq, kt = uint64(num>>8), keyType(num&0xff)
if kt > keyTypeVal {
panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt))
}
return
}
func (ik iKey) String() string {
func (ik internalKey) String() string {
if ik == nil {
return "<nil>"
}
if ukey, seq, kt, err := parseIkey(ik); err == nil {
if ukey, seq, kt, err := parseInternalKey(ik); err == nil {
return fmt.Sprintf("%s,%s%d", shorten(string(ukey)), kt, seq)
} else {
return "<invalid>"
}
return "<invalid>"
}

View File

@ -8,10 +8,11 @@
package opt
import (
"math"
"github.com/syndtr/goleveldb/leveldb/cache"
"github.com/syndtr/goleveldb/leveldb/comparer"
"github.com/syndtr/goleveldb/leveldb/filter"
"math"
)
const (
@ -35,8 +36,6 @@ var (
DefaultCompactionTotalSizeMultiplier = 10.0
DefaultCompressionType = SnappyCompression
DefaultIteratorSamplingRate = 1 * MiB
DefaultMaxMemCompationLevel = 2
DefaultNumLevel = 7
DefaultOpenFilesCacher = LRUCacher
DefaultOpenFilesCacheCapacity = 500
DefaultWriteBuffer = 4 * MiB
@ -266,6 +265,13 @@ type Options struct {
// The default value is false.
DisableCompactionBackoff bool
// DisableLargeBatchTransaction allows disabling switch-to-transaction mode
// on large batch writes. If the mode is left enabled, batch writes larger
// than WriteBuffer will use a transaction.
//
// The default is false.
DisableLargeBatchTransaction bool
// ErrorIfExist defines whether an error should be returned if the DB already
// exists.
//
@ -301,24 +307,11 @@ type Options struct {
// The default is 1MiB.
IteratorSamplingRate int
// MaxMemCompationLevel defines maximum level a newly compacted 'memdb'
// will be pushed into if doesn't creates overlap. This should less than
// NumLevel. Use -1 for level-0.
//
// The default is 2.
MaxMemCompationLevel int
// NoSync allows completely disable fsync.
//
// The default is false.
NoSync bool
// NumLevel defines number of database level. The level shouldn't changed
// between opens, or the database will panic.
//
// The default is 7.
NumLevel int
// OpenFilesCacher provides cache algorithm for open files caching.
// Specify NoCacher to disable caching algorithm.
//
@ -440,7 +433,7 @@ func (o *Options) GetCompactionTableSize(level int) int {
if o.CompactionTableSize > 0 {
base = o.CompactionTableSize
}
if len(o.CompactionTableSizeMultiplierPerLevel) > level && o.CompactionTableSizeMultiplierPerLevel[level] > 0 {
if level < len(o.CompactionTableSizeMultiplierPerLevel) && o.CompactionTableSizeMultiplierPerLevel[level] > 0 {
mult = o.CompactionTableSizeMultiplierPerLevel[level]
} else if o.CompactionTableSizeMultiplier > 0 {
mult = math.Pow(o.CompactionTableSizeMultiplier, float64(level))
@ -461,7 +454,7 @@ func (o *Options) GetCompactionTotalSize(level int) int64 {
if o.CompactionTotalSize > 0 {
base = o.CompactionTotalSize
}
if len(o.CompactionTotalSizeMultiplierPerLevel) > level && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 {
if level < len(o.CompactionTotalSizeMultiplierPerLevel) && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 {
mult = o.CompactionTotalSizeMultiplierPerLevel[level]
} else if o.CompactionTotalSizeMultiplier > 0 {
mult = math.Pow(o.CompactionTotalSizeMultiplier, float64(level))
@ -508,6 +501,13 @@ func (o *Options) GetDisableCompactionBackoff() bool {
return o.DisableCompactionBackoff
}
// GetDisableLargeBatchTransaction reports the DisableLargeBatchTransaction
// setting. A nil receiver is treated as the default and yields false.
func (o *Options) GetDisableLargeBatchTransaction() bool {
	return o != nil && o.DisableLargeBatchTransaction
}
func (o *Options) GetErrorIfExist() bool {
if o == nil {
return false
@ -536,21 +536,6 @@ func (o *Options) GetIteratorSamplingRate() int {
return o.IteratorSamplingRate
}
// GetMaxMemCompationLevel returns the effective MaxMemCompationLevel.
//
// A nil receiver or a zero field selects DefaultMaxMemCompationLevel, a
// negative field selects level 0, and a positive field is used as given.
// The result is then capped at GetNumLevel()-1.
func (o *Options) GetMaxMemCompationLevel() int {
	level := DefaultMaxMemCompationLevel
	if o != nil {
		switch {
		case o.MaxMemCompationLevel > 0:
			level = o.MaxMemCompationLevel
		case o.MaxMemCompationLevel < 0:
			level = 0
		}
	}
	// GetNumLevel tolerates a nil receiver, so no extra guard is needed here.
	if top := o.GetNumLevel() - 1; level > top {
		return top
	}
	return level
}
func (o *Options) GetNoSync() bool {
if o == nil {
return false
@ -558,13 +543,6 @@ func (o *Options) GetNoSync() bool {
return o.NoSync
}
// GetNumLevel returns the configured NumLevel, or DefaultNumLevel when the
// receiver is nil or the field is not positive.
func (o *Options) GetNumLevel() int {
	if o != nil && o.NumLevel > 0 {
		return o.NumLevel
	}
	return DefaultNumLevel
}
func (o *Options) GetOpenFilesCacher() Cacher {
if o == nil || o.OpenFilesCacher == nil {
return DefaultOpenFilesCacher

View File

@ -43,6 +43,8 @@ func (s *session) setOptions(o *opt.Options) {
s.o.cache()
}
const optCachedLevel = 7
type cachedOptions struct {
*opt.Options
@ -54,15 +56,13 @@ type cachedOptions struct {
}
func (co *cachedOptions) cache() {
numLevel := co.Options.GetNumLevel()
co.compactionExpandLimit = make([]int, optCachedLevel)
co.compactionGPOverlaps = make([]int, optCachedLevel)
co.compactionSourceLimit = make([]int, optCachedLevel)
co.compactionTableSize = make([]int, optCachedLevel)
co.compactionTotalSize = make([]int64, optCachedLevel)
co.compactionExpandLimit = make([]int, numLevel)
co.compactionGPOverlaps = make([]int, numLevel)
co.compactionSourceLimit = make([]int, numLevel)
co.compactionTableSize = make([]int, numLevel)
co.compactionTotalSize = make([]int64, numLevel)
for level := 0; level < numLevel; level++ {
for level := 0; level < optCachedLevel; level++ {
co.compactionExpandLimit[level] = co.Options.GetCompactionExpandLimit(level)
co.compactionGPOverlaps[level] = co.Options.GetCompactionGPOverlaps(level)
co.compactionSourceLimit[level] = co.Options.GetCompactionSourceLimit(level)
@ -72,21 +72,36 @@ func (co *cachedOptions) cache() {
}
func (co *cachedOptions) GetCompactionExpandLimit(level int) int {
return co.compactionExpandLimit[level]
if level < optCachedLevel {
return co.compactionExpandLimit[level]
}
return co.Options.GetCompactionExpandLimit(level)
}
func (co *cachedOptions) GetCompactionGPOverlaps(level int) int {
return co.compactionGPOverlaps[level]
if level < optCachedLevel {
return co.compactionGPOverlaps[level]
}
return co.Options.GetCompactionGPOverlaps(level)
}
func (co *cachedOptions) GetCompactionSourceLimit(level int) int {
return co.compactionSourceLimit[level]
if level < optCachedLevel {
return co.compactionSourceLimit[level]
}
return co.Options.GetCompactionSourceLimit(level)
}
func (co *cachedOptions) GetCompactionTableSize(level int) int {
return co.compactionTableSize[level]
if level < optCachedLevel {
return co.compactionTableSize[level]
}
return co.Options.GetCompactionTableSize(level)
}
func (co *cachedOptions) GetCompactionTotalSize(level int) int64 {
return co.compactionTotalSize[level]
if level < optCachedLevel {
return co.compactionTotalSize[level]
}
return co.Options.GetCompactionTotalSize(level)
}

View File

@ -16,9 +16,9 @@ import (
"github.com/syndtr/goleveldb/leveldb/journal"
"github.com/syndtr/goleveldb/leveldb/opt"
"github.com/syndtr/goleveldb/leveldb/storage"
"github.com/syndtr/goleveldb/leveldb/util"
)
// ErrManifestCorrupted records manifest corruption.
type ErrManifestCorrupted struct {
Field string
Reason string
@ -28,31 +28,31 @@ func (e *ErrManifestCorrupted) Error() string {
return fmt.Sprintf("leveldb: manifest corrupted (field '%s'): %s", e.Field, e.Reason)
}
func newErrManifestCorrupted(f storage.File, field, reason string) error {
return errors.NewErrCorrupted(f, &ErrManifestCorrupted{field, reason})
func newErrManifestCorrupted(fd storage.FileDesc, field, reason string) error {
return errors.NewErrCorrupted(fd, &ErrManifestCorrupted{field, reason})
}
// session represent a persistent database session.
type session struct {
// Need 64-bit alignment.
stNextFileNum uint64 // current unused file number
stJournalNum uint64 // current journal file number; need external synchronization
stPrevJournalNum uint64 // prev journal file number; no longer used; for compatibility with older version of leveldb
stNextFileNum int64 // current unused file number
stJournalNum int64 // current journal file number; need external synchronization
stPrevJournalNum int64 // prev journal file number; no longer used; for compatibility with older version of leveldb
stTempFileNum int64
stSeqNum uint64 // last mem compacted seq; need external synchronization
stTempFileNum uint64
stor storage.Storage
storLock util.Releaser
storLock storage.Lock
o *cachedOptions
icmp *iComparer
tops *tOps
manifest *journal.Writer
manifestWriter storage.Writer
manifestFile storage.File
manifestFd storage.FileDesc
stCompPtrs []iKey // compaction pointers; need external synchronization
stVersion *version // current version
stCompPtrs []internalKey // compaction pointers; need external synchronization
stVersion *version // current version
vmu sync.Mutex
}
@ -66,9 +66,8 @@ func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) {
return
}
s = &session{
stor: stor,
storLock: storLock,
stCompPtrs: make([]iKey, o.GetNumLevel()),
stor: stor,
storLock: storLock,
}
s.setOptions(o)
s.tops = newTableOps(s)
@ -88,7 +87,6 @@ func (s *session) close() {
}
s.manifest = nil
s.manifestWriter = nil
s.manifestFile = nil
s.stVersion = nil
}
@ -109,18 +107,18 @@ func (s *session) recover() (err error) {
if os.IsNotExist(err) {
// Don't return os.ErrNotExist if the underlying storage contains
// other files that belong to LevelDB. So the DB won't get trashed.
if files, _ := s.stor.GetFiles(storage.TypeAll); len(files) > 0 {
err = &errors.ErrCorrupted{File: &storage.FileInfo{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}}
if fds, _ := s.stor.List(storage.TypeAll); len(fds) > 0 {
err = &errors.ErrCorrupted{Fd: storage.FileDesc{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}}
}
}
}()
m, err := s.stor.GetManifest()
fd, err := s.stor.GetMeta()
if err != nil {
return
}
reader, err := m.Open()
reader, err := s.stor.Open(fd)
if err != nil {
return
}
@ -128,10 +126,9 @@ func (s *session) recover() (err error) {
var (
// Options.
numLevel = s.o.GetNumLevel()
strict = s.o.GetStrict(opt.StrictManifest)
strict = s.o.GetStrict(opt.StrictManifest)
jr = journal.NewReader(reader, dropper{s, m}, strict, true)
jr = journal.NewReader(reader, dropper{s, fd}, strict, true)
rec = &sessionRecord{}
staging = s.stVersion.newStaging()
)
@ -143,24 +140,23 @@ func (s *session) recover() (err error) {
err = nil
break
}
return errors.SetFile(err, m)
return errors.SetFd(err, fd)
}
err = rec.decode(r, numLevel)
err = rec.decode(r)
if err == nil {
// save compact pointers
for _, r := range rec.compPtrs {
s.stCompPtrs[r.level] = iKey(r.ikey)
s.setCompPtr(r.level, internalKey(r.ikey))
}
// commit record to version staging
staging.commit(rec)
} else {
err = errors.SetFile(err, m)
err = errors.SetFd(err, fd)
if strict || !errors.IsCorrupted(err) {
return
} else {
s.logf("manifest error: %v (skipped)", errors.SetFile(err, m))
}
s.logf("manifest error: %v (skipped)", errors.SetFd(err, fd))
}
rec.resetCompPtrs()
rec.resetAddedTables()
@ -169,18 +165,18 @@ func (s *session) recover() (err error) {
switch {
case !rec.has(recComparer):
return newErrManifestCorrupted(m, "comparer", "missing")
return newErrManifestCorrupted(fd, "comparer", "missing")
case rec.comparer != s.icmp.uName():
return newErrManifestCorrupted(m, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer))
return newErrManifestCorrupted(fd, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer))
case !rec.has(recNextFileNum):
return newErrManifestCorrupted(m, "next-file-num", "missing")
return newErrManifestCorrupted(fd, "next-file-num", "missing")
case !rec.has(recJournalNum):
return newErrManifestCorrupted(m, "journal-file-num", "missing")
return newErrManifestCorrupted(fd, "journal-file-num", "missing")
case !rec.has(recSeqNum):
return newErrManifestCorrupted(m, "seq-num", "missing")
return newErrManifestCorrupted(fd, "seq-num", "missing")
}
s.manifestFile = m
s.manifestFd = fd
s.setVersion(staging.finish())
s.setNextFileNum(rec.nextFileNum)
s.recordCommited(rec)

View File

@ -14,41 +14,46 @@ import (
"github.com/syndtr/goleveldb/leveldb/opt"
)
func (s *session) pickMemdbLevel(umin, umax []byte) int {
func (s *session) pickMemdbLevel(umin, umax []byte, maxLevel int) int {
v := s.version()
defer v.release()
return v.pickMemdbLevel(umin, umax)
return v.pickMemdbLevel(umin, umax, maxLevel)
}
func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, level int) (level_ int, err error) {
func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, maxLevel int) (int, error) {
// Create sorted table.
iter := mdb.NewIterator(nil)
defer iter.Release()
t, n, err := s.tops.createFrom(iter)
if err != nil {
return level, err
return 0, err
}
// Pick level and add to record.
if level < 0 {
level = s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey())
}
rec.addTableFile(level, t)
// Picking a level other than zero can cause compaction issues with large
// bulk inserts and deletes on a strictly incrementing key-space. The
// problem is that small deletion markers get trapped at a lower level,
// while key/value entries keep growing at a higher level. Since the
// key-space is strictly incrementing it will not overlap with the
// higher level, thus the maximum possible level is always picked, while
// overlapping deletion markers are pushed into the lower level.
// See: https://github.com/syndtr/goleveldb/issues/127.
flushLevel := s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey(), maxLevel)
rec.addTableFile(flushLevel, t)
s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", level, t.file.Num(), n, shortenb(int(t.size)), t.imin, t.imax)
return level, nil
s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", flushLevel, t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax)
return flushLevel, nil
}
// Pick a compaction based on current state; need external synchronization.
func (s *session) pickCompaction() *compaction {
v := s.version()
var level int
var sourceLevel int
var t0 tFiles
if v.cScore >= 1 {
level = v.cLevel
cptr := s.stCompPtrs[level]
tables := v.tables[level]
sourceLevel = v.cLevel
cptr := s.getCompPtr(sourceLevel)
tables := v.levels[sourceLevel]
for _, t := range tables {
if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 {
t0 = append(t0, t)
@ -61,7 +66,7 @@ func (s *session) pickCompaction() *compaction {
} else {
if p := atomic.LoadPointer(&v.cSeek); p != nil {
ts := (*tSet)(p)
level = ts.level
sourceLevel = ts.level
t0 = append(t0, ts.table)
} else {
v.release()
@ -69,14 +74,19 @@ func (s *session) pickCompaction() *compaction {
}
}
return newCompaction(s, v, level, t0)
return newCompaction(s, v, sourceLevel, t0)
}
// Create compaction from given level and range; need external synchronization.
func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction {
func (s *session) getCompactionRange(sourceLevel int, umin, umax []byte, noLimit bool) *compaction {
v := s.version()
t0 := v.tables[level].getOverlaps(nil, s.icmp, umin, umax, level == 0)
if sourceLevel >= len(v.levels) {
v.release()
return nil
}
t0 := v.levels[sourceLevel].getOverlaps(nil, s.icmp, umin, umax, sourceLevel == 0)
if len(t0) == 0 {
v.release()
return nil
@ -86,9 +96,9 @@ func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction {
// But we cannot do this for level-0 since level-0 files can overlap
// and we must not pick one file and drop another older file if the
// two files overlap.
if level > 0 {
limit := uint64(v.s.o.GetCompactionSourceLimit(level))
total := uint64(0)
if !noLimit && sourceLevel > 0 {
limit := int64(v.s.o.GetCompactionSourceLimit(sourceLevel))
total := int64(0)
for i, t := range t0 {
total += t.size
if total >= limit {
@ -99,17 +109,17 @@ func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction {
}
}
return newCompaction(s, v, level, t0)
return newCompaction(s, v, sourceLevel, t0)
}
func newCompaction(s *session, v *version, level int, t0 tFiles) *compaction {
func newCompaction(s *session, v *version, sourceLevel int, t0 tFiles) *compaction {
c := &compaction{
s: s,
v: v,
level: level,
tables: [2]tFiles{t0, nil},
maxGPOverlaps: uint64(s.o.GetCompactionGPOverlaps(level)),
tPtrs: make([]int, s.o.GetNumLevel()),
sourceLevel: sourceLevel,
levels: [2]tFiles{t0, nil},
maxGPOverlaps: int64(s.o.GetCompactionGPOverlaps(sourceLevel)),
tPtrs: make([]int, len(v.levels)),
}
c.expand()
c.save()
@ -121,21 +131,21 @@ type compaction struct {
s *session
v *version
level int
tables [2]tFiles
maxGPOverlaps uint64
sourceLevel int
levels [2]tFiles
maxGPOverlaps int64
gp tFiles
gpi int
seenKey bool
gpOverlappedBytes uint64
imin, imax iKey
gpOverlappedBytes int64
imin, imax internalKey
tPtrs []int
released bool
snapGPI int
snapSeenKey bool
snapGPOverlappedBytes uint64
snapGPOverlappedBytes int64
snapTPtrs []int
}
@ -162,30 +172,34 @@ func (c *compaction) release() {
// Expand compacted tables; need external synchronization.
func (c *compaction) expand() {
limit := uint64(c.s.o.GetCompactionExpandLimit(c.level))
vt0, vt1 := c.v.tables[c.level], c.v.tables[c.level+1]
limit := int64(c.s.o.GetCompactionExpandLimit(c.sourceLevel))
vt0 := c.v.levels[c.sourceLevel]
vt1 := tFiles{}
if level := c.sourceLevel + 1; level < len(c.v.levels) {
vt1 = c.v.levels[level]
}
t0, t1 := c.tables[0], c.tables[1]
t0, t1 := c.levels[0], c.levels[1]
imin, imax := t0.getRange(c.s.icmp)
// We expand t0 here just in case a ukey hops across tables.
t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.level == 0)
if len(t0) != len(c.tables[0]) {
t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.sourceLevel == 0)
if len(t0) != len(c.levels[0]) {
imin, imax = t0.getRange(c.s.icmp)
}
t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false)
// Get entire range covered by compaction.
amin, amax := append(t0, t1...).getRange(c.s.icmp)
// See if we can grow the number of inputs in "level" without
// changing the number of "level+1" files we pick up.
// See if we can grow the number of inputs in "sourceLevel" without
// changing the number of "sourceLevel+1" files we pick up.
if len(t1) > 0 {
exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.level == 0)
exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.sourceLevel == 0)
if len(exp0) > len(t0) && t1.size()+exp0.size() < limit {
xmin, xmax := exp0.getRange(c.s.icmp)
exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false)
if len(exp1) == len(t1) {
c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)",
c.level, c.level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
c.sourceLevel, c.sourceLevel+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())),
len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size())))
imin, imax = xmin, xmax
t0, t1 = exp0, exp1
@ -195,22 +209,23 @@ func (c *compaction) expand() {
}
// Compute the set of grandparent files that overlap this compaction
// (parent == level+1; grandparent == level+2)
if c.level+2 < c.s.o.GetNumLevel() {
c.gp = c.v.tables[c.level+2].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
// (parent == sourceLevel+1; grandparent == sourceLevel+2)
if level := c.sourceLevel + 2; level < len(c.v.levels) {
c.gp = c.v.levels[level].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false)
}
c.tables[0], c.tables[1] = t0, t1
c.levels[0], c.levels[1] = t0, t1
c.imin, c.imax = imin, imax
}
// Check whether compaction is trivial.
func (c *compaction) trivial() bool {
return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= c.maxGPOverlaps
return len(c.levels[0]) == 1 && len(c.levels[1]) == 0 && c.gp.size() <= c.maxGPOverlaps
}
func (c *compaction) baseLevelForKey(ukey []byte) bool {
for level, tables := range c.v.tables[c.level+2:] {
for level := c.sourceLevel + 2; level < len(c.v.levels); level++ {
tables := c.v.levels[level]
for c.tPtrs[level] < len(tables) {
t := tables[c.tPtrs[level]]
if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 {
@ -227,7 +242,7 @@ func (c *compaction) baseLevelForKey(ukey []byte) bool {
return true
}
func (c *compaction) shouldStopBefore(ikey iKey) bool {
func (c *compaction) shouldStopBefore(ikey internalKey) bool {
for ; c.gpi < len(c.gp); c.gpi++ {
gp := c.gp[c.gpi]
if c.s.icmp.Compare(ikey, gp.imax) <= 0 {
@ -250,10 +265,10 @@ func (c *compaction) shouldStopBefore(ikey iKey) bool {
// Creates an iterator.
func (c *compaction) newIterator() iterator.Iterator {
// Creates iterator slice.
icap := len(c.tables)
if c.level == 0 {
icap := len(c.levels)
if c.sourceLevel == 0 {
// Special case for level-0.
icap = len(c.tables[0]) + 1
icap = len(c.levels[0]) + 1
}
its := make([]iterator.Iterator, 0, icap)
@ -267,13 +282,13 @@ func (c *compaction) newIterator() iterator.Iterator {
ro.Strict |= opt.StrictReader
}
for i, tables := range c.tables {
for i, tables := range c.levels {
if len(tables) == 0 {
continue
}
// Level-0 tables are not sorted and may overlap each other.
if c.level+i == 0 {
if c.sourceLevel+i == 0 {
for _, t := range tables {
its = append(its, c.s.tops.newIterator(t, nil, ro))
}

View File

@ -13,6 +13,7 @@ import (
"strings"
"github.com/syndtr/goleveldb/leveldb/errors"
"github.com/syndtr/goleveldb/leveldb/storage"
)
type byteReader interface {
@ -35,28 +36,28 @@ const (
type cpRecord struct {
level int
ikey iKey
ikey internalKey
}
type atRecord struct {
level int
num uint64
size uint64
imin iKey
imax iKey
num int64
size int64
imin internalKey
imax internalKey
}
type dtRecord struct {
level int
num uint64
num int64
}
type sessionRecord struct {
hasRec int
comparer string
journalNum uint64
prevJournalNum uint64
nextFileNum uint64
journalNum int64
prevJournalNum int64
nextFileNum int64
seqNum uint64
compPtrs []cpRecord
addedTables []atRecord
@ -75,17 +76,17 @@ func (p *sessionRecord) setComparer(name string) {
p.comparer = name
}
func (p *sessionRecord) setJournalNum(num uint64) {
func (p *sessionRecord) setJournalNum(num int64) {
p.hasRec |= 1 << recJournalNum
p.journalNum = num
}
func (p *sessionRecord) setPrevJournalNum(num uint64) {
func (p *sessionRecord) setPrevJournalNum(num int64) {
p.hasRec |= 1 << recPrevJournalNum
p.prevJournalNum = num
}
func (p *sessionRecord) setNextFileNum(num uint64) {
func (p *sessionRecord) setNextFileNum(num int64) {
p.hasRec |= 1 << recNextFileNum
p.nextFileNum = num
}
@ -95,7 +96,7 @@ func (p *sessionRecord) setSeqNum(num uint64) {
p.seqNum = num
}
func (p *sessionRecord) addCompPtr(level int, ikey iKey) {
func (p *sessionRecord) addCompPtr(level int, ikey internalKey) {
p.hasRec |= 1 << recCompPtr
p.compPtrs = append(p.compPtrs, cpRecord{level, ikey})
}
@ -105,13 +106,13 @@ func (p *sessionRecord) resetCompPtrs() {
p.compPtrs = p.compPtrs[:0]
}
func (p *sessionRecord) addTable(level int, num, size uint64, imin, imax iKey) {
func (p *sessionRecord) addTable(level int, num, size int64, imin, imax internalKey) {
p.hasRec |= 1 << recAddTable
p.addedTables = append(p.addedTables, atRecord{level, num, size, imin, imax})
}
func (p *sessionRecord) addTableFile(level int, t *tFile) {
p.addTable(level, t.file.Num(), t.size, t.imin, t.imax)
p.addTable(level, t.fd.Num, t.size, t.imin, t.imax)
}
func (p *sessionRecord) resetAddedTables() {
@ -119,7 +120,7 @@ func (p *sessionRecord) resetAddedTables() {
p.addedTables = p.addedTables[:0]
}
func (p *sessionRecord) delTable(level int, num uint64) {
func (p *sessionRecord) delTable(level int, num int64) {
p.hasRec |= 1 << recDelTable
p.deletedTables = append(p.deletedTables, dtRecord{level, num})
}
@ -137,6 +138,13 @@ func (p *sessionRecord) putUvarint(w io.Writer, x uint64) {
_, p.err = w.Write(p.scratch[:n])
}
// putVarint writes the non-negative value x to w by delegating to
// putUvarint. It panics with "invalid negative value" if x is negative.
func (p *sessionRecord) putVarint(w io.Writer, x int64) {
	if x >= 0 {
		p.putUvarint(w, uint64(x))
		return
	}
	panic("invalid negative value")
}
func (p *sessionRecord) putBytes(w io.Writer, x []byte) {
if p.err != nil {
return
@ -156,11 +164,11 @@ func (p *sessionRecord) encode(w io.Writer) error {
}
if p.has(recJournalNum) {
p.putUvarint(w, recJournalNum)
p.putUvarint(w, p.journalNum)
p.putVarint(w, p.journalNum)
}
if p.has(recNextFileNum) {
p.putUvarint(w, recNextFileNum)
p.putUvarint(w, p.nextFileNum)
p.putVarint(w, p.nextFileNum)
}
if p.has(recSeqNum) {
p.putUvarint(w, recSeqNum)
@ -174,13 +182,13 @@ func (p *sessionRecord) encode(w io.Writer) error {
for _, r := range p.deletedTables {
p.putUvarint(w, recDelTable)
p.putUvarint(w, uint64(r.level))
p.putUvarint(w, r.num)
p.putVarint(w, r.num)
}
for _, r := range p.addedTables {
p.putUvarint(w, recAddTable)
p.putUvarint(w, uint64(r.level))
p.putUvarint(w, r.num)
p.putUvarint(w, r.size)
p.putVarint(w, r.num)
p.putVarint(w, r.size)
p.putBytes(w, r.imin)
p.putBytes(w, r.imax)
}
@ -194,9 +202,9 @@ func (p *sessionRecord) readUvarintMayEOF(field string, r io.ByteReader, mayEOF
x, err := binary.ReadUvarint(r)
if err != nil {
if err == io.ErrUnexpectedEOF || (mayEOF == false && err == io.EOF) {
p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "short read"})
p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "short read"})
} else if strings.HasPrefix(err.Error(), "binary:") {
p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, err.Error()})
p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, err.Error()})
} else {
p.err = err
}
@ -209,6 +217,14 @@ func (p *sessionRecord) readUvarint(field string, r io.ByteReader) uint64 {
return p.readUvarintMayEOF(field, r, false)
}
// readVarint reads an unsigned varint for the named field through
// readUvarintMayEOF (with mayEOF set to false) and converts it to int64.
// If the converted value is negative, p.err is set to a manifest-corruption
// error for that field; the value is returned either way, so callers are
// expected to check p.err.
func (p *sessionRecord) readVarint(field string, r io.ByteReader) int64 {
	v := int64(p.readUvarintMayEOF(field, r, false))
	if v >= 0 {
		return v
	}
	p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "invalid negative value"})
	return v
}
func (p *sessionRecord) readBytes(field string, r byteReader) []byte {
if p.err != nil {
return nil
@ -221,14 +237,14 @@ func (p *sessionRecord) readBytes(field string, r byteReader) []byte {
_, p.err = io.ReadFull(r, x)
if p.err != nil {
if p.err == io.ErrUnexpectedEOF {
p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "short read"})
p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "short read"})
}
return nil
}
return x
}
func (p *sessionRecord) readLevel(field string, r io.ByteReader, numLevel int) int {
func (p *sessionRecord) readLevel(field string, r io.ByteReader) int {
if p.err != nil {
return 0
}
@ -236,14 +252,10 @@ func (p *sessionRecord) readLevel(field string, r io.ByteReader, numLevel int) i
if p.err != nil {
return 0
}
if x >= uint64(numLevel) {
p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "invalid level number"})
return 0
}
return int(x)
}
func (p *sessionRecord) decode(r io.Reader, numLevel int) error {
func (p *sessionRecord) decode(r io.Reader) error {
br, ok := r.(byteReader)
if !ok {
br = bufio.NewReader(r)
@ -264,17 +276,17 @@ func (p *sessionRecord) decode(r io.Reader, numLevel int) error {
p.setComparer(string(x))
}
case recJournalNum:
x := p.readUvarint("journal-num", br)
x := p.readVarint("journal-num", br)
if p.err == nil {
p.setJournalNum(x)
}
case recPrevJournalNum:
x := p.readUvarint("prev-journal-num", br)
x := p.readVarint("prev-journal-num", br)
if p.err == nil {
p.setPrevJournalNum(x)
}
case recNextFileNum:
x := p.readUvarint("next-file-num", br)
x := p.readVarint("next-file-num", br)
if p.err == nil {
p.setNextFileNum(x)
}
@ -284,23 +296,23 @@ func (p *sessionRecord) decode(r io.Reader, numLevel int) error {
p.setSeqNum(x)
}
case recCompPtr:
level := p.readLevel("comp-ptr.level", br, numLevel)
level := p.readLevel("comp-ptr.level", br)
ikey := p.readBytes("comp-ptr.ikey", br)
if p.err == nil {
p.addCompPtr(level, iKey(ikey))
p.addCompPtr(level, internalKey(ikey))
}
case recAddTable:
level := p.readLevel("add-table.level", br, numLevel)
num := p.readUvarint("add-table.num", br)
size := p.readUvarint("add-table.size", br)
level := p.readLevel("add-table.level", br)
num := p.readVarint("add-table.num", br)
size := p.readVarint("add-table.size", br)
imin := p.readBytes("add-table.imin", br)
imax := p.readBytes("add-table.imax", br)
if p.err == nil {
p.addTable(level, num, size, imin, imax)
}
case recDelTable:
level := p.readLevel("del-table.level", br, numLevel)
num := p.readUvarint("del-table.num", br)
level := p.readLevel("del-table.level", br)
num := p.readVarint("del-table.num", br)
if p.err == nil {
p.delTable(level, num)
}

View File

@ -17,15 +17,15 @@ import (
// Logging.
type dropper struct {
s *session
file storage.File
s *session
fd storage.FileDesc
}
func (d dropper) Drop(err error) {
if e, ok := err.(*journal.ErrCorrupted); ok {
d.s.logf("journal@drop %s-%d S·%s %q", d.file.Type(), d.file.Num(), shortenb(e.Size), e.Reason)
d.s.logf("journal@drop %s-%d S·%s %q", d.fd.Type, d.fd.Num, shortenb(e.Size), e.Reason)
} else {
d.s.logf("journal@drop %s-%d %q", d.file.Type(), d.file.Num(), err)
d.s.logf("journal@drop %s-%d %q", d.fd.Type, d.fd.Num, err)
}
}
@ -34,25 +34,9 @@ func (s *session) logf(format string, v ...interface{}) { s.stor.Log(fmt.Sprintf
// File utils.
func (s *session) getJournalFile(num uint64) storage.File {
return s.stor.GetFile(num, storage.TypeJournal)
}
func (s *session) getTableFile(num uint64) storage.File {
return s.stor.GetFile(num, storage.TypeTable)
}
func (s *session) getFiles(t storage.FileType) ([]storage.File, error) {
return s.stor.GetFiles(t)
}
func (s *session) newTemp() storage.File {
num := atomic.AddUint64(&s.stTempFileNum, 1) - 1
return s.stor.GetFile(num, storage.TypeTemp)
}
func (s *session) tableFileFromRecord(r atRecord) *tFile {
return newTableFile(s.getTableFile(r.num), r.size, r.imin, r.imax)
func (s *session) newTemp() storage.FileDesc {
num := atomic.AddInt64(&s.stTempFileNum, 1) - 1
return storage.FileDesc{storage.TypeTemp, num}
}
// Session state.
@ -80,47 +64,65 @@ func (s *session) setVersion(v *version) {
}
// Get current unused file number.
func (s *session) nextFileNum() uint64 {
return atomic.LoadUint64(&s.stNextFileNum)
func (s *session) nextFileNum() int64 {
return atomic.LoadInt64(&s.stNextFileNum)
}
// Set current unused file number to num.
func (s *session) setNextFileNum(num uint64) {
atomic.StoreUint64(&s.stNextFileNum, num)
func (s *session) setNextFileNum(num int64) {
atomic.StoreInt64(&s.stNextFileNum, num)
}
// Mark file number as used.
func (s *session) markFileNum(num uint64) {
func (s *session) markFileNum(num int64) {
nextFileNum := num + 1
for {
old, x := s.stNextFileNum, nextFileNum
if old > x {
x = old
}
if atomic.CompareAndSwapUint64(&s.stNextFileNum, old, x) {
if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) {
break
}
}
}
// Allocate a file number.
func (s *session) allocFileNum() uint64 {
return atomic.AddUint64(&s.stNextFileNum, 1) - 1
func (s *session) allocFileNum() int64 {
return atomic.AddInt64(&s.stNextFileNum, 1) - 1
}
// Reuse given file number.
func (s *session) reuseFileNum(num uint64) {
func (s *session) reuseFileNum(num int64) {
for {
old, x := s.stNextFileNum, num
if old != x+1 {
x = old
}
if atomic.CompareAndSwapUint64(&s.stNextFileNum, old, x) {
if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) {
break
}
}
}
// setCompPtr stores a private copy of ik as the compaction pointer for the
// given level, growing the pointer table when the level is not yet covered.
// Callers need to provide external synchronization.
func (s *session) setCompPtr(level int, ik internalKey) {
	if want := level + 1; want > len(s.stCompPtrs) {
		grown := make([]internalKey, want)
		copy(grown, s.stCompPtrs)
		s.stCompPtrs = grown
	}
	// Keep our own copy so the stored key does not alias the caller's slice.
	s.stCompPtrs[level] = append(internalKey{}, ik...)
}
// getCompPtr returns the compaction pointer recorded for the given level, or
// nil when the pointer table has no slot for that level. Callers need to
// provide external synchronization.
func (s *session) getCompPtr(level int) internalKey {
	if level < len(s.stCompPtrs) {
		return s.stCompPtrs[level]
	}
	return nil
}
// Manifest related utils.
// Fill given session record obj with current states; need external
@ -149,29 +151,28 @@ func (s *session) fillRecord(r *sessionRecord, snapshot bool) {
// Mark if record has been committed, this will update session state;
// need external synchronization.
func (s *session) recordCommited(r *sessionRecord) {
if r.has(recJournalNum) {
s.stJournalNum = r.journalNum
func (s *session) recordCommited(rec *sessionRecord) {
if rec.has(recJournalNum) {
s.stJournalNum = rec.journalNum
}
if r.has(recPrevJournalNum) {
s.stPrevJournalNum = r.prevJournalNum
if rec.has(recPrevJournalNum) {
s.stPrevJournalNum = rec.prevJournalNum
}
if r.has(recSeqNum) {
s.stSeqNum = r.seqNum
if rec.has(recSeqNum) {
s.stSeqNum = rec.seqNum
}
for _, p := range r.compPtrs {
s.stCompPtrs[p.level] = iKey(p.ikey)
for _, r := range rec.compPtrs {
s.setCompPtr(r.level, internalKey(r.ikey))
}
}
// Create a new manifest file; need external synchronization.
func (s *session) newManifest(rec *sessionRecord, v *version) (err error) {
num := s.allocFileNum()
file := s.stor.GetFile(num, storage.TypeManifest)
writer, err := file.Create()
fd := storage.FileDesc{storage.TypeManifest, s.allocFileNum()}
writer, err := s.stor.Create(fd)
if err != nil {
return
}
@ -196,16 +197,16 @@ func (s *session) newManifest(rec *sessionRecord, v *version) (err error) {
if s.manifestWriter != nil {
s.manifestWriter.Close()
}
if s.manifestFile != nil {
s.manifestFile.Remove()
if !s.manifestFd.Nil() {
s.stor.Remove(s.manifestFd)
}
s.manifestFile = file
s.manifestFd = fd
s.manifestWriter = writer
s.manifest = jw
} else {
writer.Close()
file.Remove()
s.reuseFileNum(num)
s.stor.Remove(fd)
s.reuseFileNum(fd.Num)
}
}()
@ -221,7 +222,7 @@ func (s *session) newManifest(rec *sessionRecord, v *version) (err error) {
if err != nil {
return
}
err = s.stor.SetManifest(file)
err = s.stor.SetMeta(fd)
return
}

View File

@ -17,11 +17,12 @@ import (
"strings"
"sync"
"time"
"github.com/syndtr/goleveldb/leveldb/util"
)
var errFileOpen = errors.New("leveldb/storage: file still open")
var (
errFileOpen = errors.New("leveldb/storage: file still open")
errReadOnly = errors.New("leveldb/storage: storage is read-only")
)
type fileLock interface {
release() error
@ -32,40 +33,52 @@ type fileStorageLock struct {
}
func (lock *fileStorageLock) Release() {
fs := lock.fs
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.slock == lock {
fs.slock = nil
if lock.fs != nil {
lock.fs.mu.Lock()
defer lock.fs.mu.Unlock()
if lock.fs.slock == lock {
lock.fs.slock = nil
}
}
return
}
const logSizeThreshold = 1024 * 1024 // 1 MiB
// fileStorage is a file-system backed storage.
type fileStorage struct {
path string
path string
readOnly bool
mu sync.Mutex
flock fileLock
slock *fileStorageLock
logw *os.File
buf []byte
mu sync.Mutex
flock fileLock
slock *fileStorageLock
logw *os.File
logSize int64
buf []byte
// Opened file counter; if open < 0 means closed.
open int
day int
}
// OpenFile returns a new filesystem-backed storage implementation with the given
// path. This also hold a file lock, so any subsequent attempt to open the same
// path will fail.
// path. This also acquires a file lock, so any subsequent attempt to open the
// same path will fail.
//
// The storage must be closed after use, by calling Close method.
func OpenFile(path string) (Storage, error) {
if err := os.MkdirAll(path, 0755); err != nil {
func OpenFile(path string, readOnly bool) (Storage, error) {
if fi, err := os.Stat(path); err == nil {
if !fi.IsDir() {
return nil, fmt.Errorf("leveldb/storage: open %s: not a directory", path)
}
} else if os.IsNotExist(err) && !readOnly {
if err := os.MkdirAll(path, 0755); err != nil {
return nil, err
}
} else {
return nil, err
}
flock, err := newFileLock(filepath.Join(path, "LOCK"))
flock, err := newFileLock(filepath.Join(path, "LOCK"), readOnly)
if err != nil {
return nil, err
}
@ -76,23 +89,42 @@ func OpenFile(path string) (Storage, error) {
}
}()
rename(filepath.Join(path, "LOG"), filepath.Join(path, "LOG.old"))
logw, err := os.OpenFile(filepath.Join(path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
return nil, err
var (
logw *os.File
logSize int64
)
if !readOnly {
logw, err = os.OpenFile(filepath.Join(path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
return nil, err
}
logSize, err = logw.Seek(0, os.SEEK_END)
if err != nil {
logw.Close()
return nil, err
}
}
fs := &fileStorage{path: path, flock: flock, logw: logw}
fs := &fileStorage{
path: path,
readOnly: readOnly,
flock: flock,
logw: logw,
logSize: logSize,
}
runtime.SetFinalizer(fs, (*fileStorage).Close)
return fs, nil
}
func (fs *fileStorage) Lock() (util.Releaser, error) {
func (fs *fileStorage) Lock() (Lock, error) {
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return nil, ErrClosed
}
if fs.readOnly {
return &fileStorageLock{}, nil
}
if fs.slock != nil {
return nil, ErrLocked
}
@ -101,7 +133,7 @@ func (fs *fileStorage) Lock() (util.Releaser, error) {
}
func itoa(buf []byte, i int, wid int) []byte {
var u uint = uint(i)
u := uint(i)
if u == 0 && wid <= 1 {
return append(buf, '0')
}
@ -126,6 +158,22 @@ func (fs *fileStorage) printDay(t time.Time) {
}
func (fs *fileStorage) doLog(t time.Time, str string) {
if fs.logSize > logSizeThreshold {
// Rotate log file.
fs.logw.Close()
fs.logw = nil
fs.logSize = 0
rename(filepath.Join(fs.path, "LOG"), filepath.Join(fs.path, "LOG.old"))
}
if fs.logw == nil {
var err error
fs.logw, err = os.OpenFile(filepath.Join(fs.path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
return
}
// Force printDay on new log file.
fs.day = 0
}
fs.printDay(t)
hour, min, sec := t.Clock()
msec := t.Nanosecond() / 1e3
@ -145,65 +193,71 @@ func (fs *fileStorage) doLog(t time.Time, str string) {
}
func (fs *fileStorage) Log(str string) {
t := time.Now()
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return
if !fs.readOnly {
t := time.Now()
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return
}
fs.doLog(t, str)
}
fs.doLog(t, str)
}
func (fs *fileStorage) log(str string) {
fs.doLog(time.Now(), str)
if !fs.readOnly {
fs.doLog(time.Now(), str)
}
}
func (fs *fileStorage) GetFile(num uint64, t FileType) File {
return &file{fs: fs, num: num, t: t}
}
func (fs *fileStorage) SetMeta(fd FileDesc) (err error) {
if !FileDescOk(fd) {
return ErrInvalidFile
}
if fs.readOnly {
return errReadOnly
}
func (fs *fileStorage) GetFiles(t FileType) (ff []File, err error) {
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return nil, ErrClosed
return ErrClosed
}
dir, err := os.Open(fs.path)
if err != nil {
return
}
fnn, err := dir.Readdirnames(0)
// Close the dir first before checking for Readdirnames error.
if err := dir.Close(); err != nil {
fs.log(fmt.Sprintf("close dir: %v", err))
}
if err != nil {
return
}
f := &file{fs: fs}
for _, fn := range fnn {
if f.parse(fn) && (f.t&t) != 0 {
ff = append(ff, f)
f = &file{fs: fs}
defer func() {
if err != nil {
fs.log(fmt.Sprintf("CURRENT: %v", err))
}
}()
path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num)
w, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {
return
}
return
_, err = fmt.Fprintln(w, fsGenName(fd))
// Close the file first.
if cerr := w.Close(); cerr != nil {
fs.log(fmt.Sprintf("close CURRENT.%d: %v", fd.Num, cerr))
}
if err != nil {
return
}
return rename(path, filepath.Join(fs.path, "CURRENT"))
}
func (fs *fileStorage) GetManifest() (f File, err error) {
func (fs *fileStorage) GetMeta() (fd FileDesc, err error) {
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return nil, ErrClosed
return FileDesc{}, ErrClosed
}
dir, err := os.Open(fs.path)
if err != nil {
return
}
fnn, err := dir.Readdirnames(0)
names, err := dir.Readdirnames(0)
// Close the dir first before checking for Readdirnames error.
if err := dir.Close(); err != nil {
fs.log(fmt.Sprintf("close dir: %v", err))
if ce := dir.Close(); ce != nil {
fs.log(fmt.Sprintf("close dir: %v", ce))
}
if err != nil {
return
@ -212,58 +266,64 @@ func (fs *fileStorage) GetManifest() (f File, err error) {
var rem []string
var pend bool
var cerr error
for _, fn := range fnn {
if strings.HasPrefix(fn, "CURRENT") {
pend1 := len(fn) > 7
for _, name := range names {
if strings.HasPrefix(name, "CURRENT") {
pend1 := len(name) > 7
var pendNum int64
// Make sure it is valid name for a CURRENT file, otherwise skip it.
if pend1 {
if fn[7] != '.' || len(fn) < 9 {
fs.log(fmt.Sprintf("skipping %s: invalid file name", fn))
if name[7] != '.' || len(name) < 9 {
fs.log(fmt.Sprintf("skipping %s: invalid file name", name))
continue
}
if _, e1 := strconv.ParseUint(fn[8:], 10, 0); e1 != nil {
fs.log(fmt.Sprintf("skipping %s: invalid file num: %v", fn, e1))
var e1 error
if pendNum, e1 = strconv.ParseInt(name[8:], 10, 0); e1 != nil {
fs.log(fmt.Sprintf("skipping %s: invalid file num: %v", name, e1))
continue
}
}
path := filepath.Join(fs.path, fn)
path := filepath.Join(fs.path, name)
r, e1 := os.OpenFile(path, os.O_RDONLY, 0)
if e1 != nil {
return nil, e1
return FileDesc{}, e1
}
b, e1 := ioutil.ReadAll(r)
if e1 != nil {
r.Close()
return nil, e1
return FileDesc{}, e1
}
f1 := &file{fs: fs}
if len(b) < 1 || b[len(b)-1] != '\n' || !f1.parse(string(b[:len(b)-1])) {
fs.log(fmt.Sprintf("skipping %s: corrupted or incomplete", fn))
var fd1 FileDesc
if len(b) < 1 || b[len(b)-1] != '\n' || !fsParseNamePtr(string(b[:len(b)-1]), &fd1) {
fs.log(fmt.Sprintf("skipping %s: corrupted or incomplete", name))
if pend1 {
rem = append(rem, fn)
rem = append(rem, name)
}
if !pend1 || cerr == nil {
metaFd, _ := fsParseName(name)
cerr = &ErrCorrupted{
File: fsParseName(filepath.Base(fn)),
Err: errors.New("leveldb/storage: corrupted or incomplete manifest file"),
Fd: metaFd,
Err: errors.New("leveldb/storage: corrupted or incomplete meta file"),
}
}
} else if f != nil && f1.Num() < f.Num() {
fs.log(fmt.Sprintf("skipping %s: obsolete", fn))
} else if pend1 && pendNum != fd1.Num {
fs.log(fmt.Sprintf("skipping %s: inconsistent pending-file num: %d vs %d", name, pendNum, fd1.Num))
rem = append(rem, name)
} else if fd1.Num < fd.Num {
fs.log(fmt.Sprintf("skipping %s: obsolete", name))
if pend1 {
rem = append(rem, fn)
rem = append(rem, name)
}
} else {
f = f1
fd = fd1
pend = pend1
}
if err := r.Close(); err != nil {
fs.log(fmt.Sprintf("close %s: %v", fn, err))
fs.log(fmt.Sprintf("close %s: %v", name, err))
}
}
}
// Don't remove any files if there is no valid CURRENT file.
if f == nil {
if fd.Nil() {
if cerr != nil {
err = cerr
} else {
@ -271,52 +331,140 @@ func (fs *fileStorage) GetManifest() (f File, err error) {
}
return
}
// Rename pending CURRENT file to an effective CURRENT.
if pend {
path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), f.Num())
if err := rename(path, filepath.Join(fs.path, "CURRENT")); err != nil {
fs.log(fmt.Sprintf("CURRENT.%d -> CURRENT: %v", f.Num(), err))
if !fs.readOnly {
// Rename pending CURRENT file to an effective CURRENT.
if pend {
path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num)
if err := rename(path, filepath.Join(fs.path, "CURRENT")); err != nil {
fs.log(fmt.Sprintf("CURRENT.%d -> CURRENT: %v", fd.Num, err))
}
}
}
// Remove obsolete or incomplete pending CURRENT files.
for _, fn := range rem {
path := filepath.Join(fs.path, fn)
if err := os.Remove(path); err != nil {
fs.log(fmt.Sprintf("remove %s: %v", fn, err))
// Remove obsolete or incomplete pending CURRENT files.
for _, name := range rem {
path := filepath.Join(fs.path, name)
if err := os.Remove(path); err != nil {
fs.log(fmt.Sprintf("remove %s: %v", name, err))
}
}
}
return
}
func (fs *fileStorage) SetManifest(f File) (err error) {
// List returns a descriptor for every entry in the storage directory whose
// name parses as a storage file and whose type is selected by the ft bitmask.
// It returns ErrClosed once the storage has been closed.
func (fs *fileStorage) List(ft FileType) (fds []FileDesc, err error) {
	fs.mu.Lock()
	defer fs.mu.Unlock()
	if fs.open < 0 {
		return nil, ErrClosed
	}
	dir, err := os.Open(fs.path)
	if err != nil {
		return
	}
	names, err := dir.Readdirnames(0)
	// Close the dir first before checking for the Readdirnames error; a
	// failed close is only logged.
	if closeErr := dir.Close(); closeErr != nil {
		fs.log(fmt.Sprintf("close dir: %v", closeErr))
	}
	if err != nil {
		return
	}
	for _, name := range names {
		fd, ok := fsParseName(name)
		if !ok || fd.Type&ft == 0 {
			continue
		}
		fds = append(fds, fd)
	}
	return
}
// Open opens the file described by fd for reading and counts it as an open
// file of the storage.
//
// It returns ErrInvalidFile for an invalid descriptor and ErrClosed if the
// storage has been closed. When the file is missing under its current name
// and the file type has an old-style name, the old name is tried as well; if
// that also fails, the error of the second attempt is returned.
func (fs *fileStorage) Open(fd FileDesc) (Reader, error) {
	if !FileDescOk(fd) {
		return nil, ErrInvalidFile
	}

	fs.mu.Lock()
	defer fs.mu.Unlock()
	if fs.open < 0 {
		return nil, ErrClosed
	}

	of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_RDONLY, 0)
	if err != nil && fsHasOldName(fd) && os.IsNotExist(err) {
		// Fall back to the old file name.
		of, err = os.OpenFile(filepath.Join(fs.path, fsGenOldName(fd)), os.O_RDONLY, 0)
	}
	if err != nil {
		return nil, err
	}
	fs.open++
	return &fileWrap{File: of, fs: fs, fd: fd}, nil
}
// Create creates the file described by fd for writing, truncating it if it
// already exists, and counts it as an open file of the storage.
//
// It returns ErrInvalidFile for an invalid descriptor, errReadOnly when the
// storage was opened read-only, and ErrClosed if the storage has been closed.
func (fs *fileStorage) Create(fd FileDesc) (Writer, error) {
	switch {
	case !FileDescOk(fd):
		return nil, ErrInvalidFile
	case fs.readOnly:
		return nil, errReadOnly
	}

	fs.mu.Lock()
	defer fs.mu.Unlock()
	if fs.open < 0 {
		return nil, ErrClosed
	}

	target := filepath.Join(fs.path, fsGenName(fd))
	of, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return nil, err
	}
	fs.open++
	return &fileWrap{File: of, fs: fs, fd: fd}, nil
}
func (fs *fileStorage) Remove(fd FileDesc) error {
if !FileDescOk(fd) {
return ErrInvalidFile
}
if fs.readOnly {
return errReadOnly
}
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return ErrClosed
}
f2, ok := f.(*file)
if !ok || f2.t != TypeManifest {
err := os.Remove(filepath.Join(fs.path, fsGenName(fd)))
if err != nil {
if fsHasOldName(fd) && os.IsNotExist(err) {
if e1 := os.Remove(filepath.Join(fs.path, fsGenOldName(fd))); !os.IsNotExist(e1) {
fs.log(fmt.Sprintf("remove %s: %v (old name)", fd, err))
err = e1
}
} else {
fs.log(fmt.Sprintf("remove %s: %v", fd, err))
}
}
return err
}
func (fs *fileStorage) Rename(oldfd, newfd FileDesc) error {
if !FileDescOk(oldfd) || !FileDescOk(newfd) {
return ErrInvalidFile
}
defer func() {
if err != nil {
fs.log(fmt.Sprintf("CURRENT: %v", err))
}
}()
path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), f2.Num())
w, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {
return err
if oldfd == newfd {
return nil
}
_, err = fmt.Fprintln(w, f2.name())
// Close the file first.
if err := w.Close(); err != nil {
fs.log(fmt.Sprintf("close CURRENT.%d: %v", f2.num, err))
if fs.readOnly {
return errReadOnly
}
if err != nil {
return err
fs.mu.Lock()
defer fs.mu.Unlock()
if fs.open < 0 {
return ErrClosed
}
return rename(path, filepath.Join(fs.path, "CURRENT"))
return rename(filepath.Join(fs.path, fsGenName(oldfd)), filepath.Join(fs.path, fsGenName(newfd)))
}
func (fs *fileStorage) Close() error {
@ -332,212 +480,104 @@ func (fs *fileStorage) Close() error {
fs.log(fmt.Sprintf("close: warning, %d files still open", fs.open))
}
fs.open = -1
e1 := fs.logw.Close()
err := fs.flock.release()
if err == nil {
err = e1
if fs.logw != nil {
fs.logw.Close()
}
return err
return fs.flock.release()
}
type fileWrap struct {
*os.File
f *file
fs *fileStorage
fd FileDesc
closed bool
}
func (fw fileWrap) Sync() error {
func (fw *fileWrap) Sync() error {
if err := fw.File.Sync(); err != nil {
return err
}
if fw.f.Type() == TypeManifest {
if fw.fd.Type == TypeManifest {
// Also sync parent directory if file type is manifest.
// See: https://code.google.com/p/leveldb/issues/detail?id=190.
if err := syncDir(fw.f.fs.path); err != nil {
if err := syncDir(fw.fs.path); err != nil {
fw.fs.log(fmt.Sprintf("syncDir: %v", err))
return err
}
}
return nil
}
func (fw fileWrap) Close() error {
f := fw.f
f.fs.mu.Lock()
defer f.fs.mu.Unlock()
if !f.open {
func (fw *fileWrap) Close() error {
fw.fs.mu.Lock()
defer fw.fs.mu.Unlock()
if fw.closed {
return ErrClosed
}
f.open = false
f.fs.open--
fw.closed = true
fw.fs.open--
err := fw.File.Close()
if err != nil {
f.fs.log(fmt.Sprintf("close %s.%d: %v", f.Type(), f.Num(), err))
fw.fs.log(fmt.Sprintf("close %s: %v", fw.fd, err))
}
return err
}
type file struct {
fs *fileStorage
num uint64
t FileType
open bool
}
func (f *file) Open() (Reader, error) {
f.fs.mu.Lock()
defer f.fs.mu.Unlock()
if f.fs.open < 0 {
return nil, ErrClosed
}
if f.open {
return nil, errFileOpen
}
of, err := os.OpenFile(f.path(), os.O_RDONLY, 0)
if err != nil {
if f.hasOldName() && os.IsNotExist(err) {
of, err = os.OpenFile(f.oldPath(), os.O_RDONLY, 0)
if err == nil {
goto ok
}
}
return nil, err
}
ok:
f.open = true
f.fs.open++
return fileWrap{of, f}, nil
}
func (f *file) Create() (Writer, error) {
f.fs.mu.Lock()
defer f.fs.mu.Unlock()
if f.fs.open < 0 {
return nil, ErrClosed
}
if f.open {
return nil, errFileOpen
}
of, err := os.OpenFile(f.path(), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
if err != nil {
return nil, err
}
f.open = true
f.fs.open++
return fileWrap{of, f}, nil
}
func (f *file) Replace(newfile File) error {
f.fs.mu.Lock()
defer f.fs.mu.Unlock()
if f.fs.open < 0 {
return ErrClosed
}
newfile2, ok := newfile.(*file)
if !ok {
return ErrInvalidFile
}
if f.open || newfile2.open {
return errFileOpen
}
return rename(newfile2.path(), f.path())
}
func (f *file) Type() FileType {
return f.t
}
func (f *file) Num() uint64 {
return f.num
}
func (f *file) Remove() error {
f.fs.mu.Lock()
defer f.fs.mu.Unlock()
if f.fs.open < 0 {
return ErrClosed
}
if f.open {
return errFileOpen
}
err := os.Remove(f.path())
if err != nil {
f.fs.log(fmt.Sprintf("remove %s.%d: %v", f.Type(), f.Num(), err))
}
// Also try remove file with old name, just in case.
if f.hasOldName() {
if e1 := os.Remove(f.oldPath()); !os.IsNotExist(e1) {
f.fs.log(fmt.Sprintf("remove %s.%d: %v (old name)", f.Type(), f.Num(), err))
err = e1
}
}
return err
}
func (f *file) hasOldName() bool {
return f.t == TypeTable
}
func (f *file) oldName() string {
switch f.t {
case TypeTable:
return fmt.Sprintf("%06d.sst", f.num)
}
return f.name()
}
func (f *file) oldPath() string {
return filepath.Join(f.fs.path, f.oldName())
}
func (f *file) name() string {
switch f.t {
func fsGenName(fd FileDesc) string {
switch fd.Type {
case TypeManifest:
return fmt.Sprintf("MANIFEST-%06d", f.num)
return fmt.Sprintf("MANIFEST-%06d", fd.Num)
case TypeJournal:
return fmt.Sprintf("%06d.log", f.num)
return fmt.Sprintf("%06d.log", fd.Num)
case TypeTable:
return fmt.Sprintf("%06d.ldb", f.num)
return fmt.Sprintf("%06d.ldb", fd.Num)
case TypeTemp:
return fmt.Sprintf("%06d.tmp", f.num)
return fmt.Sprintf("%06d.tmp", fd.Num)
default:
panic("invalid file type")
}
}
func (f *file) path() string {
return filepath.Join(f.fs.path, f.name())
// fsHasOldName reports whether files of fd's type may also exist under an
// old-style file name; this is true only for table files.
func fsHasOldName(fd FileDesc) bool {
return fd.Type == TypeTable
}
func fsParseName(name string) *FileInfo {
fi := &FileInfo{}
// fsGenOldName returns the old-style file name for fd. Only table files have
// one (the ".sst" suffix); every other type gets its regular name.
func fsGenOldName(fd FileDesc) string {
	if fd.Type == TypeTable {
		return fmt.Sprintf("%06d.sst", fd.Num)
	}
	return fsGenName(fd)
}
func fsParseName(name string) (fd FileDesc, ok bool) {
var tail string
_, err := fmt.Sscanf(name, "%d.%s", &fi.Num, &tail)
_, err := fmt.Sscanf(name, "%d.%s", &fd.Num, &tail)
if err == nil {
switch tail {
case "log":
fi.Type = TypeJournal
fd.Type = TypeJournal
case "ldb", "sst":
fi.Type = TypeTable
fd.Type = TypeTable
case "tmp":
fi.Type = TypeTemp
fd.Type = TypeTemp
default:
return nil
return
}
return fi
return fd, true
}
n, _ := fmt.Sscanf(name, "MANIFEST-%d%s", &fi.Num, &tail)
n, _ := fmt.Sscanf(name, "MANIFEST-%d%s", &fd.Num, &tail)
if n == 1 {
fi.Type = TypeManifest
return fi
fd.Type = TypeManifest
return fd, true
}
return nil
return
}
func (f *file) parse(name string) bool {
fi := fsParseName(name)
if fi == nil {
return false
func fsParseNamePtr(name string, fd *FileDesc) bool {
_fd, ok := fsParseName(name)
if fd != nil {
*fd = _fd
}
f.t = fi.Type
f.num = fi.Num
return true
return ok
}

View File

@ -0,0 +1,34 @@
// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// +build nacl
package storage
import (
"os"
"syscall"
)
// newFileLock is the nacl build's stand-in for the file lock constructor: it
// never creates a lock and always fails with syscall.ENOTSUP.
func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
return nil, syscall.ENOTSUP
}
// setFileLock is a stub for the nacl build; it ignores its arguments and
// always returns syscall.ENOTSUP.
func setFileLock(f *os.File, readOnly, lock bool) error {
return syscall.ENOTSUP
}
// rename is a stub for the nacl build; it renames nothing and always returns
// syscall.ENOTSUP.
func rename(oldpath, newpath string) error {
return syscall.ENOTSUP
}
// isErrInvalid is a stub for the nacl build; it reports false for every
// error.
func isErrInvalid(err error) bool {
return false
}
// syncDir is a stub for the nacl build; it syncs nothing and always returns
// syscall.ENOTSUP.
func syncDir(name string) error {
return syscall.ENOTSUP
}

View File

@ -19,8 +19,21 @@ func (fl *plan9FileLock) release() error {
return fl.f.Close()
}
func newFileLock(path string) (fl fileLock, err error) {
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, os.ModeExclusive|0644)
func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
var (
flag int
perm os.FileMode
)
if readOnly {
flag = os.O_RDONLY
} else {
flag = os.O_RDWR
perm = os.ModeExclusive
}
f, err := os.OpenFile(path, flag, perm)
if os.IsNotExist(err) {
f, err = os.OpenFile(path, flag|os.O_CREATE, perm|0644)
}
if err != nil {
return
}

View File

@ -18,18 +18,27 @@ type unixFileLock struct {
}
func (fl *unixFileLock) release() error {
if err := setFileLock(fl.f, false); err != nil {
if err := setFileLock(fl.f, false, false); err != nil {
return err
}
return fl.f.Close()
}
func newFileLock(path string) (fl fileLock, err error) {
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0644)
func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
var flag int
if readOnly {
flag = os.O_RDONLY
} else {
flag = os.O_RDWR
}
f, err := os.OpenFile(path, flag, 0)
if os.IsNotExist(err) {
f, err = os.OpenFile(path, flag|os.O_CREATE, 0644)
}
if err != nil {
return
}
err = setFileLock(f, true)
err = setFileLock(f, readOnly, true)
if err != nil {
f.Close()
return
@ -38,7 +47,7 @@ func newFileLock(path string) (fl fileLock, err error) {
return
}
func setFileLock(f *os.File, lock bool) error {
func setFileLock(f *os.File, readOnly, lock bool) error {
flock := syscall.Flock_t{
Type: syscall.F_UNLCK,
Start: 0,
@ -46,7 +55,11 @@ func setFileLock(f *os.File, lock bool) error {
Whence: 1,
}
if lock {
flock.Type = syscall.F_WRLCK
if readOnly {
flock.Type = syscall.F_RDLCK
} else {
flock.Type = syscall.F_WRLCK
}
}
return syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &flock)
}

View File

@ -18,18 +18,27 @@ type unixFileLock struct {
}
func (fl *unixFileLock) release() error {
if err := setFileLock(fl.f, false); err != nil {
if err := setFileLock(fl.f, false, false); err != nil {
return err
}
return fl.f.Close()
}
func newFileLock(path string) (fl fileLock, err error) {
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0644)
func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
var flag int
if readOnly {
flag = os.O_RDONLY
} else {
flag = os.O_RDWR
}
f, err := os.OpenFile(path, flag, 0)
if os.IsNotExist(err) {
f, err = os.OpenFile(path, flag|os.O_CREATE, 0644)
}
if err != nil {
return
}
err = setFileLock(f, true)
err = setFileLock(f, readOnly, true)
if err != nil {
f.Close()
return
@ -38,10 +47,14 @@ func newFileLock(path string) (fl fileLock, err error) {
return
}
func setFileLock(f *os.File, lock bool) error {
func setFileLock(f *os.File, readOnly, lock bool) error {
how := syscall.LOCK_UN
if lock {
how = syscall.LOCK_EX
if readOnly {
how = syscall.LOCK_SH
} else {
how = syscall.LOCK_EX
}
}
return syscall.Flock(int(f.Fd()), how|syscall.LOCK_NB)
}

View File

@ -29,12 +29,22 @@ func (fl *windowsFileLock) release() error {
return syscall.Close(fl.fd)
}
func newFileLock(path string) (fl fileLock, err error) {
func newFileLock(path string, readOnly bool) (fl fileLock, err error) {
pathp, err := syscall.UTF16PtrFromString(path)
if err != nil {
return
}
fd, err := syscall.CreateFile(pathp, syscall.GENERIC_READ|syscall.GENERIC_WRITE, 0, nil, syscall.CREATE_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0)
var access, shareMode uint32
if readOnly {
access = syscall.GENERIC_READ
shareMode = syscall.FILE_SHARE_READ
} else {
access = syscall.GENERIC_READ | syscall.GENERIC_WRITE
}
fd, err := syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_EXISTING, syscall.FILE_ATTRIBUTE_NORMAL, 0)
if err == syscall.ERROR_FILE_NOT_FOUND {
fd, err = syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0)
}
if err != nil {
return
}
@ -47,9 +57,8 @@ func moveFileEx(from *uint16, to *uint16, flags uint32) error {
if r1 == 0 {
if e1 != 0 {
return error(e1)
} else {
return syscall.EINVAL
}
return syscall.EINVAL
}
return nil
}

View File

@ -10,8 +10,6 @@ import (
"bytes"
"os"
"sync"
"github.com/syndtr/goleveldb/leveldb/util"
)
const typeShift = 3
@ -32,10 +30,10 @@ func (lock *memStorageLock) Release() {
// memStorage is a memory-backed storage.
type memStorage struct {
mu sync.Mutex
slock *memStorageLock
files map[uint64]*memFile
manifest *memFilePtr
mu sync.Mutex
slock *memStorageLock
files map[uint64]*memFile
meta FileDesc
}
// NewMemStorage returns a new memory-backed storage implementation.
@ -45,7 +43,7 @@ func NewMemStorage() Storage {
}
}
func (ms *memStorage) Lock() (util.Releaser, error) {
func (ms *memStorage) Lock() (Lock, error) {
ms.mu.Lock()
defer ms.mu.Unlock()
if ms.slock != nil {
@ -57,147 +55,164 @@ func (ms *memStorage) Lock() (util.Releaser, error) {
func (*memStorage) Log(str string) {}
func (ms *memStorage) GetFile(num uint64, t FileType) File {
return &memFilePtr{ms: ms, num: num, t: t}
}
func (ms *memStorage) GetFiles(t FileType) ([]File, error) {
ms.mu.Lock()
var ff []File
for x, _ := range ms.files {
num, mt := x>>typeShift, FileType(x)&TypeAll
if mt&t == 0 {
continue
}
ff = append(ff, &memFilePtr{ms: ms, num: num, t: mt})
}
ms.mu.Unlock()
return ff, nil
}
func (ms *memStorage) GetManifest() (File, error) {
ms.mu.Lock()
defer ms.mu.Unlock()
if ms.manifest == nil {
return nil, os.ErrNotExist
}
return ms.manifest, nil
}
func (ms *memStorage) SetManifest(f File) error {
fm, ok := f.(*memFilePtr)
if !ok || fm.t != TypeManifest {
func (ms *memStorage) SetMeta(fd FileDesc) error {
if !FileDescOk(fd) {
return ErrInvalidFile
}
ms.mu.Lock()
ms.manifest = fm
ms.meta = fd
ms.mu.Unlock()
return nil
}
func (*memStorage) Close() error { return nil }
type memReader struct {
*bytes.Reader
m *memFile
}
func (mr *memReader) Close() error {
return mr.m.Close()
}
type memFile struct {
bytes.Buffer
ms *memStorage
open bool
}
func (*memFile) Sync() error { return nil }
func (m *memFile) Close() error {
m.ms.mu.Lock()
m.open = false
m.ms.mu.Unlock()
return nil
}
type memFilePtr struct {
ms *memStorage
num uint64
t FileType
}
func (p *memFilePtr) x() uint64 {
return p.Num()<<typeShift | uint64(p.Type())
}
func (p *memFilePtr) Open() (Reader, error) {
ms := p.ms
func (ms *memStorage) GetMeta() (FileDesc, error) {
ms.mu.Lock()
defer ms.mu.Unlock()
if m, exist := ms.files[p.x()]; exist {
if ms.meta.Nil() {
return FileDesc{}, os.ErrNotExist
}
return ms.meta, nil
}
// List returns the descriptors of all stored files whose type is selected by
// the ft bitmask. The order of the result is unspecified because it follows
// map iteration order. The returned error is always nil.
func (ms *memStorage) List(ft FileType) ([]FileDesc, error) {
	ms.mu.Lock()
	defer ms.mu.Unlock()

	var matched []FileDesc
	for key := range ms.files {
		if fd := unpackFile(key); fd.Type&ft != 0 {
			matched = append(matched, fd)
		}
	}
	return matched, nil
}
func (ms *memStorage) Open(fd FileDesc) (Reader, error) {
if !FileDescOk(fd) {
return nil, ErrInvalidFile
}
ms.mu.Lock()
defer ms.mu.Unlock()
if m, exist := ms.files[packFile(fd)]; exist {
if m.open {
return nil, errFileOpen
}
m.open = true
return &memReader{Reader: bytes.NewReader(m.Bytes()), m: m}, nil
return &memReader{Reader: bytes.NewReader(m.Bytes()), ms: ms, m: m}, nil
}
return nil, os.ErrNotExist
}
func (p *memFilePtr) Create() (Writer, error) {
ms := p.ms
func (ms *memStorage) Create(fd FileDesc) (Writer, error) {
if !FileDescOk(fd) {
return nil, ErrInvalidFile
}
x := packFile(fd)
ms.mu.Lock()
defer ms.mu.Unlock()
m, exist := ms.files[p.x()]
m, exist := ms.files[x]
if exist {
if m.open {
return nil, errFileOpen
}
m.Reset()
} else {
m = &memFile{ms: ms}
ms.files[p.x()] = m
m = &memFile{}
ms.files[x] = m
}
m.open = true
return m, nil
return &memWriter{memFile: m, ms: ms}, nil
}
func (p *memFilePtr) Replace(newfile File) error {
p1, ok := newfile.(*memFilePtr)
if !ok {
func (ms *memStorage) Remove(fd FileDesc) error {
if !FileDescOk(fd) {
return ErrInvalidFile
}
ms := p.ms
x := packFile(fd)
ms.mu.Lock()
defer ms.mu.Unlock()
m1, exist := ms.files[p1.x()]
if !exist {
return os.ErrNotExist
}
m0, exist := ms.files[p.x()]
if (exist && m0.open) || m1.open {
return errFileOpen
}
delete(ms.files, p1.x())
ms.files[p.x()] = m1
return nil
}
func (p *memFilePtr) Type() FileType {
return p.t
}
func (p *memFilePtr) Num() uint64 {
return p.num
}
func (p *memFilePtr) Remove() error {
ms := p.ms
ms.mu.Lock()
defer ms.mu.Unlock()
if _, exist := ms.files[p.x()]; exist {
delete(ms.files, p.x())
if _, exist := ms.files[x]; exist {
delete(ms.files, x)
return nil
}
return os.ErrNotExist
}
// Rename moves the in-memory file stored under oldfd to newfd, replacing any
// file already stored under newfd.
//
// It returns ErrInvalidFile if either descriptor is invalid, os.ErrNotExist
// if no file is stored under oldfd, and errFileOpen if the source or the
// file being replaced is currently open. Renaming a descriptor to itself is
// a no-op.
func (ms *memStorage) Rename(oldfd, newfd FileDesc) error {
	// Both descriptors must be valid. The condition used to be inverted,
	// which rejected every valid rename and let invalid descriptors through.
	if !FileDescOk(oldfd) || !FileDescOk(newfd) {
		return ErrInvalidFile
	}
	if oldfd == newfd {
		return nil
	}

	oldx := packFile(oldfd)
	newx := packFile(newfd)
	ms.mu.Lock()
	defer ms.mu.Unlock()
	oldm, exist := ms.files[oldx]
	if !exist {
		return os.ErrNotExist
	}
	// Refuse to move or overwrite a file that a reader or writer still holds.
	newm, exist := ms.files[newx]
	if (exist && newm.open) || oldm.open {
		return errFileOpen
	}
	delete(ms.files, oldx)
	ms.files[newx] = oldm
	return nil
}
// Close is a no-op for the memory-backed storage and always returns nil.
func (*memStorage) Close() error { return nil }
// memFile is the content of one file held by memStorage.
type memFile struct {
// Buffer holds the file's bytes.
bytes.Buffer
// open is set while a reader or writer holds the file and cleared when
// that reader or writer is closed.
open bool
}
// memReader is the reader handed out by memStorage.Open.
type memReader struct {
// Reader reads from the file's bytes as they were when it was opened.
*bytes.Reader
// ms is the owning storage; its mutex guards Close.
ms *memStorage
// m is the file being read; Close clears its open flag.
m *memFile
// closed is consulted by Close to detect a repeated close.
closed bool
}
// Close releases the reader and marks the underlying file as no longer open.
// It returns ErrClosed if the reader has already been closed.
func (mr *memReader) Close() error {
	mr.ms.mu.Lock()
	defer mr.ms.mu.Unlock()
	if mr.closed {
		return ErrClosed
	}
	// Record the close; otherwise the check above can never fire and a
	// repeated Close would clear the open flag of a file that may have been
	// opened again in the meantime.
	mr.closed = true
	mr.m.open = false
	return nil
}
// memWriter is the writer handed out by memStorage.Create.
type memWriter struct {
// memFile is the file being written; writes go to its embedded buffer.
*memFile
// ms is the owning storage; its mutex guards Close.
ms *memStorage
// closed is consulted by Close to detect a repeated close.
closed bool
}
// Sync is a no-op for the memory-backed writer and always returns nil.
func (*memWriter) Sync() error { return nil }
// Close releases the writer and marks the underlying file as no longer open.
// It returns ErrClosed if the writer has already been closed.
func (mw *memWriter) Close() error {
	mw.ms.mu.Lock()
	defer mw.ms.mu.Unlock()
	if mw.closed {
		return ErrClosed
	}
	// Record the close; otherwise the check above can never fire and a
	// repeated Close would clear the open flag of a file that may have been
	// opened again in the meantime.
	mw.closed = true
	mw.memFile.open = false
	return nil
}
// packFile encodes fd into a single map key: the file number shifted left by
// typeShift bits, OR-ed with the file type.
func packFile(fd FileDesc) uint64 {
	num, typ := uint64(fd.Num), uint64(fd.Type)
	return num<<typeShift | typ
}
// unpackFile decodes a key produced by packFile: the type is taken from the
// bits selected by TypeAll and the number from the bits above typeShift.
func unpackFile(x uint64) FileDesc {
	return FileDesc{
		Type: FileType(x) & TypeAll,
		Num:  int64(x >> typeShift),
	}
}

View File

@ -15,7 +15,7 @@ import (
"github.com/syndtr/goleveldb/leveldb/util"
)
type FileType uint32
type FileType int
const (
TypeManifest FileType = 1 << iota
@ -50,13 +50,13 @@ var (
// a file. Package storage has its own type instead of using
// errors.ErrCorrupted to prevent circular import.
type ErrCorrupted struct {
File *FileInfo
Err error
Fd FileDesc
Err error
}
func (e *ErrCorrupted) Error() string {
if e.File != nil {
return fmt.Sprintf("%v [file=%v]", e.Err, e.File)
if !e.Fd.Nil() {
return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd)
} else {
return e.Err.Error()
}
@ -83,31 +83,47 @@ type Writer interface {
Syncer
}
// File is the file. A file instance must be goroutine-safe.
type File interface {
// Open opens the file for read. Returns os.ErrNotExist error
// if the file does not exist.
// Returns ErrClosed if the underlying storage is closed.
Open() (r Reader, err error)
// Lock is the handle returned by Storage.Lock. It embeds util.Releaser, which
// is how the holder gives the lock up after use.
type Lock interface {
	util.Releaser
}
// Create creates the file for writting. Truncate the file if
// already exist.
// Returns ErrClosed if the underlying storage is closed.
Create() (w Writer, err error)
// FileDesc is a file descriptor: a value that identifies a file by its type
// and its number.
type FileDesc struct {
	Type FileType // kind of file; String and FileDescOk switch on it
	Num  int64    // file number; FileDescOk requires it to be non-negative
}
// Replace replaces file with newfile.
// Returns ErrClosed if the underlying storage is closed.
Replace(newfile File) error
// String renders the descriptor as a file name. Each known type has its own
// layout built from the zero-padded file number; any other type is printed
// as its hexadecimal type value followed by the number.
func (fd FileDesc) String() string {
	var layout string
	switch fd.Type {
	case TypeManifest:
		layout = "MANIFEST-%06d"
	case TypeJournal:
		layout = "%06d.log"
	case TypeTable:
		layout = "%06d.ldb"
	case TypeTemp:
		layout = "%06d.tmp"
	default:
		return fmt.Sprintf("%#x-%d", fd.Type, fd.Num)
	}
	return fmt.Sprintf(layout, fd.Num)
}
// Type returns the file type
Type() FileType
// Nil reports whether fd is the zero-value FileDesc.
func (fd FileDesc) Nil() bool {
	var zero FileDesc
	return fd == zero
}
// Num returns the file number.
Num() uint64
// Remove removes the file.
// Returns ErrClosed if the underlying storage is closed.
Remove() error
// FileDescOk reports whether fd is a valid file descriptor: its type must be
// exactly one of the four known file types and its number must not be
// negative.
func FileDescOk(fd FileDesc) bool {
	switch fd.Type {
	case TypeManifest, TypeJournal, TypeTable, TypeTemp:
		return fd.Num >= 0
	}
	return false
}
// Storage is the storage. A storage instance must be goroutine-safe.
@ -115,59 +131,47 @@ type Storage interface {
// Lock locks the storage. Any subsequent attempt to call Lock will fail
// until the last lock released.
// After use the caller should call the Release method.
Lock() (l util.Releaser, err error)
Lock() (Lock, error)
// Log logs a string. This is used for logging. An implementation
// may write to a file, stdout or simply do nothing.
// Log logs a string. This is used for logging.
// An implementation may write to a file, stdout or simply do nothing.
Log(str string)
// GetFile returns a file for the given number and type. GetFile will never
// returns nil, even if the underlying storage is closed.
GetFile(num uint64, t FileType) File
// SetMeta sets the meta to point to the given fd, which can then be
// retrieved using the GetMeta method.
// SetMeta should be implemented in such a way that the change happens
// atomically.
SetMeta(fd FileDesc) error
// GetFiles returns a slice of files that match the given file types.
// GetManifest returns a manifest file.
// Returns os.ErrNotExist if meta doesn't point to any fd, or point to fd
// that doesn't exist.
GetMeta() (FileDesc, error)
// List returns fds that match the given file types.
// The file types may be OR'ed together.
GetFiles(t FileType) ([]File, error)
List(ft FileType) ([]FileDesc, error)
// GetManifest returns a manifest file. Returns os.ErrNotExist if manifest
// file does not exist.
GetManifest() (File, error)
// Open opens file with the given fd read-only.
// Returns os.ErrNotExist error if the file does not exist.
// Returns ErrClosed if the underlying storage is closed.
Open(fd FileDesc) (Reader, error)
// SetManifest sets the given file as manifest file. The given file should
// be a manifest file type or error will be returned.
SetManifest(f File) error
// Create creates file with the given fd, truncate if already exist and
// opens write-only.
// Returns ErrClosed if the underlying storage is closed.
Create(fd FileDesc) (Writer, error)
// Close closes the storage. It is valid to call Close multiple times.
// Other methods should not be called after the storage has been closed.
// Remove removes file with the given fd.
// Returns ErrClosed if the underlying storage is closed.
Remove(fd FileDesc) error
// Rename renames file from oldfd to newfd.
// Returns ErrClosed if the underlying storage is closed.
Rename(oldfd, newfd FileDesc) error
// Close closes the storage.
// It is valid to call Close multiple times. Other methods should not be
// called after the storage has been closed.
Close() error
}
// FileInfo wraps basic file info.
type FileInfo struct {
Type FileType
Num uint64
}
func (fi FileInfo) String() string {
switch fi.Type {
case TypeManifest:
return fmt.Sprintf("MANIFEST-%06d", fi.Num)
case TypeJournal:
return fmt.Sprintf("%06d.log", fi.Num)
case TypeTable:
return fmt.Sprintf("%06d.ldb", fi.Num)
case TypeTemp:
return fmt.Sprintf("%06d.tmp", fi.Num)
default:
return fmt.Sprintf("%#x-%d", fi.Type, fi.Num)
}
}
// NewFileInfo creates new FileInfo from the given File. It will returns nil
// if File is nil.
func NewFileInfo(f File) *FileInfo {
if f == nil {
return nil
}
return &FileInfo{f.Type(), f.Num()}
}

View File

@ -21,10 +21,10 @@ import (
// tFile holds basic information about a table.
type tFile struct {
file storage.File
fd storage.FileDesc
seekLeft int32
size uint64
imin, imax iKey
size int64
imin, imax internalKey
}
// Returns true if given key is after largest key of this table.
@ -48,9 +48,9 @@ func (t *tFile) consumeSeek() int32 {
}
// Creates new tFile.
func newTableFile(file storage.File, size uint64, imin, imax iKey) *tFile {
func newTableFile(fd storage.FileDesc, size int64, imin, imax internalKey) *tFile {
f := &tFile{
file: file,
fd: fd,
size: size,
imin: imin,
imax: imax,
@ -77,6 +77,10 @@ func newTableFile(file storage.File, size uint64, imin, imax iKey) *tFile {
return f
}
// tableFileFromRecord builds a tFile from an added-table record: the record's
// number becomes a table-type file descriptor, and its size and key bounds
// are passed through to newTableFile unchanged.
func tableFileFromRecord(r atRecord) *tFile {
	fd := storage.FileDesc{Type: storage.TypeTable, Num: r.num}
	return newTableFile(fd, r.size, r.imin, r.imax)
}
// tFiles hold multiple tFile.
type tFiles []*tFile
@ -89,7 +93,7 @@ func (tf tFiles) nums() string {
if i != 0 {
x += ", "
}
x += fmt.Sprint(f.file.Num())
x += fmt.Sprint(f.fd.Num)
}
x += " ]"
return x
@ -101,7 +105,7 @@ func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool {
a, b := tf[i], tf[j]
n := icmp.Compare(a.imin, b.imin)
if n == 0 {
return a.file.Num() < b.file.Num()
return a.fd.Num < b.fd.Num
}
return n < 0
}
@ -109,7 +113,7 @@ func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool {
// Returns true if i file number is greater than j.
// This used for sort by file number in descending order.
func (tf tFiles) lessByNum(i, j int) bool {
return tf[i].file.Num() > tf[j].file.Num()
return tf[i].fd.Num > tf[j].fd.Num
}
// Sorts tables by key in ascending order.
@ -123,7 +127,7 @@ func (tf tFiles) sortByNum() {
}
// Returns sum of all tables size.
func (tf tFiles) size() (sum uint64) {
func (tf tFiles) size() (sum int64) {
for _, t := range tf {
sum += t.size
}
@ -132,7 +136,7 @@ func (tf tFiles) size() (sum uint64) {
// Searches smallest index of tables whose its smallest
// key is after or equal with given key.
func (tf tFiles) searchMin(icmp *iComparer, ikey iKey) int {
func (tf tFiles) searchMin(icmp *iComparer, ikey internalKey) int {
return sort.Search(len(tf), func(i int) bool {
return icmp.Compare(tf[i].imin, ikey) >= 0
})
@ -140,7 +144,7 @@ func (tf tFiles) searchMin(icmp *iComparer, ikey iKey) int {
// Searches smallest index of tables whose its largest
// key is after or equal with given key.
func (tf tFiles) searchMax(icmp *iComparer, ikey iKey) int {
func (tf tFiles) searchMax(icmp *iComparer, ikey internalKey) int {
return sort.Search(len(tf), func(i int) bool {
return icmp.Compare(tf[i].imax, ikey) >= 0
})
@ -162,7 +166,7 @@ func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) boo
i := 0
if len(umin) > 0 {
// Find the earliest possible internal key for min.
i = tf.searchMax(icmp, newIkey(umin, kMaxSeq, ktSeek))
i = tf.searchMax(icmp, makeInternalKey(nil, umin, keyMaxSeq, keyTypeSeek))
}
if i >= len(tf) {
// Beginning of range is after all files, so no overlap.
@ -205,7 +209,7 @@ func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, ove
}
// Returns tables key range.
func (tf tFiles) getRange(icmp *iComparer) (imin, imax iKey) {
func (tf tFiles) getRange(icmp *iComparer) (imin, imax internalKey) {
for i, t := range tf {
if i == 0 {
imin, imax = t.imin, t.imax
@ -227,10 +231,10 @@ func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range
if slice != nil {
var start, limit int
if slice.Start != nil {
start = tf.searchMax(icmp, iKey(slice.Start))
start = tf.searchMax(icmp, internalKey(slice.Start))
}
if slice.Limit != nil {
limit = tf.searchMin(icmp, iKey(slice.Limit))
limit = tf.searchMin(icmp, internalKey(slice.Limit))
} else {
limit = tf.Len()
}
@ -255,7 +259,7 @@ type tFilesArrayIndexer struct {
}
func (a *tFilesArrayIndexer) Search(key []byte) int {
return a.searchMax(a.icmp, iKey(key))
return a.searchMax(a.icmp, internalKey(key))
}
func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator {
@ -295,16 +299,16 @@ type tOps struct {
// Creates an empty table and returns table writer.
func (t *tOps) create() (*tWriter, error) {
file := t.s.getTableFile(t.s.allocFileNum())
fw, err := file.Create()
fd := storage.FileDesc{storage.TypeTable, t.s.allocFileNum()}
fw, err := t.s.stor.Create(fd)
if err != nil {
return nil, err
}
return &tWriter{
t: t,
file: file,
w: fw,
tw: table.NewWriter(fw, t.s.o.Options),
t: t,
fd: fd,
w: fw,
tw: table.NewWriter(fw, t.s.o.Options),
}, nil
}
@ -340,21 +344,20 @@ func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) {
// Opens table. It returns a cache handle, which should
// be released after use.
func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) {
num := f.file.Num()
ch = t.cache.Get(0, num, func() (size int, value cache.Value) {
ch = t.cache.Get(0, uint64(f.fd.Num), func() (size int, value cache.Value) {
var r storage.Reader
r, err = f.file.Open()
r, err = t.s.stor.Open(f.fd)
if err != nil {
return 0, nil
}
var bcache *cache.CacheGetter
var bcache *cache.NamespaceGetter
if t.bcache != nil {
bcache = &cache.CacheGetter{Cache: t.bcache, NS: num}
bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)}
}
var tr *table.Reader
tr, err = table.NewReader(r, int64(f.size), storage.NewFileInfo(f.file), bcache, t.bpool, t.s.o.Options)
tr, err = table.NewReader(r, f.size, f.fd, bcache, t.bpool, t.s.o.Options)
if err != nil {
r.Close()
return 0, nil
@ -390,14 +393,13 @@ func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte,
}
// Returns approximate offset of the given key.
func (t *tOps) offsetOf(f *tFile, key []byte) (offset uint64, err error) {
func (t *tOps) offsetOf(f *tFile, key []byte) (offset int64, err error) {
ch, err := t.open(f)
if err != nil {
return
}
defer ch.Release()
offset_, err := ch.Value().(*table.Reader).OffsetOf(key)
return uint64(offset_), err
return ch.Value().(*table.Reader).OffsetOf(key)
}
// Creates an iterator from the given table.
@ -414,15 +416,14 @@ func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) ite
// Removes table from persistent storage. It waits until
// no one use the the table.
func (t *tOps) remove(f *tFile) {
num := f.file.Num()
t.cache.Delete(0, num, func() {
if err := f.file.Remove(); err != nil {
t.s.logf("table@remove removing @%d %q", num, err)
t.cache.Delete(0, uint64(f.fd.Num), func() {
if err := t.s.stor.Remove(f.fd); err != nil {
t.s.logf("table@remove removing @%d %q", f.fd.Num, err)
} else {
t.s.logf("table@remove removed @%d", num)
t.s.logf("table@remove removed @%d", f.fd.Num)
}
if t.bcache != nil {
t.bcache.EvictNS(num)
t.bcache.EvictNS(uint64(f.fd.Num))
}
})
}
@ -471,9 +472,9 @@ func newTableOps(s *session) *tOps {
type tWriter struct {
t *tOps
file storage.File
w storage.Writer
tw *table.Writer
fd storage.FileDesc
w storage.Writer
tw *table.Writer
first, last []byte
}
@ -513,16 +514,15 @@ func (w *tWriter) finish() (f *tFile, err error) {
return
}
}
f = newTableFile(w.file, uint64(w.tw.BytesLen()), iKey(w.first), iKey(w.last))
f = newTableFile(w.fd, int64(w.tw.BytesLen()), internalKey(w.first), internalKey(w.last))
return
}
// Drops the table.
func (w *tWriter) drop() {
w.close()
w.file.Remove()
w.t.s.reuseFileNum(w.file.Num())
w.file = nil
w.t.s.stor.Remove(w.fd)
w.t.s.reuseFileNum(w.fd.Num)
w.tw = nil
w.first = nil
w.last = nil

View File

@ -507,9 +507,9 @@ func (i *indexIter) Get() iterator.Iterator {
// Reader is a table reader.
type Reader struct {
mu sync.RWMutex
fi *storage.FileInfo
fd storage.FileDesc
reader io.ReaderAt
cache *cache.CacheGetter
cache *cache.NamespaceGetter
err error
bpool *util.BufferPool
// Options
@ -539,7 +539,7 @@ func (r *Reader) blockKind(bh blockHandle) string {
}
func (r *Reader) newErrCorrupted(pos, size int64, kind, reason string) error {
return &errors.ErrCorrupted{File: r.fi, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}}
return &errors.ErrCorrupted{Fd: r.fd, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}}
}
func (r *Reader) newErrCorruptedBH(bh blockHandle, reason string) error {
@ -551,7 +551,7 @@ func (r *Reader) fixErrCorruptedBH(bh blockHandle, err error) error {
cerr.Pos = int64(bh.offset)
cerr.Size = int64(bh.length)
cerr.Kind = r.blockKind(bh)
return &errors.ErrCorrupted{File: r.fi, Err: cerr}
return &errors.ErrCorrupted{Fd: r.fd, Err: cerr}
}
return err
}
@ -988,13 +988,13 @@ func (r *Reader) Release() {
// The fi, cache and bpool is optional and can be nil.
//
// The returned table reader instance is goroutine-safe.
func NewReader(f io.ReaderAt, size int64, fi *storage.FileInfo, cache *cache.CacheGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) {
func NewReader(f io.ReaderAt, size int64, fd storage.FileDesc, cache *cache.NamespaceGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) {
if f == nil {
return nil, errors.New("leveldb/table: nil file")
}
r := &Reader{
fi: fi,
fd: fd,
reader: f,
cache: cache,
bpool: bpool,

View File

@ -72,20 +72,20 @@ func maxInt(a, b int) int {
return b
}
type files []storage.File
type fdSorter []storage.FileDesc
func (p files) Len() int {
func (p fdSorter) Len() int {
return len(p)
}
func (p files) Less(i, j int) bool {
return p[i].Num() < p[j].Num()
func (p fdSorter) Less(i, j int) bool {
return p[i].Num < p[j].Num
}
func (p files) Swap(i, j int) {
func (p fdSorter) Swap(i, j int) {
p[i], p[j] = p[j], p[i]
}
func (p files) sort() {
sort.Sort(p)
func sortFds(fds []storage.FileDesc) {
sort.Sort(fdSorter(fds))
}

View File

@ -7,38 +7,38 @@
package util
import (
"bytes"
"encoding/binary"
)
// Hash return hash of the given data.
func Hash(data []byte, seed uint32) uint32 {
// Similar to murmur hash
var m uint32 = 0xc6a4a793
var r uint32 = 24
h := seed ^ (uint32(len(data)) * m)
const (
m = uint32(0xc6a4a793)
r = uint32(24)
)
var (
h = seed ^ (uint32(len(data)) * m)
i int
)
buf := bytes.NewBuffer(data)
for buf.Len() >= 4 {
var w uint32
binary.Read(buf, binary.LittleEndian, &w)
h += w
for n := len(data) - len(data)%4; i < n; i += 4 {
h += binary.LittleEndian.Uint32(data[i:])
h *= m
h ^= (h >> 16)
}
rest := buf.Bytes()
switch len(rest) {
switch len(data) - i {
default:
panic("not reached")
case 3:
h += uint32(rest[2]) << 16
h += uint32(data[i+2]) << 16
fallthrough
case 2:
h += uint32(rest[1]) << 8
h += uint32(data[i+1]) << 8
fallthrough
case 1:
h += uint32(rest[0])
h += uint32(data[i])
h *= m
h ^= (h >> r)
case 0:

View File

@ -1,21 +0,0 @@
// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// +build go1.3
package util
import (
"sync"
)
// Pool is a thin wrapper that embeds sync.Pool, so sync.Pool's methods are
// available on it directly.
type Pool struct {
	sync.Pool
}
// NewPool returns a new, empty Pool. The cap argument is accepted but not
// used by this implementation.
func NewPool(cap int) *Pool {
	return new(Pool)
}

View File

@ -1,33 +0,0 @@
// Copyright (c) 2014, Suryandaru Triandana <syndtr@gmail.com>
// All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// +build !go1.3
package util
// Pool is an object pool backed by a channel: Put sends items into the
// channel and Get receives them back.
type Pool struct {
	pool chan interface{} // holds the pooled items; created by NewPool with the requested capacity
}
// Get takes one item out of the pool without blocking. It returns nil when
// no item is immediately available.
func (p *Pool) Get() interface{} {
	var item interface{}
	select {
	case item = <-p.pool:
	default:
		// Nothing pooled right now; item stays nil.
	}
	return item
}
// Put offers x to the pool without blocking. If the channel cannot accept
// the value immediately, x is silently dropped.
func (p *Pool) Put(x interface{}) {
	select {
	case p.pool <- x:
	default:
		// Channel not ready to receive; discard x rather than block.
	}
}
// NewPool returns a Pool whose backing channel is buffered to hold up to cap
// items.
func NewPool(cap int) *Pool {
	slots := make(chan interface{}, cap)
	return &Pool{pool: slots}
}

View File

@ -7,6 +7,7 @@
package leveldb
import (
"fmt"
"sync/atomic"
"unsafe"
@ -23,7 +24,7 @@ type tSet struct {
type version struct {
s *session
tables []tFiles
levels []tFiles
// Level that should be compacted next and its compaction score.
// Score < 1 means compaction is not strictly needed. These fields
@ -39,7 +40,7 @@ type version struct {
}
func newVersion(s *session) *version {
return &version{s: s, tables: make([]tFiles, s.o.GetNumLevel())}
return &version{s: s}
}
func (v *version) releaseNB() {
@ -51,18 +52,18 @@ func (v *version) releaseNB() {
panic("negative version ref")
}
tables := make(map[uint64]bool)
for _, tt := range v.next.tables {
nextTables := make(map[int64]bool)
for _, tt := range v.next.levels {
for _, t := range tt {
num := t.file.Num()
tables[num] = true
num := t.fd.Num
nextTables[num] = true
}
}
for _, tt := range v.tables {
for _, tt := range v.levels {
for _, t := range tt {
num := t.file.Num()
if _, ok := tables[num]; !ok {
num := t.fd.Num
if _, ok := nextTables[num]; !ok {
v.s.tops.remove(t)
}
}
@ -78,11 +79,26 @@ func (v *version) release() {
v.s.vmu.Unlock()
}
func (v *version) walkOverlapping(ikey iKey, f func(level int, t *tFile) bool, lf func(level int) bool) {
func (v *version) walkOverlapping(aux tFiles, ikey internalKey, f func(level int, t *tFile) bool, lf func(level int) bool) {
ukey := ikey.ukey()
// Aux level.
if aux != nil {
for _, t := range aux {
if t.overlaps(v.s.icmp, ukey, ukey) {
if !f(-1, t) {
return
}
}
}
if lf != nil && !lf(-1) {
return
}
}
// Walk tables level-by-level.
for level, tables := range v.tables {
for level, tables := range v.levels {
if len(tables) == 0 {
continue
}
@ -114,7 +130,7 @@ func (v *version) walkOverlapping(ikey iKey, f func(level int, t *tFile) bool, l
}
}
func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) {
func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) {
ukey := ikey.ukey()
var (
@ -124,16 +140,16 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byt
// Level-0.
zfound bool
zseq uint64
zkt kType
zkt keyType
zval []byte
)
err = ErrNotFound
// Since entries never hope across level, finding key/value
// Since entries never hop across level, finding key/value
// in smaller level make later levels irrelevant.
v.walkOverlapping(ikey, func(level int, t *tFile) bool {
if !tseek {
v.walkOverlapping(aux, ikey, func(level int, t *tFile) bool {
if level >= 0 && !tseek {
if tset == nil {
tset = &tSet{level, t}
} else {
@ -150,6 +166,7 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byt
} else {
fikey, fval, ferr = v.s.tops.find(t, ikey, ro)
}
switch ferr {
case nil:
case ErrNotFound:
@ -159,9 +176,10 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byt
return false
}
if fukey, fseq, fkt, fkerr := parseIkey(fikey); fkerr == nil {
if fukey, fseq, fkt, fkerr := parseInternalKey(fikey); fkerr == nil {
if v.s.icmp.uCompare(ukey, fukey) == 0 {
if level == 0 {
// Level <= 0 may overlap each other.
if level <= 0 {
if fseq >= zseq {
zfound = true
zseq = fseq
@ -170,12 +188,12 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byt
}
} else {
switch fkt {
case ktVal:
case keyTypeVal:
value = fval
err = nil
case ktDel:
case keyTypeDel:
default:
panic("leveldb: invalid iKey type")
panic("leveldb: invalid internalKey type")
}
return false
}
@ -189,12 +207,12 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byt
}, func(level int) bool {
if zfound {
switch zkt {
case ktVal:
case keyTypeVal:
value = zval
err = nil
case ktDel:
case keyTypeDel:
default:
panic("leveldb: invalid iKey type")
panic("leveldb: invalid internalKey type")
}
return false
}
@ -209,46 +227,40 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byt
return
}
func (v *version) sampleSeek(ikey iKey) (tcomp bool) {
func (v *version) sampleSeek(ikey internalKey) (tcomp bool) {
var tset *tSet
v.walkOverlapping(ikey, func(level int, t *tFile) bool {
v.walkOverlapping(nil, ikey, func(level int, t *tFile) bool {
if tset == nil {
tset = &tSet{level, t}
return true
} else {
if tset.table.consumeSeek() <= 0 {
tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
}
return false
}
if tset.table.consumeSeek() <= 0 {
tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
}
return false
}, nil)
return
}
func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) {
// Merge all level zero files together since they may overlap
for _, t := range v.tables[0] {
it := v.s.tops.newIterator(t, slice, ro)
its = append(its, it)
}
strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader)
for _, tables := range v.tables[1:] {
if len(tables) == 0 {
continue
for level, tables := range v.levels {
if level == 0 {
// Merge all level zero files together since they may overlap.
for _, t := range tables {
its = append(its, v.s.tops.newIterator(t, slice, ro))
}
} else if len(tables) != 0 {
its = append(its, iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict))
}
it := iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict)
its = append(its, it)
}
return
}
func (v *version) newStaging() *versionStaging {
return &versionStaging{base: v, tables: make([]tablesScratch, v.s.o.GetNumLevel())}
return &versionStaging{base: v}
}
// Spawn a new version based on this version.
@ -259,19 +271,22 @@ func (v *version) spawn(r *sessionRecord) *version {
}
func (v *version) fillRecord(r *sessionRecord) {
for level, ts := range v.tables {
for _, t := range ts {
for level, tables := range v.levels {
for _, t := range tables {
r.addTableFile(level, t)
}
}
}
func (v *version) tLen(level int) int {
return len(v.tables[level])
if level < len(v.levels) {
return len(v.levels[level])
}
return 0
}
func (v *version) offsetOf(ikey iKey) (n uint64, err error) {
for level, tables := range v.tables {
func (v *version) offsetOf(ikey internalKey) (n int64, err error) {
for level, tables := range v.levels {
for _, t := range tables {
if v.s.icmp.Compare(t.imax, ikey) <= 0 {
// Entire file is before "ikey", so just add the file size
@ -287,12 +302,11 @@ func (v *version) offsetOf(ikey iKey) (n uint64, err error) {
} else {
// "ikey" falls in the range for this table. Add the
// approximate offset of "ikey" within the table.
var nn uint64
nn, err = v.s.tops.offsetOf(t, ikey)
if err != nil {
if m, err := v.s.tops.offsetOf(t, ikey); err == nil {
n += m
} else {
return 0, err
}
n += nn
}
}
}
@ -300,37 +314,50 @@ func (v *version) offsetOf(ikey iKey) (n uint64, err error) {
return
}
func (v *version) pickMemdbLevel(umin, umax []byte) (level int) {
if !v.tables[0].overlaps(v.s.icmp, umin, umax, true) {
var overlaps tFiles
maxLevel := v.s.o.GetMaxMemCompationLevel()
for ; level < maxLevel; level++ {
if v.tables[level+1].overlaps(v.s.icmp, umin, umax, false) {
break
}
overlaps = v.tables[level+2].getOverlaps(overlaps, v.s.icmp, umin, umax, false)
if overlaps.size() > uint64(v.s.o.GetCompactionGPOverlaps(level)) {
break
func (v *version) pickMemdbLevel(umin, umax []byte, maxLevel int) (level int) {
if maxLevel > 0 {
if len(v.levels) == 0 {
return maxLevel
}
if !v.levels[0].overlaps(v.s.icmp, umin, umax, true) {
var overlaps tFiles
for ; level < maxLevel; level++ {
if pLevel := level + 1; pLevel >= len(v.levels) {
return maxLevel
} else if v.levels[pLevel].overlaps(v.s.icmp, umin, umax, false) {
break
}
if gpLevel := level + 2; gpLevel < len(v.levels) {
overlaps = v.levels[gpLevel].getOverlaps(overlaps, v.s.icmp, umin, umax, false)
if overlaps.size() > int64(v.s.o.GetCompactionGPOverlaps(level)) {
break
}
}
}
}
}
return
}
func (v *version) computeCompaction() {
// Precomputed best level for next compaction
var bestLevel int = -1
var bestScore float64 = -1
bestLevel := int(-1)
bestScore := float64(-1)
for level, tables := range v.tables {
statFiles := make([]int, len(v.levels))
statSizes := make([]string, len(v.levels))
statScore := make([]string, len(v.levels))
statTotSize := int64(0)
for level, tables := range v.levels {
var score float64
size := tables.size()
if level == 0 {
// We treat level-0 specially by bounding the number of files
// instead of number of bytes for two reasons:
//
// (1) With larger write-buffer sizes, it is nice not to do too
// many level-0 compactions.
// many level-0 compaction.
//
// (2) The files in level-0 are merged on every read and
// therefore we wish to avoid too many files when the individual
@ -339,17 +366,24 @@ func (v *version) computeCompaction() {
// overwrites/deletions).
score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger())
} else {
score = float64(tables.size()) / float64(v.s.o.GetCompactionTotalSize(level))
score = float64(size) / float64(v.s.o.GetCompactionTotalSize(level))
}
if score > bestScore {
bestLevel = level
bestScore = score
}
statFiles[level] = len(tables)
statSizes[level] = shortenb(int(size))
statScore[level] = fmt.Sprintf("%.2f", score)
statTotSize += size
}
v.cLevel = bestLevel
v.cScore = bestScore
v.s.logf("version@stat F·%v S·%s%v Sc·%v", statFiles, shortenb(int(statTotSize)), statSizes, statScore)
}
func (v *version) needCompaction() bool {
@ -357,43 +391,48 @@ func (v *version) needCompaction() bool {
}
type tablesScratch struct {
added map[uint64]atRecord
deleted map[uint64]struct{}
added map[int64]atRecord
deleted map[int64]struct{}
}
type versionStaging struct {
base *version
tables []tablesScratch
levels []tablesScratch
}
// getScratch returns a pointer to the scratch entry for the given level.
// If p.levels is too short to contain that level, it is first replaced by a
// copy of length level+1, so existing entries keep their contents.
func (p *versionStaging) getScratch(level int) *tablesScratch {
	if have := len(p.levels); level >= have {
		grown := make([]tablesScratch, level+1)
		copy(grown, p.levels)
		p.levels = grown
	}
	return &p.levels[level]
}
func (p *versionStaging) commit(r *sessionRecord) {
// Deleted tables.
for _, r := range r.deletedTables {
tm := &(p.tables[r.level])
if len(p.base.tables[r.level]) > 0 {
if tm.deleted == nil {
tm.deleted = make(map[uint64]struct{})
scratch := p.getScratch(r.level)
if r.level < len(p.base.levels) && len(p.base.levels[r.level]) > 0 {
if scratch.deleted == nil {
scratch.deleted = make(map[int64]struct{})
}
tm.deleted[r.num] = struct{}{}
scratch.deleted[r.num] = struct{}{}
}
if tm.added != nil {
delete(tm.added, r.num)
if scratch.added != nil {
delete(scratch.added, r.num)
}
}
// New tables.
for _, r := range r.addedTables {
tm := &(p.tables[r.level])
if tm.added == nil {
tm.added = make(map[uint64]atRecord)
scratch := p.getScratch(r.level)
if scratch.added == nil {
scratch.added = make(map[int64]atRecord)
}
tm.added[r.num] = r
if tm.deleted != nil {
delete(tm.deleted, r.num)
scratch.added[r.num] = r
if scratch.deleted != nil {
delete(scratch.deleted, r.num)
}
}
}
@ -401,39 +440,62 @@ func (p *versionStaging) commit(r *sessionRecord) {
func (p *versionStaging) finish() *version {
// Build new version.
nv := newVersion(p.base.s)
for level, tm := range p.tables {
btables := p.base.tables[level]
n := len(btables) + len(tm.added) - len(tm.deleted)
if n < 0 {
n = 0
}
nt := make(tFiles, 0, n)
// Base tables.
for _, t := range btables {
if _, ok := tm.deleted[t.file.Num()]; ok {
continue
}
if _, ok := tm.added[t.file.Num()]; ok {
continue
}
nt = append(nt, t)
}
// New tables.
for _, r := range tm.added {
nt = append(nt, p.base.s.tableFileFromRecord(r))
}
// Sort tables.
if level == 0 {
nt.sortByNum()
} else {
nt.sortByKey(p.base.s.icmp)
}
nv.tables[level] = nt
numLevel := len(p.levels)
if len(p.base.levels) > numLevel {
numLevel = len(p.base.levels)
}
nv.levels = make([]tFiles, numLevel)
for level := 0; level < numLevel; level++ {
var baseTabels tFiles
if level < len(p.base.levels) {
baseTabels = p.base.levels[level]
}
if level < len(p.levels) {
scratch := p.levels[level]
var nt tFiles
// Prealloc list if possible.
if n := len(baseTabels) + len(scratch.added) - len(scratch.deleted); n > 0 {
nt = make(tFiles, 0, n)
}
// Base tables.
for _, t := range baseTabels {
if _, ok := scratch.deleted[t.fd.Num]; ok {
continue
}
if _, ok := scratch.added[t.fd.Num]; ok {
continue
}
nt = append(nt, t)
}
// New tables.
for _, r := range scratch.added {
nt = append(nt, tableFileFromRecord(r))
}
if len(nt) != 0 {
// Sort tables.
if level == 0 {
nt.sortByNum()
} else {
nt.sortByKey(p.base.s.icmp)
}
nv.levels[level] = nt
}
} else {
nv.levels[level] = baseTabels
}
}
// Trim levels.
n := len(nv.levels)
for ; n > 0 && nv.levels[n-1] == nil; n-- {
}
nv.levels = nv.levels[:n]
// Compute compaction score for new version.
nv.computeCompaction()

22
vendor/golang.org/x/net/PATENTS generated vendored Normal file
View File

@ -0,0 +1,22 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the Go project.
Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.

View File

@ -144,6 +144,7 @@ func getAttrList(path string, attrList attrList, attrBuf []byte, options uint) (
uintptr(options),
0,
)
use(unsafe.Pointer(_p0))
if e1 != 0 {
return nil, e1
}
@ -196,6 +197,7 @@ func Getfsstat(buf []Statfs_t, flags int) (n int, err error) {
bufsize = unsafe.Sizeof(Statfs_t{}) * uintptr(len(buf))
}
r0, _, e1 := Syscall(SYS_GETFSSTAT64, uintptr(_p0), bufsize, uintptr(flags))
use(unsafe.Pointer(_p0))
n = int(r0)
if e1 != 0 {
err = e1

View File

@ -109,6 +109,7 @@ func Getfsstat(buf []Statfs_t, flags int) (n int, err error) {
bufsize = unsafe.Sizeof(Statfs_t{}) * uintptr(len(buf))
}
r0, _, e1 := Syscall(SYS_GETFSSTAT, uintptr(_p0), bufsize, uintptr(flags))
use(unsafe.Pointer(_p0))
n = int(r0)
if e1 != 0 {
err = e1

View File

@ -129,6 +129,7 @@ func Getfsstat(buf []Statfs_t, flags int) (n int, err error) {
bufsize = unsafe.Sizeof(Statfs_t{}) * uintptr(len(buf))
}
r0, _, e1 := Syscall(SYS_GETFSSTAT, uintptr(_p0), bufsize, uintptr(flags))
use(unsafe.Pointer(_p0))
n = int(r0)
if e1 != 0 {
err = e1

View File

@ -60,6 +60,15 @@ func Openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
return openat(dirfd, path, flags|O_LARGEFILE, mode)
}
//sys ppoll(fds *PollFd, nfds int, timeout *Timespec, sigmask *Sigset_t) (n int, err error)
// Ppoll hands the descriptors in fds, together with timeout and sigmask, to
// the ppoll system call wrapper and returns its results unchanged. An empty
// or nil fds is passed as a nil pointer with a count of zero.
func Ppoll(fds []PollFd, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
	var head *PollFd // stays nil when there is nothing to poll
	if len(fds) > 0 {
		head = &fds[0]
	}
	return ppoll(head, len(fds), timeout, sigmask)
}
//sys readlinkat(dirfd int, path string, buf []byte) (n int, err error)
func Readlink(path string, buf []byte) (n int, err error) {
@ -1043,8 +1052,6 @@ func Munmap(b []byte) (err error) {
// Newfstatat
// Nfsservctl
// Personality
// Poll
// Ppoll
// Pselect6
// Ptrace
// Putpmsg

View File

@ -388,3 +388,12 @@ func (msghdr *Msghdr) SetControllen(length int) {
// SetLen stores length in cmsg.Len, converting it to uint32.
func (cmsg *Cmsghdr) SetLen(length int) {
cmsg.Len = uint32(length)
}
//sys poll(fds *PollFd, nfds int, timeout int) (n int, err error)
// Poll forwards the descriptors in fds and the timeout to the poll system
// call wrapper and returns its results unchanged. An empty or nil fds is
// passed as a nil pointer with a count of zero.
func Poll(fds []PollFd, timeout int) (n int, err error) {
	var head *PollFd // stays nil when there is nothing to poll
	if len(fds) > 0 {
		head = &fds[0]
	}
	return poll(head, len(fds), timeout)
}

View File

@ -146,3 +146,12 @@ func (msghdr *Msghdr) SetControllen(length int) {
// SetLen stores length in cmsg.Len, converting it to uint64.
func (cmsg *Cmsghdr) SetLen(length int) {
cmsg.Len = uint64(length)
}
//sys poll(fds *PollFd, nfds int, timeout int) (n int, err error)
// Poll forwards the descriptors in fds and the timeout to the poll system
// call wrapper and returns its results unchanged. An empty or nil fds is
// passed as a nil pointer with a count of zero.
func Poll(fds []PollFd, timeout int) (n int, err error) {
	var head *PollFd // stays nil when there is nothing to poll
	if len(fds) > 0 {
		head = &fds[0]
	}
	return poll(head, len(fds), timeout)
}

View File

@ -252,3 +252,12 @@ func (msghdr *Msghdr) SetControllen(length int) {
// SetLen stores length in cmsg.Len, converting it to uint32.
func (cmsg *Cmsghdr) SetLen(length int) {
cmsg.Len = uint32(length)
}
//sys poll(fds *PollFd, nfds int, timeout int) (n int, err error)
// Poll forwards the descriptors in fds and the timeout to the poll system
// call wrapper and returns its results unchanged. An empty or nil fds is
// passed as a nil pointer with a count of zero.
func Poll(fds []PollFd, timeout int) (n int, err error) {
	var head *PollFd // stays nil when there is nothing to poll
	if len(fds) > 0 {
		head = &fds[0]
	}
	return poll(head, len(fds), timeout)
}

View File

@ -178,3 +178,15 @@ const (
SYS_EPOLL_CREATE = 1042
SYS_EPOLL_WAIT = 1069
)
// Poll is implemented on top of ppoll here. A non-negative timeout is scaled
// by 1e6 and converted with NsecToTimespec (i.e. it is treated as
// milliseconds); a negative timeout passes a nil timespec. The signal mask
// argument is always nil. An empty or nil fds is passed as a nil pointer with
// a count of zero.
func Poll(fds []PollFd, timeout int) (n int, err error) {
	var deadline *Timespec // remains nil for a negative timeout
	if timeout >= 0 {
		spec := NsecToTimespec(int64(timeout) * 1e6)
		deadline = &spec
	}
	var head *PollFd // stays nil when there is nothing to poll
	if len(fds) > 0 {
		head = &fds[0]
	}
	return ppoll(head, len(fds), deadline, nil)
}

View File

@ -204,3 +204,12 @@ func (msghdr *Msghdr) SetControllen(length int) {
// SetLen stores length in cmsg.Len, converting it to uint64.
func (cmsg *Cmsghdr) SetLen(length int) {
cmsg.Len = uint64(length)
}
//sys poll(fds *PollFd, nfds int, timeout int) (n int, err error)
// Poll forwards the descriptors in fds and the timeout to the poll system
// call wrapper and returns its results unchanged. An empty or nil fds is
// passed as a nil pointer with a count of zero.
func Poll(fds []PollFd, timeout int) (n int, err error) {
	var head *PollFd // stays nil when there is nothing to poll
	if len(fds) > 0 {
		head = &fds[0]
	}
	return poll(head, len(fds), timeout)
}

View File

@ -124,3 +124,12 @@ func Pipe2(p []int, flags int) (err error) {
p[1] = int(pp[1])
return
}
//sys poll(fds *PollFd, nfds int, timeout int) (n int, err error)
// Poll forwards the descriptors in fds and the timeout to the poll system
// call wrapper and returns its results unchanged. An empty or nil fds is
// passed as a nil pointer with a count of zero.
func Poll(fds []PollFd, timeout int) (n int, err error) {
	var head *PollFd // stays nil when there is nothing to poll
	if len(fds) > 0 {
		head = &fds[0]
	}
	return poll(head, len(fds), timeout)
}

View File

@ -318,3 +318,12 @@ func Shutdown(s, how int) error {
}
return nil
}
//sys poll(fds *PollFd, nfds int, timeout int) (n int, err error)
// Poll forwards the descriptors in fds and the timeout to the poll system
// call wrapper and returns its results unchanged. An empty or nil fds is
// passed as a nil pointer with a count of zero.
func Poll(fds []PollFd, timeout int) (n int, err error) {
	var head *PollFd // stays nil when there is nothing to poll
	if len(fds) > 0 {
		head = &fds[0]
	}
	return poll(head, len(fds), timeout)
}

View File

@ -111,6 +111,7 @@ func Getfsstat(buf []Statfs_t, flags int) (n int, err error) {
bufsize = unsafe.Sizeof(Statfs_t{}) * uintptr(len(buf))
}
r0, _, e1 := Syscall(SYS_GETFSSTAT, uintptr(_p0), bufsize, uintptr(flags))
use(unsafe.Pointer(_p0))
n = int(r0)
if e1 != 0 {
err = e1

View File

@ -24,6 +24,7 @@ package unix
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netpacket/packet.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <sys/epoll.h>
@ -430,6 +431,20 @@ const (
AT_SYMLINK_NOFOLLOW = C.AT_SYMLINK_NOFOLLOW
)
// PollFd is defined from the C struct pollfd.
type PollFd C.struct_pollfd
// Poll event constants, each taken from the C macro of the same name.
const (
POLLIN = C.POLLIN
POLLPRI = C.POLLPRI
POLLOUT = C.POLLOUT
POLLRDHUP = C.POLLRDHUP
POLLERR = C.POLLERR
POLLHUP = C.POLLHUP
POLLNVAL = C.POLLNVAL
)
// Sigset_t is defined from the C sigset_t type.
type Sigset_t C.sigset_t
// Terminal handling
type Termios C.termios_t

View File

@ -216,6 +216,7 @@ const (
CLONE_FILES = 0x400
CLONE_FS = 0x200
CLONE_IO = 0x80000000
CLONE_NEWCGROUP = 0x2000000
CLONE_NEWIPC = 0x8000000
CLONE_NEWNET = 0x40000000
CLONE_NEWNS = 0x20000

View File

@ -216,6 +216,7 @@ const (
CLONE_FILES = 0x400
CLONE_FS = 0x200
CLONE_IO = 0x80000000
CLONE_NEWCGROUP = 0x2000000
CLONE_NEWIPC = 0x8000000
CLONE_NEWNET = 0x40000000
CLONE_NEWNS = 0x20000

View File

@ -212,6 +212,7 @@ const (
CLONE_FILES = 0x400
CLONE_FS = 0x200
CLONE_IO = 0x80000000
CLONE_NEWCGROUP = 0x2000000
CLONE_NEWIPC = 0x8000000
CLONE_NEWNET = 0x40000000
CLONE_NEWNS = 0x20000

View File

@ -222,6 +222,7 @@ const (
CLONE_FILES = 0x400
CLONE_FS = 0x200
CLONE_IO = 0x80000000
CLONE_NEWCGROUP = 0x2000000
CLONE_NEWIPC = 0x8000000
CLONE_NEWNET = 0x40000000
CLONE_NEWNS = 0x20000

View File

@ -221,6 +221,7 @@ const (
CLONE_FILES = 0x400
CLONE_FS = 0x200
CLONE_IO = 0x80000000
CLONE_NEWCGROUP = 0x2000000
CLONE_NEWIPC = 0x8000000
CLONE_NEWNET = 0x40000000
CLONE_NEWNS = 0x20000

View File

@ -221,6 +221,7 @@ const (
CLONE_FILES = 0x400
CLONE_FS = 0x200
CLONE_IO = 0x80000000
CLONE_NEWCGROUP = 0x2000000
CLONE_NEWIPC = 0x8000000
CLONE_NEWNET = 0x40000000
CLONE_NEWNS = 0x20000

View File

@ -223,6 +223,7 @@ const (
CLONE_FILES = 0x400
CLONE_FS = 0x200
CLONE_IO = 0x80000000
CLONE_NEWCGROUP = 0x2000000
CLONE_NEWIPC = 0x8000000
CLONE_NEWNET = 0x40000000
CLONE_NEWNS = 0x20000

View File

@ -222,6 +222,7 @@ const (
CLONE_FILES = 0x400
CLONE_FS = 0x200
CLONE_IO = 0x80000000
CLONE_NEWCGROUP = 0x2000000
CLONE_NEWIPC = 0x8000000
CLONE_NEWNET = 0x40000000
CLONE_NEWNS = 0x20000

View File

@ -53,6 +53,17 @@ func openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// ppoll issues the SYS_PPOLL system call through Syscall6. fds, timeout and
// sigmask are passed as raw addresses, nfds as a count, and the two unused
// argument slots are zero. The call's first result is returned as n; a
// non-zero errno is converted to err by errnoErr.
func ppoll(fds *PollFd, nfds int, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversions are kept inside the call's argument list.
r0, _, e1 := Syscall6(SYS_PPOLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(unsafe.Pointer(timeout)), uintptr(unsafe.Pointer(sigmask)), 0, 0)
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func readlinkat(dirfd int, path string, buf []byte) (n int, err error) {
var _p0 *byte
_p0, err = BytePtrFromString(path)
@ -1636,3 +1647,14 @@ func Utime(path string, buf *Utimbuf) (err error) {
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// poll issues the SYS_POLL system call through Syscall, passing fds as a raw
// address together with nfds and timeout. The call's first result is returned
// as n; a non-zero errno is converted to err by errnoErr.
func poll(fds *PollFd, nfds int, timeout int) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversion is kept inside the call's argument list.
r0, _, e1 := Syscall(SYS_POLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(timeout))
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}

View File

@ -53,6 +53,17 @@ func openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// ppoll issues the SYS_PPOLL system call through Syscall6. fds, timeout and
// sigmask are passed as raw addresses, nfds as a count, and the two unused
// argument slots are zero. The call's first result is returned as n; a
// non-zero errno is converted to err by errnoErr.
func ppoll(fds *PollFd, nfds int, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversions are kept inside the call's argument list.
r0, _, e1 := Syscall6(SYS_PPOLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(unsafe.Pointer(timeout)), uintptr(unsafe.Pointer(sigmask)), 0, 0)
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func readlinkat(dirfd int, path string, buf []byte) (n int, err error) {
var _p0 *byte
_p0, err = BytePtrFromString(path)
@ -1830,3 +1841,14 @@ func pipe2(p *[2]_C_int, flags int) (err error) {
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// poll issues the SYS_POLL system call through Syscall, passing fds as a raw
// address together with nfds and timeout. The call's first result is returned
// as n; a non-zero errno is converted to err by errnoErr.
func poll(fds *PollFd, nfds int, timeout int) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversion is kept inside the call's argument list.
r0, _, e1 := Syscall(SYS_POLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(timeout))
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}

View File

@ -53,6 +53,17 @@ func openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// ppoll issues the SYS_PPOLL system call through Syscall6. fds, timeout and
// sigmask are passed as raw addresses, nfds as a count, and the two unused
// argument slots are zero. The call's first result is returned as n; a
// non-zero errno is converted to err by errnoErr.
func ppoll(fds *PollFd, nfds int, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversions are kept inside the call's argument list.
r0, _, e1 := Syscall6(SYS_PPOLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(unsafe.Pointer(timeout)), uintptr(unsafe.Pointer(sigmask)), 0, 0)
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func readlinkat(dirfd int, path string, buf []byte) (n int, err error) {
var _p0 *byte
_p0, err = BytePtrFromString(path)
@ -1737,3 +1748,14 @@ func setrlimit(resource int, rlim *rlimit32) (err error) {
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// poll issues the SYS_POLL system call through Syscall, passing fds as a raw
// address together with nfds and timeout. The call's first result is returned
// as n; a non-zero errno is converted to err by errnoErr.
func poll(fds *PollFd, nfds int, timeout int) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversion is kept inside the call's argument list.
r0, _, e1 := Syscall(SYS_POLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(timeout))
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}

View File

@ -53,6 +53,17 @@ func openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// ppoll issues the SYS_PPOLL system call through Syscall6. fds, timeout and
// sigmask are passed as raw addresses, nfds as a count, and the two unused
// argument slots are zero. The call's first result is returned as n; a
// non-zero errno is converted to err by errnoErr.
func ppoll(fds *PollFd, nfds int, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversions are kept inside the call's argument list.
r0, _, e1 := Syscall6(SYS_PPOLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(unsafe.Pointer(timeout)), uintptr(unsafe.Pointer(sigmask)), 0, 0)
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func readlinkat(dirfd int, path string, buf []byte) (n int, err error) {
var _p0 *byte
_p0, err = BytePtrFromString(path)

View File

@ -53,6 +53,17 @@ func openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// ppoll issues the SYS_PPOLL system call through Syscall6. fds, timeout and
// sigmask are passed as raw addresses, nfds as a count, and the two unused
// argument slots are zero. The call's first result is returned as n; a
// non-zero errno is converted to err by errnoErr.
func ppoll(fds *PollFd, nfds int, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversions are kept inside the call's argument list.
r0, _, e1 := Syscall6(SYS_PPOLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(unsafe.Pointer(timeout)), uintptr(unsafe.Pointer(sigmask)), 0, 0)
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func readlinkat(dirfd int, path string, buf []byte) (n int, err error) {
var _p0 *byte
_p0, err = BytePtrFromString(path)
@ -1779,3 +1790,14 @@ func stat(path string, st *stat_t) (err error) {
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// poll issues the SYS_POLL system call through Syscall, passing fds as a raw
// address together with nfds and timeout. The call's first result is returned
// as n; a non-zero errno is converted to err by errnoErr.
func poll(fds *PollFd, nfds int, timeout int) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversion is kept inside the call's argument list.
r0, _, e1 := Syscall(SYS_POLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(timeout))
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}

View File

@ -53,6 +53,17 @@ func openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// ppoll issues the SYS_PPOLL system call through Syscall6. fds, timeout and
// sigmask are passed as raw addresses, nfds as a count, and the two unused
// argument slots are zero. The call's first result is returned as n; a
// non-zero errno is converted to err by errnoErr.
func ppoll(fds *PollFd, nfds int, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversions are kept inside the call's argument list.
r0, _, e1 := Syscall6(SYS_PPOLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(unsafe.Pointer(timeout)), uintptr(unsafe.Pointer(sigmask)), 0, 0)
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func readlinkat(dirfd int, path string, buf []byte) (n int, err error) {
var _p0 *byte
_p0, err = BytePtrFromString(path)
@ -1779,3 +1790,14 @@ func stat(path string, st *stat_t) (err error) {
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// poll issues the SYS_POLL system call through Syscall, passing fds as a raw
// address together with nfds and timeout. The call's first result is returned
// as n; a non-zero errno is converted to err by errnoErr.
func poll(fds *PollFd, nfds int, timeout int) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversion is kept inside the call's argument list.
r0, _, e1 := Syscall(SYS_POLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(timeout))
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}

View File

@ -53,6 +53,17 @@ func openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// ppoll issues the SYS_PPOLL system call through Syscall6. fds, timeout and
// sigmask are passed as raw addresses, nfds as a count, and the two unused
// argument slots are zero. The call's first result is returned as n; a
// non-zero errno is converted to err by errnoErr.
func ppoll(fds *PollFd, nfds int, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversions are kept inside the call's argument list.
r0, _, e1 := Syscall6(SYS_PPOLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(unsafe.Pointer(timeout)), uintptr(unsafe.Pointer(sigmask)), 0, 0)
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func readlinkat(dirfd int, path string, buf []byte) (n int, err error) {
var _p0 *byte
_p0, err = BytePtrFromString(path)
@ -1841,3 +1852,14 @@ func pipe2(p *[2]_C_int, flags int) (err error) {
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// poll issues the SYS_POLL system call through Syscall, passing fds as a raw
// address together with nfds and timeout. The call's first result is returned
// as n; a non-zero errno is converted to err by errnoErr.
func poll(fds *PollFd, nfds int, timeout int) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversion is kept inside the call's argument list.
r0, _, e1 := Syscall(SYS_POLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(timeout))
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}

View File

@ -53,6 +53,17 @@ func openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// ppoll issues the SYS_PPOLL system call through Syscall6. fds, timeout and
// sigmask are passed as raw addresses, nfds as a count, and the two unused
// argument slots are zero. The call's first result is returned as n; a
// non-zero errno is converted to err by errnoErr.
func ppoll(fds *PollFd, nfds int, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversions are kept inside the call's argument list.
r0, _, e1 := Syscall6(SYS_PPOLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(unsafe.Pointer(timeout)), uintptr(unsafe.Pointer(sigmask)), 0, 0)
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func readlinkat(dirfd int, path string, buf []byte) (n int, err error) {
var _p0 *byte
_p0, err = BytePtrFromString(path)
@ -1841,3 +1852,14 @@ func pipe2(p *[2]_C_int, flags int) (err error) {
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// poll issues the SYS_POLL system call through Syscall, passing fds as a raw
// address together with nfds and timeout. The call's first result is returned
// as n; a non-zero errno is converted to err by errnoErr.
func poll(fds *PollFd, nfds int, timeout int) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversion is kept inside the call's argument list.
r0, _, e1 := Syscall(SYS_POLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(timeout))
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}

View File

@ -53,6 +53,17 @@ func openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// ppoll issues the SYS_PPOLL system call through Syscall6. fds, timeout and
// sigmask are passed as raw addresses, nfds as a count, and the two unused
// argument slots are zero. The call's first result is returned as n; a
// non-zero errno is converted to err by errnoErr.
func ppoll(fds *PollFd, nfds int, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversions are kept inside the call's argument list.
r0, _, e1 := Syscall6(SYS_PPOLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(unsafe.Pointer(timeout)), uintptr(unsafe.Pointer(sigmask)), 0, 0)
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
func readlinkat(dirfd int, path string, buf []byte) (n int, err error) {
var _p0 *byte
_p0, err = BytePtrFromString(path)
@ -1621,3 +1632,14 @@ func pipe2(p *[2]_C_int, flags int) (err error) {
}
return
}
// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
// poll issues the SYS_POLL system call through Syscall, passing fds as a raw
// address together with nfds and timeout. The call's first result is returned
// as n; a non-zero errno is converted to err by errnoErr.
func poll(fds *PollFd, nfds int, timeout int) (n int, err error) {
// The unsafe.Pointer-to-uintptr conversion is kept inside the call's argument list.
r0, _, e1 := Syscall(SYS_POLL, uintptr(unsafe.Pointer(fds)), uintptr(nfds), uintptr(timeout))
n = int(r0)
if e1 != 0 {
err = errnoErr(e1)
}
return
}

View File

@ -595,6 +595,26 @@ const (
AT_SYMLINK_NOFOLLOW = 0x100
)
// PollFd is the element type of the descriptor slice accepted by Poll and Ppoll.
// NOTE(review): the field layout presumably mirrors C struct pollfd — do not reorder or resize.
type PollFd struct {
Fd int32
Events int16
Revents int16
}
// POLL* bit values; presumably OR-ed into PollFd.Events and reported back in
// PollFd.Revents — confirm against poll(2).
const (
POLLIN = 0x1
POLLPRI = 0x2
POLLOUT = 0x4
POLLRDHUP = 0x2000
POLLERR = 0x8
POLLHUP = 0x10
POLLNVAL = 0x20
)
// Sigset_t holds 16 uint64 words (1024 bits); Ppoll takes a *Sigset_t as its
// sigmask argument.
type Sigset_t struct {
X__val [16]uint64
}
type Termios struct {
Iflag uint32
Oflag uint32

View File

@ -613,6 +613,26 @@ const (
AT_SYMLINK_NOFOLLOW = 0x100
)
// PollFd is the element type of the descriptor slice accepted by Poll and Ppoll.
// NOTE(review): the field layout presumably mirrors C struct pollfd — do not reorder or resize.
type PollFd struct {
Fd int32
Events int16
Revents int16
}
// POLL* bit values; presumably OR-ed into PollFd.Events and reported back in
// PollFd.Revents — confirm against poll(2).
const (
POLLIN = 0x1
POLLPRI = 0x2
POLLOUT = 0x4
POLLRDHUP = 0x2000
POLLERR = 0x8
POLLHUP = 0x10
POLLNVAL = 0x20
)
// Sigset_t holds 16 uint64 words (1024 bits); Ppoll takes a *Sigset_t as its
// sigmask argument.
type Sigset_t struct {
X__val [16]uint64
}
type Termios struct {
Iflag uint32
Oflag uint32

View File

@ -575,6 +575,26 @@ const (
AT_SYMLINK_NOFOLLOW = 0x100
)
// PollFd is the element type of the descriptor slice accepted by Poll and Ppoll.
// NOTE(review): the field layout presumably mirrors C struct pollfd — do not reorder or resize.
type PollFd struct {
Fd int32
Events int16
Revents int16
}
// POLL* bit values; presumably OR-ed into PollFd.Events and reported back in
// PollFd.Revents — confirm against poll(2).
const (
POLLIN = 0x1
POLLPRI = 0x2
POLLOUT = 0x4
POLLRDHUP = 0x2000
POLLERR = 0x8
POLLHUP = 0x10
POLLNVAL = 0x20
)
// Sigset_t holds 16 uint64 words (1024 bits); Ppoll takes a *Sigset_t as its
// sigmask argument.
type Sigset_t struct {
X__val [16]uint64
}
type Termios struct {
Iflag uint32
Oflag uint32

View File

@ -592,6 +592,26 @@ const (
AT_SYMLINK_NOFOLLOW = 0x100
)
// PollFd is the element type of the descriptor slice accepted by Poll and Ppoll.
// NOTE(review): the field layout presumably mirrors C struct pollfd — do not reorder or resize.
type PollFd struct {
Fd int32
Events int16
Revents int16
}
// POLL* bit values; presumably OR-ed into PollFd.Events and reported back in
// PollFd.Revents — confirm against poll(2).
const (
POLLIN = 0x1
POLLPRI = 0x2
POLLOUT = 0x4
POLLRDHUP = 0x2000
POLLERR = 0x8
POLLHUP = 0x10
POLLNVAL = 0x20
)
// Sigset_t holds 16 uint64 words (1024 bits); Ppoll takes a *Sigset_t as its
// sigmask argument.
type Sigset_t struct {
X__val [16]uint64
}
type Termios struct {
Iflag uint32
Oflag uint32

View File

@ -596,6 +596,26 @@ const (
AT_SYMLINK_NOFOLLOW = 0x100
)
// PollFd is the element type of the descriptor slice accepted by Poll and Ppoll.
// NOTE(review): the field layout presumably mirrors C struct pollfd — do not reorder or resize.
type PollFd struct {
Fd int32
Events int16
Revents int16
}
// POLL* bit values; presumably OR-ed into PollFd.Events and reported back in
// PollFd.Revents — confirm against poll(2).
const (
POLLIN = 0x1
POLLPRI = 0x2
POLLOUT = 0x4
POLLRDHUP = 0x2000
POLLERR = 0x8
POLLHUP = 0x10
POLLNVAL = 0x20
)
// Sigset_t holds 16 uint64 words (1024 bits); Ppoll takes a *Sigset_t as its
// sigmask argument.
type Sigset_t struct {
X__val [16]uint64
}
type Termios struct {
Iflag uint32
Oflag uint32

View File

@ -596,6 +596,26 @@ const (
AT_SYMLINK_NOFOLLOW = 0x100
)
// PollFd is the element type of the descriptor slice accepted by Poll and Ppoll.
// NOTE(review): the field layout presumably mirrors C struct pollfd — do not reorder or resize.
type PollFd struct {
Fd int32
Events int16
Revents int16
}
// POLL* bit values; presumably OR-ed into PollFd.Events and reported back in
// PollFd.Revents — confirm against poll(2).
const (
POLLIN = 0x1
POLLPRI = 0x2
POLLOUT = 0x4
POLLRDHUP = 0x2000
POLLERR = 0x8
POLLHUP = 0x10
POLLNVAL = 0x20
)
// Sigset_t holds 16 uint64 words (1024 bits); Ppoll takes a *Sigset_t as its
// sigmask argument.
type Sigset_t struct {
X__val [16]uint64
}
type Termios struct {
Iflag uint32
Oflag uint32

View File

@ -602,6 +602,26 @@ const (
AT_SYMLINK_NOFOLLOW = 0x100
)
// PollFd is the element type of the descriptor slice accepted by Poll and Ppoll.
// NOTE(review): the field layout presumably mirrors C struct pollfd — do not reorder or resize.
type PollFd struct {
Fd int32
Events int16
Revents int16
}
// POLL* bit values; presumably OR-ed into PollFd.Events and reported back in
// PollFd.Revents — confirm against poll(2).
const (
POLLIN = 0x1
POLLPRI = 0x2
POLLOUT = 0x4
POLLRDHUP = 0x2000
POLLERR = 0x8
POLLHUP = 0x10
POLLNVAL = 0x20
)
// Sigset_t holds 16 uint64 words (1024 bits); Ppoll takes a *Sigset_t as its
// sigmask argument.
type Sigset_t struct {
X__val [16]uint64
}
type Termios struct {
Iflag uint32
Oflag uint32

View File

@ -602,6 +602,26 @@ const (
AT_SYMLINK_NOFOLLOW = 0x100
)
// PollFd is the element type of the descriptor slice accepted by Poll and Ppoll.
// NOTE(review): the field layout presumably mirrors C struct pollfd — do not reorder or resize.
type PollFd struct {
Fd int32
Events int16
Revents int16
}
// POLL* bit values; presumably OR-ed into PollFd.Events and reported back in
// PollFd.Revents — confirm against poll(2).
const (
POLLIN = 0x1
POLLPRI = 0x2
POLLOUT = 0x4
POLLRDHUP = 0x2000
POLLERR = 0x8
POLLHUP = 0x10
POLLNVAL = 0x20
)
// Sigset_t holds 16 uint64 words (1024 bits); Ppoll takes a *Sigset_t as its
// sigmask argument.
type Sigset_t struct {
X__val [16]uint64
}
type Termios struct {
Iflag uint32
Oflag uint32

Some files were not shown because too many files have changed in this diff Show More