go-ceph/rados/watcher.go
John Mulligan ec0b9a1455 rados: watcher apis are now stable
Watcher apis are now stable as per the stability plan.

Signed-off-by: John Mulligan <jmulligan@redhat.com>
2022-06-14 16:01:43 +00:00

368 lines
9.7 KiB
Go

package rados
/*
#cgo LDFLAGS: -lrados
#include <stdlib.h>
#include <rados/librados.h>
extern void watchNotifyCb(void*, uint64_t, uint64_t, uint64_t, void*, size_t);
extern void watchErrorCb(void*, uint64_t, int);
*/
import "C"
import (
"encoding/binary"
"fmt"
"math"
"sync"
"time"
"unsafe"
"github.com/ceph/go-ceph/internal/log"
)
type (
// WatcherID is the unique id of a Watcher.
WatcherID uint64
// NotifyID is the unique id of a NotifyEvent.
NotifyID uint64
// NotifierID is the unique id of a notifying client.
NotifierID uint64
)
// NotifyEvent is received by a watcher for each notification.
type NotifyEvent struct {
ID NotifyID
WatcherID WatcherID
NotifierID NotifierID
Data []byte
}
// NotifyAck represents an acknowleged notification.
type NotifyAck struct {
WatcherID WatcherID
NotifierID NotifierID
Response []byte
}
// NotifyTimeout represents an unacknowleged notification.
type NotifyTimeout struct {
WatcherID WatcherID
NotifierID NotifierID
}
// Watcher receives all notifications for certain object.
type Watcher struct {
id WatcherID
oid string
ioctx *IOContext
events chan NotifyEvent
errors chan error
done chan struct{}
}
var (
watchers = map[WatcherID]*Watcher{}
watchersMtx sync.RWMutex
)
// Watch creates a Watcher for the specified object.
//
// A Watcher receives all notifications that are sent to the object on which it
// has been created. It exposes two read-only channels: Events() receives all
// the NotifyEvents and Errors() receives all occuring errors. A typical code
// creating a Watcher could look like this:
//
// watcher, err := ioctx.Watch(oid)
// go func() { // event handler
// for ne := range watcher.Events() {
// ...
// ne.Ack([]byte("response data..."))
// ...
// }
// }()
// go func() { // error handler
// for err := range watcher.Errors() {
// ... handle err ...
// }
// }()
//
// CAUTION: the Watcher references the IOContext in which it has been created.
// Therefore all watchers must be deleted with the Delete() method before the
// IOContext is being destroyed.
//
// Implements:
// int rados_watch2(rados_ioctx_t io, const char* o, uint64_t* cookie,
// rados_watchcb2_t watchcb, rados_watcherrcb_t watcherrcb, void* arg)
func (ioctx *IOContext) Watch(obj string) (*Watcher, error) {
return ioctx.WatchWithTimeout(obj, 0)
}
// WatchWithTimeout creates a watcher on an object. Same as Watcher(), but
// different timeout than the default can be specified.
//
// Implements:
// int rados_watch3(rados_ioctx_t io, const char *o, uint64_t *cookie,
// rados_watchcb2_t watchcb, rados_watcherrcb_t watcherrcb, uint32_t timeout,
// void *arg);
func (ioctx *IOContext) WatchWithTimeout(oid string, timeout time.Duration) (*Watcher, error) {
cObj := C.CString(oid)
defer C.free(unsafe.Pointer(cObj))
var id C.uint64_t
watchersMtx.Lock()
defer watchersMtx.Unlock()
ret := C.rados_watch3(
ioctx.ioctx,
cObj,
&id,
(C.rados_watchcb2_t)(C.watchNotifyCb),
(C.rados_watcherrcb_t)(C.watchErrorCb),
C.uint32_t(timeout.Milliseconds()/1000),
nil,
)
if err := getError(ret); err != nil {
return nil, err
}
evCh := make(chan NotifyEvent)
errCh := make(chan error)
w := &Watcher{
id: WatcherID(id),
ioctx: ioctx,
oid: oid,
events: evCh,
errors: errCh,
done: make(chan struct{}),
}
watchers[WatcherID(id)] = w
return w, nil
}
// ID returns the WatcherId of the Watcher
func (w *Watcher) ID() WatcherID {
return w.id
}
// Events returns a read-only channel, that receives all notifications that are
// sent to the object of the Watcher.
func (w *Watcher) Events() <-chan NotifyEvent {
return w.events
}
// Errors returns a read-only channel, that receives all errors for the Watcher.
func (w *Watcher) Errors() <-chan error {
return w.errors
}
// Check on the status of a Watcher.
//
// Returns the time since it was last confirmed. If there is an error, the
// Watcher is no longer valid, and should be destroyed with the Delete() method.
//
// Implements:
// int rados_watch_check(rados_ioctx_t io, uint64_t cookie)
func (w *Watcher) Check() (time.Duration, error) {
ret := C.rados_watch_check(w.ioctx.ioctx, C.uint64_t(w.id))
if ret < 0 {
return 0, getError(ret)
}
return time.Millisecond * time.Duration(ret), nil
}
// Delete the watcher. This closes both the event and error channel.
//
// Implements:
// int rados_unwatch2(rados_ioctx_t io, uint64_t cookie)
func (w *Watcher) Delete() error {
watchersMtx.Lock()
_, ok := watchers[w.id]
if ok {
delete(watchers, w.id)
}
watchersMtx.Unlock()
if !ok {
return nil
}
ret := C.rados_unwatch2(w.ioctx.ioctx, C.uint64_t(w.id))
if ret != 0 {
return getError(ret)
}
close(w.done) // unblock blocked callbacks
close(w.events)
close(w.errors)
return nil
}
// Notify sends a notification with the provided data to all Watchers of the
// specified object.
//
// CAUTION: even if the error is not nil. the returned slices
// might still contain data.
func (ioctx *IOContext) Notify(obj string, data []byte) ([]NotifyAck, []NotifyTimeout, error) {
return ioctx.NotifyWithTimeout(obj, data, 0)
}
// NotifyWithTimeout is like Notify() but with a different timeout than the
// default.
//
// Implements:
// int rados_notify2(rados_ioctx_t io, const char* o, const char* buf, int buf_len,
// uint64_t timeout_ms, char** reply_buffer, size_t* reply_buffer_len)
func (ioctx *IOContext) NotifyWithTimeout(obj string, data []byte, timeout time.Duration) ([]NotifyAck,
[]NotifyTimeout, error) {
cObj := C.CString(obj)
defer C.free(unsafe.Pointer(cObj))
var cResponse *C.char
defer C.rados_buffer_free(cResponse)
var responseLen C.size_t
var dataPtr *C.char
if len(data) > 0 {
dataPtr = (*C.char)(unsafe.Pointer(&data[0]))
}
ret := C.rados_notify2(
ioctx.ioctx,
cObj,
dataPtr,
C.int(len(data)),
C.uint64_t(timeout.Milliseconds()),
&cResponse,
&responseLen,
)
// cResponse has been set even if an error is returned, so we decode it anyway
acks, timeouts := decodeNotifyResponse(cResponse, responseLen)
return acks, timeouts, getError(ret)
}
// Ack sends an acknowledgement with the specified response data to the notfier
// of the NotifyEvent. If a notify is not ack'ed, the originating Notify() call
// blocks and eventiually times out.
//
// Implements:
// int rados_notify_ack(rados_ioctx_t io, const char *o, uint64_t notify_id,
// uint64_t cookie, const char *buf, int buf_len)
func (ne *NotifyEvent) Ack(response []byte) error {
watchersMtx.RLock()
w, ok := watchers[ne.WatcherID]
watchersMtx.RUnlock()
if !ok {
return fmt.Errorf("can't ack on deleted watcher %v", ne.WatcherID)
}
cOID := C.CString(w.oid)
defer C.free(unsafe.Pointer(cOID))
var respPtr *C.char
if len(response) > 0 {
respPtr = (*C.char)(unsafe.Pointer(&response[0]))
}
ret := C.rados_notify_ack(
w.ioctx.ioctx,
cOID,
C.uint64_t(ne.ID),
C.uint64_t(ne.WatcherID),
respPtr,
C.int(len(response)),
)
return getError(ret)
}
// WatcherFlush flushes all pending notifications of the cluster.
//
// Implements:
// int rados_watch_flush(rados_t cluster)
func (c *Conn) WatcherFlush() error {
if !c.connected {
return ErrNotConnected
}
ret := C.rados_watch_flush(c.cluster)
return getError(ret)
}
// decoder for this notify response format:
// le32 num_acks
// {
// le64 gid global id for the client (for client.1234 that's 1234)
// le64 cookie cookie for the client
// le32 buflen length of reply message buffer
// u8 buflen payload
// } num_acks
// le32 num_timeouts
// {
// le64 gid global id for the client
// le64 cookie cookie for the client
// } num_timeouts
//
// NOTE: starting with pacific this is implemented as a C function and this can
// be replaced later
func decodeNotifyResponse(response *C.char, len C.size_t) ([]NotifyAck, []NotifyTimeout) {
if len == 0 || response == nil {
return nil, nil
}
b := (*[math.MaxInt32]byte)(unsafe.Pointer(response))[:len:len]
pos := 0
num := binary.LittleEndian.Uint32(b[pos:])
pos += 4
acks := make([]NotifyAck, num)
for i := range acks {
acks[i].NotifierID = NotifierID(binary.LittleEndian.Uint64(b[pos:]))
pos += 8
acks[i].WatcherID = WatcherID(binary.LittleEndian.Uint64(b[pos:]))
pos += 8
dataLen := binary.LittleEndian.Uint32(b[pos:])
pos += 4
if dataLen > 0 {
acks[i].Response = C.GoBytes(unsafe.Pointer(&b[pos]), C.int(dataLen))
pos += int(dataLen)
}
}
num = binary.LittleEndian.Uint32(b[pos:])
pos += 4
timeouts := make([]NotifyTimeout, num)
for i := range timeouts {
timeouts[i].NotifierID = NotifierID(binary.LittleEndian.Uint64(b[pos:]))
pos += 8
timeouts[i].WatcherID = WatcherID(binary.LittleEndian.Uint64(b[pos:]))
pos += 8
}
return acks, timeouts
}
//export watchNotifyCb
func watchNotifyCb(_ unsafe.Pointer, notifyID C.uint64_t, id C.uint64_t,
notifierID C.uint64_t, cData unsafe.Pointer, dataLen C.size_t) {
ev := NotifyEvent{
ID: NotifyID(notifyID),
WatcherID: WatcherID(id),
NotifierID: NotifierID(notifierID),
}
if dataLen > 0 {
ev.Data = C.GoBytes(cData, C.int(dataLen))
}
watchersMtx.RLock()
w, ok := watchers[WatcherID(id)]
watchersMtx.RUnlock()
if !ok {
// usually this should not happen, but who knows
log.Warnf("received notification for unknown watcher ID: %#v", ev)
return
}
select {
case <-w.done: // unblock when deleted
case w.events <- ev:
}
}
//export watchErrorCb
func watchErrorCb(_ unsafe.Pointer, id C.uint64_t, err C.int) {
watchersMtx.RLock()
w, ok := watchers[WatcherID(id)]
watchersMtx.RUnlock()
if !ok {
// usually this should not happen, but who knows
log.Warnf("received error for unknown watcher ID: id=%d err=%#v", id, err)
return
}
select {
case <-w.done: // unblock when deleted
case w.errors <- getError(err):
}
}