Index: configure.in =================================================================== --- configure.in (revision 26859) +++ configure.in (working copy) @@ -1671,6 +1671,7 @@ plugins/agentx/Makefile plugins/artnet/Makefile plugins/asn1/Makefile + plugins/ceph/Makefile plugins/ciscosm/Makefile plugins/docsis/Makefile plugins/enttec/Makefile Index: Makefile.am =================================================================== --- Makefile.am (revision 26859) +++ Makefile.am (working copy) @@ -243,6 +243,7 @@ -dlopen plugins/agentx/agentx.la \ -dlopen plugins/artnet/artnet.la \ -dlopen plugins/asn1/asn1.la \ + -dlopen plugins/ceph/ceph.la \ -dlopen plugins/ciscosm/ciscosm.la \ -dlopen plugins/docsis/docsis.la \ -dlopen plugins/enttec/enttec.la \ Index: plugins/ceph/Makefile.common =================================================================== --- plugins/ceph/Makefile.common (revision 0) +++ plugins/ceph/Makefile.common (revision 0) @@ -0,0 +1,31 @@ +# Makefile.common for the Ceph plugin +# Contains the stuff from Makefile.am and Makefile.nmake that is +# a) common to both files and +# b) portable between both files +# +# $Id: Makefile.common 18197 2006-05-21 05:12:17Z sahlberg $ +# +# Wireshark - Network traffic analyzer +# By Gerald Combs +# Copyright 1998 Gerald Combs +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# the name of the plugin +PLUGIN_NAME = ceph + +# the dissector sources (without any helpers) +DISSECTOR_SRC = \ + packet-ceph.c Index: plugins/ceph/packet-ceph.c =================================================================== --- plugins/ceph/packet-ceph.c (revision 0) +++ plugins/ceph/packet-ceph.c (revision 0) @@ -0,0 +1,1155 @@ +/* packet-ceph.c +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License +* as published by the Free Software Foundation; either version 2 +* of the License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include + +// this is needed for ceph_fs to compile in userland +#ifdef _MSC_VER +typedef char __s8; +typedef short __s16; +typedef int __s32; +typedef __int64 __s64; +typedef unsigned char __u8; +typedef unsigned short __u16; +typedef unsigned int __u32; +typedef unsigned __int64 __u64; +typedef __u16 __le16; +typedef __u32 __le32; +typedef __u64 __le64; +#define __attribute__(x) +#define O_ACCMODE (O_RDONLY | O_RDWR | O_WRONLY) +#include +#else +#include +#include +#endif +typedef int bool; + +#define le16_to_cpu(x) (x) +#define le32_to_cpu(x) (x) +#define le64_to_cpu(x) (x) + +#include +#include + +#ifdef _MSC_VER +#pragma pack(1) +#endif +#include "ceph_fs.h" +#ifdef _MSC_VER +#pragma pack() +#endif + + + +#include + +#define PROTO_TAG_CEPH "CEPH" + +/* Wireshark ID of the CEPH protocol */ +static int proto_ceph = -1; + + + +/* These are the handles of our subdissectors */ +static dissector_handle_t data_handle=NULL; + +static dissector_handle_t ceph_handle; +static void dissect_ceph(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree); + +static guint32 global_ceph_min_port = 12340; +static guint32 global_ceph_max_port = 12369; + +static guint32 global_ceph_min_mon_port = 12340; +static guint32 global_ceph_max_mon_port = 12349; +static guint32 global_ceph_min_mds_port = 12350; +static guint32 global_ceph_max_mds_port = 12359; +static guint32 global_ceph_min_osd_port = 12360; +static guint32 global_ceph_max_osd_port = 12369; + +#define DEST_PORT_CEPH ((pinfo->destport >= global_ceph_min_port) && (pinfo->destport <= global_ceph_max_port)) + +#define PORT_IS_MON(port) ((port >= global_ceph_min_mon_port) && (port <= global_ceph_max_mon_port)) +#define PORT_IS_MDS(port) ((port >= global_ceph_min_mds_port) && (port <= global_ceph_max_mds_port)) +#define PORT_IS_OSD(port) ((port >= global_ceph_min_osd_port) && (port <= global_ceph_max_osd_port)) + +#define IS_ENTITY(cmp, 
port1, port2) (cmp(port1) || cmp(port2)) + +#define IS_MON(pinfo) IS_ENTITY(PORT_IS_MON, pinfo->srcport, pinfo->destport) +#define IS_MDS(pinfo) IS_ENTITY(PORT_IS_MDS, pinfo->srcport, pinfo->destport) +#define IS_OSD(pinfo) IS_ENTITY(PORT_IS_OSD, pinfo->srcport, pinfo->destport) + +#define MON_STR "mon" +#define MDS_STR "mds" +#define OSD_STR "osd" + +#define FMT_INO "0x%.16llx" + +#define PROTO_ADD_TEXT(type, s, field, modifier)\ + proto_tree_add_text(tree, tvb, offset + offsetof(type, field), sizeof(s->field), "" #field ": " modifier, s->field); + +#define CTIME_BUF_LEN 128 + +#define PROTO_ADD_TIME(tvb, tree, type, offset, head, field, name) \ + do { \ + time_t time; \ + time = head->field.tv_sec; \ + proto_tree_add_text(tree, tvb, offset + offsetof(type, field), \ + sizeof(head->field), "" #name ": %s (%d ns)", ctime(&time), head->field.tv_nsec); \ + } while (0) + +static const value_string packettypenames[] = { + { 1, "Shutdown" }, + { 2, "Ping" }, + { 4, "Mon Map" }, + { 5, "Mon Get Map" }, + { 10, "Client Mount" }, + { 11, "Client Unmount" }, + { 12, "Statfs" }, + { 13, "Statfs Reply" }, + { 20, "MDS Get Map" }, + { 21, "MDS Map" }, + { 22, "Client Session" }, + { 23, "Client Reconnect" }, + { 24, "Client Request" }, + { 25, "Client Request Forward" }, + { 26, "Client Reply" }, + { 0x310, "Client Caps" }, + { 0x311, "Client Lease" }, + { 0x312, "Client Snap" }, + { 40, "OSD Get Map" }, + { 41, "OSD Map" }, + { 42, "OSD Op" }, + { 43, "OSD Op Reply" }, + { 0, NULL } +}; + +#define ACK_MSG_SIZE 5 +#define TVB_MSG_HEADER_POS(x) (1 + offsetof(struct ceph_msg_header, x)) +#define TVB_IS_ACK(ofs) (tvb_get_guint8(tvb, ofs) == CEPH_MSGR_TAG_ACK) +#define TVB_MSG_FIELD(func, tvb, ofs, field) func(tvb, ofs + (TVB_IS_ACK(ofs) ? 
ACK_MSG_SIZE : 0) + TVB_MSG_HEADER_POS(field)) + +/* The following hf_* variables are used to hold the Wireshark IDs of +* our header fields; they are filled out when we call +* proto_register_field_array() in proto_register_ceph() +*/ +/** Kts attempt at defining the protocol */ +static gint hf_ceph = -1; +static gint hf_ceph_mds_op = -1; +static gint hf_ceph_header = -1; +static gint hf_ceph_banner = -1; +static gint hf_ceph_entity_addr = -1; +static gint hf_ceph_entity_type = -1; +static gint hf_ceph_entity_num = -1; +static gint hf_ceph_fsid = -1; +static gint hf_ceph_banner_magic = -1; +static gint hf_ceph_banner_version = -1; +static gint hf_ceph_connect_erank = -1; +static gint hf_ceph_connect_nonce = -1; +static gint hf_ceph_sockaddr_in = -1; +static gint hf_ceph_connect_host_type = -1; +static gint hf_ceph_connect_tag = -1; +static gint hf_ceph_connect_global_seq = -1; +static gint hf_ceph_connect_connect_seq = -1; +static gint hf_ceph_connect_flags = -1; +static gint hf_ceph_length = -1; +static gint hf_ceph_type = -1; +static gint hf_ceph_text = -1; +static gint hf_ceph_path = -1; +static gint hf_sin_family = -1; +static gint hf_sin_port = -1; +static gint hf_sin_addr = -1; +static gint hf_ceph_hdr_tag = -1; +static gint hf_ceph_hdr_seq_ack = -1; +static gint hf_ceph_hdr_seq = -1; +static gint hf_ceph_hdr_type = -1; +static gint hf_ceph_hdr_priority = -1; +static gint hf_ceph_hdr_mon_protocol = -1; +static gint hf_ceph_hdr_osd_protocol = -1; +static gint hf_ceph_hdr_mds_protocol = -1; +static gint hf_ceph_hdr_client_protocol = -1; +static gint hf_ceph_hdr_front_len = -1; +static gint hf_ceph_hdr_data_off = -1; +static gint hf_ceph_hdr_data_len = -1; +static gint hf_ceph_data = -1; +static gint hf_ceph_front = -1; +static gint hf_ceph_hdr_src = -1; +static gint hf_ceph_hdr_orig_src = -1; +static gint hf_ceph_hdr_dst = -1; +static gint hf_ceph_hdr_crc = -1; +static gint hf_ceph_footer = -1; +static gint hf_ceph_footer_flags = -1; +static gint 
hf_ceph_footer_front_crc = -1; +static gint hf_ceph_footer_data_crc = -1; + + +/* These are the ids of the subtrees that we may be creating */ +static gint ett_ceph = -1; +static gint ett_ceph_header = -1; +static gint ett_ceph_banner = -1; +static gint ett_ceph_entity_addr = -1; +static gint ett_ceph_length = -1; +static gint ett_ceph_type = -1; +static gint ett_ceph_text = -1; +static gint ett_ceph_front = -1; +static gint ett_ceph_data = -1; +static gint ett_ceph_footer = -1; + + +void proto_reg_handoff_ceph(void) +{ + static gboolean initialized=FALSE; + static guint32 port; + + if (!initialized) { + data_handle = find_dissector("data"); + ceph_handle = create_dissector_handle(dissect_ceph, proto_ceph); + for (port = global_ceph_min_port; port <= global_ceph_max_port; port++) + dissector_add("tcp.port", port, ceph_handle); + } + +} + +void proto_register_ceph (void) +{ + /* A header field is something you can search/filter on. + * + * We create a structure to register our fields. It consists of an + * array of hf_register_info structures, each of which are of the format + * {&(field id), {name, abbrev, type, display, strings, bitmask, blurb, HFILL}}. 
+ */ + static hf_register_info hf[] = { + { &hf_ceph, + { "Data", "ceph.data", FT_NONE, BASE_NONE, NULL, 0x0, + "CEPH PDU", HFILL }}, + { &hf_ceph_header, + { "Header", "ceph.header", FT_NONE, BASE_NONE, NULL, 0x0, + "CEPH Header", HFILL }}, + { &hf_ceph_banner, + { "Ceph Banner", "ceph.connect.banner", FT_STRING, BASE_NONE, NULL, 0x0, + "Ceph Banner", HFILL }}, + { &hf_ceph_entity_type, + { "Ceph Entity Type", "ceph.entity.type", FT_UINT32, BASE_DEC, NULL, 0x0, + "Ceph Entity Type", HFILL }}, + { &hf_ceph_entity_num, + { "Ceph Entity Num", "ceph.entity.num", FT_UINT32, BASE_DEC, NULL, 0x0, + "Ceph Entity Num", HFILL }}, + { &hf_ceph_entity_addr, + { "Ceph Entity Addr", "ceph.entity.addr", FT_NONE, BASE_NONE, NULL, 0x0, + "Ceph Entity Addr", HFILL }}, + { &hf_ceph_fsid, + { "Ceph FSID", "ceph.fsid", FT_NONE, BASE_NONE, NULL, 0x0, + "Ceph FSID", HFILL }}, + { &hf_ceph_banner_magic, + { "Ceph Banner Magic", "ceph.connect.banner.magic", FT_STRING, BASE_NONE, NULL, 0x0, + "Ceph Banner Magic", HFILL }}, + { &hf_ceph_banner_version, + { "Ceph Banner Version", "ceph.connect.banner.ver", FT_STRING, BASE_NONE, NULL, 0x0, + "Ceph Banner", HFILL }}, + { &hf_ceph_connect_erank, + { "erank", "ceph.connect.erank", FT_UINT32, BASE_HEX, NULL, 0x0, + "connect: erank", HFILL }}, + { &hf_ceph_connect_nonce, + { "nonce", "ceph.connect.nonce", FT_UINT32, BASE_HEX, NULL, 0x0, + "connect: nonce", HFILL }}, + { &hf_ceph_sockaddr_in, + { "sockaddr_in", "ceph.sockaddr_in", FT_NONE, BASE_NONE, NULL, 0x0, + "sockaddr_in", HFILL }}, + { &hf_sin_family, + { "sin_family", "ceph.sin_family", FT_UINT16, BASE_HEX, NULL, 0x0, + "sockaddr_in: sin_family", HFILL }}, + { &hf_sin_port, + { "sin_port", "ceph.sin_port", FT_UINT16, BASE_DEC, NULL, 0x0, + "sockaddr_in: sin_port", HFILL }}, + { &hf_sin_addr, + { "ip addr", "ceph.addr", FT_IPv4, BASE_NONE, NULL, 0x0, + "sockaddr_in: ip addr", HFILL }}, + { &hf_ceph_connect_host_type, + { "host_type", "ceph.connect.host_type", FT_UINT32, BASE_DEC, NULL, 0x0, + 
"connect: host_type", HFILL }}, + { &hf_ceph_connect_tag, + { "tag", "ceph.connect.tag", FT_UINT8, BASE_DEC, NULL, 0x0, + "connect: tag", HFILL }}, + { &hf_ceph_mds_op, + { "mds op", "ceph.mds.op", FT_UINT32, BASE_HEX, NULL, 0x0, + "ceph: mds op", HFILL }}, + { &hf_ceph_connect_global_seq, + { "global_seq", "ceph.connect.global_seq", FT_UINT32, BASE_DEC, NULL, 0x0, + "connect: global_seq", HFILL }}, + { &hf_ceph_connect_connect_seq, + { "connect_seq", "ceph.connect.connect_seq", FT_UINT32, BASE_DEC, NULL, 0x0, + "connect: connect_seq", HFILL }}, + { &hf_ceph_connect_flags, + { "flags", "ceph.connect.flags", FT_UINT8, BASE_HEX, NULL, 0x0, + "connect: flags", HFILL }}, + { &hf_ceph_length, + { "Package Length", "ceph.len", FT_UINT32, BASE_DEC, NULL, 0x0, + "Package Length", HFILL }}, + { &hf_ceph_type, + { "Type", "ceph.type", FT_UINT8, BASE_DEC, VALS(packettypenames), 0x0, + "Package Type", HFILL }}, + { &hf_ceph_text, + { "Text", "ceph.text", FT_STRING, BASE_NONE, NULL, 0x0, + "Text", HFILL }}, + { &hf_ceph_path, + { "path", "ceph.path", FT_STRING, BASE_NONE, NULL, 0x0, + "path", HFILL }}, + { &hf_ceph_hdr_tag, + { "tag", "ceph.tag", FT_UINT8, BASE_DEC, NULL, 0x0, + "hdr: tag", HFILL }}, + { &hf_ceph_hdr_seq_ack, + { "ack seq", "ceph.ack.seq", FT_UINT32, BASE_DEC, NULL, 0x0, + "ack: seq", HFILL }}, + { &hf_ceph_hdr_seq, + { "seq", "ceph.seq", FT_UINT64, BASE_DEC, NULL, 0x0, + "hdr: seq", HFILL }}, + { &hf_ceph_hdr_type, + { "type", "ceph.type", FT_UINT16, BASE_HEX, NULL, 0x0, + "hdr: type", HFILL }}, + { &hf_ceph_hdr_priority, + { "priority", "ceph.priority", FT_UINT16, BASE_DEC, NULL, 0x0, + "hdr: priority", HFILL }}, + { &hf_ceph_hdr_mon_protocol, + { "mon_protocol", "ceph.mon_protocol", FT_UINT16, BASE_DEC, NULL, 0x0, + "hdr: mon_protocol", HFILL }}, + { &hf_ceph_hdr_osd_protocol, + { "osd_protocol", "ceph.osd_protocol", FT_UINT16, BASE_DEC, NULL, 0x0, + "hdr: osd_protocol", HFILL }}, + { &hf_ceph_hdr_mds_protocol, + { "mds_protocol", "ceph.mds_protocol", 
FT_UINT16, BASE_DEC, NULL, 0x0, + "hdr: mds_protocol", HFILL }}, + { &hf_ceph_hdr_client_protocol, + { "client_protocol", "ceph.client_protocol", FT_UINT16, BASE_DEC, NULL, 0x0, + "hdr: client_protocol", HFILL }}, + { &hf_ceph_hdr_front_len, + { "front_len", "ceph.front_len", FT_UINT32, BASE_DEC, NULL, 0x0, + "hdr: front_len", HFILL }}, + { &hf_ceph_hdr_data_off, + { "data_off", "ceph.data_off", FT_UINT32, BASE_DEC, NULL, 0x0, + "hdr: data_off", HFILL }}, + { &hf_ceph_hdr_data_len, + { "data_len", "ceph.data_len", FT_UINT32, BASE_DEC, NULL, 0x0, + "hdr: data_len", HFILL }}, + { &hf_ceph_hdr_src, + { "src", "ceph.src", FT_NONE, BASE_NONE, NULL, 0x0, + "hdr: src", HFILL }}, + { &hf_ceph_hdr_orig_src, + { "orig_src", "ceph.orig_src", FT_NONE, BASE_NONE, NULL, 0x0, + "hdr: orig_src", HFILL }}, + { &hf_ceph_hdr_dst, + { "dst", "ceph.dst", FT_NONE, BASE_NONE, NULL, 0x0, + "hdr: dst", HFILL }}, + { &hf_ceph_hdr_crc, + { "crc", "ceph.crc", FT_UINT32, BASE_HEX, NULL, 0x0, + "hdr: crc", HFILL }}, + { &hf_ceph_front, + { "Front", "ceph.front", FT_NONE, BASE_NONE, NULL, 0x0, + "Ceph Front", HFILL }}, + { &hf_ceph_data, + { "Data", "ceph.data", FT_NONE, BASE_HEX, NULL, 0x0, + "Ceph Data", HFILL }}, + { &hf_ceph_footer, + { "Footer", "ceph.footer", FT_NONE, BASE_HEX, NULL, 0x0, + "Ceph Footer", HFILL }}, + { &hf_ceph_footer_flags, + { "flags", "ceph.footer.flags", FT_UINT32, BASE_HEX, NULL, 0x0, + "footer: flags", HFILL }}, + { &hf_ceph_footer_front_crc, + { "front_crc", "ceph.footer.front_crc", FT_UINT32, BASE_HEX, NULL, 0x0, + "footer: front_crc", HFILL }}, + { &hf_ceph_footer_data_crc, + { "data_crc", "ceph.footer.data_crc", FT_UINT32, BASE_HEX, NULL, 0x0, + "footer: data_crc", HFILL }}, + }; + static gint *ett[] = { + &ett_ceph, + &ett_ceph_header, + &ett_ceph_banner, + &ett_ceph_length, + &ett_ceph_entity_addr, + &ett_ceph_type, + &ett_ceph_text, + &ett_ceph_data, + &ett_ceph_front, + &ett_ceph_footer + }; + //if (proto_ceph == -1) { /* execute protocol initialization only 
once */
+    proto_ceph = proto_register_protocol ("CEPH Protocol", "CEPH", "ceph");
+
+    proto_register_field_array (proto_ceph, hf, array_length (hf));
+    proto_register_subtree_array (ett, array_length (ett));
+    register_dissector("ceph", dissect_ceph, proto_ceph);
+    //}
+}
+
+/*
+ * Add a 16-byte "sockaddr_in" item at 'offset' and, below it, the address
+ * family (2 bytes), the port (2 bytes) and the IPv4 address (4 bytes).
+ *
+ * The family is decoded little-endian, as before.  The port and the address
+ * are decoded big-endian: struct sockaddr_in keeps sin_port and sin_addr in
+ * network byte order, so a little-endian decode shows the port byte-swapped.
+ *
+ * Returns the offset just past the 16-byte structure.
+ */
+static guint32 dissect_sockaddr_in(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    proto_tree *ceph_sockaddr_tree = NULL;
+    proto_item *ceph_sub_item = NULL;
+    proto_item *ceph_item = proto_tree_get_parent(tree);
+
+    /* Kept from the original: tags the parent item with ett_ceph. */
+    ceph_sockaddr_tree = proto_item_add_subtree(ceph_item, ett_ceph);
+
+    ceph_sub_item = proto_tree_add_item( tree, hf_ceph_sockaddr_in, tvb, offset, 16, TRUE );
+    ceph_sockaddr_tree = proto_item_add_subtree(ceph_sub_item, ett_ceph);
+
+    proto_tree_add_item(ceph_sockaddr_tree, hf_sin_family, tvb, offset, 2, TRUE);
+    proto_tree_add_item(ceph_sockaddr_tree, hf_sin_port, tvb, offset+2, 2, FALSE);
+    proto_tree_add_item(ceph_sockaddr_tree, hf_sin_addr, tvb, offset+4, 4, FALSE);
+    offset += 16;
+    return offset;
+}
+
+/*
+ * Add the connection banner item at 'offset' with a 4-byte magic and a
+ * 4-byte version string below it.
+ *
+ * Returns offset + 9.
+ * NOTE(review): the items cover 8 bytes while 9 are consumed - confirm the
+ * banner length against the protocol header before changing either number.
+ */
+static guint32 dissect_ceph_banner(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    proto_tree *ceph_banner_tree = NULL;
+    proto_item *ceph_sub_item = NULL;
+
+    ceph_sub_item = proto_tree_add_item( tree, hf_ceph_banner, tvb, offset, 8, TRUE );
+    ceph_banner_tree = proto_item_add_subtree(ceph_sub_item, ett_ceph);
+
+    proto_tree_add_item(ceph_banner_tree, hf_ceph_banner_magic, tvb, offset, 4, TRUE);
+    proto_tree_add_item(ceph_banner_tree, hf_ceph_banner_version, tvb, offset+4, 4, TRUE);
+
+    return offset+9;
+}
+
+static guint32 dissect_ceph_entity_addr(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    proto_tree *ceph_entity_tree = NULL;
+    proto_item *ceph_sub_item = NULL;
+
+    ceph_sub_item = proto_tree_add_item( tree, hf_ceph_entity_addr, tvb, offset, sizeof(struct ceph_entity_addr), TRUE );
+    ceph_entity_tree = proto_item_add_subtree(ceph_sub_item, ett_ceph);
+    proto_tree_add_item(ceph_entity_tree, hf_ceph_connect_erank, tvb, offset, 4, TRUE);
+    
proto_tree_add_item(ceph_entity_tree, hf_ceph_connect_nonce, tvb, offset+4, 4, TRUE);
+    offset = dissect_sockaddr_in(tvb, ceph_entity_tree, offset+8);
+#if 0
+    proto_tree_add_item(ceph_entity_tree, hf_ceph_connect_host_type, tvb, offset, 4, TRUE);
+    offset += 4;
+#endif
+
+    return offset;
+}
+
+/*
+ * Add an "fsid" item covering one struct ceph_fsid at 'offset', with the
+ * 'major' and 'minor' members shown below it.
+ *
+ * Changes from the previous version:
+ *  - the item length is sizeof(struct ceph_fsid); it used to be the size of
+ *    struct ceph_entity_addr, a different structure;
+ *  - the major/minor text items are placed at offset + offsetof(...); they
+ *    used to be placed at the bare member offset, i.e. relative to the start
+ *    of the tvb rather than to the fsid;
+ *  - the erank/nonce items, which belong to the entity address and were laid
+ *    over the fsid bytes, are no longer added.
+ *
+ * Returns the offset just past the fsid.
+ */
+static guint32 dissect_ceph_fsid(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    proto_tree *ceph_fsid_tree = NULL;
+    proto_item *ceph_sub_item = NULL;
+    struct ceph_fsid *fsid;
+
+    fsid = (struct ceph_fsid *)tvb_get_ptr(tvb, offset, sizeof(struct ceph_fsid));
+
+    ceph_sub_item = proto_tree_add_item( tree, hf_ceph_fsid, tvb, offset, sizeof(struct ceph_fsid), TRUE );
+    ceph_fsid_tree = proto_item_add_subtree(ceph_sub_item, ett_ceph);
+
+    proto_tree_add_text(ceph_fsid_tree, tvb, offset + offsetof(struct ceph_fsid, major),
+        sizeof(fsid->major), "major: " FMT_INO, fsid->major);
+    proto_tree_add_text(ceph_fsid_tree, tvb, offset + offsetof(struct ceph_fsid, minor),
+        sizeof(fsid->minor), "minor: " FMT_INO, fsid->minor);
+
+    offset += sizeof(struct ceph_fsid);
+
+    return offset;
+}
+
+/*
+ * Add an entity instance at 'offset': a 4-byte entity type, a 4-byte entity
+ * number, then the entity address decoded by dissect_ceph_entity_addr().
+ *
+ * Returns the offset returned by dissect_ceph_entity_addr().
+ * NOTE(review): the enclosing item is given the length of
+ * struct ceph_entity_addr although it also spans the 8 bytes of type/num -
+ * confirm which structure size is intended.
+ */
+static guint32 dissect_ceph_entity_inst(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    proto_tree *ceph_entity_tree = NULL;
+    proto_item *ceph_sub_item = NULL;
+
+    ceph_sub_item = proto_tree_add_item( tree, hf_ceph_entity_addr, tvb, offset, sizeof(struct ceph_entity_addr), TRUE );
+    ceph_entity_tree = proto_item_add_subtree(ceph_sub_item, ett_ceph);
+    proto_tree_add_item(ceph_entity_tree, hf_ceph_entity_type, tvb, offset, 4, TRUE);
+    proto_tree_add_item(ceph_entity_tree, hf_ceph_entity_num, tvb, offset+4, 4, TRUE);
+    offset += 8;
+    offset = dissect_ceph_entity_addr(tvb, ceph_entity_tree, offset);
+    return offset;
+}
+
+static guint32 dissect_ceph_footer(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    proto_tree *ceph_footer_tree = 
NULL;
+    proto_item *ceph_sub_item = NULL;
+
+    ceph_sub_item = proto_tree_add_item( tree, hf_ceph_footer, tvb, offset, sizeof(struct ceph_msg_footer), TRUE );
+    ceph_footer_tree = proto_item_add_subtree(ceph_sub_item, ett_ceph);
+    proto_tree_add_item(ceph_footer_tree, hf_ceph_footer_flags, tvb, offset, 4, TRUE);
+    proto_tree_add_item(ceph_footer_tree, hf_ceph_footer_front_crc, tvb, offset+4, 4, TRUE);
+    proto_tree_add_item(ceph_footer_tree, hf_ceph_footer_data_crc, tvb, offset+8, 4, TRUE);
+    offset += 12;
+    return offset;
+}
+
+/*
+ * Dissect the client side of the connection handshake: the banner, then a
+ * "Header" item holding the entity address, host_type, global_seq,
+ * connect_seq (4 bytes each) and a 1-byte flags field.
+ *
+ * Returns the offset 12 bytes past the end of the entity address, i.e. the
+ * position of the flags byte.
+ * NOTE(review): the flags byte is displayed but not counted in the returned
+ * offset - confirm whether the caller expects that.
+ */
+static guint32 dissect_ceph_client_connect(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    proto_item *parent_item = proto_tree_get_parent(tree);
+    proto_item *hdr_item;
+    proto_tree *hdr_tree;
+
+    offset = dissect_ceph_banner(tvb, tree, offset);
+
+    /* Tags the parent item with ett_ceph; the returned tree is not needed. */
+    proto_item_add_subtree(parent_item, ett_ceph);
+
+    hdr_item = proto_tree_add_item(tree, hf_ceph_header, tvb, offset, -1, TRUE);
+    hdr_tree = proto_item_add_subtree(hdr_item, ett_ceph);
+
+    /* From here on 'offset' is the first byte after the entity address. */
+    offset = dissect_ceph_entity_addr(tvb, hdr_tree, offset);
+
+    proto_tree_add_item(hdr_tree, hf_ceph_connect_host_type, tvb, offset, 4, TRUE);
+    proto_tree_add_item(hdr_tree, hf_ceph_connect_global_seq, tvb, offset + 4, 4, TRUE);
+    proto_tree_add_item(hdr_tree, hf_ceph_connect_connect_seq, tvb, offset + 8, 4, TRUE);
+    proto_tree_add_item(hdr_tree, hf_ceph_connect_flags, tvb, offset + 12, 1, TRUE);
+
+    return offset + 12;
+}
+
+static guint32 dissect_ceph_server_connect(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    proto_tree *ceph_header_tree = NULL;
+    proto_item *ceph_sub_item = NULL;
+    proto_item *ceph_item = proto_tree_get_parent(tree);
+
+    offset = dissect_ceph_banner(tvb, tree, offset);
+
+    ceph_header_tree = proto_item_add_subtree(ceph_item, ett_ceph);
+
+    ceph_sub_item = proto_tree_add_item( tree, hf_ceph_header, tvb, offset, -1, TRUE );
+    ceph_header_tree = 
proto_item_add_subtree(ceph_sub_item, ett_ceph);
+
+    offset = dissect_ceph_entity_addr(tvb, ceph_header_tree, offset);
+
+    proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_tag, tvb, offset, 1, TRUE);
+    offset += 1;
+    proto_tree_add_item(ceph_header_tree, hf_ceph_connect_global_seq, tvb, offset, 4, TRUE);
+    proto_tree_add_item(ceph_header_tree, hf_ceph_connect_connect_seq, tvb, offset+4, 4, TRUE);
+    proto_tree_add_item(ceph_header_tree, hf_ceph_connect_flags, tvb, offset+8, 1, TRUE);
+    /* NOTE(review): the 9 bytes displayed above are not added to the
+     * returned offset - confirm what the caller expects. */
+    return offset;
+}
+
+/*
+ * Show the members of one struct ceph_file_layout located at 'offset' as
+ * text items directly under 'tree'.
+ *
+ * Returns the offset just past the layout.  The previous version added
+ * sizeof(struct ceph_mds_reply_head), an unrelated structure, so the
+ * returned offset did not correspond to the data that was dissected.
+ */
+static guint32 dissect_ceph_file_layout(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    guint32 orig_ofs = offset;
+    struct ceph_file_layout *lo;
+
+    lo = (struct ceph_file_layout *)tvb_get_ptr(tvb, offset, sizeof(struct ceph_file_layout));
+
+    PROTO_ADD_TEXT(struct ceph_file_layout, lo, fl_stripe_unit, "%d");
+    PROTO_ADD_TEXT(struct ceph_file_layout, lo, fl_stripe_count, "%d");
+    PROTO_ADD_TEXT(struct ceph_file_layout, lo, fl_object_size, "%d");
+    PROTO_ADD_TEXT(struct ceph_file_layout, lo, fl_cas_hash, "%d");
+    PROTO_ADD_TEXT(struct ceph_file_layout, lo, fl_object_stripe_unit, "%d");
+    PROTO_ADD_TEXT(struct ceph_file_layout, lo, fl_pg_preferred, "%d");
+    PROTO_ADD_TEXT(struct ceph_file_layout, lo, fl_pg_type, "%u");
+    PROTO_ADD_TEXT(struct ceph_file_layout, lo, fl_pg_size, "%u");
+    PROTO_ADD_TEXT(struct ceph_file_layout, lo, fl_pg_pool, "%u");
+
+    return orig_ofs + sizeof(struct ceph_file_layout);
+}
+
+/*
+ * Dissect a file path: an 8-byte little-endian inode number, a 4-byte
+ * little-endian length and 'len' bytes of path text.
+ *
+ * The inode is stored in *ino.  When the length is non-zero and the
+ * allocation succeeds, *path receives a malloc()ed, NUL-terminated copy of
+ * the path that the caller must free(); otherwise *path is left untouched.
+ *
+ * Returns the offset just past the path bytes.
+ */
+static int dissect_ceph_filepath(tvbuff_t *tvb, proto_tree *tree, guint32 offset, char **path, guint64 *ino)
+{
+    guint32 len;
+    const char *p = NULL;
+
+    *ino = tvb_get_letoh64(tvb, offset);
+    proto_tree_add_text(tree, tvb, offset, sizeof(*ino), "inode: " FMT_INO, *ino);
+    offset += sizeof(*ino);
+    len = tvb_get_letohl(tvb, offset);
+    proto_tree_add_text(tree, tvb, offset, sizeof(len), "len: %d", len);
+    offset += sizeof(len);
+
+    if (len) {
+        p = tvb_get_ptr(tvb, offset, len);
+        *path = malloc(len+1);
+        if (*path) {
+            memcpy(*path, p, len);
+            
(*path)[len] = '\0';
+            proto_tree_add_item(tree, hf_ceph_path, tvb, offset, len, TRUE);
+        }
+    }
+
+    offset += len;
+
+    return offset;
+}
+
+/*
+ * Monitor "statfs" request: shows the fsid and the transaction id of one
+ * struct ceph_mon_statfs located at 'offset'.
+ * Returns the offset just past that structure.
+ */
+static guint32 dissect_ceph_mon_statfs(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    /* Fetching the whole structure first makes a short buffer fail here. */
+    struct ceph_mon_statfs *msg =
+        (struct ceph_mon_statfs *)tvb_get_ptr(tvb, offset, sizeof *msg);
+
+    dissect_ceph_fsid(tvb, tree, offset + offsetof(struct ceph_mon_statfs, fsid));
+    PROTO_ADD_TEXT(struct ceph_mon_statfs, msg, tid, "%lld");
+
+    return offset + sizeof *msg;
+}
+
+/*
+ * Monitor "statfs" reply: shows the fsid, the transaction id and the four
+ * counters of the embedded 'st' member of one struct ceph_mon_statfs_reply
+ * located at 'offset'.
+ * Returns the offset just past that structure.
+ */
+static guint32 dissect_ceph_mon_statfs_reply(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    struct ceph_mon_statfs_reply *msg =
+        (struct ceph_mon_statfs_reply *)tvb_get_ptr(tvb, offset, sizeof *msg);
+
+    dissect_ceph_fsid(tvb, tree, offset + offsetof(struct ceph_mon_statfs_reply, fsid));
+    PROTO_ADD_TEXT(struct ceph_mon_statfs_reply, msg, tid, "%lld");
+    PROTO_ADD_TEXT(struct ceph_mon_statfs_reply, msg, st.f_total, "%lld");
+    PROTO_ADD_TEXT(struct ceph_mon_statfs_reply, msg, st.f_free, "%lld");
+    PROTO_ADD_TEXT(struct ceph_mon_statfs_reply, msg, st.f_avail, "%lld");
+    PROTO_ADD_TEXT(struct ceph_mon_statfs_reply, msg, st.f_objects, "%lld");
+
+    return offset + sizeof *msg;
+}
+
+/*
+ * OSD "get map" request: shows the fsid and the 'start' member of one
+ * struct ceph_osd_getmap located at 'offset'.
+ * Returns the offset just past that structure.
+ */
+static guint32 dissect_ceph_client_osd_getmap(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    struct ceph_osd_getmap *msg =
+        (struct ceph_osd_getmap *)tvb_get_ptr(tvb, offset, sizeof *msg);
+
+    dissect_ceph_fsid(tvb, tree, offset + offsetof(struct ceph_osd_getmap, fsid));
+    PROTO_ADD_TEXT(struct ceph_osd_getmap, msg, start, "%d");
+
+    return offset + sizeof *msg;
+}
+
+static guint32 dissect_ceph_client_mds_getmap(tvbuff_t *tvb, proto_tree *tree, guint32 offset)
+{
+    struct ceph_mds_getmap *req;
+
+    req = (struct ceph_mds_getmap *)tvb_get_ptr(tvb, offset, sizeof(struct ceph_mds_getmap));
+
+    dissect_ceph_fsid(tvb, tree, offset + 
offsetof(struct ceph_mds_getmap, fsid)); + PROTO_ADD_TEXT(struct ceph_mds_getmap, req, want, "%d"); + + return offset + sizeof(struct ceph_mds_getmap); +} + +static guint32 dissect_ceph_client_mds_request(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset) +{ + struct ceph_mds_request_head *head; + proto_item *item; + + head = (struct ceph_mds_request_head *)tvb_get_ptr(tvb, offset, sizeof(struct ceph_mds_request_head)); + + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, tid, "%lld"); + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, oldest_client_tid, "%lld"); + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, mdsmap_epoch, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, num_fwd, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, retry_attempt, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, mds_wants_replica_in_dirino, "%lld"); + + item = proto_tree_add_item(tree, hf_ceph_mds_op, tvb, offset+offsetof(struct ceph_mds_request_head, op), sizeof(head->op), TRUE); + proto_item_append_text(item, " (%s)", ceph_mds_op_name(head->op)); + + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, caller_uid, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, caller_gid, "%d"); + + if (check_col(pinfo->cinfo, COL_INFO)) { + col_append_fstr(pinfo->cinfo, COL_INFO, " (%s)", ceph_mds_op_name(head->op)); + } + + switch (head->op) { + case CEPH_MDS_OP_FINDINODE: + break; + case CEPH_MDS_OP_STAT: + case CEPH_MDS_OP_LSTAT: + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.stat.mask, "0x%.4x"); + break; + case CEPH_MDS_OP_LUTIME: + case CEPH_MDS_OP_UTIME: +#define MDS_REQ_ADD_TIME(field) PROTO_ADD_TIME(tvb, tree, struct ceph_mds_request_head, offset, head, args.utime.field, field) + MDS_REQ_ADD_TIME(mtime); + MDS_REQ_ADD_TIME(atime); + MDS_REQ_ADD_TIME(ctime); + break; + case CEPH_MDS_OP_CHMOD: + case CEPH_MDS_OP_LCHMOD: + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.chmod.mode, "0%.5o"); + break; + 
case CEPH_MDS_OP_CHOWN: + case CEPH_MDS_OP_LCHOWN: + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.chown.uid, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.chown.gid, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.chown.mask, "0x%.4x"); + break; + case CEPH_MDS_OP_LSETLAYOUT: + dissect_ceph_file_layout(tvb, tree, offset + offsetof(struct ceph_mds_request_head, args.setlayout.layout)); + break; + case CEPH_MDS_OP_SETXATTR: + case CEPH_MDS_OP_LSETXATTR: + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.setxattr.flags, "0x%.4x"); + break; + case CEPH_MDS_OP_READDIR: + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.readdir.frag, "%d"); + break; + case CEPH_MDS_OP_MKNOD: + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.mknod.mode, "0%.5o"); + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.mknod.rdev, "%d"); + break; + case CEPH_MDS_OP_OPEN: + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.open.flags, "%x"); + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.open.mode, "0%.5o"); + break; + case CEPH_MDS_OP_LTRUNCATE: + case CEPH_MDS_OP_TRUNCATE: + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.truncate.length, "%lld"); + break; + case CEPH_MDS_OP_MKDIR: + PROTO_ADD_TEXT(struct ceph_mds_request_head, head, args.mkdir.mode, "0%.5o"); + break; + case CEPH_MDS_OP_RMXATTR: + case CEPH_MDS_OP_LRMXATTR: + case CEPH_MDS_OP_LINK: + case CEPH_MDS_OP_UNLINK: + case CEPH_MDS_OP_RENAME: + case CEPH_MDS_OP_RMDIR: + case CEPH_MDS_OP_SYMLINK: + case CEPH_MDS_OP_FSYNC: + case CEPH_MDS_OP_LSSNAP: + case CEPH_MDS_OP_MKSNAP: + case CEPH_MDS_OP_RMSNAP: + break; + } + + offset += sizeof(struct ceph_mds_request_head); + + if (head->op == CEPH_MDS_OP_FINDINODE) { + + } else { + guint64 ino1, ino2; + char *s1 = NULL, *s2 = NULL; + + offset = dissect_ceph_filepath(tvb, tree, offset, &s1, &ino1); + offset = dissect_ceph_filepath(tvb, tree, offset, &s2, &ino2); + + if (check_col(pinfo->cinfo, 
COL_INFO)) { + if (s1) + col_append_fstr(pinfo->cinfo, COL_INFO, " %s", s1); + if (s2) + col_append_fstr(pinfo->cinfo, COL_INFO, " -> %s", s2); + } + + + + } + + return offset; +} + +static guint32 dissect_ceph_client_mds_reply(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset) +{ + guint32 orig_ofs = offset; + struct ceph_mds_reply_head *head; + + head = (struct ceph_mds_reply_head *)tvb_get_ptr(tvb, offset, sizeof(struct ceph_mds_reply_head)); + + PROTO_ADD_TEXT(struct ceph_mds_reply_head, head, tid, "%lld"); + + proto_tree_add_text(tree, tvb, offsetof(struct ceph_mds_reply_head, op), + sizeof(head->op), "op: %d (%s)", head->op, ceph_mds_op_name(head->op)); + + PROTO_ADD_TEXT(struct ceph_mds_reply_head, head, result, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_reply_head, head, file_caps, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_reply_head, head, file_caps_seq, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_reply_head, head, file_caps_mseq, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_reply_head, head, mdsmap_epoch, "%d"); + + + if (check_col(pinfo->cinfo, COL_INFO)) { + col_append_fstr(pinfo->cinfo, COL_INFO, " (%s)", ceph_mds_op_name(head->op)); + } + + return orig_ofs + sizeof(struct ceph_mds_reply_head); +} + +static guint32 dissect_ceph_client_mds_lease_request(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset) +{ + guint32 orig_ofs = offset; + struct ceph_mds_lease *head; + static char *lease_action[] = { "", "revoke", "release", "renew" }; + + head = (struct ceph_mds_lease *)tvb_get_ptr(tvb, offset, sizeof(struct ceph_mds_lease)); + + PROTO_ADD_TEXT(struct ceph_mds_lease, head, action, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_lease, head, mask, "%.4x"); + PROTO_ADD_TEXT(struct ceph_mds_lease, head, ino, FMT_INO); + PROTO_ADD_TEXT(struct ceph_mds_lease, head, first, "%lld"); + PROTO_ADD_TEXT(struct ceph_mds_lease, head, last, "%lld"); + + if (check_col(pinfo->cinfo, COL_INFO)) { + if (head->action < 4) { + col_append_fstr(pinfo->cinfo, 
COL_INFO, " (%s)", lease_action[head->action]); + } + } + + return orig_ofs + sizeof(struct ceph_mds_lease); +} + +static guint32 dissect_ceph_client_mds_caps_request(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset) +{ + guint32 orig_ofs = offset; + struct ceph_mds_caps *head; + + head = (struct ceph_mds_caps *)tvb_get_ptr(tvb, offset, sizeof(struct ceph_mds_caps)); + + PROTO_ADD_TEXT(struct ceph_mds_caps, head, op, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_caps, head, ino, FMT_INO); + PROTO_ADD_TEXT(struct ceph_mds_caps, head, seq, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_caps, head, caps, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_caps, head, wanted, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_caps, head, size, "%llu"); + PROTO_ADD_TEXT(struct ceph_mds_caps, head, max_size, "%llu"); + PROTO_ADD_TEXT(struct ceph_mds_caps, head, truncate_seq, "%lld"); + PROTO_ADD_TEXT(struct ceph_mds_caps, head, migrate_seq, "%d"); + PROTO_ADD_TEXT(struct ceph_mds_caps, head, time_warp_seq, "%llu"); + PROTO_ADD_TEXT(struct ceph_mds_caps, head, snap_follows, "%llu"); + PROTO_ADD_TEXT(struct ceph_mds_caps, head, snap_trace_len, "%d"); + +#define CAPS_REQ_ADD_TIME(field) PROTO_ADD_TIME(tvb, tree, struct ceph_mds_caps, offset, head, field, field) + CAPS_REQ_ADD_TIME(mtime); + CAPS_REQ_ADD_TIME(atime); + CAPS_REQ_ADD_TIME(ctime); + + if (check_col(pinfo->cinfo, COL_INFO)) { + col_append_fstr(pinfo->cinfo, COL_INFO, " (%s)", ceph_cap_op_name(head->op)); + } + + return orig_ofs + sizeof(struct ceph_mds_caps); +} + +static guint32 dissect_ceph_client_front(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset, guint16 type) +{ + switch (type) { + case CEPH_MSG_STATFS: + offset = dissect_ceph_mon_statfs(tvb, tree, offset); + break; + case CEPH_MSG_STATFS_REPLY: + offset = dissect_ceph_mon_statfs_reply(tvb, tree, offset); + break; + case CEPH_MSG_CLIENT_REQUEST: /* mds request */ + offset = dissect_ceph_client_mds_request(tvb, pinfo, tree, offset); + break; + case 
CEPH_MSG_CLIENT_REPLY:
+		offset = dissect_ceph_client_mds_reply(tvb, pinfo, tree, offset);
+		break;
+	case CEPH_MSG_CLIENT_LEASE:
+		offset = dissect_ceph_client_mds_lease_request(tvb, pinfo, tree, offset);
+		break;
+	case CEPH_MSG_CLIENT_CAPS:
+		offset = dissect_ceph_client_mds_caps_request(tvb, pinfo, tree, offset);
+		break;
+	case CEPH_MSG_OSD_GETMAP:
+		offset = dissect_ceph_client_osd_getmap(tvb, tree, offset);
+		break;
+	case CEPH_MSG_MDS_GETMAP:
+		offset = dissect_ceph_client_mds_getmap(tvb, tree, offset);
+		break;
+	default:
+		break;
+	}
+	return offset;
+}
+
+/*
+ * Dissect one Ceph message starting at 'offset' in 'tvb': an optional
+ * leading ACK, the tag byte, the message header, the front payload, the
+ * data payload and the footer.
+ *
+ * Header fields go into a new hf_ceph_header subtree of 'tree'.  The front
+ * payload is handed to dissect_ceph_client_front() together with the
+ * header's type field.  Returns the offset produced by
+ * dissect_ceph_footer().
+ *
+ * 'tree' must not be NULL; the callers only invoke this when a tree was
+ * requested.
+ */
+static guint32 dissect_ceph_generic(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, guint32 offset)
+{
+	proto_tree *ceph_header_tree = NULL;
+	proto_item *ceph_sub_item = NULL;
+	guint32 front_len, data_len;
+	guint32 msg_ofs;
+	guint16 type;
+	guint8 tag;
+
+	ceph_sub_item = proto_tree_add_item( tree, hf_ceph_header, tvb, offset, -1, TRUE );
+	ceph_header_tree = proto_item_add_subtree(ceph_sub_item, ett_ceph);
+
+	tag = tvb_get_guint8(tvb, offset);
+
+	if (tag == CEPH_MSGR_TAG_ACK) {
+		/* an ACK (tag byte + 4-byte seq) precedes the actual message */
+		proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_tag, tvb, offset, 1, TRUE);
+		proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_seq_ack, tvb, offset+1, 4, TRUE);
+		offset += ACK_MSG_SIZE;
+	}
+
+	/*
+	 * Position of the message's own tag byte.  The raw header reads further
+	 * down must use this base rather than the function's entry offset, or
+	 * they are off by ACK_MSG_SIZE whenever an ACK was skipped above.
+	 */
+	msg_ofs = offset;
+
+	proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_tag, tvb, offset, 1, TRUE);
+	proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_seq, tvb, offset+1, 8, TRUE);
+	offset += 9;
+	proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_type, tvb, offset, 2, TRUE);
+	offset += 2;
+	proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_priority, tvb, offset, 2, TRUE);
+	proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_mon_protocol, tvb, offset+2, 2, TRUE);
+	proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_osd_protocol, tvb, offset+4, 2, TRUE);
+	proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_mds_protocol, tvb, offset+6, 2, TRUE);
+	proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_client_protocol, tvb, offset+8, 2, TRUE);
+	offset += 10;
+	proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_front_len, tvb, offset, 4, TRUE);
+	proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_data_off, tvb, offset+4, 4, TRUE);
+	proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_data_len, tvb, offset+8, 4, TRUE);
+	offset += 12;
+	/* three entity instances follow the length fields */
+	offset = dissect_ceph_entity_inst(tvb, ceph_header_tree, offset);
+	offset = dissect_ceph_entity_inst(tvb, ceph_header_tree, offset);
+	offset = dissect_ceph_entity_inst(tvb, ceph_header_tree, offset);
+	proto_tree_add_item(ceph_header_tree, hf_ceph_hdr_crc, tvb, offset, 4, TRUE);
+	offset += 4;
+
+	/* front_len is a 32-bit field and type a 16-bit field in the header */
+	front_len = TVB_MSG_FIELD(tvb_get_letohl, tvb, msg_ofs, front_len);
+	type = TVB_MSG_FIELD(tvb_get_letohs, tvb, msg_ofs, type);
+
+	if (front_len) {
+		dissect_ceph_client_front(tvb, pinfo, tree, offset, type);
+		/*
+		 * The front section is exactly front_len bytes long no matter how
+		 * much of it the type-specific dissector understood; skip all of
+		 * it so the data payload and footer are found at the right place.
+		 */
+		offset += front_len;
+	}
+
+	data_len = TVB_MSG_FIELD(tvb_get_letohl, tvb, msg_ofs, data_len);
+	if (data_len) {
+		proto_tree_add_item( tree, hf_ceph_data, tvb, offset, data_len, TRUE );
+		offset += data_len;
+	}
+
+	offset = dissect_ceph_footer(tvb, tree, offset);
+
+	return offset;
+}
+
+static void
+dissect_ceph_client(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
+{
+
+	proto_item *ceph_item = NULL;
+	proto_tree *ceph_tree = NULL;
+	guint16 type = 0;
+	const guchar *ptr;
+	guint32 pos = 0;
+	int have_banner = 0;
+
+	if (check_col(pinfo->cinfo, COL_PROTOCOL))
+		col_set_str(pinfo->cinfo, COL_PROTOCOL, PROTO_TAG_CEPH);
+	/* Clear out stuff in the info column */
+	if(check_col(pinfo->cinfo,COL_INFO)){
+ 
col_clear(pinfo->cinfo,COL_INFO); + } + + ptr = tvb_get_ptr(tvb, pos, 9); + if (ptr && memcmp(ptr, "ceph", 4) == 0) { + have_banner = 1; + pos += 9; + } + + // This is not a good way of dissecting packets. The tvb length should + // be sanity checked so we aren't going past the actual size of the buffer. + type = tvb_get_guint8( tvb, 4 ); // Get the type byte + + if (check_col(pinfo->cinfo, COL_INFO)) { + char *entity_str = NULL; + if (IS_MON(pinfo)) + entity_str = MON_STR; + else if (IS_MDS(pinfo)) + entity_str = MDS_STR; + else if (IS_OSD(pinfo)) + entity_str = OSD_STR; + else + entity_str = "???"; + if (have_banner) { + col_add_fstr(pinfo->cinfo, COL_INFO, "[%s] Connect Request", entity_str); + } else { + type = TVB_MSG_FIELD(tvb_get_letohl, tvb, 0, type); + col_add_fstr(pinfo->cinfo, COL_INFO, "[%s] %s", + entity_str, + val_to_str(type, packettypenames, "Unknown Type:0x%02x")); + } + } + + if (tree) { /* we are being asked for details */ + guint32 offset = 0; + + ceph_item = proto_tree_add_item(tree, proto_ceph, tvb, 0, -1, TRUE); + ceph_tree = proto_item_add_subtree(ceph_item, ett_ceph); + if (have_banner) { /* this is a connect message */ + offset = dissect_ceph_client_connect(tvb, ceph_tree, offset); + } else { + offset = dissect_ceph_generic(tvb, pinfo, ceph_tree, offset); + } + } +} + +static void +dissect_ceph_server(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree) +{ + + proto_item *ceph_item = NULL; + proto_tree *ceph_tree = NULL; + guint16 type = 0; + const guchar *ptr; + guint32 pos = 0; + int have_banner = 0; + + if (check_col(pinfo->cinfo, COL_PROTOCOL)) + col_set_str(pinfo->cinfo, COL_PROTOCOL, PROTO_TAG_CEPH); + /* Clear out stuff in the info column */ + if(check_col(pinfo->cinfo,COL_INFO)){ + col_clear(pinfo->cinfo,COL_INFO); + } + + ptr = tvb_get_ptr(tvb, pos, 9); + if (ptr && memcmp(ptr, "ceph", 4) == 0) { + have_banner = 1; + pos += 9; + } + + // This is not a good way of dissecting packets. 
The tvb length should
+	// be sanity checked so we aren't going past the actual size of the buffer.
+	type = tvb_get_guint8( tvb, 4 ); // Get the type byte
+
+	if (check_col(pinfo->cinfo, COL_INFO)) {
+		char *entity_str = NULL;
+		if (IS_MON(pinfo))
+			entity_str = MON_STR;
+		else if (IS_MDS(pinfo))
+			entity_str = MDS_STR;
+		else if (IS_OSD(pinfo))
+			entity_str = OSD_STR;
+		else
+			entity_str = "???";
+		if (have_banner) {
+			col_add_fstr(pinfo->cinfo, COL_INFO, "[%s] Connect Response", entity_str);
+		} else {
+			type = TVB_MSG_FIELD(tvb_get_letohl, tvb, 0, type);
+			col_add_fstr(pinfo->cinfo, COL_INFO, "[%s] %s",
+				entity_str,
+				val_to_str(type, packettypenames, "Unknown Type:0x%02x"));
+		}
+	}
+
+	if (tree) { /* we are being asked for details */
+		guint32 offset = 0;
+
+		ceph_item = proto_tree_add_item(tree, proto_ceph, tvb, 0, -1, TRUE);
+		ceph_tree = proto_item_add_subtree(ceph_item, ett_ceph);
+
+		if (have_banner) {
+			offset = dissect_ceph_server_connect(tvb, ceph_tree, offset);
+		} else {
+			offset = dissect_ceph_generic(tvb, pinfo, ceph_tree, offset);
+		}
+	}
+}
+
+/*
+ * Per-PDU callback for tcp_dissect_pdus(): pick the client-side or the
+ * server-side dissector.
+ *
+ * NOTE(review): DEST_PORT_CEPH is defined elsewhere in this file;
+ * presumably it is true when the packet is addressed to a Ceph server
+ * port -- confirm.
+ */
+static void
+dissect_ceph_message(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
+{
+	if (DEST_PORT_CEPH)
+		dissect_ceph_client(tvb, pinfo, tree);
+	else
+		dissect_ceph_server(tvb, pinfo, tree);
+}
+
+/*
+ * Dissect a stand-alone ACK: one tag byte followed by a 4-byte sequence
+ * number, both taken from the start of 'tvb'.
+ *
+ * Sets the Protocol and Info columns.  Returns 5 when a tree was
+ * requested and 0 otherwise.
+ */
+static guint dissect_ceph_acks(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
+{
+	guint32 offset = 0;
+
+	if (check_col(pinfo->cinfo, COL_PROTOCOL))
+		col_set_str(pinfo->cinfo, COL_PROTOCOL, PROTO_TAG_CEPH);
+	/* Clear out stuff in the info column */
+	if(check_col(pinfo->cinfo,COL_INFO)){
+		col_clear(pinfo->cinfo,COL_INFO);
+		col_add_fstr(pinfo->cinfo, COL_INFO, "Ack");
+	}
+	if (tree) {
+		proto_tree_add_item(tree, proto_ceph, tvb, 0, 5, TRUE);
+		proto_tree_add_item(tree, hf_ceph_hdr_tag, tvb, offset, 1, TRUE);
+		proto_tree_add_item(tree, hf_ceph_hdr_seq_ack, tvb, offset+1, 4, TRUE);
+		offset += 5;
+	}
+
+	return offset;
+}
+
+/*
+ * Determine the length of the Ceph PDU that starts at 'offset' in 'tvb'
+ * (length callback for tcp_dissect_pdus()).
+ *
+ * - Banner ("ceph..."): banner + entity address + connect structure, or
+ *   the connect-reply structure when the packet does not go to a Ceph port.
+ * - Tag byte 0: returns 0.
+ * - Anything else: optional ACK + tag byte + message header + front_len +
+ *   data_len + footer.
+ */
+static guint
+get_ceph_message_len(packet_info *pinfo, tvbuff_t *tvb, int offset)
+{
+	guint32 len;
+	guint32 pos = 0;
+	guint8 tag;
+	int msg_ofs;
+
+	/*
+	 * tvb_memeql() reports "not equal" (-1) when fewer than four bytes are
+	 * available, so no raw pointer is compared past the captured data.
+	 */
+	if (tvb_memeql(tvb, offset, (const guint8 *)"ceph", 4) == 0) {
+		if (DEST_PORT_CEPH) {
+			len = sizeof(CEPH_BANNER) - 1 +
+				sizeof(struct ceph_entity_addr) +
+				sizeof(struct ceph_msg_connect);
+		} else
+			len = sizeof(CEPH_BANNER) - 1 +
+				sizeof(struct ceph_entity_addr) +
+				sizeof(struct ceph_msg_connect_reply);
+
+		return len;
+	}
+
+	tag = tvb_get_guint8(tvb, offset);
+	if (!tag)
+		return 0;
+
+	if (tag == CEPH_MSGR_TAG_ACK)
+		pos = ACK_MSG_SIZE;
+
+	/* the header fields of the message sit behind the ACK, if there is one */
+	msg_ofs = offset + (int)pos;
+
+	len = pos + (guint)1 + sizeof(struct ceph_msg_header) +
+		TVB_MSG_FIELD(tvb_get_letohl, tvb, msg_ofs, front_len) +
+		TVB_MSG_FIELD(tvb_get_letohl, tvb, msg_ofs, data_len) +
+		sizeof(struct ceph_msg_footer);
+
+	return len;
+}
+
+
+/*
+ * Top-level dissector entry point.
+ *
+ * Segments that start with a message tag, with the banner, or with an ACK
+ * immediately followed by a message tag are handed to tcp_dissect_pdus()
+ * for reassembly; everything else is shown as a stand-alone ACK.
+ */
+static void dissect_ceph(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
+{
+	guint avail;
+	guint8 tag;
+	gboolean is_msg = FALSE;
+
+	avail = tvb_length(tvb);
+	if (avail == 0)
+		return;
+
+	tag = tvb_get_guint8(tvb, 0);
+	if (tag == CEPH_MSGR_TAG_MSG) {
+		is_msg = TRUE;
+	} else if (tag == CEPH_MSGR_TAG_ACK) {
+		/*
+		 * A bare ACK is only 5 bytes (tag + 4-byte seq); look at byte 5
+		 * only when it is really there, otherwise the read would throw and
+		 * the ACK would never reach dissect_ceph_acks().
+		 */
+		is_msg = (avail > 5) && (tvb_get_guint8(tvb, 5) == CEPH_MSGR_TAG_MSG);
+	} else {
+		is_msg = (tvb_memeql(tvb, 0, (const guint8 *)CEPH_BANNER, 4) == 0);
+	}
+
+	if (is_msg) {
+		tcp_dissect_pdus(tvb, pinfo, tree, TRUE, TVB_MSG_HEADER_POS(src),
+				get_ceph_message_len, dissect_ceph_message);
+	} else {
+		dissect_ceph_acks(tvb, pinfo, tree);
+	}
+}
+
Index: plugins/ceph/plugin.rc.in
===================================================================
--- plugins/ceph/plugin.rc.in (revision 0)
+++ plugins/ceph/plugin.rc.in (revision 0)
@@ -0,0 +1,34 @@
+#include "winver.h"
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION @RC_MODULE_VERSION@
+ PRODUCTVERSION @RC_VERSION@
+ FILEFLAGSMASK 0x0L
+#ifdef _DEBUG
+ FILEFLAGS VS_FF_PRERELEASE+VS_FF_DEBUG
+#else
+ FILEFLAGS VS_FF_PRERELEASE
+#endif
+ FILEOS VOS_NT_WINDOWS32
+ FILETYPE VFT_DLL
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904b0"
+ BEGIN
+ VALUE "CompanyName", "The Wireshark developer community, http://www.wireshark.org/\0"
+ VALUE "FileDescription", "@PACKAGE@ dissector\0"
+ VALUE 
"FileVersion", "@MODULE_VERSION@\0" + VALUE "InternalName", "@PACKAGE@ @MODULE_VERSION@\0" + VALUE "LegalCopyright", "Copyright � 1998 Gerald Combs , Gilbert Ramirez and others\0" + VALUE "OriginalFilename", "@PLUGIN_NAME@.dll\0" + VALUE "ProductName", "Wireshark\0" + VALUE "ProductVersion", "@VERSION@\0" + VALUE "Comments", "Build with @MSVC_VARIANT@\0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END Index: plugins/ceph/Makefile.nmake =================================================================== --- plugins/ceph/Makefile.nmake (revision 0) +++ plugins/ceph/Makefile.nmake (revision 0) @@ -0,0 +1,100 @@ +# Makefile.nmake +# nmake file for Wireshark plugin +# +# $Id: Makefile.nmake 24520 2008-03-01 12:31:01Z jake $ +# + +include ..\..\config.nmake +include moduleinfo.nmake + +include Makefile.common + +CFLAGS=/WX /DHAVE_CONFIG_H /I../.. /I../../wiretap $(GLIB_CFLAGS) \ + /I$(PCAP_DIR)\include -D_U_="" $(LOCAL_CFLAGS) + +.c.obj:: + $(CC) $(CFLAGS) -Fd.\ -c $< + +LDFLAGS = $(PLUGIN_LDFLAGS) + +!IFDEF ENABLE_LIBWIRESHARK +LINK_PLUGIN_WITH=..\..\epan\libwireshark.lib +CFLAGS=/DHAVE_WIN32_LIBWIRESHARK_LIB /D_NEED_VAR_IMPORT_ $(CFLAGS) + +DISSECTOR_OBJECTS = $(DISSECTOR_SRC:.c=.obj) + +DISSECTOR_SUPPORT_OBJECTS = $(DISSECTOR_SUPPORT_SRC:.c=.obj) + +OBJECTS = $(DISSECTOR_OBJECTS) $(DISSECTOR_SUPPORT_OBJECTS) plugin.obj + +RESOURCE=$(PLUGIN_NAME).res + +all: $(PLUGIN_NAME).dll + +$(PLUGIN_NAME).rc : moduleinfo.nmake + sed -e s/@PLUGIN_NAME@/$(PLUGIN_NAME)/ \ + -e s/@RC_MODULE_VERSION@/$(RC_MODULE_VERSION)/ \ + -e s/@RC_VERSION@/$(RC_VERSION)/ \ + -e s/@MODULE_VERSION@/$(MODULE_VERSION)/ \ + -e s/@PACKAGE@/$(PACKAGE)/ \ + -e s/@VERSION@/$(VERSION)/ \ + -e s/@MSVC_VARIANT@/$(MSVC_VARIANT)/ \ + < plugin.rc.in > $@ + +$(PLUGIN_NAME).dll $(PLUGIN_NAME).exp $(PLUGIN_NAME).lib : $(OBJECTS) $(LINK_PLUGIN_WITH) $(RESOURCE) + link -dll /out:$(PLUGIN_NAME).dll $(LDFLAGS) $(OBJECTS) $(LINK_PLUGIN_WITH) \ + $(GLIB_LIBS) $(RESOURCE) + +# +# 
Build plugin.c, which contains the plugin version[] string, a +# function plugin_register() that calls the register routines for all +# protocols, and a function plugin_reg_handoff() that calls the handoff +# registration routines for all protocols. +# +# We do this by scanning sources. If that turns out to be too slow, +# maybe we could just require every .o file to have an register routine +# of a given name (packet-aarp.o -> proto_register_aarp, etc.). +# +# Formatting conventions: The name of the proto_register_* routines an +# proto_reg_handoff_* routines must start in column zero, or must be +# preceded only by "void " starting in column zero, and must not be +# inside #if. +# +# DISSECTOR_SRC is assumed to have all the files that need to be scanned. +# +# For some unknown reason, having a big "for" loop in the Makefile +# to scan all the files doesn't work with some "make"s; they seem to +# pass only the first few names in the list to the shell, for some +# reason. +# +# Therefore, we have a script to generate the plugin.c file. +# The shell script runs slowly, as multiple greps and seds are run +# for each input file; this is especially slow on Windows. Therefore, +# if Python is present (as indicated by PYTHON being defined), we run +# a faster Python script to do that work instead. +# +# The first argument is the directory in which the source files live. +# The second argument is "plugin", to indicate that we should build +# a plugin.c file for a plugin. +# All subsequent arguments are the files to scan. +# +!IFDEF PYTHON +plugin.c: $(DISSECTOR_SRC) moduleinfo.h ../../tools/make-dissector-reg.py + @echo Making plugin.c (using python) + @$(PYTHON) "../../tools/make-dissector-reg.py" . plugin $(DISSECTOR_SRC) +!ELSE +plugin.c: $(DISSECTOR_SRC) moduleinfo.h ../../tools/make-dissector-reg + @echo Making plugin.c (using sh) + @$(SH) ../../tools/make-dissector-reg . 
plugin $(DISSECTOR_SRC) +!ENDIF + +!ENDIF + +clean: + rm -f $(OBJECTS) $(RESOURCE) plugin.c *.pdb \ + $(PLUGIN_NAME).dll $(PLUGIN_NAME).dll.manifest $(PLUGIN_NAME).lib \ + $(PLUGIN_NAME).exp $(PLUGIN_NAME).rc + +distclean: clean + +maintainer-clean: distclean Index: plugins/ceph/moduleinfo.h =================================================================== --- plugins/ceph/moduleinfo.h (revision 0) +++ plugins/ceph/moduleinfo.h (revision 0) @@ -0,0 +1,16 @@ +/* Included *after* config.h, in order to re-define these macros */ + +#ifdef PACKAGE +#undef PACKAGE +#endif + +/* Name of package */ +#define PACKAGE "ceph" + + +#ifdef VERSION +#undef VERSION +#endif + +/* Version number of package */ +#define VERSION "0.0.1" Index: plugins/ceph/Makefile.am =================================================================== --- plugins/ceph/Makefile.am (revision 0) +++ plugins/ceph/Makefile.am (revision 0) @@ -0,0 +1,126 @@ +# Makefile.am +# Automake file for Ceph plugin +# Copyright 2004, Duncan Sargeant +# +# $Id: Makefile.am 24488 2008-02-27 16:18:30Z stig $ +# +# Wireshark - Network traffic analyzer +# By Gerald Combs +# Copyright 1998 Gerald Combs +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+# + +INCLUDES = -I$(top_srcdir) -I$(includedir) + +include Makefile.common + + +if HAVE_WARNINGS_AS_ERRORS +AM_CFLAGS = -Werror +endif + +plugindir = @plugindir@ + +plugin_LTLIBRARIES = ceph.la +ceph_la_SOURCES = \ + plugin.c \ + moduleinfo.h \ + $(DISSECTOR_SRC) \ + $(DISSECTOR_INCLUDES) +ceph_la_LDFLAGS = -module -avoid-version +ceph_la_LIBADD = @PLUGIN_LIBS@ + +# Libs must be cleared, or else libtool won't create a shared module. +# If your module needs to be linked against any particular libraries, +# add them here. +LIBS = + +# +# Build plugin.c, which contains the plugin version[] string, a +# function plugin_register() that calls the register routines for all +# protocols, and a function plugin_reg_handoff() that calls the handoff +# registration routines for all protocols. +# +# We do this by scanning sources. If that turns out to be too slow, +# maybe we could just require every .o file to have an register routine +# of a given name (packet-aarp.o -> proto_register_aarp, etc.). +# +# Formatting conventions: The name of the proto_register_* routines an +# proto_reg_handoff_* routines must start in column zero, or must be +# preceded only by "void " starting in column zero, and must not be +# inside #if. +# +# DISSECTOR_SRC is assumed to have all the files that need to be scanned. +# +# For some unknown reason, having a big "for" loop in the Makefile +# to scan all the files doesn't work with some "make"s; they seem to +# pass only the first few names in the list to the shell, for some +# reason. +# +# Therefore, we have a script to generate the plugin.c file. +# The shell script runs slowly, as multiple greps and seds are run +# for each input file; this is especially slow on Windows. Therefore, +# if Python is present (as indicated by PYTHON being defined), we run +# a faster Python script to do that work instead. +# +# The first argument is the directory in which the source files live. 
+# The second argument is "plugin", to indicate that we should build +# a plugin.c file for a plugin. +# All subsequent arguments are the files to scan. +# +plugin.c: $(DISSECTOR_SRC) $(top_srcdir)/tools/make-dissector-reg \ + $(top_srcdir)/tools/make-dissector-reg.py + @if test -n "$(PYTHON)"; then \ + echo Making plugin.c with python ; \ + $(PYTHON) $(top_srcdir)/tools/make-dissector-reg.py $(srcdir) \ + plugin $(DISSECTOR_SRC) ; \ + else \ + echo Making plugin.c with shell script ; \ + $(top_srcdir)/tools/make-dissector-reg $(srcdir) \ + $(plugin_src) plugin $(DISSECTOR_SRC) ; \ + fi + +# +# Currently plugin.c can be included in the distribution because +# we always build all protocol dissectors. We used to have to check +# whether or not to build the snmp dissector. If we again need to +# variably build something, making plugin.c non-portable, uncomment +# the dist-hook line below. +# +# Oh, yuk. We don't want to include "plugin.c" in the distribution, as +# its contents depend on the configuration, and therefore we want it +# to be built when the first "make" is done; however, Automake insists +# on putting *all* source into the distribution. +# +# We work around this by having a "dist-hook" rule that deletes +# "plugin.c", so that "dist" won't pick it up. 
+# +#dist-hook: +# @rm -f $(distdir)/plugin.c + +CLEANFILES = \ + ceph \ + *~ + +MAINTAINERCLEANFILES = \ + Makefile.in \ + plugin.c + +EXTRA_DIST = \ + Makefile.common \ + Makefile.nmake \ + moduleinfo.nmake \ + plugin.rc.in Index: plugins/ceph/moduleinfo.nmake =================================================================== --- plugins/ceph/moduleinfo.nmake (revision 0) +++ plugins/ceph/moduleinfo.nmake (revision 0) @@ -0,0 +1,28 @@ +# +# $Id: moduleinfo.nmake 20158 2006-12-19 22:23:37Z jake $ +# + +# The name +PACKAGE=ceph + +# The version +MODULE_VERSION_MAJOR=0 +MODULE_VERSION_MINOR=0 +MODULE_VERSION_MICRO=1 +MODULE_VERSION_EXTRA=0 + +# +# The RC_VERSION should be comma-separated, not dot-separated, +# as per Graham Bloice's message in +# +# http://www.ethereal.com/lists/ethereal-dev/200303/msg00283.html +# +# "The RC_VERSION variable in config.nmake should be comma separated. +# This allows the resources to be built correctly and the version +# number to be correctly displayed in the explorer properties dialog +# for the executables, and XP's tooltip, rather than 0.0.0.0." +# + +MODULE_VERSION=$(MODULE_VERSION_MAJOR).$(MODULE_VERSION_MINOR).$(MODULE_VERSION_MICRO).$(MODULE_VERSION_EXTRA) +RC_MODULE_VERSION=$(MODULE_VERSION_MAJOR),$(MODULE_VERSION_MINOR),$(MODULE_VERSION_MICRO),$(MODULE_VERSION_EXTRA) + Index: plugins/ceph/ceph_fs.h =================================================================== --- plugins/ceph/ceph_fs.h (revision 0) +++ plugins/ceph/ceph_fs.h (revision 0) @@ -0,0 +1,1225 @@ +/* + * ceph_fs.h - Ceph constants and data types to share between kernel and + * user space. + * + * LGPL2 + */ + +#ifndef _FS_CEPH_CEPH_FS_H +#define _FS_CEPH_CEPH_FS_H + + +#define CEPH_MON_PORT 12345 + +/* + * Max file size is a policy choice; in reality we are limited + * by 2^64. + */ +#define CEPH_FILE_MAX_SIZE (1ULL << 40) /* 1 TB */ + +/* + * tcp connection banner. include a protocol version. and adjust + * whenever the wire protocol changes. 
try to keep this string length + * constant. + */ +#define CEPH_BANNER "ceph 008\n" +#define CEPH_BANNER_MAX_LEN 30 + +/* + * subprotocol versions. when specific messages types or high-level + * protocols change, bump the affected components. + */ +#define CEPH_OSD_PROTOCOL 3 +#define CEPH_MDS_PROTOCOL 2 +#define CEPH_MON_PROTOCOL 2 +#define CEPH_CLIENT_PROTOCOL 1 +#define CEPH_ONDISK_FORMAT 1 + +/* + * types in this file are defined as little-endian, and are + * primarily intended to describe data structures that pass + * over the wire or that are stored on disk. + */ + +/* + * some basics + */ +typedef __le64 ceph_version_t; +typedef __le64 ceph_tid_t; /* transaction id */ +typedef __le32 ceph_epoch_t; + + +/* + * fs id + */ +struct ceph_fsid { + __le64 major; + __le64 minor; +} __attribute__ ((packed)); + +static inline int ceph_fsid_equal(const struct ceph_fsid *a, + const struct ceph_fsid *b) +{ + return a->major == b->major && a->minor == b->minor; +} + + +/* + * ino, object, etc. + */ +#define CEPH_INO_ROOT 1 + +typedef __le64 ceph_snapid_t; +#define CEPH_MAXSNAP ((__u64)(-3)) +#define CEPH_SNAPDIR ((__u64)(-1)) +#define CEPH_NOSNAP ((__u64)(-2)) + +struct ceph_object { + union { + __u8 raw[20]; /* fits a sha1 hash */ + struct { + __le64 ino; /* inode "file" identifier */ + __le32 bno; /* "block" (object) in that "file" */ + __le64 snap; /* snapshot id. usually NOSNAP. */ + } __attribute__ ((packed)); + }; +} __attribute__ ((packed)); + +struct ceph_timespec { + __le32 tv_sec; + __le32 tv_nsec; +} __attribute__ ((packed)); + + +/* + * "Frags" are a way to describe a subset of a 32-bit number space, + * using a mask and a value to match against that mask. Any given frag + * (subset of the number space) can be partitioned into 2^n sub-frags. + * + * Frags are encoded into a 32-bit word: + * 8 upper bits = "bits" + * 24 lower bits = "value" + * (We could go to 5+27 bits, but who cares.) + * + * We use the _most_ significant bits of the 24 bit value. 
This makes
+ * values logically sort.
+ *
+ * Unfortunately, because the "bits" field is still in the high bits, we
+ * can't sort encoded frags numerically. However, it does allow you
+ * to feed encoded frags as values into frag_contains_value.
+ */
+static inline __u32 frag_make(__u32 b, __u32 v)
+{
+	/* bits go into the top byte; only the top b bits of the value are kept */
+	return (b << 24) |
+		(v & (0xffffffu << (24-b)) & 0xffffffu);
+}
+/* number of significant value bits (top byte of the encoding) */
+static inline __u32 frag_bits(__u32 f)
+{
+	return f >> 24;
+}
+/* the 24-bit value part of the encoding */
+static inline __u32 frag_value(__u32 f)
+{
+	return f & 0xffffffu;
+}
+/* 24-bit mask with the top frag_bits(f) bits set */
+static inline __u32 frag_mask(__u32 f)
+{
+	return (0xffffffu << (24-frag_bits(f))) & 0xffffffu;
+}
+/* position of the lowest significant value bit */
+static inline __u32 frag_mask_shift(__u32 f)
+{
+	return 24 - frag_bits(f);
+}
+
+/* does the 24-bit value v fall inside frag f? */
+static inline int frag_contains_value(__u32 f, __u32 v)
+{
+	return (v & frag_mask(f)) == frag_value(f);
+}
+static inline int frag_contains_frag(__u32 f, __u32 sub)
+{
+	/* is sub as specific as us, and contained by us? */
+	return frag_bits(sub) >= frag_bits(f) &&
+		(frag_value(sub) & frag_mask(f)) == frag_value(f);
+}
+
+/*
+ * frag with one significant bit less.
+ * NOTE(review): for frag_bits(f) == 0 the bit count wraps around; callers
+ * presumably never ask for the parent of the root frag -- confirm.
+ */
+static inline __u32 frag_parent(__u32 f)
+{
+	return frag_make(frag_bits(f) - 1,
+			frag_value(f) & (frag_mask(f) << 1));
+}
+/* lowest significant value bit is clear */
+static inline int frag_is_left_child(__u32 f)
+{
+	return frag_bits(f) > 0 &&
+		(frag_value(f) & (0x1000000 >> frag_bits(f))) == 0;
+}
+/*
+ * lowest significant value bit is set.  The masked result is either 0 or
+ * the bit itself, so it has to be tested against 0; comparing it with 1
+ * only matched when frag_bits(f) == 24.
+ */
+static inline int frag_is_right_child(__u32 f)
+{
+	return frag_bits(f) > 0 &&
+		(frag_value(f) & (0x1000000 >> frag_bits(f))) != 0;
+}
+/* same bit count, lowest significant value bit flipped */
+static inline __u32 frag_sibling(__u32 f)
+{
+	return frag_make(frag_bits(f),
+			frag_value(f) ^ (0x1000000 >> frag_bits(f)));
+}
+/* one more significant bit, new bit clear */
+static inline __u32 frag_left_child(__u32 f)
+{
+	return frag_make(frag_bits(f)+1, frag_value(f));
+}
+/* one more significant bit, new bit set */
+static inline __u32 frag_right_child(__u32 f)
+{
+	return frag_make(frag_bits(f)+1,
+			frag_value(f) | (0x1000000 >> (1+frag_bits(f))));
+}
+/* child number i when f is split by 'by' additional bits */
+static inline __u32 frag_make_child(__u32 f, int by, int i)
+{
+	int newbits = frag_bits(f) + by;
+	return frag_make(newbits,
+			frag_value(f) | (i << (24 - newbits)));
+}
+static inline int frag_is_leftmost(__u32 
f) +{ + return frag_value(f) == 0; +} +static inline int frag_is_rightmost(__u32 f) +{ + return frag_value(f) == frag_mask(f); +} +static inline __u32 frag_next(__u32 f) +{ + return frag_make(frag_bits(f), + frag_value(f) + (0x1000000 >> frag_bits(f))); +} + +/* + * comparator to sort frags logically, as when traversing the + * number space in ascending order... + */ +static inline int frag_compare(__u32 a, __u32 b) +{ + unsigned va = frag_value(a); + unsigned vb = frag_value(b); + if (va < vb) + return -1; + if (va > vb) + return 1; + va = frag_bits(a); + vb = frag_bits(b); + if (va < vb) + return -1; + if (va > vb) + return 1; + return 0; +} + +/* + * object layout - how objects are mapped into PGs + */ +#define CEPH_OBJECT_LAYOUT_HASH 1 +#define CEPH_OBJECT_LAYOUT_LINEAR 2 +#define CEPH_OBJECT_LAYOUT_HASHINO 3 + +/* + * pg layout -- how PGs are mapped onto (sets of) OSDs + */ +#define CEPH_PG_LAYOUT_CRUSH 0 +#define CEPH_PG_LAYOUT_HASH 1 +#define CEPH_PG_LAYOUT_LINEAR 2 +#define CEPH_PG_LAYOUT_HYBRID 3 + +/* + * ceph_file_layout - describe data layout for a file/inode + */ +struct ceph_file_layout { + /* file -> object mapping */ + __le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple + of page size. */ + __le32 fl_stripe_count; /* over this many objects */ + __le32 fl_object_size; /* until objects are this big, then move to + new objects */ + __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */ + + /* pg -> disk layout */ + __le32 fl_object_stripe_unit; /* for per-object parity, if any */ + + /* object -> pg layout */ + __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ + __u8 fl_pg_type; /* pg type; see PG_TYPE_* */ + __u8 fl_pg_size; /* pg size (num replicas, etc.) 
*/ + __u8 fl_pg_pool; /* implies crush ruleset AND object namespace */ +} __attribute__ ((packed)); + +#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit)) +#define ceph_file_layout_stripe_count(l) \ + ((__s32)le32_to_cpu((l).fl_stripe_count)) +#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size)) +#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash)) +#define ceph_file_layout_object_su(l) \ + ((__s32)le32_to_cpu((l).fl_object_stripe_unit)) +#define ceph_file_layout_pg_preferred(l) \ + ((__s32)le32_to_cpu((l).fl_pg_preferred)) + +#define ceph_file_layout_stripe_width(l) (le32_to_cpu((l).fl_stripe_unit) * \ + le32_to_cpu((l).fl_stripe_count)) + +/* "period" == bytes before i start on a new set of objects */ +#define ceph_file_layout_period(l) (le32_to_cpu((l).fl_object_size) * \ + le32_to_cpu((l).fl_stripe_count)) + +/* + * placement group. + * we encode this into one __le64. + */ +#define CEPH_PG_TYPE_REP 1 +#define CEPH_PG_TYPE_RAID4 2 +union ceph_pg { + __u64 pg64; + struct { + __s16 preferred; /* preferred primary osd */ + __u16 ps; /* placement seed */ + __u8 __pad; + __u8 size; + __u8 pool; /* implies crush ruleset */ + __u8 type; + } pg; +} __attribute__ ((packed)); + +#define ceph_pg_is_rep(pg) ((pg).pg.type == CEPH_PG_TYPE_REP) +#define ceph_pg_is_raid4(pg) ((pg).pg.type == CEPH_PG_TYPE_RAID4) + +/* + * stable_mod func is used to control number of placement groups. + * similar to straight-up modulo, but produces a stable mapping as b + * increases over time. b is the number of bins, and bmask is the + * containing power of 2 minus 1. + * + * b <= bmask and bmask=(2**n)-1 + * e.g., b=12 -> bmask=15, b=123 -> bmask=127 + */ +static inline int ceph_stable_mod(int x, int b, int bmask) +{ + if ((x & bmask) < b) + return x & bmask; + else + return x & (bmask >> 1); +} + +/* + * object layout - how a given object should be stored. 
+ */ +struct ceph_object_layout { + __le64 ol_pgid; /* raw pg, with _full_ ps precision. */ + __le32 ol_stripe_unit; +} __attribute__ ((packed)); + +/* + * compound epoch+version, used by storage layer to serialize mutations + */ +struct ceph_eversion { + ceph_epoch_t epoch; + __le64 version; +} __attribute__ ((packed)); + +/* + * osd map bits + */ + +/* status bits */ +#define CEPH_OSD_EXISTS 1 +#define CEPH_OSD_UP 2 +#define CEPH_OSD_CLEAN 4 /* as in, clean shutdown */ + +/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ +#define CEPH_OSD_IN 0x10000 +#define CEPH_OSD_OUT 0 + + +/* + * string hash. + * + * taken from Linux, tho we should probably take care to use this one + * in case the upstream hash changes. + */ + +/* Name hashing routines. Initial hash value */ +/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ +#define ceph_init_name_hash() 0 + +/* partial hash update function. Assume roughly 4 bits per character */ +static inline unsigned long +ceph_partial_name_hash(unsigned long c, unsigned long prevhash) +{ + return (prevhash + (c << 4) + (c >> 4)) * 11; +} + +/* + * Finally: cut down the number of bits to a int value (and try to avoid + * losing bits) + */ +static inline unsigned long ceph_end_name_hash(unsigned long hash) +{ + return (unsigned int) hash; +} + +/* Compute the hash for a name string. 
*/ +static inline unsigned int +ceph_full_name_hash(const char *name, unsigned int len) +{ + unsigned long hash = ceph_init_name_hash(); + while (len--) + hash = ceph_partial_name_hash(*name++, hash); + return ceph_end_name_hash(hash); +} + + + +/********************************************* + * message layer + */ + +/* + * entity_name + */ +struct ceph_entity_name { + __le32 type; + __le32 num; +} __attribute__ ((packed)); + +#define CEPH_ENTITY_TYPE_MON 1 +#define CEPH_ENTITY_TYPE_MDS 2 +#define CEPH_ENTITY_TYPE_OSD 3 +#define CEPH_ENTITY_TYPE_CLIENT 4 +#define CEPH_ENTITY_TYPE_ADMIN 5 + +/* used by message exchange protocol */ +#define CEPH_MSGR_TAG_READY 1 /* server->client: ready for messages */ +#define CEPH_MSGR_TAG_RESETSESSION 2 /* server->client: reset, try again */ +#define CEPH_MSGR_TAG_WAIT 3 /* server->client: wait for racing + incoming connection */ +#define CEPH_MSGR_TAG_RETRY_SESSION 4 /* server->client + cseq: try again + with higher cseq */ +#define CEPH_MSGR_TAG_RETRY_GLOBAL 5 /* server->client + gseq: try again + with higher gseq */ +#define CEPH_MSGR_TAG_CLOSE 6 /* closing pipe */ +#define CEPH_MSGR_TAG_MSG 10 /* message */ +#define CEPH_MSGR_TAG_ACK 11 /* message ack */ + + +/* + * entity_addr -- network address + */ +struct ceph_entity_addr { + __le32 erank; /* entity's rank in process */ + __le32 nonce; /* unique id for process (e.g. 
pid) */ + struct sockaddr_in ipaddr; +} __attribute__ ((packed)); + +static inline bool ceph_entity_addr_is_local(const struct ceph_entity_addr *a, + const struct ceph_entity_addr *b) +{ + return le32_to_cpu(a->nonce) == le32_to_cpu(b->nonce) && + a->ipaddr.sin_addr.s_addr == b->ipaddr.sin_addr.s_addr; +} + +static inline bool ceph_entity_addr_equal(const struct ceph_entity_addr *a, + const struct ceph_entity_addr *b) +{ + return memcmp(a, b, sizeof(*a)) == 0; +} + +struct ceph_entity_inst { + struct ceph_entity_name name; + struct ceph_entity_addr addr; +} __attribute__ ((packed)); + + +/* + * connection negotiation + */ +struct ceph_msg_connect { + __le32 host_type; /* CEPH_ENTITY_TYPE_* */ + __le32 global_seq; + __le32 connect_seq; + __u8 flags; +} __attribute__ ((packed)); + +struct ceph_msg_connect_reply { + __u8 tag; + __le32 global_seq; + __le32 connect_seq; + __u8 flags; +} __attribute__ ((packed)); + +#define CEPH_MSG_CONNECT_LOSSY 1 /* messages i send may be safely dropped */ + + +/* + * message header + */ +struct ceph_msg_header { + __le64 seq; /* message seq# for this session */ + __le16 type; /* message type */ + __le16 priority; /* priority. 
higher value == higher priority */ + __le16 mon_protocol, osd_protocol, mds_protocol, + client_protocol; /* protocol versions */ + __le32 front_len; /* bytes in main payload */ + __le32 data_off; /* sender: include full offset; + receiver: mask against ~PAGE_MASK */ + __le32 data_len; /* bytes of data payload */ + struct ceph_entity_inst src, orig_src, dst; + __le32 crc; /* header crc32c */ +} __attribute__ ((packed)); + +#define CEPH_MSG_PRIO_LOW 64 +#define CEPH_MSG_PRIO_DEFAULT 127 +#define CEPH_MSG_PRIO_HIGH 196 +#define CEPH_MSG_PRIO_HIGHEST 255 + +/* + * follows data payload + */ +struct ceph_msg_footer { + __le32 flags; + __le32 front_crc; + __le32 data_crc; +} __attribute__ ((packed)); + +#define CEPH_MSG_FOOTER_ABORTED (1<<0) /* drop this message */ +#define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */ + + +/* + * message types + */ + +/* misc */ +#define CEPH_MSG_SHUTDOWN 1 +#define CEPH_MSG_PING 2 + +/* client <-> monitor */ +#define CEPH_MSG_MON_MAP 4 +#define CEPH_MSG_MON_GET_MAP 5 +#define CEPH_MSG_CLIENT_MOUNT 10 +#define CEPH_MSG_CLIENT_UNMOUNT 11 +#define CEPH_MSG_STATFS 12 +#define CEPH_MSG_STATFS_REPLY 13 + +/* client <-> mds */ +#define CEPH_MSG_MDS_GETMAP 20 +#define CEPH_MSG_MDS_MAP 21 + +#define CEPH_MSG_CLIENT_SESSION 22 +#define CEPH_MSG_CLIENT_RECONNECT 23 + +#define CEPH_MSG_CLIENT_REQUEST 24 +#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 +#define CEPH_MSG_CLIENT_REPLY 26 +#define CEPH_MSG_CLIENT_CAPS 0x310 +#define CEPH_MSG_CLIENT_LEASE 0x311 +#define CEPH_MSG_CLIENT_SNAP 0x312 + +/* osd */ +#define CEPH_MSG_OSD_GETMAP 40 +#define CEPH_MSG_OSD_MAP 41 +#define CEPH_MSG_OSD_OP 42 +#define CEPH_MSG_OSD_OPREPLY 43 + + +struct ceph_mon_statfs { + struct ceph_fsid fsid; + __le64 tid; +}; + +struct ceph_statfs { + __le64 f_total; + __le64 f_free; /* used = total - free (KB) */ + __le64 f_avail; /* usable */ + __le64 f_objects; +}; + +struct ceph_mon_statfs_reply { + struct ceph_fsid fsid; + __le64 tid; + struct ceph_statfs st; +}; + +struct 
ceph_osd_getmap { + struct ceph_fsid fsid; + __le32 start; +} __attribute__ ((packed)); + +struct ceph_mds_getmap { + struct ceph_fsid fsid; + __le32 want; +} __attribute__ ((packed)); + + +/* + * mds states + * > 0 -> in + * <= 0 -> out + */ +#define CEPH_MDS_STATE_DNE 0 /* down, does not exist. */ +#define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees. + empty log. */ +#define CEPH_MDS_STATE_DESTROYING -2 /* down, existing, semi-destroyed. */ +#define CEPH_MDS_STATE_FAILED 3 /* down, needs to be recovered. */ + +#define CEPH_MDS_STATE_BOOT -4 /* up, boot announcement. */ +#define CEPH_MDS_STATE_STANDBY -5 /* up, idle. waiting for assignment. */ +#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ +#define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds. */ + +#define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ +#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed + operations (import, rename, etc.) */ +#define CEPH_MDS_STATE_RECONNECT 10 /* up, reconnect to clients */ +#define CEPH_MDS_STATE_REJOIN 11 /* up, rejoining distributed cache */ +#define CEPH_MDS_STATE_ACTIVE 12 /* up, active */ +#define CEPH_MDS_STATE_STOPPING 13 /* up, but exporting metadata */ + + +/* + * metadata lock types. + * - these are bitmasks.. 
we can compose them + * - they also define the lock ordering by the MDS + * - a few of these are internal to the mds + */ +#define CEPH_LOCK_DN 1 +#define CEPH_LOCK_ISNAP 2 +#define CEPH_LOCK_IVERSION 4 /* mds internal */ +#define CEPH_LOCK_IFILE 8 /* mds internal */ +#define CEPH_LOCK_IDIR 16 /* mds internal */ +#define CEPH_LOCK_IAUTH 32 +#define CEPH_LOCK_ILINK 64 +#define CEPH_LOCK_IDFT 128 /* dir frag tree */ +#define CEPH_LOCK_INEST 256 /* mds internal */ +#define CEPH_LOCK_IXATTR 512 +#define CEPH_LOCK_INO 2048 /* immutable inode bits; not a lock */ + +/* alias for either filelock or dirlock */ +#define CEPH_LOCK_ICONTENT (CEPH_LOCK_IFILE|CEPH_LOCK_IDIR) + +/* + * stat masks are defined in terms of the locks that cover inode fields. + */ +#define CEPH_STAT_MASK_INODE CEPH_LOCK_INO +#define CEPH_STAT_MASK_TYPE CEPH_LOCK_INO /* mode >> 12 */ +#define CEPH_STAT_MASK_SYMLINK CEPH_LOCK_INO +#define CEPH_STAT_MASK_UID CEPH_LOCK_IAUTH +#define CEPH_STAT_MASK_GID CEPH_LOCK_IAUTH +#define CEPH_STAT_MASK_MODE CEPH_LOCK_IAUTH +#define CEPH_STAT_MASK_NLINK CEPH_LOCK_ILINK +#define CEPH_STAT_MASK_LAYOUT CEPH_LOCK_ICONTENT +#define CEPH_STAT_MASK_MTIME CEPH_LOCK_ICONTENT +#define CEPH_STAT_MASK_SIZE CEPH_LOCK_ICONTENT +#define CEPH_STAT_MASK_ATIME CEPH_LOCK_ICONTENT /* fixme */ +#define CEPH_STAT_MASK_XATTR CEPH_LOCK_IXATTR +#define CEPH_STAT_MASK_INODE_ALL (CEPH_LOCK_ICONTENT | CEPH_LOCK_IAUTH | \ + CEPH_LOCK_ILINK | CEPH_LOCK_INO) + +/* client_session ops */ +enum { + CEPH_SESSION_REQUEST_OPEN, + CEPH_SESSION_OPEN, + CEPH_SESSION_REQUEST_CLOSE, + CEPH_SESSION_CLOSE, + CEPH_SESSION_REQUEST_RENEWCAPS, + CEPH_SESSION_RENEWCAPS, + CEPH_SESSION_STALE, +}; + +static inline const char *ceph_session_op_name(int op) +{ + switch (op) { + case CEPH_SESSION_REQUEST_OPEN: return "request_open"; + case CEPH_SESSION_OPEN: return "open"; + case CEPH_SESSION_REQUEST_CLOSE: return "request_close"; + case CEPH_SESSION_CLOSE: return "close"; + case CEPH_SESSION_REQUEST_RENEWCAPS: return 
"request_renewcaps"; + case CEPH_SESSION_RENEWCAPS: return "renewcaps"; + case CEPH_SESSION_STALE: return "stale"; + default: return "???"; + } +} + +struct ceph_mds_session_head { + __le32 op; + __le64 seq; + struct ceph_timespec stamp; +} __attribute__ ((packed)); + +/* client_request */ +/* + * metadata ops. + * & 0x001000 -> write op + * & 0x010000 -> follow symlink (e.g. stat(), not lstat()). + & & 0x100000 -> use weird ino/path trace + */ +#define CEPH_MDS_OP_WRITE 0x001000 +#define CEPH_MDS_OP_FOLLOW_LINK 0x010000 +#define CEPH_MDS_OP_INO_PATH 0x100000 +enum { + CEPH_MDS_OP_FINDINODE = 0x100100, + + CEPH_MDS_OP_LSTAT = 0x00100, + CEPH_MDS_OP_LUTIME = 0x01101, + CEPH_MDS_OP_LCHMOD = 0x01102, + CEPH_MDS_OP_LCHOWN = 0x01103, + CEPH_MDS_OP_LSETXATTR = 0x01104, + CEPH_MDS_OP_LRMXATTR = 0x01105, + CEPH_MDS_OP_LSETLAYOUT = 0x01106, + + CEPH_MDS_OP_STAT = 0x10100, + CEPH_MDS_OP_UTIME = 0x11101, + CEPH_MDS_OP_CHMOD = 0x11102, + CEPH_MDS_OP_CHOWN = 0x11103, + CEPH_MDS_OP_SETXATTR = 0x11104, + CEPH_MDS_OP_RMXATTR = 0x11105, + + CEPH_MDS_OP_MKNOD = 0x01201, + CEPH_MDS_OP_LINK = 0x01202, + CEPH_MDS_OP_UNLINK = 0x01203, + CEPH_MDS_OP_RENAME = 0x01204, + CEPH_MDS_OP_MKDIR = 0x01220, + CEPH_MDS_OP_RMDIR = 0x01221, + CEPH_MDS_OP_SYMLINK = 0x01222, + + CEPH_MDS_OP_OPEN = 0x10302, + CEPH_MDS_OP_TRUNCATE = 0x11303, + CEPH_MDS_OP_LTRUNCATE = 0x01303, + CEPH_MDS_OP_FSYNC = 0x00304, + CEPH_MDS_OP_READDIR = 0x00305, + + CEPH_MDS_OP_MKSNAP = 0x01400, + CEPH_MDS_OP_RMSNAP = 0x01401, + CEPH_MDS_OP_LSSNAP = 0x00402, +}; + +static inline const char *ceph_mds_op_name(int op) +{ + switch (op) { + case CEPH_MDS_OP_FINDINODE: return "findinode"; + case CEPH_MDS_OP_STAT: return "stat"; + case CEPH_MDS_OP_LSTAT: return "lstat"; + case CEPH_MDS_OP_UTIME: return "utime"; + case CEPH_MDS_OP_LUTIME: return "lutime"; + case CEPH_MDS_OP_CHMOD: return "chmod"; + case CEPH_MDS_OP_LCHMOD: return "lchmod"; + case CEPH_MDS_OP_CHOWN: return "chown"; + case CEPH_MDS_OP_LCHOWN: return "lchown"; + case 
CEPH_MDS_OP_LSETLAYOUT: return "lsetlayout"; + case CEPH_MDS_OP_SETXATTR: return "setxattr"; + case CEPH_MDS_OP_LSETXATTR: return "lsetxattr"; + case CEPH_MDS_OP_RMXATTR: return "rmxattr"; + case CEPH_MDS_OP_LRMXATTR: return "lrmxattr"; + case CEPH_MDS_OP_READDIR: return "readdir"; + case CEPH_MDS_OP_MKNOD: return "mknod"; + case CEPH_MDS_OP_LINK: return "link"; + case CEPH_MDS_OP_UNLINK: return "unlink"; + case CEPH_MDS_OP_RENAME: return "rename"; + case CEPH_MDS_OP_MKDIR: return "mkdir"; + case CEPH_MDS_OP_RMDIR: return "rmdir"; + case CEPH_MDS_OP_SYMLINK: return "symlink"; + case CEPH_MDS_OP_OPEN: return "open"; + case CEPH_MDS_OP_TRUNCATE: return "truncate"; + case CEPH_MDS_OP_LTRUNCATE: return "ltruncate"; + case CEPH_MDS_OP_FSYNC: return "fsync"; + case CEPH_MDS_OP_LSSNAP: return "lssnap"; + case CEPH_MDS_OP_MKSNAP: return "mksnap"; + case CEPH_MDS_OP_RMSNAP: return "rmsnap"; + default: return "???"; + } +} + +struct ceph_mds_request_head { + ceph_tid_t tid, oldest_client_tid; + ceph_epoch_t mdsmap_epoch; /* on client */ + __le32 num_fwd; + __le32 retry_attempt; + __le64 mds_wants_replica_in_dirino; + __le32 op; + __le32 caller_uid, caller_gid; + + union { + struct { + __le32 mask; + } __attribute__ ((packed)) stat; + struct { + __le32 mask; + } __attribute__ ((packed)) fstat; + struct { + __le32 frag; + } __attribute__ ((packed)) readdir; + struct { + struct ceph_timespec mtime; + struct ceph_timespec atime; + struct ceph_timespec ctime; + __le32 mask; + } __attribute__ ((packed)) utime; + struct { + __le32 mode; + } __attribute__ ((packed)) chmod; + struct { + __le32 uid; + __le32 gid; + __le32 mask; + } __attribute__ ((packed)) chown; + struct { + __le32 mode; + __le32 rdev; + } __attribute__ ((packed)) mknod; + struct { + __le32 mode; + } __attribute__ ((packed)) mkdir; + struct { + __le32 flags; + __le32 mode; + } __attribute__ ((packed)) open; + struct { + __le64 length; + } __attribute__ ((packed)) truncate; + struct { + __le32 flags; + } __attribute__ 
((packed)) setxattr; + struct { + struct ceph_file_layout layout; + } __attribute__ ((packed)) setlayout; + } __attribute__ ((packed)) args; +} __attribute__ ((packed)); + +/* masks for utimes() */ +#define CEPH_UTIME_ATIME 1 +#define CEPH_UTIME_MTIME 2 +#define CEPH_UTIME_CTIME 4 + +/* masks for chown */ +#define CEPH_CHOWN_UID 1 +#define CEPH_CHOWN_GID 2 + +struct ceph_inopath_item { + __le64 ino; + __le32 dname_hash; +} __attribute__ ((packed)); + +/* client reply */ +struct ceph_mds_reply_head { + ceph_tid_t tid; + __le32 op; + __le32 result; + __le32 file_caps; + __le32 file_caps_seq; + __le32 file_caps_mseq; + __le32 mdsmap_epoch; +} __attribute__ ((packed)); + +/* one for each node split */ +struct ceph_frag_tree_split { + __le32 frag; /* this frag splits... */ + __le32 by; /* ...by this many bits */ +} __attribute__ ((packed)); + +struct ceph_frag_tree_head { + __le32 nsplits; + struct ceph_frag_tree_split splits[]; +} __attribute__ ((packed)); + +struct ceph_mds_reply_inode { + __le64 ino; + __le64 snapid; + __le64 version; + struct ceph_file_layout layout; + struct ceph_timespec ctime, mtime, atime; + __le64 time_warp_seq; + __le32 rdev; + __le32 mode, uid, gid; + __le32 nlink; + __le64 size, max_size, truncate_seq; + __le64 files, subdirs, rbytes, rfiles, rsubdirs; /* dir stats */ + struct ceph_timespec rctime; + struct ceph_frag_tree_head fragtree; +} __attribute__ ((packed)); +/* followed by frag array, then symlink string, then xattr blob */ + +/* reply_lease follows dname, and reply_inode */ +struct ceph_mds_reply_lease { + __le16 mask; + __le32 duration_ms; +} __attribute__ ((packed)); + +struct ceph_mds_reply_dirfrag { + __le32 frag; /* fragment */ + __le32 auth; /* auth mds, if this is a delegation point */ + __le32 ndist; /* number of mds' this is replicated on */ + __le32 dist[]; +} __attribute__ ((packed)); + +/* file access modes */ +#define CEPH_FILE_MODE_PIN 0 +#define CEPH_FILE_MODE_RD 1 +#define CEPH_FILE_MODE_WR 2 +#define 
CEPH_FILE_MODE_RDWR 3 /* RD | WR */ +#define CEPH_FILE_MODE_LAZY 4 /* lazy io */ +#define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */ + +static inline int ceph_flags_to_mode(int flags) +{ +#ifdef O_DIRECTORY /* fixme */ + if ((flags & O_DIRECTORY) == O_DIRECTORY) + return CEPH_FILE_MODE_PIN; +#endif +#ifdef O_LAZY + if (flags & O_LAZY) + return CEPH_FILE_MODE_LAZY; +#endif + if ((flags & O_APPEND) == O_APPEND) + flags |= O_WRONLY; + + flags &= O_ACCMODE; + if ((flags & O_RDWR) == O_RDWR) + return CEPH_FILE_MODE_RDWR; + if ((flags & O_WRONLY) == O_WRONLY) + return CEPH_FILE_MODE_WR; + return CEPH_FILE_MODE_RD; +} + +/* client file caps */ +#define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */ +#define CEPH_CAP_RDCACHE 2 /* client can cache reads */ +#define CEPH_CAP_RD 4 /* client can read */ +#define CEPH_CAP_WR 8 /* client can write */ +#define CEPH_CAP_WRBUFFER 16 /* client can buffer writes */ +#define CEPH_CAP_WREXTEND 32 /* client can extend EOF */ +#define CEPH_CAP_LAZYIO 64 /* client can perform lazy io */ +#define CEPH_CAP_EXCL 128 /* exclusive/loner access */ + +static inline int ceph_caps_for_mode(int mode) +{ + switch (mode) { + case CEPH_FILE_MODE_PIN: + return CEPH_CAP_PIN; + case CEPH_FILE_MODE_RD: + return CEPH_CAP_PIN | + CEPH_CAP_RD | CEPH_CAP_RDCACHE; + case CEPH_FILE_MODE_RDWR: + return CEPH_CAP_PIN | + CEPH_CAP_RD | CEPH_CAP_RDCACHE | + CEPH_CAP_WR | CEPH_CAP_WRBUFFER | + CEPH_CAP_EXCL; + case CEPH_FILE_MODE_WR: + return CEPH_CAP_PIN | + CEPH_CAP_WR | CEPH_CAP_WRBUFFER | + CEPH_CAP_EXCL; + } + return 0; +} + +enum { + CEPH_CAP_OP_GRANT, /* mds->client grant */ + CEPH_CAP_OP_TRUNC, /* mds->client trunc notify */ + CEPH_CAP_OP_EXPORT, /* mds has exported the cap */ + CEPH_CAP_OP_IMPORT, /* mds has imported the cap from specified mds */ + CEPH_CAP_OP_RELEASED, /* mds->client close out cap */ + CEPH_CAP_OP_FLUSHEDSNAP, /* mds->client flushed snap */ + CEPH_CAP_OP_ACK, /* client->mds ack (if prior grant was recall) */ + 
CEPH_CAP_OP_REQUEST, /* client->mds request (update wanted bits) */ + CEPH_CAP_OP_FLUSHSNAP, /* client->mds flush snapped metadata */ + CEPH_CAP_OP_RELEASE, /* client->mds request release cap */ +}; + +static inline const char *ceph_cap_op_name(int op) +{ + switch (op) { + case CEPH_CAP_OP_GRANT: return "grant"; + case CEPH_CAP_OP_TRUNC: return "trunc"; + case CEPH_CAP_OP_EXPORT: return "export"; + case CEPH_CAP_OP_IMPORT: return "import"; + case CEPH_CAP_OP_RELEASED: return "released"; + case CEPH_CAP_OP_FLUSHEDSNAP: return "flushedsnap"; + case CEPH_CAP_OP_ACK: return "ack"; + case CEPH_CAP_OP_REQUEST: return "request"; + case CEPH_CAP_OP_FLUSHSNAP: return "flushsnap"; + case CEPH_CAP_OP_RELEASE: return "release"; + default: return "???"; + } +} + +/* + * caps message, used for capability callbacks, acks, requests, etc. + */ +struct ceph_mds_caps { + __le32 op; + __le64 ino; + __le32 seq; + __le32 caps, wanted; + __le64 size, max_size; + __le64 truncate_seq; + __le32 migrate_seq; + struct ceph_timespec mtime, atime, ctime; + struct ceph_file_layout layout; + __le64 time_warp_seq; + __le64 snap_follows; + __le32 snap_trace_len; +} __attribute__ ((packed)); + + +#define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */ +#define CEPH_MDS_LEASE_RELEASE 2 /* client -> mds */ +#define CEPH_MDS_LEASE_RENEW 3 /* client <-> mds */ + +struct ceph_mds_lease { + __u8 action; + __le16 mask; + __le64 ino; + __le64 first, last; +} __attribute__ ((packed)); +/* followed by a __le32+string for dname */ + + +/* client reconnect */ +struct ceph_mds_cap_reconnect { + __le32 wanted; + __le32 issued; + __le64 size; + struct ceph_timespec mtime, atime; + __le64 snaprealm; +} __attribute__ ((packed)); +/* followed by encoded string */ + +struct ceph_mds_snaprealm_reconnect { + __le64 ino; + __le64 seq; + __le64 parent; /* parent realm */ +} __attribute__ ((packed)); + +/* + * snaps + */ +enum { + CEPH_SNAP_OP_UPDATE, /* CREATE or DESTROY */ + CEPH_SNAP_OP_CREATE, + CEPH_SNAP_OP_DESTROY, + 
CEPH_SNAP_OP_SPLIT, +}; + +static inline const char *ceph_snap_op_name(int o) +{ + switch (o) { + case CEPH_SNAP_OP_UPDATE: return "update"; + case CEPH_SNAP_OP_CREATE: return "create"; + case CEPH_SNAP_OP_DESTROY: return "destroy"; + case CEPH_SNAP_OP_SPLIT: return "split"; + default: return "???"; + } +} + +struct ceph_mds_snap_head { + __le32 op; + __le64 split; + __le32 num_split_inos; + __le32 num_split_realms; + __le32 trace_len; +} __attribute__ ((packed)); +/* followed by split ino list, then split realms, then the trace blob */ + +/* + * encode info about a snaprealm, as viewed by a client + */ +struct ceph_mds_snap_realm { + __le64 ino; /* ino */ + __le64 created; /* snap: when created */ + __le64 parent; /* ino: parent realm */ + __le64 parent_since; /* snap: same parent since */ + __le64 seq; /* snap: version */ + __le32 num_snaps; + __le32 num_prior_parent_snaps; +} __attribute__ ((packed)); +/* followed by my snap list, then prior parent snap list */ + +/* + * osd map flag bits + */ +#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */ +#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */ + +/* + * osd ops + */ +#define CEPH_OSD_OP_MODE 0xf000 +#define CEPH_OSD_OP_MODE_RD 0x1000 +#define CEPH_OSD_OP_MODE_WR 0x2000 +#define CEPH_OSD_OP_MODE_SUB 0x4000 + +#define CEPH_OSD_OP_TYPE 0x0f00 +#define CEPH_OSD_OP_TYPE_LOCK 0x0100 +#define CEPH_OSD_OP_TYPE_DATA 0x0200 +#define CEPH_OSD_OP_TYPE_ATTR 0x0300 + +enum { + /* read */ + CEPH_OSD_OP_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1, + CEPH_OSD_OP_STAT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2, + + CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, + CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, + + /* subop */ + CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1, + CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2, + CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3, + CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4, + + 
/* object data */ + CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1, + CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2, + CEPH_OSD_OP_TRUNCATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 3, + CEPH_OSD_OP_ZERO = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 4, + CEPH_OSD_OP_DELETE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 5, + + /* object attrs */ + CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, + CEPH_OSD_OP_SETXATTRS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 2, + CEPH_OSD_OP_RESETXATTRS= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 3, + CEPH_OSD_OP_RMXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4, + + /* lock */ + CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, + CEPH_OSD_OP_WRUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 2, + CEPH_OSD_OP_RDLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 3, + CEPH_OSD_OP_RDUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 4, + CEPH_OSD_OP_UPLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 5, + CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, + + /* fancy read */ + CEPH_OSD_OP_GREP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 3, + + /* fancy write */ + CEPH_OSD_OP_APPEND = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 6, +}; + +static inline int ceph_osd_op_type_lock(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_LOCK; +} +static inline int ceph_osd_op_type_data(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_DATA; +} +static inline int ceph_osd_op_type_attr(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_ATTR; +} + +static inline int ceph_osd_op_mode_subop(int op) +{ + return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_SUB; +} +static inline int ceph_osd_op_mode_read(int op) +{ + return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; +} +static inline int ceph_osd_op_mode_modify(int op) +{ + return (op & CEPH_OSD_OP_MODE) 
== CEPH_OSD_OP_MODE_WR; +} + +static inline const char *ceph_osd_op_name(int op) +{ + switch (op) { + case CEPH_OSD_OP_READ: return "read"; + case CEPH_OSD_OP_STAT: return "stat"; + + case CEPH_OSD_OP_WRITE: return "write"; + case CEPH_OSD_OP_DELETE: return "delete"; + case CEPH_OSD_OP_TRUNCATE: return "truncate"; + case CEPH_OSD_OP_ZERO: return "zero"; + case CEPH_OSD_OP_WRITEFULL: return "writefull"; + + case CEPH_OSD_OP_SETXATTR: return "setxattr"; + case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; + case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; + case CEPH_OSD_OP_RMXATTR: return "rmxattr"; + + case CEPH_OSD_OP_WRLOCK: return "wrlock"; + case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; + case CEPH_OSD_OP_RDLOCK: return "rdlock"; + case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; + case CEPH_OSD_OP_UPLOCK: return "uplock"; + case CEPH_OSD_OP_DNLOCK: return "dnlock"; + + case CEPH_OSD_OP_PULL: return "pull"; + case CEPH_OSD_OP_PUSH: return "push"; + case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; + case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; + + default: return "???"; + } +} + + +/* + * osd op flags + */ +enum { + CEPH_OSD_OP_ACK = 1, /* want (or is) "ack" ack */ + CEPH_OSD_OP_SAFE = 2, /* want (or is) "safe" ack */ + CEPH_OSD_OP_RETRY = 4, /* resend attempt */ + CEPH_OSD_OP_INCLOCK_FAIL = 8, /* fail on inclock collision */ + CEPH_OSD_OP_MODIFY = 16, /* op is/was a mutation */ + CEPH_OSD_OP_ACKNVRAM = 32, /* ACK when stable in NVRAM, not RAM */ + CEPH_OSD_OP_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ + CEPH_OSD_OP_PEERSTAT = 128, /* msg includes osd_peer_stat */ + CEPH_OSD_OP_BALANCE_READS = 256, +}; + +#define EOLDSNAPC 44 /* ORDERSNAP flag set and writer has old snap context*/ + +struct ceph_osd_op { + __le16 op; + union { + struct { + __le64 offset, length; + }; + struct { + __le32 name_len; + __le32 value_len; + }; + }; +} __attribute__ ((packed)); + +struct ceph_osd_request_head { + ceph_tid_t tid; + __le32 client_inc; + 
struct ceph_object oid; + struct ceph_object_layout layout; + ceph_epoch_t osdmap_epoch; + + __le32 flags; + __le32 inc_lock; + + struct ceph_eversion reassert_version; + + /* writer's snap context */ + __le64 snap_seq; + __le32 num_snaps; + + /* read or mutation */ + __le16 num_ops; + __u16 object_type; + struct ceph_osd_op ops[]; /* followed by snaps */ +} __attribute__ ((packed)); + +struct ceph_osd_reply_head { + ceph_tid_t tid; + __le32 flags; + struct ceph_object oid; + struct ceph_object_layout layout; + ceph_epoch_t osdmap_epoch; + struct ceph_eversion reassert_version; + + __le32 result; + + __le32 num_ops; + struct ceph_osd_op ops[0]; +} __attribute__ ((packed)); + +#endif Index: plugins/Makefile.nmake =================================================================== --- plugins/Makefile.nmake (revision 26859) +++ plugins/Makefile.nmake (working copy) @@ -39,6 +39,9 @@ cd asn1 $(MAKE) /$(MAKEFLAGS) -f Makefile.nmake $(PLUGIN_TARGET) cd .. + cd ceph + $(MAKE) /$(MAKEFLAGS) -f Makefile.nmake $(PLUGIN_TARGET) + cd .. cd ciscosm $(MAKE) /$(MAKEFLAGS) -f Makefile.nmake $(PLUGIN_TARGET) cd .. 
@@ -128,6 +131,7 @@ xcopy agentx\*.dll $(VERSION) /d xcopy artnet\*.dll $(VERSION) /d xcopy asn1\*.dll $(VERSION) /d + xcopy ceph\*.dll $(VERSION) /d xcopy ciscosm\*.dll $(VERSION) /d xcopy docsis\*.dll $(VERSION) /d xcopy enttec\*.dll $(VERSION) /d Index: plugins/Makefile.am =================================================================== --- plugins/Makefile.am (revision 26859) +++ plugins/Makefile.am (working copy) @@ -26,6 +26,7 @@ agentx \ artnet \ asn1 \ + ceph \ ciscosm \ docsis \ enttec \ Index: epan/Makefile.am =================================================================== --- epan/Makefile.am (revision 26859) +++ epan/Makefile.am (working copy) @@ -189,6 +189,7 @@ plugin_src = \ ../plugins/artnet/packet-artnet.c \ ../plugins/asn1/packet-asn1.c \ + ../plugins/ceph/packet-ceph.c \ ../plugins/docsis/packet-bpkmattr.c \ ../plugins/docsis/packet-bpkmreq.c \ ../plugins/docsis/packet-bpkmrsp.c \ Index: packaging/nsis/Makefile.nmake =================================================================== --- packaging/nsis/Makefile.nmake (revision 26859) +++ packaging/nsis/Makefile.nmake (working copy) @@ -47,6 +47,7 @@ ../../plugins/agentx/agentx.dll \ ../../plugins/artnet/artnet.dll \ ../../plugins/asn1/asn1.dll \ + ../../plugins/ceph/ceph.dll \ ../../plugins/ciscosm/ciscosm.dll \ ../../plugins/docsis/docsis.dll \ ../../plugins/enttec/enttec.dll \ Index: packaging/nsis/wireshark.nsi =================================================================== --- packaging/nsis/wireshark.nsi (revision 26859) +++ packaging/nsis/wireshark.nsi (working copy) @@ -779,6 +779,7 @@ File "..\..\plugins\agentx\agentx.dll" File "..\..\plugins\artnet\artnet.dll" File "..\..\plugins\asn1\asn1.dll" +File "..\..\plugins\ceph\ceph.dll" File "..\..\plugins\ciscosm\ciscosm.dll" File "..\..\plugins\docsis\docsis.dll" File "..\..\plugins\enttec\enttec.dll"