1
0
mirror of http://git.haproxy.org/git/haproxy.git/ synced 2025-04-04 15:19:52 +00:00

MEDIUM: quic: use a global CID trees list

Previously, quic_connection_id instances were stored in per-thread trees.
Datagrams were first dispatched to the correct thread using the encoded
TID before a tree lookup was done.

Remove these trees and replace them with a global list of 256 trees. A
CID uses the list index corresponding to its first byte. On datagram
dispatch, the CID is looked up in its tree and the TID is retrieved
using the new member quic_connection_id.tid. A read-write lock protects
each tree instance. With 256 entries, contention is expected to be
reduced.

A new structure quic_cid_tree serves as a tree container associated with
its read-write lock. An API is implemented to ensure lock safety for
insert/lookup/delete operations.

This patch is a step towards breaking the affinity between a CID and its
encoded TID. This is required to be able to migrate a quic_conn after
accept and select a thread based on load.

This should be backported up to 2.7 after a period of observation.
This commit is contained in:
Amaury Denoyelle 2023-04-18 11:10:54 +02:00
parent 66947283ba
commit e83f937cc1
8 changed files with 166 additions and 52 deletions

View File

@ -24,6 +24,12 @@
extern struct protocol proto_quic4;
extern struct protocol proto_quic6;
/* One bucket of the global CID tree list: an ebtree of quic_connection_id
 * keyed by CID value, plus the read-write lock protecting it.
 */
struct quic_cid_tree {
	struct eb_root root;
	__decl_thread(HA_RWLOCK_T lock);
};
extern struct quic_dghdlr *quic_dghdlrs;
extern struct quic_cid_tree *quic_cid_trees;
#endif /* _HAPROXY_PROTO_QUIC_H */

View File

@ -303,6 +303,7 @@ struct quic_connection_id {
struct quic_cid cid; /* CID data */
struct quic_conn *qc; /* QUIC connection using this CID */
uint tid; /* Attached Thread ID for the connection. */
};
/* Structure to hold a range of ACKs sent in ACK frames. */
@ -444,7 +445,6 @@ struct quic_rx_packet {
struct quic_dghdlr {
struct mt_list dgrams;
struct tasklet *task;
struct eb_root cids;
};
/* Structure to store enough information about the RX CRYPTO frames. */

View File

@ -38,6 +38,7 @@
#include <haproxy/ticks.h>
#include <haproxy/listener.h>
#include <haproxy/proto_quic.h>
#include <haproxy/quic_cc.h>
#include <haproxy/quic_conn-t.h>
#include <haproxy/quic_enc.h>
@ -137,9 +138,41 @@ static inline void quic_cid_dump(struct buffer *buf,
chunk_appendf(buf, ")");
}
/* Free the CIDs attached to <conn> QUIC connection. This must be called under
* the CID lock.
*/
/* Compute the global tree bucket index for the raw CID bytes <cid>:
 * the first byte selects one of the 256 trees.
 */
static inline uchar _quic_cid_tree_idx(const unsigned char *cid)
{
	return *cid;
}
/* Compute the global tree bucket index for CID structure <cid>. */
static inline uchar quic_cid_tree_idx(const struct quic_cid *cid)
{
	return cid->data[0];
}
/* Attach <conn_id> to the global CID tree selected by its first byte.
 * The tree's write lock is held for the duration of the insertion, making
 * this operation thread-safe.
 */
static inline void quic_cid_insert(struct quic_connection_id *conn_id)
{
	struct quic_cid_tree *t = &quic_cid_trees[quic_cid_tree_idx(&conn_id->cid)];

	HA_RWLOCK_WRLOCK(QC_CID_LOCK, &t->lock);
	ebmb_insert(&t->root, &conn_id->node, conn_id->cid.len);
	HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &t->lock);
}
/* Detach <conn_id> from the global CID tree it belongs to. The tree's
 * write lock is held for the duration of the removal, making this
 * operation thread-safe.
 */
static inline void quic_cid_delete(struct quic_connection_id *conn_id)
{
	struct quic_cid_tree *t = &quic_cid_trees[quic_cid_tree_idx(&conn_id->cid)];

	HA_RWLOCK_WRLOCK(QC_CID_LOCK, &t->lock);
	ebmb_delete(&conn_id->node);
	HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &t->lock);
}
/* Free the CIDs attached to <conn> QUIC connection. */
static inline void free_quic_conn_cids(struct quic_conn *conn)
{
struct eb64_node *node;
@ -151,7 +184,7 @@ static inline void free_quic_conn_cids(struct quic_conn *conn)
conn_id = eb64_entry(node, struct quic_connection_id, seq_num);
/* remove the CID from the receiver tree */
ebmb_delete(&conn_id->node);
quic_cid_delete(conn_id);
/* remove the CID from the quic_conn tree */
node = eb64_next(node);
@ -175,39 +208,6 @@ static inline void quic_connection_id_to_frm_cpy(struct quic_frame *dst,
to->stateless_reset_token = src->stateless_reset_token;
}
/* extract a TID from a CID for receiver <rx>, from 0 to global.nbthread-1 and
* in any case no more than 4095. It takes into account the bind_conf's thread
* group and the bind_conf's thread mask. The algorithm is the following: most
* packets contain a valid thread ID for the bind_conf, which means that the
* retrieved ID directly maps to a bound thread ID. If that's not the case,
* then we have to remap it. The resulting thread ID will then differ but will
* be correctly encoded and decoded.
*/
static inline uint quic_get_cid_tid(const unsigned char *cid, const struct receiver *rx)
{
	uint id, grp;
	uint base, count;

	/* The thread ID is encoded in the first two CID bytes, capped to 4095. */
	id = read_n16(cid) & 4095;
	grp = rx->bind_tgroup;
	base = ha_tgroup_info[grp - 1].base;
	count = ha_tgroup_info[grp - 1].count;
	if (base <= id && id < base + count &&
	    rx->bind_thread & ha_thread_info[id].ltid_bit)
		return id; // part of the group and bound: valid

	/* The thread number isn't valid, it doesn't map to a thread bound on
	 * this receiver. Let's reduce it to one of the thread(s) valid for
	 * that receiver.
	 */
	count = my_popcountl(rx->bind_thread);
	/* Deterministic remap of the invalid ID onto the receiver's bound
	 * thread set, then translate the rank back to an absolute thread ID.
	 */
	id = count - 1 - id % count;
	id = mask_find_rank_bit(id, rx->bind_thread);
	id += base;

	return id;
}
/* Modify <cid> to have a CID linked to the thread ID <target_tid> that
* quic_get_cid_tid() will be able to extract return.
*/
@ -676,6 +676,9 @@ int quic_get_dgram_dcid(unsigned char *buf, const unsigned char *end,
unsigned char **dcid, size_t *dcid_len);
struct quic_cid quic_derive_cid(const struct quic_cid *orig,
const struct sockaddr_storage *addr);
int quic_get_cid_tid(const unsigned char *cid, size_t cid_len,
const struct sockaddr_storage *cli_addr,
unsigned char *buf, size_t buf_len);
int qc_send_mux(struct quic_conn *qc, struct list *frms);
void qc_notify_close(struct quic_conn *qc);

View File

@ -424,6 +424,7 @@ enum lock_label {
SFT_LOCK, /* sink forward target */
IDLE_CONNS_LOCK,
OCSP_LOCK,
QC_CID_LOCK,
OTHER_LOCK,
/* WT: make sure never to use these ones outside of development,
* we need them for lock profiling!

View File

@ -51,6 +51,11 @@
/* per-thread quic datagram handlers */
struct quic_dghdlr *quic_dghdlrs;
struct eb_root *quic_cid_tree;
/* global CID trees */
#define QUIC_CID_TREES_CNT 256
struct quic_cid_tree *quic_cid_trees;
/* Size of the internal buffer of QUIC RX buffer at the fd level */
#define QUIC_RX_BUFSZ (1UL << 18)
@ -734,11 +739,20 @@ static int quic_alloc_dghdlrs(void)
dghdlr->task->context = dghdlr;
dghdlr->task->process = quic_lstnr_dghdlr;
dghdlr->cids = EB_ROOT_UNIQUE;
MT_LIST_INIT(&dghdlr->dgrams);
}
quic_cid_trees = calloc(QUIC_CID_TREES_CNT, sizeof(struct quic_cid_tree));
if (!quic_cid_trees) {
ha_alert("Failed to allocate global CIDs trees.\n");
return 0;
}
for (i = 0; i < QUIC_CID_TREES_CNT; ++i) {
HA_RWLOCK_INIT(&quic_cid_trees[i].lock);
quic_cid_trees[i].root = EB_ROOT_UNIQUE;
}
return 1;
}
REGISTER_POST_CHECK(quic_alloc_dghdlrs);
@ -753,6 +767,8 @@ static int quic_deallocate_dghdlrs(void)
free(quic_dghdlrs);
}
ha_free(&quic_cid_trees);
return 1;
}
REGISTER_POST_DEINIT(quic_deallocate_dghdlrs);

View File

@ -3253,8 +3253,7 @@ static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt,
TRACE_ERROR("CID allocation error", QUIC_EV_CONN_IO_CB, qc);
}
else {
/* insert the allocated CID in the receiver datagram handler tree */
ebmb_insert(&quic_dghdlrs[tid].cids, &conn_id->node, conn_id->cid.len);
quic_cid_insert(conn_id);
qc_build_new_connection_id_frm(qc, conn_id);
}
break;
@ -3969,6 +3968,59 @@ struct quic_cid quic_derive_cid(const struct quic_cid *orig,
return cid;
}
/* Retrieve the thread ID associated to QUIC connection ID <cid> of length
 * <cid_len>. The CID may not be found in the CID trees because it is an
 * ODCID. In this case, it will be derived using client address <cli_addr> as
 * hash parameter. However, this is done only if <buf> points to an INITIAL or
 * 0-RTT packet of length <buf_len>.
 *
 * Returns the thread ID or a negative error code.
 */
int quic_get_cid_tid(const unsigned char *cid, size_t cid_len,
                     const struct sockaddr_storage *cli_addr,
                     unsigned char *buf, size_t buf_len)
{
	struct quic_cid_tree *tree;
	struct quic_connection_id *conn_id;
	struct ebmb_node *node;

	/* First lookup with the CID as transmitted by the peer. */
	tree = &quic_cid_trees[_quic_cid_tree_idx(cid)];
	HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
	node = ebmb_lookup(&tree->root, cid, cid_len);
	HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);

	if (!node) {
		struct quic_cid orig, derive_cid;
		struct quic_rx_packet pkt;

		if (!qc_parse_hd_form(&pkt, &buf, buf + buf_len))
			goto not_found;

		/* ODCID derivation only makes sense for INITIAL/0-RTT packets. */
		if (pkt.type != QUIC_PACKET_TYPE_INITIAL &&
		    pkt.type != QUIC_PACKET_TYPE_0RTT) {
			goto not_found;
		}

		memcpy(orig.data, cid, cid_len);
		orig.len = cid_len;
		derive_cid = quic_derive_cid(&orig, cli_addr);

		/* The tree stores the derived CID as key, so the lookup must
		 * use the derived CID value as well, mirroring the ODCID
		 * fallback in retrieve_qc_conn_from_cid(). Looking up the
		 * original <cid> bytes here could never match.
		 */
		tree = &quic_cid_trees[quic_cid_tree_idx(&derive_cid)];

		HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
		node = ebmb_lookup(&tree->root, derive_cid.data, derive_cid.len);
		HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
	}

	if (!node)
		goto not_found;

	conn_id = ebmb_entry(node, struct quic_connection_id, node);
	/* <tid> may be written concurrently on CID allocation; load atomically. */
	return HA_ATOMIC_LOAD(&conn_id->tid);

 not_found:
	return -1;
}
/* Allocate a new CID and attach it to <root> ebtree.
*
* If <orig> and <addr> params are non null, the new CID value is directly
@ -4016,6 +4068,7 @@ static struct quic_connection_id *new_quic_cid(struct eb_root *root,
}
conn_id->qc = qc;
HA_ATOMIC_STORE(&conn_id->tid, tid);
conn_id->seq_num.key = qc->next_cid_seq_num++;
conn_id->retire_prior_to = 0;
@ -4082,8 +4135,10 @@ static int quic_build_post_handshake_frames(struct quic_conn *qc)
goto err;
}
/* insert the allocated CID in the receiver datagram handler tree */
ebmb_insert(&quic_dghdlrs[tid].cids, &conn_id->node, conn_id->cid.len);
/* TODO To prevent CID tree locking, all CIDs created here
* could be allocated at the same time as the first one.
*/
quic_cid_insert(conn_id);
quic_connection_id_to_frm_cpy(frm, conn_id);
LIST_APPEND(&frm_list, &frm->list);
@ -4114,7 +4169,8 @@ static int quic_build_post_handshake_frames(struct quic_conn *qc)
break;
node = eb64_next(node);
ebmb_delete(&conn_id->node);
quic_cid_delete(conn_id);
eb64_delete(&conn_id->seq_num);
pool_free(pool_head_quic_connection_id, conn_id);
}
@ -5504,7 +5560,7 @@ static struct quic_conn *qc_new_conn(const struct quic_version *qv, int ipv4,
/* insert the allocated CID in the receiver datagram handler tree */
if (server)
ebmb_insert(&quic_dghdlrs[tid].cids, &conn_id->node, conn_id->cid.len);
quic_cid_insert(conn_id);
/* Select our SCID which is the first CID with 0 as sequence number. */
qc->scid = conn_id->cid;
@ -6511,11 +6567,15 @@ static struct quic_conn *retrieve_qc_conn_from_cid(struct quic_rx_packet *pkt,
struct quic_conn *qc = NULL;
struct ebmb_node *node;
struct quic_connection_id *conn_id;
struct quic_cid_tree *tree;
TRACE_ENTER(QUIC_EV_CONN_RXPKT);
/* First look into DCID tree. */
node = ebmb_lookup(&quic_dghdlrs[tid].cids, pkt->dcid.data, pkt->dcid.len);
tree = &quic_cid_trees[_quic_cid_tree_idx(pkt->dcid.data)];
HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
node = ebmb_lookup(&tree->root, pkt->dcid.data, pkt->dcid.len);
HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
/* If not found on an Initial/0-RTT packet, it could be because an
* ODCID is reused by the client. Calculate the derived CID value to
@ -6524,7 +6584,11 @@ static struct quic_conn *retrieve_qc_conn_from_cid(struct quic_rx_packet *pkt,
if (!node && (pkt->type == QUIC_PACKET_TYPE_INITIAL ||
pkt->type == QUIC_PACKET_TYPE_0RTT)) {
const struct quic_cid derive_cid = quic_derive_cid(&pkt->dcid, saddr);
node = ebmb_lookup(&quic_dghdlrs[tid].cids, derive_cid.data, derive_cid.len);
tree = &quic_cid_trees[quic_cid_tree_idx(&derive_cid)];
HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
node = ebmb_lookup(&tree->root, derive_cid.data, derive_cid.len);
HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
}
if (!node)
@ -8196,9 +8260,12 @@ int quic_dgram_parse(struct quic_dgram *dgram, struct quic_conn *from_qc,
*/
int qc_check_dcid(struct quic_conn *qc, unsigned char *dcid, size_t dcid_len)
{
struct ebmb_node *node;
const uchar idx = _quic_cid_tree_idx(dcid);
struct quic_connection_id *conn_id;
struct ebmb_node *node = NULL;
struct quic_cid_tree *tree = &quic_cid_trees[idx];
/* Test against our default CID or client ODCID. */
if ((qc->scid.len == dcid_len &&
memcmp(qc->scid.data, dcid, dcid_len) == 0) ||
(qc->odcid.len == dcid_len &&
@ -8206,7 +8273,17 @@ int qc_check_dcid(struct quic_conn *qc, unsigned char *dcid, size_t dcid_len)
return 1;
}
node = ebmb_lookup(&quic_dghdlrs[tid].cids, dcid, dcid_len);
/* Test against our other CIDs. This can happen if the client has
* decided to switch to a new one.
*
* TODO to avoid locking, loop through qc.cids as an alternative.
*
* TODO set it to our default CID to avoid this operation next time.
*/
HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
node = ebmb_lookup(&tree->root, dcid, dcid_len);
HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
if (node) {
conn_id = ebmb_entry(node, struct quic_connection_id, node);
if (qc == conn_id->qc)

View File

@ -209,7 +209,6 @@ static int quic_lstnr_dgram_dispatch(unsigned char *buf, size_t len, void *owner
struct quic_dgram *new_dgram, struct list *dgrams)
{
struct quic_dgram *dgram;
const struct listener *l = owner;
unsigned char *dcid;
size_t dcid_len;
int cid_tid;
@ -221,7 +220,18 @@ static int quic_lstnr_dgram_dispatch(unsigned char *buf, size_t len, void *owner
if (!dgram)
goto err;
cid_tid = quic_get_cid_tid(dcid, &l->rx);
if ((cid_tid = quic_get_cid_tid(dcid, dcid_len, saddr, buf, len)) < 0) {
/* CID not found. Ensure only one thread will handle this CID
* to avoid duplicating connection creation. This code may not
* work if using thread-mask on the listener but is only here
* for a short time.
*
* TODO this code implies the thread used for QUIC handshake is
* directly derived from client chosen CID. This association
* must be broken.
*/
cid_tid = dcid[0] % global.nbthread;
}
/* All the members must be initialized! */
dgram->owner = owner;

View File

@ -443,6 +443,7 @@ static const char *lock_label(enum lock_label label)
case SFT_LOCK: return "SFT";
case IDLE_CONNS_LOCK: return "IDLE_CONNS";
case OCSP_LOCK: return "OCSP";
case QC_CID_LOCK: return "QC_CID";
case OTHER_LOCK: return "OTHER";
case DEBUG1_LOCK: return "DEBUG1";
case DEBUG2_LOCK: return "DEBUG2";