BUG/MAJOR: lb/threads: fix insufficient locking on round-robin LB

Maksim Kupriianov reported very strange crashes in fwrr_update_position() which didn't make sense because of an apparent divide overflow except that the value was not null in the core. It happens that while the locking is correct in all the functions' call graph, the uppermost one (fwrr_get_next_server()) incorrectly expected that its target server was already locked when called. This stupid assumption causd the server lock not to be held when calling the other ones, explaining how it was possible to change the server's eweight by calling srv_lb_commit_status() under the server lock yet collide with its unprotected usage. This commit makes sure that fwrr_get_server_from_group() retrieves a locked server and that fwrr_get_next_server() is responsible for unlocking the server before returning it. There is one subtlety in this function which is that it builds a list of avoided servers that were full while scanning the tree, and all of them are queued in a full state so they must be unlocked upon return. Many thanks to Maksim for providing detailed info allowing to narrow down this bug. This fix must be backported to 1.9. In 1.8 the lock seems much wider and changes to the server's state are performed under the rendez-vous point so this it doesn't seem possible that it happens there.
2025-04-19 13:35:47 +00:00 · 2019-04-16 11:21:14 +02:00 · 2019-04-16 11:21:14 +02:00 · 9df86f997e
commit 9df86f997e
parent 21dde5053a
2 changed files with 52 additions and 30 deletions
--- a/include/proto/backend.h
+++ b/include/proto/backend.h
@ -113,7 +113,8 @@ static inline int srv_currently_usable(const struct server *srv)
 }

 /* This function commits the next server state and weight onto the current
- * ones in order to detect future changes.
+ * ones in order to detect future changes. The server's lock is expected to
+ * be held when calling this function.
 */
 static inline void srv_lb_commit_status(struct server *srv)
 {
--- a/src/lb_fwrr.c
+++ b/src/lb_fwrr.c
@ -448,32 +448,40 @@ static inline void fwrr_switch_trees(struct fwrr_group *grp)
 /* return next server from the current tree in FWRR group <grp>, or a server
 * from the "init" tree if appropriate. If both trees are empty, return NULL.
 *
- * The lbprm's lock must be held.
+ * The lbprm's lock must be held. The server's lock will be held on return if
+ * a non-null server is returned.
 */
 static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
 {
-	struct eb32_node *node;
-	struct server *s;
+	struct eb32_node *node1;
+	struct eb32_node *node2;
+	struct server *s1 = NULL;
+	struct server *s2 = NULL;

-	node = eb32_first(&grp->curr);
-	s = eb32_entry(node, struct server, lb_node);
+	node1 = eb32_first(&grp->curr);
+	if (node1) {
+		s1 = eb32_entry(node1, struct server, lb_node);
+		HA_SPIN_LOCK(SERVER_LOCK, &s1->lock);
+		if (s1->cur_eweight && s1->npos <= grp->curr_pos)
+			return s1;
+	}

-	if (!node || s->npos > grp->curr_pos) {
-		/* either we have no server left, or we have a hole */
-		struct eb32_node *node2;
-		node2 = eb32_first(grp->init);
-		if (node2) {
-			node = node2;
-			s = eb32_entry(node, struct server, lb_node);
-			fwrr_get_srv_init(s);
-			if (s->cur_eweight == 0) /* FIXME: is it possible at all ? */
-				node = NULL;
+	/* Either we have no server left, or we have a hole. We'll look in the
+	 * init tree or a better proposal. At this point, if <s1> is non-null,
+	 * it is locked.
+	 */
+	node2 = eb32_first(grp->init);
+	if (node2) {
+		s2 = eb32_entry(node2, struct server, lb_node);
+		HA_SPIN_LOCK(SERVER_LOCK, &s2->lock);
+		if (s2->cur_eweight) {
+			if (s1)
+				HA_SPIN_UNLOCK(SERVER_LOCK, &s1->lock);
+			fwrr_get_srv_init(s2);
+			return s2;
 		}
 	}
-	if (node)
-		return s;
-	else
-		return NULL;
+	return s1;
 }

 /* Computes next position of server <s> in the group. It is mandatory for <s>
@ -509,7 +517,7 @@ static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s
 * the init tree if appropriate. If both trees are empty, return NULL.
 * Saturated servers are skipped and requeued.
 *
- * The server's lock must be held. The lbprm's lock will be used.
+ * The lbprm's lock and the server's lock will be used.
 */
 struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 {
@ -550,6 +558,9 @@ struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 				break;
 			if (switched) {
 				if (avoided) {
+					/* no need to lock it, it was already
+					 * locked on first pass.
+					 */
 					srv = avoided;
 					break;
 				}
@ -557,13 +568,12 @@ struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 			}
 			switched = 1;
 			fwrr_switch_trees(grp);
-
 		}

-		/* OK, we have a server. However, it may be saturated, in which
-		 * case we don't want to reconsider it for now. We'll update
-		 * its position and dequeue it anyway, so that we can move it
-		 * to a better place afterwards.
+		/* OK, we have a server and it is locked. However, it may be
+		 * saturated, in which case we don't want to reconsider it for
+		 * now. We'll update its position and dequeue it anyway, so
+		 * that we can move it to a better place afterwards.
 		 */
 		fwrr_update_position(grp, srv);
 		fwrr_dequeue_srv(srv);
@ -576,7 +586,9 @@ struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 			avoided = srv; /* ...but remember that is was selected yet avoided */
 		}

-		/* the server is saturated or avoided, let's chain it for later reinsertion */
+		/* the server is saturated or avoided, let's chain it for later reinsertion.
+		 * Note that servers chained this way are all locked.
+		 */
 		srv->next_full = full;
 		full = srv;
 	}
@ -584,9 +596,14 @@ struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 	/* OK, we got the best server, let's update it */
 	fwrr_queue_srv(srv);

+	/* we don't need to keep this lock anymore  */
+	HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+
 requeue_servers:
 	/* Requeue all extracted servers. If full==srv then it was
-	 * avoided (unsuccessfully) and chained, omit it now.
+	 * avoided (unsuccessfully) and chained, omit it now. The
+	 * only way to get there is by having <avoided>==NULL or
+	 * <avoided>==<srv>.
 	 */
 	if (unlikely(full != NULL)) {
 		if (switched) {
@ -595,8 +612,10 @@ struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 			 * their weight matters.
 			 */
 			do {
-				if (likely(full != srv))
+				if (likely(full != srv)) {
 					fwrr_queue_by_weight(grp->init, full);
+					HA_SPIN_UNLOCK(SERVER_LOCK, &full->lock);
+				}
 				full = full->next_full;
 			} while (full);
 		} else {
@ -604,8 +623,10 @@ struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
 			 * so that they regain their expected place.
 			 */
 			do {
-				if (likely(full != srv))
+				if (likely(full != srv)) {
 					fwrr_queue_srv(full);
+					HA_SPIN_UNLOCK(SERVER_LOCK, &full->lock);
+				}
 				full = full->next_full;
 			} while (full);
 		}