From b37c27e28f57a022ef570e3f60a99faaf5a7a9f2 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 18 Oct 2009 22:53:08 +0200 Subject: [PATCH] [MAJOR] http: create the analyser which waits for a response The code part which waits for an HTTP response has been extracted from the old function. We now have two analysers and the second one may re-enable the first one when a 1xx response is encountered. This has been tested and works. The calls to stream_int_return() that were remaining in the wait analyser have been converted to stream_int_retnclose(). --- include/proto/proto_http.h | 3 +- include/types/buffers.h | 10 +- src/cfgparse.c | 4 +- src/proto_http.c | 501 ++++++++++++++++++++----------------- src/session.c | 37 ++- 5 files changed, 312 insertions(+), 243 deletions(-) diff --git a/include/proto/proto_http.h b/include/proto/proto_http.h index 5fcbced81..832be29fb 100644 --- a/include/proto/proto_http.h +++ b/include/proto/proto_http.h @@ -66,7 +66,8 @@ int http_process_req_common(struct session *s, struct buffer *req, int an_bit, s int http_process_request(struct session *t, struct buffer *req, int an_bit); int http_process_tarpit(struct session *s, struct buffer *req, int an_bit); int http_process_request_body(struct session *s, struct buffer *req, int an_bit); -int process_response(struct session *t); +int http_wait_for_response(struct session *s, struct buffer *rep, int an_bit); +int http_process_res_common(struct session *t, struct buffer *rep, int an_bit, struct proxy *px); void debug_hdr(const char *dir, struct session *t, const char *start, const char *end); void get_srv_from_appsession(struct session *t, const char *begin, int len); diff --git a/include/types/buffers.h b/include/types/buffers.h index 8c8ee9b7e..e8ac6419a 100644 --- a/include/types/buffers.h +++ b/include/types/buffers.h @@ -140,10 +140,16 @@ #define AN_REQ_HTTP_INNER 0x00000020 /* inner processing of HTTP request */ #define AN_REQ_HTTP_TARPIT 0x00000040 /* wait for end of HTTP
tarpit */ #define AN_REQ_HTTP_BODY 0x00000080 /* inspect HTTP request body */ - -#define AN_RTR_HTTP_HDR 0x00000200 /* inspect HTTP response headers */ +/* unused: 0x100, 0x200 */ #define AN_REQ_PRST_RDP_COOKIE 0x00000400 /* persistence on rdp cookie */ +/* response analysers */ +#define AN_RES_INSPECT 0x00010000 /* content inspection */ +#define AN_RES_WAIT_HTTP 0x00020000 /* wait for HTTP response */ +#define AN_RES_HTTP_PROCESS_BE 0x00040000 /* process backend's HTTP part */ +#define AN_RES_HTTP_PROCESS_FE 0x00040000 /* process frontend's HTTP part (same for now) */ + + /* Magic value to forward infinite size (TCP, ...), used with ->to_forward */ #define BUF_INFINITE_FORWARD (~0UL) diff --git a/src/cfgparse.c b/src/cfgparse.c index bcadaee59..43637968f 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -4489,7 +4489,7 @@ int check_config_validity() if (curproxy->mode == PR_MODE_HTTP) { curproxy->fe_req_ana |= AN_REQ_WAIT_HTTP | AN_REQ_HTTP_PROCESS_FE; - curproxy->fe_rsp_ana |= AN_RTR_HTTP_HDR; + curproxy->fe_rsp_ana |= AN_RES_WAIT_HTTP | AN_RES_HTTP_PROCESS_FE; } /* both TCP and HTTP must check switching rules */ @@ -4499,7 +4499,7 @@ int check_config_validity() if (curproxy->cap & PR_CAP_BE) { if (curproxy->mode == PR_MODE_HTTP) { curproxy->be_req_ana |= AN_REQ_WAIT_HTTP | AN_REQ_HTTP_INNER | AN_REQ_HTTP_PROCESS_BE; - curproxy->be_rsp_ana |= AN_RTR_HTTP_HDR; + curproxy->be_rsp_ana |= AN_RES_WAIT_HTTP | AN_RES_HTTP_PROCESS_BE; } /* If the backend does requires RDP cookie persistence, we have to diff --git a/src/proto_http.c b/src/proto_http.c index 9460cb7a1..31a2098f6 100644 --- a/src/proto_http.c +++ b/src/proto_http.c @@ -2840,20 +2840,274 @@ int http_process_request_body(struct session *s, struct buffer *req, int an_bit) } } +/* This stream analyser waits for a complete HTTP response. 
It returns 1 if the + * processing can continue on next analysers, or zero if it either needs more + * data or wants to immediately abort the response (eg: timeout, error, ...). It + * is tied to AN_RES_WAIT_HTTP and may remove itself from s->rep->analysers + * when it has nothing left to do, and may remove any analyser when it wants to + * abort. + */ +int http_wait_for_response(struct session *s, struct buffer *rep, int an_bit) +{ + struct http_txn *txn = &s->txn; + struct http_msg *msg = &txn->rsp; + int cur_idx; + int n; + + DPRINTF(stderr,"[%u] %s: session=%p b=%p, exp(r,w)=%u,%u bf=%08x bl=%d analysers=%02x\n", + now_ms, __FUNCTION__, + s, + rep, + rep->rex, rep->wex, + rep->flags, + rep->l, + rep->analysers); + + /* + * Now parse the partial (or complete) lines. + * We will check the response syntax, and also join multi-line + * headers. An index of all the lines will be elaborated while + * parsing. + * + * For the parsing, we use a 28 states FSM. + * + * Here is the information we currently have : + * rep->data + rep->som = beginning of response + * rep->data + rep->eoh = end of processed headers / start of current one + * rep->data + rep->eol = end of current header or line (LF or CRLF) + * rep->lr = first non-visited byte + * rep->r = end of data + */ + + if (likely(rep->lr < rep->r)) + http_msg_analyzer(rep, msg, &txn->hdr_idx); + + /* 1: we might have to print this header in debug mode */ + if (unlikely((global.mode & MODE_DEBUG) && + (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) && + (msg->msg_state == HTTP_MSG_BODY || msg->msg_state == HTTP_MSG_ERROR))) { + char *eol, *sol; + + sol = rep->data + msg->som; + eol = sol + msg->sl.rq.l; + debug_hdr("srvrep", s, sol, eol); + + sol += hdr_idx_first_pos(&txn->hdr_idx); + cur_idx = hdr_idx_first_idx(&txn->hdr_idx); + + while (cur_idx) { + eol = sol + txn->hdr_idx.v[cur_idx].len; + debug_hdr("srvhdr", s, sol, eol); + sol = eol + txn->hdr_idx.v[cur_idx].cr + 1; + cur_idx =
txn->hdr_idx.v[cur_idx].next; + } + } + + /* + * Now we quickly check if we have found a full valid response. + * If not so, we check the FD and buffer states before leaving. + * A full response is indicated by the fact that we have seen + * the double LF/CRLF, so the state is HTTP_MSG_BODY. Invalid + * responses are checked first. + * + * Depending on whether the client is still there or not, we + * may send an error response back or not. Note that normally + * we should only check for HTTP status there, and check I/O + * errors somewhere else. + */ + + if (unlikely(msg->msg_state != HTTP_MSG_BODY)) { + /* Invalid response */ + if (unlikely(msg->msg_state == HTTP_MSG_ERROR)) { + /* we detected a parsing error. We want to archive this response + * in the dedicated proxy area for later troubleshooting. + */ + hdr_response_bad: + if (msg->msg_state == HTTP_MSG_ERROR || msg->err_pos >= 0) + http_capture_bad_message(&s->be->invalid_rep, s, rep, msg, s->fe); + + s->be->counters.failed_resp++; + if (s->srv) + s->srv->counters.failed_resp++; + + rep->analysers = 0; + txn->status = 502; + stream_int_retnclose(rep->cons, error_message(s, HTTP_ERR_502)); + + if (!(s->flags & SN_ERR_MASK)) + s->flags |= SN_ERR_PRXCOND; + if (!(s->flags & SN_FINST_MASK)) + s->flags |= SN_FINST_H; + + return 0; + } + + /* too large response does not fit in buffer. */ + else if (rep->flags & BF_FULL) { + goto hdr_response_bad; + } + + /* read error */ + else if (rep->flags & BF_READ_ERROR) { + if (msg->err_pos >= 0) + http_capture_bad_message(&s->be->invalid_rep, s, rep, msg, s->fe); + + s->be->counters.failed_resp++; + if (s->srv) + s->srv->counters.failed_resp++; + + rep->analysers = 0; + txn->status = 502; + stream_int_retnclose(rep->cons, error_message(s, HTTP_ERR_502)); + + if (!(s->flags & SN_ERR_MASK)) + s->flags |= SN_ERR_SRVCL; + if (!(s->flags & SN_FINST_MASK)) + s->flags |= SN_FINST_H; + return 0; + } + + /* read timeout : return a 504 to the client. 
*/ + else if (rep->flags & BF_READ_TIMEOUT) { + if (msg->err_pos >= 0) + http_capture_bad_message(&s->be->invalid_rep, s, rep, msg, s->fe); + + s->be->counters.failed_resp++; + if (s->srv) + s->srv->counters.failed_resp++; + + rep->analysers = 0; + txn->status = 504; + stream_int_retnclose(rep->cons, error_message(s, HTTP_ERR_504)); + + if (!(s->flags & SN_ERR_MASK)) + s->flags |= SN_ERR_SRVTO; + if (!(s->flags & SN_FINST_MASK)) + s->flags |= SN_FINST_H; + return 0; + } + + /* close from server */ + else if (rep->flags & BF_SHUTR) { + if (msg->err_pos >= 0) + http_capture_bad_message(&s->be->invalid_rep, s, rep, msg, s->fe); + + s->be->counters.failed_resp++; + if (s->srv) + s->srv->counters.failed_resp++; + + rep->analysers = 0; + txn->status = 502; + stream_int_retnclose(rep->cons, error_message(s, HTTP_ERR_502)); + + if (!(s->flags & SN_ERR_MASK)) + s->flags |= SN_ERR_SRVCL; + if (!(s->flags & SN_FINST_MASK)) + s->flags |= SN_FINST_H; + return 0; + } + + /* write error to client (we don't send any message then) */ + else if (rep->flags & BF_WRITE_ERROR) { + if (msg->err_pos >= 0) + http_capture_bad_message(&s->be->invalid_rep, s, rep, msg, s->fe); + + s->be->counters.failed_resp++; + rep->analysers = 0; + + if (!(s->flags & SN_ERR_MASK)) + s->flags |= SN_ERR_CLICL; + if (!(s->flags & SN_FINST_MASK)) + s->flags |= SN_FINST_H; + + /* process_session() will take care of the error */ + return 0; + } + + buffer_dont_close(rep); + return 0; + } + + /* More interesting part now : we know that we have a complete + * response which at least looks like HTTP. We have an indicator + * of each header's length, so we can parse them quickly. 
+ */ + + if (unlikely(msg->err_pos >= 0)) + http_capture_bad_message(&s->be->invalid_rep, s, rep, msg, s->fe); + + /* ensure we keep this pointer to the beginning of the message */ + msg->sol = rep->data + msg->som; + + /* + * 1: get the status code + */ + n = rep->data[msg->sl.st.c] - '0'; + if (n < 1 || n > 5) + n = 0; + s->srv->counters.p.http.rsp[n]++; + s->be->counters.p.http.rsp[n]++; + + txn->status = strl2ui(rep->data + msg->sl.st.c, msg->sl.st.c_l); + + /* + * 2: check for cacheability. + */ + + switch (txn->status) { + case 200: + case 203: + case 206: + case 300: + case 301: + case 410: + /* RFC2616 @13.4: + * "A response received with a status code of + * 200, 203, 206, 300, 301 or 410 MAY be stored + * by a cache (...) unless a cache-control + * directive prohibits caching." + * + * RFC2616 @9.5: POST method : + * "Responses to this method are not cacheable, + * unless the response includes appropriate + * Cache-Control or Expires header fields." + */ + if (likely(txn->meth != HTTP_METH_POST) && + (s->be->options & (PR_O_CHK_CACHE|PR_O_COOK_NOC))) + txn->flags |= TX_CACHEABLE | TX_CACHE_COOK; + break; + default: + break; + } + + /* + * 3: we may need to capture headers + */ + s->logs.logwait &= ~LW_RESP; + if (unlikely((s->logs.logwait & LW_RSPHDR) && s->fe->rsp_cap)) + capture_headers(rep->data + msg->som, &txn->hdr_idx, + txn->rsp.cap, s->fe->rsp_cap); + + /* end of job, return OK */ + rep->analysers &= ~an_bit; + rep->analyse_exp = TICK_ETERNITY; + return 1; +} + /* This function performs all the processing enabled for the current response. * It normally returns zero, but may return 1 if it absolutely needs to be * called again after other functions. It relies on buffers flags, and updates * t->rep->analysers. It might make sense to explode it into several other * functions. It works like process_request (see indications above). 
*/ -int process_response(struct session *t) +int http_process_res_common(struct session *t, struct buffer *rep, int an_bit, struct proxy *px) { struct http_txn *txn = &t->txn; struct buffer *req = t->req; - struct buffer *rep = t->rep; - int n; + struct http_msg *msg = &txn->rsp; + struct proxy *cur_proxy; + int cur_idx; - next_response: DPRINTF(stderr,"[%u] %s: session=%p b=%p, exp(r,w)=%u,%u bf=%08x bl=%d analysers=%02x\n", now_ms, __FUNCTION__, t, @@ -2863,226 +3117,13 @@ int process_response(struct session *t) rep->l, rep->analysers); - if (rep->analysers & AN_RTR_HTTP_HDR) { /* receiving server headers */ - /* - * Now parse the partial (or complete) lines. - * We will check the response syntax, and also join multi-line - * headers. An index of all the lines will be elaborated while - * parsing. - * - * For the parsing, we use a 28 states FSM. - * - * Here is the information we currently have : - * rep->data + rep->som = beginning of response - * rep->data + rep->eoh = end of processed headers / start of current one - * rep->data + rep->eol = end of current header or line (LF or CRLF) - * rep->lr = first non-visited byte - * rep->r = end of data - */ + if (unlikely(msg->msg_state != HTTP_MSG_BODY)) /* we need more data */ + return 0; - int cur_idx; - struct http_msg *msg = &txn->rsp; - struct proxy *cur_proxy; - - if (likely(rep->lr < rep->r)) - http_msg_analyzer(rep, msg, &txn->hdr_idx); - - /* 1: we might have to print this header in debug mode */ - if (unlikely((global.mode & MODE_DEBUG) && - (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) && - (msg->msg_state == HTTP_MSG_BODY || msg->msg_state == HTTP_MSG_ERROR))) { - char *eol, *sol; - - sol = rep->data + msg->som; - eol = sol + msg->sl.rq.l; - debug_hdr("srvrep", t, sol, eol); - - sol += hdr_idx_first_pos(&txn->hdr_idx); - cur_idx = hdr_idx_first_idx(&txn->hdr_idx); - - while (cur_idx) { - eol = sol + txn->hdr_idx.v[cur_idx].len; - debug_hdr("srvhdr", t, sol, eol); - sol = eol + 
txn->hdr_idx.v[cur_idx].cr + 1; - cur_idx = txn->hdr_idx.v[cur_idx].next; - } - } - - /* - * Now we quickly check if we have found a full valid response. - * If not so, we check the FD and buffer states before leaving. - * A full response is indicated by the fact that we have seen - * the double LF/CRLF, so the state is HTTP_MSG_BODY. Invalid - * responses are checked first. - * - * Depending on whether the client is still there or not, we - * may send an error response back or not. Note that normally - * we should only check for HTTP status there, and check I/O - * errors somewhere else. - */ - - if (unlikely(msg->msg_state != HTTP_MSG_BODY)) { - /* Invalid response */ - if (unlikely(msg->msg_state == HTTP_MSG_ERROR)) { - /* we detected a parsing error. We want to archive this response - * in the dedicated proxy area for later troubleshooting. - */ - hdr_response_bad: - if (msg->msg_state == HTTP_MSG_ERROR || msg->err_pos >= 0) - http_capture_bad_message(&t->be->invalid_rep, t, rep, msg, t->fe); - - buffer_shutr_now(rep); - buffer_shutw_now(req); - if (t->srv) - t->srv->counters.failed_resp++; - t->be->counters.failed_resp++; - rep->analysers = 0; - txn->status = 502; - stream_int_return(rep->cons, error_message(t, HTTP_ERR_502)); - if (!(t->flags & SN_ERR_MASK)) - t->flags |= SN_ERR_PRXCOND; - if (!(t->flags & SN_FINST_MASK)) - t->flags |= SN_FINST_H; - - return 0; - } - /* too large response does not fit in buffer. 
*/ - else if (rep->flags & BF_FULL) { - goto hdr_response_bad; - } - /* read error */ - else if (rep->flags & BF_READ_ERROR) { - if (msg->err_pos >= 0) - http_capture_bad_message(&t->be->invalid_rep, t, rep, msg, t->fe); - buffer_shutr_now(rep); - buffer_shutw_now(req); - if (t->srv) - t->srv->counters.failed_resp++; - t->be->counters.failed_resp++; - rep->analysers = 0; - txn->status = 502; - stream_int_return(rep->cons, error_message(t, HTTP_ERR_502)); - if (!(t->flags & SN_ERR_MASK)) - t->flags |= SN_ERR_SRVCL; - if (!(t->flags & SN_FINST_MASK)) - t->flags |= SN_FINST_H; - return 0; - } - /* read timeout : return a 504 to the client. */ - else if (rep->flags & BF_READ_TIMEOUT) { - if (msg->err_pos >= 0) - http_capture_bad_message(&t->be->invalid_rep, t, rep, msg, t->fe); - buffer_shutr_now(rep); - buffer_shutw_now(req); - if (t->srv) - t->srv->counters.failed_resp++; - t->be->counters.failed_resp++; - rep->analysers = 0; - txn->status = 504; - stream_int_return(rep->cons, error_message(t, HTTP_ERR_504)); - if (!(t->flags & SN_ERR_MASK)) - t->flags |= SN_ERR_SRVTO; - if (!(t->flags & SN_FINST_MASK)) - t->flags |= SN_FINST_H; - return 0; - } - /* close from server */ - else if (rep->flags & BF_SHUTR) { - if (msg->err_pos >= 0) - http_capture_bad_message(&t->be->invalid_rep, t, rep, msg, t->fe); - buffer_shutw_now(req); - if (t->srv) - t->srv->counters.failed_resp++; - t->be->counters.failed_resp++; - rep->analysers = 0; - txn->status = 502; - stream_int_return(rep->cons, error_message(t, HTTP_ERR_502)); - if (!(t->flags & SN_ERR_MASK)) - t->flags |= SN_ERR_SRVCL; - if (!(t->flags & SN_FINST_MASK)) - t->flags |= SN_FINST_H; - return 0; - } - /* write error to client (we don't send any message then) */ - else if (rep->flags & BF_WRITE_ERROR) { - if (msg->err_pos >= 0) - http_capture_bad_message(&t->be->invalid_rep, t, rep, msg, t->fe); - buffer_shutr_now(rep); - t->be->counters.failed_resp++; - rep->analysers = 0; - if (!(t->flags & SN_ERR_MASK)) - t->flags |= 
SN_ERR_CLICL; - if (!(t->flags & SN_FINST_MASK)) - t->flags |= SN_FINST_H; - return 0; - } - - buffer_dont_close(rep); - return 0; - } - - - /***************************************************************** - * More interesting part now : we know that we have a complete * - * response which at least looks like HTTP. We have an indicator * - * of each header's length, so we can parse them quickly. * - ****************************************************************/ - - if (msg->err_pos >= 0) - http_capture_bad_message(&t->be->invalid_rep, t, rep, msg, t->fe); - - rep->analysers &= ~AN_RTR_HTTP_HDR; - - /* ensure we keep this pointer to the beginning of the message */ - msg->sol = rep->data + msg->som; - - /* - * 1: get the status code and check for cacheability. - */ - - t->logs.logwait &= ~LW_RESP; - txn->status = strl2ui(rep->data + msg->sl.st.c, msg->sl.st.c_l); - - n = rep->data[msg->sl.st.c] - '0'; - if (n < 1 || n > 5) - n = 0; - - t->srv->counters.p.http.rsp[n]++; - t->be->counters.p.http.rsp[n]++; - - switch (txn->status) { - case 200: - case 203: - case 206: - case 300: - case 301: - case 410: - /* RFC2616 @13.4: - * "A response received with a status code of - * 200, 203, 206, 300, 301 or 410 MAY be stored - * by a cache (...) unless a cache-control - * directive prohibits caching." - * - * RFC2616 @9.5: POST method : - * "Responses to this method are not cacheable, - * unless the response includes appropriate - * Cache-Control or Expires header fields." - */ - if (likely(txn->meth != HTTP_METH_POST) && - (t->be->options & (PR_O_CHK_CACHE|PR_O_COOK_NOC))) - txn->flags |= TX_CACHEABLE | TX_CACHE_COOK; - break; - default: - break; - } - - /* - * 2: we may need to capture headers - */ - if (unlikely((t->logs.logwait & LW_RSPHDR) && t->fe->rsp_cap)) - capture_headers(rep->data + msg->som, &txn->hdr_idx, - txn->rsp.cap, t->fe->rsp_cap); + rep->analysers &= ~an_bit; + rep->analyse_exp = TICK_ETERNITY; + if (1) { /* * 3: we will have to evaluate the filters. 
* As opposed to version 1.2, now they will be evaluated in the @@ -3308,8 +3349,8 @@ int process_response(struct session *t) buffer_forward(rep, rep->lr - (rep->data + msg->som)); msg->msg_state = HTTP_MSG_RPBEFORE; txn->status = 0; - rep->analysers |= AN_RTR_HTTP_HDR; - goto next_response; + rep->analysers |= AN_RES_WAIT_HTTP | an_bit; + return 1; } /************************************************************* @@ -3337,18 +3378,6 @@ int process_response(struct session *t) return 0; } - - /* Note: eventhough nobody should set an unknown flag, clearing them right now will - * probably reduce one day's debugging session. - */ -#ifdef DEBUG_DEV - if (rep->analysers & ~(AN_RTR_HTTP_HDR)) { - fprintf(stderr, "FIXME !!!! unknown analysers flags %s:%d = 0x%08X\n", - __FILE__, __LINE__, rep->analysers); - ABORT_NOW(); - } -#endif - rep->analysers &= AN_RTR_HTTP_HDR; return 0; } diff --git a/src/session.c b/src/session.c index fefdc8648..b010f59f2 100644 --- a/src/session.c +++ b/src/session.c @@ -894,10 +894,43 @@ resync_stream_interface: unsigned int flags = s->rep->flags; if (s->rep->prod->state >= SI_ST_EST) { + unsigned int last_ana = 0; + /* it's up to the analysers to reset auto_close */ buffer_auto_close(s->rep); - if (s->rep->analysers) - process_response(s); + + /* We will call all analysers for which a bit is set in + * s->rep->analysers, following the bit order from LSB + * to MSB. The analysers must remove themselves from + * the list when not needed. Any analyser may return 0 + * to break out of the loop, either because of missing + * data to take a decision, or because it decides to + * kill the session. We loop at least once through each + * analyser, and we may loop again if other analysers + * are added in the middle. 
+ */ + while (s->rep->analysers & ~last_ana) { + last_ana = s->rep->analysers; + + if (s->rep->analysers & AN_RES_WAIT_HTTP) { + last_ana |= AN_RES_WAIT_HTTP; + if (!http_wait_for_response(s, s->rep, AN_RES_WAIT_HTTP)) + break; + } + + if (s->rep->analysers & AN_RES_HTTP_PROCESS_BE) { + last_ana |= AN_RES_HTTP_PROCESS_BE; + if (!http_process_res_common(s, s->rep, AN_RES_HTTP_PROCESS_BE, s->be)) + break; + /* FIXME: we may wait for a second response in case of a status 1xx + * and want to immediately loop back to the top. This is a dirty way + * of doing it, and we should find a cleaner method relying on a + * circular list of function pointers. + */ + if ((s->rep->analysers & ~last_ana) & AN_RES_WAIT_HTTP) + continue; + } + } } if ((s->rep->flags ^ flags) & BF_MASK_STATIC) {