mirror of
http://git.haproxy.org/git/haproxy.git/
synced 2025-01-20 20:50:44 +00:00
REORG: http: move the HTTP/1 header block parser to h1.c
Since it still depends on http_msg, it was not renamed yet.
This commit is contained in:
parent
db4893d6a4
commit
8740c8b1b2
@ -28,8 +28,16 @@
|
||||
#include <common/standard.h>
|
||||
#include <types/h1.h>
|
||||
#include <types/proto_http.h>
|
||||
#include <proto/hdr_idx.h>
|
||||
|
||||
/* Character class lookup table holding one entry per possible byte value.
 * The array itself is defined in src/h1.c.
 */
extern const uint8_t h1_char_classes[256];
|
||||
/* Parses an HTTP/1 request line found between <ptr> and <end>, resuming in
 * parser state <state>. Returns the updated pointer once a complete line has
 * been seen, otherwise NULL after storing the resume offset and state into
 * <ret_ptr> and <ret_state> when they are non-NULL.
 */
const char *http_parse_reqline(struct http_msg *msg,
|
||||
enum h1_state state, const char *ptr, const char *end,
|
||||
unsigned int *ret_ptr, enum h1_state *ret_state);
|
||||
/* Same contract as http_parse_reqline(), but for an HTTP/1 status line. */
const char *http_parse_stsline(struct http_msg *msg,
|
||||
enum h1_state state, const char *ptr, const char *end,
|
||||
unsigned int *ret_ptr, enum h1_state *ret_state);
|
||||
/* Parses the start line and headers of an HTTP/1 message (request or
 * response depending on msg->msg_state) and indexes the headers into <idx>.
 */
void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx);
|
||||
/* Skips the trailers of <msg> starting at msg->next. Per the comment that
 * precedes its definition it returns >0 once the end of the trailers is found;
 * the other return values are not visible from here.
 */
int http_forward_trailers(struct http_msg *msg);
|
||||
|
||||
#define H1_FLG_CTL 0x01
|
||||
|
640
src/h1.c
640
src/h1.c
@ -13,6 +13,7 @@
|
||||
#include <common/config.h>
|
||||
|
||||
#include <proto/h1.h>
|
||||
#include <proto/hdr_idx.h>
|
||||
|
||||
/* It is about twice as fast on recent architectures to lookup a byte in a
|
||||
* table than to perform a boolean AND or OR between two tests. Refer to
|
||||
@ -155,6 +156,645 @@ const unsigned char h1_char_classes[256] = {
|
||||
};
|
||||
|
||||
|
||||
/*
 * Parses an HTTP/1 status line located between <ptr> and <end>, resuming in
 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled; any
 * other state gives undefined results.
 *
 * It is the caller's responsibility to ensure that ptr < end, and that
 * msg->sol points to the beginning of the response.
 *
 * Arguments:
 *   msg       - message being parsed. msg->sl.st.{v_l,c,c_l,r,r_l,l} are
 *               filled in as the version, status code and reason phrase get
 *               delimited, and msg->err_state is set when a parsing error is
 *               detected. Recorded positions are offsets relative to
 *               msg->chn->buf->p.
 *   state     - parser state to resume from.
 *   ptr, end  - first byte to examine and end of the available data.
 *   ret_ptr   - if non-NULL, receives the offset to resume from whenever NULL
 *               is returned.
 *   ret_state - if non-NULL, receives the state to resume from whenever NULL
 *               is returned; it is HTTP_MSG_ERROR after a parsing error.
 *
 * Return value: if a complete line is found (which implies that at least one
 * CR or LF is found before <end>), the updated <ptr> is returned. Otherwise
 * NULL is returned, indicating an incomplete or invalid line (which does not
 * mean that parts have not been updated).
 *
 * This function was intentionally designed to be called from
 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
 * within its state machine and use the same macros, hence the need for same
 * labels and variable names. Note that msg->sol is left unchanged.
 *
 * NOTE(review): EAT_AND_JUMP_OR_RETURN() is defined outside of this view; it
 * presumably advances <ptr> and jumps to its first label while data remain,
 * otherwise stores the given state and jumps to its second label - confirm
 * against the macro definition.
 */
|
||||
const char *http_parse_stsline(struct http_msg *msg,
|
||||
enum h1_state state, const char *ptr, const char *end,
|
||||
unsigned int *ret_ptr, enum h1_state *ret_state)
|
||||
{
|
||||
/* every position stored into msg->sl.st is an offset from this pointer */
const char *msg_start = msg->chn->buf->p;
|
||||
|
||||
switch (state) {
|
||||
case HTTP_MSG_RPVER:
|
||||
http_msg_rpver:
|
||||
if (likely(HTTP_IS_VER_TOKEN(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
|
||||
|
||||
if (likely(HTTP_IS_SPHT(*ptr))) {
|
||||
msg->sl.st.v_l = ptr - msg_start;
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
|
||||
}
|
||||
/* neither a version character nor a space/tab: parsing error */
msg->err_state = HTTP_MSG_RPVER;
|
||||
state = HTTP_MSG_ERROR;
|
||||
break;
|
||||
|
||||
case HTTP_MSG_RPVER_SP:
|
||||
http_msg_rpver_sp:
|
||||
if (likely(!HTTP_IS_LWS(*ptr))) {
|
||||
msg->sl.st.c = ptr - msg_start;
|
||||
goto http_msg_rpcode;
|
||||
}
|
||||
if (likely(HTTP_IS_SPHT(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
|
||||
/* so it's a CR/LF, this is invalid */
|
||||
msg->err_state = HTTP_MSG_RPVER_SP;
|
||||
state = HTTP_MSG_ERROR;
|
||||
break;
|
||||
|
||||
case HTTP_MSG_RPCODE:
|
||||
http_msg_rpcode:
|
||||
/* the status code extends up to the first LWS character */
if (likely(!HTTP_IS_LWS(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
|
||||
|
||||
if (likely(HTTP_IS_SPHT(*ptr))) {
|
||||
msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
|
||||
}
|
||||
|
||||
/* so it's a CR/LF, so there is no reason phrase */
|
||||
msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
|
||||
http_msg_rsp_reason:
|
||||
/* FIXME: should we support HTTP responses without any reason phrase ? */
|
||||
msg->sl.st.r = ptr - msg_start;
|
||||
msg->sl.st.r_l = 0;
|
||||
goto http_msg_rpline_eol;
|
||||
|
||||
case HTTP_MSG_RPCODE_SP:
|
||||
http_msg_rpcode_sp:
|
||||
if (likely(!HTTP_IS_LWS(*ptr))) {
|
||||
msg->sl.st.r = ptr - msg_start;
|
||||
goto http_msg_rpreason;
|
||||
}
|
||||
if (likely(HTTP_IS_SPHT(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
|
||||
/* so it's a CR/LF, so there is no reason phrase */
|
||||
goto http_msg_rsp_reason;
|
||||
|
||||
case HTTP_MSG_RPREASON:
|
||||
http_msg_rpreason:
|
||||
/* the reason phrase extends up to the first CR or LF */
if (likely(!HTTP_IS_CRLF(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
|
||||
msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
|
||||
http_msg_rpline_eol:
|
||||
/* We have seen the end of line. Note that we do not
 * necessarily have the \n yet, but at least we know that we
 * have EITHER \r OR \n, otherwise the response would not be
 * complete. We can then record the response length and return
 * to the caller which will be able to register it.
 */
|
||||
msg->sl.st.l = ptr - msg_start - msg->sol;
|
||||
return ptr;
|
||||
|
||||
default:
|
||||
#ifdef DEBUG_FULL
|
||||
fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
|
||||
exit(1);
|
||||
#endif
|
||||
;
|
||||
}
|
||||
|
||||
http_msg_ood:
|
||||
/* Out of valid data, or parsing error (<state> is then HTTP_MSG_ERROR):
 * report the position and state to the caller and tell it that no complete
 * line is available.
 */
|
||||
if (ret_state)
|
||||
*ret_state = state;
|
||||
if (ret_ptr)
|
||||
*ret_ptr = ptr - msg_start;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function parses a request line between <ptr> and <end>, starting with
|
||||
* parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
|
||||
* HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
|
||||
* will give undefined results.
|
||||
* Note that it is upon the caller's responsibility to ensure that ptr < end,
|
||||
* and that msg->sol points to the beginning of the request.
|
||||
* If a complete line is found (which implies that at least one CR or LF is
|
||||
* found before <end>, the updated <ptr> is returned, otherwise NULL is
|
||||
* returned indicating an incomplete line (which does not mean that parts have
|
||||
* not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
|
||||
* non-NULL, they are fed with the new <ptr> and <state> values to be passed
|
||||
* upon next call.
|
||||
*
|
||||
* This function was intentionally designed to be called from
|
||||
* http_msg_analyzer() with the lowest overhead. It should integrate perfectly
|
||||
* within its state machine and use the same macros, hence the need for same
|
||||
* labels and variable names. Note that msg->sol is left unchanged.
|
||||
*/
|
||||
const char *http_parse_reqline(struct http_msg *msg,
|
||||
enum h1_state state, const char *ptr, const char *end,
|
||||
unsigned int *ret_ptr, enum h1_state *ret_state)
|
||||
{
|
||||
const char *msg_start = msg->chn->buf->p;
|
||||
|
||||
switch (state) {
|
||||
case HTTP_MSG_RQMETH:
|
||||
http_msg_rqmeth:
|
||||
if (likely(HTTP_IS_TOKEN(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
|
||||
|
||||
if (likely(HTTP_IS_SPHT(*ptr))) {
|
||||
msg->sl.rq.m_l = ptr - msg_start;
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
|
||||
}
|
||||
|
||||
if (likely(HTTP_IS_CRLF(*ptr))) {
|
||||
/* HTTP 0.9 request */
|
||||
msg->sl.rq.m_l = ptr - msg_start;
|
||||
http_msg_req09_uri:
|
||||
msg->sl.rq.u = ptr - msg_start;
|
||||
http_msg_req09_uri_e:
|
||||
msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
|
||||
http_msg_req09_ver:
|
||||
msg->sl.rq.v = ptr - msg_start;
|
||||
msg->sl.rq.v_l = 0;
|
||||
goto http_msg_rqline_eol;
|
||||
}
|
||||
msg->err_state = HTTP_MSG_RQMETH;
|
||||
state = HTTP_MSG_ERROR;
|
||||
break;
|
||||
|
||||
case HTTP_MSG_RQMETH_SP:
|
||||
http_msg_rqmeth_sp:
|
||||
if (likely(!HTTP_IS_LWS(*ptr))) {
|
||||
msg->sl.rq.u = ptr - msg_start;
|
||||
goto http_msg_rquri;
|
||||
}
|
||||
if (likely(HTTP_IS_SPHT(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
|
||||
/* so it's a CR/LF, meaning an HTTP 0.9 request */
|
||||
goto http_msg_req09_uri;
|
||||
|
||||
case HTTP_MSG_RQURI:
|
||||
http_msg_rquri:
|
||||
#if defined(__x86_64__) || \
|
||||
defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
|
||||
defined(__ARM_ARCH_7A__)
|
||||
/* speedup: skip bytes not between 0x21 and 0x7e inclusive */
|
||||
while (ptr <= end - sizeof(int)) {
|
||||
int x = *(int *)ptr - 0x21212121;
|
||||
if (x & 0x80808080)
|
||||
break;
|
||||
|
||||
x -= 0x5e5e5e5e;
|
||||
if (!(x & 0x80808080))
|
||||
break;
|
||||
|
||||
ptr += sizeof(int);
|
||||
}
|
||||
#endif
|
||||
if (ptr >= end) {
|
||||
state = HTTP_MSG_RQURI;
|
||||
goto http_msg_ood;
|
||||
}
|
||||
http_msg_rquri2:
|
||||
if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
|
||||
|
||||
if (likely(HTTP_IS_SPHT(*ptr))) {
|
||||
msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
|
||||
}
|
||||
|
||||
if (likely((unsigned char)*ptr >= 128)) {
|
||||
/* non-ASCII chars are forbidden unless option
|
||||
* accept-invalid-http-request is enabled in the frontend.
|
||||
* In any case, we capture the faulty char.
|
||||
*/
|
||||
if (msg->err_pos < -1)
|
||||
goto invalid_char;
|
||||
if (msg->err_pos == -1)
|
||||
msg->err_pos = ptr - msg_start;
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
|
||||
}
|
||||
|
||||
if (likely(HTTP_IS_CRLF(*ptr))) {
|
||||
/* so it's a CR/LF, meaning an HTTP 0.9 request */
|
||||
goto http_msg_req09_uri_e;
|
||||
}
|
||||
|
||||
/* OK forbidden chars, 0..31 or 127 */
|
||||
invalid_char:
|
||||
msg->err_pos = ptr - msg_start;
|
||||
msg->err_state = HTTP_MSG_RQURI;
|
||||
state = HTTP_MSG_ERROR;
|
||||
break;
|
||||
|
||||
case HTTP_MSG_RQURI_SP:
|
||||
http_msg_rquri_sp:
|
||||
if (likely(!HTTP_IS_LWS(*ptr))) {
|
||||
msg->sl.rq.v = ptr - msg_start;
|
||||
goto http_msg_rqver;
|
||||
}
|
||||
if (likely(HTTP_IS_SPHT(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
|
||||
/* so it's a CR/LF, meaning an HTTP 0.9 request */
|
||||
goto http_msg_req09_ver;
|
||||
|
||||
case HTTP_MSG_RQVER:
|
||||
http_msg_rqver:
|
||||
if (likely(HTTP_IS_VER_TOKEN(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
|
||||
|
||||
if (likely(HTTP_IS_CRLF(*ptr))) {
|
||||
msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
|
||||
http_msg_rqline_eol:
|
||||
/* We have seen the end of line. Note that we do not
|
||||
* necessarily have the \n yet, but at least we know that we
|
||||
* have EITHER \r OR \n, otherwise the request would not be
|
||||
* complete. We can then record the request length and return
|
||||
* to the caller which will be able to register it.
|
||||
*/
|
||||
msg->sl.rq.l = ptr - msg_start - msg->sol;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* neither an HTTP_VER token nor a CRLF */
|
||||
msg->err_state = HTTP_MSG_RQVER;
|
||||
state = HTTP_MSG_ERROR;
|
||||
break;
|
||||
|
||||
default:
|
||||
#ifdef DEBUG_FULL
|
||||
fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
|
||||
exit(1);
|
||||
#endif
|
||||
;
|
||||
}
|
||||
|
||||
http_msg_ood:
|
||||
/* out of valid data */
|
||||
if (ret_state)
|
||||
*ret_state = state;
|
||||
if (ret_ptr)
|
||||
*ret_ptr = ptr - msg_start;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * This function parses an HTTP message, either a request or a response,
 * depending on the initial msg->msg_state. The caller is responsible for
 * ensuring that the message does not wrap. The function can be preempted
 * everywhere when data are missing and recalled at the exact same location
 * with no information loss. The message may even be realigned between two
 * calls. The header index is re-initialized when switching from
 * MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
 * fields. Note that msg->sol will be initialized after completing the first
 * state, so that none of the msg pointers has to be initialized prior to the
 * first call.
 *
 * Arguments:
 *   msg - message to parse; parsing resumes at offset msg->next of
 *         msg->chn->buf->p, in state msg->msg_state.
 *   idx - header index fed with the start line and with each complete header.
 *
 * The function returns nothing; the outcome is reported through <msg>:
 *   - complete header block: msg->msg_state is HTTP_MSG_BODY, msg->sov and
 *     msg->next hold the total header length, msg->eoh the offset of the
 *     last CRLF, msg->eol the length of that last CRLF and msg->sol is 0;
 *   - missing data: msg->msg_state and msg->next hold the state and offset
 *     to resume from;
 *   - invalid message: msg->msg_state is HTTP_MSG_ERROR, msg->err_state holds
 *     the state in which the error was met and msg->next the offset reached
 *     by the parser.
 *
 * NOTE(review): the visible code only calls hdr_idx_init() on the response
 * path (HTTP_MSG_RPBEFORE), not on the request path - presumably the request
 * index is initialized by the caller; confirm.
 */
|
||||
void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
|
||||
{
|
||||
enum h1_state state; /* updated only when leaving the FSM */
|
||||
register char *ptr, *end; /* request pointers, to avoid dereferences */
|
||||
struct buffer *buf;
|
||||
|
||||
state = msg->msg_state;
|
||||
buf = msg->chn->buf;
|
||||
ptr = buf->p + msg->next;
|
||||
end = buf->p + buf->i;
|
||||
|
||||
/* nothing new to parse: leave with state and position unchanged */
if (unlikely(ptr >= end))
|
||||
goto http_msg_ood;
|
||||
|
||||
switch (state) {
|
||||
/*
 * First, states that are specific to the response only.
 * We check them first so that request and headers are
 * closer to each other (accessed more often).
 */
|
||||
case HTTP_MSG_RPBEFORE:
|
||||
http_msg_rpbefore:
|
||||
if (likely(HTTP_IS_TOKEN(*ptr))) {
|
||||
/* we have a start of message, but we have to check
 * first if we need to remove some CRLF. We can only
 * do this when o=0.
 */
|
||||
if (unlikely(ptr != buf->p)) {
|
||||
if (buf->o)
|
||||
goto http_msg_ood;
|
||||
/* Remove empty leading lines, as recommended by RFC2616. */
|
||||
bi_fast_delete(buf, ptr - buf->p);
|
||||
}
|
||||
msg->sol = 0;
|
||||
msg->sl.st.l = 0; /* used in debug mode */
|
||||
hdr_idx_init(idx);
|
||||
state = HTTP_MSG_RPVER;
|
||||
goto http_msg_rpver;
|
||||
}
|
||||
|
||||
if (unlikely(!HTTP_IS_CRLF(*ptr))) {
|
||||
state = HTTP_MSG_RPBEFORE;
|
||||
goto http_msg_invalid;
|
||||
}
|
||||
|
||||
/* leading empty line: a lone LF is skipped, a CR must be followed by an LF */
if (unlikely(*ptr == '\n'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
|
||||
/* stop here */
|
||||
|
||||
case HTTP_MSG_RPBEFORE_CR:
|
||||
http_msg_rpbefore_cr:
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
|
||||
/* stop here */
|
||||
|
||||
case HTTP_MSG_RPVER:
|
||||
http_msg_rpver:
|
||||
case HTTP_MSG_RPVER_SP:
|
||||
case HTTP_MSG_RPCODE:
|
||||
case HTTP_MSG_RPCODE_SP:
|
||||
case HTTP_MSG_RPREASON:
|
||||
/* the whole status line is delegated to http_parse_stsline(), which
 * stores the resume offset and state directly into msg->next and
 * msg->msg_state when it returns NULL.
 */
ptr = (char *)http_parse_stsline(msg,
|
||||
state, ptr, end,
|
||||
&msg->next, &msg->msg_state);
|
||||
if (unlikely(!ptr))
|
||||
return;
|
||||
|
||||
/* we have a full response and we know that we have either a CR
 * or an LF at <ptr>.
 */
|
||||
hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
|
||||
|
||||
msg->sol = ptr - buf->p;
|
||||
if (likely(*ptr == '\r'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
|
||||
goto http_msg_rpline_end;
|
||||
|
||||
case HTTP_MSG_RPLINE_END:
|
||||
http_msg_rpline_end:
|
||||
/* msg->sol must point to the first of CR or LF. */
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
|
||||
/* stop here */
|
||||
|
||||
/*
 * Second, states that are specific to the request only
 */
|
||||
case HTTP_MSG_RQBEFORE:
|
||||
http_msg_rqbefore:
|
||||
if (likely(HTTP_IS_TOKEN(*ptr))) {
|
||||
/* we have a start of message, but we have to check
 * first if we need to remove some CRLF. We can only
 * do this when o=0.
 * NOTE(review): this test is hinted likely() whereas the
 * equivalent one on the response path is hinted unlikely().
 */
|
||||
if (likely(ptr != buf->p)) {
|
||||
if (buf->o)
|
||||
goto http_msg_ood;
|
||||
/* Remove empty leading lines, as recommended by RFC2616. */
|
||||
bi_fast_delete(buf, ptr - buf->p);
|
||||
}
|
||||
msg->sol = 0;
|
||||
msg->sl.rq.l = 0; /* used in debug mode */
|
||||
state = HTTP_MSG_RQMETH;
|
||||
goto http_msg_rqmeth;
|
||||
}
|
||||
|
||||
if (unlikely(!HTTP_IS_CRLF(*ptr))) {
|
||||
state = HTTP_MSG_RQBEFORE;
|
||||
goto http_msg_invalid;
|
||||
}
|
||||
|
||||
/* leading empty line: a lone LF is skipped, a CR must be followed by an LF */
if (unlikely(*ptr == '\n'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
|
||||
/* stop here */
|
||||
|
||||
case HTTP_MSG_RQBEFORE_CR:
|
||||
http_msg_rqbefore_cr:
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
|
||||
/* stop here */
|
||||
|
||||
case HTTP_MSG_RQMETH:
|
||||
http_msg_rqmeth:
|
||||
case HTTP_MSG_RQMETH_SP:
|
||||
case HTTP_MSG_RQURI:
|
||||
case HTTP_MSG_RQURI_SP:
|
||||
case HTTP_MSG_RQVER:
|
||||
/* the whole request line is delegated to http_parse_reqline(), which
 * stores the resume offset and state directly into msg->next and
 * msg->msg_state when it returns NULL.
 */
ptr = (char *)http_parse_reqline(msg,
|
||||
state, ptr, end,
|
||||
&msg->next, &msg->msg_state);
|
||||
if (unlikely(!ptr))
|
||||
return;
|
||||
|
||||
/* we have a full request and we know that we have either a CR
 * or an LF at <ptr>.
 */
|
||||
hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
|
||||
|
||||
msg->sol = ptr - buf->p;
|
||||
if (likely(*ptr == '\r'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
|
||||
goto http_msg_rqline_end;
|
||||
|
||||
case HTTP_MSG_RQLINE_END:
|
||||
http_msg_rqline_end:
|
||||
/* check for HTTP/0.9 request : no version information available.
 * msg->sol must point to the first of CR or LF.
 */
|
||||
if (unlikely(msg->sl.rq.v_l == 0))
|
||||
goto http_msg_last_lf;
|
||||
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
|
||||
/* stop here */
|
||||
|
||||
/*
 * Common states below
 */
|
||||
case HTTP_MSG_HDR_FIRST:
|
||||
http_msg_hdr_first:
|
||||
msg->sol = ptr - buf->p;
|
||||
if (likely(!HTTP_IS_CRLF(*ptr))) {
|
||||
goto http_msg_hdr_name;
|
||||
}
|
||||
|
||||
/* CR or LF right after the start line: there is no header at all */
if (likely(*ptr == '\r'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
|
||||
goto http_msg_last_lf;
|
||||
|
||||
case HTTP_MSG_HDR_NAME:
|
||||
http_msg_hdr_name:
|
||||
/* assumes msg->sol points to the first char */
|
||||
if (likely(HTTP_IS_TOKEN(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
|
||||
|
||||
if (likely(*ptr == ':'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
|
||||
|
||||
/* non-token character in a header name: fatal when msg->err_pos is
 * below -1 or when the character is an LF, otherwise only recorded.
 */
if (likely(msg->err_pos < -1) || *ptr == '\n') {
|
||||
state = HTTP_MSG_HDR_NAME;
|
||||
goto http_msg_invalid;
|
||||
}
|
||||
|
||||
if (msg->err_pos == -1) /* capture error pointer */
|
||||
msg->err_pos = ptr - buf->p; /* >= 0 now */
|
||||
|
||||
/* and we still accept this non-token character */
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
|
||||
|
||||
case HTTP_MSG_HDR_L1_SP:
|
||||
http_msg_hdr_l1_sp:
|
||||
/* assumes msg->sol points to the first char */
|
||||
if (likely(HTTP_IS_SPHT(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
|
||||
|
||||
/* header value can be basically anything except CR/LF */
|
||||
msg->sov = ptr - buf->p;
|
||||
|
||||
if (likely(!HTTP_IS_CRLF(*ptr))) {
|
||||
goto http_msg_hdr_val;
|
||||
}
|
||||
|
||||
if (likely(*ptr == '\r'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
|
||||
goto http_msg_hdr_l1_lf;
|
||||
|
||||
case HTTP_MSG_HDR_L1_LF:
|
||||
http_msg_hdr_l1_lf:
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
|
||||
|
||||
case HTTP_MSG_HDR_L1_LWS:
|
||||
http_msg_hdr_l1_lws:
|
||||
if (likely(HTTP_IS_SPHT(*ptr))) {
|
||||
/* replace HT,CR,LF with spaces */
|
||||
for (; buf->p + msg->sov < ptr; msg->sov++)
|
||||
buf->p[msg->sov] = ' ';
|
||||
goto http_msg_hdr_l1_sp;
|
||||
}
|
||||
/* we had a header consisting only in spaces ! */
|
||||
msg->eol = msg->sov;
|
||||
goto http_msg_complete_header;
|
||||
|
||||
case HTTP_MSG_HDR_VAL:
|
||||
http_msg_hdr_val:
|
||||
/* assumes msg->sol points to the first char, and msg->sov
 * points to the first character of the value.
 */
|
||||
|
||||
/* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
 * and lower. In fact since most of the time is spent in the loop, we
 * also remove the sign bit test so that bytes 0x8e..0xff break the
 * loop just like bytes 0x00..0x0d do, but we don't care since they're
 * very rare in header values. Leaving the loop early is harmless: the
 * byte-by-byte code at http_msg_hdr_val2 then decides.
 */
|
||||
#if defined(__x86_64__)
|
||||
while (ptr <= end - sizeof(long)) {
|
||||
if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
|
||||
goto http_msg_hdr_val2;
|
||||
ptr += sizeof(long);
|
||||
}
|
||||
#endif
|
||||
#if defined(__x86_64__) || \
|
||||
defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
|
||||
defined(__ARM_ARCH_7A__)
|
||||
while (ptr <= end - sizeof(int)) {
|
||||
if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
|
||||
goto http_msg_hdr_val2;
|
||||
ptr += sizeof(int);
|
||||
}
|
||||
#endif
|
||||
if (ptr >= end) {
|
||||
state = HTTP_MSG_HDR_VAL;
|
||||
goto http_msg_ood;
|
||||
}
|
||||
http_msg_hdr_val2:
|
||||
if (likely(!HTTP_IS_CRLF(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
|
||||
|
||||
msg->eol = ptr - buf->p;
|
||||
/* Note: we could also copy eol into ->eoh so that we have the
 * real header end in case it ends with lots of LWS, but is this
 * really needed ?
 */
|
||||
if (likely(*ptr == '\r'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
|
||||
goto http_msg_hdr_l2_lf;
|
||||
|
||||
case HTTP_MSG_HDR_L2_LF:
|
||||
http_msg_hdr_l2_lf:
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
|
||||
|
||||
case HTTP_MSG_HDR_L2_LWS:
|
||||
http_msg_hdr_l2_lws:
|
||||
/* a line starting with a space or tab continues the previous value */
if (unlikely(HTTP_IS_SPHT(*ptr))) {
|
||||
/* LWS: replace HT,CR,LF with spaces */
|
||||
for (; buf->p + msg->eol < ptr; msg->eol++)
|
||||
buf->p[msg->eol] = ' ';
|
||||
goto http_msg_hdr_val;
|
||||
}
|
||||
http_msg_complete_header:
|
||||
/*
 * It was a new header, so the last one is finished.
 * Assumes msg->sol points to the first char, msg->sov points
 * to the first character of the value and msg->eol to the
 * first CR or LF so we know how the line ends. We insert last
 * header into the index.
 */
|
||||
if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r',
|
||||
idx, idx->tail) < 0)) {
|
||||
state = HTTP_MSG_HDR_L2_LWS;
|
||||
goto http_msg_invalid;
|
||||
}
|
||||
|
||||
msg->sol = ptr - buf->p;
|
||||
if (likely(!HTTP_IS_CRLF(*ptr))) {
|
||||
goto http_msg_hdr_name;
|
||||
}
|
||||
|
||||
if (likely(*ptr == '\r'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
|
||||
goto http_msg_last_lf;
|
||||
|
||||
case HTTP_MSG_LAST_LF:
|
||||
http_msg_last_lf:
|
||||
/* Assumes msg->sol points to the first of either CR or LF.
 * Sets ->sov and ->next to the total header length, ->eoh to
 * the last CRLF, and ->eol to the last CRLF length (1 or 2).
 */
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
|
||||
ptr++;
|
||||
msg->sov = msg->next = ptr - buf->p;
|
||||
msg->eoh = msg->sol;
|
||||
msg->sol = 0;
|
||||
msg->eol = msg->sov - msg->eoh;
|
||||
msg->msg_state = HTTP_MSG_BODY;
|
||||
return;
|
||||
|
||||
case HTTP_MSG_ERROR:
|
||||
/* this may only happen if we call http_msg_analyzer() twice with an error */
|
||||
break;
|
||||
|
||||
default:
|
||||
#ifdef DEBUG_FULL
|
||||
fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
|
||||
exit(1);
|
||||
#endif
|
||||
;
|
||||
}
|
||||
http_msg_ood:
|
||||
/* out of data: save the state and position to resume from on next call */
|
||||
msg->msg_state = state;
|
||||
msg->next = ptr - buf->p;
|
||||
return;
|
||||
|
||||
http_msg_invalid:
|
||||
/* invalid message: remember in which state the error was met */
|
||||
msg->err_state = state;
|
||||
msg->msg_state = HTTP_MSG_ERROR;
|
||||
msg->next = ptr - buf->p;
|
||||
return;
|
||||
}
|
||||
|
||||
/* This function skips trailers in the buffer associated with HTTP message
|
||||
* <msg>. The first visited position is msg->next. If the end of the trailers is
|
||||
* found, the function returns >0. So, the caller can automatically schedule it
|
||||
|
639
src/proto_http.c
639
src/proto_http.c
@ -1233,288 +1233,6 @@ void capture_headers(char *som, struct hdr_idx *idx,
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Parses an HTTP/1 status line located between <ptr> and <end>, resuming in
 * parser state <state>. Only states HTTP_MSG_RPVER, HTTP_MSG_RPVER_SP,
 * HTTP_MSG_RPCODE, HTTP_MSG_RPCODE_SP and HTTP_MSG_RPREASON are handled; any
 * other state gives undefined results.
 *
 * It is the caller's responsibility to ensure that ptr < end, and that
 * msg->sol points to the beginning of the response.
 *
 * Arguments:
 *   msg       - message being parsed. msg->sl.st.{v_l,c,c_l,r,r_l,l} are
 *               filled in as the version, status code and reason phrase get
 *               delimited, and msg->err_state is set when a parsing error is
 *               detected. Recorded positions are offsets relative to
 *               msg->chn->buf->p.
 *   state     - parser state to resume from.
 *   ptr, end  - first byte to examine and end of the available data.
 *   ret_ptr   - if non-NULL, receives the offset to resume from whenever NULL
 *               is returned.
 *   ret_state - if non-NULL, receives the state to resume from whenever NULL
 *               is returned; it is HTTP_MSG_ERROR after a parsing error.
 *
 * Return value: if a complete line is found (which implies that at least one
 * CR or LF is found before <end>), the updated <ptr> is returned. Otherwise
 * NULL is returned, indicating an incomplete or invalid line (which does not
 * mean that parts have not been updated).
 *
 * This function was intentionally designed to be called from
 * http_msg_analyzer() with the lowest overhead. It should integrate perfectly
 * within its state machine and use the same macros, hence the need for same
 * labels and variable names. Note that msg->sol is left unchanged.
 *
 * NOTE(review): EAT_AND_JUMP_OR_RETURN() is defined outside of this view; it
 * presumably advances <ptr> and jumps to its first label while data remain,
 * otherwise stores the given state and jumps to its second label - confirm
 * against the macro definition.
 */
|
||||
const char *http_parse_stsline(struct http_msg *msg,
|
||||
enum h1_state state, const char *ptr, const char *end,
|
||||
unsigned int *ret_ptr, enum h1_state *ret_state)
|
||||
{
|
||||
/* every position stored into msg->sl.st is an offset from this pointer */
const char *msg_start = msg->chn->buf->p;
|
||||
|
||||
switch (state) {
|
||||
case HTTP_MSG_RPVER:
|
||||
http_msg_rpver:
|
||||
if (likely(HTTP_IS_VER_TOKEN(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, HTTP_MSG_RPVER);
|
||||
|
||||
if (likely(HTTP_IS_SPHT(*ptr))) {
|
||||
msg->sl.st.v_l = ptr - msg_start;
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
|
||||
}
|
||||
/* neither a version character nor a space/tab: parsing error */
msg->err_state = HTTP_MSG_RPVER;
|
||||
state = HTTP_MSG_ERROR;
|
||||
break;
|
||||
|
||||
case HTTP_MSG_RPVER_SP:
|
||||
http_msg_rpver_sp:
|
||||
if (likely(!HTTP_IS_LWS(*ptr))) {
|
||||
msg->sl.st.c = ptr - msg_start;
|
||||
goto http_msg_rpcode;
|
||||
}
|
||||
if (likely(HTTP_IS_SPHT(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, HTTP_MSG_RPVER_SP);
|
||||
/* so it's a CR/LF, this is invalid */
|
||||
msg->err_state = HTTP_MSG_RPVER_SP;
|
||||
state = HTTP_MSG_ERROR;
|
||||
break;
|
||||
|
||||
case HTTP_MSG_RPCODE:
|
||||
http_msg_rpcode:
|
||||
/* the status code extends up to the first LWS character */
if (likely(!HTTP_IS_LWS(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, HTTP_MSG_RPCODE);
|
||||
|
||||
if (likely(HTTP_IS_SPHT(*ptr))) {
|
||||
msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
|
||||
}
|
||||
|
||||
/* so it's a CR/LF, so there is no reason phrase */
|
||||
msg->sl.st.c_l = ptr - msg_start - msg->sl.st.c;
|
||||
http_msg_rsp_reason:
|
||||
/* FIXME: should we support HTTP responses without any reason phrase ? */
|
||||
msg->sl.st.r = ptr - msg_start;
|
||||
msg->sl.st.r_l = 0;
|
||||
goto http_msg_rpline_eol;
|
||||
|
||||
case HTTP_MSG_RPCODE_SP:
|
||||
http_msg_rpcode_sp:
|
||||
if (likely(!HTTP_IS_LWS(*ptr))) {
|
||||
msg->sl.st.r = ptr - msg_start;
|
||||
goto http_msg_rpreason;
|
||||
}
|
||||
if (likely(HTTP_IS_SPHT(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, HTTP_MSG_RPCODE_SP);
|
||||
/* so it's a CR/LF, so there is no reason phrase */
|
||||
goto http_msg_rsp_reason;
|
||||
|
||||
case HTTP_MSG_RPREASON:
|
||||
http_msg_rpreason:
|
||||
/* the reason phrase extends up to the first CR or LF */
if (likely(!HTTP_IS_CRLF(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, HTTP_MSG_RPREASON);
|
||||
msg->sl.st.r_l = ptr - msg_start - msg->sl.st.r;
|
||||
http_msg_rpline_eol:
|
||||
/* We have seen the end of line. Note that we do not
 * necessarily have the \n yet, but at least we know that we
 * have EITHER \r OR \n, otherwise the response would not be
 * complete. We can then record the response length and return
 * to the caller which will be able to register it.
 */
|
||||
msg->sl.st.l = ptr - msg_start - msg->sol;
|
||||
return ptr;
|
||||
|
||||
default:
|
||||
#ifdef DEBUG_FULL
|
||||
fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
|
||||
exit(1);
|
||||
#endif
|
||||
;
|
||||
}
|
||||
|
||||
http_msg_ood:
|
||||
/* Out of valid data, or parsing error (<state> is then HTTP_MSG_ERROR):
 * report the position and state to the caller and tell it that no complete
 * line is available.
 */
|
||||
if (ret_state)
|
||||
*ret_state = state;
|
||||
if (ret_ptr)
|
||||
*ret_ptr = ptr - msg_start;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function parses a request line between <ptr> and <end>, starting with
|
||||
* parser state <state>. Only states HTTP_MSG_RQMETH, HTTP_MSG_RQMETH_SP,
|
||||
* HTTP_MSG_RQURI, HTTP_MSG_RQURI_SP and HTTP_MSG_RQVER are handled. Others
|
||||
* will give undefined results.
|
||||
* Note that it is the caller's responsibility to ensure that ptr < end,
|
||||
* and that msg->sol points to the beginning of the request.
|
||||
* If a complete line is found (which implies that at least one CR or LF is
|
||||
* found before <end>), the updated <ptr> is returned, otherwise NULL is
|
||||
* returned indicating an incomplete line (which does not mean that parts have
|
||||
* not been updated). In the incomplete case, if <ret_ptr> or <ret_state> are
|
||||
* non-NULL, they are fed with the new <ptr> and <state> values to be passed
|
||||
* upon next call.
|
||||
*
|
||||
* This function was intentionally designed to be called from
|
||||
* http_msg_analyzer() with the lowest overhead. It should integrate perfectly
|
||||
* within its state machine and use the same macros, hence the need for same
|
||||
* labels and variable names. Note that msg->sol is left unchanged.
|
||||
*/
|
||||
const char *http_parse_reqline(struct http_msg *msg,
|
||||
enum h1_state state, const char *ptr, const char *end,
|
||||
unsigned int *ret_ptr, enum h1_state *ret_state)
|
||||
{
|
||||
const char *msg_start = msg->chn->buf->p;
|
||||
|
||||
switch (state) {
|
||||
case HTTP_MSG_RQMETH:
|
||||
http_msg_rqmeth:
|
||||
if (likely(HTTP_IS_TOKEN(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, HTTP_MSG_RQMETH);
|
||||
|
||||
if (likely(HTTP_IS_SPHT(*ptr))) {
|
||||
msg->sl.rq.m_l = ptr - msg_start;
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
|
||||
}
|
||||
|
||||
if (likely(HTTP_IS_CRLF(*ptr))) {
|
||||
/* HTTP 0.9 request */
|
||||
msg->sl.rq.m_l = ptr - msg_start;
|
||||
http_msg_req09_uri:
|
||||
msg->sl.rq.u = ptr - msg_start;
|
||||
http_msg_req09_uri_e:
|
||||
msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
|
||||
http_msg_req09_ver:
|
||||
msg->sl.rq.v = ptr - msg_start;
|
||||
msg->sl.rq.v_l = 0;
|
||||
goto http_msg_rqline_eol;
|
||||
}
|
||||
msg->err_state = HTTP_MSG_RQMETH;
|
||||
state = HTTP_MSG_ERROR;
|
||||
break;
|
||||
|
||||
case HTTP_MSG_RQMETH_SP:
|
||||
http_msg_rqmeth_sp:
|
||||
if (likely(!HTTP_IS_LWS(*ptr))) {
|
||||
msg->sl.rq.u = ptr - msg_start;
|
||||
goto http_msg_rquri;
|
||||
}
|
||||
if (likely(HTTP_IS_SPHT(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, HTTP_MSG_RQMETH_SP);
|
||||
/* so it's a CR/LF, meaning an HTTP 0.9 request */
|
||||
goto http_msg_req09_uri;
|
||||
|
||||
case HTTP_MSG_RQURI:
|
||||
http_msg_rquri:
|
||||
#if defined(__x86_64__) || \
|
||||
defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
|
||||
defined(__ARM_ARCH_7A__)
|
||||
/* speedup: skip bytes not between 0x21 and 0x7e inclusive */
|
||||
while (ptr <= end - sizeof(int)) {
|
||||
int x = *(int *)ptr - 0x21212121;
|
||||
if (x & 0x80808080)
|
||||
break;
|
||||
|
||||
x -= 0x5e5e5e5e;
|
||||
if (!(x & 0x80808080))
|
||||
break;
|
||||
|
||||
ptr += sizeof(int);
|
||||
}
|
||||
#endif
|
||||
if (ptr >= end) {
|
||||
state = HTTP_MSG_RQURI;
|
||||
goto http_msg_ood;
|
||||
}
|
||||
http_msg_rquri2:
|
||||
if (likely((unsigned char)(*ptr - 33) <= 93)) /* 33 to 126 included */
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, HTTP_MSG_RQURI);
|
||||
|
||||
if (likely(HTTP_IS_SPHT(*ptr))) {
|
||||
msg->sl.rq.u_l = ptr - msg_start - msg->sl.rq.u;
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
|
||||
}
|
||||
|
||||
if (likely((unsigned char)*ptr >= 128)) {
|
||||
/* non-ASCII chars are forbidden unless option
|
||||
* accept-invalid-http-request is enabled in the frontend.
|
||||
* In any case, we capture the faulty char.
|
||||
*/
|
||||
if (msg->err_pos < -1)
|
||||
goto invalid_char;
|
||||
if (msg->err_pos == -1)
|
||||
msg->err_pos = ptr - msg_start;
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, HTTP_MSG_RQURI);
|
||||
}
|
||||
|
||||
if (likely(HTTP_IS_CRLF(*ptr))) {
|
||||
/* so it's a CR/LF, meaning an HTTP 0.9 request */
|
||||
goto http_msg_req09_uri_e;
|
||||
}
|
||||
|
||||
/* OK forbidden chars, 0..31 or 127 */
|
||||
invalid_char:
|
||||
msg->err_pos = ptr - msg_start;
|
||||
msg->err_state = HTTP_MSG_RQURI;
|
||||
state = HTTP_MSG_ERROR;
|
||||
break;
|
||||
|
||||
case HTTP_MSG_RQURI_SP:
|
||||
http_msg_rquri_sp:
|
||||
if (likely(!HTTP_IS_LWS(*ptr))) {
|
||||
msg->sl.rq.v = ptr - msg_start;
|
||||
goto http_msg_rqver;
|
||||
}
|
||||
if (likely(HTTP_IS_SPHT(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, HTTP_MSG_RQURI_SP);
|
||||
/* so it's a CR/LF, meaning an HTTP 0.9 request */
|
||||
goto http_msg_req09_ver;
|
||||
|
||||
case HTTP_MSG_RQVER:
|
||||
http_msg_rqver:
|
||||
if (likely(HTTP_IS_VER_TOKEN(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, HTTP_MSG_RQVER);
|
||||
|
||||
if (likely(HTTP_IS_CRLF(*ptr))) {
|
||||
msg->sl.rq.v_l = ptr - msg_start - msg->sl.rq.v;
|
||||
http_msg_rqline_eol:
|
||||
/* We have seen the end of line. Note that we do not
|
||||
* necessarily have the \n yet, but at least we know that we
|
||||
* have EITHER \r OR \n, otherwise the request would not be
|
||||
* complete. We can then record the request length and return
|
||||
* to the caller which will be able to register it.
|
||||
*/
|
||||
msg->sl.rq.l = ptr - msg_start - msg->sol;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* neither an HTTP_VER token nor a CRLF */
|
||||
msg->err_state = HTTP_MSG_RQVER;
|
||||
state = HTTP_MSG_ERROR;
|
||||
break;
|
||||
|
||||
default:
|
||||
#ifdef DEBUG_FULL
|
||||
fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
|
||||
exit(1);
|
||||
#endif
|
||||
;
|
||||
}
|
||||
|
||||
http_msg_ood:
|
||||
/* out of valid data */
|
||||
if (ret_state)
|
||||
*ret_state = state;
|
||||
if (ret_ptr)
|
||||
*ret_ptr = ptr - msg_start;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the data from Authorization header. Function may be called more
|
||||
* than once so data is stored in txn->auth_data. When no header is found
|
||||
@ -1597,363 +1315,6 @@ get_http_auth(struct stream *s)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* This function parses an HTTP message, either a request or a response,
|
||||
* depending on the initial msg->msg_state. The caller is responsible for
|
||||
* ensuring that the message does not wrap. The function can be preempted
|
||||
* everywhere when data are missing and recalled at the exact same location
|
||||
* with no information loss. The message may even be realigned between two
|
||||
* calls. The header index is re-initialized when switching from
|
||||
* MSG_R[PQ]BEFORE to MSG_RPVER|MSG_RQMETH. It modifies msg->sol among other
|
||||
* fields. Note that msg->sol will be initialized after completing the first
|
||||
* state, so that none of the msg pointers has to be initialized prior to the
|
||||
* first call.
|
||||
*/
|
||||
void http_msg_analyzer(struct http_msg *msg, struct hdr_idx *idx)
|
||||
{
|
||||
enum h1_state state; /* updated only when leaving the FSM */
|
||||
register char *ptr, *end; /* request pointers, to avoid dereferences */
|
||||
struct buffer *buf;
|
||||
|
||||
state = msg->msg_state;
|
||||
buf = msg->chn->buf;
|
||||
ptr = buf->p + msg->next;
|
||||
end = buf->p + buf->i;
|
||||
|
||||
if (unlikely(ptr >= end))
|
||||
goto http_msg_ood;
|
||||
|
||||
switch (state) {
|
||||
/*
|
||||
* First, states that are specific to the response only.
|
||||
* We check them first so that request and headers are
|
||||
* closer to each other (accessed more often).
|
||||
*/
|
||||
case HTTP_MSG_RPBEFORE:
|
||||
http_msg_rpbefore:
|
||||
if (likely(HTTP_IS_TOKEN(*ptr))) {
|
||||
/* we have a start of message, but we have to check
|
||||
* first if we need to remove some CRLF. We can only
|
||||
* do this when o=0.
|
||||
*/
|
||||
if (unlikely(ptr != buf->p)) {
|
||||
if (buf->o)
|
||||
goto http_msg_ood;
|
||||
/* Remove empty leading lines, as recommended by RFC2616. */
|
||||
bi_fast_delete(buf, ptr - buf->p);
|
||||
}
|
||||
msg->sol = 0;
|
||||
msg->sl.st.l = 0; /* used in debug mode */
|
||||
hdr_idx_init(idx);
|
||||
state = HTTP_MSG_RPVER;
|
||||
goto http_msg_rpver;
|
||||
}
|
||||
|
||||
if (unlikely(!HTTP_IS_CRLF(*ptr))) {
|
||||
state = HTTP_MSG_RPBEFORE;
|
||||
goto http_msg_invalid;
|
||||
}
|
||||
|
||||
if (unlikely(*ptr == '\n'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, HTTP_MSG_RPBEFORE_CR);
|
||||
/* stop here */
|
||||
|
||||
case HTTP_MSG_RPBEFORE_CR:
|
||||
http_msg_rpbefore_cr:
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPBEFORE_CR);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, HTTP_MSG_RPBEFORE);
|
||||
/* stop here */
|
||||
|
||||
case HTTP_MSG_RPVER:
|
||||
http_msg_rpver:
|
||||
case HTTP_MSG_RPVER_SP:
|
||||
case HTTP_MSG_RPCODE:
|
||||
case HTTP_MSG_RPCODE_SP:
|
||||
case HTTP_MSG_RPREASON:
|
||||
ptr = (char *)http_parse_stsline(msg,
|
||||
state, ptr, end,
|
||||
&msg->next, &msg->msg_state);
|
||||
if (unlikely(!ptr))
|
||||
return;
|
||||
|
||||
/* we have a full response and we know that we have either a CR
|
||||
* or an LF at <ptr>.
|
||||
*/
|
||||
hdr_idx_set_start(idx, msg->sl.st.l, *ptr == '\r');
|
||||
|
||||
msg->sol = ptr - buf->p;
|
||||
if (likely(*ptr == '\r'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, HTTP_MSG_RPLINE_END);
|
||||
goto http_msg_rpline_end;
|
||||
|
||||
case HTTP_MSG_RPLINE_END:
|
||||
http_msg_rpline_end:
|
||||
/* msg->sol must point to the first of CR or LF. */
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RPLINE_END);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
|
||||
/* stop here */
|
||||
|
||||
/*
|
||||
* Second, states that are specific to the request only
|
||||
*/
|
||||
case HTTP_MSG_RQBEFORE:
|
||||
http_msg_rqbefore:
|
||||
if (likely(HTTP_IS_TOKEN(*ptr))) {
|
||||
/* we have a start of message, but we have to check
|
||||
* first if we need to remove some CRLF. We can only
|
||||
* do this when o=0.
|
||||
*/
|
||||
if (likely(ptr != buf->p)) {
|
||||
if (buf->o)
|
||||
goto http_msg_ood;
|
||||
/* Remove empty leading lines, as recommended by RFC2616. */
|
||||
bi_fast_delete(buf, ptr - buf->p);
|
||||
}
|
||||
msg->sol = 0;
|
||||
msg->sl.rq.l = 0; /* used in debug mode */
|
||||
state = HTTP_MSG_RQMETH;
|
||||
goto http_msg_rqmeth;
|
||||
}
|
||||
|
||||
if (unlikely(!HTTP_IS_CRLF(*ptr))) {
|
||||
state = HTTP_MSG_RQBEFORE;
|
||||
goto http_msg_invalid;
|
||||
}
|
||||
|
||||
if (unlikely(*ptr == '\n'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, HTTP_MSG_RQBEFORE_CR);
|
||||
/* stop here */
|
||||
|
||||
case HTTP_MSG_RQBEFORE_CR:
|
||||
http_msg_rqbefore_cr:
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQBEFORE_CR);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, HTTP_MSG_RQBEFORE);
|
||||
/* stop here */
|
||||
|
||||
case HTTP_MSG_RQMETH:
|
||||
http_msg_rqmeth:
|
||||
case HTTP_MSG_RQMETH_SP:
|
||||
case HTTP_MSG_RQURI:
|
||||
case HTTP_MSG_RQURI_SP:
|
||||
case HTTP_MSG_RQVER:
|
||||
ptr = (char *)http_parse_reqline(msg,
|
||||
state, ptr, end,
|
||||
&msg->next, &msg->msg_state);
|
||||
if (unlikely(!ptr))
|
||||
return;
|
||||
|
||||
/* we have a full request and we know that we have either a CR
|
||||
* or an LF at <ptr>.
|
||||
*/
|
||||
hdr_idx_set_start(idx, msg->sl.rq.l, *ptr == '\r');
|
||||
|
||||
msg->sol = ptr - buf->p;
|
||||
if (likely(*ptr == '\r'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, HTTP_MSG_RQLINE_END);
|
||||
goto http_msg_rqline_end;
|
||||
|
||||
case HTTP_MSG_RQLINE_END:
|
||||
http_msg_rqline_end:
|
||||
/* check for HTTP/0.9 request : no version information available.
|
||||
* msg->sol must point to the first of CR or LF.
|
||||
*/
|
||||
if (unlikely(msg->sl.rq.v_l == 0))
|
||||
goto http_msg_last_lf;
|
||||
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_RQLINE_END);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, HTTP_MSG_HDR_FIRST);
|
||||
/* stop here */
|
||||
|
||||
/*
|
||||
* Common states below
|
||||
*/
|
||||
case HTTP_MSG_HDR_FIRST:
|
||||
http_msg_hdr_first:
|
||||
msg->sol = ptr - buf->p;
|
||||
if (likely(!HTTP_IS_CRLF(*ptr))) {
|
||||
goto http_msg_hdr_name;
|
||||
}
|
||||
|
||||
if (likely(*ptr == '\r'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
|
||||
goto http_msg_last_lf;
|
||||
|
||||
case HTTP_MSG_HDR_NAME:
|
||||
http_msg_hdr_name:
|
||||
/* assumes msg->sol points to the first char */
|
||||
if (likely(HTTP_IS_TOKEN(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
|
||||
|
||||
if (likely(*ptr == ':'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
|
||||
|
||||
if (likely(msg->err_pos < -1) || *ptr == '\n') {
|
||||
state = HTTP_MSG_HDR_NAME;
|
||||
goto http_msg_invalid;
|
||||
}
|
||||
|
||||
if (msg->err_pos == -1) /* capture error pointer */
|
||||
msg->err_pos = ptr - buf->p; /* >= 0 now */
|
||||
|
||||
/* and we still accept this non-token character */
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, HTTP_MSG_HDR_NAME);
|
||||
|
||||
case HTTP_MSG_HDR_L1_SP:
|
||||
http_msg_hdr_l1_sp:
|
||||
/* assumes msg->sol points to the first char */
|
||||
if (likely(HTTP_IS_SPHT(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, HTTP_MSG_HDR_L1_SP);
|
||||
|
||||
/* header value can be basically anything except CR/LF */
|
||||
msg->sov = ptr - buf->p;
|
||||
|
||||
if (likely(!HTTP_IS_CRLF(*ptr))) {
|
||||
goto http_msg_hdr_val;
|
||||
}
|
||||
|
||||
if (likely(*ptr == '\r'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, HTTP_MSG_HDR_L1_LF);
|
||||
goto http_msg_hdr_l1_lf;
|
||||
|
||||
case HTTP_MSG_HDR_L1_LF:
|
||||
http_msg_hdr_l1_lf:
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L1_LF);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, HTTP_MSG_HDR_L1_LWS);
|
||||
|
||||
case HTTP_MSG_HDR_L1_LWS:
|
||||
http_msg_hdr_l1_lws:
|
||||
if (likely(HTTP_IS_SPHT(*ptr))) {
|
||||
/* replace HT,CR,LF with spaces */
|
||||
for (; buf->p + msg->sov < ptr; msg->sov++)
|
||||
buf->p[msg->sov] = ' ';
|
||||
goto http_msg_hdr_l1_sp;
|
||||
}
|
||||
/* we had a header consisting only in spaces ! */
|
||||
msg->eol = msg->sov;
|
||||
goto http_msg_complete_header;
|
||||
|
||||
case HTTP_MSG_HDR_VAL:
|
||||
http_msg_hdr_val:
|
||||
/* assumes msg->sol points to the first char, and msg->sov
|
||||
* points to the first character of the value.
|
||||
*/
|
||||
|
||||
/* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
|
||||
* and lower. In fact since most of the time is spent in the loop, we
|
||||
* also remove the sign bit test so that bytes 0x8e..0x0d break the
|
||||
* loop, but we don't care since they're very rare in header values.
|
||||
*/
|
||||
#if defined(__x86_64__)
|
||||
while (ptr <= end - sizeof(long)) {
|
||||
if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
|
||||
goto http_msg_hdr_val2;
|
||||
ptr += sizeof(long);
|
||||
}
|
||||
#endif
|
||||
#if defined(__x86_64__) || \
|
||||
defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || \
|
||||
defined(__ARM_ARCH_7A__)
|
||||
while (ptr <= end - sizeof(int)) {
|
||||
if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
|
||||
goto http_msg_hdr_val2;
|
||||
ptr += sizeof(int);
|
||||
}
|
||||
#endif
|
||||
if (ptr >= end) {
|
||||
state = HTTP_MSG_HDR_VAL;
|
||||
goto http_msg_ood;
|
||||
}
|
||||
http_msg_hdr_val2:
|
||||
if (likely(!HTTP_IS_CRLF(*ptr)))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, HTTP_MSG_HDR_VAL);
|
||||
|
||||
msg->eol = ptr - buf->p;
|
||||
/* Note: we could also copy eol into ->eoh so that we have the
|
||||
* real header end in case it ends with lots of LWS, but is this
|
||||
* really needed ?
|
||||
*/
|
||||
if (likely(*ptr == '\r'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, HTTP_MSG_HDR_L2_LF);
|
||||
goto http_msg_hdr_l2_lf;
|
||||
|
||||
case HTTP_MSG_HDR_L2_LF:
|
||||
http_msg_hdr_l2_lf:
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_HDR_L2_LF);
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, HTTP_MSG_HDR_L2_LWS);
|
||||
|
||||
case HTTP_MSG_HDR_L2_LWS:
|
||||
http_msg_hdr_l2_lws:
|
||||
if (unlikely(HTTP_IS_SPHT(*ptr))) {
|
||||
/* LWS: replace HT,CR,LF with spaces */
|
||||
for (; buf->p + msg->eol < ptr; msg->eol++)
|
||||
buf->p[msg->eol] = ' ';
|
||||
goto http_msg_hdr_val;
|
||||
}
|
||||
http_msg_complete_header:
|
||||
/*
|
||||
* It was a new header, so the last one is finished.
|
||||
* Assumes msg->sol points to the first char, msg->sov points
|
||||
* to the first character of the value and msg->eol to the
|
||||
* first CR or LF so we know how the line ends. We insert last
|
||||
* header into the index.
|
||||
*/
|
||||
if (unlikely(hdr_idx_add(msg->eol - msg->sol, buf->p[msg->eol] == '\r',
|
||||
idx, idx->tail) < 0)) {
|
||||
state = HTTP_MSG_HDR_L2_LWS;
|
||||
goto http_msg_invalid;
|
||||
}
|
||||
|
||||
msg->sol = ptr - buf->p;
|
||||
if (likely(!HTTP_IS_CRLF(*ptr))) {
|
||||
goto http_msg_hdr_name;
|
||||
}
|
||||
|
||||
if (likely(*ptr == '\r'))
|
||||
EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, HTTP_MSG_LAST_LF);
|
||||
goto http_msg_last_lf;
|
||||
|
||||
case HTTP_MSG_LAST_LF:
|
||||
http_msg_last_lf:
|
||||
/* Assumes msg->sol points to the first of either CR or LF.
|
||||
* Sets ->sov and ->next to the total header length, ->eoh to
|
||||
* the last CRLF, and ->eol to the last CRLF length (1 or 2).
|
||||
*/
|
||||
EXPECT_LF_HERE(ptr, http_msg_invalid, state, HTTP_MSG_LAST_LF);
|
||||
ptr++;
|
||||
msg->sov = msg->next = ptr - buf->p;
|
||||
msg->eoh = msg->sol;
|
||||
msg->sol = 0;
|
||||
msg->eol = msg->sov - msg->eoh;
|
||||
msg->msg_state = HTTP_MSG_BODY;
|
||||
return;
|
||||
|
||||
case HTTP_MSG_ERROR:
|
||||
/* this may only happen if we call http_msg_analyser() twice with an error */
|
||||
break;
|
||||
|
||||
default:
|
||||
#ifdef DEBUG_FULL
|
||||
fprintf(stderr, "FIXME !!!! impossible state at %s:%d = %d\n", __FILE__, __LINE__, state);
|
||||
exit(1);
|
||||
#endif
|
||||
;
|
||||
}
|
||||
http_msg_ood:
|
||||
/* out of data */
|
||||
msg->msg_state = state;
|
||||
msg->next = ptr - buf->p;
|
||||
return;
|
||||
|
||||
http_msg_invalid:
|
||||
/* invalid message */
|
||||
msg->err_state = state;
|
||||
msg->msg_state = HTTP_MSG_ERROR;
|
||||
msg->next = ptr - buf->p;
|
||||
return;
|
||||
}
|
||||
|
||||
/* convert an HTTP/0.9 request into an HTTP/1.0 request. Returns 1 if the
|
||||
* conversion succeeded, 0 in case of error. If the request was already 1.X,
|
||||
* nothing is done and 1 is returned.
|
||||
|
Loading…
Reference in New Issue
Block a user