From a9e1667b2b5e554f8d8e15b05e49d630b6040066 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Thu, 17 Jul 2025 18:53:26 +0200 Subject: [PATCH 01/39] vtc: White space cleanup in t02027 No diff with the --ignore-all-space option. --- bin/varnishtest/tests/t02027.vtc | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/bin/varnishtest/tests/t02027.vtc b/bin/varnishtest/tests/t02027.vtc index 5bc7b48160..a27976f3b4 100644 --- a/bin/varnishtest/tests/t02027.vtc +++ b/bin/varnishtest/tests/t02027.vtc @@ -22,11 +22,11 @@ logexpect l0 -v v1 -g vxid -q "Begin ~ sess" { client c0 { txpri shutdown -write - stream 0 { + stream 0 { rxsettings - rxgoaway - expect goaway.laststream == 0 - expect goaway.err == NO_ERROR + rxgoaway + expect goaway.laststream == 0 + expect goaway.err == NO_ERROR } -run } -run @@ -46,10 +46,10 @@ client c1 { txreq -nohdrend } -run shutdown -write - stream 0 { - rxgoaway - expect goaway.laststream == 1 - expect goaway.err == NO_ERROR + stream 0 { + rxgoaway + expect goaway.laststream == 1 + expect goaway.err == NO_ERROR } -run } -run @@ -69,10 +69,10 @@ client c2 { txreq -nostrend } -run shutdown -write - stream 0 { - rxgoaway - expect goaway.laststream == 1 - expect goaway.err == NO_ERROR + stream 0 { + rxgoaway + expect goaway.laststream == 1 + expect goaway.err == NO_ERROR } -run } -run @@ -89,10 +89,10 @@ logexpect l3 -v v1 -g vxid -q "Begin ~ sess" { # middle of frame client c3 { stream 1 { - # +- 01 END_STREAM + # +- 01 END_STREAM # +- 04 END_HEADERS - # | - # len ty fl strmid + # | + # len ty fl strmid sendhex { 000024 01 05 00000001 00053a70617468012f00073a6d6574686f640347455400073a736368656d6504687474 @@ -101,10 +101,10 @@ client c3 { # 00053a70617468012f00073a6d6574686f640347455400073a736368656d650468747470 } -run shutdown -write - stream 0 { - rxgoaway - expect goaway.laststream == 0 - expect goaway.err == NO_ERROR + stream 0 { + rxgoaway + expect goaway.laststream == 0 + expect 
goaway.err == NO_ERROR } -run } -run From 6e330568662af5f7fd0dbdc669be9baefdbcaf13 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Fri, 18 Jul 2025 10:49:09 +0200 Subject: [PATCH 02/39] vtc: Stabilize r2387 The connection may be closed before either stream 1 or 3 is done sending a CONTINUATION frame. --- bin/varnishtest/tests/r02387.vtc | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/bin/varnishtest/tests/r02387.vtc b/bin/varnishtest/tests/r02387.vtc index d2c9796e71..3d9dab7f45 100644 --- a/bin/varnishtest/tests/r02387.vtc +++ b/bin/varnishtest/tests/r02387.vtc @@ -11,8 +11,8 @@ varnish v1 -cliok "param.set feature +http2" varnish v1 -cliok "param.set debug +syncvsl" -barrier b1 cond 2 -barrier b2 cond 2 +barrier b1 cond 3 +barrier b2 cond 3 client c1 { stream 1 { @@ -27,7 +27,16 @@ client c1 { barrier b1 sync txcont -hdr "bar" "foo" - } -run + } -start + + barrier b2 sync + non_fatal + barrier b1 sync + + stream 1 -wait + stream 3 -wait + fatal + stream 0 { rxgoaway expect goaway.laststream == "1" From 541ecb12d2f383e3dcac4c5c719ab3975a53f3fd Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Fri, 13 Jun 2025 11:54:16 +0200 Subject: [PATCH 03/39] vefd: Thin emulation layer over eventfd This is a minimal API not attempting to keep track of the eventfd counter, and only caring about threads signalling progress on one end, and another thread being able to notice that progress can be made. This is an alternative to condvars when waiting for progress will contend with polling concurrently waiting for progress elsewhere. Since locks/condvars don't compose well with file descriptors, the VEFD API offers an alternative that can integrate an existing poll on other file descriptors. 
--- include/Makefile.am | 1 + include/vefd.h | 50 +++++++++++++++++ lib/libvarnish/Makefile.am | 1 + lib/libvarnish/vefd.c | 109 +++++++++++++++++++++++++++++++++++++ 4 files changed, 161 insertions(+) create mode 100644 include/vefd.h create mode 100644 lib/libvarnish/vefd.c diff --git a/include/Makefile.am b/include/Makefile.am index 347724d70a..a44b32ccbd 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -95,6 +95,7 @@ nobase_noinst_HEADERS = \ vcs_version.h \ vct.h \ vcurses.h \ + vefd.h \ venc.h \ vend.h \ vev.h \ diff --git a/include/vefd.h b/include/vefd.h new file mode 100644 index 0000000000..5f41bada1e --- /dev/null +++ b/include/vefd.h @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2025 Varnish Software AS + * All rights reserved. + * + * Author: Dridi Boukelmoune + * + * SPDX-License-Identifier: BSD-2-Clause + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +struct vefd { + unsigned magic; +#define VEFD_MAGIC 0x1548c1a6 + int poll_fd; + int priv_fd; +}; + +#define VEFD_INIT(vefd) \ + do { \ + INIT_OBJ(vefd, VEFD_MAGIC); \ + (vefd)->poll_fd = -1; \ + (vefd)->priv_fd = -1; \ + } while (0) + +int VEFD_Open(struct vefd *); +int VEFD_Signal(struct vefd *); +int VEFD_Clear(struct vefd *); +int VEFD_Close(struct vefd *); diff --git a/lib/libvarnish/Makefile.am b/lib/libvarnish/Makefile.am index 2210911107..1e7e984b8b 100644 --- a/lib/libvarnish/Makefile.am +++ b/lib/libvarnish/Makefile.am @@ -23,6 +23,7 @@ libvarnish_la_SOURCES = \ vcli_serve.c \ vct.c \ venc.c \ + vefd.c \ version.c \ vev.c \ vfil.c \ diff --git a/lib/libvarnish/vefd.c b/lib/libvarnish/vefd.c new file mode 100644 index 0000000000..05860e1004 --- /dev/null +++ b/lib/libvarnish/vefd.c @@ -0,0 +1,109 @@ +/*- + * Copyright (c) 2025 Varnish Software AS + * All rights reserved. + * + * Author: Dridi Boukelmoune + * + * SPDX-License-Identifier: BSD-2-Clause + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include "config.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +int +VEFD_Open(struct vefd *vefd) +{ + int fd[2]; + + CHECK_OBJ_NOTNULL(vefd, VEFD_MAGIC); + assert(vefd->poll_fd == -1); + assert(vefd->priv_fd == -1); + + if (pipe(fd) < 0) + return (-1); + + AZ(fcntl(fd[0], F_SETFL, O_CLOEXEC|O_NONBLOCK)); + AZ(fcntl(fd[1], F_SETFL, O_CLOEXEC|O_NONBLOCK)); + vefd->poll_fd = fd[0]; + vefd->priv_fd = fd[1]; + return (0); +} + +int +VEFD_Signal(struct vefd *vefd) +{ + ssize_t r; + + CHECK_OBJ_NOTNULL(vefd, VEFD_MAGIC); + assert(vefd->poll_fd >= 0); + assert(vefd->priv_fd >= 0); + assert(vefd->poll_fd != vefd->priv_fd); + r = write(vefd->priv_fd, "", 1); + if (r < 0 && errno != EAGAIN && errno != EWOULDBLOCK) + return (-1); + return (0); +} + +int +VEFD_Clear(struct vefd *vefd) +{ + char buf[64]; + ssize_t r; + + CHECK_OBJ_NOTNULL(vefd, VEFD_MAGIC); + assert(vefd->poll_fd >= 0); + assert(vefd->priv_fd >= 0); + 
assert(vefd->poll_fd != vefd->priv_fd); + do { + r = read(vefd->poll_fd, buf, sizeof buf); + } while (r > 0); + if (errno != EAGAIN && errno != EWOULDBLOCK) + return (-1); + return (0); +} + +int +VEFD_Close(struct vefd *vefd) +{ + + CHECK_OBJ_NOTNULL(vefd, VEFD_MAGIC); + assert(vefd->poll_fd >= 0); + assert(vefd->priv_fd >= 0); + assert(vefd->poll_fd != vefd->priv_fd); + closefd(&vefd->poll_fd); + closefd(&vefd->priv_fd); + return (0); +} From 4c0b6f54b63a95fc6f74134f3dfcc28726897e49 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Fri, 13 Jun 2025 12:05:17 +0200 Subject: [PATCH 04/39] build: Detect eventfd() at configure time --- configure.ac | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configure.ac b/configure.ac index 2483dfeac2..4ff0d3eb65 100644 --- a/configure.ac +++ b/configure.ac @@ -483,6 +483,8 @@ else ac_cv_func_port_create=no fi +AC_CHECK_FUNCS([eventfd]) + # --with-persistent-storage AC_ARG_WITH(persistent-storage, AS_HELP_STRING([--with-persistent-storage], From 845dfc532291ba03a15ecfc25b96d0df67e56fb6 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Fri, 13 Jun 2025 12:15:22 +0200 Subject: [PATCH 05/39] vefd: Use eventfd() when available --- lib/libvarnish/vefd.c | 56 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/lib/libvarnish/vefd.c b/lib/libvarnish/vefd.c index 05860e1004..8be1c70311 100644 --- a/lib/libvarnish/vefd.c +++ b/lib/libvarnish/vefd.c @@ -32,8 +32,13 @@ #include "config.h" +#if HAVE_EVENTFD +# include +#else +# include +#endif + #include -#include #include #include #include @@ -43,6 +48,54 @@ #include #include +#if HAVE_EVENTFD +int +VEFD_Open(struct vefd *vefd) +{ + + CHECK_OBJ_NOTNULL(vefd, VEFD_MAGIC); + assert(vefd->poll_fd == -1); + assert(vefd->priv_fd == -1); + + vefd->poll_fd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + return (vefd->poll_fd); +} + +int +VEFD_Signal(struct vefd *vefd) +{ + int64_t buf = 1; + + CHECK_OBJ_NOTNULL(vefd, VEFD_MAGIC); + 
assert(vefd->poll_fd >= 0); + assert(vefd->priv_fd == -1); + assert(write(vefd->poll_fd, &buf, sizeof buf) == sizeof buf); + return (0); +} + +int +VEFD_Clear(struct vefd *vefd) +{ + int64_t buf; + + CHECK_OBJ_NOTNULL(vefd, VEFD_MAGIC); + assert(vefd->poll_fd >= 0); + assert(vefd->priv_fd == -1); + assert(read(vefd->poll_fd, &buf, sizeof buf) == sizeof buf); + return (0); +} + +int +VEFD_Close(struct vefd *vefd) +{ + + CHECK_OBJ_NOTNULL(vefd, VEFD_MAGIC); + assert(vefd->poll_fd >= 0); + assert(vefd->priv_fd == -1); + closefd(&vefd->poll_fd); + return (0); +} +#else /* !HAVE_EVENTFD */ int VEFD_Open(struct vefd *vefd) { @@ -107,3 +160,4 @@ VEFD_Close(struct vefd *vefd) closefd(&vefd->priv_fd); return (0); } +#endif /* HAVE_EVENTFD */ From bd12dce986c847b61bdfc76c53d86023d5e49e59 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Mon, 3 Mar 2025 11:53:14 +0100 Subject: [PATCH 06/39] http2_session: Always pass a req to h2_init_sess() The caller is responsible for passing a req for stream 0 depending on a prior knowledge setup of the session or an opportunistic upgrade from an HTTP/1 session. The signature reflects that h2_sess takes ownership of req. --- bin/varnishd/http2/cache_http2_session.c | 37 +++++++++++++++++------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index 46b02dc094..33012dfa64 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -120,20 +120,20 @@ H2S_Lock_VSLb(const struct h2_sess *h2, enum VSL_tag_e tag, const char *fmt, ... 
*/ static struct h2_sess * -h2_init_sess(struct sess *sp, - struct h2_sess *h2s, struct req *srq, struct h2h_decode *decode) +h2_init_sess(struct sess *sp, struct h2_sess *h2s, struct req **psrq, + struct h2h_decode *decode) { + struct req *srq; uintptr_t *up; struct h2_sess *h2; + TAKE_OBJ_NOTNULL(srq, psrq, REQ_MAGIC); + /* proto_priv session attribute will always have been set up by H1 * before reaching here. */ AZ(SES_Get_proto_priv(sp, &up)); assert(*up == 0); - if (srq == NULL) - srq = Req_New(sp, NULL); - AN(srq); h2 = h2s; AN(h2); INIT_OBJ(h2, H2_SESS_MAGIC); @@ -366,12 +366,13 @@ H2_OU_Sess(struct worker *wrk, struct sess *sp, struct req *req) static void v_matchproto_(task_func_t) h2_new_session(struct worker *wrk, void *arg) { - struct req *req; + struct req *req, *srq = NULL; struct sess *sp; struct h2_sess h2s; struct h2_sess *h2; struct h2_req *r2, *r22; int again; + uint16_t marker; uint8_t settings[48]; struct h2h_decode decode; size_t l; @@ -386,10 +387,26 @@ h2_new_session(struct worker *wrk, void *arg) assert(req->transport == &HTTP2_transport); - assert (req->err_code == H2_PU_MARKER || req->err_code == H2_OU_MARKER); + marker = req->err_code; + assert(marker == H2_PU_MARKER || marker == H2_OU_MARKER); + req->err_code = 0; + + if (marker == H2_PU_MARKER) { + /* Prior knowledge. The incoming req does not hold + * anything of value and can be repurposed as the session + * req (srq). */ + srq = req; + req = NULL; + } else { + /* Opportunistic upgrade. The incoming req holds the first + * stream H/1 received request. We will need a fresh req + * for srq. */ + srq = Req_New(sp, NULL); + } + CHECK_OBJ_NOTNULL(srq, REQ_MAGIC); - h2 = h2_init_sess(sp, &h2s, - req->err_code == H2_PU_MARKER ? 
req : NULL, &decode); + h2 = h2_init_sess(sp, &h2s, &srq, &decode); + AZ(srq); h2->req0 = h2_new_req(h2, 0, NULL); AZ(h2->htc->priv); h2->htc->priv = h2; @@ -397,7 +414,7 @@ h2_new_session(struct worker *wrk, void *arg) AZ(wrk->vsl); wrk->vsl = h2->vsl; - if (req->err_code == H2_OU_MARKER && !h2_ou_session(wrk, h2, req)) { + if (marker == H2_OU_MARKER && !h2_ou_session(wrk, h2, req)) { assert(h2->refcnt == 1); h2_del_req(wrk, h2->req0); h2_del_sess(wrk, h2, SC_RX_JUNK); From 04fbc504c264241378c189005dcb0bf95a18aa12 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Mon, 3 Mar 2025 13:19:06 +0100 Subject: [PATCH 07/39] http2_proto: Take immediate ownership of req in h2_req The signature of h2_new_req() changed to reflect that. --- bin/varnishd/http2/cache_http2.h | 2 +- bin/varnishd/http2/cache_http2_proto.c | 10 +++++++--- bin/varnishd/http2/cache_http2_session.c | 23 +++++++++++++---------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/bin/varnishd/http2/cache_http2.h b/bin/varnishd/http2/cache_http2.h index ba036b84d6..faacd9fb99 100644 --- a/bin/varnishd/http2/cache_http2.h +++ b/bin/varnishd/http2/cache_http2.h @@ -264,7 +264,7 @@ void H2_Send(struct worker *, struct h2_req *, h2_frame type, uint8_t flags, uint32_t len, const void *, uint64_t *acct); /* cache_http2_proto.c */ -struct h2_req * h2_new_req(struct h2_sess *, unsigned stream, struct req *); +struct h2_req * h2_new_req(struct h2_sess *, unsigned stream, struct req **); h2_error h2_stream_tmo(struct h2_sess *, const struct h2_req *, vtim_real); void h2_del_req(struct worker *, struct h2_req *); void h2_kill_req(struct worker *, struct h2_sess *, struct h2_req *, h2_error); diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 254275ebd3..b2c8077fc0 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -144,14 +144,18 @@ h2_connectionerror(uint32_t u) 
/**********************************************************************/ struct h2_req * -h2_new_req(struct h2_sess *h2, unsigned stream, struct req *req) +h2_new_req(struct h2_sess *h2, unsigned stream, struct req **preq) { + struct req *req; struct h2_req *r2; ASSERT_RXTHR(h2); - if (req == NULL) + if (preq != NULL) + TAKE_OBJ_NOTNULL(req, preq, REQ_MAGIC); + else { req = Req_New(h2->sess, NULL); - CHECK_OBJ_NOTNULL(req, REQ_MAGIC); + CHECK_OBJ_NOTNULL(req, REQ_MAGIC); + } r2 = WS_Alloc(req->ws, sizeof *r2); AN(r2); diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index 33012dfa64..0e54f26fcf 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -277,12 +277,15 @@ h2_ou_rel(struct worker *wrk, struct req *req) static int h2_ou_session(struct worker *wrk, struct h2_sess *h2, - struct req *req) + struct req **preq) { + struct req *req; ssize_t sz; enum htc_status_e hs; struct h2_req *r2; + TAKE_OBJ_NOTNULL(req, preq, REQ_MAGIC); + if (h2_b64url_settings(h2, req)) { VSLb(h2->vsl, SLT_Debug, "H2: Bad HTTP-Settings"); return (h2_ou_rel(wrk, req)); @@ -310,17 +313,17 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, HTC_RxInit(h2->htc, h2->ws); /* Start req thread */ - r2 = h2_new_req(h2, 1, req); + r2 = h2_new_req(h2, 1, &req); + AZ(req); AZ(h2->highest_stream); h2->highest_stream = r2->stream; - req->transport = &HTTP2_transport; - assert(req->req_step == R_STP_TRANSPORT); - req->task->func = h2_do_req; - req->task->priv = req; + r2->req->transport = &HTTP2_transport; + assert(r2->req->req_step == R_STP_TRANSPORT); + r2->req->task->func = h2_do_req; + r2->req->task->priv = r2->req; r2->scheduled = 1; r2->state = H2_S_CLOS_REM; // rfc7540,l,489,491 - req->err_code = 0; - http_SetH(req->http, HTTP_HDR_PROTO, "HTTP/2.0"); + http_SetH(r2->req->http, HTTP_HDR_PROTO, "HTTP/2.0"); /* Wait for PRISM response */ hs = HTC_RxStuff(h2->htc, H2_prism_complete, @@ -332,7 +335,7 
@@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, h2_del_req(wrk, r2); return (0); } - if (Pool_Task(wrk->pool, req->task, TASK_QUEUE_REQ)) { + if (Pool_Task(wrk->pool, r2->req->task, TASK_QUEUE_REQ)) { r2->scheduled = 0; h2_del_req(wrk, r2); VSLb(h2->vsl, SLT_Debug, "H2: No Worker-threads"); @@ -414,7 +417,7 @@ h2_new_session(struct worker *wrk, void *arg) AZ(wrk->vsl); wrk->vsl = h2->vsl; - if (marker == H2_OU_MARKER && !h2_ou_session(wrk, h2, req)) { + if (marker == H2_OU_MARKER && !h2_ou_session(wrk, h2, &req)) { assert(h2->refcnt == 1); h2_del_req(wrk, h2->req0); h2_del_sess(wrk, h2, SC_RX_JUNK); From e37c41a0bc418dac3c8df660bc436b5152b921b9 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Wed, 9 Apr 2025 18:32:00 +0200 Subject: [PATCH 08/39] http2_session: Remove misleading return value The h2_ou_rel() function is fail-safe, and passing a phony return value further down is misleading. --- bin/varnishd/http2/cache_http2_session.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index 0e54f26fcf..5dee8c02bc 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -264,7 +264,7 @@ h2_b64url_settings(struct h2_sess *h2, struct req *req) /**********************************************************************/ -static int +static void h2_ou_rel(struct worker *wrk, struct req *req) { CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); @@ -272,7 +272,6 @@ h2_ou_rel(struct worker *wrk, struct req *req) AZ(req->vcl); Req_AcctLogCharge(wrk->stats, req); Req_Release(req); - return (0); } static int @@ -288,7 +287,8 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, if (h2_b64url_settings(h2, req)) { VSLb(h2->vsl, SLT_Debug, "H2: Bad HTTP-Settings"); - return (h2_ou_rel(wrk, req)); + h2_ou_rel(wrk, req); + return (0); } sz = write(h2->sess->fd, h2_resp_101, strlen(h2_resp_101)); @@ -296,7 +296,8 @@ 
h2_ou_session(struct worker *wrk, struct h2_sess *h2, if (sz != strlen(h2_resp_101)) { VSLb(h2->vsl, SLT_Debug, "H2: Upgrade: Error writing 101" " response: %s\n", VAS_errtxt(errno)); - return (h2_ou_rel(wrk, req)); + h2_ou_rel(wrk, req); + return (0); } http_Unset(req->http, H_Upgrade); From b0c75fd9e992f336b973e2286e6b4e0f6ff06bd5 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Wed, 9 Apr 2025 18:35:26 +0200 Subject: [PATCH 09/39] http2_session: Rename h2_ou_rel() to h2_ou_rel_req() This clarifies what is being released without looking at the signature. --- bin/varnishd/http2/cache_http2_session.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index 5dee8c02bc..05410ebc44 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -265,7 +265,7 @@ h2_b64url_settings(struct h2_sess *h2, struct req *req) /**********************************************************************/ static void -h2_ou_rel(struct worker *wrk, struct req *req) +h2_ou_rel_req(struct worker *wrk, struct req *req) { CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); CHECK_OBJ_NOTNULL(req, REQ_MAGIC); @@ -287,7 +287,7 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, if (h2_b64url_settings(h2, req)) { VSLb(h2->vsl, SLT_Debug, "H2: Bad HTTP-Settings"); - h2_ou_rel(wrk, req); + h2_ou_rel_req(wrk, req); return (0); } @@ -296,7 +296,7 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, if (sz != strlen(h2_resp_101)) { VSLb(h2->vsl, SLT_Debug, "H2: Upgrade: Error writing 101" " response: %s\n", VAS_errtxt(errno)); - h2_ou_rel(wrk, req); + h2_ou_rel_req(wrk, req); return (0); } From 433106d93ef75b4aa4ae6dd29d728ac9761747ee Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Wed, 9 Apr 2025 18:37:29 +0200 Subject: [PATCH 10/39] http2_session: Take ownership of req in h2_ou_rel_req() It becomes unusable at the call site, so the 
signature should reflect that. --- bin/varnishd/http2/cache_http2_session.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index 05410ebc44..ec2ef6d2b7 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -265,10 +265,12 @@ h2_b64url_settings(struct h2_sess *h2, struct req *req) /**********************************************************************/ static void -h2_ou_rel_req(struct worker *wrk, struct req *req) +h2_ou_rel_req(struct worker *wrk, struct req **preq) { + struct req *req; + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - CHECK_OBJ_NOTNULL(req, REQ_MAGIC); + TAKE_OBJ_NOTNULL(req, preq, REQ_MAGIC); AZ(req->vcl); Req_AcctLogCharge(wrk->stats, req); Req_Release(req); @@ -287,7 +289,7 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, if (h2_b64url_settings(h2, req)) { VSLb(h2->vsl, SLT_Debug, "H2: Bad HTTP-Settings"); - h2_ou_rel_req(wrk, req); + h2_ou_rel_req(wrk, &req); return (0); } @@ -296,7 +298,7 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, if (sz != strlen(h2_resp_101)) { VSLb(h2->vsl, SLT_Debug, "H2: Upgrade: Error writing 101" " response: %s\n", VAS_errtxt(errno)); - h2_ou_rel_req(wrk, req); + h2_ou_rel_req(wrk, &req); return (0); } From b19d4e7a179c6e9c65412ba3cba8b181341fbd25 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Mon, 3 Mar 2025 15:20:20 +0100 Subject: [PATCH 11/39] http2_proto: Make h2_del_req() take the struct h2_req The signature is changed to reflect that the h2_req is no longer usable after calling the function. 
--- bin/varnishd/http2/cache_http2.h | 2 +- bin/varnishd/http2/cache_http2_proto.c | 17 +++++++++-------- bin/varnishd/http2/cache_http2_session.c | 8 ++++---- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/bin/varnishd/http2/cache_http2.h b/bin/varnishd/http2/cache_http2.h index faacd9fb99..5dc358913f 100644 --- a/bin/varnishd/http2/cache_http2.h +++ b/bin/varnishd/http2/cache_http2.h @@ -266,7 +266,7 @@ void H2_Send(struct worker *, struct h2_req *, h2_frame type, uint8_t flags, /* cache_http2_proto.c */ struct h2_req * h2_new_req(struct h2_sess *, unsigned stream, struct req **); h2_error h2_stream_tmo(struct h2_sess *, const struct h2_req *, vtim_real); -void h2_del_req(struct worker *, struct h2_req *); +void h2_del_req(struct worker *, struct h2_req **); void h2_kill_req(struct worker *, struct h2_sess *, struct h2_req *, h2_error); int h2_rxframe(struct worker *, struct h2_sess *); h2_error h2_set_setting(struct h2_sess *, const uint8_t *); diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index b2c8077fc0..8b639d0555 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -179,13 +179,14 @@ h2_new_req(struct h2_sess *h2, unsigned stream, struct req **preq) } void -h2_del_req(struct worker *wrk, struct h2_req *r2) +h2_del_req(struct worker *wrk, struct h2_req **pr2) { + struct h2_req *r2; struct h2_sess *h2; struct sess *sp; struct stv_buffer *stvbuf; - CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); + TAKE_OBJ_NOTNULL(r2, pr2, H2_REQ_MAGIC); AZ(r2->scheduled); h2 = r2->h2sess; CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); @@ -245,7 +246,7 @@ h2_kill_req(struct worker *wrk, struct h2_sess *h2, (void)h2h_decode_hdr_fini(h2); } if (r2 != NULL) - h2_del_req(wrk, r2); + h2_del_req(wrk, &r2); } /**********************************************************************/ @@ -656,7 +657,7 @@ h2_end_headers(struct worker *wrk, struct h2_sess *h2, if (h2e != NULL) { H2S_Lock_VSLb(h2, 
SLT_Debug, "HPACK/FINI %s", h2e->name); assert(!WS_IsReserved(r2->req->ws)); - h2_del_req(wrk, r2); + h2_del_req(wrk, &r2); return (h2e); } req->t_req = VTIM_real(); @@ -814,7 +815,7 @@ h2_rx_headers(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) H2S_Lock_VSLb(h2, SLT_Debug, "HPACK(hdr) %s", h2e->name); (void)h2h_decode_hdr_fini(h2); assert(!WS_IsReserved(r2->req->ws)); - h2_del_req(wrk, r2); + h2_del_req(wrk, &r2); return (h2e); } @@ -850,7 +851,7 @@ h2_rx_continuation(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) H2S_Lock_VSLb(h2, SLT_Debug, "HPACK(cont) %s", h2e->name); (void)h2h_decode_hdr_fini(h2); assert(!WS_IsReserved(r2->req->ws)); - h2_del_req(wrk, r2); + h2_del_req(wrk, &r2); return (h2e); } if (h2->rxf_flags & H2FF_HEADERS_END_HEADERS) @@ -1424,7 +1425,7 @@ h2_sweep(struct worker *wrk, struct h2_sess *h2) switch (r2->state) { case H2_S_CLOSED: AZ(r2->scheduled); - h2_del_req(wrk, r2); + h2_del_req(wrk, &r2); break; case H2_S_CLOS_REM: if (!r2->scheduled) { @@ -1432,7 +1433,7 @@ h2_sweep(struct worker *wrk, struct h2_sess *h2) H2_Send_RST(wrk, h2, h2->req0, r2->stream, H2SE_REFUSED_STREAM); H2_Send_Rel(h2, h2->req0); - h2_del_req(wrk, r2); + h2_del_req(wrk, &r2); continue; } /* FALLTHROUGH */ diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index ec2ef6d2b7..78758275af 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -335,12 +335,12 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, if (hs != HTC_S_COMPLETE) { VSLb(h2->vsl, SLT_Debug, "H2: No/Bad OU PRISM (hs=%d)", hs); r2->scheduled = 0; - h2_del_req(wrk, r2); + h2_del_req(wrk, &r2); return (0); } if (Pool_Task(wrk->pool, r2->req->task, TASK_QUEUE_REQ)) { r2->scheduled = 0; - h2_del_req(wrk, r2); + h2_del_req(wrk, &r2); VSLb(h2->vsl, SLT_Debug, "H2: No Worker-threads"); return (0); } @@ -422,7 +422,7 @@ h2_new_session(struct worker *wrk, void *arg) if (marker == 
H2_OU_MARKER && !h2_ou_session(wrk, h2, &req)) { assert(h2->refcnt == 1); - h2_del_req(wrk, h2->req0); + h2_del_req(wrk, &h2->req0); h2_del_sess(wrk, h2, SC_RX_JUNK); wrk->vsl = NULL; return; @@ -491,7 +491,7 @@ h2_new_session(struct worker *wrk, void *arg) } h2->cond = NULL; assert(h2->refcnt == 1); - h2_del_req(wrk, h2->req0); + h2_del_req(wrk, &h2->req0); h2_del_sess(wrk, h2, h2->error->reason); wrk->vsl = NULL; } From 950a57846e9bf06f0cd5eea40e0a25d56b9ac15a Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Mon, 3 Mar 2025 15:29:16 +0100 Subject: [PATCH 12/39] http2_session: Directly return the h2_req set up This is to be better prepared for handling errors at the call site. --- bin/varnishd/http2/cache_http2_session.c | 41 +++++++++++++++++------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index 78758275af..9bde607f3c 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -276,7 +276,7 @@ h2_ou_rel_req(struct worker *wrk, struct req **preq) Req_Release(req); } -static int +static struct h2_req * h2_ou_session(struct worker *wrk, struct h2_sess *h2, struct req **preq) { @@ -290,7 +290,7 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, if (h2_b64url_settings(h2, req)) { VSLb(h2->vsl, SLT_Debug, "H2: Bad HTTP-Settings"); h2_ou_rel_req(wrk, &req); - return (0); + return (NULL); } sz = write(h2->sess->fd, h2_resp_101, strlen(h2_resp_101)); @@ -299,7 +299,7 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, VSLb(h2->vsl, SLT_Debug, "H2: Upgrade: Error writing 101" " response: %s\n", VAS_errtxt(errno)); h2_ou_rel_req(wrk, &req); - return (0); + return (NULL); } http_Unset(req->http, H_Upgrade); @@ -336,15 +336,15 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, VSLb(h2->vsl, SLT_Debug, "H2: No/Bad OU PRISM (hs=%d)", hs); r2->scheduled = 0; h2_del_req(wrk, &r2); - return (0); + return 
(NULL); } if (Pool_Task(wrk->pool, r2->req->task, TASK_QUEUE_REQ)) { r2->scheduled = 0; h2_del_req(wrk, &r2); VSLb(h2->vsl, SLT_Debug, "H2: No Worker-threads"); - return (0); + return (NULL); } - return (1); + return (r2); } /********************************************************************** @@ -377,6 +377,7 @@ h2_new_session(struct worker *wrk, void *arg) struct h2_sess h2s; struct h2_sess *h2; struct h2_req *r2, *r22; + struct h2_req *r2_ou = NULL; int again; uint16_t marker; uint8_t settings[48]; @@ -420,13 +421,29 @@ h2_new_session(struct worker *wrk, void *arg) AZ(wrk->vsl); wrk->vsl = h2->vsl; - if (marker == H2_OU_MARKER && !h2_ou_session(wrk, h2, &req)) { - assert(h2->refcnt == 1); - h2_del_req(wrk, &h2->req0); - h2_del_sess(wrk, h2, SC_RX_JUNK); - wrk->vsl = NULL; - return; + if (marker == H2_OU_MARKER) { + /* Deal with opportunistic upgrade. The upgrade request + * was received by HTTP/1 and is held in req. The response + * will be sent by H/2. Convert the req struct to an H/2 + * req. */ + AN(req); + r2_ou = h2_ou_session(wrk, h2, &req); + AZ(req); + CHECK_OBJ_ORNULL(r2_ou, H2_REQ_MAGIC); + if (r2_ou == NULL) { + assert(h2->refcnt == 1); + h2_del_req(wrk, &h2->req0); + h2_del_sess(wrk, h2, SC_RX_JUNK); + wrk->vsl = NULL; + return; + } + + /* The request was scheduled by h2_ou_session. No need to + * keep track of it from here. */ + AN(r2_ou->scheduled); + r2_ou = NULL; } + assert(HTC_S_COMPLETE == H2_prism_complete(h2->htc)); HTC_RxPipeline(h2->htc, h2->htc->rxbuf_b + sizeof(H2_prism)); HTC_RxInit(h2->htc, h2->ws); From a1ba004152ef35c1af0cdba1fcd1b86007901595 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Mon, 3 Mar 2025 16:30:49 +0100 Subject: [PATCH 13/39] http2_session: Await PRISM req just after 101 resp Waiting for the PRISM request immediately after sending the 101 (switch protocols) is a prerequisite to ensure a proper order of events. 
A subsequent change will ensure that the first frame sent by Varnish is the initial SETTINGS frame. --- bin/varnishd/http2/cache_http2_session.c | 28 ++++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index 9bde607f3c..82371380db 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -302,10 +302,7 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, return (NULL); } - http_Unset(req->http, H_Upgrade); - http_Unset(req->http, H_HTTP2_Settings); - - /* Steal pipelined read-ahead, if any */ + /* Copy any pipelined data from the request into the session. */ h2->htc->pipeline_b = req->htc->pipeline_b; h2->htc->pipeline_e = req->htc->pipeline_e; req->htc->pipeline_b = NULL; @@ -315,6 +312,19 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, do about the overflowing data is an open issue. */ HTC_RxInit(h2->htc, h2->ws); + /* Wait for PRISM response */ + hs = HTC_RxStuff(h2->htc, H2_prism_complete, + NULL, NULL, NAN, h2->sess->t_idle + cache_param->timeout_idle, NAN, + sizeof H2_prism); + if (hs != HTC_S_COMPLETE) { + VSLb(h2->vsl, SLT_Debug, "H2: No/Bad OU PRISM (hs=%d)", hs); + h2_ou_rel_req(wrk, &req); + return (NULL); + } + + http_Unset(req->http, H_Upgrade); + http_Unset(req->http, H_HTTP2_Settings); + /* Start req thread */ r2 = h2_new_req(h2, 1, &req); AZ(req); @@ -328,16 +338,6 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, r2->state = H2_S_CLOS_REM; // rfc7540,l,489,491 http_SetH(r2->req->http, HTTP_HDR_PROTO, "HTTP/2.0"); - /* Wait for PRISM response */ - hs = HTC_RxStuff(h2->htc, H2_prism_complete, - NULL, NULL, NAN, h2->sess->t_idle + cache_param->timeout_idle, NAN, - sizeof H2_prism); - if (hs != HTC_S_COMPLETE) { - VSLb(h2->vsl, SLT_Debug, "H2: No/Bad OU PRISM (hs=%d)", hs); - r2->scheduled = 0; - h2_del_req(wrk, &r2); - return (NULL); - } if (Pool_Task(wrk->pool, 
r2->req->task, TASK_QUEUE_REQ)) { r2->scheduled = 0; h2_del_req(wrk, &r2); From b7a2fe635afc3f39e0752ab19f891a26be4a2c16 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Wed, 5 Mar 2025 10:47:53 +0100 Subject: [PATCH 14/39] http2_proto: Expose h2_tx_goaway() --- bin/varnishd/http2/cache_http2.h | 1 + bin/varnishd/http2/cache_http2_proto.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/varnishd/http2/cache_http2.h b/bin/varnishd/http2/cache_http2.h index 5dc358913f..e5fd636b71 100644 --- a/bin/varnishd/http2/cache_http2.h +++ b/bin/varnishd/http2/cache_http2.h @@ -270,6 +270,7 @@ void h2_del_req(struct worker *, struct h2_req **); void h2_kill_req(struct worker *, struct h2_sess *, struct h2_req *, h2_error); int h2_rxframe(struct worker *, struct h2_sess *); h2_error h2_set_setting(struct h2_sess *, const uint8_t *); +void h2_tx_goaway(struct worker *wrk, struct h2_sess *h2, h2_error h2e); void h2_req_body(struct req*); task_func_t h2_do_req; #ifdef TRANSPORT_MAGIC diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 8b639d0555..2ee88a190e 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -416,7 +416,7 @@ h2_rx_goaway(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) return (h2->error); } -static void +void h2_tx_goaway(struct worker *wrk, struct h2_sess *h2, h2_error h2e) { char b[8]; From 24277a7c2be7a53d2a3eaeca1e3a4662e4a0246a Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Wed, 5 Mar 2025 14:56:00 +0100 Subject: [PATCH 15/39] http2_proto: Release WS in h2_rxframe() on early return The code calling h2_rxframe() assumes that the workspace will have been released upon its return. This was not done in the case of early return due to goaway having been sent. 
--- bin/varnishd/http2/cache_http2_proto.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 2ee88a190e..2a8a5aa0af 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -1501,10 +1501,20 @@ h2_rxframe(struct worker *wrk, struct h2_sess *h2) h2_error h2e; const char *s, *r; + /* On entry we assume that HTC_RxInit() has been called, which + * would have reserved workspace for HTC_RxStuff() to receive + * into. */ + AN(h2->htc->ws->r); + ASSERT_RXTHR(h2); - if (h2->goaway && h2->open_streams == 0) + if (h2->goaway && h2->open_streams == 0) { + /* We have not called HTC_RxStuff(), and thus not released + * the workspace. Do it here so that the workspace is in + * the expected state for the caller. */ + WS_ReleaseP(h2->htc->ws, h2->htc->rxbuf_e); return (0); + } h2->t1 = NAN; VTCP_blocking(*h2->htc->rfd); From 8d027104ca44e45495eda908d8a2e737d91a7266 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Wed, 5 Mar 2025 15:03:51 +0100 Subject: [PATCH 16/39] http2_session: Process OU req after sending settings The H/2 RFC requires that the server sends the settings frame during an opportunistic upgrade before any stream frames. But we scheduled the stream==1 (H/1 received upgraded request that will get the response over H/2) thread before sending the settings. This technically (though unlikely) means that it could win the race to get the send mutex, and start sending frames before the settings. To clean this up, this patch delays the scheduling of the stream==1 request thread until after the settings have been sent. Also it changes things so that failures to schedule the stream==1 will result in an ENHANCE_YOUR_CALM error being sent to the client. A test case is updated to expect the error message.
--- bin/varnishd/http2/cache_http2_session.c | 42 ++++++++++++++++-------- bin/varnishtest/tests/r02937.vtc | 8 +++++ 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index 82371380db..ac8db261d2 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -325,7 +325,10 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, http_Unset(req->http, H_Upgrade); http_Unset(req->http, H_HTTP2_Settings); - /* Start req thread */ + /* Prepare the req thread, but do not start it. The RFC requires + * us to send our settings frame before any response frames, so we + * delay the start of the thread until after the settings frame + * has been sent. */ r2 = h2_new_req(h2, 1, &req); AZ(req); AZ(h2->highest_stream); @@ -334,16 +337,9 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, assert(r2->req->req_step == R_STP_TRANSPORT); r2->req->task->func = h2_do_req; r2->req->task->priv = r2->req; - r2->scheduled = 1; r2->state = H2_S_CLOS_REM; // rfc7540,l,489,491 http_SetH(r2->req->http, HTTP_HDR_PROTO, "HTTP/2.0"); - if (Pool_Task(wrk->pool, r2->req->task, TASK_QUEUE_REQ)) { - r2->scheduled = 0; - h2_del_req(wrk, &r2); - VSLb(h2->vsl, SLT_Debug, "H2: No Worker-threads"); - return (NULL); - } return (r2); } @@ -429,7 +425,6 @@ h2_new_session(struct worker *wrk, void *arg) AN(req); r2_ou = h2_ou_session(wrk, h2, &req); AZ(req); - CHECK_OBJ_ORNULL(r2_ou, H2_REQ_MAGIC); if (r2_ou == NULL) { assert(h2->refcnt == 1); h2_del_req(wrk, &h2->req0); @@ -438,10 +433,8 @@ h2_new_session(struct worker *wrk, void *arg) return; } - /* The request was scheduled by h2_ou_session. No need to - * keep track of it from here. 
*/ - AN(r2_ou->scheduled); - r2_ou = NULL; + CHECK_OBJ_NOTNULL(r2_ou, H2_REQ_MAGIC); + AZ(r2_ou->scheduled); } assert(HTC_S_COMPLETE == H2_prism_complete(h2->htc)); @@ -453,6 +446,7 @@ h2_new_session(struct worker *wrk, void *arg) THR_SetRequest(h2->srq); AN(WS_Reservation(h2->ws)); + /* Send our settings */ l = h2_enc_settings(&h2->local_settings, settings, sizeof (settings)); AN(WS_Reservation(h2->ws)); H2_Send_Get(wrk, h2, h2->req0); @@ -466,6 +460,28 @@ h2_new_session(struct worker *wrk, void *arg) /* and off we go... */ h2->cond = &wrk->cond; + if (r2_ou != NULL) { + /* Schedule the opportunistic request received over HTTP/1 + * as part of the upgrade. */ + AZ(r2_ou->scheduled); + r2_ou->scheduled = 1; + if (Pool_Task(wrk->pool, r2_ou->req->task, TASK_QUEUE_REQ)) { + /* We failed to schedule it. Make the client go + * away. + * + * Note: Calling h2_tx_goaway will set the + * h2->goaway flag, causing h2_rxframe() below to + * return failure without reading from the + * socket. */ + r2_ou->scheduled = 0; + VSLb(h2->vsl, SLT_Debug, "H2: No Worker-threads"); + h2_kill_req(wrk, h2, r2_ou, H2SE_ENHANCE_YOUR_CALM); + h2->error = H2CE_ENHANCE_YOUR_CALM; + h2_tx_goaway(wrk, h2, h2->error); + } + r2_ou = NULL; + } + while (h2_rxframe(wrk, h2)) { HTC_RxInit(h2->htc, h2->ws); if (WS_Overflowed(h2->ws)) { diff --git a/bin/varnishtest/tests/r02937.vtc b/bin/varnishtest/tests/r02937.vtc index 8a2d00d58b..eed3b2b34e 100644 --- a/bin/varnishtest/tests/r02937.vtc +++ b/bin/varnishtest/tests/r02937.vtc @@ -21,5 +21,13 @@ client c1 { expect resp.http.upgrade == h2c expect resp.http.connection == Upgrade txpri + + stream 0 { + rxsettings + rxgoaway + expect goaway.err == ENHANCE_YOUR_CALM + expect goaway.laststream == 1 + } -run + expect_close } -run From ce499f97a90dec1d923d2060bdd83cc2171f7c64 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 27 Mar 2025 13:54:21 +0100 Subject: [PATCH 17/39] http2: Consolidate frame flags The H2 spec uses the same bit for frame 
flags of the same meaning, regardless of the frame they are used in. Simplify and reduce number of headers by defining each only once. --- bin/varnishd/http2/cache_http2_deliver.c | 7 +++--- bin/varnishd/http2/cache_http2_proto.c | 29 ++++++++++++------------ bin/varnishd/http2/cache_http2_send.c | 4 ++-- include/tbl/h2_frames.h | 16 ++++--------- 4 files changed, 24 insertions(+), 32 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_deliver.c b/bin/varnishd/http2/cache_http2_deliver.c index 4013012436..07e133e546 100644 --- a/bin/varnishd/http2/cache_http2_deliver.c +++ b/bin/varnishd/http2/cache_http2_deliver.c @@ -106,7 +106,7 @@ h2_fini(struct vdp_ctx *vdc, void **priv) } H2_Send_Get(vdc->wrk, r2->h2sess, r2); - H2_Send(vdc->wrk, r2, H2_F_DATA, H2FF_DATA_END_STREAM, 0, "", NULL); + H2_Send(vdc->wrk, r2, H2_F_DATA, H2FF_END_STREAM, 0, "", NULL); H2_Send_Rel(r2->h2sess, r2); return (0); } @@ -192,8 +192,7 @@ h2_minimal_response(struct req *req, uint16_t status) H2_Send_Get(req->wrk, r2->h2sess, r2); H2_Send(req->wrk, r2, H2_F_HEADERS, - H2FF_HEADERS_END_HEADERS | - (status < 200 ? 0 : H2FF_HEADERS_END_STREAM), + H2FF_END_HEADERS | (status < 200 ? 0 : H2FF_END_STREAM), l, buf, NULL); H2_Send_Rel(r2->h2sess, r2); return (0); @@ -334,7 +333,7 @@ h2_deliver(struct req *req, int sendbody) H2_Send_Get(req->wrk, r2->h2sess, r2); H2_Send(req->wrk, r2, H2_F_HEADERS, - (sendbody ? 0 : H2FF_HEADERS_END_STREAM) | H2FF_HEADERS_END_HEADERS, + (sendbody ? 
0 : H2FF_END_STREAM) | H2FF_END_HEADERS, sz, r, &req->acct.resp_hdrbytes); H2_Send_Rel(r2->h2sess, r2); diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 2a8a5aa0af..b07dac9aff 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -308,7 +308,7 @@ h2_rx_ping(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); assert(r2 == h2->req0); - if (h2->rxf_len != 8) { // rfc7540,l,2364,2366 + if (h2->rxf_len != 8) { // rfc7540,l,2364,2366 H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx ping with (len != 8)"); return (H2CE_FRAME_SIZE_ERROR); } @@ -318,8 +318,7 @@ h2_rx_ping(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) return (H2SE_PROTOCOL_ERROR); } H2_Send_Get(wrk, h2, r2); - H2_Send_Frame(wrk, h2, - H2_F_PING, H2FF_PING_ACK, 8, 0, h2->rxf_data); + H2_Send_Frame(wrk, h2, H2_F_PING, H2FF_ACK, 8, 0, h2->rxf_data); H2_Send_Rel(h2, r2); return (0); } @@ -585,7 +584,7 @@ h2_rx_settings(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) assert(r2 == h2->req0); AZ(h2->rxf_stream); - if (h2->rxf_flags == H2FF_SETTINGS_ACK) { + if (h2->rxf_flags == H2FF_ACK) { if (h2->rxf_len > 0) { // rfc7540,l,2047,2049 H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx settings ack with " "(len > 0)"); @@ -606,7 +605,7 @@ h2_rx_settings(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) } H2_Send_Get(wrk, h2, r2); H2_Send_Frame(wrk, h2, - H2_F_SETTINGS, H2FF_SETTINGS_ACK, 0, 0, NULL); + H2_F_SETTINGS, H2FF_ACK, 0, 0, NULL); H2_Send_Rel(h2, r2); } return (0); @@ -793,7 +792,7 @@ h2_rx_headers(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) p = h2->rxf_data; l = h2->rxf_len; - if (h2->rxf_flags & H2FF_HEADERS_PADDED) { + if (h2->rxf_flags & H2FF_PADDED) { if (*p + 1 > l) { H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx headers with pad length > frame len"); return (H2CE_PROTOCOL_ERROR); // rfc7540,l,1884,1887 @@ -801,7 +800,7 @@ 
h2_rx_headers(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) l -= 1 + *p; p += 1; } - if (h2->rxf_flags & H2FF_HEADERS_PRIORITY) { + if (h2->rxf_flags & H2FF_PRIORITY) { if (l < 5) { H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx headers with incorrect " "priority data"); @@ -819,10 +818,10 @@ h2_rx_headers(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) return (h2e); } - if (h2->rxf_flags & H2FF_HEADERS_END_STREAM) + if (h2->rxf_flags & H2FF_END_STREAM) req->req_body_status = BS_NONE; - if (h2->rxf_flags & H2FF_HEADERS_END_HEADERS) + if (h2->rxf_flags & H2FF_END_HEADERS) return (h2_end_headers(wrk, h2, req, r2)); return (0); } @@ -854,7 +853,7 @@ h2_rx_continuation(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) h2_del_req(wrk, &r2); return (h2e); } - if (h2->rxf_flags & H2FF_HEADERS_END_HEADERS) + if (h2->rxf_flags & H2FF_END_HEADERS) return (h2_end_headers(wrk, h2, req, r2)); return (0); } @@ -899,7 +898,7 @@ h2_rx_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) /* Check padding if present */ src = h2->rxf_data; len = h2->rxf_len; - if (h2->rxf_flags & H2FF_DATA_PADDED) { + if (h2->rxf_flags & H2FF_PADDED) { if (*src >= len) { VSLb(h2->vsl, SLT_SessError, "H2: stream %u: Padding larger than frame length", @@ -922,7 +921,7 @@ h2_rx_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) l = 0; l += len; if (l > r2->req->htc->content_length || - ((h2->rxf_flags & H2FF_DATA_END_STREAM) && + ((h2->rxf_flags & H2FF_END_STREAM) && l != r2->req->htc->content_length)) { VSLb(h2->vsl, SLT_Debug, "H2: stream %u: Received data and Content-Length" @@ -998,7 +997,7 @@ h2_rx_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) Lck_Lock(&h2->sess->mtx); } - if (h2->rxf_flags & H2FF_DATA_END_STREAM) + if (h2->rxf_flags & H2FF_END_STREAM) r2->state = H2_S_CLOS_REM; if (r2->cond) PTOK(pthread_cond_signal(r2->cond)); @@ -1024,7 +1023,7 @@ h2_rx_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) bufsize = 
r2->r_window; } assert(bufsize > 0); - if ((h2->rxf_flags & H2FF_DATA_END_STREAM) && + if ((h2->rxf_flags & H2FF_END_STREAM) && bufsize > len) /* Cap the buffer size when we know this is the * single data frame. */ @@ -1089,7 +1088,7 @@ h2_rx_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) r2->r_window -= h2->rxf_len; r2->rxbuf->head += len; assert(r2->rxbuf->tail <= r2->rxbuf->head); - if (h2->rxf_flags & H2FF_DATA_END_STREAM) + if (h2->rxf_flags & H2FF_END_STREAM) r2->state = H2_S_CLOS_REM; if (r2->cond) PTOK(pthread_cond_signal(r2->cond)); diff --git a/bin/varnishd/http2/cache_http2_send.c b/bin/varnishd/http2/cache_http2_send.c index d4e66aab8c..04993d812d 100644 --- a/bin/varnishd/http2/cache_http2_send.c +++ b/bin/varnishd/http2/cache_http2_send.c @@ -352,8 +352,8 @@ h2_send(struct worker *wrk, struct h2_req *r2, h2_frame ftyp, uint8_t flags, Lck_Lock(&h2->sess->mtx); mfs = h2->remote_settings.max_frame_size; if (r2->counted && ( - (ftyp == H2_F_HEADERS && (flags & H2FF_HEADERS_END_STREAM)) || - (ftyp == H2_F_DATA && (flags & H2FF_DATA_END_STREAM)) || + (ftyp == H2_F_HEADERS && (flags & H2FF_END_STREAM)) || + (ftyp == H2_F_DATA && (flags & H2FF_END_STREAM)) || ftyp == H2_F_RST_STREAM )) { assert(h2->open_streams > 0); diff --git a/include/tbl/h2_frames.h b/include/tbl/h2_frames.h index 2b1e2c04f5..52a987b9b2 100644 --- a/include/tbl/h2_frames.h +++ b/include/tbl/h2_frames.h @@ -138,17 +138,11 @@ #ifdef H2_FRAME_FLAGS /* lower, upper, flag */ H2_FRAME_FLAGS(none, NONE, 0x00) - H2_FRAME_FLAGS(data_end_stream, DATA_END_STREAM, 0x01) - H2_FRAME_FLAGS(data_padded, DATA_PADDED, 0x08) - H2_FRAME_FLAGS(headers_end_stream, HEADERS_END_STREAM, 0x01) - H2_FRAME_FLAGS(headers_end_headers, HEADERS_END_HEADERS, 0x04) - H2_FRAME_FLAGS(headers_padded, HEADERS_PADDED, 0x08) - H2_FRAME_FLAGS(headers_priority, HEADERS_PRIORITY, 0x20) - H2_FRAME_FLAGS(settings_ack, SETTINGS_ACK, 0x01) - H2_FRAME_FLAGS(push_promise_end_headers,PUSH_PROMISE_END_HEADERS, 0x04) - 
H2_FRAME_FLAGS(push_promise_padded, PUSH_PROMISE_PADDED, 0x08) - H2_FRAME_FLAGS(ping_ack, PING_ACK, 0x01) - H2_FRAME_FLAGS(continuation_end_headers,CONTINUATION_END_HEADERS, 0x04) + H2_FRAME_FLAGS(ack, ACK, 0x01) + H2_FRAME_FLAGS(end_stream, END_STREAM, 0x01) + H2_FRAME_FLAGS(end_headers, END_HEADERS, 0x04) + H2_FRAME_FLAGS(padded, PADDED, 0x08) + H2_FRAME_FLAGS(priority, PRIORITY, 0x20) #undef H2_FRAME_FLAGS #endif From dd79a628c604a7385655d340a0f50fe4617090bb Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 17 Apr 2025 13:46:32 +0200 Subject: [PATCH 18/39] http2_proto: Drain stale comment At this point, we already have a complete frame. --- bin/varnishd/http2/cache_http2_proto.c | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index b07dac9aff..5dfb29d014 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -1565,7 +1565,6 @@ h2_rxframe(struct worker *wrk, struct h2_sess *h2) if (h2->rxf_type >= H2FMAX) { // rfc7540,l,679,681 - // XXX: later, drain rest of frame h2->bogosity++; H2S_Lock_VSLb(h2, SLT_Debug, "H2: Unknown frame type 0x%02x (ignored)", From eec922f6f59e0846861fe3205a10c94b15800fe0 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 10 Apr 2025 17:30:18 +0200 Subject: [PATCH 19/39] viov: New VIOV_prune() function It can be used anywhere we expect short writes using writev(). It's hosted by cache_session.c until there is enough momentum to give VIOV its own C file.
--- bin/varnishd/cache/cache_session.c | 32 +++++++++++++++++++++++++++++ bin/varnishd/cache/cache_varnishd.h | 2 ++ 2 files changed, 34 insertions(+) diff --git a/bin/varnishd/cache/cache_session.c b/bin/varnishd/cache/cache_session.c index c031cb665f..c8a23948d7 100644 --- a/bin/varnishd/cache/cache_session.c +++ b/bin/varnishd/cache/cache_session.c @@ -410,6 +410,38 @@ HTC_RxStuff(struct http_conn *htc, htc_complete_f *func, } } +/*-------------------------------------------------------------------- + * Prune a vector of struct iovec + */ + +void +VIOV_prune(struct iovec *iov, unsigned *n, size_t l) +{ + unsigned u; + + if (l == 0) + return; + + AN(iov); + AN(n); + + u = 0; + while (l > 0) { + assert(u < *n); + if (iov[u].iov_len <= l) { + l -= iov[u].iov_len; + u++; + } else { + iov[u].iov_base = (char *)iov[u].iov_base + l; + iov[u].iov_len -= l; + break; + } + } + + memmove(iov, &iov[u], (*n - u) * sizeof *iov); + *n -= u; +} + /*-------------------------------------------------------------------- * Get a new session, preferably by recycling an already ready one * diff --git a/bin/varnishd/cache/cache_varnishd.h b/bin/varnishd/cache/cache_varnishd.h index 2892ef188d..0bb2e9ff24 100644 --- a/bin/varnishd/cache/cache_varnishd.h +++ b/bin/varnishd/cache/cache_varnishd.h @@ -462,6 +462,8 @@ enum htc_status_e HTC_RxStuff(struct http_conn *, htc_complete_f *, vtim_real *t1, vtim_real *t2, vtim_real ti, vtim_real tn, vtim_dur td, int maxbytes); +void VIOV_prune(struct iovec *iov, unsigned *n, size_t l); + #define SESS_ATTR(UP, low, typ, len) \ int SES_Set_##low(const struct sess *sp, const typ *src); \ int SES_Reserve_##low(struct sess *sp, typ **dst, ssize_t *sz); From 7b68db315b37d250773f65b3a57c53553a4ccf66 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 17 Apr 2025 18:29:11 +0200 Subject: [PATCH 20/39] http2_send: Adopt VIOV_prune() --- bin/varnishd/http2/cache_http2_send.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 
deletions(-) diff --git a/bin/varnishd/http2/cache_http2_send.c b/bin/varnishd/http2/cache_http2_send.c index 04993d812d..584ad161df 100644 --- a/bin/varnishd/http2/cache_http2_send.c +++ b/bin/varnishd/http2/cache_http2_send.c @@ -181,8 +181,9 @@ H2_Send_Frame(struct worker *wrk, struct h2_sess *h2, uint32_t len, uint32_t stream, const void *ptr) { uint8_t hdr[9]; - ssize_t s; + ssize_t s = 0, i; struct iovec iov[2]; + unsigned niov; (void)wrk; @@ -204,9 +205,21 @@ H2_Send_Frame(struct worker *wrk, struct h2_sess *h2, memset(iov, 0, sizeof iov); iov[0].iov_base = (void*)hdr; iov[0].iov_len = sizeof hdr; - iov[1].iov_base = TRUST_ME(ptr); - iov[1].iov_len = len; - s = writev(h2->sess->fd, iov, len == 0 ? 1 : 2); + if (len > 0) { + iov[1].iov_base = TRUST_ME(ptr); + iov[1].iov_len = len; + niov = 2; + } else + niov = 1; + + while (s != sizeof hdr + len) { + i = writev(h2->sess->fd, iov, niov); + if (i <= 0) + break; + VIOV_prune(iov, &niov, i); + s += i; + } + if (s != sizeof hdr + len) { if (errno == EWOULDBLOCK) { H2S_Lock_VSLb(h2, SLT_SessError, From a758f8de30d9038aa1608acc2550acc032b06e9e Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 17 Apr 2025 14:00:47 +0200 Subject: [PATCH 21/39] http2_deliver: Rename VDP callbacks to h2_vdp_*() --- bin/varnishd/http2/cache_http2_deliver.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_deliver.c b/bin/varnishd/http2/cache_http2_deliver.c index 07e133e546..b07d04e4d1 100644 --- a/bin/varnishd/http2/cache_http2_deliver.c +++ b/bin/varnishd/http2/cache_http2_deliver.c @@ -73,7 +73,7 @@ V2D_Init(void) /**********************************************************************/ static int v_matchproto_(vdp_init_f) -h2_init(VRT_CTX, struct vdp_ctx *vdc, void **priv) +h2_vdp_init(VRT_CTX, struct vdp_ctx *vdc, void **priv) { struct h2_req *r2; @@ -86,7 +86,7 @@ h2_init(VRT_CTX, struct vdp_ctx *vdc, void **priv) } static int v_matchproto_(vdp_fini_f) 
-h2_fini(struct vdp_ctx *vdc, void **priv) +h2_vdp_fini(struct vdp_ctx *vdc, void **priv) { struct h2_req *r2; @@ -112,7 +112,7 @@ h2_fini(struct vdp_ctx *vdc, void **priv) } static int v_matchproto_(vdp_bytes_f) -h2_bytes(struct vdp_ctx *vdc, enum vdp_action act, void **priv, +h2_vdp_bytes(struct vdp_ctx *vdc, enum vdp_action act, void **priv, const void *ptr, ssize_t len) { struct h2_req *r2; @@ -134,9 +134,9 @@ h2_bytes(struct vdp_ctx *vdc, enum vdp_action act, void **priv, static const struct vdp h2_vdp = { .name = "H2B", - .init = h2_init, - .bytes = h2_bytes, - .fini = h2_fini, + .init = h2_vdp_init, + .bytes = h2_vdp_bytes, + .fini = h2_vdp_fini, }; static inline size_t From cc6357925433c1a278547a8a27a6af887e164590 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 17 Apr 2025 18:52:47 +0200 Subject: [PATCH 22/39] http2_deliver: Prepare response flags upfront --- bin/varnishd/http2/cache_http2_deliver.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_deliver.c b/bin/varnishd/http2/cache_http2_deliver.c index b07d04e4d1..d13158cca9 100644 --- a/bin/varnishd/http2/cache_http2_deliver.c +++ b/bin/varnishd/http2/cache_http2_deliver.c @@ -301,6 +301,7 @@ h2_deliver(struct req *req, int sendbody) struct vsb resp[1]; struct vrt_ctx ctx[1]; uintptr_t ss; + uint8_t flags; CHECK_OBJ_NOTNULL(req, REQ_MAGIC); CHECK_OBJ_NOTNULL(req->objcore, OBJCORE_MAGIC); @@ -331,10 +332,12 @@ h2_deliver(struct req *req, int sendbody) r2->t_send = req->t_prev; + flags = H2FF_END_HEADERS; + if (!sendbody) + flags |= H2FF_END_STREAM; H2_Send_Get(req->wrk, r2->h2sess, r2); - H2_Send(req->wrk, r2, H2_F_HEADERS, - (sendbody ? 
0 : H2FF_END_STREAM) | H2FF_END_HEADERS, - sz, r, &req->acct.resp_hdrbytes); + H2_Send(req->wrk, r2, H2_F_HEADERS, flags, sz, r, + &req->acct.resp_hdrbytes); H2_Send_Rel(r2->h2sess, r2); WS_Reset(req->ws, ss); From 4b7150d95c02652d8de63e020d56d4be5726f6d6 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 17 Apr 2025 18:49:20 +0200 Subject: [PATCH 23/39] http2_deliver: Prepare minimal response flags upfront --- bin/varnishd/http2/cache_http2_deliver.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_deliver.c b/bin/varnishd/http2/cache_http2_deliver.c index d13158cca9..c4a15355cf 100644 --- a/bin/varnishd/http2/cache_http2_deliver.c +++ b/bin/varnishd/http2/cache_http2_deliver.c @@ -170,6 +170,7 @@ h2_minimal_response(struct req *req, uint16_t status) struct h2_req *r2; size_t l; uint8_t buf[6]; + uint8_t flags; CHECK_OBJ_NOTNULL(req, REQ_MAGIC); CAST_OBJ_NOTNULL(r2, req->transport_priv, H2_REQ_MAGIC); @@ -189,11 +190,11 @@ h2_minimal_response(struct req *req, uint16_t status) req->err_code = status; /* XXX return code checking once H2_Send returns anything but 0 */ + flags = H2FF_END_HEADERS; + if (status >= 200) + flags |= H2FF_END_STREAM; H2_Send_Get(req->wrk, r2->h2sess, r2); - H2_Send(req->wrk, r2, - H2_F_HEADERS, - H2FF_END_HEADERS | (status < 200 ? 0 : H2FF_END_STREAM), - l, buf, NULL); + H2_Send(req->wrk, r2, H2_F_HEADERS, flags, l, buf, NULL); H2_Send_Rel(r2->h2sess, r2); return (0); } From d01598345598ada3e27c3e7464458a8777f68680 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 10 Apr 2025 15:00:50 +0200 Subject: [PATCH 24/39] http2_reqbody: Manage req body in its own file This makes cache_http2_proto.c smaller. 
--- bin/varnishd/Makefile.am | 1 + bin/varnishd/http2/cache_http2.h | 5 +- bin/varnishd/http2/cache_http2_proto.c | 385 +------------------- bin/varnishd/http2/cache_http2_reqbody.c | 438 +++++++++++++++++++++++ bin/varnishd/http2/cache_http2_session.c | 2 +- 5 files changed, 445 insertions(+), 386 deletions(-) create mode 100644 bin/varnishd/http2/cache_http2_reqbody.c diff --git a/bin/varnishd/Makefile.am b/bin/varnishd/Makefile.am index 0d60f1b188..8ff33394bd 100644 --- a/bin/varnishd/Makefile.am +++ b/bin/varnishd/Makefile.am @@ -81,6 +81,7 @@ varnishd_SOURCES = \ http2/cache_http2_hpack.c \ http2/cache_http2_panic.c \ http2/cache_http2_proto.c \ + http2/cache_http2_reqbody.c \ http2/cache_http2_send.c \ http2/cache_http2_session.c \ mgt/mgt_child.c \ diff --git a/bin/varnishd/http2/cache_http2.h b/bin/varnishd/http2/cache_http2.h index e5fd636b71..97025b9471 100644 --- a/bin/varnishd/http2/cache_http2.h +++ b/bin/varnishd/http2/cache_http2.h @@ -271,12 +271,15 @@ void h2_kill_req(struct worker *, struct h2_sess *, struct h2_req *, h2_error); int h2_rxframe(struct worker *, struct h2_sess *); h2_error h2_set_setting(struct h2_sess *, const uint8_t *); void h2_tx_goaway(struct worker *wrk, struct h2_sess *h2, h2_error h2e); -void h2_req_body(struct req*); task_func_t h2_do_req; #ifdef TRANSPORT_MAGIC vtr_req_fail_f h2_req_fail; #endif +/* cache_http2_reqbody.c */ +h2_error h2_reqbody_data(struct worker *, struct h2_sess *, struct h2_req *); +void h2_reqbody(struct req *); + /* cache_http2_session.c */ void H2S_Lock_VSLb(const struct h2_sess *, enum VSL_tag_e, const char *, ...); diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 5dfb29d014..a8bd6b98cc 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -863,15 +863,6 @@ h2_rx_continuation(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) static h2_error v_matchproto_(h2_rxframe_f) h2_rx_data(struct worker 
*wrk, struct h2_sess *h2, struct h2_req *r2) { - char buf[4]; - ssize_t l; - uint64_t l2, head; - const uint8_t *src; - unsigned len; - - /* XXX: Shouldn't error handling, setting of r2->error and - * r2->cond signalling be handled more generally at the end of - * procframe()??? */ CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); ASSERT_RXTHR(h2); @@ -885,381 +876,7 @@ h2_rx_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) return (H2SE_STREAM_CLOSED); // rfc7540,l,1766,1769 } - Lck_Lock(&h2->sess->mtx); - CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); - - if (h2->error != NULL || r2->error != NULL) { - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); - return (h2->error != NULL ? h2->error : r2->error); - } - - /* Check padding if present */ - src = h2->rxf_data; - len = h2->rxf_len; - if (h2->rxf_flags & H2FF_PADDED) { - if (*src >= len) { - VSLb(h2->vsl, SLT_SessError, - "H2: stream %u: Padding larger than frame length", - h2->rxf_stream); - r2->error = H2CE_PROTOCOL_ERROR; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); - return (H2CE_PROTOCOL_ERROR); - } - len -= 1 + *src; - src += 1; - } - - /* Check against the Content-Length header if given */ - if (r2->req->htc->content_length >= 0) { - if (r2->rxbuf) - l = r2->rxbuf->head; - else - l = 0; - l += len; - if (l > r2->req->htc->content_length || - ((h2->rxf_flags & H2FF_END_STREAM) && - l != r2->req->htc->content_length)) { - VSLb(h2->vsl, SLT_Debug, - "H2: stream %u: Received data and Content-Length" - " mismatch", h2->rxf_stream); - r2->error = H2SE_PROTOCOL_ERROR; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); - return (H2SE_PROTOCOL_ERROR); - } - } - - /* Check and charge connection window. The entire frame including - * padding (h2->rxf_len) counts towards the window. 
*/ - if (h2->rxf_len > h2->req0->r_window) { - VSLb(h2->vsl, SLT_SessError, - "H2: stream %u: Exceeded connection receive window", - h2->rxf_stream); - r2->error = H2CE_FLOW_CONTROL_ERROR; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); - return (H2CE_FLOW_CONTROL_ERROR); - } - h2->req0->r_window -= h2->rxf_len; - if (h2->req0->r_window < cache_param->h2_rx_window_low_water) { - h2->req0->r_window += cache_param->h2_rx_window_increment; - vbe32enc(buf, cache_param->h2_rx_window_increment); - Lck_Unlock(&h2->sess->mtx); - H2_Send_Get(wrk, h2, h2->req0); - H2_Send_Frame(wrk, h2, H2_F_WINDOW_UPDATE, 0, 4, 0, buf); - H2_Send_Rel(h2, h2->req0); - Lck_Lock(&h2->sess->mtx); - } - - /* Check stream window. The entire frame including padding - * (h2->rxf_len) counts towards the window. */ - if (h2->rxf_len > r2->r_window) { - VSLb(h2->vsl, SLT_Debug, - "H2: stream %u: Exceeded stream receive window", - h2->rxf_stream); - r2->error = H2SE_FLOW_CONTROL_ERROR; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); - return (H2SE_FLOW_CONTROL_ERROR); - } - - /* Handle zero size frame before starting to allocate buffers */ - if (len == 0) { - r2->r_window -= h2->rxf_len; - - /* Handle the specific corner case where the entire window - * has been exhausted using nothing but padding - * bytes. Since no bytes have been buffered, no bytes - * would be consumed by the request thread and no stream - * window updates sent. Unpaint ourselves from this corner - * by sending a stream window update here. 
*/ - CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); - if (r2->r_window == 0 && - (r2->rxbuf == NULL || r2->rxbuf->tail == r2->rxbuf->head)) { - if (r2->rxbuf) - l = r2->rxbuf->size; - else - l = h2->local_settings.initial_window_size; - r2->r_window += l; - Lck_Unlock(&h2->sess->mtx); - vbe32enc(buf, l); - H2_Send_Get(wrk, h2, h2->req0); - H2_Send_Frame(wrk, h2, H2_F_WINDOW_UPDATE, 0, 4, - r2->stream, buf); - H2_Send_Rel(h2, h2->req0); - Lck_Lock(&h2->sess->mtx); - } - - if (h2->rxf_flags & H2FF_END_STREAM) - r2->state = H2_S_CLOS_REM; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); - return (0); - } - - /* Make the buffer on demand */ - if (r2->rxbuf == NULL) { - unsigned bufsize; - size_t bstest; - struct stv_buffer *stvbuf; - struct h2_rxbuf *rxbuf; - - Lck_Unlock(&h2->sess->mtx); - - bufsize = h2->local_settings.initial_window_size; - if (bufsize < r2->r_window) { - /* This will not happen because we do not have any - * mechanism to change the initial window size on - * a running session. But if we gain that ability, - * this future proofs it. */ - bufsize = r2->r_window; - } - assert(bufsize > 0); - if ((h2->rxf_flags & H2FF_END_STREAM) && - bufsize > len) - /* Cap the buffer size when we know this is the - * single data frame. 
*/ - bufsize = len; - CHECK_OBJ_NOTNULL(stv_h2_rxbuf, STEVEDORE_MAGIC); - stvbuf = STV_AllocBuf(wrk, stv_h2_rxbuf, - bufsize + sizeof *rxbuf); - if (stvbuf == NULL) { - Lck_Lock(&h2->sess->mtx); - VSLb(h2->vsl, SLT_Debug, - "H2: stream %u: Failed to allocate request body" - " buffer", - h2->rxf_stream); - r2->error = H2SE_INTERNAL_ERROR; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); - return (H2SE_INTERNAL_ERROR); - } - rxbuf = STV_GetBufPtr(stvbuf, &bstest); - AN(rxbuf); - assert(bstest >= bufsize + sizeof *rxbuf); - assert(PAOK(rxbuf)); - INIT_OBJ(rxbuf, H2_RXBUF_MAGIC); - rxbuf->size = bufsize; - rxbuf->stvbuf = stvbuf; - - r2->rxbuf = rxbuf; - - Lck_Lock(&h2->sess->mtx); - } - - CHECK_OBJ_NOTNULL(r2->rxbuf, H2_RXBUF_MAGIC); - assert(r2->rxbuf->tail <= r2->rxbuf->head); - l = r2->rxbuf->head - r2->rxbuf->tail; - assert(l <= r2->rxbuf->size); - l = r2->rxbuf->size - l; - assert(len <= l); /* Stream window handling ensures this */ - - Lck_Unlock(&h2->sess->mtx); - - l = len; - head = r2->rxbuf->head; - do { - l2 = l; - if ((head % r2->rxbuf->size) + l2 > r2->rxbuf->size) - l2 = r2->rxbuf->size - (head % r2->rxbuf->size); - assert(l2 > 0); - memcpy(&r2->rxbuf->data[head % r2->rxbuf->size], src, l2); - src += l2; - head += l2; - l -= l2; - } while (l > 0); - - Lck_Lock(&h2->sess->mtx); - - /* Charge stream window. The entire frame including padding - * (h2->rxf_len) counts towards the window. The used padding - * bytes will be included in the next connection window update - * sent when the buffer bytes are consumed because that is - * calculated against the available buffer space. 
*/ - r2->r_window -= h2->rxf_len; - r2->rxbuf->head += len; - assert(r2->rxbuf->tail <= r2->rxbuf->head); - if (h2->rxf_flags & H2FF_END_STREAM) - r2->state = H2_S_CLOS_REM; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); - - return (0); -} - -static enum vfp_status v_matchproto_(vfp_pull_f) -h2_vfp_body(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, ssize_t *lp) -{ - struct h2_req *r2; - struct h2_sess *h2; - enum vfp_status retval; - ssize_t l, l2; - uint64_t tail; - uint8_t *dst; - char buf[4]; - int i; - - CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); - CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC); - CAST_OBJ_NOTNULL(r2, vfe->priv1, H2_REQ_MAGIC); - h2 = r2->h2sess; - - AN(ptr); - AN(lp); - assert(*lp >= 0); - - Lck_Lock(&h2->sess->mtx); - - r2->cond = &vc->wrk->cond; - while (1) { - CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); - if (r2->rxbuf) { - assert(r2->rxbuf->tail <= r2->rxbuf->head); - l = r2->rxbuf->head - r2->rxbuf->tail; - } else - l = 0; - - if (h2->error != NULL || r2->error != NULL) - retval = VFP_ERROR; - else if (r2->state >= H2_S_CLOS_REM && l <= *lp) - retval = VFP_END; - else { - if (l > *lp) - l = *lp; - retval = VFP_OK; - } - - if (retval != VFP_OK || l > 0) - break; - - i = Lck_CondWaitTimeout(r2->cond, &h2->sess->mtx, - SESS_TMO(h2->sess, timeout_idle)); - if (i == ETIMEDOUT) { - retval = VFP_ERROR; - break; - } - } - r2->cond = NULL; - - Lck_Unlock(&h2->sess->mtx); - - if (l == 0 || retval == VFP_ERROR) { - *lp = 0; - return (retval); - } - - *lp = l; - dst = ptr; - tail = r2->rxbuf->tail; - do { - l2 = l; - if ((tail % r2->rxbuf->size) + l2 > r2->rxbuf->size) - l2 = r2->rxbuf->size - (tail % r2->rxbuf->size); - assert(l2 > 0); - memcpy(dst, &r2->rxbuf->data[tail % r2->rxbuf->size], l2); - dst += l2; - tail += l2; - l -= l2; - } while (l > 0); - - Lck_Lock(&h2->sess->mtx); - - CHECK_OBJ_NOTNULL(r2->rxbuf, H2_RXBUF_MAGIC); - r2->rxbuf->tail = tail; - assert(r2->rxbuf->tail <= r2->rxbuf->head); - - if 
(r2->r_window < cache_param->h2_rx_window_low_water && - r2->state < H2_S_CLOS_REM) { - /* l is free buffer space */ - /* l2 is calculated window increment */ - l = r2->rxbuf->size - (r2->rxbuf->head - r2->rxbuf->tail); - assert(r2->r_window <= l); - l2 = cache_param->h2_rx_window_increment; - if (r2->r_window + l2 > l) - l2 = l - r2->r_window; - r2->r_window += l2; - } else - l2 = 0; - - Lck_Unlock(&h2->sess->mtx); - - if (l2 > 0) { - vbe32enc(buf, l2); - H2_Send_Get(vc->wrk, h2, r2); - H2_Send_Frame(vc->wrk, h2, H2_F_WINDOW_UPDATE, 0, 4, - r2->stream, buf); - H2_Send_Rel(h2, r2); - } - - return (retval); -} - -static void -h2_vfp_body_fini(struct vfp_ctx *vc, struct vfp_entry *vfe) -{ - struct h2_req *r2; - struct h2_sess *h2; - struct stv_buffer *stvbuf = NULL; - - CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); - CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC); - CAST_OBJ_NOTNULL(r2, vfe->priv1, H2_REQ_MAGIC); - CHECK_OBJ_NOTNULL(r2->req, REQ_MAGIC); - h2 = r2->h2sess; - - if (vc->failed) { - CHECK_OBJ_NOTNULL(r2->req->wrk, WORKER_MAGIC); - H2_Send_Get(r2->req->wrk, h2, r2); - H2_Send_RST(r2->req->wrk, h2, r2, r2->stream, - H2SE_REFUSED_STREAM); - H2_Send_Rel(h2, r2); - Lck_Lock(&h2->sess->mtx); - r2->error = H2SE_REFUSED_STREAM; - Lck_Unlock(&h2->sess->mtx); - } - - if (r2->state >= H2_S_CLOS_REM && r2->rxbuf != NULL) { - Lck_Lock(&h2->sess->mtx); - CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); - if (r2->rxbuf != NULL) { - stvbuf = r2->rxbuf->stvbuf; - r2->rxbuf = NULL; - } - Lck_Unlock(&h2->sess->mtx); - if (stvbuf != NULL) { - STV_FreeBuf(vc->wrk, &stvbuf); - AZ(stvbuf); - } - } -} - -static const struct vfp h2_body = { - .name = "H2_BODY", - .pull = h2_vfp_body, - .fini = h2_vfp_body_fini -}; - -void v_matchproto_(vtr_req_body_t) -h2_req_body(struct req *req) -{ - struct h2_req *r2; - struct vfp_entry *vfe; - - CHECK_OBJ(req, REQ_MAGIC); - CAST_OBJ_NOTNULL(r2, req->transport_priv, H2_REQ_MAGIC); - vfe = VFP_Push(req->vfc, &h2_body); - AN(vfe); - vfe->priv1 = r2; + return 
(h2_reqbody_data(wrk, h2, r2)); } /**********************************************************************/ diff --git a/bin/varnishd/http2/cache_http2_reqbody.c b/bin/varnishd/http2/cache_http2_reqbody.c new file mode 100644 index 0000000000..2a4c923189 --- /dev/null +++ b/bin/varnishd/http2/cache_http2_reqbody.c @@ -0,0 +1,438 @@ +/*- + * Copyright (c) 2016-2025 Varnish Software AS + * All rights reserved. + * + * Author: Poul-Henning Kamp + * Author: Martin Blix Grydeland + * + * SPDX-License-Identifier: BSD-2-Clause + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include "config.h" + +#include +#include + +#include "cache/cache_varnishd.h" + +#include "cache/cache_transport.h" +#include "cache/cache_filter.h" +#include "http2/cache_http2.h" +#include "storage/storage.h" + +#include "vtim.h" +#include "vend.h" + +h2_error +h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) +{ + char buf[4]; + ssize_t l; + uint64_t l2, head; + const uint8_t *src; + unsigned len; + + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); + CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); + CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); + + ASSERT_RXTHR(h2); + + Lck_Lock(&h2->sess->mtx); + CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); + + if (h2->error != NULL || r2->error != NULL) { + if (r2->cond) + PTOK(pthread_cond_signal(r2->cond)); + Lck_Unlock(&h2->sess->mtx); + return (h2->error != NULL ? h2->error : r2->error); + } + + /* Check padding if present */ + src = h2->rxf_data; + len = h2->rxf_len; + if (h2->rxf_flags & H2FF_PADDED) { + if (*src >= len) { + VSLb(h2->vsl, SLT_SessError, + "H2: stream %u: Padding larger than frame length", + h2->rxf_stream); + r2->error = H2CE_PROTOCOL_ERROR; + if (r2->cond) + PTOK(pthread_cond_signal(r2->cond)); + Lck_Unlock(&h2->sess->mtx); + return (H2CE_PROTOCOL_ERROR); + } + len -= 1 + *src; + src += 1; + } + + /* Check against the Content-Length header if given */ + if (r2->req->htc->content_length >= 0) { + if (r2->rxbuf) + l = r2->rxbuf->head; + else + l = 0; + l += len; + if (l > r2->req->htc->content_length || + ((h2->rxf_flags & H2FF_END_STREAM) && + l != r2->req->htc->content_length)) { + VSLb(h2->vsl, SLT_Debug, + "H2: stream %u: Received data and Content-Length" + " mismatch", h2->rxf_stream); + r2->error = H2SE_PROTOCOL_ERROR; + if (r2->cond) + PTOK(pthread_cond_signal(r2->cond)); + Lck_Unlock(&h2->sess->mtx); + return (H2SE_PROTOCOL_ERROR); + } + } + + /* Check and charge connection window. The entire frame including + * padding (h2->rxf_len) counts towards the window. 
*/ + if (h2->rxf_len > h2->req0->r_window) { + VSLb(h2->vsl, SLT_SessError, + "H2: stream %u: Exceeded connection receive window", + h2->rxf_stream); + r2->error = H2CE_FLOW_CONTROL_ERROR; + if (r2->cond) + PTOK(pthread_cond_signal(r2->cond)); + Lck_Unlock(&h2->sess->mtx); + return (H2CE_FLOW_CONTROL_ERROR); + } + h2->req0->r_window -= h2->rxf_len; + if (h2->req0->r_window < cache_param->h2_rx_window_low_water) { + h2->req0->r_window += cache_param->h2_rx_window_increment; + vbe32enc(buf, cache_param->h2_rx_window_increment); + Lck_Unlock(&h2->sess->mtx); + H2_Send_Get(wrk, h2, h2->req0); + H2_Send_Frame(wrk, h2, H2_F_WINDOW_UPDATE, 0, 4, 0, buf); + H2_Send_Rel(h2, h2->req0); + Lck_Lock(&h2->sess->mtx); + } + + /* Check stream window. The entire frame including padding + * (h2->rxf_len) counts towards the window. */ + if (h2->rxf_len > r2->r_window) { + VSLb(h2->vsl, SLT_Debug, + "H2: stream %u: Exceeded stream receive window", + h2->rxf_stream); + r2->error = H2SE_FLOW_CONTROL_ERROR; + if (r2->cond) + PTOK(pthread_cond_signal(r2->cond)); + Lck_Unlock(&h2->sess->mtx); + return (H2SE_FLOW_CONTROL_ERROR); + } + + /* Handle zero size frame before starting to allocate buffers */ + if (len == 0) { + r2->r_window -= h2->rxf_len; + + /* Handle the specific corner case where the entire window + * has been exhausted using nothing but padding + * bytes. Since no bytes have been buffered, no bytes + * would be consumed by the request thread and no stream + * window updates sent. Unpaint ourselves from this corner + * by sending a stream window update here. 
*/ + CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); + if (r2->r_window == 0 && + (r2->rxbuf == NULL || r2->rxbuf->tail == r2->rxbuf->head)) { + if (r2->rxbuf) + l = r2->rxbuf->size; + else + l = h2->local_settings.initial_window_size; + r2->r_window += l; + Lck_Unlock(&h2->sess->mtx); + vbe32enc(buf, l); + H2_Send_Get(wrk, h2, h2->req0); + H2_Send_Frame(wrk, h2, H2_F_WINDOW_UPDATE, 0, 4, + r2->stream, buf); + H2_Send_Rel(h2, h2->req0); + Lck_Lock(&h2->sess->mtx); + } + + if (h2->rxf_flags & H2FF_END_STREAM) + r2->state = H2_S_CLOS_REM; + if (r2->cond) + PTOK(pthread_cond_signal(r2->cond)); + Lck_Unlock(&h2->sess->mtx); + return (0); + } + + /* Make the buffer on demand */ + if (r2->rxbuf == NULL) { + unsigned bufsize; + size_t bstest; + struct stv_buffer *stvbuf; + struct h2_rxbuf *rxbuf; + + Lck_Unlock(&h2->sess->mtx); + + bufsize = h2->local_settings.initial_window_size; + if (bufsize < r2->r_window) { + /* This will not happen because we do not have any + * mechanism to change the initial window size on + * a running session. But if we gain that ability, + * this future proofs it. */ + bufsize = r2->r_window; + } + assert(bufsize > 0); + if ((h2->rxf_flags & H2FF_END_STREAM) && + bufsize > len) + /* Cap the buffer size when we know this is the + * single data frame. 
*/ + bufsize = len; + CHECK_OBJ_NOTNULL(stv_h2_rxbuf, STEVEDORE_MAGIC); + stvbuf = STV_AllocBuf(wrk, stv_h2_rxbuf, + bufsize + sizeof *rxbuf); + if (stvbuf == NULL) { + Lck_Lock(&h2->sess->mtx); + VSLb(h2->vsl, SLT_Debug, + "H2: stream %u: Failed to allocate request body" + " buffer", + h2->rxf_stream); + r2->error = H2SE_INTERNAL_ERROR; + if (r2->cond) + PTOK(pthread_cond_signal(r2->cond)); + Lck_Unlock(&h2->sess->mtx); + return (H2SE_INTERNAL_ERROR); + } + rxbuf = STV_GetBufPtr(stvbuf, &bstest); + AN(rxbuf); + assert(bstest >= bufsize + sizeof *rxbuf); + assert(PAOK(rxbuf)); + INIT_OBJ(rxbuf, H2_RXBUF_MAGIC); + rxbuf->size = bufsize; + rxbuf->stvbuf = stvbuf; + + r2->rxbuf = rxbuf; + + Lck_Lock(&h2->sess->mtx); + } + + CHECK_OBJ_NOTNULL(r2->rxbuf, H2_RXBUF_MAGIC); + assert(r2->rxbuf->tail <= r2->rxbuf->head); + l = r2->rxbuf->head - r2->rxbuf->tail; + assert(l <= r2->rxbuf->size); + l = r2->rxbuf->size - l; + assert(len <= l); /* Stream window handling ensures this */ + + Lck_Unlock(&h2->sess->mtx); + + l = len; + head = r2->rxbuf->head; + do { + l2 = l; + if ((head % r2->rxbuf->size) + l2 > r2->rxbuf->size) + l2 = r2->rxbuf->size - (head % r2->rxbuf->size); + assert(l2 > 0); + memcpy(&r2->rxbuf->data[head % r2->rxbuf->size], src, l2); + src += l2; + head += l2; + l -= l2; + } while (l > 0); + + Lck_Lock(&h2->sess->mtx); + + /* Charge stream window. The entire frame including padding + * (h2->rxf_len) counts towards the window. The used padding + * bytes will be included in the next connection window update + * sent when the buffer bytes are consumed because that is + * calculated against the available buffer space. 
*/ + r2->r_window -= h2->rxf_len; + r2->rxbuf->head += len; + assert(r2->rxbuf->tail <= r2->rxbuf->head); + if (h2->rxf_flags & H2FF_END_STREAM) + r2->state = H2_S_CLOS_REM; + if (r2->cond) + PTOK(pthread_cond_signal(r2->cond)); + Lck_Unlock(&h2->sess->mtx); + + return (0); +} + +static enum vfp_status v_matchproto_(vfp_pull_f) +h2_vfp_body(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, ssize_t *lp) +{ + struct h2_req *r2; + struct h2_sess *h2; + enum vfp_status retval; + ssize_t l, l2; + uint64_t tail; + uint8_t *dst; + char buf[4]; + int i; + + CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); + CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC); + CAST_OBJ_NOTNULL(r2, vfe->priv1, H2_REQ_MAGIC); + h2 = r2->h2sess; + + AN(ptr); + AN(lp); + assert(*lp >= 0); + + Lck_Lock(&h2->sess->mtx); + + r2->cond = &vc->wrk->cond; + while (1) { + CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); + if (r2->rxbuf) { + assert(r2->rxbuf->tail <= r2->rxbuf->head); + l = r2->rxbuf->head - r2->rxbuf->tail; + } else + l = 0; + + if (h2->error != NULL || r2->error != NULL) + retval = VFP_ERROR; + else if (r2->state >= H2_S_CLOS_REM && l <= *lp) + retval = VFP_END; + else { + if (l > *lp) + l = *lp; + retval = VFP_OK; + } + + if (retval != VFP_OK || l > 0) + break; + + i = Lck_CondWaitTimeout(r2->cond, &h2->sess->mtx, + SESS_TMO(h2->sess, timeout_idle)); + if (i == ETIMEDOUT) { + retval = VFP_ERROR; + break; + } + } + r2->cond = NULL; + + Lck_Unlock(&h2->sess->mtx); + + if (l == 0 || retval == VFP_ERROR) { + *lp = 0; + return (retval); + } + + *lp = l; + dst = ptr; + tail = r2->rxbuf->tail; + do { + l2 = l; + if ((tail % r2->rxbuf->size) + l2 > r2->rxbuf->size) + l2 = r2->rxbuf->size - (tail % r2->rxbuf->size); + assert(l2 > 0); + memcpy(dst, &r2->rxbuf->data[tail % r2->rxbuf->size], l2); + dst += l2; + tail += l2; + l -= l2; + } while (l > 0); + + Lck_Lock(&h2->sess->mtx); + + CHECK_OBJ_NOTNULL(r2->rxbuf, H2_RXBUF_MAGIC); + r2->rxbuf->tail = tail; + assert(r2->rxbuf->tail <= r2->rxbuf->head); + + if 
(r2->r_window < cache_param->h2_rx_window_low_water && + r2->state < H2_S_CLOS_REM) { + /* l is free buffer space */ + /* l2 is calculated window increment */ + l = r2->rxbuf->size - (r2->rxbuf->head - r2->rxbuf->tail); + assert(r2->r_window <= l); + l2 = cache_param->h2_rx_window_increment; + if (r2->r_window + l2 > l) + l2 = l - r2->r_window; + r2->r_window += l2; + } else + l2 = 0; + + Lck_Unlock(&h2->sess->mtx); + + if (l2 > 0) { + vbe32enc(buf, l2); + H2_Send_Get(vc->wrk, h2, r2); + H2_Send_Frame(vc->wrk, h2, H2_F_WINDOW_UPDATE, 0, 4, + r2->stream, buf); + H2_Send_Rel(h2, r2); + } + + return (retval); +} + +static void +h2_vfp_body_fini(struct vfp_ctx *vc, struct vfp_entry *vfe) +{ + struct h2_req *r2; + struct h2_sess *h2; + struct stv_buffer *stvbuf = NULL; + + CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); + CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC); + CAST_OBJ_NOTNULL(r2, vfe->priv1, H2_REQ_MAGIC); + CHECK_OBJ_NOTNULL(r2->req, REQ_MAGIC); + h2 = r2->h2sess; + + if (vc->failed) { + CHECK_OBJ_NOTNULL(r2->req->wrk, WORKER_MAGIC); + H2_Send_Get(r2->req->wrk, h2, r2); + H2_Send_RST(r2->req->wrk, h2, r2, r2->stream, + H2SE_REFUSED_STREAM); + H2_Send_Rel(h2, r2); + Lck_Lock(&h2->sess->mtx); + r2->error = H2SE_REFUSED_STREAM; + Lck_Unlock(&h2->sess->mtx); + } + + if (r2->state >= H2_S_CLOS_REM && r2->rxbuf != NULL) { + Lck_Lock(&h2->sess->mtx); + CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); + if (r2->rxbuf != NULL) { + stvbuf = r2->rxbuf->stvbuf; + r2->rxbuf = NULL; + } + Lck_Unlock(&h2->sess->mtx); + if (stvbuf != NULL) { + STV_FreeBuf(vc->wrk, &stvbuf); + AZ(stvbuf); + } + } +} + +static const struct vfp h2_body = { + .name = "H2_BODY", + .pull = h2_vfp_body, + .fini = h2_vfp_body_fini +}; + +void v_matchproto_(vtr_req_body_t) +h2_reqbody(struct req *req) +{ + struct h2_req *r2; + struct vfp_entry *vfe; + + CHECK_OBJ(req, REQ_MAGIC); + CAST_OBJ_NOTNULL(r2, req->transport_priv, H2_REQ_MAGIC); + vfe = VFP_Push(req->vfc, &h2_body); + AN(vfe); + vfe->priv1 = r2; +} diff --git 
a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index ac8db261d2..4f47dac75a 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -545,7 +545,7 @@ struct transport HTTP2_transport = { .deliver = h2_deliver, .minimal_response = h2_minimal_response, .new_session = h2_new_session, - .req_body = h2_req_body, + .req_body = h2_reqbody, .req_fail = h2_req_fail, .sess_panic = h2_sess_panic, .poll = h2_poll, From 25eb677b3ddc7fd866d49d45abbae1380b7eb422 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 17 Apr 2025 14:08:06 +0200 Subject: [PATCH 25/39] http2: Rename h2_req::[rt]_window fields The t_window field in particular could be confused with the naming convention for timestamps. Using rx_ and tx_ prefixes removes the ambiguity. --- bin/varnishd/http2/cache_http2.h | 5 ++-- bin/varnishd/http2/cache_http2_panic.c | 4 +-- bin/varnishd/http2/cache_http2_proto.c | 10 ++++---- bin/varnishd/http2/cache_http2_reqbody.c | 32 ++++++++++++------------ bin/varnishd/http2/cache_http2_send.c | 16 ++++++------ 5 files changed, 34 insertions(+), 33 deletions(-) diff --git a/bin/varnishd/http2/cache_http2.h b/bin/varnishd/http2/cache_http2.h index 97025b9471..81e1ce025b 100644 --- a/bin/varnishd/http2/cache_http2.h +++ b/bin/varnishd/http2/cache_http2.h @@ -144,8 +144,9 @@ struct h2_req { double t_winupd; pthread_cond_t *cond; VTAILQ_ENTRY(h2_req) list; - int64_t t_window; - int64_t r_window; + + int64_t tx_window; + int64_t rx_window; /* Where to wake this stream up */ struct worker *wrk; diff --git a/bin/varnishd/http2/cache_http2_panic.c b/bin/varnishd/http2/cache_http2_panic.c index 227097e6b4..d6cda3959a 100644 --- a/bin/varnishd/http2/cache_http2_panic.c +++ b/bin/varnishd/http2/cache_http2_panic.c @@ -109,8 +109,8 @@ h2_sess_panic(struct vsb *vsb, const struct sess *sp) r2->h2sess, r2->scheduled, h2_panic_error(r2->error)); VSB_printf(vsb, "t_send = %f, t_winupd = %f,\n", 
r2->t_send, r2->t_winupd); - VSB_printf(vsb, "t_window = %jd, r_window = %jd,\n", - (intmax_t)r2->t_window, (intmax_t)r2->r_window); + VSB_printf(vsb, "tx_window = %jd, rx_window = %jd,\n", + (intmax_t)r2->tx_window, (intmax_t)r2->rx_window); if (!PAN_dump_struct(vsb, r2->rxbuf, H2_RXBUF_MAGIC, "rxbuf")) { VSB_printf(vsb, "stvbuf = %p,\n", r2->rxbuf->stvbuf); diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index a8bd6b98cc..654d63991b 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -166,8 +166,8 @@ h2_new_req(struct h2_sess *h2, unsigned stream, struct req **preq) r2->req = req; if (stream) r2->counted = 1; - r2->r_window = h2->local_settings.initial_window_size; - r2->t_window = h2->remote_settings.initial_window_size; + r2->rx_window = h2->local_settings.initial_window_size; + r2->tx_window = h2->remote_settings.initial_window_size; req->transport_priv = r2; Lck_Lock(&h2->sess->mtx); if (stream) @@ -456,13 +456,13 @@ h2_rx_window_update(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) if (r2 == NULL) return (0); Lck_Lock(&h2->sess->mtx); - r2->t_window += wu; + r2->tx_window += wu; if (r2 == h2->req0) PTOK(pthread_cond_broadcast(h2->winupd_cond)); else if (r2->cond != NULL) PTOK(pthread_cond_signal(r2->cond)); Lck_Unlock(&h2->sess->mtx); - if (r2->t_window >= (1LL << 31)) + if (r2->tx_window >= (1LL << 31)) return (H2SE_FLOW_CONTROL_ERROR); return (0); } @@ -529,7 +529,7 @@ h2_win_adjust(const struct h2_sess *h2, uint32_t oldval, uint32_t newval) * We allow a window to go negative, as per * rfc7540,l,2676,2680 */ - r2->t_window += (int64_t)newval - oldval; + r2->tx_window += (int64_t)newval - oldval; break; default: break; diff --git a/bin/varnishd/http2/cache_http2_reqbody.c b/bin/varnishd/http2/cache_http2_reqbody.c index 2a4c923189..85b1f39d8e 100644 --- a/bin/varnishd/http2/cache_http2_reqbody.c +++ b/bin/varnishd/http2/cache_http2_reqbody.c @@ -111,7 
+111,7 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) /* Check and charge connection window. The entire frame including * padding (h2->rxf_len) counts towards the window. */ - if (h2->rxf_len > h2->req0->r_window) { + if (h2->rxf_len > h2->req0->rx_window) { VSLb(h2->vsl, SLT_SessError, "H2: stream %u: Exceeded connection receive window", h2->rxf_stream); @@ -121,9 +121,9 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) Lck_Unlock(&h2->sess->mtx); return (H2CE_FLOW_CONTROL_ERROR); } - h2->req0->r_window -= h2->rxf_len; - if (h2->req0->r_window < cache_param->h2_rx_window_low_water) { - h2->req0->r_window += cache_param->h2_rx_window_increment; + h2->req0->rx_window -= h2->rxf_len; + if (h2->req0->rx_window < cache_param->h2_rx_window_low_water) { + h2->req0->rx_window += cache_param->h2_rx_window_increment; vbe32enc(buf, cache_param->h2_rx_window_increment); Lck_Unlock(&h2->sess->mtx); H2_Send_Get(wrk, h2, h2->req0); @@ -134,7 +134,7 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) /* Check stream window. The entire frame including padding * (h2->rxf_len) counts towards the window. */ - if (h2->rxf_len > r2->r_window) { + if (h2->rxf_len > r2->rx_window) { VSLb(h2->vsl, SLT_Debug, "H2: stream %u: Exceeded stream receive window", h2->rxf_stream); @@ -147,7 +147,7 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) /* Handle zero size frame before starting to allocate buffers */ if (len == 0) { - r2->r_window -= h2->rxf_len; + r2->rx_window -= h2->rxf_len; /* Handle the specific corner case where the entire window * has been exhausted using nothing but padding @@ -156,13 +156,13 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) * window updates sent. Unpaint ourselves from this corner * by sending a stream window update here. 
*/ CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); - if (r2->r_window == 0 && + if (r2->rx_window == 0 && (r2->rxbuf == NULL || r2->rxbuf->tail == r2->rxbuf->head)) { if (r2->rxbuf) l = r2->rxbuf->size; else l = h2->local_settings.initial_window_size; - r2->r_window += l; + r2->rx_window += l; Lck_Unlock(&h2->sess->mtx); vbe32enc(buf, l); H2_Send_Get(wrk, h2, h2->req0); @@ -190,12 +190,12 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) Lck_Unlock(&h2->sess->mtx); bufsize = h2->local_settings.initial_window_size; - if (bufsize < r2->r_window) { + if (bufsize < r2->rx_window) { /* This will not happen because we do not have any * mechanism to change the initial window size on * a running session. But if we gain that ability, * this future proofs it. */ - bufsize = r2->r_window; + bufsize = r2->rx_window; } assert(bufsize > 0); if ((h2->rxf_flags & H2FF_END_STREAM) && @@ -260,7 +260,7 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) * bytes will be included in the next connection window update * sent when the buffer bytes are consumed because that is * calculated against the available buffer space. 
*/ - r2->r_window -= h2->rxf_len; + r2->rx_window -= h2->rxf_len; r2->rxbuf->head += len; assert(r2->rxbuf->tail <= r2->rxbuf->head); if (h2->rxf_flags & H2FF_END_STREAM) @@ -353,16 +353,16 @@ h2_vfp_body(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, ssize_t *lp) r2->rxbuf->tail = tail; assert(r2->rxbuf->tail <= r2->rxbuf->head); - if (r2->r_window < cache_param->h2_rx_window_low_water && + if (r2->rx_window < cache_param->h2_rx_window_low_water && r2->state < H2_S_CLOS_REM) { /* l is free buffer space */ /* l2 is calculated window increment */ l = r2->rxbuf->size - (r2->rxbuf->head - r2->rxbuf->tail); - assert(r2->r_window <= l); + assert(r2->rx_window <= l); l2 = cache_param->h2_rx_window_increment; - if (r2->r_window + l2 > l) - l2 = l - r2->r_window; - r2->r_window += l2; + if (r2->rx_window + l2 > l) + l2 = l - r2->rx_window; + r2->rx_window += l2; } else l2 = 0; diff --git a/bin/varnishd/http2/cache_http2_send.c b/bin/varnishd/http2/cache_http2_send.c index 584ad161df..e147cf5df5 100644 --- a/bin/varnishd/http2/cache_http2_send.c +++ b/bin/varnishd/http2/cache_http2_send.c @@ -252,7 +252,7 @@ h2_win_limit(const struct h2_req *r2, const struct h2_sess *h2) CHECK_OBJ_NOTNULL(h2->req0, H2_REQ_MAGIC); Lck_AssertHeld(&h2->sess->mtx); - return (vmin_t(int64_t, r2->t_window, h2->req0->t_window)); + return (vmin_t(int64_t, r2->tx_window, h2->req0->tx_window)); } static void @@ -263,8 +263,8 @@ h2_win_charge(struct h2_req *r2, const struct h2_sess *h2, uint32_t w) CHECK_OBJ_NOTNULL(h2->req0, H2_REQ_MAGIC); Lck_AssertHeld(&h2->sess->mtx); - r2->t_window -= w; - h2->req0->t_window -= w; + r2->tx_window -= w; + h2->req0->tx_window -= w; } static int64_t @@ -281,20 +281,20 @@ h2_do_window(struct worker *wrk, struct h2_req *r2, return (0); Lck_Lock(&h2->sess->mtx); - if (r2->t_window <= 0 || h2->req0->t_window <= 0) { + if (r2->tx_window <= 0 || h2->req0->tx_window <= 0) { r2->t_winupd = VTIM_real(); h2_send_rel_locked(h2, r2); assert(h2->winup_streams >= 0); 
h2->winup_streams++; - while (r2->t_window <= 0 && h2_errcheck(r2, h2) == NULL) { + while (r2->tx_window <= 0 && h2_errcheck(r2, h2) == NULL) { r2->cond = &wrk->cond; (void)h2_cond_wait(r2->cond, h2, r2); r2->cond = NULL; } - while (h2->req0->t_window <= 0 && h2_errcheck(r2, h2) == NULL) + while (h2->req0->tx_window <= 0 && h2_errcheck(r2, h2) == NULL) (void)h2_cond_wait(h2->winupd_cond, h2, r2); if (h2_errcheck(r2, h2) == NULL) { @@ -316,8 +316,8 @@ h2_do_window(struct worker *wrk, struct h2_req *r2, } if (w == 0 && h2_errcheck(r2, h2) == NULL) { - assert(r2->t_window > 0); - assert(h2->req0->t_window > 0); + assert(r2->tx_window > 0); + assert(h2->req0->tx_window > 0); w = h2_win_limit(r2, h2); if (w > wanted) w = wanted; From 01e82b764abb652ad59cfb4cae831893feb226a7 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 17 Apr 2025 14:17:17 +0200 Subject: [PATCH 26/39] http2: Rename ASSERT_RXTHR() to ASSERT_H2_SESS() This is not just any rx thread, and in the future the h2_sess thread will be in charge of sending frames too. While at it, properly check the h2 argument, properly compare threads, and add an h2_req counterpart since any thread working with h2 that is not the session thread, must be a request stream thread. 
--- bin/varnishd/http2/cache_http2.h | 12 +++++++- bin/varnishd/http2/cache_http2_proto.c | 39 ++++++++++++------------ bin/varnishd/http2/cache_http2_reqbody.c | 2 +- bin/varnishd/http2/cache_http2_send.c | 3 +- 4 files changed, 33 insertions(+), 23 deletions(-) diff --git a/bin/varnishd/http2/cache_http2.h b/bin/varnishd/http2/cache_http2.h index 81e1ce025b..9c5c8abe5e 100644 --- a/bin/varnishd/http2/cache_http2.h +++ b/bin/varnishd/http2/cache_http2.h @@ -214,7 +214,17 @@ struct h2_sess { vtim_real last_rst; }; -#define ASSERT_RXTHR(h2) do {assert(h2->rxthr == pthread_self());} while(0) +#define ASSERT_H2_SESS(h2) \ + do { \ + CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); \ + assert(pthread_equal(h2->rxthr, pthread_self())); \ + } while (0) + +#define ASSERT_H2_REQ(h2) \ + do { \ + CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); \ + assert(!pthread_equal(h2->rxthr, pthread_self())); \ + } while (0) /* http2/cache_http2_panic.c */ #ifdef TRANSPORT_MAGIC diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 654d63991b..75ce5d6187 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -149,7 +149,7 @@ h2_new_req(struct h2_sess *h2, unsigned stream, struct req **preq) struct req *req; struct h2_req *r2; - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); if (preq != NULL) TAKE_OBJ_NOTNULL(req, preq, REQ_MAGIC); else { @@ -189,8 +189,7 @@ h2_del_req(struct worker *wrk, struct h2_req **pr2) TAKE_OBJ_NOTNULL(r2, pr2, H2_REQ_MAGIC); AZ(r2->scheduled); h2 = r2->h2sess; - CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); sp = h2->sess; Lck_Lock(&sp->mtx); assert(h2->refcnt > 0); @@ -223,7 +222,7 @@ h2_kill_req(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2, h2_error h2e) { - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); AN(h2e); Lck_Lock(&h2->sess->mtx); VSLb(h2->vsl, SLT_Debug, "KILL st=%u state=%d sched=%d", @@ -304,7 +303,7 @@ h2_rx_ping(struct worker *wrk, struct h2_sess 
*h2, struct h2_req *r2) { CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); assert(r2 == h2->req0); @@ -331,7 +330,7 @@ h2_rx_push_promise(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); CHECK_OBJ_ORNULL(r2, H2_REQ_MAGIC); // rfc7540,l,2262,2267 @@ -349,7 +348,7 @@ h2_rapid_reset(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) vtim_dur d; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); if (h2->rapid_reset_limit == 0) @@ -382,7 +381,7 @@ h2_rx_rst_stream(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) h2_error h2e; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); CHECK_OBJ_ORNULL(r2, H2_REQ_MAGIC); if (h2->rxf_len != 4) { // rfc7540,l,2003,2004 @@ -404,7 +403,7 @@ h2_rx_goaway(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); assert(r2 == h2->req0); @@ -420,7 +419,7 @@ h2_tx_goaway(struct worker *wrk, struct h2_sess *h2, h2_error h2e) { char b[8]; - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); AN(h2e); if (h2->goaway || !h2e->send_goaway) @@ -443,7 +442,7 @@ h2_rx_window_update(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) uint32_t wu; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); CHECK_OBJ_ORNULL(r2, H2_REQ_MAGIC); if (h2->rxf_len != 4) { @@ -478,7 +477,7 @@ h2_rx_priority(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); CHECK_OBJ_ORNULL(r2, H2_REQ_MAGIC); return (0); } @@ -579,7 +578,7 @@ h2_rx_settings(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) h2_error retval = 0; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - 
ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); assert(r2 == h2->req0); AZ(h2->rxf_stream); @@ -649,7 +648,7 @@ h2_end_headers(struct worker *wrk, struct h2_sess *h2, h2_error h2e; ssize_t cl; - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); assert(r2->state == H2_S_OPEN); h2e = h2h_decode_hdr_fini(h2); h2->new_req = NULL; @@ -741,7 +740,7 @@ h2_rx_headers(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) size_t l; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); if (r2 != NULL) { H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx headers on non-idle stream"); @@ -835,7 +834,7 @@ h2_rx_continuation(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) h2_error h2e; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); CHECK_OBJ_ORNULL(r2, H2_REQ_MAGIC); if (r2 == NULL || r2->state != H2_S_OPEN || r2->req != h2->new_req) { @@ -865,7 +864,7 @@ h2_rx_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); CHECK_OBJ_ORNULL(r2, H2_REQ_MAGIC); if (r2 == NULL) @@ -912,7 +911,7 @@ h2_procframe(struct worker *wrk, struct h2_sess *h2, h2_frame h2f) struct h2_req *r2; h2_error h2e; - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); if (h2->rxf_stream == 0 && h2f->act_szero != 0) { H2S_Lock_VSLb(h2, SLT_SessError, "H2: unexpected %s frame on stream 0", h2f->name); @@ -1024,7 +1023,7 @@ h2_sweep(struct worker *wrk, struct h2_sess *h2) h2_error h2e, tmo; vtim_real now; - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); h2e = h2->error; now = VTIM_real(); @@ -1122,7 +1121,7 @@ h2_rxframe(struct worker *wrk, struct h2_sess *h2) * into. 
*/ AN(h2->htc->ws->r); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); if (h2->goaway && h2->open_streams == 0) { /* We have not called HTC_RxStuff(), and thus not released diff --git a/bin/varnishd/http2/cache_http2_reqbody.c b/bin/varnishd/http2/cache_http2_reqbody.c index 85b1f39d8e..6870f89096 100644 --- a/bin/varnishd/http2/cache_http2_reqbody.c +++ b/bin/varnishd/http2/cache_http2_reqbody.c @@ -58,7 +58,7 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); Lck_Lock(&h2->sess->mtx); CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); diff --git a/bin/varnishd/http2/cache_http2_send.c b/bin/varnishd/http2/cache_http2_send.c index e147cf5df5..81bf6ad7c8 100644 --- a/bin/varnishd/http2/cache_http2_send.c +++ b/bin/varnishd/http2/cache_http2_send.c @@ -108,7 +108,8 @@ h2_send_get_locked(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) Lck_AssertHeld(&h2->sess->mtx); if (&wrk->cond == h2->cond) - ASSERT_RXTHR(h2); + ASSERT_H2_SESS(h2); + r2->wrk = wrk; VTAILQ_INSERT_TAIL(&h2->txqueue, r2, tx_list); while (!H2_SEND_HELD(h2, r2)) From 2610bc0b16d7a4a9dc9fc6b20627d458a13f9857 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Fri, 18 Apr 2025 16:33:25 +0200 Subject: [PATCH 27/39] http2_send: Move h2_errcheck() to cache_http2_proto.c And make it operate on the r2_req only, from which the h2_sess can be reached. 
--- bin/varnishd/http2/cache_http2.h | 1 + bin/varnishd/http2/cache_http2_proto.c | 14 +++++++++++++ bin/varnishd/http2/cache_http2_send.c | 29 +++++++------------------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/bin/varnishd/http2/cache_http2.h b/bin/varnishd/http2/cache_http2.h index 9c5c8abe5e..0a92c3336b 100644 --- a/bin/varnishd/http2/cache_http2.h +++ b/bin/varnishd/http2/cache_http2.h @@ -275,6 +275,7 @@ void H2_Send(struct worker *, struct h2_req *, h2_frame type, uint8_t flags, uint32_t len, const void *, uint64_t *acct); /* cache_http2_proto.c */ +h2_error h2_errcheck(const struct h2_req *r2); struct h2_req * h2_new_req(struct h2_sess *, unsigned stream, struct req **); h2_error h2_stream_tmo(struct h2_sess *, const struct h2_req *, vtim_real); void h2_del_req(struct worker *, struct h2_req **); diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 75ce5d6187..3470f153ce 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -141,6 +141,20 @@ h2_connectionerror(uint32_t u) return (H2NN_ERROR); } +h2_error +h2_errcheck(const struct h2_req *r2) +{ + CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); + CHECK_OBJ_NOTNULL(r2->h2sess, H2_SESS_MAGIC); + + if (r2->error != NULL) + return (r2->error); + if (r2->h2sess->error != NULL && + r2->stream > r2->h2sess->goaway_last_stream) + return (r2->h2sess->error); + return (NULL); +} + /**********************************************************************/ struct h2_req * diff --git a/bin/varnishd/http2/cache_http2_send.c b/bin/varnishd/http2/cache_http2_send.c index 81bf6ad7c8..0f284cd244 100644 --- a/bin/varnishd/http2/cache_http2_send.c +++ b/bin/varnishd/http2/cache_http2_send.c @@ -43,19 +43,6 @@ #define H2_SEND_HELD(h2, r2) (VTAILQ_FIRST(&(h2)->txqueue) == (r2)) -static h2_error -h2_errcheck(const struct h2_req *r2, const struct h2_sess *h2) -{ - CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - CHECK_OBJ_NOTNULL(h2, 
H2_SESS_MAGIC); - - if (r2->error != NULL) - return (r2->error); - if (h2->error != NULL && r2->stream > h2->goaway_last_stream) - return (h2->error); - return (NULL); -} - static int h2_cond_wait(pthread_cond_t *cond, struct h2_sess *h2, struct h2_req *r2) { @@ -289,16 +276,16 @@ h2_do_window(struct worker *wrk, struct h2_req *r2, assert(h2->winup_streams >= 0); h2->winup_streams++; - while (r2->tx_window <= 0 && h2_errcheck(r2, h2) == NULL) { + while (r2->tx_window <= 0 && h2_errcheck(r2) == NULL) { r2->cond = &wrk->cond; (void)h2_cond_wait(r2->cond, h2, r2); r2->cond = NULL; } - while (h2->req0->tx_window <= 0 && h2_errcheck(r2, h2) == NULL) + while (h2->req0->tx_window <= 0 && h2_errcheck(r2) == NULL) (void)h2_cond_wait(h2->winupd_cond, h2, r2); - if (h2_errcheck(r2, h2) == NULL) { + if (h2_errcheck(r2) == NULL) { w = vmin_t(int64_t, h2_win_limit(r2, h2), wanted); h2_win_charge(r2, h2, w); assert (w > 0); @@ -316,7 +303,7 @@ h2_do_window(struct worker *wrk, struct h2_req *r2, h2_send_get_locked(wrk, h2, r2); } - if (w == 0 && h2_errcheck(r2, h2) == NULL) { + if (w == 0 && h2_errcheck(r2) == NULL) { assert(r2->tx_window > 0); assert(h2->req0->tx_window > 0); w = h2_win_limit(r2, h2); @@ -353,7 +340,7 @@ h2_send(struct worker *wrk, struct h2_req *r2, h2_frame ftyp, uint8_t flags, AN(H2_SEND_HELD(h2, r2)); - if (h2_errcheck(r2, h2) != NULL) + if (h2_errcheck(r2) != NULL) return; AN(ftyp); @@ -378,7 +365,7 @@ h2_send(struct worker *wrk, struct h2_req *r2, h2_frame ftyp, uint8_t flags, if (ftyp->respect_window) { tf = h2_do_window(wrk, r2, h2, (len > mfs) ? mfs : len); - if (h2_errcheck(r2, h2) != NULL) + if (h2_errcheck(r2) != NULL) return; AN(H2_SEND_HELD(h2, r2)); } else @@ -399,7 +386,7 @@ h2_send(struct worker *wrk, struct h2_req *r2, h2_frame ftyp, uint8_t flags, if (ftyp->respect_window && p != ptr) { tf = h2_do_window(wrk, r2, h2, (len > mfs) ? 
mfs : len); - if (h2_errcheck(r2, h2) != NULL) + if (h2_errcheck(r2) != NULL) return; AN(H2_SEND_HELD(h2, r2)); } @@ -453,7 +440,7 @@ H2_Send(struct worker *wrk, struct h2_req *r2, h2_frame ftyp, uint8_t flags, h2_send(wrk, r2, ftyp, flags, len, ptr, counter); - h2e = h2_errcheck(r2, r2->h2sess); + h2e = h2_errcheck(r2); if (H2_ERROR_MATCH(h2e, H2SE_CANCEL)) H2_Send_RST(wrk, r2->h2sess, r2, r2->stream, h2e); } From 319c043c06d503b8ee019e994072f10c94b0fff5 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Fri, 7 Mar 2025 14:30:17 +0100 Subject: [PATCH 28/39] http2_session: Associate an eventfd to each h2_sess This event FD will be used to signal from client threads to the session thread when sending data. --- bin/varnishd/http2/cache_http2.h | 3 +++ bin/varnishd/http2/cache_http2_session.c | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/bin/varnishd/http2/cache_http2.h b/bin/varnishd/http2/cache_http2.h index 0a92c3336b..cdce390e88 100644 --- a/bin/varnishd/http2/cache_http2.h +++ b/bin/varnishd/http2/cache_http2.h @@ -35,6 +35,7 @@ struct h2h_decode; struct h2_frame_s; #include "hpack/vhp.h" +#include "vefd.h" /**********************************************************************/ @@ -176,6 +177,8 @@ struct h2_sess { int bogosity; int do_sweep; + struct vefd efd[1]; + struct h2_req *req0; struct h2_req_s streams; diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index 4f47dac75a..5208c6e2cb 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -31,10 +31,9 @@ #include "config.h" -#include "cache/cache_varnishd.h" - #include +#include "cache/cache_varnishd.h" #include "cache/cache_transport.h" #include "http2/cache_http2.h" @@ -152,6 +151,7 @@ h2_init_sess(struct sess *sp, struct h2_sess *h2s, struct req **psrq, h2_local_settings(&h2->local_settings); h2->remote_settings = H2_proto_settings; h2->decode = decode; + 
VEFD_INIT(h2->efd); h2->rapid_reset = cache_param->h2_rapid_reset; h2->rapid_reset_limit = cache_param->h2_rapid_reset_limit; @@ -181,6 +181,8 @@ h2_del_sess(struct worker *wrk, struct h2_sess *h2, stream_close_t reason) VHT_Fini(h2->dectbl); PTOK(pthread_cond_destroy(h2->winupd_cond)); + if (h2->efd->poll_fd >= 0) + VEFD_Close(h2->efd); TAKE_OBJ_NOTNULL(req, &h2->srq, REQ_MAGIC); assert(!WS_IsReserved(req->ws)); sp = h2->sess; @@ -414,6 +416,17 @@ h2_new_session(struct worker *wrk, void *arg) AZ(h2->htc->priv); h2->htc->priv = h2; + /* Set up the eventfd for communication with request handling + * threads. */ + if (VEFD_Open(h2->efd) < 0) { + VSLb(h2->vsl, SLT_Error, "H2: Failed to create eventfd"); + assert(h2->refcnt == 1); + h2_del_req(wrk, &h2->req0); + h2_del_sess(wrk, h2, SC_OVERLOAD); + wrk->vsl = NULL; + return; + } + AZ(wrk->vsl); wrk->vsl = h2->vsl; From 8373853c3f48ce179ad8616deb3af9fee21dd582 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Tue, 25 Mar 2025 11:21:00 +0100 Subject: [PATCH 29/39] http2: Rework HTTP/2 to be using non-blocking sockets This large commit switches H2 sockets to be using non-blocking sockets instead of blocking operations. It also moves all IO handling into the session thread (where previously write IO was performed both by the session thread and request threads, using a mutex to give exclusive access to the writing). The non-blocking IO is handled as reentrant functions that will write what they can when called. A poll() is used in order to monitor the socket state and notice when further progress can be made. Request threads will queue their payload writes to the session thread, which will pick them up and execute both the framing and the writes. The request thread is then signalled when the payload has been transmitted. Because the change to non-blocking on the socket throws the rug from beneath most of the core H2 functionality, this is one large commit that changes everything in one go.
A send buffer for control frames is allocated at the beginning of the H2 workspace for stream 0. HTC Pipe-lining can no longer afford a complete rollback, and was changed to figure out how to reset or roll back the HTC workspace at HTC_RxInit() time, or leave it alone. The HTC owner is in charge of providing a workspace snapshot when needed. --- bin/varnishd/cache/cache_session.c | 9 +- bin/varnishd/cache/cache_varnishd.h | 5 +- bin/varnishd/cache/cache_ws.c | 6 +- bin/varnishd/cache/cache_ws_common.c | 2 + bin/varnishd/cache/cache_ws_emu.c | 6 +- bin/varnishd/http1/cache_http1_fsm.c | 1 + bin/varnishd/http2/cache_http2.h | 89 +-- bin/varnishd/http2/cache_http2_deliver.c | 56 +- bin/varnishd/http2/cache_http2_hpack.c | 58 +- bin/varnishd/http2/cache_http2_panic.c | 12 +- bin/varnishd/http2/cache_http2_proto.c | 878 ++++++++++++++--------- bin/varnishd/http2/cache_http2_reqbody.c | 207 +++--- bin/varnishd/http2/cache_http2_send.c | 768 ++++++++++++-------- bin/varnishd/http2/cache_http2_session.c | 130 +--- bin/varnishtest/tests/f00007.vtc | 1 + bin/varnishtest/tests/r02679.vtc | 2 + bin/varnishtest/tests/r02937.vtc | 9 +- bin/varnishtest/tests/t02003.vtc | 84 ++- bin/varnishtest/tests/t02005.vtc | 2 +- bin/varnishtest/tests/t02008.vtc | 2 + bin/varnishtest/tests/t02015.vtc | 4 +- bin/varnishtest/tests/t02016.vtc | 2 + bin/varnishtest/tests/t02020.vtc | 34 +- bin/varnishtest/tests/t02027.vtc | 12 +- include/tbl/h2_error.h | 18 + 25 files changed, 1357 insertions(+), 1040 deletions(-) diff --git a/bin/varnishd/cache/cache_session.c b/bin/varnishd/cache/cache_session.c index c8a23948d7..c0856b400b 100644 --- a/bin/varnishd/cache/cache_session.c +++ b/bin/varnishd/cache/cache_session.c @@ -258,18 +258,13 @@ HTC_Status(enum htc_status_e e, const char **name, const char **desc) void HTC_RxInit(struct http_conn *htc, struct ws *ws) { - unsigned rollback; int l; CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC); htc->ws = ws; - /* NB: HTTP/1 keep-alive triggers a rollback, so does the
first - * request of a session or an h2 request where the rollback is a - * no-op in terms of workspace usage. - */ - rollback = !strcasecmp(ws->id, "req") && htc->body_status == NULL; - l = WS_Pipeline(htc->ws, htc->pipeline_b, htc->pipeline_e, rollback); + l = WS_Pipeline(htc->ws, htc->pipeline_b, htc->pipeline_e, + htc->pipeline_snap); xxxassert(l >= 0); htc->rxbuf_b = WS_Reservation(ws); diff --git a/bin/varnishd/cache/cache_varnishd.h b/bin/varnishd/cache/cache_varnishd.h index 0bb2e9ff24..9eee2580af 100644 --- a/bin/varnishd/cache/cache_varnishd.h +++ b/bin/varnishd/cache/cache_varnishd.h @@ -104,6 +104,7 @@ struct http_conn { char *rxbuf_e; char *pipeline_b; char *pipeline_e; + uintptr_t pipeline_snap; ssize_t content_length; void *priv; @@ -569,7 +570,9 @@ WS_IsReserved(const struct ws *ws) void *WS_AtOffset(const struct ws *ws, unsigned off, unsigned len); unsigned WS_ReservationOffset(const struct ws *ws); -int WS_Pipeline(struct ws *, const void *b, const void *e, unsigned rollback); + +extern uintptr_t const ws_pipeline_rollback; +int WS_Pipeline(struct ws *, const void *b, const void *e, uintptr_t snap); /* cache_ws_common.c */ void WS_Id(const struct ws *ws, char *id); diff --git a/bin/varnishd/cache/cache_ws.c b/bin/varnishd/cache/cache_ws.c index 3f2cc5309c..7895e6161d 100644 --- a/bin/varnishd/cache/cache_ws.c +++ b/bin/varnishd/cache/cache_ws.c @@ -136,14 +136,16 @@ WS_Reset(struct ws *ws, uintptr_t pp) */ int -WS_Pipeline(struct ws *ws, const void *b, const void *e, unsigned rollback) +WS_Pipeline(struct ws *ws, const void *b, const void *e, uintptr_t snap) { unsigned r, l; WS_Assert(ws); - if (rollback) + if (snap == ws_pipeline_rollback) WS_Rollback(ws, 0); + else if (snap != 0) + WS_Rollback(ws, snap); r = WS_ReserveAll(ws); diff --git a/bin/varnishd/cache/cache_ws_common.c b/bin/varnishd/cache/cache_ws_common.c index a23cd06af6..bcd28b1e7a 100644 --- a/bin/varnishd/cache/cache_ws_common.c +++ b/bin/varnishd/cache/cache_ws_common.c @@ -37,6 
+37,8 @@ #include "cache_varnishd.h" +uintptr_t const ws_pipeline_rollback = (uintptr_t)&ws_pipeline_rollback; + void WS_Id(const struct ws *ws, char *id) { diff --git a/bin/varnishd/cache/cache_ws_emu.c b/bin/varnishd/cache/cache_ws_emu.c index 767839d1e2..c6a1393197 100644 --- a/bin/varnishd/cache/cache_ws_emu.c +++ b/bin/varnishd/cache/cache_ws_emu.c @@ -222,7 +222,7 @@ WS_Reset(struct ws *ws, uintptr_t pp) } int -WS_Pipeline(struct ws *ws, const void *b, const void *e, unsigned rollback) +WS_Pipeline(struct ws *ws, const void *b, const void *e, uintptr_t snap) { void *tmp; unsigned r, l; @@ -248,8 +248,10 @@ WS_Pipeline(struct ws *ws, const void *b, const void *e, unsigned rollback) tmp = NULL; } - if (rollback) + if (snap == ws_pipeline_rollback) WS_Rollback(ws, 0); + else if (snap != 0) + WS_Rollback(ws, snap); r = WS_ReserveAll(ws); diff --git a/bin/varnishd/http1/cache_http1_fsm.c b/bin/varnishd/http1/cache_http1_fsm.c index ac1dc012cf..f755f5fc77 100644 --- a/bin/varnishd/http1/cache_http1_fsm.c +++ b/bin/varnishd/http1/cache_http1_fsm.c @@ -111,6 +111,7 @@ http1_new_session(struct worker *wrk, void *arg) sp = req->sp; CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); + req->htc->pipeline_snap = ws_pipeline_rollback; HTC_RxInit(req->htc, req->ws); sz = sizeof u; diff --git a/bin/varnishd/http2/cache_http2.h b/bin/varnishd/http2/cache_http2.h index cdce390e88..dc4921566a 100644 --- a/bin/varnishd/http2/cache_http2.h +++ b/bin/varnishd/http2/cache_http2.h @@ -37,6 +37,8 @@ struct h2_frame_s; #include "hpack/vhp.h" #include "vefd.h" +#define H2_TX_BUFSIZE 1024 + /**********************************************************************/ struct h2_error_s { @@ -141,45 +143,42 @@ struct h2_req { int counted; struct h2_sess *h2sess; struct req *req; - double t_send; - double t_winupd; - pthread_cond_t *cond; + vtim_real t_send; + vtim_real t_win_low; VTAILQ_ENTRY(h2_req) list; int64_t tx_window; int64_t rx_window; - /* Where to wake this stream up */ - struct worker *wrk; - 
struct h2_rxbuf *rxbuf; + struct h2_reqbody_waiter *reqbody_waiter; + h2_error async_error; - VTAILQ_ENTRY(h2_req) tx_list; h2_error error; }; VTAILQ_HEAD(h2_req_s, h2_req); +struct h2_send_large; +VTAILQ_HEAD(h2_send_large_s, h2_send_large); + struct h2_sess { unsigned magic; #define H2_SESS_MAGIC 0xa16f7e4b pthread_t rxthr; - pthread_cond_t *cond; - pthread_cond_t winupd_cond[1]; struct sess *sess; int refcnt; int open_streams; - int winup_streams; + int win_low_streams; uint32_t highest_stream; - int goaway; int bogosity; - int do_sweep; struct vefd efd[1]; - struct h2_req *req0; + int64_t tx_window; + int64_t rx_window; struct h2_req_s streams; @@ -190,6 +189,21 @@ struct h2_sess { struct h2h_decode *decode; struct vht_table dectbl[1]; + vtim_real deadline; + + struct iovec tx_vec[2]; /* Must be 2 wide */ + unsigned tx_nvec; + + uint8_t *tx_s_start; + uint8_t *tx_s_end; + uint8_t *tx_s_head; + uint8_t *tx_s_mark; + + struct h2_send_large_s tx_l_queue; + struct h2_send_large *tx_l_current; + uint8_t tx_l_hdrbuf[9]; + char tx_l_stuck; + unsigned rxf_len; unsigned rxf_type; unsigned rxf_flags; @@ -199,11 +213,8 @@ struct h2_sess { struct h2_settings remote_settings; struct h2_settings local_settings; - struct req *new_req; + struct h2_req *hpack_lock; vtim_real t1; // t_first for new_req - uint32_t goaway_last_stream; - - VTAILQ_HEAD(,h2_req) txqueue; h2_error error; @@ -249,7 +260,6 @@ struct h2h_decode { unsigned has_scheme:1; h2_error error; enum vhd_ret_e vhd_ret; - struct ws *ws; char *out; int64_t limit; size_t out_l; @@ -258,34 +268,34 @@ struct h2h_decode { struct vhd_decode vhd[1]; }; -void h2h_decode_hdr_init(const struct h2_sess *h2); -h2_error h2h_decode_hdr_fini(const struct h2_sess *h2); +void h2h_decode_hdr_init(struct h2_sess *h2, struct h2_req *); +h2_error h2h_decode_hdr_fini(struct h2_sess *h2); h2_error h2h_decode_bytes(struct h2_sess *h2, const uint8_t *ptr, size_t len); /* cache_http2_send.c */ -void H2_Send_Get(struct worker *, struct 
h2_sess *, struct h2_req *); -void H2_Send_Rel(struct h2_sess *, const struct h2_req *); - -void H2_Send_Frame(struct worker *, struct h2_sess *, - h2_frame type, uint8_t flags, uint32_t len, uint32_t stream, - const void *); - -void H2_Send_RST(struct worker *wrk, struct h2_sess *h2, - const struct h2_req *r2, uint32_t stream, h2_error h2e); - -void H2_Send(struct worker *, struct h2_req *, h2_frame type, uint8_t flags, - uint32_t len, const void *, uint64_t *acct); +int H2_Send_RST(struct h2_sess *h2, uint32_t stream, h2_error h2e); +int H2_Send_SETTINGS(struct h2_sess *h2, uint8_t flags, ssize_t len, + const uint8_t *buf); +int H2_Send_PING(struct h2_sess *h2, uint8_t flags, uint64_t data); +int H2_Send_GOAWAY(struct h2_sess *h2, uint32_t last_stream_id, h2_error h2e); +int H2_Send_WINDOW_UPDATE(struct h2_sess *h2, uint32_t stream, uint32_t incr); +int H2_Send(struct vsl_log *vsl, struct h2_req *r2, h2_frame ftyp, + uint8_t flags, uint32_t len, const void *ptr); +ssize_t H2_Send_TxStuff(struct h2_sess *h2); +int H2_Send_Something(struct h2_sess *h2); +int H2_Send_Pending(struct h2_sess *h2); +void H2_Send_Shutdown(struct h2_sess *h2); /* cache_http2_proto.c */ +const char *h2_framename(int frame); h2_error h2_errcheck(const struct h2_req *r2); +void h2_async_error(struct h2_req *r2, h2_error h2e); +void h2_attention(struct h2_sess *h2); +void h2_run(struct worker *wrk, struct h2_sess *h2); struct h2_req * h2_new_req(struct h2_sess *, unsigned stream, struct req **); -h2_error h2_stream_tmo(struct h2_sess *, const struct h2_req *, vtim_real); -void h2_del_req(struct worker *, struct h2_req **); -void h2_kill_req(struct worker *, struct h2_sess *, struct h2_req *, h2_error); -int h2_rxframe(struct worker *, struct h2_sess *); +void h2_kill_req(struct worker *, struct h2_sess *, struct h2_req **, h2_error); h2_error h2_set_setting(struct h2_sess *, const uint8_t *); -void h2_tx_goaway(struct worker *wrk, struct h2_sess *h2, h2_error h2e); task_func_t h2_do_req; 
#ifdef TRANSPORT_MAGIC vtr_req_fail_f h2_req_fail; @@ -294,7 +304,4 @@ vtr_req_fail_f h2_req_fail; /* cache_http2_reqbody.c */ h2_error h2_reqbody_data(struct worker *, struct h2_sess *, struct h2_req *); void h2_reqbody(struct req *); - -/* cache_http2_session.c */ -void -H2S_Lock_VSLb(const struct h2_sess *, enum VSL_tag_e, const char *, ...); +void h2_reqbody_kick(struct h2_req *r2); diff --git a/bin/varnishd/http2/cache_http2_deliver.c b/bin/varnishd/http2/cache_http2_deliver.c index c4a15355cf..a3bc571a5e 100644 --- a/bin/varnishd/http2/cache_http2_deliver.c +++ b/bin/varnishd/http2/cache_http2_deliver.c @@ -89,25 +89,27 @@ static int v_matchproto_(vdp_fini_f) h2_vdp_fini(struct vdp_ctx *vdc, void **priv) { struct h2_req *r2; + h2_error h2e = NULL; CHECK_OBJ_NOTNULL(vdc, VDP_CTX_MAGIC); CHECK_OBJ_NOTNULL(vdc->wrk, WORKER_MAGIC); TAKE_OBJ_NOTNULL(r2, priv, H2_REQ_MAGIC); - if (r2->error) - return (0); - if (vdc->retval < 0) { - r2->error = H2SE_INTERNAL_ERROR; /* XXX: proper error? */ - H2_Send_Get(vdc->wrk, r2->h2sess, r2); - H2_Send_RST(vdc->wrk, r2->h2sess, r2, r2->stream, r2->error); - H2_Send_Rel(r2->h2sess, r2); - return (0); + h2e = H2SE_INTERNAL_ERROR; + h2_async_error(r2, h2e); + } else + h2e = h2_errcheck(r2); + + if (h2e != NULL) + VSLb(vdc->vsl, SLT_Error, "H2: delivery error (%s)", h2e->name); + + if (h2e == NULL && r2->state < H2_S_CLOSED) { + /* Not all VDPs will always send VDP_END (e.g. ESI). End + * the stream here if necessary. 
*/ + H2_Send(vdc->vsl, r2, H2_F_DATA, H2FF_END_STREAM, 0, NULL); } - H2_Send_Get(vdc->wrk, r2->h2sess, r2); - H2_Send(vdc->wrk, r2, H2_F_DATA, H2FF_END_STREAM, 0, "", NULL); - H2_Send_Rel(r2->h2sess, r2); return (0); } @@ -116,19 +118,27 @@ h2_vdp_bytes(struct vdp_ctx *vdc, enum vdp_action act, void **priv, const void *ptr, ssize_t len) { struct h2_req *r2; + uint8_t flags = H2FF_NONE; CHECK_OBJ_NOTNULL(vdc, VDP_CTX_MAGIC); CAST_OBJ_NOTNULL(r2, *priv, H2_REQ_MAGIC); - (void)act; + assert(len >= 0); - if ((r2->h2sess->error || r2->error)) + if (h2_errcheck(r2) != NULL) return (-1); - if (len == 0) + vdc->bytes_done = len; + if (len == 0) { + /* No reason to send an empty frame. There is code + * (notably ESI) that will pass len==0 without + * VDP_END. An incomplete delivery will result in + * the len==0 && VDP_END combo, deferring the final + * DATA frame to the h2_vdp_fini() call. */ return (0); - H2_Send_Get(vdc->wrk, r2->h2sess, r2); - vdc->bytes_done = 0; - H2_Send(vdc->wrk, r2, H2_F_DATA, H2FF_NONE, len, ptr, &vdc->bytes_done); - H2_Send_Rel(r2->h2sess, r2); + } + if (act == VDP_END) + flags |= H2FF_END_STREAM; + // XXX? 
return (H2_Send(...)); + H2_Send(vdc->vsl, r2, H2_F_DATA, flags, len, ptr); return (0); } @@ -193,10 +203,7 @@ h2_minimal_response(struct req *req, uint16_t status) flags = H2FF_END_HEADERS; if (status >= 200) flags |= H2FF_END_STREAM; - H2_Send_Get(req->wrk, r2->h2sess, r2); - H2_Send(req->wrk, r2, H2_F_HEADERS, flags, l, buf, NULL); - H2_Send_Rel(r2->h2sess, r2); - return (0); + return (H2_Send(req->vsl, r2, H2_F_HEADERS, flags, l, buf)); } static void @@ -336,10 +343,7 @@ h2_deliver(struct req *req, int sendbody) flags = H2FF_END_HEADERS; if (!sendbody) flags |= H2FF_END_STREAM; - H2_Send_Get(req->wrk, r2->h2sess, r2); - H2_Send(req->wrk, r2, H2_F_HEADERS, flags, sz, r, - &req->acct.resp_hdrbytes); - H2_Send_Rel(r2->h2sess, r2); + H2_Send(req->vsl, r2, H2_F_HEADERS, flags, sz, r); WS_Reset(req->ws, ss); diff --git a/bin/varnishd/http2/cache_http2_hpack.c b/bin/varnishd/http2/cache_http2_hpack.c index a90e6fde23..ae3709985e 100644 --- a/bin/varnishd/http2/cache_http2_hpack.c +++ b/bin/varnishd/http2/cache_http2_hpack.c @@ -260,25 +260,31 @@ h2h_addhdr(struct http *hp, struct h2h_decode *d) return (0); } -static void -h2h_decode_init(const struct h2_sess *h2, struct ws *ws) +void +h2h_decode_hdr_init(struct h2_sess *h2, struct h2_req *r2) { struct h2h_decode *d; CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); - CHECK_OBJ_NOTNULL(ws, WS_MAGIC); + CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); + CHECK_OBJ_NOTNULL(r2->req, REQ_MAGIC); + CHECK_OBJ_NOTNULL(r2->req->http, HTTP_MAGIC); + + AZ(h2->hpack_lock); + h2->hpack_lock = r2; AN(h2->decode); d = h2->decode; INIT_OBJ(d, H2H_DECODE_MAGIC); VHD_Init(d->vhd); - d->out_l = WS_ReserveSize(ws, cache_param->http_req_size); + d->out_l = WS_ReserveSize(h2->hpack_lock->req->http->ws, + cache_param->http_req_size); /* * Can't do any work without any buffer * space. Require non-zero size. 
*/ XXXAN(d->out_l); - d->out = WS_Reservation(ws); + d->out = WS_Reservation(h2->hpack_lock->req->http->ws); if (cache_param->h2_max_header_list_size == 0) d->limit = @@ -288,18 +294,6 @@ h2h_decode_init(const struct h2_sess *h2, struct ws *ws) if (d->limit < h2->local_settings.max_header_list_size) d->limit = INT64_MAX; - - d->ws = ws; -} - -void -h2h_decode_hdr_init(const struct h2_sess *h2) -{ - - CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); - CHECK_OBJ_NOTNULL(h2->new_req, REQ_MAGIC); - CHECK_OBJ_NOTNULL(h2->new_req->http, HTTP_MAGIC); - h2h_decode_init(h2, h2->new_req->ws); } /* Possible error returns: @@ -311,32 +305,34 @@ h2h_decode_hdr_init(const struct h2_sess *h2) * is a stream level error. */ h2_error -h2h_decode_hdr_fini(const struct h2_sess *h2) +h2h_decode_hdr_fini(struct h2_sess *h2) { h2_error ret; struct h2h_decode *d; CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); d = h2->decode; - CHECK_OBJ_NOTNULL(h2->new_req, REQ_MAGIC); + CHECK_OBJ_NOTNULL(h2->hpack_lock, H2_REQ_MAGIC); CHECK_OBJ_NOTNULL(d, H2H_DECODE_MAGIC); - WS_ReleaseP(d->ws, d->out); + WS_ReleaseP(h2->hpack_lock->req->http->ws, d->out); if (d->vhd_ret != VHD_OK) { /* HPACK header block didn't finish at an instruction boundary */ - VSLb(h2->new_req->http->vsl, SLT_BogoHeader, + VSLb(h2->hpack_lock->req->http->vsl, SLT_BogoHeader, "HPACK compression error/fini (%s)", VHD_Error(d->vhd_ret)); ret = H2CE_COMPRESSION_ERROR; } else if (d->error == NULL && !d->has_scheme) { - H2S_Lock_VSLb(h2, SLT_Debug, "Missing :scheme"); + VSLb(h2->vsl, SLT_Debug, "Missing :scheme"); ret = H2SE_MISSING_SCHEME; //rfc7540,l,3087,3090 } else ret = d->error; FINI_OBJ(d); if (ret == H2SE_REQ_SIZE) { - VSLb(h2->new_req->http->vsl, SLT_LostHeader, + VSLb(h2->hpack_lock->req->http->vsl, SLT_LostHeader, "Header list too large"); } + h2->hpack_lock = NULL; + return (ret); } @@ -357,15 +353,15 @@ h2h_decode_bytes(struct h2_sess *h2, const uint8_t *in, size_t in_l) const char *r, *e; CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); - 
CHECK_OBJ_NOTNULL(h2->new_req, REQ_MAGIC); - hp = h2->new_req->http; + CHECK_OBJ_NOTNULL(h2->hpack_lock, H2_REQ_MAGIC); + hp = h2->hpack_lock->req->http; CHECK_OBJ_NOTNULL(hp, HTTP_MAGIC); d = h2->decode; CHECK_OBJ_NOTNULL(d, H2H_DECODE_MAGIC); - CHECK_OBJ_NOTNULL(d->ws, WS_MAGIC); - r = WS_Reservation(d->ws); + CHECK_OBJ_NOTNULL(h2->hpack_lock->req->http->ws, WS_MAGIC); + r = WS_Reservation(h2->hpack_lock->req->http->ws); AN(r); - e = r + WS_ReservationSize(d->ws); + e = r + WS_ReservationSize(h2->hpack_lock->req->http->ws); /* Only H2E_ENHANCE_YOUR_CALM indicates that we should continue processing. Other errors should have been returned and handled @@ -380,7 +376,7 @@ h2h_decode_bytes(struct h2_sess *h2, const uint8_t *in, size_t in_l) d->out, d->out_l, &d->out_u); if (d->vhd_ret < 0) { - H2S_Lock_VSLb(h2, SLT_BogoHeader, + VSLb(h2->vsl, SLT_BogoHeader, "HPACK compression error (%s)", VHD_Error(d->vhd_ret)); d->error = H2CE_COMPRESSION_ERROR; @@ -440,7 +436,7 @@ h2h_decode_bytes(struct h2_sess *h2, const uint8_t *in, size_t in_l) } if (H2_ERROR_MATCH(d->error, H2SE_ENHANCE_YOUR_CALM)) { - d->out = WS_Reservation(d->ws); + d->out = WS_Reservation(h2->hpack_lock->req->http->ws); d->out_l = e - d->out; d->limit -= d->out_u; d->out_u = 0; @@ -452,7 +448,7 @@ h2h_decode_bytes(struct h2_sess *h2, const uint8_t *in, size_t in_l) if (d->limit < 0) { /* Fatal error, the client exceeded both http_req_size * and h2_max_header_list_size. 
*/ - H2S_Lock_VSLb(h2, SLT_SessError, "Header list too large"); + VSLb(h2->vsl, SLT_SessError, "Header list too large"); return (H2CE_ENHANCE_YOUR_CALM); } diff --git a/bin/varnishd/http2/cache_http2_panic.c b/bin/varnishd/http2/cache_http2_panic.c index d6cda3959a..6585485037 100644 --- a/bin/varnishd/http2/cache_http2_panic.c +++ b/bin/varnishd/http2/cache_http2_panic.c @@ -78,10 +78,10 @@ h2_sess_panic(struct vsb *vsb, const struct sess *sp) return; VSB_printf(vsb, "refcnt = %d, bogosity = %d, error = %s\n", h2->refcnt, h2->bogosity, h2_panic_error(h2->error)); - VSB_printf(vsb, - "open_streams = %d, highest_stream = %u," - " goaway_last_stream = %u,\n", - h2->open_streams, h2->highest_stream, h2->goaway_last_stream); + VSB_printf(vsb, "open_streams = %d, highest_stream = %u,\n", + h2->open_streams, h2->highest_stream); + VSB_printf(vsb, "tx_window = %jd, rx_window = %jd,\n", + h2->tx_window, h2->rx_window); VSB_cat(vsb, "local_settings = {"); h2_panic_settings(vsb, &h2->local_settings); VSB_cat(vsb, "},\n"); @@ -107,8 +107,8 @@ h2_sess_panic(struct vsb *vsb, const struct sess *sp) VSB_printf(vsb, "h2_sess = %p, scheduled = %d, error = %s,\n", r2->h2sess, r2->scheduled, h2_panic_error(r2->error)); - VSB_printf(vsb, "t_send = %f, t_winupd = %f,\n", - r2->t_send, r2->t_winupd); + VSB_printf(vsb, "t_send = %f, t_win_low = %f,\n", + r2->t_send, r2->t_win_low); VSB_printf(vsb, "tx_window = %jd, rx_window = %jd,\n", (intmax_t)r2->tx_window, (intmax_t)r2->rx_window); diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 3470f153ce..80ae5b143e 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -31,11 +31,11 @@ #include "config.h" -#include "cache/cache_varnishd.h" - +#include #include #include +#include "cache/cache_varnishd.h" #include "cache/cache_transport.h" #include "cache/cache_filter.h" #include "http2/cache_http2.h" @@ -74,11 +74,11 @@ enum h2frame { #include 
"tbl/h2_frames.h" }; -static const char * -h2_framename(enum h2frame h2f) +const char * +h2_framename(int frame) { - switch (h2f) { + switch (frame) { #define H2_FRAME(l,u,t,f,...) case H2F_##u: return (#u); #include "tbl/h2_frames.h" default: @@ -149,10 +149,7 @@ h2_errcheck(const struct h2_req *r2) if (r2->error != NULL) return (r2->error); - if (r2->h2sess->error != NULL && - r2->stream > r2->h2sess->goaway_last_stream) - return (r2->h2sess->error); - return (NULL); + return (r2->h2sess->error); } /**********************************************************************/ @@ -178,13 +175,11 @@ h2_new_req(struct h2_sess *h2, unsigned stream, struct req **preq) r2->h2sess = h2; r2->stream = stream; r2->req = req; - if (stream) - r2->counted = 1; r2->rx_window = h2->local_settings.initial_window_size; r2->tx_window = h2->remote_settings.initial_window_size; req->transport_priv = r2; Lck_Lock(&h2->sess->mtx); - if (stream) + if (stream > 0) h2->open_streams++; VTAILQ_INSERT_TAIL(&h2->streams, r2, list); Lck_Unlock(&h2->sess->mtx); @@ -192,7 +187,7 @@ h2_new_req(struct h2_sess *h2, unsigned stream, struct req **preq) return (r2); } -void +static void h2_del_req(struct worker *wrk, struct h2_req **pr2) { struct h2_req *r2; @@ -205,15 +200,13 @@ h2_del_req(struct worker *wrk, struct h2_req **pr2) h2 = r2->h2sess; ASSERT_H2_SESS(h2); sp = h2->sess; - Lck_Lock(&sp->mtx); assert(h2->refcnt > 0); --h2->refcnt; /* XXX: PRIORITY reshuffle */ VTAILQ_REMOVE(&h2->streams, r2, list); - if (r2->req == h2->new_req) - h2->new_req = NULL; - Lck_Unlock(&sp->mtx); + assert(r2->t_win_low == 0.); + AZ(r2->reqbody_waiter); assert(!WS_IsReserved(r2->req->ws)); AZ(r2->req->ws->r); @@ -232,40 +225,73 @@ h2_del_req(struct worker *wrk, struct h2_req **pr2) } void -h2_kill_req(struct worker *wrk, struct h2_sess *h2, - struct h2_req *r2, h2_error h2e) +h2_kill_req(struct worker *wrk, struct h2_sess *h2, struct h2_req **pr2, + h2_error h2e) { + struct h2_req *r2; ASSERT_H2_SESS(h2); + 
TAKE_OBJ_NOTNULL(r2, pr2, H2_REQ_MAGIC); AN(h2e); - Lck_Lock(&h2->sess->mtx); - VSLb(h2->vsl, SLT_Debug, "KILL st=%u state=%d sched=%d", - r2->stream, r2->state, r2->scheduled); - if (r2->counted) { + + VSLb(h2->vsl, SLT_Debug, "KILL st=%u state=%d sched=%d error=%d", + r2->stream, r2->state, r2->scheduled, h2e->val); + + if (h2->error != NULL) { + /* The connection is in an error state. Don't send RST. */ + } else if (r2->error == NULL && r2->state < H2_S_CLOSED) { + /* Notify the peer only first time it is killed. */ + H2_Send_RST(h2, r2->stream, h2e); + } + + if (r2->error == NULL || H2_ERROR_MATCH(r2->error, H2SE_NO_ERROR)) { + /* We latch the first error set, except if it was a "no + * error". */ + r2->error = h2e; + } + + if (r2 == h2->hpack_lock) + (void)h2h_decode_hdr_fini(h2); + AZ(h2->hpack_lock); + + if (r2->t_win_low != 0.) { + assert(h2->win_low_streams > 0); + h2->win_low_streams--; + r2->t_win_low = 0.; + } + + if (r2->state < H2_S_CLOSED) { + r2->state = H2_S_CLOSED; assert(h2->open_streams > 0); h2->open_streams--; - r2->counted = 0; } - if (r2->error == NULL) - r2->error = h2e; + if (r2->scheduled) { - if (r2->cond != NULL) - PTOK(pthread_cond_signal(r2->cond)); - r2 = NULL; + Lck_Lock(&h2->sess->mtx); + h2_reqbody_kick(r2); Lck_Unlock(&h2->sess->mtx); } else { - Lck_Unlock(&h2->sess->mtx); - if (r2->state == H2_S_OPEN && h2->new_req == r2->req) - (void)h2h_decode_hdr_fini(h2); - } - if (r2 != NULL) h2_del_req(wrk, &r2); + } +} + +static void +h2_kill_all(struct worker *wrk, struct h2_sess *h2, h2_error h2e) +{ + struct h2_req *r2, *r22; + + CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); + AN(h2e); + VTAILQ_FOREACH_SAFE(r2, &h2->streams, list, r22) { + CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); + h2_kill_req(wrk, h2, &r2, h2e); + } } /**********************************************************************/ static void -h2_vsl_frame(const struct h2_sess *h2, const void *ptr, size_t len) +h2_rxframe_vsl(const struct h2_sess *h2, const void *ptr, size_t len) { const 
uint8_t *b; struct vsb *vsb; @@ -293,18 +319,12 @@ h2_vsl_frame(const struct h2_sess *h2, const void *ptr, size_t len) VSB_quote(vsb, b + 4, 1, VSB_QUOTE_HEX); VSB_putc(vsb, ' '); VSB_quote(vsb, b + 5, 4, VSB_QUOTE_HEX); - if (u > 0) { - VSB_putc(vsb, ' '); - VSB_quote(vsb, b + 9, len - 9, VSB_QUOTE_HEX); - } AZ(VSB_finish(vsb)); - Lck_Lock(&h2->sess->mtx); VSLb_bin(h2->vsl, SLT_H2RxHdr, 9, b); if (len > 9) VSLb_bin(h2->vsl, SLT_H2RxBody, len - 9, b + 9); VSLb(h2->vsl, SLT_Debug, "H2RXF %s", VSB_data(vsb)); - Lck_Unlock(&h2->sess->mtx); VSB_destroy(&vsb); } @@ -315,24 +335,24 @@ h2_vsl_frame(const struct h2_sess *h2, const void *ptr, size_t len) static h2_error v_matchproto_(h2_rxframe_f) h2_rx_ping(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { + uint64_t val; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); ASSERT_H2_SESS(h2); - CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - assert(r2 == h2->req0); + AZ(r2); if (h2->rxf_len != 8) { // rfc7540,l,2364,2366 - H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx ping with (len != 8)"); + VSLb(h2->vsl, SLT_SessError, "H2: rx ping with (len != 8)"); return (H2CE_FRAME_SIZE_ERROR); } AZ(h2->rxf_stream); // rfc7540,l,2359,2362 if (h2->rxf_flags != 0) { // We never send pings - H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx ping ack"); + VSLb(h2->vsl, SLT_SessError, "H2: rx ping ack"); return (H2SE_PROTOCOL_ERROR); } - H2_Send_Get(wrk, h2, r2); - H2_Send_Frame(wrk, h2, H2_F_PING, H2FF_ACK, 8, 0, h2->rxf_data); - H2_Send_Rel(h2, r2); + _Static_assert(sizeof (val) == 8, ""); + memcpy(&val, h2->rxf_data, sizeof val); + H2_Send_PING(h2, H2FF_ACK, val); return (0); } @@ -348,7 +368,7 @@ h2_rx_push_promise(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) CHECK_OBJ_ORNULL(r2, H2_REQ_MAGIC); // rfc7540,l,2262,2267 - H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx push promise"); + VSLb(h2->vsl, SLT_SessError, "H2: rx push promise"); return (H2CE_PROTOCOL_ERROR); } @@ -382,7 +402,7 @@ h2_rapid_reset(struct worker *wrk, struct h2_sess *h2, struct 
h2_req *r2) h2->last_rst = now; if (h2->rst_budget < 1.0) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: Hit RST limit. Closing session."); + VSLb(h2->vsl, SLT_SessError, "H2: Hit RST limit. Closing session."); return (H2CE_RAPID_RESET); } h2->rst_budget -= 1.0; @@ -392,21 +412,27 @@ h2_rapid_reset(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) static h2_error v_matchproto_(h2_rxframe_f) h2_rx_rst_stream(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { - h2_error h2e; + h2_error rapid_fault; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); ASSERT_H2_SESS(h2); CHECK_OBJ_ORNULL(r2, H2_REQ_MAGIC); if (h2->rxf_len != 4) { // rfc7540,l,2003,2004 - H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx rst with (len != 4)"); + VSLb(h2->vsl, SLT_SessError, "H2: rx rst with (len != 4)"); return (H2CE_FRAME_SIZE_ERROR); } if (r2 == NULL) return (0); - h2e = h2_rapid_reset(wrk, h2, r2); - h2_kill_req(wrk, h2, r2, h2_streamerror(vbe32dec(h2->rxf_data))); - return (h2e); + + rapid_fault = h2_rapid_reset(wrk, h2, r2); + + /* We set `r2->error` prior to killing to prevent sending a RST in + * return. 
*/ + r2->error = h2_streamerror(vbe32dec(h2->rxf_data)); + h2_kill_req(wrk, h2, &r2, r2->error); + + return (rapid_fault); } /********************************************************************** @@ -415,36 +441,22 @@ h2_rx_rst_stream(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) static h2_error v_matchproto_(h2_rxframe_f) h2_rx_goaway(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { + h2_error h2e; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); ASSERT_H2_SESS(h2); - CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - assert(r2 == h2->req0); + AZ(r2); - h2->goaway = 1; - h2->goaway_last_stream = vbe32dec(h2->rxf_data); - h2->error = h2_connectionerror(vbe32dec(h2->rxf_data + 4)); - H2S_Lock_VSLb(h2, SLT_Debug, "GOAWAY %s", h2->error->name); - return (h2->error); -} - -void -h2_tx_goaway(struct worker *wrk, struct h2_sess *h2, h2_error h2e) -{ - char b[8]; - - ASSERT_H2_SESS(h2); + h2e = h2_connectionerror(vbe32dec(h2->rxf_data + 4)); AN(h2e); - if (h2->goaway || !h2e->send_goaway) - return; - - h2->goaway = 1; - vbe32enc(b, h2->highest_stream); - vbe32enc(b + 4, h2e->val); - H2_Send_Get(wrk, h2, h2->req0); - H2_Send_Frame(wrk, h2, H2_F_GOAWAY, 0, 8, 0, b); - H2_Send_Rel(h2, h2->req0); + VSLb(h2->vsl, SLT_Debug, "GOAWAY %s", h2e->name); /* XXX: Remove? */ + if (!H2_ERROR_MATCH(h2e, H2CE_NO_ERROR)) { + /* XXX: Should we log something (not SLT_Error) on a + * graceful shutdown? 
*/ + VSLb(h2->vsl, SLT_Error, "H2: rx goaway %s", h2e->name); + } + return (H2CE_NO_ERROR); } /********************************************************************** @@ -460,23 +472,30 @@ h2_rx_window_update(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) CHECK_OBJ_ORNULL(r2, H2_REQ_MAGIC); if (h2->rxf_len != 4) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx winup with (len != 4)"); + VSLb(h2->vsl, SLT_SessError, "H2: rx winup with (len != 4)"); return (H2CE_FRAME_SIZE_ERROR); } wu = vbe32dec(h2->rxf_data) & ~(1LU<<31); - if (wu == 0) - return (H2SE_PROTOCOL_ERROR); - if (r2 == NULL) - return (0); - Lck_Lock(&h2->sess->mtx); - r2->tx_window += wu; - if (r2 == h2->req0) - PTOK(pthread_cond_broadcast(h2->winupd_cond)); - else if (r2->cond != NULL) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); - if (r2->tx_window >= (1LL << 31)) - return (H2SE_FLOW_CONTROL_ERROR); + if (r2 == NULL) { + if (wu == 0) + return (H2CE_PROTOCOL_ERROR); + h2->tx_window += wu; + if (h2->tx_window >= (1LL << 31)) + return (H2CE_FLOW_CONTROL_ERROR); + } else { + if (wu == 0) + return (H2SE_PROTOCOL_ERROR); + r2->tx_window += wu; + if (r2->tx_window >= (1LL << 31)) + return (H2SE_FLOW_CONTROL_ERROR); + if (r2->t_win_low != 0.) { + assert(h2->win_low_streams > 0); + h2->win_low_streams--; + r2->t_win_low = 0.; + } + } + /* Assume we are no longer stuck on output. 
*/ + h2->tx_l_stuck = 0; return (0); } @@ -528,12 +547,9 @@ h2_win_adjust(const struct h2_sess *h2, uint32_t oldval, uint32_t newval) { struct h2_req *r2; - Lck_AssertHeld(&h2->sess->mtx); // rfc7540,l,2668,2674 VTAILQ_FOREACH(r2, &h2->streams, list) { CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - if (r2 == h2->req0) - continue; // rfc7540,l,2699,2699 switch (r2->state) { case H2_S_IDLE: case H2_S_OPEN: @@ -561,22 +577,25 @@ h2_set_setting(struct h2_sess *h2, const uint8_t *d) y = vbe32dec(d + 2); if (x >= H2_SETTING_TBL_LEN || h2_setting_tbl[x] == NULL) { // rfc7540,l,2181,2182 - H2S_Lock_VSLb(h2, SLT_Debug, + VSLb(h2->vsl, SLT_Debug, "H2SETTING unknown setting 0x%04x=%08x (ignored)", x, y); return (0); } s = h2_setting_tbl[x]; AN(s); if (y < s->minval || y > s->maxval) { - H2S_Lock_VSLb(h2, SLT_Debug, "H2SETTING invalid %s=0x%08x", + VSLb(h2->vsl, SLT_Debug, "H2SETTING invalid %s=0x%08x", s->name, y); AN(s->range_error); if (!DO_DEBUG(DBG_H2_NOCHECK)) return (s->range_error); } Lck_Lock(&h2->sess->mtx); - if (s == H2_SET_INITIAL_WINDOW_SIZE) + if (s == H2_SET_INITIAL_WINDOW_SIZE) { h2_win_adjust(h2, h2->remote_settings.initial_window_size, y); + /* Assume we are no longer stuck on output. 
*/ + h2->tx_l_stuck = 0; + } VSLb(h2->vsl, SLT_Debug, "H2SETTING %s=0x%08x", s->name, y); Lck_Unlock(&h2->sess->mtx); AN(s->setfunc); @@ -593,20 +612,19 @@ h2_rx_settings(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); ASSERT_H2_SESS(h2); - CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - assert(r2 == h2->req0); AZ(h2->rxf_stream); + AZ(r2); if (h2->rxf_flags == H2FF_ACK) { if (h2->rxf_len > 0) { // rfc7540,l,2047,2049 - H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx settings ack with " + VSLb(h2->vsl, SLT_SessError, "H2: rx settings ack with " "(len > 0)"); return (H2CE_FRAME_SIZE_ERROR); } return (0); } else { if (h2->rxf_len % 6) { // rfc7540,l,2062,2064 - H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx settings with " + VSLb(h2->vsl, SLT_SessError, "H2: rx settings with " "((len %% 6) != 0)"); return (H2CE_PROTOCOL_ERROR); } @@ -616,10 +634,7 @@ h2_rx_settings(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) if (retval) return (retval); } - H2_Send_Get(wrk, h2, r2); - H2_Send_Frame(wrk, h2, - H2_F_SETTINGS, H2FF_ACK, 0, 0, NULL); - H2_Send_Rel(h2, r2); + H2_Send_SETTINGS(h2, H2FF_ACK, 0, NULL); } return (0); } @@ -648,8 +663,7 @@ h2_do_req(struct worker *wrk, void *priv) CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); Lck_Lock(&h2->sess->mtx); r2->scheduled = 0; - r2->state = H2_S_CLOSED; - r2->h2sess->do_sweep = 1; + h2_attention(h2); Lck_Unlock(&h2->sess->mtx); } THR_SetRequest(NULL); @@ -663,13 +677,14 @@ h2_end_headers(struct worker *wrk, struct h2_sess *h2, ssize_t cl; ASSERT_H2_SESS(h2); + assert(h2->hpack_lock == r2); assert(r2->state == H2_S_OPEN); h2e = h2h_decode_hdr_fini(h2); - h2->new_req = NULL; + AZ(h2->hpack_lock); + if (h2e != NULL) { - H2S_Lock_VSLb(h2, SLT_Debug, "HPACK/FINI %s", h2e->name); + VSLb(h2->vsl, SLT_Debug, "HPACK/FINI %s", h2e->name); assert(!WS_IsReserved(r2->req->ws)); - h2_del_req(wrk, &r2); return (h2e); } req->t_req = VTIM_real(); @@ -683,7 +698,7 @@ h2_end_headers(struct worker *wrk, struct h2_sess 
*h2, cl = http_GetContentLength(req->http); assert(cl >= -2); if (cl == -2) { - H2S_Lock_VSLb(h2, SLT_Debug, "Non-parseable Content-Length"); + VSLb(h2->vsl, SLT_Debug, "Non-parseable Content-Length"); return (H2SE_PROTOCOL_ERROR); } @@ -706,19 +721,19 @@ h2_end_headers(struct worker *wrk, struct h2_sess *h2, assert (req->req_body_status == BS_NONE); r2->state = H2_S_CLOS_REM; if (cl > 0) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx header with END_STREAM " + VSLb(h2->vsl, SLT_SessError, "H2: rx header with END_STREAM " "and content-length > 0"); return (H2CE_PROTOCOL_ERROR); //rfc7540,l,1838,1840 } } if (req->http->hd[HTTP_HDR_METHOD].b == NULL) { - H2S_Lock_VSLb(h2, SLT_Debug, "Missing :method"); + VSLb(h2->vsl, SLT_Debug, "Missing :method"); return (H2SE_PROTOCOL_ERROR); //rfc7540,l,3087,3090 } if (req->http->hd[HTTP_HDR_URL].b == NULL) { - H2S_Lock_VSLb(h2, SLT_Debug, "Missing :path"); + VSLb(h2->vsl, SLT_Debug, "Missing :path"); return (H2SE_PROTOCOL_ERROR); //rfc7540,l,3087,3090 } @@ -727,7 +742,7 @@ h2_end_headers(struct worker *wrk, struct h2_sess *h2, if (*req->http->hd[HTTP_HDR_URL].b == '*' && (Tlen(req->http->hd[HTTP_HDR_METHOD]) != 7 || strncmp(req->http->hd[HTTP_HDR_METHOD].b, "OPTIONS", 7))) { - H2S_Lock_VSLb(h2, SLT_BogoHeader, "Illegal :path pseudo-header"); + VSLb(h2->vsl, SLT_BogoHeader, "Illegal :path pseudo-header"); return (H2SE_PROTOCOL_ERROR); //rfc7540,l,3068,3071 } @@ -736,15 +751,46 @@ h2_end_headers(struct worker *wrk, struct h2_sess *h2, VCL_TaskEnter(req->top->privs); req->task->func = h2_do_req; req->task->priv = req; + + /* NB: we don't need to guard the read of h2->open_streams because + * headers are handled sequentially so it cannot increase under our + * feet. 
+ */ + if (h2->open_streams > (int)h2->local_settings.max_concurrent_streams) { + VSLb(h2->vsl, SLT_Debug, + "H2: stream %u: Hit maximum number of concurrent streams", + h2->rxf_stream); + return (H2SE_REFUSED_STREAM); // rfc7540,l,1200,1205 + } + r2->scheduled = 1; if (Pool_Task(wrk->pool, req->task, TASK_QUEUE_STR) != 0) { r2->scheduled = 0; - r2->state = H2_S_CLOSED; return (H2SE_REFUSED_STREAM); //rfc7540,l,3326,3329 } return (0); } +static h2_error +h2_decode_headers(struct h2_sess *h2, struct h2_req *r2, + const void *p, size_t l) +{ + h2_error h2e; + + CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); + assert(h2->hpack_lock == r2); + + h2e = h2h_decode_bytes(h2, p, l); + r2->req->acct.req_hdrbytes += l; + + if (h2e != NULL) { + VSLb(h2->vsl, SLT_Debug, "HPACK(%s) %s", + h2_framename(h2->rxf_type), h2e->name); + } + + return (h2e); +} + static h2_error v_matchproto_(h2_rxframe_f) h2_rx_headers(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { @@ -755,29 +801,6 @@ h2_rx_headers(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); ASSERT_H2_SESS(h2); - - if (r2 != NULL) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx headers on non-idle stream"); - return (H2CE_PROTOCOL_ERROR); // rfc9113,l,887,891 - } - - if (h2->rxf_stream <= h2->highest_stream) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: new stream ID < highest stream"); - return (H2CE_PROTOCOL_ERROR); // rfc7540,l,1153,1158 - } - /* NB: we don't need to guard the read of h2->open_streams - * because headers are handled sequentially so it cannot - * increase under our feet. 
- */ - if (h2->open_streams >= - (int)h2->local_settings.max_concurrent_streams) { - H2S_Lock_VSLb(h2, SLT_Debug, - "H2: stream %u: Hit maximum number of " - "concurrent streams", h2->rxf_stream); - return (H2SE_REFUSED_STREAM); // rfc7540,l,1200,1205 - } - h2->highest_stream = h2->rxf_stream; - r2 = h2_new_req(h2, h2->rxf_stream, NULL); CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); assert(r2->state == H2_S_IDLE); r2->state = H2_S_OPEN; @@ -789,7 +812,6 @@ h2_rx_headers(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) VSLb(req->vsl, SLT_Begin, "req %ju rxreq", VXID(req->sp->vxid)); VSL(SLT_Link, req->sp->vxid, "req %ju rxreq", VXID(req->vsl->wid)); - h2->new_req = req; req->sp = h2->sess; req->transport = &HTTP2_transport; @@ -801,13 +823,13 @@ h2_rx_headers(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) HTTP_Setup(req->http, req->ws, req->vsl, SLT_ReqMethod); http_SetH(req->http, HTTP_HDR_PROTO, "HTTP/2.0"); - h2h_decode_hdr_init(h2); + h2h_decode_hdr_init(h2, r2); p = h2->rxf_data; l = h2->rxf_len; if (h2->rxf_flags & H2FF_PADDED) { if (*p + 1 > l) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx headers with pad length > frame len"); + VSLb(h2->vsl, SLT_SessError, "H2: rx headers with pad length > frame len"); return (H2CE_PROTOCOL_ERROR); // rfc7540,l,1884,1887 } l -= 1 + *p; @@ -815,28 +837,28 @@ h2_rx_headers(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) } if (h2->rxf_flags & H2FF_PRIORITY) { if (l < 5) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx headers with incorrect " + VSLb(h2->vsl, SLT_SessError, "H2: rx headers with incorrect " "priority data"); return (H2CE_PROTOCOL_ERROR); } l -= 5; p += 5; } - h2e = h2h_decode_bytes(h2, p, l); - if (h2e != NULL) { - H2S_Lock_VSLb(h2, SLT_Debug, "HPACK(hdr) %s", h2e->name); - (void)h2h_decode_hdr_fini(h2); - assert(!WS_IsReserved(r2->req->ws)); - h2_del_req(wrk, &r2); + + h2e = h2_decode_headers(h2, r2, p, l); + if (h2e != NULL) return (h2e); - } if (h2->rxf_flags & H2FF_END_STREAM) 
req->req_body_status = BS_NONE; if (h2->rxf_flags & H2FF_END_HEADERS) return (h2_end_headers(wrk, h2, req, r2)); - return (0); + + /* This wasn't the end of the headers. h2->hpack_lock is left as + * evidence to pick up that a CONTINUATION frame is expected next + * on this stream. */ + return (NULL); } /**********************************************************************/ @@ -844,31 +866,28 @@ h2_rx_headers(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) static h2_error v_matchproto_(h2_rxframe_f) h2_rx_continuation(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { - struct req *req; h2_error h2e; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); ASSERT_H2_SESS(h2); CHECK_OBJ_ORNULL(r2, H2_REQ_MAGIC); - if (r2 == NULL || r2->state != H2_S_OPEN || r2->req != h2->new_req) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: rx unexpected CONT frame" + if (r2 == NULL || r2->state != H2_S_OPEN || r2 != h2->hpack_lock) { + VSLb(h2->vsl, SLT_SessError, "H2: rx unexpected CONT frame" " on stream %d", h2->rxf_stream); return (H2CE_PROTOCOL_ERROR); // XXX spec ? } - req = r2->req; - h2e = h2h_decode_bytes(h2, h2->rxf_data, h2->rxf_len); - r2->req->acct.req_hdrbytes += h2->rxf_len; - if (h2e != NULL) { - H2S_Lock_VSLb(h2, SLT_Debug, "HPACK(cont) %s", h2e->name); - (void)h2h_decode_hdr_fini(h2); - assert(!WS_IsReserved(r2->req->ws)); - h2_del_req(wrk, &r2); + h2e = h2_decode_headers(h2, r2, h2->rxf_data, h2->rxf_len); + if (h2e != NULL) return (h2e); - } + if (h2->rxf_flags & H2FF_END_HEADERS) - return (h2_end_headers(wrk, h2, req, r2)); - return (0); + return (h2_end_headers(wrk, h2, r2->req, r2)); + + /* This wasn't the end of the headers. h2->hpack_lock is left as + * evidence to pick up that a CONTINUATION frame is expected next + * on this stream. 
*/ + return (NULL); } /**********************************************************************/ @@ -877,17 +896,12 @@ static h2_error v_matchproto_(h2_rxframe_f) h2_rx_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { - CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - ASSERT_H2_SESS(h2); CHECK_OBJ_ORNULL(r2, H2_REQ_MAGIC); - if (r2 == NULL) - return (0); - - if (r2->state >= H2_S_CLOS_REM) { - r2->error = H2SE_STREAM_CLOSED; + if (r2 == NULL || r2->state == H2_S_CLOSED) + return (H2CE_PROTOCOL_ERROR); // rfc7540,l,1727,1730 + if (r2->state >= H2_S_CLOS_REM) return (H2SE_STREAM_CLOSED); // rfc7540,l,1766,1769 - } return (h2_reqbody_data(wrk, h2, r2)); } @@ -897,9 +911,13 @@ h2_rx_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) void v_matchproto_(vtr_req_fail_f) h2_req_fail(struct req *req, stream_close_t reason) { + struct h2_req *r2; + assert(reason != SC_NULL); - assert(req->sp->fd != 0); VSLb(req->vsl, SLT_Debug, "H2FAILREQ"); + + CAST_OBJ_NOTNULL(r2, req->transport_priv, H2_REQ_MAGIC); + h2_async_error(r2, H2SE_INTERNAL_ERROR); } /**********************************************************************/ @@ -908,169 +926,193 @@ static enum htc_status_e v_matchproto_(htc_complete_f) h2_frame_complete(struct http_conn *htc) { struct h2_sess *h2; + unsigned u; + size_t l; CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC); CAST_OBJ_NOTNULL(h2, htc->priv, H2_SESS_MAGIC); - if (htc->rxbuf_b + 9 > htc->rxbuf_e || - htc->rxbuf_b + 9 + (vbe32dec(htc->rxbuf_b) >> 8) > htc->rxbuf_e) + l = pdiff(htc->rxbuf_b, htc->rxbuf_e); + if (l == 0) + return (HTC_S_EMPTY); + if (l < 9) return (HTC_S_MORE); - return (HTC_S_COMPLETE); + u = vbe32dec(htc->rxbuf_b) >> 8; + if (u > h2->local_settings.max_frame_size) + return (HTC_S_OVERFLOW); + if (l >= u + 9) + return (HTC_S_COMPLETE); + + return (HTC_S_MORE); } + /**********************************************************************/ -static h2_error +static void h2_procframe(struct worker *wrk, struct h2_sess *h2, h2_frame h2f) 
{ - struct h2_req *r2; - h2_error h2e; + struct h2_req *r2 = NULL; + h2_error h2e = NULL; ASSERT_H2_SESS(h2); - if (h2->rxf_stream == 0 && h2f->act_szero != 0) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: unexpected %s frame on stream 0", - h2f->name); - return (h2f->act_szero); + if (h2->rxf_stream == 0 && h2f->act_szero != NULL) { + VSLb(h2->vsl, SLT_SessError, + "H2: unexpected %s frame on stream 0", h2f->name); + h2e = h2f->act_szero; + goto exit; } - if (h2->rxf_stream != 0 && h2f->act_snonzero != 0) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: unexpected %s frame on stream %d", + if (h2->rxf_stream != 0 && h2f->act_snonzero != NULL) { + VSLb(h2->vsl, SLT_SessError, + "H2: unexpected %s frame on stream %d", h2f->name, h2->rxf_stream); - return (h2f->act_snonzero); + h2e = h2f->act_snonzero; + goto exit; } if (h2->rxf_stream > h2->highest_stream && h2f->act_sidle != 0) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: unexpected %s frame on idle stream " - "%d", h2f->name, h2->rxf_stream); - return (h2f->act_sidle); + VSLb(h2->vsl, SLT_SessError, + "H2: unexpected %s frame on idle stream %d", + h2f->name, h2->rxf_stream); + h2e = h2f->act_sidle; + goto exit; } if (h2->rxf_stream != 0 && !(h2->rxf_stream & 1)) { // rfc7540,l,1140,1145 // rfc7540,l,1153,1158 /* No even streams, we don't do PUSH_PROMISE */ - H2S_Lock_VSLb(h2, SLT_SessError, "H2: illegal stream (=%u)", + VSLb(h2->vsl, SLT_SessError, "H2: illegal stream (=%u)", h2->rxf_stream); - return (H2CE_PROTOCOL_ERROR); + h2e = H2CE_PROTOCOL_ERROR; + goto exit; } - VTAILQ_FOREACH(r2, &h2->streams, list) - if (r2->stream == h2->rxf_stream) - break; + if (h2->hpack_lock != NULL && h2f != H2_F_CONTINUATION) { + VSLb(h2->vsl, SLT_SessError, + "H2: expected continuation but received %s on stream %d", + h2f->name, h2->rxf_stream); + h2e = H2CE_PROTOCOL_ERROR; // rfc7540,l,1859,1863 + goto exit; + } - if (h2->new_req != NULL && h2f != H2_F_CONTINUATION) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: expected continuation but " - " 
received %s on stream %d", h2f->name, h2->rxf_stream); - return (H2CE_PROTOCOL_ERROR); // rfc7540,l,1859,1863 + if (h2f == H2_F_HEADERS && h2->rxf_stream <= h2->highest_stream) { + VSLb(h2->vsl, SLT_Error, "H2: new stream ID < highest stream"); + h2e = H2CE_PROTOCOL_ERROR; // rfc7540,l,1153,1158 + goto exit; + } + + if (h2->rxf_stream != 0) { + VTAILQ_FOREACH(r2, &h2->streams, list) { + if (r2->stream == h2->rxf_stream) + break; + } + if (r2 != NULL && r2->error != NULL) { + /* Ignore frames for streams once error is set. */ + /* XXX: missing accounting? */ + return; + } + } + + if (h2f == H2_F_HEADERS) { + AZ(r2); /* We checked against highest_stream above. */ + r2 = h2_new_req(h2, h2->rxf_stream, NULL); + CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); + h2->highest_stream = r2->stream; } h2e = h2f->rxfunc(wrk, h2, r2); - if (h2e == NULL) - return (NULL); - if (h2->rxf_stream == 0 || h2e->connection) - return (h2e); // Connection errors one level up - H2_Send_Get(wrk, h2, h2->req0); - H2_Send_RST(wrk, h2, h2->req0, h2->rxf_stream, h2e); - H2_Send_Rel(h2, h2->req0); - return (NULL); +exit: + if (h2e != NULL) { + if (h2->rxf_stream == 0 || h2e->connection) + h2->error = h2e; + if (r2 != NULL) + h2_kill_req(wrk, h2, &r2, h2e); + } } -h2_error +static h2_error h2_stream_tmo(struct h2_sess *h2, const struct h2_req *r2, vtim_real now) { - h2_error h2e = NULL; CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - Lck_AssertHeld(&h2->sess->mtx); - - /* NB: when now is NAN, it means that h2_window_timeout was hit - * on a lock condwait operation. 
- */ - if (isnan(now)) - AN(r2->t_winupd); - if (h2->error != NULL && h2->error->connection && - !h2->error->send_goaway) - return (h2->error); - - if (r2->t_winupd == 0 && r2->t_send == 0) - return (NULL); - - if (isnan(now) || (r2->t_winupd != 0 && - now - r2->t_winupd > cache_param->h2_window_timeout)) { + if (r2->t_win_low != 0 && + now - r2->t_win_low > cache_param->h2_window_timeout) { VSLb(h2->vsl, SLT_Debug, - "H2: stream %u: Hit h2_window_timeout", r2->stream); - h2e = H2SE_BROKE_WINDOW; + "H2: stream %u: Hit h2_window_timeout", r2->stream); + if (h2->open_streams <= h2->win_low_streams) { + /* If all streams ran out of control flow window + * credits upon triggering h2_window_timeout, + * declare bankruptcy for the entire connection. */ + return (H2CE_BANKRUPT); + } + return (H2SE_BROKE_WINDOW); } - if (h2e == NULL && r2->t_send != 0 && + if (r2->t_send != 0 && now - r2->t_send > SESS_TMO(h2->sess, send_timeout)) { VSLb(h2->vsl, SLT_Debug, "H2: stream %u: Hit send_timeout", r2->stream); - h2e = H2SE_CANCEL; + return (H2SE_SEND_TIMEOUT); } - return (h2e); -} - -static h2_error -h2_stream_tmo_unlocked(struct h2_sess *h2, const struct h2_req *r2) -{ - h2_error h2e; - - Lck_Lock(&h2->sess->mtx); - h2e = h2_stream_tmo(h2, r2, h2->sess->t_idle); - Lck_Unlock(&h2->sess->mtx); - - return (h2e); + return (NULL); } /* * This is the janitorial task of cleaning up any closed & refused * streams, and checking if the session is timed out. 
*/ -static h2_error -h2_sweep(struct worker *wrk, struct h2_sess *h2) +static void +h2_sweep(struct worker *wrk, struct h2_sess *h2, vtim_real now) { struct h2_req *r2, *r22; - h2_error h2e, tmo; - vtim_real now; + h2_error h2e; + int64_t l; ASSERT_H2_SESS(h2); - h2e = h2->error; - now = VTIM_real(); - if (h2e == NULL && h2->open_streams == 0 && - h2->sess->t_idle + cache_param->timeout_idle < now) - h2e = H2CE_NO_ERROR; - - h2->do_sweep = 0; VTAILQ_FOREACH_SAFE(r2, &h2->streams, list, r22) { - if (r2 == h2->req0) { - assert (r2->state == H2_S_IDLE); + if (r2->async_error != NULL) { + /* Request thread has set an error state. Kill it. */ + h2e = r2->async_error; + r2->async_error = NULL; + h2_kill_req(wrk, h2, &r2, h2e); continue; } + + if (r2->rxbuf != NULL && r2->state < H2_S_CLOS_REM && + r2->error == NULL) { + /* Check and expand the request body window if + * necessary. */ + CHECK_OBJ_NOTNULL(r2->rxbuf, H2_RXBUF_MAGIC); + assert(r2->rxbuf->tail <= r2->rxbuf->head); + l = r2->rxbuf->head - r2->rxbuf->tail; + assert(l <= r2->rxbuf->size); + l = r2->rxbuf->size - l; + if (r2->rx_window < l) { + l = l - r2->rx_window; + H2_Send_WINDOW_UPDATE(h2, r2->stream, l); + r2->rx_window += l; + } + } + switch (r2->state) { case H2_S_CLOSED: - AZ(r2->scheduled); - h2_del_req(wrk, &r2); + if (!r2->scheduled) + h2_kill_req(wrk, h2, &r2, H2SE_NO_ERROR); break; case H2_S_CLOS_REM: - if (!r2->scheduled) { - H2_Send_Get(wrk, h2, h2->req0); - H2_Send_RST(wrk, h2, h2->req0, r2->stream, - H2SE_REFUSED_STREAM); - H2_Send_Rel(h2, h2->req0); - h2_del_req(wrk, &r2); - continue; - } - /* FALLTHROUGH */ case H2_S_CLOS_LOC: case H2_S_OPEN: - tmo = h2_stream_tmo_unlocked(h2, r2); - if (h2e == NULL) - h2e = tmo; + h2e = h2_stream_tmo(h2, r2, now); + if (h2e != NULL && h2e->connection) + h2->error = h2e; + else if (h2e != NULL) + h2_kill_req(wrk, h2, &r2, h2e); break; case H2_S_IDLE: /* Current code make this unreachable: h2_new_req is @@ -1082,7 +1124,6 @@ h2_sweep(struct worker *wrk, struct 
h2_sess *h2) break; } } - return (h2e); } /* @@ -1090,21 +1131,15 @@ h2_sweep(struct worker *wrk, struct h2_sess *h2) * if we have not received end_stream, DATA frames are expected later * * neither of these make much sense to output here - * - * goaway currently is always 0, see #4285 */ static void h2_htc_debug(enum htc_status_e hs, struct h2_sess *h2) { const char *s, *r; - if (LIKELY(VSL_tag_is_masked(SLT_Debug))) - return; - HTC_Status(hs, &s, &r); - H2S_Lock_VSLb(h2, SLT_Debug, "H2: HTC %s (%s) frame=%s goaway=%d", - s, r, h2->htc->rxbuf_b == h2->htc->rxbuf_e ? "complete" : "partial", - h2->goaway); + VSLb(h2->vsl, SLT_Debug, "H2: HTC %s (%s) frame=%s", s, r, + h2->htc->rxbuf_b == h2->htc->rxbuf_e ? "complete" : "partial"); } /*********************************************************************** @@ -1122,64 +1157,61 @@ static const h2_frame h2flist[] = { #define H2FMAX (sizeof(h2flist) / sizeof(h2flist[0])) -int -h2_rxframe(struct worker *wrk, struct h2_sess *h2) +static enum htc_status_e +h2_rxstuff(struct h2_sess *h2) { + struct http_conn *htc; enum htc_status_e hs; - h2_frame h2f; - h2_error h2e; - const char *s, *r; + size_t res; + ssize_t l; - /* On entry we assume that HTC_RxInit() has been called, which - * would have reserved workspace for HTC_RxStuff() to receive - * into. */ - AN(h2->htc->ws->r); - - ASSERT_H2_SESS(h2); - - if (h2->goaway && h2->open_streams == 0) { - /* We have not called HTC_RxStuff(), and thus not released - * the workspace. Do it here so that the workspace is in - * the expected state for the caller. 
*/ - WS_ReleaseP(h2->htc->ws, h2->htc->rxbuf_e); - return (0); + CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); + htc = h2->htc; + CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC); + AN(htc->rfd); + assert(*htc->rfd > 0); + + /* Set up the workspace buffer */ + assert(htc->rxbuf_b <= htc->rxbuf_e); + HTC_RxPipeline(htc, htc->rxbuf_b); + HTC_RxInit(htc, h2->ws); + res = WS_ReservationSize(h2->ws); + + if (res == 0) { + WS_Release(htc->ws, 0); + return (HTC_S_OVERFLOW); } - h2->t1 = NAN; - VTCP_blocking(*h2->htc->rfd); - hs = HTC_RxStuff(h2->htc, h2_frame_complete, &h2->t1, NULL, NAN, - VTIM_real() + 0.5, NAN, h2->local_settings.max_frame_size + 9); - - h2e = NULL; - switch (hs) { - case HTC_S_EOF: + l = read(*htc->rfd, htc->rxbuf_e, res); + if (l < 0 && errno == EWOULDBLOCK) + hs = HTC_S_MORE; + else if (l < 0) + hs = HTC_S_CLOSE; + else if (l == 0) { + hs = HTC_S_EOF; h2_htc_debug(hs, h2); - h2e = H2CE_NO_ERROR; - break; - case HTC_S_COMPLETE: - h2->sess->t_idle = VTIM_real(); - if (h2->do_sweep) - h2e = h2_sweep(wrk, h2); - break; - case HTC_S_TIMEOUT: - //// #4279 - // h2_htc_debug(hs, h2); - h2e = h2_sweep(wrk, h2); - break; - default: - HTC_Status(hs, &s, &r); - H2S_Lock_VSLb(h2, SLT_SessError, "H2: HTC %s (%s)", s, r); - h2e = H2CE_ENHANCE_YOUR_CALM; + } else { + h2->t1 = VTIM_real(); + htc->rxbuf_e += l; + hs = h2_frame_complete(htc); } - if (h2e != NULL && h2e->connection) { - h2->error = h2e; - h2_tx_goaway(wrk, h2, h2e); - return (0); - } + WS_ReleaseP(htc->ws, htc->rxbuf_e); + return (hs); +} + +static enum htc_status_e +h2_rxframe(struct worker *wrk, struct h2_sess *h2) +{ + enum htc_status_e hs; + h2_frame h2f; + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); + ASSERT_H2_SESS(h2); + + hs = h2_frame_complete(h2->htc); if (hs != HTC_S_COMPLETE) - return (1); + return (hs); h2->rxf_len = vbe32dec(h2->htc->rxbuf_b) >> 8; h2->rxf_type = h2->htc->rxbuf_b[3]; @@ -1187,20 +1219,24 @@ h2_rxframe(struct worker *wrk, struct h2_sess *h2) h2->rxf_stream = vbe32dec(h2->htc->rxbuf_b + 5); 
h2->rxf_stream &= ~(1LU<<31); // rfc7540,l,690,692 h2->rxf_data = (void*)(h2->htc->rxbuf_b + 9); - /* XXX: later full DATA will not be rx'ed yet. */ - HTC_RxPipeline(h2->htc, h2->htc->rxbuf_b + h2->rxf_len + 9); - h2_vsl_frame(h2, h2->htc->rxbuf_b, 9L + h2->rxf_len); + h2_rxframe_vsl(h2, h2->htc->rxbuf_b, 9L + h2->rxf_len); h2->srq->acct.req_hdrbytes += 9; + h2->htc->rxbuf_b += h2->rxf_len + 9; + assert(h2->htc->rxbuf_b <= h2->htc->rxbuf_e); + + /* XXX: Apply connection preface SETTINGS expectation as first + * frame, protocol error on anything else.. */ + if (h2->rxf_type >= H2FMAX) { // rfc7540,l,679,681 h2->bogosity++; - H2S_Lock_VSLb(h2, SLT_Debug, + VSLb(h2->vsl, SLT_Debug, "H2: Unknown frame type 0x%02x (ignored)", (uint8_t)h2->rxf_type); h2->srq->acct.req_bodybytes += h2->rxf_len; - return (1); + return (h2_frame_complete(h2->htc)); } h2f = h2flist[h2->rxf_type]; @@ -1212,17 +1248,181 @@ h2_rxframe(struct worker *wrk, struct h2_sess *h2) if (h2->rxf_flags & ~h2f->flags) { // rfc7540,l,687,688 h2->bogosity++; - H2S_Lock_VSLb(h2, SLT_Debug, + VSLb(h2->vsl, SLT_Debug, "H2: Unknown flags 0x%02x on %s (ignored)", (uint8_t)h2->rxf_flags & ~h2f->flags, h2f->name); h2->rxf_flags &= h2f->flags; } - h2e = h2_procframe(wrk, h2, h2f); - if (h2->error == NULL && h2e != NULL) { - h2->error = h2e; - h2_tx_goaway(wrk, h2, h2e); + if (h2->error == NULL) + h2_procframe(wrk, h2, h2f); + + return (h2_frame_complete(h2->htc)); +} + +void +h2_async_error(struct h2_req *r2, h2_error h2e) +{ + + /* Report an error from a request handling thread */ + CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); + AN(h2e); + + AN(r2->scheduled); + CHECK_OBJ_NOTNULL(r2->h2sess, H2_SESS_MAGIC); + ASSERT_H2_REQ(r2->h2sess); + + if (h2e->connection) + r2->h2sess->error = h2e; + else + r2->async_error = h2e; + + h2_attention(r2->h2sess); +} + +void +h2_attention(struct h2_sess *h2) +{ + + CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); + AZ(VEFD_Signal(h2->efd)); +} + +void +h2_run(struct worker *wrk, struct h2_sess *h2) 
+{ + struct pollfd pfd[2]; + enum htc_status_e hs; + const char *s, *r; + int i; + ssize_t l; + vtim_real now; + vtim_dur tmo; + + CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); + CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); + + assert(h2->efd->poll_fd >= 0); + + enum { + pfd_h2 = 0, + pfd_ev = 1, + }; + memset(pfd, 0, sizeof pfd); + pfd[pfd_h2].fd = h2->sess->fd; + pfd[pfd_h2].events = POLLIN; + pfd[pfd_ev].fd = h2->efd->poll_fd; + pfd[pfd_ev].events = POLLIN; + + VTCP_nonblocking(h2->sess->fd); + + now = VTIM_real(); + h2->deadline = now + cache_param->timeout_idle; + + while (h2->error == NULL) { + if (H2_Send_Pending(h2)) + pfd[pfd_h2].events = POLLIN | POLLOUT; + else + pfd[pfd_h2].events = POLLIN; + i = poll(pfd, 2, 1000); + + /* Calculate the next deadline. The deadline is the time + * at which any "blocking" poll()s in code called by this + * loop (e.g. a need to flush the output to free up buffer + * space) are allowed to wait before flagging error. */ + now = VTIM_real(); + tmo = SESS_TMO(h2->sess, timeout_idle); + h2->deadline = now + cache_param->timeout_idle; + + /* Connection timeouts */ + if (h2->error == NULL && h2->hpack_lock != NULL && + h2->hpack_lock->req->t_first + tmo < now) + h2->error = H2CE_COMPRESSION_ERROR; + else if (h2->error == NULL && h2->open_streams == 0 && + h2->sess->t_idle + tmo < now) + h2->error = H2CE_NO_ERROR; + + if (pfd[pfd_ev].revents & POLLIN) { + /* Signalled for attention by a request + * thread. Reset the eventfd. 
*/ + AZ(VEFD_Clear(h2->efd)); + } + + if (pfd[pfd_h2].revents & POLLIN) { + hs = h2_rxstuff(h2); + while (h2->error == NULL && hs == HTC_S_COMPLETE) + hs = h2_rxframe(wrk, h2); + if (h2->error == NULL && hs < 0) { + switch (hs) { + case HTC_S_EOF: + /* Remote close */ + h2->error = H2CE_IO_ERROR; + break; + default: + HTC_Status(hs, &s, &r); + VSLb(h2->vsl, SLT_Error, "H2: %s", s); + h2->error = H2CE_PROTOCOL_ERROR; + break; + } + } + } + + if (pfd[pfd_h2].revents & POLLOUT) { + /* We have data to send and it is possible to + * send. */ + l = H2_Send_TxStuff(h2); + if (l < 0 && errno != EWOULDBLOCK) { + VSLb(h2->vsl, SLT_Error, "H2: Send error (%s)", + strerror(errno)); + h2->error = H2CE_IO_ERROR; + } + } + + h2_sweep(wrk, h2, now); + } + AN(h2->error); + + /* Wake up any threads waiting to send, cancelling any queued + * writes. */ + H2_Send_Shutdown(h2); + + /* Kill all streams, kicking any waitinglist stuck items */ + h2_kill_all(wrk, h2, h2->error); + + if (h2->error->send_goaway) { + /* Add timeout_linger to the deadline which may have + * already been spent, to give some additional time to get + * the GOAWAY out the door. */ + h2->deadline += cache_param->timeout_linger; + + /* Send GOAWAY, and then spend up until the last deadline + * set draining the outgoing buffers. This is to be a good + * citizen and make some effort on communicating the + * GOAWAY. */ + H2_Send_GOAWAY(h2, h2->highest_stream, h2->error); + while (H2_Send_Pending(h2)) { + if (H2_Send_Something(h2) < 0) + break; + } } - return (h2->error != NULL ? 0 : 1); + /* XXX: Shutdown socket? Would presumably free up kernel socket + * buffers while waiting for waitinglists and the like to clean + * up. */ + + /* Wait until all the requests have been removed */ + pfd[pfd_h2].fd = -pfd[pfd_h2].fd; /* Disable polling on the sess fd */ + while (h2->refcnt > 0) { + /* Don't use infinite timeout here. The walkaway has data + * race issues, and we may need to kill a req more than + * once to wake it. 
*/ + i = poll(pfd, 2, 250); + + if (i > 0 && pfd[pfd_ev].revents & POLLIN) { + /* Clear the eventfd before the next sleep */ + AZ(VEFD_Clear(h2->efd)); + } + h2_kill_all(wrk, h2, h2->error); + h2_sweep(wrk, h2, now); + } } diff --git a/bin/varnishd/http2/cache_http2_reqbody.c b/bin/varnishd/http2/cache_http2_reqbody.c index 6870f89096..1b4e801f9e 100644 --- a/bin/varnishd/http2/cache_http2_reqbody.c +++ b/bin/varnishd/http2/cache_http2_reqbody.c @@ -43,12 +43,55 @@ #include "storage/storage.h" #include "vtim.h" -#include "vend.h" + +struct h2_reqbody_waiter { + unsigned magic; +#define H2_REQBODY_WAITER_MAGIC 0xb6f4c52c + pthread_cond_t cond; +}; + +static int +h2_reqbody_wait(struct h2_req *r2, vtim_real when) +{ + struct h2_reqbody_waiter w; + int retval; + + CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); + CHECK_OBJ_NOTNULL(r2->h2sess, H2_SESS_MAGIC); + + Lck_AssertHeld(&r2->h2sess->sess->mtx); + + INIT_OBJ(&w, H2_REQBODY_WAITER_MAGIC); + PTOK(pthread_cond_init(&w.cond, NULL)); + + AZ(r2->reqbody_waiter); + r2->reqbody_waiter = &w; + retval = Lck_CondWaitUntil(&w.cond, &r2->h2sess->sess->mtx, when); + r2->reqbody_waiter = NULL; + + PTOK(pthread_cond_destroy(&w.cond)); + w.magic = 0; + + return (retval); +} + +void +h2_reqbody_kick(struct h2_req *r2) +{ + + CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); + CHECK_OBJ_NOTNULL(r2->h2sess, H2_SESS_MAGIC); + + Lck_AssertHeld(&r2->h2sess->sess->mtx); + + CHECK_OBJ_ORNULL(r2->reqbody_waiter, H2_REQBODY_WAITER_MAGIC); + if (r2->reqbody_waiter != NULL) + PTOK(pthread_cond_signal(&r2->reqbody_waiter->cond)); +} h2_error h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { - char buf[4]; ssize_t l; uint64_t l2, head; const uint8_t *src; @@ -60,15 +103,11 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) ASSERT_H2_SESS(h2); - Lck_Lock(&h2->sess->mtx); CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); - if (h2->error != NULL || r2->error != NULL) { - if (r2->cond) - 
PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); + /* XXX: errcheck? */ + if (h2->error != NULL || r2->error != NULL) return (h2->error != NULL ? h2->error : r2->error); - } /* Check padding if present */ src = h2->rxf_data; @@ -78,10 +117,6 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) VSLb(h2->vsl, SLT_SessError, "H2: stream %u: Padding larger than frame length", h2->rxf_stream); - r2->error = H2CE_PROTOCOL_ERROR; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); return (H2CE_PROTOCOL_ERROR); } len -= 1 + *src; @@ -101,35 +136,25 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) VSLb(h2->vsl, SLT_Debug, "H2: stream %u: Received data and Content-Length" " mismatch", h2->rxf_stream); - r2->error = H2SE_PROTOCOL_ERROR; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); return (H2SE_PROTOCOL_ERROR); } } /* Check and charge connection window. The entire frame including * padding (h2->rxf_len) counts towards the window. 
*/ - if (h2->rxf_len > h2->req0->rx_window) { + if (h2->rxf_len > h2->rx_window) { VSLb(h2->vsl, SLT_SessError, "H2: stream %u: Exceeded connection receive window", h2->rxf_stream); - r2->error = H2CE_FLOW_CONTROL_ERROR; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); return (H2CE_FLOW_CONTROL_ERROR); } - h2->req0->rx_window -= h2->rxf_len; - if (h2->req0->rx_window < cache_param->h2_rx_window_low_water) { - h2->req0->rx_window += cache_param->h2_rx_window_increment; - vbe32enc(buf, cache_param->h2_rx_window_increment); - Lck_Unlock(&h2->sess->mtx); - H2_Send_Get(wrk, h2, h2->req0); - H2_Send_Frame(wrk, h2, H2_F_WINDOW_UPDATE, 0, 4, 0, buf); - H2_Send_Rel(h2, h2->req0); - Lck_Lock(&h2->sess->mtx); + h2->rx_window -= h2->rxf_len; + if (h2->rx_window < cache_param->h2_rx_window_low_water) { + /* Running low, increase the window */ + l = cache_param->h2_rx_window_increment; + assert(l < (1UL << 31)); + h2->rx_window += l; + H2_Send_WINDOW_UPDATE(h2, 0, l); } /* Check stream window. The entire frame including padding @@ -138,10 +163,6 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) VSLb(h2->vsl, SLT_Debug, "H2: stream %u: Exceeded stream receive window", h2->rxf_stream); - r2->error = H2SE_FLOW_CONTROL_ERROR; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); return (H2SE_FLOW_CONTROL_ERROR); } @@ -158,24 +179,19 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); if (r2->rx_window == 0 && (r2->rxbuf == NULL || r2->rxbuf->tail == r2->rxbuf->head)) { + /* XXX: bogosity++? 
*/ if (r2->rxbuf) l = r2->rxbuf->size; else l = h2->local_settings.initial_window_size; r2->rx_window += l; - Lck_Unlock(&h2->sess->mtx); - vbe32enc(buf, l); - H2_Send_Get(wrk, h2, h2->req0); - H2_Send_Frame(wrk, h2, H2_F_WINDOW_UPDATE, 0, 4, - r2->stream, buf); - H2_Send_Rel(h2, h2->req0); - Lck_Lock(&h2->sess->mtx); + H2_Send_WINDOW_UPDATE(h2, r2->stream, l); } if (h2->rxf_flags & H2FF_END_STREAM) r2->state = H2_S_CLOS_REM; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); + Lck_Lock(&h2->sess->mtx); + h2_reqbody_kick(r2); Lck_Unlock(&h2->sess->mtx); return (0); } @@ -187,8 +203,6 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) struct stv_buffer *stvbuf; struct h2_rxbuf *rxbuf; - Lck_Unlock(&h2->sess->mtx); - bufsize = h2->local_settings.initial_window_size; if (bufsize < r2->rx_window) { /* This will not happen because we do not have any @@ -199,23 +213,19 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) } assert(bufsize > 0); if ((h2->rxf_flags & H2FF_END_STREAM) && - bufsize > len) + bufsize > len) { /* Cap the buffer size when we know this is the * single data frame. 
*/ bufsize = len; + } CHECK_OBJ_NOTNULL(stv_h2_rxbuf, STEVEDORE_MAGIC); stvbuf = STV_AllocBuf(wrk, stv_h2_rxbuf, bufsize + sizeof *rxbuf); if (stvbuf == NULL) { - Lck_Lock(&h2->sess->mtx); VSLb(h2->vsl, SLT_Debug, "H2: stream %u: Failed to allocate request body" " buffer", h2->rxf_stream); - r2->error = H2SE_INTERNAL_ERROR; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); - Lck_Unlock(&h2->sess->mtx); return (H2SE_INTERNAL_ERROR); } rxbuf = STV_GetBufPtr(stvbuf, &bstest); @@ -227,8 +237,6 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) rxbuf->stvbuf = stvbuf; r2->rxbuf = rxbuf; - - Lck_Lock(&h2->sess->mtx); } CHECK_OBJ_NOTNULL(r2->rxbuf, H2_RXBUF_MAGIC); @@ -238,8 +246,6 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) l = r2->rxbuf->size - l; assert(len <= l); /* Stream window handling ensures this */ - Lck_Unlock(&h2->sess->mtx); - l = len; head = r2->rxbuf->head; do { @@ -254,7 +260,6 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) } while (l > 0); Lck_Lock(&h2->sess->mtx); - /* Charge stream window. The entire frame including padding * (h2->rxf_len) counts towards the window. 
The used padding * bytes will be included in the next connection window update @@ -265,8 +270,7 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) assert(r2->rxbuf->tail <= r2->rxbuf->head); if (h2->rxf_flags & H2FF_END_STREAM) r2->state = H2_S_CLOS_REM; - if (r2->cond) - PTOK(pthread_cond_signal(r2->cond)); + h2_reqbody_kick(r2); Lck_Unlock(&h2->sess->mtx); return (0); @@ -278,24 +282,25 @@ h2_vfp_body(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, ssize_t *lp) struct h2_req *r2; struct h2_sess *h2; enum vfp_status retval; + h2_error h2e = NULL; ssize_t l, l2; uint64_t tail; uint8_t *dst; - char buf[4]; - int i; + int wait_error = 0; CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC); CAST_OBJ_NOTNULL(r2, vfe->priv1, H2_REQ_MAGIC); h2 = r2->h2sess; + ASSERT_H2_REQ(h2); + AN(ptr); AN(lp); assert(*lp >= 0); Lck_Lock(&h2->sess->mtx); - r2->cond = &vc->wrk->cond; while (1) { CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); if (r2->rxbuf) { @@ -304,8 +309,9 @@ h2_vfp_body(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, ssize_t *lp) } else l = 0; - if (h2->error != NULL || r2->error != NULL) - retval = VFP_ERROR; + h2e = h2_errcheck(r2); + if (h2e != NULL) + break; else if (r2->state >= H2_S_CLOS_REM && l <= *lp) retval = VFP_END; else { @@ -317,17 +323,19 @@ h2_vfp_body(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, ssize_t *lp) if (retval != VFP_OK || l > 0) break; - i = Lck_CondWaitTimeout(r2->cond, &h2->sess->mtx, - SESS_TMO(h2->sess, timeout_idle)); - if (i == ETIMEDOUT) { - retval = VFP_ERROR; + wait_error = h2_reqbody_wait(r2, + VTIM_real() + SESS_TMO(h2->sess, timeout_idle)); + if (wait_error == ETIMEDOUT) break; - } } - r2->cond = NULL; Lck_Unlock(&h2->sess->mtx); + if (h2e != NULL) + retval = VFP_Error(vc, "H2: Request body error (%s)", h2e->txt); + else if (wait_error == ETIMEDOUT) + retval = VFP_Error(vc, "H2: Request body timed out"); + if (l == 0 || retval == VFP_ERROR) { *lp = 0; return 
(retval); @@ -355,27 +363,12 @@ h2_vfp_body(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, ssize_t *lp) if (r2->rx_window < cache_param->h2_rx_window_low_water && r2->state < H2_S_CLOS_REM) { - /* l is free buffer space */ - /* l2 is calculated window increment */ - l = r2->rxbuf->size - (r2->rxbuf->head - r2->rxbuf->tail); - assert(r2->rx_window <= l); - l2 = cache_param->h2_rx_window_increment; - if (r2->rx_window + l2 > l) - l2 = l - r2->rx_window; - r2->rx_window += l2; - } else - l2 = 0; - - Lck_Unlock(&h2->sess->mtx); - - if (l2 > 0) { - vbe32enc(buf, l2); - H2_Send_Get(vc->wrk, h2, r2); - H2_Send_Frame(vc->wrk, h2, H2_F_WINDOW_UPDATE, 0, 4, - r2->stream, buf); - H2_Send_Rel(h2, r2); + /* Kick the session thread so it can hand out an extended + * window to the peer. */ + h2_attention(h2); } + Lck_Unlock(&h2->sess->mtx); return (retval); } @@ -383,45 +376,35 @@ static void h2_vfp_body_fini(struct vfp_ctx *vc, struct vfp_entry *vfe) { struct h2_req *r2; - struct h2_sess *h2; struct stv_buffer *stvbuf = NULL; CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC); CAST_OBJ_NOTNULL(r2, vfe->priv1, H2_REQ_MAGIC); CHECK_OBJ_NOTNULL(r2->req, REQ_MAGIC); - h2 = r2->h2sess; - if (vc->failed) { - CHECK_OBJ_NOTNULL(r2->req->wrk, WORKER_MAGIC); - H2_Send_Get(r2->req->wrk, h2, r2); - H2_Send_RST(r2->req->wrk, h2, r2, r2->stream, - H2SE_REFUSED_STREAM); - H2_Send_Rel(h2, r2); - Lck_Lock(&h2->sess->mtx); - r2->error = H2SE_REFUSED_STREAM; - Lck_Unlock(&h2->sess->mtx); - } + ASSERT_H2_REQ(r2->h2sess); + if (vc->failed) + h2_async_error(r2, H2SE_REFUSED_STREAM); + + CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); if (r2->state >= H2_S_CLOS_REM && r2->rxbuf != NULL) { - Lck_Lock(&h2->sess->mtx); - CHECK_OBJ_ORNULL(r2->rxbuf, H2_RXBUF_MAGIC); - if (r2->rxbuf != NULL) { - stvbuf = r2->rxbuf->stvbuf; - r2->rxbuf = NULL; - } - Lck_Unlock(&h2->sess->mtx); - if (stvbuf != NULL) { - STV_FreeBuf(vc->wrk, &stvbuf); - AZ(stvbuf); - } + /* Free the buffer. 
This is safe without any locking + * because the session thread will only free the buffer as + * part of h2_del_req(), which won't be run as long as we + * are scheduled. */ + AN(r2->scheduled); + stvbuf = r2->rxbuf->stvbuf; + r2->rxbuf = NULL; + STV_FreeBuf(vc->wrk, &stvbuf); } } static const struct vfp h2_body = { .name = "H2_BODY", .pull = h2_vfp_body, - .fini = h2_vfp_body_fini + .fini = h2_vfp_body_fini, }; void v_matchproto_(vtr_req_body_t) diff --git a/bin/varnishd/http2/cache_http2_send.c b/bin/varnishd/http2/cache_http2_send.c index 0f284cd244..b6a2d3d5ce 100644 --- a/bin/varnishd/http2/cache_http2_send.c +++ b/bin/varnishd/http2/cache_http2_send.c @@ -32,415 +32,561 @@ #include "config.h" #include +#include +#include #include "cache/cache_varnishd.h" - #include "cache/cache_transport.h" #include "http2/cache_http2.h" #include "vend.h" #include "vtim.h" -#define H2_SEND_HELD(h2, r2) (VTAILQ_FIRST(&(h2)->txqueue) == (r2)) - -static int -h2_cond_wait(pthread_cond_t *cond, struct h2_sess *h2, struct h2_req *r2) +static void +h2_send_vsl(struct vsl_log *vsl, const void *ptr, size_t len) { - vtim_dur tmo = 0.; - vtim_real now; - h2_error h2e; - int r; - - AN(cond); - CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); - CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - - Lck_AssertHeld(&h2->sess->mtx); - - if (cache_param->h2_window_timeout > 0.) - tmo = cache_param->h2_window_timeout; - - r = Lck_CondWaitTimeout(cond, &h2->sess->mtx, tmo); - assert(r == 0 || r == ETIMEDOUT); - - now = VTIM_real(); + const uint8_t *b; + struct vsb *vsb; + const char *p; + unsigned u; - /* NB: when we grab h2_window_timeout before acquiring the session - * lock we may time out, but once we wake up both send_timeout and - * h2_window_timeout may have changed meanwhile. 
For this reason - * h2_stream_tmo() may not log what timed out and we need to call - * again with a magic NAN "now" that indicates to h2_stream_tmo() - * that the stream reached the h2_window_timeout via the lock and - * force it to log it. - */ - h2e = h2_stream_tmo(h2, r2, now); - if (h2e == NULL && r == ETIMEDOUT) { - h2e = h2_stream_tmo(h2, r2, NAN); - AN(h2e); - } + if (VSL_tag_is_masked(SLT_H2TxHdr) && + VSL_tag_is_masked(SLT_H2TxBody)) + return; - if (r2->error == NULL) - r2->error = h2e; + AN(ptr); + assert(len >= 9); + b = ptr; - return (h2e != NULL ? -1 : 0); + vsb = VSB_new_auto(); + AN(vsb); + p = h2_framename(b[3]); + if (p != NULL) + VSB_cat(vsb, p); + else + VSB_quote(vsb, b + 3, 1, VSB_QUOTE_HEX); + + u = vbe32dec(b) >> 8; + VSB_printf(vsb, "[%u] ", u); + VSB_quote(vsb, b + 4, 1, VSB_QUOTE_HEX); + VSB_putc(vsb, ' '); + VSB_quote(vsb, b + 5, 4, VSB_QUOTE_HEX); + AZ(VSB_finish(vsb)); + VSLb_bin(vsl, SLT_H2TxHdr, 9, b); + if (len > 9) + VSLb_bin(vsl, SLT_H2TxBody, len - 9, b + 9); + + VSLb(vsl, SLT_Debug, "H2TXF %s", VSB_data(vsb)); + VSB_destroy(&vsb); } static void -h2_send_get_locked(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) +h2_mk_hdr(uint8_t *hdr, h2_frame ftyp, uint8_t flags, + uint32_t len, uint32_t stream) { - CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); - CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - - Lck_AssertHeld(&h2->sess->mtx); - if (&wrk->cond == h2->cond) - ASSERT_H2_SESS(h2); - - r2->wrk = wrk; - VTAILQ_INSERT_TAIL(&h2->txqueue, r2, tx_list); - while (!H2_SEND_HELD(h2, r2)) - AZ(Lck_CondWait(&wrk->cond, &h2->sess->mtx)); - r2->wrk = NULL; + AN(hdr); + AZ(flags & ~(ftyp->flags)); + if (stream == 0) + AZ(ftyp->act_szero); + else + AZ(ftyp->act_snonzero); + assert(len < (1U << 24)); + vbe32enc(hdr, len << 8); + hdr[3] = ftyp->type; + hdr[4] = flags; + vbe32enc(hdr + 5, stream); } -void -H2_Send_Get(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) +static int64_t +h2_win_limit(const 
struct h2_req *r2) { - CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); + CHECK_OBJ_NOTNULL(r2->h2sess, H2_SESS_MAGIC); - Lck_Lock(&h2->sess->mtx); - h2_send_get_locked(wrk, h2, r2); - Lck_Unlock(&h2->sess->mtx); + return (vmin_t(int64_t, r2->tx_window, r2->h2sess->tx_window)); } static void -h2_send_rel_locked(struct h2_sess *h2, const struct h2_req *r2) -{ - CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); - - Lck_AssertHeld(&h2->sess->mtx); - AN(H2_SEND_HELD(h2, r2)); - VTAILQ_REMOVE(&h2->txqueue, r2, tx_list); - r2 = VTAILQ_FIRST(&h2->txqueue); - if (r2 != NULL) { - CHECK_OBJ_NOTNULL(r2->wrk, WORKER_MAGIC); - PTOK(pthread_cond_signal(&r2->wrk->cond)); - } -} - -void -H2_Send_Rel(struct h2_sess *h2, const struct h2_req *r2) +h2_win_charge(struct h2_req *r2, uint32_t w) { - CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); + CHECK_OBJ_NOTNULL(r2->h2sess, H2_SESS_MAGIC); - Lck_Lock(&h2->sess->mtx); - h2_send_rel_locked(h2, r2); - Lck_Unlock(&h2->sess->mtx); -} - -static void -h2_mk_hdr(uint8_t *hdr, h2_frame ftyp, uint8_t flags, - uint32_t len, uint32_t stream) -{ - - AN(hdr); - assert(len < (1U << 24)); - vbe32enc(hdr, len << 8); - hdr[3] = ftyp->type; - hdr[4] = flags; - vbe32enc(hdr + 5, stream); + r2->tx_window -= w; + r2->h2sess->tx_window -= w; } -/* - * This is the "raw" frame sender, all per-stream accounting and - * prioritization must have happened before this is called, and - * the session mtx must be held. 
- */ - -void -H2_Send_Frame(struct worker *wrk, struct h2_sess *h2, - h2_frame ftyp, uint8_t flags, - uint32_t len, uint32_t stream, const void *ptr) +static int +h2_send_small(struct h2_sess *h2, h2_frame ftyp, uint8_t flags, + uint32_t stream, uint32_t len, const void *ptr) { - uint8_t hdr[9]; - ssize_t s = 0, i; - struct iovec iov[2]; - unsigned niov; - - (void)wrk; + ASSERT_H2_SESS(h2); AN(ftyp); AZ(flags & ~(ftyp->flags)); if (stream == 0) AZ(ftyp->act_szero); else AZ(ftyp->act_snonzero); + assert(len + 9 <= pdiff(h2->tx_s_start, h2->tx_s_end)); + if (len > 0) + AN(ptr); + + while (len + 9 > pdiff(h2->tx_s_head, h2->tx_s_end)) { + /* Send something (up until h2->deadline) to free up space. */ + if (H2_Send_Something(h2) < 0) + return (-1); + } + + h2_mk_hdr(h2->tx_s_head, ftyp, flags, len, stream); + h2->tx_s_head += 9; + if (len > 0) { + memcpy(h2->tx_s_head, ptr, len); + h2->tx_s_head += len; + } + assert(h2->tx_s_head <= h2->tx_s_end); + h2_send_vsl(h2->vsl, h2->tx_s_head - (9 + len), 9 + len); - h2_mk_hdr(hdr, ftyp, flags, len, stream); - Lck_Lock(&h2->sess->mtx); - VSLb_bin(h2->vsl, SLT_H2TxHdr, 9, hdr); h2->srq->acct.resp_hdrbytes += 9; if (ftyp->overhead) h2->srq->acct.resp_bodybytes += len; - Lck_Unlock(&h2->sess->mtx); - memset(iov, 0, sizeof iov); - iov[0].iov_base = (void*)hdr; - iov[0].iov_len = sizeof hdr; - if (len > 0) { - iov[1].iov_base = TRUST_ME(ptr); - iov[1].iov_len = len; - niov = 2; - } else - niov = 1; - - while (s != sizeof hdr + len) { - i = writev(h2->sess->fd, iov, niov); - if (i <= 0) - break; - VIOV_prune(iov, &niov, i); - s += i; - } + return (0); +} - if (s != sizeof hdr + len) { - if (errno == EWOULDBLOCK) { - H2S_Lock_VSLb(h2, SLT_SessError, - "H2: stream %u: Hit idle_send_timeout", stream); - } - else { - H2S_Lock_VSLb(h2, SLT_Debug, - "H2: stream %u: write error s=%zd/%zu errno=%d", - stream, s, sizeof hdr + len, errno); - } - /* - * There is no point in being nice here, we will be unable - * to send a GOAWAY once the code 
unrolls, so go directly - * to the finale and be done with it. - */ - h2->error = H2CE_PROTOCOL_ERROR; - } else if (len > 0) { - Lck_Lock(&h2->sess->mtx); - VSLb_bin(h2->vsl, SLT_H2TxBody, len, ptr); - Lck_Unlock(&h2->sess->mtx); - } +int +H2_Send_RST(struct h2_sess *h2, uint32_t stream, h2_error h2e) +{ + uint8_t buf[4]; + + vbe32enc(buf, h2e->val); + return (h2_send_small(h2, H2_F_RST_STREAM, 0, stream, + sizeof buf, buf)); } -static int64_t -h2_win_limit(const struct h2_req *r2, const struct h2_sess *h2) +int +H2_Send_SETTINGS(struct h2_sess *h2, uint8_t flags, ssize_t len, + const uint8_t *buf) { + if (flags & H2FF_ACK) + assert(len == 0); + return (h2_send_small(h2, H2_F_SETTINGS, flags, 0, len, buf)); +} - CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); - CHECK_OBJ_NOTNULL(h2->req0, H2_REQ_MAGIC); +int +H2_Send_PING(struct h2_sess *h2, uint8_t flags, uint64_t data) +{ + return (h2_send_small(h2, H2_F_PING, flags, 0, sizeof data, &data)); +} - Lck_AssertHeld(&h2->sess->mtx); - return (vmin_t(int64_t, r2->tx_window, h2->req0->tx_window)); +int +H2_Send_GOAWAY(struct h2_sess *h2, uint32_t last_stream_id, h2_error h2e) +{ + uint8_t buf[8]; + + vbe32enc(&buf[0], last_stream_id); + vbe32enc(&buf[4], h2e->val); + return (h2_send_small(h2, H2_F_GOAWAY, 0, 0, sizeof buf, buf)); } -static void -h2_win_charge(struct h2_req *r2, const struct h2_sess *h2, uint32_t w) +int +H2_Send_WINDOW_UPDATE(struct h2_sess *h2, uint32_t stream, uint32_t incr) { - CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); - CHECK_OBJ_NOTNULL(h2->req0, H2_REQ_MAGIC); + uint8_t buf[4]; - Lck_AssertHeld(&h2->sess->mtx); - r2->tx_window -= w; - h2->req0->tx_window -= w; + vbe32enc(&buf[0], incr); + return (h2_send_small(h2, H2_F_WINDOW_UPDATE, 0, stream, + sizeof buf, buf)); } -static int64_t -h2_do_window(struct worker *wrk, struct h2_req *r2, - struct h2_sess *h2, int64_t wanted) +struct h2_send_large { + unsigned magic; +#define 
H2_SEND_LARGE_MAGIC 0x478020e3 + + char last; + char started; + char returned; + + uint8_t flags; + h2_frame ftyp; + + VTAILQ_ENTRY(h2_send_large) list; + + pthread_cond_t cond; + + struct h2_req *r2; + + const void *ptr; + uint32_t len; + uint32_t count; +}; + +int +H2_Send(struct vsl_log *vsl, struct h2_req *r2, h2_frame ftyp, uint8_t flags, + uint32_t len, const void *ptr) { - int64_t w = 0; + struct h2_sess *h2; + struct h2_send_large large; + h2_error h2e; - CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); + h2 = r2->h2sess; CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); - if (wanted == 0) - return (0); + ASSERT_H2_REQ(h2); - Lck_Lock(&h2->sess->mtx); - if (r2->tx_window <= 0 || h2->req0->tx_window <= 0) { - r2->t_winupd = VTIM_real(); - h2_send_rel_locked(h2, r2); + assert(ftyp == H2_F_HEADERS || ftyp == H2_F_DATA); + AZ(flags & ~(ftyp->flags)); - assert(h2->winup_streams >= 0); - h2->winup_streams++; + h2e = h2_errcheck(r2); + if (h2e != NULL) { + VSLb(vsl, SLT_Error, "H2: send error (%s)", h2e->name); + return (-1); + } - while (r2->tx_window <= 0 && h2_errcheck(r2) == NULL) { - r2->cond = &wrk->cond; - (void)h2_cond_wait(r2->cond, h2, r2); - r2->cond = NULL; - } + assert(r2->state > H2_S_IDLE); + if (r2->state >= H2_S_CLOSED) { + VSLb(vsl, SLT_Error, "H2: send on closed stream"); + return (-1); + } - while (h2->req0->tx_window <= 0 && h2_errcheck(r2) == NULL) - (void)h2_cond_wait(h2->winupd_cond, h2, r2); + INIT_OBJ(&large, H2_SEND_LARGE_MAGIC); + PTOK(pthread_cond_init(&large.cond, NULL)); - if (h2_errcheck(r2) == NULL) { - w = vmin_t(int64_t, h2_win_limit(r2, h2), wanted); - h2_win_charge(r2, h2, w); - assert (w > 0); - } + large.ftyp = ftyp; + large.flags = flags; + large.r2 = r2; + large.ptr = ptr; + large.len = len; + + Lck_Lock(&h2->sess->mtx); - if (r2->error == H2SE_BROKE_WINDOW && - h2->open_streams <= h2->winup_streams) { - VSLb(h2->vsl, SLT_SessError, "H2: window bankrupt"); - h2->error = r2->error = H2CE_BANKRUPT; - } + 
VTAILQ_INSERT_TAIL(&h2->tx_l_queue, &large, list); + h2->tx_l_stuck = 0; + h2_attention(h2); - assert(h2->winup_streams > 0); - h2->winup_streams--; + AZ(Lck_CondWait(&large.cond, &h2->sess->mtx)); + AN(large.returned); /* Sanity check */ + /* Note: We will have been removed from the `h2->tx_l_queue` + * list by the signaller. */ - h2_send_get_locked(wrk, h2, r2); - } + h2e = h2_errcheck(r2); - if (w == 0 && h2_errcheck(r2) == NULL) { - assert(r2->tx_window > 0); - assert(h2->req0->tx_window > 0); - w = h2_win_limit(r2, h2); - if (w > wanted) - w = wanted; - h2_win_charge(r2, h2, w); - assert (w > 0); - } - r2->t_winupd = 0; Lck_Unlock(&h2->sess->mtx); - return (w); -} -/* - * This is the per-stream frame sender. - * XXX: priority - */ + PTOK(pthread_cond_destroy(&large.cond)); + large.magic = 0; + + if (h2e != NULL) { + VSLb(vsl, SLT_Error, "H2: send error (%s)", h2e->name); + return (-1); + } + + return (0); +} static void -h2_send(struct worker *wrk, struct h2_req *r2, h2_frame ftyp, uint8_t flags, - uint32_t len, const void *ptr, uint64_t *counter) +h2_send_prep_large(struct h2_sess *h2, struct h2_send_large *large) { - struct h2_sess *h2; - uint32_t mfs, tf; - const char *p; - uint8_t final_flags; + struct h2_req *r2; + uint8_t flags; + ssize_t l, limit; + h2_frame ftyp; - CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); - CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - h2 = r2->h2sess; CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); - assert(len == 0 || ptr != NULL); - AN(counter); + AZ(h2->tx_l_current); - AN(H2_SEND_HELD(h2, r2)); + CHECK_OBJ_NOTNULL(large, H2_SEND_LARGE_MAGIC); + AN(large->ftyp); + r2 = large->r2; + CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - if (h2_errcheck(r2) != NULL) - return; + assert(large->ftyp == H2_F_DATA || large->ftyp == H2_F_HEADERS || + large->ftyp == H2_F_PUSH_PROMISE); + AN(large->ftyp->continuation); + + l = large->len - large->count; + if (l > h2->remote_settings.max_frame_size) + l = h2->remote_settings.max_frame_size; + + if (large->ftyp->respect_window) 
{ + limit = h2_win_limit(r2); + assert(limit > 0); + if (l > limit) + l = limit; + h2_win_charge(r2, l); + if (r2->t_win_low == 0. && r2->tx_window == 0) { + /* The send window is low. Set a timestamp to + * record when this happened, so that we can + * become emo if the window isn't extended + * promptly. */ + /* XXX: This mechanism would be more effective if + * we had some threshold (10% of initial window + * size or something. */ + r2->t_win_low = VTIM_real(); + h2->win_low_streams++; + } + } + assert(large->count + l <= large->len); - AN(ftyp); + ftyp = large->ftyp; + flags = large->flags; AZ(flags & ~(ftyp->flags)); - if (r2->stream == 0) - AZ(ftyp->act_szero); - else - AZ(ftyp->act_snonzero); - Lck_Lock(&h2->sess->mtx); - mfs = h2->remote_settings.max_frame_size; - if (r2->counted && ( - (ftyp == H2_F_HEADERS && (flags & H2FF_END_STREAM)) || - (ftyp == H2_F_DATA && (flags & H2FF_END_STREAM)) || - ftyp == H2_F_RST_STREAM - )) { - assert(h2->open_streams > 0); - h2->open_streams--; - r2->counted = 0; + if (large->count > 0) { + /* This is a continuation. Switch frame type and mask out + * the flags not defined on its continuation type. */ + ftyp = ftyp->continuation; + AN(ftyp); + flags &= ftyp->flags; + } + + if (large->count + l < large->len) { + /* We are breaking it up into smaller frames. Clear the + * last marker from the flags if present. */ + flags &= ~(ftyp->final_flags); } - Lck_Unlock(&h2->sess->mtx); - if (ftyp->respect_window) { - tf = h2_do_window(wrk, r2, h2, (len > mfs) ? mfs : len); - if (h2_errcheck(r2) != NULL) - return; - AN(H2_SEND_HELD(h2, r2)); - } else - tf = mfs; - - if (len <= tf) { - H2_Send_Frame(wrk, h2, ftyp, flags, len, r2->stream, ptr); - *counter += len; + h2_mk_hdr(h2->tx_l_hdrbuf, ftyp, flags, l, r2->stream); + h2_send_vsl(h2->vsl, h2->tx_l_hdrbuf, 9); + h2->tx_vec[0].iov_base = h2->tx_l_hdrbuf; + h2->tx_vec[0].iov_len = 9; + if (l == 0) { + /* Zero payload frame is valid. 
Will be used on + * "chunked encoding" and the end of stream is + * found. */ + h2->tx_nvec = 1; } else { - AN(ptr); - p = ptr; - final_flags = ftyp->final_flags & flags; - flags &= ~ftyp->final_flags; - do { - AN(ftyp->continuation); - if (!ftyp->respect_window) - tf = mfs; - if (ftyp->respect_window && p != ptr) { - tf = h2_do_window(wrk, r2, h2, - (len > mfs) ? mfs : len); - if (h2_errcheck(r2) != NULL) - return; - AN(H2_SEND_HELD(h2, r2)); + h2->tx_vec[1].iov_base = + TRUST_ME((uintptr_t)large->ptr + large->count); + h2->tx_vec[1].iov_len = l; + h2->tx_nvec = 2; + large->count += l; + } + h2->tx_l_current = large; + + /* Charge the session accounting for the protocol bytes */ + h2->srq->acct.resp_hdrbytes += 9; + if (ftyp->overhead) + h2->srq->acct.resp_bodybytes += l; + + /* Charge the request accounting for HEADERS and DATA frames */ + if (large->ftyp == H2_F_HEADERS) + r2->req->acct.resp_hdrbytes += l; + else if (large->ftyp == H2_F_DATA) + r2->req->acct.resp_bodybytes += l; +} + +ssize_t +H2_Send_TxStuff(struct h2_sess *h2) +{ + struct h2_send_large *large; + ssize_t l, ltot = 0; + int err = 0; + + ASSERT_H2_SESS(h2); + + CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); + + if (h2->tx_nvec == 0 && h2->tx_s_head != h2->tx_s_start) { + /* Prioritise sending the small frames */ + assert(h2->tx_s_start < h2->tx_s_head); + assert(h2->tx_s_head <= h2->tx_s_end); + assert(h2->tx_s_mark == h2->tx_s_start); + h2->tx_vec[0].iov_base = h2->tx_s_start; + h2->tx_vec[0].iov_len = h2->tx_s_head - h2->tx_s_start; + h2->tx_nvec = 1; + h2->tx_s_mark = h2->tx_s_head; + } else if (h2->tx_nvec == 0) { + /* Construct a large frame from the queue (if possible + * considering the current windows). If we ever implement + * priorities, this would be the place to take them into + * account. 
*/ + Lck_Lock(&h2->sess->mtx); + + VTAILQ_FOREACH(large, &h2->tx_l_queue, list) { + CHECK_OBJ_NOTNULL(large, H2_SEND_LARGE_MAGIC); + CHECK_OBJ_NOTNULL(large->r2, H2_REQ_MAGIC); + assert(large->count <= large->len); + AN(large->ftyp); + + if (h2_errcheck(large->r2) != NULL) { + VTAILQ_REMOVE(&h2->tx_l_queue, large, list); + large->returned = 1; + PTOK(pthread_cond_signal(&large->cond)); + continue; + } + + if (!large->ftyp->respect_window) + break; + + if (h2->tx_window <= 0) { + /* If the session window is empty, none of + * the respect_window frame types can be + * selected. */ + continue; } - if (tf < len) { - H2_Send_Frame(wrk, h2, ftyp, - flags, tf, r2->stream, p); - } else { - if (ftyp->respect_window) - assert(tf == len); - tf = len; - H2_Send_Frame(wrk, h2, ftyp, final_flags, tf, - r2->stream, p); - flags = 0; + + if (large->r2->tx_window > 0) + break; + } + + if (large == NULL) { + /* Tx is unable to make progress until there has + * been a window update. */ + h2->tx_l_stuck = 1; + } else { + h2->tx_l_stuck = 0; + } + + Lck_Unlock(&h2->sess->mtx); + + if (large == NULL) + return (0); + + h2_send_prep_large(h2, large); + } + + assert(h2->tx_nvec > 0); + while (h2->tx_nvec > 0) { + l = writev(h2->sess->fd, h2->tx_vec, h2->tx_nvec); + if (l < 0) { + /* Save the value of errno. This is strictly not + * necessary as none of the calls between here and + * the return should update errno, but done for + * future proofing. */ + err = errno; + break; + } + + assert(l > 0); + VIOV_prune(h2->tx_vec, &h2->tx_nvec, l); + ltot += l; + } + + if (h2->tx_nvec == 0 && h2->tx_l_current != NULL) { + /* We have just finished sending a large frame. 
*/ + assert(h2->tx_s_mark == h2->tx_s_start); + + TAKE_OBJ_NOTNULL(large, &h2->tx_l_current, H2_SEND_LARGE_MAGIC); + AZ(h2->tx_l_current); + + AN(large->ftyp); + + assert(large->count <= large->len); + if (large->count == large->len) { + if (large->r2->state < H2_S_CLOSED && + large->flags & H2FF_END_STREAM) { + large->r2->state = H2_S_CLOSED; + assert(h2->open_streams > 0); + h2->open_streams--; } - p += tf; - len -= tf; - *counter += tf; - ftyp = ftyp->continuation; - flags &= ftyp->flags; - final_flags &= ftyp->flags; - } while (h2->error == NULL && len > 0); + + /* Signal that we are finished */ + Lck_Lock(&h2->sess->mtx); + VTAILQ_REMOVE(&h2->tx_l_queue, large, list); + PTOK(pthread_cond_signal(&large->cond)); + large->returned = 1; + Lck_Unlock(&h2->sess->mtx); + } else if (large->ftyp == H2_F_HEADERS || + large->ftyp == H2_F_PUSH_PROMISE) { + /* A CONTINUATION frame must come immediately + * after the previous + * HEADER|PUSH_PROMISE|CONTINUATION frame. Prepare + * the `large` again, which will force that to be + * the next output. 
*/ + h2_send_prep_large(h2, large); + assert(large == h2->tx_l_current); + assert(h2->tx_nvec > 0); + } + } else if (h2->tx_nvec == 0) { + /* We have just finished sending the small buffer */ + assert(h2->tx_s_start < h2->tx_s_mark); + assert(h2->tx_s_mark <= h2->tx_s_head); + assert(h2->tx_s_head <= h2->tx_s_end); + memmove(h2->tx_s_start, h2->tx_s_mark, + h2->tx_s_head - h2->tx_s_mark); + h2->tx_s_head -= h2->tx_s_mark - h2->tx_s_start; + h2->tx_s_mark = h2->tx_s_start; } + + if (ltot > 0) + return (ltot); + + errno = err; + return (-1); } -void -H2_Send_RST(struct worker *wrk, struct h2_sess *h2, const struct h2_req *r2, - uint32_t stream, h2_error h2e) +int +H2_Send_Something(struct h2_sess *h2) { - char b[4]; + ssize_t l; + vtim_real now; + struct pollfd pfd[1]; - CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); - CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); - AN(H2_SEND_HELD(h2, r2)); - AN(h2e); + /* Block up until h2->deadline and then send something. */ - H2S_Lock_VSLb(h2, SLT_Debug, "H2: stream %u: %s", stream, h2e->txt); - vbe32enc(b, h2e->val); + CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); + ASSERT_H2_SESS(h2); + assert(h2->sess->fd >= 0); + pfd->fd = h2->sess->fd; + pfd->events = POLLOUT; + + do { + now = VTIM_real(); + if (now > h2->deadline) + goto error; + l = poll(pfd, 1, VTIM_poll_tmo(h2->deadline - now)); + } while (l < 0 && errno == EINTR); + + if (l == 0 || !(pfd->revents & POLLOUT)) + goto error; + + l = H2_Send_TxStuff(h2); + if (l < 0 && errno != EWOULDBLOCK) + goto error; + + return (0); + +error: + /* Failure to send on the socket (IO error or timeout). 
*/ + if (h2->error == NULL) + h2->error = H2CE_IO_ERROR; + return (-1); +} - H2_Send_Frame(wrk, h2, H2_F_RST_STREAM, 0, sizeof b, stream, b); +int +H2_Send_Pending(struct h2_sess *h2) +{ + CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); + ASSERT_H2_SESS(h2); + + if (h2->tx_nvec > 0) + return (1); + if (h2->tx_s_head != h2->tx_s_start) + return (1); + if (!VTAILQ_EMPTY(&h2->tx_l_queue) && !h2->tx_l_stuck) + return (1); + return (0); } void -H2_Send(struct worker *wrk, struct h2_req *r2, h2_frame ftyp, uint8_t flags, - uint32_t len, const void *ptr, uint64_t *counter) +H2_Send_Shutdown(struct h2_sess *h2) { - uint64_t dummy_counter = 0; - h2_error h2e; + struct h2_send_large *large, *large2; - if (counter == NULL) - counter = &dummy_counter; - - h2_send(wrk, r2, ftyp, flags, len, ptr, counter); + CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); + ASSERT_H2_SESS(h2); - h2e = h2_errcheck(r2); - if (H2_ERROR_MATCH(h2e, H2SE_CANCEL)) - H2_Send_RST(wrk, r2->h2sess, r2, r2->stream, h2e); + Lck_Lock(&h2->sess->mtx); + AN(h2->error); + VTAILQ_FOREACH_SAFE(large, &h2->tx_l_queue, list, large2) { + CHECK_OBJ_NOTNULL(large, H2_SEND_LARGE_MAGIC); + VTAILQ_REMOVE(&h2->tx_l_queue, large, list); + large->returned = 1; + PTOK(pthread_cond_signal(&large->cond)); + } + Lck_Unlock(&h2->sess->mtx); } diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index 5208c6e2cb..2844479f65 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -31,6 +31,7 @@ #include "config.h" +#include #include #include "cache/cache_varnishd.h" @@ -87,30 +88,6 @@ h2_local_settings(struct h2_settings *h2s) h2s->max_header_list_size = cache_param->http_req_size; } -void -H2S_Lock_VSLb(const struct h2_sess *h2, enum VSL_tag_e tag, const char *fmt, ...) 
-{ - va_list ap; - int held = 0; - - AN(h2); - - if (VSL_tag_is_masked(tag)) - return; - - if (h2->highest_stream > 0) { - held = 1; - Lck_Lock(&h2->sess->mtx); - } - - va_start(ap, fmt); - VSLbv(h2->vsl, tag, fmt, ap); - va_end(ap); - - if (held) - Lck_Unlock(&h2->sess->mtx); -} - /********************************************************************** * The h2_sess struct needs many of the same things as a request, * WS, VSL, HTC &c, but rather than implement all that stuff over, we @@ -145,14 +122,15 @@ h2_init_sess(struct sess *sp, struct h2_sess *h2s, struct req **psrq, h2->htc->rfd = &sp->fd; h2->sess = sp; h2->rxthr = pthread_self(); - PTOK(pthread_cond_init(h2->winupd_cond, NULL)); VTAILQ_INIT(&h2->streams); - VTAILQ_INIT(&h2->txqueue); h2_local_settings(&h2->local_settings); h2->remote_settings = H2_proto_settings; h2->decode = decode; VEFD_INIT(h2->efd); + h2->tx_window = h2->remote_settings.initial_window_size; + h2->rx_window = h2->local_settings.initial_window_size; + h2->rapid_reset = cache_param->h2_rapid_reset; h2->rapid_reset_limit = cache_param->h2_rapid_reset_limit; h2->rapid_reset_period = cache_param->h2_rapid_reset_period; @@ -163,6 +141,19 @@ h2_init_sess(struct sess *sp, struct h2_sess *h2s, struct req **psrq, AZ(VHT_Init(h2->dectbl, h2->local_settings.header_table_size)); + /* Allocate a scratch space to use for staging small outgoing + * frames. 
*/ + h2->tx_s_start = WS_Alloc(h2->ws, H2_TX_BUFSIZE); + AN(h2->tx_s_start); + h2->tx_s_end = h2->tx_s_start + H2_TX_BUFSIZE; + h2->tx_s_head = h2->tx_s_start; + h2->tx_s_mark = h2->tx_s_start; + + /* Init send queue */ + VTAILQ_INIT(&h2->tx_l_queue); + + h2->htc->pipeline_snap = WS_Snapshot(h2->ws); + *up = (uintptr_t)h2; return (h2); @@ -180,7 +171,6 @@ h2_del_sess(struct worker *wrk, struct h2_sess *h2, stream_close_t reason) AN(reason); VHT_Fini(h2->dectbl); - PTOK(pthread_cond_destroy(h2->winupd_cond)); if (h2->efd->poll_fd >= 0) VEFD_Close(h2->efd); TAKE_OBJ_NOTNULL(req, &h2->srq, REQ_MAGIC); @@ -374,12 +364,11 @@ h2_new_session(struct worker *wrk, void *arg) struct sess *sp; struct h2_sess h2s; struct h2_sess *h2; - struct h2_req *r2, *r22; struct h2_req *r2_ou = NULL; - int again; uint16_t marker; uint8_t settings[48]; struct h2h_decode decode; + stream_close_t reason; size_t l; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); @@ -412,7 +401,8 @@ h2_new_session(struct worker *wrk, void *arg) h2 = h2_init_sess(sp, &h2s, &srq, &decode); AZ(srq); - h2->req0 = h2_new_req(h2, 0, NULL); + + CHECK_OBJ_NOTNULL(h2->htc, HTTP_CONN_MAGIC); AZ(h2->htc->priv); h2->htc->priv = h2; @@ -420,8 +410,6 @@ h2_new_session(struct worker *wrk, void *arg) * threads. 
*/ if (VEFD_Open(h2->efd) < 0) { VSLb(h2->vsl, SLT_Error, "H2: Failed to create eventfd"); - assert(h2->refcnt == 1); - h2_del_req(wrk, &h2->req0); h2_del_sess(wrk, h2, SC_OVERLOAD); wrk->vsl = NULL; return; @@ -439,8 +427,6 @@ h2_new_session(struct worker *wrk, void *arg) r2_ou = h2_ou_session(wrk, h2, &req); AZ(req); if (r2_ou == NULL) { - assert(h2->refcnt == 1); - h2_del_req(wrk, &h2->req0); h2_del_sess(wrk, h2, SC_RX_JUNK); wrk->vsl = NULL; return; @@ -448,30 +434,24 @@ h2_new_session(struct worker *wrk, void *arg) CHECK_OBJ_NOTNULL(r2_ou, H2_REQ_MAGIC); AZ(r2_ou->scheduled); - } + } else + VSLb(h2->vsl, SLT_Debug, "H2: Got pu PRISM"); assert(HTC_S_COMPLETE == H2_prism_complete(h2->htc)); + + /* Initialize the workspace rx buffer. Some read overshoot data + * may be present as pipeline data. This sequence of calls + * basically just resets the WS, memmove()s the pipeline data + * first, and sets htc->rxbuf_[be] to the pipeline data. */ HTC_RxPipeline(h2->htc, h2->htc->rxbuf_b + sizeof(H2_prism)); HTC_RxInit(h2->htc, h2->ws); - AN(WS_Reservation(h2->ws)); - VSLb(h2->vsl, SLT_Debug, "H2: Got pu PRISM"); + WS_ReleaseP(h2->htc->ws, h2->htc->rxbuf_e); THR_SetRequest(h2->srq); - AN(WS_Reservation(h2->ws)); /* Send our settings */ l = h2_enc_settings(&h2->local_settings, settings, sizeof (settings)); - AN(WS_Reservation(h2->ws)); - H2_Send_Get(wrk, h2, h2->req0); - AN(WS_Reservation(h2->ws)); - H2_Send_Frame(wrk, h2, - H2_F_SETTINGS, H2FF_NONE, l, 0, settings); - AN(WS_Reservation(h2->ws)); - H2_Send_Rel(h2, h2->req0); - AN(WS_Reservation(h2->ws)); - - /* and off we go... */ - h2->cond = &wrk->cond; + H2_Send_SETTINGS(h2, H2FF_NONE, l, settings); if (r2_ou != NULL) { /* Schedule the opportunistic request received over HTTP/1 @@ -488,57 +468,23 @@ h2_new_session(struct worker *wrk, void *arg) * socket. 
*/ r2_ou->scheduled = 0; VSLb(h2->vsl, SLT_Debug, "H2: No Worker-threads"); - h2_kill_req(wrk, h2, r2_ou, H2SE_ENHANCE_YOUR_CALM); + h2_kill_req(wrk, h2, &r2_ou, H2SE_ENHANCE_YOUR_CALM); h2->error = H2CE_ENHANCE_YOUR_CALM; - h2_tx_goaway(wrk, h2, h2->error); } r2_ou = NULL; } - while (h2_rxframe(wrk, h2)) { - HTC_RxInit(h2->htc, h2->ws); - if (WS_Overflowed(h2->ws)) { - H2S_Lock_VSLb(h2, SLT_SessError, "H2: Empty Rx Workspace"); - h2->error = H2CE_INTERNAL_ERROR; - break; - } - AN(WS_Reservation(h2->ws)); - } + /* and off we go... */ + h2_run(wrk, h2); AN(h2->error); - - /* Delete all idle streams */ - Lck_Lock(&h2->sess->mtx); - VSLb(h2->vsl, SLT_Debug, "H2 CLEANUP %s", h2->error->name); - VTAILQ_FOREACH(r2, &h2->streams, list) { - if (r2->error == 0) - r2->error = h2->error; - if (r2->cond != NULL) - PTOK(pthread_cond_signal(r2->cond)); - } - PTOK(pthread_cond_broadcast(h2->winupd_cond)); - Lck_Unlock(&h2->sess->mtx); - while (1) { - again = 0; - VTAILQ_FOREACH_SAFE(r2, &h2->streams, list, r22) { - if (r2 != h2->req0) { - h2_kill_req(wrk, h2, r2, h2->error); - again++; - } - } - if (!again) - break; - Lck_Lock(&h2->sess->mtx); - VTAILQ_FOREACH(r2, &h2->streams, list) - VSLb(h2->vsl, SLT_Debug, "ST %u %d", - r2->stream, r2->state); - (void)Lck_CondWaitTimeout(h2->cond, &h2->sess->mtx, .1); - Lck_Unlock(&h2->sess->mtx); + reason = h2->error->reason; + if (reason == SC_NULL) { + /* XXX: It's messy that some h2_errors have reasosn + * SC_NULL, which is just WRONG() wrt to SES_Delete(). 
*/ + reason = SC_REM_CLOSE; } - h2->cond = NULL; - assert(h2->refcnt == 1); - h2_del_req(wrk, &h2->req0); - h2_del_sess(wrk, h2, h2->error->reason); + h2_del_sess(wrk, h2, reason); wrk->vsl = NULL; } diff --git a/bin/varnishtest/tests/f00007.vtc b/bin/varnishtest/tests/f00007.vtc index e982548a03..7976362def 100644 --- a/bin/varnishtest/tests/f00007.vtc +++ b/bin/varnishtest/tests/f00007.vtc @@ -62,6 +62,7 @@ client c3 { stream 1 { txreq -req POST -url /3 -hdr "content-length" "1" -nostrend txdata -data "A" -nostrend + rxwinup delay 0.5 txdata -data "GET /FAIL HTTP/1.1\r\n\r\n" rxrst diff --git a/bin/varnishtest/tests/r02679.vtc b/bin/varnishtest/tests/r02679.vtc index 590dfb264c..d3707151d7 100644 --- a/bin/varnishtest/tests/r02679.vtc +++ b/bin/varnishtest/tests/r02679.vtc @@ -22,7 +22,9 @@ client c1 { stream 1 { txreq -req POST -hdr "content-length" "31469" -nostrend txdata -datalen 1550 -nostrend + rxwinup txdata -datalen 16000 -nostrend + rxwinup txdata -datalen 13919 rxresp expect resp.status == 200 diff --git a/bin/varnishtest/tests/r02937.vtc b/bin/varnishtest/tests/r02937.vtc index eed3b2b34e..11dbf32330 100644 --- a/bin/varnishtest/tests/r02937.vtc +++ b/bin/varnishtest/tests/r02937.vtc @@ -27,7 +27,14 @@ client c1 { rxgoaway expect goaway.err == ENHANCE_YOUR_CALM expect goaway.laststream == 1 - } -run + } -start + + stream 1 { + rxrst + } -start + + stream 0 -wait + stream 1 -wait expect_close } -run diff --git a/bin/varnishtest/tests/t02003.vtc b/bin/varnishtest/tests/t02003.vtc index fe30e82437..fab076273b 100644 --- a/bin/varnishtest/tests/t02003.vtc +++ b/bin/varnishtest/tests/t02003.vtc @@ -36,7 +36,7 @@ varnish v1 -expect MEMPOOL.sess1.live == 0 ####################################################################### # Test reverse order stream numbers -client c1 { +client c2 { stream 0 { rxgoaway expect goaway.laststream == 3 @@ -61,7 +61,7 @@ varnish v1 -expect MEMPOOL.sess1.live == 0 
####################################################################### # Test WINDOW_UPDATE error conditions -client c1 { +client c3 { stream 1 { txreq -nostrend txwinup -size 0 @@ -92,7 +92,7 @@ client c1 { } -run stream 0 -wait } -run -client c1 { +client c4 { stream 0 { txwinup -size 0x40000000 txwinup -size 0x40000000 @@ -102,7 +102,7 @@ client c1 { } -run } -run -client c1 { +client c5 { stream 1 { txreq rxresp @@ -123,7 +123,7 @@ varnish v1 -expect MEMPOOL.sess1.live == 0 ####################################################################### # Test PING error conditions -client c1 { +client c6 { stream 0 { txping -ack -data "FOOBAR42" rxgoaway @@ -132,7 +132,7 @@ client c1 { } -run } -run -client c1 { +client c7 { stream 0 { sendhex "000008 06 80 00000001 0102030405060708" rxgoaway @@ -141,7 +141,7 @@ client c1 { } -run } -run -client c1 { +client c8 { stream 0 { sendhex "000007 06 80 00000000 01020304050607" rxgoaway @@ -160,7 +160,7 @@ varnish v1 -expect MEMPOOL.sess1.live == 0 ####################################################################### # Test PUSH_PROMISE error conditions -client c1 { +client c9 { stream 0 { rxgoaway expect goaway.err == PROTOCOL_ERROR @@ -173,7 +173,7 @@ client c1 { stream 0 -wait } -run -client c1 { +client c10 { stream 0 { rxgoaway expect goaway.err == PROTOCOL_ERROR @@ -198,7 +198,7 @@ varnish v1 -expect MEMPOOL.sess1.live == 0 ####################################################################### # Test RST_STREAM error conditions -client c1 { +client c11 { stream 0 { # RST idle stream sendhex "000004 03 00 00000007 00000008" @@ -208,7 +208,7 @@ client c1 { } -run } -run -client c1 { +client c12 { stream 0 { rxgoaway expect goaway.err == FRAME_SIZE_ERROR @@ -222,7 +222,7 @@ client c1 { stream 0 -wait } -run -client c1 { +client c13 { stream 0 { # RST stream zero sendhex "000000 03 00 00000000 00000008" @@ -232,8 +232,11 @@ client c1 { } -run } -run -client c1 { +barrier b14 cond 2 +client c14 { stream 0 { + barrier b14 
sync + txgoaway rxgoaway expect goaway.err == NO_ERROR expect goaway.laststream == 3 @@ -246,11 +249,15 @@ client c1 { txreq -nostrend txrst -err 0x666 } -run + barrier b14 sync stream 0 -wait } -run -client c1 { +barrier b15 cond 2 +client c15 { stream 0 { + barrier b15 sync + txgoaway rxgoaway expect goaway.err == NO_ERROR expect goaway.laststream == 1 @@ -263,6 +270,7 @@ client c1 { # RST_STREAM on closed stream txrst } -run + barrier b15 sync stream 0 -wait } -run @@ -277,7 +285,7 @@ varnish v1 -expect MEMPOOL.sess1.live == 0 ####################################################################### # Test SETTING error conditions -client c1 { +client c16 { stream 0 { # SETTING ACK with data sendhex "000001 04 01 00000000 aa" @@ -287,7 +295,7 @@ client c1 { } -run } -run -client c1 { +client c17 { stream 0 { # SETTING ACK with bad length sendhex "000001 04 00 00000000 aa" @@ -296,7 +304,7 @@ client c1 { expect goaway.laststream == 0 } -run } -run -client c1 { +client c18 { stream 0 { # SETTING ACK with bad value txsettings -winsize 0x80000000 @@ -306,7 +314,7 @@ client c1 { } -run } -run -client c1 { +client c19 { stream 0 { # SETTING unknown value sendhex "000006 04 00 00000000 ffff00000000" @@ -326,16 +334,19 @@ varnish v1 -expect MEMPOOL.sess1.live == 0 ####################################################################### # Test GOAWAY error conditions -client c1 { +client c20 { stream 0 { txgoaway -err 2 + rxgoaway + expect goaway.err == NO_ERROR } -run expect_close } -run -client c1 { +client c21 { stream 0 { txgoaway -err 2222 + rxgoaway } -run expect_close } -run @@ -351,7 +362,7 @@ varnish v1 -expect MEMPOOL.sess1.live == 0 ####################################################################### # Test HEADERS error conditions -client c1 { +client c22 { stream 1 { txreq -nostrend txreq -nostrend @@ -362,7 +373,7 @@ client c1 { expect_close } -run -client c1 { +client c23 { stream 0 { sendhex 00000c sendhex 01 @@ -376,7 +387,7 @@ client c1 { } -run } -run 
-client c1 { +client c24 { stream 0 { sendhex 000012 sendhex 01 @@ -388,7 +399,7 @@ client c1 { } -run } -run -client c1 { +client c25 { stream 1 { txreq -hdr ":bla" "foo" rxrst @@ -398,7 +409,7 @@ client c1 { #2349: Padding exceeds frame size -client c1 { +client c26 { stream 1 { sendhex 000001 sendhex 01 @@ -415,7 +426,7 @@ client c1 { } -run #2349: Padding equal to frame size -client c1 { +client c27 { stream 1 { sendhex 000001 sendhex 01 @@ -432,7 +443,7 @@ client c1 { } -run #2349: Integer underrun may also occur when the priority flag is set -client c1 { +client c28 { stream 1 { sendhex 000004 sendhex 01 @@ -458,7 +469,7 @@ varnish v1 -expect MEMPOOL.sess1.live == 0 ####################################################################### # Test CONTINUATION error conditions -client c1 { +client c29 { stream 1 { txreq -nostrend txcont -hdr "bar" "foo" @@ -469,7 +480,7 @@ client c1 { expect_close } -run -client c1 { +client c30 { stream 0 { sendhex 000014 sendhex 01 @@ -489,7 +500,7 @@ client c1 { } -run } -run -client c1 { +client c31 { stream 1 { txreq -nohdrend txcont -hdr "bar" "foo" @@ -499,7 +510,7 @@ client c1 { } -run # 2350: Don't accept a continuation frame after stream is closed -client c1 { +client c32 { stream 1 { txreq rxresp @@ -522,25 +533,26 @@ varnish v1 -expect MEMPOOL.sess1.live == 0 ####################################################################### # Test DATA error conditions -client c1 { +client c33 { stream 1 { txdata -data "FOOBAR" } -run stream 0 { rxgoaway + expect goaway.err == PROTOCOL_ERROR } -run expect_close } -run -client c1 { +client c34 { stream 1 { txreq rxresp txdata -data "FOOBAR" } -run - stream 3 { - txreq - rxresp + stream 0 { + rxgoaway + expect goaway.err == PROTOCOL_ERROR } -run } -run diff --git a/bin/varnishtest/tests/t02005.vtc b/bin/varnishtest/tests/t02005.vtc index 39737f93a6..10f54ded8d 100644 --- a/bin/varnishtest/tests/t02005.vtc +++ b/bin/varnishtest/tests/t02005.vtc @@ -31,7 +31,7 @@ varnish v1 
-vcl+backend { varnish v1 -cliok "param.set debug +syncvsl" logexpect l1 -v v1 -g raw { - expect * 1001 ReqAcct "80 7 87 78 8 86" + expect * 1001 ReqAcct "160 7 167 78 16 94" expect * 1000 ReqAcct "45 8 53 63 34 97" } -start diff --git a/bin/varnishtest/tests/t02008.vtc b/bin/varnishtest/tests/t02008.vtc index 75cee513fa..4a7f096d78 100644 --- a/bin/varnishtest/tests/t02008.vtc +++ b/bin/varnishtest/tests/t02008.vtc @@ -28,6 +28,8 @@ client c1 { } -run stream 0 { txgoaway -err 2 + rxgoaway + expect goaway.err == NO_ERROR } -run expect_close } -run diff --git a/bin/varnishtest/tests/t02015.vtc b/bin/varnishtest/tests/t02015.vtc index 6e59dc7abc..f0241178a3 100644 --- a/bin/varnishtest/tests/t02015.vtc +++ b/bin/varnishtest/tests/t02015.vtc @@ -14,8 +14,8 @@ varnish v1 -vcl+backend { } -start logexpect l1 -v v1 -g raw -q ReqAcct { - expect ? 1001 ReqAcct "46 0 46 69 12345 12414" - expect ? 1003 ReqAcct "46 0 46 74 1000 1074" + expect ? 1001 ReqAcct "92 0 92 69 24690 24759" + expect ? 1003 ReqAcct "92 0 92 74 13345 13419" } -start client c1 { diff --git a/bin/varnishtest/tests/t02016.vtc b/bin/varnishtest/tests/t02016.vtc index 1e5a7dc8ae..15ccd58d83 100644 --- a/bin/varnishtest/tests/t02016.vtc +++ b/bin/varnishtest/tests/t02016.vtc @@ -6,6 +6,8 @@ server s1 { } -start varnish v1 -cliok "param.set feature +http2" +varnish v1 -cliok "param.set debug +syncvsl" +varnish v1 -cliok "param.set timeout_idle 10" varnish v1 -vcl+backend { sub vcl_recv { if (req.url ~ "synth") { diff --git a/bin/varnishtest/tests/t02020.vtc b/bin/varnishtest/tests/t02020.vtc index e2bcb76f43..e12a5c18e5 100644 --- a/bin/varnishtest/tests/t02020.vtc +++ b/bin/varnishtest/tests/t02020.vtc @@ -1,6 +1,6 @@ varnishtest "H/2 received data frames with padding" -barrier b1 sock 3 +barrier b1 sock 2 server s1 { rxreq @@ -8,6 +8,7 @@ server s1 { expect req.body == abcde txresp rxreq + expect req.bodylen == 81500 txresp rxreq txresp @@ -48,31 +49,20 @@ client c2 { # by unblocking the client thread stuck 
in vcl_recv. From that # point on window updates will also be sent on the stream. - stream 0 { - rxwinup - rxwinup - rxwinup - rxwinup - barrier b1 sync - } -start - stream 3 { - txreq -req POST -url /3 -hdr "content-length" "131072" -nostrend - txdata -datalen 16300 -padlen 83 -nostrend - txdata -datalen 16300 -padlen 83 -nostrend - txdata -datalen 16300 -padlen 83 -nostrend + txreq -req POST -url /3 -hdr "content-length" "81500" -nostrend + loop 3 { + txdata -datalen 16300 -padlen 83 -nostrend + rxwinup + expect winup.size == 84 + } txdata -datalen 16300 -padlen 82 -nostrend - barrier b1 sync - rxwinup - txdata -datalen 16300 -padlen 83 -nostrend rxwinup - txdata -datalen 16300 -padlen 83 -nostrend - rxwinup - txdata -datalen 16300 -padlen 83 -nostrend - rxwinup - txdata -datalen 16300 -padlen 83 -nostrend + expect winup.size == 83 + barrier b1 sync rxwinup - txdata -datalen 672 + expect winup.size == 65200 + txdata -datalen 16300 -padlen 83 rxresp expect resp.status == 200 } -start diff --git a/bin/varnishtest/tests/t02027.vtc b/bin/varnishtest/tests/t02027.vtc index a27976f3b4..835913b3b4 100644 --- a/bin/varnishtest/tests/t02027.vtc +++ b/bin/varnishtest/tests/t02027.vtc @@ -10,8 +10,7 @@ varnish v1 -arg "-p feature=+http2" -arg "-p debug=+syncvsl" -vcl { logexpect l0 -v v1 -g vxid -q "Begin ~ sess" { fail add * SessError expect * * Debug {^H2: Got pu PRISM} - expect 0 = Debug {^H2: HTC eof.*frame=complete goaway=0} - expect 0 = Debug {^H2 CLEANUP H2CE_NO_ERROR} + expect 0 = Debug {^H2: HTC eof .* frame=complete} expect 0 = ReqAcct {^0 0 0 18 26 44} expect 0 = SessClose {^REM_CLOSE} expect 0 = End @@ -32,8 +31,7 @@ client c0 { logexpect l1 -v v1 -g vxid -q "Begin ~ sess" { fail add * SessError - expect * * Debug {^H2: HTC eof.*frame=complete goaway=0} - expect 0 = Debug {^H2 CLEANUP H2CE_NO_ERROR} + expect * * Debug {^H2: HTC eof .* frame=complete} expect 9 = ReqAcct {^27 0 27 27 26 53} expect 0 = SessClose {^REM_CLOSE} expect 0 = End @@ -55,8 +53,7 @@ client 
c1 { logexpect l2 -v v1 -g vxid -q "Begin ~ sess" { fail add * SessError - expect * * Debug {^H2: HTC eof.*frame=complete goaway=0} - expect 0 = Debug {^H2 CLEANUP H2CE_NO_ERROR} + expect * * Debug {^H2: HTC eof .* frame=complete} expect 9 = ReqAcct {^27 0 27 27 26 53} expect 0 = SessClose {^REM_CLOSE} expect 0 = End @@ -78,8 +75,7 @@ client c2 { logexpect l3 -v v1 -g vxid -q "Begin ~ sess" { fail add * SessError - expect * * Debug {^H2: HTC eof.*frame=partial goaway=0} - expect 0 = Debug {^H2 CLEANUP H2CE_NO_ERROR} + expect * * Debug {^H2: HTC eof .* frame=partial} expect 0 = ReqAcct {^18 0 18 27 26 53} expect 0 = SessClose {^REM_CLOSE} expect 0 = End diff --git a/include/tbl/h2_error.h b/include/tbl/h2_error.h index adfbbde422..ceffc41904 100644 --- a/include/tbl/h2_error.h +++ b/include/tbl/h2_error.h @@ -206,6 +206,24 @@ H2_ERROR( /* reason */ SC_NULL, /* descr */ "HTTP/2 header list exceeded http_req_size" ) + +H2_ERROR( + /* name */ SEND_TIMEOUT, + /* val */ 8, /* CANCEL */ + /* types */ 2, + /* goaway */ 0, + /* reason */ SC_NULL, + /* descr */ "send timeout" +) + +H2_ERROR( + /* name */ IO_ERROR, + /* val */ 0, + /* types */ 1, + /* goaway */ 1, + /* reason */ SC_REM_CLOSE, + /* descr */ "socket error" +) # undef H2_CUSTOM_ERRORS #endif From fffe00d3463e80723d4741a3ac581630263765a6 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Wed, 16 Apr 2025 12:03:25 +0200 Subject: [PATCH 30/39] http2_proto: Enforce preface SETTINGS frame We have been failing to enforce a MUST requirement of the H2 spec that states that a PRISM MUST be followed by a SETTINGS frame (which may be empty). Note that this also goes for H1->H2 upgrades, even though the settings were also transferred as an H1 header. Relevant RFC references: rfc7540,l,579,637 rfc7540,l,482,485 This change comes with a need to adjust several of our test cases that failed to adhere to this requirement. 
The test case changes mostly consist of not doing the preface manually, allowing varnishtest to do the needful. --- bin/varnishd/http2/cache_http2.h | 2 ++ bin/varnishd/http2/cache_http2_proto.c | 19 ++++++++++++++++--- bin/varnishd/http2/cache_http2_session.c | 1 + bin/varnishtest/tests/r02923.vtc | 3 --- bin/varnishtest/tests/t02011.vtc | 3 --- bin/varnishtest/tests/t02015.vtc | 5 ----- bin/varnishtest/tests/t02016.vtc | 15 --------------- bin/varnishtest/tests/t02028.vtc | 17 +++++++++++++++++ 8 files changed, 36 insertions(+), 29 deletions(-) create mode 100644 bin/varnishtest/tests/t02028.vtc diff --git a/bin/varnishd/http2/cache_http2.h b/bin/varnishd/http2/cache_http2.h index dc4921566a..4eb00e13ed 100644 --- a/bin/varnishd/http2/cache_http2.h +++ b/bin/varnishd/http2/cache_http2.h @@ -166,6 +166,8 @@ struct h2_sess { unsigned magic; #define H2_SESS_MAGIC 0xa16f7e4b + unsigned expect_settings_next; + pthread_t rxthr; struct sess *sess; diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 80ae5b143e..a0d5b566c8 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -978,6 +978,22 @@ h2_procframe(struct worker *wrk, struct h2_sess *h2, h2_frame h2f) goto exit; } + if (h2->expect_settings_next) { + if (h2f != H2_F_SETTINGS || (h2->rxf_flags & H2FF_ACK)) { + // rfc7540,l,579,637 + // rfc7540,l,482,485 + VSLb(h2->vsl, SLT_Error, + "H2: unexpected %s%s frame on stream %d," + " expected preface settings", + h2f->name, + h2->rxf_flags & H2FF_ACK ? 
"(ACK)" : "", + h2->rxf_stream); + h2e = H2CE_PROTOCOL_ERROR; + goto exit; + } + h2->expect_settings_next = 0; + } + if (h2->rxf_stream != 0 && !(h2->rxf_stream & 1)) { // rfc7540,l,1140,1145 // rfc7540,l,1153,1158 @@ -1226,9 +1242,6 @@ h2_rxframe(struct worker *wrk, struct h2_sess *h2) h2->htc->rxbuf_b += h2->rxf_len + 9; assert(h2->htc->rxbuf_b <= h2->htc->rxbuf_e); - /* XXX: Apply connection preface SETTINGS expectation as first - * frame, protocol error on anything else.. */ - if (h2->rxf_type >= H2FMAX) { // rfc7540,l,679,681 h2->bogosity++; diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index 2844479f65..abc7751dbb 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -126,6 +126,7 @@ h2_init_sess(struct sess *sp, struct h2_sess *h2s, struct req **psrq, h2_local_settings(&h2->local_settings); h2->remote_settings = H2_proto_settings; h2->decode = decode; + h2->expect_settings_next = 1; VEFD_INIT(h2->efd); h2->tx_window = h2->remote_settings.initial_window_size; diff --git a/bin/varnishtest/tests/r02923.vtc b/bin/varnishtest/tests/r02923.vtc index 324f20cff6..537812eb22 100644 --- a/bin/varnishtest/tests/r02923.vtc +++ b/bin/varnishtest/tests/r02923.vtc @@ -34,9 +34,6 @@ varnish v1 -vcl+backend { } -start client c1 { - txpri - stream 0 rxsettings -run - stream 1 { txreq -url /sync rxresp diff --git a/bin/varnishtest/tests/t02011.vtc b/bin/varnishtest/tests/t02011.vtc index 88c64d9045..0d25eb6f2c 100644 --- a/bin/varnishtest/tests/t02011.vtc +++ b/bin/varnishtest/tests/t02011.vtc @@ -43,9 +43,6 @@ varnish v1 -vcl+backend { } -start client c1 { - txpri - stream 0 rxsettings -run - stream 1 { txreq -hdr should sync barrier b1 sync diff --git a/bin/varnishtest/tests/t02015.vtc b/bin/varnishtest/tests/t02015.vtc index f0241178a3..860e79da38 100644 --- a/bin/varnishtest/tests/t02015.vtc +++ b/bin/varnishtest/tests/t02015.vtc @@ -19,12 +19,7 @@ logexpect l1 -v v1 -g raw 
-q ReqAcct { } -start client c1 { - txpri - stream 0 { - rxsettings - expect settings.ack == false - txsettings -ack txsettings -winsize 1000 rxsettings expect settings.ack == true diff --git a/bin/varnishtest/tests/t02016.vtc b/bin/varnishtest/tests/t02016.vtc index 15ccd58d83..7e8dd5a094 100644 --- a/bin/varnishtest/tests/t02016.vtc +++ b/bin/varnishtest/tests/t02016.vtc @@ -25,12 +25,7 @@ logexpect l1 -v v1 { } -start client c1 { - txpri - stream 0 { - rxsettings - expect settings.ack == false - txsettings -ack txsettings -winsize 1000 rxsettings expect settings.ack == true @@ -62,12 +57,7 @@ logexpect l2 -v v1 { } -start client c2 { - txpri - stream 0 { - rxsettings - expect settings.ack == false - txsettings -ack txsettings -winsize 1000 rxsettings expect settings.ack == true @@ -103,12 +93,7 @@ logexpect l3 -v v1 { } -start client c3 { - txpri - stream 0 { - rxsettings - expect settings.ack == false - txsettings -ack txsettings -winsize 1000 rxsettings expect settings.ack == true diff --git a/bin/varnishtest/tests/t02028.vtc b/bin/varnishtest/tests/t02028.vtc new file mode 100644 index 0000000000..cec3dd58f2 --- /dev/null +++ b/bin/varnishtest/tests/t02028.vtc @@ -0,0 +1,17 @@ +varnishtest "Bad preface: no SETTINGS frame first" + +varnish v1 -cliok "param.set feature +http2" +varnish v1 -cliok "param.set debug +syncvsl" +varnish v1 -vcl "backend default none;" -start + +client c1 { + txpri + + stream 0 { + rxsettings + expect settings.ack == false + txsettings -ack + rxgoaway + expect goaway.err == PROTOCOL_ERROR + } -run +} -run From 1553eca493129317fb019efbfb39c9ced715d8df Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Tue, 22 Apr 2025 14:11:46 +0200 Subject: [PATCH 31/39] http2_proto: Centralize stream state changes This centralizes the logic around h2 stream state changes to one function. 
--- bin/varnishd/http2/cache_http2.h | 1 + bin/varnishd/http2/cache_http2_proto.c | 36 ++++++++++++++++++++---- bin/varnishd/http2/cache_http2_reqbody.c | 4 +-- bin/varnishd/http2/cache_http2_send.c | 8 ++---- bin/varnishd/http2/cache_http2_session.c | 2 +- 5 files changed, 36 insertions(+), 15 deletions(-) diff --git a/bin/varnishd/http2/cache_http2.h b/bin/varnishd/http2/cache_http2.h index 4eb00e13ed..0eab2eb34a 100644 --- a/bin/varnishd/http2/cache_http2.h +++ b/bin/varnishd/http2/cache_http2.h @@ -294,6 +294,7 @@ const char *h2_framename(int frame); h2_error h2_errcheck(const struct h2_req *r2); void h2_async_error(struct h2_req *r2, h2_error h2e); void h2_attention(struct h2_sess *h2); +void h2_stream_setstate(struct h2_req *r2, enum h2_stream_e state); void h2_run(struct worker *wrk, struct h2_sess *h2); struct h2_req * h2_new_req(struct h2_sess *, unsigned stream, struct req **); void h2_kill_req(struct worker *, struct h2_sess *, struct h2_req **, h2_error); diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index a0d5b566c8..e8f61dcf06 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -260,11 +260,7 @@ h2_kill_req(struct worker *wrk, struct h2_sess *h2, struct h2_req **pr2, r2->t_win_low = 0.; } - if (r2->state < H2_S_CLOSED) { - r2->state = H2_S_CLOSED; - assert(h2->open_streams > 0); - h2->open_streams--; - } + h2_stream_setstate(r2, H2_S_CLOSED); if (r2->scheduled) { Lck_Lock(&h2->sess->mtx); @@ -682,6 +678,11 @@ h2_end_headers(struct worker *wrk, struct h2_sess *h2, h2e = h2h_decode_hdr_fini(h2); AZ(h2->hpack_lock); + if (req->req_body_status == BS_NONE) { + /* REQ_BODY_NONE implies that the HEADERS frame had flag + * END_STREAM set. 
*/ + h2_stream_setstate(r2, H2_S_CLOS_REM); + } if (h2e != NULL) { VSLb(h2->vsl, SLT_Debug, "HPACK/FINI %s", h2e->name); assert(!WS_IsReserved(r2->req->ws)); @@ -803,7 +804,7 @@ h2_rx_headers(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) ASSERT_H2_SESS(h2); CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); assert(r2->state == H2_S_IDLE); - r2->state = H2_S_OPEN; + h2_stream_setstate(r2, H2_S_OPEN); req = r2->req; CHECK_OBJ_NOTNULL(req, REQ_MAGIC); @@ -1048,6 +1049,29 @@ h2_procframe(struct worker *wrk, struct h2_sess *h2, h2_frame h2f) } } +void +h2_stream_setstate(struct h2_req *r2, enum h2_stream_e state) +{ + CHECK_OBJ_NOTNULL(r2, H2_REQ_MAGIC); + ASSERT_H2_SESS(r2->h2sess); + + if (r2->state >= state) { + /* State transitions only go from lower states to + * higher. If we are already at a higher state, ignore + * it. (We do not assert on state changes because change + * of state is both driven by our internal progress as + * well as incoming client data.) */ + return; + } + + if (state >= H2_S_CLOSED) { + assert(r2->h2sess->open_streams > 0); + r2->h2sess->open_streams--; + } + + r2->state = state; +} + static h2_error h2_stream_tmo(struct h2_sess *h2, const struct h2_req *r2, vtim_real now) { diff --git a/bin/varnishd/http2/cache_http2_reqbody.c b/bin/varnishd/http2/cache_http2_reqbody.c index 1b4e801f9e..dad4dd9e8c 100644 --- a/bin/varnishd/http2/cache_http2_reqbody.c +++ b/bin/varnishd/http2/cache_http2_reqbody.c @@ -189,7 +189,7 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) } if (h2->rxf_flags & H2FF_END_STREAM) - r2->state = H2_S_CLOS_REM; + h2_stream_setstate(r2, H2_S_CLOS_REM); Lck_Lock(&h2->sess->mtx); h2_reqbody_kick(r2); Lck_Unlock(&h2->sess->mtx); @@ -269,7 +269,7 @@ h2_reqbody_data(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) r2->rxbuf->head += len; assert(r2->rxbuf->tail <= r2->rxbuf->head); if (h2->rxf_flags & H2FF_END_STREAM) - r2->state = H2_S_CLOS_REM; + h2_stream_setstate(r2, H2_S_CLOS_REM); 
h2_reqbody_kick(r2); Lck_Unlock(&h2->sess->mtx); diff --git a/bin/varnishd/http2/cache_http2_send.c b/bin/varnishd/http2/cache_http2_send.c index b6a2d3d5ce..b84c6cb466 100644 --- a/bin/varnishd/http2/cache_http2_send.c +++ b/bin/varnishd/http2/cache_http2_send.c @@ -477,12 +477,8 @@ H2_Send_TxStuff(struct h2_sess *h2) assert(large->count <= large->len); if (large->count == large->len) { - if (large->r2->state < H2_S_CLOSED && - large->flags & H2FF_END_STREAM) { - large->r2->state = H2_S_CLOSED; - assert(h2->open_streams > 0); - h2->open_streams--; - } + if (large->flags & H2FF_END_STREAM) + h2_stream_setstate(large->r2, H2_S_CLOSED); /* Signal that we are finished */ Lck_Lock(&h2->sess->mtx); diff --git a/bin/varnishd/http2/cache_http2_session.c b/bin/varnishd/http2/cache_http2_session.c index abc7751dbb..009b4ae128 100644 --- a/bin/varnishd/http2/cache_http2_session.c +++ b/bin/varnishd/http2/cache_http2_session.c @@ -330,7 +330,7 @@ h2_ou_session(struct worker *wrk, struct h2_sess *h2, assert(r2->req->req_step == R_STP_TRANSPORT); r2->req->task->func = h2_do_req; r2->req->task->priv = r2->req; - r2->state = H2_S_CLOS_REM; // rfc7540,l,489,491 + h2_stream_setstate(r2, H2_S_CLOS_REM); // rfc7540,l,489,491 http_SetH(r2->req->http, HTTP_HDR_PROTO, "HTTP/2.0"); return (r2); From d9823ba6f38a2dbb5b40933da87ad0a24ded6980 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 22 May 2025 12:59:31 +0200 Subject: [PATCH 32/39] H2: Turn session send shutdown into a two stage operation When we are sending a GOAWAY frame during session shutdown, we have to make sure not to cancel the current large frame if any. Fix this by changing the send framework shutdown into a two stage operation. Prior to sending the GOAWAY we cancel any queued large frames, then we attempt to get the GOAWAY out the door, and finally cancel the current large frame if still present. Better diff with the --ignore-all-space option. 
--- bin/varnishd/http2/cache_http2.h | 3 ++ bin/varnishd/http2/cache_http2_proto.c | 4 ++ bin/varnishd/http2/cache_http2_send.c | 56 +++++++++++++++++++++----- 3 files changed, 53 insertions(+), 10 deletions(-) diff --git a/bin/varnishd/http2/cache_http2.h b/bin/varnishd/http2/cache_http2.h index 0eab2eb34a..19ad5e3dbd 100644 --- a/bin/varnishd/http2/cache_http2.h +++ b/bin/varnishd/http2/cache_http2.h @@ -196,6 +196,8 @@ struct h2_sess { struct iovec tx_vec[2]; /* Must be 2 wide */ unsigned tx_nvec; + unsigned tx_stopped; + uint8_t *tx_s_start; uint8_t *tx_s_end; uint8_t *tx_s_head; @@ -288,6 +290,7 @@ ssize_t H2_Send_TxStuff(struct h2_sess *h2); int H2_Send_Something(struct h2_sess *h2); int H2_Send_Pending(struct h2_sess *h2); void H2_Send_Shutdown(struct h2_sess *h2); +void H2_Send_Stop(struct h2_sess *h2); /* cache_http2_proto.c */ const char *h2_framename(int frame); diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index e8f61dcf06..73d9315a12 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -1443,6 +1443,10 @@ h2_run(struct worker *wrk, struct h2_sess *h2) } } + /* We will not be sending anything more on the socket. */ + H2_Send_Stop(h2); + AN(VTAILQ_EMPTY(&h2->tx_l_queue)); + /* XXX: Shutdown socket? Would presumably free up kernel socket * buffers while waiting for waitinglists and the like to clean * up. 
*/ diff --git a/bin/varnishd/http2/cache_http2_send.c b/bin/varnishd/http2/cache_http2_send.c index b84c6cb466..62093b152e 100644 --- a/bin/varnishd/http2/cache_http2_send.c +++ b/bin/varnishd/http2/cache_http2_send.c @@ -263,14 +263,16 @@ H2_Send(struct vsl_log *vsl, struct h2_req *r2, h2_frame ftyp, uint8_t flags, Lck_Lock(&h2->sess->mtx); - VTAILQ_INSERT_TAIL(&h2->tx_l_queue, &large, list); - h2->tx_l_stuck = 0; - h2_attention(h2); - - AZ(Lck_CondWait(&large.cond, &h2->sess->mtx)); - AN(large.returned); /* Sanity check */ - /* Note: We will have been removed from the `h2->tx_l_queue` - * list by the signaller. */ + if (!h2->tx_stopped) { + VTAILQ_INSERT_TAIL(&h2->tx_l_queue, &large, list); + h2->tx_l_stuck = 0; + h2_attention(h2); + + AZ(Lck_CondWait(&large.cond, &h2->sess->mtx)); + AN(large.returned); /* Sanity check */ + /* Note: We will have been removed from the `h2->tx_l_queue` + * list by the signaller. */ + } h2e = h2_errcheck(r2); @@ -389,6 +391,7 @@ H2_Send_TxStuff(struct h2_sess *h2) ASSERT_H2_SESS(h2); CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); + AZ(h2->tx_stopped); if (h2->tx_nvec == 0 && h2->tx_s_head != h2->tx_s_start) { /* Prioritise sending the small frames */ @@ -526,6 +529,8 @@ H2_Send_Something(struct h2_sess *h2) CHECK_OBJ_NOTNULL(h2, H2_SESS_MAGIC); ASSERT_H2_SESS(h2); + AZ(h2->tx_stopped); + assert(h2->sess->fd >= 0); pfd->fd = h2->sess->fd; pfd->events = POLLOUT; @@ -568,8 +573,8 @@ H2_Send_Pending(struct h2_sess *h2) return (0); } -void -H2_Send_Shutdown(struct h2_sess *h2) +static void +h2_send_close(struct h2_sess *h2, unsigned stop) { struct h2_send_large *large, *large2; @@ -577,12 +582,43 @@ H2_Send_Shutdown(struct h2_sess *h2) ASSERT_H2_SESS(h2); Lck_Lock(&h2->sess->mtx); + + /* A session error state should have been set prior to calling + * this function. 
*/ AN(h2->error); + AZ(h2->tx_stopped); + + if (stop) { + h2->tx_stopped = 1; + + CHECK_OBJ_ORNULL(h2->tx_l_current, H2_SEND_LARGE_MAGIC); + if (h2->tx_l_current != NULL) { + /* Abort the large frame */ + h2->tx_l_current = NULL; + h2->tx_nvec = 0; + } + } + VTAILQ_FOREACH_SAFE(large, &h2->tx_l_queue, list, large2) { CHECK_OBJ_NOTNULL(large, H2_SEND_LARGE_MAGIC); + if (large == h2->tx_l_current) + continue; VTAILQ_REMOVE(&h2->tx_l_queue, large, list); large->returned = 1; PTOK(pthread_cond_signal(&large->cond)); } + Lck_Unlock(&h2->sess->mtx); } + +void +H2_Send_Shutdown(struct h2_sess *h2) +{ + h2_send_close(h2, 0); +} + +void +H2_Send_Stop(struct h2_sess *h2) +{ + h2_send_close(h2, 1); +} From 78b0d0b35a7b8a30414c102817dc18b80fa410c0 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 22 May 2025 13:03:28 +0200 Subject: [PATCH 33/39] H2: Fix a bad assertion being made An assertion was wrongly being made that when killing any request, the hpack lock would have been released. This of course only is true when we are killing the request holding the hpack lock. Better diff with the --ignore-all-space option. --- bin/varnishd/http2/cache_http2_proto.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 73d9315a12..0fededb636 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -250,9 +250,10 @@ h2_kill_req(struct worker *wrk, struct h2_sess *h2, struct h2_req **pr2, r2->error = h2e; } - if (r2 == h2->hpack_lock) + if (r2 == h2->hpack_lock) { (void)h2h_decode_hdr_fini(h2); - AZ(h2->hpack_lock); + AZ(h2->hpack_lock); + } if (r2->t_win_low != 0.) 
{ assert(h2->win_low_streams > 0); From f80998847ca405a49b6cecf50a2459005ffcc89e Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 22 May 2025 13:05:43 +0200 Subject: [PATCH 34/39] H2: Set connection error if we kill the req holding the hpack lock If we end up killing a req that is holding the hpack lock, we will have lost the compression state. This change makes sure to set a connection error if this happens, which in turn will ensure that the connection is closed. --- bin/varnishd/http2/cache_http2_proto.c | 4 ++ bin/varnishtest/tests/t02023.vtc | 89 +++++--------------------- 2 files changed, 19 insertions(+), 74 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 0fededb636..e85e7fa876 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -251,8 +251,12 @@ h2_kill_req(struct worker *wrk, struct h2_sess *h2, struct h2_req **pr2, } if (r2 == h2->hpack_lock) { + /* We are killing the request that holds the hpack + * context. This is a hard error. */ (void)h2h_decode_hdr_fini(h2); AZ(h2->hpack_lock); + if (h2->error == NULL) + h2->error = H2CE_COMPRESSION_ERROR; } if (r2->t_win_low != 0.) 
{ diff --git a/bin/varnishtest/tests/t02023.vtc b/bin/varnishtest/tests/t02023.vtc index bdc722ce3a..039cc5f4ae 100644 --- a/bin/varnishtest/tests/t02023.vtc +++ b/bin/varnishtest/tests/t02023.vtc @@ -14,58 +14,43 @@ client c1 { expect resp.status == 400 } -run -client c1 { +client c2 { txreq -req "" rxresp expect resp.status == 400 } -run -client c1 { +client c3 { txreq -proto "" rxresp expect resp.status == 400 } -run -client c1 { +client c4 { stream 1 { txreq -url "" rxrst expect rst.err == PROTOCOL_ERROR } -run - stream 3 { - txreq - rxresp - expect resp.status == 200 - } -run } -run -client c1 { +client c5 { stream 1 { txreq -scheme "" rxrst expect rst.err == PROTOCOL_ERROR } -run - stream 3 { - txreq - rxresp - expect resp.status == 200 - } -run } -run -client c1 { +client c6 { stream 1 { txreq -req "" rxrst expect rst.err == PROTOCOL_ERROR } -run - stream 3 { - txreq - rxresp - expect resp.status == 200 - } -run } -run -client c1 { +client c7 { stream 1 { txreq -hdr "empty" "" rxresp @@ -80,118 +65,74 @@ client c1 { varnish v1 -vsl_catchup -client c1 { +client c8 { stream 1 { txreq -hdr "foo" " bar" rxrst expect rst.err == PROTOCOL_ERROR } -run - stream 3 { - txreq - rxresp - expect resp.status == 200 - } -run } -run -client c1 { +client c9 { stream 1 { txreq -hdr "foo" " " rxrst expect rst.err == PROTOCOL_ERROR } -run - stream 3 { - txreq - rxresp - expect resp.status == 200 - } -run } -run -client c1 { +client c10 { stream 1 { txreq -hdr ":foo" "bar" rxrst expect rst.err == PROTOCOL_ERROR } -run - stream 3 { - txreq - rxresp - expect resp.status == 200 - } -run } -run -client c1 { +client c11 { stream 1 { txreq -hdr "foo" "b\x0car" rxrst expect rst.err == PROTOCOL_ERROR } -run - stream 3 { - txreq - rxresp - expect resp.status == 200 - } -run } -run -client c1 { +client c12 { stream 1 { txreq -hdr "f o" "bar" rxrst expect rst.err == PROTOCOL_ERROR } -run - stream 3 { - txreq - rxresp - expect resp.status == 200 - } -run } -run -client c1 { +client c13 { 
stream 1 { txreq -hdr "f: o" "bar" rxrst expect rst.err == PROTOCOL_ERROR } -run - stream 3 { - txreq - rxresp - expect resp.status == 200 - } -run } -run -client c1 { +client c14 { stream 1 { txreq -hdr "foo" "bar " rxrst expect rst.err == PROTOCOL_ERROR } -run - stream 3 { - txreq - rxresp - expect resp.status == 200 - } -run } -run -client c1 { +client c15 { stream 1 { txreq -hdr "foo" " bar" rxrst expect rst.err == PROTOCOL_ERROR } -run - stream 3 { - txreq - rxresp - expect resp.status == 200 - } -run } -run -client c1 { +client c16 { stream 1 { txreq -hdr "foo" "bar " rxrst expect rst.err == PROTOCOL_ERROR } -run - stream 3 { - txreq - rxresp - } -run } -run From f76632813ba305b7c32ac4b6c4b224296cec3e54 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Sun, 25 May 2025 15:05:58 +0200 Subject: [PATCH 35/39] H2: Don't call h2_kill_req() from within h2_rx_rst_stream() The per-frame handler functions are not allowed to call h2_kill_req() from within its handlers. This is because the caller will be holding a pointer to it, and h2_kill_req() would free the req if it happens to not currently be scheduled. This patch fixes h2_rx_rst_stream() so that it adheres to this rule. 
--- bin/varnishd/http2/cache_http2_proto.c | 37 +++++++++++++++++--------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index e85e7fa876..5683a12cbf 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -376,7 +376,7 @@ h2_rx_push_promise(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) /********************************************************************** */ -static h2_error +static int h2_rapid_reset(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { vtim_real now; @@ -402,18 +402,16 @@ h2_rapid_reset(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) h2->rapid_reset_limit); h2->last_rst = now; - if (h2->rst_budget < 1.0) { - VSLb(h2->vsl, SLT_SessError, "H2: Hit RST limit. Closing session."); - return (H2CE_RAPID_RESET); - } h2->rst_budget -= 1.0; - return (0); + if (h2->rst_budget > 0) + return (0); + return (1); } static h2_error v_matchproto_(h2_rxframe_f) h2_rx_rst_stream(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) { - h2_error rapid_fault; + h2_error h2e; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); ASSERT_H2_SESS(h2); @@ -426,14 +424,29 @@ h2_rx_rst_stream(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) if (r2 == NULL) return (0); - rapid_fault = h2_rapid_reset(wrk, h2, r2); + h2e = h2_streamerror(vbe32dec(h2->rxf_data)); + AN(h2e); + if (h2e == H2NN_ERROR) { + /* The error is unknown. We don't want to return + * H2NN_ERROR from this function because that will cause + * us to close the connection. Map the unknown error to + * H2SE_INTERNAL_ERROR as suggested by the RFC. */ + /* rfc7540,l,2839,2841 */ + h2e = H2SE_INTERNAL_ERROR; + } - /* We set `r2->error` prior to killing to prevent sending a RST in + /* We set `r2->error` prior to returning to prevent sending a RST in * return.
*/ - r2->error = h2_streamerror(vbe32dec(h2->rxf_data)); - h2_kill_req(wrk, h2, &r2, r2->error); + if (r2->error == NULL) + r2->error = h2e; - return (rapid_fault); + if (h2_rapid_reset(wrk, h2, r2)) { + /* Upgrading to a connection level error. */ + VSLb(h2->vsl, SLT_Error, "H2: Hit RST limit. Closing session."); + h2e = H2CE_RAPID_RESET; + } + + return (h2e); } /********************************************************************** From 359ee11acbebbf56f0ebceb225dbdd0edc149c76 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Sun, 25 May 2025 16:06:31 +0200 Subject: [PATCH 36/39] H2: Clear the h2_req magic value when freeing the req This unsets the `struct h2_req` magic value as held in the req's workspace allocation when releasing the req. This ensures that a dangling `struct h2_req` pointer would fail its magic check after the req has been released. --- bin/varnishd/http2/cache_http2_proto.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 5683a12cbf..79dd8a12f4 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -194,6 +194,7 @@ h2_del_req(struct worker *wrk, struct h2_req **pr2) struct h2_sess *h2; struct sess *sp; struct stv_buffer *stvbuf; + struct req *req; TAKE_OBJ_NOTNULL(r2, pr2, H2_REQ_MAGIC); AZ(r2->scheduled); @@ -218,10 +219,16 @@ h2_del_req(struct worker *wrk, struct h2_req **pr2) AZ(stvbuf); } - Req_Cleanup(sp, wrk, r2->req); + req = r2->req; + CHECK_OBJ_NOTNULL(req, REQ_MAGIC); + r2->magic = 0; + req->transport_priv = NULL; + + AZ(req->ws->r); + Req_Cleanup(sp, wrk, req); if (FEATURE(FEATURE_BUSY_STATS_RATE)) WRK_AddStat(wrk); - Req_Release(r2->req); + Req_Release(req); } void From d1187c74ad441c559a55ccc749155937049700ce Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 5 Jun 2025 16:57:28 +0200 Subject: [PATCH 37/39] H2: Don't refund connection window on updates 
on closed streams The window update handling code would interpret (r2==NULL) as meaning the connection window always. This is faulty logic, as r2 will be NULL also for a stream that has been closed. The RFC clearly states that a window update for a closed stream must be ignored. Fix this by testing on the received stream number being zero rather than (r2==NULL) to determine connection window updates. One way this issue can become a handling problem is that multiple faulty increases of the connection window can make us overflow the connection window maximum size, causing us to send a flow control error to the client. --- bin/varnishd/http2/cache_http2_proto.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 79dd8a12f4..7d5ee87fe2 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -497,7 +497,8 @@ h2_rx_window_update(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) return (H2CE_FRAME_SIZE_ERROR); } wu = vbe32dec(h2->rxf_data) & ~(1LU<<31); - if (r2 == NULL) { + if (h2->rxf_stream == 0) { + AZ(r2); if (wu == 0) return (H2CE_PROTOCOL_ERROR); h2->tx_window += wu; @@ -506,6 +507,12 @@ h2_rx_window_update(struct worker *wrk, struct h2_sess *h2, struct h2_req *r2) } else { if (wu == 0) return (H2SE_PROTOCOL_ERROR); + if (r2 == NULL) { + /* Window update received for a stream we are no + * longer tracking. We MUST ignore this. + * rfc7540,l,2583,2586 */ + return (0); + } r2->tx_window += wu; if (r2->tx_window >= (1LL << 31)) return (H2SE_FLOW_CONTROL_ERROR); From d9691fc4add1397bdf170d2c83e1e06bbcc51221 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 12 Jun 2025 11:53:35 +0200 Subject: [PATCH 38/39] H2: Don't lock when adding a new req It is no longer needed to hold the lock when manipulating the open streams list. This list is after the redesign only ever accessed by the session thread. 
--- bin/varnishd/http2/cache_http2_proto.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index 7d5ee87fe2..ee26b042fb 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -178,11 +178,9 @@ h2_new_req(struct h2_sess *h2, unsigned stream, struct req **preq) r2->rx_window = h2->local_settings.initial_window_size; r2->tx_window = h2->remote_settings.initial_window_size; req->transport_priv = r2; - Lck_Lock(&h2->sess->mtx); if (stream > 0) h2->open_streams++; VTAILQ_INSERT_TAIL(&h2->streams, r2, list); - Lck_Unlock(&h2->sess->mtx); h2->refcnt++; return (r2); } From 022025f009d67d23d6cd65eaf13076d26489b494 Mon Sep 17 00:00:00 2001 From: Martin Blix Grydeland Date: Thu, 12 Jun 2025 13:33:20 +0200 Subject: [PATCH 39/39] H2: Hold session lock while checking the req scheduled status Avoid the data race on `r2->scheduled` by holding the session mutex when testing it. --- bin/varnishd/http2/cache_http2_proto.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/varnishd/http2/cache_http2_proto.c b/bin/varnishd/http2/cache_http2_proto.c index ee26b042fb..78bcc2196a 100644 --- a/bin/varnishd/http2/cache_http2_proto.c +++ b/bin/varnishd/http2/cache_http2_proto.c @@ -272,11 +272,12 @@ h2_kill_req(struct worker *wrk, struct h2_sess *h2, struct h2_req **pr2, h2_stream_setstate(r2, H2_S_CLOSED); + Lck_Lock(&h2->sess->mtx); if (r2->scheduled) { - Lck_Lock(&h2->sess->mtx); h2_reqbody_kick(r2); Lck_Unlock(&h2->sess->mtx); } else { + Lck_Unlock(&h2->sess->mtx); h2_del_req(wrk, &r2); } }