1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright (c) 2012, OmniTI Computer Consulting, Inc. All rights reserved.
14 * Copyright 2014 Ryan Zezeski
15 */
16
17 /*
18 * This file implements a socketfilter used to defer HTTP connections.
19 * To defer a connection means to delay the return of accept(3SOCKET)
20 * until the entire HTTP request is ready (or majority in case of PUT
21 * and POST). This filter may be applied automatically or
22 * programmatically through the use of soconfig(1M) and
23 * setsockopt(3SOCKET).
24 *
25 * The point of this filter is to defer connections, not parse them.
26 * It is up to the webserver to service the request. This filter
 * accepts both valid and invalid HTTP requests. A valid HTTP request
28 * is deferred until the CRLF sequence that separates the headers from
29 * the optional body is seen. An invalid request is one that doesn't
30 * start with a valid HTTP verb. In either case the connection will be
31 * accepted once MAX_DEFERRED_BYTES have been seen.
32 *
33 * This filter does its best to be accommodating to non-standard HTTP
34 * verbs, instead of restricting deferment to a small subset of verbs.
35 * It accepts a subset of the BNF specified in RFC2616: any sequence
36 * of capital ASCII letters, hyphen, or underscore.
37 *
38 * [A-Z_-]+
39 *
40 * This means that the filter can be tricked into thinking a request
41 * is HTTP when it is not. E.g. a request of 'GET ...' where <...>
42 * contains no LFCR or LFLF sequence. In this case, if the max bytes
43 * are never reached, the connection will stay deferred until either
44 * the client closes or the socketfilter system feels pressure and
45 * schedules this connection for close (see so_newconn() and
46 * sof_sonode_drop_deferred()).
47 *
48 * Note that req_fsm() looks for a sequence of LFCR or LFLF to
49 * indicate a complete request. This is a deliberate deviation from
50 * strict CRLFCRLF. Not all clients respect RFC2616. Popular
51 * webservers, such as nginx, will accept just LF, with no need for
52 * CR. The sequences LFCR and LFLF accept strict and non-strict
53 * clients.
54 */
55
56 #include <sys/kmem.h>
57 #include <sys/systm.h>
58 #include <sys/stropts.h>
59 #include <sys/strsun.h>
60 #include <sys/socketvar.h>
61 #include <sys/sockfilter.h>
62 #include <sys/note.h>
63 #include <sys/taskq.h>
64
65 #define HTTPFILT_MODULE "httpfilt"
66 #define MAX_DEFERRED_BYTES 8192
67
/*
 * States of the request FSM implemented by req_fsm().
 */
enum req_state {
	REQS_START,	/* initial state; no input consumed yet */
	REQS_VERB,	/* consuming the leading verb, [A-Z_-]+ */
	REQS_HEAD,	/* consuming the request line or a header line */
	REQS_CRLF,	/* a line feed was the previous character */
	REQS_END	/* stop deferring; accept the connection */
};
69
70 /*
71 * This structure is created during initialization and then passed to
72 * all callbacks so that state may be kept as message blocks arrive.
73 * This information determines when a connection will be accepted at
74 * the user socket level.
75 */
76 typedef struct httpf {
77 size_t httpf_bytes_in; /* bytes read */
78 enum req_state httpf_rs; /* request state */
79 } httpf_t;
80
81 static struct modlmisc httpf_modlmisc = {
82 &mod_miscops,
83 "Kernel HTTP socket filter"
84 };
85
86 static struct modlinkage httpf_modlinkage = {
87 MODREV_1,
88 &httpf_modlmisc,
89 NULL
90 };
91
92 /*
93 * An FSM to determine how long the socket shall remain deferred.
94 * Given the current state and char, return the next state. The
95 * REQS_END indicates that the socket should be accepted.
96 *
97 *
98 * +------------+ A request must start with an HTTP verb.
99 * +--| START | This FSM will accept any verb consisting
100 * | +------------+ of all capital ASCII letters, hyphen, and
101 * | | [A-Z_-] underscore. Any violation will move the
102 * | | FSM into the END state.
103 * | | [A-Z_-]
104 * | | +----+
105 * [^A-Z_-] V V |
106 * | +------------+ |
107 * +--| VERB |--+
108 * | +------------+
109 * | |
110 * | | ' ' [^\n]
111 * | | +----+
112 * | V V |
 * | +------------+ | This state traverses the rest of the
 * | | HEAD |--+ request line and all the headers.
115 * | +------------+ A line feed potentially indicates the
116 * | | ^ end of the headers and thus the CRLF
117 * | | \n | [^\r\n] state is entered.
118 * | | |
119 * | V |
120 * | +------------+ If an LFCR or LFLF sequence is seen then
121 * | | CRLF | all headers have been seen and the
122 * | +------------+ end of the request has been reached (the
123 * | | optional body is not considered for
124 * | | [\r\n] deferment). If the end of the request
125 * | | has not been reached then return to
126 * | V HEAD.
127 * | +------------+
128 * +->| END | Either the end of the request has been
129 * +------------+ reached or it's not an HTTP request at
130 * all. In any case the connection is
131 * accepted.
132 */
133 static int
134 req_fsm(enum req_state s, char ch)
135 {
136 switch (s) {
137 case REQS_START:
138 if ((ch >= 'A' && ch <= 'Z') || ch == '-' || ch == '_')
139 return (REQS_VERB);
140 else
141 return (REQS_END);
142
143 case REQS_VERB:
144 if ((ch >= 'A' && ch <= 'Z') || ch == '-' || ch == '_')
145 return (REQS_VERB);
146 else if (ch == ' ')
147 return (REQS_HEAD);
148 else
149 return (REQS_END);
150
151 case REQS_HEAD:
152 return (ch == '\n' ? REQS_CRLF : REQS_HEAD);
153
154 case REQS_CRLF:
155 return ((ch == '\r' || ch == '\n') ? REQS_END : REQS_HEAD);
156
157 case REQS_END:
158 /*
159 * Should never get here, make the compiler happy.
160 */
161 return (REQS_END);
162 }
163
164 /* Make the compiler happy. */
165 return (REQS_END);
166 }
167
168 /*
169 * Return 0 to continue processing, 1 to accept the connection.
170 */
171 static int
172 httpf_process_input(httpf_t *httpf, mblk_t *mp)
173 {
174 int i, dlen = MBLKL(mp);
175
176 for (i = 0; i < dlen; i++, httpf->httpf_bytes_in++) {
177 httpf->httpf_rs = req_fsm(httpf->httpf_rs, mp->b_rptr[i]);
178 if (httpf->httpf_rs == REQS_END)
179 return (1);
180 }
181
182 if (httpf->httpf_bytes_in >= MAX_DEFERRED_BYTES)
183 return (1);
184
185 return (0);
186 }
187
188 static sof_rval_t
189 httpf_attach_passive_cb(sof_handle_t handle, sof_handle_t ph,
190 void *parg, struct sockaddr *laddr, socklen_t laddrlen,
191 struct sockaddr *faddr, socklen_t faddrlen, void **cookiep)
192 {
193 httpf_t *new;
194
195 _NOTE(ARGUNUSED(handle, ph, parg, faddr, faddrlen, laddr, laddrlen));
196
197 new = kmem_zalloc(sizeof (httpf_t), KM_NOSLEEP | KM_NORMALPRI);
198 if (new == NULL)
199 return (SOF_RVAL_ENOMEM);
200
201 new->httpf_bytes_in = 0;
202 new->httpf_rs = REQS_START;
203 *cookiep = new;
204
205 return (SOF_RVAL_DEFER);
206 }
207
208 static void
209 httpf_detach_cb(sof_handle_t handle, void *cookie, cred_t *cr)
210 {
211 httpf_t *httpf = (httpf_t *)cookie;
212
213 _NOTE(ARGUNUSED(handle, cr));
214
215 if (httpf == NULL)
216 return;
217
218 kmem_free(httpf, sizeof (httpf_t));
219 }
220
221 static mblk_t *
222 httpf_data_in_cb(sof_handle_t handle, void *cookie, mblk_t *mp, int flags,
223 size_t *lenp)
224 {
225 httpf_t *httpf = cookie;
226
227 _NOTE(ARGUNUSED(flags, lenp));
228
229 if (httpf == NULL) {
230 sof_bypass(handle);
231 return (mp);
232 }
233
234 if (mp == NULL)
235 return (mp);
236
237 if (httpf_process_input(httpf, mp) == 1) {
238 sof_newconn_ready(handle);
239 sof_bypass(handle);
240 }
241
242 return (mp);
243 }
244
245 static sof_ops_t httpf_ops = {
246 .sofop_attach_passive = httpf_attach_passive_cb,
247 .sofop_detach = httpf_detach_cb,
248 .sofop_data_in = httpf_data_in_cb,
249 };
250
251 int
252 _init(void)
253 {
254 int err;
255
256 err = sof_register(SOF_VERSION, HTTPFILT_MODULE, &httpf_ops, 0);
257 if (err != 0)
258 return (err);
259 if ((err = mod_install(&httpf_modlinkage)) != 0)
260 (void) sof_unregister(HTTPFILT_MODULE);
261
262 return (err);
263 }
264
265 int
266 _fini(void)
267 {
268 int error;
269
270 if ((error = sof_unregister(HTTPFILT_MODULE)) != 0)
271 return (error);
272
273 return (mod_remove(&httpf_modlinkage));
274 }
275
276 int
277 _info(struct modinfo *modinfop)
278 {
279 return (mod_info(&httpf_modlinkage, modinfop));
280 }