1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright (c) 2012, OmniTI Computer Consulting, Inc. All rights reserved. 14 * Copyright 2014 Ryan Zezeski 15 */ 16 17 /* 18 * This file implements a socketfilter used to defer HTTP connections. 19 * To defer a connection means to delay the return of accept(3SOCKET) 20 * until the entire HTTP request is ready (or majority in case of PUT 21 * and POST). This filter may be applied automatically or 22 * programmatically through the use of soconfig(1M) and 23 * setsockopt(3SOCKET). 24 * 25 * The point of this filter is to defer connections, not parse them. 26 * It is up to the webserver to service the request. This filter 27 * accepts both valid an invalid HTTP requests. A valid HTTP request 28 * is deferred until the CRLF sequence that separates the headers from 29 * the optional body is seen. An invalid request is one that doesn't 30 * start with a valid HTTP verb. In either case the connection will be 31 * accepted once MAX_DEFERRED_BYTES have been seen. 32 * 33 * This filter does its best to be accommodating to non-standard HTTP 34 * verbs, instead of restricting deferment to a small subset of verbs. 35 * It accepts a subset of the BNF specified in RFC2616: any sequence 36 * of capital ASCII letters, hyphen, or underscore. 37 * 38 * [A-Z_-]+ 39 * 40 * This means that the filter can be tricked into thinking a request 41 * is HTTP when it is not. E.g. a request of 'GET ...' where <...> 42 * contains no LFCR or LFLF sequence. In this case, if the max bytes 43 * are never reached, the connection will stay deferred until either 44 * the client closes or the socketfilter system feels pressure and 45 * schedules this connection for close (see so_newconn() and 46 * sof_sonode_drop_deferred()). 47 * 48 * Note that req_fsm() looks for a sequence of LFCR or LFLF to 49 * indicate a complete request. This is a deliberate deviation from 50 * strict CRLFCRLF. Not all clients respect RFC2616. Popular 51 * webservers, such as nginx, will accept just LF, with no need for 52 * CR. The sequences LFCR and LFLF accept strict and non-strict 53 * clients. 54 */ 55 56 #include <sys/kmem.h> 57 #include <sys/systm.h> 58 #include <sys/stropts.h> 59 #include <sys/strsun.h> 60 #include <sys/socketvar.h> 61 #include <sys/sockfilter.h> 62 #include <sys/note.h> 63 #include <sys/taskq.h> 64 65 #define HTTPFILT_MODULE "httpfilt" 66 #define MAX_DEFERRED_BYTES 8192 67 68 enum req_state { REQS_START, REQS_VERB, REQS_HEAD, REQS_CRLF, REQS_END }; 69 70 /* 71 * This structure is created during initialization and then passed to 72 * all callbacks so that state may be kept as message blocks arrive. 73 * This information determines when a connection will be accepted at 74 * the user socket level. 75 */ 76 typedef struct httpf { 77 size_t httpf_bytes_in; /* bytes read */ 78 enum req_state httpf_rs; /* request state */ 79 } httpf_t; 80 81 static struct modlmisc httpf_modlmisc = { 82 &mod_miscops, 83 "Kernel HTTP socket filter" 84 }; 85 86 static struct modlinkage httpf_modlinkage = { 87 MODREV_1, 88 &httpf_modlmisc, 89 NULL 90 }; 91 92 /* 93 * An FSM to determine how long the socket shall remain deferred. 94 * Given the current state and char, return the next state. The 95 * REQS_END indicates that the socket should be accepted. 96 * 97 * 98 * +------------+ A request must start with an HTTP verb. 99 * +--| START | This FSM will accept any verb consisting 100 * | +------------+ of all capital ASCII letters, hyphen, and 101 * | | [A-Z_-] underscore. Any violation will move the 102 * | | FSM into the END state. 103 * | | [A-Z_-] 104 * | | +----+ 105 * [^A-Z_-] V V | 106 * | +------------+ | 107 * +--| VERB |--+ 108 * | +------------+ 109 * | | 110 * | | ' ' [^\n] 111 * | | +----+ 112 * | V V | 113 * | +------------+ | This state transverses the rest of the 114 * | | HEAD |--+ the request line and all the headers. 115 * | +------------+ A line feed potentially indicates the 116 * | | ^ end of the headers and thus the CRLF 117 * | | \n | [^\r\n] state is entered. 118 * | | | 119 * | V | 120 * | +------------+ If an LFCR or LFLF sequence is seen then 121 * | | CRLF | all headers have been seen and the 122 * | +------------+ end of the request has been reached (the 123 * | | optional body is not considered for 124 * | | [\r\n] deferment). If the end of the request 125 * | | has not been reached then return to 126 * | V HEAD. 127 * | +------------+ 128 * +->| END | Either the end of the request has been 129 * +------------+ reached or it's not an HTTP request at 130 * all. In any case the connection is 131 * accepted. 132 */ 133 static int 134 req_fsm(enum req_state s, char ch) 135 { 136 switch (s) { 137 case REQS_START: 138 if ((ch >= 'A' && ch <= 'Z') || ch == '-' || ch == '_') 139 return (REQS_VERB); 140 else 141 return (REQS_END); 142 143 case REQS_VERB: 144 if ((ch >= 'A' && ch <= 'Z') || ch == '-' || ch == '_') 145 return (REQS_VERB); 146 else if (ch == ' ') 147 return (REQS_HEAD); 148 else 149 return (REQS_END); 150 151 case REQS_HEAD: 152 return (ch == '\n' ? REQS_CRLF : REQS_HEAD); 153 154 case REQS_CRLF: 155 return ((ch == '\r' || ch == '\n') ? REQS_END : REQS_HEAD); 156 157 case REQS_END: 158 /* 159 * Should never get here, make the compiler happy. 160 */ 161 return (REQS_END); 162 } 163 164 /* Make the compiler happy. */ 165 return (REQS_END); 166 } 167 168 /* 169 * Return 0 to continue processing, 1 to accept the connection. 170 */ 171 static int 172 httpf_process_input(httpf_t *httpf, mblk_t *mp) 173 { 174 int i, dlen = MBLKL(mp); 175 176 for (i = 0; i < dlen; i++, httpf->httpf_bytes_in++) { 177 httpf->httpf_rs = req_fsm(httpf->httpf_rs, mp->b_rptr[i]); 178 if (httpf->httpf_rs == REQS_END) 179 return (1); 180 } 181 182 if (httpf->httpf_bytes_in >= MAX_DEFERRED_BYTES) 183 return (1); 184 185 return (0); 186 } 187 188 static sof_rval_t 189 httpf_attach_passive_cb(sof_handle_t handle, sof_handle_t ph, 190 void *parg, struct sockaddr *laddr, socklen_t laddrlen, 191 struct sockaddr *faddr, socklen_t faddrlen, void **cookiep) 192 { 193 httpf_t *new; 194 195 _NOTE(ARGUNUSED(handle, ph, parg, faddr, faddrlen, laddr, laddrlen)); 196 197 new = kmem_zalloc(sizeof (httpf_t), KM_NOSLEEP | KM_NORMALPRI); 198 if (new == NULL) 199 return (SOF_RVAL_ENOMEM); 200 201 new->httpf_bytes_in = 0; 202 new->httpf_rs = REQS_START; 203 *cookiep = new; 204 205 return (SOF_RVAL_DEFER); 206 } 207 208 static void 209 httpf_detach_cb(sof_handle_t handle, void *cookie, cred_t *cr) 210 { 211 httpf_t *httpf = (httpf_t *)cookie; 212 213 _NOTE(ARGUNUSED(handle, cr)); 214 215 if (httpf == NULL) 216 return; 217 218 kmem_free(httpf, sizeof (httpf_t)); 219 } 220 221 static mblk_t * 222 httpf_data_in_cb(sof_handle_t handle, void *cookie, mblk_t *mp, int flags, 223 size_t *lenp) 224 { 225 httpf_t *httpf = cookie; 226 227 _NOTE(ARGUNUSED(flags, lenp)); 228 229 if (httpf == NULL) { 230 sof_bypass(handle); 231 return (mp); 232 } 233 234 if (mp == NULL) 235 return (mp); 236 237 if (httpf_process_input(httpf, mp) == 1) { 238 sof_newconn_ready(handle); 239 sof_bypass(handle); 240 } 241 242 return (mp); 243 } 244 245 static sof_ops_t httpf_ops = { 246 .sofop_attach_passive = httpf_attach_passive_cb, 247 .sofop_detach = httpf_detach_cb, 248 .sofop_data_in = httpf_data_in_cb, 249 }; 250 251 int 252 _init(void) 253 { 254 int err; 255 256 err = sof_register(SOF_VERSION, HTTPFILT_MODULE, &httpf_ops, 0); 257 if (err != 0) 258 return (err); 259 if ((err = mod_install(&httpf_modlinkage)) != 0) 260 (void) sof_unregister(HTTPFILT_MODULE); 261 262 return (err); 263 } 264 265 int 266 _fini(void) 267 { 268 int error; 269 270 if ((error = sof_unregister(HTTPFILT_MODULE)) != 0) 271 return (error); 272 273 return (mod_remove(&httpf_modlinkage)); 274 } 275 276 int 277 _info(struct modinfo *modinfop) 278 { 279 return (mod_info(&httpf_modlinkage, modinfop)); 280 }