1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright (c) 2012, OmniTI Computer Consulting, Inc. All rights reserved.
  14  * Copyright 2014 Ryan Zezeski
  15  */
  16 
  17 /*
  18  * This file implements a socketfilter used to defer HTTP connections.
  19  * To defer a connection means to delay the return of accept(3SOCKET)
  20  * until the entire HTTP request is ready (or majority in case of PUT
  21  * and POST). This filter may be applied automatically or
  22  * programmatically through the use of soconfig(1M) and
  23  * setsockopt(3SOCKET).
  24  *
  25  * The point of this filter is to defer connections, not parse them.
  26  * It is up to the webserver to service the request. This filter
 * accepts both valid and invalid HTTP requests. A valid HTTP request
  28  * is deferred until the CRLF sequence that separates the headers from
  29  * the optional body is seen. An invalid request is one that doesn't
  30  * start with a valid HTTP verb. In either case the connection will be
  31  * accepted once MAX_DEFERRED_BYTES have been seen.
  32  *
  33  * This filter does its best to be accommodating to non-standard HTTP
  34  * verbs, instead of restricting deferment to a small subset of verbs.
  35  * It accepts a subset of the BNF specified in RFC2616: any sequence
  36  * of capital ASCII letters, hyphen, or underscore.
  37  *
  38  * [A-Z_-]+
  39  *
  40  * This means that the filter can be tricked into thinking a request
  41  * is HTTP when it is not. E.g. a request of 'GET ...' where <...>
  42  * contains no LFCR or LFLF sequence. In this case, if the max bytes
  43  * are never reached, the connection will stay deferred until either
  44  * the client closes or the socketfilter system feels pressure and
  45  * schedules this connection for close (see so_newconn() and
  46  * sof_sonode_drop_deferred()).
  47  *
  48  * Note that req_fsm() looks for a sequence of LFCR or LFLF to
  49  * indicate a complete request. This is a deliberate deviation from
  50  * strict CRLFCRLF. Not all clients respect RFC2616. Popular
  51  * webservers, such as nginx, will accept just LF, with no need for
  52  * CR. The sequences LFCR and LFLF accept strict and non-strict
  53  * clients.
  54  */
  55 
  56 #include <sys/kmem.h>
  57 #include <sys/systm.h>
  58 #include <sys/stropts.h>
  59 #include <sys/strsun.h>
  60 #include <sys/socketvar.h>
  61 #include <sys/sockfilter.h>
  62 #include <sys/note.h>
  63 #include <sys/taskq.h>
  64 
  65 #define HTTPFILT_MODULE         "httpfilt"
  66 #define MAX_DEFERRED_BYTES      8192
  67 
  68 enum req_state { REQS_START, REQS_VERB, REQS_HEAD, REQS_CRLF, REQS_END };
  69 
  70 /*
  71  * This structure is created during initialization and then passed to
  72  * all callbacks so that state may be kept as message blocks arrive.
  73  * This information determines when a connection will be accepted at
  74  * the user socket level.
  75  */
  76 typedef struct httpf {
  77         size_t          httpf_bytes_in; /* bytes read */
  78         enum req_state  httpf_rs;       /* request state */
  79 } httpf_t;
  80 
  81 static struct modlmisc httpf_modlmisc = {
  82         &mod_miscops,
  83         "Kernel HTTP socket filter"
  84 };
  85 
  86 static struct modlinkage httpf_modlinkage = {
  87         MODREV_1,
  88         &httpf_modlmisc,
  89         NULL
  90 };
  91 
  92 /*
  93  * An FSM to determine how long the socket shall remain deferred.
  94  * Given the current state and char, return the next state. The
  95  * REQS_END indicates that the socket should be accepted.
  96  *
  97  *
  98  *     +------------+           A request must start with an HTTP verb.
  99  *  +--|    START   |           This FSM will accept any verb consisting
 100  *  |  +------------+           of all capital ASCII letters, hyphen, and
 101  *  |        | [A-Z_-]          underscore. Any violation will move the
 102  *  |        |                  FSM into the END state.
 103  *  |        |    [A-Z_-]
 104  *  |        |    +----+
 105  * [^A-Z_-]  V    V    |
 106  *  |  +------------+  |
 107  *  +--|    VERB    |--+
 108  *  |  +------------+
 109  *  |   |
 110  *  |   | ' '     [^\n]
 111  *  |   |         +----+
 112  *  |   V         V    |
 *  |  +------------+  |        This state traverses the rest of the
 *  |  |    HEAD    |--+        request line and all the headers.
 115  *  |  +------------+           A line feed potentially indicates the
 116  *  |   |          ^            end of the headers and thus the CRLF
 117  *  |   | \n       | [^\r\n]    state is entered.
 118  *  |   |          |
 119  *  |   V          |
 120  *  |  +------------+           If an LFCR or LFLF sequence is seen then
 121  *  |  |    CRLF    |           all headers have been seen and the
 122  *  |  +------------+           end of the request has been reached (the
 123  *  |   |                       optional body is not considered for
 124  *  |   | [\r\n]                deferment). If the end of the request
 125  *  |   |                       has not been reached then return to
 126  *  |   V                       HEAD.
 127  *  |  +------------+
 *  +->|    END     |           Either the end of the request has been
 129  *     +------------+           reached or it's not an HTTP request at
 130  *                              all. In any case the connection is
 131  *                              accepted.
 132  */
 133 static int
 134 req_fsm(enum req_state s, char ch)
 135 {
 136         switch (s) {
 137         case REQS_START:
 138                 if ((ch >= 'A' && ch <= 'Z') || ch == '-' || ch == '_')
 139                         return (REQS_VERB);
 140                 else
 141                         return (REQS_END);
 142 
 143         case REQS_VERB:
 144                 if ((ch >= 'A' && ch <= 'Z') || ch == '-' || ch == '_')
 145                         return (REQS_VERB);
 146                 else if (ch == ' ')
 147                         return (REQS_HEAD);
 148                 else
 149                         return (REQS_END);
 150 
 151         case REQS_HEAD:
 152                 return (ch == '\n' ? REQS_CRLF : REQS_HEAD);
 153 
 154         case REQS_CRLF:
 155                 return ((ch == '\r' || ch == '\n') ? REQS_END : REQS_HEAD);
 156 
 157         case REQS_END:
 158                 /*
 159                  * Should never get here, make the compiler happy.
 160                  */
 161                 return (REQS_END);
 162         }
 163 
 164         /* Make the compiler happy. */
 165         return (REQS_END);
 166 }
 167 
 168 /*
 169  * Return 0 to continue processing, 1 to accept the connection.
 170  */
 171 static int
 172 httpf_process_input(httpf_t *httpf, mblk_t *mp)
 173 {
 174         int i, dlen = MBLKL(mp);
 175 
 176         for (i = 0; i < dlen; i++, httpf->httpf_bytes_in++) {
 177                 httpf->httpf_rs = req_fsm(httpf->httpf_rs, mp->b_rptr[i]);
 178                 if (httpf->httpf_rs == REQS_END)
 179                         return (1);
 180         }
 181 
 182         if (httpf->httpf_bytes_in >= MAX_DEFERRED_BYTES)
 183                 return (1);
 184 
 185         return (0);
 186 }
 187 
 188 static sof_rval_t
 189 httpf_attach_passive_cb(sof_handle_t handle, sof_handle_t ph,
 190     void *parg, struct sockaddr *laddr, socklen_t laddrlen,
 191     struct sockaddr *faddr, socklen_t faddrlen, void **cookiep)
 192 {
 193         httpf_t *new;
 194 
 195         _NOTE(ARGUNUSED(handle, ph, parg, faddr, faddrlen, laddr, laddrlen));
 196 
 197         new = kmem_zalloc(sizeof (httpf_t), KM_NOSLEEP | KM_NORMALPRI);
 198         if (new == NULL)
 199                 return (SOF_RVAL_ENOMEM);
 200 
 201         new->httpf_bytes_in = 0;
 202         new->httpf_rs = REQS_START;
 203         *cookiep = new;
 204 
 205         return (SOF_RVAL_DEFER);
 206 }
 207 
 208 static void
 209 httpf_detach_cb(sof_handle_t handle, void *cookie, cred_t *cr)
 210 {
 211         httpf_t *httpf = (httpf_t *)cookie;
 212 
 213         _NOTE(ARGUNUSED(handle, cr));
 214 
 215         if (httpf == NULL)
 216                 return;
 217 
 218         kmem_free(httpf, sizeof (httpf_t));
 219 }
 220 
 221 static mblk_t *
 222 httpf_data_in_cb(sof_handle_t handle, void *cookie, mblk_t *mp, int flags,
 223     size_t *lenp)
 224 {
 225         httpf_t *httpf = cookie;
 226 
 227         _NOTE(ARGUNUSED(flags, lenp));
 228 
 229         if (httpf == NULL) {
 230                 sof_bypass(handle);
 231                 return (mp);
 232         }
 233 
 234         if (mp == NULL)
 235                 return (mp);
 236 
 237         if (httpf_process_input(httpf, mp) == 1) {
 238                 sof_newconn_ready(handle);
 239                 sof_bypass(handle);
 240         }
 241 
 242         return (mp);
 243 }
 244 
 245 static sof_ops_t httpf_ops = {
 246         .sofop_attach_passive = httpf_attach_passive_cb,
 247         .sofop_detach = httpf_detach_cb,
 248         .sofop_data_in = httpf_data_in_cb,
 249 };
 250 
 251 int
 252 _init(void)
 253 {
 254         int err;
 255 
 256         err = sof_register(SOF_VERSION, HTTPFILT_MODULE, &httpf_ops, 0);
 257         if (err != 0)
 258                 return (err);
 259         if ((err = mod_install(&httpf_modlinkage)) != 0)
 260                 (void) sof_unregister(HTTPFILT_MODULE);
 261 
 262         return (err);
 263 }
 264 
 265 int
 266 _fini(void)
 267 {
 268         int error;
 269 
 270         if ((error = sof_unregister(HTTPFILT_MODULE)) != 0)
 271                 return (error);
 272 
 273         return (mod_remove(&httpf_modlinkage));
 274 }
 275 
 276 int
 277 _info(struct modinfo *modinfop)
 278 {
 279         return (mod_info(&httpf_modlinkage, modinfop));
 280 }