1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright 2015 Joyent, Inc.
  26  */
  27 
  28 #include <sys/syscall.h>
  29 
  30 #include <errno.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <time.h>
  34 #include <unistd.h>
  35 #include <sys/resource.h>
  36 #include <sys/lx_misc.h>
  37 #include <sys/lx_syscall.h>
  38 #include <lx_signum.h>
  39 
  40 /*
  41  * Translating from the Linux clock types to the Illumos types is a bit of a
  42  * mess.
  43  *
 * Linux uses different values for its clock identifiers, so we have to do
 * basic translations between the two.  Thankfully, both Linux and Illumos
 * implement the same POSIX SUSv3 clock types, so the semantics should be
 * identical.
  47  *
  48  * However, CLOCK_REALTIME and CLOCK_HIGHRES (CLOCK_MONOTONIC) are the only two
  49  * clock backends currently implemented on Illumos. Functions in the kernel
  50  * that use the CLOCK_BACKEND macro will return an error for any clock type
  51  * that does not exist in the clock_backend array. These functions are
  52  * clock_settime, clock_gettime, clock_getres and timer_create.
  53  *
  54  * For reference, the kernel's clock_backend array looks like this:
  55  *
  56  * clock_backend[CLOCK_MAX] (6 entries)
  57  *    0 __CLOCK_REALTIME0               valid ptr. (obs. same as CLOCK_REALTIME)
  58  *    1 CLOCK_VIRTUAL                   NULL
  59  *    2 CLOCK_THREAD_CPUTIME_ID         NULL
  60  *    3 CLOCK_REALTIME                  valid ptr.
  61  *    4 CLOCK_MONOTONIC (CLOCK_HIGHRES) valid ptr.
  62  *    5 CLOCK_PROCESS_CPUTIME_ID        NULL
  63  *
 * See the comment on clock_highres_timer_create for full details, but a zone
 * needs the proc_clock_highres privilege to use the CLOCK_HIGHRES clock, so it
 * will generally be unusable by lx for timer_create.
  67  */
  68 
  69 static int ltos_clock[] = {
  70         CLOCK_REALTIME,                 /* LX_CLOCK_REALTIME */
  71         CLOCK_HIGHRES,                  /* LX_CLOCK_MONOTONIC */
  72         CLOCK_PROCESS_CPUTIME_ID,       /* LX_CLOCK_PROCESS_CPUTIME_ID */
  73         CLOCK_THREAD_CPUTIME_ID,        /* LX_CLOCK_THREAD_CPUTIME_ID */
  74         CLOCK_HIGHRES,                  /* LX_CLOCK_MONOTONIC_RAW */
  75         CLOCK_REALTIME,                 /* LX_CLOCK_REALTIME_COARSE */
  76         CLOCK_HIGHRES                   /* LX_CLOCK_MONOTONIC_COARSE */
  77 };
  78 
  79 /*
  80  * Since the Illumos CLOCK_HIGHRES clock requires elevated privs, which can
  81  * lead to a DOS, we use the only other option (CLOCK_REALTIME) when given
  82  * LX_CLOCK_MONOTONIC.
  83  */
  84 static int ltos_timer[] = {
  85         CLOCK_REALTIME,
  86         CLOCK_REALTIME,
  87         CLOCK_THREAD_CPUTIME_ID,        /* XXX thread, not process but fails */
  88         CLOCK_THREAD_CPUTIME_ID,
  89         CLOCK_REALTIME,
  90         CLOCK_REALTIME,
  91         CLOCK_REALTIME
  92 };
  93 
/*
 * Entry counts of the translation tables; a Linux clock id is only valid
 * for a table when it is non-negative and below the matching count.
 */
#define LX_CLOCK_MAX    (sizeof (ltos_clock) / sizeof (ltos_clock[0]))
#define LX_TIMER_MAX    (sizeof (ltos_timer) / sizeof (ltos_timer[0]))

/*
 * Number of ints of padding that follow the two int members and the
 * union sigval in lx_sigevent_t, chosen so the whole structure occupies
 * 64 bytes.
 */
#define LX_SIGEV_PAD_SIZE       ((64 - \
        (sizeof (int) * 2 + sizeof (union sigval))) / sizeof (int))
  99 
/*
 * Layout of the sigevent structure as handed to lx_timer_create() by the
 * Linux caller.  It is copied in with this layout and then converted,
 * member by member, into a native struct sigevent.
 */
typedef struct {
        union sigval    lx_sigev_value; /* same layout for both */
        /* signal number as supplied by the caller; fed to lx_ltos_signo() */
        int             lx_sigev_signo;
        /* notification type as supplied by the caller; indexes ltos_sigev */
        int             lx_sigev_notify;
        union {
                /* pads the structure out to 64 bytes in total */
                int     lx_pad[LX_SIGEV_PAD_SIZE];
                /* target thread id, consulted for LX_SIGEV_THREAD_ID */
                int     lx_tid;
                struct {
                        void (*lx_notify_function)(union sigval);
                        void *lx_notify_attribute;
                } lx_sigev_thread;
        } lx_sigev_un;
} lx_sigevent_t;
 113 
 114 /* sigevent sigev_notify conversion table */
 115 static int ltos_sigev[] = {
 116         SIGEV_SIGNAL,
 117         SIGEV_NONE,
 118         SIGEV_THREAD,
 119         0,              /* Linux skips event 3 */
 120         SIGEV_THREAD    /* Linux SIGEV_THREAD_ID -- see lx_sigev_thread_id() */
 121 };
 122 
 123 #define LX_SIGEV_MAX            (sizeof (ltos_sigev) / sizeof (ltos_sigev[0]))
 124 #define LX_SIGEV_THREAD_ID      4
 125 
 126 long
 127 lx_clock_nanosleep(int clock, int flags, struct timespec *rqtp,
 128     struct timespec *rmtp)
 129 {
 130         int ret = 0;
 131         int err;
 132         struct timespec rqt, rmt;
 133 
 134         if (clock < 0 || clock >= LX_CLOCK_MAX)
 135                 return (-EINVAL);
 136 
 137         if (uucopy(rqtp, &rqt, sizeof (struct timespec)) < 0)
 138                 return (-EFAULT);
 139 
 140         /* the TIMER_RELTIME and TIMER_ABSTIME flags are the same on Linux */
 141         if ((err = clock_nanosleep(ltos_clock[clock], flags, &rqt, &rmt))
 142             != 0) {
 143                 if (err != EINTR)
 144                         return (-err);
 145                 ret = -EINTR;
 146                 /*
 147                  * We fall through in case we have to pass back the remaining
 148                  * time.
 149                  */
 150         }
 151 
 152         /*
 153          * Only copy values to rmtp if the timer is TIMER_RELTIME and rmtp is
 154          * non-NULL.
 155          */
 156         if (((flags & TIMER_RELTIME) == TIMER_RELTIME) && (rmtp != NULL) &&
 157             (uucopy(&rmt, rmtp, sizeof (struct timespec)) < 0))
 158                 return (-EFAULT);
 159 
 160         return (ret);
 161 }
 162 
 163 /*ARGSUSED*/
 164 long
 165 lx_adjtimex(void *tp)
 166 {
 167         return (-EPERM);
 168 }
 169 
 170 /*
 171  * Notification function for use with native SIGEV_THREAD in order to
 172  * emulate Linux SIGEV_THREAD_ID. Native SIGEV_THREAD is used as the
 173  * timer mechanism and B_SIGEV_THREAD_ID performs the actual event
 174  * delivery to the appropriate lx tid.
 175  */
 176 static void
 177 lx_sigev_thread_id(union sigval sival)
 178 {
 179         lx_sigevent_t *lev = (lx_sigevent_t *)sival.sival_ptr;
 180         syscall(SYS_brand, B_SIGEV_THREAD_ID, lev->lx_sigev_un.lx_tid,
 181             lev->lx_sigev_signo, lev->lx_sigev_value);
 182         free(lev);
 183 }
 184 
 185 
 186 /*
 187  * The Illumos timer_create man page says it accepts the following clocks:
 188  *   CLOCK_REALTIME (3) wall clock
 189  *   CLOCK_VIRTUAL (1)  user CPU usage clock - No Backend
 190  *   CLOCK_PROF (2)     user and system CPU usage clock - No Backend
 191  *   CLOCK_HIGHRES (4)  non-adjustable, high-resolution clock
 192  * However, in reality the Illumos timer_create only accepts CLOCK_REALTIME
 193  * and CLOCK_HIGHRES, and since we can't use CLOCK_HIGHRES in a zone, we're
 194  * down to one clock.
 195  */
 196 long
 197 lx_timer_create(int clock, struct sigevent *lx_sevp, timer_t *tid)
 198 {
 199         lx_sigevent_t lev;
 200         struct sigevent sev;
 201 
 202         if (clock < 0 || clock >= LX_TIMER_MAX)
 203                 return (-EINVAL);
 204 
 205         /* We have to convert the Linux sigevent layout to the Illumos layout */
 206         if (uucopy(lx_sevp, &lev, sizeof (lev)) < 0)
 207                 return (-EFAULT);
 208 
 209         if (lev.lx_sigev_notify < 0 || lev.lx_sigev_notify > LX_SIGEV_MAX)
 210                 return (-EINVAL);
 211 
 212         if ((lev.lx_sigev_signo < 0) || (lev.lx_sigev_signo > LX_NSIG))
 213                 return (-EINVAL);
 214 
 215         sev.sigev_notify = ltos_sigev[lev.lx_sigev_notify];
 216         sev.sigev_signo = lx_ltos_signo(lev.lx_sigev_signo, -1);
 217         sev.sigev_value = lev.lx_sigev_value;
 218 
 219         /*
 220          * Assume all Linux libc implementations map SIGEV_THREAD to
 221          * SIGEV_THREAD_ID and ignore passed-in attributes.
 222          */
 223         sev.sigev_notify_attributes = NULL;
 224 
 225         if (lev.lx_sigev_notify == LX_SIGEV_THREAD_ID) {
 226                 pid_t caller_pid = getpid();
 227                 pid_t target_pid;
 228                 lwpid_t ignore;
 229                 int rval;
 230 
 231                 if ((rval = lx_lpid_to_spair(lev.lx_sigev_un.lx_tid,
 232                             &target_pid, &ignore)) != 0) {
 233 
 234                         /*
 235                          * Attempt to stick to the defined ERRORS in
 236                          * timer_create(2).
 237                          */
 238                         if (rval == -ESRCH)
 239                                 return (-EINVAL);
 240 
 241                         return (rval);
 242                 }
 243 
 244                 /*
 245                  * The caller of SIGEV_THREAD_ID must be in the same
 246                  * process as the target thread.
 247                  */
 248                 if (caller_pid != target_pid)
 249                         return (-EINVAL);
 250 
 251                 /*
 252                  * Pass the original lx sigevent_t to the native
 253                  * notify function so that it may pass it to the lx
 254                  * helper thread. It is the responsibility of
 255                  * lx_sigev_thread_id() to free lev_copy after the
 256                  * information is relayed to lx.
 257                  */
 258                 lx_sigevent_t *lev_copy = malloc(sizeof (lx_sigevent_t));
 259                 if (lev_copy == NULL)
 260                         return (-ENOMEM);
 261 
 262                 if (uucopy(&lev, lev_copy, sizeof (lx_sigevent_t)) < 0) {
 263                         free(lev_copy);
 264                         return (-EFAULT);
 265                 }
 266 
 267                 sev.sigev_notify_function = lx_sigev_thread_id;
 268                 sev.sigev_value.sival_ptr = lev_copy;
 269         }
 270 
 271         return ((timer_create(ltos_timer[clock], &sev, tid) < 0) ? -errno : 0);
 272 }
 273 
 274 long
 275 lx_timer_settime(timer_t tid, int flags, struct itimerspec *new_val,
 276     struct itimerspec *old_val)
 277 {
 278         return ((timer_settime(tid, flags, new_val, old_val) < 0) ? -errno : 0);
 279 }
 280 
 281 long
 282 lx_timer_gettime(timer_t tid, struct itimerspec *val)
 283 {
 284         return ((timer_gettime(tid, val) < 0) ? -errno : 0);
 285 }
 286 
 287 long
 288 lx_timer_getoverrun(timer_t tid)
 289 {
 290         int val;
 291 
 292         val = timer_getoverrun(tid);
 293         return ((val < 0) ? -errno : val);
 294 }
 295 
 296 long
 297 lx_timer_delete(timer_t tid)
 298 {
 299         return ((timer_delete(tid) < 0) ? -errno : 0);
 300 }