1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright 2015 Joyent, Inc.
  26  */
  27 
  28 #include <errno.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <time.h>
  32 #include <unistd.h>
  33 #include <sys/resource.h>
  34 #include <sys/syscall.h>
  35 #include <sys/lx_misc.h>
  36 #include <sys/lx_syscall.h>
  37 #include <lx_signum.h>
  38 
  39 /*
  40  * Translating from the Linux clock types to the Illumos types is a bit of a
  41  * mess.
  42  *
 * Linux uses different values for its clock identifiers, so we have to do basic
  44  * translations between the two.  Thankfully, both Linux and Illumos implement
  45  * the same POSIX SUSv3 clock types, so the semantics should be identical.
  46  *
  47  * However, CLOCK_REALTIME and CLOCK_HIGHRES (CLOCK_MONOTONIC) are the only two
  48  * clock backends currently implemented on Illumos. Functions in the kernel
  49  * that use the CLOCK_BACKEND macro will return an error for any clock type
  50  * that does not exist in the clock_backend array. These functions are
  51  * clock_settime, clock_gettime, clock_getres and timer_create.
  52  *
  53  * For reference, the kernel's clock_backend array looks like this:
  54  *
  55  * clock_backend[CLOCK_MAX] (6 entries)
  56  *    0 __CLOCK_REALTIME0               valid ptr. (obs. same as CLOCK_REALTIME)
  57  *    1 CLOCK_VIRTUAL                   NULL
  58  *    2 CLOCK_THREAD_CPUTIME_ID         NULL
  59  *    3 CLOCK_REALTIME                  valid ptr.
  60  *    4 CLOCK_MONOTONIC (CLOCK_HIGHRES) valid ptr.
  61  *    5 CLOCK_PROCESS_CPUTIME_ID        NULL
  62  *
  63  * See the comment on clock_highres_timer_create for full details but a zone
  64  * needs the proc_clock_highres privilege to use the CLOCK_HIGHRES clock so it
  65  * will generally be unusable by lx for timer_create.
  66  */
  67 
/*
 * Linux clock id -> native clock id.  The array is indexed by the Linux
 * clock id named in each entry's comment.  lx_clock_nanosleep() consults it
 * after bounds-checking the index against LX_CLOCK_MAX.
 */
static int ltos_clock[] = {
        CLOCK_REALTIME,                 /* LX_CLOCK_REALTIME */
        CLOCK_HIGHRES,                  /* LX_CLOCK_MONOTONIC */
        CLOCK_PROCESS_CPUTIME_ID,       /* LX_CLOCK_PROCESS_CPUTIME_ID */
        CLOCK_THREAD_CPUTIME_ID,        /* LX_CLOCK_THREAD_CPUTIME_ID */
        CLOCK_HIGHRES,                  /* LX_CLOCK_MONOTONIC_RAW */
        CLOCK_REALTIME,                 /* LX_CLOCK_REALTIME_COARSE */
        CLOCK_HIGHRES                   /* LX_CLOCK_MONOTONIC_COARSE */
};
  77 
/*
 * Since the Illumos CLOCK_HIGHRES clock requires elevated privs, which can
 * lead to a DoS, we use the only other option (CLOCK_REALTIME) when given
 * LX_CLOCK_MONOTONIC.
 *
 * Linux clock id -> native clock id used for timers.  Indexed by the Linux
 * clock id, in the same order as ltos_clock[]; lx_timer_create() consults it
 * after bounds-checking the index against LX_TIMER_MAX.
 */
static int ltos_timer[] = {
        CLOCK_REALTIME,                 /* LX_CLOCK_REALTIME */
        CLOCK_REALTIME,                 /* LX_CLOCK_MONOTONIC */
        /* LX_CLOCK_PROCESS_CPUTIME_ID */
        CLOCK_THREAD_CPUTIME_ID,        /* XXX thread, not process but fails */
        CLOCK_THREAD_CPUTIME_ID,        /* LX_CLOCK_THREAD_CPUTIME_ID */
        CLOCK_REALTIME,                 /* LX_CLOCK_MONOTONIC_RAW */
        CLOCK_REALTIME,                 /* LX_CLOCK_REALTIME_COARSE */
        CLOCK_REALTIME                  /* LX_CLOCK_MONOTONIC_COARSE */
};
  92 
/*
 * Number of entries in each translation table.  A Linux clock id is only
 * valid as an index when it is >= 0 and strictly less than the matching
 * limit.  Both expand to a size_t expression.
 */
#define LX_CLOCK_MAX    (sizeof (ltos_clock) / sizeof (ltos_clock[0]))
#define LX_TIMER_MAX    (sizeof (ltos_timer) / sizeof (ltos_timer[0]))
  95 
/*
 * Number of ints of padding needed so that the value, signo and notify
 * members plus the trailing union add up to 64 bytes (presumably the size of
 * the Linux struct sigevent -- confirm against the Linux headers).
 */
#define LX_SIGEV_PAD_SIZE       ((64 - \
        (sizeof (int) * 2 + sizeof (union sigval))) / sizeof (int))

/*
 * The sigevent structure as laid out by the Linux caller.  lx_timer_create()
 * copies one of these in from the caller and converts it to the native
 * struct sigevent.
 */
typedef struct {
        union sigval    lx_sigev_value; /* same layout for both */
        int             lx_sigev_signo; /* Linux signal number */
        int             lx_sigev_notify; /* Linux notify type; ltos_sigev[] index */
        union {
                int     lx_pad[LX_SIGEV_PAD_SIZE]; /* fixes the union's size */
                int     lx_tid; /* target thread for SIGEV_THREAD_ID */
                struct {
                        void (*lx_notify_function)(union sigval);
                        void *lx_notify_attribute;
                } lx_sigev_thread;
        } lx_sigev_un;
} lx_sigevent_t;
 112 
/*
 * sigevent sigev_notify conversion table
 *
 * Indexed by the Linux sigev_notify value; each entry is the native
 * notification type handed to timer_create().  The 0 in slot 3 is a
 * placeholder for a value Linux does not define.
 */
static int ltos_sigev[] = {
        SIGEV_SIGNAL,
        SIGEV_NONE,
        SIGEV_THREAD,
        0,              /* Linux skips event 3 */
        SIGEV_THREAD    /* Linux SIGEV_THREAD_ID -- see lx_sigev_thread_id() */
};

/*
 * LX_SIGEV_MAX is the number of entries in ltos_sigev[], so valid indices are
 * 0 through LX_SIGEV_MAX - 1.  LX_SIGEV_THREAD_ID is the Linux sigev_notify
 * value that selects delivery to a specific thread.
 */
#define LX_SIGEV_MAX            (sizeof (ltos_sigev) / sizeof (ltos_sigev[0]))
#define LX_SIGEV_THREAD_ID      4
 124 
 125 long
 126 lx_clock_nanosleep(int clock, int flags, struct timespec *rqtp,
 127     struct timespec *rmtp)
 128 {
 129         int ret = 0;
 130         int err;
 131         struct timespec rqt, rmt;
 132 
 133         if (clock < 0 || clock >= LX_CLOCK_MAX)
 134                 return (-EINVAL);
 135 
 136         if (uucopy(rqtp, &rqt, sizeof (struct timespec)) < 0)
 137                 return (-EFAULT);
 138 
 139         /* the TIMER_RELTIME and TIMER_ABSTIME flags are the same on Linux */
 140         if ((err = clock_nanosleep(ltos_clock[clock], flags, &rqt, &rmt))
 141             != 0) {
 142                 if (err != EINTR)
 143                         return (-err);
 144                 ret = -EINTR;
 145                 /*
 146                  * We fall through in case we have to pass back the remaining
 147                  * time.
 148                  */
 149         }
 150 
 151         /*
 152          * Only copy values to rmtp if the timer is TIMER_RELTIME and rmtp is
 153          * non-NULL.
 154          */
 155         if (((flags & TIMER_RELTIME) == TIMER_RELTIME) && (rmtp != NULL) &&
 156             (uucopy(&rmt, rmtp, sizeof (struct timespec)) < 0))
 157                 return (-EFAULT);
 158 
 159         return (ret);
 160 }
 161 
 162 /*ARGSUSED*/
 163 long
 164 lx_adjtimex(void *tp)
 165 {
 166         return (-EPERM);
 167 }
 168 
 169 /*
 170  * Notification function for use with native SIGEV_THREAD in order to
 171  * emulate Linux SIGEV_THREAD_ID. Native SIGEV_THREAD is used as the
 172  * timer mechanism and B_SIGEV_THREAD_ID performs the actual event
 173  * delivery to the appropriate lx tid.
 174  */
 175 static void
 176 lx_sigev_thread_id(union sigval sival)
 177 {
 178         lx_sigevent_t *lev = (lx_sigevent_t *)sival.sival_ptr;
 179         syscall(SYS_brand, B_SIGEV_THREAD_ID, lev->lx_sigev_un.lx_tid,
 180             lev->lx_sigev_signo, lev->lx_sigev_value.sival_ptr);
 181         free(lev);
 182 }
 183 
 184 
 185 /*
 186  * The Illumos timer_create man page says it accepts the following clocks:
 187  *   CLOCK_REALTIME (3) wall clock
 188  *   CLOCK_VIRTUAL (1)  user CPU usage clock - No Backend
 189  *   CLOCK_PROF (2)     user and system CPU usage clock - No Backend
 190  *   CLOCK_HIGHRES (4)  non-adjustable, high-resolution clock
 191  * However, in reality the Illumos timer_create only accepts CLOCK_REALTIME
 192  * and CLOCK_HIGHRES, and since we can't use CLOCK_HIGHRES in a zone, we're
 193  * down to one clock.
 194  */
 195 long
 196 lx_timer_create(int clock, struct sigevent *lx_sevp, timer_t *tid)
 197 {
 198         lx_sigevent_t lev;
 199         struct sigevent sev;
 200 
 201         if (clock < 0 || clock >= LX_TIMER_MAX)
 202                 return (-EINVAL);
 203 
 204         /* We have to convert the Linux sigevent layout to the Illumos layout */
 205         if (uucopy(lx_sevp, &lev, sizeof (lev)) < 0)
 206                 return (-EFAULT);
 207 
 208         if (lev.lx_sigev_notify < 0 || lev.lx_sigev_notify > LX_SIGEV_MAX)
 209                 return (-EINVAL);
 210 
 211         sev.sigev_notify = ltos_sigev[lev.lx_sigev_notify];
 212         sev.sigev_signo = lx_ltos_signo(lev.lx_sigev_signo, 0);
 213         sev.sigev_value = lev.lx_sigev_value;
 214 
 215         /*
 216          * The signal number is meaningless in SIGEV_NONE, Linux
 217          * accepts any value. We convert invalid signals to 0 so other
 218          * parts of lx signal handling don't break.
 219          */
 220         if ((sev.sigev_notify != SIGEV_NONE) && (sev.sigev_signo == 0))
 221                 return (-EINVAL);
 222 
 223         /*
 224          * Assume all Linux libc implementations map SIGEV_THREAD to
 225          * SIGEV_THREAD_ID and ignore passed-in attributes.
 226          */
 227         sev.sigev_notify_attributes = NULL;
 228 
 229         if (lev.lx_sigev_notify == LX_SIGEV_THREAD_ID) {
 230                 pid_t caller_pid = getpid();
 231                 pid_t target_pid;
 232                 lwpid_t ignore;
 233                 lx_sigevent_t *lev_copy;
 234 
 235                 if (lx_lpid_to_spair(lev.lx_sigev_un.lx_tid,
 236                     &target_pid, &ignore) != 0)
 237                         return (-EINVAL);
 238 
 239                 /*
 240                  * The caller of SIGEV_THREAD_ID must be in the same
 241                  * process as the target thread.
 242                  */
 243                 if (caller_pid != target_pid)
 244                         return (-EINVAL);
 245 
 246                 /*
 247                  * Pass the original lx sigevent_t to the native
 248                  * notify function so that it may pass it to the lx
 249                  * helper thread. It is the responsibility of
 250                  * lx_sigev_thread_id() to free lev_copy after the
 251                  * information is relayed to lx.
 252                  *
 253                  * If the calling process is forked without an exec
 254                  * after this copy but before the timer fires then
 255                  * lev_copy will leak in the child. This is acceptable
 256                  * given the rarity of this event, the miniscule
 257                  * amount leaked, and the fact that the memory is
 258                  * reclaimed when the proc dies. It is firmly in the
 259                  * land of "good enough".
 260                  */
 261                 lev_copy = malloc(sizeof (lx_sigevent_t));
 262                 if (lev_copy == NULL)
 263                         return (-ENOMEM);
 264 
 265                 if (uucopy(&lev, lev_copy, sizeof (lx_sigevent_t)) < 0) {
 266                         free(lev_copy);
 267                         return (-EFAULT);
 268                 }
 269 
 270                 sev.sigev_notify_function = lx_sigev_thread_id;
 271                 sev.sigev_value.sival_ptr = lev_copy;
 272         }
 273 
 274         return ((timer_create(ltos_timer[clock], &sev, tid) < 0) ? -errno : 0);
 275 }
 276 
 277 long
 278 lx_timer_settime(timer_t tid, int flags, struct itimerspec *new_val,
 279     struct itimerspec *old_val)
 280 {
 281         return ((timer_settime(tid, flags, new_val, old_val) < 0) ? -errno : 0);
 282 }
 283 
 284 long
 285 lx_timer_gettime(timer_t tid, struct itimerspec *val)
 286 {
 287         return ((timer_gettime(tid, val) < 0) ? -errno : 0);
 288 }
 289 
 290 long
 291 lx_timer_getoverrun(timer_t tid)
 292 {
 293         int val;
 294 
 295         val = timer_getoverrun(tid);
 296         return ((val < 0) ? -errno : val);
 297 }
 298 
 299 long
 300 lx_timer_delete(timer_t tid)
 301 {
 302         return ((timer_delete(tid) < 0) ? -errno : 0);
 303 }