1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2016 Joyent, Inc.
  25  */
  26 
  27 /*
  28  * lx_proc -- a Linux-compatible /proc for the LX brand
  29  *
  30  * We have -- confusingly -- two implementations of Linux /proc.  One is to
  31  * support native (but Linux-borne) programs that wish to view the native
  32  * system through the Linux /proc model; the other -- this one -- is to
  33  * support Linux binaries via the LX brand.  These two implementations differ
  34  * greatly in their aspirations (and their willingness to bend the truth
  35  * of the system to accommodate those aspirations); they should not be unified.
  36  */
  37 
  38 #include <sys/cpupart.h>
  39 #include <sys/cpuvar.h>
  40 #include <sys/session.h>
  41 #include <sys/vmparam.h>
  42 #include <sys/mman.h>
  43 #include <vm/rm.h>
  44 #include <vm/seg_vn.h>
  45 #include <sys/sdt.h>
  46 #include <lx_signum.h>
  47 #include <sys/strlog.h>
  48 #include <sys/stropts.h>
  49 #include <sys/cmn_err.h>
  50 #include <sys/lx_brand.h>
  51 #include <lx_auxv.h>
  52 #include <sys/x86_archext.h>
  53 #include <sys/archsystm.h>
  54 #include <sys/fp.h>
  55 #include <sys/pool_pset.h>
  56 #include <sys/pset.h>
  57 #include <sys/zone.h>
  58 #include <sys/pghw.h>
  59 #include <sys/vfs_opreg.h>
  60 #include <sys/param.h>
  61 #include <sys/utsname.h>
  62 #include <sys/rctl.h>
  63 #include <sys/kstat.h>
  64 #include <sys/lx_misc.h>
  65 #include <sys/brand.h>
  66 #include <sys/cred_impl.h>
  67 #include <sys/tihdr.h>
  68 #include <sys/corectl.h>
  69 #include <inet/ip.h>
  70 #include <inet/ip_ire.h>
  71 #include <inet/ip6.h>
  72 #include <inet/ip_if.h>
  73 #include <inet/tcp.h>
  74 #include <inet/tcp_impl.h>
  75 #include <inet/udp_impl.h>
  76 #include <inet/ipclassifier.h>
  77 #include <sys/socketvar.h>
  78 #include <fs/sockfs/socktpi.h>
  79 
  80 /* Dependent on procfs */
  81 extern kthread_t *prchoose(proc_t *);
  82 extern int prreadargv(proc_t *, char *, size_t, size_t *);
  83 extern int prreadenvv(proc_t *, char *, size_t, size_t *);
  84 extern int prreadbuf(proc_t *, uintptr_t, uint8_t *, size_t, size_t *);
  85 
  86 #include "lx_proc.h"
  87 
  88 extern pgcnt_t swapfs_minfree;
  89 extern time_t boot_time;
  90 
/*
 * Pointer to the vnode ops vector for this fs.
 * This is instantiated in lxprinit() in lxpr_vfsops.c
 *
 * Nothing in this file assigns it; the definition here only provides the
 * storage (with external linkage) that the initialization code fills in.
 */
vnodeops_t *lxpr_vnodeops;
  96 
  97 static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
  98 static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
  99     caller_context_t *);
 100 static int lxpr_create(struct vnode *, char *, struct vattr *, enum vcexcl,
 101     int, struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *);
 102 static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
 103 static int lxpr_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
 104 static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
 105     caller_context_t *);
 106 static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
 107 static int lxpr_lookup(vnode_t *, char *, vnode_t **,
 108     pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
 109     pathname_t *);
 110 static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
 111     caller_context_t *, int);
 112 static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
 113 static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
 114 static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
 115 static int lxpr_sync(void);
 116 static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
 117 
 118 static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
 119 static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
 120 static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
 121 static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
 122 static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
 123 static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *);
 124 static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *);
 125 static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *);
 126 static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *);
 127 static vnode_t *lxpr_lookup_sys_kdir_randdir(vnode_t *, char *);
 128 static vnode_t *lxpr_lookup_sys_netdir(vnode_t *, char *);
 129 static vnode_t *lxpr_lookup_sys_net_coredir(vnode_t *, char *);
 130 static vnode_t *lxpr_lookup_sys_vmdir(vnode_t *, char *);
 131 static vnode_t *lxpr_lookup_taskdir(vnode_t *, char *);
 132 static vnode_t *lxpr_lookup_task_tid_dir(vnode_t *, char *);
 133 
 134 static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
 135 static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
 136 static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
 137 static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
 138 static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
 139 static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *);
 140 static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *);
 141 static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *);
 142 static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *);
 143 static int lxpr_readdir_sys_kdir_randdir(lxpr_node_t *, uio_t *, int *);
 144 static int lxpr_readdir_sys_netdir(lxpr_node_t *, uio_t *, int *);
 145 static int lxpr_readdir_sys_net_coredir(lxpr_node_t *, uio_t *, int *);
 146 static int lxpr_readdir_sys_vmdir(lxpr_node_t *, uio_t *, int *);
 147 static int lxpr_readdir_taskdir(lxpr_node_t *, uio_t *, int *);
 148 static int lxpr_readdir_task_tid_dir(lxpr_node_t *, uio_t *, int *);
 149 
 150 static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
 151 static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
 152 static void lxpr_read_cgroups(lxpr_node_t *, lxpr_uiobuf_t *);
 153 static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
 154 static void lxpr_read_diskstats(lxpr_node_t *, lxpr_uiobuf_t *);
 155 static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
 156 static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
 157 static void lxpr_read_filesystems(lxpr_node_t *, lxpr_uiobuf_t *);
 158 static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t);
 159 static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
 160 static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
 161 static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
 162 static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
 163 static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
 164 static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *);
 165 static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
 166 static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
 167 
 168 static void lxpr_read_pid_auxv(lxpr_node_t *, lxpr_uiobuf_t *);
 169 static void lxpr_read_pid_cgroup(lxpr_node_t *, lxpr_uiobuf_t *);
 170 static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
 171 static void lxpr_read_pid_comm(lxpr_node_t *, lxpr_uiobuf_t *);
 172 static void lxpr_read_pid_env(lxpr_node_t *, lxpr_uiobuf_t *);
 173 static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *);
 174 static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
 175 static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *);
 176 static void lxpr_read_pid_oom_scr_adj(lxpr_node_t *, lxpr_uiobuf_t *);
 177 static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
 178 static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
 179 static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
 180 
 181 static void lxpr_read_pid_tid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
 182 static void lxpr_read_pid_tid_status(lxpr_node_t *, lxpr_uiobuf_t *);
 183 
 184 static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
 185 static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
 186 static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
 187 static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *);
 188 static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
 189 static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
 190 static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
 191 static void lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *);
 192 static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
 193 static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
 194 static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
 195 static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
 196 static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
 197 static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
 198 static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
 199 static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
 200 static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
 201 static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
 202 static void lxpr_read_net_tcp6(lxpr_node_t *, lxpr_uiobuf_t *);
 203 static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
 204 static void lxpr_read_net_udp6(lxpr_node_t *, lxpr_uiobuf_t *);
 205 static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
 206 static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *,
 207     lxpr_uiobuf_t *);
 208 static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *,
 209     lxpr_uiobuf_t *);
 210 static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *,
 211     lxpr_uiobuf_t *);
 212 static void lxpr_read_sys_kernel_caplcap(lxpr_node_t *, lxpr_uiobuf_t *);
 213 static void lxpr_read_sys_kernel_corepatt(lxpr_node_t *, lxpr_uiobuf_t *);
 214 static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *);
 215 static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *);
 216 static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *);
 217 static void lxpr_read_sys_kernel_osrel(lxpr_node_t *, lxpr_uiobuf_t *);
 218 static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *);
 219 static void lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *, lxpr_uiobuf_t *);
 220 static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, lxpr_uiobuf_t *);
 221 static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *);
 222 static void lxpr_read_sys_net_core_somaxc(lxpr_node_t *, lxpr_uiobuf_t *);
 223 static void lxpr_read_sys_vm_minfr_kb(lxpr_node_t *, lxpr_uiobuf_t *);
 224 static void lxpr_read_sys_vm_nhpages(lxpr_node_t *, lxpr_uiobuf_t *);
 225 static void lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *, lxpr_uiobuf_t *);
 226 static void lxpr_read_sys_vm_swappiness(lxpr_node_t *, lxpr_uiobuf_t *);
 227 
 228 static int lxpr_write_sys_net_core_somaxc(lxpr_node_t *, uio_t *, cred_t *,
 229     caller_context_t *);
 230 static int lxpr_write_sys_kernel_corepatt(lxpr_node_t *, uio_t *, cred_t *,
 231     caller_context_t *);
 232 
/*
 * Simple conversion
 *
 * btok() shifts its argument right by 10 bits (divide by 1024, discarding
 * the remainder for non-negative values).  ptok() shifts its argument left
 * by (PAGESHIFT - 10) bits.
 * NOTE(review): ptok() assumes PAGESHIFT >= 10; confirm for all platforms.
 */
#define btok(x) ((x) >> 10)                       /* bytes to kbytes */
#define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */

/* Cast the value of ttolwpbrand(t) to a struct lx_lwp_data pointer. */
#define ttolxlwp(t)     ((struct lx_lwp_data *)ttolwpbrand(t))
 240 
/* rctl handles that are defined outside of this file. */
extern rctl_hndl_t rc_zone_msgmni;
extern rctl_hndl_t rc_zone_shmmax;
/* 4294967295 is 2^32 - 1, i.e. one byte short of a full 4 GiB. */
#define FOURGB  4294967295
 244 
/*
 * The maximum length of the concatenation of argument vector strings we
 * will return to the user via the branded procfs. Likewise for the env vector.
 *
 * Both limits are plain writable globals that start out at 4096.
 */
int lxpr_maxargvlen = 4096;
int lxpr_maxenvvlen = 4096;
 251 
 252 /*
 253  * The lx /proc vnode operations vector
 254  */
 255 const fs_operation_def_t lxpr_vnodeops_template[] = {
 256         VOPNAME_OPEN,           { .vop_open = lxpr_open },
 257         VOPNAME_CLOSE,          { .vop_close = lxpr_close },
 258         VOPNAME_READ,           { .vop_read = lxpr_read },
 259         VOPNAME_WRITE,          { .vop_read = lxpr_write },
 260         VOPNAME_GETATTR,        { .vop_getattr = lxpr_getattr },
 261         VOPNAME_ACCESS,         { .vop_access = lxpr_access },
 262         VOPNAME_LOOKUP,         { .vop_lookup = lxpr_lookup },
 263         VOPNAME_CREATE,         { .vop_create = lxpr_create },
 264         VOPNAME_READDIR,        { .vop_readdir = lxpr_readdir },
 265         VOPNAME_READLINK,       { .vop_readlink = lxpr_readlink },
 266         VOPNAME_FSYNC,          { .error = lxpr_sync },
 267         VOPNAME_SEEK,           { .error = lxpr_sync },
 268         VOPNAME_INACTIVE,       { .vop_inactive = lxpr_inactive },
 269         VOPNAME_CMP,            { .vop_cmp = lxpr_cmp },
 270         VOPNAME_REALVP,         { .vop_realvp = lxpr_realvp },
 271         NULL,                   NULL
 272 };
 273 
 274 
/*
 * Named contents of the top-level lx /proc directory.
 *
 * Each row pairs an LXPR_* node type with the name under which that node
 * appears in the directory.
 */
static lxpr_dirent_t lx_procdir[] = {
        { LXPR_CGROUPS,         "cgroups" },
        { LXPR_CMDLINE,         "cmdline" },
        { LXPR_CPUINFO,         "cpuinfo" },
        { LXPR_DEVICES,         "devices" },
        { LXPR_DISKSTATS,       "diskstats" },
        { LXPR_DMA,             "dma" },
        { LXPR_FILESYSTEMS,     "filesystems" },
        { LXPR_INTERRUPTS,      "interrupts" },
        { LXPR_IOPORTS,         "ioports" },
        { LXPR_KCORE,           "kcore" },
        { LXPR_KMSG,            "kmsg" },
        { LXPR_LOADAVG,         "loadavg" },
        { LXPR_MEMINFO,         "meminfo" },
        { LXPR_MODULES,         "modules" },
        { LXPR_MOUNTS,          "mounts" },
        { LXPR_NETDIR,          "net" },
        { LXPR_PARTITIONS,      "partitions" },
        { LXPR_SELF,            "self" },
        { LXPR_STAT,            "stat" },
        { LXPR_SWAPS,           "swaps" },
        { LXPR_SYSDIR,          "sys" },
        { LXPR_UPTIME,          "uptime" },
        { LXPR_VERSION,         "version" }
};

/* Number of rows in lx_procdir[]. */
#define PROCDIRFILES    (sizeof (lx_procdir) / sizeof (lx_procdir[0]))
 305 
/*
 * Contents of an lx /proc/<pid> directory.
 *
 * Each row pairs an LXPR_PID_* node type with the entry name shown inside
 * the per-process directory.  The rows are alphabetical by name except for
 * "fd", which is listed last.
 */
static lxpr_dirent_t piddir[] = {
        { LXPR_PID_AUXV,        "auxv" },
        { LXPR_PID_CGROUP,      "cgroup" },
        { LXPR_PID_CMDLINE,     "cmdline" },
        { LXPR_PID_COMM,        "comm" },
        { LXPR_PID_CPU,         "cpu" },
        { LXPR_PID_CURDIR,      "cwd" },
        { LXPR_PID_ENV,         "environ" },
        { LXPR_PID_EXE,         "exe" },
        { LXPR_PID_LIMITS,      "limits" },
        { LXPR_PID_MAPS,        "maps" },
        { LXPR_PID_MEM,         "mem" },
        { LXPR_PID_MOUNTINFO,   "mountinfo" },
        { LXPR_PID_OOM_SCR_ADJ, "oom_score_adj" },
        { LXPR_PID_ROOTDIR,     "root" },
        { LXPR_PID_STAT,        "stat" },
        { LXPR_PID_STATM,       "statm" },
        { LXPR_PID_STATUS,      "status" },
        { LXPR_PID_TASKDIR,     "task" },
        { LXPR_PID_FDDIR,       "fd" }
};

/* Number of rows in piddir[]. */
#define PIDDIRFILES     (sizeof (piddir) / sizeof (piddir[0]))
 332 
/*
 * Contents of an lx /proc/<pid>/task/<tid> directory.
 *
 * The entry names match the /proc/<pid> table except that there is no
 * nested "task" entry.  Only auxv, comm, oom_score_adj, stat and status are
 * given LXPR_PID_TID_* node types here; every other row uses a per-process
 * LXPR_PID_* type.
 * NOTE(review): confirm that sharing the LXPR_PID_* types for the remaining
 * rows is intentional.
 */
static lxpr_dirent_t tiddir[] = {
        { LXPR_PID_TID_AUXV,    "auxv" },
        { LXPR_PID_CGROUP,      "cgroup" },
        { LXPR_PID_CMDLINE,     "cmdline" },
        { LXPR_PID_TID_COMM,    "comm" },
        { LXPR_PID_CPU,         "cpu" },
        { LXPR_PID_CURDIR,      "cwd" },
        { LXPR_PID_ENV,         "environ" },
        { LXPR_PID_EXE,         "exe" },
        { LXPR_PID_LIMITS,      "limits" },
        { LXPR_PID_MAPS,        "maps" },
        { LXPR_PID_MEM,         "mem" },
        { LXPR_PID_MOUNTINFO,   "mountinfo" },
        { LXPR_PID_TID_OOM_SCR_ADJ,     "oom_score_adj" },
        { LXPR_PID_ROOTDIR,     "root" },
        { LXPR_PID_TID_STAT,    "stat" },
        { LXPR_PID_STATM,       "statm" },
        { LXPR_PID_TID_STATUS,  "status" },
        { LXPR_PID_FDDIR,       "fd" }
};

/* Number of rows in tiddir[]. */
#define TIDDIRFILES     (sizeof (tiddir) / sizeof (tiddir[0]))
 358 
 359 #define LX_RLIM_INFINITY        0xFFFFFFFFFFFFFFFF
 360 
 361 #define RCTL_INFINITE(x) \
 362         ((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \
 363         (x->rcv_flagaction & RCTL_GLOBAL_INFINITE))
 364 
/*
 * One row of the resource-limit table: a display name, the unit string
 * that accompanies it, and the name of the rctl the value is taken from.
 */
typedef struct lxpr_rlimtab {
        char    *rlim_name;     /* limit name */
        char    *rlim_unit;     /* limit unit */
        char    *rlim_rctl;     /* rctl source */
} lxpr_rlimtab_t;
 370 
/*
 * Table of limits, each with its unit string and backing rctl name.
 *
 * The table ends with an all-NULL sentinel row.  "Max file locks" has no
 * backing rctl (its rlim_rctl is NULL), so consumers must handle a NULL
 * rctl name on a row whose rlim_name is non-NULL.
 * NOTE(review): "Max msgqueue size" is labelled in bytes but is sourced from
 * "process.max-msg-messages"; confirm the unit is what is intended.
 */
static lxpr_rlimtab_t lxpr_rlimtab[] = {
        { "Max cpu time",       "seconds",      "process.max-cpu-time" },
        { "Max file size",      "bytes",        "process.max-file-size" },
        { "Max data size",      "bytes",        "process.max-data-size" },
        { "Max stack size",     "bytes",        "process.max-stack-size" },
        { "Max core file size", "bytes",        "process.max-core-size" },
        { "Max resident set",   "bytes",        "zone.max-physical-memory" },
        { "Max processes",      "processes",    "zone.max-lwps" },
        { "Max open files",     "files",        "process.max-file-descriptor" },
        { "Max locked memory",  "bytes",        "zone.max-locked-memory" },
        { "Max address space",  "bytes",        "process.max-address-space" },
        { "Max file locks",     "locks",        NULL },
        { "Max pending signals",        "signals",
                "process.max-sigqueue-size" },
        { "Max msgqueue size",  "bytes",        "process.max-msg-messages" },
        { NULL, NULL, NULL }
};
 388 
 389 
/*
 * contents of lx /proc/net directory
 *
 * Each row pairs an LXPR_NET_* node type with the entry name shown in the
 * directory.
 */
static lxpr_dirent_t netdir[] = {
        { LXPR_NET_ARP,         "arp" },
        { LXPR_NET_DEV,         "dev" },
        { LXPR_NET_DEV_MCAST,   "dev_mcast" },
        { LXPR_NET_IF_INET6,    "if_inet6" },
        { LXPR_NET_IGMP,        "igmp" },
        { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
        { LXPR_NET_IP_MR_VIF,   "ip_mr_vif" },
        { LXPR_NET_IPV6_ROUTE,  "ipv6_route" },
        { LXPR_NET_MCFILTER,    "mcfilter" },
        { LXPR_NET_NETSTAT,     "netstat" },
        { LXPR_NET_RAW,         "raw" },
        { LXPR_NET_ROUTE,       "route" },
        { LXPR_NET_RPC,         "rpc" },
        { LXPR_NET_RT_CACHE,    "rt_cache" },
        { LXPR_NET_SOCKSTAT,    "sockstat" },
        { LXPR_NET_SNMP,        "snmp" },
        { LXPR_NET_STAT,        "stat" },
        { LXPR_NET_TCP,         "tcp" },
        { LXPR_NET_TCP6,        "tcp6" },
        { LXPR_NET_UDP,         "udp" },
        { LXPR_NET_UDP6,        "udp6" },
        { LXPR_NET_UNIX,        "unix" }
};

/* Number of rows in netdir[]. */
#define NETDIRFILES     (sizeof (netdir) / sizeof (netdir[0]))
 419 
/*
 * contents of /proc/sys directory
 *
 * Every row here carries a *DIR node type: fs, kernel, net and vm.
 */
static lxpr_dirent_t sysdir[] = {
        { LXPR_SYS_FSDIR,       "fs" },
        { LXPR_SYS_KERNELDIR,   "kernel" },
        { LXPR_SYS_NETDIR,      "net" },
        { LXPR_SYS_VMDIR,       "vm" },
};

/* Number of rows in sysdir[]. */
#define SYSDIRFILES     (sizeof (sysdir) / sizeof (sysdir[0]))
 431 
/*
 * contents of /proc/sys/fs directory
 *
 * The only entry is "inotify".
 */
static lxpr_dirent_t sys_fsdir[] = {
        { LXPR_SYS_FS_INOTIFYDIR,       "inotify" },
};

/* Number of rows in sys_fsdir[]. */
#define SYS_FSDIRFILES (sizeof (sys_fsdir) / sizeof (sys_fsdir[0]))
 440 
/*
 * contents of /proc/sys/fs/inotify directory
 *
 * Each row pairs an LXPR_SYS_FS_INOTIFY_* node type with its entry name.
 */
static lxpr_dirent_t sys_fs_inotifydir[] = {
        { LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS,        "max_queued_events" },
        { LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES,       "max_user_instances" },
        { LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES,         "max_user_watches" },
};

/* Number of rows in sys_fs_inotifydir[]. */
#define SYS_FS_INOTIFYDIRFILES \
        (sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0]))
 452 
/*
 * contents of /proc/sys/kernel directory
 *
 * Each row pairs an LXPR_SYS_KERNEL_* node type with its entry name.  The
 * "random" row carries a *DIR node type; note that "threads-max" is spelled
 * with a hyphen, unlike the underscore-separated names around it.
 */
static lxpr_dirent_t sys_kerneldir[] = {
        { LXPR_SYS_KERNEL_CAPLCAP,      "cap_last_cap" },
        { LXPR_SYS_KERNEL_COREPATT,     "core_pattern" },
        { LXPR_SYS_KERNEL_HOSTNAME,     "hostname" },
        { LXPR_SYS_KERNEL_MSGMNI,       "msgmni" },
        { LXPR_SYS_KERNEL_NGROUPS_MAX,  "ngroups_max" },
        { LXPR_SYS_KERNEL_OSREL,        "osrelease" },
        { LXPR_SYS_KERNEL_PID_MAX,      "pid_max" },
        { LXPR_SYS_KERNEL_RANDDIR,      "random" },
        { LXPR_SYS_KERNEL_SHMMAX,       "shmmax" },
        { LXPR_SYS_KERNEL_THREADS_MAX,  "threads-max" },
};

/* Number of rows in sys_kerneldir[]. */
#define SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0]))
 470 
/*
 * contents of /proc/sys/kernel/random directory
 *
 * The only entry is "boot_id".
 */
static lxpr_dirent_t sys_randdir[] = {
        { LXPR_SYS_KERNEL_RAND_BOOTID,  "boot_id" },
};

/* Number of rows in sys_randdir[]. */
#define SYS_RANDDIRFILES (sizeof (sys_randdir) / sizeof (sys_randdir[0]))
 479 
/*
 * contents of /proc/sys/net directory
 *
 * The only entry is "core".
 */
static lxpr_dirent_t sys_netdir[] = {
        { LXPR_SYS_NET_COREDIR,         "core" },
};

/* Number of rows in sys_netdir[]. */
#define SYS_NETDIRFILES (sizeof (sys_netdir) / sizeof (sys_netdir[0]))
 488 
/*
 * contents of /proc/sys/net/core directory
 *
 * The only entry is "somaxconn".
 */
static lxpr_dirent_t sys_net_coredir[] = {
        { LXPR_SYS_NET_CORE_SOMAXCON,   "somaxconn" },
};

/* Number of rows in sys_net_coredir[]. */
#define SYS_NET_COREDIRFILES \
        (sizeof (sys_net_coredir) / sizeof (sys_net_coredir[0]))
 498 
/*
 * contents of /proc/sys/vm directory
 *
 * Each row pairs an LXPR_SYS_VM_* node type with its entry name.
 */
static lxpr_dirent_t sys_vmdir[] = {
        { LXPR_SYS_VM_MINFR_KB,         "min_free_kbytes" },
        { LXPR_SYS_VM_NHUGEP,           "nr_hugepages" },
        { LXPR_SYS_VM_OVERCOMMIT_MEM,   "overcommit_memory" },
        { LXPR_SYS_VM_SWAPPINESS,       "swappiness" },
};

/* Number of rows in sys_vmdir[]. */
#define SYS_VMDIRFILES (sizeof (sys_vmdir) / sizeof (sys_vmdir[0]))
 510 
 511 /*
 512  * lxpr_open(): Vnode operation for VOP_OPEN()
 513  */
 514 static int
 515 lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 516 {
 517         vnode_t         *vp = *vpp;
 518         lxpr_node_t     *lxpnp = VTOLXP(vp);
 519         lxpr_nodetype_t type = lxpnp->lxpr_type;
 520         vnode_t         *rvp;
 521         int             error = 0;
 522 
 523         if (flag & FWRITE) {
 524                 /* Restrict writes to certain files */
 525                 switch (type) {
 526                 case LXPR_PID_OOM_SCR_ADJ:
 527                 case LXPR_PID_TID_OOM_SCR_ADJ:
 528                 case LXPR_SYS_KERNEL_COREPATT:
 529                 case LXPR_SYS_NET_CORE_SOMAXCON:
 530                 case LXPR_SYS_VM_OVERCOMMIT_MEM:
 531                 case LXPR_SYS_VM_SWAPPINESS:
 532                 case LXPR_PID_FD_FD:
 533                 case LXPR_PID_TID_FD_FD:
 534                         break;
 535                 default:
 536                         return (EPERM);
 537                 }
 538         }
 539 
 540         /*
 541          * If we are opening an underlying file only allow regular files,
 542          * fifos or sockets; reject the open for anything else.
 543          * Just do it if we are opening the current or root directory.
 544          */
 545         if (lxpnp->lxpr_realvp != NULL) {
 546                 rvp = lxpnp->lxpr_realvp;
 547 
 548                 if (type == LXPR_PID_FD_FD && rvp->v_type != VREG &&
 549                     rvp->v_type != VFIFO && rvp->v_type != VSOCK) {
 550                         error = EACCES;
 551                 } else {
 552                         if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) {
 553                                 /*
 554                                  * This flag lets the fifo open know that
 555                                  * we're using proc/fd to open a fd which we
 556                                  * already have open. Otherwise, the fifo might
 557                                  * reject an open if the other end has closed.
 558                                  */
 559                                 flag |= FKLYR;
 560                         }
 561                         /*
 562                          * Need to hold rvp since VOP_OPEN() may release it.
 563                          */
 564                         VN_HOLD(rvp);
 565                         error = VOP_OPEN(&rvp, flag, cr, ct);
 566                         if (error) {
 567                                 VN_RELE(rvp);
 568                         } else {
 569                                 *vpp = rvp;
 570                                 VN_RELE(vp);
 571                         }
 572                 }
 573         }
 574 
 575         return (error);
 576 }
 577 
 578 
 579 /*
 580  * lxpr_close(): Vnode operation for VOP_CLOSE()
 581  */
 582 /* ARGSUSED */
 583 static int
 584 lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
 585     caller_context_t *ct)
 586 {
 587         lxpr_node_t     *lxpr = VTOLXP(vp);
 588         lxpr_nodetype_t type = lxpr->lxpr_type;
 589 
 590         /*
 591          * we should never get here because the close is done on the realvp
 592          * for these nodes
 593          */
 594         ASSERT(type != LXPR_PID_FD_FD &&
 595             type != LXPR_PID_CURDIR &&
 596             type != LXPR_PID_ROOTDIR &&
 597             type != LXPR_PID_EXE);
 598 
 599         return (0);
 600 }
 601 
 602 static void (*lxpr_read_function[LXPR_NFILES])() = {
 603         lxpr_read_isdir,                /* /proc                */
 604         lxpr_read_isdir,                /* /proc/<pid>            */
 605         lxpr_read_pid_auxv,             /* /proc/<pid>/auxv       */
 606         lxpr_read_pid_cgroup,           /* /proc/<pid>/cgroup     */
 607         lxpr_read_pid_cmdline,          /* /proc/<pid>/cmdline    */
 608         lxpr_read_pid_comm,             /* /proc/<pid>/comm       */
 609         lxpr_read_empty,                /* /proc/<pid>/cpu        */
 610         lxpr_read_invalid,              /* /proc/<pid>/cwd        */
 611         lxpr_read_pid_env,              /* /proc/<pid>/environ    */
 612         lxpr_read_invalid,              /* /proc/<pid>/exe        */
 613         lxpr_read_pid_limits,           /* /proc/<pid>/limits     */
 614         lxpr_read_pid_maps,             /* /proc/<pid>/maps       */
 615         lxpr_read_empty,                /* /proc/<pid>/mem        */
 616         lxpr_read_pid_mountinfo,        /* /proc/<pid>/mountinfo */
 617         lxpr_read_pid_oom_scr_adj,      /* /proc/<pid>/oom_score_adj */
 618         lxpr_read_invalid,              /* /proc/<pid>/root       */
 619         lxpr_read_pid_stat,             /* /proc/<pid>/stat       */
 620         lxpr_read_pid_statm,            /* /proc/<pid>/statm      */
 621         lxpr_read_pid_status,           /* /proc/<pid>/status     */
 622         lxpr_read_isdir,                /* /proc/<pid>/task       */
 623         lxpr_read_isdir,                /* /proc/<pid>/task/nn    */
 624         lxpr_read_isdir,                /* /proc/<pid>/fd */
 625         lxpr_read_fd,                   /* /proc/<pid>/fd/nn      */
 626         lxpr_read_pid_auxv,             /* /proc/<pid>/task/<tid>/auxv      */
 627         lxpr_read_pid_cgroup,           /* /proc/<pid>/task/<tid>/cgroup */
 628         lxpr_read_pid_cmdline,          /* /proc/<pid>/task/<tid>/cmdline */
 629         lxpr_read_pid_comm,             /* /proc/<pid>/task/<tid>/comm      */
 630         lxpr_read_empty,                /* /proc/<pid>/task/<tid>/cpu       */
 631         lxpr_read_invalid,              /* /proc/<pid>/task/<tid>/cwd       */
 632         lxpr_read_pid_env,              /* /proc/<pid>/task/<tid>/environ */
 633         lxpr_read_invalid,              /* /proc/<pid>/task/<tid>/exe       */
 634         lxpr_read_pid_limits,           /* /proc/<pid>/task/<tid>/limits */
 635         lxpr_read_pid_maps,             /* /proc/<pid>/task/<tid>/maps      */
 636         lxpr_read_empty,                /* /proc/<pid>/task/<tid>/mem       */
 637         lxpr_read_pid_mountinfo,        /* /proc/<pid>/task/<tid>/mountinfo */
 638         lxpr_read_pid_oom_scr_adj,      /* /proc/<pid>/task/<tid>/oom_scr_adj */
 639         lxpr_read_invalid,              /* /proc/<pid>/task/<tid>/root      */
 640         lxpr_read_pid_tid_stat,         /* /proc/<pid>/task/<tid>/stat      */
 641         lxpr_read_pid_statm,            /* /proc/<pid>/task/<tid>/statm     */
 642         lxpr_read_pid_tid_status,       /* /proc/<pid>/task/<tid>/status */
 643         lxpr_read_isdir,                /* /proc/<pid>/task/<tid>/fd        */
 644         lxpr_read_fd,                   /* /proc/<pid>/task/<tid>/fd/nn     */
 645         lxpr_read_cgroups,              /* /proc/cgroups        */
 646         lxpr_read_empty,                /* /proc/cmdline        */
 647         lxpr_read_cpuinfo,              /* /proc/cpuinfo        */
 648         lxpr_read_empty,                /* /proc/devices        */
 649         lxpr_read_diskstats,            /* /proc/diskstats      */
 650         lxpr_read_empty,                /* /proc/dma            */
 651         lxpr_read_filesystems,          /* /proc/filesystems    */
 652         lxpr_read_empty,                /* /proc/interrupts     */
 653         lxpr_read_empty,                /* /proc/ioports        */
 654         lxpr_read_empty,                /* /proc/kcore          */
 655         lxpr_read_invalid,              /* /proc/kmsg -- see lxpr_read() */
 656         lxpr_read_loadavg,              /* /proc/loadavg        */
 657         lxpr_read_meminfo,              /* /proc/meminfo        */
 658         lxpr_read_empty,                /* /proc/modules        */
 659         lxpr_read_mounts,               /* /proc/mounts         */
 660         lxpr_read_isdir,                /* /proc/net            */
 661         lxpr_read_net_arp,              /* /proc/net/arp        */
 662         lxpr_read_net_dev,              /* /proc/net/dev        */
 663         lxpr_read_net_dev_mcast,        /* /proc/net/dev_mcast  */
 664         lxpr_read_net_if_inet6,         /* /proc/net/if_inet6   */
 665         lxpr_read_net_igmp,             /* /proc/net/igmp       */
 666         lxpr_read_net_ip_mr_cache,      /* /proc/net/ip_mr_cache */
 667         lxpr_read_net_ip_mr_vif,        /* /proc/net/ip_mr_vif  */
 668         lxpr_read_net_ipv6_route,       /* /proc/net/ipv6_route */
 669         lxpr_read_net_mcfilter,         /* /proc/net/mcfilter   */
 670         lxpr_read_net_netstat,          /* /proc/net/netstat    */
 671         lxpr_read_net_raw,              /* /proc/net/raw        */
 672         lxpr_read_net_route,            /* /proc/net/route      */
 673         lxpr_read_net_rpc,              /* /proc/net/rpc        */
 674         lxpr_read_net_rt_cache,         /* /proc/net/rt_cache   */
 675         lxpr_read_net_sockstat,         /* /proc/net/sockstat   */
 676         lxpr_read_net_snmp,             /* /proc/net/snmp       */
 677         lxpr_read_net_stat,             /* /proc/net/stat       */
 678         lxpr_read_net_tcp,              /* /proc/net/tcp        */
 679         lxpr_read_net_tcp6,             /* /proc/net/tcp6       */
 680         lxpr_read_net_udp,              /* /proc/net/udp        */
 681         lxpr_read_net_udp6,             /* /proc/net/udp6       */
 682         lxpr_read_net_unix,             /* /proc/net/unix       */
 683         lxpr_read_partitions,           /* /proc/partitions     */
 684         lxpr_read_invalid,              /* /proc/self           */
 685         lxpr_read_stat,                 /* /proc/stat           */
 686         lxpr_read_swaps,                /* /proc/swaps          */
 687         lxpr_read_invalid,              /* /proc/sys            */
 688         lxpr_read_invalid,              /* /proc/sys/fs         */
 689         lxpr_read_invalid,              /* /proc/sys/fs/inotify */
 690         lxpr_read_sys_fs_inotify_max_queued_events, /* max_queued_events */
 691         lxpr_read_sys_fs_inotify_max_user_instances, /* max_user_instances */
 692         lxpr_read_sys_fs_inotify_max_user_watches, /* max_user_watches */
 693         lxpr_read_invalid,              /* /proc/sys/kernel     */
 694         lxpr_read_sys_kernel_caplcap,   /* /proc/sys/kernel/cap_last_cap */
 695         lxpr_read_sys_kernel_corepatt,  /* /proc/sys/kernel/core_pattern */
 696         lxpr_read_sys_kernel_hostname,  /* /proc/sys/kernel/hostname */
 697         lxpr_read_sys_kernel_msgmni,    /* /proc/sys/kernel/msgmni */
 698         lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */
 699         lxpr_read_sys_kernel_osrel,     /* /proc/sys/kernel/osrelease */
 700         lxpr_read_sys_kernel_pid_max,   /* /proc/sys/kernel/pid_max */
 701         lxpr_read_invalid,              /* /proc/sys/kernel/random */
 702         lxpr_read_sys_kernel_rand_bootid, /* /proc/sys/kernel/random/boot_id */
 703         lxpr_read_sys_kernel_shmmax,    /* /proc/sys/kernel/shmmax */
 704         lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */
 705         lxpr_read_invalid,              /* /proc/sys/net        */
 706         lxpr_read_invalid,              /* /proc/sys/net/core   */
 707         lxpr_read_sys_net_core_somaxc,  /* /proc/sys/net/core/somaxconn */
 708         lxpr_read_invalid,              /* /proc/sys/vm */
 709         lxpr_read_sys_vm_minfr_kb,      /* /proc/sys/vm/min_free_kbytes */
 710         lxpr_read_sys_vm_nhpages,       /* /proc/sys/vm/nr_hugepages */
 711         lxpr_read_sys_vm_overcommit_mem, /* /proc/sys/vm/overcommit_memory */
 712         lxpr_read_sys_vm_swappiness,    /* /proc/sys/vm/swappiness */
 713         lxpr_read_uptime,               /* /proc/uptime         */
 714         lxpr_read_version,              /* /proc/version        */
 715 };
 716 
 717 /*
 718  * Array of lookup functions, indexed by lx /proc file type.
 719  */
 720 static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
 721         lxpr_lookup_procdir,            /* /proc                */
 722         lxpr_lookup_piddir,             /* /proc/<pid>            */
 723         lxpr_lookup_not_a_dir,          /* /proc/<pid>/auxv       */
 724         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cgroup     */
 725         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cmdline    */
 726         lxpr_lookup_not_a_dir,          /* /proc/<pid>/comm       */
 727         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cpu        */
 728         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cwd        */
 729         lxpr_lookup_not_a_dir,          /* /proc/<pid>/environ    */
 730         lxpr_lookup_not_a_dir,          /* /proc/<pid>/exe        */
 731         lxpr_lookup_not_a_dir,          /* /proc/<pid>/limits     */
 732         lxpr_lookup_not_a_dir,          /* /proc/<pid>/maps       */
 733         lxpr_lookup_not_a_dir,          /* /proc/<pid>/mem        */
 734         lxpr_lookup_not_a_dir,          /* /proc/<pid>/mountinfo */
 735         lxpr_lookup_not_a_dir,          /* /proc/<pid>/oom_score_adj */
 736         lxpr_lookup_not_a_dir,          /* /proc/<pid>/root       */
 737         lxpr_lookup_not_a_dir,          /* /proc/<pid>/stat       */
 738         lxpr_lookup_not_a_dir,          /* /proc/<pid>/statm      */
 739         lxpr_lookup_not_a_dir,          /* /proc/<pid>/status     */
 740         lxpr_lookup_taskdir,            /* /proc/<pid>/task       */
 741         lxpr_lookup_task_tid_dir,       /* /proc/<pid>/task/nn    */
 742         lxpr_lookup_fddir,              /* /proc/<pid>/fd */
 743         lxpr_lookup_not_a_dir,          /* /proc/<pid>/fd/nn      */
 744         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/auxv      */
 745         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cgroup */
 746         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cmdline */
 747         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/comm      */
 748         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cpu       */
 749         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cwd       */
 750         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/environ */
 751         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/exe       */
 752         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/limits */
 753         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/maps      */
 754         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/mem       */
 755         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/mountinfo */
 756         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/oom_scr_adj */
 757         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/root      */
 758         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/stat      */
 759         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/statm     */
 760         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/status */
 761         lxpr_lookup_fddir,              /* /proc/<pid>/task/<tid>/fd        */
 762         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/fd/nn     */
 763         lxpr_lookup_not_a_dir,          /* /proc/cgroups        */
 764         lxpr_lookup_not_a_dir,          /* /proc/cmdline        */
 765         lxpr_lookup_not_a_dir,          /* /proc/cpuinfo        */
 766         lxpr_lookup_not_a_dir,          /* /proc/devices        */
 767         lxpr_lookup_not_a_dir,          /* /proc/diskstats      */
 768         lxpr_lookup_not_a_dir,          /* /proc/dma            */
 769         lxpr_lookup_not_a_dir,          /* /proc/filesystems    */
 770         lxpr_lookup_not_a_dir,          /* /proc/interrupts     */
 771         lxpr_lookup_not_a_dir,          /* /proc/ioports        */
 772         lxpr_lookup_not_a_dir,          /* /proc/kcore          */
 773         lxpr_lookup_not_a_dir,          /* /proc/kmsg           */
 774         lxpr_lookup_not_a_dir,          /* /proc/loadavg        */
 775         lxpr_lookup_not_a_dir,          /* /proc/meminfo        */
 776         lxpr_lookup_not_a_dir,          /* /proc/modules        */
 777         lxpr_lookup_not_a_dir,          /* /proc/mounts         */
 778         lxpr_lookup_netdir,             /* /proc/net            */
 779         lxpr_lookup_not_a_dir,          /* /proc/net/arp        */
 780         lxpr_lookup_not_a_dir,          /* /proc/net/dev        */
 781         lxpr_lookup_not_a_dir,          /* /proc/net/dev_mcast  */
 782         lxpr_lookup_not_a_dir,          /* /proc/net/if_inet6   */
 783         lxpr_lookup_not_a_dir,          /* /proc/net/igmp       */
 784         lxpr_lookup_not_a_dir,          /* /proc/net/ip_mr_cache */
 785         lxpr_lookup_not_a_dir,          /* /proc/net/ip_mr_vif  */
 786         lxpr_lookup_not_a_dir,          /* /proc/net/ipv6_route */
 787         lxpr_lookup_not_a_dir,          /* /proc/net/mcfilter   */
 788         lxpr_lookup_not_a_dir,          /* /proc/net/netstat    */
 789         lxpr_lookup_not_a_dir,          /* /proc/net/raw        */
 790         lxpr_lookup_not_a_dir,          /* /proc/net/route      */
 791         lxpr_lookup_not_a_dir,          /* /proc/net/rpc        */
 792         lxpr_lookup_not_a_dir,          /* /proc/net/rt_cache   */
 793         lxpr_lookup_not_a_dir,          /* /proc/net/sockstat   */
 794         lxpr_lookup_not_a_dir,          /* /proc/net/snmp       */
 795         lxpr_lookup_not_a_dir,          /* /proc/net/stat       */
 796         lxpr_lookup_not_a_dir,          /* /proc/net/tcp        */
 797         lxpr_lookup_not_a_dir,          /* /proc/net/tcp6       */
 798         lxpr_lookup_not_a_dir,          /* /proc/net/udp        */
 799         lxpr_lookup_not_a_dir,          /* /proc/net/udp6       */
 800         lxpr_lookup_not_a_dir,          /* /proc/net/unix       */
 801         lxpr_lookup_not_a_dir,          /* /proc/partitions     */
 802         lxpr_lookup_not_a_dir,          /* /proc/self           */
 803         lxpr_lookup_not_a_dir,          /* /proc/stat           */
 804         lxpr_lookup_not_a_dir,          /* /proc/swaps          */
 805         lxpr_lookup_sysdir,             /* /proc/sys            */
 806         lxpr_lookup_sys_fsdir,          /* /proc/sys/fs         */
 807         lxpr_lookup_sys_fs_inotifydir,  /* /proc/sys/fs/inotify */
 808         lxpr_lookup_not_a_dir,          /* .../inotify/max_queued_events */
 809         lxpr_lookup_not_a_dir,          /* .../inotify/max_user_instances */
 810         lxpr_lookup_not_a_dir,          /* .../inotify/max_user_watches */
 811         lxpr_lookup_sys_kerneldir,      /* /proc/sys/kernel     */
 812         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/cap_last_cap */
 813         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/core_pattern */
 814         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/hostname */
 815         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/msgmni */
 816         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/ngroups_max */
 817         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/osrelease */
 818         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/pid_max */
 819         lxpr_lookup_sys_kdir_randdir,   /* /proc/sys/kernel/random */
 820         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/random/boot_id */
 821         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/shmmax */
 822         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/threads-max */
 823         lxpr_lookup_sys_netdir,         /* /proc/sys/net */
 824         lxpr_lookup_sys_net_coredir,    /* /proc/sys/net/core */
 825         lxpr_lookup_not_a_dir,          /* /proc/sys/net/core/somaxconn */
 826         lxpr_lookup_sys_vmdir,          /* /proc/sys/vm */
 827         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/min_free_kbytes */
 828         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/nr_hugepages */
 829         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/overcommit_memory */
 830         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/swappiness */
 831         lxpr_lookup_not_a_dir,          /* /proc/uptime         */
 832         lxpr_lookup_not_a_dir,          /* /proc/version        */
 833 };
 834 
 835 /*
 836  * Array of readdir functions, indexed by /proc file type.
 837  */
 838 static int (*lxpr_readdir_function[LXPR_NFILES])() = {
 839         lxpr_readdir_procdir,           /* /proc                */
 840         lxpr_readdir_piddir,            /* /proc/<pid>            */
 841         lxpr_readdir_not_a_dir,         /* /proc/<pid>/auxv       */
 842         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cgroup     */
 843         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cmdline    */
 844         lxpr_readdir_not_a_dir,         /* /proc/<pid>/comm       */
 845         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cpu        */
 846         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cwd        */
 847         lxpr_readdir_not_a_dir,         /* /proc/<pid>/environ    */
 848         lxpr_readdir_not_a_dir,         /* /proc/<pid>/exe        */
 849         lxpr_readdir_not_a_dir,         /* /proc/<pid>/limits     */
 850         lxpr_readdir_not_a_dir,         /* /proc/<pid>/maps       */
 851         lxpr_readdir_not_a_dir,         /* /proc/<pid>/mem        */
 852         lxpr_readdir_not_a_dir,         /* /proc/<pid>/mountinfo */
 853         lxpr_readdir_not_a_dir,         /* /proc/<pid>/oom_score_adj */
 854         lxpr_readdir_not_a_dir,         /* /proc/<pid>/root       */
 855         lxpr_readdir_not_a_dir,         /* /proc/<pid>/stat       */
 856         lxpr_readdir_not_a_dir,         /* /proc/<pid>/statm      */
 857         lxpr_readdir_not_a_dir,         /* /proc/<pid>/status     */
 858         lxpr_readdir_taskdir,           /* /proc/<pid>/task       */
 859         lxpr_readdir_task_tid_dir,      /* /proc/<pid>/task/nn    */
 860         lxpr_readdir_fddir,             /* /proc/<pid>/fd */
 861         lxpr_readdir_not_a_dir,         /* /proc/<pid>/fd/nn      */
 862         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/auxv      */
 863         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cgroup */
 864         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cmdline */
 865         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/comm      */
 866         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cpu       */
 867         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cwd       */
 868         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/environ */
 869         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/exe       */
 870         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/limits */
 871         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/maps      */
 872         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/mem       */
 873         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/mountinfo */
 874         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid/oom_scr_adj */
 875         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/root      */
 876         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/stat      */
 877         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/statm     */
 878         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/status */
 879         lxpr_readdir_fddir,             /* /proc/<pid>/task/<tid>/fd        */
 880         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/fd/nn     */
 881         lxpr_readdir_not_a_dir,         /* /proc/cgroups        */
 882         lxpr_readdir_not_a_dir,         /* /proc/cmdline        */
 883         lxpr_readdir_not_a_dir,         /* /proc/cpuinfo        */
 884         lxpr_readdir_not_a_dir,         /* /proc/devices        */
 885         lxpr_readdir_not_a_dir,         /* /proc/diskstats      */
 886         lxpr_readdir_not_a_dir,         /* /proc/dma            */
 887         lxpr_readdir_not_a_dir,         /* /proc/filesystems    */
 888         lxpr_readdir_not_a_dir,         /* /proc/interrupts     */
 889         lxpr_readdir_not_a_dir,         /* /proc/ioports        */
 890         lxpr_readdir_not_a_dir,         /* /proc/kcore          */
 891         lxpr_readdir_not_a_dir,         /* /proc/kmsg           */
 892         lxpr_readdir_not_a_dir,         /* /proc/loadavg        */
 893         lxpr_readdir_not_a_dir,         /* /proc/meminfo        */
 894         lxpr_readdir_not_a_dir,         /* /proc/modules        */
 895         lxpr_readdir_not_a_dir,         /* /proc/mounts         */
 896         lxpr_readdir_netdir,            /* /proc/net            */
 897         lxpr_readdir_not_a_dir,         /* /proc/net/arp        */
 898         lxpr_readdir_not_a_dir,         /* /proc/net/dev        */
 899         lxpr_readdir_not_a_dir,         /* /proc/net/dev_mcast  */
 900         lxpr_readdir_not_a_dir,         /* /proc/net/if_inet6   */
 901         lxpr_readdir_not_a_dir,         /* /proc/net/igmp       */
 902         lxpr_readdir_not_a_dir,         /* /proc/net/ip_mr_cache */
 903         lxpr_readdir_not_a_dir,         /* /proc/net/ip_mr_vif  */
 904         lxpr_readdir_not_a_dir,         /* /proc/net/ipv6_route */
 905         lxpr_readdir_not_a_dir,         /* /proc/net/mcfilter   */
 906         lxpr_readdir_not_a_dir,         /* /proc/net/netstat    */
 907         lxpr_readdir_not_a_dir,         /* /proc/net/raw        */
 908         lxpr_readdir_not_a_dir,         /* /proc/net/route      */
 909         lxpr_readdir_not_a_dir,         /* /proc/net/rpc        */
 910         lxpr_readdir_not_a_dir,         /* /proc/net/rt_cache   */
 911         lxpr_readdir_not_a_dir,         /* /proc/net/sockstat   */
 912         lxpr_readdir_not_a_dir,         /* /proc/net/snmp       */
 913         lxpr_readdir_not_a_dir,         /* /proc/net/stat       */
 914         lxpr_readdir_not_a_dir,         /* /proc/net/tcp        */
 915         lxpr_readdir_not_a_dir,         /* /proc/net/tcp6       */
 916         lxpr_readdir_not_a_dir,         /* /proc/net/udp        */
 917         lxpr_readdir_not_a_dir,         /* /proc/net/udp6       */
 918         lxpr_readdir_not_a_dir,         /* /proc/net/unix       */
 919         lxpr_readdir_not_a_dir,         /* /proc/partitions     */
 920         lxpr_readdir_not_a_dir,         /* /proc/self           */
 921         lxpr_readdir_not_a_dir,         /* /proc/stat           */
 922         lxpr_readdir_not_a_dir,         /* /proc/swaps          */
 923         lxpr_readdir_sysdir,            /* /proc/sys            */
 924         lxpr_readdir_sys_fsdir,         /* /proc/sys/fs         */
 925         lxpr_readdir_sys_fs_inotifydir, /* /proc/sys/fs/inotify */
 926         lxpr_readdir_not_a_dir,         /* .../inotify/max_queued_events */
 927         lxpr_readdir_not_a_dir,         /* .../inotify/max_user_instances */
 928         lxpr_readdir_not_a_dir,         /* .../inotify/max_user_watches */
 929         lxpr_readdir_sys_kerneldir,     /* /proc/sys/kernel     */
 930         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/cap_last_cap */
 931         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/core_pattern */
 932         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/hostname */
 933         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/msgmni */
 934         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/ngroups_max */
 935         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/osrelease */
 936         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/pid_max */
 937         lxpr_readdir_sys_kdir_randdir,  /* /proc/sys/kernel/random */
 938         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/random/boot_id */
 939         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/shmmax */
 940         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/threads-max */
 941         lxpr_readdir_sys_netdir,        /* /proc/sys/net */
 942         lxpr_readdir_sys_net_coredir,   /* /proc/sys/net/core */
 943         lxpr_readdir_not_a_dir,         /* /proc/sys/net/core/somaxconn */
 944         lxpr_readdir_sys_vmdir,         /* /proc/sys/vm */
 945         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/min_free_kbytes */
 946         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/nr_hugepages */
 947         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/overcommit_memory */
 948         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/swappiness */
 949         lxpr_readdir_not_a_dir,         /* /proc/uptime         */
 950         lxpr_readdir_not_a_dir,         /* /proc/version        */
 951 };
 952 
 953 
 954 /*
 955  * lxpr_read(): Vnode operation for VOP_READ()
 956  *
 957  * As the format of all the files that can be read in the lx procfs is human
 958  * readable and not binary structures there do not have to be different
 959  * read variants depending on whether the reading process model is 32 or 64 bits
 960  * (at least in general, and certainly the difference is unlikely to be enough
 961  * to justify have different routines for 32 and 64 bit reads
 962  */
 963 /* ARGSUSED */
 964 static int
 965 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 966     caller_context_t *ct)
 967 {
 968         lxpr_node_t *lxpnp = VTOLXP(vp);
 969         lxpr_nodetype_t type = lxpnp->lxpr_type;
 970         lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
 971         int error;
 972 
 973         ASSERT(type < LXPR_NFILES);
 974 
 975         if (type == LXPR_KMSG) {
 976                 ldi_ident_t     li = VTOLXPM(vp)->lxprm_li;
 977                 ldi_handle_t    ldih;
 978                 struct strioctl str;
 979                 int             rv;
 980 
 981                 /*
 982                  * Open the zone's console device using the layered driver
 983                  * interface.
 984                  */
 985                 if ((error =
 986                     ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li)) != 0)
 987                         return (error);
 988 
 989                 /*
 990                  * Send an ioctl to the underlying console device, letting it
 991                  * know we're interested in getting console messages.
 992                  */
 993                 str.ic_cmd = I_CONSLOG;
 994                 str.ic_timout = 0;
 995                 str.ic_len = 0;
 996                 str.ic_dp = NULL;
 997                 if ((error = ldi_ioctl(ldih, I_STR,
 998                     (intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
 999                         return (error);
1000 
1001                 lxpr_read_kmsg(lxpnp, uiobuf, ldih);
1002 
1003                 if ((error = ldi_close(ldih, FREAD, cr)) != 0)
1004                         return (error);
1005         } else {
1006                 lxpr_read_function[type](lxpnp, uiobuf);
1007         }
1008 
1009         error = lxpr_uiobuf_flush(uiobuf);
1010         lxpr_uiobuf_free(uiobuf);
1011 
1012         return (error);
1013 }
1014 
1015 /*
1016  * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
1017  *
1018  * Various special case reads:
1019  * - trying to read a directory
1020  * - invalid file (used to mean a file that should be implemented,
1021  *   but isn't yet)
1022  * - empty file
1023  * - wait to be able to read a file that will never have anything to read
1024  */
1025 /* ARGSUSED */
1026 static void
1027 lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1028 {
1029         lxpr_uiobuf_seterr(uiobuf, EISDIR);
1030 }
1031 
1032 /* ARGSUSED */
1033 static void
1034 lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1035 {
1036         lxpr_uiobuf_seterr(uiobuf, EINVAL);
1037 }
1038 
1039 /* ARGSUSED */
1040 static void
1041 lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1042 {
1043 }
1044 
1045 /*
1046  * lxpr_read_pid_auxv(): read process aux vector
1047  */
1048 static void
1049 lxpr_read_pid_auxv(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1050 {
1051         proc_t *p;
1052         lx_proc_data_t *pd;
1053         lx_elf_data_t *edp = NULL;
1054         int i, cnt;
1055 
1056         ASSERT(lxpnp->lxpr_type == LXPR_PID_AUXV ||
1057             lxpnp->lxpr_type == LXPR_PID_TID_AUXV);
1058 
1059         p = lxpr_lock(lxpnp->lxpr_pid);
1060 
1061         if (p == NULL) {
1062                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1063                 return;
1064         }
1065         if ((pd = ptolxproc(p)) == NULL) {
1066                 /* Emit a single AT_NULL record for non-branded processes */
1067                 auxv_t buf;
1068 
1069                 bzero(&buf, sizeof (buf));
1070                 lxpr_unlock(p);
1071                 lxpr_uiobuf_write(uiobuf, (char *)&buf, sizeof (buf));
1072                 return;
1073         } else {
1074                 edp = &pd->l_elf_data;
1075         }
1076 
1077         if (p->p_model == DATAMODEL_NATIVE) {
1078                 auxv_t buf[__KERN_NAUXV_IMPL];
1079 
1080                 /*
1081                  * Because a_type is only of size int (not long), the buffer
1082                  * contents must be zeroed first to ensure cleanliness.
1083                  */
1084                 bzero(buf, sizeof (buf));
1085                 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
1086                         if (lx_auxv_stol(&p->p_user.u_auxv[i],
1087                             &buf[cnt], edp) == 0) {
1088                                 cnt++;
1089                         }
1090                         if (p->p_user.u_auxv[i].a_type == AT_NULL) {
1091                                 break;
1092                         }
1093                 }
1094                 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
1095                 lxpr_unlock(p);
1096         }
1097 #if defined(_SYSCALL32_IMPL)
1098         else {
1099                 auxv32_t buf[__KERN_NAUXV_IMPL];
1100 
1101                 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
1102                         auxv_t temp;
1103 
1104                         if (lx_auxv_stol(&p->p_user.u_auxv[i],
1105                             &temp, edp) == 0) {
1106                                 buf[cnt].a_type = (int)temp.a_type;
1107                                 buf[cnt].a_un.a_val = (int)temp.a_un.a_val;
1108                                 cnt++;
1109                         }
1110                         if (p->p_user.u_auxv[i].a_type == AT_NULL) {
1111                                 break;
1112                         }
1113                 }
1114                 lxpr_unlock(p);
1115                 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
1116         }
1117 #endif /* defined(_SYSCALL32_IMPL) */
1118 }
1119 
1120 /*
1121  * lxpr_read_pid_cgroup(): read cgroups for process
1122  */
1123 static void
1124 lxpr_read_pid_cgroup(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1125 {
1126         proc_t *p;
1127 
1128         ASSERT(lxpnp->lxpr_type == LXPR_PID_CGROUP ||
1129             lxpnp->lxpr_type == LXPR_PID_TID_CGROUP);
1130 
1131         p = lxpr_lock(lxpnp->lxpr_pid);
1132         if (p == NULL) {
1133                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1134                 return;
1135         }
1136 
1137         /* basic stub, 3rd field will need to be populated */
1138         lxpr_uiobuf_printf(uiobuf, "1:name=systemd:/\n");
1139 
1140         lxpr_unlock(p);
1141 }
1142 
1143 static void
1144 lxpr_copy_cmdline(proc_t *p, lx_proc_data_t *pd, lxpr_uiobuf_t *uiobuf)
1145 {
1146         uio_t *uiop = uiobuf->uiop;
1147         char *buf = uiobuf->buffer;
1148         int bsz = uiobuf->buffsize;
1149         boolean_t env_overflow = B_FALSE;
1150         uintptr_t pos = pd->l_args_start + uiop->uio_offset;
1151         uintptr_t estart = pd->l_envs_start;
1152         uintptr_t eend = pd->l_envs_end;
1153         size_t chunk, copied;
1154         int err = 0;
1155 
1156         /* Do not bother with data beyond the end of the envp strings area. */
1157         if (pos > eend) {
1158                 return;
1159         }
1160         mutex_exit(&p->p_lock);
1161 
1162         /*
1163          * If the starting or ending bounds are outside the argv strings area,
1164          * check to see if the process has overwritten the terminating NULL.
1165          * If not, no data needs to be copied from oustide the argv area.
1166          */
1167         if (pos >= estart || (pos + uiop->uio_resid) >= estart) {
1168                 uint8_t term;
1169                 if (uread(p, &term, sizeof (term), estart - 1) != 0) {
1170                         err = EFAULT;
1171                 } else if (term != 0) {
1172                         env_overflow = B_TRUE;
1173                 }
1174         }
1175 
1176 
1177         /* Data between astart and estart-1 can be copied freely. */
1178         while (pos < estart && uiop->uio_resid > 0 && err == 0) {
1179                 chunk = MIN(estart - pos, uiop->uio_resid);
1180                 chunk = MIN(chunk, bsz);
1181 
1182                 if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) != 0 ||
1183                     copied != chunk) {
1184                         err = EFAULT;
1185                         break;
1186                 }
1187                 err = uiomove(buf, copied, UIO_READ, uiop);
1188                 pos += copied;
1189         }
1190 
1191         /*
1192          * Onward from estart, data is copied as a contiguous string.  To
1193          * protect env data from potential snooping, only one buffer-sized copy
1194          * is allowed to avoid complex seek logic.
1195          */
1196         if (err == 0 && env_overflow && pos == estart && uiop->uio_resid > 0) {
1197                 chunk = MIN(eend - pos, uiop->uio_resid);
1198                 chunk = MIN(chunk, bsz);
1199                 if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) == 0) {
1200                         int len = strnlen(buf, copied);
1201                         if (len > 0) {
1202                                 err = uiomove(buf, len, UIO_READ, uiop);
1203                         }
1204                 }
1205         }
1206 
1207         uiobuf->error = err;
1208         /* reset any uiobuf state */
1209         uiobuf->pos = uiobuf->buffer;
1210         uiobuf->beg = 0;
1211 
1212         mutex_enter(&p->p_lock);
1213 }
1214 
1215 /*
1216  * lxpr_read_pid_cmdline(): read argument vector from process
1217  */
1218 static void
1219 lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1220 {
1221         proc_t *p;
1222         char *buf;
1223         size_t asz = lxpr_maxargvlen, sz;
1224         lx_proc_data_t *pd;
1225 
1226         ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE ||
1227             lxpnp->lxpr_type == LXPR_PID_TID_CMDLINE);
1228 
1229         buf = kmem_alloc(asz, KM_SLEEP);
1230 
1231         p = lxpr_lock(lxpnp->lxpr_pid);
1232         if (p == NULL) {
1233                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1234                 kmem_free(buf, asz);
1235                 return;
1236         }
1237 
1238         if ((pd = ptolxproc(p)) != NULL && pd->l_args_start != 0 &&
1239             pd->l_envs_start != 0 && pd->l_envs_end != 0) {
1240                 /* Use Linux-style argv bounds if possible. */
1241                 lxpr_copy_cmdline(p, pd, uiobuf);
1242         } else {
1243                 if (prreadargv(p, buf, asz, &sz) != 0) {
1244                         lxpr_uiobuf_seterr(uiobuf, EINVAL);
1245                 } else {
1246                         lxpr_uiobuf_write(uiobuf, buf, sz);
1247                 }
1248         }
1249 
1250         lxpr_unlock(p);
1251         kmem_free(buf, asz);
1252 }
1253 
1254 /*
1255  * lxpr_read_pid_comm(): read command from process
1256  */
1257 static void
1258 lxpr_read_pid_comm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1259 {
1260         proc_t *p;
1261 
1262         VERIFY(lxpnp->lxpr_type == LXPR_PID_COMM ||
1263             lxpnp->lxpr_type == LXPR_PID_TID_COMM);
1264 
1265         /*
1266          * Because prctl(PR_SET_NAME) does not set custom names for threads
1267          * (vs processes), there is no need for special handling here.
1268          */
1269         if ((p = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
1270                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1271                 return;
1272         }
1273         lxpr_uiobuf_printf(uiobuf, "%s\n", p->p_user.u_comm);
1274         lxpr_unlock(p);
1275 }
1276 
1277 /*
1278  * lxpr_read_pid_env(): read env vector from process
1279  */
1280 static void
1281 lxpr_read_pid_env(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1282 {
1283         proc_t *p;
1284         char *buf;
1285         size_t asz = lxpr_maxenvvlen, sz;
1286 
1287         ASSERT(lxpnp->lxpr_type == LXPR_PID_ENV);
1288 
1289         buf = kmem_alloc(asz, KM_SLEEP);
1290 
1291         p = lxpr_lock(lxpnp->lxpr_pid);
1292         if (p == NULL) {
1293                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1294                 kmem_free(buf, asz);
1295                 return;
1296         }
1297 
1298         if (prreadenvv(p, buf, asz, &sz) != 0) {
1299                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1300         } else {
1301                 lxpr_uiobuf_write(uiobuf, buf, sz);
1302         }
1303 
1304         lxpr_unlock(p);
1305         kmem_free(buf, asz);
1306 }
1307 
1308 /*
1309  * lxpr_read_pid_limits(): ulimit file
1310  */
1311 static void
1312 lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1313 {
1314         proc_t *p;
1315         rctl_qty_t cur, max;
1316         rctl_val_t *oval, *nval;
1317         rctl_hndl_t hndl;
1318         char *kname;
1319         int i;
1320 
1321         ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS ||
1322             lxpnp->lxpr_type == LXPR_PID_TID_LIMITS);
1323 
1324         nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP);
1325 
1326         p = lxpr_lock(lxpnp->lxpr_pid);
1327         if (p == NULL) {
1328                 kmem_free(nval, sizeof (rctl_val_t));
1329                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1330                 return;
1331         }
1332 
1333         lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n",
1334             "Limit", "Soft Limit", "Hard Limit", "Units");
1335         for (i = 0; lxpr_rlimtab[i].rlim_name != NULL; i++) {
1336                 kname = lxpr_rlimtab[i].rlim_rctl;
1337                 /* default to unlimited for resources without an analog */
1338                 cur = RLIM_INFINITY;
1339                 max = RLIM_INFINITY;
1340                 if (kname != NULL) {
1341                         hndl = rctl_hndl_lookup(kname);
1342                         oval = NULL;
1343                         while ((hndl != -1) &&
1344                             rctl_local_get(hndl, oval, nval, p) == 0) {
1345                                 oval = nval;
1346                                 switch (nval->rcv_privilege) {
1347                                 case RCPRIV_BASIC:
1348                                         if (!RCTL_INFINITE(nval))
1349                                                 cur = nval->rcv_value;
1350                                         break;
1351                                 case RCPRIV_PRIVILEGED:
1352                                         if (!RCTL_INFINITE(nval))
1353                                                 max = nval->rcv_value;
1354                                         break;
1355                                 }
1356                         }
1357                 }
1358 
1359                 lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name);
1360                 if (cur == RLIM_INFINITY || cur == LX_RLIM_INFINITY) {
1361                         lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
1362                 } else {
1363                         lxpr_uiobuf_printf(uiobuf, " %-20lu", cur);
1364                 }
1365                 if (max == RLIM_INFINITY || max == LX_RLIM_INFINITY) {
1366                         lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
1367                 } else {
1368                         lxpr_uiobuf_printf(uiobuf, " %-20lu", max);
1369                 }
1370                 lxpr_uiobuf_printf(uiobuf, " %-10s\n",
1371                     lxpr_rlimtab[i].rlim_unit);
1372         }
1373 
1374         lxpr_unlock(p);
1375         kmem_free(nval, sizeof (rctl_val_t));
1376 }
1377 
1378 /*
1379  * lxpr_read_pid_maps(): memory map file
1380  */
1381 static void
1382 lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1383 {
1384         proc_t *p;
1385         struct as *as;
1386         struct seg *seg;
1387         char *buf;
1388         int buflen = MAXPATHLEN;
1389         struct print_data {
1390                 uintptr_t saddr;
1391                 uintptr_t eaddr;
1392                 int type;
1393                 char prot[5];
1394                 uintptr_t offset;
1395                 vnode_t *vp;
1396                 struct print_data *next;
1397         } *print_head = NULL;
1398         struct print_data **print_tail = &print_head;
1399         struct print_data *pbuf;
1400 
1401         ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS ||
1402             lxpnp->lxpr_type == LXPR_PID_TID_MAPS);
1403 
1404         p = lxpr_lock(lxpnp->lxpr_pid);
1405         if (p == NULL) {
1406                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1407                 return;
1408         }
1409 
1410         as = p->p_as;
1411 
1412         if (as == &kas) {
1413                 lxpr_unlock(p);
1414                 return;
1415         }
1416 
1417         mutex_exit(&p->p_lock);
1418 
1419         /* Iterate over all segments in the address space */
1420         AS_LOCK_ENTER(as, RW_READER);
1421         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1422                 vnode_t *vp;
1423                 uint_t protbits;
1424 
1425                 pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
1426 
1427                 pbuf->saddr = (uintptr_t)seg->s_base;
1428                 pbuf->eaddr = pbuf->saddr + seg->s_size;
1429                 pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
1430 
1431                 /*
1432                  * Cheat and only use the protection bits of the first page
1433                  * in the segment
1434                  */
1435                 (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
1436                 (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
1437 
1438                 if (protbits & PROT_READ)      pbuf->prot[0] = 'r';
1439                 if (protbits & PROT_WRITE)     pbuf->prot[1] = 'w';
1440                 if (protbits & PROT_EXEC)      pbuf->prot[2] = 'x';
1441                 if (pbuf->type & MAP_SHARED)        pbuf->prot[3] = 's';
1442                 else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
1443 
1444                 if (seg->s_ops == &segvn_ops &&
1445                     SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
1446                     vp != NULL && vp->v_type == VREG) {
1447                         VN_HOLD(vp);
1448                         pbuf->vp = vp;
1449                 } else {
1450                         pbuf->vp = NULL;
1451                 }
1452 
1453                 pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr);
1454 
1455                 pbuf->next = NULL;
1456                 *print_tail = pbuf;
1457                 print_tail = &pbuf->next;
1458         }
1459         AS_LOCK_EXIT(as);
1460         mutex_enter(&p->p_lock);
1461         lxpr_unlock(p);
1462 
1463         buf = kmem_alloc(buflen, KM_SLEEP);
1464 
1465         /* print the data we've extracted */
1466         pbuf = print_head;
1467         while (pbuf != NULL) {
1468                 struct print_data *pbuf_next;
1469                 vattr_t vattr;
1470 
1471                 int maj = 0;
1472                 int min = 0;
1473                 ino_t inode = 0;
1474 
1475                 *buf = '\0';
1476                 if (pbuf->vp != NULL) {
1477                         vattr.va_mask = AT_FSID | AT_NODEID;
1478                         if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
1479                             NULL) == 0) {
1480                                 maj = getmajor(vattr.va_fsid);
1481                                 min = getminor(vattr.va_fsid);
1482                                 inode = vattr.va_nodeid;
1483                         }
1484                         (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
1485                         VN_RELE(pbuf->vp);
1486                 }
1487 
1488                 if (p->p_model == DATAMODEL_LP64) {
1489                         lxpr_uiobuf_printf(uiobuf,
1490                             "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n",
1491                             pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
1492                             maj, min, inode, *buf != '\0' ? " " : "", buf);
1493                 } else {
1494                         lxpr_uiobuf_printf(uiobuf,
1495                             "%08x-%08x %s %08x %02x:%02x %llu%s%s\n",
1496                             (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr,
1497                             pbuf->prot, (uint32_t)pbuf->offset, maj, min,
1498                             inode, *buf != '\0' ? " " : "", buf);
1499                 }
1500 
1501                 pbuf_next = pbuf->next;
1502                 kmem_free(pbuf, sizeof (*pbuf));
1503                 pbuf = pbuf_next;
1504         }
1505 
1506         kmem_free(buf, buflen);
1507 }
1508 
1509 /*
1510  * lxpr_read_pid_mountinfo(): information about process mount points. e.g.:
1511  *    14 19 0:13 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
1512  * mntid parid devnums root mntpnt mntopts - fstype mntsrc superopts
1513  *
1514  * We have to make up several of these fields.
1515  */
1516 static void
1517 lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1518 {
1519         struct vfs *vfsp;
1520         struct vfs *vfslist;
1521         zone_t *zone = LXPTOZ(lxpnp);
1522         struct print_data {
1523                 refstr_t *vfs_mntpt;
1524                 refstr_t *vfs_resource;
1525                 uint_t vfs_flag;
1526                 int vfs_fstype;
1527                 dev_t vfs_dev;
1528                 struct print_data *next;
1529         } *print_head = NULL;
1530         struct print_data **print_tail = &print_head;
1531         struct print_data *printp;
1532         int root_id = 15;       /* use a made-up value */
1533         int mnt_id;
1534 
1535         ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO ||
1536             lxpnp->lxpr_type == LXPR_PID_TID_MOUNTINFO);
1537 
1538         vfs_list_read_lock();
1539 
1540         /* root is the top-level, it does not appear in this output */
1541         if (zone == global_zone) {
1542                 vfsp = vfslist = rootvfs;
1543         } else {
1544                 vfsp = vfslist = zone->zone_vfslist;
1545                 /*
1546                  * If the zone has a root entry, it will be the first in
1547                  * the list.  If it doesn't, we conjure one up.
1548                  */
1549                 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
1550                     zone->zone_rootpath) != 0) {
1551                         struct vfs *tvfsp;
1552                         /*
1553                          * The root of the zone is not a mount point.  The vfs
1554                          * we want to report is that of the zone's root vnode.
1555                          */
1556                         tvfsp = zone->zone_rootvp->v_vfsp;
1557 
1558                         lxpr_uiobuf_printf(uiobuf,
1559                             "%d 1 %d:%d / / %s - %s / %s\n",
1560                             root_id,
1561                             major(tvfsp->vfs_dev), minor(vfsp->vfs_dev),
1562                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1563                             vfssw[tvfsp->vfs_fstype].vsw_name,
1564                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1565 
1566                 }
1567                 if (vfslist == NULL) {
1568                         vfs_list_unlock();
1569                         return;
1570                 }
1571         }
1572 
1573         /*
1574          * Later on we have to do a lookupname, which can end up causing
1575          * another vfs_list_read_lock() to be called. Which can lead to a
1576          * deadlock. To avoid this, we extract the data we need into a local
1577          * list, then we can run this list without holding vfs_list_read_lock()
1578          * We keep the list in the same order as the vfs_list
1579          */
1580         do {
1581                 /* Skip mounts we shouldn't show */
1582                 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
1583                         goto nextfs;
1584                 }
1585 
1586                 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
1587                 refstr_hold(vfsp->vfs_mntpt);
1588                 printp->vfs_mntpt = vfsp->vfs_mntpt;
1589                 refstr_hold(vfsp->vfs_resource);
1590                 printp->vfs_resource = vfsp->vfs_resource;
1591                 printp->vfs_flag = vfsp->vfs_flag;
1592                 printp->vfs_fstype = vfsp->vfs_fstype;
1593                 printp->vfs_dev = vfsp->vfs_dev;
1594                 printp->next = NULL;
1595 
1596                 *print_tail = printp;
1597                 print_tail = &printp->next;
1598 
1599 nextfs:
1600                 vfsp = (zone == global_zone) ?
1601                     vfsp->vfs_next : vfsp->vfs_zone_next;
1602 
1603         } while (vfsp != vfslist);
1604 
1605         vfs_list_unlock();
1606 
1607         mnt_id = root_id + 1;
1608 
1609         /*
1610          * now we can run through what we've extracted without holding
1611          * vfs_list_read_lock()
1612          */
1613         printp = print_head;
1614         while (printp != NULL) {
1615                 struct print_data *printp_next;
1616                 const char *resource;
1617                 char *mntpt;
1618                 struct vnode *vp;
1619                 int error;
1620 
1621                 mntpt = (char *)refstr_value(printp->vfs_mntpt);
1622                 resource = refstr_value(printp->vfs_resource);
1623 
1624                 if (mntpt != NULL && mntpt[0] != '\0')
1625                         mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
1626                 else
1627                         mntpt = "-";
1628 
1629                 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
1630 
1631                 if (error != 0)
1632                         goto nextp;
1633 
1634                 if (!(vp->v_flag & VROOT)) {
1635                         VN_RELE(vp);
1636                         goto nextp;
1637                 }
1638                 VN_RELE(vp);
1639 
1640                 if (resource != NULL && resource[0] != '\0') {
1641                         if (resource[0] == '/') {
1642                                 resource = ZONE_PATH_VISIBLE(resource, zone) ?
1643                                     ZONE_PATH_TRANSLATE(resource, zone) : mntpt;
1644                         }
1645                 } else {
1646                         resource = "none";
1647                 }
1648 
1649                 /*
1650                  * XXX parent ID is not tracked correctly here. Currently we
1651                  * always assume the parent ID is the root ID.
1652                  */
1653                 lxpr_uiobuf_printf(uiobuf,
1654                     "%d %d %d:%d / %s %s - %s %s %s\n",
1655                     mnt_id, root_id,
1656                     major(printp->vfs_dev), minor(printp->vfs_dev),
1657                     mntpt,
1658                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1659                     vfssw[printp->vfs_fstype].vsw_name,
1660                     resource,
1661                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1662 
1663 nextp:
1664                 printp_next = printp->next;
1665                 refstr_rele(printp->vfs_mntpt);
1666                 refstr_rele(printp->vfs_resource);
1667                 kmem_free(printp, sizeof (*printp));
1668                 printp = printp_next;
1669 
1670                 mnt_id++;
1671         }
1672 }
1673 
1674 /*
1675  * lxpr_read_pid_oom_scr_adj(): read oom_score_adj for process
1676  */
1677 static void
1678 lxpr_read_pid_oom_scr_adj(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1679 {
1680         proc_t *p;
1681 
1682         ASSERT(lxpnp->lxpr_type == LXPR_PID_OOM_SCR_ADJ ||
1683             lxpnp->lxpr_type == LXPR_PID_TID_OOM_SCR_ADJ);
1684 
1685         p = lxpr_lock(lxpnp->lxpr_pid);
1686         if (p == NULL) {
1687                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1688                 return;
1689         }
1690 
1691         /* always 0 */
1692         lxpr_uiobuf_printf(uiobuf, "0\n");
1693 
1694         lxpr_unlock(p);
1695 }
1696 
1697 
1698 /*
1699  * lxpr_read_pid_statm(): memory status file
1700  */
1701 static void
1702 lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1703 {
1704         proc_t *p;
1705         struct as *as;
1706         size_t vsize;
1707         size_t rss;
1708 
1709         ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM ||
1710             lxpnp->lxpr_type == LXPR_PID_TID_STATM);
1711 
1712         p = lxpr_lock(lxpnp->lxpr_pid);
1713         if (p == NULL) {
1714                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1715                 return;
1716         }
1717 
1718         as = p->p_as;
1719 
1720         mutex_exit(&p->p_lock);
1721 
1722         AS_LOCK_ENTER(as, RW_READER);
1723         vsize = btopr(as->a_resvsize);
1724         rss = rm_asrss(as);
1725         AS_LOCK_EXIT(as);
1726 
1727         mutex_enter(&p->p_lock);
1728         lxpr_unlock(p);
1729 
1730         lxpr_uiobuf_printf(uiobuf,
1731             "%lu %lu %lu %lu %lu %lu %lu\n",
1732             vsize, rss, 0l, rss, 0l, 0l, 0l);
1733 }
1734 
1735 /*
1736  * Look for either the main thread (lookup_id is 0) or the specified thread.
1737  * If we're looking for the main thread but the proc does not have one, we
1738  * fallback to using prchoose to get any thread available.
1739  */
1740 static kthread_t *
1741 lxpr_get_thread(proc_t *p, uint_t lookup_id)
1742 {
1743         kthread_t *t;
1744         uint_t emul_tid;
1745         lx_lwp_data_t *lwpd;
1746         pid_t pid = p->p_pid;
1747         pid_t init_pid = curproc->p_zone->zone_proc_initpid;
1748         boolean_t branded = (p->p_brand == &lx_brand);
1749 
1750         /* get specified thread  */
1751         if ((t = p->p_tlist) == NULL)
1752                 return (NULL);
1753 
1754         do {
1755                 if (lookup_id == 0 && t->t_tid == 1) {
1756                         thread_lock(t);
1757                         return (t);
1758                 }
1759 
1760                 lwpd = ttolxlwp(t);
1761                 if (branded && lwpd != NULL) {
1762                         if (pid == init_pid && lookup_id == 1) {
1763                                 emul_tid = t->t_tid;
1764                         } else {
1765                                 emul_tid = lwpd->br_pid;
1766                         }
1767                 } else {
1768                         /*
1769                          * Make only the first (assumed to be main) thread
1770                          * visible for non-branded processes.
1771                          */
1772                         emul_tid = p->p_pid;
1773                 }
1774                 if (emul_tid == lookup_id) {
1775                         thread_lock(t);
1776                         return (t);
1777                 }
1778         } while ((t = t->t_forw) != p->p_tlist);
1779 
1780         if (lookup_id == 0)
1781                 return (prchoose(p));
1782         return (NULL);
1783 }
1784 
1785 /*
1786  * Lookup the real pid for procs 0 or 1.
1787  */
1788 static pid_t
1789 get_real_pid(pid_t p)
1790 {
1791         pid_t find_pid;
1792 
1793         if (p == 1) {
1794                 find_pid = curproc->p_zone->zone_proc_initpid;
1795         } else if (p == 0) {
1796                 find_pid = curproc->p_zone->zone_zsched->p_pid;
1797         } else {
1798                 find_pid = p;
1799         }
1800 
1801         return (find_pid);
1802 }
1803 
1804 /*
1805  * pid/tid common code to read status file
1806  */
1807 static void
1808 lxpr_read_status_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
1809     uint_t lookup_id)
1810 {
1811         proc_t *p;
1812         kthread_t *t;
1813         user_t *up;
1814         cred_t *cr;
1815         const gid_t *groups;
1816         int    ngroups;
1817         struct as *as;
1818         char *status;
1819         pid_t pid, ppid;
1820         k_sigset_t current, ignore, handle;
1821         int    i, lx_sig;
1822         pid_t real_pid;
1823 
1824         real_pid = get_real_pid(lxpnp->lxpr_pid);
1825         p = lxpr_lock(real_pid);
1826         if (p == NULL) {
1827                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1828                 return;
1829         }
1830 
1831         pid = p->p_pid;
1832 
1833         /*
1834          * Convert pid to the Linux default of 1 if we're the zone's init
1835          * process or if we're the zone's zsched the pid is 0.
1836          */
1837         if (pid == curproc->p_zone->zone_proc_initpid) {
1838                 pid = 1;
1839                 ppid = 0;       /* parent pid for init is 0 */
1840         } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
1841                 pid = 0;        /* zsched is pid 0 */
1842                 ppid = 0;       /* parent pid for zsched is itself */
1843         } else {
1844                 /*
1845                  * Make sure not to reference parent PIDs that reside outside
1846                  * the zone
1847                  */
1848                 ppid = ((p->p_flag & SZONETOP)
1849                     ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
1850 
1851                 /*
1852                  * Convert ppid to the Linux default of 1 if our parent is the
1853                  * zone's init process
1854                  */
1855                 if (ppid == curproc->p_zone->zone_proc_initpid)
1856                         ppid = 1;
1857         }
1858 
1859         t = lxpr_get_thread(p, lookup_id);
1860         if (t != NULL) {
1861                 switch (t->t_state) {
1862                 case TS_SLEEP:
1863                         status = "S (sleeping)";
1864                         break;
1865                 case TS_RUN:
1866                 case TS_ONPROC:
1867                         status = "R (running)";
1868                         break;
1869                 case TS_ZOMB:
1870                         status = "Z (zombie)";
1871                         break;
1872                 case TS_STOPPED:
1873                         status = "T (stopped)";
1874                         break;
1875                 default:
1876                         status = "! (unknown)";
1877                         break;
1878                 }
1879                 thread_unlock(t);
1880         } else {
1881                 if (lookup_id != 0) {
1882                         /* we can't find this specific thread */
1883                         lxpr_uiobuf_seterr(uiobuf, EINVAL);
1884                         lxpr_unlock(p);
1885                         return;
1886                 }
1887 
1888                 /*
1889                  * there is a hole in the exit code, where a proc can have
1890                  * no threads but it is yet to be flagged SZOMB. We will
1891                  * assume we are about to become a zombie
1892                  */
1893                 status = "Z (zombie)";
1894         }
1895 
1896         up = PTOU(p);
1897         mutex_enter(&p->p_crlock);
1898         crhold(cr = p->p_cred);
1899         mutex_exit(&p->p_crlock);
1900 
1901         lxpr_uiobuf_printf(uiobuf,
1902             "Name:\t%s\n"
1903             "State:\t%s\n"
1904             "Tgid:\t%d\n"
1905             "Pid:\t%d\n"
1906             "PPid:\t%d\n"
1907             "TracerPid:\t%d\n"
1908             "Uid:\t%u\t%u\t%u\t%u\n"
1909             "Gid:\t%u\t%u\t%u\t%u\n"
1910             "FDSize:\t%d\n"
1911             "Groups:\t",
1912             up->u_comm,
1913             status,
1914             pid, /* thread group id - same as pid */
1915             (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
1916             ppid,
1917             0,
1918             crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
1919             crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
1920             p->p_fno_ctl);
1921 
1922 
1923         ngroups = crgetngroups(cr);
1924         groups  = crgetgroups(cr);
1925         for (i = 0; i < ngroups; i++) {
1926                 lxpr_uiobuf_printf(uiobuf,
1927                     "%u ",
1928                     groups[i]);
1929         }
1930         crfree(cr);
1931 
1932         as = p->p_as;
1933         if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
1934                 size_t vsize, nlocked, rss;
1935 
1936                 mutex_exit(&p->p_lock);
1937                 AS_LOCK_ENTER(as, RW_READER);
1938                 vsize = as->a_resvsize;
1939                 rss = rm_asrss(as);
1940                 AS_LOCK_EXIT(as);
1941                 mutex_enter(&p->p_lock);
1942                 nlocked = p->p_locked_mem;
1943 
1944                 lxpr_uiobuf_printf(uiobuf,
1945                     "\n"
1946                     "VmSize:\t%8lu kB\n"
1947                     "VmLck:\t%8lu kB\n"
1948                     "VmRSS:\t%8lu kB\n"
1949                     "VmData:\t%8lu kB\n"
1950                     "VmStk:\t%8lu kB\n"
1951                     "VmExe:\t%8lu kB\n"
1952                     "VmLib:\t%8lu kB",
1953                     btok(vsize),
1954                     btok(nlocked),
1955                     ptok(rss),
1956                     0l,
1957                     btok(p->p_stksize),
1958                     ptok(rss),
1959                     0l);
1960         }
1961 
1962         lxpr_uiobuf_printf(uiobuf, "\nThreads:\t%u", p->p_lwpcnt);
1963 
1964         sigemptyset(&current);
1965         sigemptyset(&ignore);
1966         sigemptyset(&handle);
1967 
1968         for (i = 1; i < NSIG; i++) {
1969                 lx_sig = stol_signo[i];
1970 
1971                 if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
1972                         if (sigismember(&p->p_sig, i))
1973                                 sigaddset(&current, lx_sig);
1974 
1975                         if (up->u_signal[i - 1] == SIG_IGN)
1976                                 sigaddset(&ignore, lx_sig);
1977                         else if (up->u_signal[i - 1] != SIG_DFL)
1978                                 sigaddset(&handle, lx_sig);
1979                 }
1980         }
1981 
1982         lxpr_uiobuf_printf(uiobuf,
1983             "\n"
1984             "SigPnd:\t%08x%08x\n"
1985             "SigBlk:\t%08x%08x\n"
1986             "SigIgn:\t%08x%08x\n"
1987             "SigCgt:\t%08x%08x\n"
1988             "CapInh:\t%016x\n"
1989             "CapPrm:\t%016x\n"
1990             "CapEff:\t%016x\n",
1991             current.__sigbits[1], current.__sigbits[0],
1992             0, 0, /* signals blocked on per thread basis */
1993             ignore.__sigbits[1], ignore.__sigbits[0],
1994             handle.__sigbits[1], handle.__sigbits[0],
1995             /* Can't do anything with linux capabilities */
1996             0,
1997             0,
1998             0);
1999 
2000         lxpr_uiobuf_printf(uiobuf,
2001             "CapBnd:\t%016llx\n",
2002             /* We report the full capability bounding set */
2003             0x1fffffffffLL);
2004 
2005         lxpr_unlock(p);
2006 }
2007 
2008 /*
2009  * lxpr_read_pid_status(): status file
2010  */
2011 static void
2012 lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2013 {
2014         ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
2015 
2016         lxpr_read_status_common(lxpnp, uiobuf, 0);
2017 }
2018 
2019 /*
2020  * lxpr_read_pid_tid_status(): status file
2021  */
2022 static void
2023 lxpr_read_pid_tid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2024 {
2025         ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STATUS);
2026         lxpr_read_status_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
2027 }
2028 
2029 /*
2030  * pid/tid common code to read stat file
2031  */
2032 static void
2033 lxpr_read_stat_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
2034     uint_t lookup_id)
2035 {
2036         proc_t *p;
2037         kthread_t *t;
2038         struct as *as;
2039         char stat;
2040         pid_t pid, ppid, pgpid, spid;
2041         gid_t psgid;
2042         dev_t psdev;
2043         size_t rss, vsize;
2044         int nice, pri;
2045         caddr_t wchan;
2046         processorid_t cpu;
2047         pid_t real_pid;
2048 
2049         real_pid = get_real_pid(lxpnp->lxpr_pid);
2050         p = lxpr_lock(real_pid);
2051         if (p == NULL) {
2052                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
2053                 return;
2054         }
2055 
2056         pid = p->p_pid;
2057 
2058         /*
2059          * Set Linux defaults if we're the zone's init process
2060          */
2061         if (pid == curproc->p_zone->zone_proc_initpid) {
2062                 pid = 1;                /* PID for init */
2063                 ppid = 0;               /* parent PID for init is 0 */
2064                 pgpid = 0;              /* process group for init is 0 */
2065                 psgid = (gid_t)-1;      /* credential GID for init is -1 */
2066                 spid = 0;               /* session id for init is 0 */
2067                 psdev = 0;              /* session device for init is 0 */
2068         } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
2069                 pid = 0;                /* PID for zsched */
2070                 ppid = 0;               /* parent PID for zsched is 0 */
2071                 pgpid = 0;              /* process group for zsched is 0 */
2072                 psgid = (gid_t)-1;      /* credential GID for zsched is -1 */
2073                 spid = 0;               /* session id for zsched is 0 */
2074                 psdev = 0;              /* session device for zsched is 0 */
2075         } else {
2076                 /*
2077                  * Make sure not to reference parent PIDs that reside outside
2078                  * the zone
2079                  */
2080                 ppid = ((p->p_flag & SZONETOP) ?
2081                     curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
2082 
2083                 /*
2084                  * Convert ppid to the Linux default of 1 if our parent is the
2085                  * zone's init process
2086                  */
2087                 if (ppid == curproc->p_zone->zone_proc_initpid)
2088                         ppid = 1;
2089 
2090                 pgpid = p->p_pgrp;
2091 
2092                 mutex_enter(&p->p_splock);
2093                 mutex_enter(&p->p_sessp->s_lock);
2094                 spid = p->p_sessp->s_sid;
2095                 psdev = p->p_sessp->s_dev;
2096                 if (p->p_sessp->s_cred)
2097                         psgid = crgetgid(p->p_sessp->s_cred);
2098                 else
2099                         psgid = crgetgid(p->p_cred);
2100 
2101                 mutex_exit(&p->p_sessp->s_lock);
2102                 mutex_exit(&p->p_splock);
2103         }
2104 
2105         t = lxpr_get_thread(p, lookup_id);
2106         if (t != NULL) {
2107                 switch (t->t_state) {
2108                 case TS_SLEEP:
2109                         stat = 'S'; break;
2110                 case TS_RUN:
2111                 case TS_ONPROC:
2112                         stat = 'R'; break;
2113                 case TS_ZOMB:
2114                         stat = 'Z'; break;
2115                 case TS_STOPPED:
2116                         stat = 'T'; break;
2117                 default:
2118                         stat = '!'; break;
2119                 }
2120 
2121                 if (CL_DONICE(t, NULL, 0, &nice) != 0)
2122                         nice = 0;
2123 
2124                 pri = t->t_pri;
2125                 wchan = t->t_wchan;
2126                 cpu = t->t_cpu->cpu_id;
2127                 thread_unlock(t);
2128         } else {
2129                 if (lookup_id != 0) {
2130                         /* we can't find this specific thread */
2131                         lxpr_uiobuf_seterr(uiobuf, EINVAL);
2132                         lxpr_unlock(p);
2133                         return;
2134                 }
2135 
2136                 /* Only zombies have no threads */
2137                 stat = 'Z';
2138                 nice = 0;
2139                 pri = 0;
2140                 wchan = 0;
2141                 cpu = 0;
2142         }
2143         as = p->p_as;
2144         mutex_exit(&p->p_lock);
2145         AS_LOCK_ENTER(as, RW_READER);
2146         vsize = as->a_resvsize;
2147         rss = rm_asrss(as);
2148         AS_LOCK_EXIT(as);
2149         mutex_enter(&p->p_lock);
2150 
2151         lxpr_uiobuf_printf(uiobuf,
2152             "%d (%s) %c %d %d %d %d %d "
2153             "%lu %lu %lu %lu %lu "
2154             "%lu %lu %ld %ld "
2155             "%d %d %d "
2156             "%lu "
2157             "%lu "
2158             "%lu %ld %llu "
2159             "%lu %lu %u "
2160             "%lu %lu "
2161             "%lu %lu %lu %lu "
2162             "%lu "
2163             "%lu %lu "
2164             "%d "
2165             "%d"
2166             "\n",
2167             (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
2168             PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
2169             0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
2170             p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
2171             pri, nice, p->p_lwpcnt,
2172             0l, /* itrealvalue (time before next SIGALRM) */
2173             PTOU(p)->u_ticks,
2174             vsize, rss, p->p_vmem_ctl,
2175             0l, 0l, USRSTACK, /* startcode, endcode, startstack */
2176             0l, 0l, /* kstkesp, kstkeip */
2177             0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
2178             wchan,
2179             0l, 0l, /* nswap, cnswap */
2180             0, /* exit_signal */
2181             cpu);
2182 
2183         lxpr_unlock(p);
2184 }
2185 
/*
 * lxpr_read_pid_stat(): pid stat file
 *
 * Thin wrapper around lxpr_read_stat_common().  A lookup id of 0 is passed,
 * which the common routine treats as "no specific thread requested".
 */
static void
lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);

        lxpr_read_stat_common(lxpnp, uiobuf, 0);
}
2196 
/*
 * lxpr_read_pid_tid_stat(): per-thread (pid/tid) stat file
 *
 * Thin wrapper around lxpr_read_stat_common().  The node's lxpr_desc value is
 * passed as the lookup id; the common routine reports EINVAL through the
 * uiobuf when a non-zero lookup id does not match a thread.
 */
static void
lxpr_read_pid_tid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STAT);
        lxpr_read_stat_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
}
2206 
/*
 * lxpr_read_net_arp(): placeholder reader (presumably for the net "arp"
 * file -- confirm against the node table).  The body is empty, so nothing
 * is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2212 
/*
 * Per-interface counters collected by lxpr_kstat_ifstat() and printed by
 * lxpr_read_net_dev().  Each field is annotated with the named-kstat entry
 * it is filled from; a field whose entry is absent stays zero because the
 * structure is cleared before it is populated.
 */
struct lxpr_ifstat {
        uint64_t rx_bytes;      /* "rbytes64" */
        uint64_t rx_packets;    /* "ipackets64" */
        uint64_t rx_errors;     /* "ierrors" */
        uint64_t rx_drop;       /* "norcvbuf" */
        uint64_t tx_bytes;      /* "obytes64" */
        uint64_t tx_packets;    /* "opackets64" */
        uint64_t tx_errors;     /* "oerrors" */
        uint64_t tx_drop;       /* "noxmtbuf" */
        uint64_t collisions;    /* "collisions" */
        uint64_t rx_multicast;  /* "multircv" */
};
2225 
2226 static void *
2227 lxpr_kstat_read(kstat_t *kn, boolean_t byname, size_t *size, int *num)
2228 {
2229         kstat_t *kp;
2230         int i, nrec = 0;
2231         size_t bufsize;
2232         void *buf = NULL;
2233 
2234         if (byname == B_TRUE) {
2235                 kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
2236                     kn->ks_name, getzoneid());
2237         } else {
2238                 kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
2239         }
2240         if (kp == NULL) {
2241                 return (NULL);
2242         }
2243         if (kp->ks_flags & KSTAT_FLAG_INVALID) {
2244                 kstat_rele(kp);
2245                 return (NULL);
2246         }
2247 
2248         bufsize = kp->ks_data_size + 1;
2249         kstat_rele(kp);
2250 
2251         /*
2252          * The kstat in question is released so that kmem_alloc(KM_SLEEP) is
2253          * performed without it held.  After the alloc, the kstat is reacquired
2254          * and its size is checked again. If the buffer is no longer large
2255          * enough, the alloc and check are repeated up to three times.
2256          */
2257         for (i = 0; i < 2; i++) {
2258                 buf = kmem_alloc(bufsize, KM_SLEEP);
2259 
2260                 /* Check if bufsize still appropriate */
2261                 if (byname == B_TRUE) {
2262                         kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
2263                             kn->ks_name, getzoneid());
2264                 } else {
2265                         kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
2266                 }
2267                 if (kp == NULL || kp->ks_flags & KSTAT_FLAG_INVALID) {
2268                         if (kp != NULL) {
2269                                 kstat_rele(kp);
2270                         }
2271                         kmem_free(buf, bufsize);
2272                         return (NULL);
2273                 }
2274                 KSTAT_ENTER(kp);
2275                 (void) KSTAT_UPDATE(kp, KSTAT_READ);
2276                 if (bufsize < kp->ks_data_size) {
2277                         kmem_free(buf, bufsize);
2278                         buf = NULL;
2279                         bufsize = kp->ks_data_size + 1;
2280                         KSTAT_EXIT(kp);
2281                         kstat_rele(kp);
2282                         continue;
2283                 } else {
2284                         if (KSTAT_SNAPSHOT(kp, buf, KSTAT_READ) != 0) {
2285                                 kmem_free(buf, bufsize);
2286                                 buf = NULL;
2287                         }
2288                         nrec = kp->ks_ndata;
2289                         KSTAT_EXIT(kp);
2290                         kstat_rele(kp);
2291                         break;
2292                 }
2293         }
2294 
2295         if (buf != NULL) {
2296                 *size = bufsize;
2297                 *num = nrec;
2298         }
2299         return (buf);
2300 }
2301 
2302 static int
2303 lxpr_kstat_ifstat(kstat_t *kn, struct lxpr_ifstat *ifs)
2304 {
2305         kstat_named_t *kp;
2306         int i, num;
2307         size_t size;
2308 
2309         /*
2310          * Search by name instead of by kid since there's a small window to
2311          * race against kstats being added/removed.
2312          */
2313         bzero(ifs, sizeof (*ifs));
2314         kp = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
2315         if (kp == NULL)
2316                 return (-1);
2317         for (i = 0; i < num; i++) {
2318                 if (strncmp(kp[i].name, "rbytes64", KSTAT_STRLEN) == 0)
2319                         ifs->rx_bytes = kp[i].value.ui64;
2320                 else if (strncmp(kp[i].name, "ipackets64", KSTAT_STRLEN) == 0)
2321                         ifs->rx_packets = kp[i].value.ui64;
2322                 else if (strncmp(kp[i].name, "ierrors", KSTAT_STRLEN) == 0)
2323                         ifs->rx_errors = kp[i].value.ui32;
2324                 else if (strncmp(kp[i].name, "norcvbuf", KSTAT_STRLEN) == 0)
2325                         ifs->rx_drop = kp[i].value.ui32;
2326                 else if (strncmp(kp[i].name, "multircv", KSTAT_STRLEN) == 0)
2327                         ifs->rx_multicast = kp[i].value.ui32;
2328                 else if (strncmp(kp[i].name, "obytes64", KSTAT_STRLEN) == 0)
2329                         ifs->tx_bytes = kp[i].value.ui64;
2330                 else if (strncmp(kp[i].name, "opackets64", KSTAT_STRLEN) == 0)
2331                         ifs->tx_packets = kp[i].value.ui64;
2332                 else if (strncmp(kp[i].name, "oerrors", KSTAT_STRLEN) == 0)
2333                         ifs->tx_errors = kp[i].value.ui32;
2334                 else if (strncmp(kp[i].name, "noxmtbuf", KSTAT_STRLEN) == 0)
2335                         ifs->tx_drop = kp[i].value.ui32;
2336                 else if (strncmp(kp[i].name, "collisions", KSTAT_STRLEN) == 0)
2337                         ifs->collisions = kp[i].value.ui32;
2338         }
2339         kmem_free(kp, size);
2340         return (0);
2341 }
2342 
2343 /* ARGSUSED */
2344 static void
2345 lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2346 {
2347         kstat_t *ksr;
2348         kstat_t ks0;
2349         int i, nidx;
2350         size_t sidx;
2351         struct lxpr_ifstat ifs;
2352 
2353         lxpr_uiobuf_printf(uiobuf, "Inter-|   Receive                   "
2354             "                             |  Transmit\n");
2355         lxpr_uiobuf_printf(uiobuf, " face |bytes    packets errs drop fifo"
2356             " frame compressed multicast|bytes    packets errs drop fifo"
2357             " colls carrier compressed\n");
2358 
2359         ks0.ks_kid = 0;
2360         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
2361         if (ksr == NULL)
2362                 return;
2363 
2364         for (i = 1; i < nidx; i++) {
2365                 if (strncmp(ksr[i].ks_module, "link", KSTAT_STRLEN) == 0 ||
2366                     strncmp(ksr[i].ks_module, "lo", KSTAT_STRLEN) == 0) {
2367                         if (lxpr_kstat_ifstat(&ksr[i], &ifs) != 0)
2368                                 continue;
2369 
2370                         /* Overwriting the name is ok in the local snapshot */
2371                         lx_ifname_convert(ksr[i].ks_name, LX_IF_FROMNATIVE);
2372                         lxpr_uiobuf_printf(uiobuf, "%6s: %7llu %7llu %4lu "
2373                             "%4lu %4u %5u %10u %9lu %8llu %7llu %4lu %4lu %4u "
2374                             "%5lu %7u %10u\n",
2375                             ksr[i].ks_name,
2376                             ifs.rx_bytes, ifs.rx_packets,
2377                             ifs.rx_errors, ifs.rx_drop,
2378                             0, 0, 0, ifs.rx_multicast,
2379                             ifs.tx_bytes, ifs.tx_packets,
2380                             ifs.tx_errors, ifs.tx_drop,
2381                             0, ifs.collisions, 0, 0);
2382                 }
2383         }
2384 
2385         kmem_free(ksr, sidx);
2386 }
2387 
/*
 * lxpr_read_net_dev_mcast(): placeholder reader (presumably for the net
 * "dev_mcast" file -- confirm against the node table).  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2393 
2394 static void
2395 lxpr_inet6_out(const in6_addr_t *addr, char buf[33])
2396 {
2397         const uint8_t *ip = addr->s6_addr;
2398         char digits[] = "0123456789abcdef";
2399         int i;
2400         for (i = 0; i < 16; i++) {
2401                 buf[2 * i] = digits[ip[i] >> 4];
2402                 buf[2 * i + 1] = digits[ip[i] & 0xf];
2403         }
2404         buf[32] = '\0';
2405 }
2406 
2407 /* ARGSUSED */
2408 static void
2409 lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2410 {
2411         netstack_t *ns;
2412         ip_stack_t *ipst;
2413         ill_t *ill;
2414         ipif_t *ipif;
2415         ill_walk_context_t      ctx;
2416         char ifname[LIFNAMSIZ], ip6out[33];
2417 
2418         ns = netstack_get_current();
2419         if (ns == NULL)
2420                 return;
2421         ipst = ns->netstack_ip;
2422 
2423         rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2424         ill = ILL_START_WALK_V6(&ctx, ipst);
2425 
2426         for (; ill != NULL; ill = ill_next(&ctx, ill)) {
2427                 for (ipif = ill->ill_ipif; ipif != NULL;
2428                     ipif = ipif->ipif_next) {
2429                         uint_t index = ill->ill_phyint->phyint_ifindex;
2430                         int plen = ip_mask_to_plen_v6(&ipif->ipif_v6net_mask);
2431                         unsigned int scope = lx_ipv6_scope_convert(
2432                             &ipif->ipif_v6lcl_addr);
2433                         /* Always report PERMANENT flag */
2434                         int flag = 0x80;
2435 
2436                         (void) snprintf(ifname, LIFNAMSIZ, "%s", ill->ill_name);
2437                         lx_ifname_convert(ifname, LX_IF_FROMNATIVE);
2438                         lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out);
2439 
2440                         lxpr_uiobuf_printf(uiobuf, "%32s %02x %02x %02x %02x"
2441                             " %8s\n", ip6out, index, plen, scope, flag, ifname);
2442                 }
2443         }
2444         rw_exit(&ipst->ips_ill_g_lock);
2445         netstack_rele(ns);
2446 }
2447 
/*
 * lxpr_read_net_igmp(): placeholder reader (presumably for the net "igmp"
 * file -- confirm against the node table).  The body is empty, so nothing
 * is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2453 
/*
 * lxpr_read_net_ip_mr_cache(): placeholder reader (presumably for the net
 * "ip_mr_cache" file -- confirm against the node table).  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2459 
/*
 * lxpr_read_net_ip_mr_vif(): placeholder reader (presumably for the net
 * "ip_mr_vif" file -- confirm against the node table).  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2465 
2466 static void
2467 lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf)
2468 {
2469         uint32_t flags;
2470         char name[IFNAMSIZ];
2471         char ipv6addr[33];
2472 
2473         lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr);
2474         lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr,
2475             ip_mask_to_plen_v6(&ire->ire_mask_v6));
2476 
2477         /* punt on this for now */
2478         lxpr_uiobuf_printf(uiobuf, "%s %02x ",
2479             "00000000000000000000000000000000", 0);
2480 
2481         lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr);
2482         lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr);
2483 
2484         flags = ire->ire_flags &
2485             (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
2486         /* Linux's RTF_LOCAL equivalent */
2487         if (ire->ire_metrics.iulp_local)
2488                 flags |= 0x80000000;
2489 
2490         if (ire->ire_ill != NULL) {
2491                 ill_get_name(ire->ire_ill, name, sizeof (name));
2492                 lx_ifname_convert(name, LX_IF_FROMNATIVE);
2493         } else {
2494                 name[0] = '\0';
2495         }
2496 
2497         lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n",
2498             0, /* metric */
2499             ire->ire_refcnt,
2500             0,
2501             flags,
2502             name);
2503 }
2504 
2505 /* ARGSUSED */
2506 static void
2507 lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2508 {
2509         netstack_t *ns;
2510         ip_stack_t *ipst;
2511 
2512         ns = netstack_get_current();
2513         if (ns == NULL)
2514                 return;
2515         ipst = ns->netstack_ip;
2516 
2517         /*
2518          * LX branded zones are expected to have exclusive IP stack, hence
2519          * using ALL_ZONES as the zoneid filter.
2520          */
2521         ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst);
2522 
2523         netstack_rele(ns);
2524 }
2525 
/*
 * lxpr_read_net_mcfilter(): placeholder reader (presumably for the net
 * "mcfilter" file -- confirm against the node table).  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2531 
/*
 * lxpr_read_net_netstat(): placeholder reader (presumably for the net
 * "netstat" file -- confirm against the node table).  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2537 
/*
 * lxpr_read_net_raw(): placeholder reader (presumably for the net "raw"
 * file -- confirm against the node table).  The body is empty, so nothing
 * is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2543 
/*
 * Evaluates to non-zero when 'type' has any of the listed IRE type bits set.
 * lxpr_format_route_ipv4() uses this to leave such routes out of its output.
 * The argument is parenthesized so that a compound expression (for example
 * "a | b") is masked as a whole rather than being split by precedence.
 */
#define LXPR_SKIP_ROUTE(type)   \
        (((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \
        IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & (type)) != 0)
2547 
2548 static void
2549 lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf)
2550 {
2551         uint32_t flags;
2552         char name[IFNAMSIZ];
2553         ill_t *ill;
2554         ire_t *nire;
2555         ipif_t *ipif;
2556         ipaddr_t gateway;
2557 
2558         if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0)
2559                 return;
2560 
2561         /* These route flags have direct Linux equivalents */
2562         flags = ire->ire_flags &
2563             (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
2564 
2565         /*
2566          * Search for a suitable IRE for naming purposes.
2567          * On Linux, the default route is typically associated with the
2568          * interface used to access gateway.  The default IRE on Illumos
2569          * typically lacks an ill reference but its parent might have one.
2570          */
2571         nire = ire;
2572         do {
2573                 ill = nire->ire_ill;
2574                 nire = nire->ire_dep_parent;
2575         } while (ill == NULL && nire != NULL);
2576         if (ill != NULL) {
2577                 ill_get_name(ill, name, sizeof (name));
2578                 lx_ifname_convert(name, LX_IF_FROMNATIVE);
2579         } else {
2580                 name[0] = '*';
2581                 name[1] = '\0';
2582         }
2583 
2584         /*
2585          * Linux suppresses the gateway address for directly connected
2586          * interface networks.  To emulate this behavior, we walk all addresses
2587          * of a given route interface.  If one matches the gateway, it is
2588          * displayed as NULL.
2589          */
2590         gateway = ire->ire_gateway_addr;
2591         if ((ill = ire->ire_ill) != NULL) {
2592                 for (ipif = ill->ill_ipif; ipif != NULL;
2593                     ipif = ipif->ipif_next) {
2594                         if (ipif->ipif_lcl_addr == gateway) {
2595                                 gateway = 0;
2596                                 break;
2597                         }
2598                 }
2599         }
2600 
2601         lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2602             "%d\t%08X\t%d\t%u\t%u\n",
2603             name,
2604             ire->ire_addr,
2605             gateway,
2606             flags, 0, 0,
2607             0, /* priority */
2608             ire->ire_mask,
2609             0, 0, /* mss, window */
2610             ire->ire_metrics.iulp_rtt);
2611 }
2612 
2613 /* ARGSUSED */
2614 static void
2615 lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2616 {
2617         netstack_t *ns;
2618         ip_stack_t *ipst;
2619 
2620         lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t"
2621             "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
2622 
2623         ns = netstack_get_current();
2624         if (ns == NULL)
2625                 return;
2626         ipst = ns->netstack_ip;
2627 
2628         /*
2629          * LX branded zones are expected to have exclusive IP stack, hence
2630          * using ALL_ZONES as the zoneid filter.
2631          */
2632         ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst);
2633 
2634         netstack_rele(ns);
2635 }
2636 
/*
 * lxpr_read_net_rpc(): placeholder reader (presumably for the net "rpc"
 * entry -- confirm against the node table).  The body is empty, so nothing
 * is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2642 
/*
 * lxpr_read_net_rt_cache(): placeholder reader (presumably for the net
 * "rt_cache" file -- confirm against the node table).  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2648 
/*
 * lxpr_read_net_sockstat(): placeholder reader (presumably for the net
 * "sockstat" file -- confirm against the node table).  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2654 
/*
 * Describes one protocol section of the snmp output: the protocol name plus
 * the ordered list of fields to print for it.
 */
typedef struct lxpr_snmp_table {
        /* Protocol name; compared against a kstat's ks_name to find its data */
        const char *lst_proto;
        /* Field names in output order; the list is terminated by NULL */
        const char *lst_fields[];
} lxpr_snmp_table_t;
2659 
/*
 * Fields printed for the "ip" section, in output order.  Each name is looked
 * up in the matching named kstat; lxpr_kstat_print_tab() capitalizes the
 * first letter when printing the header line.
 */
static lxpr_snmp_table_t lxpr_snmp_ip = { "ip",
        {
        "forwarding", "defaultTTL", "inReceives", "inHdrErrors",
        "inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards",
        "inDelivers", "outRequests", "outDiscards", "outNoRoutes",
        "reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs",
        "fragFails", "fragCreates",
        NULL
        }
};
/*
 * Fields printed for the "icmp" section, in output order.  Each name is
 * looked up in the matching named kstat; names with no matching entry are
 * printed as 0 by lxpr_kstat_print_tab().
 */
static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp",
        {
        "inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds",
        "inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps",
        "inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps",
        "outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds",
        "outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos",
        "outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks",
        "outAddrMaskReps",
        NULL
        }
};
/*
 * Fields printed for the "tcp" section, in output order.  Each name is
 * looked up in the matching named kstat; names with no matching entry are
 * printed as 0 by lxpr_kstat_print_tab().
 */
static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp",
        {
        "rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens",
        "passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs",
        "outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors",
        NULL
        }
};
/*
 * Fields printed for the "udp" section, in output order.  Each name is
 * looked up in the matching named kstat; names with no matching entry are
 * printed as 0 by lxpr_kstat_print_tab().
 */
static lxpr_snmp_table_t lxpr_snmp_udp = { "udp",
        {
        "inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors",
        "sndbufErrors", "inCsumErrors",
        NULL
        }
};
2697 
/*
 * NULL-terminated list of the protocol tables emitted by
 * lxpr_read_net_snmp(), in the order in which they are printed.
 */
static lxpr_snmp_table_t *lxpr_net_snmptab[] = {
        &lxpr_snmp_ip,
        &lxpr_snmp_icmp,
        &lxpr_snmp_tcp,
        &lxpr_snmp_udp,
        NULL
};
2705 
2706 static void
2707 lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table,
2708     kstat_t *kn)
2709 {
2710         kstat_named_t *klist;
2711         char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN];
2712         int i, j, num;
2713         size_t size;
2714 
2715         klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
2716         if (klist == NULL)
2717                 return;
2718 
2719         /* Print the header line, fields capitalized */
2720         (void) strncpy(upname, table->lst_proto, KSTAT_STRLEN);
2721         upname[0] = toupper(upname[0]);
2722         lxpr_uiobuf_printf(uiobuf, "%s:", upname);
2723         for (i = 0; table->lst_fields[i] != NULL; i++) {
2724                 (void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN);
2725                 upfield[0] = toupper(upfield[0]);
2726                 lxpr_uiobuf_printf(uiobuf, " %s", upfield);
2727         }
2728         lxpr_uiobuf_printf(uiobuf, "\n%s:", upname);
2729 
2730         /* Then loop back through to print the value line. */
2731         for (i = 0; table->lst_fields[i] != NULL; i++) {
2732                 kstat_named_t *kpoint = NULL;
2733                 for (j = 0; j < num; j++) {
2734                         if (strncmp(klist[j].name, table->lst_fields[i],
2735                             KSTAT_STRLEN) == 0) {
2736                                 kpoint = &klist[j];
2737                                 break;
2738                         }
2739                 }
2740                 if (kpoint == NULL) {
2741                         /* Output 0 for unknown fields */
2742                         lxpr_uiobuf_printf(uiobuf, " 0");
2743                 } else {
2744                         switch (kpoint->data_type) {
2745                         case KSTAT_DATA_INT32:
2746                                 lxpr_uiobuf_printf(uiobuf, " %d",
2747                                     kpoint->value.i32);
2748                                 break;
2749                         case KSTAT_DATA_UINT32:
2750                                 lxpr_uiobuf_printf(uiobuf, " %u",
2751                                     kpoint->value.ui32);
2752                                 break;
2753                         case KSTAT_DATA_INT64:
2754                                 lxpr_uiobuf_printf(uiobuf, " %ld",
2755                                     kpoint->value.l);
2756                                 break;
2757                         case KSTAT_DATA_UINT64:
2758                                 lxpr_uiobuf_printf(uiobuf, " %lu",
2759                                     kpoint->value.ul);
2760                                 break;
2761                         }
2762                 }
2763         }
2764         lxpr_uiobuf_printf(uiobuf, "\n");
2765         kmem_free(klist, size);
2766 }
2767 
2768 /* ARGSUSED */
2769 static void
2770 lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2771 {
2772         kstat_t *ksr;
2773         kstat_t ks0;
2774         lxpr_snmp_table_t **table = lxpr_net_snmptab;
2775         int i, t, nidx;
2776         size_t sidx;
2777 
2778         ks0.ks_kid = 0;
2779         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
2780         if (ksr == NULL)
2781                 return;
2782 
2783         for (t = 0; table[t] != NULL; t++) {
2784                 for (i = 0; i < nidx; i++) {
2785                         if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0)
2786                                 continue;
2787                         if (strncmp(ksr[i].ks_name, table[t]->lst_proto,
2788                             KSTAT_STRLEN) == 0) {
2789                                 lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]);
2790                                 break;
2791                         }
2792                 }
2793         }
2794         kmem_free(ksr, sidx);
2795 }
2796 
/*
 * lxpr_read_net_stat(): placeholder reader (presumably for the net "stat"
 * entry -- confirm against the node table).  The body is empty, so nothing
 * is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2802 
2803 static int
2804 lxpr_convert_tcp_state(int st)
2805 {
2806         /*
2807          * Derived from the enum located in the Linux kernel sources:
2808          * include/net/tcp_states.h
2809          */
2810         switch (st) {
2811         case TCPS_ESTABLISHED:
2812                 return (1);
2813         case TCPS_SYN_SENT:
2814                 return (2);
2815         case TCPS_SYN_RCVD:
2816                 return (3);
2817         case TCPS_FIN_WAIT_1:
2818                 return (4);
2819         case TCPS_FIN_WAIT_2:
2820                 return (5);
2821         case TCPS_TIME_WAIT:
2822                 return (6);
2823         case TCPS_CLOSED:
2824                 return (7);
2825         case TCPS_CLOSE_WAIT:
2826                 return (8);
2827         case TCPS_LAST_ACK:
2828                 return (9);
2829         case TCPS_LISTEN:
2830                 return (10);
2831         case TCPS_CLOSING:
2832                 return (11);
2833         default:
2834                 /* No translation for TCPS_IDLE, TCPS_BOUND or anything else */
2835                 return (0);
2836         }
2837 }
2838 
2839 static void
2840 lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
2841 {
2842         int i, sl = 0;
2843         connf_t *connfp;
2844         conn_t *connp;
2845         netstack_t *ns;
2846         ip_stack_t *ipst;
2847 
2848         ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
2849         if (ipver == IPV4_VERSION) {
2850                 lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address   "
2851                     "st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout "
2852                     "inode\n");
2853         } else {
2854                 lxpr_uiobuf_printf(uiobuf, "  sl  "
2855                     "local_address                         "
2856                     "remote_address                        "
2857                     "st tx_queue rx_queue tr tm->when retrnsmt   "
2858                     "uid  timeout inode\n");
2859         }
2860         /*
2861          * Due to differences between the Linux and illumos TCP
2862          * implementations, some data will be omitted from the output here.
2863          *
2864          * Valid fields:
2865          *  - local_address
2866          *  - remote_address
2867          *  - st
2868          *  - tx_queue
2869          *  - rx_queue
2870          *  - uid
2871          *  - inode
2872          *
2873          * Omitted/invalid fields
2874          *  - tr
2875          *  - tm->when
2876          *  - retrnsmt
2877          *  - timeout
2878          */
2879 
2880         ns = netstack_get_current();
2881         if (ns == NULL)
2882                 return;
2883         ipst = ns->netstack_ip;
2884 
2885         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2886                 connfp = &ipst->ips_ipcl_globalhash_fanout[i];
2887                 connp = NULL;
2888                 while ((connp =
2889                     ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) {
2890                         tcp_t *tcp;
2891                         vattr_t attr;
2892                         sonode_t *so = (sonode_t *)connp->conn_upper_handle;
2893                         vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
2894                         if (connp->conn_ipversion != ipver)
2895                                 continue;
2896                         tcp = connp->conn_tcp;
2897                         if (ipver == IPV4_VERSION) {
2898                                 lxpr_uiobuf_printf(uiobuf,
2899                                     "%4d: %08X:%04X %08X:%04X ",
2900                                     ++sl,
2901                                     connp->conn_laddr_v4,
2902                                     ntohs(connp->conn_lport),
2903                                     connp->conn_faddr_v4,
2904                                     ntohs(connp->conn_fport));
2905                         } else {
2906                                 lxpr_uiobuf_printf(uiobuf, "%4d: "
2907                                     "%08X%08X%08X%08X:%04X "
2908                                     "%08X%08X%08X%08X:%04X ",
2909                                     ++sl,
2910                                     connp->conn_laddr_v6.s6_addr32[0],
2911                                     connp->conn_laddr_v6.s6_addr32[1],
2912                                     connp->conn_laddr_v6.s6_addr32[2],
2913                                     connp->conn_laddr_v6.s6_addr32[3],
2914                                     ntohs(connp->conn_lport),
2915                                     connp->conn_faddr_v6.s6_addr32[0],
2916                                     connp->conn_faddr_v6.s6_addr32[1],
2917                                     connp->conn_faddr_v6.s6_addr32[2],
2918                                     connp->conn_faddr_v6.s6_addr32[3],
2919                                     ntohs(connp->conn_fport));
2920                         }
2921 
2922                         /* fetch the simulated inode for the socket */
2923                         if (vp == NULL ||
2924                             VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
2925                                 attr.va_nodeid = 0;
2926 
2927                         lxpr_uiobuf_printf(uiobuf,
2928                             "%02X %08X:%08X %02X:%08X %08X "
2929                             "%5u %8d %lu %d %p %u %u %u %u %d\n",
2930                             lxpr_convert_tcp_state(tcp->tcp_state),
2931                             tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */
2932                             0, 0, /* tr, when */
2933                             0, /* per-connection rexmits aren't tracked today */
2934                             connp->conn_cred->cr_uid,
2935                             0, /* timeout */
2936                             /* inode + more */
2937                             (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0);
2938                 }
2939         }
2940         netstack_rele(ns);
2941 }
2942 
2943 /* ARGSUSED */
2944 static void
2945 lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2946 {
2947         lxpr_format_tcp(uiobuf, IPV4_VERSION);
2948 }
2949 
2950 /* ARGSUSED */
2951 static void
2952 lxpr_read_net_tcp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2953 {
2954         lxpr_format_tcp(uiobuf, IPV6_VERSION);
2955 }
2956 
2957 static void
2958 lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
2959 {
2960         int i, sl = 0;
2961         connf_t *connfp;
2962         conn_t *connp;
2963         netstack_t *ns;
2964         ip_stack_t *ipst;
2965 
2966         ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
2967         if (ipver == IPV4_VERSION) {
2968                 lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address"
2969                     "   st tx_queue rx_queue tr tm->when retrnsmt   uid"
2970                     "  timeout inode ref pointer drops\n");
2971         } else {
2972                 lxpr_uiobuf_printf(uiobuf, "  sl  "
2973                     "local_address                         "
2974                     "remote_address                        "
2975                     "st tx_queue rx_queue tr tm->when retrnsmt   "
2976                     "uid  timeout inode ref pointer drops\n");
2977         }
2978         /*
2979          * Due to differences between the Linux and illumos UDP
2980          * implementations, some data will be omitted from the output here.
2981          *
2982          * Valid fields:
2983          *  - local_address
2984          *  - remote_address
2985          *  - st: limited
2986          *  - uid
2987          *
2988          * Omitted/invalid fields
2989          *  - tx_queue
2990          *  - rx_queue
2991          *  - tr
2992          *  - tm->when
2993          *  - retrnsmt
2994          *  - timeout
2995          *  - inode
2996          */
2997 
2998         ns = netstack_get_current();
2999         if (ns == NULL)
3000                 return;
3001         ipst = ns->netstack_ip;
3002 
3003         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
3004                 connfp = &ipst->ips_ipcl_globalhash_fanout[i];
3005                 connp = NULL;
3006                 while ((connp =
3007                     ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) {
3008                         udp_t *udp;
3009                         int state = 0;
3010                         vattr_t attr;
3011                         sonode_t *so = (sonode_t *)connp->conn_upper_handle;
3012                         vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
3013                         if (connp->conn_ipversion != ipver)
3014                                 continue;
3015                         udp = connp->conn_udp;
3016                         if (ipver == IPV4_VERSION) {
3017                                 lxpr_uiobuf_printf(uiobuf,
3018                                     "%4d: %08X:%04X %08X:%04X ",
3019                                     ++sl,
3020                                     connp->conn_laddr_v4,
3021                                     ntohs(connp->conn_lport),
3022                                     connp->conn_faddr_v4,
3023                                     ntohs(connp->conn_fport));
3024                         } else {
3025                                 lxpr_uiobuf_printf(uiobuf, "%4d: "
3026                                     "%08X%08X%08X%08X:%04X "
3027                                     "%08X%08X%08X%08X:%04X ",
3028                                     ++sl,
3029                                     connp->conn_laddr_v6.s6_addr32[0],
3030                                     connp->conn_laddr_v6.s6_addr32[1],
3031                                     connp->conn_laddr_v6.s6_addr32[2],
3032                                     connp->conn_laddr_v6.s6_addr32[3],
3033                                     ntohs(connp->conn_lport),
3034                                     connp->conn_faddr_v6.s6_addr32[0],
3035                                     connp->conn_faddr_v6.s6_addr32[1],
3036                                     connp->conn_faddr_v6.s6_addr32[2],
3037                                     connp->conn_faddr_v6.s6_addr32[3],
3038                                     ntohs(connp->conn_fport));
3039                         }
3040 
3041                         switch (udp->udp_state) {
3042                         case TS_UNBND:
3043                         case TS_IDLE:
3044                                 state = 7;
3045                                 break;
3046                         case TS_DATA_XFER:
3047                                 state = 1;
3048                                 break;
3049                         }
3050 
3051                         /* fetch the simulated inode for the socket */
3052                         if (vp == NULL ||
3053                             VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
3054                                 attr.va_nodeid = 0;
3055 
3056                         lxpr_uiobuf_printf(uiobuf,
3057                             "%02X %08X:%08X %02X:%08X %08X "
3058                             "%5u %8d %lu %d %p %d\n",
3059                             state,
3060                             0, 0, /* rx/tx queue */
3061                             0, 0, /* tr, when */
3062                             0, /* retrans */
3063                             connp->conn_cred->cr_uid,
3064                             0, /* timeout */
3065                             /* inode, ref, pointer, drops */
3066                             (ino_t)attr.va_nodeid, 0, NULL, 0);
3067                 }
3068         }
3069         netstack_rele(ns);
3070 }
3071 
3072 /* ARGSUSED */
3073 static void
3074 lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3075 {
3076         lxpr_format_udp(uiobuf, IPV4_VERSION);
3077 }
3078 
3079 /* ARGSUSED */
3080 static void
3081 lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3082 {
3083         lxpr_format_udp(uiobuf, IPV6_VERSION);
3084 }
3085 
3086 /* ARGSUSED */
3087 static void
3088 lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3089 {
3090         sonode_t *so;
3091         zoneid_t zoneid = getzoneid();
3092 
3093         lxpr_uiobuf_printf(uiobuf, "Num       RefCount Protocol Flags    Type "
3094             "St Inode Path\n");
3095 
3096         mutex_enter(&socklist.sl_lock);
3097         for (so = socklist.sl_list; so != NULL;
3098             so = _SOTOTPI(so)->sti_next_so) {
3099                 vnode_t *vp = so->so_vnode;
3100                 vattr_t attr;
3101                 sotpi_info_t *sti;
3102                 const char *name = NULL;
3103                 int status = 0;
3104                 int type = 0;
3105                 int flags = 0;
3106 
3107                 /* Only process active sonodes in this zone */
3108                 if (so->so_count == 0 || so->so_zoneid != zoneid)
3109                         continue;
3110 
3111                 /*
3112                  * Grab the inode, if possible.
3113                  * This must be done before entering so_lock.
3114                  */
3115                 if (vp == NULL ||
3116                     VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
3117                         attr.va_nodeid = 0;
3118 
3119                 mutex_enter(&so->so_lock);
3120                 sti = _SOTOTPI(so);
3121 
3122                 if (sti->sti_laddr_sa != NULL &&
3123                     sti->sti_laddr_len > 0) {
3124                         name = sti->sti_laddr_sa->sa_data;
3125                 } else if (sti->sti_faddr_sa != NULL &&
3126                     sti->sti_faddr_len > 0) {
3127                         name = sti->sti_faddr_sa->sa_data;
3128                 }
3129 
3130                 /*
3131                  * Derived from enum values in Linux kernel source:
3132                  * include/uapi/linux/net.h
3133                  */
3134                 if ((so->so_state & SS_ISDISCONNECTING) != 0) {
3135                         status = 4;
3136                 } else if ((so->so_state & SS_ISCONNECTING) != 0) {
3137                         status = 2;
3138                 } else if ((so->so_state & SS_ISCONNECTED) != 0) {
3139                         status = 3;
3140                 } else {
3141                         status = 1;
3142                         /* Add ACC flag for stream-type server sockets */
3143                         if (so->so_type != SOCK_DGRAM &&
3144                             sti->sti_laddr_sa != NULL)
3145                                 flags |= 0x10000;
3146                 }
3147 
3148                 /* Convert to Linux type */
3149                 switch (so->so_type) {
3150                 case SOCK_DGRAM:
3151                         type = 2;
3152                         break;
3153                 case SOCK_SEQPACKET:
3154                         type = 5;
3155                         break;
3156                 default:
3157                         type = 1;
3158                 }
3159 
3160                 lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu",
3161                     so,
3162                     so->so_count,
3163                     0, /* proto, always 0 */
3164                     flags,
3165                     type,
3166                     status,
3167                     (ino_t)attr.va_nodeid);
3168 
3169                 /*
3170                  * Due to shortcomings in the abstract socket emulation, they
3171                  * cannot be properly represented here (as @<path>).
3172                  *
3173                  * This will be the case until they are better implemented.
3174                  */
3175                 if (name != NULL)
3176                         lxpr_uiobuf_printf(uiobuf, " %s\n", name);
3177                 else
3178                         lxpr_uiobuf_printf(uiobuf, "\n");
3179                 mutex_exit(&so->so_lock);
3180         }
3181         mutex_exit(&socklist.sl_lock);
3182 }
3183 
3184 /*
3185  * lxpr_read_kmsg(): read the contents of the kernel message queue. We
3186  * translate this into the reception of console messages for this zone; each
3187  * read copies out a single zone console message, or blocks until the next one
3188  * is produced, unless we're open non-blocking, in which case we return after
3189  * 1ms.
3190  */
3191 
3192 #define LX_KMSG_PRI     "<0>"
3193 
3194 static void
3195 lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh)
3196 {
3197         mblk_t          *mp;
3198         timestruc_t     to;
3199         timestruc_t     *tp = NULL;
3200 
3201         ASSERT(lxpnp->lxpr_type == LXPR_KMSG);
3202 
3203         if (lxpr_uiobuf_nonblock(uiobuf)) {
3204                 to.tv_sec = 0;
3205                 to.tv_nsec = 1000000; /* 1msec */
3206                 tp = &to;
3207         }
3208 
3209         if (ldi_getmsg(lh, &mp, tp) == 0) {
3210                 /*
3211                  * lx procfs doesn't like successive reads to the same file
3212                  * descriptor unless we do an explicit rewind each time.
3213                  */
3214                 lxpr_uiobuf_seek(uiobuf, 0);
3215 
3216                 lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
3217                     mp->b_cont->b_rptr);
3218 
3219                 freemsg(mp);
3220         }
3221 }
3222 
3223 /*
3224  * lxpr_read_loadavg(): read the contents of the "loadavg" file.  We do just
3225  * enough for uptime and other simple lxproc readers to work
3226  */
3227 extern int nthread;
3228 
3229 static void
3230 lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3231 {
3232         ulong_t avenrun1;
3233         ulong_t avenrun5;
3234         ulong_t avenrun15;
3235         ulong_t avenrun1_cs;
3236         ulong_t avenrun5_cs;
3237         ulong_t avenrun15_cs;
3238         int loadavg[3];
3239         int *loadbuf;
3240         cpupart_t *cp;
3241         zone_t *zone = LXPTOZ(lxpnp);
3242 
3243         uint_t nrunnable = 0;
3244         rctl_qty_t nlwps;
3245 
3246         ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
3247 
3248         mutex_enter(&cpu_lock);
3249 
3250         /*
3251          * Need to add up values over all CPU partitions. If pools are active,
3252          * only report the values of the zone's partition, which by definition
3253          * includes the current CPU.
3254          */
3255         if (pool_pset_enabled()) {
3256                 psetid_t psetid = zone_pset_get(curproc->p_zone);
3257 
3258                 ASSERT(curproc->p_zone != &zone0);
3259                 cp = CPU->cpu_part;
3260 
3261                 nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
3262                 (void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
3263                 loadbuf = &loadavg[0];
3264         } else {
3265                 cp = cp_list_head;
3266                 do {
3267                         nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
3268                 } while ((cp = cp->cp_next) != cp_list_head);
3269 
3270                 loadbuf = zone == global_zone ?
3271                     &avenrun[0] : zone->zone_avenrun;
3272         }
3273 
3274         /*
3275          * If we're in the non-global zone, we'll report the total number of
3276          * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
3277          * otherwise will just use nthread (which will include kernel threads,
3278          * but should be good enough for lxproc).
3279          */
3280         nlwps = zone == global_zone ? nthread : zone->zone_nlwps;
3281 
3282         mutex_exit(&cpu_lock);
3283 
3284         avenrun1 = loadbuf[0] >> FSHIFT;
3285         avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
3286         avenrun5 = loadbuf[1] >> FSHIFT;
3287         avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
3288         avenrun15 = loadbuf[2] >> FSHIFT;
3289         avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
3290 
3291         lxpr_uiobuf_printf(uiobuf,
3292             "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
3293             avenrun1, avenrun1_cs,
3294             avenrun5, avenrun5_cs,
3295             avenrun15, avenrun15_cs,
3296             nrunnable, nlwps, 0);
3297 }
3298 
3299 /*
3300  * lxpr_read_meminfo(): read the contents of the "meminfo" file.
3301  */
3302 static void
3303 lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3304 {
3305         zone_t *zone = LXPTOZ(lxpnp);
3306         int global = zone == global_zone;
3307         long total_mem, free_mem, total_swap, used_swap;
3308 
3309         ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
3310 
3311         if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
3312                 total_mem = physmem * PAGESIZE;
3313                 free_mem = freemem * PAGESIZE;
3314         } else {
3315                 total_mem = zone->zone_phys_mem_ctl;
3316                 free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
3317         }
3318 
3319         if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
3320                 total_swap = k_anoninfo.ani_max * PAGESIZE;
3321                 used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
3322         } else {
3323                 mutex_enter(&zone->zone_mem_lock);
3324                 total_swap = zone->zone_max_swap_ctl;
3325                 used_swap = zone->zone_max_swap;
3326                 mutex_exit(&zone->zone_mem_lock);
3327         }
3328 
3329         lxpr_uiobuf_printf(uiobuf,
3330             "MemTotal:  %8lu kB\n"
3331             "MemFree:   %8lu kB\n"
3332             "MemShared: %8u kB\n"
3333             "Buffers:   %8u kB\n"
3334             "Cached:    %8u kB\n"
3335             "SwapCached:%8u kB\n"
3336             "Active:    %8u kB\n"
3337             "Inactive:  %8u kB\n"
3338             "HighTotal: %8u kB\n"
3339             "HighFree:  %8u kB\n"
3340             "LowTotal:  %8u kB\n"
3341             "LowFree:   %8u kB\n"
3342             "SwapTotal: %8lu kB\n"
3343             "SwapFree:  %8lu kB\n",
3344             btok(total_mem),                            /* MemTotal */
3345             btok(free_mem),                             /* MemFree */
3346             0,                                          /* MemShared */
3347             0,                                          /* Buffers */
3348             0,                                          /* Cached */
3349             0,                                          /* SwapCached */
3350             0,                                          /* Active */
3351             0,                                          /* Inactive */
3352             0,                                          /* HighTotal */
3353             0,                                          /* HighFree */
3354             btok(total_mem),                            /* LowTotal */
3355             btok(free_mem),                             /* LowFree */
3356             btok(total_swap),                           /* SwapTotal */
3357             btok(total_swap - used_swap));              /* SwapFree */
3358 }
3359 
3360 /*
3361  * lxpr_read_mounts():
3362  */
3363 /* ARGSUSED */
3364 static void
3365 lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3366 {
3367         struct vfs *vfsp;
3368         struct vfs *vfslist;
3369         zone_t *zone = LXPTOZ(lxpnp);
3370         struct print_data {
3371                 refstr_t *vfs_mntpt;
3372                 refstr_t *vfs_resource;
3373                 uint_t vfs_flag;
3374                 int vfs_fstype;
3375                 struct print_data *next;
3376         } *print_head = NULL;
3377         struct print_data **print_tail = &print_head;
3378         struct print_data *printp;
3379 
3380         vfs_list_read_lock();
3381 
3382         if (zone == global_zone) {
3383                 vfsp = vfslist = rootvfs;
3384         } else {
3385                 vfsp = vfslist = zone->zone_vfslist;
3386                 /*
3387                  * If the zone has a root entry, it will be the first in
3388                  * the list.  If it doesn't, we conjure one up.
3389                  */
3390                 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
3391                     zone->zone_rootpath) != 0) {
3392                         struct vfs *tvfsp;
3393                         /*
3394                          * The root of the zone is not a mount point.  The vfs
3395                          * we want to report is that of the zone's root vnode.
3396                          */
3397                         tvfsp = zone->zone_rootvp->v_vfsp;
3398 
3399                         lxpr_uiobuf_printf(uiobuf,
3400                             "/ / %s %s 0 0\n",
3401                             vfssw[tvfsp->vfs_fstype].vsw_name,
3402                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
3403 
3404                 }
3405                 if (vfslist == NULL) {
3406                         vfs_list_unlock();
3407                         return;
3408                 }
3409         }
3410 
3411         /*
3412          * Later on we have to do a lookupname, which can end up causing
3413          * another vfs_list_read_lock() to be called. Which can lead to a
3414          * deadlock. To avoid this, we extract the data we need into a local
3415          * list, then we can run this list without holding vfs_list_read_lock()
3416          * We keep the list in the same order as the vfs_list
3417          */
3418         do {
3419                 /* Skip mounts we shouldn't show */
3420                 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
3421                         goto nextfs;
3422                 }
3423 
3424                 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
3425                 refstr_hold(vfsp->vfs_mntpt);
3426                 printp->vfs_mntpt = vfsp->vfs_mntpt;
3427                 refstr_hold(vfsp->vfs_resource);
3428                 printp->vfs_resource = vfsp->vfs_resource;
3429                 printp->vfs_flag = vfsp->vfs_flag;
3430                 printp->vfs_fstype = vfsp->vfs_fstype;
3431                 printp->next = NULL;
3432 
3433                 *print_tail = printp;
3434                 print_tail = &printp->next;
3435 
3436 nextfs:
3437                 vfsp = (zone == global_zone) ?
3438                     vfsp->vfs_next : vfsp->vfs_zone_next;
3439 
3440         } while (vfsp != vfslist);
3441 
3442         vfs_list_unlock();
3443 
3444         /*
3445          * now we can run through what we've extracted without holding
3446          * vfs_list_read_lock()
3447          */
3448         printp = print_head;
3449         while (printp != NULL) {
3450                 struct print_data *printp_next;
3451                 const char *resource;
3452                 char *mntpt;
3453                 struct vnode *vp;
3454                 int error;
3455 
3456                 mntpt = (char *)refstr_value(printp->vfs_mntpt);
3457                 resource = refstr_value(printp->vfs_resource);
3458 
3459                 if (mntpt != NULL && mntpt[0] != '\0')
3460                         mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
3461                 else
3462                         mntpt = "-";
3463 
3464                 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
3465 
3466                 if (error != 0)
3467                         goto nextp;
3468 
3469                 if (!(vp->v_flag & VROOT)) {
3470                         VN_RELE(vp);
3471                         goto nextp;
3472                 }
3473                 VN_RELE(vp);
3474 
3475                 if (resource != NULL && resource[0] != '\0') {
3476                         if (resource[0] == '/') {
3477                                 resource = ZONE_PATH_VISIBLE(resource, zone) ?
3478                                     ZONE_PATH_TRANSLATE(resource, zone) :
3479                                     mntpt;
3480                         }
3481                 } else {
3482                         resource = "-";
3483                 }
3484 
3485                 lxpr_uiobuf_printf(uiobuf,
3486                     "%s %s %s %s 0 0\n",
3487                     resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
3488                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
3489 
3490 nextp:
3491                 printp_next = printp->next;
3492                 refstr_rele(printp->vfs_mntpt);
3493                 refstr_rele(printp->vfs_resource);
3494                 kmem_free(printp, sizeof (*printp));
3495                 printp = printp_next;
3496 
3497         }
3498 }
3499 
3500 /*
3501  * lxpr_read_partitions():
3502  *
3503  * Over the years, /proc/partitions has been made considerably smaller -- to
3504  * the point that it really is only major number, minor number, number of
3505  * blocks (which we report as 0), and partition name.
3506  *
3507  * We support this because some things want to see it to make sense of
3508  * /proc/diskstats, and also because "fdisk -l" and a few other things look
3509  * here to find all disks on the system.
3510  */
3511 /* ARGSUSED */
3512 static void
3513 lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3514 {
3515 
3516         kstat_t *ksr;
3517         kstat_t ks0;
3518         int nidx, num, i;
3519         size_t sidx, size;
3520         zfs_cmd_t *zc;
3521         nvlist_t *nv = NULL;
3522         nvpair_t *elem = NULL;
3523         lxpr_mnt_t *mnt;
3524         lxpr_zfs_iter_t zfsi;
3525 
3526         ASSERT(lxpnp->lxpr_type == LXPR_PARTITIONS);
3527 
3528         ks0.ks_kid = 0;
3529         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
3530 
3531         if (ksr == NULL)
3532                 return;
3533 
3534         lxpr_uiobuf_printf(uiobuf, "major minor  #blocks  name\n\n");
3535 
3536         for (i = 1; i < nidx; i++) {
3537                 kstat_t *ksp = &ksr[i];
3538                 kstat_io_t *kip;
3539 
3540                 if (ksp->ks_type != KSTAT_TYPE_IO ||
3541                     strcmp(ksp->ks_class, "disk") != 0)
3542                         continue;
3543 
3544                 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
3545                     &size, &num)) == NULL)
3546                         continue;
3547 
3548                 if (size < sizeof (kstat_io_t)) {
3549                         kmem_free(kip, size);
3550                         continue;
3551                 }
3552 
3553                 lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d %s\n",
3554                     mod_name_to_major(ksp->ks_module),
3555                     ksp->ks_instance, 0, ksp->ks_name);
3556 
3557                 kmem_free(kip, size);
3558         }
3559 
3560         kmem_free(ksr, sidx);
3561 
3562         /* If we never got to open the zfs LDI, then stop now. */
3563         mnt = (lxpr_mnt_t *)lxpnp->lxpr_vnode->v_vfsp->vfs_data;
3564         if (mnt->lxprm_zfs_isopen == B_FALSE)
3565                 return;
3566 
3567         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3568 
3569         if (lxpr_zfs_list_pools(mnt, zc, &nv) != 0)
3570                 goto out;
3571 
3572         while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
3573                 char *pool = nvpair_name(elem);
3574 
3575                 bzero(&zfsi, sizeof (lxpr_zfs_iter_t));
3576                 while (lxpr_zfs_next_zvol(mnt, pool, zc, &zfsi) == 0) {
3577                         major_t major;
3578                         minor_t minor;
3579                         if (lxpr_zvol_dev(mnt, zc->zc_name, &major, &minor)
3580                             != 0)
3581                                 continue;
3582 
3583                         lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d zvol/dsk/%s\n",
3584                             major, minor, 0, zc->zc_name);
3585                 }
3586         }
3587 
3588         nvlist_free(nv);
3589 out:
3590         kmem_free(zc, sizeof (zfs_cmd_t));
3591 }
3592 
3593 /*
3594  * lxpr_read_diskstats():
3595  *
3596  * See the block comment above the per-device output-generating line for the
3597  * details of the format.
3598  */
3599 /* ARGSUSED */
3600 static void
3601 lxpr_read_diskstats(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3602 {
3603         kstat_t *ksr;
3604         kstat_t ks0;
3605         int nidx, num, i;
3606         size_t sidx, size;
3607 
3608         ASSERT(lxpnp->lxpr_type == LXPR_DISKSTATS);
3609 
3610         ks0.ks_kid = 0;
3611         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
3612 
3613         if (ksr == NULL)
3614                 return;
3615 
3616         for (i = 1; i < nidx; i++) {
3617                 kstat_t *ksp = &ksr[i];
3618                 kstat_io_t *kip;
3619 
3620                 if (ksp->ks_type != KSTAT_TYPE_IO ||
3621                     strcmp(ksp->ks_class, "disk") != 0)
3622                         continue;
3623 
3624                 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
3625                     &size, &num)) == NULL)
3626                         continue;
3627 
3628                 if (size < sizeof (kstat_io_t)) {
3629                         kmem_free(kip, size);
3630                         continue;
3631                 }
3632 
3633                 /*
3634                  * /proc/diskstats is defined to have one line of output for
3635                  * each block device, with each line containing the following
3636                  * 14 fields:
3637                  *
3638                  *      1 - major number
3639                  *      2 - minor mumber
3640                  *      3 - device name
3641                  *      4 - reads completed successfully
3642                  *      5 - reads merged
3643                  *      6 - sectors read
3644                  *      7 - time spent reading (ms)
3645                  *      8 - writes completed
3646                  *      9 - writes merged
3647                  *      10 - sectors written
3648                  *      11 - time spent writing (ms)
3649                  *      12 - I/Os currently in progress
3650                  *      13 - time spent doing I/Os (ms)
3651                  *      14 - weighted time spent doing I/Os (ms)
3652                  *
3653                  * One small hiccup:  we don't actually keep track of time
3654                  * spent reading vs. time spent writing -- we keep track of
3655                  * time waiting vs. time actually performing I/O.  While we
3656                  * could divide the total time by the I/O mix (making the
3657                  * obviously wrong assumption that I/O operations all take the
3658                  * same amount of time), this has the undesirable side-effect
3659                  * of moving backwards.  Instead, we report the total time
3660                  * (read + write) for all three stats (read, write, total).
3661                  * This is also a lie of sorts, but it should be more
3662                  * immediately clear to the user that reads and writes are
3663                  * each being double-counted as the other.
3664                  */
3665                 lxpr_uiobuf_printf(uiobuf, "%4d %7d %s "
3666                     "%llu %llu %llu %llu "
3667                     "%llu %llu %llu %llu "
3668                     "%llu %llu %llu\n",
3669                     mod_name_to_major(ksp->ks_module),
3670                     ksp->ks_instance, ksp->ks_name,
3671                     (uint64_t)kip->reads, 0LL,
3672                     kip->nread / (uint64_t)LXPR_SECTOR_SIZE,
3673                     (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3674                     (uint64_t)kip->writes, 0LL,
3675                     kip->nwritten / (uint64_t)LXPR_SECTOR_SIZE,
3676                     (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3677                     (uint64_t)(kip->rcnt + kip->wcnt),
3678                     (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3679                     (kip->rlentime + kip->wlentime) /
3680                     (uint64_t)(NANOSEC / MILLISEC));
3681 
3682                 kmem_free(kip, size);
3683         }
3684 
3685         kmem_free(ksr, sidx);
3686 }
3687 
3688 /*
3689  * lxpr_read_version(): read the contents of the "version" file.
3690  */
3691 /* ARGSUSED */
3692 static void
3693 lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3694 {
3695         lx_zone_data_t *lxzd = ztolxzd(LXPTOZ(lxpnp));
3696         lx_proc_data_t *lxpd = ptolxproc(curproc);
3697         const char *release = lxzd->lxzd_kernel_release;
3698         const char *version = lxzd->lxzd_kernel_version;
3699 
3700         /* Use per-process overrides, if specified */
3701         if (lxpd != NULL && lxpd->l_uname_release[0] != '\0') {
3702                 release = lxpd->l_uname_release;
3703         }
3704         if (lxpd != NULL && lxpd->l_uname_version[0] != '\0') {
3705                 version = lxpd->l_uname_version;
3706         }
3707 
3708         lxpr_uiobuf_printf(uiobuf,
3709             "%s version %s (%s version %d.%d.%d) %s\n",
3710             LX_UNAME_SYSNAME, release,
3711 #if defined(__GNUC__)
3712             "gcc", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
3713 #else
3714             "cc", 1, 0, 0,
3715 #endif
3716             version);
3717 }
3718 
3719 /*
3720  * lxpr_read_stat(): read the contents of the "stat" file.
3721  *
3722  */
3723 /* ARGSUSED */
3724 static void
3725 lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3726 {
3727         cpu_t *cp, *cpstart;
3728         int pools_enabled;
3729         ulong_t idle_cum = 0;
3730         ulong_t sys_cum  = 0;
3731         ulong_t user_cum = 0;
3732         ulong_t irq_cum = 0;
3733         ulong_t cpu_nrunnable_cum = 0;
3734         ulong_t w_io_cum = 0;
3735 
3736         ulong_t pgpgin_cum    = 0;
3737         ulong_t pgpgout_cum   = 0;
3738         ulong_t pgswapout_cum = 0;
3739         ulong_t pgswapin_cum  = 0;
3740         ulong_t intr_cum = 0;
3741         ulong_t pswitch_cum = 0;
3742         ulong_t forks_cum = 0;
3743         hrtime_t msnsecs[NCMSTATES];
3744         /* is the emulated release > 2.4 */
3745         boolean_t newer_than24 = lx_kern_release_cmp(LXPTOZ(lxpnp), "2.4") > 0;
3746         /* temporary variable since scalehrtime modifies data in place */
3747         hrtime_t tmptime;
3748 
3749         ASSERT(lxpnp->lxpr_type == LXPR_STAT);
3750 
3751         mutex_enter(&cpu_lock);
3752         pools_enabled = pool_pset_enabled();
3753 
3754         /* Calculate cumulative stats */
3755         cp = cpstart = CPU->cpu_part->cp_cpulist;
3756         do {
3757                 int i;
3758 
3759                 /*
3760                  * Don't count CPUs that aren't even in the system
3761                  * or aren't up yet.
3762                  */
3763                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3764                         continue;
3765                 }
3766 
3767                 get_cpu_mstate(cp, msnsecs);
3768 
3769                 idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3770                 sys_cum  += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3771                 user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
3772 
3773                 pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
3774                 pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
3775                 pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
3776                 pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
3777 
3778 
3779                 if (newer_than24) {
3780                         cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
3781                         w_io_cum += CPU_STATS(cp, sys.iowait);
3782                         for (i = 0; i < NCMSTATES; i++) {
3783                                 tmptime = cp->cpu_intracct[i];
3784                                 scalehrtime(&tmptime);
3785                                 irq_cum += NSEC_TO_TICK(tmptime);
3786                         }
3787                 }
3788 
3789                 for (i = 0; i < PIL_MAX; i++)
3790                         intr_cum += CPU_STATS(cp, sys.intr[i]);
3791 
3792                 pswitch_cum += CPU_STATS(cp, sys.pswitch);
3793                 forks_cum += CPU_STATS(cp, sys.sysfork);
3794                 forks_cum += CPU_STATS(cp, sys.sysvfork);
3795 
3796                 if (pools_enabled)
3797                         cp = cp->cpu_next_part;
3798                 else
3799                         cp = cp->cpu_next;
3800         } while (cp != cpstart);
3801 
3802         if (newer_than24) {
3803                 lxpr_uiobuf_printf(uiobuf,
3804                     "cpu %lu %lu %lu %lu %lu %lu %lu\n",
3805                     user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
3806         } else {
3807                 lxpr_uiobuf_printf(uiobuf,
3808                     "cpu %lu %lu %lu %lu\n",
3809                     user_cum, 0L, sys_cum, idle_cum);
3810         }
3811 
3812         /* Do per processor stats */
3813         do {
3814                 int i;
3815 
3816                 ulong_t idle_ticks;
3817                 ulong_t sys_ticks;
3818                 ulong_t user_ticks;
3819                 ulong_t irq_ticks = 0;
3820 
3821                 /*
3822                  * Don't count CPUs that aren't even in the system
3823                  * or aren't up yet.
3824                  */
3825                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3826                         continue;
3827                 }
3828 
3829                 get_cpu_mstate(cp, msnsecs);
3830 
3831                 idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3832                 sys_ticks  = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3833                 user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
3834 
3835                 for (i = 0; i < NCMSTATES; i++) {
3836                         tmptime = cp->cpu_intracct[i];
3837                         scalehrtime(&tmptime);
3838                         irq_ticks += NSEC_TO_TICK(tmptime);
3839                 }
3840 
3841                 if (newer_than24) {
3842                         lxpr_uiobuf_printf(uiobuf,
3843                             "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
3844                             cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
3845                             0L, irq_ticks, 0L);
3846                 } else {
3847                         lxpr_uiobuf_printf(uiobuf,
3848                             "cpu%d %lu %lu %lu %lu\n",
3849                             cp->cpu_id,
3850                             user_ticks, 0L, sys_ticks, idle_ticks);
3851                 }
3852 
3853                 if (pools_enabled)
3854                         cp = cp->cpu_next_part;
3855                 else
3856                         cp = cp->cpu_next;
3857         } while (cp != cpstart);
3858 
3859         mutex_exit(&cpu_lock);
3860 
3861         if (newer_than24) {
3862                 lxpr_uiobuf_printf(uiobuf,
3863                     "page %lu %lu\n"
3864                     "swap %lu %lu\n"
3865                     "intr %lu\n"
3866                     "ctxt %lu\n"
3867                     "btime %lu\n"
3868                     "processes %lu\n"
3869                     "procs_running %lu\n"
3870                     "procs_blocked %lu\n",
3871                     pgpgin_cum, pgpgout_cum,
3872                     pgswapin_cum, pgswapout_cum,
3873                     intr_cum,
3874                     pswitch_cum,
3875                     boot_time,
3876                     forks_cum,
3877                     cpu_nrunnable_cum,
3878                     w_io_cum);
3879         } else {
3880                 lxpr_uiobuf_printf(uiobuf,
3881                     "page %lu %lu\n"
3882                     "swap %lu %lu\n"
3883                     "intr %lu\n"
3884                     "ctxt %lu\n"
3885                     "btime %lu\n"
3886                     "processes %lu\n",
3887                     pgpgin_cum, pgpgout_cum,
3888                     pgswapin_cum, pgswapout_cum,
3889                     intr_cum,
3890                     pswitch_cum,
3891                     boot_time,
3892                     forks_cum);
3893         }
3894 }
3895 
3896 /*
3897  * lxpr_read_swaps():
3898  *
3899  * We don't support swap files or partitions, but some programs like to look
3900  * here just to check we have some swap on the system, so we lie and show
3901  * our entire swap cap as one swap partition.
3902  *
3903  * It is important to use formatting identical to the Linux implementation
3904  * so that consumers do not break. See swap_show() in mm/swapfile.c.
3905  */
3906 /* ARGSUSED */
3907 static void
3908 lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3909 {
3910         zone_t *zone = curzone;
3911         uint64_t totswap, usedswap;
3912 
3913         mutex_enter(&zone->zone_mem_lock);
3914         /* Uses units of 1 kb (2^10). */
3915         totswap = zone->zone_max_swap_ctl >> 10;
3916         usedswap = zone->zone_max_swap >> 10;
3917         mutex_exit(&zone->zone_mem_lock);
3918 
3919         lxpr_uiobuf_printf(uiobuf,
3920             "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
3921         lxpr_uiobuf_printf(uiobuf, "%-40s%s\t%llu\t%llu\t%d\n",
3922             "/dev/swap", "partition", totswap, usedswap, -1);
3923 }
3924 
/*
 * inotify tunables exported via /proc.
 *
 * These are defined outside this file; the readers below only print their
 * current values.
 */
extern int inotify_maxevents;
extern int inotify_maxinstances;
extern int inotify_maxwatches;
3931 
3932 static void
3933 lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp,
3934     lxpr_uiobuf_t *uiobuf)
3935 {
3936         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS);
3937         lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents);
3938 }
3939 
3940 static void
3941 lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp,
3942     lxpr_uiobuf_t *uiobuf)
3943 {
3944         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES);
3945         lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances);
3946 }
3947 
3948 static void
3949 lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp,
3950     lxpr_uiobuf_t *uiobuf)
3951 {
3952         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES);
3953         lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches);
3954 }
3955 
/*
 * Print LX_CAP_MAX_VALID as one decimal line.
 *
 * NOTE(review): judging by the node type name this backs Linux's
 * /proc/sys/kernel/cap_last_cap -- confirm against the node table.
 */
static void
lxpr_read_sys_kernel_caplcap(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_CAPLCAP);
        lxpr_uiobuf_printf(uiobuf, "%d\n", LX_CAP_MAX_VALID);
}
3962 
3963 static void
3964 lxpr_read_sys_kernel_corepatt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3965 {
3966         zone_t *zone = curproc->p_zone;
3967         struct core_globals *cg;
3968         refstr_t *rp;
3969         corectl_path_t *ccp;
3970         char tr[MAXPATHLEN];
3971 
3972         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);
3973 
3974         cg = zone_getspecific(core_zone_key, zone);
3975         ASSERT(cg != NULL);
3976 
3977         /* If core dumps are disabled, return an empty string. */
3978         if ((cg->core_options & CC_PROCESS_PATH) == 0) {
3979                 lxpr_uiobuf_printf(uiobuf, "\n");
3980                 return;
3981         }
3982 
3983         ccp = cg->core_default_path;
3984         mutex_enter(&ccp->ccp_mtx);
3985         if ((rp = ccp->ccp_path) != NULL)
3986                 refstr_hold(rp);
3987         mutex_exit(&ccp->ccp_mtx);
3988 
3989         if (rp == NULL) {
3990                 lxpr_uiobuf_printf(uiobuf, "\n");
3991                 return;
3992         }
3993 
3994         bzero(tr, sizeof (tr));
3995         if (lxpr_core_path_s2l(refstr_value(rp), tr, sizeof (tr)) != 0) {
3996                 refstr_rele(rp);
3997                 lxpr_uiobuf_printf(uiobuf, "\n");
3998                 return;
3999         }
4000 
4001         refstr_rele(rp);
4002         lxpr_uiobuf_printf(uiobuf, "%s\n", tr);
4003 }
4004 
4005 static void
4006 lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4007 {
4008         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME);
4009         lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename());
4010 }
4011 
4012 static void
4013 lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4014 {
4015         rctl_qty_t val;
4016 
4017         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI);
4018 
4019         mutex_enter(&curproc->p_lock);
4020         val = rctl_enforced_value(rc_zone_msgmni,
4021             curproc->p_zone->zone_rctls, curproc);
4022         mutex_exit(&curproc->p_lock);
4023 
4024         lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4025 }
4026 
/*
 * Print the current value of ngroups_max as one decimal line.
 */
static void
lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX);
        lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max);
}
4033 
4034 static void
4035 lxpr_read_sys_kernel_osrel(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4036 {
4037         lx_zone_data_t *br_data;
4038 
4039         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_OSREL);
4040         br_data = ztolxzd(curproc->p_zone);
4041         if (curproc->p_zone->zone_brand == &lx_brand) {
4042                 lxpr_uiobuf_printf(uiobuf, "%s\n",
4043                     br_data->lxzd_kernel_version);
4044         } else {
4045                 lxpr_uiobuf_printf(uiobuf, "\n");
4046         }
4047 }
4048 
/*
 * Print the current value of maxpid as one decimal line.
 */
static void
lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX);
        lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid);
}
4055 
4056 static void
4057 lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4058 {
4059         /*
4060          * This file isn't documented on the Linux proc(5) man page but
4061          * according to the blog of the author of systemd/journald (the
4062          * consumer), he says:
4063          *    boot_id: A random ID that is regenerated on each boot. As such it
4064          *    can be used to identify the local machine's current boot. It's
4065          *    universally available on any recent Linux kernel. It's a good and
4066          *    safe choice if you need to identify a specific boot on a specific
4067          *    booted kernel.
4068          *
4069          * We'll just generate a random ID if necessary. On Linux the format
4070          * appears to resemble a uuid but since it is not documented to be a
4071          * uuid, we don't worry about that.
4072          */
4073         lx_zone_data_t *br_data;
4074 
4075         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RAND_BOOTID);
4076 
4077         if (curproc->p_zone->zone_brand != &lx_brand) {
4078                 lxpr_uiobuf_printf(uiobuf, "0\n");
4079                 return;
4080         }
4081 
4082         br_data = ztolxzd(curproc->p_zone);
4083         if (br_data->lxzd_bootid[0] == '\0') {
4084                 extern int getrandom(void *, size_t, int);
4085                 int i;
4086 
4087                 for (i = 0; i < 5; i++) {
4088                         u_longlong_t n;
4089                         char s[32];
4090 
4091                         (void) random_get_bytes((uint8_t *)&n, sizeof (n));
4092                         switch (i) {
4093                         case 0: (void) snprintf(s, sizeof (s), "%08llx", n);
4094                                 s[8] = '\0';
4095                                 break;
4096                         case 4: (void) snprintf(s, sizeof (s), "%012llx", n);
4097                                 s[12] = '\0';
4098                                 break;
4099                         default: (void) snprintf(s, sizeof (s), "%04llx", n);
4100                                 s[4] = '\0';
4101                                 break;
4102                         }
4103                         if (i > 0)
4104                                 strlcat(br_data->lxzd_bootid, "-",
4105                                     sizeof (br_data->lxzd_bootid));
4106                         strlcat(br_data->lxzd_bootid, s,
4107                             sizeof (br_data->lxzd_bootid));
4108                 }
4109         }
4110 
4111         lxpr_uiobuf_printf(uiobuf, "%s\n", br_data->lxzd_bootid);
4112 }
4113 
4114 static void
4115 lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4116 {
4117         rctl_qty_t val;
4118 
4119         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX);
4120 
4121         mutex_enter(&curproc->p_lock);
4122         val = rctl_enforced_value(rc_zone_shmmax,
4123             curproc->p_zone->zone_rctls, curproc);
4124         mutex_exit(&curproc->p_lock);
4125 
4126         if (val > FOURGB)
4127                 val = FOURGB;
4128 
4129         lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4130 }
4131 
/*
 * Print the zone_nlwps_ctl field of the calling process's zone as one
 * decimal line.
 *
 * NOTE(review): the field is read without taking any lock here and is
 * formatted with "%d"; confirm that its declared type matches an int-sized
 * conversion.
 */
static void
lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX);
        lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl);
}
4138 
4139 static void
4140 lxpr_read_sys_net_core_somaxc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4141 {
4142         netstack_t *ns;
4143         tcp_stack_t     *tcps;
4144 
4145         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
4146 
4147         ns = netstack_get_current();
4148         if (ns == NULL) {
4149                 lxpr_uiobuf_printf(uiobuf, "%d\n", SOMAXCONN);
4150                 return;
4151         }
4152 
4153         tcps = ns->netstack_tcp;
4154         lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_conn_req_max_q);
4155         netstack_rele(ns);
4156 }
4157 
4158 static void
4159 lxpr_read_sys_vm_minfr_kb(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4160 {
4161         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MINFR_KB);
4162         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4163 }
4164 
4165 static void
4166 lxpr_read_sys_vm_nhpages(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4167 {
4168         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_NHUGEP);
4169         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4170 }
4171 
4172 static void
4173 lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4174 {
4175         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_OVERCOMMIT_MEM);
4176         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4177 }
4178 
4179 static void
4180 lxpr_read_sys_vm_swappiness(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4181 {
4182         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_SWAPPINESS);
4183         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4184 }
4185 
4186 /*
4187  * lxpr_read_uptime(): read the contents of the "uptime" file.
4188  *
4189  * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
4190  * Use fixed point arithmetic to get 2 decimal places
4191  */
4192 /* ARGSUSED */
4193 static void
4194 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4195 {
4196         cpu_t *cp, *cpstart;
4197         int pools_enabled;
4198         ulong_t idle_cum = 0;
4199         ulong_t cpu_count = 0;
4200         ulong_t idle_s;
4201         ulong_t idle_cs;
4202         ulong_t up_s;
4203         ulong_t up_cs;
4204         hrtime_t birthtime;
4205         hrtime_t centi_sec = 10000000;  /* 10^7 */
4206 
4207         ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
4208 
4209         /* Calculate cumulative stats */
4210         mutex_enter(&cpu_lock);
4211         pools_enabled = pool_pset_enabled();
4212 
4213         cp = cpstart = CPU->cpu_part->cp_cpulist;
4214         do {
4215                 /*
4216                  * Don't count CPUs that aren't even in the system
4217                  * or aren't up yet.
4218                  */
4219                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
4220                         continue;
4221                 }
4222 
4223                 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
4224                 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
4225                 cpu_count += 1;
4226 
4227                 if (pools_enabled)
4228                         cp = cp->cpu_next_part;
4229                 else
4230                         cp = cp->cpu_next;
4231         } while (cp != cpstart);
4232         mutex_exit(&cpu_lock);
4233 
4234         /* Getting the Zone zsched process startup time */
4235         birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
4236         up_cs = (gethrtime() - birthtime) / centi_sec;
4237         up_s = up_cs / 100;
4238         up_cs %= 100;
4239 
4240         ASSERT(cpu_count > 0);
4241         idle_cum /= cpu_count;
4242         idle_s = idle_cum / hz;
4243         idle_cs = idle_cum % hz;
4244         idle_cs *= 100;
4245         idle_cs /= hz;
4246 
4247         lxpr_uiobuf_printf(uiobuf,
4248             "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
4249 }
4250 
/*
 * Flag names for the EDX result of CPUID function 0x80000001, used by
 * lxpr_read_cpuinfo() when the vendor is AMD.  The array index is the bit
 * number; NULL entries are bits for which no name is printed.
 */
static const char *amd_x_edx[] = {
        NULL,   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   "syscall",
        NULL,   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   "mp",
        "nx",   NULL,   "mmxext", NULL,
        NULL,   NULL,   NULL,   NULL,
        NULL,   "lm",   "3dnowext", "3dnow"
};
4261 
/*
 * Flag names for the ECX result of CPUID function 0x80000001, used by
 * lxpr_read_cpuinfo() when the vendor is AMD.  Indexed by bit number; only
 * the low bits listed here are examined.
 */
static const char *amd_x_ecx[] = {
        "lahf_lm", NULL, "svm", NULL,
        "altmovcr8"
};
4266 
/*
 * Flag names applied by lxpr_read_cpuinfo() to the EDX result of CPUID
 * function 0x80000001 when x86_vendor is X86_VENDOR_TM.  Indexed by bit
 * number; only the low bits listed here are examined.
 */
static const char *tm_x_edx[] = {
        "recovery", "longrun", NULL, "lrti"
};
4270 
/*
 * Flag names for the EDX result of CPUID function 0x80000001, used by
 * lxpr_read_cpuinfo() when the vendor is Intel.  Indexed by bit number; NULL
 * entries are bits for which no name is printed.
 *
 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
 */
static const char *intc_x_edx[] = {
        NULL,   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   "syscall",
        NULL,   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   NULL,
        "nx",   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   NULL,
        NULL,   "lm",   NULL,   NULL
};
4284 
/*
 * Flag names for the EDX result of CPUID function 1 (standard features).
 * lxpr_read_cpuinfo() applies this table regardless of vendor.  Indexed by
 * bit number; NULL entries are bits for which no name is printed.
 */
static const char *intc_edx[] = {
        "fpu",  "vme",  "de",   "pse",
        "tsc",  "msr",  "pae",  "mce",
        "cx8",  "apic",  NULL,  "sep",
        "mtrr", "pge",  "mca",  "cmov",
        "pat",  "pse36", "pn",  "clflush",
        NULL,   "dts",  "acpi", "mmx",
        "fxsr", "sse",  "sse2", "ss",
        "ht",   "tm",   "ia64", "pbe"
};
4295 
/*
 * Flag names for the ECX result of CPUID function 1 (standard features).
 * lxpr_read_cpuinfo() applies this table regardless of vendor.  Indexed by
 * bit number; only the low bits listed here are examined, and NULL entries
 * are bits for which no name is printed.
 *
 * "sse3" on linux is called "pni" (Prescott New Instructions).
 */
static const char *intc_ecx[] = {
        "pni",  NULL,   NULL, "monitor",
        "ds_cpl", NULL, NULL, "est",
        "tm2",  NULL,   "cid", NULL,
        NULL,   "cx16", "xtpr"
};
4305 
4306 /*
4307  * Report a list of each cgroup subsystem supported by our emulated cgroup fs.
4308  * This needs to exist for systemd to run but for now we don't report any
4309  * cgroup subsystems as being installed. The commented example below shows
4310  * how to print a subsystem entry.
4311  */
4312 static void
4313 lxpr_read_cgroups(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4314 {
4315         lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
4316             "#subsys_name", "hierarchy", "num_cgroups", "enabled");
4317 
4318         /*
4319          * lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
4320          *   "cpu,cpuacct", "2", "1", "1");
4321          */
4322 }
4323 
4324 static void
4325 lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4326 {
4327         int i;
4328         uint32_t bits;
4329         cpu_t *cp, *cpstart;
4330         int pools_enabled;
4331         const char **fp;
4332         char brandstr[CPU_IDSTRLEN];
4333         struct cpuid_regs cpr;
4334         int maxeax;
4335         int std_ecx, std_edx, ext_ecx, ext_edx;
4336 
4337         ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
4338 
4339         mutex_enter(&cpu_lock);
4340         pools_enabled = pool_pset_enabled();
4341 
4342         cp = cpstart = CPU->cpu_part->cp_cpulist;
4343         do {
4344                 /*
4345                  * This returns the maximum eax value for standard cpuid
4346                  * functions in eax.
4347                  */
4348                 cpr.cp_eax = 0;
4349                 (void) cpuid_insn(cp, &cpr);
4350                 maxeax = cpr.cp_eax;
4351 
4352                 /*
4353                  * Get standard x86 feature flags.
4354                  */
4355                 cpr.cp_eax = 1;
4356                 (void) cpuid_insn(cp, &cpr);
4357                 std_ecx = cpr.cp_ecx;
4358                 std_edx = cpr.cp_edx;
4359 
4360                 /*
4361                  * Now get extended feature flags.
4362                  */
4363                 cpr.cp_eax = 0x80000001;
4364                 (void) cpuid_insn(cp, &cpr);
4365                 ext_ecx = cpr.cp_ecx;
4366                 ext_edx = cpr.cp_edx;
4367 
4368                 (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
4369 
4370                 lxpr_uiobuf_printf(uiobuf,
4371                     "processor\t: %d\n"
4372                     "vendor_id\t: %s\n"
4373                     "cpu family\t: %d\n"
4374                     "model\t\t: %d\n"
4375                     "model name\t: %s\n"
4376                     "stepping\t: %d\n"
4377                     "cpu MHz\t\t: %u.%03u\n",
4378                     cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
4379                     cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
4380                     (uint32_t)(cpu_freq_hz / 1000000),
4381                     ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
4382 
4383                 lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
4384                     getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
4385 
4386                 if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
4387                         /*
4388                          * 'siblings' is used for HT-style threads
4389                          */
4390                         lxpr_uiobuf_printf(uiobuf,
4391                             "physical id\t: %lu\n"
4392                             "siblings\t: %u\n",
4393                             pg_plat_hw_instance_id(cp, PGHW_CHIP),
4394                             cpuid_get_ncpu_per_chip(cp));
4395                 }
4396 
4397                 /*
4398                  * Since we're relatively picky about running on older hardware,
4399                  * we can be somewhat cavalier about the answers to these ones.
4400                  *
4401                  * In fact, given the hardware we support, we just say:
4402                  *
4403                  *      fdiv_bug        : no    (if we're on a 64-bit kernel)
4404                  *      hlt_bug         : no
4405                  *      f00f_bug        : no
4406                  *      coma_bug        : no
4407                  *      wp              : yes   (write protect in supervsr mode)
4408                  */
4409                 lxpr_uiobuf_printf(uiobuf,
4410                     "fdiv_bug\t: %s\n"
4411                     "hlt_bug \t: no\n"
4412                     "f00f_bug\t: no\n"
4413                     "coma_bug\t: no\n"
4414                     "fpu\t\t: %s\n"
4415                     "fpu_exception\t: %s\n"
4416                     "cpuid level\t: %d\n"
4417                     "flags\t\t:",
4418 #if defined(__i386)
4419                     fpu_pentium_fdivbug ? "yes" : "no",
4420 #else
4421                     "no",
4422 #endif /* __i386 */
4423                     fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
4424                     maxeax);
4425 
4426                 for (bits = std_edx, fp = intc_edx, i = 0;
4427                     i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
4428                         if ((bits & (1 << i)) != 0 && *fp)
4429                                 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4430 
4431                 /*
4432                  * name additional features where appropriate
4433                  */
4434                 switch (x86_vendor) {
4435                 case X86_VENDOR_Intel:
4436                         for (bits = ext_edx, fp = intc_x_edx, i = 0;
4437                             i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
4438                             fp++, i++)
4439                                 if ((bits & (1 << i)) != 0 && *fp)
4440                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4441                         break;
4442 
4443                 case X86_VENDOR_AMD:
4444                         for (bits = ext_edx, fp = amd_x_edx, i = 0;
4445                             i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
4446                             fp++, i++)
4447                                 if ((bits & (1 << i)) != 0 && *fp)
4448                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4449 
4450                         for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
4451                             i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
4452                             fp++, i++)
4453                                 if ((bits & (1 << i)) != 0 && *fp)
4454                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4455                         break;
4456 
4457                 case X86_VENDOR_TM:
4458                         for (bits = ext_edx, fp = tm_x_edx, i = 0;
4459                             i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
4460                             fp++, i++)
4461                                 if ((bits & (1 << i)) != 0 && *fp)
4462                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4463                         break;
4464                 default:
4465                         break;
4466                 }
4467 
4468                 for (bits = std_ecx, fp = intc_ecx, i = 0;
4469                     i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
4470                         if ((bits & (1 << i)) != 0 && *fp)
4471                                 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4472 
4473                 lxpr_uiobuf_printf(uiobuf, "\n\n");
4474 
4475                 if (pools_enabled)
4476                         cp = cp->cpu_next_part;
4477                 else
4478                         cp = cp->cpu_next;
4479         } while (cp != cpstart);
4480 
4481         mutex_exit(&cpu_lock);
4482 }
4483 
4484 /* ARGSUSED */
4485 static void
4486 lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4487 {
4488         ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
4489         lxpr_uiobuf_seterr(uiobuf, EFAULT);
4490 }
4491 
4492 /*
4493  * Report a list of file systems loaded in the kernel. We only report the ones
4494  * which we support and which may be checked by various components to see if
4495  * they are loaded.
4496  */
4497 static void
4498 lxpr_read_filesystems(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4499 {
4500         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "autofs");
4501         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "cgroup");
4502         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "nfs");
4503         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "proc");
4504         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "sysfs");
4505         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "tmpfs");
4506 }
4507 
4508 /*
4509  * lxpr_getattr(): Vnode operation for VOP_GETATTR()
4510  */
4511 static int
4512 lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
4513     caller_context_t *ct)
4514 {
4515         register lxpr_node_t *lxpnp = VTOLXP(vp);
4516         lxpr_nodetype_t type = lxpnp->lxpr_type;
4517         extern uint_t nproc;
4518         int error;
4519 
4520         /*
4521          * Return attributes of underlying vnode if ATTR_REAL
4522          *
4523          * but keep fd files with the symlink permissions
4524          */
4525         if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
4526                 vnode_t *rvp = lxpnp->lxpr_realvp;
4527 
4528                 /*
4529                  * withold attribute information to owner or root
4530                  */
4531                 if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
4532                         return (error);
4533                 }
4534 
4535                 /*
4536                  * now its attributes
4537                  */
4538                 if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
4539                         return (error);
4540                 }
4541 
4542                 /*
4543                  * if it's a file in lx /proc/pid/fd/xx then set its
4544                  * mode and keep it looking like a symlink, fifo or socket
4545                  */
4546                 if (type == LXPR_PID_FD_FD) {
4547                         vap->va_mode = lxpnp->lxpr_mode;
4548                         vap->va_type = lxpnp->lxpr_realvp->v_type;
4549                         vap->va_size = 0;
4550                         vap->va_nlink = 1;
4551                 }
4552                 return (0);
4553         }
4554 
4555         /* Default attributes, that may be overridden below */
4556         bzero(vap, sizeof (*vap));
4557         vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
4558         vap->va_nlink = 1;
4559         vap->va_type = vp->v_type;
4560         vap->va_mode = lxpnp->lxpr_mode;
4561         vap->va_fsid = vp->v_vfsp->vfs_dev;
4562         vap->va_blksize = DEV_BSIZE;
4563         vap->va_uid = lxpnp->lxpr_uid;
4564         vap->va_gid = lxpnp->lxpr_gid;
4565         vap->va_nodeid = lxpnp->lxpr_ino;
4566 
4567         switch (type) {
4568         case LXPR_PROCDIR:
4569                 vap->va_nlink = nproc + 2 + PROCDIRFILES;
4570                 vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
4571                 break;
4572         case LXPR_PIDDIR:
4573                 vap->va_nlink = PIDDIRFILES;
4574                 vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
4575                 break;
4576         case LXPR_PID_TASK_IDDIR:
4577                 vap->va_nlink = TIDDIRFILES;
4578                 vap->va_size = TIDDIRFILES * LXPR_SDSIZE;
4579                 break;
4580         case LXPR_SELF:
4581                 vap->va_uid = crgetruid(curproc->p_cred);
4582                 vap->va_gid = crgetrgid(curproc->p_cred);
4583                 break;
4584         case LXPR_PID_FD_FD:
4585         case LXPR_PID_TID_FD_FD:
4586                 /*
4587                  * Restore VLNK type for lstat-type activity.
4588                  * See lxpr_readlink for more details.
4589                  */
4590                 if ((flags & FOLLOW) == 0)
4591                         vap->va_type = VLNK;
4592         default:
4593                 break;
4594         }
4595 
4596         vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
4597         return (0);
4598 }
4599 
4600 /*
4601  * lxpr_access(): Vnode operation for VOP_ACCESS()
4602  */
4603 static int
4604 lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
4605 {
4606         lxpr_node_t *lxpnp = VTOLXP(vp);
4607         lxpr_nodetype_t type = lxpnp->lxpr_type;
4608         int shift = 0;
4609         proc_t *tp;
4610 
4611         /* lx /proc is a read only file system */
4612         if (mode & VWRITE) {
4613                 switch (type) {
4614                 case LXPR_PID_OOM_SCR_ADJ:
4615                 case LXPR_PID_TID_OOM_SCR_ADJ:
4616                 case LXPR_SYS_KERNEL_COREPATT:
4617                 case LXPR_SYS_NET_CORE_SOMAXCON:
4618                 case LXPR_SYS_VM_OVERCOMMIT_MEM:
4619                 case LXPR_SYS_VM_SWAPPINESS:
4620                 case LXPR_PID_FD_FD:
4621                 case LXPR_PID_TID_FD_FD:
4622                         break;
4623                 default:
4624                         return (EROFS);
4625                 }
4626         }
4627 
4628         /*
4629          * If this is a restricted file, check access permissions.
4630          */
4631         switch (type) {
4632         case LXPR_PIDDIR:
4633                 return (0);
4634         case LXPR_PID_CURDIR:
4635         case LXPR_PID_ENV:
4636         case LXPR_PID_EXE:
4637         case LXPR_PID_LIMITS:
4638         case LXPR_PID_MAPS:
4639         case LXPR_PID_MEM:
4640         case LXPR_PID_ROOTDIR:
4641         case LXPR_PID_FDDIR:
4642         case LXPR_PID_FD_FD:
4643         case LXPR_PID_TID_FDDIR:
4644         case LXPR_PID_TID_FD_FD:
4645                 if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
4646                         return (ENOENT);
4647                 if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
4648                     priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
4649                         lxpr_unlock(tp);
4650                         return (EACCES);
4651                 }
4652                 lxpr_unlock(tp);
4653         default:
4654                 break;
4655         }
4656 
4657         if (lxpnp->lxpr_realvp != NULL) {
4658                 /*
4659                  * For these we use the underlying vnode's accessibility.
4660                  */
4661                 return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
4662         }
4663 
4664         /* If user is root allow access regardless of permission bits */
4665         if (secpolicy_proc_access(cr) == 0)
4666                 return (0);
4667 
4668         /*
4669          * Access check is based on only one of owner, group, public.  If not
4670          * owner, then check group.  If not a member of the group, then check
4671          * public access.
4672          */
4673         if (crgetuid(cr) != lxpnp->lxpr_uid) {
4674                 shift += 3;
4675                 if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
4676                         shift += 3;
4677         }
4678 
4679         mode &= ~(lxpnp->lxpr_mode << shift);
4680 
4681         if (mode == 0)
4682                 return (0);
4683 
4684         return (EACCES);
4685 }
4686 
/*
 * Lookup routine for node types that are not directories: both arguments
 * are ignored and the lookup always fails by returning NULL.
 */
/* ARGSUSED */
static vnode_t *
lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
{
        return (NULL);
}
4693 
4694 /*
4695  * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
4696  */
4697 /* ARGSUSED */
4698 static int
4699 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
4700     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
4701     int *direntflags, pathname_t *realpnp)
4702 {
4703         lxpr_node_t *lxpnp = VTOLXP(dp);
4704         lxpr_nodetype_t type = lxpnp->lxpr_type;
4705         int error;
4706 
4707         ASSERT(dp->v_type == VDIR);
4708         ASSERT(type < LXPR_NFILES);
4709 
4710         /*
4711          * we should never get here because the lookup
4712          * is done on the realvp for these nodes
4713          */
4714         ASSERT(type != LXPR_PID_FD_FD &&
4715             type != LXPR_PID_CURDIR &&
4716             type != LXPR_PID_ROOTDIR);
4717 
4718         /*
4719          * restrict lookup permission to owner or root
4720          */
4721         if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
4722                 return (error);
4723         }
4724 
4725         /*
4726          * Just return the parent vnode if that's where we are trying to go.
4727          */
4728         if (strcmp(comp, "..") == 0) {
4729                 VN_HOLD(lxpnp->lxpr_parent);
4730                 *vpp = lxpnp->lxpr_parent;
4731                 return (0);
4732         }
4733 
4734         /*
4735          * Special handling for directory searches.  Note: null component name
4736          * denotes that the current directory is being searched.
4737          */
4738         if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
4739                 VN_HOLD(dp);
4740                 *vpp = dp;
4741                 return (0);
4742         }
4743 
4744         *vpp = (lxpr_lookup_function[type](dp, comp));
4745         return ((*vpp == NULL) ? ENOENT : 0);
4746 }
4747 
4748 /*
4749  * Do a sequential search on the given directory table
4750  */
4751 static vnode_t *
4752 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
4753     lxpr_dirent_t *dirtab, int dirtablen)
4754 {
4755         lxpr_node_t *lxpnp;
4756         int count;
4757 
4758         for (count = 0; count < dirtablen; count++) {
4759                 if (strcmp(dirtab[count].d_name, comp) == 0) {
4760                         lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
4761                         dp = LXPTOV(lxpnp);
4762                         ASSERT(dp != NULL);
4763                         return (dp);
4764                 }
4765         }
4766         return (NULL);
4767 }
4768 
4769 static vnode_t *
4770 lxpr_lookup_piddir(vnode_t *dp, char *comp)
4771 {
4772         proc_t *p;
4773 
4774         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
4775 
4776         p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
4777         if (p == NULL)
4778                 return (NULL);
4779 
4780         dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
4781 
4782         lxpr_unlock(p);
4783 
4784         return (dp);
4785 }
4786 
4787 /*
4788  * Lookup one of the process's task ID's.
4789  */
4790 static vnode_t *
4791 lxpr_lookup_taskdir(vnode_t *dp, char *comp)
4792 {
4793         lxpr_node_t *dlxpnp = VTOLXP(dp);
4794         lxpr_node_t *lxpnp;
4795         proc_t *p;
4796         pid_t real_pid;
4797         uint_t tid;
4798         int c;
4799         kthread_t *t;
4800 
4801         ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASKDIR);
4802 
4803         /*
4804          * convert the string rendition of the filename to a thread ID
4805          */
4806         tid = 0;
4807         while ((c = *comp++) != '\0') {
4808                 int otid;
4809                 if (c < '0' || c > '9')
4810                         return (NULL);
4811 
4812                 otid = tid;
4813                 tid = 10 * tid + c - '0';
4814                 /* integer overflow */
4815                 if (tid / 10 != otid)
4816                         return (NULL);
4817         }
4818 
4819         /*
4820          * get the proc to work with and lock it
4821          */
4822         real_pid = get_real_pid(dlxpnp->lxpr_pid);
4823         p = lxpr_lock(real_pid);
4824         if ((p == NULL))
4825                 return (NULL);
4826 
4827         /*
4828          * If the process is a zombie or system process
4829          * it can't have any threads.
4830          */
4831         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
4832                 lxpr_unlock(p);
4833                 return (NULL);
4834         }
4835 
4836         if (p->p_brand == &lx_brand) {
4837                 t = lxpr_get_thread(p, tid);
4838         } else {
4839                 /*
4840                  * Only the main thread is visible for non-branded processes.
4841                  */
4842                 t = p->p_tlist;
4843                 if (tid != p->p_pid || t == NULL) {
4844                         t = NULL;
4845                 } else {
4846                         thread_lock(t);
4847                 }
4848         }
4849         if (t == NULL) {
4850                 lxpr_unlock(p);
4851                 return (NULL);
4852         }
4853         thread_unlock(t);
4854 
4855         /*
4856          * Allocate and fill in a new lx /proc taskid node.
4857          * Instead of the last arg being a fd, it is a tid.
4858          */
4859         lxpnp = lxpr_getnode(dp, LXPR_PID_TASK_IDDIR, p, tid);
4860         dp = LXPTOV(lxpnp);
4861         ASSERT(dp != NULL);
4862         lxpr_unlock(p);
4863         return (dp);
4864 }
4865 
4866 /*
4867  * Lookup one of the process's task ID's.
4868  */
4869 static vnode_t *
4870 lxpr_lookup_task_tid_dir(vnode_t *dp, char *comp)
4871 {
4872         lxpr_node_t *dlxpnp = VTOLXP(dp);
4873         lxpr_node_t *lxpnp;
4874         proc_t *p;
4875         pid_t real_pid;
4876         kthread_t *t;
4877         int i;
4878 
4879         ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
4880 
4881         /*
4882          * get the proc to work with and lock it
4883          */
4884         real_pid = get_real_pid(dlxpnp->lxpr_pid);
4885         p = lxpr_lock(real_pid);
4886         if ((p == NULL))
4887                 return (NULL);
4888 
4889         /*
4890          * If the process is a zombie or system process
4891          * it can't have any threads.
4892          */
4893         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
4894                 lxpr_unlock(p);
4895                 return (NULL);
4896         }
4897 
4898         /* need to confirm tid is still there */
4899         t = lxpr_get_thread(p, dlxpnp->lxpr_desc);
4900         if (t == NULL) {
4901                 lxpr_unlock(p);
4902                 return (NULL);
4903         }
4904         thread_unlock(t);
4905 
4906         /*
4907          * allocate and fill in the new lx /proc taskid dir node
4908          */
4909         for (i = 0; i < TIDDIRFILES; i++) {
4910                 if (strcmp(tiddir[i].d_name, comp) == 0) {
4911                         lxpnp = lxpr_getnode(dp, tiddir[i].d_type, p,
4912                             dlxpnp->lxpr_desc);
4913                         dp = LXPTOV(lxpnp);
4914                         ASSERT(dp != NULL);
4915                         lxpr_unlock(p);
4916                         return (dp);
4917                 }
4918         }
4919 
4920         lxpr_unlock(p);
4921         return (NULL);
4922 }
4923 
4924 /*
4925  * Lookup one of the process's open files.
4926  */
4927 static vnode_t *
4928 lxpr_lookup_fddir(vnode_t *dp, char *comp)
4929 {
4930         lxpr_node_t *dlxpnp = VTOLXP(dp);
4931 
4932         ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR ||
4933             dlxpnp->lxpr_type == LXPR_PID_TID_FDDIR);
4934 
4935         return (lxpr_lookup_fdnode(dp, comp));
4936 }
4937 
4938 static vnode_t *
4939 lxpr_lookup_netdir(vnode_t *dp, char *comp)
4940 {
4941         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
4942 
4943         dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
4944 
4945         return (dp);
4946 }
4947 
4948 static vnode_t *
4949 lxpr_lookup_procdir(vnode_t *dp, char *comp)
4950 {
4951         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
4952 
4953         /*
4954          * We know all the names of files & dirs in our file system structure
4955          * except those that are pid names.  These change as pids are created/
4956          * deleted etc., so we just look for a number as the first char to see
4957          * if we are we doing pid lookups.
4958          *
4959          * Don't need to check for "self" as it is implemented as a symlink
4960          */
4961         if (*comp >= '0' && *comp <= '9') {
4962                 pid_t pid = 0;
4963                 lxpr_node_t *lxpnp = NULL;
4964                 proc_t *p;
4965                 int c;
4966 
4967                 while ((c = *comp++) != '\0')
4968                         pid = 10 * pid + c - '0';
4969 
4970                 /*
4971                  * Can't continue if the process is still loading or it doesn't
4972                  * really exist yet (or maybe it just died!)
4973                  */
4974                 p = lxpr_lock(pid);
4975                 if (p == NULL)
4976                         return (NULL);
4977 
4978                 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
4979                         lxpr_unlock(p);
4980                         return (NULL);
4981                 }
4982 
4983                 /*
4984                  * allocate and fill in a new lx /proc node
4985                  */
4986                 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
4987 
4988                 lxpr_unlock(p);
4989 
4990                 dp = LXPTOV(lxpnp);
4991                 ASSERT(dp != NULL);
4992 
4993                 return (dp);
4994         }
4995 
4996         /* Lookup fixed names */
4997         return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
4998 }
4999 
5000 static vnode_t *
5001 lxpr_lookup_sysdir(vnode_t *dp, char *comp)
5002 {
5003         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR);
5004         return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES));
5005 }
5006 
5007 static vnode_t *
5008 lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp)
5009 {
5010         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR);
5011         return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir,
5012             SYS_KERNELDIRFILES));
5013 }
5014 
5015 static vnode_t *
5016 lxpr_lookup_sys_kdir_randdir(vnode_t *dp, char *comp)
5017 {
5018         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
5019         return (lxpr_lookup_common(dp, comp, NULL, sys_randdir,
5020             SYS_RANDDIRFILES));
5021 }
5022 
5023 static vnode_t *
5024 lxpr_lookup_sys_netdir(vnode_t *dp, char *comp)
5025 {
5026         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NETDIR);
5027         return (lxpr_lookup_common(dp, comp, NULL, sys_netdir,
5028             SYS_NETDIRFILES));
5029 }
5030 
5031 static vnode_t *
5032 lxpr_lookup_sys_net_coredir(vnode_t *dp, char *comp)
5033 {
5034         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_COREDIR);
5035         return (lxpr_lookup_common(dp, comp, NULL, sys_net_coredir,
5036             SYS_NET_COREDIRFILES));
5037 }
5038 
5039 static vnode_t *
5040 lxpr_lookup_sys_vmdir(vnode_t *dp, char *comp)
5041 {
5042         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_VMDIR);
5043         return (lxpr_lookup_common(dp, comp, NULL, sys_vmdir,
5044             SYS_VMDIRFILES));
5045 }
5046 
5047 static vnode_t *
5048 lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp)
5049 {
5050         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR);
5051         return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir,
5052             SYS_FSDIRFILES));
5053 }
5054 
5055 static vnode_t *
5056 lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp)
5057 {
5058         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
5059         return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir,
5060             SYS_FS_INOTIFYDIRFILES));
5061 }
5062 
5063 /*
5064  * lxpr_readdir(): Vnode operation for VOP_READDIR()
5065  */
5066 /* ARGSUSED */
5067 static int
5068 lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
5069     caller_context_t *ct, int flags)
5070 {
5071         lxpr_node_t *lxpnp = VTOLXP(dp);
5072         lxpr_nodetype_t type = lxpnp->lxpr_type;
5073         ssize_t uresid;
5074         off_t uoffset;
5075         int error;
5076 
5077         ASSERT(dp->v_type == VDIR);
5078         ASSERT(type < LXPR_NFILES);
5079 
5080         /*
5081          * we should never get here because the readdir
5082          * is done on the realvp for these nodes
5083          */
5084         ASSERT(type != LXPR_PID_FD_FD &&
5085             type != LXPR_PID_CURDIR &&
5086             type != LXPR_PID_ROOTDIR);
5087 
5088         /*
5089          * restrict readdir permission to owner or root
5090          */
5091         if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
5092                 return (error);
5093 
5094         uoffset = uiop->uio_offset;
5095         uresid = uiop->uio_resid;
5096 
5097         /* can't do negative reads */
5098         if (uoffset < 0 || uresid <= 0)
5099                 return (EINVAL);
5100 
5101         /* can't read directory entries that don't exist! */
5102         if (uoffset % LXPR_SDSIZE)
5103                 return (ENOENT);
5104 
5105         return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
5106 }
5107 
/*
 * Readdir routine for node types that are not directories: all arguments
 * are ignored and ENOTDIR is always returned.
 */
/* ARGSUSED */
static int
lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
        return (ENOTDIR);
}
5114 
5115 /*
5116  * This has the common logic for returning directory entries
5117  */
5118 static int
5119 lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
5120     lxpr_dirent_t *dirtab, int dirtablen)
5121 {
5122         /* bp holds one dirent64 structure */
5123         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5124         dirent64_t *dirent = (dirent64_t *)bp;
5125         ssize_t oresid; /* save a copy for testing later */
5126         ssize_t uresid;
5127 
5128         oresid = uiop->uio_resid;
5129 
5130         /* clear out the dirent buffer */
5131         bzero(bp, sizeof (bp));
5132 
5133         /*
5134          * Satisfy user request
5135          */
5136         while ((uresid = uiop->uio_resid) > 0) {
5137                 int dirindex;
5138                 off_t uoffset;
5139                 int reclen;
5140                 int error;
5141 
5142                 uoffset = uiop->uio_offset;
5143                 dirindex  = (uoffset / LXPR_SDSIZE) - 2;
5144 
5145                 if (uoffset == 0) {
5146 
5147                         dirent->d_ino = lxpnp->lxpr_ino;
5148                         dirent->d_name[0] = '.';
5149                         dirent->d_name[1] = '\0';
5150                         reclen = DIRENT64_RECLEN(1);
5151 
5152                 } else if (uoffset == LXPR_SDSIZE) {
5153 
5154                         dirent->d_ino = lxpr_parentinode(lxpnp);
5155                         dirent->d_name[0] = '.';
5156                         dirent->d_name[1] = '.';
5157                         dirent->d_name[2] = '\0';
5158                         reclen = DIRENT64_RECLEN(2);
5159 
5160                 } else if (dirindex >= 0 && dirindex < dirtablen) {
5161                         int slen = strlen(dirtab[dirindex].d_name);
5162 
5163                         dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
5164                             lxpnp->lxpr_pid, 0);
5165 
5166                         VERIFY(slen < LXPNSIZ);
5167                         (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
5168                         reclen = DIRENT64_RECLEN(slen);
5169 
5170                 } else {
5171                         /* Run out of table entries */
5172                         if (eofp) {
5173                                 *eofp = 1;
5174                         }
5175                         return (0);
5176                 }
5177 
5178                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5179                 dirent->d_reclen = (ushort_t)reclen;
5180 
5181                 /*
5182                  * if the size of the data to transfer is greater
5183                  * that that requested then we can't do it this transfer.
5184                  */
5185                 if (reclen > uresid) {
5186                         /*
5187                          * Error if no entries have been returned yet.
5188                          */
5189                         if (uresid == oresid) {
5190                                 return (EINVAL);
5191                         }
5192                         break;
5193                 }
5194 
5195                 /*
5196                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5197                  * by the same amount.  But we want uiop->uio_offset to change
5198                  * in increments of LXPR_SDSIZE, which is different from the
5199                  * number of bytes being returned to the user.  So we set
5200                  * uiop->uio_offset separately, ignoring what uiomove() does.
5201                  */
5202                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5203                     uiop)) != 0)
5204                         return (error);
5205 
5206                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5207         }
5208 
5209         /* Have run out of space, but could have just done last table entry */
5210         if (eofp) {
5211                 *eofp =
5212                     (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
5213         }
5214         return (0);
5215 }
5216 
5217 
5218 static int
5219 lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5220 {
5221         /* bp holds one dirent64 structure */
5222         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5223         dirent64_t *dirent = (dirent64_t *)bp;
5224         ssize_t oresid; /* save a copy for testing later */
5225         ssize_t uresid;
5226         off_t uoffset;
5227         zoneid_t zoneid;
5228         pid_t pid;
5229         int error;
5230         int ceof;
5231 
5232         ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
5233 
5234         oresid = uiop->uio_resid;
5235         zoneid = LXPTOZ(lxpnp)->zone_id;
5236 
5237         /*
5238          * We return directory entries in the order: "." and ".." then the
5239          * unique lxproc files, then the directories corresponding to the
5240          * running processes.  We have defined this as the ordering because
5241          * it allows us to more easily keep track of where we are betwen calls
5242          * to getdents().  If the number of processes changes between calls
5243          * then we can't lose track of where we are in the lxproc files.
5244          */
5245 
5246         /* Do the fixed entries */
5247         error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
5248             PROCDIRFILES);
5249 
5250         /* Finished if we got an error or if we couldn't do all the table */
5251         if (error != 0 || ceof == 0)
5252                 return (error);
5253 
5254         /* clear out the dirent buffer */
5255         bzero(bp, sizeof (bp));
5256 
5257         /* Do the process entries */
5258         while ((uresid = uiop->uio_resid) > 0) {
5259                 proc_t *p;
5260                 int len;
5261                 int reclen;
5262                 int i;
5263 
5264                 uoffset = uiop->uio_offset;
5265 
5266                 /*
5267                  * Stop when entire proc table has been examined.
5268                  */
5269                 i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
5270                 if (i < 0 || i >= v.v_proc) {
5271                         /* Run out of table entries */
5272                         if (eofp) {
5273                                 *eofp = 1;
5274                         }
5275                         return (0);
5276                 }
5277                 mutex_enter(&pidlock);
5278 
5279                 /*
5280                  * Skip indices for which there is no pid_entry, PIDs for
5281                  * which there is no corresponding process, a PID of 0,
5282                  * and anything the security policy doesn't allow
5283                  * us to look at.
5284                  */
5285                 if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
5286                     p->p_pid == 0 ||
5287                     secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
5288                         mutex_exit(&pidlock);
5289                         goto next;
5290                 }
5291                 mutex_exit(&pidlock);
5292 
5293                 /*
5294                  * Convert pid to the Linux default of 1 if we're the zone's
5295                  * init process, or 0 if zsched, otherwise use the value from
5296                  * the proc structure
5297                  */
5298                 if (p->p_pid == curproc->p_zone->zone_proc_initpid) {
5299                         pid = 1;
5300                 } else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) {
5301                         pid = 0;
5302                 } else {
5303                         pid = p->p_pid;
5304                 }
5305 
5306                 /*
5307                  * If this /proc was mounted in the global zone, view
5308                  * all procs; otherwise, only view zone member procs.
5309                  */
5310                 if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
5311                         goto next;
5312                 }
5313 
5314                 ASSERT(p->p_stat != 0);
5315 
5316                 dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
5317                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
5318                 ASSERT(len < LXPNSIZ);
5319                 reclen = DIRENT64_RECLEN(len);
5320 
5321                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5322                 dirent->d_reclen = (ushort_t)reclen;
5323 
5324                 /*
5325                  * if the size of the data to transfer is greater
5326                  * that that requested then we can't do it this transfer.
5327                  */
5328                 if (reclen > uresid) {
5329                         /*
5330                          * Error if no entries have been returned yet.
5331                          */
5332                         if (uresid == oresid)
5333                                 return (EINVAL);
5334                         break;
5335                 }
5336 
5337                 /*
5338                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5339                  * by the same amount.  But we want uiop->uio_offset to change
5340                  * in increments of LXPR_SDSIZE, which is different from the
5341                  * number of bytes being returned to the user.  So we set
5342                  * uiop->uio_offset separately, in the increment of this for
5343                  * the loop, ignoring what uiomove() does.
5344                  */
5345                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5346                     uiop)) != 0)
5347                         return (error);
5348 next:
5349                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5350         }
5351 
5352         if (eofp != NULL) {
5353                 *eofp = (uiop->uio_offset >=
5354                     ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
5355         }
5356 
5357         return (0);
5358 }
5359 
5360 static int
5361 lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5362 {
5363         proc_t *p;
5364         pid_t find_pid;
5365 
5366         ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
5367 
5368         /* can't read its contents if it died */
5369         mutex_enter(&pidlock);
5370 
5371         if (lxpnp->lxpr_pid == 1) {
5372                 find_pid = curproc->p_zone->zone_proc_initpid;
5373         } else if (lxpnp->lxpr_pid == 0) {
5374                 find_pid = curproc->p_zone->zone_zsched->p_pid;
5375         } else {
5376                 find_pid = lxpnp->lxpr_pid;
5377         }
5378         p = prfind(find_pid);
5379 
5380         if (p == NULL || p->p_stat == SIDL) {
5381                 mutex_exit(&pidlock);
5382                 return (ENOENT);
5383         }
5384         mutex_exit(&pidlock);
5385 
5386         return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
5387 }
5388 
5389 static int
5390 lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5391 {
5392         ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
5393         return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
5394 }
5395 
5396 static int
5397 lxpr_readdir_taskdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5398 {
5399         /* bp holds one dirent64 structure */
5400         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5401         dirent64_t *dirent = (dirent64_t *)bp;
5402         ssize_t oresid; /* save a copy for testing later */
5403         ssize_t uresid;
5404         off_t uoffset;
5405         int error;
5406         int ceof;
5407         proc_t *p;
5408         int tiddirsize = -1;
5409         int tasknum;
5410         pid_t real_pid;
5411         kthread_t *t;
5412         boolean_t branded = B_FALSE;
5413 
5414         ASSERT(lxpnp->lxpr_type == LXPR_PID_TASKDIR);
5415 
5416         oresid = uiop->uio_resid;
5417 
5418         real_pid = get_real_pid(lxpnp->lxpr_pid);
5419         p = lxpr_lock(real_pid);
5420 
5421         /* can't read its contents if it died */
5422         if (p == NULL) {
5423                 return (ENOENT);
5424         }
5425         if (p->p_stat == SIDL) {
5426                 lxpr_unlock(p);
5427                 return (ENOENT);
5428         }
5429 
5430         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
5431                 tiddirsize = 0;
5432 
5433         branded = (p->p_brand == &lx_brand);
5434         /*
5435          * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
5436          * going away while we iterate over its threads.
5437          */
5438         mutex_exit(&p->p_lock);
5439 
5440         if (tiddirsize == -1)
5441                 tiddirsize = p->p_lwpcnt;
5442 
5443         /* Do the fixed entries (in this case just "." & "..") */
5444         error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
5445 
5446         /* Finished if we got an error or if we couldn't do all the table */
5447         if (error != 0 || ceof == 0)
5448                 goto out;
5449 
5450         if ((t = p->p_tlist) == NULL) {
5451                 if (eofp != NULL)
5452                         *eofp = 1;
5453                 goto out;
5454         }
5455 
5456         /* clear out the dirent buffer */
5457         bzero(bp, sizeof (bp));
5458 
5459         /*
5460          * Loop until user's request is satisfied or until all thread's have
5461          * been returned.
5462          */
5463         for (tasknum = 0; (uresid = uiop->uio_resid) > 0; tasknum++) {
5464                 int i;
5465                 int reclen;
5466                 int len;
5467                 uint_t emul_tid;
5468                 lx_lwp_data_t *lwpd;
5469 
5470                 uoffset = uiop->uio_offset;
5471 
5472                 /*
5473                  * Stop at the end of the thread list
5474                  */
5475                 i = (uoffset / LXPR_SDSIZE) - 2;
5476                 if (i < 0 || i >= tiddirsize) {
5477                         if (eofp) {
5478                                 *eofp = 1;
5479                         }
5480                         goto out;
5481                 }
5482 
5483                 if (i != tasknum)
5484                         goto next;
5485 
5486                 if (!branded) {
5487                         /*
5488                          * Emulating the goofy linux task model is impossible
5489                          * to do for native processes.  We can compromise by
5490                          * presenting only the main thread to the consumer.
5491                          */
5492                         emul_tid = p->p_pid;
5493                 } else {
5494                         if ((lwpd = ttolxlwp(t)) == NULL) {
5495                                 goto next;
5496                         }
5497                         emul_tid = lwpd->br_pid;
5498                         /*
5499                          * Convert pid to Linux default of 1 if we're the
5500                          * zone's init.
5501                          */
5502                         if (emul_tid == curproc->p_zone->zone_proc_initpid)
5503                                 emul_tid = 1;
5504                 }
5505 
5506                 dirent->d_ino = lxpr_inode(LXPR_PID_TASK_IDDIR, lxpnp->lxpr_pid,
5507                     emul_tid);
5508                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", emul_tid);
5509                 ASSERT(len < LXPNSIZ);
5510                 reclen = DIRENT64_RECLEN(len);
5511 
5512                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5513                 dirent->d_reclen = (ushort_t)reclen;
5514 
5515                 if (reclen > uresid) {
5516                         /*
5517                          * Error if no entries have been returned yet.
5518                          */
5519                         if (uresid == oresid)
5520                                 error = EINVAL;
5521                         goto out;
5522                 }
5523 
5524                 /*
5525                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5526                  * by the same amount.  But we want uiop->uio_offset to change
5527                  * in increments of LXPR_SDSIZE, which is different from the
5528                  * number of bytes being returned to the user.  So we set
5529                  * uiop->uio_offset separately, in the increment of this for
5530                  * the loop, ignoring what uiomove() does.
5531                  */
5532                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5533                     uiop)) != 0)
5534                         goto out;
5535 
5536 next:
5537                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5538 
5539                 if ((t = t->t_forw) == p->p_tlist || !branded) {
5540                         if (eofp != NULL)
5541                                 *eofp = 1;
5542                         goto out;
5543                 }
5544         }
5545 
5546         if (eofp != NULL)
5547                 *eofp = 0;
5548 
5549 out:
5550         mutex_enter(&p->p_lock);
5551         lxpr_unlock(p);
5552         return (error);
5553 }
5554 
5555 static int
5556 lxpr_readdir_task_tid_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5557 {
5558         proc_t *p;
5559         pid_t real_pid;
5560         kthread_t *t;
5561 
5562         ASSERT(lxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
5563 
5564         mutex_enter(&pidlock);
5565 
5566         real_pid = get_real_pid(lxpnp->lxpr_pid);
5567         p = prfind(real_pid);
5568 
5569         /* can't read its contents if it died */
5570         if (p == NULL || p->p_stat == SIDL) {
5571                 mutex_exit(&pidlock);
5572                 return (ENOENT);
5573         }
5574 
5575         mutex_exit(&pidlock);
5576 
5577         /* need to confirm tid is still there */
5578         t = lxpr_get_thread(p, lxpnp->lxpr_desc);
5579         if (t == NULL) {
5580                 /* we can't find this specific thread */
5581                 return (NULL);
5582         }
5583         thread_unlock(t);
5584 
5585         return (lxpr_readdir_common(lxpnp, uiop, eofp, tiddir, TIDDIRFILES));
5586 }
5587 
5588 static int
5589 lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5590 {
5591         /* bp holds one dirent64 structure */
5592         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5593         dirent64_t *dirent = (dirent64_t *)bp;
5594         ssize_t oresid; /* save a copy for testing later */
5595         ssize_t uresid;
5596         off_t uoffset;
5597         int error;
5598         int ceof;
5599         proc_t *p;
5600         int fddirsize = -1;
5601         uf_info_t *fip;
5602 
5603         ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR ||
5604             lxpnp->lxpr_type == LXPR_PID_TID_FDDIR);
5605 
5606         oresid = uiop->uio_resid;
5607 
5608         /* can't read its contents if it died */
5609         p = lxpr_lock(lxpnp->lxpr_pid);
5610         if (p == NULL)
5611                 return (ENOENT);
5612 
5613         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
5614                 fddirsize = 0;
5615 
5616         /*
5617          * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
5618          * going away while we iterate over its fi_list.
5619          */
5620         mutex_exit(&p->p_lock);
5621 
5622         /* Get open file info */
5623         fip = (&(p)->p_user.u_finfo);
5624         mutex_enter(&fip->fi_lock);
5625 
5626         if (fddirsize == -1)
5627                 fddirsize = fip->fi_nfiles;
5628 
5629         /* Do the fixed entries (in this case just "." & "..") */
5630         error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
5631 
5632         /* Finished if we got an error or if we couldn't do all the table */
5633         if (error != 0 || ceof == 0)
5634                 goto out;
5635 
5636         /* clear out the dirent buffer */
5637         bzero(bp, sizeof (bp));
5638 
5639         /*
5640          * Loop until user's request is satisfied or until
5641          * all file descriptors have been examined.
5642          */
5643         for (; (uresid = uiop->uio_resid) > 0;
5644             uiop->uio_offset = uoffset + LXPR_SDSIZE) {
5645                 int reclen;
5646                 int fd;
5647                 int len;
5648 
5649                 uoffset = uiop->uio_offset;
5650 
5651                 /*
5652                  * Stop at the end of the fd list
5653                  */
5654                 fd = (uoffset / LXPR_SDSIZE) - 2;
5655                 if (fd < 0 || fd >= fddirsize) {
5656                         if (eofp) {
5657                                 *eofp = 1;
5658                         }
5659                         goto out;
5660                 }
5661 
5662                 if (fip->fi_list[fd].uf_file == NULL)
5663                         continue;
5664 
5665                 dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
5666                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
5667                 ASSERT(len < LXPNSIZ);
5668                 reclen = DIRENT64_RECLEN(len);
5669 
5670                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5671                 dirent->d_reclen = (ushort_t)reclen;
5672 
5673                 if (reclen > uresid) {
5674                         /*
5675                          * Error if no entries have been returned yet.
5676                          */
5677                         if (uresid == oresid)
5678                                 error = EINVAL;
5679                         goto out;
5680                 }
5681 
5682                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5683                     uiop)) != 0)
5684                         goto out;
5685         }
5686 
5687         if (eofp != NULL) {
5688                 *eofp =
5689                     (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
5690         }
5691 
5692 out:
5693         mutex_exit(&fip->fi_lock);
5694         mutex_enter(&p->p_lock);
5695         lxpr_unlock(p);
5696         return (error);
5697 }
5698 
5699 static int
5700 lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5701 {
5702         ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR);
5703         return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES));
5704 }
5705 
5706 static int
5707 lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5708 {
5709         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR);
5710         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir,
5711             SYS_FSDIRFILES));
5712 }
5713 
5714 static int
5715 lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5716 {
5717         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
5718         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir,
5719             SYS_FS_INOTIFYDIRFILES));
5720 }
5721 
5722 static int
5723 lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5724 {
5725         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR);
5726         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir,
5727             SYS_KERNELDIRFILES));
5728 }
5729 
5730 static int
5731 lxpr_readdir_sys_kdir_randdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5732 {
5733         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
5734         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_randdir,
5735             SYS_RANDDIRFILES));
5736 }
5737 
5738 static int
5739 lxpr_readdir_sys_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5740 {
5741         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NETDIR);
5742         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_netdir,
5743             SYS_NETDIRFILES));
5744 }
5745 
5746 static int
5747 lxpr_readdir_sys_net_coredir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5748 {
5749         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_COREDIR);
5750         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_coredir,
5751             SYS_NET_COREDIRFILES));
5752 }
5753 
5754 static int
5755 lxpr_readdir_sys_vmdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5756 {
5757         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VMDIR);
5758         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_vmdir,
5759             SYS_VMDIRFILES));
5760 }
5761 
5762 static int
5763 lxpr_write_sys_net_core_somaxc(lxpr_node_t *lxpnp, struct uio *uio,
5764     struct cred *cr, caller_context_t *ct)
5765 {
5766         int error;
5767         int res = 0;
5768         size_t olen;
5769         char val[16];   /* big enough for a uint numeric string */
5770         netstack_t *ns;
5771         mod_prop_info_t *ptbl = NULL;
5772         mod_prop_info_t *pinfo = NULL;
5773 
5774         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
5775 
5776         if (uio->uio_loffset != 0)
5777                 return (EINVAL);
5778 
5779         if (uio->uio_resid == 0)
5780                 return (0);
5781 
5782         olen = uio->uio_resid;
5783         if (olen > sizeof (val) - 1)
5784                 return (EINVAL);
5785 
5786         bzero(val, sizeof (val));
5787         error = uiomove(val, olen, UIO_WRITE, uio);
5788         if (error != 0)
5789                 return (error);
5790 
5791         if (val[olen - 1] == '\n')
5792                 val[olen - 1] = '\0';
5793 
5794         if (val[0] == '\0') /* no input */
5795                 return (EINVAL);
5796 
5797         ns = netstack_get_current();
5798         if (ns == NULL)
5799                 return (EINVAL);
5800 
5801         ptbl = ns->netstack_tcp->tcps_propinfo_tbl;
5802         pinfo = mod_prop_lookup(ptbl, "_conn_req_max_q", MOD_PROTO_TCP);
5803         if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, val, 0) != 0)
5804                 res = EINVAL;
5805 
5806         netstack_rele(ns);
5807         return (res);
5808 }
5809 
5810 /* ARGSUSED */
5811 static int
5812 lxpr_write_sys_kernel_corepatt(lxpr_node_t *lxpnp, struct uio *uio,
5813     struct cred *cr, caller_context_t *ct)
5814 {
5815         zone_t *zone = curproc->p_zone;
5816         struct core_globals *cg;
5817         refstr_t *rp, *nrp;
5818         corectl_path_t *ccp;
5819         char val[MAXPATHLEN];
5820         char valtr[MAXPATHLEN];
5821         size_t olen;
5822         int error;
5823 
5824         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);
5825 
5826         cg = zone_getspecific(core_zone_key, zone);
5827         ASSERT(cg != NULL);
5828 
5829         if (secpolicy_coreadm(cr) != 0)
5830                 return (EPERM);
5831 
5832         if (uio->uio_loffset != 0)
5833                 return (EINVAL);
5834 
5835         if (uio->uio_resid == 0)
5836                 return (0);
5837 
5838         olen = uio->uio_resid;
5839         if (olen > sizeof (val) - 1)
5840                 return (EINVAL);
5841 
5842         bzero(val, sizeof (val));
5843         error = uiomove(val, olen, UIO_WRITE, uio);
5844         if (error != 0)
5845                 return (error);
5846 
5847         if (val[olen - 1] == '\n')
5848                 val[olen - 1] = '\0';
5849 
5850         if (val[0] == '|')
5851                 return (EINVAL);
5852 
5853         if ((error = lxpr_core_path_l2s(val, valtr, sizeof (valtr))) != 0)
5854                 return (error);
5855 
5856         nrp = refstr_alloc(valtr);
5857 
5858         ccp = cg->core_default_path;
5859         mutex_enter(&ccp->ccp_mtx);
5860         rp = ccp->ccp_path;
5861         refstr_hold((ccp->ccp_path = nrp));
5862         cg->core_options |= CC_PROCESS_PATH;
5863         mutex_exit(&ccp->ccp_mtx);
5864 
5865         if (rp != NULL)
5866                 refstr_rele(rp);
5867 
5868         return (0);
5869 }
5870 
5871 /*
5872  * lxpr_readlink(): Vnode operation for VOP_READLINK()
5873  */
5874 /* ARGSUSED */
5875 static int
5876 lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
5877 {
5878         char bp[MAXPATHLEN + 1];
5879         size_t buflen = sizeof (bp);
5880         lxpr_node_t *lxpnp = VTOLXP(vp);
5881         vnode_t *rvp = lxpnp->lxpr_realvp;
5882         pid_t pid;
5883         int error = 0;
5884 
5885         /*
5886          * Linux does something very "clever" for /proc/<pid>/fd/<num> entries.
5887          * Open FDs are represented as symlinks, the link contents
5888          * corresponding to the open resource.  For plain files or devices,
5889          * this isn't absurd since one can dereference the symlink to query
5890          * the underlying resource.  For sockets or pipes, it becomes ugly in a
5891          * hurry.  To maintain this human-readable output, those FD symlinks
5892          * point to bogus targets such as "socket:[<inodenum>]".  This requires
5893          * circumventing vfs since the stat/lstat behavior on those FD entries
5894          * will be unusual. (A stat must retrieve information about the open
5895          * socket or pipe.  It cannot fail because the link contents point to
5896          * an absent file.)
5897          *
5898          * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD
5899          * entries.  This bypasses code paths which would normally
5900          * short-circuit on symlinks and allows us to emulate the vfs behavior
5901          * expected by /proc consumers.
5902          */
5903         if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD)
5904                 return (EINVAL);
5905 
5906         /* Try to produce a symlink name for anything that has a realvp */
5907         if (rvp != NULL) {
5908                 if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
5909                         return (error);
5910                 if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) {
5911                         /*
5912                          * Special handling possible for /proc/<pid>/fd/<num>
5913                          * Generate <type>:[<inode>] links, if allowed.
5914                          */
5915                         if (lxpnp->lxpr_type != LXPR_PID_FD_FD ||
5916                             lxpr_readlink_fdnode(lxpnp, bp, buflen) != 0) {
5917                                 return (error);
5918                         }
5919                 }
5920         } else {
5921                 switch (lxpnp->lxpr_type) {
5922                 case LXPR_SELF:
5923                         /*
5924                          * Convert pid to the Linux default of 1 if we're the
5925                          * zone's init process or 0 if zsched.
5926                          */
5927                         if (curproc->p_pid ==
5928                             curproc->p_zone->zone_proc_initpid) {
5929                                 pid = 1;
5930                         } else if (curproc->p_pid ==
5931                             curproc->p_zone->zone_zsched->p_pid) {
5932                                 pid = 0;
5933                         } else {
5934                                 pid = curproc->p_pid;
5935                         }
5936 
5937                         /*
5938                          * Don't need to check result as every possible int
5939                          * will fit within MAXPATHLEN bytes.
5940                          */
5941                         (void) snprintf(bp, buflen, "%d", pid);
5942                         break;
5943                 case LXPR_PID_CURDIR:
5944                 case LXPR_PID_ROOTDIR:
5945                 case LXPR_PID_EXE:
5946                         return (EACCES);
5947                 default:
5948                         /*
5949                          * Need to return error so that nothing thinks
5950                          * that the symlink is empty and hence "."
5951                          */
5952                         return (EINVAL);
5953                 }
5954         }
5955 
5956         /* copy the link data to user space */
5957         return (uiomove(bp, strlen(bp), UIO_READ, uiop));
5958 }
5959 
5960 
5961 /*
5962  * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
5963  * Vnode is no longer referenced, deallocate the file
5964  * and all its resources.
5965  */
5966 /* ARGSUSED */
5967 static void
5968 lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
5969 {
5970         lxpr_freenode(VTOLXP(vp));
5971 }
5972 
/*
 * lxpr_sync(): Vnode operation for VOP_SYNC()
 * There is never anything to flush, and this operation must not fail, so
 * success is reported unconditionally.
 */
static int
lxpr_sync()
{
        int error = 0;  /* always success */

        return (error);
}
5984 
5985 /*
5986  * lxpr_cmp(): Vnode operation for VOP_CMP()
5987  */
5988 static int
5989 lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
5990 {
5991         vnode_t *rvp;
5992 
5993         while (vn_matchops(vp1, lxpr_vnodeops) &&
5994             (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
5995                 vp1 = rvp;
5996         }
5997 
5998         while (vn_matchops(vp2, lxpr_vnodeops) &&
5999             (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
6000                 vp2 = rvp;
6001         }
6002 
6003         if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
6004                 return (vp1 == vp2);
6005         return (VOP_CMP(vp1, vp2, ct));
6006 }
6007 
6008 /*
6009  * lxpr_realvp(): Vnode operation for VOP_REALVP()
6010  */
6011 static int
6012 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
6013 {
6014         vnode_t *rvp;
6015 
6016         if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
6017                 vp = rvp;
6018                 if (VOP_REALVP(vp, &rvp, ct) == 0)
6019                         vp = rvp;
6020         }
6021 
6022         *vpp = vp;
6023         return (0);
6024 }
6025 
6026 static int
6027 lxpr_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
6028     caller_context_t *ct)
6029 {
6030         lxpr_node_t     *lxpnp = VTOLXP(vp);
6031         lxpr_nodetype_t type = lxpnp->lxpr_type;
6032 
6033         switch (type) {
6034         case LXPR_SYS_KERNEL_COREPATT:
6035                 return (lxpr_write_sys_kernel_corepatt(lxpnp, uiop, cr, ct));
6036         case LXPR_SYS_NET_CORE_SOMAXCON:
6037                 return (lxpr_write_sys_net_core_somaxc(lxpnp, uiop, cr, ct));
6038 
6039         default:
6040                 /* pretend we wrote the whole thing */
6041                 uiop->uio_offset += uiop->uio_resid;
6042                 uiop->uio_resid = 0;
6043                 return (0);
6044         }
6045 }
6046 
6047 /*
6048  * We need to allow open with O_CREAT for the oom_score_adj file.
6049  */
6050 /*ARGSUSED7*/
6051 static int
6052 lxpr_create(struct vnode *dvp, char *nm, struct vattr *vap,
6053     enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred,
6054     int flag, caller_context_t *ct, vsecattr_t *vsecp)
6055 {
6056         lxpr_node_t *lxpnp = VTOLXP(dvp);
6057         lxpr_nodetype_t type = lxpnp->lxpr_type;
6058         vnode_t *vp = NULL;
6059         int error;
6060 
6061         ASSERT(type < LXPR_NFILES);
6062 
6063         /*
6064          * restrict create permission to owner or root
6065          */
6066         if ((error = lxpr_access(dvp, VEXEC, 0, cred, ct)) != 0) {
6067                 return (error);
6068         }
6069 
6070         if (*nm == '\0')
6071                 return (EPERM);
6072 
6073         if (dvp->v_type != VDIR)
6074                 return (EPERM);
6075 
6076         if (exclusive == EXCL)
6077                 return (EEXIST);
6078 
6079         /*
6080          * We're currently restricting O_CREAT to:
6081          * - /proc/<pid>/fd/<num>
6082          * - /proc/<pid>/oom_score_adj
6083          * - /proc/<pid>/task/<tid>/fd/<num>
6084          * - /proc/<pid>/task/<tid>/oom_score_adj
6085          * - /proc/sys/kernel/core_pattern
6086          * - /proc/sys/net/core/somaxconn
6087          * - /proc/sys/vm/overcommit_memory
6088          * - /proc/sys/vm/swappiness
6089          */
6090         switch (type) {
6091         case LXPR_PIDDIR:
6092         case LXPR_PID_TASK_IDDIR:
6093                 if (strcmp(nm, "oom_score_adj") == 0) {
6094                         proc_t *p;
6095                         p = lxpr_lock(lxpnp->lxpr_pid);
6096                         if (p != NULL) {
6097                                 vp = lxpr_lookup_common(dvp, nm, p, piddir,
6098                                     PIDDIRFILES);
6099                         }
6100                         lxpr_unlock(p);
6101                 }
6102                 break;
6103 
6104         case LXPR_SYS_NET_COREDIR:
6105                 if (strcmp(nm, "somaxconn") == 0) {
6106                         vp = lxpr_lookup_common(dvp, nm, NULL, sys_net_coredir,
6107                             SYS_NET_COREDIRFILES);
6108                 }
6109                 break;
6110 
6111         case LXPR_SYS_KERNELDIR:
6112                 if (strcmp(nm, "core_pattern") == 0) {
6113                         vp = lxpr_lookup_common(dvp, nm, NULL, sys_kerneldir,
6114                             SYS_KERNELDIRFILES);
6115                 }
6116                 break;
6117 
6118         case LXPR_SYS_VMDIR:
6119                 if (strcmp(nm, "overcommit_memory") == 0 ||
6120                     strcmp(nm, "swappiness") == 0) {
6121                         vp = lxpr_lookup_common(dvp, nm, NULL, sys_vmdir,
6122                             SYS_VMDIRFILES);
6123                 }
6124                 break;
6125 
6126         case LXPR_PID_FDDIR:
6127         case LXPR_PID_TID_FDDIR:
6128                 vp = lxpr_lookup_fdnode(dvp, nm);
6129                 break;
6130 
6131         default:
6132                 vp = NULL;
6133                 break;
6134         }
6135 
6136         if (vp != NULL) {
6137                 /* Creating an existing file, allow it for regular files. */
6138                 if (vp->v_type == VDIR)
6139                         return (EISDIR);
6140 
6141                 /* confirm permissions against existing file */
6142                 if ((error = lxpr_access(vp, mode, 0, cred, ct)) != 0) {
6143                         VN_RELE(vp);
6144                         return (error);
6145                 }
6146 
6147                 *vpp = vp;
6148                 return (0);
6149         }
6150 
6151         /*
6152          * Linux proc does not allow creation of addition, non-subsystem
6153          * specific files inside the hierarchy.  ENOENT is tossed when such
6154          * actions are attempted.
6155          */
6156         return (ENOENT);
6157 }