1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2016 Joyent, Inc.
  25  */
  26 
  27 /*
  28  * lx_proc -- a Linux-compatible /proc for the LX brand
  29  *
  30  * We have -- confusingly -- two implementations of Linux /proc.  One is to
  31  * support native (but Linux-borne) programs that wish to view the native
  32  * system through the Linux /proc model; the other -- this one -- is to
  33  * support Linux binaries via the LX brand.  These two implementations differ
  34  * greatly in their aspirations (and their willingness to bend the truth
  35  * of the system to accommodate those aspirations); they should not be unified.
  36  */
  37 
  38 #include <sys/cpupart.h>
  39 #include <sys/cpuvar.h>
  40 #include <sys/session.h>
  41 #include <sys/vmparam.h>
  42 #include <sys/mman.h>
  43 #include <vm/rm.h>
  44 #include <vm/seg_vn.h>
  45 #include <sys/sdt.h>
  46 #include <lx_signum.h>
  47 #include <sys/strlog.h>
  48 #include <sys/stropts.h>
  49 #include <sys/cmn_err.h>
  50 #include <sys/lx_brand.h>
  51 #include <lx_auxv.h>
  52 #include <sys/x86_archext.h>
  53 #include <sys/archsystm.h>
  54 #include <sys/fp.h>
  55 #include <sys/pool_pset.h>
  56 #include <sys/pset.h>
  57 #include <sys/zone.h>
  58 #include <sys/pghw.h>
  59 #include <sys/vfs_opreg.h>
  60 #include <sys/param.h>
  61 #include <sys/utsname.h>
  62 #include <sys/rctl.h>
  63 #include <sys/kstat.h>
  64 #include <sys/lx_misc.h>
  65 #include <sys/brand.h>
  66 #include <sys/cred_impl.h>
  67 #include <sys/tihdr.h>
  68 #include <sys/corectl.h>
  69 #include <inet/ip.h>
  70 #include <inet/ip_ire.h>
  71 #include <inet/ip6.h>
  72 #include <inet/ip_if.h>
  73 #include <inet/tcp.h>
  74 #include <inet/tcp_impl.h>
  75 #include <inet/udp_impl.h>
  76 #include <inet/ipclassifier.h>
  77 #include <sys/socketvar.h>
  78 #include <fs/sockfs/socktpi.h>
  79 
  80 /* Dependent on procfs */
  81 extern kthread_t *prchoose(proc_t *);
  82 extern int prreadargv(proc_t *, char *, size_t, size_t *);
  83 extern int prreadenvv(proc_t *, char *, size_t, size_t *);
  84 extern int prreadbuf(proc_t *, uintptr_t, uint8_t *, size_t, size_t *);
  85 
  86 #include "lx_proc.h"
  87 
  88 extern pgcnt_t swapfs_minfree;
  89 extern time_t boot_time;
  90 
  91 /*
  92  * Pointer to the vnode ops vector for this fs.
  93  * This is instantiated in lxprinit() in lxpr_vfsops.c
  94  */
  95 vnodeops_t *lxpr_vnodeops;
  96 
  97 static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
  98 static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
  99     caller_context_t *);
 100 static int lxpr_create(struct vnode *, char *, struct vattr *, enum vcexcl,
 101     int, struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *);
 102 static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
 103 static int lxpr_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
 104 static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
 105     caller_context_t *);
 106 static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
 107 static int lxpr_lookup(vnode_t *, char *, vnode_t **,
 108     pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
 109     pathname_t *);
 110 static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
 111     caller_context_t *, int);
 112 static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
 113 static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
 114 static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
 115 static int lxpr_sync(void);
 116 static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
 117 
 118 static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
 119 static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
 120 static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
 121 static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
 122 static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
 123 static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *);
 124 static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *);
 125 static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *);
 126 static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *);
 127 static vnode_t *lxpr_lookup_sys_kdir_randdir(vnode_t *, char *);
 128 static vnode_t *lxpr_lookup_sys_netdir(vnode_t *, char *);
 129 static vnode_t *lxpr_lookup_sys_net_coredir(vnode_t *, char *);
 130 static vnode_t *lxpr_lookup_sys_vmdir(vnode_t *, char *);
 131 static vnode_t *lxpr_lookup_taskdir(vnode_t *, char *);
 132 static vnode_t *lxpr_lookup_task_tid_dir(vnode_t *, char *);
 133 
 134 static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
 135 static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
 136 static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
 137 static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
 138 static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
 139 static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *);
 140 static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *);
 141 static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *);
 142 static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *);
 143 static int lxpr_readdir_sys_kdir_randdir(lxpr_node_t *, uio_t *, int *);
 144 static int lxpr_readdir_sys_netdir(lxpr_node_t *, uio_t *, int *);
 145 static int lxpr_readdir_sys_net_coredir(lxpr_node_t *, uio_t *, int *);
 146 static int lxpr_readdir_sys_vmdir(lxpr_node_t *, uio_t *, int *);
 147 static int lxpr_readdir_taskdir(lxpr_node_t *, uio_t *, int *);
 148 static int lxpr_readdir_task_tid_dir(lxpr_node_t *, uio_t *, int *);
 149 
 150 static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
 151 static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
 152 static void lxpr_read_cgroups(lxpr_node_t *, lxpr_uiobuf_t *);
 153 static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
 154 static void lxpr_read_diskstats(lxpr_node_t *, lxpr_uiobuf_t *);
 155 static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
 156 static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
 157 static void lxpr_read_filesystems(lxpr_node_t *, lxpr_uiobuf_t *);
 158 static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t);
 159 static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
 160 static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
 161 static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
 162 static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
 163 static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
 164 static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *);
 165 static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
 166 static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
 167 
 168 static void lxpr_read_pid_auxv(lxpr_node_t *, lxpr_uiobuf_t *);
 169 static void lxpr_read_pid_cgroup(lxpr_node_t *, lxpr_uiobuf_t *);
 170 static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
 171 static void lxpr_read_pid_comm(lxpr_node_t *, lxpr_uiobuf_t *);
 172 static void lxpr_read_pid_env(lxpr_node_t *, lxpr_uiobuf_t *);
 173 static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *);
 174 static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
 175 static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *);
 176 static void lxpr_read_pid_oom_scr_adj(lxpr_node_t *, lxpr_uiobuf_t *);
 177 static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
 178 static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
 179 static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
 180 
 181 static void lxpr_read_pid_tid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
 182 static void lxpr_read_pid_tid_status(lxpr_node_t *, lxpr_uiobuf_t *);
 183 
 184 static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
 185 static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
 186 static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
 187 static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *);
 188 static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
 189 static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
 190 static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
 191 static void lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *);
 192 static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
 193 static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
 194 static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
 195 static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
 196 static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
 197 static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
 198 static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
 199 static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
 200 static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
 201 static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
 202 static void lxpr_read_net_tcp6(lxpr_node_t *, lxpr_uiobuf_t *);
 203 static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
 204 static void lxpr_read_net_udp6(lxpr_node_t *, lxpr_uiobuf_t *);
 205 static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
 206 static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *,
 207     lxpr_uiobuf_t *);
 208 static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *,
 209     lxpr_uiobuf_t *);
 210 static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *,
 211     lxpr_uiobuf_t *);
 212 static void lxpr_read_sys_kernel_caplcap(lxpr_node_t *, lxpr_uiobuf_t *);
 213 static void lxpr_read_sys_kernel_corepatt(lxpr_node_t *, lxpr_uiobuf_t *);
 214 static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *);
 215 static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *);
 216 static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *);
 217 static void lxpr_read_sys_kernel_osrel(lxpr_node_t *, lxpr_uiobuf_t *);
 218 static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *);
 219 static void lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *, lxpr_uiobuf_t *);
 220 static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, lxpr_uiobuf_t *);
 221 static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *);
 222 static void lxpr_read_sys_net_core_somaxc(lxpr_node_t *, lxpr_uiobuf_t *);
 223 static void lxpr_read_sys_vm_minfr_kb(lxpr_node_t *, lxpr_uiobuf_t *);
 224 static void lxpr_read_sys_vm_nhpages(lxpr_node_t *, lxpr_uiobuf_t *);
 225 static void lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *, lxpr_uiobuf_t *);
 226 static void lxpr_read_sys_vm_swappiness(lxpr_node_t *, lxpr_uiobuf_t *);
 227 
 228 static int lxpr_write_sys_net_core_somaxc(lxpr_node_t *, uio_t *, cred_t *,
 229     caller_context_t *);
 230 static int lxpr_write_sys_kernel_corepatt(lxpr_node_t *, uio_t *, cred_t *,
 231     caller_context_t *);
 232 
 233 /*
 234  * Simple conversion
 235  */
 236 #define btok(x) ((x) >> 10)                       /* bytes to kbytes */
 237 #define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */
 238 
 239 #define ttolxlwp(t)     ((struct lx_lwp_data *)ttolwpbrand(t))
 240 
 241 extern rctl_hndl_t rc_zone_msgmni;
 242 extern rctl_hndl_t rc_zone_shmmax;
 243 #define FOURGB  4294967295
 244 
 245 /*
 246  * The maximum length of the concatenation of argument vector strings we
 247  * will return to the user via the branded procfs. Likewise for the env vector.
 248  */
 249 int lxpr_maxargvlen = 4096;
 250 int lxpr_maxenvvlen = 4096;
 251 
 252 /*
 253  * The lx /proc vnode operations vector
 254  */
 255 const fs_operation_def_t lxpr_vnodeops_template[] = {
 256         VOPNAME_OPEN,           { .vop_open = lxpr_open },
 257         VOPNAME_CLOSE,          { .vop_close = lxpr_close },
 258         VOPNAME_READ,           { .vop_read = lxpr_read },
 259         VOPNAME_WRITE,          { .vop_read = lxpr_write },
 260         VOPNAME_GETATTR,        { .vop_getattr = lxpr_getattr },
 261         VOPNAME_ACCESS,         { .vop_access = lxpr_access },
 262         VOPNAME_LOOKUP,         { .vop_lookup = lxpr_lookup },
 263         VOPNAME_CREATE,         { .vop_create = lxpr_create },
 264         VOPNAME_READDIR,        { .vop_readdir = lxpr_readdir },
 265         VOPNAME_READLINK,       { .vop_readlink = lxpr_readlink },
 266         VOPNAME_FSYNC,          { .error = lxpr_sync },
 267         VOPNAME_SEEK,           { .error = lxpr_sync },
 268         VOPNAME_INACTIVE,       { .vop_inactive = lxpr_inactive },
 269         VOPNAME_CMP,            { .vop_cmp = lxpr_cmp },
 270         VOPNAME_REALVP,         { .vop_realvp = lxpr_realvp },
 271         NULL,                   NULL
 272 };
 273 
 274 
 275 /*
 276  * file contents of an lx /proc directory.
 277  */
 278 static lxpr_dirent_t lx_procdir[] = {
 279         { LXPR_CGROUPS,         "cgroups" },
 280         { LXPR_CMDLINE,         "cmdline" },
 281         { LXPR_CPUINFO,         "cpuinfo" },
 282         { LXPR_DEVICES,         "devices" },
 283         { LXPR_DISKSTATS,       "diskstats" },
 284         { LXPR_DMA,             "dma" },
 285         { LXPR_FILESYSTEMS,     "filesystems" },
 286         { LXPR_INTERRUPTS,      "interrupts" },
 287         { LXPR_IOPORTS,         "ioports" },
 288         { LXPR_KCORE,           "kcore" },
 289         { LXPR_KMSG,            "kmsg" },
 290         { LXPR_LOADAVG,         "loadavg" },
 291         { LXPR_MEMINFO,         "meminfo" },
 292         { LXPR_MODULES,         "modules" },
 293         { LXPR_MOUNTS,          "mounts" },
 294         { LXPR_NETDIR,          "net" },
 295         { LXPR_PARTITIONS,      "partitions" },
 296         { LXPR_SELF,            "self" },
 297         { LXPR_STAT,            "stat" },
 298         { LXPR_SWAPS,           "swaps" },
 299         { LXPR_SYSDIR,          "sys" },
 300         { LXPR_UPTIME,          "uptime" },
 301         { LXPR_VERSION,         "version" }
 302 };
 303 
 304 #define PROCDIRFILES    (sizeof (lx_procdir) / sizeof (lx_procdir[0]))
 305 
 306 /*
 307  * Contents of an lx /proc/<pid> directory.
 308  */
 309 static lxpr_dirent_t piddir[] = {
 310         { LXPR_PID_AUXV,        "auxv" },
 311         { LXPR_PID_CGROUP,      "cgroup" },
 312         { LXPR_PID_CMDLINE,     "cmdline" },
 313         { LXPR_PID_COMM,        "comm" },
 314         { LXPR_PID_CPU,         "cpu" },
 315         { LXPR_PID_CURDIR,      "cwd" },
 316         { LXPR_PID_ENV,         "environ" },
 317         { LXPR_PID_EXE,         "exe" },
 318         { LXPR_PID_LIMITS,      "limits" },
 319         { LXPR_PID_MAPS,        "maps" },
 320         { LXPR_PID_MEM,         "mem" },
 321         { LXPR_PID_MOUNTINFO,   "mountinfo" },
 322         { LXPR_PID_OOM_SCR_ADJ, "oom_score_adj" },
 323         { LXPR_PID_ROOTDIR,     "root" },
 324         { LXPR_PID_STAT,        "stat" },
 325         { LXPR_PID_STATM,       "statm" },
 326         { LXPR_PID_STATUS,      "status" },
 327         { LXPR_PID_TASKDIR,     "task" },
 328         { LXPR_PID_FDDIR,       "fd" }
 329 };
 330 
 331 #define PIDDIRFILES     (sizeof (piddir) / sizeof (piddir[0]))
 332 
 333 /*
 334  * Contents of an lx /proc/<pid>/task/<tid> directory.
 335  */
 336 static lxpr_dirent_t tiddir[] = {
 337         { LXPR_PID_TID_AUXV,    "auxv" },
 338         { LXPR_PID_CGROUP,      "cgroup" },
 339         { LXPR_PID_CMDLINE,     "cmdline" },
 340         { LXPR_PID_TID_COMM,    "comm" },
 341         { LXPR_PID_CPU,         "cpu" },
 342         { LXPR_PID_CURDIR,      "cwd" },
 343         { LXPR_PID_ENV,         "environ" },
 344         { LXPR_PID_EXE,         "exe" },
 345         { LXPR_PID_LIMITS,      "limits" },
 346         { LXPR_PID_MAPS,        "maps" },
 347         { LXPR_PID_MEM,         "mem" },
 348         { LXPR_PID_MOUNTINFO,   "mountinfo" },
 349         { LXPR_PID_TID_OOM_SCR_ADJ,     "oom_score_adj" },
 350         { LXPR_PID_ROOTDIR,     "root" },
 351         { LXPR_PID_TID_STAT,    "stat" },
 352         { LXPR_PID_STATM,       "statm" },
 353         { LXPR_PID_TID_STATUS,  "status" },
 354         { LXPR_PID_FDDIR,       "fd" }
 355 };
 356 
 357 #define TIDDIRFILES     (sizeof (tiddir) / sizeof (tiddir[0]))
 358 
 359 #define LX_RLIM_INFINITY        0xFFFFFFFFFFFFFFFF
 360 
 361 #define RCTL_INFINITE(x) \
 362         ((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \
 363         (x->rcv_flagaction & RCTL_GLOBAL_INFINITE))
 364 
 365 typedef struct lxpr_rlimtab {
 366         char    *rlim_name;     /* limit name */
 367         char    *rlim_unit;     /* limit unit */
 368         char    *rlim_rctl;     /* rctl source */
 369 } lxpr_rlimtab_t;
 370 
 371 static lxpr_rlimtab_t lxpr_rlimtab[] = {
 372         { "Max cpu time",       "seconds",      "process.max-cpu-time" },
 373         { "Max file size",      "bytes",        "process.max-file-size" },
 374         { "Max data size",      "bytes",        "process.max-data-size" },
 375         { "Max stack size",     "bytes",        "process.max-stack-size" },
 376         { "Max core file size", "bytes",        "process.max-core-size" },
 377         { "Max resident set",   "bytes",        "zone.max-physical-memory" },
 378         { "Max processes",      "processes",    "zone.max-lwps" },
 379         { "Max open files",     "files",        "process.max-file-descriptor" },
 380         { "Max locked memory",  "bytes",        "zone.max-locked-memory" },
 381         { "Max address space",  "bytes",        "process.max-address-space" },
 382         { "Max file locks",     "locks",        NULL },
 383         { "Max pending signals",        "signals",
 384                 "process.max-sigqueue-size" },
 385         { "Max msgqueue size",  "bytes",        "process.max-msg-messages" },
 386         { NULL, NULL, NULL }
 387 };
 388 
 389 
 390 /*
 391  * contents of lx /proc/net directory
 392  */
 393 static lxpr_dirent_t netdir[] = {
 394         { LXPR_NET_ARP,         "arp" },
 395         { LXPR_NET_DEV,         "dev" },
 396         { LXPR_NET_DEV_MCAST,   "dev_mcast" },
 397         { LXPR_NET_IF_INET6,    "if_inet6" },
 398         { LXPR_NET_IGMP,        "igmp" },
 399         { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
 400         { LXPR_NET_IP_MR_VIF,   "ip_mr_vif" },
 401         { LXPR_NET_IPV6_ROUTE,  "ipv6_route" },
 402         { LXPR_NET_MCFILTER,    "mcfilter" },
 403         { LXPR_NET_NETSTAT,     "netstat" },
 404         { LXPR_NET_RAW,         "raw" },
 405         { LXPR_NET_ROUTE,       "route" },
 406         { LXPR_NET_RPC,         "rpc" },
 407         { LXPR_NET_RT_CACHE,    "rt_cache" },
 408         { LXPR_NET_SOCKSTAT,    "sockstat" },
 409         { LXPR_NET_SNMP,        "snmp" },
 410         { LXPR_NET_STAT,        "stat" },
 411         { LXPR_NET_TCP,         "tcp" },
 412         { LXPR_NET_TCP6,        "tcp6" },
 413         { LXPR_NET_UDP,         "udp" },
 414         { LXPR_NET_UDP6,        "udp6" },
 415         { LXPR_NET_UNIX,        "unix" }
 416 };
 417 
 418 #define NETDIRFILES     (sizeof (netdir) / sizeof (netdir[0]))
 419 
 420 /*
 421  * contents of /proc/sys directory
 422  */
 423 static lxpr_dirent_t sysdir[] = {
 424         { LXPR_SYS_FSDIR,       "fs" },
 425         { LXPR_SYS_KERNELDIR,   "kernel" },
 426         { LXPR_SYS_NETDIR,      "net" },
 427         { LXPR_SYS_VMDIR,       "vm" },
 428 };
 429 
 430 #define SYSDIRFILES     (sizeof (sysdir) / sizeof (sysdir[0]))
 431 
 432 /*
 433  * contents of /proc/sys/fs directory
 434  */
 435 static lxpr_dirent_t sys_fsdir[] = {
 436         { LXPR_SYS_FS_INOTIFYDIR,       "inotify" },
 437 };
 438 
 439 #define SYS_FSDIRFILES (sizeof (sys_fsdir) / sizeof (sys_fsdir[0]))
 440 
 441 /*
 442  * contents of /proc/sys/fs/inotify directory
 443  */
 444 static lxpr_dirent_t sys_fs_inotifydir[] = {
 445         { LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS,        "max_queued_events" },
 446         { LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES,       "max_user_instances" },
 447         { LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES,         "max_user_watches" },
 448 };
 449 
 450 #define SYS_FS_INOTIFYDIRFILES \
 451         (sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0]))
 452 
 453 /*
 454  * contents of /proc/sys/kernel directory
 455  */
 456 static lxpr_dirent_t sys_kerneldir[] = {
 457         { LXPR_SYS_KERNEL_CAPLCAP,      "cap_last_cap" },
 458         { LXPR_SYS_KERNEL_COREPATT,     "core_pattern" },
 459         { LXPR_SYS_KERNEL_HOSTNAME,     "hostname" },
 460         { LXPR_SYS_KERNEL_MSGMNI,       "msgmni" },
 461         { LXPR_SYS_KERNEL_NGROUPS_MAX,  "ngroups_max" },
 462         { LXPR_SYS_KERNEL_OSREL,        "osrelease" },
 463         { LXPR_SYS_KERNEL_PID_MAX,      "pid_max" },
 464         { LXPR_SYS_KERNEL_RANDDIR,      "random" },
 465         { LXPR_SYS_KERNEL_SHMMAX,       "shmmax" },
 466         { LXPR_SYS_KERNEL_THREADS_MAX,  "threads-max" },
 467 };
 468 
 469 #define SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0]))
 470 
 471 /*
 472  * contents of /proc/sys/kernel/random directory
 473  */
 474 static lxpr_dirent_t sys_randdir[] = {
 475         { LXPR_SYS_KERNEL_RAND_BOOTID,  "boot_id" },
 476 };
 477 
 478 #define SYS_RANDDIRFILES (sizeof (sys_randdir) / sizeof (sys_randdir[0]))
 479 
 480 /*
 481  * contents of /proc/sys/net directory
 482  */
 483 static lxpr_dirent_t sys_netdir[] = {
 484         { LXPR_SYS_NET_COREDIR,         "core" },
 485 };
 486 
 487 #define SYS_NETDIRFILES (sizeof (sys_netdir) / sizeof (sys_netdir[0]))
 488 
 489 /*
 490  * contents of /proc/sys/net/core directory
 491  */
 492 static lxpr_dirent_t sys_net_coredir[] = {
 493         { LXPR_SYS_NET_CORE_SOMAXCON,   "somaxconn" },
 494 };
 495 
 496 #define SYS_NET_COREDIRFILES \
 497         (sizeof (sys_net_coredir) / sizeof (sys_net_coredir[0]))
 498 
 499 /*
 500  * contents of /proc/sys/vm directory
 501  */
 502 static lxpr_dirent_t sys_vmdir[] = {
 503         { LXPR_SYS_VM_MINFR_KB,         "min_free_kbytes" },
 504         { LXPR_SYS_VM_NHUGEP,           "nr_hugepages" },
 505         { LXPR_SYS_VM_OVERCOMMIT_MEM,   "overcommit_memory" },
 506         { LXPR_SYS_VM_SWAPPINESS,       "swappiness" },
 507 };
 508 
 509 #define SYS_VMDIRFILES (sizeof (sys_vmdir) / sizeof (sys_vmdir[0]))
 510 
 511 /*
 512  * lxpr_open(): Vnode operation for VOP_OPEN()
 513  */
 514 static int
 515 lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 516 {
 517         vnode_t         *vp = *vpp;
 518         lxpr_node_t     *lxpnp = VTOLXP(vp);
 519         lxpr_nodetype_t type = lxpnp->lxpr_type;
 520         vnode_t         *rvp;
 521         int             error = 0;
 522 
 523         if (flag & FWRITE) {
 524                 /* Restrict writes to certain files */
 525                 switch (type) {
 526                 case LXPR_PID_OOM_SCR_ADJ:
 527                 case LXPR_PID_TID_OOM_SCR_ADJ:
 528                 case LXPR_SYS_KERNEL_COREPATT:
 529                 case LXPR_SYS_NET_CORE_SOMAXCON:
 530                 case LXPR_SYS_VM_OVERCOMMIT_MEM:
 531                 case LXPR_SYS_VM_SWAPPINESS:
 532                 case LXPR_PID_FD_FD:
 533                 case LXPR_PID_TID_FD_FD:
 534                         break;
 535                 default:
 536                         return (EPERM);
 537                 }
 538         }
 539 
 540         /*
 541          * If we are opening an underlying file only allow regular files,
 542          * fifos or sockets; reject the open for anything else.
 543          * Just do it if we are opening the current or root directory.
 544          */
 545         if (lxpnp->lxpr_realvp != NULL) {
 546                 rvp = lxpnp->lxpr_realvp;
 547 
 548                 if (type == LXPR_PID_FD_FD && rvp->v_type != VREG &&
 549                     rvp->v_type != VFIFO && rvp->v_type != VSOCK) {
 550                         error = EACCES;
 551                 } else {
 552                         if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) {
 553                                 /*
 554                                  * This flag lets the fifo open know that
 555                                  * we're using proc/fd to open a fd which we
 556                                  * already have open. Otherwise, the fifo might
 557                                  * reject an open if the other end has closed.
 558                                  */
 559                                 flag |= FKLYR;
 560                         }
 561                         /*
 562                          * Need to hold rvp since VOP_OPEN() may release it.
 563                          */
 564                         VN_HOLD(rvp);
 565                         error = VOP_OPEN(&rvp, flag, cr, ct);
 566                         if (error) {
 567                                 VN_RELE(rvp);
 568                         } else {
 569                                 *vpp = rvp;
 570                                 VN_RELE(vp);
 571                         }
 572                 }
 573         }
 574 
 575         return (error);
 576 }
 577 
 578 
 579 /*
 580  * lxpr_close(): Vnode operation for VOP_CLOSE()
 581  */
 582 /* ARGSUSED */
 583 static int
 584 lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
 585     caller_context_t *ct)
 586 {
 587         lxpr_node_t     *lxpr = VTOLXP(vp);
 588         lxpr_nodetype_t type = lxpr->lxpr_type;
 589 
 590         /*
 591          * we should never get here because the close is done on the realvp
 592          * for these nodes
 593          */
 594         ASSERT(type != LXPR_PID_FD_FD &&
 595             type != LXPR_PID_CURDIR &&
 596             type != LXPR_PID_ROOTDIR &&
 597             type != LXPR_PID_EXE);
 598 
 599         return (0);
 600 }
 601 
 602 static void (*lxpr_read_function[LXPR_NFILES])() = {
 603         lxpr_read_isdir,                /* /proc                */
 604         lxpr_read_isdir,                /* /proc/<pid>            */
 605         lxpr_read_pid_auxv,             /* /proc/<pid>/auxv       */
 606         lxpr_read_pid_cgroup,           /* /proc/<pid>/cgroup     */
 607         lxpr_read_pid_cmdline,          /* /proc/<pid>/cmdline    */
 608         lxpr_read_pid_comm,             /* /proc/<pid>/comm       */
 609         lxpr_read_empty,                /* /proc/<pid>/cpu        */
 610         lxpr_read_invalid,              /* /proc/<pid>/cwd        */
 611         lxpr_read_pid_env,              /* /proc/<pid>/environ    */
 612         lxpr_read_invalid,              /* /proc/<pid>/exe        */
 613         lxpr_read_pid_limits,           /* /proc/<pid>/limits     */
 614         lxpr_read_pid_maps,             /* /proc/<pid>/maps       */
 615         lxpr_read_empty,                /* /proc/<pid>/mem        */
 616         lxpr_read_pid_mountinfo,        /* /proc/<pid>/mountinfo */
 617         lxpr_read_pid_oom_scr_adj,      /* /proc/<pid>/oom_score_adj */
 618         lxpr_read_invalid,              /* /proc/<pid>/root       */
 619         lxpr_read_pid_stat,             /* /proc/<pid>/stat       */
 620         lxpr_read_pid_statm,            /* /proc/<pid>/statm      */
 621         lxpr_read_pid_status,           /* /proc/<pid>/status     */
 622         lxpr_read_isdir,                /* /proc/<pid>/task       */
 623         lxpr_read_isdir,                /* /proc/<pid>/task/nn    */
 624         lxpr_read_isdir,                /* /proc/<pid>/fd */
 625         lxpr_read_fd,                   /* /proc/<pid>/fd/nn      */
 626         lxpr_read_pid_auxv,             /* /proc/<pid>/task/<tid>/auxv      */
 627         lxpr_read_pid_cgroup,           /* /proc/<pid>/task/<tid>/cgroup */
 628         lxpr_read_pid_cmdline,          /* /proc/<pid>/task/<tid>/cmdline */
 629         lxpr_read_pid_comm,             /* /proc/<pid>/task/<tid>/comm      */
 630         lxpr_read_empty,                /* /proc/<pid>/task/<tid>/cpu       */
 631         lxpr_read_invalid,              /* /proc/<pid>/task/<tid>/cwd       */
 632         lxpr_read_pid_env,              /* /proc/<pid>/task/<tid>/environ */
 633         lxpr_read_invalid,              /* /proc/<pid>/task/<tid>/exe       */
 634         lxpr_read_pid_limits,           /* /proc/<pid>/task/<tid>/limits */
 635         lxpr_read_pid_maps,             /* /proc/<pid>/task/<tid>/maps      */
 636         lxpr_read_empty,                /* /proc/<pid>/task/<tid>/mem       */
 637         lxpr_read_pid_mountinfo,        /* /proc/<pid>/task/<tid>/mountinfo */
 638         lxpr_read_pid_oom_scr_adj,      /* /proc/<pid>/task/<tid>/oom_scr_adj */
 639         lxpr_read_invalid,              /* /proc/<pid>/task/<tid>/root      */
 640         lxpr_read_pid_tid_stat,         /* /proc/<pid>/task/<tid>/stat      */
 641         lxpr_read_pid_statm,            /* /proc/<pid>/task/<tid>/statm     */
 642         lxpr_read_pid_tid_status,       /* /proc/<pid>/task/<tid>/status */
 643         lxpr_read_isdir,                /* /proc/<pid>/task/<tid>/fd        */
 644         lxpr_read_fd,                   /* /proc/<pid>/task/<tid>/fd/nn     */
 645         lxpr_read_cgroups,              /* /proc/cgroups        */
 646         lxpr_read_empty,                /* /proc/cmdline        */
 647         lxpr_read_cpuinfo,              /* /proc/cpuinfo        */
 648         lxpr_read_empty,                /* /proc/devices        */
 649         lxpr_read_diskstats,            /* /proc/diskstats      */
 650         lxpr_read_empty,                /* /proc/dma            */
 651         lxpr_read_filesystems,          /* /proc/filesystems    */
 652         lxpr_read_empty,                /* /proc/interrupts     */
 653         lxpr_read_empty,                /* /proc/ioports        */
 654         lxpr_read_empty,                /* /proc/kcore          */
 655         lxpr_read_invalid,              /* /proc/kmsg -- see lxpr_read() */
 656         lxpr_read_loadavg,              /* /proc/loadavg        */
 657         lxpr_read_meminfo,              /* /proc/meminfo        */
 658         lxpr_read_empty,                /* /proc/modules        */
 659         lxpr_read_mounts,               /* /proc/mounts         */
 660         lxpr_read_isdir,                /* /proc/net            */
 661         lxpr_read_net_arp,              /* /proc/net/arp        */
 662         lxpr_read_net_dev,              /* /proc/net/dev        */
 663         lxpr_read_net_dev_mcast,        /* /proc/net/dev_mcast  */
 664         lxpr_read_net_if_inet6,         /* /proc/net/if_inet6   */
 665         lxpr_read_net_igmp,             /* /proc/net/igmp       */
 666         lxpr_read_net_ip_mr_cache,      /* /proc/net/ip_mr_cache */
 667         lxpr_read_net_ip_mr_vif,        /* /proc/net/ip_mr_vif  */
 668         lxpr_read_net_ipv6_route,       /* /proc/net/ipv6_route */
 669         lxpr_read_net_mcfilter,         /* /proc/net/mcfilter   */
 670         lxpr_read_net_netstat,          /* /proc/net/netstat    */
 671         lxpr_read_net_raw,              /* /proc/net/raw        */
 672         lxpr_read_net_route,            /* /proc/net/route      */
 673         lxpr_read_net_rpc,              /* /proc/net/rpc        */
 674         lxpr_read_net_rt_cache,         /* /proc/net/rt_cache   */
 675         lxpr_read_net_sockstat,         /* /proc/net/sockstat   */
 676         lxpr_read_net_snmp,             /* /proc/net/snmp       */
 677         lxpr_read_net_stat,             /* /proc/net/stat       */
 678         lxpr_read_net_tcp,              /* /proc/net/tcp        */
 679         lxpr_read_net_tcp6,             /* /proc/net/tcp6       */
 680         lxpr_read_net_udp,              /* /proc/net/udp        */
 681         lxpr_read_net_udp6,             /* /proc/net/udp6       */
 682         lxpr_read_net_unix,             /* /proc/net/unix       */
 683         lxpr_read_partitions,           /* /proc/partitions     */
 684         lxpr_read_invalid,              /* /proc/self           */
 685         lxpr_read_stat,                 /* /proc/stat           */
 686         lxpr_read_swaps,                /* /proc/swaps          */
 687         lxpr_read_invalid,              /* /proc/sys            */
 688         lxpr_read_invalid,              /* /proc/sys/fs         */
 689         lxpr_read_invalid,              /* /proc/sys/fs/inotify */
 690         lxpr_read_sys_fs_inotify_max_queued_events, /* max_queued_events */
 691         lxpr_read_sys_fs_inotify_max_user_instances, /* max_user_instances */
 692         lxpr_read_sys_fs_inotify_max_user_watches, /* max_user_watches */
 693         lxpr_read_invalid,              /* /proc/sys/kernel     */
 694         lxpr_read_sys_kernel_caplcap,   /* /proc/sys/kernel/cap_last_cap */
 695         lxpr_read_sys_kernel_corepatt,  /* /proc/sys/kernel/core_pattern */
 696         lxpr_read_sys_kernel_hostname,  /* /proc/sys/kernel/hostname */
 697         lxpr_read_sys_kernel_msgmni,    /* /proc/sys/kernel/msgmni */
 698         lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */
 699         lxpr_read_sys_kernel_osrel,     /* /proc/sys/kernel/osrelease */
 700         lxpr_read_sys_kernel_pid_max,   /* /proc/sys/kernel/pid_max */
 701         lxpr_read_invalid,              /* /proc/sys/kernel/random */
 702         lxpr_read_sys_kernel_rand_bootid, /* /proc/sys/kernel/random/boot_id */
 703         lxpr_read_sys_kernel_shmmax,    /* /proc/sys/kernel/shmmax */
 704         lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */
 705         lxpr_read_invalid,              /* /proc/sys/net        */
 706         lxpr_read_invalid,              /* /proc/sys/net/core   */
 707         lxpr_read_sys_net_core_somaxc,  /* /proc/sys/net/core/somaxconn */
 708         lxpr_read_invalid,              /* /proc/sys/vm */
 709         lxpr_read_sys_vm_minfr_kb,      /* /proc/sys/vm/min_free_kbytes */
 710         lxpr_read_sys_vm_nhpages,       /* /proc/sys/vm/nr_hugepages */
 711         lxpr_read_sys_vm_overcommit_mem, /* /proc/sys/vm/overcommit_memory */
 712         lxpr_read_sys_vm_swappiness,    /* /proc/sys/vm/swappiness */
 713         lxpr_read_uptime,               /* /proc/uptime         */
 714         lxpr_read_version,              /* /proc/version        */
 715 };
 716 
 717 /*
 718  * Array of lookup functions, indexed by lx /proc file type (order matters).
 719  */
 720 static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
 721         lxpr_lookup_procdir,            /* /proc                */
 722         lxpr_lookup_piddir,             /* /proc/<pid>            */
 723         lxpr_lookup_not_a_dir,          /* /proc/<pid>/auxv       */
 724         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cgroup     */
 725         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cmdline    */
 726         lxpr_lookup_not_a_dir,          /* /proc/<pid>/comm       */
 727         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cpu        */
 728         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cwd        */
 729         lxpr_lookup_not_a_dir,          /* /proc/<pid>/environ    */
 730         lxpr_lookup_not_a_dir,          /* /proc/<pid>/exe        */
 731         lxpr_lookup_not_a_dir,          /* /proc/<pid>/limits     */
 732         lxpr_lookup_not_a_dir,          /* /proc/<pid>/maps       */
 733         lxpr_lookup_not_a_dir,          /* /proc/<pid>/mem        */
 734         lxpr_lookup_not_a_dir,          /* /proc/<pid>/mountinfo */
 735         lxpr_lookup_not_a_dir,          /* /proc/<pid>/oom_score_adj */
 736         lxpr_lookup_not_a_dir,          /* /proc/<pid>/root       */
 737         lxpr_lookup_not_a_dir,          /* /proc/<pid>/stat       */
 738         lxpr_lookup_not_a_dir,          /* /proc/<pid>/statm      */
 739         lxpr_lookup_not_a_dir,          /* /proc/<pid>/status     */
 740         lxpr_lookup_taskdir,            /* /proc/<pid>/task       */
 741         lxpr_lookup_task_tid_dir,       /* /proc/<pid>/task/<tid> */
 742         lxpr_lookup_fddir,              /* /proc/<pid>/fd */
 743         lxpr_lookup_not_a_dir,          /* /proc/<pid>/fd/nn      */
 744         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/auxv      */
 745         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cgroup */
 746         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cmdline */
 747         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/comm      */
 748         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cpu       */
 749         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cwd       */
 750         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/environ */
 751         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/exe       */
 752         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/limits */
 753         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/maps      */
 754         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/mem       */
 755         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/mountinfo */
 756         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/oom_scr_adj */
 757         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/root      */
 758         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/stat      */
 759         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/statm     */
 760         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/status */
 761         lxpr_lookup_fddir,              /* /proc/<pid>/task/<tid>/fd        */
 762         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/fd/nn     */
 763         lxpr_lookup_not_a_dir,          /* /proc/cgroups        */
 764         lxpr_lookup_not_a_dir,          /* /proc/cmdline        */
 765         lxpr_lookup_not_a_dir,          /* /proc/cpuinfo        */
 766         lxpr_lookup_not_a_dir,          /* /proc/devices        */
 767         lxpr_lookup_not_a_dir,          /* /proc/diskstats      */
 768         lxpr_lookup_not_a_dir,          /* /proc/dma            */
 769         lxpr_lookup_not_a_dir,          /* /proc/filesystems    */
 770         lxpr_lookup_not_a_dir,          /* /proc/interrupts     */
 771         lxpr_lookup_not_a_dir,          /* /proc/ioports        */
 772         lxpr_lookup_not_a_dir,          /* /proc/kcore          */
 773         lxpr_lookup_not_a_dir,          /* /proc/kmsg           */
 774         lxpr_lookup_not_a_dir,          /* /proc/loadavg        */
 775         lxpr_lookup_not_a_dir,          /* /proc/meminfo        */
 776         lxpr_lookup_not_a_dir,          /* /proc/modules        */
 777         lxpr_lookup_not_a_dir,          /* /proc/mounts         */
 778         lxpr_lookup_netdir,             /* /proc/net            */
 779         lxpr_lookup_not_a_dir,          /* /proc/net/arp        */
 780         lxpr_lookup_not_a_dir,          /* /proc/net/dev        */
 781         lxpr_lookup_not_a_dir,          /* /proc/net/dev_mcast  */
 782         lxpr_lookup_not_a_dir,          /* /proc/net/if_inet6   */
 783         lxpr_lookup_not_a_dir,          /* /proc/net/igmp       */
 784         lxpr_lookup_not_a_dir,          /* /proc/net/ip_mr_cache */
 785         lxpr_lookup_not_a_dir,          /* /proc/net/ip_mr_vif  */
 786         lxpr_lookup_not_a_dir,          /* /proc/net/ipv6_route */
 787         lxpr_lookup_not_a_dir,          /* /proc/net/mcfilter   */
 788         lxpr_lookup_not_a_dir,          /* /proc/net/netstat    */
 789         lxpr_lookup_not_a_dir,          /* /proc/net/raw        */
 790         lxpr_lookup_not_a_dir,          /* /proc/net/route      */
 791         lxpr_lookup_not_a_dir,          /* /proc/net/rpc        */
 792         lxpr_lookup_not_a_dir,          /* /proc/net/rt_cache   */
 793         lxpr_lookup_not_a_dir,          /* /proc/net/sockstat   */
 794         lxpr_lookup_not_a_dir,          /* /proc/net/snmp       */
 795         lxpr_lookup_not_a_dir,          /* /proc/net/stat       */
 796         lxpr_lookup_not_a_dir,          /* /proc/net/tcp        */
 797         lxpr_lookup_not_a_dir,          /* /proc/net/tcp6       */
 798         lxpr_lookup_not_a_dir,          /* /proc/net/udp        */
 799         lxpr_lookup_not_a_dir,          /* /proc/net/udp6       */
 800         lxpr_lookup_not_a_dir,          /* /proc/net/unix       */
 801         lxpr_lookup_not_a_dir,          /* /proc/partitions     */
 802         lxpr_lookup_not_a_dir,          /* /proc/self           */
 803         lxpr_lookup_not_a_dir,          /* /proc/stat           */
 804         lxpr_lookup_not_a_dir,          /* /proc/swaps          */
 805         lxpr_lookup_sysdir,             /* /proc/sys            */
 806         lxpr_lookup_sys_fsdir,          /* /proc/sys/fs         */
 807         lxpr_lookup_sys_fs_inotifydir,  /* /proc/sys/fs/inotify */
 808         lxpr_lookup_not_a_dir,          /* .../inotify/max_queued_events */
 809         lxpr_lookup_not_a_dir,          /* .../inotify/max_user_instances */
 810         lxpr_lookup_not_a_dir,          /* .../inotify/max_user_watches */
 811         lxpr_lookup_sys_kerneldir,      /* /proc/sys/kernel     */
 812         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/cap_last_cap */
 813         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/core_pattern */
 814         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/hostname */
 815         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/msgmni */
 816         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/ngroups_max */
 817         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/osrelease */
 818         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/pid_max */
 819         lxpr_lookup_sys_kdir_randdir,   /* /proc/sys/kernel/random */
 820         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/random/boot_id */
 821         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/shmmax */
 822         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/threads-max */
 823         lxpr_lookup_sys_netdir,         /* /proc/sys/net */
 824         lxpr_lookup_sys_net_coredir,    /* /proc/sys/net/core */
 825         lxpr_lookup_not_a_dir,          /* /proc/sys/net/core/somaxconn */
 826         lxpr_lookup_sys_vmdir,          /* /proc/sys/vm */
 827         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/min_free_kbytes */
 828         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/nr_hugepages */
 829         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/overcommit_memory */
 830         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/swappiness */
 831         lxpr_lookup_not_a_dir,          /* /proc/uptime         */
 832         lxpr_lookup_not_a_dir,          /* /proc/version        */
 833 };
 834 
 835 /*
 836  * Array of readdir functions, indexed by lx /proc file type (order matters).
 837  */
 838 static int (*lxpr_readdir_function[LXPR_NFILES])() = {
 839         lxpr_readdir_procdir,           /* /proc                */
 840         lxpr_readdir_piddir,            /* /proc/<pid>            */
 841         lxpr_readdir_not_a_dir,         /* /proc/<pid>/auxv       */
 842         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cgroup     */
 843         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cmdline    */
 844         lxpr_readdir_not_a_dir,         /* /proc/<pid>/comm       */
 845         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cpu        */
 846         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cwd        */
 847         lxpr_readdir_not_a_dir,         /* /proc/<pid>/environ    */
 848         lxpr_readdir_not_a_dir,         /* /proc/<pid>/exe        */
 849         lxpr_readdir_not_a_dir,         /* /proc/<pid>/limits     */
 850         lxpr_readdir_not_a_dir,         /* /proc/<pid>/maps       */
 851         lxpr_readdir_not_a_dir,         /* /proc/<pid>/mem        */
 852         lxpr_readdir_not_a_dir,         /* /proc/<pid>/mountinfo */
 853         lxpr_readdir_not_a_dir,         /* /proc/<pid>/oom_score_adj */
 854         lxpr_readdir_not_a_dir,         /* /proc/<pid>/root       */
 855         lxpr_readdir_not_a_dir,         /* /proc/<pid>/stat       */
 856         lxpr_readdir_not_a_dir,         /* /proc/<pid>/statm      */
 857         lxpr_readdir_not_a_dir,         /* /proc/<pid>/status     */
 858         lxpr_readdir_taskdir,           /* /proc/<pid>/task       */
 859         lxpr_readdir_task_tid_dir,      /* /proc/<pid>/task/<tid> */
 860         lxpr_readdir_fddir,             /* /proc/<pid>/fd */
 861         lxpr_readdir_not_a_dir,         /* /proc/<pid>/fd/nn      */
 862         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/auxv      */
 863         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cgroup */
 864         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cmdline */
 865         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/comm      */
 866         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cpu       */
 867         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cwd       */
 868         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/environ */
 869         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/exe       */
 870         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/limits */
 871         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/maps      */
 872         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/mem       */
 873         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/mountinfo */
 874         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/oom_scr_adj */
 875         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/root      */
 876         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/stat      */
 877         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/statm     */
 878         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/status */
 879         lxpr_readdir_fddir,             /* /proc/<pid>/task/<tid>/fd        */
 880         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/fd/nn     */
 881         lxpr_readdir_not_a_dir,         /* /proc/cgroups        */
 882         lxpr_readdir_not_a_dir,         /* /proc/cmdline        */
 883         lxpr_readdir_not_a_dir,         /* /proc/cpuinfo        */
 884         lxpr_readdir_not_a_dir,         /* /proc/devices        */
 885         lxpr_readdir_not_a_dir,         /* /proc/diskstats      */
 886         lxpr_readdir_not_a_dir,         /* /proc/dma            */
 887         lxpr_readdir_not_a_dir,         /* /proc/filesystems    */
 888         lxpr_readdir_not_a_dir,         /* /proc/interrupts     */
 889         lxpr_readdir_not_a_dir,         /* /proc/ioports        */
 890         lxpr_readdir_not_a_dir,         /* /proc/kcore          */
 891         lxpr_readdir_not_a_dir,         /* /proc/kmsg           */
 892         lxpr_readdir_not_a_dir,         /* /proc/loadavg        */
 893         lxpr_readdir_not_a_dir,         /* /proc/meminfo        */
 894         lxpr_readdir_not_a_dir,         /* /proc/modules        */
 895         lxpr_readdir_not_a_dir,         /* /proc/mounts         */
 896         lxpr_readdir_netdir,            /* /proc/net            */
 897         lxpr_readdir_not_a_dir,         /* /proc/net/arp        */
 898         lxpr_readdir_not_a_dir,         /* /proc/net/dev        */
 899         lxpr_readdir_not_a_dir,         /* /proc/net/dev_mcast  */
 900         lxpr_readdir_not_a_dir,         /* /proc/net/if_inet6   */
 901         lxpr_readdir_not_a_dir,         /* /proc/net/igmp       */
 902         lxpr_readdir_not_a_dir,         /* /proc/net/ip_mr_cache */
 903         lxpr_readdir_not_a_dir,         /* /proc/net/ip_mr_vif  */
 904         lxpr_readdir_not_a_dir,         /* /proc/net/ipv6_route */
 905         lxpr_readdir_not_a_dir,         /* /proc/net/mcfilter   */
 906         lxpr_readdir_not_a_dir,         /* /proc/net/netstat    */
 907         lxpr_readdir_not_a_dir,         /* /proc/net/raw        */
 908         lxpr_readdir_not_a_dir,         /* /proc/net/route      */
 909         lxpr_readdir_not_a_dir,         /* /proc/net/rpc        */
 910         lxpr_readdir_not_a_dir,         /* /proc/net/rt_cache   */
 911         lxpr_readdir_not_a_dir,         /* /proc/net/sockstat   */
 912         lxpr_readdir_not_a_dir,         /* /proc/net/snmp       */
 913         lxpr_readdir_not_a_dir,         /* /proc/net/stat       */
 914         lxpr_readdir_not_a_dir,         /* /proc/net/tcp        */
 915         lxpr_readdir_not_a_dir,         /* /proc/net/tcp6       */
 916         lxpr_readdir_not_a_dir,         /* /proc/net/udp        */
 917         lxpr_readdir_not_a_dir,         /* /proc/net/udp6       */
 918         lxpr_readdir_not_a_dir,         /* /proc/net/unix       */
 919         lxpr_readdir_not_a_dir,         /* /proc/partitions     */
 920         lxpr_readdir_not_a_dir,         /* /proc/self           */
 921         lxpr_readdir_not_a_dir,         /* /proc/stat           */
 922         lxpr_readdir_not_a_dir,         /* /proc/swaps          */
 923         lxpr_readdir_sysdir,            /* /proc/sys            */
 924         lxpr_readdir_sys_fsdir,         /* /proc/sys/fs         */
 925         lxpr_readdir_sys_fs_inotifydir, /* /proc/sys/fs/inotify */
 926         lxpr_readdir_not_a_dir,         /* .../inotify/max_queued_events */
 927         lxpr_readdir_not_a_dir,         /* .../inotify/max_user_instances */
 928         lxpr_readdir_not_a_dir,         /* .../inotify/max_user_watches */
 929         lxpr_readdir_sys_kerneldir,     /* /proc/sys/kernel     */
 930         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/cap_last_cap */
 931         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/core_pattern */
 932         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/hostname */
 933         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/msgmni */
 934         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/ngroups_max */
 935         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/osrelease */
 936         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/pid_max */
 937         lxpr_readdir_sys_kdir_randdir,  /* /proc/sys/kernel/random */
 938         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/random/boot_id */
 939         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/shmmax */
 940         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/threads-max */
 941         lxpr_readdir_sys_netdir,        /* /proc/sys/net */
 942         lxpr_readdir_sys_net_coredir,   /* /proc/sys/net/core */
 943         lxpr_readdir_not_a_dir,         /* /proc/sys/net/core/somaxconn */
 944         lxpr_readdir_sys_vmdir,         /* /proc/sys/vm */
 945         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/min_free_kbytes */
 946         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/nr_hugepages */
 947         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/overcommit_memory */
 948         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/swappiness */
 949         lxpr_readdir_not_a_dir,         /* /proc/uptime         */
 950         lxpr_readdir_not_a_dir,         /* /proc/version        */
 951 };
 952 
 953 
 954 /*
 955  * lxpr_read(): Vnode operation for VOP_READ()
 956  *
 957  * lx procfs files are, in general, human-readable text rather than binary
 958  * structures, so a single read path serves 32-bit and 64-bit readers.
 959  * /proc/kmsg is read from the log device; all other types are dispatched
 960  * through lxpr_read_function[].  The uiobuf is flushed and freed, and an
 961  * opened log handle closed, on every path; the earliest error is returned.
 962  */
 963 /* ARGSUSED */
 964 static int
 965 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 966     caller_context_t *ct)
 967 {
 968         lxpr_node_t *lxpnp = VTOLXP(vp);
 969         lxpr_nodetype_t type = lxpnp->lxpr_type;
 970         lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
 971         int error = 0, ferr;
 972 
 973         ASSERT(type < LXPR_NFILES);
 974 
 975         if (type == LXPR_KMSG) {
 976                 ldi_ident_t     li = VTOLXPM(vp)->lxprm_li;
 977                 ldi_handle_t    ldih;
 978                 struct strioctl str;
 979                 int             rv, cerr;
 980 
 981                 /*
 982                  * Open the zone's log device (/dev/log) using the layered
 983                  * driver interface.
 984                  */
 985                 error = ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li);
 986                 if (error == 0) {
 987                         /*
 988                          * Send an ioctl to the underlying device, letting it
 989                          * know we're interested in getting console messages.
 990                          */
 991                         str.ic_cmd = I_CONSLOG;
 992                         str.ic_timout = 0;
 993                         str.ic_len = 0;
 994                         str.ic_dp = NULL;
 995                         error = ldi_ioctl(ldih, I_STR, (intptr_t)&str,
 996                             FKIOCTL, cr, &rv);
 997                         if (error == 0)
 998                                 lxpr_read_kmsg(lxpnp, uiobuf, ldih);
 999 
1000                         /* Always close the handle; keep the first error. */
1001                         cerr = ldi_close(ldih, FREAD, cr);
1002                         if (error == 0)
1003                                 error = cerr;
1004                 }
1005         } else {
1006                 lxpr_read_function[type](lxpnp, uiobuf);
1007         }
1008 
1009         /* Flush and free on every path so the uiobuf is never leaked. */
1010         ferr = lxpr_uiobuf_flush(uiobuf);
1011         lxpr_uiobuf_free(uiobuf);
1012         return (error != 0 ? error : ferr);
1013 }
1014 
1015 /*
1016  * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
1017  *
1018  * Various special case reads; none of them looks at the node:
1019  * - lxpr_read_isdir(): trying to read a directory; sets EISDIR
1020  * - lxpr_read_invalid(): invalid file (used to mean a file that should be
1021  *   implemented, but isn't yet); sets EINVAL
1022  * - lxpr_read_empty(): empty file; writes no data and sets no error
1023  * - wait to read a file that never has data (TODO confirm: no handler here)
1024  */
1025 /* ARGSUSED */
1026 static void
1027 lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1028 {
1029         lxpr_uiobuf_seterr(uiobuf, EISDIR);
1030 }
1031 
1032 /* ARGSUSED */
1033 static void
1034 lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1035 {
1036         lxpr_uiobuf_seterr(uiobuf, EINVAL);
1037 }
1038 
1039 /* ARGSUSED */
1040 static void
1041 lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1042 {
1043 }
1044 
1045 /*
1046  * lxpr_read_pid_auxv(): read process aux vector
1047  *
1048  * Each entry is translated with lx_auxv_stol() and emitted as auxv_t or, for
1049  * a non-native target, auxv32_t.  Copying stops after the AT_NULL entry.
1050  */
1051 static void
1052 lxpr_read_pid_auxv(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1053 {
1054         proc_t *p;
1055         lx_proc_data_t *pd;
1056         lx_elf_data_t *edp;
1057         int idx, nout = 0;
1058 
1059         ASSERT(lxpnp->lxpr_type == LXPR_PID_AUXV ||
1060             lxpnp->lxpr_type == LXPR_PID_TID_AUXV);
1061 
1062         if ((p = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
1063                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1064                 return;
1065         }
1066 
1067         pd = ptolxproc(p);
1068         if (pd == NULL) {
1069                 /* Emit a single AT_NULL record for non-branded processes */
1070                 auxv_t nullrec;
1071 
1072                 bzero(&nullrec, sizeof (nullrec));
1073                 lxpr_unlock(p);
1074                 lxpr_uiobuf_write(uiobuf, (char *)&nullrec, sizeof (nullrec));
1075                 return;
1076         }
1077         edp = &pd->l_elf_data;
1078 
1079         if (p->p_model == DATAMODEL_NATIVE) {
1080                 auxv_t out[__KERN_NAUXV_IMPL];
1081 
1082                 /*
1083                  * a_type is only int-sized (not long), so zero the whole
1084                  * array first to keep the bytes we never write clean.
1085                  */
1086                 bzero(out, sizeof (out));
1087                 for (idx = 0; idx < __KERN_NAUXV_IMPL; idx++) {
1088                         if (lx_auxv_stol(&p->p_user.u_auxv[idx],
1089                             &out[nout], edp) == 0)
1090                                 nout++;
1091                         if (p->p_user.u_auxv[idx].a_type == AT_NULL)
1092                                 break;
1093                 }
1094                 lxpr_uiobuf_write(uiobuf, (char *)out, nout * sizeof (out[0]));
1095                 lxpr_unlock(p);
1096         }
1097 #if defined(_SYSCALL32_IMPL)
1098         else {
1099                 auxv32_t out32[__KERN_NAUXV_IMPL];
1100 
1101                 for (idx = 0; idx < __KERN_NAUXV_IMPL; idx++) {
1102                         auxv_t wide;
1103 
1104                         if (lx_auxv_stol(&p->p_user.u_auxv[idx],
1105                             &wide, edp) == 0) {
1106                                 out32[nout].a_type = (int)wide.a_type;
1107                                 out32[nout].a_un.a_val = (int)wide.a_un.a_val;
1108                                 nout++;
1109                         }
1110                         if (p->p_user.u_auxv[idx].a_type == AT_NULL)
1111                                 break;
1112                 }
1113                 lxpr_unlock(p);
1114                 lxpr_uiobuf_write(uiobuf, (char *)out32,
1115                     nout * sizeof (out32[0]));
1116         }
1117 #endif /* defined(_SYSCALL32_IMPL) */
1118 }
1119 
1120 /*
1121  * lxpr_read_pid_cgroup(): read cgroups for process
1122  *
1123  * Basic stub: every pid that can be locked reports the same fixed entry.
1124  */
1125 static void
1126 lxpr_read_pid_cgroup(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1127 {
1128         proc_t *target;
1129 
1130         ASSERT(lxpnp->lxpr_type == LXPR_PID_CGROUP ||
1131             lxpnp->lxpr_type == LXPR_PID_TID_CGROUP);
1132 
1133         if ((target = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
1134                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1135                 return;
1136         }
1137 
1138         /* The third (path) field is a placeholder still to be populated. */
1139         lxpr_uiobuf_printf(uiobuf, "1:name=systemd:/\n");
1140         lxpr_unlock(target);
1141 }
1142 
1143 /*
1144  * Copy the Linux-style command line of p straight into the caller's uio,
1145  * using the argv/envp bounds recorded in pd.  Entered with p->p_lock held;
1146  * the lock is dropped around the reads of p's memory and retaken on exit.
1147  */
1148 static void
1149 lxpr_copy_cmdline(proc_t *p, lx_proc_data_t *pd, lxpr_uiobuf_t *uiobuf)
1150 {
1151         uio_t *uio = uiobuf->uiop;
1152         char *kbuf = uiobuf->buffer;
1153         int kbufsz = uiobuf->buffsize;
1154         uintptr_t envbeg = pd->l_envs_start, envend = pd->l_envs_end;
1155         uintptr_t cur = pd->l_args_start + uio->uio_offset;
1156         boolean_t spills = B_FALSE;
1157         size_t want, got;
1158         int rc = 0;
1159 
1160         /* Do not bother with data beyond the end of the envp strings area. */
1161         if (cur > envend)
1162                 return;
1163         mutex_exit(&p->p_lock);
1164 
1165         /*
1166          * If the request starts or ends outside the argv strings area, check
1167          * whether the process has overwritten the terminating NUL.  If it
1168          * has not, no data needs to be copied from outside the argv area.
1169          */
1170         if (cur >= envbeg || (cur + uio->uio_resid) >= envbeg) {
1171                 uint8_t term;
1172                 if (uread(p, &term, sizeof (term), envbeg - 1) != 0)
1173                         rc = EFAULT;
1174                 else if (term != 0)
1175                         spills = B_TRUE;
1176         }
1177 
1178         /* Data from the start of the args up to envbeg-1 is copied freely. */
1179         while (cur < envbeg && uio->uio_resid > 0 && rc == 0) {
1180                 want = MIN(envbeg - cur, uio->uio_resid);
1181                 want = MIN(want, kbufsz);
1182 
1183                 if (prreadbuf(p, cur, (uint8_t *)kbuf, want, &got) != 0 ||
1184                     got != want) {
1185                         rc = EFAULT;
1186                         break;
1187                 }
1188                 rc = uiomove(kbuf, got, UIO_READ, uio);
1189                 cur += got;
1190         }
1191 
1192         /*
1193          * Onward from envbeg, data is copied as a contiguous string.  To
1194          * protect env data from potential snooping, only one buffer-sized copy
1195          * is allowed to avoid complex seek logic.
1196          */
1197         if (rc == 0 && spills && cur == envbeg && uio->uio_resid > 0) {
1198                 want = MIN(envend - cur, uio->uio_resid);
1199                 want = MIN(want, kbufsz);
1200                 if (prreadbuf(p, cur, (uint8_t *)kbuf, want, &got) == 0) {
1201                         int len = strnlen(kbuf, got);
1202 
1203                         if (len > 0)
1204                                 rc = uiomove(kbuf, len, UIO_READ, uio);
1205                 }
1206         }
1207 
1208         /* Report the outcome and reset any uiobuf state. */
1209         uiobuf->error = rc;
1210         uiobuf->pos = uiobuf->buffer;
1211         uiobuf->beg = 0;
1212         mutex_enter(&p->p_lock);
1213 }
1214 
1215 /*
1216  * lxpr_read_pid_cmdline(): read argument vector from process
1217  *
1218  * Uses the Linux-style argv bounds when the lx proc data has them all set;
1219  * otherwise falls back to prreadargv() with a lxpr_maxargvlen-byte buffer.
1220  */
1221 static void
1222 lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1223 {
1224         size_t asz = lxpr_maxargvlen, sz;
1225         lx_proc_data_t *pd;
1226         char *buf;
1227         proc_t *p;
1228 
1229         ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE ||
1230             lxpnp->lxpr_type == LXPR_PID_TID_CMDLINE);
1231 
1232         buf = kmem_alloc(asz, KM_SLEEP);
1233         if ((p = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
1234                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1235                 kmem_free(buf, asz);
1236                 return;
1237         }
1238         pd = ptolxproc(p);
1239         if (pd == NULL || pd->l_args_start == 0 ||
1240             pd->l_envs_start == 0 || pd->l_envs_end == 0) {
1241                 if (prreadargv(p, buf, asz, &sz) == 0)
1242                         lxpr_uiobuf_write(uiobuf, buf, sz);
1243                 else
1244                         lxpr_uiobuf_seterr(uiobuf, EINVAL);
1245         } else {
1246                 /* All Linux-style bounds are known; copy using them. */
1247                 lxpr_copy_cmdline(p, pd, uiobuf);
1248         }
1249 
1250         lxpr_unlock(p);
1251         kmem_free(buf, asz);
1252 }
1253 
1254 /*
1255  * lxpr_read_pid_comm(): read command from process
1256  *
1257  * prctl(PR_SET_NAME) does not set custom names for threads (vs processes),
1258  * so the per-thread node reports the process name with no special handling.
1259  */
1260 static void
1261 lxpr_read_pid_comm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1262 {
1263         proc_t *proc;
1264 
1265         VERIFY(lxpnp->lxpr_type == LXPR_PID_COMM ||
1266             lxpnp->lxpr_type == LXPR_PID_TID_COMM);
1267 
1268         proc = lxpr_lock(lxpnp->lxpr_pid);
1269         if (proc == NULL) {
1270                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1271                 return;
1272         }
1273         lxpr_uiobuf_printf(uiobuf, "%s\n", proc->p_user.u_comm);
1274         lxpr_unlock(proc);
1275 }
1276 
1277 /*
1278  * lxpr_read_pid_env(): read env vector from process
1279  */
1280 static void
1281 lxpr_read_pid_env(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1282 {
1283         proc_t *p;
1284         char *buf;
1285         size_t asz = lxpr_maxenvvlen, sz;
1286 
1287         ASSERT(lxpnp->lxpr_type == LXPR_PID_ENV);
1288 
1289         buf = kmem_alloc(asz, KM_SLEEP);
1290 
1291         p = lxpr_lock(lxpnp->lxpr_pid);
1292         if (p == NULL) {
1293                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1294                 kmem_free(buf, asz);
1295                 return;
1296         }
1297 
1298         if (prreadenvv(p, buf, asz, &sz) != 0) {
1299                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1300         } else {
1301                 lxpr_uiobuf_write(uiobuf, buf, sz);
1302         }
1303 
1304         lxpr_unlock(p);
1305         kmem_free(buf, asz);
1306 }
1307 
1308 /*
1309  * lxpr_read_pid_limits(): ulimit file
1310  */
1311 static void
1312 lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1313 {
1314         proc_t *p;
1315         rctl_qty_t cur, max;
1316         rctl_val_t *oval, *nval;
1317         rctl_hndl_t hndl;
1318         char *kname;
1319         int i;
1320 
1321         ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS ||
1322             lxpnp->lxpr_type == LXPR_PID_TID_LIMITS);
1323 
1324         nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP);
1325 
1326         p = lxpr_lock(lxpnp->lxpr_pid);
1327         if (p == NULL) {
1328                 kmem_free(nval, sizeof (rctl_val_t));
1329                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1330                 return;
1331         }
1332 
1333         lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n",
1334             "Limit", "Soft Limit", "Hard Limit", "Units");
1335         for (i = 0; lxpr_rlimtab[i].rlim_name != NULL; i++) {
1336                 kname = lxpr_rlimtab[i].rlim_rctl;
1337                 /* default to unlimited for resources without an analog */
1338                 cur = RLIM_INFINITY;
1339                 max = RLIM_INFINITY;
1340                 if (kname != NULL) {
1341                         hndl = rctl_hndl_lookup(kname);
1342                         oval = NULL;
1343                         while ((hndl != -1) &&
1344                             rctl_local_get(hndl, oval, nval, p) == 0) {
1345                                 oval = nval;
1346                                 switch (nval->rcv_privilege) {
1347                                 case RCPRIV_BASIC:
1348                                         if (!RCTL_INFINITE(nval))
1349                                                 cur = nval->rcv_value;
1350                                         break;
1351                                 case RCPRIV_PRIVILEGED:
1352                                         if (!RCTL_INFINITE(nval))
1353                                                 max = nval->rcv_value;
1354                                         break;
1355                                 }
1356                         }
1357                 }
1358 
1359                 lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name);
1360                 if (cur == RLIM_INFINITY || cur == LX_RLIM_INFINITY) {
1361                         lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
1362                 } else {
1363                         lxpr_uiobuf_printf(uiobuf, " %-20lu", cur);
1364                 }
1365                 if (max == RLIM_INFINITY || max == LX_RLIM_INFINITY) {
1366                         lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
1367                 } else {
1368                         lxpr_uiobuf_printf(uiobuf, " %-20lu", max);
1369                 }
1370                 lxpr_uiobuf_printf(uiobuf, " %-10s\n",
1371                     lxpr_rlimtab[i].rlim_unit);
1372         }
1373 
1374         lxpr_unlock(p);
1375         kmem_free(nval, sizeof (rctl_val_t));
1376 }
1377 
1378 /*
1379  * lxpr_read_pid_maps(): memory map file
1380  */
1381 static void
1382 lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1383 {
1384         proc_t *p;
1385         struct as *as;
1386         struct seg *seg;
1387         char *buf;
1388         int buflen = MAXPATHLEN;
1389         struct print_data {
1390                 uintptr_t saddr;
1391                 uintptr_t eaddr;
1392                 int type;
1393                 char prot[5];
1394                 uintptr_t offset;
1395                 vnode_t *vp;
1396                 struct print_data *next;
1397         } *print_head = NULL;
1398         struct print_data **print_tail = &print_head;
1399         struct print_data *pbuf;
1400 
1401         ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS ||
1402             lxpnp->lxpr_type == LXPR_PID_TID_MAPS);
1403 
1404         p = lxpr_lock(lxpnp->lxpr_pid);
1405         if (p == NULL) {
1406                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1407                 return;
1408         }
1409 
1410         as = p->p_as;
1411 
1412         if (as == &kas) {
1413                 lxpr_unlock(p);
1414                 return;
1415         }
1416 
1417         mutex_exit(&p->p_lock);
1418 
1419         /* Iterate over all segments in the address space */
1420         AS_LOCK_ENTER(as, RW_READER);
1421         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1422                 vnode_t *vp;
1423                 uint_t protbits;
1424 
1425                 pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
1426 
1427                 pbuf->saddr = (uintptr_t)seg->s_base;
1428                 pbuf->eaddr = pbuf->saddr + seg->s_size;
1429                 pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
1430 
1431                 /*
1432                  * Cheat and only use the protection bits of the first page
1433                  * in the segment
1434                  */
1435                 (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
1436                 (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
1437 
1438                 if (protbits & PROT_READ)      pbuf->prot[0] = 'r';
1439                 if (protbits & PROT_WRITE)     pbuf->prot[1] = 'w';
1440                 if (protbits & PROT_EXEC)      pbuf->prot[2] = 'x';
1441                 if (pbuf->type & MAP_SHARED)        pbuf->prot[3] = 's';
1442                 else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
1443 
1444                 if (seg->s_ops == &segvn_ops &&
1445                     SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
1446                     vp != NULL && vp->v_type == VREG) {
1447                         VN_HOLD(vp);
1448                         pbuf->vp = vp;
1449                 } else {
1450                         pbuf->vp = NULL;
1451                 }
1452 
1453                 pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr);
1454 
1455                 pbuf->next = NULL;
1456                 *print_tail = pbuf;
1457                 print_tail = &pbuf->next;
1458         }
1459         AS_LOCK_EXIT(as);
1460         mutex_enter(&p->p_lock);
1461         lxpr_unlock(p);
1462 
1463         buf = kmem_alloc(buflen, KM_SLEEP);
1464 
1465         /* print the data we've extracted */
1466         pbuf = print_head;
1467         while (pbuf != NULL) {
1468                 struct print_data *pbuf_next;
1469                 vattr_t vattr;
1470 
1471                 int maj = 0;
1472                 int min = 0;
1473                 ino_t inode = 0;
1474 
1475                 *buf = '\0';
1476                 if (pbuf->vp != NULL) {
1477                         vattr.va_mask = AT_FSID | AT_NODEID;
1478                         if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
1479                             NULL) == 0) {
1480                                 maj = getmajor(vattr.va_fsid);
1481                                 min = getminor(vattr.va_fsid);
1482                                 inode = vattr.va_nodeid;
1483                         }
1484                         (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
1485                         VN_RELE(pbuf->vp);
1486                 }
1487 
1488                 if (p->p_model == DATAMODEL_LP64) {
1489                         lxpr_uiobuf_printf(uiobuf,
1490                             "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n",
1491                             pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
1492                             maj, min, inode, *buf != '\0' ? " " : "", buf);
1493                 } else {
1494                         lxpr_uiobuf_printf(uiobuf,
1495                             "%08x-%08x %s %08x %02x:%02x %llu%s%s\n",
1496                             (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr,
1497                             pbuf->prot, (uint32_t)pbuf->offset, maj, min,
1498                             inode, *buf != '\0' ? " " : "", buf);
1499                 }
1500 
1501                 pbuf_next = pbuf->next;
1502                 kmem_free(pbuf, sizeof (*pbuf));
1503                 pbuf = pbuf_next;
1504         }
1505 
1506         kmem_free(buf, buflen);
1507 }
1508 
1509 /*
1510  * lxpr_read_pid_mountinfo(): information about process mount points. e.g.:
1511  *    14 19 0:13 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
1512  * mntid parid devnums root mntpnt mntopts - fstype mntsrc superopts
1513  *
1514  * We have to make up several of these fields.
1515  */
1516 static void
1517 lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1518 {
1519         struct vfs *vfsp;
1520         struct vfs *vfslist;
1521         zone_t *zone = LXPTOZ(lxpnp);
1522         struct print_data {
1523                 refstr_t *vfs_mntpt;
1524                 refstr_t *vfs_resource;
1525                 uint_t vfs_flag;
1526                 int vfs_fstype;
1527                 dev_t vfs_dev;
1528                 struct print_data *next;
1529         } *print_head = NULL;
1530         struct print_data **print_tail = &print_head;
1531         struct print_data *printp;
1532         int root_id = 15;       /* use a made-up value */
1533         int mnt_id;
1534 
1535         ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO ||
1536             lxpnp->lxpr_type == LXPR_PID_TID_MOUNTINFO);
1537 
1538         vfs_list_read_lock();
1539 
1540         /* root is the top-level, it does not appear in this output */
1541         if (zone == global_zone) {
1542                 vfsp = vfslist = rootvfs;
1543         } else {
1544                 vfsp = vfslist = zone->zone_vfslist;
1545                 /*
1546                  * If the zone has a root entry, it will be the first in
1547                  * the list.  If it doesn't, we conjure one up.
1548                  */
1549                 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
1550                     zone->zone_rootpath) != 0) {
1551                         struct vfs *tvfsp;
1552                         /*
1553                          * The root of the zone is not a mount point.  The vfs
1554                          * we want to report is that of the zone's root vnode.
1555                          */
1556                         tvfsp = zone->zone_rootvp->v_vfsp;
1557 
1558                         lxpr_uiobuf_printf(uiobuf,
1559                             "%d 1 %d:%d / / %s - %s / %s\n",
1560                             root_id,
1561                             major(tvfsp->vfs_dev), minor(vfsp->vfs_dev),
1562                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1563                             vfssw[tvfsp->vfs_fstype].vsw_name,
1564                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1565 
1566                 }
1567                 if (vfslist == NULL) {
1568                         vfs_list_unlock();
1569                         return;
1570                 }
1571         }
1572 
1573         /*
1574          * Later on we have to do a lookupname, which can end up causing
1575          * another vfs_list_read_lock() to be called. Which can lead to a
1576          * deadlock. To avoid this, we extract the data we need into a local
1577          * list, then we can run this list without holding vfs_list_read_lock()
1578          * We keep the list in the same order as the vfs_list
1579          */
1580         do {
1581                 /* Skip mounts we shouldn't show */
1582                 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
1583                         goto nextfs;
1584                 }
1585 
1586                 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
1587                 refstr_hold(vfsp->vfs_mntpt);
1588                 printp->vfs_mntpt = vfsp->vfs_mntpt;
1589                 refstr_hold(vfsp->vfs_resource);
1590                 printp->vfs_resource = vfsp->vfs_resource;
1591                 printp->vfs_flag = vfsp->vfs_flag;
1592                 printp->vfs_fstype = vfsp->vfs_fstype;
1593                 printp->vfs_dev = vfsp->vfs_dev;
1594                 printp->next = NULL;
1595 
1596                 *print_tail = printp;
1597                 print_tail = &printp->next;
1598 
1599 nextfs:
1600                 vfsp = (zone == global_zone) ?
1601                     vfsp->vfs_next : vfsp->vfs_zone_next;
1602 
1603         } while (vfsp != vfslist);
1604 
1605         vfs_list_unlock();
1606 
1607         mnt_id = root_id + 1;
1608 
1609         /*
1610          * now we can run through what we've extracted without holding
1611          * vfs_list_read_lock()
1612          */
1613         printp = print_head;
1614         while (printp != NULL) {
1615                 struct print_data *printp_next;
1616                 const char *resource;
1617                 char *mntpt;
1618                 struct vnode *vp;
1619                 int error;
1620 
1621                 mntpt = (char *)refstr_value(printp->vfs_mntpt);
1622                 resource = refstr_value(printp->vfs_resource);
1623 
1624                 if (mntpt != NULL && mntpt[0] != '\0')
1625                         mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
1626                 else
1627                         mntpt = "-";
1628 
1629                 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
1630 
1631                 if (error != 0)
1632                         goto nextp;
1633 
1634                 if (!(vp->v_flag & VROOT)) {
1635                         VN_RELE(vp);
1636                         goto nextp;
1637                 }
1638                 VN_RELE(vp);
1639 
1640                 if (resource != NULL && resource[0] != '\0') {
1641                         if (resource[0] == '/') {
1642                                 resource = ZONE_PATH_VISIBLE(resource, zone) ?
1643                                     ZONE_PATH_TRANSLATE(resource, zone) : mntpt;
1644                         }
1645                 } else {
1646                         resource = "none";
1647                 }
1648 
1649                 /*
1650                  * XXX parent ID is not tracked correctly here. Currently we
1651                  * always assume the parent ID is the root ID.
1652                  */
1653                 lxpr_uiobuf_printf(uiobuf,
1654                     "%d %d %d:%d / %s %s - %s %s %s\n",
1655                     mnt_id, root_id,
1656                     major(printp->vfs_dev), minor(printp->vfs_dev),
1657                     mntpt,
1658                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1659                     vfssw[printp->vfs_fstype].vsw_name,
1660                     resource,
1661                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1662 
1663 nextp:
1664                 printp_next = printp->next;
1665                 refstr_rele(printp->vfs_mntpt);
1666                 refstr_rele(printp->vfs_resource);
1667                 kmem_free(printp, sizeof (*printp));
1668                 printp = printp_next;
1669 
1670                 mnt_id++;
1671         }
1672 }
1673 
1674 /*
1675  * lxpr_read_pid_oom_scr_adj(): read oom_score_adj for process
1676  */
1677 static void
1678 lxpr_read_pid_oom_scr_adj(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1679 {
1680         proc_t *p;
1681 
1682         ASSERT(lxpnp->lxpr_type == LXPR_PID_OOM_SCR_ADJ ||
1683             lxpnp->lxpr_type == LXPR_PID_TID_OOM_SCR_ADJ);
1684 
1685         p = lxpr_lock(lxpnp->lxpr_pid);
1686         if (p == NULL) {
1687                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1688                 return;
1689         }
1690 
1691         /* always 0 */
1692         lxpr_uiobuf_printf(uiobuf, "0\n");
1693 
1694         lxpr_unlock(p);
1695 }
1696 
1697 
1698 /*
1699  * lxpr_read_pid_statm(): memory status file
1700  */
1701 static void
1702 lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1703 {
1704         proc_t *p;
1705         struct as *as;
1706         size_t vsize;
1707         size_t rss;
1708 
1709         ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM ||
1710             lxpnp->lxpr_type == LXPR_PID_TID_STATM);
1711 
1712         p = lxpr_lock(lxpnp->lxpr_pid);
1713         if (p == NULL) {
1714                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1715                 return;
1716         }
1717 
1718         as = p->p_as;
1719 
1720         mutex_exit(&p->p_lock);
1721 
1722         AS_LOCK_ENTER(as, RW_READER);
1723         vsize = btopr(as->a_resvsize);
1724         rss = rm_asrss(as);
1725         AS_LOCK_EXIT(as);
1726 
1727         mutex_enter(&p->p_lock);
1728         lxpr_unlock(p);
1729 
1730         lxpr_uiobuf_printf(uiobuf,
1731             "%lu %lu %lu %lu %lu %lu %lu\n",
1732             vsize, rss, 0l, rss, 0l, 0l, 0l);
1733 }
1734 
1735 /*
1736  * Look for either the main thread (lookup_id is 0) or the specified thread.
1737  * If we're looking for the main thread but the proc does not have one, we
1738  * fallback to using prchoose to get any thread available.
1739  */
1740 static kthread_t *
1741 lxpr_get_thread(proc_t *p, uint_t lookup_id)
1742 {
1743         kthread_t *t;
1744         uint_t emul_tid;
1745         lx_lwp_data_t *lwpd;
1746         pid_t pid = p->p_pid;
1747         pid_t init_pid = curproc->p_zone->zone_proc_initpid;
1748         boolean_t branded = (p->p_brand == &lx_brand);
1749 
1750         /* get specified thread  */
1751         if ((t = p->p_tlist) == NULL)
1752                 return (NULL);
1753 
1754         do {
1755                 if (lookup_id == 0 && t->t_tid == 1) {
1756                         thread_lock(t);
1757                         return (t);
1758                 }
1759 
1760                 lwpd = ttolxlwp(t);
1761                 if (branded && lwpd != NULL) {
1762                         if (pid == init_pid && lookup_id == 1) {
1763                                 emul_tid = t->t_tid;
1764                         } else {
1765                                 emul_tid = lwpd->br_pid;
1766                         }
1767                 } else {
1768                         /*
1769                          * Make only the first (assumed to be main) thread
1770                          * visible for non-branded processes.
1771                          */
1772                         emul_tid = p->p_pid;
1773                 }
1774                 if (emul_tid == lookup_id) {
1775                         thread_lock(t);
1776                         return (t);
1777                 }
1778         } while ((t = t->t_forw) != p->p_tlist);
1779 
1780         if (lookup_id == 0)
1781                 return (prchoose(p));
1782         return (NULL);
1783 }
1784 
1785 /*
1786  * Lookup the real pid for procs 0 or 1.
1787  */
1788 static pid_t
1789 get_real_pid(pid_t p)
1790 {
1791         pid_t find_pid;
1792 
1793         if (p == 1) {
1794                 find_pid = curproc->p_zone->zone_proc_initpid;
1795         } else if (p == 0) {
1796                 find_pid = curproc->p_zone->zone_zsched->p_pid;
1797         } else {
1798                 find_pid = p;
1799         }
1800 
1801         return (find_pid);
1802 }
1803 
1804 /*
1805  * pid/tid common code to read status file
1806  */
1807 static void
1808 lxpr_read_status_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
1809     uint_t lookup_id)
1810 {
1811         proc_t *p;
1812         kthread_t *t;
1813         user_t *up;
1814         cred_t *cr;
1815         const gid_t *groups;
1816         int    ngroups;
1817         struct as *as;
1818         char *status;
1819         pid_t pid, ppid;
1820         k_sigset_t current, ignore, handle;
1821         int    i, lx_sig;
1822         pid_t real_pid;
1823 
1824         real_pid = get_real_pid(lxpnp->lxpr_pid);
1825         p = lxpr_lock(real_pid);
1826         if (p == NULL) {
1827                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1828                 return;
1829         }
1830 
1831         pid = p->p_pid;
1832 
1833         /*
1834          * Convert pid to the Linux default of 1 if we're the zone's init
1835          * process or if we're the zone's zsched the pid is 0.
1836          */
1837         if (pid == curproc->p_zone->zone_proc_initpid) {
1838                 pid = 1;
1839                 ppid = 0;       /* parent pid for init is 0 */
1840         } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
1841                 pid = 0;        /* zsched is pid 0 */
1842                 ppid = 0;       /* parent pid for zsched is itself */
1843         } else {
1844                 /*
1845                  * Make sure not to reference parent PIDs that reside outside
1846                  * the zone
1847                  */
1848                 ppid = ((p->p_flag & SZONETOP)
1849                     ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
1850 
1851                 /*
1852                  * Convert ppid to the Linux default of 1 if our parent is the
1853                  * zone's init process
1854                  */
1855                 if (ppid == curproc->p_zone->zone_proc_initpid)
1856                         ppid = 1;
1857         }
1858 
1859         t = lxpr_get_thread(p, lookup_id);
1860         if (t != NULL) {
1861                 switch (t->t_state) {
1862                 case TS_SLEEP:
1863                         status = "S (sleeping)";
1864                         break;
1865                 case TS_RUN:
1866                 case TS_ONPROC:
1867                         status = "R (running)";
1868                         break;
1869                 case TS_ZOMB:
1870                         status = "Z (zombie)";
1871                         break;
1872                 case TS_STOPPED:
1873                         status = "T (stopped)";
1874                         break;
1875                 default:
1876                         status = "! (unknown)";
1877                         break;
1878                 }
1879                 thread_unlock(t);
1880         } else {
1881                 if (lookup_id != 0) {
1882                         /* we can't find this specific thread */
1883                         lxpr_uiobuf_seterr(uiobuf, EINVAL);
1884                         lxpr_unlock(p);
1885                         return;
1886                 }
1887 
1888                 /*
1889                  * there is a hole in the exit code, where a proc can have
1890                  * no threads but it is yet to be flagged SZOMB. We will
1891                  * assume we are about to become a zombie
1892                  */
1893                 status = "Z (zombie)";
1894         }
1895 
1896         up = PTOU(p);
1897         mutex_enter(&p->p_crlock);
1898         crhold(cr = p->p_cred);
1899         mutex_exit(&p->p_crlock);
1900 
1901         lxpr_uiobuf_printf(uiobuf,
1902             "Name:\t%s\n"
1903             "State:\t%s\n"
1904             "Tgid:\t%d\n"
1905             "Pid:\t%d\n"
1906             "PPid:\t%d\n"
1907             "TracerPid:\t%d\n"
1908             "Uid:\t%u\t%u\t%u\t%u\n"
1909             "Gid:\t%u\t%u\t%u\t%u\n"
1910             "FDSize:\t%d\n"
1911             "Groups:\t",
1912             up->u_comm,
1913             status,
1914             pid, /* thread group id - same as pid */
1915             (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
1916             ppid,
1917             0,
1918             crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
1919             crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
1920             p->p_fno_ctl);
1921 
1922 
1923         ngroups = crgetngroups(cr);
1924         groups  = crgetgroups(cr);
1925         for (i = 0; i < ngroups; i++) {
1926                 lxpr_uiobuf_printf(uiobuf,
1927                     "%u ",
1928                     groups[i]);
1929         }
1930         crfree(cr);
1931 
1932         as = p->p_as;
1933         if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
1934                 size_t vsize, nlocked, rss;
1935 
1936                 mutex_exit(&p->p_lock);
1937                 AS_LOCK_ENTER(as, RW_READER);
1938                 vsize = as->a_resvsize;
1939                 rss = rm_asrss(as);
1940                 AS_LOCK_EXIT(as);
1941                 mutex_enter(&p->p_lock);
1942                 nlocked = p->p_locked_mem;
1943 
1944                 lxpr_uiobuf_printf(uiobuf,
1945                     "\n"
1946                     "VmSize:\t%8lu kB\n"
1947                     "VmLck:\t%8lu kB\n"
1948                     "VmRSS:\t%8lu kB\n"
1949                     "VmData:\t%8lu kB\n"
1950                     "VmStk:\t%8lu kB\n"
1951                     "VmExe:\t%8lu kB\n"
1952                     "VmLib:\t%8lu kB",
1953                     btok(vsize),
1954                     btok(nlocked),
1955                     ptok(rss),
1956                     0l,
1957                     btok(p->p_stksize),
1958                     ptok(rss),
1959                     0l);
1960         }
1961 
1962         lxpr_uiobuf_printf(uiobuf, "\nThreads:\t%u", p->p_lwpcnt);
1963 
1964         sigemptyset(&current);
1965         sigemptyset(&ignore);
1966         sigemptyset(&handle);
1967 
1968         for (i = 1; i < NSIG; i++) {
1969                 lx_sig = stol_signo[i];
1970 
1971                 if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
1972                         if (sigismember(&p->p_sig, i))
1973                                 sigaddset(&current, lx_sig);
1974 
1975                         if (up->u_signal[i - 1] == SIG_IGN)
1976                                 sigaddset(&ignore, lx_sig);
1977                         else if (up->u_signal[i - 1] != SIG_DFL)
1978                                 sigaddset(&handle, lx_sig);
1979                 }
1980         }
1981 
1982         lxpr_uiobuf_printf(uiobuf,
1983             "\n"
1984             "SigPnd:\t%08x%08x\n"
1985             "SigBlk:\t%08x%08x\n"
1986             "SigIgn:\t%08x%08x\n"
1987             "SigCgt:\t%08x%08x\n"
1988             "CapInh:\t%016x\n"
1989             "CapPrm:\t%016x\n"
1990             "CapEff:\t%016x\n",
1991             current.__sigbits[1], current.__sigbits[0],
1992             0, 0, /* signals blocked on per thread basis */
1993             ignore.__sigbits[1], ignore.__sigbits[0],
1994             handle.__sigbits[1], handle.__sigbits[0],
1995             /* Can't do anything with linux capabilities */
1996             0,
1997             0,
1998             0);
1999 
2000         lxpr_uiobuf_printf(uiobuf,
2001             "CapBnd:\t%016llx\n",
2002             /* We report the full capability bounding set */
2003             0x1fffffffffLL);
2004 
2005         lxpr_unlock(p);
2006 }
2007 
2008 /*
2009  * lxpr_read_pid_status(): status file
2010  */
2011 static void
2012 lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2013 {
2014         ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
2015 
2016         lxpr_read_status_common(lxpnp, uiobuf, 0);
2017 }
2018 
2019 /*
2020  * lxpr_read_pid_tid_status(): status file
2021  */
2022 static void
2023 lxpr_read_pid_tid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2024 {
2025         ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STATUS);
2026         lxpr_read_status_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
2027 }
2028 
2029 /*
2030  * pid/tid common code to read stat file
2031  */
2032 static void
2033 lxpr_read_stat_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
2034     uint_t lookup_id)
2035 {
2036         proc_t *p;
2037         kthread_t *t;
2038         struct as *as;
2039         char stat;
2040         pid_t pid, ppid, pgpid, spid;
2041         gid_t psgid;
2042         dev_t psdev;
2043         size_t rss, vsize;
2044         int nice, pri;
2045         caddr_t wchan;
2046         processorid_t cpu;
2047         pid_t real_pid;
2048 
2049         real_pid = get_real_pid(lxpnp->lxpr_pid);
2050         p = lxpr_lock(real_pid);
2051         if (p == NULL) {
2052                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
2053                 return;
2054         }
2055 
2056         pid = p->p_pid;
2057 
2058         /*
2059          * Set Linux defaults if we're the zone's init process
2060          */
2061         if (pid == curproc->p_zone->zone_proc_initpid) {
2062                 pid = 1;                /* PID for init */
2063                 ppid = 0;               /* parent PID for init is 0 */
2064                 pgpid = 0;              /* process group for init is 0 */
2065                 psgid = (gid_t)-1;      /* credential GID for init is -1 */
2066                 spid = 0;               /* session id for init is 0 */
2067                 psdev = 0;              /* session device for init is 0 */
2068         } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
2069                 pid = 0;                /* PID for zsched */
2070                 ppid = 0;               /* parent PID for zsched is 0 */
2071                 pgpid = 0;              /* process group for zsched is 0 */
2072                 psgid = (gid_t)-1;      /* credential GID for zsched is -1 */
2073                 spid = 0;               /* session id for zsched is 0 */
2074                 psdev = 0;              /* session device for zsched is 0 */
2075         } else {
2076                 /*
2077                  * Make sure not to reference parent PIDs that reside outside
2078                  * the zone
2079                  */
2080                 ppid = ((p->p_flag & SZONETOP) ?
2081                     curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
2082 
2083                 /*
2084                  * Convert ppid to the Linux default of 1 if our parent is the
2085                  * zone's init process
2086                  */
2087                 if (ppid == curproc->p_zone->zone_proc_initpid)
2088                         ppid = 1;
2089 
2090                 pgpid = p->p_pgrp;
2091 
2092                 mutex_enter(&p->p_splock);
2093                 mutex_enter(&p->p_sessp->s_lock);
2094                 spid = p->p_sessp->s_sid;
2095                 psdev = p->p_sessp->s_dev;
2096                 if (p->p_sessp->s_cred)
2097                         psgid = crgetgid(p->p_sessp->s_cred);
2098                 else
2099                         psgid = crgetgid(p->p_cred);
2100 
2101                 mutex_exit(&p->p_sessp->s_lock);
2102                 mutex_exit(&p->p_splock);
2103         }
2104 
2105         t = lxpr_get_thread(p, lookup_id);
2106         if (t != NULL) {
2107                 switch (t->t_state) {
2108                 case TS_SLEEP:
2109                         stat = 'S'; break;
2110                 case TS_RUN:
2111                 case TS_ONPROC:
2112                         stat = 'R'; break;
2113                 case TS_ZOMB:
2114                         stat = 'Z'; break;
2115                 case TS_STOPPED:
2116                         stat = 'T'; break;
2117                 default:
2118                         stat = '!'; break;
2119                 }
2120 
2121                 if (CL_DONICE(t, NULL, 0, &nice) != 0)
2122                         nice = 0;
2123 
2124                 pri = t->t_pri;
2125                 wchan = t->t_wchan;
2126                 cpu = t->t_cpu->cpu_id;
2127                 thread_unlock(t);
2128         } else {
2129                 if (lookup_id != 0) {
2130                         /* we can't find this specific thread */
2131                         lxpr_uiobuf_seterr(uiobuf, EINVAL);
2132                         lxpr_unlock(p);
2133                         return;
2134                 }
2135 
2136                 /* Only zombies have no threads */
2137                 stat = 'Z';
2138                 nice = 0;
2139                 pri = 0;
2140                 wchan = 0;
2141                 cpu = 0;
2142         }
2143         as = p->p_as;
2144         mutex_exit(&p->p_lock);
2145         AS_LOCK_ENTER(as, RW_READER);
2146         vsize = as->a_resvsize;
2147         rss = rm_asrss(as);
2148         AS_LOCK_EXIT(as);
2149         mutex_enter(&p->p_lock);
2150 
2151         lxpr_uiobuf_printf(uiobuf,
2152             "%d (%s) %c %d %d %d %d %d "
2153             "%lu %lu %lu %lu %lu "
2154             "%lu %lu %ld %ld "
2155             "%d %d %d "
2156             "%lu "
2157             "%lu "
2158             "%lu %ld %llu "
2159             "%lu %lu %u "
2160             "%lu %lu "
2161             "%lu %lu %lu %lu "
2162             "%lu "
2163             "%lu %lu "
2164             "%d "
2165             "%d"
2166             "\n",
2167             (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
2168             PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
2169             0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
2170             p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
2171             pri, nice, p->p_lwpcnt,
2172             0l, /* itrealvalue (time before next SIGALRM) */
2173             PTOU(p)->u_ticks,
2174             vsize, rss, p->p_vmem_ctl,
2175             0l, 0l, USRSTACK, /* startcode, endcode, startstack */
2176             0l, 0l, /* kstkesp, kstkeip */
2177             0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
2178             wchan,
2179             0l, 0l, /* nswap, cnswap */
2180             0, /* exit_signal */
2181             cpu);
2182 
2183         lxpr_unlock(p);
2184 }
2185 
2186 /*
2187  * lxpr_read_pid_stat(): pid stat file
 *
 * Produces the stat line for the process as a whole.  All of the work is
 * done by lxpr_read_stat_common(); the final argument of 0 is the lookup id
 * that function treats as "no specific thread requested".
2188  */
2189 static void
2190 lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2191 {
2192         ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
2193 
2194         lxpr_read_stat_common(lxpnp, uiobuf, 0);
2195 }
2196 
2197 /*
2198  * lxpr_read_pid_tid_stat(): pid tid stat file
 *
 * Same formatter as the pid stat file, but lxpnp->lxpr_desc is handed to
 * lxpr_read_stat_common() as the lookup id so that the output describes that
 * one thread rather than the process as a whole.
2199  */
2200 static void
2201 lxpr_read_pid_tid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2202 {
2203         ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STAT);
2204         lxpr_read_stat_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
2205 }
2206 
/*
 * lxpr_read_net_arp(): presumably the net/arp file (inferred from the name;
 * TODO confirm against the node table).
 *
 * The body is empty: neither argument is referenced and nothing is written
 * to uiobuf.
 */
2207 /* ARGSUSED */
2208 static void
2209 lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2210 {
2211 }
2212 
/*
 * Per-interface counters collected by lxpr_kstat_ifstat() and printed by
 * lxpr_read_net_dev().  The comment on each field is the name of the named
 * kstat record it is copied from.
 */
2213 struct lxpr_ifstat {
2214         uint64_t rx_bytes;	/* "rbytes64" */
2215         uint64_t rx_packets;	/* "ipackets64" */
2216         uint64_t rx_errors;	/* "ierrors" (32-bit source value) */
2217         uint64_t rx_drop;	/* "norcvbuf" (32-bit source value) */
2218         uint64_t tx_bytes;	/* "obytes64" */
2219         uint64_t tx_packets;	/* "opackets64" */
2220         uint64_t tx_errors;	/* "oerrors" (32-bit source value) */
2221         uint64_t tx_drop;	/* "noxmtbuf" (32-bit source value) */
2222         uint64_t collisions;	/* "collisions" (32-bit source value) */
2223         uint64_t rx_multicast;	/* "multircv" (32-bit source value) */
2224 };
2225 
2226 static void *
2227 lxpr_kstat_read(kstat_t *kn, boolean_t byname, size_t *size, int *num)
2228 {
2229         kstat_t *kp;
2230         int i, nrec = 0;
2231         size_t bufsize;
2232         void *buf = NULL;
2233 
2234         if (byname == B_TRUE) {
2235                 kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
2236                     kn->ks_name, getzoneid());
2237         } else {
2238                 kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
2239         }
2240         if (kp == NULL) {
2241                 return (NULL);
2242         }
2243         if (kp->ks_flags & KSTAT_FLAG_INVALID) {
2244                 kstat_rele(kp);
2245                 return (NULL);
2246         }
2247 
2248         bufsize = kp->ks_data_size + 1;
2249         kstat_rele(kp);
2250 
2251         /*
2252          * The kstat in question is released so that kmem_alloc(KM_SLEEP) is
2253          * performed without it held.  After the alloc, the kstat is reacquired
2254          * and its size is checked again. If the buffer is no longer large
2255          * enough, the alloc and check are repeated up to three times.
2256          */
2257         for (i = 0; i < 2; i++) {
2258                 buf = kmem_alloc(bufsize, KM_SLEEP);
2259 
2260                 /* Check if bufsize still appropriate */
2261                 if (byname == B_TRUE) {
2262                         kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
2263                             kn->ks_name, getzoneid());
2264                 } else {
2265                         kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
2266                 }
2267                 if (kp == NULL || kp->ks_flags & KSTAT_FLAG_INVALID) {
2268                         if (kp != NULL) {
2269                                 kstat_rele(kp);
2270                         }
2271                         kmem_free(buf, bufsize);
2272                         return (NULL);
2273                 }
2274                 KSTAT_ENTER(kp);
2275                 (void) KSTAT_UPDATE(kp, KSTAT_READ);
2276                 if (bufsize < kp->ks_data_size) {
2277                         kmem_free(buf, bufsize);
2278                         buf = NULL;
2279                         bufsize = kp->ks_data_size + 1;
2280                         KSTAT_EXIT(kp);
2281                         kstat_rele(kp);
2282                         continue;
2283                 } else {
2284                         if (KSTAT_SNAPSHOT(kp, buf, KSTAT_READ) != 0) {
2285                                 kmem_free(buf, bufsize);
2286                                 buf = NULL;
2287                         }
2288                         nrec = kp->ks_ndata;
2289                         KSTAT_EXIT(kp);
2290                         kstat_rele(kp);
2291                         break;
2292                 }
2293         }
2294 
2295         if (buf != NULL) {
2296                 *size = bufsize;
2297                 *num = nrec;
2298         }
2299         return (buf);
2300 }
2301 
2302 static int
2303 lxpr_kstat_ifstat(kstat_t *kn, struct lxpr_ifstat *ifs)
2304 {
2305         kstat_named_t *kp;
2306         int i, num;
2307         size_t size;
2308 
2309         /*
2310          * Search by name instead of by kid since there's a small window to
2311          * race against kstats being added/removed.
2312          */
2313         bzero(ifs, sizeof (*ifs));
2314         kp = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
2315         if (kp == NULL)
2316                 return (-1);
2317         for (i = 0; i < num; i++) {
2318                 if (strncmp(kp[i].name, "rbytes64", KSTAT_STRLEN) == 0)
2319                         ifs->rx_bytes = kp[i].value.ui64;
2320                 else if (strncmp(kp[i].name, "ipackets64", KSTAT_STRLEN) == 0)
2321                         ifs->rx_packets = kp[i].value.ui64;
2322                 else if (strncmp(kp[i].name, "ierrors", KSTAT_STRLEN) == 0)
2323                         ifs->rx_errors = kp[i].value.ui32;
2324                 else if (strncmp(kp[i].name, "norcvbuf", KSTAT_STRLEN) == 0)
2325                         ifs->rx_drop = kp[i].value.ui32;
2326                 else if (strncmp(kp[i].name, "multircv", KSTAT_STRLEN) == 0)
2327                         ifs->rx_multicast = kp[i].value.ui32;
2328                 else if (strncmp(kp[i].name, "obytes64", KSTAT_STRLEN) == 0)
2329                         ifs->tx_bytes = kp[i].value.ui64;
2330                 else if (strncmp(kp[i].name, "opackets64", KSTAT_STRLEN) == 0)
2331                         ifs->tx_packets = kp[i].value.ui64;
2332                 else if (strncmp(kp[i].name, "oerrors", KSTAT_STRLEN) == 0)
2333                         ifs->tx_errors = kp[i].value.ui32;
2334                 else if (strncmp(kp[i].name, "noxmtbuf", KSTAT_STRLEN) == 0)
2335                         ifs->tx_drop = kp[i].value.ui32;
2336                 else if (strncmp(kp[i].name, "collisions", KSTAT_STRLEN) == 0)
2337                         ifs->collisions = kp[i].value.ui32;
2338         }
2339         kmem_free(kp, size);
2340         return (0);
2341 }
2342 
2343 /* ARGSUSED */
2344 static void
2345 lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2346 {
2347         kstat_t *ksr;
2348         kstat_t ks0;
2349         int i, nidx;
2350         size_t sidx;
2351         struct lxpr_ifstat ifs;
2352 
2353         lxpr_uiobuf_printf(uiobuf, "Inter-|   Receive                   "
2354             "                             |  Transmit\n");
2355         lxpr_uiobuf_printf(uiobuf, " face |bytes    packets errs drop fifo"
2356             " frame compressed multicast|bytes    packets errs drop fifo"
2357             " colls carrier compressed\n");
2358 
2359         ks0.ks_kid = 0;
2360         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
2361         if (ksr == NULL)
2362                 return;
2363 
2364         for (i = 1; i < nidx; i++) {
2365                 if (strncmp(ksr[i].ks_module, "link", KSTAT_STRLEN) == 0 ||
2366                     strncmp(ksr[i].ks_module, "lo", KSTAT_STRLEN) == 0) {
2367                         if (lxpr_kstat_ifstat(&ksr[i], &ifs) != 0)
2368                                 continue;
2369 
2370                         /* Overwriting the name is ok in the local snapshot */
2371                         lx_ifname_convert(ksr[i].ks_name, LX_IF_FROMNATIVE);
2372                         lxpr_uiobuf_printf(uiobuf, "%6s: %7llu %7llu %4lu "
2373                             "%4lu %4u %5u %10u %9lu %8llu %7llu %4lu %4lu %4u "
2374                             "%5lu %7u %10u\n",
2375                             ksr[i].ks_name,
2376                             ifs.rx_bytes, ifs.rx_packets,
2377                             ifs.rx_errors, ifs.rx_drop,
2378                             0, 0, 0, ifs.rx_multicast,
2379                             ifs.tx_bytes, ifs.tx_packets,
2380                             ifs.tx_errors, ifs.tx_drop,
2381                             0, ifs.collisions, 0, 0);
2382                 }
2383         }
2384 
2385         kmem_free(ksr, sidx);
2386 }
2387 
/*
 * lxpr_read_net_dev_mcast(): presumably the net/dev_mcast file (inferred
 * from the name; TODO confirm against the node table).
 *
 * The body is empty: neither argument is referenced and nothing is written
 * to uiobuf.
 */
2388 /* ARGSUSED */
2389 static void
2390 lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2391 {
2392 }
2393 
2394 static void
2395 lxpr_inet6_out(const in6_addr_t *addr, char buf[33])
2396 {
2397         const uint8_t *ip = addr->s6_addr;
2398         char digits[] = "0123456789abcdef";
2399         int i;
2400         for (i = 0; i < 16; i++) {
2401                 buf[2 * i] = digits[ip[i] >> 4];
2402                 buf[2 * i + 1] = digits[ip[i] & 0xf];
2403         }
2404         buf[32] = '\0';
2405 }
2406 
2407 /* ARGSUSED */
2408 static void
2409 lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2410 {
2411         netstack_t *ns;
2412         ip_stack_t *ipst;
2413         ill_t *ill;
2414         ipif_t *ipif;
2415         ill_walk_context_t      ctx;
2416         char ifname[LIFNAMSIZ], ip6out[33];
2417 
2418         ns = netstack_get_current();
2419         if (ns == NULL)
2420                 return;
2421         ipst = ns->netstack_ip;
2422 
2423         rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2424         ill = ILL_START_WALK_V6(&ctx, ipst);
2425 
2426         for (; ill != NULL; ill = ill_next(&ctx, ill)) {
2427                 for (ipif = ill->ill_ipif; ipif != NULL;
2428                     ipif = ipif->ipif_next) {
2429                         uint_t index = ill->ill_phyint->phyint_ifindex;
2430                         int plen = ip_mask_to_plen_v6(&ipif->ipif_v6net_mask);
2431                         unsigned int scope = lx_ipv6_scope_convert(
2432                             &ipif->ipif_v6lcl_addr);
2433                         /* Always report PERMANENT flag */
2434                         int flag = 0x80;
2435 
2436                         (void) snprintf(ifname, LIFNAMSIZ, "%s", ill->ill_name);
2437                         lx_ifname_convert(ifname, LX_IF_FROMNATIVE);
2438                         lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out);
2439 
2440                         lxpr_uiobuf_printf(uiobuf, "%32s %02x %02x %02x %02x"
2441                             " %8s\n", ip6out, index, plen, scope, flag, ifname);
2442                 }
2443         }
2444         rw_exit(&ipst->ips_ill_g_lock);
2445         netstack_rele(ns);
2446 }
2447 
/*
 * lxpr_read_net_igmp(): presumably the net/igmp file (inferred from the
 * name; TODO confirm against the node table).
 *
 * The body is empty: neither argument is referenced and nothing is written
 * to uiobuf.
 */
2448 /* ARGSUSED */
2449 static void
2450 lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2451 {
2452 }
2453 
/*
 * lxpr_read_net_ip_mr_cache(): presumably the net/ip_mr_cache file
 * (inferred from the name; TODO confirm against the node table).
 *
 * The body is empty: neither argument is referenced and nothing is written
 * to uiobuf.
 */
2454 /* ARGSUSED */
2455 static void
2456 lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2457 {
2458 }
2459 
/*
 * lxpr_read_net_ip_mr_vif(): presumably the net/ip_mr_vif file (inferred
 * from the name; TODO confirm against the node table).
 *
 * The body is empty: neither argument is referenced and nothing is written
 * to uiobuf.
 */
2460 /* ARGSUSED */
2461 static void
2462 lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2463 {
2464 }
2465 
2466 static void
2467 lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf)
2468 {
2469         uint32_t flags;
2470         char name[IFNAMSIZ];
2471         char ipv6addr[33];
2472 
2473         lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr);
2474         lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr,
2475             ip_mask_to_plen_v6(&ire->ire_mask_v6));
2476 
2477         /* punt on this for now */
2478         lxpr_uiobuf_printf(uiobuf, "%s %02x ",
2479             "00000000000000000000000000000000", 0);
2480 
2481         lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr);
2482         lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr);
2483 
2484         flags = ire->ire_flags &
2485             (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
2486         /* Linux's RTF_LOCAL equivalent */
2487         if (ire->ire_metrics.iulp_local)
2488                 flags |= 0x80000000;
2489 
2490         if (ire->ire_ill != NULL) {
2491                 ill_get_name(ire->ire_ill, name, sizeof (name));
2492                 lx_ifname_convert(name, LX_IF_FROMNATIVE);
2493         } else {
2494                 name[0] = '\0';
2495         }
2496 
2497         lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n",
2498             0, /* metric */
2499             ire->ire_refcnt,
2500             0,
2501             flags,
2502             name);
2503 }
2504 
2505 /* ARGSUSED */
2506 static void
2507 lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2508 {
2509         netstack_t *ns;
2510         ip_stack_t *ipst;
2511 
2512         ns = netstack_get_current();
2513         if (ns == NULL)
2514                 return;
2515         ipst = ns->netstack_ip;
2516 
2517         /*
2518          * LX branded zones are expected to have exclusive IP stack, hence
2519          * using ALL_ZONES as the zoneid filter.
2520          */
2521         ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst);
2522 
2523         netstack_rele(ns);
2524 }
2525 
/*
 * lxpr_read_net_mcfilter(): presumably the net/mcfilter file (inferred from
 * the name; TODO confirm against the node table).
 *
 * The body is empty: neither argument is referenced and nothing is written
 * to uiobuf.
 */
2526 /* ARGSUSED */
2527 static void
2528 lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2529 {
2530 }
2531 
/*
 * lxpr_read_net_netstat(): presumably the net/netstat file (inferred from
 * the name; TODO confirm against the node table).
 *
 * The body is empty: neither argument is referenced and nothing is written
 * to uiobuf.
 */
2532 /* ARGSUSED */
2533 static void
2534 lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2535 {
2536 }
2537 
/*
 * lxpr_read_net_raw(): presumably the net/raw file (inferred from the name;
 * TODO confirm against the node table).
 *
 * The body is empty: neither argument is referenced and nothing is written
 * to uiobuf.
 */
2538 /* ARGSUSED */
2539 static void
2540 lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2541 {
2542 }
2543 
/*
 * LXPR_SKIP_ROUTE(type): evaluates to non-zero when the IRE type bitmask
 * `type' has any of the clone, broadcast, multicast, no-route, loopback or
 * local bits set.  lxpr_format_route_ipv4() uses it to decide which routes to
 * leave out of its output.
 *
 * The argument is parenthesized so that a compound expression such as
 * (a | b) is masked as a whole instead of being split by operator precedence.
 */
#define	LXPR_SKIP_ROUTE(type)	\
	(((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \
	IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & (type)) != 0)
2547 
2548 static void
2549 lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf)
2550 {
2551         uint32_t flags;
2552         char name[IFNAMSIZ];
2553         ill_t *ill;
2554         ire_t *nire;
2555         ipif_t *ipif;
2556         ipaddr_t gateway;
2557 
2558         if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0)
2559                 return;
2560 
2561         /* These route flags have direct Linux equivalents */
2562         flags = ire->ire_flags &
2563             (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
2564 
2565         /*
2566          * Search for a suitable IRE for naming purposes.
2567          * On Linux, the default route is typically associated with the
2568          * interface used to access gateway.  The default IRE on Illumos
2569          * typically lacks an ill reference but its parent might have one.
2570          */
2571         nire = ire;
2572         do {
2573                 ill = nire->ire_ill;
2574                 nire = nire->ire_dep_parent;
2575         } while (ill == NULL && nire != NULL);
2576         if (ill != NULL) {
2577                 ill_get_name(ill, name, sizeof (name));
2578                 lx_ifname_convert(name, LX_IF_FROMNATIVE);
2579         } else {
2580                 name[0] = '*';
2581                 name[1] = '\0';
2582         }
2583 
2584         /*
2585          * Linux suppresses the gateway address for directly connected
2586          * interface networks.  To emulate this behavior, we walk all addresses
2587          * of a given route interface.  If one matches the gateway, it is
2588          * displayed as NULL.
2589          */
2590         gateway = ire->ire_gateway_addr;
2591         if ((ill = ire->ire_ill) != NULL) {
2592                 for (ipif = ill->ill_ipif; ipif != NULL;
2593                     ipif = ipif->ipif_next) {
2594                         if (ipif->ipif_lcl_addr == gateway) {
2595                                 gateway = 0;
2596                                 break;
2597                         }
2598                 }
2599         }
2600 
2601         lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2602             "%d\t%08X\t%d\t%u\t%u\n",
2603             name,
2604             ire->ire_addr,
2605             gateway,
2606             flags, 0, 0,
2607             0, /* priority */
2608             ire->ire_mask,
2609             0, 0, /* mss, window */
2610             ire->ire_metrics.iulp_rtt);
2611 }
2612 
2613 /* ARGSUSED */
2614 static void
2615 lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2616 {
2617         netstack_t *ns;
2618         ip_stack_t *ipst;
2619 
2620         lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t"
2621             "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
2622 
2623         ns = netstack_get_current();
2624         if (ns == NULL)
2625                 return;
2626         ipst = ns->netstack_ip;
2627 
2628         /*
2629          * LX branded zones are expected to have exclusive IP stack, hence
2630          * using ALL_ZONES as the zoneid filter.
2631          */
2632         ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst);
2633 
2634         netstack_rele(ns);
2635 }
2636 
/*
 * lxpr_read_net_rpc(): presumably the net/rpc file (inferred from the name;
 * TODO confirm against the node table).
 *
 * The body is empty: neither argument is referenced and nothing is written
 * to uiobuf.
 */
2637 /* ARGSUSED */
2638 static void
2639 lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2640 {
2641 }
2642 
/*
 * lxpr_read_net_rt_cache(): presumably the net/rt_cache file (inferred from
 * the name; TODO confirm against the node table).
 *
 * The body is empty: neither argument is referenced and nothing is written
 * to uiobuf.
 */
2643 /* ARGSUSED */
2644 static void
2645 lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2646 {
2647 }
2648 
/*
 * lxpr_read_net_sockstat(): presumably the net/sockstat file (inferred from
 * the name; TODO confirm against the node table).
 *
 * The body is empty: neither argument is referenced and nothing is written
 * to uiobuf.
 */
2649 /* ARGSUSED */
2650 static void
2651 lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2652 {
2653 }
2654 
/*
 * Describes one protocol section of the snmp output.
 *
 * lst_proto	protocol name; lxpr_read_net_snmp() compares it with a kstat's
 *		ks_name, and lxpr_kstat_print_tab() prints it (first letter
 *		upper-cased) as the prefix of both output lines
 * lst_fields	NULL-terminated list of named-kstat record names, in the
 *		order in which the columns are printed
 */
2655 typedef struct lxpr_snmp_table {
2656         const char *lst_proto;
2657         const char *lst_fields[];
2658 } lxpr_snmp_table_t;
2659 
/*
 * Column lists for each protocol section printed by lxpr_read_net_snmp().
 * Each list ends with NULL.  A listed name with no matching record in the
 * kstat snapshot is printed as 0 by lxpr_kstat_print_tab().
 */

/* "ip" section */
2660 static lxpr_snmp_table_t lxpr_snmp_ip = { "ip",
2661         {
2662         "forwarding", "defaultTTL", "inReceives", "inHdrErrors",
2663         "inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards",
2664         "inDelivers", "outRequests", "outDiscards", "outNoRoutes",
2665         "reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs",
2666         "fragFails", "fragCreates",
2667         NULL
2668         }
2669 };
/* "icmp" section */
2670 static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp",
2671         {
2672         "inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds",
2673         "inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps",
2674         "inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps",
2675         "outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds",
2676         "outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos",
2677         "outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks",
2678         "outAddrMaskReps",
2679         NULL
2680         }
2681 };
/* "tcp" section */
2682 static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp",
2683         {
2684         "rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens",
2685         "passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs",
2686         "outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors",
2687         NULL
2688         }
2689 };
/* "udp" section */
2690 static lxpr_snmp_table_t lxpr_snmp_udp = { "udp",
2691         {
2692         "inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors",
2693         "sndbufErrors", "inCsumErrors",
2694         NULL
2695         }
2696 };
2697 
/*
 * NULL-terminated list of the sections above, in the order in which
 * lxpr_read_net_snmp() emits them.
 */
2698 static lxpr_snmp_table_t *lxpr_net_snmptab[] = {
2699         &lxpr_snmp_ip,
2700         &lxpr_snmp_icmp,
2701         &lxpr_snmp_tcp,
2702         &lxpr_snmp_udp,
2703         NULL
2704 };
2705 
2706 static void
2707 lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table,
2708     kstat_t *kn)
2709 {
2710         kstat_named_t *klist;
2711         char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN];
2712         int i, j, num;
2713         size_t size;
2714 
2715         klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
2716         if (klist == NULL)
2717                 return;
2718 
2719         /* Print the header line, fields capitalized */
2720         (void) strncpy(upname, table->lst_proto, KSTAT_STRLEN);
2721         upname[0] = toupper(upname[0]);
2722         lxpr_uiobuf_printf(uiobuf, "%s:", upname);
2723         for (i = 0; table->lst_fields[i] != NULL; i++) {
2724                 (void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN);
2725                 upfield[0] = toupper(upfield[0]);
2726                 lxpr_uiobuf_printf(uiobuf, " %s", upfield);
2727         }
2728         lxpr_uiobuf_printf(uiobuf, "\n%s:", upname);
2729 
2730         /* Then loop back through to print the value line. */
2731         for (i = 0; table->lst_fields[i] != NULL; i++) {
2732                 kstat_named_t *kpoint = NULL;
2733                 for (j = 0; j < num; j++) {
2734                         if (strncmp(klist[j].name, table->lst_fields[i],
2735                             KSTAT_STRLEN) == 0) {
2736                                 kpoint = &klist[j];
2737                                 break;
2738                         }
2739                 }
2740                 if (kpoint == NULL) {
2741                         /* Output 0 for unknown fields */
2742                         lxpr_uiobuf_printf(uiobuf, " 0");
2743                 } else {
2744                         switch (kpoint->data_type) {
2745                         case KSTAT_DATA_INT32:
2746                                 lxpr_uiobuf_printf(uiobuf, " %d",
2747                                     kpoint->value.i32);
2748                                 break;
2749                         case KSTAT_DATA_UINT32:
2750                                 lxpr_uiobuf_printf(uiobuf, " %u",
2751                                     kpoint->value.ui32);
2752                                 break;
2753                         case KSTAT_DATA_INT64:
2754                                 lxpr_uiobuf_printf(uiobuf, " %ld",
2755                                     kpoint->value.l);
2756                                 break;
2757                         case KSTAT_DATA_UINT64:
2758                                 lxpr_uiobuf_printf(uiobuf, " %lu",
2759                                     kpoint->value.ul);
2760                                 break;
2761                         }
2762                 }
2763         }
2764         lxpr_uiobuf_printf(uiobuf, "\n");
2765         kmem_free(klist, size);
2766 }
2767 
2768 /* ARGSUSED */
2769 static void
2770 lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2771 {
2772         kstat_t *ksr;
2773         kstat_t ks0;
2774         lxpr_snmp_table_t **table = lxpr_net_snmptab;
2775         int i, t, nidx;
2776         size_t sidx;
2777 
2778         ks0.ks_kid = 0;
2779         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
2780         if (ksr == NULL)
2781                 return;
2782 
2783         for (t = 0; table[t] != NULL; t++) {
2784                 for (i = 0; i < nidx; i++) {
2785                         if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0)
2786                                 continue;
2787                         if (strncmp(ksr[i].ks_name, table[t]->lst_proto,
2788                             KSTAT_STRLEN) == 0) {
2789                                 lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]);
2790                                 break;
2791                         }
2792                 }
2793         }
2794         kmem_free(ksr, sidx);
2795 }
2796 
/*
 * lxpr_read_net_stat(): presumably the net/stat file (inferred from the
 * name; TODO confirm against the node table).
 *
 * The body is empty: neither argument is referenced and nothing is written
 * to uiobuf.
 */
2797 /* ARGSUSED */
2798 static void
2799 lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2800 {
2801 }
2802 
2803 static int
2804 lxpr_convert_tcp_state(int st)
2805 {
2806         /*
2807          * Derived from the enum located in the Linux kernel sources:
2808          * include/net/tcp_states.h
2809          */
2810         switch (st) {
2811         case TCPS_ESTABLISHED:
2812                 return (1);
2813         case TCPS_SYN_SENT:
2814                 return (2);
2815         case TCPS_SYN_RCVD:
2816                 return (3);
2817         case TCPS_FIN_WAIT_1:
2818                 return (4);
2819         case TCPS_FIN_WAIT_2:
2820                 return (5);
2821         case TCPS_TIME_WAIT:
2822                 return (6);
2823         case TCPS_CLOSED:
2824                 return (7);
2825         case TCPS_CLOSE_WAIT:
2826                 return (8);
2827         case TCPS_LAST_ACK:
2828                 return (9);
2829         case TCPS_LISTEN:
2830                 return (10);
2831         case TCPS_CLOSING:
2832                 return (11);
2833         default:
2834                 /* No translation for TCPS_IDLE, TCPS_BOUND or anything else */
2835                 return (0);
2836         }
2837 }
2838 
2839 static void
2840 lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
2841 {
2842         int i, sl = 0;
2843         connf_t *connfp;
2844         conn_t *connp;
2845         netstack_t *ns;
2846         ip_stack_t *ipst;
2847 
2848         ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
2849         if (ipver == IPV4_VERSION) {
2850                 lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address   "
2851                     "st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout "
2852                     "inode\n");
2853         } else {
2854                 lxpr_uiobuf_printf(uiobuf, "  sl  "
2855                     "local_address                         "
2856                     "remote_address                        "
2857                     "st tx_queue rx_queue tr tm->when retrnsmt   "
2858                     "uid  timeout inode\n");
2859         }
2860         /*
2861          * Due to differences between the Linux and illumos TCP
2862          * implementations, some data will be omitted from the output here.
2863          *
2864          * Valid fields:
2865          *  - local_address
2866          *  - remote_address
2867          *  - st
2868          *  - tx_queue
2869          *  - rx_queue
2870          *  - uid
2871          *  - inode
2872          *
2873          * Omitted/invalid fields
2874          *  - tr
2875          *  - tm->when
2876          *  - retrnsmt
2877          *  - timeout
2878          */
2879 
2880         ns = netstack_get_current();
2881         if (ns == NULL)
2882                 return;
2883         ipst = ns->netstack_ip;
2884 
2885         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2886                 connfp = &ipst->ips_ipcl_globalhash_fanout[i];
2887                 connp = NULL;
2888                 while ((connp =
2889                     ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) {
2890                         tcp_t *tcp;
2891                         vattr_t attr;
2892                         sonode_t *so = (sonode_t *)connp->conn_upper_handle;
2893                         vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
2894                         if (connp->conn_ipversion != ipver)
2895                                 continue;
2896                         tcp = connp->conn_tcp;
2897                         if (ipver == IPV4_VERSION) {
2898                                 lxpr_uiobuf_printf(uiobuf,
2899                                     "%4d: %08X:%04X %08X:%04X ",
2900                                     ++sl,
2901                                     connp->conn_laddr_v4,
2902                                     ntohs(connp->conn_lport),
2903                                     connp->conn_faddr_v4,
2904                                     ntohs(connp->conn_fport));
2905                         } else {
2906                                 lxpr_uiobuf_printf(uiobuf, "%4d: "
2907                                     "%08X%08X%08X%08X:%04X "
2908                                     "%08X%08X%08X%08X:%04X ",
2909                                     ++sl,
2910                                     connp->conn_laddr_v6.s6_addr32[0],
2911                                     connp->conn_laddr_v6.s6_addr32[1],
2912                                     connp->conn_laddr_v6.s6_addr32[2],
2913                                     connp->conn_laddr_v6.s6_addr32[3],
2914                                     ntohs(connp->conn_lport),
2915                                     connp->conn_faddr_v6.s6_addr32[0],
2916                                     connp->conn_faddr_v6.s6_addr32[1],
2917                                     connp->conn_faddr_v6.s6_addr32[2],
2918                                     connp->conn_faddr_v6.s6_addr32[3],
2919                                     ntohs(connp->conn_fport));
2920                         }
2921 
2922                         /* fetch the simulated inode for the socket */
2923                         if (vp == NULL ||
2924                             VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
2925                                 attr.va_nodeid = 0;
2926 
2927                         lxpr_uiobuf_printf(uiobuf,
2928                             "%02X %08X:%08X %02X:%08X %08X "
2929                             "%5u %8d %lu %d %p %u %u %u %u %d\n",
2930                             lxpr_convert_tcp_state(tcp->tcp_state),
2931                             tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */
2932                             0, 0, /* tr, when */
2933                             0, /* per-connection rexmits aren't tracked today */
2934                             connp->conn_cred->cr_uid,
2935                             0, /* timeout */
2936                             /* inode + more */
2937                             (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0);
2938                 }
2939         }
2940         netstack_rele(ns);
2941 }
2942 
2943 /* ARGSUSED */
2944 static void
2945 lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2946 {
2947         lxpr_format_tcp(uiobuf, IPV4_VERSION);
2948 }
2949 
2950 /* ARGSUSED */
2951 static void
2952 lxpr_read_net_tcp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2953 {
2954         lxpr_format_tcp(uiobuf, IPV6_VERSION);
2955 }
2956 
2957 static void
2958 lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
2959 {
2960         int i, sl = 0;
2961         connf_t *connfp;
2962         conn_t *connp;
2963         netstack_t *ns;
2964         ip_stack_t *ipst;
2965 
2966         ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
2967         if (ipver == IPV4_VERSION) {
2968                 lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address"
2969                     "   st tx_queue rx_queue tr tm->when retrnsmt   uid"
2970                     "  timeout inode ref pointer drops\n");
2971         } else {
2972                 lxpr_uiobuf_printf(uiobuf, "  sl  "
2973                     "local_address                         "
2974                     "remote_address                        "
2975                     "st tx_queue rx_queue tr tm->when retrnsmt   "
2976                     "uid  timeout inode ref pointer drops\n");
2977         }
2978         /*
2979          * Due to differences between the Linux and illumos UDP
2980          * implementations, some data will be omitted from the output here.
2981          *
2982          * Valid fields:
2983          *  - local_address
2984          *  - remote_address
2985          *  - st: limited
2986          *  - uid
2987          *
2988          * Omitted/invalid fields
2989          *  - tx_queue
2990          *  - rx_queue
2991          *  - tr
2992          *  - tm->when
2993          *  - retrnsmt
2994          *  - timeout
2995          *  - inode
2996          */
2997 
2998         ns = netstack_get_current();
2999         if (ns == NULL)
3000                 return;
3001         ipst = ns->netstack_ip;
3002 
3003         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
3004                 connfp = &ipst->ips_ipcl_globalhash_fanout[i];
3005                 connp = NULL;
3006                 while ((connp =
3007                     ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) {
3008                         udp_t *udp;
3009                         int state = 0;
3010                         vattr_t attr;
3011                         sonode_t *so = (sonode_t *)connp->conn_upper_handle;
3012                         vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
3013                         if (connp->conn_ipversion != ipver)
3014                                 continue;
3015                         udp = connp->conn_udp;
3016                         if (ipver == IPV4_VERSION) {
3017                                 lxpr_uiobuf_printf(uiobuf,
3018                                     "%4d: %08X:%04X %08X:%04X ",
3019                                     ++sl,
3020                                     connp->conn_laddr_v4,
3021                                     ntohs(connp->conn_lport),
3022                                     connp->conn_faddr_v4,
3023                                     ntohs(connp->conn_fport));
3024                         } else {
3025                                 lxpr_uiobuf_printf(uiobuf, "%4d: "
3026                                     "%08X%08X%08X%08X:%04X "
3027                                     "%08X%08X%08X%08X:%04X ",
3028                                     ++sl,
3029                                     connp->conn_laddr_v6.s6_addr32[0],
3030                                     connp->conn_laddr_v6.s6_addr32[1],
3031                                     connp->conn_laddr_v6.s6_addr32[2],
3032                                     connp->conn_laddr_v6.s6_addr32[3],
3033                                     ntohs(connp->conn_lport),
3034                                     connp->conn_faddr_v6.s6_addr32[0],
3035                                     connp->conn_faddr_v6.s6_addr32[1],
3036                                     connp->conn_faddr_v6.s6_addr32[2],
3037                                     connp->conn_faddr_v6.s6_addr32[3],
3038                                     ntohs(connp->conn_fport));
3039                         }
3040 
3041                         switch (udp->udp_state) {
3042                         case TS_UNBND:
3043                         case TS_IDLE:
3044                                 state = 7;
3045                                 break;
3046                         case TS_DATA_XFER:
3047                                 state = 1;
3048                                 break;
3049                         }
3050 
3051                         /* fetch the simulated inode for the socket */
3052                         if (vp == NULL ||
3053                             VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
3054                                 attr.va_nodeid = 0;
3055 
3056                         lxpr_uiobuf_printf(uiobuf,
3057                             "%02X %08X:%08X %02X:%08X %08X "
3058                             "%5u %8d %lu %d %p %d\n",
3059                             state,
3060                             0, 0, /* rx/tx queue */
3061                             0, 0, /* tr, when */
3062                             0, /* retrans */
3063                             connp->conn_cred->cr_uid,
3064                             0, /* timeout */
3065                             /* inode, ref, pointer, drops */
3066                             (ino_t)attr.va_nodeid, 0, NULL, 0);
3067                 }
3068         }
3069         netstack_rele(ns);
3070 }
3071 
3072 /* ARGSUSED */
3073 static void
3074 lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3075 {
3076         lxpr_format_udp(uiobuf, IPV4_VERSION);
3077 }
3078 
3079 /* ARGSUSED */
3080 static void
3081 lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3082 {
3083         lxpr_format_udp(uiobuf, IPV6_VERSION);
3084 }
3085 
3086 /* ARGSUSED */
3087 static void
3088 lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3089 {
3090         sonode_t *so;
3091         zoneid_t zoneid = getzoneid();
3092 
3093         lxpr_uiobuf_printf(uiobuf, "Num       RefCount Protocol Flags    Type "
3094             "St Inode Path\n");
3095 
3096         mutex_enter(&socklist.sl_lock);
3097         for (so = socklist.sl_list; so != NULL;
3098             so = _SOTOTPI(so)->sti_next_so) {
3099                 vnode_t *vp = so->so_vnode;
3100                 vattr_t attr;
3101                 sotpi_info_t *sti;
3102                 const char *name = NULL;
3103                 int status = 0;
3104                 int type = 0;
3105                 int flags = 0;
3106 
3107                 /* Only process active sonodes in this zone */
3108                 if (so->so_count == 0 || so->so_zoneid != zoneid)
3109                         continue;
3110 
3111                 /*
3112                  * Grab the inode, if possible.
3113                  * This must be done before entering so_lock.
3114                  */
3115                 if (vp == NULL ||
3116                     VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
3117                         attr.va_nodeid = 0;
3118 
3119                 mutex_enter(&so->so_lock);
3120                 sti = _SOTOTPI(so);
3121 
3122                 if (sti->sti_laddr_sa != NULL &&
3123                     sti->sti_laddr_len > 0) {
3124                         name = sti->sti_laddr_sa->sa_data;
3125                 } else if (sti->sti_faddr_sa != NULL &&
3126                     sti->sti_faddr_len > 0) {
3127                         name = sti->sti_faddr_sa->sa_data;
3128                 }
3129 
3130                 /*
3131                  * Derived from enum values in Linux kernel source:
3132                  * include/uapi/linux/net.h
3133                  */
3134                 if ((so->so_state & SS_ISDISCONNECTING) != 0) {
3135                         status = 4;
3136                 } else if ((so->so_state & SS_ISCONNECTING) != 0) {
3137                         status = 2;
3138                 } else if ((so->so_state & SS_ISCONNECTED) != 0) {
3139                         status = 3;
3140                 } else {
3141                         status = 1;
3142                         /* Add ACC flag for stream-type server sockets */
3143                         if (so->so_type != SOCK_DGRAM &&
3144                             sti->sti_laddr_sa != NULL)
3145                                 flags |= 0x10000;
3146                 }
3147 
3148                 /* Convert to Linux type */
3149                 switch (so->so_type) {
3150                 case SOCK_DGRAM:
3151                         type = 2;
3152                         break;
3153                 case SOCK_SEQPACKET:
3154                         type = 5;
3155                         break;
3156                 default:
3157                         type = 1;
3158                 }
3159 
3160                 lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu",
3161                     so,
3162                     so->so_count,
3163                     0, /* proto, always 0 */
3164                     flags,
3165                     type,
3166                     status,
3167                     (ino_t)attr.va_nodeid);
3168 
3169                 /*
3170                  * Due to shortcomings in the abstract socket emulation, they
3171                  * cannot be properly represented here (as @<path>).
3172                  *
3173                  * This will be the case until they are better implemented.
3174                  */
3175                 if (name != NULL)
3176                         lxpr_uiobuf_printf(uiobuf, " %s\n", name);
3177                 else
3178                         lxpr_uiobuf_printf(uiobuf, "\n");
3179                 mutex_exit(&so->so_lock);
3180         }
3181         mutex_exit(&socklist.sl_lock);
3182 }
3183 
3184 /*
3185  * lxpr_read_kmsg(): read the contents of the kernel message queue. We
3186  * translate this into the reception of console messages for this zone; each
3187  * read copies out a single zone console message, or blocks until the next one
3188  * is produced, unless we're open non-blocking, in which case we return after
3189  * 1ms.
3190  */
3191 
3192 #define LX_KMSG_PRI     "<0>"
3193 
3194 static void
3195 lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh)
3196 {
3197         mblk_t          *mp;
3198         timestruc_t     to;
3199         timestruc_t     *tp = NULL;
3200 
3201         ASSERT(lxpnp->lxpr_type == LXPR_KMSG);
3202 
3203         if (lxpr_uiobuf_nonblock(uiobuf)) {
3204                 to.tv_sec = 0;
3205                 to.tv_nsec = 1000000; /* 1msec */
3206                 tp = &to;
3207         }
3208 
3209         if (ldi_getmsg(lh, &mp, tp) == 0) {
3210                 /*
3211                  * lx procfs doesn't like successive reads to the same file
3212                  * descriptor unless we do an explicit rewind each time.
3213                  */
3214                 lxpr_uiobuf_seek(uiobuf, 0);
3215 
3216                 lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
3217                     mp->b_cont->b_rptr);
3218 
3219                 freemsg(mp);
3220         }
3221 }
3222 
3223 /*
3224  * lxpr_read_loadavg(): read the contents of the "loadavg" file.  We do just
3225  * enough for uptime and other simple lxproc readers to work
3226  */
3227 extern int nthread;
3228 
3229 static void
3230 lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3231 {
3232         ulong_t avenrun1;
3233         ulong_t avenrun5;
3234         ulong_t avenrun15;
3235         ulong_t avenrun1_cs;
3236         ulong_t avenrun5_cs;
3237         ulong_t avenrun15_cs;
3238         int loadavg[3];
3239         int *loadbuf;
3240         cpupart_t *cp;
3241         zone_t *zone = LXPTOZ(lxpnp);
3242 
3243         uint_t nrunnable = 0;
3244         rctl_qty_t nlwps;
3245 
3246         ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
3247 
3248         mutex_enter(&cpu_lock);
3249 
3250         /*
3251          * Need to add up values over all CPU partitions. If pools are active,
3252          * only report the values of the zone's partition, which by definition
3253          * includes the current CPU.
3254          */
3255         if (pool_pset_enabled()) {
3256                 psetid_t psetid = zone_pset_get(curproc->p_zone);
3257 
3258                 ASSERT(curproc->p_zone != &zone0);
3259                 cp = CPU->cpu_part;
3260 
3261                 nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
3262                 (void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
3263                 loadbuf = &loadavg[0];
3264         } else {
3265                 cp = cp_list_head;
3266                 do {
3267                         nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
3268                 } while ((cp = cp->cp_next) != cp_list_head);
3269 
3270                 loadbuf = zone == global_zone ?
3271                     &avenrun[0] : zone->zone_avenrun;
3272         }
3273 
3274         /*
3275          * If we're in the non-global zone, we'll report the total number of
3276          * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
3277          * otherwise will just use nthread (which will include kernel threads,
3278          * but should be good enough for lxproc).
3279          */
3280         nlwps = zone == global_zone ? nthread : zone->zone_nlwps;
3281 
3282         mutex_exit(&cpu_lock);
3283 
3284         avenrun1 = loadbuf[0] >> FSHIFT;
3285         avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
3286         avenrun5 = loadbuf[1] >> FSHIFT;
3287         avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
3288         avenrun15 = loadbuf[2] >> FSHIFT;
3289         avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
3290 
3291         lxpr_uiobuf_printf(uiobuf,
3292             "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
3293             avenrun1, avenrun1_cs,
3294             avenrun5, avenrun5_cs,
3295             avenrun15, avenrun15_cs,
3296             nrunnable, nlwps, 0);
3297 }
3298 
3299 /*
3300  * lxpr_read_meminfo(): read the contents of the "meminfo" file.
3301  */
3302 static void
3303 lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3304 {
3305         zone_t *zone = LXPTOZ(lxpnp);
3306         int global = zone == global_zone;
3307         long total_mem, free_mem, total_swap, used_swap;
3308 
3309         ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
3310 
3311         if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
3312                 total_mem = physmem * PAGESIZE;
3313                 free_mem = freemem * PAGESIZE;
3314         } else {
3315                 total_mem = zone->zone_phys_mem_ctl;
3316                 free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
3317         }
3318 
3319         if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
3320                 total_swap = k_anoninfo.ani_max * PAGESIZE;
3321                 used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
3322         } else {
3323                 mutex_enter(&zone->zone_mem_lock);
3324                 total_swap = zone->zone_max_swap_ctl;
3325                 used_swap = zone->zone_max_swap;
3326                 mutex_exit(&zone->zone_mem_lock);
3327         }
3328 
3329         lxpr_uiobuf_printf(uiobuf,
3330             "MemTotal:  %8lu kB\n"
3331             "MemFree:   %8lu kB\n"
3332             "MemShared: %8u kB\n"
3333             "Buffers:   %8u kB\n"
3334             "Cached:    %8u kB\n"
3335             "SwapCached:%8u kB\n"
3336             "Active:    %8u kB\n"
3337             "Inactive:  %8u kB\n"
3338             "HighTotal: %8u kB\n"
3339             "HighFree:  %8u kB\n"
3340             "LowTotal:  %8u kB\n"
3341             "LowFree:   %8u kB\n"
3342             "SwapTotal: %8lu kB\n"
3343             "SwapFree:  %8lu kB\n",
3344             btok(total_mem),                            /* MemTotal */
3345             btok(free_mem),                             /* MemFree */
3346             0,                                          /* MemShared */
3347             0,                                          /* Buffers */
3348             0,                                          /* Cached */
3349             0,                                          /* SwapCached */
3350             0,                                          /* Active */
3351             0,                                          /* Inactive */
3352             0,                                          /* HighTotal */
3353             0,                                          /* HighFree */
3354             btok(total_mem),                            /* LowTotal */
3355             btok(free_mem),                             /* LowFree */
3356             btok(total_swap),                           /* SwapTotal */
3357             btok(total_swap - used_swap));              /* SwapFree */
3358 }
3359 
3360 /*
3361  * lxpr_read_mounts():
3362  */
3363 /* ARGSUSED */
3364 static void
3365 lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3366 {
3367         struct vfs *vfsp;
3368         struct vfs *vfslist;
3369         zone_t *zone = LXPTOZ(lxpnp);
3370         struct print_data {
3371                 refstr_t *vfs_mntpt;
3372                 refstr_t *vfs_resource;
3373                 uint_t vfs_flag;
3374                 int vfs_fstype;
3375                 struct print_data *next;
3376         } *print_head = NULL;
3377         struct print_data **print_tail = &print_head;
3378         struct print_data *printp;
3379 
3380         vfs_list_read_lock();
3381 
3382         if (zone == global_zone) {
3383                 vfsp = vfslist = rootvfs;
3384         } else {
3385                 vfsp = vfslist = zone->zone_vfslist;
3386                 /*
3387                  * If the zone has a root entry, it will be the first in
3388                  * the list.  If it doesn't, we conjure one up.
3389                  */
3390                 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
3391                     zone->zone_rootpath) != 0) {
3392                         struct vfs *tvfsp;
3393                         /*
3394                          * The root of the zone is not a mount point.  The vfs
3395                          * we want to report is that of the zone's root vnode.
3396                          */
3397                         tvfsp = zone->zone_rootvp->v_vfsp;
3398 
3399                         lxpr_uiobuf_printf(uiobuf,
3400                             "/ / %s %s 0 0\n",
3401                             vfssw[tvfsp->vfs_fstype].vsw_name,
3402                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
3403 
3404                 }
3405                 if (vfslist == NULL) {
3406                         vfs_list_unlock();
3407                         return;
3408                 }
3409         }
3410 
3411         /*
3412          * Later on we have to do a lookupname, which can end up causing
3413          * another vfs_list_read_lock() to be called. Which can lead to a
3414          * deadlock. To avoid this, we extract the data we need into a local
3415          * list, then we can run this list without holding vfs_list_read_lock()
3416          * We keep the list in the same order as the vfs_list
3417          */
3418         do {
3419                 /* Skip mounts we shouldn't show */
3420                 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
3421                         goto nextfs;
3422                 }
3423 
3424                 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
3425                 refstr_hold(vfsp->vfs_mntpt);
3426                 printp->vfs_mntpt = vfsp->vfs_mntpt;
3427                 refstr_hold(vfsp->vfs_resource);
3428                 printp->vfs_resource = vfsp->vfs_resource;
3429                 printp->vfs_flag = vfsp->vfs_flag;
3430                 printp->vfs_fstype = vfsp->vfs_fstype;
3431                 printp->next = NULL;
3432 
3433                 *print_tail = printp;
3434                 print_tail = &printp->next;
3435 
3436 nextfs:
3437                 vfsp = (zone == global_zone) ?
3438                     vfsp->vfs_next : vfsp->vfs_zone_next;
3439 
3440         } while (vfsp != vfslist);
3441 
3442         vfs_list_unlock();
3443 
3444         /*
3445          * now we can run through what we've extracted without holding
3446          * vfs_list_read_lock()
3447          */
3448         printp = print_head;
3449         while (printp != NULL) {
3450                 struct print_data *printp_next;
3451                 const char *resource;
3452                 char *mntpt;
3453                 struct vnode *vp;
3454                 int error;
3455 
3456                 mntpt = (char *)refstr_value(printp->vfs_mntpt);
3457                 resource = refstr_value(printp->vfs_resource);
3458 
3459                 if (mntpt != NULL && mntpt[0] != '\0')
3460                         mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
3461                 else
3462                         mntpt = "-";
3463 
3464                 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
3465 
3466                 if (error != 0)
3467                         goto nextp;
3468 
3469                 if (!(vp->v_flag & VROOT)) {
3470                         VN_RELE(vp);
3471                         goto nextp;
3472                 }
3473                 VN_RELE(vp);
3474 
3475                 if (resource != NULL && resource[0] != '\0') {
3476                         if (resource[0] == '/') {
3477                                 resource = ZONE_PATH_VISIBLE(resource, zone) ?
3478                                     ZONE_PATH_TRANSLATE(resource, zone) :
3479                                     mntpt;
3480                         }
3481                 } else {
3482                         resource = "-";
3483                 }
3484 
3485                 lxpr_uiobuf_printf(uiobuf,
3486                     "%s %s %s %s 0 0\n",
3487                     resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
3488                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
3489 
3490 nextp:
3491                 printp_next = printp->next;
3492                 refstr_rele(printp->vfs_mntpt);
3493                 refstr_rele(printp->vfs_resource);
3494                 kmem_free(printp, sizeof (*printp));
3495                 printp = printp_next;
3496 
3497         }
3498 }
3499 
3500 /*
3501  * lxpr_read_partitions():
3502  *
3503  * Over the years, /proc/partitions has been made considerably smaller -- to
3504  * the point that it really is only major number, minor number, number of
3505  * blocks (which we report as 0), and partition name.
3506  *
3507  * We support this because some things want to see it to make sense of
3508  * /proc/diskstats, and also because "fdisk -l" and a few other things look
3509  * here to find all disks on the system.
3510  */
3511 /* ARGSUSED */
3512 static void
3513 lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3514 {
3515 
3516         kstat_t *ksr;
3517         kstat_t ks0;
3518         int nidx, num, i;
3519         size_t sidx, size;
3520         zfs_cmd_t *zc;
3521         nvlist_t *nv = NULL;
3522         nvpair_t *elem = NULL;
3523         lxpr_mnt_t *mnt;
3524         lxpr_zfs_iter_t zfsi;
3525 
3526         ASSERT(lxpnp->lxpr_type == LXPR_PARTITIONS);
3527 
3528         ks0.ks_kid = 0;
3529         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
3530 
3531         if (ksr == NULL)
3532                 return;
3533 
3534         lxpr_uiobuf_printf(uiobuf, "major minor  #blocks  name\n\n");
3535 
3536         for (i = 1; i < nidx; i++) {
3537                 kstat_t *ksp = &ksr[i];
3538                 kstat_io_t *kip;
3539 
3540                 if (ksp->ks_type != KSTAT_TYPE_IO ||
3541                     strcmp(ksp->ks_class, "disk") != 0)
3542                         continue;
3543 
3544                 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
3545                     &size, &num)) == NULL)
3546                         continue;
3547 
3548                 if (size < sizeof (kstat_io_t)) {
3549                         kmem_free(kip, size);
3550                         continue;
3551                 }
3552 
3553                 lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d %s\n",
3554                     mod_name_to_major(ksp->ks_module),
3555                     ksp->ks_instance, 0, ksp->ks_name);
3556 
3557                 kmem_free(kip, size);
3558         }
3559 
3560         kmem_free(ksr, sidx);
3561 
3562         /* If we never got to open the zfs LDI, then stop now. */
3563         mnt = (lxpr_mnt_t *)lxpnp->lxpr_vnode->v_vfsp->vfs_data;
3564         if (mnt->lxprm_zfs_isopen == B_FALSE)
3565                 return;
3566 
3567         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3568 
3569         if (lxpr_zfs_list_pools(mnt, zc, &nv) != 0)
3570                 goto out;
3571 
3572         while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
3573                 char *pool = nvpair_name(elem);
3574 
3575                 bzero(&zfsi, sizeof (lxpr_zfs_iter_t));
3576                 while (lxpr_zfs_next_zvol(mnt, pool, zc, &zfsi) == 0) {
3577                         major_t major;
3578                         minor_t minor;
3579                         if (lxpr_zvol_dev(mnt, zc->zc_name, &major, &minor)
3580                             != 0)
3581                                 continue;
3582 
3583                         lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d zvol/dsk/%s\n",
3584                             major, minor, 0, zc->zc_name);
3585                 }
3586         }
3587 
3588         nvlist_free(nv);
3589 out:
3590         kmem_free(zc, sizeof (zfs_cmd_t));
3591 }
3592 
3593 /*
3594  * lxpr_read_diskstats():
3595  *
3596  * See the block comment above the per-device output-generating line for the
3597  * details of the format.
3598  */
3599 /* ARGSUSED */
3600 static void
3601 lxpr_read_diskstats(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3602 {
3603         kstat_t *ksr;
3604         kstat_t ks0;
3605         int nidx, num, i;
3606         size_t sidx, size;
3607 
3608         ASSERT(lxpnp->lxpr_type == LXPR_DISKSTATS);
3609 
3610         ks0.ks_kid = 0;
3611         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
3612 
3613         if (ksr == NULL)
3614                 return;
3615 
3616         for (i = 1; i < nidx; i++) {
3617                 kstat_t *ksp = &ksr[i];
3618                 kstat_io_t *kip;
3619 
3620                 if (ksp->ks_type != KSTAT_TYPE_IO ||
3621                     strcmp(ksp->ks_class, "disk") != 0)
3622                         continue;
3623 
3624                 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
3625                     &size, &num)) == NULL)
3626                         continue;
3627 
3628                 if (size < sizeof (kstat_io_t)) {
3629                         kmem_free(kip, size);
3630                         continue;
3631                 }
3632 
3633                 /*
3634                  * /proc/diskstats is defined to have one line of output for
3635                  * each block device, with each line containing the following
3636                  * 14 fields:
3637                  *
3638                  *      1 - major number
3639                  *      2 - minor mumber
3640                  *      3 - device name
3641                  *      4 - reads completed successfully
3642                  *      5 - reads merged
3643                  *      6 - sectors read
3644                  *      7 - time spent reading (ms)
3645                  *      8 - writes completed
3646                  *      9 - writes merged
3647                  *      10 - sectors written
3648                  *      11 - time spent writing (ms)
3649                  *      12 - I/Os currently in progress
3650                  *      13 - time spent doing I/Os (ms)
3651                  *      14 - weighted time spent doing I/Os (ms)
3652                  *
3653                  * One small hiccup:  we don't actually keep track of time
3654                  * spent reading vs. time spent writing -- we keep track of
3655                  * time waiting vs. time actually performing I/O.  While we
3656                  * could divide the total time by the I/O mix (making the
3657                  * obviously wrong assumption that I/O operations all take the
3658                  * same amount of time), this has the undesirable side-effect
3659                  * of moving backwards.  Instead, we report the total time
3660                  * (read + write) for all three stats (read, write, total).
3661                  * This is also a lie of sorts, but it should be more
3662                  * immediately clear to the user that reads and writes are
3663                  * each being double-counted as the other.
3664                  */
3665                 lxpr_uiobuf_printf(uiobuf, "%4d %7d %s "
3666                     "%llu %llu %llu %llu "
3667                     "%llu %llu %llu %llu "
3668                     "%llu %llu %llu\n",
3669                     mod_name_to_major(ksp->ks_module),
3670                     ksp->ks_instance, ksp->ks_name,
3671                     (uint64_t)kip->reads, 0LL,
3672                     kip->nread / (uint64_t)LXPR_SECTOR_SIZE,
3673                     (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3674                     (uint64_t)kip->writes, 0LL,
3675                     kip->nwritten / (uint64_t)LXPR_SECTOR_SIZE,
3676                     (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3677                     (uint64_t)(kip->rcnt + kip->wcnt),
3678                     (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3679                     (kip->rlentime + kip->wlentime) /
3680                     (uint64_t)(NANOSEC / MILLISEC));
3681 
3682                 kmem_free(kip, size);
3683         }
3684 
3685         kmem_free(ksr, sidx);
3686 }
3687 
3688 /*
3689  * lxpr_read_version(): read the contents of the "version" file.
3690  */
3691 /* ARGSUSED */
3692 static void
3693 lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3694 {
3695         lx_zone_data_t *lxzd = ztolxzd(LXPTOZ(lxpnp));
3696         lx_proc_data_t *lxpd = ptolxproc(curproc);
3697         const char *release = lxzd->lxzd_kernel_release;
3698         const char *version = lxzd->lxzd_kernel_version;
3699 
3700         /* Use per-process overrides, if specified */
3701         if (lxpd != NULL && lxpd->l_uname_release[0] != '\0') {
3702                 release = lxpd->l_uname_release;
3703         }
3704         if (lxpd != NULL && lxpd->l_uname_version[0] != '\0') {
3705                 version = lxpd->l_uname_version;
3706         }
3707 
3708         lxpr_uiobuf_printf(uiobuf,
3709             "%s version %s (%s version %d.%d.%d) %s\n",
3710             LX_UNAME_SYSNAME, release,
3711 #if defined(__GNUC__)
3712             "gcc", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
3713 #else
3714             "cc", 1, 0, 0,
3715 #endif
3716             version);
3717 }
3718 
3719 /*
3720  * lxpr_read_stat(): read the contents of the "stat" file.
3721  *
3722  */
3723 /* ARGSUSED */
3724 static void
3725 lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3726 {
3727         cpu_t *cp, *cpstart;
3728         int pools_enabled;
3729         ulong_t idle_cum = 0;
3730         ulong_t sys_cum  = 0;
3731         ulong_t user_cum = 0;
3732         ulong_t irq_cum = 0;
3733         ulong_t cpu_nrunnable_cum = 0;
3734         ulong_t w_io_cum = 0;
3735 
3736         ulong_t pgpgin_cum    = 0;
3737         ulong_t pgpgout_cum   = 0;
3738         ulong_t pgswapout_cum = 0;
3739         ulong_t pgswapin_cum  = 0;
3740         ulong_t intr_cum = 0;
3741         ulong_t pswitch_cum = 0;
3742         ulong_t forks_cum = 0;
3743         hrtime_t msnsecs[NCMSTATES];
3744         /* is the emulated release > 2.4 */
3745         boolean_t newer_than24 = lx_kern_release_cmp(LXPTOZ(lxpnp), "2.4") > 0;
3746         /* temporary variable since scalehrtime modifies data in place */
3747         hrtime_t tmptime;
3748 
3749         ASSERT(lxpnp->lxpr_type == LXPR_STAT);
3750 
3751         mutex_enter(&cpu_lock);
3752         pools_enabled = pool_pset_enabled();
3753 
3754         /* Calculate cumulative stats */
3755         cp = cpstart = CPU->cpu_part->cp_cpulist;
3756         do {
3757                 int i;
3758 
3759                 /*
3760                  * Don't count CPUs that aren't even in the system
3761                  * or aren't up yet.
3762                  */
3763                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3764                         continue;
3765                 }
3766 
3767                 get_cpu_mstate(cp, msnsecs);
3768 
3769                 idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3770                 sys_cum  += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3771                 user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
3772 
3773                 pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
3774                 pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
3775                 pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
3776                 pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
3777 
3778 
3779                 if (newer_than24) {
3780                         cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
3781                         w_io_cum += CPU_STATS(cp, sys.iowait);
3782                         for (i = 0; i < NCMSTATES; i++) {
3783                                 tmptime = cp->cpu_intracct[i];
3784                                 scalehrtime(&tmptime);
3785                                 irq_cum += NSEC_TO_TICK(tmptime);
3786                         }
3787                 }
3788 
3789                 for (i = 0; i < PIL_MAX; i++)
3790                         intr_cum += CPU_STATS(cp, sys.intr[i]);
3791 
3792                 pswitch_cum += CPU_STATS(cp, sys.pswitch);
3793                 forks_cum += CPU_STATS(cp, sys.sysfork);
3794                 forks_cum += CPU_STATS(cp, sys.sysvfork);
3795 
3796                 if (pools_enabled)
3797                         cp = cp->cpu_next_part;
3798                 else
3799                         cp = cp->cpu_next;
3800         } while (cp != cpstart);
3801 
3802         if (newer_than24) {
3803                 lxpr_uiobuf_printf(uiobuf,
3804                     "cpu %lu %lu %lu %lu %lu %lu %lu\n",
3805                     user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
3806         } else {
3807                 lxpr_uiobuf_printf(uiobuf,
3808                     "cpu %lu %lu %lu %lu\n",
3809                     user_cum, 0L, sys_cum, idle_cum);
3810         }
3811 
3812         /* Do per processor stats */
3813         do {
3814                 int i;
3815 
3816                 ulong_t idle_ticks;
3817                 ulong_t sys_ticks;
3818                 ulong_t user_ticks;
3819                 ulong_t irq_ticks = 0;
3820 
3821                 /*
3822                  * Don't count CPUs that aren't even in the system
3823                  * or aren't up yet.
3824                  */
3825                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3826                         continue;
3827                 }
3828 
3829                 get_cpu_mstate(cp, msnsecs);
3830 
3831                 idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3832                 sys_ticks  = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3833                 user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
3834 
3835                 for (i = 0; i < NCMSTATES; i++) {
3836                         tmptime = cp->cpu_intracct[i];
3837                         scalehrtime(&tmptime);
3838                         irq_ticks += NSEC_TO_TICK(tmptime);
3839                 }
3840 
3841                 if (newer_than24) {
3842                         lxpr_uiobuf_printf(uiobuf,
3843                             "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
3844                             cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
3845                             0L, irq_ticks, 0L);
3846                 } else {
3847                         lxpr_uiobuf_printf(uiobuf,
3848                             "cpu%d %lu %lu %lu %lu\n",
3849                             cp->cpu_id,
3850                             user_ticks, 0L, sys_ticks, idle_ticks);
3851                 }
3852 
3853                 if (pools_enabled)
3854                         cp = cp->cpu_next_part;
3855                 else
3856                         cp = cp->cpu_next;
3857         } while (cp != cpstart);
3858 
3859         mutex_exit(&cpu_lock);
3860 
3861         if (newer_than24) {
3862                 lxpr_uiobuf_printf(uiobuf,
3863                     "page %lu %lu\n"
3864                     "swap %lu %lu\n"
3865                     "intr %lu\n"
3866                     "ctxt %lu\n"
3867                     "btime %lu\n"
3868                     "processes %lu\n"
3869                     "procs_running %lu\n"
3870                     "procs_blocked %lu\n",
3871                     pgpgin_cum, pgpgout_cum,
3872                     pgswapin_cum, pgswapout_cum,
3873                     intr_cum,
3874                     pswitch_cum,
3875                     boot_time,
3876                     forks_cum,
3877                     cpu_nrunnable_cum,
3878                     w_io_cum);
3879         } else {
3880                 lxpr_uiobuf_printf(uiobuf,
3881                     "page %lu %lu\n"
3882                     "swap %lu %lu\n"
3883                     "intr %lu\n"
3884                     "ctxt %lu\n"
3885                     "btime %lu\n"
3886                     "processes %lu\n",
3887                     pgpgin_cum, pgpgout_cum,
3888                     pgswapin_cum, pgswapout_cum,
3889                     intr_cum,
3890                     pswitch_cum,
3891                     boot_time,
3892                     forks_cum);
3893         }
3894 }
3895 
3896 /*
3897  * lxpr_read_swaps():
3898  *
3899  * We don't support swap files or partitions, but some programs like to look
3900  * here just to check we have some swap on the system, so we lie and show
3901  * our entire swap cap as one swap partition.
3902  */
3903 /* ARGSUSED */
3904 static void
3905 lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3906 {
3907         zone_t *zone = curzone;
3908         uint64_t totswap, usedswap;
3909 
3910         mutex_enter(&zone->zone_mem_lock);
3911         /* Uses units of 1 kb (2^10). */
3912         totswap = zone->zone_max_swap_ctl >> 10;
3913         usedswap = zone->zone_max_swap >> 10;
3914         mutex_exit(&zone->zone_mem_lock);
3915 
3916         lxpr_uiobuf_printf(uiobuf,
3917             "Filename                                "
3918             "Type            Size    Used    Priority\n");
3919         lxpr_uiobuf_printf(uiobuf, "%-40s%-16s%-8llu%-8llu%-8d\n",
3920             "/dev/swap", "partition", totswap, usedswap, -1);
3921 }
3922 
3923 /*
3924  * inotify tunables exported via /proc.
3925  */
3926 extern int inotify_maxevents;
3927 extern int inotify_maxinstances;
3928 extern int inotify_maxwatches;
3929 
3930 static void
3931 lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp,
3932     lxpr_uiobuf_t *uiobuf)
3933 {
3934         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS);
3935         lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents);
3936 }
3937 
3938 static void
3939 lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp,
3940     lxpr_uiobuf_t *uiobuf)
3941 {
3942         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES);
3943         lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances);
3944 }
3945 
3946 static void
3947 lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp,
3948     lxpr_uiobuf_t *uiobuf)
3949 {
3950         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES);
3951         lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches);
3952 }
3953 
3954 static void
3955 lxpr_read_sys_kernel_caplcap(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3956 {
3957         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_CAPLCAP);
3958         lxpr_uiobuf_printf(uiobuf, "%d\n", LX_CAP_MAX_VALID);
3959 }
3960 
3961 static void
3962 lxpr_read_sys_kernel_corepatt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3963 {
3964         zone_t *zone = curproc->p_zone;
3965         struct core_globals *cg;
3966         refstr_t *rp;
3967         corectl_path_t *ccp;
3968         char tr[MAXPATHLEN];
3969 
3970         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);
3971 
3972         cg = zone_getspecific(core_zone_key, zone);
3973         ASSERT(cg != NULL);
3974 
3975         /* If core dumps are disabled, return an empty string. */
3976         if ((cg->core_options & CC_PROCESS_PATH) == 0) {
3977                 lxpr_uiobuf_printf(uiobuf, "\n");
3978                 return;
3979         }
3980 
3981         ccp = cg->core_default_path;
3982         mutex_enter(&ccp->ccp_mtx);
3983         if ((rp = ccp->ccp_path) != NULL)
3984                 refstr_hold(rp);
3985         mutex_exit(&ccp->ccp_mtx);
3986 
3987         if (rp == NULL) {
3988                 lxpr_uiobuf_printf(uiobuf, "\n");
3989                 return;
3990         }
3991 
3992         bzero(tr, sizeof (tr));
3993         if (lxpr_core_path_s2l(refstr_value(rp), tr, sizeof (tr)) != 0) {
3994                 refstr_rele(rp);
3995                 lxpr_uiobuf_printf(uiobuf, "\n");
3996                 return;
3997         }
3998 
3999         refstr_rele(rp);
4000         lxpr_uiobuf_printf(uiobuf, "%s\n", tr);
4001 }
4002 
4003 static void
4004 lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4005 {
4006         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME);
4007         lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename());
4008 }
4009 
4010 static void
4011 lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4012 {
4013         rctl_qty_t val;
4014 
4015         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI);
4016 
4017         mutex_enter(&curproc->p_lock);
4018         val = rctl_enforced_value(rc_zone_msgmni,
4019             curproc->p_zone->zone_rctls, curproc);
4020         mutex_exit(&curproc->p_lock);
4021 
4022         lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4023 }
4024 
4025 static void
4026 lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4027 {
4028         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX);
4029         lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max);
4030 }
4031 
4032 static void
4033 lxpr_read_sys_kernel_osrel(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4034 {
4035         lx_zone_data_t *br_data;
4036 
4037         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_OSREL);
4038         br_data = ztolxzd(curproc->p_zone);
4039         if (curproc->p_zone->zone_brand == &lx_brand) {
4040                 lxpr_uiobuf_printf(uiobuf, "%s\n",
4041                     br_data->lxzd_kernel_version);
4042         } else {
4043                 lxpr_uiobuf_printf(uiobuf, "\n");
4044         }
4045 }
4046 
4047 static void
4048 lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4049 {
4050         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX);
4051         lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid);
4052 }
4053 
4054 static void
4055 lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4056 {
4057         /*
4058          * This file isn't documented on the Linux proc(5) man page but
4059          * according to the blog of the author of systemd/journald (the
4060          * consumer), he says:
4061          *    boot_id: A random ID that is regenerated on each boot. As such it
4062          *    can be used to identify the local machine's current boot. It's
4063          *    universally available on any recent Linux kernel. It's a good and
4064          *    safe choice if you need to identify a specific boot on a specific
4065          *    booted kernel.
4066          *
4067          * We'll just generate a random ID if necessary. On Linux the format
4068          * appears to resemble a uuid but since it is not documented to be a
4069          * uuid, we don't worry about that.
4070          */
4071         lx_zone_data_t *br_data;
4072 
4073         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RAND_BOOTID);
4074 
4075         if (curproc->p_zone->zone_brand != &lx_brand) {
4076                 lxpr_uiobuf_printf(uiobuf, "0\n");
4077                 return;
4078         }
4079 
4080         br_data = ztolxzd(curproc->p_zone);
4081         if (br_data->lxzd_bootid[0] == '\0') {
4082                 extern int getrandom(void *, size_t, int);
4083                 int i;
4084 
4085                 for (i = 0; i < 5; i++) {
4086                         u_longlong_t n;
4087                         char s[32];
4088 
4089                         (void) random_get_bytes((uint8_t *)&n, sizeof (n));
4090                         switch (i) {
4091                         case 0: (void) snprintf(s, sizeof (s), "%08llx", n);
4092                                 s[8] = '\0';
4093                                 break;
4094                         case 4: (void) snprintf(s, sizeof (s), "%012llx", n);
4095                                 s[12] = '\0';
4096                                 break;
4097                         default: (void) snprintf(s, sizeof (s), "%04llx", n);
4098                                 s[4] = '\0';
4099                                 break;
4100                         }
4101                         if (i > 0)
4102                                 strlcat(br_data->lxzd_bootid, "-",
4103                                     sizeof (br_data->lxzd_bootid));
4104                         strlcat(br_data->lxzd_bootid, s,
4105                             sizeof (br_data->lxzd_bootid));
4106                 }
4107         }
4108 
4109         lxpr_uiobuf_printf(uiobuf, "%s\n", br_data->lxzd_bootid);
4110 }
4111 
4112 static void
4113 lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4114 {
4115         rctl_qty_t val;
4116 
4117         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX);
4118 
4119         mutex_enter(&curproc->p_lock);
4120         val = rctl_enforced_value(rc_zone_shmmax,
4121             curproc->p_zone->zone_rctls, curproc);
4122         mutex_exit(&curproc->p_lock);
4123 
4124         if (val > FOURGB)
4125                 val = FOURGB;
4126 
4127         lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4128 }
4129 
4130 static void
4131 lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4132 {
4133         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX);
4134         lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl);
4135 }
4136 
4137 static void
4138 lxpr_read_sys_net_core_somaxc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4139 {
4140         netstack_t *ns;
4141         tcp_stack_t     *tcps;
4142 
4143         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
4144 
4145         ns = netstack_get_current();
4146         if (ns == NULL) {
4147                 lxpr_uiobuf_printf(uiobuf, "%d\n", SOMAXCONN);
4148                 return;
4149         }
4150 
4151         tcps = ns->netstack_tcp;
4152         lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_conn_req_max_q);
4153         netstack_rele(ns);
4154 }
4155 
4156 static void
4157 lxpr_read_sys_vm_minfr_kb(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4158 {
4159         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MINFR_KB);
4160         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4161 }
4162 
4163 static void
4164 lxpr_read_sys_vm_nhpages(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4165 {
4166         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_NHUGEP);
4167         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4168 }
4169 
4170 static void
4171 lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4172 {
4173         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_OVERCOMMIT_MEM);
4174         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4175 }
4176 
4177 static void
4178 lxpr_read_sys_vm_swappiness(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4179 {
4180         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_SWAPPINESS);
4181         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4182 }
4183 
4184 /*
4185  * lxpr_read_uptime(): read the contents of the "uptime" file.
4186  *
4187  * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
4188  * Use fixed point arithmetic to get 2 decimal places
4189  */
4190 /* ARGSUSED */
4191 static void
4192 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4193 {
4194         cpu_t *cp, *cpstart;
4195         int pools_enabled;
4196         ulong_t idle_cum = 0;
4197         ulong_t cpu_count = 0;
4198         ulong_t idle_s;
4199         ulong_t idle_cs;
4200         ulong_t up_s;
4201         ulong_t up_cs;
4202         hrtime_t birthtime;
4203         hrtime_t centi_sec = 10000000;  /* 10^7 */
4204 
4205         ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
4206 
4207         /* Calculate cumulative stats */
4208         mutex_enter(&cpu_lock);
4209         pools_enabled = pool_pset_enabled();
4210 
4211         cp = cpstart = CPU->cpu_part->cp_cpulist;
4212         do {
4213                 /*
4214                  * Don't count CPUs that aren't even in the system
4215                  * or aren't up yet.
4216                  */
4217                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
4218                         continue;
4219                 }
4220 
4221                 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
4222                 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
4223                 cpu_count += 1;
4224 
4225                 if (pools_enabled)
4226                         cp = cp->cpu_next_part;
4227                 else
4228                         cp = cp->cpu_next;
4229         } while (cp != cpstart);
4230         mutex_exit(&cpu_lock);
4231 
4232         /* Getting the Zone zsched process startup time */
4233         birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
4234         up_cs = (gethrtime() - birthtime) / centi_sec;
4235         up_s = up_cs / 100;
4236         up_cs %= 100;
4237 
4238         ASSERT(cpu_count > 0);
4239         idle_cum /= cpu_count;
4240         idle_s = idle_cum / hz;
4241         idle_cs = idle_cum % hz;
4242         idle_cs *= 100;
4243         idle_cs /= hz;
4244 
4245         lxpr_uiobuf_printf(uiobuf,
4246             "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
4247 }
4248 
/*
 * Flag names for the extended (cpuid function 0x80000001) %edx feature bits,
 * used when the vendor is X86_VENDOR_AMD.  The array index is the bit
 * number; a NULL entry means no name is printed for that bit.
 */
static const char *amd_x_edx[] = {
	NULL,		/* 0 */
	NULL,		/* 1 */
	NULL,		/* 2 */
	NULL,		/* 3 */
	NULL,		/* 4 */
	NULL,		/* 5 */
	NULL,		/* 6 */
	NULL,		/* 7 */
	NULL,		/* 8 */
	NULL,		/* 9 */
	NULL,		/* 10 */
	"syscall",	/* 11 */
	NULL,		/* 12 */
	NULL,		/* 13 */
	NULL,		/* 14 */
	NULL,		/* 15 */
	NULL,		/* 16 */
	NULL,		/* 17 */
	NULL,		/* 18 */
	"mp",		/* 19 */
	"nx",		/* 20 */
	NULL,		/* 21 */
	"mmxext",	/* 22 */
	NULL,		/* 23 */
	NULL,		/* 24 */
	NULL,		/* 25 */
	NULL,		/* 26 */
	NULL,		/* 27 */
	NULL,		/* 28 */
	"lm",		/* 29 */
	"3dnowext",	/* 30 */
	"3dnow"		/* 31 */
};
4259 
/*
 * Flag names for the extended (cpuid function 0x80000001) %ecx feature bits,
 * used when the vendor is X86_VENDOR_AMD.  The array index is the bit
 * number; a NULL entry means no name is printed for that bit.
 */
static const char *amd_x_ecx[] = {
	"lahf_lm",	/* 0 */
	NULL,		/* 1 */
	"svm",		/* 2 */
	NULL,		/* 3 */
	"altmovcr8"	/* 4 */
};
4264 
/*
 * Flag names for the extended (cpuid function 0x80000001) %edx feature bits,
 * used when the vendor is X86_VENDOR_TM.  The array index is the bit number;
 * a NULL entry means no name is printed for that bit.
 */
static const char *tm_x_edx[] = {
	"recovery",	/* 0 */
	"longrun",	/* 1 */
	NULL,		/* 2 */
	"lrti"		/* 3 */
};
4268 
/*
 * Flag names for the extended (cpuid function 0x80000001) %edx feature bits,
 * used when the vendor is X86_VENDOR_Intel.  The array index is the bit
 * number; a NULL entry means no name is printed for that bit.
 *
 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
 */
static const char *intc_x_edx[] = {
	NULL,		/* 0 */
	NULL,		/* 1 */
	NULL,		/* 2 */
	NULL,		/* 3 */
	NULL,		/* 4 */
	NULL,		/* 5 */
	NULL,		/* 6 */
	NULL,		/* 7 */
	NULL,		/* 8 */
	NULL,		/* 9 */
	NULL,		/* 10 */
	"syscall",	/* 11 */
	NULL,		/* 12 */
	NULL,		/* 13 */
	NULL,		/* 14 */
	NULL,		/* 15 */
	NULL,		/* 16 */
	NULL,		/* 17 */
	NULL,		/* 18 */
	NULL,		/* 19 */
	"nx",		/* 20 */
	NULL,		/* 21 */
	NULL,		/* 22 */
	NULL,		/* 23 */
	NULL,		/* 24 */
	NULL,		/* 25 */
	NULL,		/* 26 */
	NULL,		/* 27 */
	NULL,		/* 28 */
	"lm",		/* 29 */
	NULL,		/* 30 */
	NULL		/* 31 */
};
4282 
/*
 * Flag names for the standard (cpuid function 1) %edx feature bits; applied
 * regardless of vendor.  The array index is the bit number; a NULL entry
 * means no name is printed for that bit.
 */
static const char *intc_edx[] = {
	"fpu",		/* 0 */
	"vme",		/* 1 */
	"de",		/* 2 */
	"pse",		/* 3 */
	"tsc",		/* 4 */
	"msr",		/* 5 */
	"pae",		/* 6 */
	"mce",		/* 7 */
	"cx8",		/* 8 */
	"apic",		/* 9 */
	NULL,		/* 10 */
	"sep",		/* 11 */
	"mtrr",		/* 12 */
	"pge",		/* 13 */
	"mca",		/* 14 */
	"cmov",		/* 15 */
	"pat",		/* 16 */
	"pse36",	/* 17 */
	"pn",		/* 18 */
	"clflush",	/* 19 */
	NULL,		/* 20 */
	"dts",		/* 21 */
	"acpi",		/* 22 */
	"mmx",		/* 23 */
	"fxsr",		/* 24 */
	"sse",		/* 25 */
	"sse2",		/* 26 */
	"ss",		/* 27 */
	"ht",		/* 28 */
	"tm",		/* 29 */
	"ia64",		/* 30 */
	"pbe"		/* 31 */
};
4293 
/*
 * Flag names for the standard (cpuid function 1) %ecx feature bits; applied
 * regardless of vendor.  The array index is the bit number; a NULL entry
 * means no name is printed for that bit.
 *
 * "sse3" on linux is called "pni" (Prescott New Instructions).
 */
static const char *intc_ecx[] = {
	"pni",		/* 0 */
	NULL,		/* 1 */
	NULL,		/* 2 */
	"monitor",	/* 3 */
	"ds_cpl",	/* 4 */
	NULL,		/* 5 */
	NULL,		/* 6 */
	"est",		/* 7 */
	"tm2",		/* 8 */
	NULL,		/* 9 */
	"cid",		/* 10 */
	NULL,		/* 11 */
	NULL,		/* 12 */
	"cx16",		/* 13 */
	"xtpr"		/* 14 */
};
4303 
4304 /*
4305  * Report a list of each cgroup subsystem supported by our emulated cgroup fs.
4306  * This needs to exist for systemd to run but for now we don't report any
4307  * cgroup subsystems as being installed. The commented example below shows
4308  * how to print a subsystem entry.
4309  */
4310 static void
4311 lxpr_read_cgroups(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4312 {
4313         lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
4314             "#subsys_name", "hierarchy", "num_cgroups", "enabled");
4315 
4316         /*
4317          * lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
4318          *   "cpu,cpuacct", "2", "1", "1");
4319          */
4320 }
4321 
4322 static void
4323 lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4324 {
4325         int i;
4326         uint32_t bits;
4327         cpu_t *cp, *cpstart;
4328         int pools_enabled;
4329         const char **fp;
4330         char brandstr[CPU_IDSTRLEN];
4331         struct cpuid_regs cpr;
4332         int maxeax;
4333         int std_ecx, std_edx, ext_ecx, ext_edx;
4334 
4335         ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
4336 
4337         mutex_enter(&cpu_lock);
4338         pools_enabled = pool_pset_enabled();
4339 
4340         cp = cpstart = CPU->cpu_part->cp_cpulist;
4341         do {
4342                 /*
4343                  * This returns the maximum eax value for standard cpuid
4344                  * functions in eax.
4345                  */
4346                 cpr.cp_eax = 0;
4347                 (void) cpuid_insn(cp, &cpr);
4348                 maxeax = cpr.cp_eax;
4349 
4350                 /*
4351                  * Get standard x86 feature flags.
4352                  */
4353                 cpr.cp_eax = 1;
4354                 (void) cpuid_insn(cp, &cpr);
4355                 std_ecx = cpr.cp_ecx;
4356                 std_edx = cpr.cp_edx;
4357 
4358                 /*
4359                  * Now get extended feature flags.
4360                  */
4361                 cpr.cp_eax = 0x80000001;
4362                 (void) cpuid_insn(cp, &cpr);
4363                 ext_ecx = cpr.cp_ecx;
4364                 ext_edx = cpr.cp_edx;
4365 
4366                 (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
4367 
4368                 lxpr_uiobuf_printf(uiobuf,
4369                     "processor\t: %d\n"
4370                     "vendor_id\t: %s\n"
4371                     "cpu family\t: %d\n"
4372                     "model\t\t: %d\n"
4373                     "model name\t: %s\n"
4374                     "stepping\t: %d\n"
4375                     "cpu MHz\t\t: %u.%03u\n",
4376                     cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
4377                     cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
4378                     (uint32_t)(cpu_freq_hz / 1000000),
4379                     ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
4380 
4381                 lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
4382                     getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
4383 
4384                 if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
4385                         /*
4386                          * 'siblings' is used for HT-style threads
4387                          */
4388                         lxpr_uiobuf_printf(uiobuf,
4389                             "physical id\t: %lu\n"
4390                             "siblings\t: %u\n",
4391                             pg_plat_hw_instance_id(cp, PGHW_CHIP),
4392                             cpuid_get_ncpu_per_chip(cp));
4393                 }
4394 
4395                 /*
4396                  * Since we're relatively picky about running on older hardware,
4397                  * we can be somewhat cavalier about the answers to these ones.
4398                  *
4399                  * In fact, given the hardware we support, we just say:
4400                  *
4401                  *      fdiv_bug        : no    (if we're on a 64-bit kernel)
4402                  *      hlt_bug         : no
4403                  *      f00f_bug        : no
4404                  *      coma_bug        : no
4405                  *      wp              : yes   (write protect in supervsr mode)
4406                  */
4407                 lxpr_uiobuf_printf(uiobuf,
4408                     "fdiv_bug\t: %s\n"
4409                     "hlt_bug \t: no\n"
4410                     "f00f_bug\t: no\n"
4411                     "coma_bug\t: no\n"
4412                     "fpu\t\t: %s\n"
4413                     "fpu_exception\t: %s\n"
4414                     "cpuid level\t: %d\n"
4415                     "flags\t\t:",
4416 #if defined(__i386)
4417                     fpu_pentium_fdivbug ? "yes" : "no",
4418 #else
4419                     "no",
4420 #endif /* __i386 */
4421                     fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
4422                     maxeax);
4423 
4424                 for (bits = std_edx, fp = intc_edx, i = 0;
4425                     i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
4426                         if ((bits & (1 << i)) != 0 && *fp)
4427                                 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4428 
4429                 /*
4430                  * name additional features where appropriate
4431                  */
4432                 switch (x86_vendor) {
4433                 case X86_VENDOR_Intel:
4434                         for (bits = ext_edx, fp = intc_x_edx, i = 0;
4435                             i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
4436                             fp++, i++)
4437                                 if ((bits & (1 << i)) != 0 && *fp)
4438                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4439                         break;
4440 
4441                 case X86_VENDOR_AMD:
4442                         for (bits = ext_edx, fp = amd_x_edx, i = 0;
4443                             i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
4444                             fp++, i++)
4445                                 if ((bits & (1 << i)) != 0 && *fp)
4446                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4447 
4448                         for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
4449                             i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
4450                             fp++, i++)
4451                                 if ((bits & (1 << i)) != 0 && *fp)
4452                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4453                         break;
4454 
4455                 case X86_VENDOR_TM:
4456                         for (bits = ext_edx, fp = tm_x_edx, i = 0;
4457                             i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
4458                             fp++, i++)
4459                                 if ((bits & (1 << i)) != 0 && *fp)
4460                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4461                         break;
4462                 default:
4463                         break;
4464                 }
4465 
4466                 for (bits = std_ecx, fp = intc_ecx, i = 0;
4467                     i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
4468                         if ((bits & (1 << i)) != 0 && *fp)
4469                                 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4470 
4471                 lxpr_uiobuf_printf(uiobuf, "\n\n");
4472 
4473                 if (pools_enabled)
4474                         cp = cp->cpu_next_part;
4475                 else
4476                         cp = cp->cpu_next;
4477         } while (cp != cpstart);
4478 
4479         mutex_exit(&cpu_lock);
4480 }
4481 
4482 /* ARGSUSED */
4483 static void
4484 lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4485 {
4486         ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
4487         lxpr_uiobuf_seterr(uiobuf, EFAULT);
4488 }
4489 
4490 /*
4491  * Report a list of file systems loaded in the kernel. We only report the ones
4492  * which we support and which may be checked by various components to see if
4493  * they are loaded.
4494  */
4495 static void
4496 lxpr_read_filesystems(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4497 {
4498         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "autofs");
4499         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "cgroup");
4500         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "nfs");
4501         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "proc");
4502         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "sysfs");
4503         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "tmpfs");
4504 }
4505 
4506 /*
4507  * lxpr_getattr(): Vnode operation for VOP_GETATTR()
4508  */
4509 static int
4510 lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
4511     caller_context_t *ct)
4512 {
4513         register lxpr_node_t *lxpnp = VTOLXP(vp);
4514         lxpr_nodetype_t type = lxpnp->lxpr_type;
4515         extern uint_t nproc;
4516         int error;
4517 
4518         /*
4519          * Return attributes of underlying vnode if ATTR_REAL
4520          *
4521          * but keep fd files with the symlink permissions
4522          */
4523         if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
4524                 vnode_t *rvp = lxpnp->lxpr_realvp;
4525 
4526                 /*
4527                  * withold attribute information to owner or root
4528                  */
4529                 if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
4530                         return (error);
4531                 }
4532 
4533                 /*
4534                  * now its attributes
4535                  */
4536                 if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
4537                         return (error);
4538                 }
4539 
4540                 /*
4541                  * if it's a file in lx /proc/pid/fd/xx then set its
4542                  * mode and keep it looking like a symlink, fifo or socket
4543                  */
4544                 if (type == LXPR_PID_FD_FD) {
4545                         vap->va_mode = lxpnp->lxpr_mode;
4546                         vap->va_type = lxpnp->lxpr_realvp->v_type;
4547                         vap->va_size = 0;
4548                         vap->va_nlink = 1;
4549                 }
4550                 return (0);
4551         }
4552 
4553         /* Default attributes, that may be overridden below */
4554         bzero(vap, sizeof (*vap));
4555         vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
4556         vap->va_nlink = 1;
4557         vap->va_type = vp->v_type;
4558         vap->va_mode = lxpnp->lxpr_mode;
4559         vap->va_fsid = vp->v_vfsp->vfs_dev;
4560         vap->va_blksize = DEV_BSIZE;
4561         vap->va_uid = lxpnp->lxpr_uid;
4562         vap->va_gid = lxpnp->lxpr_gid;
4563         vap->va_nodeid = lxpnp->lxpr_ino;
4564 
4565         switch (type) {
4566         case LXPR_PROCDIR:
4567                 vap->va_nlink = nproc + 2 + PROCDIRFILES;
4568                 vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
4569                 break;
4570         case LXPR_PIDDIR:
4571                 vap->va_nlink = PIDDIRFILES;
4572                 vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
4573                 break;
4574         case LXPR_PID_TASK_IDDIR:
4575                 vap->va_nlink = TIDDIRFILES;
4576                 vap->va_size = TIDDIRFILES * LXPR_SDSIZE;
4577                 break;
4578         case LXPR_SELF:
4579                 vap->va_uid = crgetruid(curproc->p_cred);
4580                 vap->va_gid = crgetrgid(curproc->p_cred);
4581                 break;
4582         case LXPR_PID_FD_FD:
4583         case LXPR_PID_TID_FD_FD:
4584                 /*
4585                  * Restore VLNK type for lstat-type activity.
4586                  * See lxpr_readlink for more details.
4587                  */
4588                 if ((flags & FOLLOW) == 0)
4589                         vap->va_type = VLNK;
4590         default:
4591                 break;
4592         }
4593 
4594         vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
4595         return (0);
4596 }
4597 
4598 /*
4599  * lxpr_access(): Vnode operation for VOP_ACCESS()
4600  */
4601 static int
4602 lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
4603 {
4604         lxpr_node_t *lxpnp = VTOLXP(vp);
4605         lxpr_nodetype_t type = lxpnp->lxpr_type;
4606         int shift = 0;
4607         proc_t *tp;
4608 
4609         /* lx /proc is a read only file system */
4610         if (mode & VWRITE) {
4611                 switch (type) {
4612                 case LXPR_PID_OOM_SCR_ADJ:
4613                 case LXPR_PID_TID_OOM_SCR_ADJ:
4614                 case LXPR_SYS_KERNEL_COREPATT:
4615                 case LXPR_SYS_NET_CORE_SOMAXCON:
4616                 case LXPR_SYS_VM_OVERCOMMIT_MEM:
4617                 case LXPR_SYS_VM_SWAPPINESS:
4618                 case LXPR_PID_FD_FD:
4619                 case LXPR_PID_TID_FD_FD:
4620                         break;
4621                 default:
4622                         return (EROFS);
4623                 }
4624         }
4625 
4626         /*
4627          * If this is a restricted file, check access permissions.
4628          */
4629         switch (type) {
4630         case LXPR_PIDDIR:
4631                 return (0);
4632         case LXPR_PID_CURDIR:
4633         case LXPR_PID_ENV:
4634         case LXPR_PID_EXE:
4635         case LXPR_PID_LIMITS:
4636         case LXPR_PID_MAPS:
4637         case LXPR_PID_MEM:
4638         case LXPR_PID_ROOTDIR:
4639         case LXPR_PID_FDDIR:
4640         case LXPR_PID_FD_FD:
4641         case LXPR_PID_TID_FDDIR:
4642         case LXPR_PID_TID_FD_FD:
4643                 if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
4644                         return (ENOENT);
4645                 if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
4646                     priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
4647                         lxpr_unlock(tp);
4648                         return (EACCES);
4649                 }
4650                 lxpr_unlock(tp);
4651         default:
4652                 break;
4653         }
4654 
4655         if (lxpnp->lxpr_realvp != NULL) {
4656                 /*
4657                  * For these we use the underlying vnode's accessibility.
4658                  */
4659                 return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
4660         }
4661 
4662         /* If user is root allow access regardless of permission bits */
4663         if (secpolicy_proc_access(cr) == 0)
4664                 return (0);
4665 
4666         /*
4667          * Access check is based on only one of owner, group, public.  If not
4668          * owner, then check group.  If not a member of the group, then check
4669          * public access.
4670          */
4671         if (crgetuid(cr) != lxpnp->lxpr_uid) {
4672                 shift += 3;
4673                 if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
4674                         shift += 3;
4675         }
4676 
4677         mode &= ~(lxpnp->lxpr_mode << shift);
4678 
4679         if (mode == 0)
4680                 return (0);
4681 
4682         return (EACCES);
4683 }
4684 
/*
 * Lookup handler that never finds anything: both arguments are ignored and
 * NULL is always returned.  Presumably this is the lxpr_lookup_function[]
 * entry for node types that are not directories (the table itself is not
 * visible here -- confirm); lxpr_lookup() turns a NULL result into ENOENT.
 */
/* ARGSUSED */
static vnode_t *
lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
{
        return (NULL);
}
4691 
4692 /*
4693  * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
4694  */
4695 /* ARGSUSED */
4696 static int
4697 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
4698     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
4699     int *direntflags, pathname_t *realpnp)
4700 {
4701         lxpr_node_t *lxpnp = VTOLXP(dp);
4702         lxpr_nodetype_t type = lxpnp->lxpr_type;
4703         int error;
4704 
4705         ASSERT(dp->v_type == VDIR);
4706         ASSERT(type < LXPR_NFILES);
4707 
4708         /*
4709          * we should never get here because the lookup
4710          * is done on the realvp for these nodes
4711          */
4712         ASSERT(type != LXPR_PID_FD_FD &&
4713             type != LXPR_PID_CURDIR &&
4714             type != LXPR_PID_ROOTDIR);
4715 
4716         /*
4717          * restrict lookup permission to owner or root
4718          */
4719         if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
4720                 return (error);
4721         }
4722 
4723         /*
4724          * Just return the parent vnode if that's where we are trying to go.
4725          */
4726         if (strcmp(comp, "..") == 0) {
4727                 VN_HOLD(lxpnp->lxpr_parent);
4728                 *vpp = lxpnp->lxpr_parent;
4729                 return (0);
4730         }
4731 
4732         /*
4733          * Special handling for directory searches.  Note: null component name
4734          * denotes that the current directory is being searched.
4735          */
4736         if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
4737                 VN_HOLD(dp);
4738                 *vpp = dp;
4739                 return (0);
4740         }
4741 
4742         *vpp = (lxpr_lookup_function[type](dp, comp));
4743         return ((*vpp == NULL) ? ENOENT : 0);
4744 }
4745 
4746 /*
4747  * Do a sequential search on the given directory table
4748  */
4749 static vnode_t *
4750 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
4751     lxpr_dirent_t *dirtab, int dirtablen)
4752 {
4753         lxpr_node_t *lxpnp;
4754         int count;
4755 
4756         for (count = 0; count < dirtablen; count++) {
4757                 if (strcmp(dirtab[count].d_name, comp) == 0) {
4758                         lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
4759                         dp = LXPTOV(lxpnp);
4760                         ASSERT(dp != NULL);
4761                         return (dp);
4762                 }
4763         }
4764         return (NULL);
4765 }
4766 
4767 static vnode_t *
4768 lxpr_lookup_piddir(vnode_t *dp, char *comp)
4769 {
4770         proc_t *p;
4771 
4772         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
4773 
4774         p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
4775         if (p == NULL)
4776                 return (NULL);
4777 
4778         dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
4779 
4780         lxpr_unlock(p);
4781 
4782         return (dp);
4783 }
4784 
4785 /*
4786  * Lookup one of the process's task ID's.
4787  */
4788 static vnode_t *
4789 lxpr_lookup_taskdir(vnode_t *dp, char *comp)
4790 {
4791         lxpr_node_t *dlxpnp = VTOLXP(dp);
4792         lxpr_node_t *lxpnp;
4793         proc_t *p;
4794         pid_t real_pid;
4795         uint_t tid;
4796         int c;
4797         kthread_t *t;
4798 
4799         ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASKDIR);
4800 
4801         /*
4802          * convert the string rendition of the filename to a thread ID
4803          */
4804         tid = 0;
4805         while ((c = *comp++) != '\0') {
4806                 int otid;
4807                 if (c < '0' || c > '9')
4808                         return (NULL);
4809 
4810                 otid = tid;
4811                 tid = 10 * tid + c - '0';
4812                 /* integer overflow */
4813                 if (tid / 10 != otid)
4814                         return (NULL);
4815         }
4816 
4817         /*
4818          * get the proc to work with and lock it
4819          */
4820         real_pid = get_real_pid(dlxpnp->lxpr_pid);
4821         p = lxpr_lock(real_pid);
4822         if ((p == NULL))
4823                 return (NULL);
4824 
4825         /*
4826          * If the process is a zombie or system process
4827          * it can't have any threads.
4828          */
4829         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
4830                 lxpr_unlock(p);
4831                 return (NULL);
4832         }
4833 
4834         if (p->p_brand == &lx_brand) {
4835                 t = lxpr_get_thread(p, tid);
4836         } else {
4837                 /*
4838                  * Only the main thread is visible for non-branded processes.
4839                  */
4840                 t = p->p_tlist;
4841                 if (tid != p->p_pid || t == NULL) {
4842                         t = NULL;
4843                 } else {
4844                         thread_lock(t);
4845                 }
4846         }
4847         if (t == NULL) {
4848                 lxpr_unlock(p);
4849                 return (NULL);
4850         }
4851         thread_unlock(t);
4852 
4853         /*
4854          * Allocate and fill in a new lx /proc taskid node.
4855          * Instead of the last arg being a fd, it is a tid.
4856          */
4857         lxpnp = lxpr_getnode(dp, LXPR_PID_TASK_IDDIR, p, tid);
4858         dp = LXPTOV(lxpnp);
4859         ASSERT(dp != NULL);
4860         lxpr_unlock(p);
4861         return (dp);
4862 }
4863 
4864 /*
4865  * Lookup one of the process's task ID's.
4866  */
4867 static vnode_t *
4868 lxpr_lookup_task_tid_dir(vnode_t *dp, char *comp)
4869 {
4870         lxpr_node_t *dlxpnp = VTOLXP(dp);
4871         lxpr_node_t *lxpnp;
4872         proc_t *p;
4873         pid_t real_pid;
4874         kthread_t *t;
4875         int i;
4876 
4877         ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
4878 
4879         /*
4880          * get the proc to work with and lock it
4881          */
4882         real_pid = get_real_pid(dlxpnp->lxpr_pid);
4883         p = lxpr_lock(real_pid);
4884         if ((p == NULL))
4885                 return (NULL);
4886 
4887         /*
4888          * If the process is a zombie or system process
4889          * it can't have any threads.
4890          */
4891         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
4892                 lxpr_unlock(p);
4893                 return (NULL);
4894         }
4895 
4896         /* need to confirm tid is still there */
4897         t = lxpr_get_thread(p, dlxpnp->lxpr_desc);
4898         if (t == NULL) {
4899                 lxpr_unlock(p);
4900                 return (NULL);
4901         }
4902         thread_unlock(t);
4903 
4904         /*
4905          * allocate and fill in the new lx /proc taskid dir node
4906          */
4907         for (i = 0; i < TIDDIRFILES; i++) {
4908                 if (strcmp(tiddir[i].d_name, comp) == 0) {
4909                         lxpnp = lxpr_getnode(dp, tiddir[i].d_type, p,
4910                             dlxpnp->lxpr_desc);
4911                         dp = LXPTOV(lxpnp);
4912                         ASSERT(dp != NULL);
4913                         lxpr_unlock(p);
4914                         return (dp);
4915                 }
4916         }
4917 
4918         lxpr_unlock(p);
4919         return (NULL);
4920 }
4921 
/*
 * Lookup one of the process's open files.
 *
 * Handles both the per-process (LXPR_PID_FDDIR) and per-thread
 * (LXPR_PID_TID_FDDIR) fd directories; the actual work is delegated to
 * lxpr_lookup_fdnode(), whose result is returned unchanged.
 */
static vnode_t *
lxpr_lookup_fddir(vnode_t *dp, char *comp)
{
        lxpr_node_t *dlxpnp = VTOLXP(dp);

        ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR ||
            dlxpnp->lxpr_type == LXPR_PID_TID_FDDIR);

        return (lxpr_lookup_fdnode(dp, comp));
}
4935 
4936 static vnode_t *
4937 lxpr_lookup_netdir(vnode_t *dp, char *comp)
4938 {
4939         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
4940 
4941         dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
4942 
4943         return (dp);
4944 }
4945 
4946 static vnode_t *
4947 lxpr_lookup_procdir(vnode_t *dp, char *comp)
4948 {
4949         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
4950 
4951         /*
4952          * We know all the names of files & dirs in our file system structure
4953          * except those that are pid names.  These change as pids are created/
4954          * deleted etc., so we just look for a number as the first char to see
4955          * if we are we doing pid lookups.
4956          *
4957          * Don't need to check for "self" as it is implemented as a symlink
4958          */
4959         if (*comp >= '0' && *comp <= '9') {
4960                 pid_t pid = 0;
4961                 lxpr_node_t *lxpnp = NULL;
4962                 proc_t *p;
4963                 int c;
4964 
4965                 while ((c = *comp++) != '\0')
4966                         pid = 10 * pid + c - '0';
4967 
4968                 /*
4969                  * Can't continue if the process is still loading or it doesn't
4970                  * really exist yet (or maybe it just died!)
4971                  */
4972                 p = lxpr_lock(pid);
4973                 if (p == NULL)
4974                         return (NULL);
4975 
4976                 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
4977                         lxpr_unlock(p);
4978                         return (NULL);
4979                 }
4980 
4981                 /*
4982                  * allocate and fill in a new lx /proc node
4983                  */
4984                 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
4985 
4986                 lxpr_unlock(p);
4987 
4988                 dp = LXPTOV(lxpnp);
4989                 ASSERT(dp != NULL);
4990 
4991                 return (dp);
4992         }
4993 
4994         /* Lookup fixed names */
4995         return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
4996 }
4997 
/*
 * Look up `comp' among the fixed entries of the sysdir table; no process is
 * associated with the resulting node.  Returns NULL if nothing matches.
 */
static vnode_t *
lxpr_lookup_sysdir(vnode_t *dp, char *comp)
{
        ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR);
        return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES));
}
5004 
/*
 * Look up `comp' among the fixed entries of the sys_kerneldir table; no
 * process is associated with the resulting node.  Returns NULL if nothing
 * matches.
 */
static vnode_t *
lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp)
{
        ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR);
        return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir,
            SYS_KERNELDIRFILES));
}
5012 
/*
 * Look up `comp' among the fixed entries of the sys_randdir table (used for
 * LXPR_SYS_KERNEL_RANDDIR nodes); no process is associated with the
 * resulting node.  Returns NULL if nothing matches.
 */
static vnode_t *
lxpr_lookup_sys_kdir_randdir(vnode_t *dp, char *comp)
{
        ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
        return (lxpr_lookup_common(dp, comp, NULL, sys_randdir,
            SYS_RANDDIRFILES));
}
5020 
/*
 * Look up `comp' among the fixed entries of the sys_netdir table; no process
 * is associated with the resulting node.  Returns NULL if nothing matches.
 */
static vnode_t *
lxpr_lookup_sys_netdir(vnode_t *dp, char *comp)
{
        ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NETDIR);
        return (lxpr_lookup_common(dp, comp, NULL, sys_netdir,
            SYS_NETDIRFILES));
}
5028 
/*
 * Look up `comp' among the fixed entries of the sys_net_coredir table; no
 * process is associated with the resulting node.  Returns NULL if nothing
 * matches.
 */
static vnode_t *
lxpr_lookup_sys_net_coredir(vnode_t *dp, char *comp)
{
        ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_COREDIR);
        return (lxpr_lookup_common(dp, comp, NULL, sys_net_coredir,
            SYS_NET_COREDIRFILES));
}
5036 
/*
 * Look up `comp' among the fixed entries of the sys_vmdir table; no process
 * is associated with the resulting node.  Returns NULL if nothing matches.
 */
static vnode_t *
lxpr_lookup_sys_vmdir(vnode_t *dp, char *comp)
{
        ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_VMDIR);
        return (lxpr_lookup_common(dp, comp, NULL, sys_vmdir,
            SYS_VMDIRFILES));
}
5044 
/*
 * Look up `comp' among the fixed entries of the sys_fsdir table; no process
 * is associated with the resulting node.  Returns NULL if nothing matches.
 */
static vnode_t *
lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp)
{
        ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR);
        return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir,
            SYS_FSDIRFILES));
}
5052 
/*
 * Look up `comp' among the fixed entries of the sys_fs_inotifydir table; no
 * process is associated with the resulting node.  Returns NULL if nothing
 * matches.
 */
static vnode_t *
lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp)
{
        ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
        return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir,
            SYS_FS_INOTIFYDIRFILES));
}
5060 
5061 /*
5062  * lxpr_readdir(): Vnode operation for VOP_READDIR()
5063  */
5064 /* ARGSUSED */
5065 static int
5066 lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
5067     caller_context_t *ct, int flags)
5068 {
5069         lxpr_node_t *lxpnp = VTOLXP(dp);
5070         lxpr_nodetype_t type = lxpnp->lxpr_type;
5071         ssize_t uresid;
5072         off_t uoffset;
5073         int error;
5074 
5075         ASSERT(dp->v_type == VDIR);
5076         ASSERT(type < LXPR_NFILES);
5077 
5078         /*
5079          * we should never get here because the readdir
5080          * is done on the realvp for these nodes
5081          */
5082         ASSERT(type != LXPR_PID_FD_FD &&
5083             type != LXPR_PID_CURDIR &&
5084             type != LXPR_PID_ROOTDIR);
5085 
5086         /*
5087          * restrict readdir permission to owner or root
5088          */
5089         if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
5090                 return (error);
5091 
5092         uoffset = uiop->uio_offset;
5093         uresid = uiop->uio_resid;
5094 
5095         /* can't do negative reads */
5096         if (uoffset < 0 || uresid <= 0)
5097                 return (EINVAL);
5098 
5099         /* can't read directory entries that don't exist! */
5100         if (uoffset % LXPR_SDSIZE)
5101                 return (ENOENT);
5102 
5103         return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
5104 }
5105 
/*
 * Readdir handler that rejects every request: all arguments are ignored and
 * ENOTDIR is always returned.  Presumably this is the
 * lxpr_readdir_function[] entry for node types that are not directories (the
 * table itself is not visible here -- confirm).
 */
/* ARGSUSED */
static int
lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
        return (ENOTDIR);
}
5112 
5113 /*
5114  * This has the common logic for returning directory entries
5115  */
5116 static int
5117 lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
5118     lxpr_dirent_t *dirtab, int dirtablen)
5119 {
5120         /* bp holds one dirent64 structure */
5121         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5122         dirent64_t *dirent = (dirent64_t *)bp;
5123         ssize_t oresid; /* save a copy for testing later */
5124         ssize_t uresid;
5125 
5126         oresid = uiop->uio_resid;
5127 
5128         /* clear out the dirent buffer */
5129         bzero(bp, sizeof (bp));
5130 
5131         /*
5132          * Satisfy user request
5133          */
5134         while ((uresid = uiop->uio_resid) > 0) {
5135                 int dirindex;
5136                 off_t uoffset;
5137                 int reclen;
5138                 int error;
5139 
5140                 uoffset = uiop->uio_offset;
5141                 dirindex  = (uoffset / LXPR_SDSIZE) - 2;
5142 
5143                 if (uoffset == 0) {
5144 
5145                         dirent->d_ino = lxpnp->lxpr_ino;
5146                         dirent->d_name[0] = '.';
5147                         dirent->d_name[1] = '\0';
5148                         reclen = DIRENT64_RECLEN(1);
5149 
5150                 } else if (uoffset == LXPR_SDSIZE) {
5151 
5152                         dirent->d_ino = lxpr_parentinode(lxpnp);
5153                         dirent->d_name[0] = '.';
5154                         dirent->d_name[1] = '.';
5155                         dirent->d_name[2] = '\0';
5156                         reclen = DIRENT64_RECLEN(2);
5157 
5158                 } else if (dirindex >= 0 && dirindex < dirtablen) {
5159                         int slen = strlen(dirtab[dirindex].d_name);
5160 
5161                         dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
5162                             lxpnp->lxpr_pid, 0);
5163 
5164                         VERIFY(slen < LXPNSIZ);
5165                         (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
5166                         reclen = DIRENT64_RECLEN(slen);
5167 
5168                 } else {
5169                         /* Run out of table entries */
5170                         if (eofp) {
5171                                 *eofp = 1;
5172                         }
5173                         return (0);
5174                 }
5175 
5176                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5177                 dirent->d_reclen = (ushort_t)reclen;
5178 
5179                 /*
5180                  * if the size of the data to transfer is greater
5181                  * that that requested then we can't do it this transfer.
5182                  */
5183                 if (reclen > uresid) {
5184                         /*
5185                          * Error if no entries have been returned yet.
5186                          */
5187                         if (uresid == oresid) {
5188                                 return (EINVAL);
5189                         }
5190                         break;
5191                 }
5192 
5193                 /*
5194                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5195                  * by the same amount.  But we want uiop->uio_offset to change
5196                  * in increments of LXPR_SDSIZE, which is different from the
5197                  * number of bytes being returned to the user.  So we set
5198                  * uiop->uio_offset separately, ignoring what uiomove() does.
5199                  */
5200                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5201                     uiop)) != 0)
5202                         return (error);
5203 
5204                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5205         }
5206 
5207         /* Have run out of space, but could have just done last table entry */
5208         if (eofp) {
5209                 *eofp =
5210                     (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
5211         }
5212         return (0);
5213 }
5214 
5215 
5216 static int
5217 lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5218 {
5219         /* bp holds one dirent64 structure */
5220         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5221         dirent64_t *dirent = (dirent64_t *)bp;
5222         ssize_t oresid; /* save a copy for testing later */
5223         ssize_t uresid;
5224         off_t uoffset;
5225         zoneid_t zoneid;
5226         pid_t pid;
5227         int error;
5228         int ceof;
5229 
5230         ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
5231 
5232         oresid = uiop->uio_resid;
5233         zoneid = LXPTOZ(lxpnp)->zone_id;
5234 
5235         /*
5236          * We return directory entries in the order: "." and ".." then the
5237          * unique lxproc files, then the directories corresponding to the
5238          * running processes.  We have defined this as the ordering because
5239          * it allows us to more easily keep track of where we are betwen calls
5240          * to getdents().  If the number of processes changes between calls
5241          * then we can't lose track of where we are in the lxproc files.
5242          */
5243 
5244         /* Do the fixed entries */
5245         error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
5246             PROCDIRFILES);
5247 
5248         /* Finished if we got an error or if we couldn't do all the table */
5249         if (error != 0 || ceof == 0)
5250                 return (error);
5251 
5252         /* clear out the dirent buffer */
5253         bzero(bp, sizeof (bp));
5254 
5255         /* Do the process entries */
5256         while ((uresid = uiop->uio_resid) > 0) {
5257                 proc_t *p;
5258                 int len;
5259                 int reclen;
5260                 int i;
5261 
5262                 uoffset = uiop->uio_offset;
5263 
5264                 /*
5265                  * Stop when entire proc table has been examined.
5266                  */
5267                 i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
5268                 if (i < 0 || i >= v.v_proc) {
5269                         /* Run out of table entries */
5270                         if (eofp) {
5271                                 *eofp = 1;
5272                         }
5273                         return (0);
5274                 }
5275                 mutex_enter(&pidlock);
5276 
5277                 /*
5278                  * Skip indices for which there is no pid_entry, PIDs for
5279                  * which there is no corresponding process, a PID of 0,
5280                  * and anything the security policy doesn't allow
5281                  * us to look at.
5282                  */
5283                 if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
5284                     p->p_pid == 0 ||
5285                     secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
5286                         mutex_exit(&pidlock);
5287                         goto next;
5288                 }
5289                 mutex_exit(&pidlock);
5290 
5291                 /*
5292                  * Convert pid to the Linux default of 1 if we're the zone's
5293                  * init process, or 0 if zsched, otherwise use the value from
5294                  * the proc structure
5295                  */
5296                 if (p->p_pid == curproc->p_zone->zone_proc_initpid) {
5297                         pid = 1;
5298                 } else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) {
5299                         pid = 0;
5300                 } else {
5301                         pid = p->p_pid;
5302                 }
5303 
5304                 /*
5305                  * If this /proc was mounted in the global zone, view
5306                  * all procs; otherwise, only view zone member procs.
5307                  */
5308                 if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
5309                         goto next;
5310                 }
5311 
5312                 ASSERT(p->p_stat != 0);
5313 
5314                 dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
5315                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
5316                 ASSERT(len < LXPNSIZ);
5317                 reclen = DIRENT64_RECLEN(len);
5318 
5319                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5320                 dirent->d_reclen = (ushort_t)reclen;
5321 
5322                 /*
5323                  * if the size of the data to transfer is greater
5324                  * that that requested then we can't do it this transfer.
5325                  */
5326                 if (reclen > uresid) {
5327                         /*
5328                          * Error if no entries have been returned yet.
5329                          */
5330                         if (uresid == oresid)
5331                                 return (EINVAL);
5332                         break;
5333                 }
5334 
5335                 /*
5336                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5337                  * by the same amount.  But we want uiop->uio_offset to change
5338                  * in increments of LXPR_SDSIZE, which is different from the
5339                  * number of bytes being returned to the user.  So we set
5340                  * uiop->uio_offset separately, in the increment of this for
5341                  * the loop, ignoring what uiomove() does.
5342                  */
5343                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5344                     uiop)) != 0)
5345                         return (error);
5346 next:
5347                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5348         }
5349 
5350         if (eofp != NULL) {
5351                 *eofp = (uiop->uio_offset >=
5352                     ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
5353         }
5354 
5355         return (0);
5356 }
5357 
5358 static int
5359 lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5360 {
5361         proc_t *p;
5362         pid_t find_pid;
5363 
5364         ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
5365 
5366         /* can't read its contents if it died */
5367         mutex_enter(&pidlock);
5368 
5369         if (lxpnp->lxpr_pid == 1) {
5370                 find_pid = curproc->p_zone->zone_proc_initpid;
5371         } else if (lxpnp->lxpr_pid == 0) {
5372                 find_pid = curproc->p_zone->zone_zsched->p_pid;
5373         } else {
5374                 find_pid = lxpnp->lxpr_pid;
5375         }
5376         p = prfind(find_pid);
5377 
5378         if (p == NULL || p->p_stat == SIDL) {
5379                 mutex_exit(&pidlock);
5380                 return (ENOENT);
5381         }
5382         mutex_exit(&pidlock);
5383 
5384         return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
5385 }
5386 
/*
 * Readdir handler for the net directory: lists ".", ".." and the entries of
 * the netdir table via lxpr_readdir_common(), returning its result.
 */
static int
lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
        ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
        return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
}
5393 
5394 static int
5395 lxpr_readdir_taskdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5396 {
5397         /* bp holds one dirent64 structure */
5398         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5399         dirent64_t *dirent = (dirent64_t *)bp;
5400         ssize_t oresid; /* save a copy for testing later */
5401         ssize_t uresid;
5402         off_t uoffset;
5403         int error;
5404         int ceof;
5405         proc_t *p;
5406         int tiddirsize = -1;
5407         int tasknum;
5408         pid_t real_pid;
5409         kthread_t *t;
5410         boolean_t branded = B_FALSE;
5411 
5412         ASSERT(lxpnp->lxpr_type == LXPR_PID_TASKDIR);
5413 
5414         oresid = uiop->uio_resid;
5415 
5416         real_pid = get_real_pid(lxpnp->lxpr_pid);
5417         p = lxpr_lock(real_pid);
5418 
5419         /* can't read its contents if it died */
5420         if (p == NULL) {
5421                 return (ENOENT);
5422         }
5423         if (p->p_stat == SIDL) {
5424                 lxpr_unlock(p);
5425                 return (ENOENT);
5426         }
5427 
5428         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
5429                 tiddirsize = 0;
5430 
5431         branded = (p->p_brand == &lx_brand);
5432         /*
5433          * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
5434          * going away while we iterate over its threads.
5435          */
5436         mutex_exit(&p->p_lock);
5437 
5438         if (tiddirsize == -1)
5439                 tiddirsize = p->p_lwpcnt;
5440 
5441         /* Do the fixed entries (in this case just "." & "..") */
5442         error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
5443 
5444         /* Finished if we got an error or if we couldn't do all the table */
5445         if (error != 0 || ceof == 0)
5446                 goto out;
5447 
5448         if ((t = p->p_tlist) == NULL) {
5449                 if (eofp != NULL)
5450                         *eofp = 1;
5451                 goto out;
5452         }
5453 
5454         /* clear out the dirent buffer */
5455         bzero(bp, sizeof (bp));
5456 
5457         /*
5458          * Loop until user's request is satisfied or until all thread's have
5459          * been returned.
5460          */
5461         for (tasknum = 0; (uresid = uiop->uio_resid) > 0; tasknum++) {
5462                 int i;
5463                 int reclen;
5464                 int len;
5465                 uint_t emul_tid;
5466                 lx_lwp_data_t *lwpd;
5467 
5468                 uoffset = uiop->uio_offset;
5469 
5470                 /*
5471                  * Stop at the end of the thread list
5472                  */
5473                 i = (uoffset / LXPR_SDSIZE) - 2;
5474                 if (i < 0 || i >= tiddirsize) {
5475                         if (eofp) {
5476                                 *eofp = 1;
5477                         }
5478                         goto out;
5479                 }
5480 
5481                 if (i != tasknum)
5482                         goto next;
5483 
5484                 if (!branded) {
5485                         /*
5486                          * Emulating the goofy linux task model is impossible
5487                          * to do for native processes.  We can compromise by
5488                          * presenting only the main thread to the consumer.
5489                          */
5490                         emul_tid = p->p_pid;
5491                 } else {
5492                         if ((lwpd = ttolxlwp(t)) == NULL) {
5493                                 goto next;
5494                         }
5495                         emul_tid = lwpd->br_pid;
5496                         /*
5497                          * Convert pid to Linux default of 1 if we're the
5498                          * zone's init.
5499                          */
5500                         if (emul_tid == curproc->p_zone->zone_proc_initpid)
5501                                 emul_tid = 1;
5502                 }
5503 
5504                 dirent->d_ino = lxpr_inode(LXPR_PID_TASK_IDDIR, lxpnp->lxpr_pid,
5505                     emul_tid);
5506                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", emul_tid);
5507                 ASSERT(len < LXPNSIZ);
5508                 reclen = DIRENT64_RECLEN(len);
5509 
5510                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5511                 dirent->d_reclen = (ushort_t)reclen;
5512 
5513                 if (reclen > uresid) {
5514                         /*
5515                          * Error if no entries have been returned yet.
5516                          */
5517                         if (uresid == oresid)
5518                                 error = EINVAL;
5519                         goto out;
5520                 }
5521 
5522                 /*
5523                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5524                  * by the same amount.  But we want uiop->uio_offset to change
5525                  * in increments of LXPR_SDSIZE, which is different from the
5526                  * number of bytes being returned to the user.  So we set
5527                  * uiop->uio_offset separately, in the increment of this for
5528                  * the loop, ignoring what uiomove() does.
5529                  */
5530                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5531                     uiop)) != 0)
5532                         goto out;
5533 
5534 next:
5535                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5536 
5537                 if ((t = t->t_forw) == p->p_tlist || !branded) {
5538                         if (eofp != NULL)
5539                                 *eofp = 1;
5540                         goto out;
5541                 }
5542         }
5543 
5544         if (eofp != NULL)
5545                 *eofp = 0;
5546 
5547 out:
5548         mutex_enter(&p->p_lock);
5549         lxpr_unlock(p);
5550         return (error);
5551 }
5552 
5553 static int
5554 lxpr_readdir_task_tid_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5555 {
5556         proc_t *p;
5557         pid_t real_pid;
5558         kthread_t *t;
5559 
5560         ASSERT(lxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
5561 
5562         mutex_enter(&pidlock);
5563 
5564         real_pid = get_real_pid(lxpnp->lxpr_pid);
5565         p = prfind(real_pid);
5566 
5567         /* can't read its contents if it died */
5568         if (p == NULL || p->p_stat == SIDL) {
5569                 mutex_exit(&pidlock);
5570                 return (ENOENT);
5571         }
5572 
5573         mutex_exit(&pidlock);
5574 
5575         /* need to confirm tid is still there */
5576         t = lxpr_get_thread(p, lxpnp->lxpr_desc);
5577         if (t == NULL) {
5578                 /* we can't find this specific thread */
5579                 return (NULL);
5580         }
5581         thread_unlock(t);
5582 
5583         return (lxpr_readdir_common(lxpnp, uiop, eofp, tiddir, TIDDIRFILES));
5584 }
5585 
5586 static int
5587 lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5588 {
5589         /* bp holds one dirent64 structure */
5590         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5591         dirent64_t *dirent = (dirent64_t *)bp;
5592         ssize_t oresid; /* save a copy for testing later */
5593         ssize_t uresid;
5594         off_t uoffset;
5595         int error;
5596         int ceof;
5597         proc_t *p;
5598         int fddirsize = -1;
5599         uf_info_t *fip;
5600 
5601         ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR ||
5602             lxpnp->lxpr_type == LXPR_PID_TID_FDDIR);
5603 
5604         oresid = uiop->uio_resid;
5605 
5606         /* can't read its contents if it died */
5607         p = lxpr_lock(lxpnp->lxpr_pid);
5608         if (p == NULL)
5609                 return (ENOENT);
5610 
5611         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
5612                 fddirsize = 0;
5613 
5614         /*
5615          * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
5616          * going away while we iterate over its fi_list.
5617          */
5618         mutex_exit(&p->p_lock);
5619 
5620         /* Get open file info */
5621         fip = (&(p)->p_user.u_finfo);
5622         mutex_enter(&fip->fi_lock);
5623 
5624         if (fddirsize == -1)
5625                 fddirsize = fip->fi_nfiles;
5626 
5627         /* Do the fixed entries (in this case just "." & "..") */
5628         error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
5629 
5630         /* Finished if we got an error or if we couldn't do all the table */
5631         if (error != 0 || ceof == 0)
5632                 goto out;
5633 
5634         /* clear out the dirent buffer */
5635         bzero(bp, sizeof (bp));
5636 
5637         /*
5638          * Loop until user's request is satisfied or until
5639          * all file descriptors have been examined.
5640          */
5641         for (; (uresid = uiop->uio_resid) > 0;
5642             uiop->uio_offset = uoffset + LXPR_SDSIZE) {
5643                 int reclen;
5644                 int fd;
5645                 int len;
5646 
5647                 uoffset = uiop->uio_offset;
5648 
5649                 /*
5650                  * Stop at the end of the fd list
5651                  */
5652                 fd = (uoffset / LXPR_SDSIZE) - 2;
5653                 if (fd < 0 || fd >= fddirsize) {
5654                         if (eofp) {
5655                                 *eofp = 1;
5656                         }
5657                         goto out;
5658                 }
5659 
5660                 if (fip->fi_list[fd].uf_file == NULL)
5661                         continue;
5662 
5663                 dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
5664                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
5665                 ASSERT(len < LXPNSIZ);
5666                 reclen = DIRENT64_RECLEN(len);
5667 
5668                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5669                 dirent->d_reclen = (ushort_t)reclen;
5670 
5671                 if (reclen > uresid) {
5672                         /*
5673                          * Error if no entries have been returned yet.
5674                          */
5675                         if (uresid == oresid)
5676                                 error = EINVAL;
5677                         goto out;
5678                 }
5679 
5680                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5681                     uiop)) != 0)
5682                         goto out;
5683         }
5684 
5685         if (eofp != NULL) {
5686                 *eofp =
5687                     (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
5688         }
5689 
5690 out:
5691         mutex_exit(&fip->fi_lock);
5692         mutex_enter(&p->p_lock);
5693         lxpr_unlock(p);
5694         return (error);
5695 }
5696 
5697 static int
5698 lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5699 {
5700         ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR);
5701         return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES));
5702 }
5703 
5704 static int
5705 lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5706 {
5707         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR);
5708         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir,
5709             SYS_FSDIRFILES));
5710 }
5711 
5712 static int
5713 lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5714 {
5715         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
5716         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir,
5717             SYS_FS_INOTIFYDIRFILES));
5718 }
5719 
5720 static int
5721 lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5722 {
5723         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR);
5724         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir,
5725             SYS_KERNELDIRFILES));
5726 }
5727 
5728 static int
5729 lxpr_readdir_sys_kdir_randdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5730 {
5731         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
5732         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_randdir,
5733             SYS_RANDDIRFILES));
5734 }
5735 
5736 static int
5737 lxpr_readdir_sys_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5738 {
5739         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NETDIR);
5740         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_netdir,
5741             SYS_NETDIRFILES));
5742 }
5743 
5744 static int
5745 lxpr_readdir_sys_net_coredir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5746 {
5747         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_COREDIR);
5748         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_coredir,
5749             SYS_NET_COREDIRFILES));
5750 }
5751 
5752 static int
5753 lxpr_readdir_sys_vmdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5754 {
5755         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VMDIR);
5756         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_vmdir,
5757             SYS_VMDIRFILES));
5758 }
5759 
5760 static int
5761 lxpr_write_sys_net_core_somaxc(lxpr_node_t *lxpnp, struct uio *uio,
5762     struct cred *cr, caller_context_t *ct)
5763 {
5764         int error;
5765         int res = 0;
5766         size_t olen;
5767         char val[16];   /* big enough for a uint numeric string */
5768         netstack_t *ns;
5769         mod_prop_info_t *ptbl = NULL;
5770         mod_prop_info_t *pinfo = NULL;
5771 
5772         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
5773 
5774         if (uio->uio_loffset != 0)
5775                 return (EINVAL);
5776 
5777         if (uio->uio_resid == 0)
5778                 return (0);
5779 
5780         olen = uio->uio_resid;
5781         if (olen > sizeof (val) - 1)
5782                 return (EINVAL);
5783 
5784         bzero(val, sizeof (val));
5785         error = uiomove(val, olen, UIO_WRITE, uio);
5786         if (error != 0)
5787                 return (error);
5788 
5789         if (val[olen - 1] == '\n')
5790                 val[olen - 1] = '\0';
5791 
5792         if (val[0] == '\0') /* no input */
5793                 return (EINVAL);
5794 
5795         ns = netstack_get_current();
5796         if (ns == NULL)
5797                 return (EINVAL);
5798 
5799         ptbl = ns->netstack_tcp->tcps_propinfo_tbl;
5800         pinfo = mod_prop_lookup(ptbl, "_conn_req_max_q", MOD_PROTO_TCP);
5801         if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, val, 0) != 0)
5802                 res = EINVAL;
5803 
5804         netstack_rele(ns);
5805         return (res);
5806 }
5807 
5808 /* ARGSUSED */
5809 static int
5810 lxpr_write_sys_kernel_corepatt(lxpr_node_t *lxpnp, struct uio *uio,
5811     struct cred *cr, caller_context_t *ct)
5812 {
5813         zone_t *zone = curproc->p_zone;
5814         struct core_globals *cg;
5815         refstr_t *rp, *nrp;
5816         corectl_path_t *ccp;
5817         char val[MAXPATHLEN];
5818         char valtr[MAXPATHLEN];
5819         size_t olen;
5820         int error;
5821 
5822         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);
5823 
5824         cg = zone_getspecific(core_zone_key, zone);
5825         ASSERT(cg != NULL);
5826 
5827         if (secpolicy_coreadm(cr) != 0)
5828                 return (EPERM);
5829 
5830         if (uio->uio_loffset != 0)
5831                 return (EINVAL);
5832 
5833         if (uio->uio_resid == 0)
5834                 return (0);
5835 
5836         olen = uio->uio_resid;
5837         if (olen > sizeof (val) - 1)
5838                 return (EINVAL);
5839 
5840         bzero(val, sizeof (val));
5841         error = uiomove(val, olen, UIO_WRITE, uio);
5842         if (error != 0)
5843                 return (error);
5844 
5845         if (val[olen - 1] == '\n')
5846                 val[olen - 1] = '\0';
5847 
5848         if (val[0] == '|')
5849                 return (EINVAL);
5850 
5851         if ((error = lxpr_core_path_l2s(val, valtr, sizeof (valtr))) != 0)
5852                 return (error);
5853 
5854         nrp = refstr_alloc(valtr);
5855 
5856         ccp = cg->core_default_path;
5857         mutex_enter(&ccp->ccp_mtx);
5858         rp = ccp->ccp_path;
5859         refstr_hold((ccp->ccp_path = nrp));
5860         cg->core_options |= CC_PROCESS_PATH;
5861         mutex_exit(&ccp->ccp_mtx);
5862 
5863         if (rp != NULL)
5864                 refstr_rele(rp);
5865 
5866         return (0);
5867 }
5868 
5869 /*
5870  * lxpr_readlink(): Vnode operation for VOP_READLINK()
5871  */
5872 /* ARGSUSED */
5873 static int
5874 lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
5875 {
5876         char bp[MAXPATHLEN + 1];
5877         size_t buflen = sizeof (bp);
5878         lxpr_node_t *lxpnp = VTOLXP(vp);
5879         vnode_t *rvp = lxpnp->lxpr_realvp;
5880         pid_t pid;
5881         int error = 0;
5882 
5883         /*
5884          * Linux does something very "clever" for /proc/<pid>/fd/<num> entries.
5885          * Open FDs are represented as symlinks, the link contents
5886          * corresponding to the open resource.  For plain files or devices,
5887          * this isn't absurd since one can dereference the symlink to query
5888          * the underlying resource.  For sockets or pipes, it becomes ugly in a
5889          * hurry.  To maintain this human-readable output, those FD symlinks
5890          * point to bogus targets such as "socket:[<inodenum>]".  This requires
5891          * circumventing vfs since the stat/lstat behavior on those FD entries
5892          * will be unusual. (A stat must retrieve information about the open
5893          * socket or pipe.  It cannot fail because the link contents point to
5894          * an absent file.)
5895          *
5896          * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD
5897          * entries.  This bypasses code paths which would normally
5898          * short-circuit on symlinks and allows us to emulate the vfs behavior
5899          * expected by /proc consumers.
5900          */
5901         if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD)
5902                 return (EINVAL);
5903 
5904         /* Try to produce a symlink name for anything that has a realvp */
5905         if (rvp != NULL) {
5906                 if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
5907                         return (error);
5908                 if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) {
5909                         /*
5910                          * Special handling possible for /proc/<pid>/fd/<num>
5911                          * Generate <type>:[<inode>] links, if allowed.
5912                          */
5913                         if (lxpnp->lxpr_type != LXPR_PID_FD_FD ||
5914                             lxpr_readlink_fdnode(lxpnp, bp, buflen) != 0) {
5915                                 return (error);
5916                         }
5917                 }
5918         } else {
5919                 switch (lxpnp->lxpr_type) {
5920                 case LXPR_SELF:
5921                         /*
5922                          * Convert pid to the Linux default of 1 if we're the
5923                          * zone's init process or 0 if zsched.
5924                          */
5925                         if (curproc->p_pid ==
5926                             curproc->p_zone->zone_proc_initpid) {
5927                                 pid = 1;
5928                         } else if (curproc->p_pid ==
5929                             curproc->p_zone->zone_zsched->p_pid) {
5930                                 pid = 0;
5931                         } else {
5932                                 pid = curproc->p_pid;
5933                         }
5934 
5935                         /*
5936                          * Don't need to check result as every possible int
5937                          * will fit within MAXPATHLEN bytes.
5938                          */
5939                         (void) snprintf(bp, buflen, "%d", pid);
5940                         break;
5941                 case LXPR_PID_CURDIR:
5942                 case LXPR_PID_ROOTDIR:
5943                 case LXPR_PID_EXE:
5944                         return (EACCES);
5945                 default:
5946                         /*
5947                          * Need to return error so that nothing thinks
5948                          * that the symlink is empty and hence "."
5949                          */
5950                         return (EINVAL);
5951                 }
5952         }
5953 
5954         /* copy the link data to user space */
5955         return (uiomove(bp, strlen(bp), UIO_READ, uiop));
5956 }
5957 
5958 
5959 /*
5960  * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
5961  * Vnode is no longer referenced, deallocate the file
5962  * and all its resources.
5963  */
5964 /* ARGSUSED */
5965 static void
5966 lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
5967 {
5968         lxpr_freenode(VTOLXP(vp));
5969 }
5970 
/*
 * lxpr_sync(): Vnode operation for VOP_SYNC()
 *
 * There is never anything to flush, and callers rely on this operation
 * not failing, so it unconditionally reports success.
 */
static int
lxpr_sync()
{
        const int success = 0;

        return (success);
}
5982 
5983 /*
5984  * lxpr_cmp(): Vnode operation for VOP_CMP()
5985  */
5986 static int
5987 lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
5988 {
5989         vnode_t *rvp;
5990 
5991         while (vn_matchops(vp1, lxpr_vnodeops) &&
5992             (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
5993                 vp1 = rvp;
5994         }
5995 
5996         while (vn_matchops(vp2, lxpr_vnodeops) &&
5997             (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
5998                 vp2 = rvp;
5999         }
6000 
6001         if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
6002                 return (vp1 == vp2);
6003         return (VOP_CMP(vp1, vp2, ct));
6004 }
6005 
6006 /*
6007  * lxpr_realvp(): Vnode operation for VOP_REALVP()
6008  */
6009 static int
6010 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
6011 {
6012         vnode_t *rvp;
6013 
6014         if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
6015                 vp = rvp;
6016                 if (VOP_REALVP(vp, &rvp, ct) == 0)
6017                         vp = rvp;
6018         }
6019 
6020         *vpp = vp;
6021         return (0);
6022 }
6023 
6024 static int
6025 lxpr_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
6026     caller_context_t *ct)
6027 {
6028         lxpr_node_t     *lxpnp = VTOLXP(vp);
6029         lxpr_nodetype_t type = lxpnp->lxpr_type;
6030 
6031         switch (type) {
6032         case LXPR_SYS_KERNEL_COREPATT:
6033                 return (lxpr_write_sys_kernel_corepatt(lxpnp, uiop, cr, ct));
6034         case LXPR_SYS_NET_CORE_SOMAXCON:
6035                 return (lxpr_write_sys_net_core_somaxc(lxpnp, uiop, cr, ct));
6036 
6037         default:
6038                 /* pretend we wrote the whole thing */
6039                 uiop->uio_offset += uiop->uio_resid;
6040                 uiop->uio_resid = 0;
6041                 return (0);
6042         }
6043 }
6044 
6045 /*
6046  * We need to allow open with O_CREAT for the oom_score_adj file.
6047  */
6048 /*ARGSUSED7*/
6049 static int
6050 lxpr_create(struct vnode *dvp, char *nm, struct vattr *vap,
6051     enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred,
6052     int flag, caller_context_t *ct, vsecattr_t *vsecp)
6053 {
6054         lxpr_node_t *lxpnp = VTOLXP(dvp);
6055         lxpr_nodetype_t type = lxpnp->lxpr_type;
6056         vnode_t *vp = NULL;
6057         int error;
6058 
6059         ASSERT(type < LXPR_NFILES);
6060 
6061         /*
6062          * restrict create permission to owner or root
6063          */
6064         if ((error = lxpr_access(dvp, VEXEC, 0, cred, ct)) != 0) {
6065                 return (error);
6066         }
6067 
6068         if (*nm == '\0')
6069                 return (EPERM);
6070 
6071         if (dvp->v_type != VDIR)
6072                 return (EPERM);
6073 
6074         if (exclusive == EXCL)
6075                 return (EEXIST);
6076 
6077         /*
6078          * We're currently restricting O_CREAT to:
6079          * - /proc/<pid>/fd/<num>
6080          * - /proc/<pid>/oom_score_adj
6081          * - /proc/<pid>/task/<tid>/fd/<num>
6082          * - /proc/<pid>/task/<tid>/oom_score_adj
6083          * - /proc/sys/kernel/core_pattern
6084          * - /proc/sys/net/core/somaxconn
6085          * - /proc/sys/vm/overcommit_memory
6086          * - /proc/sys/vm/swappiness
6087          */
6088         switch (type) {
6089         case LXPR_PIDDIR:
6090         case LXPR_PID_TASK_IDDIR:
6091                 if (strcmp(nm, "oom_score_adj") == 0) {
6092                         proc_t *p;
6093                         p = lxpr_lock(lxpnp->lxpr_pid);
6094                         if (p != NULL) {
6095                                 vp = lxpr_lookup_common(dvp, nm, p, piddir,
6096                                     PIDDIRFILES);
6097                         }
6098                         lxpr_unlock(p);
6099                 }
6100                 break;
6101 
6102         case LXPR_SYS_NET_COREDIR:
6103                 if (strcmp(nm, "somaxconn") == 0) {
6104                         vp = lxpr_lookup_common(dvp, nm, NULL, sys_net_coredir,
6105                             SYS_NET_COREDIRFILES);
6106                 }
6107                 break;
6108 
6109         case LXPR_SYS_KERNELDIR:
6110                 if (strcmp(nm, "core_pattern") == 0) {
6111                         vp = lxpr_lookup_common(dvp, nm, NULL, sys_kerneldir,
6112                             SYS_KERNELDIRFILES);
6113                 }
6114                 break;
6115 
6116         case LXPR_SYS_VMDIR:
6117                 if (strcmp(nm, "overcommit_memory") == 0 ||
6118                     strcmp(nm, "swappiness") == 0) {
6119                         vp = lxpr_lookup_common(dvp, nm, NULL, sys_vmdir,
6120                             SYS_VMDIRFILES);
6121                 }
6122                 break;
6123 
6124         case LXPR_PID_FDDIR:
6125         case LXPR_PID_TID_FDDIR:
6126                 vp = lxpr_lookup_fdnode(dvp, nm);
6127                 break;
6128 
6129         default:
6130                 vp = NULL;
6131                 break;
6132         }
6133 
6134         if (vp != NULL) {
6135                 /* Creating an existing file, allow it for regular files. */
6136                 if (vp->v_type == VDIR)
6137                         return (EISDIR);
6138 
6139                 /* confirm permissions against existing file */
6140                 if ((error = lxpr_access(vp, mode, 0, cred, ct)) != 0) {
6141                         VN_RELE(vp);
6142                         return (error);
6143                 }
6144 
6145                 *vpp = vp;
6146                 return (0);
6147         }
6148 
6149         /*
6150          * Linux proc does not allow creation of addition, non-subsystem
6151          * specific files inside the hierarchy.  ENOENT is tossed when such
6152          * actions are attempted.
6153          */
6154         return (ENOENT);
6155 }