1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2016 Joyent, Inc.
  25  */
  26 
  27 /*
  28  * lx_proc -- a Linux-compatible /proc for the LX brand
  29  *
  30  * We have -- confusingly -- two implementations of Linux /proc.  One is to
  31  * support native (but Linux-borne) programs that wish to view the native
  32  * system through the Linux /proc model; the other -- this one -- is to
  33  * support Linux binaries via the LX brand.  These two implementations differ
  34  * greatly in their aspirations (and their willingness to bend the truth
  35  * of the system to accommodate those aspirations); they should not be unified.
  36  */
  37 
  38 #include <sys/cpupart.h>
  39 #include <sys/cpuvar.h>
  40 #include <sys/session.h>
  41 #include <sys/vmparam.h>
  42 #include <sys/mman.h>
  43 #include <vm/rm.h>
  44 #include <vm/seg_vn.h>
  45 #include <sys/sdt.h>
  46 #include <lx_signum.h>
  47 #include <sys/strlog.h>
  48 #include <sys/stropts.h>
  49 #include <sys/cmn_err.h>
  50 #include <sys/lx_brand.h>
  51 #include <lx_auxv.h>
  52 #include <sys/x86_archext.h>
  53 #include <sys/archsystm.h>
  54 #include <sys/fp.h>
  55 #include <sys/pool_pset.h>
  56 #include <sys/pset.h>
  57 #include <sys/zone.h>
  58 #include <sys/pghw.h>
  59 #include <sys/vfs_opreg.h>
  60 #include <sys/param.h>
  61 #include <sys/utsname.h>
  62 #include <sys/rctl.h>
  63 #include <sys/kstat.h>
  64 #include <sys/lx_misc.h>
  65 #include <sys/brand.h>
  66 #include <sys/cred_impl.h>
  67 #include <sys/tihdr.h>
  68 #include <sys/corectl.h>
  69 #include <inet/ip.h>
  70 #include <inet/ip_ire.h>
  71 #include <inet/ip6.h>
  72 #include <inet/ip_if.h>
  73 #include <inet/tcp.h>
  74 #include <inet/tcp_impl.h>
  75 #include <inet/udp_impl.h>
  76 #include <inet/ipclassifier.h>
  77 #include <sys/socketvar.h>
  78 #include <fs/sockfs/socktpi.h>
  79 
  80 /* Dependent on procfs */
  81 extern kthread_t *prchoose(proc_t *);
  82 extern int prreadargv(proc_t *, char *, size_t, size_t *);
  83 extern int prreadenvv(proc_t *, char *, size_t, size_t *);
  84 extern int prreadbuf(proc_t *, uintptr_t, uint8_t *, size_t, size_t *);
  85 
  86 #include "lx_proc.h"
  87 
  88 extern pgcnt_t swapfs_minfree;
  89 extern time_t boot_time;
  90 
  91 /*
  92  * Pointer to the vnode ops vector for this fs.
  93  * This is instantiated in lxprinit() in lxpr_vfsops.c
  94  */
  95 vnodeops_t *lxpr_vnodeops;
  96 
  97 static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
  98 static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
  99     caller_context_t *);
 100 static int lxpr_create(struct vnode *, char *, struct vattr *, enum vcexcl,
 101     int, struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *);
 102 static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
 103 static int lxpr_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
 104 static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
 105     caller_context_t *);
 106 static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
 107 static int lxpr_lookup(vnode_t *, char *, vnode_t **,
 108     pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
 109     pathname_t *);
 110 static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
 111     caller_context_t *, int);
 112 static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
 113 static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
 114 static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
 115 static int lxpr_sync(void);
 116 static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
 117 
 118 static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
 119 static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
 120 static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
 121 static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
 122 static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
 123 static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *);
 124 static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *);
 125 static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *);
 126 static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *);
 127 static vnode_t *lxpr_lookup_sys_kdir_randdir(vnode_t *, char *);
 128 static vnode_t *lxpr_lookup_sys_netdir(vnode_t *, char *);
 129 static vnode_t *lxpr_lookup_sys_net_coredir(vnode_t *, char *);
 130 static vnode_t *lxpr_lookup_sys_vmdir(vnode_t *, char *);
 131 static vnode_t *lxpr_lookup_taskdir(vnode_t *, char *);
 132 static vnode_t *lxpr_lookup_task_tid_dir(vnode_t *, char *);
 133 
 134 static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
 135 static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
 136 static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
 137 static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
 138 static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
 139 static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *);
 140 static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *);
 141 static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *);
 142 static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *);
 143 static int lxpr_readdir_sys_kdir_randdir(lxpr_node_t *, uio_t *, int *);
 144 static int lxpr_readdir_sys_netdir(lxpr_node_t *, uio_t *, int *);
 145 static int lxpr_readdir_sys_net_coredir(lxpr_node_t *, uio_t *, int *);
 146 static int lxpr_readdir_sys_vmdir(lxpr_node_t *, uio_t *, int *);
 147 static int lxpr_readdir_taskdir(lxpr_node_t *, uio_t *, int *);
 148 static int lxpr_readdir_task_tid_dir(lxpr_node_t *, uio_t *, int *);
 149 
 150 static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
 151 static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
 152 static void lxpr_read_cgroups(lxpr_node_t *, lxpr_uiobuf_t *);
 153 static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
 154 static void lxpr_read_diskstats(lxpr_node_t *, lxpr_uiobuf_t *);
 155 static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
 156 static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
 157 static void lxpr_read_filesystems(lxpr_node_t *, lxpr_uiobuf_t *);
 158 static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t);
 159 static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
 160 static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
 161 static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
 162 static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
 163 static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
 164 static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *);
 165 static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
 166 static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
 167 
 168 static void lxpr_read_pid_auxv(lxpr_node_t *, lxpr_uiobuf_t *);
 169 static void lxpr_read_pid_cgroup(lxpr_node_t *, lxpr_uiobuf_t *);
 170 static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
 171 static void lxpr_read_pid_comm(lxpr_node_t *, lxpr_uiobuf_t *);
 172 static void lxpr_read_pid_env(lxpr_node_t *, lxpr_uiobuf_t *);
 173 static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *);
 174 static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
 175 static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *);
 176 static void lxpr_read_pid_oom_scr_adj(lxpr_node_t *, lxpr_uiobuf_t *);
 177 static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
 178 static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
 179 static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
 180 
 181 static void lxpr_read_pid_tid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
 182 static void lxpr_read_pid_tid_status(lxpr_node_t *, lxpr_uiobuf_t *);
 183 
 184 static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
 185 static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
 186 static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
 187 static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *);
 188 static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
 189 static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
 190 static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
 191 static void lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *);
 192 static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
 193 static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
 194 static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
 195 static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
 196 static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
 197 static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
 198 static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
 199 static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
 200 static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
 201 static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
 202 static void lxpr_read_net_tcp6(lxpr_node_t *, lxpr_uiobuf_t *);
 203 static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
 204 static void lxpr_read_net_udp6(lxpr_node_t *, lxpr_uiobuf_t *);
 205 static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
 206 static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *,
 207     lxpr_uiobuf_t *);
 208 static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *,
 209     lxpr_uiobuf_t *);
 210 static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *,
 211     lxpr_uiobuf_t *);
 212 static void lxpr_read_sys_kernel_caplcap(lxpr_node_t *, lxpr_uiobuf_t *);
 213 static void lxpr_read_sys_kernel_corepatt(lxpr_node_t *, lxpr_uiobuf_t *);
 214 static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *);
 215 static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *);
 216 static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *);
 217 static void lxpr_read_sys_kernel_osrel(lxpr_node_t *, lxpr_uiobuf_t *);
 218 static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *);
 219 static void lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *, lxpr_uiobuf_t *);
 220 static void lxpr_read_sys_kernel_sem(lxpr_node_t *, lxpr_uiobuf_t *);
 221 static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, lxpr_uiobuf_t *);
 222 static void lxpr_read_sys_kernel_shmmni(lxpr_node_t *, lxpr_uiobuf_t *);
 223 static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *);
 224 static void lxpr_read_sys_net_core_somaxc(lxpr_node_t *, lxpr_uiobuf_t *);
 225 static void lxpr_read_sys_vm_minfr_kb(lxpr_node_t *, lxpr_uiobuf_t *);
 226 static void lxpr_read_sys_vm_nhpages(lxpr_node_t *, lxpr_uiobuf_t *);
 227 static void lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *, lxpr_uiobuf_t *);
 228 static void lxpr_read_sys_vm_swappiness(lxpr_node_t *, lxpr_uiobuf_t *);
 229 
 230 static int lxpr_write_sys_net_core_somaxc(lxpr_node_t *, uio_t *, cred_t *,
 231     caller_context_t *);
 232 static int lxpr_write_sys_kernel_corepatt(lxpr_node_t *, uio_t *, cred_t *,
 233     caller_context_t *);
 234 
 235 /*
 236  * Simple conversion
 237  */
 238 #define btok(x) ((x) >> 10)                       /* bytes to kbytes */
 239 #define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */
 240 
 241 #define ttolxlwp(t)     ((struct lx_lwp_data *)ttolwpbrand(t))
 242 
 243 extern rctl_hndl_t rc_process_semmsl;
 244 extern rctl_hndl_t rc_process_semopm;
 245 extern rctl_hndl_t rc_zone_semmni;
 246 
 247 extern rctl_hndl_t rc_zone_msgmni;
 248 extern rctl_hndl_t rc_zone_shmmax;
 249 extern rctl_hndl_t rc_zone_shmmni;
 250 #define FOURGB  4294967295
 251 
 252 /*
 253  * The maximum length of the concatenation of argument vector strings we
 254  * will return to the user via the branded procfs. Likewise for the env vector.
 255  */
 256 int lxpr_maxargvlen = 4096;
 257 int lxpr_maxenvvlen = 4096;
 258 
 259 /*
 260  * The lx /proc vnode operations vector
 261  */
 262 const fs_operation_def_t lxpr_vnodeops_template[] = {
 263         VOPNAME_OPEN,           { .vop_open = lxpr_open },
 264         VOPNAME_CLOSE,          { .vop_close = lxpr_close },
 265         VOPNAME_READ,           { .vop_read = lxpr_read },
 266         VOPNAME_WRITE,          { .vop_read = lxpr_write },
 267         VOPNAME_GETATTR,        { .vop_getattr = lxpr_getattr },
 268         VOPNAME_ACCESS,         { .vop_access = lxpr_access },
 269         VOPNAME_LOOKUP,         { .vop_lookup = lxpr_lookup },
 270         VOPNAME_CREATE,         { .vop_create = lxpr_create },
 271         VOPNAME_READDIR,        { .vop_readdir = lxpr_readdir },
 272         VOPNAME_READLINK,       { .vop_readlink = lxpr_readlink },
 273         VOPNAME_FSYNC,          { .error = lxpr_sync },
 274         VOPNAME_SEEK,           { .error = lxpr_sync },
 275         VOPNAME_INACTIVE,       { .vop_inactive = lxpr_inactive },
 276         VOPNAME_CMP,            { .vop_cmp = lxpr_cmp },
 277         VOPNAME_REALVP,         { .vop_realvp = lxpr_realvp },
 278         NULL,                   NULL
 279 };
 280 
 281 
 282 /*
 283  * file contents of an lx /proc directory.
 284  */
 285 static lxpr_dirent_t lx_procdir[] = {
 286         { LXPR_CGROUPS,         "cgroups" },
 287         { LXPR_CMDLINE,         "cmdline" },
 288         { LXPR_CPUINFO,         "cpuinfo" },
 289         { LXPR_DEVICES,         "devices" },
 290         { LXPR_DISKSTATS,       "diskstats" },
 291         { LXPR_DMA,             "dma" },
 292         { LXPR_FILESYSTEMS,     "filesystems" },
 293         { LXPR_INTERRUPTS,      "interrupts" },
 294         { LXPR_IOPORTS,         "ioports" },
 295         { LXPR_KCORE,           "kcore" },
 296         { LXPR_KMSG,            "kmsg" },
 297         { LXPR_LOADAVG,         "loadavg" },
 298         { LXPR_MEMINFO,         "meminfo" },
 299         { LXPR_MODULES,         "modules" },
 300         { LXPR_MOUNTS,          "mounts" },
 301         { LXPR_NETDIR,          "net" },
 302         { LXPR_PARTITIONS,      "partitions" },
 303         { LXPR_SELF,            "self" },
 304         { LXPR_STAT,            "stat" },
 305         { LXPR_SWAPS,           "swaps" },
 306         { LXPR_SYSDIR,          "sys" },
 307         { LXPR_UPTIME,          "uptime" },
 308         { LXPR_VERSION,         "version" }
 309 };
 310 
 311 #define PROCDIRFILES    (sizeof (lx_procdir) / sizeof (lx_procdir[0]))
 312 
 313 /*
 314  * Contents of an lx /proc/<pid> directory.
 315  */
 316 static lxpr_dirent_t piddir[] = {
 317         { LXPR_PID_AUXV,        "auxv" },
 318         { LXPR_PID_CGROUP,      "cgroup" },
 319         { LXPR_PID_CMDLINE,     "cmdline" },
 320         { LXPR_PID_COMM,        "comm" },
 321         { LXPR_PID_CPU,         "cpu" },
 322         { LXPR_PID_CURDIR,      "cwd" },
 323         { LXPR_PID_ENV,         "environ" },
 324         { LXPR_PID_EXE,         "exe" },
 325         { LXPR_PID_LIMITS,      "limits" },
 326         { LXPR_PID_MAPS,        "maps" },
 327         { LXPR_PID_MEM,         "mem" },
 328         { LXPR_PID_MOUNTINFO,   "mountinfo" },
 329         { LXPR_PID_OOM_SCR_ADJ, "oom_score_adj" },
 330         { LXPR_PID_ROOTDIR,     "root" },
 331         { LXPR_PID_STAT,        "stat" },
 332         { LXPR_PID_STATM,       "statm" },
 333         { LXPR_PID_STATUS,      "status" },
 334         { LXPR_PID_TASKDIR,     "task" },
 335         { LXPR_PID_FDDIR,       "fd" }
 336 };
 337 
 338 #define PIDDIRFILES     (sizeof (piddir) / sizeof (piddir[0]))
 339 
 340 /*
 341  * Contents of an lx /proc/<pid>/task/<tid> directory.
 342  */
 343 static lxpr_dirent_t tiddir[] = {
 344         { LXPR_PID_TID_AUXV,    "auxv" },
 345         { LXPR_PID_CGROUP,      "cgroup" },
 346         { LXPR_PID_CMDLINE,     "cmdline" },
 347         { LXPR_PID_TID_COMM,    "comm" },
 348         { LXPR_PID_CPU,         "cpu" },
 349         { LXPR_PID_CURDIR,      "cwd" },
 350         { LXPR_PID_ENV,         "environ" },
 351         { LXPR_PID_EXE,         "exe" },
 352         { LXPR_PID_LIMITS,      "limits" },
 353         { LXPR_PID_MAPS,        "maps" },
 354         { LXPR_PID_MEM,         "mem" },
 355         { LXPR_PID_MOUNTINFO,   "mountinfo" },
 356         { LXPR_PID_TID_OOM_SCR_ADJ,     "oom_score_adj" },
 357         { LXPR_PID_ROOTDIR,     "root" },
 358         { LXPR_PID_TID_STAT,    "stat" },
 359         { LXPR_PID_STATM,       "statm" },
 360         { LXPR_PID_TID_STATUS,  "status" },
 361         { LXPR_PID_FDDIR,       "fd" }
 362 };
 363 
 364 #define TIDDIRFILES     (sizeof (tiddir) / sizeof (tiddir[0]))
 365 
 366 #define LX_RLIM_INFINITY        0xFFFFFFFFFFFFFFFF
 367 
 368 #define RCTL_INFINITE(x) \
 369         ((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \
 370         (x->rcv_flagaction & RCTL_GLOBAL_INFINITE))
 371 
 372 typedef struct lxpr_rlimtab {
 373         char    *rlim_name;     /* limit name */
 374         char    *rlim_unit;     /* limit unit */
 375         char    *rlim_rctl;     /* rctl source */
 376 } lxpr_rlimtab_t;
 377 
 378 static lxpr_rlimtab_t lxpr_rlimtab[] = {
 379         { "Max cpu time",       "seconds",      "process.max-cpu-time" },
 380         { "Max file size",      "bytes",        "process.max-file-size" },
 381         { "Max data size",      "bytes",        "process.max-data-size" },
 382         { "Max stack size",     "bytes",        "process.max-stack-size" },
 383         { "Max core file size", "bytes",        "process.max-core-size" },
 384         { "Max resident set",   "bytes",        "zone.max-physical-memory" },
 385         { "Max processes",      "processes",    "zone.max-lwps" },
 386         { "Max open files",     "files",        "process.max-file-descriptor" },
 387         { "Max locked memory",  "bytes",        "zone.max-locked-memory" },
 388         { "Max address space",  "bytes",        "process.max-address-space" },
 389         { "Max file locks",     "locks",        NULL },
 390         { "Max pending signals",        "signals",
 391                 "process.max-sigqueue-size" },
 392         { "Max msgqueue size",  "bytes",        "process.max-msg-messages" },
 393         { NULL, NULL, NULL }
 394 };
 395 
 396 
 397 /*
 398  * contents of lx /proc/net directory
 399  */
 400 static lxpr_dirent_t netdir[] = {
 401         { LXPR_NET_ARP,         "arp" },
 402         { LXPR_NET_DEV,         "dev" },
 403         { LXPR_NET_DEV_MCAST,   "dev_mcast" },
 404         { LXPR_NET_IF_INET6,    "if_inet6" },
 405         { LXPR_NET_IGMP,        "igmp" },
 406         { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
 407         { LXPR_NET_IP_MR_VIF,   "ip_mr_vif" },
 408         { LXPR_NET_IPV6_ROUTE,  "ipv6_route" },
 409         { LXPR_NET_MCFILTER,    "mcfilter" },
 410         { LXPR_NET_NETSTAT,     "netstat" },
 411         { LXPR_NET_RAW,         "raw" },
 412         { LXPR_NET_ROUTE,       "route" },
 413         { LXPR_NET_RPC,         "rpc" },
 414         { LXPR_NET_RT_CACHE,    "rt_cache" },
 415         { LXPR_NET_SOCKSTAT,    "sockstat" },
 416         { LXPR_NET_SNMP,        "snmp" },
 417         { LXPR_NET_STAT,        "stat" },
 418         { LXPR_NET_TCP,         "tcp" },
 419         { LXPR_NET_TCP6,        "tcp6" },
 420         { LXPR_NET_UDP,         "udp" },
 421         { LXPR_NET_UDP6,        "udp6" },
 422         { LXPR_NET_UNIX,        "unix" }
 423 };
 424 
 425 #define NETDIRFILES     (sizeof (netdir) / sizeof (netdir[0]))
 426 
 427 /*
 428  * contents of /proc/sys directory
 429  */
 430 static lxpr_dirent_t sysdir[] = {
 431         { LXPR_SYS_FSDIR,       "fs" },
 432         { LXPR_SYS_KERNELDIR,   "kernel" },
 433         { LXPR_SYS_NETDIR,      "net" },
 434         { LXPR_SYS_VMDIR,       "vm" },
 435 };
 436 
 437 #define SYSDIRFILES     (sizeof (sysdir) / sizeof (sysdir[0]))
 438 
 439 /*
 440  * contents of /proc/sys/fs directory
 441  */
 442 static lxpr_dirent_t sys_fsdir[] = {
 443         { LXPR_SYS_FS_INOTIFYDIR,       "inotify" },
 444 };
 445 
 446 #define SYS_FSDIRFILES (sizeof (sys_fsdir) / sizeof (sys_fsdir[0]))
 447 
 448 /*
 449  * contents of /proc/sys/fs/inotify directory
 450  */
 451 static lxpr_dirent_t sys_fs_inotifydir[] = {
 452         { LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS,        "max_queued_events" },
 453         { LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES,       "max_user_instances" },
 454         { LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES,         "max_user_watches" },
 455 };
 456 
 457 #define SYS_FS_INOTIFYDIRFILES \
 458         (sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0]))
 459 
 460 /*
 461  * contents of /proc/sys/kernel directory
 462  */
 463 static lxpr_dirent_t sys_kerneldir[] = {
 464         { LXPR_SYS_KERNEL_CAPLCAP,      "cap_last_cap" },
 465         { LXPR_SYS_KERNEL_COREPATT,     "core_pattern" },
 466         { LXPR_SYS_KERNEL_HOSTNAME,     "hostname" },
 467         { LXPR_SYS_KERNEL_MSGMNI,       "msgmni" },
 468         { LXPR_SYS_KERNEL_NGROUPS_MAX,  "ngroups_max" },
 469         { LXPR_SYS_KERNEL_OSREL,        "osrelease" },
 470         { LXPR_SYS_KERNEL_PID_MAX,      "pid_max" },
 471         { LXPR_SYS_KERNEL_RANDDIR,      "random" },
 472         { LXPR_SYS_KERNEL_SEM,          "sem" },
 473         { LXPR_SYS_KERNEL_SHMMAX,       "shmmax" },
 474         { LXPR_SYS_KERNEL_SHMMNI,       "shmmni" },
 475         { LXPR_SYS_KERNEL_THREADS_MAX,  "threads-max" },
 476 };
 477 
 478 #define SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0]))
 479 
 480 /*
 481  * contents of /proc/sys/kernel/random directory
 482  */
 483 static lxpr_dirent_t sys_randdir[] = {
 484         { LXPR_SYS_KERNEL_RAND_BOOTID,  "boot_id" },
 485 };
 486 
 487 #define SYS_RANDDIRFILES (sizeof (sys_randdir) / sizeof (sys_randdir[0]))
 488 
 489 /*
 490  * contents of /proc/sys/net directory
 491  */
 492 static lxpr_dirent_t sys_netdir[] = {
 493         { LXPR_SYS_NET_COREDIR,         "core" },
 494 };
 495 
 496 #define SYS_NETDIRFILES (sizeof (sys_netdir) / sizeof (sys_netdir[0]))
 497 
 498 /*
 499  * contents of /proc/sys/net/core directory
 500  */
 501 static lxpr_dirent_t sys_net_coredir[] = {
 502         { LXPR_SYS_NET_CORE_SOMAXCON,   "somaxconn" },
 503 };
 504 
 505 #define SYS_NET_COREDIRFILES \
 506         (sizeof (sys_net_coredir) / sizeof (sys_net_coredir[0]))
 507 
 508 /*
 509  * contents of /proc/sys/vm directory
 510  */
 511 static lxpr_dirent_t sys_vmdir[] = {
 512         { LXPR_SYS_VM_MINFR_KB,         "min_free_kbytes" },
 513         { LXPR_SYS_VM_NHUGEP,           "nr_hugepages" },
 514         { LXPR_SYS_VM_OVERCOMMIT_MEM,   "overcommit_memory" },
 515         { LXPR_SYS_VM_SWAPPINESS,       "swappiness" },
 516 };
 517 
 518 #define SYS_VMDIRFILES (sizeof (sys_vmdir) / sizeof (sys_vmdir[0]))
 519 
 520 /*
 521  * lxpr_open(): Vnode operation for VOP_OPEN()
 522  */
 523 static int
 524 lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 525 {
 526         vnode_t         *vp = *vpp;
 527         lxpr_node_t     *lxpnp = VTOLXP(vp);
 528         lxpr_nodetype_t type = lxpnp->lxpr_type;
 529         vnode_t         *rvp;
 530         int             error = 0;
 531 
 532         if (flag & FWRITE) {
 533                 /* Restrict writes to certain files */
 534                 switch (type) {
 535                 case LXPR_PID_OOM_SCR_ADJ:
 536                 case LXPR_PID_TID_OOM_SCR_ADJ:
 537                 case LXPR_SYS_KERNEL_COREPATT:
 538                 case LXPR_SYS_NET_CORE_SOMAXCON:
 539                 case LXPR_SYS_VM_OVERCOMMIT_MEM:
 540                 case LXPR_SYS_VM_SWAPPINESS:
 541                 case LXPR_PID_FD_FD:
 542                 case LXPR_PID_TID_FD_FD:
 543                         break;
 544                 default:
 545                         return (EPERM);
 546                 }
 547         }
 548 
 549         /*
 550          * If we are opening an underlying file only allow regular files,
 551          * fifos or sockets; reject the open for anything else.
 552          * Just do it if we are opening the current or root directory.
 553          */
 554         if (lxpnp->lxpr_realvp != NULL) {
 555                 rvp = lxpnp->lxpr_realvp;
 556 
 557                 if (type == LXPR_PID_FD_FD && rvp->v_type != VREG &&
 558                     rvp->v_type != VFIFO && rvp->v_type != VSOCK) {
 559                         error = EACCES;
 560                 } else {
 561                         if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) {
 562                                 /*
 563                                  * This flag lets the fifo open know that
 564                                  * we're using proc/fd to open a fd which we
 565                                  * already have open. Otherwise, the fifo might
 566                                  * reject an open if the other end has closed.
 567                                  */
 568                                 flag |= FKLYR;
 569                         }
 570                         /*
 571                          * Need to hold rvp since VOP_OPEN() may release it.
 572                          */
 573                         VN_HOLD(rvp);
 574                         error = VOP_OPEN(&rvp, flag, cr, ct);
 575                         if (error) {
 576                                 VN_RELE(rvp);
 577                         } else {
 578                                 *vpp = rvp;
 579                                 VN_RELE(vp);
 580                         }
 581                 }
 582         }
 583 
 584         return (error);
 585 }
 586 
 587 
 588 /*
 589  * lxpr_close(): Vnode operation for VOP_CLOSE()
 590  */
 591 /* ARGSUSED */
 592 static int
 593 lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
 594     caller_context_t *ct)
 595 {
 596         lxpr_node_t     *lxpr = VTOLXP(vp);
 597         lxpr_nodetype_t type = lxpr->lxpr_type;
 598 
 599         /*
 600          * we should never get here because the close is done on the realvp
 601          * for these nodes
 602          */
 603         ASSERT(type != LXPR_PID_FD_FD &&
 604             type != LXPR_PID_CURDIR &&
 605             type != LXPR_PID_ROOTDIR &&
 606             type != LXPR_PID_EXE);
 607 
 608         return (0);
 609 }
 610 
 611 static void (*lxpr_read_function[LXPR_NFILES])() = {
 612         lxpr_read_isdir,                /* /proc                */
 613         lxpr_read_isdir,                /* /proc/<pid>            */
 614         lxpr_read_pid_auxv,             /* /proc/<pid>/auxv       */
 615         lxpr_read_pid_cgroup,           /* /proc/<pid>/cgroup     */
 616         lxpr_read_pid_cmdline,          /* /proc/<pid>/cmdline    */
 617         lxpr_read_pid_comm,             /* /proc/<pid>/comm       */
 618         lxpr_read_empty,                /* /proc/<pid>/cpu        */
 619         lxpr_read_invalid,              /* /proc/<pid>/cwd        */
 620         lxpr_read_pid_env,              /* /proc/<pid>/environ    */
 621         lxpr_read_invalid,              /* /proc/<pid>/exe        */
 622         lxpr_read_pid_limits,           /* /proc/<pid>/limits     */
 623         lxpr_read_pid_maps,             /* /proc/<pid>/maps       */
 624         lxpr_read_empty,                /* /proc/<pid>/mem        */
 625         lxpr_read_pid_mountinfo,        /* /proc/<pid>/mountinfo */
 626         lxpr_read_pid_oom_scr_adj,      /* /proc/<pid>/oom_score_adj */
 627         lxpr_read_invalid,              /* /proc/<pid>/root       */
 628         lxpr_read_pid_stat,             /* /proc/<pid>/stat       */
 629         lxpr_read_pid_statm,            /* /proc/<pid>/statm      */
 630         lxpr_read_pid_status,           /* /proc/<pid>/status     */
 631         lxpr_read_isdir,                /* /proc/<pid>/task       */
 632         lxpr_read_isdir,                /* /proc/<pid>/task/nn    */
 633         lxpr_read_isdir,                /* /proc/<pid>/fd */
 634         lxpr_read_fd,                   /* /proc/<pid>/fd/nn      */
 635         lxpr_read_pid_auxv,             /* /proc/<pid>/task/<tid>/auxv      */
 636         lxpr_read_pid_cgroup,           /* /proc/<pid>/task/<tid>/cgroup */
 637         lxpr_read_pid_cmdline,          /* /proc/<pid>/task/<tid>/cmdline */
 638         lxpr_read_pid_comm,             /* /proc/<pid>/task/<tid>/comm      */
 639         lxpr_read_empty,                /* /proc/<pid>/task/<tid>/cpu       */
 640         lxpr_read_invalid,              /* /proc/<pid>/task/<tid>/cwd       */
 641         lxpr_read_pid_env,              /* /proc/<pid>/task/<tid>/environ */
 642         lxpr_read_invalid,              /* /proc/<pid>/task/<tid>/exe       */
 643         lxpr_read_pid_limits,           /* /proc/<pid>/task/<tid>/limits */
 644         lxpr_read_pid_maps,             /* /proc/<pid>/task/<tid>/maps      */
 645         lxpr_read_empty,                /* /proc/<pid>/task/<tid>/mem       */
 646         lxpr_read_pid_mountinfo,        /* /proc/<pid>/task/<tid>/mountinfo */
 647         lxpr_read_pid_oom_scr_adj,      /* /proc/<pid>/task/<tid>/oom_scr_adj */
 648         lxpr_read_invalid,              /* /proc/<pid>/task/<tid>/root      */
 649         lxpr_read_pid_tid_stat,         /* /proc/<pid>/task/<tid>/stat      */
 650         lxpr_read_pid_statm,            /* /proc/<pid>/task/<tid>/statm     */
 651         lxpr_read_pid_tid_status,       /* /proc/<pid>/task/<tid>/status */
 652         lxpr_read_isdir,                /* /proc/<pid>/task/<tid>/fd        */
 653         lxpr_read_fd,                   /* /proc/<pid>/task/<tid>/fd/nn     */
 654         lxpr_read_cgroups,              /* /proc/cgroups        */
 655         lxpr_read_empty,                /* /proc/cmdline        */
 656         lxpr_read_cpuinfo,              /* /proc/cpuinfo        */
 657         lxpr_read_empty,                /* /proc/devices        */
 658         lxpr_read_diskstats,            /* /proc/diskstats      */
 659         lxpr_read_empty,                /* /proc/dma            */
 660         lxpr_read_filesystems,          /* /proc/filesystems    */
 661         lxpr_read_empty,                /* /proc/interrupts     */
 662         lxpr_read_empty,                /* /proc/ioports        */
 663         lxpr_read_empty,                /* /proc/kcore          */
 664         lxpr_read_invalid,              /* /proc/kmsg -- see lxpr_read() */
 665         lxpr_read_loadavg,              /* /proc/loadavg        */
 666         lxpr_read_meminfo,              /* /proc/meminfo        */
 667         lxpr_read_empty,                /* /proc/modules        */
 668         lxpr_read_mounts,               /* /proc/mounts         */
 669         lxpr_read_isdir,                /* /proc/net            */
 670         lxpr_read_net_arp,              /* /proc/net/arp        */
 671         lxpr_read_net_dev,              /* /proc/net/dev        */
 672         lxpr_read_net_dev_mcast,        /* /proc/net/dev_mcast  */
 673         lxpr_read_net_if_inet6,         /* /proc/net/if_inet6   */
 674         lxpr_read_net_igmp,             /* /proc/net/igmp       */
 675         lxpr_read_net_ip_mr_cache,      /* /proc/net/ip_mr_cache */
 676         lxpr_read_net_ip_mr_vif,        /* /proc/net/ip_mr_vif  */
 677         lxpr_read_net_ipv6_route,       /* /proc/net/ipv6_route */
 678         lxpr_read_net_mcfilter,         /* /proc/net/mcfilter   */
 679         lxpr_read_net_netstat,          /* /proc/net/netstat    */
 680         lxpr_read_net_raw,              /* /proc/net/raw        */
 681         lxpr_read_net_route,            /* /proc/net/route      */
 682         lxpr_read_net_rpc,              /* /proc/net/rpc        */
 683         lxpr_read_net_rt_cache,         /* /proc/net/rt_cache   */
 684         lxpr_read_net_sockstat,         /* /proc/net/sockstat   */
 685         lxpr_read_net_snmp,             /* /proc/net/snmp       */
 686         lxpr_read_net_stat,             /* /proc/net/stat       */
 687         lxpr_read_net_tcp,              /* /proc/net/tcp        */
 688         lxpr_read_net_tcp6,             /* /proc/net/tcp6       */
 689         lxpr_read_net_udp,              /* /proc/net/udp        */
 690         lxpr_read_net_udp6,             /* /proc/net/udp6       */
 691         lxpr_read_net_unix,             /* /proc/net/unix       */
 692         lxpr_read_partitions,           /* /proc/partitions     */
 693         lxpr_read_invalid,              /* /proc/self           */
 694         lxpr_read_stat,                 /* /proc/stat           */
 695         lxpr_read_swaps,                /* /proc/swaps          */
 696         lxpr_read_invalid,              /* /proc/sys            */
 697         lxpr_read_invalid,              /* /proc/sys/fs         */
 698         lxpr_read_invalid,              /* /proc/sys/fs/inotify */
 699         lxpr_read_sys_fs_inotify_max_queued_events, /* max_queued_events */
 700         lxpr_read_sys_fs_inotify_max_user_instances, /* max_user_instances */
 701         lxpr_read_sys_fs_inotify_max_user_watches, /* max_user_watches */
 702         lxpr_read_invalid,              /* /proc/sys/kernel     */
 703         lxpr_read_sys_kernel_caplcap,   /* /proc/sys/kernel/cap_last_cap */
 704         lxpr_read_sys_kernel_corepatt,  /* /proc/sys/kernel/core_pattern */
 705         lxpr_read_sys_kernel_hostname,  /* /proc/sys/kernel/hostname */
 706         lxpr_read_sys_kernel_msgmni,    /* /proc/sys/kernel/msgmni */
 707         lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */
 708         lxpr_read_sys_kernel_osrel,     /* /proc/sys/kernel/osrelease */
 709         lxpr_read_sys_kernel_pid_max,   /* /proc/sys/kernel/pid_max */
 710         lxpr_read_invalid,              /* /proc/sys/kernel/random */
 711         lxpr_read_sys_kernel_rand_bootid, /* /proc/sys/kernel/random/boot_id */
 712         lxpr_read_sys_kernel_sem,       /* /proc/sys/kernel/sem */
 713         lxpr_read_sys_kernel_shmmax,    /* /proc/sys/kernel/shmmax */
 714         lxpr_read_sys_kernel_shmmni,    /* /proc/sys/kernel/shmmni */
 715         lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */
 716         lxpr_read_invalid,              /* /proc/sys/net        */
 717         lxpr_read_invalid,              /* /proc/sys/net/core   */
 718         lxpr_read_sys_net_core_somaxc,  /* /proc/sys/net/core/somaxconn */
 719         lxpr_read_invalid,              /* /proc/sys/vm */
 720         lxpr_read_sys_vm_minfr_kb,      /* /proc/sys/vm/min_free_kbytes */
 721         lxpr_read_sys_vm_nhpages,       /* /proc/sys/vm/nr_hugepages */
 722         lxpr_read_sys_vm_overcommit_mem, /* /proc/sys/vm/overcommit_memory */
 723         lxpr_read_sys_vm_swappiness,    /* /proc/sys/vm/swappiness */
 724         lxpr_read_uptime,               /* /proc/uptime         */
 725         lxpr_read_version,              /* /proc/version        */
 726 };
 727 
 728 /*
 729  * Array of lookup functions, indexed by lx /proc file type.
      *
      * There is one slot per node type (LXPR_NFILES in total); the trailing
      * comment on each entry names the /proc path that the slot serves.  Slots
      * for directories carry a directory-specific lookup routine, and every
      * other slot carries lxpr_lookup_not_a_dir().  Because the table is indexed
      * by node type, the order of the entries is significant.
 730  */
 731 static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
 732         lxpr_lookup_procdir,            /* /proc                */
 733         lxpr_lookup_piddir,             /* /proc/<pid>            */
 734         lxpr_lookup_not_a_dir,          /* /proc/<pid>/auxv       */
 735         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cgroup     */
 736         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cmdline    */
 737         lxpr_lookup_not_a_dir,          /* /proc/<pid>/comm       */
 738         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cpu        */
 739         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cwd        */
 740         lxpr_lookup_not_a_dir,          /* /proc/<pid>/environ    */
 741         lxpr_lookup_not_a_dir,          /* /proc/<pid>/exe        */
 742         lxpr_lookup_not_a_dir,          /* /proc/<pid>/limits     */
 743         lxpr_lookup_not_a_dir,          /* /proc/<pid>/maps       */
 744         lxpr_lookup_not_a_dir,          /* /proc/<pid>/mem        */
 745         lxpr_lookup_not_a_dir,          /* /proc/<pid>/mountinfo */
 746         lxpr_lookup_not_a_dir,          /* /proc/<pid>/oom_score_adj */
 747         lxpr_lookup_not_a_dir,          /* /proc/<pid>/root       */
 748         lxpr_lookup_not_a_dir,          /* /proc/<pid>/stat       */
 749         lxpr_lookup_not_a_dir,          /* /proc/<pid>/statm      */
 750         lxpr_lookup_not_a_dir,          /* /proc/<pid>/status     */
 751         lxpr_lookup_taskdir,            /* /proc/<pid>/task       */
 752         lxpr_lookup_task_tid_dir,       /* /proc/<pid>/task/nn    */
 753         lxpr_lookup_fddir,              /* /proc/<pid>/fd */
 754         lxpr_lookup_not_a_dir,          /* /proc/<pid>/fd/nn      */
 755         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/auxv      */
 756         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cgroup */
 757         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cmdline */
 758         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/comm      */
 759         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cpu       */
 760         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cwd       */
 761         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/environ */
 762         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/exe       */
 763         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/limits */
 764         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/maps      */
 765         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/mem       */
 766         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/mountinfo */
 767         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/oom_scr_adj */
 768         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/root      */
 769         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/stat      */
 770         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/statm     */
 771         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/status */
 772         lxpr_lookup_fddir,              /* /proc/<pid>/task/<tid>/fd        */
 773         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/fd/nn     */
 774         lxpr_lookup_not_a_dir,          /* /proc/cgroups        */
 775         lxpr_lookup_not_a_dir,          /* /proc/cmdline        */
 776         lxpr_lookup_not_a_dir,          /* /proc/cpuinfo        */
 777         lxpr_lookup_not_a_dir,          /* /proc/devices        */
 778         lxpr_lookup_not_a_dir,          /* /proc/diskstats      */
 779         lxpr_lookup_not_a_dir,          /* /proc/dma            */
 780         lxpr_lookup_not_a_dir,          /* /proc/filesystems    */
 781         lxpr_lookup_not_a_dir,          /* /proc/interrupts     */
 782         lxpr_lookup_not_a_dir,          /* /proc/ioports        */
 783         lxpr_lookup_not_a_dir,          /* /proc/kcore          */
 784         lxpr_lookup_not_a_dir,          /* /proc/kmsg           */
 785         lxpr_lookup_not_a_dir,          /* /proc/loadavg        */
 786         lxpr_lookup_not_a_dir,          /* /proc/meminfo        */
 787         lxpr_lookup_not_a_dir,          /* /proc/modules        */
 788         lxpr_lookup_not_a_dir,          /* /proc/mounts         */
 789         lxpr_lookup_netdir,             /* /proc/net            */
 790         lxpr_lookup_not_a_dir,          /* /proc/net/arp        */
 791         lxpr_lookup_not_a_dir,          /* /proc/net/dev        */
 792         lxpr_lookup_not_a_dir,          /* /proc/net/dev_mcast  */
 793         lxpr_lookup_not_a_dir,          /* /proc/net/if_inet6   */
 794         lxpr_lookup_not_a_dir,          /* /proc/net/igmp       */
 795         lxpr_lookup_not_a_dir,          /* /proc/net/ip_mr_cache */
 796         lxpr_lookup_not_a_dir,          /* /proc/net/ip_mr_vif  */
 797         lxpr_lookup_not_a_dir,          /* /proc/net/ipv6_route */
 798         lxpr_lookup_not_a_dir,          /* /proc/net/mcfilter   */
 799         lxpr_lookup_not_a_dir,          /* /proc/net/netstat    */
 800         lxpr_lookup_not_a_dir,          /* /proc/net/raw        */
 801         lxpr_lookup_not_a_dir,          /* /proc/net/route      */
 802         lxpr_lookup_not_a_dir,          /* /proc/net/rpc        */
 803         lxpr_lookup_not_a_dir,          /* /proc/net/rt_cache   */
 804         lxpr_lookup_not_a_dir,          /* /proc/net/sockstat   */
 805         lxpr_lookup_not_a_dir,          /* /proc/net/snmp       */
 806         lxpr_lookup_not_a_dir,          /* /proc/net/stat       */
 807         lxpr_lookup_not_a_dir,          /* /proc/net/tcp        */
 808         lxpr_lookup_not_a_dir,          /* /proc/net/tcp6       */
 809         lxpr_lookup_not_a_dir,          /* /proc/net/udp        */
 810         lxpr_lookup_not_a_dir,          /* /proc/net/udp6       */
 811         lxpr_lookup_not_a_dir,          /* /proc/net/unix       */
 812         lxpr_lookup_not_a_dir,          /* /proc/partitions     */
 813         lxpr_lookup_not_a_dir,          /* /proc/self           */
 814         lxpr_lookup_not_a_dir,          /* /proc/stat           */
 815         lxpr_lookup_not_a_dir,          /* /proc/swaps          */
 816         lxpr_lookup_sysdir,             /* /proc/sys            */
 817         lxpr_lookup_sys_fsdir,          /* /proc/sys/fs         */
 818         lxpr_lookup_sys_fs_inotifydir,  /* /proc/sys/fs/inotify */
 819         lxpr_lookup_not_a_dir,          /* .../inotify/max_queued_events */
 820         lxpr_lookup_not_a_dir,          /* .../inotify/max_user_instances */
 821         lxpr_lookup_not_a_dir,          /* .../inotify/max_user_watches */
 822         lxpr_lookup_sys_kerneldir,      /* /proc/sys/kernel     */
 823         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/cap_last_cap */
 824         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/core_pattern */
 825         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/hostname */
 826         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/msgmni */
 827         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/ngroups_max */
 828         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/osrelease */
 829         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/pid_max */
 830         lxpr_lookup_sys_kdir_randdir,   /* /proc/sys/kernel/random */
 831         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/random/boot_id */
 832         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/sem */
 833         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/shmmax */
 834         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/shmmni */
 835         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/threads-max */
 836         lxpr_lookup_sys_netdir,         /* /proc/sys/net */
 837         lxpr_lookup_sys_net_coredir,    /* /proc/sys/net/core */
 838         lxpr_lookup_not_a_dir,          /* /proc/sys/net/core/somaxconn */
 839         lxpr_lookup_sys_vmdir,          /* /proc/sys/vm */
 840         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/min_free_kbytes */
 841         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/nr_hugepages */
 842         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/overcommit_memory */
 843         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/swappiness */
 844         lxpr_lookup_not_a_dir,          /* /proc/uptime         */
 845         lxpr_lookup_not_a_dir,          /* /proc/version        */
 846 };
 847 
 848 /*
 849  * Array of readdir functions, indexed by /proc file type.
      *
      * There is one slot per node type (LXPR_NFILES in total); the trailing
      * comment on each entry names the /proc path that the slot serves.  Slots
      * for directories carry a directory-specific readdir routine, and every
      * other slot carries lxpr_readdir_not_a_dir().  Because the table is
      * indexed by node type, the order of the entries is significant.
 850  */
 851 static int (*lxpr_readdir_function[LXPR_NFILES])() = {
 852         lxpr_readdir_procdir,           /* /proc                */
 853         lxpr_readdir_piddir,            /* /proc/<pid>            */
 854         lxpr_readdir_not_a_dir,         /* /proc/<pid>/auxv       */
 855         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cgroup     */
 856         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cmdline    */
 857         lxpr_readdir_not_a_dir,         /* /proc/<pid>/comm       */
 858         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cpu        */
 859         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cwd        */
 860         lxpr_readdir_not_a_dir,         /* /proc/<pid>/environ    */
 861         lxpr_readdir_not_a_dir,         /* /proc/<pid>/exe        */
 862         lxpr_readdir_not_a_dir,         /* /proc/<pid>/limits     */
 863         lxpr_readdir_not_a_dir,         /* /proc/<pid>/maps       */
 864         lxpr_readdir_not_a_dir,         /* /proc/<pid>/mem        */
 865         lxpr_readdir_not_a_dir,         /* /proc/<pid>/mountinfo */
 866         lxpr_readdir_not_a_dir,         /* /proc/<pid>/oom_score_adj */
 867         lxpr_readdir_not_a_dir,         /* /proc/<pid>/root       */
 868         lxpr_readdir_not_a_dir,         /* /proc/<pid>/stat       */
 869         lxpr_readdir_not_a_dir,         /* /proc/<pid>/statm      */
 870         lxpr_readdir_not_a_dir,         /* /proc/<pid>/status     */
 871         lxpr_readdir_taskdir,           /* /proc/<pid>/task       */
 872         lxpr_readdir_task_tid_dir,      /* /proc/<pid>/task/nn    */
 873         lxpr_readdir_fddir,             /* /proc/<pid>/fd */
 874         lxpr_readdir_not_a_dir,         /* /proc/<pid>/fd/nn      */
 875         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/auxv      */
 876         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cgroup */
 877         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cmdline */
 878         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/comm      */
 879         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cpu       */
 880         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cwd       */
 881         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/environ */
 882         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/exe       */
 883         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/limits */
 884         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/maps      */
 885         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/mem       */
 886         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/mountinfo */
 887         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/oom_scr_adj */
 888         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/root      */
 889         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/stat      */
 890         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/statm     */
 891         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/status */
 892         lxpr_readdir_fddir,             /* /proc/<pid>/task/<tid>/fd        */
 893         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/fd/nn     */
 894         lxpr_readdir_not_a_dir,         /* /proc/cgroups        */
 895         lxpr_readdir_not_a_dir,         /* /proc/cmdline        */
 896         lxpr_readdir_not_a_dir,         /* /proc/cpuinfo        */
 897         lxpr_readdir_not_a_dir,         /* /proc/devices        */
 898         lxpr_readdir_not_a_dir,         /* /proc/diskstats      */
 899         lxpr_readdir_not_a_dir,         /* /proc/dma            */
 900         lxpr_readdir_not_a_dir,         /* /proc/filesystems    */
 901         lxpr_readdir_not_a_dir,         /* /proc/interrupts     */
 902         lxpr_readdir_not_a_dir,         /* /proc/ioports        */
 903         lxpr_readdir_not_a_dir,         /* /proc/kcore          */
 904         lxpr_readdir_not_a_dir,         /* /proc/kmsg           */
 905         lxpr_readdir_not_a_dir,         /* /proc/loadavg        */
 906         lxpr_readdir_not_a_dir,         /* /proc/meminfo        */
 907         lxpr_readdir_not_a_dir,         /* /proc/modules        */
 908         lxpr_readdir_not_a_dir,         /* /proc/mounts         */
 909         lxpr_readdir_netdir,            /* /proc/net            */
 910         lxpr_readdir_not_a_dir,         /* /proc/net/arp        */
 911         lxpr_readdir_not_a_dir,         /* /proc/net/dev        */
 912         lxpr_readdir_not_a_dir,         /* /proc/net/dev_mcast  */
 913         lxpr_readdir_not_a_dir,         /* /proc/net/if_inet6   */
 914         lxpr_readdir_not_a_dir,         /* /proc/net/igmp       */
 915         lxpr_readdir_not_a_dir,         /* /proc/net/ip_mr_cache */
 916         lxpr_readdir_not_a_dir,         /* /proc/net/ip_mr_vif  */
 917         lxpr_readdir_not_a_dir,         /* /proc/net/ipv6_route */
 918         lxpr_readdir_not_a_dir,         /* /proc/net/mcfilter   */
 919         lxpr_readdir_not_a_dir,         /* /proc/net/netstat    */
 920         lxpr_readdir_not_a_dir,         /* /proc/net/raw        */
 921         lxpr_readdir_not_a_dir,         /* /proc/net/route      */
 922         lxpr_readdir_not_a_dir,         /* /proc/net/rpc        */
 923         lxpr_readdir_not_a_dir,         /* /proc/net/rt_cache   */
 924         lxpr_readdir_not_a_dir,         /* /proc/net/sockstat   */
 925         lxpr_readdir_not_a_dir,         /* /proc/net/snmp       */
 926         lxpr_readdir_not_a_dir,         /* /proc/net/stat       */
 927         lxpr_readdir_not_a_dir,         /* /proc/net/tcp        */
 928         lxpr_readdir_not_a_dir,         /* /proc/net/tcp6       */
 929         lxpr_readdir_not_a_dir,         /* /proc/net/udp        */
 930         lxpr_readdir_not_a_dir,         /* /proc/net/udp6       */
 931         lxpr_readdir_not_a_dir,         /* /proc/net/unix       */
 932         lxpr_readdir_not_a_dir,         /* /proc/partitions     */
 933         lxpr_readdir_not_a_dir,         /* /proc/self           */
 934         lxpr_readdir_not_a_dir,         /* /proc/stat           */
 935         lxpr_readdir_not_a_dir,         /* /proc/swaps          */
 936         lxpr_readdir_sysdir,            /* /proc/sys            */
 937         lxpr_readdir_sys_fsdir,         /* /proc/sys/fs         */
 938         lxpr_readdir_sys_fs_inotifydir, /* /proc/sys/fs/inotify */
 939         lxpr_readdir_not_a_dir,         /* .../inotify/max_queued_events */
 940         lxpr_readdir_not_a_dir,         /* .../inotify/max_user_instances */
 941         lxpr_readdir_not_a_dir,         /* .../inotify/max_user_watches */
 942         lxpr_readdir_sys_kerneldir,     /* /proc/sys/kernel     */
 943         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/cap_last_cap */
 944         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/core_pattern */
 945         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/hostname */
 946         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/msgmni */
 947         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/ngroups_max */
 948         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/osrelease */
 949         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/pid_max */
 950         lxpr_readdir_sys_kdir_randdir,  /* /proc/sys/kernel/random */
 951         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/random/boot_id */
 952         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/sem */
 953         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/shmmax */
 954         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/shmmni */
 955         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/threads-max */
 956         lxpr_readdir_sys_netdir,        /* /proc/sys/net */
 957         lxpr_readdir_sys_net_coredir,   /* /proc/sys/net/core */
 958         lxpr_readdir_not_a_dir,         /* /proc/sys/net/core/somaxconn */
 959         lxpr_readdir_sys_vmdir,         /* /proc/sys/vm */
 960         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/min_free_kbytes */
 961         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/nr_hugepages */
 962         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/overcommit_memory */
 963         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/swappiness */
 964         lxpr_readdir_not_a_dir,         /* /proc/uptime         */
 965         lxpr_readdir_not_a_dir,         /* /proc/version        */
 966 };
 967 
 968 
 969 /*
 970  * lxpr_read(): Vnode operation for VOP_READ()
 971  *
 972  * As the format of all the files that can be read in the lx procfs is human
 973  * readable and not binary structures there do not have to be different
 974  * read variants depending on whether the reading process model is 32 or 64 bits
 975  * (at least in general, and certainly the difference is unlikely to be enough
 976  * to justify having different routines for 32 and 64 bit reads).
      *
      * LXPR_KMSG is special-cased: /dev/log is opened through the layered driver
      * interface, told via I_CONSLOG that console messages are wanted, and then
      * passed to lxpr_read_kmsg().  All other node types are dispatched through
      * lxpr_read_function[].
      *
      * Returns the error from the first failing LDI call, otherwise the result
      * of lxpr_uiobuf_flush().  The uiobuf allocated on entry is freed on every
      * return path, and the LDI handle is closed even if the ioctl fails.
 977  */
 978 /* ARGSUSED */
 979 static int
 980 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 981     caller_context_t *ct)
 982 {
 983         lxpr_node_t *lxpnp = VTOLXP(vp);
 984         lxpr_nodetype_t type = lxpnp->lxpr_type;
 985         lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
 986         int error;
 987 
 988         ASSERT(type < LXPR_NFILES);
 989 
 990         if (type == LXPR_KMSG) {
 991                 ldi_ident_t     li = VTOLXPM(vp)->lxprm_li;
 992                 ldi_handle_t    ldih;
 993                 struct strioctl str;
 994                 int             rv;
 995 
 996                 /*
 997                  * Open the zone's console device using the layered driver
 998                  * interface.
 999                  */
1000                 if ((error =
1001                     ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li)) != 0)
1002                         goto out;
1003 
1004                 /*
1005                  * Send an ioctl to the underlying console device, letting it
1006                  * know we're interested in getting console messages.
1007                  */
1008                 str.ic_cmd = I_CONSLOG;
1009                 str.ic_timout = 0;
1010                 str.ic_len = 0;
1011                 str.ic_dp = NULL;
1012                 if ((error = ldi_ioctl(ldih, I_STR,
1013                     (intptr_t)&str, FKIOCTL, cr, &rv)) != 0) {
                             /* The handle is open; close it before bailing. */
                             (void) ldi_close(ldih, FREAD, cr);
1014                         goto out;
                     }
1015 
1016                 lxpr_read_kmsg(lxpnp, uiobuf, ldih);
1017 
1018                 if ((error = ldi_close(ldih, FREAD, cr)) != 0)
1019                         goto out;
1020         } else {
1021                 lxpr_read_function[type](lxpnp, uiobuf);
1022         }
1023 
1024         error = lxpr_uiobuf_flush(uiobuf);
     out:
             /* Reached from every path so that uiobuf is never leaked. */
1025         lxpr_uiobuf_free(uiobuf);
1026 
1027         return (error);
1028 }
1029 
1030 /*
1031  * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
1032  *
1033  * Various special case reads:
1034  * - trying to read a directory (lxpr_read_isdir() sets EISDIR)
1035  * - invalid file (used to mean a file that should be implemented,
1036  *   but isn't yet; lxpr_read_invalid() sets EINVAL)
1037  * - empty file (lxpr_read_empty() writes nothing and sets no error)
1038  * - wait to be able to read a file that will never have anything to read
      *   (NOTE(review): none of the three routines named above does this)
1039  */
1040 /* ARGSUSED */
1041 static void
1042 lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1043 {
1044         lxpr_uiobuf_seterr(uiobuf, EISDIR);
1045 }
1046 
1047 /* ARGSUSED */
1048 static void
1049 lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1050 {
             /* Reads of this node type always fail with EINVAL. */
1051         lxpr_uiobuf_seterr(uiobuf, EINVAL);
1052 }
1053 
1054 /* ARGSUSED */
1055 static void
1056 lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1057 {
             /* Intentionally empty: nothing is written and no error is set. */
1058 }
1059 
1060 /*
1061  * lxpr_read_pid_auxv(): read process aux vector
      *
      * Sets EINVAL on uiobuf when lxpr_lock() returns NULL.  A process without
      * LX brand data gets a single zeroed (AT_NULL) record.  Otherwise each
      * p_user.u_auxv entry is passed through lx_auxv_stol(); entries for which
      * it returns non-zero are left out, and the scan ends once the AT_NULL
      * entry has been processed.  Records are emitted as auxv_t for
      * DATAMODEL_NATIVE processes and, under _SYSCALL32_IMPL, as auxv32_t for
      * all others.
      *
      * On every path the process is unlocked before the records are written to
      * uiobuf; only the local copy is referenced after the unlock.
1062  */
1063 static void
1064 lxpr_read_pid_auxv(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1065 {
1066         proc_t *p;
1067         lx_proc_data_t *pd;
1068         lx_elf_data_t *edp = NULL;
1069         int i, cnt;
1070 
1071         ASSERT(lxpnp->lxpr_type == LXPR_PID_AUXV ||
1072             lxpnp->lxpr_type == LXPR_PID_TID_AUXV);
1073 
1074         p = lxpr_lock(lxpnp->lxpr_pid);
1075 
1076         if (p == NULL) {
1077                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1078                 return;
1079         }
1080         if ((pd = ptolxproc(p)) == NULL) {
1081                 /* Emit a single AT_NULL record for non-branded processes */
1082                 auxv_t buf;
1083 
1084                 bzero(&buf, sizeof (buf));
1085                 lxpr_unlock(p);
1086                 lxpr_uiobuf_write(uiobuf, (char *)&buf, sizeof (buf));
1087                 return;
1088         } else {
1089                 edp = &pd->l_elf_data;
1090         }
1091 
1092         if (p->p_model == DATAMODEL_NATIVE) {
1093                 auxv_t buf[__KERN_NAUXV_IMPL];
1094 
1095                 /*
1096                  * Because a_type is only of size int (not long), the buffer
1097                  * contents must be zeroed first to ensure cleanliness.
1098                  */
1099                 bzero(buf, sizeof (buf));
1100                 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
1101                         if (lx_auxv_stol(&p->p_user.u_auxv[i],
1102                             &buf[cnt], edp) == 0) {
1103                                 cnt++;
1104                         }
1105                         if (p->p_user.u_auxv[i].a_type == AT_NULL) {
1106                                 break;
1107                         }
1108                 }
                     /*
                      * Unlock before writing, as the other two paths do; buf is
                      * a private copy, so the process is no longer needed.
                      */
1109                 lxpr_unlock(p);
1110                 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
1111         }
1112 #if defined(_SYSCALL32_IMPL)
1113         else {
1114                 auxv32_t buf[__KERN_NAUXV_IMPL];
1115 
1116                 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
1117                         auxv_t temp;
1118 
1119                         if (lx_auxv_stol(&p->p_user.u_auxv[i],
1120                             &temp, edp) == 0) {
                                     /* Narrow the translated entry to 32 bits. */
1121                                 buf[cnt].a_type = (int)temp.a_type;
1122                                 buf[cnt].a_un.a_val = (int)temp.a_un.a_val;
1123                                 cnt++;
1124                         }
1125                         if (p->p_user.u_auxv[i].a_type == AT_NULL) {
1126                                 break;
1127                         }
1128                 }
1129                 lxpr_unlock(p);
1130                 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
1131         }
1132 #endif /* defined(_SYSCALL32_IMPL) */
1133 }
1134 
1135 /*
1136  * lxpr_read_pid_cgroup(): read cgroups for process
      *
      * The output is a fixed placeholder line that does not depend on the
      * process.  EINVAL is set on uiobuf when lxpr_lock() returns NULL.
1137  */
1138 static void
1139 lxpr_read_pid_cgroup(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1140 {
1141         proc_t *p;
1142 
1143         ASSERT(lxpnp->lxpr_type == LXPR_PID_CGROUP ||
1144             lxpnp->lxpr_type == LXPR_PID_TID_CGROUP);
1145 
1146         p = lxpr_lock(lxpnp->lxpr_pid);
1147         if (p == NULL) {
1148                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1149                 return;
1150         }
1151 
1152         /* basic stub, 3rd field will need to be populated */
1153         lxpr_uiobuf_printf(uiobuf, "1:name=systemd:/\n");
1154 
1155         lxpr_unlock(p);
1156 }
1157 
1158 static void
1159 lxpr_copy_cmdline(proc_t *p, lx_proc_data_t *pd, lxpr_uiobuf_t *uiobuf)
1160 {
             /*
              * Copy the command line of an LX-branded process to the caller's
              * uio, using the bounds recorded in pd (l_args_start, l_envs_start
              * and l_envs_end).  uiobuf->buffer serves only as scratch space:
              * data is moved with uiomove() directly, the outcome is stored in
              * uiobuf->error, and the uiobuf position state is reset at the end.
              *
              * p->p_lock is released with mutex_exit() while the target's memory
              * is read and re-acquired before returning.  The early return
              * below happens before the lock is released.
              */
1161         uio_t *uiop = uiobuf->uiop;
1162         char *buf = uiobuf->buffer;
1163         int bsz = uiobuf->buffsize;
1164         boolean_t env_overflow = B_FALSE;
1165         uintptr_t pos = pd->l_args_start + uiop->uio_offset;
1166         uintptr_t estart = pd->l_envs_start;
1167         uintptr_t eend = pd->l_envs_end;
1168         size_t chunk, copied;
1169         int err = 0;
1170 
1171         /* Do not bother with data beyond the end of the envp strings area. */
1172         if (pos > eend) {
1173                 return;
1174         }
1175         mutex_exit(&p->p_lock);
1176 
1177         /*
1178          * If the starting or ending bounds are outside the argv strings area,
1179          * check to see if the process has overwritten the terminating NULL.
1180          * If not, no data needs to be copied from outside the argv area.
1181          */
1182         if (pos >= estart || (pos + uiop->uio_resid) >= estart) {
1183                 uint8_t term;
1184                 if (uread(p, &term, sizeof (term), estart - 1) != 0) {
1185                         err = EFAULT;
1186                 } else if (term != 0) {
1187                         env_overflow = B_TRUE;
1188                 }
1189         }
1190 
1191 
1192         /* Data from the argv area (up to estart-1) can be copied freely. */
1193         while (pos < estart && uiop->uio_resid > 0 && err == 0) {
1194                 chunk = MIN(estart - pos, uiop->uio_resid);
1195                 chunk = MIN(chunk, bsz);
1196 
                     /* A short read is treated the same as a failed one. */
1197                 if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) != 0 ||
1198                     copied != chunk) {
1199                         err = EFAULT;
1200                         break;
1201                 }
1202                 err = uiomove(buf, copied, UIO_READ, uiop);
1203                 pos += copied;
1204         }
1205 
1206         /*
1207          * Onward from estart, data is copied as a contiguous string.  To
1208          * protect env data from potential snooping, only one buffer-sized copy
1209          * is allowed to avoid complex seek logic.
1210          */
1211         if (err == 0 && env_overflow && pos == estart && uiop->uio_resid > 0) {
1212                 chunk = MIN(eend - pos, uiop->uio_resid);
1213                 chunk = MIN(chunk, bsz);
                     /* A failed read here is ignored; err stays 0. */
1214                 if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) == 0) {
                             /* Stop at the first NUL within the bytes read. */
1215                         int len = strnlen(buf, copied);
1216                         if (len > 0) {
1217                                 err = uiomove(buf, len, UIO_READ, uiop);
1218                         }
1219                 }
1220         }
1221 
1222         uiobuf->error = err;
1223         /* reset any uiobuf state */
1224         uiobuf->pos = uiobuf->buffer;
1225         uiobuf->beg = 0;
1226 
1227         mutex_enter(&p->p_lock);
1228 }
1229 
1230 /*
1231  * lxpr_read_pid_cmdline(): read argument vector from process
      *
      * When the process has LX brand data with non-zero l_args_start,
      * l_envs_start and l_envs_end, the copy is delegated to
      * lxpr_copy_cmdline().  Otherwise prreadargv() fills a temporary buffer of
      * lxpr_maxargvlen bytes, which is then written to uiobuf.  EINVAL is set on
      * uiobuf if lxpr_lock() returns NULL or prreadargv() fails.
1232  */
1233 static void
1234 lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1235 {
1236         proc_t *p;
1237         char *buf;
1238         size_t asz = lxpr_maxargvlen, sz;
1239         lx_proc_data_t *pd;
1240 
1241         ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE ||
1242             lxpnp->lxpr_type == LXPR_PID_TID_CMDLINE);
1243 
             /*
              * Allocated up front, before the process is locked, although only
              * the prreadargv() path below uses it.
              */
1244         buf = kmem_alloc(asz, KM_SLEEP);
1245 
1246         p = lxpr_lock(lxpnp->lxpr_pid);
1247         if (p == NULL) {
1248                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1249                 kmem_free(buf, asz);
1250                 return;
1251         }
1252 
1253         if ((pd = ptolxproc(p)) != NULL && pd->l_args_start != 0 &&
1254             pd->l_envs_start != 0 && pd->l_envs_end != 0) {
1255                 /* Use Linux-style argv bounds if possible. */
1256                 lxpr_copy_cmdline(p, pd, uiobuf);
1257         } else {
1258                 if (prreadargv(p, buf, asz, &sz) != 0) {
1259                         lxpr_uiobuf_seterr(uiobuf, EINVAL);
1260                 } else {
1261                         lxpr_uiobuf_write(uiobuf, buf, sz);
1262                 }
1263         }
1264 
1265         lxpr_unlock(p);
1266         kmem_free(buf, asz);
1267 }
1268 
1269 /*
1270  * lxpr_read_pid_comm(): read command from process
1271  */
1272 static void
1273 lxpr_read_pid_comm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1274 {
1275         proc_t *p;
1276 
1277         VERIFY(lxpnp->lxpr_type == LXPR_PID_COMM ||
1278             lxpnp->lxpr_type == LXPR_PID_TID_COMM);
1279 
1280         /*
1281          * Because prctl(PR_SET_NAME) does not set custom names for threads
1282          * (vs processes), there is no need for special handling here.
1283          */
1284         if ((p = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
1285                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1286                 return;
1287         }
1288         lxpr_uiobuf_printf(uiobuf, "%s\n", p->p_user.u_comm);
1289         lxpr_unlock(p);
1290 }
1291 
1292 /*
1293  * lxpr_read_pid_env(): read env vector from process
1294  */
1295 static void
1296 lxpr_read_pid_env(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1297 {
1298         proc_t *p;
1299         char *buf;
1300         size_t asz = lxpr_maxenvvlen, sz;
1301         int r;
1302 
1303         ASSERT(lxpnp->lxpr_type == LXPR_PID_ENV);
1304 
1305         buf = kmem_alloc(asz, KM_SLEEP);
1306 
1307         p = lxpr_lock(lxpnp->lxpr_pid);
1308         if (p == NULL) {
1309                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1310                 kmem_free(buf, asz);
1311                 return;
1312         }
1313 
1314         r = prreadenvv(p, buf, asz, &sz);
1315         lxpr_unlock(p);
1316 
1317         if (r != 0) {
1318                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1319         } else {
1320                 lxpr_uiobuf_write(uiobuf, buf, sz);
1321         }
1322 
1323         kmem_free(buf, asz);
1324 }
1325 
1326 /*
1327  * lxpr_read_pid_limits(): ulimit file
1328  */
1329 static void
1330 lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1331 {
1332         proc_t *p;
1333         rctl_qty_t cur, max;
1334         rctl_val_t *oval, *nval;
1335         rctl_hndl_t hndl;
1336         char *kname;
1337         int i;
1338 
1339         ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS ||
1340             lxpnp->lxpr_type == LXPR_PID_TID_LIMITS);
1341 
1342         nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP);
1343 
1344         p = lxpr_lock(lxpnp->lxpr_pid);
1345         if (p == NULL) {
1346                 kmem_free(nval, sizeof (rctl_val_t));
1347                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1348                 return;
1349         }
1350 
1351         lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n",
1352             "Limit", "Soft Limit", "Hard Limit", "Units");
1353         for (i = 0; lxpr_rlimtab[i].rlim_name != NULL; i++) {
1354                 kname = lxpr_rlimtab[i].rlim_rctl;
1355                 /* default to unlimited for resources without an analog */
1356                 cur = RLIM_INFINITY;
1357                 max = RLIM_INFINITY;
1358                 if (kname != NULL) {
1359                         hndl = rctl_hndl_lookup(kname);
1360                         oval = NULL;
1361                         while ((hndl != -1) &&
1362                             rctl_local_get(hndl, oval, nval, p) == 0) {
1363                                 oval = nval;
1364                                 switch (nval->rcv_privilege) {
1365                                 case RCPRIV_BASIC:
1366                                         if (!RCTL_INFINITE(nval))
1367                                                 cur = nval->rcv_value;
1368                                         break;
1369                                 case RCPRIV_PRIVILEGED:
1370                                         if (!RCTL_INFINITE(nval))
1371                                                 max = nval->rcv_value;
1372                                         break;
1373                                 }
1374                         }
1375                 }
1376 
1377                 lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name);
1378                 if (cur == RLIM_INFINITY || cur == LX_RLIM_INFINITY) {
1379                         lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
1380                 } else {
1381                         lxpr_uiobuf_printf(uiobuf, " %-20lu", cur);
1382                 }
1383                 if (max == RLIM_INFINITY || max == LX_RLIM_INFINITY) {
1384                         lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
1385                 } else {
1386                         lxpr_uiobuf_printf(uiobuf, " %-20lu", max);
1387                 }
1388                 lxpr_uiobuf_printf(uiobuf, " %-10s\n",
1389                     lxpr_rlimtab[i].rlim_unit);
1390         }
1391 
1392         lxpr_unlock(p);
1393         kmem_free(nval, sizeof (rctl_val_t));
1394 }
1395 
1396 /*
1397  * lxpr_read_pid_maps(): memory map file
1398  */
1399 static void
1400 lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1401 {
1402         proc_t *p;
1403         struct as *as;
1404         struct seg *seg;
1405         char *buf;
1406         int buflen = MAXPATHLEN;
1407         struct print_data {
1408                 uintptr_t saddr;
1409                 uintptr_t eaddr;
1410                 int type;
1411                 char prot[5];
1412                 uintptr_t offset;
1413                 vnode_t *vp;
1414                 struct print_data *next;
1415         } *print_head = NULL;
1416         struct print_data **print_tail = &print_head;
1417         struct print_data *pbuf;
1418 
1419         ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS ||
1420             lxpnp->lxpr_type == LXPR_PID_TID_MAPS);
1421 
1422         p = lxpr_lock(lxpnp->lxpr_pid);
1423         if (p == NULL) {
1424                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1425                 return;
1426         }
1427 
1428         as = p->p_as;
1429 
1430         if (as == &kas) {
1431                 lxpr_unlock(p);
1432                 return;
1433         }
1434 
1435         mutex_exit(&p->p_lock);
1436 
1437         /* Iterate over all segments in the address space */
1438         AS_LOCK_ENTER(as, RW_READER);
1439         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1440                 vnode_t *vp;
1441                 uint_t protbits;
1442 
1443                 pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
1444 
1445                 pbuf->saddr = (uintptr_t)seg->s_base;
1446                 pbuf->eaddr = pbuf->saddr + seg->s_size;
1447                 pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
1448 
1449                 /*
1450                  * Cheat and only use the protection bits of the first page
1451                  * in the segment
1452                  */
1453                 (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
1454                 (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
1455 
1456                 if (protbits & PROT_READ)      pbuf->prot[0] = 'r';
1457                 if (protbits & PROT_WRITE)     pbuf->prot[1] = 'w';
1458                 if (protbits & PROT_EXEC)      pbuf->prot[2] = 'x';
1459                 if (pbuf->type & MAP_SHARED)        pbuf->prot[3] = 's';
1460                 else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
1461 
1462                 if (seg->s_ops == &segvn_ops &&
1463                     SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
1464                     vp != NULL && vp->v_type == VREG) {
1465                         VN_HOLD(vp);
1466                         pbuf->vp = vp;
1467                 } else {
1468                         pbuf->vp = NULL;
1469                 }
1470 
1471                 pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr);
1472 
1473                 pbuf->next = NULL;
1474                 *print_tail = pbuf;
1475                 print_tail = &pbuf->next;
1476         }
1477         AS_LOCK_EXIT(as);
1478         mutex_enter(&p->p_lock);
1479         lxpr_unlock(p);
1480 
1481         buf = kmem_alloc(buflen, KM_SLEEP);
1482 
1483         /* print the data we've extracted */
1484         pbuf = print_head;
1485         while (pbuf != NULL) {
1486                 struct print_data *pbuf_next;
1487                 vattr_t vattr;
1488 
1489                 int maj = 0;
1490                 int min = 0;
1491                 ino_t inode = 0;
1492 
1493                 *buf = '\0';
1494                 if (pbuf->vp != NULL) {
1495                         vattr.va_mask = AT_FSID | AT_NODEID;
1496                         if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
1497                             NULL) == 0) {
1498                                 maj = getmajor(vattr.va_fsid);
1499                                 min = getminor(vattr.va_fsid);
1500                                 inode = vattr.va_nodeid;
1501                         }
1502                         (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
1503                         VN_RELE(pbuf->vp);
1504                 }
1505 
1506                 if (p->p_model == DATAMODEL_LP64) {
1507                         lxpr_uiobuf_printf(uiobuf,
1508                             "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n",
1509                             pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
1510                             maj, min, inode, *buf != '\0' ? " " : "", buf);
1511                 } else {
1512                         lxpr_uiobuf_printf(uiobuf,
1513                             "%08x-%08x %s %08x %02x:%02x %llu%s%s\n",
1514                             (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr,
1515                             pbuf->prot, (uint32_t)pbuf->offset, maj, min,
1516                             inode, *buf != '\0' ? " " : "", buf);
1517                 }
1518 
1519                 pbuf_next = pbuf->next;
1520                 kmem_free(pbuf, sizeof (*pbuf));
1521                 pbuf = pbuf_next;
1522         }
1523 
1524         kmem_free(buf, buflen);
1525 }
1526 
1527 /*
1528  * lxpr_read_pid_mountinfo(): information about process mount points. e.g.:
1529  *    14 19 0:13 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
1530  * mntid parid devnums root mntpnt mntopts - fstype mntsrc superopts
1531  *
1532  * We have to make up several of these fields.
1533  */
1534 static void
1535 lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1536 {
1537         struct vfs *vfsp;
1538         struct vfs *vfslist;
1539         zone_t *zone = LXPTOZ(lxpnp);
1540         struct print_data {
1541                 refstr_t *vfs_mntpt;
1542                 refstr_t *vfs_resource;
1543                 uint_t vfs_flag;
1544                 int vfs_fstype;
1545                 dev_t vfs_dev;
1546                 struct print_data *next;
1547         } *print_head = NULL;
1548         struct print_data **print_tail = &print_head;
1549         struct print_data *printp;
1550         int root_id = 15;       /* use a made-up value */
1551         int mnt_id;
1552 
1553         ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO ||
1554             lxpnp->lxpr_type == LXPR_PID_TID_MOUNTINFO);
1555 
1556         vfs_list_read_lock();
1557 
1558         /* root is the top-level, it does not appear in this output */
1559         if (zone == global_zone) {
1560                 vfsp = vfslist = rootvfs;
1561         } else {
1562                 vfsp = vfslist = zone->zone_vfslist;
1563                 /*
1564                  * If the zone has a root entry, it will be the first in
1565                  * the list.  If it doesn't, we conjure one up.
1566                  */
1567                 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
1568                     zone->zone_rootpath) != 0) {
1569                         struct vfs *tvfsp;
1570                         /*
1571                          * The root of the zone is not a mount point.  The vfs
1572                          * we want to report is that of the zone's root vnode.
1573                          */
1574                         tvfsp = zone->zone_rootvp->v_vfsp;
1575 
1576                         lxpr_uiobuf_printf(uiobuf,
1577                             "%d 1 %d:%d / / %s - %s / %s\n",
1578                             root_id,
1579                             major(tvfsp->vfs_dev), minor(vfsp->vfs_dev),
1580                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1581                             vfssw[tvfsp->vfs_fstype].vsw_name,
1582                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1583 
1584                 }
1585                 if (vfslist == NULL) {
1586                         vfs_list_unlock();
1587                         return;
1588                 }
1589         }
1590 
1591         /*
1592          * Later on we have to do a lookupname, which can end up causing
1593          * another vfs_list_read_lock() to be called. Which can lead to a
1594          * deadlock. To avoid this, we extract the data we need into a local
1595          * list, then we can run this list without holding vfs_list_read_lock()
1596          * We keep the list in the same order as the vfs_list
1597          */
1598         do {
1599                 /* Skip mounts we shouldn't show */
1600                 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
1601                         goto nextfs;
1602                 }
1603 
1604                 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
1605                 refstr_hold(vfsp->vfs_mntpt);
1606                 printp->vfs_mntpt = vfsp->vfs_mntpt;
1607                 refstr_hold(vfsp->vfs_resource);
1608                 printp->vfs_resource = vfsp->vfs_resource;
1609                 printp->vfs_flag = vfsp->vfs_flag;
1610                 printp->vfs_fstype = vfsp->vfs_fstype;
1611                 printp->vfs_dev = vfsp->vfs_dev;
1612                 printp->next = NULL;
1613 
1614                 *print_tail = printp;
1615                 print_tail = &printp->next;
1616 
1617 nextfs:
1618                 vfsp = (zone == global_zone) ?
1619                     vfsp->vfs_next : vfsp->vfs_zone_next;
1620 
1621         } while (vfsp != vfslist);
1622 
1623         vfs_list_unlock();
1624 
1625         mnt_id = root_id + 1;
1626 
1627         /*
1628          * now we can run through what we've extracted without holding
1629          * vfs_list_read_lock()
1630          */
1631         printp = print_head;
1632         while (printp != NULL) {
1633                 struct print_data *printp_next;
1634                 const char *resource;
1635                 char *mntpt;
1636                 struct vnode *vp;
1637                 int error;
1638 
1639                 mntpt = (char *)refstr_value(printp->vfs_mntpt);
1640                 resource = refstr_value(printp->vfs_resource);
1641 
1642                 if (mntpt != NULL && mntpt[0] != '\0')
1643                         mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
1644                 else
1645                         mntpt = "-";
1646 
1647                 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
1648 
1649                 if (error != 0)
1650                         goto nextp;
1651 
1652                 if (!(vp->v_flag & VROOT)) {
1653                         VN_RELE(vp);
1654                         goto nextp;
1655                 }
1656                 VN_RELE(vp);
1657 
1658                 if (resource != NULL && resource[0] != '\0') {
1659                         if (resource[0] == '/') {
1660                                 resource = ZONE_PATH_VISIBLE(resource, zone) ?
1661                                     ZONE_PATH_TRANSLATE(resource, zone) : mntpt;
1662                         }
1663                 } else {
1664                         resource = "none";
1665                 }
1666 
1667                 /*
1668                  * XXX parent ID is not tracked correctly here. Currently we
1669                  * always assume the parent ID is the root ID.
1670                  */
1671                 lxpr_uiobuf_printf(uiobuf,
1672                     "%d %d %d:%d / %s %s - %s %s %s\n",
1673                     mnt_id, root_id,
1674                     major(printp->vfs_dev), minor(printp->vfs_dev),
1675                     mntpt,
1676                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1677                     vfssw[printp->vfs_fstype].vsw_name,
1678                     resource,
1679                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1680 
1681 nextp:
1682                 printp_next = printp->next;
1683                 refstr_rele(printp->vfs_mntpt);
1684                 refstr_rele(printp->vfs_resource);
1685                 kmem_free(printp, sizeof (*printp));
1686                 printp = printp_next;
1687 
1688                 mnt_id++;
1689         }
1690 }
1691 
1692 /*
1693  * lxpr_read_pid_oom_scr_adj(): read oom_score_adj for process
1694  */
1695 static void
1696 lxpr_read_pid_oom_scr_adj(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1697 {
1698         proc_t *p;
1699 
1700         ASSERT(lxpnp->lxpr_type == LXPR_PID_OOM_SCR_ADJ ||
1701             lxpnp->lxpr_type == LXPR_PID_TID_OOM_SCR_ADJ);
1702 
1703         p = lxpr_lock(lxpnp->lxpr_pid);
1704         if (p == NULL) {
1705                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1706                 return;
1707         }
1708 
1709         /* always 0 */
1710         lxpr_uiobuf_printf(uiobuf, "0\n");
1711 
1712         lxpr_unlock(p);
1713 }
1714 
1715 
1716 /*
1717  * lxpr_read_pid_statm(): memory status file
1718  */
1719 static void
1720 lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1721 {
1722         proc_t *p;
1723         struct as *as;
1724         size_t vsize;
1725         size_t rss;
1726 
1727         ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM ||
1728             lxpnp->lxpr_type == LXPR_PID_TID_STATM);
1729 
1730         p = lxpr_lock(lxpnp->lxpr_pid);
1731         if (p == NULL) {
1732                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1733                 return;
1734         }
1735 
1736         as = p->p_as;
1737 
1738         mutex_exit(&p->p_lock);
1739 
1740         AS_LOCK_ENTER(as, RW_READER);
1741         vsize = btopr(as->a_resvsize);
1742         rss = rm_asrss(as);
1743         AS_LOCK_EXIT(as);
1744 
1745         mutex_enter(&p->p_lock);
1746         lxpr_unlock(p);
1747 
1748         lxpr_uiobuf_printf(uiobuf,
1749             "%lu %lu %lu %lu %lu %lu %lu\n",
1750             vsize, rss, 0l, rss, 0l, 0l, 0l);
1751 }
1752 
1753 /*
1754  * Look for either the main thread (lookup_id is 0) or the specified thread.
1755  * If we're looking for the main thread but the proc does not have one, we
1756  * fallback to using prchoose to get any thread available.
1757  */
1758 static kthread_t *
1759 lxpr_get_thread(proc_t *p, uint_t lookup_id)
1760 {
1761         kthread_t *t;
1762         uint_t emul_tid;
1763         lx_lwp_data_t *lwpd;
1764         pid_t pid = p->p_pid;
1765         pid_t init_pid = curproc->p_zone->zone_proc_initpid;
1766         boolean_t branded = (p->p_brand == &lx_brand);
1767 
1768         /* get specified thread  */
1769         if ((t = p->p_tlist) == NULL)
1770                 return (NULL);
1771 
1772         do {
1773                 if (lookup_id == 0 && t->t_tid == 1) {
1774                         thread_lock(t);
1775                         return (t);
1776                 }
1777 
1778                 lwpd = ttolxlwp(t);
1779                 if (branded && lwpd != NULL) {
1780                         if (pid == init_pid && lookup_id == 1) {
1781                                 emul_tid = t->t_tid;
1782                         } else {
1783                                 emul_tid = lwpd->br_pid;
1784                         }
1785                 } else {
1786                         /*
1787                          * Make only the first (assumed to be main) thread
1788                          * visible for non-branded processes.
1789                          */
1790                         emul_tid = p->p_pid;
1791                 }
1792                 if (emul_tid == lookup_id) {
1793                         thread_lock(t);
1794                         return (t);
1795                 }
1796         } while ((t = t->t_forw) != p->p_tlist);
1797 
1798         if (lookup_id == 0)
1799                 return (prchoose(p));
1800         return (NULL);
1801 }
1802 
1803 /*
1804  * Lookup the real pid for procs 0 or 1.
1805  */
1806 static pid_t
1807 get_real_pid(pid_t p)
1808 {
1809         pid_t find_pid;
1810 
1811         if (p == 1) {
1812                 find_pid = curproc->p_zone->zone_proc_initpid;
1813         } else if (p == 0) {
1814                 find_pid = curproc->p_zone->zone_zsched->p_pid;
1815         } else {
1816                 find_pid = p;
1817         }
1818 
1819         return (find_pid);
1820 }
1821 
/*
 * pid/tid common code to read status file
 *
 * lxpnp identifies the process (lxpr_pid) and, for thread nodes, the thread
 * (lxpr_desc).  lookup_id selects the thread whose state is reported: 0 for
 * the main thread, otherwise the thread id to look up (see
 * lxpr_get_thread()).  Output is appended to uiobuf.
 *
 * EINVAL is reported if the process cannot be locked, or if a specific
 * thread was requested and could not be found.
 */
static void
lxpr_read_status_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
    uint_t lookup_id)
{
        proc_t *p;
        kthread_t *t;
        user_t *up;
        cred_t *cr;
        const gid_t *groups;
        int    ngroups;
        struct as *as;
        char *status;
        pid_t pid, ppid;
        k_sigset_t current, ignore, handle;
        int    i, lx_sig;
        pid_t real_pid;

        /* Pids 0 and 1 name the zone's zsched and init; map to real pids. */
        real_pid = get_real_pid(lxpnp->lxpr_pid);
        p = lxpr_lock(real_pid);
        if (p == NULL) {
                lxpr_uiobuf_seterr(uiobuf, EINVAL);
                return;
        }

        pid = p->p_pid;

        /*
         * Convert pid to the Linux default of 1 if we're the zone's init
         * process or if we're the zone's zsched the pid is 0.
         */
        if (pid == curproc->p_zone->zone_proc_initpid) {
                pid = 1;
                ppid = 0;       /* parent pid for init is 0 */
        } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
                pid = 0;        /* zsched is pid 0 */
                ppid = 0;       /* parent pid for zsched is itself */
        } else {
                /*
                 * Make sure not to reference parent PIDs that reside outside
                 * the zone
                 */
                ppid = ((p->p_flag & SZONETOP)
                    ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);

                /*
                 * Convert ppid to the Linux default of 1 if our parent is the
                 * zone's init process
                 */
                if (ppid == curproc->p_zone->zone_proc_initpid)
                        ppid = 1;
        }

        /*
         * Translate the thread state into the Linux "State:" string.  A
         * thread found by lxpr_get_thread() comes back locked, so it is
         * unlocked once its state has been read.
         */
        t = lxpr_get_thread(p, lookup_id);
        if (t != NULL) {
                switch (t->t_state) {
                case TS_SLEEP:
                        status = "S (sleeping)";
                        break;
                case TS_RUN:
                case TS_ONPROC:
                        status = "R (running)";
                        break;
                case TS_ZOMB:
                        status = "Z (zombie)";
                        break;
                case TS_STOPPED:
                        status = "T (stopped)";
                        break;
                default:
                        status = "! (unknown)";
                        break;
                }
                thread_unlock(t);
        } else {
                if (lookup_id != 0) {
                        /* we can't find this specific thread */
                        lxpr_uiobuf_seterr(uiobuf, EINVAL);
                        lxpr_unlock(p);
                        return;
                }

                /*
                 * there is a hole in the exit code, where a proc can have
                 * no threads but it is yet to be flagged SZOMB. We will
                 * assume we are about to become a zombie
                 */
                status = "Z (zombie)";
        }

        /* Take our own hold on the credential; it is freed after use below. */
        up = PTOU(p);
        mutex_enter(&p->p_crlock);
        crhold(cr = p->p_cred);
        mutex_exit(&p->p_crlock);

        /*
         * Uid/Gid columns are real, effective, saved and then the effective
         * id again.  TracerPid is always reported as 0.
         */
        lxpr_uiobuf_printf(uiobuf,
            "Name:\t%s\n"
            "State:\t%s\n"
            "Tgid:\t%d\n"
            "Pid:\t%d\n"
            "PPid:\t%d\n"
            "TracerPid:\t%d\n"
            "Uid:\t%u\t%u\t%u\t%u\n"
            "Gid:\t%u\t%u\t%u\t%u\n"
            "FDSize:\t%d\n"
            "Groups:\t",
            up->u_comm,
            status,
            pid, /* thread group id - same as pid */
            (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
            ppid,
            0,
            crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
            crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
            p->p_fno_ctl);


        /* Supplementary groups, each followed by a space, on the same line. */
        ngroups = crgetngroups(cr);
        groups  = crgetgroups(cr);
        for (i = 0; i < ngroups; i++) {
                lxpr_uiobuf_printf(uiobuf,
                    "%u ",
                    groups[i]);
        }
        crfree(cr);

        /*
         * Memory figures are emitted only for live, non-system processes
         * that have their own address space.
         */
        as = p->p_as;
        if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
                size_t vsize, nlocked, rss;

                /* p_lock is dropped while the address space lock is held. */
                mutex_exit(&p->p_lock);
                AS_LOCK_ENTER(as, RW_READER);
                vsize = as->a_resvsize;
                rss = rm_asrss(as);
                AS_LOCK_EXIT(as);
                mutex_enter(&p->p_lock);
                nlocked = p->p_locked_mem;

                /*
                 * VmData and VmLib are always reported as 0; VmExe repeats
                 * the resident set size.
                 */
                lxpr_uiobuf_printf(uiobuf,
                    "\n"
                    "VmSize:\t%8lu kB\n"
                    "VmLck:\t%8lu kB\n"
                    "VmRSS:\t%8lu kB\n"
                    "VmData:\t%8lu kB\n"
                    "VmStk:\t%8lu kB\n"
                    "VmExe:\t%8lu kB\n"
                    "VmLib:\t%8lu kB",
                    btok(vsize),
                    btok(nlocked),
                    ptok(rss),
                    0l,
                    btok(p->p_stksize),
                    ptok(rss),
                    0l);
        }

        lxpr_uiobuf_printf(uiobuf, "\nThreads:\t%u", p->p_lwpcnt);

        sigemptyset(&current);
        sigemptyset(&ignore);
        sigemptyset(&handle);

        /*
         * Build the pending, ignored and caught sets in terms of Linux
         * signal numbers, using stol_signo[] to translate each native
         * signal.  Signals whose translation falls outside 1..LX_NSIG are
         * left out.
         */
        for (i = 1; i < NSIG; i++) {
                lx_sig = stol_signo[i];

                if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
                        if (sigismember(&p->p_sig, i))
                                sigaddset(&current, lx_sig);

                        /* u_signal[] is indexed from 0, signals from 1. */
                        if (up->u_signal[i - 1] == SIG_IGN)
                                sigaddset(&ignore, lx_sig);
                        else if (up->u_signal[i - 1] != SIG_DFL)
                                sigaddset(&handle, lx_sig);
                }
        }

        /* Each mask is printed as two 32-bit words, high word first. */
        lxpr_uiobuf_printf(uiobuf,
            "\n"
            "SigPnd:\t%08x%08x\n"
            "SigBlk:\t%08x%08x\n"
            "SigIgn:\t%08x%08x\n"
            "SigCgt:\t%08x%08x\n"
            "CapInh:\t%016x\n"
            "CapPrm:\t%016x\n"
            "CapEff:\t%016x\n",
            current.__sigbits[1], current.__sigbits[0],
            0, 0, /* signals blocked on per thread basis */
            ignore.__sigbits[1], ignore.__sigbits[0],
            handle.__sigbits[1], handle.__sigbits[0],
            /* Can't do anything with linux capabilities */
            0,
            0,
            0);

        lxpr_uiobuf_printf(uiobuf,
            "CapBnd:\t%016llx\n",
            /* We report the full capability bounding set */
            0x1fffffffffLL);

        lxpr_unlock(p);
}
2025 
2026 /*
2027  * lxpr_read_pid_status(): status file
2028  */
2029 static void
2030 lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2031 {
2032         ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
2033 
2034         lxpr_read_status_common(lxpnp, uiobuf, 0);
2035 }
2036 
2037 /*
2038  * lxpr_read_pid_tid_status(): status file
2039  */
2040 static void
2041 lxpr_read_pid_tid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2042 {
2043         ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STATUS);
2044         lxpr_read_status_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
2045 }
2046 
2047 /*
2048  * pid/tid common code to read stat file
2049  */
2050 static void
2051 lxpr_read_stat_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
2052     uint_t lookup_id)
2053 {
2054         proc_t *p;
2055         kthread_t *t;
2056         struct as *as;
2057         char stat;
2058         pid_t pid, ppid, pgpid, spid;
2059         gid_t psgid;
2060         dev_t psdev;
2061         size_t rss, vsize;
2062         int nice, pri;
2063         caddr_t wchan;
2064         processorid_t cpu;
2065         pid_t real_pid;
2066 
2067         real_pid = get_real_pid(lxpnp->lxpr_pid);
2068         p = lxpr_lock(real_pid);
2069         if (p == NULL) {
2070                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
2071                 return;
2072         }
2073 
2074         pid = p->p_pid;
2075 
2076         /*
2077          * Set Linux defaults if we're the zone's init process
2078          */
2079         if (pid == curproc->p_zone->zone_proc_initpid) {
2080                 pid = 1;                /* PID for init */
2081                 ppid = 0;               /* parent PID for init is 0 */
2082                 pgpid = 0;              /* process group for init is 0 */
2083                 psgid = (gid_t)-1;      /* credential GID for init is -1 */
2084                 spid = 0;               /* session id for init is 0 */
2085                 psdev = 0;              /* session device for init is 0 */
2086         } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
2087                 pid = 0;                /* PID for zsched */
2088                 ppid = 0;               /* parent PID for zsched is 0 */
2089                 pgpid = 0;              /* process group for zsched is 0 */
2090                 psgid = (gid_t)-1;      /* credential GID for zsched is -1 */
2091                 spid = 0;               /* session id for zsched is 0 */
2092                 psdev = 0;              /* session device for zsched is 0 */
2093         } else {
2094                 /*
2095                  * Make sure not to reference parent PIDs that reside outside
2096                  * the zone
2097                  */
2098                 ppid = ((p->p_flag & SZONETOP) ?
2099                     curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
2100 
2101                 /*
2102                  * Convert ppid to the Linux default of 1 if our parent is the
2103                  * zone's init process
2104                  */
2105                 if (ppid == curproc->p_zone->zone_proc_initpid)
2106                         ppid = 1;
2107 
2108                 pgpid = p->p_pgrp;
2109 
2110                 mutex_enter(&p->p_splock);
2111                 mutex_enter(&p->p_sessp->s_lock);
2112                 spid = p->p_sessp->s_sid;
2113                 psdev = p->p_sessp->s_dev;
2114                 if (p->p_sessp->s_cred)
2115                         psgid = crgetgid(p->p_sessp->s_cred);
2116                 else
2117                         psgid = crgetgid(p->p_cred);
2118 
2119                 mutex_exit(&p->p_sessp->s_lock);
2120                 mutex_exit(&p->p_splock);
2121         }
2122 
2123         t = lxpr_get_thread(p, lookup_id);
2124         if (t != NULL) {
2125                 switch (t->t_state) {
2126                 case TS_SLEEP:
2127                         stat = 'S'; break;
2128                 case TS_RUN:
2129                 case TS_ONPROC:
2130                         stat = 'R'; break;
2131                 case TS_ZOMB:
2132                         stat = 'Z'; break;
2133                 case TS_STOPPED:
2134                         stat = 'T'; break;
2135                 default:
2136                         stat = '!'; break;
2137                 }
2138 
2139                 if (CL_DONICE(t, NULL, 0, &nice) != 0)
2140                         nice = 0;
2141 
2142                 pri = t->t_pri;
2143                 wchan = t->t_wchan;
2144                 cpu = t->t_cpu->cpu_id;
2145                 thread_unlock(t);
2146         } else {
2147                 if (lookup_id != 0) {
2148                         /* we can't find this specific thread */
2149                         lxpr_uiobuf_seterr(uiobuf, EINVAL);
2150                         lxpr_unlock(p);
2151                         return;
2152                 }
2153 
2154                 /* Only zombies have no threads */
2155                 stat = 'Z';
2156                 nice = 0;
2157                 pri = 0;
2158                 wchan = 0;
2159                 cpu = 0;
2160         }
2161         as = p->p_as;
2162         mutex_exit(&p->p_lock);
2163         AS_LOCK_ENTER(as, RW_READER);
2164         vsize = as->a_resvsize;
2165         rss = rm_asrss(as);
2166         AS_LOCK_EXIT(as);
2167         mutex_enter(&p->p_lock);
2168 
2169         lxpr_uiobuf_printf(uiobuf,
2170             "%d (%s) %c %d %d %d %d %d "
2171             "%lu %lu %lu %lu %lu "
2172             "%lu %lu %ld %ld "
2173             "%d %d %d "
2174             "%lu "
2175             "%lu "
2176             "%lu %ld %llu "
2177             "%lu %lu %u "
2178             "%lu %lu "
2179             "%lu %lu %lu %lu "
2180             "%lu "
2181             "%lu %lu "
2182             "%d "
2183             "%d"
2184             "\n",
2185             (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
2186             PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
2187             0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
2188             p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
2189             pri, nice, p->p_lwpcnt,
2190             0l, /* itrealvalue (time before next SIGALRM) */
2191             PTOU(p)->u_ticks,
2192             vsize, rss, p->p_vmem_ctl,
2193             0l, 0l, USRSTACK, /* startcode, endcode, startstack */
2194             0l, 0l, /* kstkesp, kstkeip */
2195             0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
2196             wchan,
2197             0l, 0l, /* nswap, cnswap */
2198             0, /* exit_signal */
2199             cpu);
2200 
2201         lxpr_unlock(p);
2202 }
2203 
/*
 * lxpr_read_pid_stat(): pid stat file
 *
 * Thin wrapper around lxpr_read_stat_common().  A lookup_id of 0 is passed,
 * which that routine treats as "no specific thread requested".
 */
static void
lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);

        lxpr_read_stat_common(lxpnp, uiobuf, 0);
}
2214 
/*
 * lxpr_read_pid_tid_stat(): per-thread (pid/tid) stat file
 *
 * Thin wrapper around lxpr_read_stat_common().  The node's lxpr_desc value
 * is passed as the lookup_id used to select the thread.
 */
static void
lxpr_read_pid_tid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STAT);
        lxpr_read_stat_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
}
2224 
/*
 * lxpr_read_net_arp(): placeholder read routine.  The body is empty, so
 * nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2230 
/*
 * Per-interface counters collected by lxpr_kstat_ifstat() and printed by
 * lxpr_read_net_dev().  Each field notes the named kstat entry it is
 * loaded from.
 */
struct lxpr_ifstat {
        uint64_t rx_bytes;              /* "rbytes64" */
        uint64_t rx_packets;            /* "ipackets64" */
        uint64_t rx_errors;             /* "ierrors" */
        uint64_t rx_drop;               /* "norcvbuf" */
        uint64_t tx_bytes;              /* "obytes64" */
        uint64_t tx_packets;            /* "opackets64" */
        uint64_t tx_errors;             /* "oerrors" */
        uint64_t tx_drop;               /* "noxmtbuf" */
        uint64_t collisions;            /* "collisions" */
        uint64_t rx_multicast;          /* "multircv" */
};
2243 
2244 static void *
2245 lxpr_kstat_read(kstat_t *kn, boolean_t byname, size_t *size, int *num)
2246 {
2247         kstat_t *kp;
2248         int i, nrec = 0;
2249         size_t bufsize;
2250         void *buf = NULL;
2251 
2252         if (byname == B_TRUE) {
2253                 kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
2254                     kn->ks_name, getzoneid());
2255         } else {
2256                 kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
2257         }
2258         if (kp == NULL) {
2259                 return (NULL);
2260         }
2261         if (kp->ks_flags & KSTAT_FLAG_INVALID) {
2262                 kstat_rele(kp);
2263                 return (NULL);
2264         }
2265 
2266         bufsize = kp->ks_data_size + 1;
2267         kstat_rele(kp);
2268 
2269         /*
2270          * The kstat in question is released so that kmem_alloc(KM_SLEEP) is
2271          * performed without it held.  After the alloc, the kstat is reacquired
2272          * and its size is checked again. If the buffer is no longer large
2273          * enough, the alloc and check are repeated up to three times.
2274          */
2275         for (i = 0; i < 2; i++) {
2276                 buf = kmem_alloc(bufsize, KM_SLEEP);
2277 
2278                 /* Check if bufsize still appropriate */
2279                 if (byname == B_TRUE) {
2280                         kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
2281                             kn->ks_name, getzoneid());
2282                 } else {
2283                         kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
2284                 }
2285                 if (kp == NULL || kp->ks_flags & KSTAT_FLAG_INVALID) {
2286                         if (kp != NULL) {
2287                                 kstat_rele(kp);
2288                         }
2289                         kmem_free(buf, bufsize);
2290                         return (NULL);
2291                 }
2292                 KSTAT_ENTER(kp);
2293                 (void) KSTAT_UPDATE(kp, KSTAT_READ);
2294                 if (bufsize < kp->ks_data_size) {
2295                         kmem_free(buf, bufsize);
2296                         buf = NULL;
2297                         bufsize = kp->ks_data_size + 1;
2298                         KSTAT_EXIT(kp);
2299                         kstat_rele(kp);
2300                         continue;
2301                 } else {
2302                         if (KSTAT_SNAPSHOT(kp, buf, KSTAT_READ) != 0) {
2303                                 kmem_free(buf, bufsize);
2304                                 buf = NULL;
2305                         }
2306                         nrec = kp->ks_ndata;
2307                         KSTAT_EXIT(kp);
2308                         kstat_rele(kp);
2309                         break;
2310                 }
2311         }
2312 
2313         if (buf != NULL) {
2314                 *size = bufsize;
2315                 *num = nrec;
2316         }
2317         return (buf);
2318 }
2319 
2320 static int
2321 lxpr_kstat_ifstat(kstat_t *kn, struct lxpr_ifstat *ifs)
2322 {
2323         kstat_named_t *kp;
2324         int i, num;
2325         size_t size;
2326 
2327         /*
2328          * Search by name instead of by kid since there's a small window to
2329          * race against kstats being added/removed.
2330          */
2331         bzero(ifs, sizeof (*ifs));
2332         kp = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
2333         if (kp == NULL)
2334                 return (-1);
2335         for (i = 0; i < num; i++) {
2336                 if (strncmp(kp[i].name, "rbytes64", KSTAT_STRLEN) == 0)
2337                         ifs->rx_bytes = kp[i].value.ui64;
2338                 else if (strncmp(kp[i].name, "ipackets64", KSTAT_STRLEN) == 0)
2339                         ifs->rx_packets = kp[i].value.ui64;
2340                 else if (strncmp(kp[i].name, "ierrors", KSTAT_STRLEN) == 0)
2341                         ifs->rx_errors = kp[i].value.ui32;
2342                 else if (strncmp(kp[i].name, "norcvbuf", KSTAT_STRLEN) == 0)
2343                         ifs->rx_drop = kp[i].value.ui32;
2344                 else if (strncmp(kp[i].name, "multircv", KSTAT_STRLEN) == 0)
2345                         ifs->rx_multicast = kp[i].value.ui32;
2346                 else if (strncmp(kp[i].name, "obytes64", KSTAT_STRLEN) == 0)
2347                         ifs->tx_bytes = kp[i].value.ui64;
2348                 else if (strncmp(kp[i].name, "opackets64", KSTAT_STRLEN) == 0)
2349                         ifs->tx_packets = kp[i].value.ui64;
2350                 else if (strncmp(kp[i].name, "oerrors", KSTAT_STRLEN) == 0)
2351                         ifs->tx_errors = kp[i].value.ui32;
2352                 else if (strncmp(kp[i].name, "noxmtbuf", KSTAT_STRLEN) == 0)
2353                         ifs->tx_drop = kp[i].value.ui32;
2354                 else if (strncmp(kp[i].name, "collisions", KSTAT_STRLEN) == 0)
2355                         ifs->collisions = kp[i].value.ui32;
2356         }
2357         kmem_free(kp, size);
2358         return (0);
2359 }
2360 
2361 /* ARGSUSED */
2362 static void
2363 lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2364 {
2365         kstat_t *ksr;
2366         kstat_t ks0;
2367         int i, nidx;
2368         size_t sidx;
2369         struct lxpr_ifstat ifs;
2370 
2371         lxpr_uiobuf_printf(uiobuf, "Inter-|   Receive                   "
2372             "                             |  Transmit\n");
2373         lxpr_uiobuf_printf(uiobuf, " face |bytes    packets errs drop fifo"
2374             " frame compressed multicast|bytes    packets errs drop fifo"
2375             " colls carrier compressed\n");
2376 
2377         ks0.ks_kid = 0;
2378         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
2379         if (ksr == NULL)
2380                 return;
2381 
2382         for (i = 1; i < nidx; i++) {
2383                 if (strncmp(ksr[i].ks_module, "link", KSTAT_STRLEN) == 0 ||
2384                     strncmp(ksr[i].ks_module, "lo", KSTAT_STRLEN) == 0) {
2385                         if (lxpr_kstat_ifstat(&ksr[i], &ifs) != 0)
2386                                 continue;
2387 
2388                         /* Overwriting the name is ok in the local snapshot */
2389                         lx_ifname_convert(ksr[i].ks_name, LX_IF_FROMNATIVE);
2390                         lxpr_uiobuf_printf(uiobuf, "%6s: %7llu %7llu %4lu "
2391                             "%4lu %4u %5u %10u %9lu %8llu %7llu %4lu %4lu %4u "
2392                             "%5lu %7u %10u\n",
2393                             ksr[i].ks_name,
2394                             ifs.rx_bytes, ifs.rx_packets,
2395                             ifs.rx_errors, ifs.rx_drop,
2396                             0, 0, 0, ifs.rx_multicast,
2397                             ifs.tx_bytes, ifs.tx_packets,
2398                             ifs.tx_errors, ifs.tx_drop,
2399                             0, ifs.collisions, 0, 0);
2400                 }
2401         }
2402 
2403         kmem_free(ksr, sidx);
2404 }
2405 
/*
 * lxpr_read_net_dev_mcast(): placeholder read routine.  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2411 
2412 static void
2413 lxpr_inet6_out(const in6_addr_t *addr, char buf[33])
2414 {
2415         const uint8_t *ip = addr->s6_addr;
2416         char digits[] = "0123456789abcdef";
2417         int i;
2418         for (i = 0; i < 16; i++) {
2419                 buf[2 * i] = digits[ip[i] >> 4];
2420                 buf[2 * i + 1] = digits[ip[i] & 0xf];
2421         }
2422         buf[32] = '\0';
2423 }
2424 
2425 /* ARGSUSED */
2426 static void
2427 lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2428 {
2429         netstack_t *ns;
2430         ip_stack_t *ipst;
2431         ill_t *ill;
2432         ipif_t *ipif;
2433         ill_walk_context_t      ctx;
2434         char ifname[LIFNAMSIZ], ip6out[33];
2435 
2436         ns = netstack_get_current();
2437         if (ns == NULL)
2438                 return;
2439         ipst = ns->netstack_ip;
2440 
2441         rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2442         ill = ILL_START_WALK_V6(&ctx, ipst);
2443 
2444         for (; ill != NULL; ill = ill_next(&ctx, ill)) {
2445                 for (ipif = ill->ill_ipif; ipif != NULL;
2446                     ipif = ipif->ipif_next) {
2447                         uint_t index = ill->ill_phyint->phyint_ifindex;
2448                         int plen = ip_mask_to_plen_v6(&ipif->ipif_v6net_mask);
2449                         unsigned int scope = lx_ipv6_scope_convert(
2450                             &ipif->ipif_v6lcl_addr);
2451                         /* Always report PERMANENT flag */
2452                         int flag = 0x80;
2453 
2454                         (void) snprintf(ifname, LIFNAMSIZ, "%s", ill->ill_name);
2455                         lx_ifname_convert(ifname, LX_IF_FROMNATIVE);
2456                         lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out);
2457 
2458                         lxpr_uiobuf_printf(uiobuf, "%32s %02x %02x %02x %02x"
2459                             " %8s\n", ip6out, index, plen, scope, flag, ifname);
2460                 }
2461         }
2462         rw_exit(&ipst->ips_ill_g_lock);
2463         netstack_rele(ns);
2464 }
2465 
/*
 * lxpr_read_net_igmp(): placeholder read routine.  The body is empty, so
 * nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2471 
/*
 * lxpr_read_net_ip_mr_cache(): placeholder read routine.  The body is
 * empty, so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2477 
/*
 * lxpr_read_net_ip_mr_vif(): placeholder read routine.  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2483 
2484 static void
2485 lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf)
2486 {
2487         uint32_t flags;
2488         char name[IFNAMSIZ];
2489         char ipv6addr[33];
2490 
2491         lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr);
2492         lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr,
2493             ip_mask_to_plen_v6(&ire->ire_mask_v6));
2494 
2495         /* punt on this for now */
2496         lxpr_uiobuf_printf(uiobuf, "%s %02x ",
2497             "00000000000000000000000000000000", 0);
2498 
2499         lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr);
2500         lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr);
2501 
2502         flags = ire->ire_flags &
2503             (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
2504         /* Linux's RTF_LOCAL equivalent */
2505         if (ire->ire_metrics.iulp_local)
2506                 flags |= 0x80000000;
2507 
2508         if (ire->ire_ill != NULL) {
2509                 ill_get_name(ire->ire_ill, name, sizeof (name));
2510                 lx_ifname_convert(name, LX_IF_FROMNATIVE);
2511         } else {
2512                 name[0] = '\0';
2513         }
2514 
2515         lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n",
2516             0, /* metric */
2517             ire->ire_refcnt,
2518             0,
2519             flags,
2520             name);
2521 }
2522 
2523 /* ARGSUSED */
2524 static void
2525 lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2526 {
2527         netstack_t *ns;
2528         ip_stack_t *ipst;
2529 
2530         ns = netstack_get_current();
2531         if (ns == NULL)
2532                 return;
2533         ipst = ns->netstack_ip;
2534 
2535         /*
2536          * LX branded zones are expected to have exclusive IP stack, hence
2537          * using ALL_ZONES as the zoneid filter.
2538          */
2539         ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst);
2540 
2541         netstack_rele(ns);
2542 }
2543 
/*
 * lxpr_read_net_mcfilter(): placeholder read routine.  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2549 
/*
 * lxpr_read_net_netstat(): placeholder read routine.  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2555 
/*
 * lxpr_read_net_raw(): placeholder read routine.  The body is empty, so
 * nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2561 
/*
 * Evaluates to non-zero when the IRE type 'type' has any of the listed type
 * bits set; lxpr_format_route_ipv4() uses it to leave such routes out of its
 * output.  The argument is parenthesized so that a compound expression
 * (e.g. "a | b") is masked as a whole rather than being split by operator
 * precedence.
 */
#define LXPR_SKIP_ROUTE(type)   \
        (((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \
        IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & (type)) != 0)
2565 
2566 static void
2567 lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf)
2568 {
2569         uint32_t flags;
2570         char name[IFNAMSIZ];
2571         ill_t *ill;
2572         ire_t *nire;
2573         ipif_t *ipif;
2574         ipaddr_t gateway;
2575 
2576         if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0)
2577                 return;
2578 
2579         /* These route flags have direct Linux equivalents */
2580         flags = ire->ire_flags &
2581             (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
2582 
2583         /*
2584          * Search for a suitable IRE for naming purposes.
2585          * On Linux, the default route is typically associated with the
2586          * interface used to access gateway.  The default IRE on Illumos
2587          * typically lacks an ill reference but its parent might have one.
2588          */
2589         nire = ire;
2590         do {
2591                 ill = nire->ire_ill;
2592                 nire = nire->ire_dep_parent;
2593         } while (ill == NULL && nire != NULL);
2594         if (ill != NULL) {
2595                 ill_get_name(ill, name, sizeof (name));
2596                 lx_ifname_convert(name, LX_IF_FROMNATIVE);
2597         } else {
2598                 name[0] = '*';
2599                 name[1] = '\0';
2600         }
2601 
2602         /*
2603          * Linux suppresses the gateway address for directly connected
2604          * interface networks.  To emulate this behavior, we walk all addresses
2605          * of a given route interface.  If one matches the gateway, it is
2606          * displayed as NULL.
2607          */
2608         gateway = ire->ire_gateway_addr;
2609         if ((ill = ire->ire_ill) != NULL) {
2610                 for (ipif = ill->ill_ipif; ipif != NULL;
2611                     ipif = ipif->ipif_next) {
2612                         if (ipif->ipif_lcl_addr == gateway) {
2613                                 gateway = 0;
2614                                 break;
2615                         }
2616                 }
2617         }
2618 
2619         lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2620             "%d\t%08X\t%d\t%u\t%u\n",
2621             name,
2622             ire->ire_addr,
2623             gateway,
2624             flags, 0, 0,
2625             0, /* priority */
2626             ire->ire_mask,
2627             0, 0, /* mss, window */
2628             ire->ire_metrics.iulp_rtt);
2629 }
2630 
2631 /* ARGSUSED */
2632 static void
2633 lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2634 {
2635         netstack_t *ns;
2636         ip_stack_t *ipst;
2637 
2638         lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t"
2639             "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
2640 
2641         ns = netstack_get_current();
2642         if (ns == NULL)
2643                 return;
2644         ipst = ns->netstack_ip;
2645 
2646         /*
2647          * LX branded zones are expected to have exclusive IP stack, hence
2648          * using ALL_ZONES as the zoneid filter.
2649          */
2650         ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst);
2651 
2652         netstack_rele(ns);
2653 }
2654 
/*
 * lxpr_read_net_rpc(): placeholder read routine.  The body is empty, so
 * nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2660 
/*
 * lxpr_read_net_rt_cache(): placeholder read routine.  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2666 
/*
 * lxpr_read_net_sockstat(): placeholder read routine.  The body is empty,
 * so nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2672 
/*
 * Describes one protocol section printed by lxpr_kstat_print_tab().
 *
 * lst_proto  - protocol name; lxpr_read_net_snmp() compares it against the
 *              ks_name of "mib2" class kstats, and it is printed (first
 *              letter upper-cased) as the line prefix.
 * lst_fields - NULL-terminated list of field names, in output order.  Each
 *              is matched against the names of the kstat's named entries.
 */
typedef struct lxpr_snmp_table {
        const char *lst_proto;
        const char *lst_fields[];
} lxpr_snmp_table_t;
2677 
/* Field list, in output order, for the "ip" protocol section. */
static lxpr_snmp_table_t lxpr_snmp_ip = { "ip",
        {
        "forwarding", "defaultTTL", "inReceives", "inHdrErrors",
        "inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards",
        "inDelivers", "outRequests", "outDiscards", "outNoRoutes",
        "reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs",
        "fragFails", "fragCreates",
        NULL
        }
};
/* Field list, in output order, for the "icmp" protocol section. */
static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp",
        {
        "inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds",
        "inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps",
        "inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps",
        "outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds",
        "outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos",
        "outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks",
        "outAddrMaskReps",
        NULL
        }
};
/* Field list, in output order, for the "tcp" protocol section. */
static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp",
        {
        "rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens",
        "passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs",
        "outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors",
        NULL
        }
};
/* Field list, in output order, for the "udp" protocol section. */
static lxpr_snmp_table_t lxpr_snmp_udp = { "udp",
        {
        "inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors",
        "sndbufErrors", "inCsumErrors",
        NULL
        }
};
2715 
/*
 * NULL-terminated list of the protocol sections that lxpr_read_net_snmp()
 * iterates over, in the order listed here.
 */
static lxpr_snmp_table_t *lxpr_net_snmptab[] = {
        &lxpr_snmp_ip,
        &lxpr_snmp_icmp,
        &lxpr_snmp_tcp,
        &lxpr_snmp_udp,
        NULL
};
2723 
2724 static void
2725 lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table,
2726     kstat_t *kn)
2727 {
2728         kstat_named_t *klist;
2729         char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN];
2730         int i, j, num;
2731         size_t size;
2732 
2733         klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
2734         if (klist == NULL)
2735                 return;
2736 
2737         /* Print the header line, fields capitalized */
2738         (void) strncpy(upname, table->lst_proto, KSTAT_STRLEN);
2739         upname[0] = toupper(upname[0]);
2740         lxpr_uiobuf_printf(uiobuf, "%s:", upname);
2741         for (i = 0; table->lst_fields[i] != NULL; i++) {
2742                 (void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN);
2743                 upfield[0] = toupper(upfield[0]);
2744                 lxpr_uiobuf_printf(uiobuf, " %s", upfield);
2745         }
2746         lxpr_uiobuf_printf(uiobuf, "\n%s:", upname);
2747 
2748         /* Then loop back through to print the value line. */
2749         for (i = 0; table->lst_fields[i] != NULL; i++) {
2750                 kstat_named_t *kpoint = NULL;
2751                 for (j = 0; j < num; j++) {
2752                         if (strncmp(klist[j].name, table->lst_fields[i],
2753                             KSTAT_STRLEN) == 0) {
2754                                 kpoint = &klist[j];
2755                                 break;
2756                         }
2757                 }
2758                 if (kpoint == NULL) {
2759                         /* Output 0 for unknown fields */
2760                         lxpr_uiobuf_printf(uiobuf, " 0");
2761                 } else {
2762                         switch (kpoint->data_type) {
2763                         case KSTAT_DATA_INT32:
2764                                 lxpr_uiobuf_printf(uiobuf, " %d",
2765                                     kpoint->value.i32);
2766                                 break;
2767                         case KSTAT_DATA_UINT32:
2768                                 lxpr_uiobuf_printf(uiobuf, " %u",
2769                                     kpoint->value.ui32);
2770                                 break;
2771                         case KSTAT_DATA_INT64:
2772                                 lxpr_uiobuf_printf(uiobuf, " %ld",
2773                                     kpoint->value.l);
2774                                 break;
2775                         case KSTAT_DATA_UINT64:
2776                                 lxpr_uiobuf_printf(uiobuf, " %lu",
2777                                     kpoint->value.ul);
2778                                 break;
2779                         }
2780                 }
2781         }
2782         lxpr_uiobuf_printf(uiobuf, "\n");
2783         kmem_free(klist, size);
2784 }
2785 
2786 /* ARGSUSED */
2787 static void
2788 lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2789 {
2790         kstat_t *ksr;
2791         kstat_t ks0;
2792         lxpr_snmp_table_t **table = lxpr_net_snmptab;
2793         int i, t, nidx;
2794         size_t sidx;
2795 
2796         ks0.ks_kid = 0;
2797         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
2798         if (ksr == NULL)
2799                 return;
2800 
2801         for (t = 0; table[t] != NULL; t++) {
2802                 for (i = 0; i < nidx; i++) {
2803                         if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0)
2804                                 continue;
2805                         if (strncmp(ksr[i].ks_name, table[t]->lst_proto,
2806                             KSTAT_STRLEN) == 0) {
2807                                 lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]);
2808                                 break;
2809                         }
2810                 }
2811         }
2812         kmem_free(ksr, sidx);
2813 }
2814 
/*
 * lxpr_read_net_stat(): placeholder read routine.  The body is empty, so
 * nothing is written to uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2820 
2821 static int
2822 lxpr_convert_tcp_state(int st)
2823 {
2824         /*
2825          * Derived from the enum located in the Linux kernel sources:
2826          * include/net/tcp_states.h
2827          */
2828         switch (st) {
2829         case TCPS_ESTABLISHED:
2830                 return (1);
2831         case TCPS_SYN_SENT:
2832                 return (2);
2833         case TCPS_SYN_RCVD:
2834                 return (3);
2835         case TCPS_FIN_WAIT_1:
2836                 return (4);
2837         case TCPS_FIN_WAIT_2:
2838                 return (5);
2839         case TCPS_TIME_WAIT:
2840                 return (6);
2841         case TCPS_CLOSED:
2842                 return (7);
2843         case TCPS_CLOSE_WAIT:
2844                 return (8);
2845         case TCPS_LAST_ACK:
2846                 return (9);
2847         case TCPS_LISTEN:
2848                 return (10);
2849         case TCPS_CLOSING:
2850                 return (11);
2851         default:
2852                 /* No translation for TCPS_IDLE, TCPS_BOUND or anything else */
2853                 return (0);
2854         }
2855 }
2856 
2857 static void
2858 lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
2859 {
2860         int i, sl = 0;
2861         connf_t *connfp;
2862         conn_t *connp;
2863         netstack_t *ns;
2864         ip_stack_t *ipst;
2865 
2866         ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
2867         if (ipver == IPV4_VERSION) {
2868                 lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address   "
2869                     "st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout "
2870                     "inode\n");
2871         } else {
2872                 lxpr_uiobuf_printf(uiobuf, "  sl  "
2873                     "local_address                         "
2874                     "remote_address                        "
2875                     "st tx_queue rx_queue tr tm->when retrnsmt   "
2876                     "uid  timeout inode\n");
2877         }
2878         /*
2879          * Due to differences between the Linux and illumos TCP
2880          * implementations, some data will be omitted from the output here.
2881          *
2882          * Valid fields:
2883          *  - local_address
2884          *  - remote_address
2885          *  - st
2886          *  - tx_queue
2887          *  - rx_queue
2888          *  - uid
2889          *  - inode
2890          *
2891          * Omitted/invalid fields
2892          *  - tr
2893          *  - tm->when
2894          *  - retrnsmt
2895          *  - timeout
2896          */
2897 
2898         ns = netstack_get_current();
2899         if (ns == NULL)
2900                 return;
2901         ipst = ns->netstack_ip;
2902 
2903         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2904                 connfp = &ipst->ips_ipcl_globalhash_fanout[i];
2905                 connp = NULL;
2906                 while ((connp =
2907                     ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) {
2908                         tcp_t *tcp;
2909                         vattr_t attr;
2910                         sonode_t *so = (sonode_t *)connp->conn_upper_handle;
2911                         vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
2912                         if (connp->conn_ipversion != ipver)
2913                                 continue;
2914                         tcp = connp->conn_tcp;
2915                         if (ipver == IPV4_VERSION) {
2916                                 lxpr_uiobuf_printf(uiobuf,
2917                                     "%4d: %08X:%04X %08X:%04X ",
2918                                     ++sl,
2919                                     connp->conn_laddr_v4,
2920                                     ntohs(connp->conn_lport),
2921                                     connp->conn_faddr_v4,
2922                                     ntohs(connp->conn_fport));
2923                         } else {
2924                                 lxpr_uiobuf_printf(uiobuf, "%4d: "
2925                                     "%08X%08X%08X%08X:%04X "
2926                                     "%08X%08X%08X%08X:%04X ",
2927                                     ++sl,
2928                                     connp->conn_laddr_v6.s6_addr32[0],
2929                                     connp->conn_laddr_v6.s6_addr32[1],
2930                                     connp->conn_laddr_v6.s6_addr32[2],
2931                                     connp->conn_laddr_v6.s6_addr32[3],
2932                                     ntohs(connp->conn_lport),
2933                                     connp->conn_faddr_v6.s6_addr32[0],
2934                                     connp->conn_faddr_v6.s6_addr32[1],
2935                                     connp->conn_faddr_v6.s6_addr32[2],
2936                                     connp->conn_faddr_v6.s6_addr32[3],
2937                                     ntohs(connp->conn_fport));
2938                         }
2939 
2940                         /* fetch the simulated inode for the socket */
2941                         if (vp == NULL ||
2942                             VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
2943                                 attr.va_nodeid = 0;
2944 
2945                         lxpr_uiobuf_printf(uiobuf,
2946                             "%02X %08X:%08X %02X:%08X %08X "
2947                             "%5u %8d %lu %d %p %u %u %u %u %d\n",
2948                             lxpr_convert_tcp_state(tcp->tcp_state),
2949                             tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */
2950                             0, 0, /* tr, when */
2951                             0, /* per-connection rexmits aren't tracked today */
2952                             connp->conn_cred->cr_uid,
2953                             0, /* timeout */
2954                             /* inode + more */
2955                             (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0);
2956                 }
2957         }
2958         netstack_rele(ns);
2959 }
2960 
/*
 * lxpr_read_net_tcp(): read handler that emits the TCP connection listing
 * for IPV4_VERSION by delegating to lxpr_format_tcp().  The lxpnp argument
 * is not used.
 */
/* ARGSUSED */
static void
lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        lxpr_format_tcp(uiobuf, IPV4_VERSION);
}
2967 
/*
 * lxpr_read_net_tcp6(): read handler that emits the TCP connection listing
 * for IPV6_VERSION by delegating to lxpr_format_tcp().  The lxpnp argument
 * is not used.
 */
/* ARGSUSED */
static void
lxpr_read_net_tcp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        lxpr_format_tcp(uiobuf, IPV6_VERSION);
}
2974 
2975 static void
2976 lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
2977 {
2978         int i, sl = 0;
2979         connf_t *connfp;
2980         conn_t *connp;
2981         netstack_t *ns;
2982         ip_stack_t *ipst;
2983 
2984         ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
2985         if (ipver == IPV4_VERSION) {
2986                 lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address"
2987                     "   st tx_queue rx_queue tr tm->when retrnsmt   uid"
2988                     "  timeout inode ref pointer drops\n");
2989         } else {
2990                 lxpr_uiobuf_printf(uiobuf, "  sl  "
2991                     "local_address                         "
2992                     "remote_address                        "
2993                     "st tx_queue rx_queue tr tm->when retrnsmt   "
2994                     "uid  timeout inode ref pointer drops\n");
2995         }
2996         /*
2997          * Due to differences between the Linux and illumos UDP
2998          * implementations, some data will be omitted from the output here.
2999          *
3000          * Valid fields:
3001          *  - local_address
3002          *  - remote_address
3003          *  - st: limited
3004          *  - uid
3005          *
3006          * Omitted/invalid fields
3007          *  - tx_queue
3008          *  - rx_queue
3009          *  - tr
3010          *  - tm->when
3011          *  - retrnsmt
3012          *  - timeout
3013          *  - inode
3014          */
3015 
3016         ns = netstack_get_current();
3017         if (ns == NULL)
3018                 return;
3019         ipst = ns->netstack_ip;
3020 
3021         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
3022                 connfp = &ipst->ips_ipcl_globalhash_fanout[i];
3023                 connp = NULL;
3024                 while ((connp =
3025                     ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) {
3026                         udp_t *udp;
3027                         int state = 0;
3028                         vattr_t attr;
3029                         sonode_t *so = (sonode_t *)connp->conn_upper_handle;
3030                         vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
3031                         if (connp->conn_ipversion != ipver)
3032                                 continue;
3033                         udp = connp->conn_udp;
3034                         if (ipver == IPV4_VERSION) {
3035                                 lxpr_uiobuf_printf(uiobuf,
3036                                     "%4d: %08X:%04X %08X:%04X ",
3037                                     ++sl,
3038                                     connp->conn_laddr_v4,
3039                                     ntohs(connp->conn_lport),
3040                                     connp->conn_faddr_v4,
3041                                     ntohs(connp->conn_fport));
3042                         } else {
3043                                 lxpr_uiobuf_printf(uiobuf, "%4d: "
3044                                     "%08X%08X%08X%08X:%04X "
3045                                     "%08X%08X%08X%08X:%04X ",
3046                                     ++sl,
3047                                     connp->conn_laddr_v6.s6_addr32[0],
3048                                     connp->conn_laddr_v6.s6_addr32[1],
3049                                     connp->conn_laddr_v6.s6_addr32[2],
3050                                     connp->conn_laddr_v6.s6_addr32[3],
3051                                     ntohs(connp->conn_lport),
3052                                     connp->conn_faddr_v6.s6_addr32[0],
3053                                     connp->conn_faddr_v6.s6_addr32[1],
3054                                     connp->conn_faddr_v6.s6_addr32[2],
3055                                     connp->conn_faddr_v6.s6_addr32[3],
3056                                     ntohs(connp->conn_fport));
3057                         }
3058 
3059                         switch (udp->udp_state) {
3060                         case TS_UNBND:
3061                         case TS_IDLE:
3062                                 state = 7;
3063                                 break;
3064                         case TS_DATA_XFER:
3065                                 state = 1;
3066                                 break;
3067                         }
3068 
3069                         /* fetch the simulated inode for the socket */
3070                         if (vp == NULL ||
3071                             VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
3072                                 attr.va_nodeid = 0;
3073 
3074                         lxpr_uiobuf_printf(uiobuf,
3075                             "%02X %08X:%08X %02X:%08X %08X "
3076                             "%5u %8d %lu %d %p %d\n",
3077                             state,
3078                             0, 0, /* rx/tx queue */
3079                             0, 0, /* tr, when */
3080                             0, /* retrans */
3081                             connp->conn_cred->cr_uid,
3082                             0, /* timeout */
3083                             /* inode, ref, pointer, drops */
3084                             (ino_t)attr.va_nodeid, 0, NULL, 0);
3085                 }
3086         }
3087         netstack_rele(ns);
3088 }
3089 
/*
 * lxpr_read_net_udp(): read handler that emits the UDP socket listing for
 * IPV4_VERSION by delegating to lxpr_format_udp().  The lxpnp argument is
 * not used.
 */
/* ARGSUSED */
static void
lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        lxpr_format_udp(uiobuf, IPV4_VERSION);
}
3096 
/*
 * lxpr_read_net_udp6(): read handler that emits the UDP socket listing for
 * IPV6_VERSION by delegating to lxpr_format_udp().  The lxpnp argument is
 * not used.
 */
/* ARGSUSED */
static void
lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        lxpr_format_udp(uiobuf, IPV6_VERSION);
}
3103 
3104 /* ARGSUSED */
3105 static void
3106 lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3107 {
3108         sonode_t *so;
3109         zoneid_t zoneid = getzoneid();
3110 
3111         lxpr_uiobuf_printf(uiobuf, "Num       RefCount Protocol Flags    Type "
3112             "St Inode Path\n");
3113 
3114         mutex_enter(&socklist.sl_lock);
3115         for (so = socklist.sl_list; so != NULL;
3116             so = _SOTOTPI(so)->sti_next_so) {
3117                 vnode_t *vp = so->so_vnode;
3118                 vattr_t attr;
3119                 sotpi_info_t *sti;
3120                 const char *name = NULL;
3121                 int status = 0;
3122                 int type = 0;
3123                 int flags = 0;
3124 
3125                 /* Only process active sonodes in this zone */
3126                 if (so->so_count == 0 || so->so_zoneid != zoneid)
3127                         continue;
3128 
3129                 /*
3130                  * Grab the inode, if possible.
3131                  * This must be done before entering so_lock.
3132                  */
3133                 if (vp == NULL ||
3134                     VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
3135                         attr.va_nodeid = 0;
3136 
3137                 mutex_enter(&so->so_lock);
3138                 sti = _SOTOTPI(so);
3139 
3140                 if (sti->sti_laddr_sa != NULL &&
3141                     sti->sti_laddr_len > 0) {
3142                         name = sti->sti_laddr_sa->sa_data;
3143                 } else if (sti->sti_faddr_sa != NULL &&
3144                     sti->sti_faddr_len > 0) {
3145                         name = sti->sti_faddr_sa->sa_data;
3146                 }
3147 
3148                 /*
3149                  * Derived from enum values in Linux kernel source:
3150                  * include/uapi/linux/net.h
3151                  */
3152                 if ((so->so_state & SS_ISDISCONNECTING) != 0) {
3153                         status = 4;
3154                 } else if ((so->so_state & SS_ISCONNECTING) != 0) {
3155                         status = 2;
3156                 } else if ((so->so_state & SS_ISCONNECTED) != 0) {
3157                         status = 3;
3158                 } else {
3159                         status = 1;
3160                         /* Add ACC flag for stream-type server sockets */
3161                         if (so->so_type != SOCK_DGRAM &&
3162                             sti->sti_laddr_sa != NULL)
3163                                 flags |= 0x10000;
3164                 }
3165 
3166                 /* Convert to Linux type */
3167                 switch (so->so_type) {
3168                 case SOCK_DGRAM:
3169                         type = 2;
3170                         break;
3171                 case SOCK_SEQPACKET:
3172                         type = 5;
3173                         break;
3174                 default:
3175                         type = 1;
3176                 }
3177 
3178                 lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu",
3179                     so,
3180                     so->so_count,
3181                     0, /* proto, always 0 */
3182                     flags,
3183                     type,
3184                     status,
3185                     (ino_t)attr.va_nodeid);
3186 
3187                 /*
3188                  * Due to shortcomings in the abstract socket emulation, they
3189                  * cannot be properly represented here (as @<path>).
3190                  *
3191                  * This will be the case until they are better implemented.
3192                  */
3193                 if (name != NULL)
3194                         lxpr_uiobuf_printf(uiobuf, " %s\n", name);
3195                 else
3196                         lxpr_uiobuf_printf(uiobuf, "\n");
3197                 mutex_exit(&so->so_lock);
3198         }
3199         mutex_exit(&socklist.sl_lock);
3200 }
3201 
3202 /*
3203  * lxpr_read_kmsg(): read the contents of the kernel message queue. We
3204  * translate this into the reception of console messages for this zone; each
3205  * read copies out a single zone console message, or blocks until the next one
3206  * is produced, unless we're open non-blocking, in which case we return after
3207  * 1ms.
3208  */
3209 
3210 #define LX_KMSG_PRI     "<0>"
3211 
3212 static void
3213 lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh)
3214 {
3215         mblk_t          *mp;
3216         timestruc_t     to;
3217         timestruc_t     *tp = NULL;
3218 
3219         ASSERT(lxpnp->lxpr_type == LXPR_KMSG);
3220 
3221         if (lxpr_uiobuf_nonblock(uiobuf)) {
3222                 to.tv_sec = 0;
3223                 to.tv_nsec = 1000000; /* 1msec */
3224                 tp = &to;
3225         }
3226 
3227         if (ldi_getmsg(lh, &mp, tp) == 0) {
3228                 /*
3229                  * lx procfs doesn't like successive reads to the same file
3230                  * descriptor unless we do an explicit rewind each time.
3231                  */
3232                 lxpr_uiobuf_seek(uiobuf, 0);
3233 
3234                 lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
3235                     mp->b_cont->b_rptr);
3236 
3237                 freemsg(mp);
3238         }
3239 }
3240 
3241 /*
3242  * lxpr_read_loadavg(): read the contents of the "loadavg" file.  We do just
3243  * enough for uptime and other simple lxproc readers to work
3244  */
3245 extern int nthread;
3246 
3247 static void
3248 lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3249 {
3250         ulong_t avenrun1;
3251         ulong_t avenrun5;
3252         ulong_t avenrun15;
3253         ulong_t avenrun1_cs;
3254         ulong_t avenrun5_cs;
3255         ulong_t avenrun15_cs;
3256         int loadavg[3];
3257         int *loadbuf;
3258         cpupart_t *cp;
3259         zone_t *zone = LXPTOZ(lxpnp);
3260 
3261         uint_t nrunnable = 0;
3262         rctl_qty_t nlwps;
3263 
3264         ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
3265 
3266         mutex_enter(&cpu_lock);
3267 
3268         /*
3269          * Need to add up values over all CPU partitions. If pools are active,
3270          * only report the values of the zone's partition, which by definition
3271          * includes the current CPU.
3272          */
3273         if (pool_pset_enabled()) {
3274                 psetid_t psetid = zone_pset_get(curproc->p_zone);
3275 
3276                 ASSERT(curproc->p_zone != &zone0);
3277                 cp = CPU->cpu_part;
3278 
3279                 nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
3280                 (void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
3281                 loadbuf = &loadavg[0];
3282         } else {
3283                 cp = cp_list_head;
3284                 do {
3285                         nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
3286                 } while ((cp = cp->cp_next) != cp_list_head);
3287 
3288                 loadbuf = zone == global_zone ?
3289                     &avenrun[0] : zone->zone_avenrun;
3290         }
3291 
3292         /*
3293          * If we're in the non-global zone, we'll report the total number of
3294          * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
3295          * otherwise will just use nthread (which will include kernel threads,
3296          * but should be good enough for lxproc).
3297          */
3298         nlwps = zone == global_zone ? nthread : zone->zone_nlwps;
3299 
3300         mutex_exit(&cpu_lock);
3301 
3302         avenrun1 = loadbuf[0] >> FSHIFT;
3303         avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
3304         avenrun5 = loadbuf[1] >> FSHIFT;
3305         avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
3306         avenrun15 = loadbuf[2] >> FSHIFT;
3307         avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
3308 
3309         lxpr_uiobuf_printf(uiobuf,
3310             "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
3311             avenrun1, avenrun1_cs,
3312             avenrun5, avenrun5_cs,
3313             avenrun15, avenrun15_cs,
3314             nrunnable, nlwps, 0);
3315 }
3316 
3317 /*
3318  * lxpr_read_meminfo(): read the contents of the "meminfo" file.
3319  */
3320 static void
3321 lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3322 {
3323         zone_t *zone = LXPTOZ(lxpnp);
3324         int global = zone == global_zone;
3325         long total_mem, free_mem, total_swap, used_swap;
3326 
3327         ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
3328 
3329         if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
3330                 total_mem = physmem * PAGESIZE;
3331                 free_mem = freemem * PAGESIZE;
3332         } else {
3333                 total_mem = zone->zone_phys_mem_ctl;
3334                 free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
3335         }
3336 
3337         if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
3338                 total_swap = k_anoninfo.ani_max * PAGESIZE;
3339                 used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
3340         } else {
3341                 mutex_enter(&zone->zone_mem_lock);
3342                 total_swap = zone->zone_max_swap_ctl;
3343                 used_swap = zone->zone_max_swap;
3344                 mutex_exit(&zone->zone_mem_lock);
3345         }
3346 
3347         lxpr_uiobuf_printf(uiobuf,
3348             "MemTotal:  %8lu kB\n"
3349             "MemFree:   %8lu kB\n"
3350             "MemShared: %8u kB\n"
3351             "Buffers:   %8u kB\n"
3352             "Cached:    %8u kB\n"
3353             "SwapCached:%8u kB\n"
3354             "Active:    %8u kB\n"
3355             "Inactive:  %8u kB\n"
3356             "HighTotal: %8u kB\n"
3357             "HighFree:  %8u kB\n"
3358             "LowTotal:  %8u kB\n"
3359             "LowFree:   %8u kB\n"
3360             "SwapTotal: %8lu kB\n"
3361             "SwapFree:  %8lu kB\n",
3362             btok(total_mem),                            /* MemTotal */
3363             btok(free_mem),                             /* MemFree */
3364             0,                                          /* MemShared */
3365             0,                                          /* Buffers */
3366             0,                                          /* Cached */
3367             0,                                          /* SwapCached */
3368             0,                                          /* Active */
3369             0,                                          /* Inactive */
3370             0,                                          /* HighTotal */
3371             0,                                          /* HighFree */
3372             btok(total_mem),                            /* LowTotal */
3373             btok(free_mem),                             /* LowFree */
3374             btok(total_swap),                           /* SwapTotal */
3375             btok(total_swap - used_swap));              /* SwapFree */
3376 }
3377 
3378 /*
3379  * lxpr_read_mounts():
3380  */
3381 /* ARGSUSED */
3382 static void
3383 lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3384 {
3385         struct vfs *vfsp;
3386         struct vfs *vfslist;
3387         zone_t *zone = LXPTOZ(lxpnp);
3388         struct print_data {
3389                 refstr_t *vfs_mntpt;
3390                 refstr_t *vfs_resource;
3391                 uint_t vfs_flag;
3392                 int vfs_fstype;
3393                 struct print_data *next;
3394         } *print_head = NULL;
3395         struct print_data **print_tail = &print_head;
3396         struct print_data *printp;
3397 
3398         vfs_list_read_lock();
3399 
3400         if (zone == global_zone) {
3401                 vfsp = vfslist = rootvfs;
3402         } else {
3403                 vfsp = vfslist = zone->zone_vfslist;
3404                 /*
3405                  * If the zone has a root entry, it will be the first in
3406                  * the list.  If it doesn't, we conjure one up.
3407                  */
3408                 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
3409                     zone->zone_rootpath) != 0) {
3410                         struct vfs *tvfsp;
3411                         /*
3412                          * The root of the zone is not a mount point.  The vfs
3413                          * we want to report is that of the zone's root vnode.
3414                          */
3415                         tvfsp = zone->zone_rootvp->v_vfsp;
3416 
3417                         lxpr_uiobuf_printf(uiobuf,
3418                             "/ / %s %s 0 0\n",
3419                             vfssw[tvfsp->vfs_fstype].vsw_name,
3420                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
3421 
3422                 }
3423                 if (vfslist == NULL) {
3424                         vfs_list_unlock();
3425                         return;
3426                 }
3427         }
3428 
3429         /*
3430          * Later on we have to do a lookupname, which can end up causing
3431          * another vfs_list_read_lock() to be called. Which can lead to a
3432          * deadlock. To avoid this, we extract the data we need into a local
3433          * list, then we can run this list without holding vfs_list_read_lock()
3434          * We keep the list in the same order as the vfs_list
3435          */
3436         do {
3437                 /* Skip mounts we shouldn't show */
3438                 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
3439                         goto nextfs;
3440                 }
3441 
3442                 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
3443                 refstr_hold(vfsp->vfs_mntpt);
3444                 printp->vfs_mntpt = vfsp->vfs_mntpt;
3445                 refstr_hold(vfsp->vfs_resource);
3446                 printp->vfs_resource = vfsp->vfs_resource;
3447                 printp->vfs_flag = vfsp->vfs_flag;
3448                 printp->vfs_fstype = vfsp->vfs_fstype;
3449                 printp->next = NULL;
3450 
3451                 *print_tail = printp;
3452                 print_tail = &printp->next;
3453 
3454 nextfs:
3455                 vfsp = (zone == global_zone) ?
3456                     vfsp->vfs_next : vfsp->vfs_zone_next;
3457 
3458         } while (vfsp != vfslist);
3459 
3460         vfs_list_unlock();
3461 
3462         /*
3463          * now we can run through what we've extracted without holding
3464          * vfs_list_read_lock()
3465          */
3466         printp = print_head;
3467         while (printp != NULL) {
3468                 struct print_data *printp_next;
3469                 const char *resource;
3470                 char *mntpt;
3471                 struct vnode *vp;
3472                 int error;
3473 
3474                 mntpt = (char *)refstr_value(printp->vfs_mntpt);
3475                 resource = refstr_value(printp->vfs_resource);
3476 
3477                 if (mntpt != NULL && mntpt[0] != '\0')
3478                         mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
3479                 else
3480                         mntpt = "-";
3481 
3482                 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
3483 
3484                 if (error != 0)
3485                         goto nextp;
3486 
3487                 if (!(vp->v_flag & VROOT)) {
3488                         VN_RELE(vp);
3489                         goto nextp;
3490                 }
3491                 VN_RELE(vp);
3492 
3493                 if (resource != NULL && resource[0] != '\0') {
3494                         if (resource[0] == '/') {
3495                                 resource = ZONE_PATH_VISIBLE(resource, zone) ?
3496                                     ZONE_PATH_TRANSLATE(resource, zone) :
3497                                     mntpt;
3498                         }
3499                 } else {
3500                         resource = "-";
3501                 }
3502 
3503                 lxpr_uiobuf_printf(uiobuf,
3504                     "%s %s %s %s 0 0\n",
3505                     resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
3506                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
3507 
3508 nextp:
3509                 printp_next = printp->next;
3510                 refstr_rele(printp->vfs_mntpt);
3511                 refstr_rele(printp->vfs_resource);
3512                 kmem_free(printp, sizeof (*printp));
3513                 printp = printp_next;
3514 
3515         }
3516 }
3517 
3518 /*
3519  * lxpr_read_partitions():
3520  *
3521  * Over the years, /proc/partitions has been made considerably smaller -- to
3522  * the point that it really is only major number, minor number, number of
3523  * blocks (which we report as 0), and partition name.
3524  *
3525  * We support this because some things want to see it to make sense of
3526  * /proc/diskstats, and also because "fdisk -l" and a few other things look
3527  * here to find all disks on the system.
3528  */
3529 /* ARGSUSED */
3530 static void
3531 lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3532 {
3533 
3534         kstat_t *ksr;
3535         kstat_t ks0;
3536         int nidx, num, i;
3537         size_t sidx, size;
3538         zfs_cmd_t *zc;
3539         nvlist_t *nv = NULL;
3540         nvpair_t *elem = NULL;
3541         lxpr_mnt_t *mnt;
3542         lxpr_zfs_iter_t zfsi;
3543 
3544         ASSERT(lxpnp->lxpr_type == LXPR_PARTITIONS);
3545 
3546         ks0.ks_kid = 0;
3547         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
3548 
3549         if (ksr == NULL)
3550                 return;
3551 
3552         lxpr_uiobuf_printf(uiobuf, "major minor  #blocks  name\n\n");
3553 
3554         for (i = 1; i < nidx; i++) {
3555                 kstat_t *ksp = &ksr[i];
3556                 kstat_io_t *kip;
3557 
3558                 if (ksp->ks_type != KSTAT_TYPE_IO ||
3559                     strcmp(ksp->ks_class, "disk") != 0)
3560                         continue;
3561 
3562                 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
3563                     &size, &num)) == NULL)
3564                         continue;
3565 
3566                 if (size < sizeof (kstat_io_t)) {
3567                         kmem_free(kip, size);
3568                         continue;
3569                 }
3570 
3571                 lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d %s\n",
3572                     mod_name_to_major(ksp->ks_module),
3573                     ksp->ks_instance, 0, ksp->ks_name);
3574 
3575                 kmem_free(kip, size);
3576         }
3577 
3578         kmem_free(ksr, sidx);
3579 
3580         /* If we never got to open the zfs LDI, then stop now. */
3581         mnt = (lxpr_mnt_t *)lxpnp->lxpr_vnode->v_vfsp->vfs_data;
3582         if (mnt->lxprm_zfs_isopen == B_FALSE)
3583                 return;
3584 
3585         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3586 
3587         if (lxpr_zfs_list_pools(mnt, zc, &nv) != 0)
3588                 goto out;
3589 
3590         while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
3591                 char *pool = nvpair_name(elem);
3592 
3593                 bzero(&zfsi, sizeof (lxpr_zfs_iter_t));
3594                 while (lxpr_zfs_next_zvol(mnt, pool, zc, &zfsi) == 0) {
3595                         major_t major;
3596                         minor_t minor;
3597                         if (lxpr_zvol_dev(mnt, zc->zc_name, &major, &minor)
3598                             != 0)
3599                                 continue;
3600 
3601                         lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d zvol/dsk/%s\n",
3602                             major, minor, 0, zc->zc_name);
3603                 }
3604         }
3605 
3606         nvlist_free(nv);
3607 out:
3608         kmem_free(zc, sizeof (zfs_cmd_t));
3609 }
3610 
3611 /*
3612  * lxpr_read_diskstats():
3613  *
3614  * See the block comment above the per-device output-generating line for the
3615  * details of the format.
3616  */
3617 /* ARGSUSED */
3618 static void
3619 lxpr_read_diskstats(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3620 {
3621         kstat_t *ksr;
3622         kstat_t ks0;
3623         int nidx, num, i;
3624         size_t sidx, size;
3625 
3626         ASSERT(lxpnp->lxpr_type == LXPR_DISKSTATS);
3627 
3628         ks0.ks_kid = 0;
3629         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
3630 
3631         if (ksr == NULL)
3632                 return;
3633 
3634         for (i = 1; i < nidx; i++) {
3635                 kstat_t *ksp = &ksr[i];
3636                 kstat_io_t *kip;
3637 
3638                 if (ksp->ks_type != KSTAT_TYPE_IO ||
3639                     strcmp(ksp->ks_class, "disk") != 0)
3640                         continue;
3641 
3642                 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
3643                     &size, &num)) == NULL)
3644                         continue;
3645 
3646                 if (size < sizeof (kstat_io_t)) {
3647                         kmem_free(kip, size);
3648                         continue;
3649                 }
3650 
3651                 /*
3652                  * /proc/diskstats is defined to have one line of output for
3653                  * each block device, with each line containing the following
3654                  * 14 fields:
3655                  *
3656                  *      1 - major number
3657                  *      2 - minor mumber
3658                  *      3 - device name
3659                  *      4 - reads completed successfully
3660                  *      5 - reads merged
3661                  *      6 - sectors read
3662                  *      7 - time spent reading (ms)
3663                  *      8 - writes completed
3664                  *      9 - writes merged
3665                  *      10 - sectors written
3666                  *      11 - time spent writing (ms)
3667                  *      12 - I/Os currently in progress
3668                  *      13 - time spent doing I/Os (ms)
3669                  *      14 - weighted time spent doing I/Os (ms)
3670                  *
3671                  * One small hiccup:  we don't actually keep track of time
3672                  * spent reading vs. time spent writing -- we keep track of
3673                  * time waiting vs. time actually performing I/O.  While we
3674                  * could divide the total time by the I/O mix (making the
3675                  * obviously wrong assumption that I/O operations all take the
3676                  * same amount of time), this has the undesirable side-effect
3677                  * of moving backwards.  Instead, we report the total time
3678                  * (read + write) for all three stats (read, write, total).
3679                  * This is also a lie of sorts, but it should be more
3680                  * immediately clear to the user that reads and writes are
3681                  * each being double-counted as the other.
3682                  */
3683                 lxpr_uiobuf_printf(uiobuf, "%4d %7d %s "
3684                     "%llu %llu %llu %llu "
3685                     "%llu %llu %llu %llu "
3686                     "%llu %llu %llu\n",
3687                     mod_name_to_major(ksp->ks_module),
3688                     ksp->ks_instance, ksp->ks_name,
3689                     (uint64_t)kip->reads, 0LL,
3690                     kip->nread / (uint64_t)LXPR_SECTOR_SIZE,
3691                     (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3692                     (uint64_t)kip->writes, 0LL,
3693                     kip->nwritten / (uint64_t)LXPR_SECTOR_SIZE,
3694                     (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3695                     (uint64_t)(kip->rcnt + kip->wcnt),
3696                     (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3697                     (kip->rlentime + kip->wlentime) /
3698                     (uint64_t)(NANOSEC / MILLISEC));
3699 
3700                 kmem_free(kip, size);
3701         }
3702 
3703         kmem_free(ksr, sidx);
3704 }
3705 
3706 /*
3707  * lxpr_read_version(): read the contents of the "version" file.
3708  */
3709 /* ARGSUSED */
3710 static void
3711 lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3712 {
3713         lx_zone_data_t *lxzd = ztolxzd(LXPTOZ(lxpnp));
3714         lx_proc_data_t *lxpd = ptolxproc(curproc);
3715         const char *release = lxzd->lxzd_kernel_release;
3716         const char *version = lxzd->lxzd_kernel_version;
3717 
3718         /* Use per-process overrides, if specified */
3719         if (lxpd != NULL && lxpd->l_uname_release[0] != '\0') {
3720                 release = lxpd->l_uname_release;
3721         }
3722         if (lxpd != NULL && lxpd->l_uname_version[0] != '\0') {
3723                 version = lxpd->l_uname_version;
3724         }
3725 
3726         lxpr_uiobuf_printf(uiobuf,
3727             "%s version %s (%s version %d.%d.%d) %s\n",
3728             LX_UNAME_SYSNAME, release,
3729 #if defined(__GNUC__)
3730             "gcc", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
3731 #else
3732             "cc", 1, 0, 0,
3733 #endif
3734             version);
3735 }
3736 
3737 /*
3738  * lxpr_read_stat(): read the contents of the "stat" file.
3739  *
3740  */
3741 /* ARGSUSED */
3742 static void
3743 lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3744 {
3745         cpu_t *cp, *cpstart;
3746         int pools_enabled;
3747         ulong_t idle_cum = 0;
3748         ulong_t sys_cum  = 0;
3749         ulong_t user_cum = 0;
3750         ulong_t irq_cum = 0;
3751         ulong_t cpu_nrunnable_cum = 0;
3752         ulong_t w_io_cum = 0;
3753 
3754         ulong_t pgpgin_cum    = 0;
3755         ulong_t pgpgout_cum   = 0;
3756         ulong_t pgswapout_cum = 0;
3757         ulong_t pgswapin_cum  = 0;
3758         ulong_t intr_cum = 0;
3759         ulong_t pswitch_cum = 0;
3760         ulong_t forks_cum = 0;
3761         hrtime_t msnsecs[NCMSTATES];
3762         /* is the emulated release > 2.4 */
3763         boolean_t newer_than24 = lx_kern_release_cmp(LXPTOZ(lxpnp), "2.4") > 0;
3764         /* temporary variable since scalehrtime modifies data in place */
3765         hrtime_t tmptime;
3766 
3767         ASSERT(lxpnp->lxpr_type == LXPR_STAT);
3768 
3769         mutex_enter(&cpu_lock);
3770         pools_enabled = pool_pset_enabled();
3771 
3772         /* Calculate cumulative stats */
3773         cp = cpstart = CPU->cpu_part->cp_cpulist;
3774         do {
3775                 int i;
3776 
3777                 /*
3778                  * Don't count CPUs that aren't even in the system
3779                  * or aren't up yet.
3780                  */
3781                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3782                         continue;
3783                 }
3784 
3785                 get_cpu_mstate(cp, msnsecs);
3786 
3787                 idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3788                 sys_cum  += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3789                 user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
3790 
3791                 pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
3792                 pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
3793                 pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
3794                 pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
3795 
3796 
3797                 if (newer_than24) {
3798                         cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
3799                         w_io_cum += CPU_STATS(cp, sys.iowait);
3800                         for (i = 0; i < NCMSTATES; i++) {
3801                                 tmptime = cp->cpu_intracct[i];
3802                                 scalehrtime(&tmptime);
3803                                 irq_cum += NSEC_TO_TICK(tmptime);
3804                         }
3805                 }
3806 
3807                 for (i = 0; i < PIL_MAX; i++)
3808                         intr_cum += CPU_STATS(cp, sys.intr[i]);
3809 
3810                 pswitch_cum += CPU_STATS(cp, sys.pswitch);
3811                 forks_cum += CPU_STATS(cp, sys.sysfork);
3812                 forks_cum += CPU_STATS(cp, sys.sysvfork);
3813 
3814                 if (pools_enabled)
3815                         cp = cp->cpu_next_part;
3816                 else
3817                         cp = cp->cpu_next;
3818         } while (cp != cpstart);
3819 
3820         if (newer_than24) {
3821                 lxpr_uiobuf_printf(uiobuf,
3822                     "cpu %lu %lu %lu %lu %lu %lu %lu\n",
3823                     user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
3824         } else {
3825                 lxpr_uiobuf_printf(uiobuf,
3826                     "cpu %lu %lu %lu %lu\n",
3827                     user_cum, 0L, sys_cum, idle_cum);
3828         }
3829 
3830         /* Do per processor stats */
3831         do {
3832                 int i;
3833 
3834                 ulong_t idle_ticks;
3835                 ulong_t sys_ticks;
3836                 ulong_t user_ticks;
3837                 ulong_t irq_ticks = 0;
3838 
3839                 /*
3840                  * Don't count CPUs that aren't even in the system
3841                  * or aren't up yet.
3842                  */
3843                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3844                         continue;
3845                 }
3846 
3847                 get_cpu_mstate(cp, msnsecs);
3848 
3849                 idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3850                 sys_ticks  = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3851                 user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
3852 
3853                 for (i = 0; i < NCMSTATES; i++) {
3854                         tmptime = cp->cpu_intracct[i];
3855                         scalehrtime(&tmptime);
3856                         irq_ticks += NSEC_TO_TICK(tmptime);
3857                 }
3858 
3859                 if (newer_than24) {
3860                         lxpr_uiobuf_printf(uiobuf,
3861                             "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
3862                             cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
3863                             0L, irq_ticks, 0L);
3864                 } else {
3865                         lxpr_uiobuf_printf(uiobuf,
3866                             "cpu%d %lu %lu %lu %lu\n",
3867                             cp->cpu_id,
3868                             user_ticks, 0L, sys_ticks, idle_ticks);
3869                 }
3870 
3871                 if (pools_enabled)
3872                         cp = cp->cpu_next_part;
3873                 else
3874                         cp = cp->cpu_next;
3875         } while (cp != cpstart);
3876 
3877         mutex_exit(&cpu_lock);
3878 
3879         if (newer_than24) {
3880                 lxpr_uiobuf_printf(uiobuf,
3881                     "page %lu %lu\n"
3882                     "swap %lu %lu\n"
3883                     "intr %lu\n"
3884                     "ctxt %lu\n"
3885                     "btime %lu\n"
3886                     "processes %lu\n"
3887                     "procs_running %lu\n"
3888                     "procs_blocked %lu\n",
3889                     pgpgin_cum, pgpgout_cum,
3890                     pgswapin_cum, pgswapout_cum,
3891                     intr_cum,
3892                     pswitch_cum,
3893                     boot_time,
3894                     forks_cum,
3895                     cpu_nrunnable_cum,
3896                     w_io_cum);
3897         } else {
3898                 lxpr_uiobuf_printf(uiobuf,
3899                     "page %lu %lu\n"
3900                     "swap %lu %lu\n"
3901                     "intr %lu\n"
3902                     "ctxt %lu\n"
3903                     "btime %lu\n"
3904                     "processes %lu\n",
3905                     pgpgin_cum, pgpgout_cum,
3906                     pgswapin_cum, pgswapout_cum,
3907                     intr_cum,
3908                     pswitch_cum,
3909                     boot_time,
3910                     forks_cum);
3911         }
3912 }
3913 
3914 /*
3915  * lxpr_read_swaps():
3916  *
3917  * We don't support swap files or partitions, but some programs like to look
3918  * here just to check we have some swap on the system, so we lie and show
3919  * our entire swap cap as one swap partition.
3920  *
3921  * It is important to use formatting identical to the Linux implementation
3922  * so that consumers do not break. See swap_show() in mm/swapfile.c.
3923  */
3924 /* ARGSUSED */
3925 static void
3926 lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3927 {
3928         zone_t *zone = curzone;
3929         uint64_t totswap, usedswap;
3930 
3931         mutex_enter(&zone->zone_mem_lock);
3932         /* Uses units of 1 kb (2^10). */
3933         totswap = zone->zone_max_swap_ctl >> 10;
3934         usedswap = zone->zone_max_swap >> 10;
3935         mutex_exit(&zone->zone_mem_lock);
3936 
3937         lxpr_uiobuf_printf(uiobuf,
3938             "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
3939         lxpr_uiobuf_printf(uiobuf, "%-40s%s\t%llu\t%llu\t%d\n",
3940             "/dev/swap", "partition", totswap, usedswap, -1);
3941 }
3942 
/*
 * inotify tunables exported via /proc.
 *
 * These are only declared here; each one is printed as a decimal value by
 * the matching lxpr_read_sys_fs_inotify_*() routine below.
 */
extern int inotify_maxevents;
extern int inotify_maxinstances;
extern int inotify_maxwatches;
3949 
3950 static void
3951 lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp,
3952     lxpr_uiobuf_t *uiobuf)
3953 {
3954         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS);
3955         lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents);
3956 }
3957 
3958 static void
3959 lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp,
3960     lxpr_uiobuf_t *uiobuf)
3961 {
3962         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES);
3963         lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances);
3964 }
3965 
3966 static void
3967 lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp,
3968     lxpr_uiobuf_t *uiobuf)
3969 {
3970         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES);
3971         lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches);
3972 }
3973 
/*
 * Emit LX_CAP_MAX_VALID as a decimal number on its own line.
 * NOTE(review): presumably this backs Linux's sys/kernel/cap_last_cap file;
 * confirm against the node table.
 */
static void
lxpr_read_sys_kernel_caplcap(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_CAPLCAP);
        lxpr_uiobuf_printf(uiobuf, "%d\n", LX_CAP_MAX_VALID);
}
3980 
3981 static void
3982 lxpr_read_sys_kernel_corepatt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3983 {
3984         zone_t *zone = curproc->p_zone;
3985         struct core_globals *cg;
3986         refstr_t *rp;
3987         corectl_path_t *ccp;
3988         char tr[MAXPATHLEN];
3989 
3990         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);
3991 
3992         cg = zone_getspecific(core_zone_key, zone);
3993         ASSERT(cg != NULL);
3994 
3995         /* If core dumps are disabled, return an empty string. */
3996         if ((cg->core_options & CC_PROCESS_PATH) == 0) {
3997                 lxpr_uiobuf_printf(uiobuf, "\n");
3998                 return;
3999         }
4000 
4001         ccp = cg->core_default_path;
4002         mutex_enter(&ccp->ccp_mtx);
4003         if ((rp = ccp->ccp_path) != NULL)
4004                 refstr_hold(rp);
4005         mutex_exit(&ccp->ccp_mtx);
4006 
4007         if (rp == NULL) {
4008                 lxpr_uiobuf_printf(uiobuf, "\n");
4009                 return;
4010         }
4011 
4012         bzero(tr, sizeof (tr));
4013         if (lxpr_core_path_s2l(refstr_value(rp), tr, sizeof (tr)) != 0) {
4014                 refstr_rele(rp);
4015                 lxpr_uiobuf_printf(uiobuf, "\n");
4016                 return;
4017         }
4018 
4019         refstr_rele(rp);
4020         lxpr_uiobuf_printf(uiobuf, "%s\n", tr);
4021 }
4022 
4023 static void
4024 lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4025 {
4026         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME);
4027         lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename());
4028 }
4029 
4030 static void
4031 lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4032 {
4033         rctl_qty_t val;
4034 
4035         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI);
4036 
4037         mutex_enter(&curproc->p_lock);
4038         val = rctl_enforced_value(rc_zone_msgmni,
4039             curproc->p_zone->zone_rctls, curproc);
4040         mutex_exit(&curproc->p_lock);
4041 
4042         lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4043 }
4044 
/*
 * Emit the value of ngroups_max as a decimal number on its own line.
 */
static void
lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX);
        lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max);
}
4051 
4052 static void
4053 lxpr_read_sys_kernel_osrel(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4054 {
4055         lx_zone_data_t *br_data;
4056 
4057         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_OSREL);
4058         br_data = ztolxzd(curproc->p_zone);
4059         if (curproc->p_zone->zone_brand == &lx_brand) {
4060                 lxpr_uiobuf_printf(uiobuf, "%s\n",
4061                     br_data->lxzd_kernel_version);
4062         } else {
4063                 lxpr_uiobuf_printf(uiobuf, "\n");
4064         }
4065 }
4066 
/*
 * Emit the value of maxpid as a decimal number on its own line.
 */
static void
lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX);
        lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid);
}
4073 
4074 static void
4075 lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4076 {
4077         /*
4078          * This file isn't documented on the Linux proc(5) man page but
4079          * according to the blog of the author of systemd/journald (the
4080          * consumer), he says:
4081          *    boot_id: A random ID that is regenerated on each boot. As such it
4082          *    can be used to identify the local machine's current boot. It's
4083          *    universally available on any recent Linux kernel. It's a good and
4084          *    safe choice if you need to identify a specific boot on a specific
4085          *    booted kernel.
4086          *
4087          * We'll just generate a random ID if necessary. On Linux the format
4088          * appears to resemble a uuid but since it is not documented to be a
4089          * uuid, we don't worry about that.
4090          */
4091         lx_zone_data_t *br_data;
4092 
4093         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RAND_BOOTID);
4094 
4095         if (curproc->p_zone->zone_brand != &lx_brand) {
4096                 lxpr_uiobuf_printf(uiobuf, "0\n");
4097                 return;
4098         }
4099 
4100         br_data = ztolxzd(curproc->p_zone);
4101         if (br_data->lxzd_bootid[0] == '\0') {
4102                 extern int getrandom(void *, size_t, int);
4103                 int i;
4104 
4105                 for (i = 0; i < 5; i++) {
4106                         u_longlong_t n;
4107                         char s[32];
4108 
4109                         (void) random_get_bytes((uint8_t *)&n, sizeof (n));
4110                         switch (i) {
4111                         case 0: (void) snprintf(s, sizeof (s), "%08llx", n);
4112                                 s[8] = '\0';
4113                                 break;
4114                         case 4: (void) snprintf(s, sizeof (s), "%012llx", n);
4115                                 s[12] = '\0';
4116                                 break;
4117                         default: (void) snprintf(s, sizeof (s), "%04llx", n);
4118                                 s[4] = '\0';
4119                                 break;
4120                         }
4121                         if (i > 0)
4122                                 strlcat(br_data->lxzd_bootid, "-",
4123                                     sizeof (br_data->lxzd_bootid));
4124                         strlcat(br_data->lxzd_bootid, s,
4125                             sizeof (br_data->lxzd_bootid));
4126                 }
4127         }
4128 
4129         lxpr_uiobuf_printf(uiobuf, "%s\n", br_data->lxzd_bootid);
4130 }
4131 
4132 static void
4133 lxpr_read_sys_kernel_sem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4134 {
4135         proc_t *pp = curproc;
4136         rctl_qty_t vmsl, vopm, vmni, vmns;
4137 
4138         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SEM);
4139 
4140         mutex_enter(&pp->p_lock);
4141         vmsl = rctl_enforced_value(rc_process_semmsl, pp->p_rctls, pp);
4142         vopm = rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp);
4143         vmni = rctl_enforced_value(rc_zone_semmni, pp->p_zone->zone_rctls, pp);
4144         mutex_exit(&pp->p_lock);
4145         vmns = vmsl * vmni;
4146         if (vmns < vmsl || vmns < vmni) {
4147                 vmns = ULLONG_MAX;
4148         }
4149         /*
4150          * Format: semmsl semmns semopm semmni
4151          *  - semmsl: Limit semaphores in a sempahore set.
4152          *  - semmns: Limit semaphores in all semaphore sets
4153          *  - semopm: Limit operations in a single semop call
4154          *  - semmni: Limit number of semaphore sets
4155          */
4156         lxpr_uiobuf_printf(uiobuf, "%llu\t%llu\t%llu\t%llu\n",
4157             vmsl, vmns, vopm, vmni);
4158 }
4159 
4160 static void
4161 lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4162 {
4163         rctl_qty_t val;
4164 
4165         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX);
4166 
4167         mutex_enter(&curproc->p_lock);
4168         val = rctl_enforced_value(rc_zone_shmmax,
4169             curproc->p_zone->zone_rctls, curproc);
4170         mutex_exit(&curproc->p_lock);
4171 
4172         if (val > FOURGB)
4173                 val = FOURGB;
4174 
4175         lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4176 }
4177 
4178 static void
4179 lxpr_read_sys_kernel_shmmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4180 {
4181         rctl_qty_t val;
4182 
4183         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMNI);
4184 
4185         mutex_enter(&curproc->p_lock);
4186         val = rctl_enforced_value(rc_zone_shmmni,
4187             curproc->p_zone->zone_rctls, curproc);
4188         mutex_exit(&curproc->p_lock);
4189 
4190         if (val > FOURGB)
4191                 val = FOURGB;
4192 
4193         lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4194 }
4195 
/*
 * Emit the zone_nlwps_ctl value of the calling process's zone on its own
 * line.
 * NOTE(review): the value is formatted with "%d"; confirm that
 * zone_nlwps_ctl is int-sized, otherwise the format and argument disagree.
 */
static void
lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX);
        lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl);
}
4202 
4203 static void
4204 lxpr_read_sys_net_core_somaxc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4205 {
4206         netstack_t *ns;
4207         tcp_stack_t     *tcps;
4208 
4209         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
4210 
4211         ns = netstack_get_current();
4212         if (ns == NULL) {
4213                 lxpr_uiobuf_printf(uiobuf, "%d\n", SOMAXCONN);
4214                 return;
4215         }
4216 
4217         tcps = ns->netstack_tcp;
4218         lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_conn_req_max_q);
4219         netstack_rele(ns);
4220 }
4221 
4222 static void
4223 lxpr_read_sys_vm_minfr_kb(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4224 {
4225         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MINFR_KB);
4226         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4227 }
4228 
4229 static void
4230 lxpr_read_sys_vm_nhpages(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4231 {
4232         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_NHUGEP);
4233         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4234 }
4235 
4236 static void
4237 lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4238 {
4239         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_OVERCOMMIT_MEM);
4240         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4241 }
4242 
4243 static void
4244 lxpr_read_sys_vm_swappiness(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4245 {
4246         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_SWAPPINESS);
4247         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4248 }
4249 
4250 /*
4251  * lxpr_read_uptime(): read the contents of the "uptime" file.
4252  *
4253  * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
4254  * Use fixed point arithmetic to get 2 decimal places
4255  */
4256 /* ARGSUSED */
4257 static void
4258 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4259 {
4260         cpu_t *cp, *cpstart;
4261         int pools_enabled;
4262         ulong_t idle_cum = 0;
4263         ulong_t cpu_count = 0;
4264         ulong_t idle_s;
4265         ulong_t idle_cs;
4266         ulong_t up_s;
4267         ulong_t up_cs;
4268         hrtime_t birthtime;
4269         hrtime_t centi_sec = 10000000;  /* 10^7 */
4270 
4271         ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
4272 
4273         /* Calculate cumulative stats */
4274         mutex_enter(&cpu_lock);
4275         pools_enabled = pool_pset_enabled();
4276 
4277         cp = cpstart = CPU->cpu_part->cp_cpulist;
4278         do {
4279                 /*
4280                  * Don't count CPUs that aren't even in the system
4281                  * or aren't up yet.
4282                  */
4283                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
4284                         continue;
4285                 }
4286 
4287                 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
4288                 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
4289                 cpu_count += 1;
4290 
4291                 if (pools_enabled)
4292                         cp = cp->cpu_next_part;
4293                 else
4294                         cp = cp->cpu_next;
4295         } while (cp != cpstart);
4296         mutex_exit(&cpu_lock);
4297 
4298         /* Getting the Zone zsched process startup time */
4299         birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
4300         up_cs = (gethrtime() - birthtime) / centi_sec;
4301         up_s = up_cs / 100;
4302         up_cs %= 100;
4303 
4304         ASSERT(cpu_count > 0);
4305         idle_cum /= cpu_count;
4306         idle_s = idle_cum / hz;
4307         idle_cs = idle_cum % hz;
4308         idle_cs *= 100;
4309         idle_cs /= hz;
4310 
4311         lxpr_uiobuf_printf(uiobuf,
4312             "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
4313 }
4314 
/*
 * Flag names for AMD processors, indexed by bit number of the %edx value
 * returned by cpuid function 0x80000001.  Bits without an entry are NULL
 * and are not reported.  The table spans all 32 bits.
 */
static const char *amd_x_edx[] = {
        [11] = "syscall",
        [19] = "mp",
        [20] = "nx",
        [22] = "mmxext",
        [29] = "lm",
        [30] = "3dnowext",
        [31] = "3dnow"
};
4325 
/*
 * Flag names for AMD processors, indexed by bit number of the %ecx value
 * returned by cpuid function 0x80000001.  Only bits 0 through 4 are covered.
 */
static const char *amd_x_ecx[] = {
        [0] = "lahf_lm",
        [2] = "svm",
        [4] = "altmovcr8"
};
4330 
/*
 * Flag names used when x86_vendor is X86_VENDOR_TM, indexed by bit number
 * of the %edx value returned by cpuid function 0x80000001.  Only bits 0
 * through 3 are covered.
 */
static const char *tm_x_edx[] = {
        [0] = "recovery",
        [1] = "longrun",
        [3] = "lrti"
};
4334 
/*
 * Flag names for Intel processors, indexed by bit number of the %edx value
 * returned by cpuid function 0x80000001.
 *
 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
 *
 * The explicit NULL for bit 31 keeps the table spanning all 32 bits.
 */
static const char *intc_x_edx[] = {
        [11] = "syscall",
        [20] = "nx",
        [29] = "lm",
        [31] = NULL
};
4348 
/*
 * Flag names indexed by bit number of the %edx value returned by cpuid
 * function 1.  Bits 10 and 20 have no name and are left NULL.
 */
static const char *intc_edx[] = {
        [0] = "fpu",
        [1] = "vme",
        [2] = "de",
        [3] = "pse",
        [4] = "tsc",
        [5] = "msr",
        [6] = "pae",
        [7] = "mce",
        [8] = "cx8",
        [9] = "apic",
        [11] = "sep",
        [12] = "mtrr",
        [13] = "pge",
        [14] = "mca",
        [15] = "cmov",
        [16] = "pat",
        [17] = "pse36",
        [18] = "pn",
        [19] = "clflush",
        [21] = "dts",
        [22] = "acpi",
        [23] = "mmx",
        [24] = "fxsr",
        [25] = "sse",
        [26] = "sse2",
        [27] = "ss",
        [28] = "ht",
        [29] = "tm",
        [30] = "ia64",
        [31] = "pbe"
};
4359 
/*
 * Flag names indexed by bit number of the %ecx value returned by cpuid
 * function 1.  Only bits 0 through 14 are covered.
 *
 * "sse3" on linux is called "pni" (Prescott New Instructions).
 */
static const char *intc_ecx[] = {
        [0] = "pni",
        [3] = "monitor",
        [4] = "ds_cpl",
        [7] = "est",
        [8] = "tm2",
        [10] = "cid",
        [13] = "cx16",
        [14] = "xtpr"
};
4369 
4370 /*
4371  * Report a list of each cgroup subsystem supported by our emulated cgroup fs.
4372  * This needs to exist for systemd to run but for now we don't report any
4373  * cgroup subsystems as being installed. The commented example below shows
4374  * how to print a subsystem entry.
4375  */
4376 static void
4377 lxpr_read_cgroups(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4378 {
4379         lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
4380             "#subsys_name", "hierarchy", "num_cgroups", "enabled");
4381 
4382         /*
4383          * lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
4384          *   "cpu,cpuacct", "2", "1", "1");
4385          */
4386 }
4387 
4388 static void
4389 lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4390 {
4391         int i;
4392         uint32_t bits;
4393         cpu_t *cp, *cpstart;
4394         int pools_enabled;
4395         const char **fp;
4396         char brandstr[CPU_IDSTRLEN];
4397         struct cpuid_regs cpr;
4398         int maxeax;
4399         int std_ecx, std_edx, ext_ecx, ext_edx;
4400 
4401         ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
4402 
4403         mutex_enter(&cpu_lock);
4404         pools_enabled = pool_pset_enabled();
4405 
4406         cp = cpstart = CPU->cpu_part->cp_cpulist;
4407         do {
4408                 /*
4409                  * This returns the maximum eax value for standard cpuid
4410                  * functions in eax.
4411                  */
4412                 cpr.cp_eax = 0;
4413                 (void) cpuid_insn(cp, &cpr);
4414                 maxeax = cpr.cp_eax;
4415 
4416                 /*
4417                  * Get standard x86 feature flags.
4418                  */
4419                 cpr.cp_eax = 1;
4420                 (void) cpuid_insn(cp, &cpr);
4421                 std_ecx = cpr.cp_ecx;
4422                 std_edx = cpr.cp_edx;
4423 
4424                 /*
4425                  * Now get extended feature flags.
4426                  */
4427                 cpr.cp_eax = 0x80000001;
4428                 (void) cpuid_insn(cp, &cpr);
4429                 ext_ecx = cpr.cp_ecx;
4430                 ext_edx = cpr.cp_edx;
4431 
4432                 (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
4433 
4434                 lxpr_uiobuf_printf(uiobuf,
4435                     "processor\t: %d\n"
4436                     "vendor_id\t: %s\n"
4437                     "cpu family\t: %d\n"
4438                     "model\t\t: %d\n"
4439                     "model name\t: %s\n"
4440                     "stepping\t: %d\n"
4441                     "cpu MHz\t\t: %u.%03u\n",
4442                     cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
4443                     cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
4444                     (uint32_t)(cpu_freq_hz / 1000000),
4445                     ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
4446 
4447                 lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
4448                     getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
4449 
4450                 if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
4451                         /*
4452                          * 'siblings' is used for HT-style threads
4453                          */
4454                         lxpr_uiobuf_printf(uiobuf,
4455                             "physical id\t: %lu\n"
4456                             "siblings\t: %u\n",
4457                             pg_plat_hw_instance_id(cp, PGHW_CHIP),
4458                             cpuid_get_ncpu_per_chip(cp));
4459                 }
4460 
4461                 /*
4462                  * Since we're relatively picky about running on older hardware,
4463                  * we can be somewhat cavalier about the answers to these ones.
4464                  *
4465                  * In fact, given the hardware we support, we just say:
4466                  *
4467                  *      fdiv_bug        : no    (if we're on a 64-bit kernel)
4468                  *      hlt_bug         : no
4469                  *      f00f_bug        : no
4470                  *      coma_bug        : no
4471                  *      wp              : yes   (write protect in supervsr mode)
4472                  */
4473                 lxpr_uiobuf_printf(uiobuf,
4474                     "fdiv_bug\t: %s\n"
4475                     "hlt_bug \t: no\n"
4476                     "f00f_bug\t: no\n"
4477                     "coma_bug\t: no\n"
4478                     "fpu\t\t: %s\n"
4479                     "fpu_exception\t: %s\n"
4480                     "cpuid level\t: %d\n"
4481                     "flags\t\t:",
4482 #if defined(__i386)
4483                     fpu_pentium_fdivbug ? "yes" : "no",
4484 #else
4485                     "no",
4486 #endif /* __i386 */
4487                     fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
4488                     maxeax);
4489 
4490                 for (bits = std_edx, fp = intc_edx, i = 0;
4491                     i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
4492                         if ((bits & (1 << i)) != 0 && *fp)
4493                                 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4494 
4495                 /*
4496                  * name additional features where appropriate
4497                  */
4498                 switch (x86_vendor) {
4499                 case X86_VENDOR_Intel:
4500                         for (bits = ext_edx, fp = intc_x_edx, i = 0;
4501                             i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
4502                             fp++, i++)
4503                                 if ((bits & (1 << i)) != 0 && *fp)
4504                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4505                         break;
4506 
4507                 case X86_VENDOR_AMD:
4508                         for (bits = ext_edx, fp = amd_x_edx, i = 0;
4509                             i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
4510                             fp++, i++)
4511                                 if ((bits & (1 << i)) != 0 && *fp)
4512                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4513 
4514                         for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
4515                             i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
4516                             fp++, i++)
4517                                 if ((bits & (1 << i)) != 0 && *fp)
4518                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4519                         break;
4520 
4521                 case X86_VENDOR_TM:
4522                         for (bits = ext_edx, fp = tm_x_edx, i = 0;
4523                             i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
4524                             fp++, i++)
4525                                 if ((bits & (1 << i)) != 0 && *fp)
4526                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4527                         break;
4528                 default:
4529                         break;
4530                 }
4531 
4532                 for (bits = std_ecx, fp = intc_ecx, i = 0;
4533                     i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
4534                         if ((bits & (1 << i)) != 0 && *fp)
4535                                 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4536 
4537                 lxpr_uiobuf_printf(uiobuf, "\n\n");
4538 
4539                 if (pools_enabled)
4540                         cp = cp->cpu_next_part;
4541                 else
4542                         cp = cp->cpu_next;
4543         } while (cp != cpstart);
4544 
4545         mutex_exit(&cpu_lock);
4546 }
4547 
/*
 * Read handler for LXPR_PID_FD_FD nodes.  No data is ever produced: the
 * uio buffer's error is set to EFAULT so that the read fails.
 */
/* ARGSUSED */
static void
lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
        lxpr_uiobuf_seterr(uiobuf, EFAULT);
}
4555 
4556 /*
4557  * Report a list of file systems loaded in the kernel. We only report the ones
4558  * which we support and which may be checked by various components to see if
4559  * they are loaded.
4560  */
4561 static void
4562 lxpr_read_filesystems(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4563 {
4564         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "autofs");
4565         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "cgroup");
4566         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "nfs");
4567         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "proc");
4568         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "sysfs");
4569         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "tmpfs");
4570 }
4571 
4572 /*
4573  * lxpr_getattr(): Vnode operation for VOP_GETATTR()
4574  */
4575 static int
4576 lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
4577     caller_context_t *ct)
4578 {
4579         register lxpr_node_t *lxpnp = VTOLXP(vp);
4580         lxpr_nodetype_t type = lxpnp->lxpr_type;
4581         extern uint_t nproc;
4582         int error;
4583 
4584         /*
4585          * Return attributes of underlying vnode if ATTR_REAL
4586          *
4587          * but keep fd files with the symlink permissions
4588          */
4589         if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
4590                 vnode_t *rvp = lxpnp->lxpr_realvp;
4591 
4592                 /*
4593                  * withold attribute information to owner or root
4594                  */
4595                 if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
4596                         return (error);
4597                 }
4598 
4599                 /*
4600                  * now its attributes
4601                  */
4602                 if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
4603                         return (error);
4604                 }
4605 
4606                 /*
4607                  * if it's a file in lx /proc/pid/fd/xx then set its
4608                  * mode and keep it looking like a symlink, fifo or socket
4609                  */
4610                 if (type == LXPR_PID_FD_FD) {
4611                         vap->va_mode = lxpnp->lxpr_mode;
4612                         vap->va_type = lxpnp->lxpr_realvp->v_type;
4613                         vap->va_size = 0;
4614                         vap->va_nlink = 1;
4615                 }
4616                 return (0);
4617         }
4618 
4619         /* Default attributes, that may be overridden below */
4620         bzero(vap, sizeof (*vap));
4621         vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
4622         vap->va_nlink = 1;
4623         vap->va_type = vp->v_type;
4624         vap->va_mode = lxpnp->lxpr_mode;
4625         vap->va_fsid = vp->v_vfsp->vfs_dev;
4626         vap->va_blksize = DEV_BSIZE;
4627         vap->va_uid = lxpnp->lxpr_uid;
4628         vap->va_gid = lxpnp->lxpr_gid;
4629         vap->va_nodeid = lxpnp->lxpr_ino;
4630 
4631         switch (type) {
4632         case LXPR_PROCDIR:
4633                 vap->va_nlink = nproc + 2 + PROCDIRFILES;
4634                 vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
4635                 break;
4636         case LXPR_PIDDIR:
4637                 vap->va_nlink = PIDDIRFILES;
4638                 vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
4639                 break;
4640         case LXPR_PID_TASK_IDDIR:
4641                 vap->va_nlink = TIDDIRFILES;
4642                 vap->va_size = TIDDIRFILES * LXPR_SDSIZE;
4643                 break;
4644         case LXPR_SELF:
4645                 vap->va_uid = crgetruid(curproc->p_cred);
4646                 vap->va_gid = crgetrgid(curproc->p_cred);
4647                 break;
4648         case LXPR_PID_FD_FD:
4649         case LXPR_PID_TID_FD_FD:
4650                 /*
4651                  * Restore VLNK type for lstat-type activity.
4652                  * See lxpr_readlink for more details.
4653                  */
4654                 if ((flags & FOLLOW) == 0)
4655                         vap->va_type = VLNK;
4656         default:
4657                 break;
4658         }
4659 
4660         vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
4661         return (0);
4662 }
4663 
4664 /*
4665  * lxpr_access(): Vnode operation for VOP_ACCESS()
4666  */
4667 static int
4668 lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
4669 {
4670         lxpr_node_t *lxpnp = VTOLXP(vp);
4671         lxpr_nodetype_t type = lxpnp->lxpr_type;
4672         int shift = 0;
4673         proc_t *tp;
4674 
4675         /* lx /proc is a read only file system */
4676         if (mode & VWRITE) {
4677                 switch (type) {
4678                 case LXPR_PID_OOM_SCR_ADJ:
4679                 case LXPR_PID_TID_OOM_SCR_ADJ:
4680                 case LXPR_SYS_KERNEL_COREPATT:
4681                 case LXPR_SYS_NET_CORE_SOMAXCON:
4682                 case LXPR_SYS_VM_OVERCOMMIT_MEM:
4683                 case LXPR_SYS_VM_SWAPPINESS:
4684                 case LXPR_PID_FD_FD:
4685                 case LXPR_PID_TID_FD_FD:
4686                         break;
4687                 default:
4688                         return (EROFS);
4689                 }
4690         }
4691 
4692         /*
4693          * If this is a restricted file, check access permissions.
4694          */
4695         switch (type) {
4696         case LXPR_PIDDIR:
4697                 return (0);
4698         case LXPR_PID_CURDIR:
4699         case LXPR_PID_ENV:
4700         case LXPR_PID_EXE:
4701         case LXPR_PID_LIMITS:
4702         case LXPR_PID_MAPS:
4703         case LXPR_PID_MEM:
4704         case LXPR_PID_ROOTDIR:
4705         case LXPR_PID_FDDIR:
4706         case LXPR_PID_FD_FD:
4707         case LXPR_PID_TID_FDDIR:
4708         case LXPR_PID_TID_FD_FD:
4709                 if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
4710                         return (ENOENT);
4711                 if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
4712                     priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
4713                         lxpr_unlock(tp);
4714                         return (EACCES);
4715                 }
4716                 lxpr_unlock(tp);
4717         default:
4718                 break;
4719         }
4720 
4721         if (lxpnp->lxpr_realvp != NULL) {
4722                 /*
4723                  * For these we use the underlying vnode's accessibility.
4724                  */
4725                 return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
4726         }
4727 
4728         /* If user is root allow access regardless of permission bits */
4729         if (secpolicy_proc_access(cr) == 0)
4730                 return (0);
4731 
4732         /*
4733          * Access check is based on only one of owner, group, public.  If not
4734          * owner, then check group.  If not a member of the group, then check
4735          * public access.
4736          */
4737         if (crgetuid(cr) != lxpnp->lxpr_uid) {
4738                 shift += 3;
4739                 if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
4740                         shift += 3;
4741         }
4742 
4743         mode &= ~(lxpnp->lxpr_mode << shift);
4744 
4745         if (mode == 0)
4746                 return (0);
4747 
4748         return (EACCES);
4749 }
4750 
/*
 * Lookup handler that never finds anything: it ignores both arguments and
 * always returns NULL.  Judging by the name, it is the handler used for
 * node types that are not directories.
 */
/* ARGSUSED */
static vnode_t *
lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
{
        return (NULL);
}
4757 
4758 /*
4759  * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
4760  */
4761 /* ARGSUSED */
4762 static int
4763 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
4764     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
4765     int *direntflags, pathname_t *realpnp)
4766 {
4767         lxpr_node_t *lxpnp = VTOLXP(dp);
4768         lxpr_nodetype_t type = lxpnp->lxpr_type;
4769         int error;
4770 
4771         ASSERT(dp->v_type == VDIR);
4772         ASSERT(type < LXPR_NFILES);
4773 
4774         /*
4775          * we should never get here because the lookup
4776          * is done on the realvp for these nodes
4777          */
4778         ASSERT(type != LXPR_PID_FD_FD &&
4779             type != LXPR_PID_CURDIR &&
4780             type != LXPR_PID_ROOTDIR);
4781 
4782         /*
4783          * restrict lookup permission to owner or root
4784          */
4785         if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
4786                 return (error);
4787         }
4788 
4789         /*
4790          * Just return the parent vnode if that's where we are trying to go.
4791          */
4792         if (strcmp(comp, "..") == 0) {
4793                 VN_HOLD(lxpnp->lxpr_parent);
4794                 *vpp = lxpnp->lxpr_parent;
4795                 return (0);
4796         }
4797 
4798         /*
4799          * Special handling for directory searches.  Note: null component name
4800          * denotes that the current directory is being searched.
4801          */
4802         if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
4803                 VN_HOLD(dp);
4804                 *vpp = dp;
4805                 return (0);
4806         }
4807 
4808         *vpp = (lxpr_lookup_function[type](dp, comp));
4809         return ((*vpp == NULL) ? ENOENT : 0);
4810 }
4811 
4812 /*
4813  * Do a sequential search on the given directory table
4814  */
4815 static vnode_t *
4816 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
4817     lxpr_dirent_t *dirtab, int dirtablen)
4818 {
4819         lxpr_node_t *lxpnp;
4820         int count;
4821 
4822         for (count = 0; count < dirtablen; count++) {
4823                 if (strcmp(dirtab[count].d_name, comp) == 0) {
4824                         lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
4825                         dp = LXPTOV(lxpnp);
4826                         ASSERT(dp != NULL);
4827                         return (dp);
4828                 }
4829         }
4830         return (NULL);
4831 }
4832 
4833 static vnode_t *
4834 lxpr_lookup_piddir(vnode_t *dp, char *comp)
4835 {
4836         proc_t *p;
4837 
4838         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
4839 
4840         p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
4841         if (p == NULL)
4842                 return (NULL);
4843 
4844         dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
4845 
4846         lxpr_unlock(p);
4847 
4848         return (dp);
4849 }
4850 
4851 /*
4852  * Lookup one of the process's task ID's.
4853  */
4854 static vnode_t *
4855 lxpr_lookup_taskdir(vnode_t *dp, char *comp)
4856 {
4857         lxpr_node_t *dlxpnp = VTOLXP(dp);
4858         lxpr_node_t *lxpnp;
4859         proc_t *p;
4860         pid_t real_pid;
4861         uint_t tid;
4862         int c;
4863         kthread_t *t;
4864 
4865         ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASKDIR);
4866 
4867         /*
4868          * convert the string rendition of the filename to a thread ID
4869          */
4870         tid = 0;
4871         while ((c = *comp++) != '\0') {
4872                 int otid;
4873                 if (c < '0' || c > '9')
4874                         return (NULL);
4875 
4876                 otid = tid;
4877                 tid = 10 * tid + c - '0';
4878                 /* integer overflow */
4879                 if (tid / 10 != otid)
4880                         return (NULL);
4881         }
4882 
4883         /*
4884          * get the proc to work with and lock it
4885          */
4886         real_pid = get_real_pid(dlxpnp->lxpr_pid);
4887         p = lxpr_lock(real_pid);
4888         if ((p == NULL))
4889                 return (NULL);
4890 
4891         /*
4892          * If the process is a zombie or system process
4893          * it can't have any threads.
4894          */
4895         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
4896                 lxpr_unlock(p);
4897                 return (NULL);
4898         }
4899 
4900         if (p->p_brand == &lx_brand) {
4901                 t = lxpr_get_thread(p, tid);
4902         } else {
4903                 /*
4904                  * Only the main thread is visible for non-branded processes.
4905                  */
4906                 t = p->p_tlist;
4907                 if (tid != p->p_pid || t == NULL) {
4908                         t = NULL;
4909                 } else {
4910                         thread_lock(t);
4911                 }
4912         }
4913         if (t == NULL) {
4914                 lxpr_unlock(p);
4915                 return (NULL);
4916         }
4917         thread_unlock(t);
4918 
4919         /*
4920          * Allocate and fill in a new lx /proc taskid node.
4921          * Instead of the last arg being a fd, it is a tid.
4922          */
4923         lxpnp = lxpr_getnode(dp, LXPR_PID_TASK_IDDIR, p, tid);
4924         dp = LXPTOV(lxpnp);
4925         ASSERT(dp != NULL);
4926         lxpr_unlock(p);
4927         return (dp);
4928 }
4929 
4930 /*
4931  * Lookup one of the process's task ID's.
4932  */
4933 static vnode_t *
4934 lxpr_lookup_task_tid_dir(vnode_t *dp, char *comp)
4935 {
4936         lxpr_node_t *dlxpnp = VTOLXP(dp);
4937         lxpr_node_t *lxpnp;
4938         proc_t *p;
4939         pid_t real_pid;
4940         kthread_t *t;
4941         int i;
4942 
4943         ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
4944 
4945         /*
4946          * get the proc to work with and lock it
4947          */
4948         real_pid = get_real_pid(dlxpnp->lxpr_pid);
4949         p = lxpr_lock(real_pid);
4950         if ((p == NULL))
4951                 return (NULL);
4952 
4953         /*
4954          * If the process is a zombie or system process
4955          * it can't have any threads.
4956          */
4957         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
4958                 lxpr_unlock(p);
4959                 return (NULL);
4960         }
4961 
4962         /* need to confirm tid is still there */
4963         t = lxpr_get_thread(p, dlxpnp->lxpr_desc);
4964         if (t == NULL) {
4965                 lxpr_unlock(p);
4966                 return (NULL);
4967         }
4968         thread_unlock(t);
4969 
4970         /*
4971          * allocate and fill in the new lx /proc taskid dir node
4972          */
4973         for (i = 0; i < TIDDIRFILES; i++) {
4974                 if (strcmp(tiddir[i].d_name, comp) == 0) {
4975                         lxpnp = lxpr_getnode(dp, tiddir[i].d_type, p,
4976                             dlxpnp->lxpr_desc);
4977                         dp = LXPTOV(lxpnp);
4978                         ASSERT(dp != NULL);
4979                         lxpr_unlock(p);
4980                         return (dp);
4981                 }
4982         }
4983 
4984         lxpr_unlock(p);
4985         return (NULL);
4986 }
4987 
4988 /*
4989  * Lookup one of the process's open files.
4990  */
4991 static vnode_t *
4992 lxpr_lookup_fddir(vnode_t *dp, char *comp)
4993 {
4994         lxpr_node_t *dlxpnp = VTOLXP(dp);
4995 
4996         ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR ||
4997             dlxpnp->lxpr_type == LXPR_PID_TID_FDDIR);
4998 
4999         return (lxpr_lookup_fdnode(dp, comp));
5000 }
5001 
5002 static vnode_t *
5003 lxpr_lookup_netdir(vnode_t *dp, char *comp)
5004 {
5005         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
5006 
5007         dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
5008 
5009         return (dp);
5010 }
5011 
5012 static vnode_t *
5013 lxpr_lookup_procdir(vnode_t *dp, char *comp)
5014 {
5015         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
5016 
5017         /*
5018          * We know all the names of files & dirs in our file system structure
5019          * except those that are pid names.  These change as pids are created/
5020          * deleted etc., so we just look for a number as the first char to see
5021          * if we are we doing pid lookups.
5022          *
5023          * Don't need to check for "self" as it is implemented as a symlink
5024          */
5025         if (*comp >= '0' && *comp <= '9') {
5026                 pid_t pid = 0;
5027                 lxpr_node_t *lxpnp = NULL;
5028                 proc_t *p;
5029                 int c;
5030 
5031                 while ((c = *comp++) != '\0')
5032                         pid = 10 * pid + c - '0';
5033 
5034                 /*
5035                  * Can't continue if the process is still loading or it doesn't
5036                  * really exist yet (or maybe it just died!)
5037                  */
5038                 p = lxpr_lock(pid);
5039                 if (p == NULL)
5040                         return (NULL);
5041 
5042                 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
5043                         lxpr_unlock(p);
5044                         return (NULL);
5045                 }
5046 
5047                 /*
5048                  * allocate and fill in a new lx /proc node
5049                  */
5050                 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
5051 
5052                 lxpr_unlock(p);
5053 
5054                 dp = LXPTOV(lxpnp);
5055                 ASSERT(dp != NULL);
5056 
5057                 return (dp);
5058         }
5059 
5060         /* Lookup fixed names */
5061         return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
5062 }
5063 
5064 static vnode_t *
5065 lxpr_lookup_sysdir(vnode_t *dp, char *comp)
5066 {
5067         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR);
5068         return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES));
5069 }
5070 
5071 static vnode_t *
5072 lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp)
5073 {
5074         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR);
5075         return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir,
5076             SYS_KERNELDIRFILES));
5077 }
5078 
5079 static vnode_t *
5080 lxpr_lookup_sys_kdir_randdir(vnode_t *dp, char *comp)
5081 {
5082         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
5083         return (lxpr_lookup_common(dp, comp, NULL, sys_randdir,
5084             SYS_RANDDIRFILES));
5085 }
5086 
5087 static vnode_t *
5088 lxpr_lookup_sys_netdir(vnode_t *dp, char *comp)
5089 {
5090         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NETDIR);
5091         return (lxpr_lookup_common(dp, comp, NULL, sys_netdir,
5092             SYS_NETDIRFILES));
5093 }
5094 
5095 static vnode_t *
5096 lxpr_lookup_sys_net_coredir(vnode_t *dp, char *comp)
5097 {
5098         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_COREDIR);
5099         return (lxpr_lookup_common(dp, comp, NULL, sys_net_coredir,
5100             SYS_NET_COREDIRFILES));
5101 }
5102 
5103 static vnode_t *
5104 lxpr_lookup_sys_vmdir(vnode_t *dp, char *comp)
5105 {
5106         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_VMDIR);
5107         return (lxpr_lookup_common(dp, comp, NULL, sys_vmdir,
5108             SYS_VMDIRFILES));
5109 }
5110 
5111 static vnode_t *
5112 lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp)
5113 {
5114         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR);
5115         return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir,
5116             SYS_FSDIRFILES));
5117 }
5118 
5119 static vnode_t *
5120 lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp)
5121 {
5122         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
5123         return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir,
5124             SYS_FS_INOTIFYDIRFILES));
5125 }
5126 
5127 /*
5128  * lxpr_readdir(): Vnode operation for VOP_READDIR()
5129  */
5130 /* ARGSUSED */
5131 static int
5132 lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
5133     caller_context_t *ct, int flags)
5134 {
5135         lxpr_node_t *lxpnp = VTOLXP(dp);
5136         lxpr_nodetype_t type = lxpnp->lxpr_type;
5137         ssize_t uresid;
5138         off_t uoffset;
5139         int error;
5140 
5141         ASSERT(dp->v_type == VDIR);
5142         ASSERT(type < LXPR_NFILES);
5143 
5144         /*
5145          * we should never get here because the readdir
5146          * is done on the realvp for these nodes
5147          */
5148         ASSERT(type != LXPR_PID_FD_FD &&
5149             type != LXPR_PID_CURDIR &&
5150             type != LXPR_PID_ROOTDIR);
5151 
5152         /*
5153          * restrict readdir permission to owner or root
5154          */
5155         if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
5156                 return (error);
5157 
5158         uoffset = uiop->uio_offset;
5159         uresid = uiop->uio_resid;
5160 
5161         /* can't do negative reads */
5162         if (uoffset < 0 || uresid <= 0)
5163                 return (EINVAL);
5164 
5165         /* can't read directory entries that don't exist! */
5166         if (uoffset % LXPR_SDSIZE)
5167                 return (ENOENT);
5168 
5169         return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
5170 }
5171 
/*
 * Readdir handler that always fails with ENOTDIR, ignoring its arguments.
 * Judging by the name, it is the handler used for node types that are not
 * directories.
 */
/* ARGSUSED */
static int
lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
        return (ENOTDIR);
}
5178 
5179 /*
5180  * This has the common logic for returning directory entries
5181  */
5182 static int
5183 lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
5184     lxpr_dirent_t *dirtab, int dirtablen)
5185 {
5186         /* bp holds one dirent64 structure */
5187         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5188         dirent64_t *dirent = (dirent64_t *)bp;
5189         ssize_t oresid; /* save a copy for testing later */
5190         ssize_t uresid;
5191 
5192         oresid = uiop->uio_resid;
5193 
5194         /* clear out the dirent buffer */
5195         bzero(bp, sizeof (bp));
5196 
5197         /*
5198          * Satisfy user request
5199          */
5200         while ((uresid = uiop->uio_resid) > 0) {
5201                 int dirindex;
5202                 off_t uoffset;
5203                 int reclen;
5204                 int error;
5205 
5206                 uoffset = uiop->uio_offset;
5207                 dirindex  = (uoffset / LXPR_SDSIZE) - 2;
5208 
5209                 if (uoffset == 0) {
5210 
5211                         dirent->d_ino = lxpnp->lxpr_ino;
5212                         dirent->d_name[0] = '.';
5213                         dirent->d_name[1] = '\0';
5214                         reclen = DIRENT64_RECLEN(1);
5215 
5216                 } else if (uoffset == LXPR_SDSIZE) {
5217 
5218                         dirent->d_ino = lxpr_parentinode(lxpnp);
5219                         dirent->d_name[0] = '.';
5220                         dirent->d_name[1] = '.';
5221                         dirent->d_name[2] = '\0';
5222                         reclen = DIRENT64_RECLEN(2);
5223 
5224                 } else if (dirindex >= 0 && dirindex < dirtablen) {
5225                         int slen = strlen(dirtab[dirindex].d_name);
5226 
5227                         dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
5228                             lxpnp->lxpr_pid, 0);
5229 
5230                         VERIFY(slen < LXPNSIZ);
5231                         (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
5232                         reclen = DIRENT64_RECLEN(slen);
5233 
5234                 } else {
5235                         /* Run out of table entries */
5236                         if (eofp) {
5237                                 *eofp = 1;
5238                         }
5239                         return (0);
5240                 }
5241 
5242                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5243                 dirent->d_reclen = (ushort_t)reclen;
5244 
5245                 /*
5246                  * if the size of the data to transfer is greater
5247                  * that that requested then we can't do it this transfer.
5248                  */
5249                 if (reclen > uresid) {
5250                         /*
5251                          * Error if no entries have been returned yet.
5252                          */
5253                         if (uresid == oresid) {
5254                                 return (EINVAL);
5255                         }
5256                         break;
5257                 }
5258 
5259                 /*
5260                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5261                  * by the same amount.  But we want uiop->uio_offset to change
5262                  * in increments of LXPR_SDSIZE, which is different from the
5263                  * number of bytes being returned to the user.  So we set
5264                  * uiop->uio_offset separately, ignoring what uiomove() does.
5265                  */
5266                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5267                     uiop)) != 0)
5268                         return (error);
5269 
5270                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5271         }
5272 
5273         /* Have run out of space, but could have just done last table entry */
5274         if (eofp) {
5275                 *eofp =
5276                     (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
5277         }
5278         return (0);
5279 }
5280 
5281 
5282 static int
5283 lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5284 {
5285         /* bp holds one dirent64 structure */
5286         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5287         dirent64_t *dirent = (dirent64_t *)bp;
5288         ssize_t oresid; /* save a copy for testing later */
5289         ssize_t uresid;
5290         off_t uoffset;
5291         zoneid_t zoneid;
5292         pid_t pid;
5293         int error;
5294         int ceof;
5295 
5296         ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
5297 
5298         oresid = uiop->uio_resid;
5299         zoneid = LXPTOZ(lxpnp)->zone_id;
5300 
5301         /*
5302          * We return directory entries in the order: "." and ".." then the
5303          * unique lxproc files, then the directories corresponding to the
5304          * running processes.  We have defined this as the ordering because
5305          * it allows us to more easily keep track of where we are betwen calls
5306          * to getdents().  If the number of processes changes between calls
5307          * then we can't lose track of where we are in the lxproc files.
5308          */
5309 
5310         /* Do the fixed entries */
5311         error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
5312             PROCDIRFILES);
5313 
5314         /* Finished if we got an error or if we couldn't do all the table */
5315         if (error != 0 || ceof == 0)
5316                 return (error);
5317 
5318         /* clear out the dirent buffer */
5319         bzero(bp, sizeof (bp));
5320 
5321         /* Do the process entries */
5322         while ((uresid = uiop->uio_resid) > 0) {
5323                 proc_t *p;
5324                 int len;
5325                 int reclen;
5326                 int i;
5327 
5328                 uoffset = uiop->uio_offset;
5329 
5330                 /*
5331                  * Stop when entire proc table has been examined.
5332                  */
5333                 i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
5334                 if (i < 0 || i >= v.v_proc) {
5335                         /* Run out of table entries */
5336                         if (eofp) {
5337                                 *eofp = 1;
5338                         }
5339                         return (0);
5340                 }
5341                 mutex_enter(&pidlock);
5342 
5343                 /*
5344                  * Skip indices for which there is no pid_entry, PIDs for
5345                  * which there is no corresponding process, a PID of 0,
5346                  * and anything the security policy doesn't allow
5347                  * us to look at.
5348                  */
5349                 if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
5350                     p->p_pid == 0 ||
5351                     secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
5352                         mutex_exit(&pidlock);
5353                         goto next;
5354                 }
5355                 mutex_exit(&pidlock);
5356 
5357                 /*
5358                  * Convert pid to the Linux default of 1 if we're the zone's
5359                  * init process, or 0 if zsched, otherwise use the value from
5360                  * the proc structure
5361                  */
5362                 if (p->p_pid == curproc->p_zone->zone_proc_initpid) {
5363                         pid = 1;
5364                 } else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) {
5365                         pid = 0;
5366                 } else {
5367                         pid = p->p_pid;
5368                 }
5369 
5370                 /*
5371                  * If this /proc was mounted in the global zone, view
5372                  * all procs; otherwise, only view zone member procs.
5373                  */
5374                 if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
5375                         goto next;
5376                 }
5377 
5378                 ASSERT(p->p_stat != 0);
5379 
5380                 dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
5381                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
5382                 ASSERT(len < LXPNSIZ);
5383                 reclen = DIRENT64_RECLEN(len);
5384 
5385                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5386                 dirent->d_reclen = (ushort_t)reclen;
5387 
5388                 /*
5389                  * if the size of the data to transfer is greater
5390                  * that that requested then we can't do it this transfer.
5391                  */
5392                 if (reclen > uresid) {
5393                         /*
5394                          * Error if no entries have been returned yet.
5395                          */
5396                         if (uresid == oresid)
5397                                 return (EINVAL);
5398                         break;
5399                 }
5400 
5401                 /*
5402                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5403                  * by the same amount.  But we want uiop->uio_offset to change
5404                  * in increments of LXPR_SDSIZE, which is different from the
5405                  * number of bytes being returned to the user.  So we set
5406                  * uiop->uio_offset separately, in the increment of this for
5407                  * the loop, ignoring what uiomove() does.
5408                  */
5409                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5410                     uiop)) != 0)
5411                         return (error);
5412 next:
5413                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5414         }
5415 
5416         if (eofp != NULL) {
5417                 *eofp = (uiop->uio_offset >=
5418                     ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
5419         }
5420 
5421         return (0);
5422 }
5423 
5424 static int
5425 lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5426 {
5427         proc_t *p;
5428         pid_t find_pid;
5429 
5430         ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
5431 
5432         /* can't read its contents if it died */
5433         mutex_enter(&pidlock);
5434 
5435         if (lxpnp->lxpr_pid == 1) {
5436                 find_pid = curproc->p_zone->zone_proc_initpid;
5437         } else if (lxpnp->lxpr_pid == 0) {
5438                 find_pid = curproc->p_zone->zone_zsched->p_pid;
5439         } else {
5440                 find_pid = lxpnp->lxpr_pid;
5441         }
5442         p = prfind(find_pid);
5443 
5444         if (p == NULL || p->p_stat == SIDL) {
5445                 mutex_exit(&pidlock);
5446                 return (ENOENT);
5447         }
5448         mutex_exit(&pidlock);
5449 
5450         return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
5451 }
5452 
5453 static int
5454 lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5455 {
5456         ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
5457         return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
5458 }
5459 
5460 static int
5461 lxpr_readdir_taskdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5462 {
5463         /* bp holds one dirent64 structure */
5464         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5465         dirent64_t *dirent = (dirent64_t *)bp;
5466         ssize_t oresid; /* save a copy for testing later */
5467         ssize_t uresid;
5468         off_t uoffset;
5469         int error;
5470         int ceof;
5471         proc_t *p;
5472         int tiddirsize = -1;
5473         int tasknum;
5474         pid_t real_pid;
5475         kthread_t *t;
5476         boolean_t branded = B_FALSE;
5477 
5478         ASSERT(lxpnp->lxpr_type == LXPR_PID_TASKDIR);
5479 
5480         oresid = uiop->uio_resid;
5481 
5482         real_pid = get_real_pid(lxpnp->lxpr_pid);
5483         p = lxpr_lock(real_pid);
5484 
5485         /* can't read its contents if it died */
5486         if (p == NULL) {
5487                 return (ENOENT);
5488         }
5489         if (p->p_stat == SIDL) {
5490                 lxpr_unlock(p);
5491                 return (ENOENT);
5492         }
5493 
5494         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
5495                 tiddirsize = 0;
5496 
5497         branded = (p->p_brand == &lx_brand);
5498         /*
5499          * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
5500          * going away while we iterate over its threads.
5501          */
5502         mutex_exit(&p->p_lock);
5503 
5504         if (tiddirsize == -1)
5505                 tiddirsize = p->p_lwpcnt;
5506 
5507         /* Do the fixed entries (in this case just "." & "..") */
5508         error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
5509 
5510         /* Finished if we got an error or if we couldn't do all the table */
5511         if (error != 0 || ceof == 0)
5512                 goto out;
5513 
5514         if ((t = p->p_tlist) == NULL) {
5515                 if (eofp != NULL)
5516                         *eofp = 1;
5517                 goto out;
5518         }
5519 
5520         /* clear out the dirent buffer */
5521         bzero(bp, sizeof (bp));
5522 
5523         /*
5524          * Loop until user's request is satisfied or until all thread's have
5525          * been returned.
5526          */
5527         for (tasknum = 0; (uresid = uiop->uio_resid) > 0; tasknum++) {
5528                 int i;
5529                 int reclen;
5530                 int len;
5531                 uint_t emul_tid;
5532                 lx_lwp_data_t *lwpd;
5533 
5534                 uoffset = uiop->uio_offset;
5535 
5536                 /*
5537                  * Stop at the end of the thread list
5538                  */
5539                 i = (uoffset / LXPR_SDSIZE) - 2;
5540                 if (i < 0 || i >= tiddirsize) {
5541                         if (eofp) {
5542                                 *eofp = 1;
5543                         }
5544                         goto out;
5545                 }
5546 
5547                 if (i != tasknum)
5548                         goto next;
5549 
5550                 if (!branded) {
5551                         /*
5552                          * Emulating the goofy linux task model is impossible
5553                          * to do for native processes.  We can compromise by
5554                          * presenting only the main thread to the consumer.
5555                          */
5556                         emul_tid = p->p_pid;
5557                 } else {
5558                         if ((lwpd = ttolxlwp(t)) == NULL) {
5559                                 goto next;
5560                         }
5561                         emul_tid = lwpd->br_pid;
5562                         /*
5563                          * Convert pid to Linux default of 1 if we're the
5564                          * zone's init.
5565                          */
5566                         if (emul_tid == curproc->p_zone->zone_proc_initpid)
5567                                 emul_tid = 1;
5568                 }
5569 
5570                 dirent->d_ino = lxpr_inode(LXPR_PID_TASK_IDDIR, lxpnp->lxpr_pid,
5571                     emul_tid);
5572                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", emul_tid);
5573                 ASSERT(len < LXPNSIZ);
5574                 reclen = DIRENT64_RECLEN(len);
5575 
5576                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5577                 dirent->d_reclen = (ushort_t)reclen;
5578 
5579                 if (reclen > uresid) {
5580                         /*
5581                          * Error if no entries have been returned yet.
5582                          */
5583                         if (uresid == oresid)
5584                                 error = EINVAL;
5585                         goto out;
5586                 }
5587 
5588                 /*
5589                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5590                  * by the same amount.  But we want uiop->uio_offset to change
5591                  * in increments of LXPR_SDSIZE, which is different from the
5592                  * number of bytes being returned to the user.  So we set
5593                  * uiop->uio_offset separately, in the increment of this for
5594                  * the loop, ignoring what uiomove() does.
5595                  */
5596                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5597                     uiop)) != 0)
5598                         goto out;
5599 
5600 next:
5601                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5602 
5603                 if ((t = t->t_forw) == p->p_tlist || !branded) {
5604                         if (eofp != NULL)
5605                                 *eofp = 1;
5606                         goto out;
5607                 }
5608         }
5609 
5610         if (eofp != NULL)
5611                 *eofp = 0;
5612 
5613 out:
5614         mutex_enter(&p->p_lock);
5615         lxpr_unlock(p);
5616         return (error);
5617 }
5618 
5619 static int
5620 lxpr_readdir_task_tid_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5621 {
5622         proc_t *p;
5623         pid_t real_pid;
5624         kthread_t *t;
5625 
5626         ASSERT(lxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
5627 
5628         mutex_enter(&pidlock);
5629 
5630         real_pid = get_real_pid(lxpnp->lxpr_pid);
5631         p = prfind(real_pid);
5632 
5633         /* can't read its contents if it died */
5634         if (p == NULL || p->p_stat == SIDL) {
5635                 mutex_exit(&pidlock);
5636                 return (ENOENT);
5637         }
5638 
5639         mutex_exit(&pidlock);
5640 
5641         /* need to confirm tid is still there */
5642         t = lxpr_get_thread(p, lxpnp->lxpr_desc);
5643         if (t == NULL) {
5644                 /* we can't find this specific thread */
5645                 return (NULL);
5646         }
5647         thread_unlock(t);
5648 
5649         return (lxpr_readdir_common(lxpnp, uiop, eofp, tiddir, TIDDIRFILES));
5650 }
5651 
5652 static int
5653 lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5654 {
5655         /* bp holds one dirent64 structure */
5656         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5657         dirent64_t *dirent = (dirent64_t *)bp;
5658         ssize_t oresid; /* save a copy for testing later */
5659         ssize_t uresid;
5660         off_t uoffset;
5661         int error;
5662         int ceof;
5663         proc_t *p;
5664         int fddirsize = -1;
5665         uf_info_t *fip;
5666 
5667         ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR ||
5668             lxpnp->lxpr_type == LXPR_PID_TID_FDDIR);
5669 
5670         oresid = uiop->uio_resid;
5671 
5672         /* can't read its contents if it died */
5673         p = lxpr_lock(lxpnp->lxpr_pid);
5674         if (p == NULL)
5675                 return (ENOENT);
5676 
5677         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
5678                 fddirsize = 0;
5679 
5680         /*
5681          * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
5682          * going away while we iterate over its fi_list.
5683          */
5684         mutex_exit(&p->p_lock);
5685 
5686         /* Get open file info */
5687         fip = (&(p)->p_user.u_finfo);
5688         mutex_enter(&fip->fi_lock);
5689 
5690         if (fddirsize == -1)
5691                 fddirsize = fip->fi_nfiles;
5692 
5693         /* Do the fixed entries (in this case just "." & "..") */
5694         error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
5695 
5696         /* Finished if we got an error or if we couldn't do all the table */
5697         if (error != 0 || ceof == 0)
5698                 goto out;
5699 
5700         /* clear out the dirent buffer */
5701         bzero(bp, sizeof (bp));
5702 
5703         /*
5704          * Loop until user's request is satisfied or until
5705          * all file descriptors have been examined.
5706          */
5707         for (; (uresid = uiop->uio_resid) > 0;
5708             uiop->uio_offset = uoffset + LXPR_SDSIZE) {
5709                 int reclen;
5710                 int fd;
5711                 int len;
5712 
5713                 uoffset = uiop->uio_offset;
5714 
5715                 /*
5716                  * Stop at the end of the fd list
5717                  */
5718                 fd = (uoffset / LXPR_SDSIZE) - 2;
5719                 if (fd < 0 || fd >= fddirsize) {
5720                         if (eofp) {
5721                                 *eofp = 1;
5722                         }
5723                         goto out;
5724                 }
5725 
5726                 if (fip->fi_list[fd].uf_file == NULL)
5727                         continue;
5728 
5729                 dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
5730                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
5731                 ASSERT(len < LXPNSIZ);
5732                 reclen = DIRENT64_RECLEN(len);
5733 
5734                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5735                 dirent->d_reclen = (ushort_t)reclen;
5736 
5737                 if (reclen > uresid) {
5738                         /*
5739                          * Error if no entries have been returned yet.
5740                          */
5741                         if (uresid == oresid)
5742                                 error = EINVAL;
5743                         goto out;
5744                 }
5745 
5746                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5747                     uiop)) != 0)
5748                         goto out;
5749         }
5750 
5751         if (eofp != NULL) {
5752                 *eofp =
5753                     (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
5754         }
5755 
5756 out:
5757         mutex_exit(&fip->fi_lock);
5758         mutex_enter(&p->p_lock);
5759         lxpr_unlock(p);
5760         return (error);
5761 }
5762 
5763 static int
5764 lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5765 {
5766         ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR);
5767         return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES));
5768 }
5769 
5770 static int
5771 lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5772 {
5773         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR);
5774         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir,
5775             SYS_FSDIRFILES));
5776 }
5777 
5778 static int
5779 lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5780 {
5781         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
5782         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir,
5783             SYS_FS_INOTIFYDIRFILES));
5784 }
5785 
5786 static int
5787 lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5788 {
5789         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR);
5790         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir,
5791             SYS_KERNELDIRFILES));
5792 }
5793 
5794 static int
5795 lxpr_readdir_sys_kdir_randdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5796 {
5797         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
5798         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_randdir,
5799             SYS_RANDDIRFILES));
5800 }
5801 
5802 static int
5803 lxpr_readdir_sys_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5804 {
5805         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NETDIR);
5806         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_netdir,
5807             SYS_NETDIRFILES));
5808 }
5809 
5810 static int
5811 lxpr_readdir_sys_net_coredir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5812 {
5813         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_COREDIR);
5814         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_coredir,
5815             SYS_NET_COREDIRFILES));
5816 }
5817 
5818 static int
5819 lxpr_readdir_sys_vmdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5820 {
5821         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VMDIR);
5822         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_vmdir,
5823             SYS_VMDIRFILES));
5824 }
5825 
5826 static int
5827 lxpr_write_sys_net_core_somaxc(lxpr_node_t *lxpnp, struct uio *uio,
5828     struct cred *cr, caller_context_t *ct)
5829 {
5830         int error;
5831         int res = 0;
5832         size_t olen;
5833         char val[16];   /* big enough for a uint numeric string */
5834         netstack_t *ns;
5835         mod_prop_info_t *ptbl = NULL;
5836         mod_prop_info_t *pinfo = NULL;
5837 
5838         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
5839 
5840         if (uio->uio_loffset != 0)
5841                 return (EINVAL);
5842 
5843         if (uio->uio_resid == 0)
5844                 return (0);
5845 
5846         olen = uio->uio_resid;
5847         if (olen > sizeof (val) - 1)
5848                 return (EINVAL);
5849 
5850         bzero(val, sizeof (val));
5851         error = uiomove(val, olen, UIO_WRITE, uio);
5852         if (error != 0)
5853                 return (error);
5854 
5855         if (val[olen - 1] == '\n')
5856                 val[olen - 1] = '\0';
5857 
5858         if (val[0] == '\0') /* no input */
5859                 return (EINVAL);
5860 
5861         ns = netstack_get_current();
5862         if (ns == NULL)
5863                 return (EINVAL);
5864 
5865         ptbl = ns->netstack_tcp->tcps_propinfo_tbl;
5866         pinfo = mod_prop_lookup(ptbl, "_conn_req_max_q", MOD_PROTO_TCP);
5867         if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, val, 0) != 0)
5868                 res = EINVAL;
5869 
5870         netstack_rele(ns);
5871         return (res);
5872 }
5873 
5874 /* ARGSUSED */
5875 static int
5876 lxpr_write_sys_kernel_corepatt(lxpr_node_t *lxpnp, struct uio *uio,
5877     struct cred *cr, caller_context_t *ct)
5878 {
5879         zone_t *zone = curproc->p_zone;
5880         struct core_globals *cg;
5881         refstr_t *rp, *nrp;
5882         corectl_path_t *ccp;
5883         char val[MAXPATHLEN];
5884         char valtr[MAXPATHLEN];
5885         size_t olen;
5886         int error;
5887 
5888         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);
5889 
5890         cg = zone_getspecific(core_zone_key, zone);
5891         ASSERT(cg != NULL);
5892 
5893         if (secpolicy_coreadm(cr) != 0)
5894                 return (EPERM);
5895 
5896         if (uio->uio_loffset != 0)
5897                 return (EINVAL);
5898 
5899         if (uio->uio_resid == 0)
5900                 return (0);
5901 
5902         olen = uio->uio_resid;
5903         if (olen > sizeof (val) - 1)
5904                 return (EINVAL);
5905 
5906         bzero(val, sizeof (val));
5907         error = uiomove(val, olen, UIO_WRITE, uio);
5908         if (error != 0)
5909                 return (error);
5910 
5911         if (val[olen - 1] == '\n')
5912                 val[olen - 1] = '\0';
5913 
5914         if (val[0] == '|')
5915                 return (EINVAL);
5916 
5917         if ((error = lxpr_core_path_l2s(val, valtr, sizeof (valtr))) != 0)
5918                 return (error);
5919 
5920         nrp = refstr_alloc(valtr);
5921 
5922         ccp = cg->core_default_path;
5923         mutex_enter(&ccp->ccp_mtx);
5924         rp = ccp->ccp_path;
5925         refstr_hold((ccp->ccp_path = nrp));
5926         cg->core_options |= CC_PROCESS_PATH;
5927         mutex_exit(&ccp->ccp_mtx);
5928 
5929         if (rp != NULL)
5930                 refstr_rele(rp);
5931 
5932         return (0);
5933 }
5934 
5935 /*
5936  * lxpr_readlink(): Vnode operation for VOP_READLINK()
5937  */
5938 /* ARGSUSED */
5939 static int
5940 lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
5941 {
5942         char bp[MAXPATHLEN + 1];
5943         size_t buflen = sizeof (bp);
5944         lxpr_node_t *lxpnp = VTOLXP(vp);
5945         vnode_t *rvp = lxpnp->lxpr_realvp;
5946         pid_t pid;
5947         int error = 0;
5948 
5949         /*
5950          * Linux does something very "clever" for /proc/<pid>/fd/<num> entries.
5951          * Open FDs are represented as symlinks, the link contents
5952          * corresponding to the open resource.  For plain files or devices,
5953          * this isn't absurd since one can dereference the symlink to query
5954          * the underlying resource.  For sockets or pipes, it becomes ugly in a
5955          * hurry.  To maintain this human-readable output, those FD symlinks
5956          * point to bogus targets such as "socket:[<inodenum>]".  This requires
5957          * circumventing vfs since the stat/lstat behavior on those FD entries
5958          * will be unusual. (A stat must retrieve information about the open
5959          * socket or pipe.  It cannot fail because the link contents point to
5960          * an absent file.)
5961          *
5962          * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD
5963          * entries.  This bypasses code paths which would normally
5964          * short-circuit on symlinks and allows us to emulate the vfs behavior
5965          * expected by /proc consumers.
5966          */
5967         if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD)
5968                 return (EINVAL);
5969 
5970         /* Try to produce a symlink name for anything that has a realvp */
5971         if (rvp != NULL) {
5972                 if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
5973                         return (error);
5974                 if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) {
5975                         /*
5976                          * Special handling possible for /proc/<pid>/fd/<num>
5977                          * Generate <type>:[<inode>] links, if allowed.
5978                          */
5979                         if (lxpnp->lxpr_type != LXPR_PID_FD_FD ||
5980                             lxpr_readlink_fdnode(lxpnp, bp, buflen) != 0) {
5981                                 return (error);
5982                         }
5983                 }
5984         } else {
5985                 switch (lxpnp->lxpr_type) {
5986                 case LXPR_SELF:
5987                         /*
5988                          * Convert pid to the Linux default of 1 if we're the
5989                          * zone's init process or 0 if zsched.
5990                          */
5991                         if (curproc->p_pid ==
5992                             curproc->p_zone->zone_proc_initpid) {
5993                                 pid = 1;
5994                         } else if (curproc->p_pid ==
5995                             curproc->p_zone->zone_zsched->p_pid) {
5996                                 pid = 0;
5997                         } else {
5998                                 pid = curproc->p_pid;
5999                         }
6000 
6001                         /*
6002                          * Don't need to check result as every possible int
6003                          * will fit within MAXPATHLEN bytes.
6004                          */
6005                         (void) snprintf(bp, buflen, "%d", pid);
6006                         break;
6007                 case LXPR_PID_CURDIR:
6008                 case LXPR_PID_ROOTDIR:
6009                 case LXPR_PID_EXE:
6010                         return (EACCES);
6011                 default:
6012                         /*
6013                          * Need to return error so that nothing thinks
6014                          * that the symlink is empty and hence "."
6015                          */
6016                         return (EINVAL);
6017                 }
6018         }
6019 
6020         /* copy the link data to user space */
6021         return (uiomove(bp, strlen(bp), UIO_READ, uiop));
6022 }
6023 
6024 
6025 /*
6026  * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
6027  * Vnode is no longer referenced, deallocate the file
6028  * and all its resources.
6029  */
6030 /* ARGSUSED */
6031 static void
6032 lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
6033 {
6034         lxpr_freenode(VTOLXP(vp));
6035 }
6036 
/*
 * lxpr_sync(): Vnode operation for VOP_SYNC()
 * There is nothing to flush, so this unconditionally reports success.
 */
static int
lxpr_sync()
{
        /* This function must never fail. */
        const int success = 0;

        return (success);
}
6048 
6049 /*
6050  * lxpr_cmp(): Vnode operation for VOP_CMP()
6051  */
6052 static int
6053 lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
6054 {
6055         vnode_t *rvp;
6056 
6057         while (vn_matchops(vp1, lxpr_vnodeops) &&
6058             (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
6059                 vp1 = rvp;
6060         }
6061 
6062         while (vn_matchops(vp2, lxpr_vnodeops) &&
6063             (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
6064                 vp2 = rvp;
6065         }
6066 
6067         if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
6068                 return (vp1 == vp2);
6069         return (VOP_CMP(vp1, vp2, ct));
6070 }
6071 
6072 /*
6073  * lxpr_realvp(): Vnode operation for VOP_REALVP()
6074  */
6075 static int
6076 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
6077 {
6078         vnode_t *rvp;
6079 
6080         if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
6081                 vp = rvp;
6082                 if (VOP_REALVP(vp, &rvp, ct) == 0)
6083                         vp = rvp;
6084         }
6085 
6086         *vpp = vp;
6087         return (0);
6088 }
6089 
6090 static int
6091 lxpr_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
6092     caller_context_t *ct)
6093 {
6094         lxpr_node_t     *lxpnp = VTOLXP(vp);
6095         lxpr_nodetype_t type = lxpnp->lxpr_type;
6096 
6097         switch (type) {
6098         case LXPR_SYS_KERNEL_COREPATT:
6099                 return (lxpr_write_sys_kernel_corepatt(lxpnp, uiop, cr, ct));
6100         case LXPR_SYS_NET_CORE_SOMAXCON:
6101                 return (lxpr_write_sys_net_core_somaxc(lxpnp, uiop, cr, ct));
6102 
6103         default:
6104                 /* pretend we wrote the whole thing */
6105                 uiop->uio_offset += uiop->uio_resid;
6106                 uiop->uio_resid = 0;
6107                 return (0);
6108         }
6109 }
6110 
6111 /*
6112  * We need to allow open with O_CREAT for the oom_score_adj file.
6113  */
6114 /*ARGSUSED7*/
6115 static int
6116 lxpr_create(struct vnode *dvp, char *nm, struct vattr *vap,
6117     enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred,
6118     int flag, caller_context_t *ct, vsecattr_t *vsecp)
6119 {
6120         lxpr_node_t *lxpnp = VTOLXP(dvp);
6121         lxpr_nodetype_t type = lxpnp->lxpr_type;
6122         vnode_t *vp = NULL;
6123         int error;
6124 
6125         ASSERT(type < LXPR_NFILES);
6126 
6127         /*
6128          * restrict create permission to owner or root
6129          */
6130         if ((error = lxpr_access(dvp, VEXEC, 0, cred, ct)) != 0) {
6131                 return (error);
6132         }
6133 
6134         if (*nm == '\0')
6135                 return (EPERM);
6136 
6137         if (dvp->v_type != VDIR)
6138                 return (EPERM);
6139 
6140         if (exclusive == EXCL)
6141                 return (EEXIST);
6142 
6143         /*
6144          * We're currently restricting O_CREAT to:
6145          * - /proc/<pid>/fd/<num>
6146          * - /proc/<pid>/oom_score_adj
6147          * - /proc/<pid>/task/<tid>/fd/<num>
6148          * - /proc/<pid>/task/<tid>/oom_score_adj
6149          * - /proc/sys/kernel/core_pattern
6150          * - /proc/sys/net/core/somaxconn
6151          * - /proc/sys/vm/overcommit_memory
6152          * - /proc/sys/vm/swappiness
6153          */
6154         switch (type) {
6155         case LXPR_PIDDIR:
6156         case LXPR_PID_TASK_IDDIR:
6157                 if (strcmp(nm, "oom_score_adj") == 0) {
6158                         proc_t *p;
6159                         p = lxpr_lock(lxpnp->lxpr_pid);
6160                         if (p != NULL) {
6161                                 vp = lxpr_lookup_common(dvp, nm, p, piddir,
6162                                     PIDDIRFILES);
6163                         }
6164                         lxpr_unlock(p);
6165                 }
6166                 break;
6167 
6168         case LXPR_SYS_NET_COREDIR:
6169                 if (strcmp(nm, "somaxconn") == 0) {
6170                         vp = lxpr_lookup_common(dvp, nm, NULL, sys_net_coredir,
6171                             SYS_NET_COREDIRFILES);
6172                 }
6173                 break;
6174 
6175         case LXPR_SYS_KERNELDIR:
6176                 if (strcmp(nm, "core_pattern") == 0) {
6177                         vp = lxpr_lookup_common(dvp, nm, NULL, sys_kerneldir,
6178                             SYS_KERNELDIRFILES);
6179                 }
6180                 break;
6181 
6182         case LXPR_SYS_VMDIR:
6183                 if (strcmp(nm, "overcommit_memory") == 0 ||
6184                     strcmp(nm, "swappiness") == 0) {
6185                         vp = lxpr_lookup_common(dvp, nm, NULL, sys_vmdir,
6186                             SYS_VMDIRFILES);
6187                 }
6188                 break;
6189 
6190         case LXPR_PID_FDDIR:
6191         case LXPR_PID_TID_FDDIR:
6192                 vp = lxpr_lookup_fdnode(dvp, nm);
6193                 break;
6194 
6195         default:
6196                 vp = NULL;
6197                 break;
6198         }
6199 
6200         if (vp != NULL) {
6201                 /* Creating an existing file, allow it for regular files. */
6202                 if (vp->v_type == VDIR)
6203                         return (EISDIR);
6204 
6205                 /* confirm permissions against existing file */
6206                 if ((error = lxpr_access(vp, mode, 0, cred, ct)) != 0) {
6207                         VN_RELE(vp);
6208                         return (error);
6209                 }
6210 
6211                 *vpp = vp;
6212                 return (0);
6213         }
6214 
6215         /*
6216          * Linux proc does not allow creation of addition, non-subsystem
6217          * specific files inside the hierarchy.  ENOENT is tossed when such
6218          * actions are attempted.
6219          */
6220         return (ENOENT);
6221 }