1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2016 Joyent, Inc.
  25  */
  26 
  27 /*
  28  * lx_proc -- a Linux-compatible /proc for the LX brand
  29  *
  30  * We have -- confusingly -- two implementations of Linux /proc.  One is to
  31  * support native (but Linux-borne) programs that wish to view the native
  32  * system through the Linux /proc model; the other -- this one -- is to
  33  * support Linux binaries via the LX brand.  These two implementations differ
  34  * greatly in their aspirations (and their willingness to bend the truth
  35  * of the system to accommodate those aspirations); they should not be unified.
  36  */
  37 
  38 #include <sys/cpupart.h>
  39 #include <sys/cpuvar.h>
  40 #include <sys/session.h>
  41 #include <sys/vmparam.h>
  42 #include <sys/mman.h>
  43 #include <vm/rm.h>
  44 #include <vm/seg_vn.h>
  45 #include <sys/sdt.h>
  46 #include <lx_signum.h>
  47 #include <sys/strlog.h>
  48 #include <sys/stropts.h>
  49 #include <sys/cmn_err.h>
  50 #include <sys/lx_brand.h>
  51 #include <lx_auxv.h>
  52 #include <sys/x86_archext.h>
  53 #include <sys/archsystm.h>
  54 #include <sys/fp.h>
  55 #include <sys/pool_pset.h>
  56 #include <sys/pset.h>
  57 #include <sys/zone.h>
  58 #include <sys/pghw.h>
  59 #include <sys/vfs_opreg.h>
  60 #include <sys/param.h>
  61 #include <sys/utsname.h>
  62 #include <sys/rctl.h>
  63 #include <sys/kstat.h>
  64 #include <sys/lx_misc.h>
  65 #include <sys/brand.h>
  66 #include <sys/cred_impl.h>
  67 #include <sys/tihdr.h>
  68 #include <sys/corectl.h>
  69 #include <inet/ip.h>
  70 #include <inet/ip_ire.h>
  71 #include <inet/ip6.h>
  72 #include <inet/ip_if.h>
  73 #include <inet/tcp.h>
  74 #include <inet/tcp_impl.h>
  75 #include <inet/udp_impl.h>
  76 #include <inet/ipclassifier.h>
  77 #include <sys/socketvar.h>
  78 #include <fs/sockfs/socktpi.h>
  79 
/*
 * Dependent on procfs: these routines are not defined in this file and are
 * declared here by hand.
 */
extern kthread_t *prchoose(proc_t *);
extern int prreadargv(proc_t *, char *, size_t, size_t *);
extern int prreadenvv(proc_t *, char *, size_t, size_t *);
extern int prreadbuf(proc_t *, uintptr_t, uint8_t *, size_t, size_t *);
  85 
  86 #include "lx_proc.h"
  87 
/* Globals defined outside this file. */
extern pgcnt_t swapfs_minfree;
extern time_t boot_time;
  90 
/*
 * Pointer to the vnode ops vector for this fs.
 * This is instantiated in lxprinit() in lxpr_vfsops.c
 * (presumably built from lxpr_vnodeops_template -- confirm in lxprinit()).
 */
vnodeops_t *lxpr_vnodeops;
  96 
/*
 * Forward declarations of the vnode operation handlers.  Each of these is
 * referenced by the lxpr_vnodeops_template[] table in this file.
 */
static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
    caller_context_t *);
static int lxpr_create(struct vnode *, char *, struct vattr *, enum vcexcl,
    int, struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *);
static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
static int lxpr_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
    caller_context_t *);
static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
static int lxpr_lookup(vnode_t *, char *, vnode_t **,
    pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
    pathname_t *);
static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
    caller_context_t *, int);
static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
static int lxpr_sync(void);
static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
 117 
/*
 * Per-directory lookup handlers, one per kind of directory.  All share the
 * same signature: a vnode and a string in, a vnode pointer out (presumably
 * the directory vnode and the component name being looked up -- confirm
 * against lxpr_lookup()).
 */
static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *);
static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *);
static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *);
static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *);
static vnode_t *lxpr_lookup_sys_kdir_randdir(vnode_t *, char *);
static vnode_t *lxpr_lookup_sys_netdir(vnode_t *, char *);
static vnode_t *lxpr_lookup_sys_net_coredir(vnode_t *, char *);
static vnode_t *lxpr_lookup_sys_vmdir(vnode_t *, char *);
static vnode_t *lxpr_lookup_taskdir(vnode_t *, char *);
static vnode_t *lxpr_lookup_task_tid_dir(vnode_t *, char *);
 133 
/*
 * Per-directory readdir handlers, mirroring the lookup handlers above.  All
 * share the same signature: an lxpr node, a uio and an int pointer
 * (presumably the end-of-directory flag -- confirm against lxpr_readdir()).
 */
static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_sys_kdir_randdir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_sys_netdir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_sys_net_coredir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_sys_vmdir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_taskdir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_task_tid_dir(lxpr_node_t *, uio_t *, int *);
 149 
/*
 * Read handlers for the generic cases (invalid, empty, directory, fd) and
 * for the files directly under /proc.  Each takes the node being read and
 * an lxpr_uiobuf_t; lxpr_read_kmsg() alone takes an extra ldi_handle_t, so
 * it cannot be called through the common two-argument form.
 */
static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_cgroups(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_diskstats(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_filesystems(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t);
static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
 167 
/* Read handlers for the files under /proc/<pid>. */
static void lxpr_read_pid_auxv(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_cgroup(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_comm(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_env(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_oom_scr_adj(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);

/*
 * Read handlers specific to /proc/<pid>/task/<tid>; the other per-thread
 * files reuse the per-process handlers above in lxpr_read_function[].
 */
static void lxpr_read_pid_tid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_tid_status(lxpr_node_t *, lxpr_uiobuf_t *);
 183 
/* Read handlers for the files under /proc/net. */
static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_tcp6(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_udp6(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
/* Read handlers for the files under /proc/sys (fs, kernel, net, vm). */
static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *,
    lxpr_uiobuf_t *);
static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *,
    lxpr_uiobuf_t *);
static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *,
    lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_caplcap(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_corepatt(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_osrel(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_sem(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_shmmni(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_net_core_somaxc(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_vm_minfr_kb(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_vm_nhpages(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_vm_swappiness(lxpr_node_t *, lxpr_uiobuf_t *);
 229 
/*
 * Write handlers.  Only these two files have a dedicated write routine
 * declared here; both return an int (presumably an errno -- confirm).
 */
static int lxpr_write_sys_net_core_somaxc(lxpr_node_t *, uio_t *, cred_t *,
    caller_context_t *);
static int lxpr_write_sys_kernel_corepatt(lxpr_node_t *, uio_t *, cred_t *,
    caller_context_t *);
 234 
/*
 * Simple conversion
 *
 * btok() divides a byte count by 1024 with a right shift; ptok() scales a
 * page count to kilobytes by shifting left by (PAGESHIFT - 10), which
 * assumes PAGESHIFT is at least 10.
 */
#define btok(x) ((x) >> 10)                       /* bytes to kbytes */
#define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */

/* Brand-specific LWP data of thread t, viewed as LX LWP data. */
#define ttolxlwp(t)     ((struct lx_lwp_data *)ttolwpbrand(t))
 242 
/* Resource-control handles for the semaphore limits, defined elsewhere. */
extern rctl_hndl_t rc_process_semmsl;
extern rctl_hndl_t rc_process_semopm;
extern rctl_hndl_t rc_zone_semmni;

/* Resource-control handles for message-queue and shared-memory limits. */
extern rctl_hndl_t rc_zone_msgmni;
extern rctl_hndl_t rc_zone_shmmax;
extern rctl_hndl_t rc_zone_shmmni;
/* NOTE: despite the name, this is 2^32 - 1 (one byte short of 4 GiB). */
#define FOURGB  4294967295
 251 
/*
 * The maximum length of the concatenation of argument vector strings we
 * will return to the user via the branded procfs. Likewise for the env vector.
 * Both are plain (non-const, non-static) ints, so they are visible outside
 * this file; presumably counted in bytes -- confirm at the point of use.
 */
int lxpr_maxargvlen = 4096;
int lxpr_maxenvvlen = 4096;
 258 
 259 /*
 260  * The lx /proc vnode operations vector
 261  */
 262 const fs_operation_def_t lxpr_vnodeops_template[] = {
 263         VOPNAME_OPEN,           { .vop_open = lxpr_open },
 264         VOPNAME_CLOSE,          { .vop_close = lxpr_close },
 265         VOPNAME_READ,           { .vop_read = lxpr_read },
 266         VOPNAME_WRITE,          { .vop_read = lxpr_write },
 267         VOPNAME_GETATTR,        { .vop_getattr = lxpr_getattr },
 268         VOPNAME_ACCESS,         { .vop_access = lxpr_access },
 269         VOPNAME_LOOKUP,         { .vop_lookup = lxpr_lookup },
 270         VOPNAME_CREATE,         { .vop_create = lxpr_create },
 271         VOPNAME_READDIR,        { .vop_readdir = lxpr_readdir },
 272         VOPNAME_READLINK,       { .vop_readlink = lxpr_readlink },
 273         VOPNAME_FSYNC,          { .error = lxpr_sync },
 274         VOPNAME_SEEK,           { .error = lxpr_sync },
 275         VOPNAME_INACTIVE,       { .vop_inactive = lxpr_inactive },
 276         VOPNAME_CMP,            { .vop_cmp = lxpr_cmp },
 277         VOPNAME_REALVP,         { .vop_realvp = lxpr_realvp },
 278         NULL,                   NULL
 279 };
 280 
 281 
/*
 * file contents of an lx /proc directory.
 *
 * Each entry pairs an LXPR_* node type with the name under which it
 * appears; the per-process <pid> entries are not part of this static list.
 */
static lxpr_dirent_t lx_procdir[] = {
        { LXPR_CGROUPS,         "cgroups" },
        { LXPR_CMDLINE,         "cmdline" },
        { LXPR_CPUINFO,         "cpuinfo" },
        { LXPR_DEVICES,         "devices" },
        { LXPR_DISKSTATS,       "diskstats" },
        { LXPR_DMA,             "dma" },
        { LXPR_FILESYSTEMS,     "filesystems" },
        { LXPR_INTERRUPTS,      "interrupts" },
        { LXPR_IOPORTS,         "ioports" },
        { LXPR_KCORE,           "kcore" },
        { LXPR_KMSG,            "kmsg" },
        { LXPR_LOADAVG,         "loadavg" },
        { LXPR_MEMINFO,         "meminfo" },
        { LXPR_MODULES,         "modules" },
        { LXPR_MOUNTS,          "mounts" },
        { LXPR_NETDIR,          "net" },
        { LXPR_PARTITIONS,      "partitions" },
        { LXPR_SELF,            "self" },
        { LXPR_STAT,            "stat" },
        { LXPR_SWAPS,           "swaps" },
        { LXPR_SYSDIR,          "sys" },
        { LXPR_UPTIME,          "uptime" },
        { LXPR_VERSION,         "version" }
};

/* Number of entries in lx_procdir[]. */
#define PROCDIRFILES    (sizeof (lx_procdir) / sizeof (lx_procdir[0]))
 312 
/*
 * Contents of an lx /proc/<pid> directory.
 *
 * Entries are listed alphabetically by name except for "fd", which is last.
 */
static lxpr_dirent_t piddir[] = {
        { LXPR_PID_AUXV,        "auxv" },
        { LXPR_PID_CGROUP,      "cgroup" },
        { LXPR_PID_CMDLINE,     "cmdline" },
        { LXPR_PID_COMM,        "comm" },
        { LXPR_PID_CPU,         "cpu" },
        { LXPR_PID_CURDIR,      "cwd" },
        { LXPR_PID_ENV,         "environ" },
        { LXPR_PID_EXE,         "exe" },
        { LXPR_PID_LIMITS,      "limits" },
        { LXPR_PID_MAPS,        "maps" },
        { LXPR_PID_MEM,         "mem" },
        { LXPR_PID_MOUNTINFO,   "mountinfo" },
        { LXPR_PID_OOM_SCR_ADJ, "oom_score_adj" },
        { LXPR_PID_ROOTDIR,     "root" },
        { LXPR_PID_STAT,        "stat" },
        { LXPR_PID_STATM,       "statm" },
        { LXPR_PID_STATUS,      "status" },
        { LXPR_PID_TASKDIR,     "task" },
        { LXPR_PID_FDDIR,       "fd" }
};

/* Number of entries in piddir[]. */
#define PIDDIRFILES     (sizeof (piddir) / sizeof (piddir[0]))
 339 
/*
 * Contents of an lx /proc/<pid>/task/<tid> directory.
 *
 * Same names as the /proc/<pid> listing minus "task".
 *
 * NOTE(review): only auxv, comm, oom_score_adj, stat and status use
 * LXPR_PID_TID_* types here; the remaining entries reuse the per-process
 * LXPR_PID_* types.  Confirm against the node type enum in lx_proc.h that
 * this is intended.
 */
static lxpr_dirent_t tiddir[] = {
        { LXPR_PID_TID_AUXV,    "auxv" },
        { LXPR_PID_CGROUP,      "cgroup" },
        { LXPR_PID_CMDLINE,     "cmdline" },
        { LXPR_PID_TID_COMM,    "comm" },
        { LXPR_PID_CPU,         "cpu" },
        { LXPR_PID_CURDIR,      "cwd" },
        { LXPR_PID_ENV,         "environ" },
        { LXPR_PID_EXE,         "exe" },
        { LXPR_PID_LIMITS,      "limits" },
        { LXPR_PID_MAPS,        "maps" },
        { LXPR_PID_MEM,         "mem" },
        { LXPR_PID_MOUNTINFO,   "mountinfo" },
        { LXPR_PID_TID_OOM_SCR_ADJ,     "oom_score_adj" },
        { LXPR_PID_ROOTDIR,     "root" },
        { LXPR_PID_TID_STAT,    "stat" },
        { LXPR_PID_STATM,       "statm" },
        { LXPR_PID_TID_STATUS,  "status" },
        { LXPR_PID_FDDIR,       "fd" }
};

/* Number of entries in tiddir[]. */
#define TIDDIRFILES     (sizeof (tiddir) / sizeof (tiddir[0]))
 365 
 366 #define LX_RLIM_INFINITY        0xFFFFFFFFFFFFFFFF
 367 
 368 #define RCTL_INFINITE(x) \
 369         ((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \
 370         (x->rcv_flagaction & RCTL_GLOBAL_INFINITE))
 371 
/*
 * One row of the resource-limit table: a display name, its unit and the
 * name of the rctl the value comes from (presumably consumed by
 * lxpr_read_pid_limits() -- confirm).
 */
typedef struct lxpr_rlimtab {
        char    *rlim_name;     /* limit name */
        char    *rlim_unit;     /* limit unit */
        char    *rlim_rctl;     /* rctl source */
} lxpr_rlimtab_t;
 377 
/*
 * Resource-limit table.  "Max file locks" has no rctl source (NULL), and
 * the table is terminated by an all-NULL row.
 */
static lxpr_rlimtab_t lxpr_rlimtab[] = {
        { "Max cpu time",       "seconds",      "process.max-cpu-time" },
        { "Max file size",      "bytes",        "process.max-file-size" },
        { "Max data size",      "bytes",        "process.max-data-size" },
        { "Max stack size",     "bytes",        "process.max-stack-size" },
        { "Max core file size", "bytes",        "process.max-core-size" },
        { "Max resident set",   "bytes",        "zone.max-physical-memory" },
        { "Max processes",      "processes",    "zone.max-lwps" },
        { "Max open files",     "files",        "process.max-file-descriptor" },
        { "Max locked memory",  "bytes",        "zone.max-locked-memory" },
        { "Max address space",  "bytes",        "process.max-address-space" },
        { "Max file locks",     "locks",        NULL },
        { "Max pending signals",        "signals",
                "process.max-sigqueue-size" },
        { "Max msgqueue size",  "bytes",        "process.max-msg-messages" },
        { NULL, NULL, NULL }
};
 395 
 396 
/*
 * contents of lx /proc/net directory
 *
 * Each entry pairs an LXPR_NET_* node type with its file name.
 */
static lxpr_dirent_t netdir[] = {
        { LXPR_NET_ARP,         "arp" },
        { LXPR_NET_DEV,         "dev" },
        { LXPR_NET_DEV_MCAST,   "dev_mcast" },
        { LXPR_NET_IF_INET6,    "if_inet6" },
        { LXPR_NET_IGMP,        "igmp" },
        { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
        { LXPR_NET_IP_MR_VIF,   "ip_mr_vif" },
        { LXPR_NET_IPV6_ROUTE,  "ipv6_route" },
        { LXPR_NET_MCFILTER,    "mcfilter" },
        { LXPR_NET_NETSTAT,     "netstat" },
        { LXPR_NET_RAW,         "raw" },
        { LXPR_NET_ROUTE,       "route" },
        { LXPR_NET_RPC,         "rpc" },
        { LXPR_NET_RT_CACHE,    "rt_cache" },
        { LXPR_NET_SOCKSTAT,    "sockstat" },
        { LXPR_NET_SNMP,        "snmp" },
        { LXPR_NET_STAT,        "stat" },
        { LXPR_NET_TCP,         "tcp" },
        { LXPR_NET_TCP6,        "tcp6" },
        { LXPR_NET_UDP,         "udp" },
        { LXPR_NET_UDP6,        "udp6" },
        { LXPR_NET_UNIX,        "unix" }
};

/* Number of entries in netdir[]. */
#define NETDIRFILES     (sizeof (netdir) / sizeof (netdir[0]))
 426 
/*
 * contents of /proc/sys directory
 *
 * All four entries are themselves directories (LXPR_SYS_*DIR types).
 */
static lxpr_dirent_t sysdir[] = {
        { LXPR_SYS_FSDIR,       "fs" },
        { LXPR_SYS_KERNELDIR,   "kernel" },
        { LXPR_SYS_NETDIR,      "net" },
        { LXPR_SYS_VMDIR,       "vm" },
};

/* Number of entries in sysdir[]. */
#define SYSDIRFILES     (sizeof (sysdir) / sizeof (sysdir[0]))
 438 
/*
 * contents of /proc/sys/fs directory
 *
 * Currently a single subdirectory, "inotify".
 */
static lxpr_dirent_t sys_fsdir[] = {
        { LXPR_SYS_FS_INOTIFYDIR,       "inotify" },
};

/* Number of entries in sys_fsdir[]. */
#define SYS_FSDIRFILES (sizeof (sys_fsdir) / sizeof (sys_fsdir[0]))
 447 
/*
 * contents of /proc/sys/fs/inotify directory
 *
 * Each entry has a matching lxpr_read_sys_fs_inotify_*() handler declared
 * earlier in this file.
 */
static lxpr_dirent_t sys_fs_inotifydir[] = {
        { LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS,        "max_queued_events" },
        { LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES,       "max_user_instances" },
        { LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES,         "max_user_watches" },
};

/* Number of entries in sys_fs_inotifydir[]. */
#define SYS_FS_INOTIFYDIRFILES \
        (sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0]))
 459 
/*
 * contents of /proc/sys/kernel directory
 *
 * "random" is a subdirectory (LXPR_SYS_KERNEL_RANDDIR); the other entries
 * are files.  Note that "threads-max" is spelled with a hyphen, unlike the
 * underscore-separated names around it.
 */
static lxpr_dirent_t sys_kerneldir[] = {
        { LXPR_SYS_KERNEL_CAPLCAP,      "cap_last_cap" },
        { LXPR_SYS_KERNEL_COREPATT,     "core_pattern" },
        { LXPR_SYS_KERNEL_HOSTNAME,     "hostname" },
        { LXPR_SYS_KERNEL_MSGMNI,       "msgmni" },
        { LXPR_SYS_KERNEL_NGROUPS_MAX,  "ngroups_max" },
        { LXPR_SYS_KERNEL_OSREL,        "osrelease" },
        { LXPR_SYS_KERNEL_PID_MAX,      "pid_max" },
        { LXPR_SYS_KERNEL_RANDDIR,      "random" },
        { LXPR_SYS_KERNEL_SEM,          "sem" },
        { LXPR_SYS_KERNEL_SHMMAX,       "shmmax" },
        { LXPR_SYS_KERNEL_SHMMNI,       "shmmni" },
        { LXPR_SYS_KERNEL_THREADS_MAX,  "threads-max" },
};

/* Number of entries in sys_kerneldir[]. */
#define SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0]))
 479 
/*
 * contents of /proc/sys/kernel/random directory
 *
 * Currently a single file, "boot_id".
 */
static lxpr_dirent_t sys_randdir[] = {
        { LXPR_SYS_KERNEL_RAND_BOOTID,  "boot_id" },
};

/* Number of entries in sys_randdir[]. */
#define SYS_RANDDIRFILES (sizeof (sys_randdir) / sizeof (sys_randdir[0]))
 488 
/*
 * contents of /proc/sys/net directory
 *
 * Currently a single subdirectory, "core".
 */
static lxpr_dirent_t sys_netdir[] = {
        { LXPR_SYS_NET_COREDIR,         "core" },
};

/* Number of entries in sys_netdir[]. */
#define SYS_NETDIRFILES (sizeof (sys_netdir) / sizeof (sys_netdir[0]))
 497 
/*
 * contents of /proc/sys/net/core directory
 *
 * Currently a single file, "somaxconn", which lxpr_open() permits to be
 * opened for writing.
 */
static lxpr_dirent_t sys_net_coredir[] = {
        { LXPR_SYS_NET_CORE_SOMAXCON,   "somaxconn" },
};

/* Number of entries in sys_net_coredir[]. */
#define SYS_NET_COREDIRFILES \
        (sizeof (sys_net_coredir) / sizeof (sys_net_coredir[0]))
 507 
/*
 * contents of /proc/sys/vm directory
 *
 * Of these, lxpr_open() permits overcommit_memory and swappiness to be
 * opened for writing.
 */
static lxpr_dirent_t sys_vmdir[] = {
        { LXPR_SYS_VM_MINFR_KB,         "min_free_kbytes" },
        { LXPR_SYS_VM_NHUGEP,           "nr_hugepages" },
        { LXPR_SYS_VM_OVERCOMMIT_MEM,   "overcommit_memory" },
        { LXPR_SYS_VM_SWAPPINESS,       "swappiness" },
};

/* Number of entries in sys_vmdir[]. */
#define SYS_VMDIRFILES (sizeof (sys_vmdir) / sizeof (sys_vmdir[0]))
 519 
 520 /*
 521  * lxpr_open(): Vnode operation for VOP_OPEN()
 522  */
 523 static int
 524 lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 525 {
 526         vnode_t         *vp = *vpp;
 527         lxpr_node_t     *lxpnp = VTOLXP(vp);
 528         lxpr_nodetype_t type = lxpnp->lxpr_type;
 529         vnode_t         *rvp;
 530         int             error = 0;
 531 
 532         if (flag & FWRITE) {
 533                 /* Restrict writes to certain files */
 534                 switch (type) {
 535                 case LXPR_PID_OOM_SCR_ADJ:
 536                 case LXPR_PID_TID_OOM_SCR_ADJ:
 537                 case LXPR_SYS_KERNEL_COREPATT:
 538                 case LXPR_SYS_NET_CORE_SOMAXCON:
 539                 case LXPR_SYS_VM_OVERCOMMIT_MEM:
 540                 case LXPR_SYS_VM_SWAPPINESS:
 541                 case LXPR_PID_FD_FD:
 542                 case LXPR_PID_TID_FD_FD:
 543                         break;
 544                 default:
 545                         return (EPERM);
 546                 }
 547         }
 548 
 549         /*
 550          * If we are opening an underlying file only allow regular files,
 551          * fifos or sockets; reject the open for anything else.
 552          * Just do it if we are opening the current or root directory.
 553          */
 554         if (lxpnp->lxpr_realvp != NULL) {
 555                 rvp = lxpnp->lxpr_realvp;
 556 
 557                 if (type == LXPR_PID_FD_FD && rvp->v_type != VREG &&
 558                     rvp->v_type != VFIFO && rvp->v_type != VSOCK) {
 559                         error = EACCES;
 560                 } else {
 561                         if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) {
 562                                 /*
 563                                  * This flag lets the fifo open know that
 564                                  * we're using proc/fd to open a fd which we
 565                                  * already have open. Otherwise, the fifo might
 566                                  * reject an open if the other end has closed.
 567                                  */
 568                                 flag |= FKLYR;
 569                         }
 570                         /*
 571                          * Need to hold rvp since VOP_OPEN() may release it.
 572                          */
 573                         VN_HOLD(rvp);
 574                         error = VOP_OPEN(&rvp, flag, cr, ct);
 575                         if (error) {
 576                                 VN_RELE(rvp);
 577                         } else {
 578                                 *vpp = rvp;
 579                                 VN_RELE(vp);
 580                         }
 581                 }
 582         }
 583 
 584         return (error);
 585 }
 586 
 587 
 588 /*
 589  * lxpr_close(): Vnode operation for VOP_CLOSE()
 590  */
 591 /* ARGSUSED */
 592 static int
 593 lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
 594     caller_context_t *ct)
 595 {
 596         lxpr_node_t     *lxpr = VTOLXP(vp);
 597         lxpr_nodetype_t type = lxpr->lxpr_type;
 598 
 599         /*
 600          * we should never get here because the close is done on the realvp
 601          * for these nodes
 602          */
 603         ASSERT(type != LXPR_PID_FD_FD &&
 604             type != LXPR_PID_CURDIR &&
 605             type != LXPR_PID_ROOTDIR &&
 606             type != LXPR_PID_EXE);
 607 
 608         return (0);
 609 }
 610 
 611 static void (*lxpr_read_function[LXPR_NFILES])() = {
 612         lxpr_read_isdir,                /* /proc                */
 613         lxpr_read_isdir,                /* /proc/<pid>            */
 614         lxpr_read_pid_auxv,             /* /proc/<pid>/auxv       */
 615         lxpr_read_pid_cgroup,           /* /proc/<pid>/cgroup     */
 616         lxpr_read_pid_cmdline,          /* /proc/<pid>/cmdline    */
 617         lxpr_read_pid_comm,             /* /proc/<pid>/comm       */
 618         lxpr_read_empty,                /* /proc/<pid>/cpu        */
 619         lxpr_read_invalid,              /* /proc/<pid>/cwd        */
 620         lxpr_read_pid_env,              /* /proc/<pid>/environ    */
 621         lxpr_read_invalid,              /* /proc/<pid>/exe        */
 622         lxpr_read_pid_limits,           /* /proc/<pid>/limits     */
 623         lxpr_read_pid_maps,             /* /proc/<pid>/maps       */
 624         lxpr_read_empty,                /* /proc/<pid>/mem        */
 625         lxpr_read_pid_mountinfo,        /* /proc/<pid>/mountinfo */
 626         lxpr_read_pid_oom_scr_adj,      /* /proc/<pid>/oom_score_adj */
 627         lxpr_read_invalid,              /* /proc/<pid>/root       */
 628         lxpr_read_pid_stat,             /* /proc/<pid>/stat       */
 629         lxpr_read_pid_statm,            /* /proc/<pid>/statm      */
 630         lxpr_read_pid_status,           /* /proc/<pid>/status     */
 631         lxpr_read_isdir,                /* /proc/<pid>/task       */
 632         lxpr_read_isdir,                /* /proc/<pid>/task/nn    */
 633         lxpr_read_isdir,                /* /proc/<pid>/fd */
 634         lxpr_read_fd,                   /* /proc/<pid>/fd/nn      */
 635         lxpr_read_pid_auxv,             /* /proc/<pid>/task/<tid>/auxv      */
 636         lxpr_read_pid_cgroup,           /* /proc/<pid>/task/<tid>/cgroup */
 637         lxpr_read_pid_cmdline,          /* /proc/<pid>/task/<tid>/cmdline */
 638         lxpr_read_pid_comm,             /* /proc/<pid>/task/<tid>/comm      */
 639         lxpr_read_empty,                /* /proc/<pid>/task/<tid>/cpu       */
 640         lxpr_read_invalid,              /* /proc/<pid>/task/<tid>/cwd       */
 641         lxpr_read_pid_env,              /* /proc/<pid>/task/<tid>/environ */
 642         lxpr_read_invalid,              /* /proc/<pid>/task/<tid>/exe       */
 643         lxpr_read_pid_limits,           /* /proc/<pid>/task/<tid>/limits */
 644         lxpr_read_pid_maps,             /* /proc/<pid>/task/<tid>/maps      */
 645         lxpr_read_empty,                /* /proc/<pid>/task/<tid>/mem       */
 646         lxpr_read_pid_mountinfo,        /* /proc/<pid>/task/<tid>/mountinfo */
 647         lxpr_read_pid_oom_scr_adj,      /* /proc/<pid>/task/<tid>/oom_scr_adj */
 648         lxpr_read_invalid,              /* /proc/<pid>/task/<tid>/root      */
 649         lxpr_read_pid_tid_stat,         /* /proc/<pid>/task/<tid>/stat      */
 650         lxpr_read_pid_statm,            /* /proc/<pid>/task/<tid>/statm     */
 651         lxpr_read_pid_tid_status,       /* /proc/<pid>/task/<tid>/status */
 652         lxpr_read_isdir,                /* /proc/<pid>/task/<tid>/fd        */
 653         lxpr_read_fd,                   /* /proc/<pid>/task/<tid>/fd/nn     */
 654         lxpr_read_cgroups,              /* /proc/cgroups        */
 655         lxpr_read_empty,                /* /proc/cmdline        */
 656         lxpr_read_cpuinfo,              /* /proc/cpuinfo        */
 657         lxpr_read_empty,                /* /proc/devices        */
 658         lxpr_read_diskstats,            /* /proc/diskstats      */
 659         lxpr_read_empty,                /* /proc/dma            */
 660         lxpr_read_filesystems,          /* /proc/filesystems    */
 661         lxpr_read_empty,                /* /proc/interrupts     */
 662         lxpr_read_empty,                /* /proc/ioports        */
 663         lxpr_read_empty,                /* /proc/kcore          */
 664         lxpr_read_invalid,              /* /proc/kmsg -- see lxpr_read() */
 665         lxpr_read_loadavg,              /* /proc/loadavg        */
 666         lxpr_read_meminfo,              /* /proc/meminfo        */
 667         lxpr_read_empty,                /* /proc/modules        */
 668         lxpr_read_mounts,               /* /proc/mounts         */
 669         lxpr_read_isdir,                /* /proc/net            */
 670         lxpr_read_net_arp,              /* /proc/net/arp        */
 671         lxpr_read_net_dev,              /* /proc/net/dev        */
 672         lxpr_read_net_dev_mcast,        /* /proc/net/dev_mcast  */
 673         lxpr_read_net_if_inet6,         /* /proc/net/if_inet6   */
 674         lxpr_read_net_igmp,             /* /proc/net/igmp       */
 675         lxpr_read_net_ip_mr_cache,      /* /proc/net/ip_mr_cache */
 676         lxpr_read_net_ip_mr_vif,        /* /proc/net/ip_mr_vif  */
 677         lxpr_read_net_ipv6_route,       /* /proc/net/ipv6_route */
 678         lxpr_read_net_mcfilter,         /* /proc/net/mcfilter   */
 679         lxpr_read_net_netstat,          /* /proc/net/netstat    */
 680         lxpr_read_net_raw,              /* /proc/net/raw        */
 681         lxpr_read_net_route,            /* /proc/net/route      */
 682         lxpr_read_net_rpc,              /* /proc/net/rpc        */
 683         lxpr_read_net_rt_cache,         /* /proc/net/rt_cache   */
 684         lxpr_read_net_sockstat,         /* /proc/net/sockstat   */
 685         lxpr_read_net_snmp,             /* /proc/net/snmp       */
 686         lxpr_read_net_stat,             /* /proc/net/stat       */
 687         lxpr_read_net_tcp,              /* /proc/net/tcp        */
 688         lxpr_read_net_tcp6,             /* /proc/net/tcp6       */
 689         lxpr_read_net_udp,              /* /proc/net/udp        */
 690         lxpr_read_net_udp6,             /* /proc/net/udp6       */
 691         lxpr_read_net_unix,             /* /proc/net/unix       */
 692         lxpr_read_partitions,           /* /proc/partitions     */
 693         lxpr_read_invalid,              /* /proc/self           */
 694         lxpr_read_stat,                 /* /proc/stat           */
 695         lxpr_read_swaps,                /* /proc/swaps          */
 696         lxpr_read_invalid,              /* /proc/sys            */
 697         lxpr_read_invalid,              /* /proc/sys/fs         */
 698         lxpr_read_invalid,              /* /proc/sys/fs/inotify */
 699         lxpr_read_sys_fs_inotify_max_queued_events, /* max_queued_events */
 700         lxpr_read_sys_fs_inotify_max_user_instances, /* max_user_instances */
 701         lxpr_read_sys_fs_inotify_max_user_watches, /* max_user_watches */
 702         lxpr_read_invalid,              /* /proc/sys/kernel     */
 703         lxpr_read_sys_kernel_caplcap,   /* /proc/sys/kernel/cap_last_cap */
 704         lxpr_read_sys_kernel_corepatt,  /* /proc/sys/kernel/core_pattern */
 705         lxpr_read_sys_kernel_hostname,  /* /proc/sys/kernel/hostname */
 706         lxpr_read_sys_kernel_msgmni,    /* /proc/sys/kernel/msgmni */
 707         lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */
 708         lxpr_read_sys_kernel_osrel,     /* /proc/sys/kernel/osrelease */
 709         lxpr_read_sys_kernel_pid_max,   /* /proc/sys/kernel/pid_max */
 710         lxpr_read_invalid,              /* /proc/sys/kernel/random */
 711         lxpr_read_sys_kernel_rand_bootid, /* /proc/sys/kernel/random/boot_id */
 712         lxpr_read_sys_kernel_sem,       /* /proc/sys/kernel/sem */
 713         lxpr_read_sys_kernel_shmmax,    /* /proc/sys/kernel/shmmax */
 714         lxpr_read_sys_kernel_shmmni,    /* /proc/sys/kernel/shmmni */
 715         lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */
 716         lxpr_read_invalid,              /* /proc/sys/net        */
 717         lxpr_read_invalid,              /* /proc/sys/net/core   */
 718         lxpr_read_sys_net_core_somaxc,  /* /proc/sys/net/core/somaxconn */
 719         lxpr_read_invalid,              /* /proc/sys/vm */
 720         lxpr_read_sys_vm_minfr_kb,      /* /proc/sys/vm/min_free_kbytes */
 721         lxpr_read_sys_vm_nhpages,       /* /proc/sys/vm/nr_hugepages */
 722         lxpr_read_sys_vm_overcommit_mem, /* /proc/sys/vm/overcommit_memory */
 723         lxpr_read_sys_vm_swappiness,    /* /proc/sys/vm/swappiness */
 724         lxpr_read_uptime,               /* /proc/uptime         */
 725         lxpr_read_version,              /* /proc/version        */
 726 };
 727 
 728 /*
 729  * Array of lookup functions, indexed by lx /proc file type.
 730  */
 731 static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
 732         lxpr_lookup_procdir,            /* /proc                */
 733         lxpr_lookup_piddir,             /* /proc/<pid>            */
 734         lxpr_lookup_not_a_dir,          /* /proc/<pid>/auxv       */
 735         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cgroup     */
 736         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cmdline    */
 737         lxpr_lookup_not_a_dir,          /* /proc/<pid>/comm       */
 738         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cpu        */
 739         lxpr_lookup_not_a_dir,          /* /proc/<pid>/cwd        */
 740         lxpr_lookup_not_a_dir,          /* /proc/<pid>/environ    */
 741         lxpr_lookup_not_a_dir,          /* /proc/<pid>/exe        */
 742         lxpr_lookup_not_a_dir,          /* /proc/<pid>/limits     */
 743         lxpr_lookup_not_a_dir,          /* /proc/<pid>/maps       */
 744         lxpr_lookup_not_a_dir,          /* /proc/<pid>/mem        */
 745         lxpr_lookup_not_a_dir,          /* /proc/<pid>/mountinfo */
 746         lxpr_lookup_not_a_dir,          /* /proc/<pid>/oom_score_adj */
 747         lxpr_lookup_not_a_dir,          /* /proc/<pid>/root       */
 748         lxpr_lookup_not_a_dir,          /* /proc/<pid>/stat       */
 749         lxpr_lookup_not_a_dir,          /* /proc/<pid>/statm      */
 750         lxpr_lookup_not_a_dir,          /* /proc/<pid>/status     */
 751         lxpr_lookup_taskdir,            /* /proc/<pid>/task       */
 752         lxpr_lookup_task_tid_dir,       /* /proc/<pid>/task/nn    */
 753         lxpr_lookup_fddir,              /* /proc/<pid>/fd */
 754         lxpr_lookup_not_a_dir,          /* /proc/<pid>/fd/nn      */
 755         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/auxv      */
 756         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cgroup */
 757         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cmdline */
 758         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/comm      */
 759         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cpu       */
 760         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/cwd       */
 761         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/environ */
 762         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/exe       */
 763         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/limits */
 764         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/maps      */
 765         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/mem       */
 766         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/mountinfo */
 767         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/oom_scr_adj */
 768         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/root      */
 769         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/stat      */
 770         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/statm     */
 771         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/status */
 772         lxpr_lookup_fddir,              /* /proc/<pid>/task/<tid>/fd        */
 773         lxpr_lookup_not_a_dir,          /* /proc/<pid>/task/<tid>/fd/nn     */
 774         lxpr_lookup_not_a_dir,          /* /proc/cgroups        */
 775         lxpr_lookup_not_a_dir,          /* /proc/cmdline        */
 776         lxpr_lookup_not_a_dir,          /* /proc/cpuinfo        */
 777         lxpr_lookup_not_a_dir,          /* /proc/devices        */
 778         lxpr_lookup_not_a_dir,          /* /proc/diskstats      */
 779         lxpr_lookup_not_a_dir,          /* /proc/dma            */
 780         lxpr_lookup_not_a_dir,          /* /proc/filesystems    */
 781         lxpr_lookup_not_a_dir,          /* /proc/interrupts     */
 782         lxpr_lookup_not_a_dir,          /* /proc/ioports        */
 783         lxpr_lookup_not_a_dir,          /* /proc/kcore          */
 784         lxpr_lookup_not_a_dir,          /* /proc/kmsg           */
 785         lxpr_lookup_not_a_dir,          /* /proc/loadavg        */
 786         lxpr_lookup_not_a_dir,          /* /proc/meminfo        */
 787         lxpr_lookup_not_a_dir,          /* /proc/modules        */
 788         lxpr_lookup_not_a_dir,          /* /proc/mounts         */
 789         lxpr_lookup_netdir,             /* /proc/net            */
 790         lxpr_lookup_not_a_dir,          /* /proc/net/arp        */
 791         lxpr_lookup_not_a_dir,          /* /proc/net/dev        */
 792         lxpr_lookup_not_a_dir,          /* /proc/net/dev_mcast  */
 793         lxpr_lookup_not_a_dir,          /* /proc/net/if_inet6   */
 794         lxpr_lookup_not_a_dir,          /* /proc/net/igmp       */
 795         lxpr_lookup_not_a_dir,          /* /proc/net/ip_mr_cache */
 796         lxpr_lookup_not_a_dir,          /* /proc/net/ip_mr_vif  */
 797         lxpr_lookup_not_a_dir,          /* /proc/net/ipv6_route */
 798         lxpr_lookup_not_a_dir,          /* /proc/net/mcfilter   */
 799         lxpr_lookup_not_a_dir,          /* /proc/net/netstat    */
 800         lxpr_lookup_not_a_dir,          /* /proc/net/raw        */
 801         lxpr_lookup_not_a_dir,          /* /proc/net/route      */
 802         lxpr_lookup_not_a_dir,          /* /proc/net/rpc        */
 803         lxpr_lookup_not_a_dir,          /* /proc/net/rt_cache   */
 804         lxpr_lookup_not_a_dir,          /* /proc/net/sockstat   */
 805         lxpr_lookup_not_a_dir,          /* /proc/net/snmp       */
 806         lxpr_lookup_not_a_dir,          /* /proc/net/stat       */
 807         lxpr_lookup_not_a_dir,          /* /proc/net/tcp        */
 808         lxpr_lookup_not_a_dir,          /* /proc/net/tcp6       */
 809         lxpr_lookup_not_a_dir,          /* /proc/net/udp        */
 810         lxpr_lookup_not_a_dir,          /* /proc/net/udp6       */
 811         lxpr_lookup_not_a_dir,          /* /proc/net/unix       */
 812         lxpr_lookup_not_a_dir,          /* /proc/partitions     */
 813         lxpr_lookup_not_a_dir,          /* /proc/self           */
 814         lxpr_lookup_not_a_dir,          /* /proc/stat           */
 815         lxpr_lookup_not_a_dir,          /* /proc/swaps          */
 816         lxpr_lookup_sysdir,             /* /proc/sys            */
 817         lxpr_lookup_sys_fsdir,          /* /proc/sys/fs         */
 818         lxpr_lookup_sys_fs_inotifydir,  /* /proc/sys/fs/inotify */
 819         lxpr_lookup_not_a_dir,          /* .../inotify/max_queued_events */
 820         lxpr_lookup_not_a_dir,          /* .../inotify/max_user_instances */
 821         lxpr_lookup_not_a_dir,          /* .../inotify/max_user_watches */
 822         lxpr_lookup_sys_kerneldir,      /* /proc/sys/kernel     */
 823         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/cap_last_cap */
 824         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/core_pattern */
 825         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/hostname */
 826         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/msgmni */
 827         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/ngroups_max */
 828         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/osrelease */
 829         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/pid_max */
 830         lxpr_lookup_sys_kdir_randdir,   /* /proc/sys/kernel/random */
 831         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/random/boot_id */
 832         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/sem */
 833         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/shmmax */
 834         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/shmmni */
 835         lxpr_lookup_not_a_dir,          /* /proc/sys/kernel/threads-max */
 836         lxpr_lookup_sys_netdir,         /* /proc/sys/net */
 837         lxpr_lookup_sys_net_coredir,    /* /proc/sys/net/core */
 838         lxpr_lookup_not_a_dir,          /* /proc/sys/net/core/somaxconn */
 839         lxpr_lookup_sys_vmdir,          /* /proc/sys/vm */
 840         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/min_free_kbytes */
 841         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/nr_hugepages */
 842         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/overcommit_memory */
 843         lxpr_lookup_not_a_dir,          /* /proc/sys/vm/swappiness */
 844         lxpr_lookup_not_a_dir,          /* /proc/uptime         */
 845         lxpr_lookup_not_a_dir,          /* /proc/version        */
 846 };
 847 
 848 /*
 849  * Array of readdir functions, indexed by /proc file type.
 850  */
 851 static int (*lxpr_readdir_function[LXPR_NFILES])() = {
 852         lxpr_readdir_procdir,           /* /proc                */
 853         lxpr_readdir_piddir,            /* /proc/<pid>            */
 854         lxpr_readdir_not_a_dir,         /* /proc/<pid>/auxv       */
 855         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cgroup     */
 856         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cmdline    */
 857         lxpr_readdir_not_a_dir,         /* /proc/<pid>/comm       */
 858         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cpu        */
 859         lxpr_readdir_not_a_dir,         /* /proc/<pid>/cwd        */
 860         lxpr_readdir_not_a_dir,         /* /proc/<pid>/environ    */
 861         lxpr_readdir_not_a_dir,         /* /proc/<pid>/exe        */
 862         lxpr_readdir_not_a_dir,         /* /proc/<pid>/limits     */
 863         lxpr_readdir_not_a_dir,         /* /proc/<pid>/maps       */
 864         lxpr_readdir_not_a_dir,         /* /proc/<pid>/mem        */
 865         lxpr_readdir_not_a_dir,         /* /proc/<pid>/mountinfo */
 866         lxpr_readdir_not_a_dir,         /* /proc/<pid>/oom_score_adj */
 867         lxpr_readdir_not_a_dir,         /* /proc/<pid>/root       */
 868         lxpr_readdir_not_a_dir,         /* /proc/<pid>/stat       */
 869         lxpr_readdir_not_a_dir,         /* /proc/<pid>/statm      */
 870         lxpr_readdir_not_a_dir,         /* /proc/<pid>/status     */
 871         lxpr_readdir_taskdir,           /* /proc/<pid>/task       */
 872         lxpr_readdir_task_tid_dir,      /* /proc/<pid>/task/nn    */
 873         lxpr_readdir_fddir,             /* /proc/<pid>/fd */
 874         lxpr_readdir_not_a_dir,         /* /proc/<pid>/fd/nn      */
 875         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/auxv      */
 876         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cgroup */
 877         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cmdline */
 878         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/comm      */
 879         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cpu       */
 880         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/cwd       */
 881         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/environ */
 882         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/exe       */
 883         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/limits */
 884         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/maps      */
 885         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/mem       */
 886         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/mountinfo */
 887         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid/oom_scr_adj */
 888         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/root      */
 889         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/stat      */
 890         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/statm     */
 891         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/status */
 892         lxpr_readdir_fddir,             /* /proc/<pid>/task/<tid>/fd        */
 893         lxpr_readdir_not_a_dir,         /* /proc/<pid>/task/<tid>/fd/nn     */
 894         lxpr_readdir_not_a_dir,         /* /proc/cgroups        */
 895         lxpr_readdir_not_a_dir,         /* /proc/cmdline        */
 896         lxpr_readdir_not_a_dir,         /* /proc/cpuinfo        */
 897         lxpr_readdir_not_a_dir,         /* /proc/devices        */
 898         lxpr_readdir_not_a_dir,         /* /proc/diskstats      */
 899         lxpr_readdir_not_a_dir,         /* /proc/dma            */
 900         lxpr_readdir_not_a_dir,         /* /proc/filesystems    */
 901         lxpr_readdir_not_a_dir,         /* /proc/interrupts     */
 902         lxpr_readdir_not_a_dir,         /* /proc/ioports        */
 903         lxpr_readdir_not_a_dir,         /* /proc/kcore          */
 904         lxpr_readdir_not_a_dir,         /* /proc/kmsg           */
 905         lxpr_readdir_not_a_dir,         /* /proc/loadavg        */
 906         lxpr_readdir_not_a_dir,         /* /proc/meminfo        */
 907         lxpr_readdir_not_a_dir,         /* /proc/modules        */
 908         lxpr_readdir_not_a_dir,         /* /proc/mounts         */
 909         lxpr_readdir_netdir,            /* /proc/net            */
 910         lxpr_readdir_not_a_dir,         /* /proc/net/arp        */
 911         lxpr_readdir_not_a_dir,         /* /proc/net/dev        */
 912         lxpr_readdir_not_a_dir,         /* /proc/net/dev_mcast  */
 913         lxpr_readdir_not_a_dir,         /* /proc/net/if_inet6   */
 914         lxpr_readdir_not_a_dir,         /* /proc/net/igmp       */
 915         lxpr_readdir_not_a_dir,         /* /proc/net/ip_mr_cache */
 916         lxpr_readdir_not_a_dir,         /* /proc/net/ip_mr_vif  */
 917         lxpr_readdir_not_a_dir,         /* /proc/net/ipv6_route */
 918         lxpr_readdir_not_a_dir,         /* /proc/net/mcfilter   */
 919         lxpr_readdir_not_a_dir,         /* /proc/net/netstat    */
 920         lxpr_readdir_not_a_dir,         /* /proc/net/raw        */
 921         lxpr_readdir_not_a_dir,         /* /proc/net/route      */
 922         lxpr_readdir_not_a_dir,         /* /proc/net/rpc        */
 923         lxpr_readdir_not_a_dir,         /* /proc/net/rt_cache   */
 924         lxpr_readdir_not_a_dir,         /* /proc/net/sockstat   */
 925         lxpr_readdir_not_a_dir,         /* /proc/net/snmp       */
 926         lxpr_readdir_not_a_dir,         /* /proc/net/stat       */
 927         lxpr_readdir_not_a_dir,         /* /proc/net/tcp        */
 928         lxpr_readdir_not_a_dir,         /* /proc/net/tcp6       */
 929         lxpr_readdir_not_a_dir,         /* /proc/net/udp        */
 930         lxpr_readdir_not_a_dir,         /* /proc/net/udp6       */
 931         lxpr_readdir_not_a_dir,         /* /proc/net/unix       */
 932         lxpr_readdir_not_a_dir,         /* /proc/partitions     */
 933         lxpr_readdir_not_a_dir,         /* /proc/self           */
 934         lxpr_readdir_not_a_dir,         /* /proc/stat           */
 935         lxpr_readdir_not_a_dir,         /* /proc/swaps          */
 936         lxpr_readdir_sysdir,            /* /proc/sys            */
 937         lxpr_readdir_sys_fsdir,         /* /proc/sys/fs         */
 938         lxpr_readdir_sys_fs_inotifydir, /* /proc/sys/fs/inotify */
 939         lxpr_readdir_not_a_dir,         /* .../inotify/max_queued_events */
 940         lxpr_readdir_not_a_dir,         /* .../inotify/max_user_instances */
 941         lxpr_readdir_not_a_dir,         /* .../inotify/max_user_watches */
 942         lxpr_readdir_sys_kerneldir,     /* /proc/sys/kernel     */
 943         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/cap_last_cap */
 944         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/core_pattern */
 945         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/hostname */
 946         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/msgmni */
 947         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/ngroups_max */
 948         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/osrelease */
 949         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/pid_max */
 950         lxpr_readdir_sys_kdir_randdir,  /* /proc/sys/kernel/random */
 951         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/random/boot_id */
 952         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/sem */
 953         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/shmmax */
 954         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/shmmni */
 955         lxpr_readdir_not_a_dir,         /* /proc/sys/kernel/threads-max */
 956         lxpr_readdir_sys_netdir,        /* /proc/sys/net */
 957         lxpr_readdir_sys_net_coredir,   /* /proc/sys/net/core */
 958         lxpr_readdir_not_a_dir,         /* /proc/sys/net/core/somaxconn */
 959         lxpr_readdir_sys_vmdir,         /* /proc/sys/vm */
 960         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/min_free_kbytes */
 961         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/nr_hugepages */
 962         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/overcommit_memory */
 963         lxpr_readdir_not_a_dir,         /* /proc/sys/vm/swappiness */
 964         lxpr_readdir_not_a_dir,         /* /proc/uptime         */
 965         lxpr_readdir_not_a_dir,         /* /proc/version        */
 966 };
 967 
 968 
 969 /*
 970  * lxpr_read(): Vnode operation for VOP_READ()
 971  *
 972  * As the format of all the files that can be read in the lx procfs is human
 973  * readable and not binary structures there do not have to be different
 974  * read variants depending on whether the reading process model is 32 or 64 bits
 975  * (at least in general, and certainly the difference is unlikely to be enough
 976  * to justify have different routines for 32 and 64 bit reads
 977  */
 978 /* ARGSUSED */
 979 static int
 980 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 981     caller_context_t *ct)
 982 {
 983         lxpr_node_t *lxpnp = VTOLXP(vp);
 984         lxpr_nodetype_t type = lxpnp->lxpr_type;
 985         lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
 986         int error;
 987 
 988         ASSERT(type < LXPR_NFILES);
 989 
 990         if (type == LXPR_KMSG) {
 991                 ldi_ident_t     li = VTOLXPM(vp)->lxprm_li;
 992                 ldi_handle_t    ldih;
 993                 struct strioctl str;
 994                 int             rv;
 995 
 996                 /*
 997                  * Open the zone's console device using the layered driver
 998                  * interface.
 999                  */
1000                 if ((error =
1001                     ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li)) != 0)
1002                         return (error);
1003 
1004                 /*
1005                  * Send an ioctl to the underlying console device, letting it
1006                  * know we're interested in getting console messages.
1007                  */
1008                 str.ic_cmd = I_CONSLOG;
1009                 str.ic_timout = 0;
1010                 str.ic_len = 0;
1011                 str.ic_dp = NULL;
1012                 if ((error = ldi_ioctl(ldih, I_STR,
1013                     (intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
1014                         return (error);
1015 
1016                 lxpr_read_kmsg(lxpnp, uiobuf, ldih);
1017 
1018                 if ((error = ldi_close(ldih, FREAD, cr)) != 0)
1019                         return (error);
1020         } else {
1021                 lxpr_read_function[type](lxpnp, uiobuf);
1022         }
1023 
1024         error = lxpr_uiobuf_flush(uiobuf);
1025         lxpr_uiobuf_free(uiobuf);
1026 
1027         return (error);
1028 }
1029 
/*
 * lxpr_read_isdir(), lxpr_read_invalid(), lxpr_read_empty()
 *
 * Various special case reads:
 * - trying to read a directory
 * - invalid file (used to mean a file that should be implemented,
 *   but isn't yet)
 * - empty file
 * - waiting to read a file that will never have anything to read
 */
1040 /* ARGSUSED */
1041 static void
1042 lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1043 {
1044         lxpr_uiobuf_seterr(uiobuf, EISDIR);
1045 }
1046 
1047 /* ARGSUSED */
1048 static void
1049 lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1050 {
1051         lxpr_uiobuf_seterr(uiobuf, EINVAL);
1052 }
1053 
1054 /* ARGSUSED */
1055 static void
1056 lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1057 {
1058 }
1059 
1060 /*
1061  * lxpr_read_pid_auxv(): read process aux vector
1062  */
1063 static void
1064 lxpr_read_pid_auxv(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1065 {
1066         proc_t *p;
1067         lx_proc_data_t *pd;
1068         lx_elf_data_t *edp = NULL;
1069         int i, cnt;
1070 
1071         ASSERT(lxpnp->lxpr_type == LXPR_PID_AUXV ||
1072             lxpnp->lxpr_type == LXPR_PID_TID_AUXV);
1073 
1074         p = lxpr_lock(lxpnp->lxpr_pid);
1075 
1076         if (p == NULL) {
1077                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1078                 return;
1079         }
1080         if ((pd = ptolxproc(p)) == NULL) {
1081                 /* Emit a single AT_NULL record for non-branded processes */
1082                 auxv_t buf;
1083 
1084                 bzero(&buf, sizeof (buf));
1085                 lxpr_unlock(p);
1086                 lxpr_uiobuf_write(uiobuf, (char *)&buf, sizeof (buf));
1087                 return;
1088         } else {
1089                 edp = &pd->l_elf_data;
1090         }
1091 
1092         if (p->p_model == DATAMODEL_NATIVE) {
1093                 auxv_t buf[__KERN_NAUXV_IMPL];
1094 
1095                 /*
1096                  * Because a_type is only of size int (not long), the buffer
1097                  * contents must be zeroed first to ensure cleanliness.
1098                  */
1099                 bzero(buf, sizeof (buf));
1100                 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
1101                         if (lx_auxv_stol(&p->p_user.u_auxv[i],
1102                             &buf[cnt], edp) == 0) {
1103                                 cnt++;
1104                         }
1105                         if (p->p_user.u_auxv[i].a_type == AT_NULL) {
1106                                 break;
1107                         }
1108                 }
1109                 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
1110                 lxpr_unlock(p);
1111         }
1112 #if defined(_SYSCALL32_IMPL)
1113         else {
1114                 auxv32_t buf[__KERN_NAUXV_IMPL];
1115 
1116                 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
1117                         auxv_t temp;
1118 
1119                         if (lx_auxv_stol(&p->p_user.u_auxv[i],
1120                             &temp, edp) == 0) {
1121                                 buf[cnt].a_type = (int)temp.a_type;
1122                                 buf[cnt].a_un.a_val = (int)temp.a_un.a_val;
1123                                 cnt++;
1124                         }
1125                         if (p->p_user.u_auxv[i].a_type == AT_NULL) {
1126                                 break;
1127                         }
1128                 }
1129                 lxpr_unlock(p);
1130                 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
1131         }
1132 #endif /* defined(_SYSCALL32_IMPL) */
1133 }
1134 
1135 /*
1136  * lxpr_read_pid_cgroup(): read cgroups for process
1137  */
1138 static void
1139 lxpr_read_pid_cgroup(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1140 {
1141         proc_t *p;
1142 
1143         ASSERT(lxpnp->lxpr_type == LXPR_PID_CGROUP ||
1144             lxpnp->lxpr_type == LXPR_PID_TID_CGROUP);
1145 
1146         p = lxpr_lock(lxpnp->lxpr_pid);
1147         if (p == NULL) {
1148                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1149                 return;
1150         }
1151 
1152         /* basic stub, 3rd field will need to be populated */
1153         lxpr_uiobuf_printf(uiobuf, "1:name=systemd:/\n");
1154 
1155         lxpr_unlock(p);
1156 }
1157 
1158 static void
1159 lxpr_copy_cmdline(proc_t *p, lx_proc_data_t *pd, lxpr_uiobuf_t *uiobuf)
1160 {
1161         uio_t *uiop = uiobuf->uiop;
1162         char *buf = uiobuf->buffer;
1163         int bsz = uiobuf->buffsize;
1164         boolean_t env_overflow = B_FALSE;
1165         uintptr_t pos = pd->l_args_start + uiop->uio_offset;
1166         uintptr_t estart = pd->l_envs_start;
1167         uintptr_t eend = pd->l_envs_end;
1168         size_t chunk, copied;
1169         int err = 0;
1170 
1171         /* Do not bother with data beyond the end of the envp strings area. */
1172         if (pos > eend) {
1173                 return;
1174         }
1175         mutex_exit(&p->p_lock);
1176 
1177         /*
1178          * If the starting or ending bounds are outside the argv strings area,
1179          * check to see if the process has overwritten the terminating NULL.
1180          * If not, no data needs to be copied from oustide the argv area.
1181          */
1182         if (pos >= estart || (pos + uiop->uio_resid) >= estart) {
1183                 uint8_t term;
1184                 if (uread(p, &term, sizeof (term), estart - 1) != 0) {
1185                         err = EFAULT;
1186                 } else if (term != 0) {
1187                         env_overflow = B_TRUE;
1188                 }
1189         }
1190 
1191 
1192         /* Data between astart and estart-1 can be copied freely. */
1193         while (pos < estart && uiop->uio_resid > 0 && err == 0) {
1194                 chunk = MIN(estart - pos, uiop->uio_resid);
1195                 chunk = MIN(chunk, bsz);
1196 
1197                 if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) != 0 ||
1198                     copied != chunk) {
1199                         err = EFAULT;
1200                         break;
1201                 }
1202                 err = uiomove(buf, copied, UIO_READ, uiop);
1203                 pos += copied;
1204         }
1205 
1206         /*
1207          * Onward from estart, data is copied as a contiguous string.  To
1208          * protect env data from potential snooping, only one buffer-sized copy
1209          * is allowed to avoid complex seek logic.
1210          */
1211         if (err == 0 && env_overflow && pos == estart && uiop->uio_resid > 0) {
1212                 chunk = MIN(eend - pos, uiop->uio_resid);
1213                 chunk = MIN(chunk, bsz);
1214                 if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) == 0) {
1215                         int len = strnlen(buf, copied);
1216                         if (len > 0) {
1217                                 err = uiomove(buf, len, UIO_READ, uiop);
1218                         }
1219                 }
1220         }
1221 
1222         uiobuf->error = err;
1223         /* reset any uiobuf state */
1224         uiobuf->pos = uiobuf->buffer;
1225         uiobuf->beg = 0;
1226 
1227         mutex_enter(&p->p_lock);
1228 }
1229 
1230 /*
1231  * lxpr_read_pid_cmdline(): read argument vector from process
1232  */
1233 static void
1234 lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1235 {
1236         proc_t *p;
1237         char *buf;
1238         size_t asz = lxpr_maxargvlen, sz;
1239         lx_proc_data_t *pd;
1240 
1241         ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE ||
1242             lxpnp->lxpr_type == LXPR_PID_TID_CMDLINE);
1243 
1244         buf = kmem_alloc(asz, KM_SLEEP);
1245 
1246         p = lxpr_lock(lxpnp->lxpr_pid);
1247         if (p == NULL) {
1248                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1249                 kmem_free(buf, asz);
1250                 return;
1251         }
1252 
1253         if ((pd = ptolxproc(p)) != NULL && pd->l_args_start != 0 &&
1254             pd->l_envs_start != 0 && pd->l_envs_end != 0) {
1255                 /* Use Linux-style argv bounds if possible. */
1256                 lxpr_copy_cmdline(p, pd, uiobuf);
1257         } else {
1258                 if (prreadargv(p, buf, asz, &sz) != 0) {
1259                         lxpr_uiobuf_seterr(uiobuf, EINVAL);
1260                 } else {
1261                         lxpr_uiobuf_write(uiobuf, buf, sz);
1262                 }
1263         }
1264 
1265         lxpr_unlock(p);
1266         kmem_free(buf, asz);
1267 }
1268 
1269 /*
1270  * lxpr_read_pid_comm(): read command from process
1271  */
1272 static void
1273 lxpr_read_pid_comm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1274 {
1275         proc_t *p;
1276 
1277         VERIFY(lxpnp->lxpr_type == LXPR_PID_COMM ||
1278             lxpnp->lxpr_type == LXPR_PID_TID_COMM);
1279 
1280         /*
1281          * Because prctl(PR_SET_NAME) does not set custom names for threads
1282          * (vs processes), there is no need for special handling here.
1283          */
1284         if ((p = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
1285                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1286                 return;
1287         }
1288         lxpr_uiobuf_printf(uiobuf, "%s\n", p->p_user.u_comm);
1289         lxpr_unlock(p);
1290 }
1291 
1292 /*
1293  * lxpr_read_pid_env(): read env vector from process
1294  */
1295 static void
1296 lxpr_read_pid_env(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1297 {
1298         proc_t *p;
1299         char *buf;
1300         size_t asz = lxpr_maxenvvlen, sz;
1301 
1302         ASSERT(lxpnp->lxpr_type == LXPR_PID_ENV);
1303 
1304         buf = kmem_alloc(asz, KM_SLEEP);
1305 
1306         p = lxpr_lock(lxpnp->lxpr_pid);
1307         if (p == NULL) {
1308                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1309                 kmem_free(buf, asz);
1310                 return;
1311         }
1312 
1313         if (prreadenvv(p, buf, asz, &sz) != 0) {
1314                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1315         } else {
1316                 lxpr_uiobuf_write(uiobuf, buf, sz);
1317         }
1318 
1319         lxpr_unlock(p);
1320         kmem_free(buf, asz);
1321 }
1322 
1323 /*
1324  * lxpr_read_pid_limits(): ulimit file
1325  */
1326 static void
1327 lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1328 {
1329         proc_t *p;
1330         rctl_qty_t cur, max;
1331         rctl_val_t *oval, *nval;
1332         rctl_hndl_t hndl;
1333         char *kname;
1334         int i;
1335 
1336         ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS ||
1337             lxpnp->lxpr_type == LXPR_PID_TID_LIMITS);
1338 
1339         nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP);
1340 
1341         p = lxpr_lock(lxpnp->lxpr_pid);
1342         if (p == NULL) {
1343                 kmem_free(nval, sizeof (rctl_val_t));
1344                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1345                 return;
1346         }
1347 
1348         lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n",
1349             "Limit", "Soft Limit", "Hard Limit", "Units");
1350         for (i = 0; lxpr_rlimtab[i].rlim_name != NULL; i++) {
1351                 kname = lxpr_rlimtab[i].rlim_rctl;
1352                 /* default to unlimited for resources without an analog */
1353                 cur = RLIM_INFINITY;
1354                 max = RLIM_INFINITY;
1355                 if (kname != NULL) {
1356                         hndl = rctl_hndl_lookup(kname);
1357                         oval = NULL;
1358                         while ((hndl != -1) &&
1359                             rctl_local_get(hndl, oval, nval, p) == 0) {
1360                                 oval = nval;
1361                                 switch (nval->rcv_privilege) {
1362                                 case RCPRIV_BASIC:
1363                                         if (!RCTL_INFINITE(nval))
1364                                                 cur = nval->rcv_value;
1365                                         break;
1366                                 case RCPRIV_PRIVILEGED:
1367                                         if (!RCTL_INFINITE(nval))
1368                                                 max = nval->rcv_value;
1369                                         break;
1370                                 }
1371                         }
1372                 }
1373 
1374                 lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name);
1375                 if (cur == RLIM_INFINITY || cur == LX_RLIM_INFINITY) {
1376                         lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
1377                 } else {
1378                         lxpr_uiobuf_printf(uiobuf, " %-20lu", cur);
1379                 }
1380                 if (max == RLIM_INFINITY || max == LX_RLIM_INFINITY) {
1381                         lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
1382                 } else {
1383                         lxpr_uiobuf_printf(uiobuf, " %-20lu", max);
1384                 }
1385                 lxpr_uiobuf_printf(uiobuf, " %-10s\n",
1386                     lxpr_rlimtab[i].rlim_unit);
1387         }
1388 
1389         lxpr_unlock(p);
1390         kmem_free(nval, sizeof (rctl_val_t));
1391 }
1392 
1393 /*
1394  * lxpr_read_pid_maps(): memory map file
1395  */
1396 static void
1397 lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1398 {
1399         proc_t *p;
1400         struct as *as;
1401         struct seg *seg;
1402         char *buf;
1403         int buflen = MAXPATHLEN;
1404         struct print_data {
1405                 uintptr_t saddr;
1406                 uintptr_t eaddr;
1407                 int type;
1408                 char prot[5];
1409                 uintptr_t offset;
1410                 vnode_t *vp;
1411                 struct print_data *next;
1412         } *print_head = NULL;
1413         struct print_data **print_tail = &print_head;
1414         struct print_data *pbuf;
1415 
1416         ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS ||
1417             lxpnp->lxpr_type == LXPR_PID_TID_MAPS);
1418 
1419         p = lxpr_lock(lxpnp->lxpr_pid);
1420         if (p == NULL) {
1421                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1422                 return;
1423         }
1424 
1425         as = p->p_as;
1426 
1427         if (as == &kas) {
1428                 lxpr_unlock(p);
1429                 return;
1430         }
1431 
1432         mutex_exit(&p->p_lock);
1433 
1434         /* Iterate over all segments in the address space */
1435         AS_LOCK_ENTER(as, RW_READER);
1436         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1437                 vnode_t *vp;
1438                 uint_t protbits;
1439 
1440                 pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
1441 
1442                 pbuf->saddr = (uintptr_t)seg->s_base;
1443                 pbuf->eaddr = pbuf->saddr + seg->s_size;
1444                 pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
1445 
1446                 /*
1447                  * Cheat and only use the protection bits of the first page
1448                  * in the segment
1449                  */
1450                 (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
1451                 (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
1452 
1453                 if (protbits & PROT_READ)      pbuf->prot[0] = 'r';
1454                 if (protbits & PROT_WRITE)     pbuf->prot[1] = 'w';
1455                 if (protbits & PROT_EXEC)      pbuf->prot[2] = 'x';
1456                 if (pbuf->type & MAP_SHARED)        pbuf->prot[3] = 's';
1457                 else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
1458 
1459                 if (seg->s_ops == &segvn_ops &&
1460                     SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
1461                     vp != NULL && vp->v_type == VREG) {
1462                         VN_HOLD(vp);
1463                         pbuf->vp = vp;
1464                 } else {
1465                         pbuf->vp = NULL;
1466                 }
1467 
1468                 pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr);
1469 
1470                 pbuf->next = NULL;
1471                 *print_tail = pbuf;
1472                 print_tail = &pbuf->next;
1473         }
1474         AS_LOCK_EXIT(as);
1475         mutex_enter(&p->p_lock);
1476         lxpr_unlock(p);
1477 
1478         buf = kmem_alloc(buflen, KM_SLEEP);
1479 
1480         /* print the data we've extracted */
1481         pbuf = print_head;
1482         while (pbuf != NULL) {
1483                 struct print_data *pbuf_next;
1484                 vattr_t vattr;
1485 
1486                 int maj = 0;
1487                 int min = 0;
1488                 ino_t inode = 0;
1489 
1490                 *buf = '\0';
1491                 if (pbuf->vp != NULL) {
1492                         vattr.va_mask = AT_FSID | AT_NODEID;
1493                         if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
1494                             NULL) == 0) {
1495                                 maj = getmajor(vattr.va_fsid);
1496                                 min = getminor(vattr.va_fsid);
1497                                 inode = vattr.va_nodeid;
1498                         }
1499                         (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
1500                         VN_RELE(pbuf->vp);
1501                 }
1502 
1503                 if (p->p_model == DATAMODEL_LP64) {
1504                         lxpr_uiobuf_printf(uiobuf,
1505                             "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n",
1506                             pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
1507                             maj, min, inode, *buf != '\0' ? " " : "", buf);
1508                 } else {
1509                         lxpr_uiobuf_printf(uiobuf,
1510                             "%08x-%08x %s %08x %02x:%02x %llu%s%s\n",
1511                             (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr,
1512                             pbuf->prot, (uint32_t)pbuf->offset, maj, min,
1513                             inode, *buf != '\0' ? " " : "", buf);
1514                 }
1515 
1516                 pbuf_next = pbuf->next;
1517                 kmem_free(pbuf, sizeof (*pbuf));
1518                 pbuf = pbuf_next;
1519         }
1520 
1521         kmem_free(buf, buflen);
1522 }
1523 
1524 /*
1525  * lxpr_read_pid_mountinfo(): information about process mount points. e.g.:
1526  *    14 19 0:13 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
1527  * mntid parid devnums root mntpnt mntopts - fstype mntsrc superopts
1528  *
1529  * We have to make up several of these fields.
1530  */
1531 static void
1532 lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1533 {
1534         struct vfs *vfsp;
1535         struct vfs *vfslist;
1536         zone_t *zone = LXPTOZ(lxpnp);
1537         struct print_data {
1538                 refstr_t *vfs_mntpt;
1539                 refstr_t *vfs_resource;
1540                 uint_t vfs_flag;
1541                 int vfs_fstype;
1542                 dev_t vfs_dev;
1543                 struct print_data *next;
1544         } *print_head = NULL;
1545         struct print_data **print_tail = &print_head;
1546         struct print_data *printp;
1547         int root_id = 15;       /* use a made-up value */
1548         int mnt_id;
1549 
1550         ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO ||
1551             lxpnp->lxpr_type == LXPR_PID_TID_MOUNTINFO);
1552 
1553         vfs_list_read_lock();
1554 
1555         /* root is the top-level, it does not appear in this output */
1556         if (zone == global_zone) {
1557                 vfsp = vfslist = rootvfs;
1558         } else {
1559                 vfsp = vfslist = zone->zone_vfslist;
1560                 /*
1561                  * If the zone has a root entry, it will be the first in
1562                  * the list.  If it doesn't, we conjure one up.
1563                  */
1564                 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
1565                     zone->zone_rootpath) != 0) {
1566                         struct vfs *tvfsp;
1567                         /*
1568                          * The root of the zone is not a mount point.  The vfs
1569                          * we want to report is that of the zone's root vnode.
1570                          */
1571                         tvfsp = zone->zone_rootvp->v_vfsp;
1572 
1573                         lxpr_uiobuf_printf(uiobuf,
1574                             "%d 1 %d:%d / / %s - %s / %s\n",
1575                             root_id,
1576                             major(tvfsp->vfs_dev), minor(vfsp->vfs_dev),
1577                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1578                             vfssw[tvfsp->vfs_fstype].vsw_name,
1579                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1580 
1581                 }
1582                 if (vfslist == NULL) {
1583                         vfs_list_unlock();
1584                         return;
1585                 }
1586         }
1587 
1588         /*
1589          * Later on we have to do a lookupname, which can end up causing
1590          * another vfs_list_read_lock() to be called. Which can lead to a
1591          * deadlock. To avoid this, we extract the data we need into a local
1592          * list, then we can run this list without holding vfs_list_read_lock()
1593          * We keep the list in the same order as the vfs_list
1594          */
1595         do {
1596                 /* Skip mounts we shouldn't show */
1597                 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
1598                         goto nextfs;
1599                 }
1600 
1601                 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
1602                 refstr_hold(vfsp->vfs_mntpt);
1603                 printp->vfs_mntpt = vfsp->vfs_mntpt;
1604                 refstr_hold(vfsp->vfs_resource);
1605                 printp->vfs_resource = vfsp->vfs_resource;
1606                 printp->vfs_flag = vfsp->vfs_flag;
1607                 printp->vfs_fstype = vfsp->vfs_fstype;
1608                 printp->vfs_dev = vfsp->vfs_dev;
1609                 printp->next = NULL;
1610 
1611                 *print_tail = printp;
1612                 print_tail = &printp->next;
1613 
1614 nextfs:
1615                 vfsp = (zone == global_zone) ?
1616                     vfsp->vfs_next : vfsp->vfs_zone_next;
1617 
1618         } while (vfsp != vfslist);
1619 
1620         vfs_list_unlock();
1621 
1622         mnt_id = root_id + 1;
1623 
1624         /*
1625          * now we can run through what we've extracted without holding
1626          * vfs_list_read_lock()
1627          */
1628         printp = print_head;
1629         while (printp != NULL) {
1630                 struct print_data *printp_next;
1631                 const char *resource;
1632                 char *mntpt;
1633                 struct vnode *vp;
1634                 int error;
1635 
1636                 mntpt = (char *)refstr_value(printp->vfs_mntpt);
1637                 resource = refstr_value(printp->vfs_resource);
1638 
1639                 if (mntpt != NULL && mntpt[0] != '\0')
1640                         mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
1641                 else
1642                         mntpt = "-";
1643 
1644                 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
1645 
1646                 if (error != 0)
1647                         goto nextp;
1648 
1649                 if (!(vp->v_flag & VROOT)) {
1650                         VN_RELE(vp);
1651                         goto nextp;
1652                 }
1653                 VN_RELE(vp);
1654 
1655                 if (resource != NULL && resource[0] != '\0') {
1656                         if (resource[0] == '/') {
1657                                 resource = ZONE_PATH_VISIBLE(resource, zone) ?
1658                                     ZONE_PATH_TRANSLATE(resource, zone) : mntpt;
1659                         }
1660                 } else {
1661                         resource = "none";
1662                 }
1663 
1664                 /*
1665                  * XXX parent ID is not tracked correctly here. Currently we
1666                  * always assume the parent ID is the root ID.
1667                  */
1668                 lxpr_uiobuf_printf(uiobuf,
1669                     "%d %d %d:%d / %s %s - %s %s %s\n",
1670                     mnt_id, root_id,
1671                     major(printp->vfs_dev), minor(printp->vfs_dev),
1672                     mntpt,
1673                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1674                     vfssw[printp->vfs_fstype].vsw_name,
1675                     resource,
1676                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1677 
1678 nextp:
1679                 printp_next = printp->next;
1680                 refstr_rele(printp->vfs_mntpt);
1681                 refstr_rele(printp->vfs_resource);
1682                 kmem_free(printp, sizeof (*printp));
1683                 printp = printp_next;
1684 
1685                 mnt_id++;
1686         }
1687 }
1688 
1689 /*
1690  * lxpr_read_pid_oom_scr_adj(): read oom_score_adj for process
1691  */
1692 static void
1693 lxpr_read_pid_oom_scr_adj(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1694 {
1695         proc_t *p;
1696 
1697         ASSERT(lxpnp->lxpr_type == LXPR_PID_OOM_SCR_ADJ ||
1698             lxpnp->lxpr_type == LXPR_PID_TID_OOM_SCR_ADJ);
1699 
1700         p = lxpr_lock(lxpnp->lxpr_pid);
1701         if (p == NULL) {
1702                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1703                 return;
1704         }
1705 
1706         /* always 0 */
1707         lxpr_uiobuf_printf(uiobuf, "0\n");
1708 
1709         lxpr_unlock(p);
1710 }
1711 
1712 
1713 /*
1714  * lxpr_read_pid_statm(): memory status file
1715  */
1716 static void
1717 lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1718 {
1719         proc_t *p;
1720         struct as *as;
1721         size_t vsize;
1722         size_t rss;
1723 
1724         ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM ||
1725             lxpnp->lxpr_type == LXPR_PID_TID_STATM);
1726 
1727         p = lxpr_lock(lxpnp->lxpr_pid);
1728         if (p == NULL) {
1729                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1730                 return;
1731         }
1732 
1733         as = p->p_as;
1734 
1735         mutex_exit(&p->p_lock);
1736 
1737         AS_LOCK_ENTER(as, RW_READER);
1738         vsize = btopr(as->a_resvsize);
1739         rss = rm_asrss(as);
1740         AS_LOCK_EXIT(as);
1741 
1742         mutex_enter(&p->p_lock);
1743         lxpr_unlock(p);
1744 
1745         lxpr_uiobuf_printf(uiobuf,
1746             "%lu %lu %lu %lu %lu %lu %lu\n",
1747             vsize, rss, 0l, rss, 0l, 0l, 0l);
1748 }
1749 
1750 /*
1751  * Look for either the main thread (lookup_id is 0) or the specified thread.
1752  * If we're looking for the main thread but the proc does not have one, we
1753  * fallback to using prchoose to get any thread available.
1754  */
1755 static kthread_t *
1756 lxpr_get_thread(proc_t *p, uint_t lookup_id)
1757 {
1758         kthread_t *t;
1759         uint_t emul_tid;
1760         lx_lwp_data_t *lwpd;
1761         pid_t pid = p->p_pid;
1762         pid_t init_pid = curproc->p_zone->zone_proc_initpid;
1763         boolean_t branded = (p->p_brand == &lx_brand);
1764 
1765         /* get specified thread  */
1766         if ((t = p->p_tlist) == NULL)
1767                 return (NULL);
1768 
1769         do {
1770                 if (lookup_id == 0 && t->t_tid == 1) {
1771                         thread_lock(t);
1772                         return (t);
1773                 }
1774 
1775                 lwpd = ttolxlwp(t);
1776                 if (branded && lwpd != NULL) {
1777                         if (pid == init_pid && lookup_id == 1) {
1778                                 emul_tid = t->t_tid;
1779                         } else {
1780                                 emul_tid = lwpd->br_pid;
1781                         }
1782                 } else {
1783                         /*
1784                          * Make only the first (assumed to be main) thread
1785                          * visible for non-branded processes.
1786                          */
1787                         emul_tid = p->p_pid;
1788                 }
1789                 if (emul_tid == lookup_id) {
1790                         thread_lock(t);
1791                         return (t);
1792                 }
1793         } while ((t = t->t_forw) != p->p_tlist);
1794 
1795         if (lookup_id == 0)
1796                 return (prchoose(p));
1797         return (NULL);
1798 }
1799 
1800 /*
1801  * Lookup the real pid for procs 0 or 1.
1802  */
1803 static pid_t
1804 get_real_pid(pid_t p)
1805 {
1806         pid_t find_pid;
1807 
1808         if (p == 1) {
1809                 find_pid = curproc->p_zone->zone_proc_initpid;
1810         } else if (p == 0) {
1811                 find_pid = curproc->p_zone->zone_zsched->p_pid;
1812         } else {
1813                 find_pid = p;
1814         }
1815 
1816         return (find_pid);
1817 }
1818 
1819 /*
1820  * pid/tid common code to read status file
1821  */
1822 static void
1823 lxpr_read_status_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
1824     uint_t lookup_id)
1825 {
1826         proc_t *p;
1827         kthread_t *t;
1828         user_t *up;
1829         cred_t *cr;
1830         const gid_t *groups;
1831         int    ngroups;
1832         struct as *as;
1833         char *status;
1834         pid_t pid, ppid;
1835         k_sigset_t current, ignore, handle;
1836         int    i, lx_sig;
1837         pid_t real_pid;
1838 
1839         real_pid = get_real_pid(lxpnp->lxpr_pid);
1840         p = lxpr_lock(real_pid);
1841         if (p == NULL) {
1842                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1843                 return;
1844         }
1845 
1846         pid = p->p_pid;
1847 
1848         /*
1849          * Convert pid to the Linux default of 1 if we're the zone's init
1850          * process or if we're the zone's zsched the pid is 0.
1851          */
1852         if (pid == curproc->p_zone->zone_proc_initpid) {
1853                 pid = 1;
1854                 ppid = 0;       /* parent pid for init is 0 */
1855         } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
1856                 pid = 0;        /* zsched is pid 0 */
1857                 ppid = 0;       /* parent pid for zsched is itself */
1858         } else {
1859                 /*
1860                  * Make sure not to reference parent PIDs that reside outside
1861                  * the zone
1862                  */
1863                 ppid = ((p->p_flag & SZONETOP)
1864                     ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
1865 
1866                 /*
1867                  * Convert ppid to the Linux default of 1 if our parent is the
1868                  * zone's init process
1869                  */
1870                 if (ppid == curproc->p_zone->zone_proc_initpid)
1871                         ppid = 1;
1872         }
1873 
1874         t = lxpr_get_thread(p, lookup_id);
1875         if (t != NULL) {
1876                 switch (t->t_state) {
1877                 case TS_SLEEP:
1878                         status = "S (sleeping)";
1879                         break;
1880                 case TS_RUN:
1881                 case TS_ONPROC:
1882                         status = "R (running)";
1883                         break;
1884                 case TS_ZOMB:
1885                         status = "Z (zombie)";
1886                         break;
1887                 case TS_STOPPED:
1888                         status = "T (stopped)";
1889                         break;
1890                 default:
1891                         status = "! (unknown)";
1892                         break;
1893                 }
1894                 thread_unlock(t);
1895         } else {
1896                 if (lookup_id != 0) {
1897                         /* we can't find this specific thread */
1898                         lxpr_uiobuf_seterr(uiobuf, EINVAL);
1899                         lxpr_unlock(p);
1900                         return;
1901                 }
1902 
1903                 /*
1904                  * there is a hole in the exit code, where a proc can have
1905                  * no threads but it is yet to be flagged SZOMB. We will
1906                  * assume we are about to become a zombie
1907                  */
1908                 status = "Z (zombie)";
1909         }
1910 
1911         up = PTOU(p);
1912         mutex_enter(&p->p_crlock);
1913         crhold(cr = p->p_cred);
1914         mutex_exit(&p->p_crlock);
1915 
1916         lxpr_uiobuf_printf(uiobuf,
1917             "Name:\t%s\n"
1918             "State:\t%s\n"
1919             "Tgid:\t%d\n"
1920             "Pid:\t%d\n"
1921             "PPid:\t%d\n"
1922             "TracerPid:\t%d\n"
1923             "Uid:\t%u\t%u\t%u\t%u\n"
1924             "Gid:\t%u\t%u\t%u\t%u\n"
1925             "FDSize:\t%d\n"
1926             "Groups:\t",
1927             up->u_comm,
1928             status,
1929             pid, /* thread group id - same as pid */
1930             (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
1931             ppid,
1932             0,
1933             crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
1934             crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
1935             p->p_fno_ctl);
1936 
1937 
1938         ngroups = crgetngroups(cr);
1939         groups  = crgetgroups(cr);
1940         for (i = 0; i < ngroups; i++) {
1941                 lxpr_uiobuf_printf(uiobuf,
1942                     "%u ",
1943                     groups[i]);
1944         }
1945         crfree(cr);
1946 
1947         as = p->p_as;
1948         if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
1949                 size_t vsize, nlocked, rss;
1950 
1951                 mutex_exit(&p->p_lock);
1952                 AS_LOCK_ENTER(as, RW_READER);
1953                 vsize = as->a_resvsize;
1954                 rss = rm_asrss(as);
1955                 AS_LOCK_EXIT(as);
1956                 mutex_enter(&p->p_lock);
1957                 nlocked = p->p_locked_mem;
1958 
1959                 lxpr_uiobuf_printf(uiobuf,
1960                     "\n"
1961                     "VmSize:\t%8lu kB\n"
1962                     "VmLck:\t%8lu kB\n"
1963                     "VmRSS:\t%8lu kB\n"
1964                     "VmData:\t%8lu kB\n"
1965                     "VmStk:\t%8lu kB\n"
1966                     "VmExe:\t%8lu kB\n"
1967                     "VmLib:\t%8lu kB",
1968                     btok(vsize),
1969                     btok(nlocked),
1970                     ptok(rss),
1971                     0l,
1972                     btok(p->p_stksize),
1973                     ptok(rss),
1974                     0l);
1975         }
1976 
1977         lxpr_uiobuf_printf(uiobuf, "\nThreads:\t%u", p->p_lwpcnt);
1978 
1979         sigemptyset(&current);
1980         sigemptyset(&ignore);
1981         sigemptyset(&handle);
1982 
1983         for (i = 1; i < NSIG; i++) {
1984                 lx_sig = stol_signo[i];
1985 
1986                 if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
1987                         if (sigismember(&p->p_sig, i))
1988                                 sigaddset(&current, lx_sig);
1989 
1990                         if (up->u_signal[i - 1] == SIG_IGN)
1991                                 sigaddset(&ignore, lx_sig);
1992                         else if (up->u_signal[i - 1] != SIG_DFL)
1993                                 sigaddset(&handle, lx_sig);
1994                 }
1995         }
1996 
1997         lxpr_uiobuf_printf(uiobuf,
1998             "\n"
1999             "SigPnd:\t%08x%08x\n"
2000             "SigBlk:\t%08x%08x\n"
2001             "SigIgn:\t%08x%08x\n"
2002             "SigCgt:\t%08x%08x\n"
2003             "CapInh:\t%016x\n"
2004             "CapPrm:\t%016x\n"
2005             "CapEff:\t%016x\n",
2006             current.__sigbits[1], current.__sigbits[0],
2007             0, 0, /* signals blocked on per thread basis */
2008             ignore.__sigbits[1], ignore.__sigbits[0],
2009             handle.__sigbits[1], handle.__sigbits[0],
2010             /* Can't do anything with linux capabilities */
2011             0,
2012             0,
2013             0);
2014 
2015         lxpr_uiobuf_printf(uiobuf,
2016             "CapBnd:\t%016llx\n",
2017             /* We report the full capability bounding set */
2018             0x1fffffffffLL);
2019 
2020         lxpr_unlock(p);
2021 }
2022 
2023 /*
2024  * lxpr_read_pid_status(): status file
2025  */
2026 static void
2027 lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2028 {
2029         ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
2030 
2031         lxpr_read_status_common(lxpnp, uiobuf, 0);
2032 }
2033 
2034 /*
2035  * lxpr_read_pid_tid_status(): status file
2036  */
2037 static void
2038 lxpr_read_pid_tid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2039 {
2040         ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STATUS);
2041         lxpr_read_status_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
2042 }
2043 
2044 /*
2045  * pid/tid common code to read stat file
2046  */
2047 static void
2048 lxpr_read_stat_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
2049     uint_t lookup_id)
2050 {
2051         proc_t *p;
2052         kthread_t *t;
2053         struct as *as;
2054         char stat;
2055         pid_t pid, ppid, pgpid, spid;
2056         gid_t psgid;
2057         dev_t psdev;
2058         size_t rss, vsize;
2059         int nice, pri;
2060         caddr_t wchan;
2061         processorid_t cpu;
2062         pid_t real_pid;
2063 
2064         real_pid = get_real_pid(lxpnp->lxpr_pid);
2065         p = lxpr_lock(real_pid);
2066         if (p == NULL) {
2067                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
2068                 return;
2069         }
2070 
2071         pid = p->p_pid;
2072 
2073         /*
2074          * Set Linux defaults if we're the zone's init process
2075          */
2076         if (pid == curproc->p_zone->zone_proc_initpid) {
2077                 pid = 1;                /* PID for init */
2078                 ppid = 0;               /* parent PID for init is 0 */
2079                 pgpid = 0;              /* process group for init is 0 */
2080                 psgid = (gid_t)-1;      /* credential GID for init is -1 */
2081                 spid = 0;               /* session id for init is 0 */
2082                 psdev = 0;              /* session device for init is 0 */
2083         } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
2084                 pid = 0;                /* PID for zsched */
2085                 ppid = 0;               /* parent PID for zsched is 0 */
2086                 pgpid = 0;              /* process group for zsched is 0 */
2087                 psgid = (gid_t)-1;      /* credential GID for zsched is -1 */
2088                 spid = 0;               /* session id for zsched is 0 */
2089                 psdev = 0;              /* session device for zsched is 0 */
2090         } else {
2091                 /*
2092                  * Make sure not to reference parent PIDs that reside outside
2093                  * the zone
2094                  */
2095                 ppid = ((p->p_flag & SZONETOP) ?
2096                     curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
2097 
2098                 /*
2099                  * Convert ppid to the Linux default of 1 if our parent is the
2100                  * zone's init process
2101                  */
2102                 if (ppid == curproc->p_zone->zone_proc_initpid)
2103                         ppid = 1;
2104 
2105                 pgpid = p->p_pgrp;
2106 
2107                 mutex_enter(&p->p_splock);
2108                 mutex_enter(&p->p_sessp->s_lock);
2109                 spid = p->p_sessp->s_sid;
2110                 psdev = p->p_sessp->s_dev;
2111                 if (p->p_sessp->s_cred)
2112                         psgid = crgetgid(p->p_sessp->s_cred);
2113                 else
2114                         psgid = crgetgid(p->p_cred);
2115 
2116                 mutex_exit(&p->p_sessp->s_lock);
2117                 mutex_exit(&p->p_splock);
2118         }
2119 
2120         t = lxpr_get_thread(p, lookup_id);
2121         if (t != NULL) {
2122                 switch (t->t_state) {
2123                 case TS_SLEEP:
2124                         stat = 'S'; break;
2125                 case TS_RUN:
2126                 case TS_ONPROC:
2127                         stat = 'R'; break;
2128                 case TS_ZOMB:
2129                         stat = 'Z'; break;
2130                 case TS_STOPPED:
2131                         stat = 'T'; break;
2132                 default:
2133                         stat = '!'; break;
2134                 }
2135 
2136                 if (CL_DONICE(t, NULL, 0, &nice) != 0)
2137                         nice = 0;
2138 
2139                 pri = t->t_pri;
2140                 wchan = t->t_wchan;
2141                 cpu = t->t_cpu->cpu_id;
2142                 thread_unlock(t);
2143         } else {
2144                 if (lookup_id != 0) {
2145                         /* we can't find this specific thread */
2146                         lxpr_uiobuf_seterr(uiobuf, EINVAL);
2147                         lxpr_unlock(p);
2148                         return;
2149                 }
2150 
2151                 /* Only zombies have no threads */
2152                 stat = 'Z';
2153                 nice = 0;
2154                 pri = 0;
2155                 wchan = 0;
2156                 cpu = 0;
2157         }
2158         as = p->p_as;
2159         mutex_exit(&p->p_lock);
2160         AS_LOCK_ENTER(as, RW_READER);
2161         vsize = as->a_resvsize;
2162         rss = rm_asrss(as);
2163         AS_LOCK_EXIT(as);
2164         mutex_enter(&p->p_lock);
2165 
2166         lxpr_uiobuf_printf(uiobuf,
2167             "%d (%s) %c %d %d %d %d %d "
2168             "%lu %lu %lu %lu %lu "
2169             "%lu %lu %ld %ld "
2170             "%d %d %d "
2171             "%lu "
2172             "%lu "
2173             "%lu %ld %llu "
2174             "%lu %lu %u "
2175             "%lu %lu "
2176             "%lu %lu %lu %lu "
2177             "%lu "
2178             "%lu %lu "
2179             "%d "
2180             "%d"
2181             "\n",
2182             (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
2183             PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
2184             0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
2185             p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
2186             pri, nice, p->p_lwpcnt,
2187             0l, /* itrealvalue (time before next SIGALRM) */
2188             PTOU(p)->u_ticks,
2189             vsize, rss, p->p_vmem_ctl,
2190             0l, 0l, USRSTACK, /* startcode, endcode, startstack */
2191             0l, 0l, /* kstkesp, kstkeip */
2192             0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
2193             wchan,
2194             0l, 0l, /* nswap, cnswap */
2195             0, /* exit_signal */
2196             cpu);
2197 
2198         lxpr_unlock(p);
2199 }
2200 
/*
 * lxpr_read_pid_stat(): pid stat file
 *
 * Thin wrapper around lxpr_read_stat_common().  A lookup_id of 0 is passed,
 * which makes the common routine print the process pid in the first column
 * and report a process without a matching thread as a zombie instead of
 * failing the read.
 */
static void
lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        /* This handler must only be wired to per-process stat nodes. */
        ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);

        lxpr_read_stat_common(lxpnp, uiobuf, 0);
}
2211 
/*
 * lxpr_read_pid_tid_stat(): pid tid stat file
 *
 * Thin wrapper around lxpr_read_stat_common().  The node's lxpr_desc value is
 * passed as the lookup_id; when it is non-zero the common routine prints it
 * in the first column and fails the read with EINVAL if no matching thread
 * is found.
 */
static void
lxpr_read_pid_tid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        /* This handler must only be wired to per-thread stat nodes. */
        ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STAT);
        lxpr_read_stat_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
}
2221 
/*
 * lxpr_read_net_arp(): placeholder read routine.  The body is empty, so
 * nothing is appended to uiobuf and both arguments are unused.  Judging by
 * the name it stands in for the Linux net/arp file -- confirm against the
 * file table.
 */
/* ARGSUSED */
static void
lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2227 
/*
 * Per-interface counters gathered by lxpr_kstat_ifstat() and printed by
 * lxpr_read_net_dev().  Each member is filled from the named kstat entry
 * noted beside it; members whose entry is absent stay zero.
 */
struct lxpr_ifstat {
        uint64_t rx_bytes;      /* "rbytes64" */
        uint64_t rx_packets;    /* "ipackets64" */
        uint64_t rx_errors;     /* "ierrors" (32-bit value) */
        uint64_t rx_drop;       /* "norcvbuf" (32-bit value) */
        uint64_t tx_bytes;      /* "obytes64" */
        uint64_t tx_packets;    /* "opackets64" */
        uint64_t tx_errors;     /* "oerrors" (32-bit value) */
        uint64_t tx_drop;       /* "noxmtbuf" (32-bit value) */
        uint64_t collisions;    /* "collisions" (32-bit value) */
        uint64_t rx_multicast;  /* "multircv" (32-bit value) */
};
2240 
/*
 * lxpr_kstat_read(): take a private snapshot of a kstat
 *
 * kn supplies the lookup key: ks_module/ks_instance/ks_name when byname is
 * B_TRUE, otherwise ks_kid.  The lookup is made with the caller's zone id
 * (getzoneid()).
 *
 * On success the snapshot buffer is returned, *size is set to the number of
 * bytes allocated for it and *num to the kstat's ks_ndata.  The buffer comes
 * from kmem_alloc() and must be released with kmem_free(buf, *size).
 *
 * NULL is returned, with *size and *num left untouched, when the kstat
 * cannot be held, is flagged invalid, keeps outgrowing the buffer, or cannot
 * be snapshotted.
 */
static void *
lxpr_kstat_read(kstat_t *kn, boolean_t byname, size_t *size, int *num)
{
        kstat_t *kp;
        int i, nrec = 0;
        size_t bufsize;
        void *buf = NULL;

        /* First hold: only used to learn the current data size. */
        if (byname == B_TRUE) {
                kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
                    kn->ks_name, getzoneid());
        } else {
                kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
        }
        if (kp == NULL) {
                return (NULL);
        }
        if (kp->ks_flags & KSTAT_FLAG_INVALID) {
                kstat_rele(kp);
                return (NULL);
        }

        bufsize = kp->ks_data_size + 1;
        kstat_rele(kp);

        /*
         * The kstat in question is released so that kmem_alloc(KM_SLEEP) is
         * performed without it held.  After the alloc, the kstat is reacquired
         * and its size is checked again. If the buffer is no longer large
         * enough, the alloc and check are repeated; the loop below makes at
         * most two attempts in total, after which NULL is returned.
         */
        for (i = 0; i < 2; i++) {
                buf = kmem_alloc(bufsize, KM_SLEEP);

                /* Check if bufsize still appropriate */
                if (byname == B_TRUE) {
                        kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
                            kn->ks_name, getzoneid());
                } else {
                        kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
                }
                /* The kstat may have vanished while we were allocating. */
                if (kp == NULL || kp->ks_flags & KSTAT_FLAG_INVALID) {
                        if (kp != NULL) {
                                kstat_rele(kp);
                        }
                        kmem_free(buf, bufsize);
                        return (NULL);
                }
                KSTAT_ENTER(kp);
                /* Refresh the data (and its size) before comparing sizes. */
                (void) KSTAT_UPDATE(kp, KSTAT_READ);
                if (bufsize < kp->ks_data_size) {
                        /* Too small now: drop everything and try again. */
                        kmem_free(buf, bufsize);
                        buf = NULL;
                        bufsize = kp->ks_data_size + 1;
                        KSTAT_EXIT(kp);
                        kstat_rele(kp);
                        continue;
                } else {
                        /* A failed snapshot leaves buf NULL for the caller. */
                        if (KSTAT_SNAPSHOT(kp, buf, KSTAT_READ) != 0) {
                                kmem_free(buf, bufsize);
                                buf = NULL;
                        }
                        nrec = kp->ks_ndata;
                        KSTAT_EXIT(kp);
                        kstat_rele(kp);
                        break;
                }
        }

        /* Output parameters are only written when a buffer is returned. */
        if (buf != NULL) {
                *size = bufsize;
                *num = nrec;
        }
        return (buf);
}
2316 
2317 static int
2318 lxpr_kstat_ifstat(kstat_t *kn, struct lxpr_ifstat *ifs)
2319 {
2320         kstat_named_t *kp;
2321         int i, num;
2322         size_t size;
2323 
2324         /*
2325          * Search by name instead of by kid since there's a small window to
2326          * race against kstats being added/removed.
2327          */
2328         bzero(ifs, sizeof (*ifs));
2329         kp = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
2330         if (kp == NULL)
2331                 return (-1);
2332         for (i = 0; i < num; i++) {
2333                 if (strncmp(kp[i].name, "rbytes64", KSTAT_STRLEN) == 0)
2334                         ifs->rx_bytes = kp[i].value.ui64;
2335                 else if (strncmp(kp[i].name, "ipackets64", KSTAT_STRLEN) == 0)
2336                         ifs->rx_packets = kp[i].value.ui64;
2337                 else if (strncmp(kp[i].name, "ierrors", KSTAT_STRLEN) == 0)
2338                         ifs->rx_errors = kp[i].value.ui32;
2339                 else if (strncmp(kp[i].name, "norcvbuf", KSTAT_STRLEN) == 0)
2340                         ifs->rx_drop = kp[i].value.ui32;
2341                 else if (strncmp(kp[i].name, "multircv", KSTAT_STRLEN) == 0)
2342                         ifs->rx_multicast = kp[i].value.ui32;
2343                 else if (strncmp(kp[i].name, "obytes64", KSTAT_STRLEN) == 0)
2344                         ifs->tx_bytes = kp[i].value.ui64;
2345                 else if (strncmp(kp[i].name, "opackets64", KSTAT_STRLEN) == 0)
2346                         ifs->tx_packets = kp[i].value.ui64;
2347                 else if (strncmp(kp[i].name, "oerrors", KSTAT_STRLEN) == 0)
2348                         ifs->tx_errors = kp[i].value.ui32;
2349                 else if (strncmp(kp[i].name, "noxmtbuf", KSTAT_STRLEN) == 0)
2350                         ifs->tx_drop = kp[i].value.ui32;
2351                 else if (strncmp(kp[i].name, "collisions", KSTAT_STRLEN) == 0)
2352                         ifs->collisions = kp[i].value.ui32;
2353         }
2354         kmem_free(kp, size);
2355         return (0);
2356 }
2357 
2358 /* ARGSUSED */
2359 static void
2360 lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2361 {
2362         kstat_t *ksr;
2363         kstat_t ks0;
2364         int i, nidx;
2365         size_t sidx;
2366         struct lxpr_ifstat ifs;
2367 
2368         lxpr_uiobuf_printf(uiobuf, "Inter-|   Receive                   "
2369             "                             |  Transmit\n");
2370         lxpr_uiobuf_printf(uiobuf, " face |bytes    packets errs drop fifo"
2371             " frame compressed multicast|bytes    packets errs drop fifo"
2372             " colls carrier compressed\n");
2373 
2374         ks0.ks_kid = 0;
2375         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
2376         if (ksr == NULL)
2377                 return;
2378 
2379         for (i = 1; i < nidx; i++) {
2380                 if (strncmp(ksr[i].ks_module, "link", KSTAT_STRLEN) == 0 ||
2381                     strncmp(ksr[i].ks_module, "lo", KSTAT_STRLEN) == 0) {
2382                         if (lxpr_kstat_ifstat(&ksr[i], &ifs) != 0)
2383                                 continue;
2384 
2385                         /* Overwriting the name is ok in the local snapshot */
2386                         lx_ifname_convert(ksr[i].ks_name, LX_IF_FROMNATIVE);
2387                         lxpr_uiobuf_printf(uiobuf, "%6s: %7llu %7llu %4lu "
2388                             "%4lu %4u %5u %10u %9lu %8llu %7llu %4lu %4lu %4u "
2389                             "%5lu %7u %10u\n",
2390                             ksr[i].ks_name,
2391                             ifs.rx_bytes, ifs.rx_packets,
2392                             ifs.rx_errors, ifs.rx_drop,
2393                             0, 0, 0, ifs.rx_multicast,
2394                             ifs.tx_bytes, ifs.tx_packets,
2395                             ifs.tx_errors, ifs.tx_drop,
2396                             0, ifs.collisions, 0, 0);
2397                 }
2398         }
2399 
2400         kmem_free(ksr, sidx);
2401 }
2402 
/*
 * lxpr_read_net_dev_mcast(): placeholder read routine.  The body is empty,
 * so nothing is appended to uiobuf and both arguments are unused.  Judging
 * by the name it stands in for the Linux net/dev_mcast file -- confirm
 * against the file table.
 */
/* ARGSUSED */
static void
lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2408 
2409 static void
2410 lxpr_inet6_out(const in6_addr_t *addr, char buf[33])
2411 {
2412         const uint8_t *ip = addr->s6_addr;
2413         char digits[] = "0123456789abcdef";
2414         int i;
2415         for (i = 0; i < 16; i++) {
2416                 buf[2 * i] = digits[ip[i] >> 4];
2417                 buf[2 * i + 1] = digits[ip[i] & 0xf];
2418         }
2419         buf[32] = '\0';
2420 }
2421 
2422 /* ARGSUSED */
2423 static void
2424 lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2425 {
2426         netstack_t *ns;
2427         ip_stack_t *ipst;
2428         ill_t *ill;
2429         ipif_t *ipif;
2430         ill_walk_context_t      ctx;
2431         char ifname[LIFNAMSIZ], ip6out[33];
2432 
2433         ns = netstack_get_current();
2434         if (ns == NULL)
2435                 return;
2436         ipst = ns->netstack_ip;
2437 
2438         rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2439         ill = ILL_START_WALK_V6(&ctx, ipst);
2440 
2441         for (; ill != NULL; ill = ill_next(&ctx, ill)) {
2442                 for (ipif = ill->ill_ipif; ipif != NULL;
2443                     ipif = ipif->ipif_next) {
2444                         uint_t index = ill->ill_phyint->phyint_ifindex;
2445                         int plen = ip_mask_to_plen_v6(&ipif->ipif_v6net_mask);
2446                         unsigned int scope = lx_ipv6_scope_convert(
2447                             &ipif->ipif_v6lcl_addr);
2448                         /* Always report PERMANENT flag */
2449                         int flag = 0x80;
2450 
2451                         (void) snprintf(ifname, LIFNAMSIZ, "%s", ill->ill_name);
2452                         lx_ifname_convert(ifname, LX_IF_FROMNATIVE);
2453                         lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out);
2454 
2455                         lxpr_uiobuf_printf(uiobuf, "%32s %02x %02x %02x %02x"
2456                             " %8s\n", ip6out, index, plen, scope, flag, ifname);
2457                 }
2458         }
2459         rw_exit(&ipst->ips_ill_g_lock);
2460         netstack_rele(ns);
2461 }
2462 
/*
 * lxpr_read_net_igmp(): placeholder read routine.  The body is empty, so
 * nothing is appended to uiobuf and both arguments are unused.  Judging by
 * the name it stands in for the Linux net/igmp file -- confirm against the
 * file table.
 */
/* ARGSUSED */
static void
lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2468 
/*
 * lxpr_read_net_ip_mr_cache(): placeholder read routine.  The body is empty,
 * so nothing is appended to uiobuf and both arguments are unused.  Judging
 * by the name it stands in for the Linux net/ip_mr_cache file -- confirm
 * against the file table.
 */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2474 
/*
 * lxpr_read_net_ip_mr_vif(): placeholder read routine.  The body is empty,
 * so nothing is appended to uiobuf and both arguments are unused.  Judging
 * by the name it stands in for the Linux net/ip_mr_vif file -- confirm
 * against the file table.
 */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2480 
2481 static void
2482 lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf)
2483 {
2484         uint32_t flags;
2485         char name[IFNAMSIZ];
2486         char ipv6addr[33];
2487 
2488         lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr);
2489         lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr,
2490             ip_mask_to_plen_v6(&ire->ire_mask_v6));
2491 
2492         /* punt on this for now */
2493         lxpr_uiobuf_printf(uiobuf, "%s %02x ",
2494             "00000000000000000000000000000000", 0);
2495 
2496         lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr);
2497         lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr);
2498 
2499         flags = ire->ire_flags &
2500             (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
2501         /* Linux's RTF_LOCAL equivalent */
2502         if (ire->ire_metrics.iulp_local)
2503                 flags |= 0x80000000;
2504 
2505         if (ire->ire_ill != NULL) {
2506                 ill_get_name(ire->ire_ill, name, sizeof (name));
2507                 lx_ifname_convert(name, LX_IF_FROMNATIVE);
2508         } else {
2509                 name[0] = '\0';
2510         }
2511 
2512         lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n",
2513             0, /* metric */
2514             ire->ire_refcnt,
2515             0,
2516             flags,
2517             name);
2518 }
2519 
2520 /* ARGSUSED */
2521 static void
2522 lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2523 {
2524         netstack_t *ns;
2525         ip_stack_t *ipst;
2526 
2527         ns = netstack_get_current();
2528         if (ns == NULL)
2529                 return;
2530         ipst = ns->netstack_ip;
2531 
2532         /*
2533          * LX branded zones are expected to have exclusive IP stack, hence
2534          * using ALL_ZONES as the zoneid filter.
2535          */
2536         ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst);
2537 
2538         netstack_rele(ns);
2539 }
2540 
/*
 * lxpr_read_net_mcfilter(): placeholder read routine.  The body is empty, so
 * nothing is appended to uiobuf and both arguments are unused.  Judging by
 * the name it stands in for the Linux net/mcfilter file -- confirm against
 * the file table.
 */
/* ARGSUSED */
static void
lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2546 
/*
 * lxpr_read_net_netstat(): placeholder read routine.  The body is empty, so
 * nothing is appended to uiobuf and both arguments are unused.  Judging by
 * the name it stands in for the Linux net/netstat file -- confirm against
 * the file table.
 */
/* ARGSUSED */
static void
lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2552 
/*
 * lxpr_read_net_raw(): placeholder read routine.  The body is empty, so
 * nothing is appended to uiobuf and both arguments are unused.  Judging by
 * the name it stands in for the Linux net/raw file -- confirm against the
 * file table.
 */
/* ARGSUSED */
static void
lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2558 
/*
 * LXPR_SKIP_ROUTE(type): true when the IRE type bits in `type' include any
 * of IRE_IF_CLONE, IRE_BROADCAST, IRE_MULTICAST, IRE_NOROUTE, IRE_LOOPBACK
 * or IRE_LOCAL.  lxpr_format_route_ipv4() uses it to leave such entries out
 * of its output.
 *
 * The argument is parenthesized so that a compound expression such as
 * (a | b) is evaluated as a whole before being masked.
 */
#define LXPR_SKIP_ROUTE(type)   \
        (((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \
        IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & (type)) != 0)
2562 
2563 static void
2564 lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf)
2565 {
2566         uint32_t flags;
2567         char name[IFNAMSIZ];
2568         ill_t *ill;
2569         ire_t *nire;
2570         ipif_t *ipif;
2571         ipaddr_t gateway;
2572 
2573         if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0)
2574                 return;
2575 
2576         /* These route flags have direct Linux equivalents */
2577         flags = ire->ire_flags &
2578             (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
2579 
2580         /*
2581          * Search for a suitable IRE for naming purposes.
2582          * On Linux, the default route is typically associated with the
2583          * interface used to access gateway.  The default IRE on Illumos
2584          * typically lacks an ill reference but its parent might have one.
2585          */
2586         nire = ire;
2587         do {
2588                 ill = nire->ire_ill;
2589                 nire = nire->ire_dep_parent;
2590         } while (ill == NULL && nire != NULL);
2591         if (ill != NULL) {
2592                 ill_get_name(ill, name, sizeof (name));
2593                 lx_ifname_convert(name, LX_IF_FROMNATIVE);
2594         } else {
2595                 name[0] = '*';
2596                 name[1] = '\0';
2597         }
2598 
2599         /*
2600          * Linux suppresses the gateway address for directly connected
2601          * interface networks.  To emulate this behavior, we walk all addresses
2602          * of a given route interface.  If one matches the gateway, it is
2603          * displayed as NULL.
2604          */
2605         gateway = ire->ire_gateway_addr;
2606         if ((ill = ire->ire_ill) != NULL) {
2607                 for (ipif = ill->ill_ipif; ipif != NULL;
2608                     ipif = ipif->ipif_next) {
2609                         if (ipif->ipif_lcl_addr == gateway) {
2610                                 gateway = 0;
2611                                 break;
2612                         }
2613                 }
2614         }
2615 
2616         lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2617             "%d\t%08X\t%d\t%u\t%u\n",
2618             name,
2619             ire->ire_addr,
2620             gateway,
2621             flags, 0, 0,
2622             0, /* priority */
2623             ire->ire_mask,
2624             0, 0, /* mss, window */
2625             ire->ire_metrics.iulp_rtt);
2626 }
2627 
2628 /* ARGSUSED */
2629 static void
2630 lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2631 {
2632         netstack_t *ns;
2633         ip_stack_t *ipst;
2634 
2635         lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t"
2636             "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
2637 
2638         ns = netstack_get_current();
2639         if (ns == NULL)
2640                 return;
2641         ipst = ns->netstack_ip;
2642 
2643         /*
2644          * LX branded zones are expected to have exclusive IP stack, hence
2645          * using ALL_ZONES as the zoneid filter.
2646          */
2647         ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst);
2648 
2649         netstack_rele(ns);
2650 }
2651 
/*
 * lxpr_read_net_rpc(): placeholder read routine.  The body is empty, so
 * nothing is appended to uiobuf and both arguments are unused.  Judging by
 * the name it stands in for the Linux net/rpc entry -- confirm against the
 * file table.
 */
/* ARGSUSED */
static void
lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2657 
/*
 * lxpr_read_net_rt_cache(): placeholder read routine.  The body is empty, so
 * nothing is appended to uiobuf and both arguments are unused.  Judging by
 * the name it stands in for the Linux net/rt_cache file -- confirm against
 * the file table.
 */
/* ARGSUSED */
static void
lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2663 
/*
 * lxpr_read_net_sockstat(): placeholder read routine.  The body is empty, so
 * nothing is appended to uiobuf and both arguments are unused.  Judging by
 * the name it stands in for the Linux net/sockstat file -- confirm against
 * the file table.
 */
/* ARGSUSED */
static void
lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2669 
/*
 * Describes one protocol section of the SNMP statistics output.
 *
 * lst_proto is matched against the ks_name of "mib2" class kstats and, with
 * its first letter capitalized, labels the two lines printed for the
 * protocol.  lst_fields is a NULL-terminated list of named-kstat entry names
 * in the order their columns are printed.
 */
typedef struct lxpr_snmp_table {
        const char *lst_proto;
        const char *lst_fields[];
} lxpr_snmp_table_t;
2674 
/*
 * Columns of the "Ip:" lines, in output order.  The list is NULL-terminated;
 * names with no matching kstat entry are printed as 0.
 */
static lxpr_snmp_table_t lxpr_snmp_ip = { "ip",
        {
        "forwarding", "defaultTTL", "inReceives", "inHdrErrors",
        "inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards",
        "inDelivers", "outRequests", "outDiscards", "outNoRoutes",
        "reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs",
        "fragFails", "fragCreates",
        NULL
        }
};
/*
 * Columns of the "Icmp:" lines, in output order.  The list is
 * NULL-terminated; names with no matching kstat entry are printed as 0.
 */
static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp",
        {
        "inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds",
        "inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps",
        "inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps",
        "outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds",
        "outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos",
        "outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks",
        "outAddrMaskReps",
        NULL
        }
};
/*
 * Columns of the "Tcp:" lines, in output order.  The list is NULL-terminated;
 * names with no matching kstat entry are printed as 0.
 */
static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp",
        {
        "rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens",
        "passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs",
        "outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors",
        NULL
        }
};
/*
 * Columns of the "Udp:" lines, in output order.  The list is NULL-terminated;
 * names with no matching kstat entry are printed as 0.
 */
static lxpr_snmp_table_t lxpr_snmp_udp = { "udp",
        {
        "inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors",
        "sndbufErrors", "inCsumErrors",
        NULL
        }
};
2712 
/*
 * NULL-terminated list of the protocol tables, in the order in which
 * lxpr_read_net_snmp() prints their sections.
 */
static lxpr_snmp_table_t *lxpr_net_snmptab[] = {
        &lxpr_snmp_ip,
        &lxpr_snmp_icmp,
        &lxpr_snmp_tcp,
        &lxpr_snmp_udp,
        NULL
};
2720 
2721 static void
2722 lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table,
2723     kstat_t *kn)
2724 {
2725         kstat_named_t *klist;
2726         char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN];
2727         int i, j, num;
2728         size_t size;
2729 
2730         klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
2731         if (klist == NULL)
2732                 return;
2733 
2734         /* Print the header line, fields capitalized */
2735         (void) strncpy(upname, table->lst_proto, KSTAT_STRLEN);
2736         upname[0] = toupper(upname[0]);
2737         lxpr_uiobuf_printf(uiobuf, "%s:", upname);
2738         for (i = 0; table->lst_fields[i] != NULL; i++) {
2739                 (void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN);
2740                 upfield[0] = toupper(upfield[0]);
2741                 lxpr_uiobuf_printf(uiobuf, " %s", upfield);
2742         }
2743         lxpr_uiobuf_printf(uiobuf, "\n%s:", upname);
2744 
2745         /* Then loop back through to print the value line. */
2746         for (i = 0; table->lst_fields[i] != NULL; i++) {
2747                 kstat_named_t *kpoint = NULL;
2748                 for (j = 0; j < num; j++) {
2749                         if (strncmp(klist[j].name, table->lst_fields[i],
2750                             KSTAT_STRLEN) == 0) {
2751                                 kpoint = &klist[j];
2752                                 break;
2753                         }
2754                 }
2755                 if (kpoint == NULL) {
2756                         /* Output 0 for unknown fields */
2757                         lxpr_uiobuf_printf(uiobuf, " 0");
2758                 } else {
2759                         switch (kpoint->data_type) {
2760                         case KSTAT_DATA_INT32:
2761                                 lxpr_uiobuf_printf(uiobuf, " %d",
2762                                     kpoint->value.i32);
2763                                 break;
2764                         case KSTAT_DATA_UINT32:
2765                                 lxpr_uiobuf_printf(uiobuf, " %u",
2766                                     kpoint->value.ui32);
2767                                 break;
2768                         case KSTAT_DATA_INT64:
2769                                 lxpr_uiobuf_printf(uiobuf, " %ld",
2770                                     kpoint->value.l);
2771                                 break;
2772                         case KSTAT_DATA_UINT64:
2773                                 lxpr_uiobuf_printf(uiobuf, " %lu",
2774                                     kpoint->value.ul);
2775                                 break;
2776                         }
2777                 }
2778         }
2779         lxpr_uiobuf_printf(uiobuf, "\n");
2780         kmem_free(klist, size);
2781 }
2782 
2783 /* ARGSUSED */
2784 static void
2785 lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2786 {
2787         kstat_t *ksr;
2788         kstat_t ks0;
2789         lxpr_snmp_table_t **table = lxpr_net_snmptab;
2790         int i, t, nidx;
2791         size_t sidx;
2792 
2793         ks0.ks_kid = 0;
2794         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
2795         if (ksr == NULL)
2796                 return;
2797 
2798         for (t = 0; table[t] != NULL; t++) {
2799                 for (i = 0; i < nidx; i++) {
2800                         if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0)
2801                                 continue;
2802                         if (strncmp(ksr[i].ks_name, table[t]->lst_proto,
2803                             KSTAT_STRLEN) == 0) {
2804                                 lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]);
2805                                 break;
2806                         }
2807                 }
2808         }
2809         kmem_free(ksr, sidx);
2810 }
2811 
/*
 * lxpr_read_net_stat(): placeholder read routine.  The body is empty, so
 * nothing is appended to uiobuf and both arguments are unused.  Judging by
 * the name it stands in for the Linux net/stat entry -- confirm against the
 * file table.
 */
/* ARGSUSED */
static void
lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2817 
2818 static int
2819 lxpr_convert_tcp_state(int st)
2820 {
2821         /*
2822          * Derived from the enum located in the Linux kernel sources:
2823          * include/net/tcp_states.h
2824          */
2825         switch (st) {
2826         case TCPS_ESTABLISHED:
2827                 return (1);
2828         case TCPS_SYN_SENT:
2829                 return (2);
2830         case TCPS_SYN_RCVD:
2831                 return (3);
2832         case TCPS_FIN_WAIT_1:
2833                 return (4);
2834         case TCPS_FIN_WAIT_2:
2835                 return (5);
2836         case TCPS_TIME_WAIT:
2837                 return (6);
2838         case TCPS_CLOSED:
2839                 return (7);
2840         case TCPS_CLOSE_WAIT:
2841                 return (8);
2842         case TCPS_LAST_ACK:
2843                 return (9);
2844         case TCPS_LISTEN:
2845                 return (10);
2846         case TCPS_CLOSING:
2847                 return (11);
2848         default:
2849                 /* No translation for TCPS_IDLE, TCPS_BOUND or anything else */
2850                 return (0);
2851         }
2852 }
2853 
2854 static void
2855 lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
2856 {
2857         int i, sl = 0;
2858         connf_t *connfp;
2859         conn_t *connp;
2860         netstack_t *ns;
2861         ip_stack_t *ipst;
2862 
2863         ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
2864         if (ipver == IPV4_VERSION) {
2865                 lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address   "
2866                     "st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout "
2867                     "inode\n");
2868         } else {
2869                 lxpr_uiobuf_printf(uiobuf, "  sl  "
2870                     "local_address                         "
2871                     "remote_address                        "
2872                     "st tx_queue rx_queue tr tm->when retrnsmt   "
2873                     "uid  timeout inode\n");
2874         }
2875         /*
2876          * Due to differences between the Linux and illumos TCP
2877          * implementations, some data will be omitted from the output here.
2878          *
2879          * Valid fields:
2880          *  - local_address
2881          *  - remote_address
2882          *  - st
2883          *  - tx_queue
2884          *  - rx_queue
2885          *  - uid
2886          *  - inode
2887          *
2888          * Omitted/invalid fields
2889          *  - tr
2890          *  - tm->when
2891          *  - retrnsmt
2892          *  - timeout
2893          */
2894 
2895         ns = netstack_get_current();
2896         if (ns == NULL)
2897                 return;
2898         ipst = ns->netstack_ip;
2899 
2900         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2901                 connfp = &ipst->ips_ipcl_globalhash_fanout[i];
2902                 connp = NULL;
2903                 while ((connp =
2904                     ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) {
2905                         tcp_t *tcp;
2906                         vattr_t attr;
2907                         sonode_t *so = (sonode_t *)connp->conn_upper_handle;
2908                         vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
2909                         if (connp->conn_ipversion != ipver)
2910                                 continue;
2911                         tcp = connp->conn_tcp;
2912                         if (ipver == IPV4_VERSION) {
2913                                 lxpr_uiobuf_printf(uiobuf,
2914                                     "%4d: %08X:%04X %08X:%04X ",
2915                                     ++sl,
2916                                     connp->conn_laddr_v4,
2917                                     ntohs(connp->conn_lport),
2918                                     connp->conn_faddr_v4,
2919                                     ntohs(connp->conn_fport));
2920                         } else {
2921                                 lxpr_uiobuf_printf(uiobuf, "%4d: "
2922                                     "%08X%08X%08X%08X:%04X "
2923                                     "%08X%08X%08X%08X:%04X ",
2924                                     ++sl,
2925                                     connp->conn_laddr_v6.s6_addr32[0],
2926                                     connp->conn_laddr_v6.s6_addr32[1],
2927                                     connp->conn_laddr_v6.s6_addr32[2],
2928                                     connp->conn_laddr_v6.s6_addr32[3],
2929                                     ntohs(connp->conn_lport),
2930                                     connp->conn_faddr_v6.s6_addr32[0],
2931                                     connp->conn_faddr_v6.s6_addr32[1],
2932                                     connp->conn_faddr_v6.s6_addr32[2],
2933                                     connp->conn_faddr_v6.s6_addr32[3],
2934                                     ntohs(connp->conn_fport));
2935                         }
2936 
2937                         /* fetch the simulated inode for the socket */
2938                         if (vp == NULL ||
2939                             VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
2940                                 attr.va_nodeid = 0;
2941 
2942                         lxpr_uiobuf_printf(uiobuf,
2943                             "%02X %08X:%08X %02X:%08X %08X "
2944                             "%5u %8d %lu %d %p %u %u %u %u %d\n",
2945                             lxpr_convert_tcp_state(tcp->tcp_state),
2946                             tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */
2947                             0, 0, /* tr, when */
2948                             0, /* per-connection rexmits aren't tracked today */
2949                             connp->conn_cred->cr_uid,
2950                             0, /* timeout */
2951                             /* inode + more */
2952                             (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0);
2953                 }
2954         }
2955         netstack_rele(ns);
2956 }
2957 
2958 /* ARGSUSED */
2959 static void
2960 lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2961 {
2962         lxpr_format_tcp(uiobuf, IPV4_VERSION);
2963 }
2964 
2965 /* ARGSUSED */
2966 static void
2967 lxpr_read_net_tcp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2968 {
2969         lxpr_format_tcp(uiobuf, IPV6_VERSION);
2970 }
2971 
2972 static void
2973 lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
2974 {
2975         int i, sl = 0;
2976         connf_t *connfp;
2977         conn_t *connp;
2978         netstack_t *ns;
2979         ip_stack_t *ipst;
2980 
2981         ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
2982         if (ipver == IPV4_VERSION) {
2983                 lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address"
2984                     "   st tx_queue rx_queue tr tm->when retrnsmt   uid"
2985                     "  timeout inode ref pointer drops\n");
2986         } else {
2987                 lxpr_uiobuf_printf(uiobuf, "  sl  "
2988                     "local_address                         "
2989                     "remote_address                        "
2990                     "st tx_queue rx_queue tr tm->when retrnsmt   "
2991                     "uid  timeout inode ref pointer drops\n");
2992         }
2993         /*
2994          * Due to differences between the Linux and illumos UDP
2995          * implementations, some data will be omitted from the output here.
2996          *
2997          * Valid fields:
2998          *  - local_address
2999          *  - remote_address
3000          *  - st: limited
3001          *  - uid
3002          *
3003          * Omitted/invalid fields
3004          *  - tx_queue
3005          *  - rx_queue
3006          *  - tr
3007          *  - tm->when
3008          *  - retrnsmt
3009          *  - timeout
3010          *  - inode
3011          */
3012 
3013         ns = netstack_get_current();
3014         if (ns == NULL)
3015                 return;
3016         ipst = ns->netstack_ip;
3017 
3018         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
3019                 connfp = &ipst->ips_ipcl_globalhash_fanout[i];
3020                 connp = NULL;
3021                 while ((connp =
3022                     ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) {
3023                         udp_t *udp;
3024                         int state = 0;
3025                         vattr_t attr;
3026                         sonode_t *so = (sonode_t *)connp->conn_upper_handle;
3027                         vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
3028                         if (connp->conn_ipversion != ipver)
3029                                 continue;
3030                         udp = connp->conn_udp;
3031                         if (ipver == IPV4_VERSION) {
3032                                 lxpr_uiobuf_printf(uiobuf,
3033                                     "%4d: %08X:%04X %08X:%04X ",
3034                                     ++sl,
3035                                     connp->conn_laddr_v4,
3036                                     ntohs(connp->conn_lport),
3037                                     connp->conn_faddr_v4,
3038                                     ntohs(connp->conn_fport));
3039                         } else {
3040                                 lxpr_uiobuf_printf(uiobuf, "%4d: "
3041                                     "%08X%08X%08X%08X:%04X "
3042                                     "%08X%08X%08X%08X:%04X ",
3043                                     ++sl,
3044                                     connp->conn_laddr_v6.s6_addr32[0],
3045                                     connp->conn_laddr_v6.s6_addr32[1],
3046                                     connp->conn_laddr_v6.s6_addr32[2],
3047                                     connp->conn_laddr_v6.s6_addr32[3],
3048                                     ntohs(connp->conn_lport),
3049                                     connp->conn_faddr_v6.s6_addr32[0],
3050                                     connp->conn_faddr_v6.s6_addr32[1],
3051                                     connp->conn_faddr_v6.s6_addr32[2],
3052                                     connp->conn_faddr_v6.s6_addr32[3],
3053                                     ntohs(connp->conn_fport));
3054                         }
3055 
3056                         switch (udp->udp_state) {
3057                         case TS_UNBND:
3058                         case TS_IDLE:
3059                                 state = 7;
3060                                 break;
3061                         case TS_DATA_XFER:
3062                                 state = 1;
3063                                 break;
3064                         }
3065 
3066                         /* fetch the simulated inode for the socket */
3067                         if (vp == NULL ||
3068                             VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
3069                                 attr.va_nodeid = 0;
3070 
3071                         lxpr_uiobuf_printf(uiobuf,
3072                             "%02X %08X:%08X %02X:%08X %08X "
3073                             "%5u %8d %lu %d %p %d\n",
3074                             state,
3075                             0, 0, /* rx/tx queue */
3076                             0, 0, /* tr, when */
3077                             0, /* retrans */
3078                             connp->conn_cred->cr_uid,
3079                             0, /* timeout */
3080                             /* inode, ref, pointer, drops */
3081                             (ino_t)attr.va_nodeid, 0, NULL, 0);
3082                 }
3083         }
3084         netstack_rele(ns);
3085 }
3086 
3087 /* ARGSUSED */
3088 static void
3089 lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3090 {
3091         lxpr_format_udp(uiobuf, IPV4_VERSION);
3092 }
3093 
3094 /* ARGSUSED */
3095 static void
3096 lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3097 {
3098         lxpr_format_udp(uiobuf, IPV6_VERSION);
3099 }
3100 
3101 /* ARGSUSED */
3102 static void
3103 lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3104 {
3105         sonode_t *so;
3106         zoneid_t zoneid = getzoneid();
3107 
3108         lxpr_uiobuf_printf(uiobuf, "Num       RefCount Protocol Flags    Type "
3109             "St Inode Path\n");
3110 
3111         mutex_enter(&socklist.sl_lock);
3112         for (so = socklist.sl_list; so != NULL;
3113             so = _SOTOTPI(so)->sti_next_so) {
3114                 vnode_t *vp = so->so_vnode;
3115                 vattr_t attr;
3116                 sotpi_info_t *sti;
3117                 const char *name = NULL;
3118                 int status = 0;
3119                 int type = 0;
3120                 int flags = 0;
3121 
3122                 /* Only process active sonodes in this zone */
3123                 if (so->so_count == 0 || so->so_zoneid != zoneid)
3124                         continue;
3125 
3126                 /*
3127                  * Grab the inode, if possible.
3128                  * This must be done before entering so_lock.
3129                  */
3130                 if (vp == NULL ||
3131                     VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
3132                         attr.va_nodeid = 0;
3133 
3134                 mutex_enter(&so->so_lock);
3135                 sti = _SOTOTPI(so);
3136 
3137                 if (sti->sti_laddr_sa != NULL &&
3138                     sti->sti_laddr_len > 0) {
3139                         name = sti->sti_laddr_sa->sa_data;
3140                 } else if (sti->sti_faddr_sa != NULL &&
3141                     sti->sti_faddr_len > 0) {
3142                         name = sti->sti_faddr_sa->sa_data;
3143                 }
3144 
3145                 /*
3146                  * Derived from enum values in Linux kernel source:
3147                  * include/uapi/linux/net.h
3148                  */
3149                 if ((so->so_state & SS_ISDISCONNECTING) != 0) {
3150                         status = 4;
3151                 } else if ((so->so_state & SS_ISCONNECTING) != 0) {
3152                         status = 2;
3153                 } else if ((so->so_state & SS_ISCONNECTED) != 0) {
3154                         status = 3;
3155                 } else {
3156                         status = 1;
3157                         /* Add ACC flag for stream-type server sockets */
3158                         if (so->so_type != SOCK_DGRAM &&
3159                             sti->sti_laddr_sa != NULL)
3160                                 flags |= 0x10000;
3161                 }
3162 
3163                 /* Convert to Linux type */
3164                 switch (so->so_type) {
3165                 case SOCK_DGRAM:
3166                         type = 2;
3167                         break;
3168                 case SOCK_SEQPACKET:
3169                         type = 5;
3170                         break;
3171                 default:
3172                         type = 1;
3173                 }
3174 
3175                 lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu",
3176                     so,
3177                     so->so_count,
3178                     0, /* proto, always 0 */
3179                     flags,
3180                     type,
3181                     status,
3182                     (ino_t)attr.va_nodeid);
3183 
3184                 /*
3185                  * Due to shortcomings in the abstract socket emulation, they
3186                  * cannot be properly represented here (as @<path>).
3187                  *
3188                  * This will be the case until they are better implemented.
3189                  */
3190                 if (name != NULL)
3191                         lxpr_uiobuf_printf(uiobuf, " %s\n", name);
3192                 else
3193                         lxpr_uiobuf_printf(uiobuf, "\n");
3194                 mutex_exit(&so->so_lock);
3195         }
3196         mutex_exit(&socklist.sl_lock);
3197 }
3198 
3199 /*
3200  * lxpr_read_kmsg(): read the contents of the kernel message queue. We
3201  * translate this into the reception of console messages for this zone; each
3202  * read copies out a single zone console message, or blocks until the next one
3203  * is produced, unless we're open non-blocking, in which case we return after
3204  * 1ms.
3205  */
3206 
3207 #define LX_KMSG_PRI     "<0>"
3208 
3209 static void
3210 lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh)
3211 {
3212         mblk_t          *mp;
3213         timestruc_t     to;
3214         timestruc_t     *tp = NULL;
3215 
3216         ASSERT(lxpnp->lxpr_type == LXPR_KMSG);
3217 
3218         if (lxpr_uiobuf_nonblock(uiobuf)) {
3219                 to.tv_sec = 0;
3220                 to.tv_nsec = 1000000; /* 1msec */
3221                 tp = &to;
3222         }
3223 
3224         if (ldi_getmsg(lh, &mp, tp) == 0) {
3225                 /*
3226                  * lx procfs doesn't like successive reads to the same file
3227                  * descriptor unless we do an explicit rewind each time.
3228                  */
3229                 lxpr_uiobuf_seek(uiobuf, 0);
3230 
3231                 lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
3232                     mp->b_cont->b_rptr);
3233 
3234                 freemsg(mp);
3235         }
3236 }
3237 
3238 /*
3239  * lxpr_read_loadavg(): read the contents of the "loadavg" file.  We do just
3240  * enough for uptime and other simple lxproc readers to work
3241  */
3242 extern int nthread;
3243 
3244 static void
3245 lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3246 {
3247         ulong_t avenrun1;
3248         ulong_t avenrun5;
3249         ulong_t avenrun15;
3250         ulong_t avenrun1_cs;
3251         ulong_t avenrun5_cs;
3252         ulong_t avenrun15_cs;
3253         int loadavg[3];
3254         int *loadbuf;
3255         cpupart_t *cp;
3256         zone_t *zone = LXPTOZ(lxpnp);
3257 
3258         uint_t nrunnable = 0;
3259         rctl_qty_t nlwps;
3260 
3261         ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
3262 
3263         mutex_enter(&cpu_lock);
3264 
3265         /*
3266          * Need to add up values over all CPU partitions. If pools are active,
3267          * only report the values of the zone's partition, which by definition
3268          * includes the current CPU.
3269          */
3270         if (pool_pset_enabled()) {
3271                 psetid_t psetid = zone_pset_get(curproc->p_zone);
3272 
3273                 ASSERT(curproc->p_zone != &zone0);
3274                 cp = CPU->cpu_part;
3275 
3276                 nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
3277                 (void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
3278                 loadbuf = &loadavg[0];
3279         } else {
3280                 cp = cp_list_head;
3281                 do {
3282                         nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
3283                 } while ((cp = cp->cp_next) != cp_list_head);
3284 
3285                 loadbuf = zone == global_zone ?
3286                     &avenrun[0] : zone->zone_avenrun;
3287         }
3288 
3289         /*
3290          * If we're in the non-global zone, we'll report the total number of
3291          * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
3292          * otherwise will just use nthread (which will include kernel threads,
3293          * but should be good enough for lxproc).
3294          */
3295         nlwps = zone == global_zone ? nthread : zone->zone_nlwps;
3296 
3297         mutex_exit(&cpu_lock);
3298 
3299         avenrun1 = loadbuf[0] >> FSHIFT;
3300         avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
3301         avenrun5 = loadbuf[1] >> FSHIFT;
3302         avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
3303         avenrun15 = loadbuf[2] >> FSHIFT;
3304         avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
3305 
3306         lxpr_uiobuf_printf(uiobuf,
3307             "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
3308             avenrun1, avenrun1_cs,
3309             avenrun5, avenrun5_cs,
3310             avenrun15, avenrun15_cs,
3311             nrunnable, nlwps, 0);
3312 }
3313 
3314 /*
3315  * lxpr_read_meminfo(): read the contents of the "meminfo" file.
3316  */
3317 static void
3318 lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3319 {
3320         zone_t *zone = LXPTOZ(lxpnp);
3321         int global = zone == global_zone;
3322         long total_mem, free_mem, total_swap, used_swap;
3323 
3324         ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
3325 
3326         if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
3327                 total_mem = physmem * PAGESIZE;
3328                 free_mem = freemem * PAGESIZE;
3329         } else {
3330                 total_mem = zone->zone_phys_mem_ctl;
3331                 free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
3332         }
3333 
3334         if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
3335                 total_swap = k_anoninfo.ani_max * PAGESIZE;
3336                 used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
3337         } else {
3338                 mutex_enter(&zone->zone_mem_lock);
3339                 total_swap = zone->zone_max_swap_ctl;
3340                 used_swap = zone->zone_max_swap;
3341                 mutex_exit(&zone->zone_mem_lock);
3342         }
3343 
3344         lxpr_uiobuf_printf(uiobuf,
3345             "MemTotal:  %8lu kB\n"
3346             "MemFree:   %8lu kB\n"
3347             "MemShared: %8u kB\n"
3348             "Buffers:   %8u kB\n"
3349             "Cached:    %8u kB\n"
3350             "SwapCached:%8u kB\n"
3351             "Active:    %8u kB\n"
3352             "Inactive:  %8u kB\n"
3353             "HighTotal: %8u kB\n"
3354             "HighFree:  %8u kB\n"
3355             "LowTotal:  %8u kB\n"
3356             "LowFree:   %8u kB\n"
3357             "SwapTotal: %8lu kB\n"
3358             "SwapFree:  %8lu kB\n",
3359             btok(total_mem),                            /* MemTotal */
3360             btok(free_mem),                             /* MemFree */
3361             0,                                          /* MemShared */
3362             0,                                          /* Buffers */
3363             0,                                          /* Cached */
3364             0,                                          /* SwapCached */
3365             0,                                          /* Active */
3366             0,                                          /* Inactive */
3367             0,                                          /* HighTotal */
3368             0,                                          /* HighFree */
3369             btok(total_mem),                            /* LowTotal */
3370             btok(free_mem),                             /* LowFree */
3371             btok(total_swap),                           /* SwapTotal */
3372             btok(total_swap - used_swap));              /* SwapFree */
3373 }
3374 
3375 /*
3376  * lxpr_read_mounts():
3377  */
3378 /* ARGSUSED */
3379 static void
3380 lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3381 {
3382         struct vfs *vfsp;
3383         struct vfs *vfslist;
3384         zone_t *zone = LXPTOZ(lxpnp);
3385         struct print_data {
3386                 refstr_t *vfs_mntpt;
3387                 refstr_t *vfs_resource;
3388                 uint_t vfs_flag;
3389                 int vfs_fstype;
3390                 struct print_data *next;
3391         } *print_head = NULL;
3392         struct print_data **print_tail = &print_head;
3393         struct print_data *printp;
3394 
3395         vfs_list_read_lock();
3396 
3397         if (zone == global_zone) {
3398                 vfsp = vfslist = rootvfs;
3399         } else {
3400                 vfsp = vfslist = zone->zone_vfslist;
3401                 /*
3402                  * If the zone has a root entry, it will be the first in
3403                  * the list.  If it doesn't, we conjure one up.
3404                  */
3405                 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
3406                     zone->zone_rootpath) != 0) {
3407                         struct vfs *tvfsp;
3408                         /*
3409                          * The root of the zone is not a mount point.  The vfs
3410                          * we want to report is that of the zone's root vnode.
3411                          */
3412                         tvfsp = zone->zone_rootvp->v_vfsp;
3413 
3414                         lxpr_uiobuf_printf(uiobuf,
3415                             "/ / %s %s 0 0\n",
3416                             vfssw[tvfsp->vfs_fstype].vsw_name,
3417                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
3418 
3419                 }
3420                 if (vfslist == NULL) {
3421                         vfs_list_unlock();
3422                         return;
3423                 }
3424         }
3425 
3426         /*
3427          * Later on we have to do a lookupname, which can end up causing
3428          * another vfs_list_read_lock() to be called. Which can lead to a
3429          * deadlock. To avoid this, we extract the data we need into a local
3430          * list, then we can run this list without holding vfs_list_read_lock()
3431          * We keep the list in the same order as the vfs_list
3432          */
3433         do {
3434                 /* Skip mounts we shouldn't show */
3435                 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
3436                         goto nextfs;
3437                 }
3438 
3439                 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
3440                 refstr_hold(vfsp->vfs_mntpt);
3441                 printp->vfs_mntpt = vfsp->vfs_mntpt;
3442                 refstr_hold(vfsp->vfs_resource);
3443                 printp->vfs_resource = vfsp->vfs_resource;
3444                 printp->vfs_flag = vfsp->vfs_flag;
3445                 printp->vfs_fstype = vfsp->vfs_fstype;
3446                 printp->next = NULL;
3447 
3448                 *print_tail = printp;
3449                 print_tail = &printp->next;
3450 
3451 nextfs:
3452                 vfsp = (zone == global_zone) ?
3453                     vfsp->vfs_next : vfsp->vfs_zone_next;
3454 
3455         } while (vfsp != vfslist);
3456 
3457         vfs_list_unlock();
3458 
3459         /*
3460          * now we can run through what we've extracted without holding
3461          * vfs_list_read_lock()
3462          */
3463         printp = print_head;
3464         while (printp != NULL) {
3465                 struct print_data *printp_next;
3466                 const char *resource;
3467                 char *mntpt;
3468                 struct vnode *vp;
3469                 int error;
3470 
3471                 mntpt = (char *)refstr_value(printp->vfs_mntpt);
3472                 resource = refstr_value(printp->vfs_resource);
3473 
3474                 if (mntpt != NULL && mntpt[0] != '\0')
3475                         mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
3476                 else
3477                         mntpt = "-";
3478 
3479                 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
3480 
3481                 if (error != 0)
3482                         goto nextp;
3483 
3484                 if (!(vp->v_flag & VROOT)) {
3485                         VN_RELE(vp);
3486                         goto nextp;
3487                 }
3488                 VN_RELE(vp);
3489 
3490                 if (resource != NULL && resource[0] != '\0') {
3491                         if (resource[0] == '/') {
3492                                 resource = ZONE_PATH_VISIBLE(resource, zone) ?
3493                                     ZONE_PATH_TRANSLATE(resource, zone) :
3494                                     mntpt;
3495                         }
3496                 } else {
3497                         resource = "-";
3498                 }
3499 
3500                 lxpr_uiobuf_printf(uiobuf,
3501                     "%s %s %s %s 0 0\n",
3502                     resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
3503                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
3504 
3505 nextp:
3506                 printp_next = printp->next;
3507                 refstr_rele(printp->vfs_mntpt);
3508                 refstr_rele(printp->vfs_resource);
3509                 kmem_free(printp, sizeof (*printp));
3510                 printp = printp_next;
3511 
3512         }
3513 }
3514 
3515 /*
3516  * lxpr_read_partitions():
3517  *
3518  * Over the years, /proc/partitions has been made considerably smaller -- to
3519  * the point that it really is only major number, minor number, number of
3520  * blocks (which we report as 0), and partition name.
3521  *
3522  * We support this because some things want to see it to make sense of
3523  * /proc/diskstats, and also because "fdisk -l" and a few other things look
3524  * here to find all disks on the system.
3525  */
3526 /* ARGSUSED */
3527 static void
3528 lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3529 {
3530 
3531         kstat_t *ksr;
3532         kstat_t ks0;
3533         int nidx, num, i;
3534         size_t sidx, size;
3535         zfs_cmd_t *zc;
3536         nvlist_t *nv = NULL;
3537         nvpair_t *elem = NULL;
3538         lxpr_mnt_t *mnt;
3539         lxpr_zfs_iter_t zfsi;
3540 
3541         ASSERT(lxpnp->lxpr_type == LXPR_PARTITIONS);
3542 
3543         ks0.ks_kid = 0;
3544         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
3545 
3546         if (ksr == NULL)
3547                 return;
3548 
3549         lxpr_uiobuf_printf(uiobuf, "major minor  #blocks  name\n\n");
3550 
3551         for (i = 1; i < nidx; i++) {
3552                 kstat_t *ksp = &ksr[i];
3553                 kstat_io_t *kip;
3554 
3555                 if (ksp->ks_type != KSTAT_TYPE_IO ||
3556                     strcmp(ksp->ks_class, "disk") != 0)
3557                         continue;
3558 
3559                 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
3560                     &size, &num)) == NULL)
3561                         continue;
3562 
3563                 if (size < sizeof (kstat_io_t)) {
3564                         kmem_free(kip, size);
3565                         continue;
3566                 }
3567 
3568                 lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d %s\n",
3569                     mod_name_to_major(ksp->ks_module),
3570                     ksp->ks_instance, 0, ksp->ks_name);
3571 
3572                 kmem_free(kip, size);
3573         }
3574 
3575         kmem_free(ksr, sidx);
3576 
3577         /* If we never got to open the zfs LDI, then stop now. */
3578         mnt = (lxpr_mnt_t *)lxpnp->lxpr_vnode->v_vfsp->vfs_data;
3579         if (mnt->lxprm_zfs_isopen == B_FALSE)
3580                 return;
3581 
3582         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3583 
3584         if (lxpr_zfs_list_pools(mnt, zc, &nv) != 0)
3585                 goto out;
3586 
3587         while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
3588                 char *pool = nvpair_name(elem);
3589 
3590                 bzero(&zfsi, sizeof (lxpr_zfs_iter_t));
3591                 while (lxpr_zfs_next_zvol(mnt, pool, zc, &zfsi) == 0) {
3592                         major_t major;
3593                         minor_t minor;
3594                         if (lxpr_zvol_dev(mnt, zc->zc_name, &major, &minor)
3595                             != 0)
3596                                 continue;
3597 
3598                         lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d zvol/dsk/%s\n",
3599                             major, minor, 0, zc->zc_name);
3600                 }
3601         }
3602 
3603         nvlist_free(nv);
3604 out:
3605         kmem_free(zc, sizeof (zfs_cmd_t));
3606 }
3607 
3608 /*
3609  * lxpr_read_diskstats():
3610  *
3611  * See the block comment above the per-device output-generating line for the
3612  * details of the format.
3613  */
3614 /* ARGSUSED */
3615 static void
3616 lxpr_read_diskstats(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3617 {
3618         kstat_t *ksr;
3619         kstat_t ks0;
3620         int nidx, num, i;
3621         size_t sidx, size;
3622 
3623         ASSERT(lxpnp->lxpr_type == LXPR_DISKSTATS);
3624 
3625         ks0.ks_kid = 0;
3626         ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
3627 
3628         if (ksr == NULL)
3629                 return;
3630 
3631         for (i = 1; i < nidx; i++) {
3632                 kstat_t *ksp = &ksr[i];
3633                 kstat_io_t *kip;
3634 
3635                 if (ksp->ks_type != KSTAT_TYPE_IO ||
3636                     strcmp(ksp->ks_class, "disk") != 0)
3637                         continue;
3638 
3639                 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
3640                     &size, &num)) == NULL)
3641                         continue;
3642 
3643                 if (size < sizeof (kstat_io_t)) {
3644                         kmem_free(kip, size);
3645                         continue;
3646                 }
3647 
3648                 /*
3649                  * /proc/diskstats is defined to have one line of output for
3650                  * each block device, with each line containing the following
3651                  * 14 fields:
3652                  *
3653                  *      1 - major number
3654                  *      2 - minor mumber
3655                  *      3 - device name
3656                  *      4 - reads completed successfully
3657                  *      5 - reads merged
3658                  *      6 - sectors read
3659                  *      7 - time spent reading (ms)
3660                  *      8 - writes completed
3661                  *      9 - writes merged
3662                  *      10 - sectors written
3663                  *      11 - time spent writing (ms)
3664                  *      12 - I/Os currently in progress
3665                  *      13 - time spent doing I/Os (ms)
3666                  *      14 - weighted time spent doing I/Os (ms)
3667                  *
3668                  * One small hiccup:  we don't actually keep track of time
3669                  * spent reading vs. time spent writing -- we keep track of
3670                  * time waiting vs. time actually performing I/O.  While we
3671                  * could divide the total time by the I/O mix (making the
3672                  * obviously wrong assumption that I/O operations all take the
3673                  * same amount of time), this has the undesirable side-effect
3674                  * of moving backwards.  Instead, we report the total time
3675                  * (read + write) for all three stats (read, write, total).
3676                  * This is also a lie of sorts, but it should be more
3677                  * immediately clear to the user that reads and writes are
3678                  * each being double-counted as the other.
3679                  */
3680                 lxpr_uiobuf_printf(uiobuf, "%4d %7d %s "
3681                     "%llu %llu %llu %llu "
3682                     "%llu %llu %llu %llu "
3683                     "%llu %llu %llu\n",
3684                     mod_name_to_major(ksp->ks_module),
3685                     ksp->ks_instance, ksp->ks_name,
3686                     (uint64_t)kip->reads, 0LL,
3687                     kip->nread / (uint64_t)LXPR_SECTOR_SIZE,
3688                     (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3689                     (uint64_t)kip->writes, 0LL,
3690                     kip->nwritten / (uint64_t)LXPR_SECTOR_SIZE,
3691                     (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3692                     (uint64_t)(kip->rcnt + kip->wcnt),
3693                     (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3694                     (kip->rlentime + kip->wlentime) /
3695                     (uint64_t)(NANOSEC / MILLISEC));
3696 
3697                 kmem_free(kip, size);
3698         }
3699 
3700         kmem_free(ksr, sidx);
3701 }
3702 
3703 /*
3704  * lxpr_read_version(): read the contents of the "version" file.
3705  */
3706 /* ARGSUSED */
3707 static void
3708 lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3709 {
3710         lx_zone_data_t *lxzd = ztolxzd(LXPTOZ(lxpnp));
3711         lx_proc_data_t *lxpd = ptolxproc(curproc);
3712         const char *release = lxzd->lxzd_kernel_release;
3713         const char *version = lxzd->lxzd_kernel_version;
3714 
3715         /* Use per-process overrides, if specified */
3716         if (lxpd != NULL && lxpd->l_uname_release[0] != '\0') {
3717                 release = lxpd->l_uname_release;
3718         }
3719         if (lxpd != NULL && lxpd->l_uname_version[0] != '\0') {
3720                 version = lxpd->l_uname_version;
3721         }
3722 
3723         lxpr_uiobuf_printf(uiobuf,
3724             "%s version %s (%s version %d.%d.%d) %s\n",
3725             LX_UNAME_SYSNAME, release,
3726 #if defined(__GNUC__)
3727             "gcc", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
3728 #else
3729             "cc", 1, 0, 0,
3730 #endif
3731             version);
3732 }
3733 
3734 /*
3735  * lxpr_read_stat(): read the contents of the "stat" file.
3736  *
3737  */
3738 /* ARGSUSED */
3739 static void
3740 lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3741 {
3742         cpu_t *cp, *cpstart;
3743         int pools_enabled;
3744         ulong_t idle_cum = 0;
3745         ulong_t sys_cum  = 0;
3746         ulong_t user_cum = 0;
3747         ulong_t irq_cum = 0;
3748         ulong_t cpu_nrunnable_cum = 0;
3749         ulong_t w_io_cum = 0;
3750 
3751         ulong_t pgpgin_cum    = 0;
3752         ulong_t pgpgout_cum   = 0;
3753         ulong_t pgswapout_cum = 0;
3754         ulong_t pgswapin_cum  = 0;
3755         ulong_t intr_cum = 0;
3756         ulong_t pswitch_cum = 0;
3757         ulong_t forks_cum = 0;
3758         hrtime_t msnsecs[NCMSTATES];
3759         /* is the emulated release > 2.4 */
3760         boolean_t newer_than24 = lx_kern_release_cmp(LXPTOZ(lxpnp), "2.4") > 0;
3761         /* temporary variable since scalehrtime modifies data in place */
3762         hrtime_t tmptime;
3763 
3764         ASSERT(lxpnp->lxpr_type == LXPR_STAT);
3765 
3766         mutex_enter(&cpu_lock);
3767         pools_enabled = pool_pset_enabled();
3768 
3769         /* Calculate cumulative stats */
3770         cp = cpstart = CPU->cpu_part->cp_cpulist;
3771         do {
3772                 int i;
3773 
3774                 /*
3775                  * Don't count CPUs that aren't even in the system
3776                  * or aren't up yet.
3777                  */
3778                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3779                         continue;
3780                 }
3781 
3782                 get_cpu_mstate(cp, msnsecs);
3783 
3784                 idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3785                 sys_cum  += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3786                 user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
3787 
3788                 pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
3789                 pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
3790                 pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
3791                 pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
3792 
3793 
3794                 if (newer_than24) {
3795                         cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
3796                         w_io_cum += CPU_STATS(cp, sys.iowait);
3797                         for (i = 0; i < NCMSTATES; i++) {
3798                                 tmptime = cp->cpu_intracct[i];
3799                                 scalehrtime(&tmptime);
3800                                 irq_cum += NSEC_TO_TICK(tmptime);
3801                         }
3802                 }
3803 
3804                 for (i = 0; i < PIL_MAX; i++)
3805                         intr_cum += CPU_STATS(cp, sys.intr[i]);
3806 
3807                 pswitch_cum += CPU_STATS(cp, sys.pswitch);
3808                 forks_cum += CPU_STATS(cp, sys.sysfork);
3809                 forks_cum += CPU_STATS(cp, sys.sysvfork);
3810 
3811                 if (pools_enabled)
3812                         cp = cp->cpu_next_part;
3813                 else
3814                         cp = cp->cpu_next;
3815         } while (cp != cpstart);
3816 
3817         if (newer_than24) {
3818                 lxpr_uiobuf_printf(uiobuf,
3819                     "cpu %lu %lu %lu %lu %lu %lu %lu\n",
3820                     user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
3821         } else {
3822                 lxpr_uiobuf_printf(uiobuf,
3823                     "cpu %lu %lu %lu %lu\n",
3824                     user_cum, 0L, sys_cum, idle_cum);
3825         }
3826 
3827         /* Do per processor stats */
3828         do {
3829                 int i;
3830 
3831                 ulong_t idle_ticks;
3832                 ulong_t sys_ticks;
3833                 ulong_t user_ticks;
3834                 ulong_t irq_ticks = 0;
3835 
3836                 /*
3837                  * Don't count CPUs that aren't even in the system
3838                  * or aren't up yet.
3839                  */
3840                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3841                         continue;
3842                 }
3843 
3844                 get_cpu_mstate(cp, msnsecs);
3845 
3846                 idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3847                 sys_ticks  = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3848                 user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
3849 
3850                 for (i = 0; i < NCMSTATES; i++) {
3851                         tmptime = cp->cpu_intracct[i];
3852                         scalehrtime(&tmptime);
3853                         irq_ticks += NSEC_TO_TICK(tmptime);
3854                 }
3855 
3856                 if (newer_than24) {
3857                         lxpr_uiobuf_printf(uiobuf,
3858                             "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
3859                             cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
3860                             0L, irq_ticks, 0L);
3861                 } else {
3862                         lxpr_uiobuf_printf(uiobuf,
3863                             "cpu%d %lu %lu %lu %lu\n",
3864                             cp->cpu_id,
3865                             user_ticks, 0L, sys_ticks, idle_ticks);
3866                 }
3867 
3868                 if (pools_enabled)
3869                         cp = cp->cpu_next_part;
3870                 else
3871                         cp = cp->cpu_next;
3872         } while (cp != cpstart);
3873 
3874         mutex_exit(&cpu_lock);
3875 
3876         if (newer_than24) {
3877                 lxpr_uiobuf_printf(uiobuf,
3878                     "page %lu %lu\n"
3879                     "swap %lu %lu\n"
3880                     "intr %lu\n"
3881                     "ctxt %lu\n"
3882                     "btime %lu\n"
3883                     "processes %lu\n"
3884                     "procs_running %lu\n"
3885                     "procs_blocked %lu\n",
3886                     pgpgin_cum, pgpgout_cum,
3887                     pgswapin_cum, pgswapout_cum,
3888                     intr_cum,
3889                     pswitch_cum,
3890                     boot_time,
3891                     forks_cum,
3892                     cpu_nrunnable_cum,
3893                     w_io_cum);
3894         } else {
3895                 lxpr_uiobuf_printf(uiobuf,
3896                     "page %lu %lu\n"
3897                     "swap %lu %lu\n"
3898                     "intr %lu\n"
3899                     "ctxt %lu\n"
3900                     "btime %lu\n"
3901                     "processes %lu\n",
3902                     pgpgin_cum, pgpgout_cum,
3903                     pgswapin_cum, pgswapout_cum,
3904                     intr_cum,
3905                     pswitch_cum,
3906                     boot_time,
3907                     forks_cum);
3908         }
3909 }
3910 
3911 /*
3912  * lxpr_read_swaps():
3913  *
3914  * We don't support swap files or partitions, but some programs like to look
3915  * here just to check we have some swap on the system, so we lie and show
3916  * our entire swap cap as one swap partition.
3917  *
3918  * It is important to use formatting identical to the Linux implementation
3919  * so that consumers do not break. See swap_show() in mm/swapfile.c.
3920  */
3921 /* ARGSUSED */
3922 static void
3923 lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3924 {
3925         zone_t *zone = curzone;
3926         uint64_t totswap, usedswap;
3927 
3928         mutex_enter(&zone->zone_mem_lock);
3929         /* Uses units of 1 kb (2^10). */
3930         totswap = zone->zone_max_swap_ctl >> 10;
3931         usedswap = zone->zone_max_swap >> 10;
3932         mutex_exit(&zone->zone_mem_lock);
3933 
3934         lxpr_uiobuf_printf(uiobuf,
3935             "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
3936         lxpr_uiobuf_printf(uiobuf, "%-40s%s\t%llu\t%llu\t%d\n",
3937             "/dev/swap", "partition", totswap, usedswap, -1);
3938 }
3939 
3940 /*
3941  * inotify tunables exported via /proc.
3942  */
3943 extern int inotify_maxevents;
3944 extern int inotify_maxinstances;
3945 extern int inotify_maxwatches;
3946 
3947 static void
3948 lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp,
3949     lxpr_uiobuf_t *uiobuf)
3950 {
3951         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS);
3952         lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents);
3953 }
3954 
3955 static void
3956 lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp,
3957     lxpr_uiobuf_t *uiobuf)
3958 {
3959         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES);
3960         lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances);
3961 }
3962 
3963 static void
3964 lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp,
3965     lxpr_uiobuf_t *uiobuf)
3966 {
3967         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES);
3968         lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches);
3969 }
3970 
3971 static void
3972 lxpr_read_sys_kernel_caplcap(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3973 {
3974         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_CAPLCAP);
3975         lxpr_uiobuf_printf(uiobuf, "%d\n", LX_CAP_MAX_VALID);
3976 }
3977 
3978 static void
3979 lxpr_read_sys_kernel_corepatt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3980 {
3981         zone_t *zone = curproc->p_zone;
3982         struct core_globals *cg;
3983         refstr_t *rp;
3984         corectl_path_t *ccp;
3985         char tr[MAXPATHLEN];
3986 
3987         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);
3988 
3989         cg = zone_getspecific(core_zone_key, zone);
3990         ASSERT(cg != NULL);
3991 
3992         /* If core dumps are disabled, return an empty string. */
3993         if ((cg->core_options & CC_PROCESS_PATH) == 0) {
3994                 lxpr_uiobuf_printf(uiobuf, "\n");
3995                 return;
3996         }
3997 
3998         ccp = cg->core_default_path;
3999         mutex_enter(&ccp->ccp_mtx);
4000         if ((rp = ccp->ccp_path) != NULL)
4001                 refstr_hold(rp);
4002         mutex_exit(&ccp->ccp_mtx);
4003 
4004         if (rp == NULL) {
4005                 lxpr_uiobuf_printf(uiobuf, "\n");
4006                 return;
4007         }
4008 
4009         bzero(tr, sizeof (tr));
4010         if (lxpr_core_path_s2l(refstr_value(rp), tr, sizeof (tr)) != 0) {
4011                 refstr_rele(rp);
4012                 lxpr_uiobuf_printf(uiobuf, "\n");
4013                 return;
4014         }
4015 
4016         refstr_rele(rp);
4017         lxpr_uiobuf_printf(uiobuf, "%s\n", tr);
4018 }
4019 
4020 static void
4021 lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4022 {
4023         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME);
4024         lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename());
4025 }
4026 
4027 static void
4028 lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4029 {
4030         rctl_qty_t val;
4031 
4032         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI);
4033 
4034         mutex_enter(&curproc->p_lock);
4035         val = rctl_enforced_value(rc_zone_msgmni,
4036             curproc->p_zone->zone_rctls, curproc);
4037         mutex_exit(&curproc->p_lock);
4038 
4039         lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4040 }
4041 
4042 static void
4043 lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4044 {
4045         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX);
4046         lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max);
4047 }
4048 
4049 static void
4050 lxpr_read_sys_kernel_osrel(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4051 {
4052         lx_zone_data_t *br_data;
4053 
4054         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_OSREL);
4055         br_data = ztolxzd(curproc->p_zone);
4056         if (curproc->p_zone->zone_brand == &lx_brand) {
4057                 lxpr_uiobuf_printf(uiobuf, "%s\n",
4058                     br_data->lxzd_kernel_version);
4059         } else {
4060                 lxpr_uiobuf_printf(uiobuf, "\n");
4061         }
4062 }
4063 
4064 static void
4065 lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4066 {
4067         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX);
4068         lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid);
4069 }
4070 
4071 static void
4072 lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4073 {
4074         /*
4075          * This file isn't documented on the Linux proc(5) man page but
4076          * according to the blog of the author of systemd/journald (the
4077          * consumer), he says:
4078          *    boot_id: A random ID that is regenerated on each boot. As such it
4079          *    can be used to identify the local machine's current boot. It's
4080          *    universally available on any recent Linux kernel. It's a good and
4081          *    safe choice if you need to identify a specific boot on a specific
4082          *    booted kernel.
4083          *
4084          * We'll just generate a random ID if necessary. On Linux the format
4085          * appears to resemble a uuid but since it is not documented to be a
4086          * uuid, we don't worry about that.
4087          */
4088         lx_zone_data_t *br_data;
4089 
4090         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RAND_BOOTID);
4091 
4092         if (curproc->p_zone->zone_brand != &lx_brand) {
4093                 lxpr_uiobuf_printf(uiobuf, "0\n");
4094                 return;
4095         }
4096 
4097         br_data = ztolxzd(curproc->p_zone);
4098         if (br_data->lxzd_bootid[0] == '\0') {
4099                 extern int getrandom(void *, size_t, int);
4100                 int i;
4101 
4102                 for (i = 0; i < 5; i++) {
4103                         u_longlong_t n;
4104                         char s[32];
4105 
4106                         (void) random_get_bytes((uint8_t *)&n, sizeof (n));
4107                         switch (i) {
4108                         case 0: (void) snprintf(s, sizeof (s), "%08llx", n);
4109                                 s[8] = '\0';
4110                                 break;
4111                         case 4: (void) snprintf(s, sizeof (s), "%012llx", n);
4112                                 s[12] = '\0';
4113                                 break;
4114                         default: (void) snprintf(s, sizeof (s), "%04llx", n);
4115                                 s[4] = '\0';
4116                                 break;
4117                         }
4118                         if (i > 0)
4119                                 strlcat(br_data->lxzd_bootid, "-",
4120                                     sizeof (br_data->lxzd_bootid));
4121                         strlcat(br_data->lxzd_bootid, s,
4122                             sizeof (br_data->lxzd_bootid));
4123                 }
4124         }
4125 
4126         lxpr_uiobuf_printf(uiobuf, "%s\n", br_data->lxzd_bootid);
4127 }
4128 
4129 static void
4130 lxpr_read_sys_kernel_sem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4131 {
4132         proc_t *pp = curproc;
4133         rctl_qty_t vmsl, vopm, vmni, vmns;
4134 
4135         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SEM);
4136 
4137         mutex_enter(&pp->p_lock);
4138         vmsl = rctl_enforced_value(rc_process_semmsl, pp->p_rctls, pp);
4139         vopm = rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp);
4140         vmni = rctl_enforced_value(rc_zone_semmni, pp->p_zone->zone_rctls, pp);
4141         mutex_exit(&pp->p_lock);
4142         vmns = vmsl * vmni;
4143         if (vmns < vmsl || vmns < vmni) {
4144                 vmns = ULLONG_MAX;
4145         }
4146         /*
4147          * Format: semmsl semmns semopm semmni
4148          *  - semmsl: Limit semaphores in a sempahore set.
4149          *  - semmns: Limit semaphores in all semaphore sets
4150          *  - semopm: Limit operations in a single semop call
4151          *  - semmni: Limit number of semaphore sets
4152          */
4153         lxpr_uiobuf_printf(uiobuf, "%llu\t%llu\t%llu\t%llu\n",
4154             vmsl, vmns, vopm, vmni);
4155 }
4156 
4157 static void
4158 lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4159 {
4160         rctl_qty_t val;
4161 
4162         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX);
4163 
4164         mutex_enter(&curproc->p_lock);
4165         val = rctl_enforced_value(rc_zone_shmmax,
4166             curproc->p_zone->zone_rctls, curproc);
4167         mutex_exit(&curproc->p_lock);
4168 
4169         if (val > FOURGB)
4170                 val = FOURGB;
4171 
4172         lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4173 }
4174 
4175 static void
4176 lxpr_read_sys_kernel_shmmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4177 {
4178         rctl_qty_t val;
4179 
4180         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMNI);
4181 
4182         mutex_enter(&curproc->p_lock);
4183         val = rctl_enforced_value(rc_zone_shmmni,
4184             curproc->p_zone->zone_rctls, curproc);
4185         mutex_exit(&curproc->p_lock);
4186 
4187         if (val > FOURGB)
4188                 val = FOURGB;
4189 
4190         lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4191 }
4192 
4193 static void
4194 lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4195 {
4196         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX);
4197         lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl);
4198 }
4199 
4200 static void
4201 lxpr_read_sys_net_core_somaxc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4202 {
4203         netstack_t *ns;
4204         tcp_stack_t     *tcps;
4205 
4206         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
4207 
4208         ns = netstack_get_current();
4209         if (ns == NULL) {
4210                 lxpr_uiobuf_printf(uiobuf, "%d\n", SOMAXCONN);
4211                 return;
4212         }
4213 
4214         tcps = ns->netstack_tcp;
4215         lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_conn_req_max_q);
4216         netstack_rele(ns);
4217 }
4218 
4219 static void
4220 lxpr_read_sys_vm_minfr_kb(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4221 {
4222         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MINFR_KB);
4223         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4224 }
4225 
4226 static void
4227 lxpr_read_sys_vm_nhpages(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4228 {
4229         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_NHUGEP);
4230         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4231 }
4232 
4233 static void
4234 lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4235 {
4236         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_OVERCOMMIT_MEM);
4237         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4238 }
4239 
4240 static void
4241 lxpr_read_sys_vm_swappiness(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4242 {
4243         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_SWAPPINESS);
4244         lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4245 }
4246 
4247 /*
4248  * lxpr_read_uptime(): read the contents of the "uptime" file.
4249  *
4250  * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
4251  * Use fixed point arithmetic to get 2 decimal places
4252  */
4253 /* ARGSUSED */
4254 static void
4255 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4256 {
4257         cpu_t *cp, *cpstart;
4258         int pools_enabled;
4259         ulong_t idle_cum = 0;
4260         ulong_t cpu_count = 0;
4261         ulong_t idle_s;
4262         ulong_t idle_cs;
4263         ulong_t up_s;
4264         ulong_t up_cs;
4265         hrtime_t birthtime;
4266         hrtime_t centi_sec = 10000000;  /* 10^7 */
4267 
4268         ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
4269 
4270         /* Calculate cumulative stats */
4271         mutex_enter(&cpu_lock);
4272         pools_enabled = pool_pset_enabled();
4273 
4274         cp = cpstart = CPU->cpu_part->cp_cpulist;
4275         do {
4276                 /*
4277                  * Don't count CPUs that aren't even in the system
4278                  * or aren't up yet.
4279                  */
4280                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
4281                         continue;
4282                 }
4283 
4284                 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
4285                 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
4286                 cpu_count += 1;
4287 
4288                 if (pools_enabled)
4289                         cp = cp->cpu_next_part;
4290                 else
4291                         cp = cp->cpu_next;
4292         } while (cp != cpstart);
4293         mutex_exit(&cpu_lock);
4294 
4295         /* Getting the Zone zsched process startup time */
4296         birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
4297         up_cs = (gethrtime() - birthtime) / centi_sec;
4298         up_s = up_cs / 100;
4299         up_cs %= 100;
4300 
4301         ASSERT(cpu_count > 0);
4302         idle_cum /= cpu_count;
4303         idle_s = idle_cum / hz;
4304         idle_cs = idle_cum % hz;
4305         idle_cs *= 100;
4306         idle_cs /= hz;
4307 
4308         lxpr_uiobuf_printf(uiobuf,
4309             "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
4310 }
4311 
/*
 * CPU feature-name tables used by lxpr_read_cpuinfo() to build the "flags"
 * line.  Entry i of a table is the name printed when bit i of the associated
 * cpuid register value is set; a NULL entry means that bit is never printed.
 * A table with fewer than 32 entries simply leaves the higher bits
 * unexamined.
 */

/*
 * Names for edx of cpuid function 0x80000001, consulted when x86_vendor is
 * X86_VENDOR_AMD.
 */
static const char *amd_x_edx[] = {
        NULL,   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   "syscall",
        NULL,   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   "mp",
        "nx",   NULL,   "mmxext", NULL,
        NULL,   NULL,   NULL,   NULL,
        NULL,   "lm",   "3dnowext", "3dnow"
};

/*
 * Names for ecx of cpuid function 0x80000001, consulted when x86_vendor is
 * X86_VENDOR_AMD.
 */
static const char *amd_x_ecx[] = {
        "lahf_lm", NULL, "svm", NULL,
        "altmovcr8"
};

/*
 * Names for edx of cpuid function 0x80000001, consulted when x86_vendor is
 * X86_VENDOR_TM.
 */
static const char *tm_x_edx[] = {
        "recovery", "longrun", NULL, "lrti"
};

/*
 * Names for edx of cpuid function 0x80000001, consulted when x86_vendor is
 * X86_VENDOR_Intel.
 *
 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
 */
static const char *intc_x_edx[] = {
        NULL,   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   "syscall",
        NULL,   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   NULL,
        "nx",   NULL,   NULL,   NULL,
        NULL,   NULL,   NULL,   NULL,
        NULL,   "lm",   NULL,   NULL
};

/*
 * Names for edx of cpuid function 1; consulted for every vendor.
 */
static const char *intc_edx[] = {
        "fpu",  "vme",  "de",   "pse",
        "tsc",  "msr",  "pae",  "mce",
        "cx8",  "apic",  NULL,  "sep",
        "mtrr", "pge",  "mca",  "cmov",
        "pat",  "pse36", "pn",  "clflush",
        NULL,   "dts",  "acpi", "mmx",
        "fxsr", "sse",  "sse2", "ss",
        "ht",   "tm",   "ia64", "pbe"
};

/*
 * Names for ecx of cpuid function 1; consulted for every vendor.
 *
 * "sse3" on linux is called "pni" (Prescott New Instructions).
 */
static const char *intc_ecx[] = {
        "pni",  NULL,   NULL, "monitor",
        "ds_cpl", NULL, NULL, "est",
        "tm2",  NULL,   "cid", NULL,
        NULL,   "cx16", "xtpr"
};
4366 
4367 /*
4368  * Report a list of each cgroup subsystem supported by our emulated cgroup fs.
4369  * This needs to exist for systemd to run but for now we don't report any
4370  * cgroup subsystems as being installed. The commented example below shows
4371  * how to print a subsystem entry.
4372  */
4373 static void
4374 lxpr_read_cgroups(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4375 {
4376         lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
4377             "#subsys_name", "hierarchy", "num_cgroups", "enabled");
4378 
4379         /*
4380          * lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
4381          *   "cpu,cpuacct", "2", "1", "1");
4382          */
4383 }
4384 
4385 static void
4386 lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4387 {
4388         int i;
4389         uint32_t bits;
4390         cpu_t *cp, *cpstart;
4391         int pools_enabled;
4392         const char **fp;
4393         char brandstr[CPU_IDSTRLEN];
4394         struct cpuid_regs cpr;
4395         int maxeax;
4396         int std_ecx, std_edx, ext_ecx, ext_edx;
4397 
4398         ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
4399 
4400         mutex_enter(&cpu_lock);
4401         pools_enabled = pool_pset_enabled();
4402 
4403         cp = cpstart = CPU->cpu_part->cp_cpulist;
4404         do {
4405                 /*
4406                  * This returns the maximum eax value for standard cpuid
4407                  * functions in eax.
4408                  */
4409                 cpr.cp_eax = 0;
4410                 (void) cpuid_insn(cp, &cpr);
4411                 maxeax = cpr.cp_eax;
4412 
4413                 /*
4414                  * Get standard x86 feature flags.
4415                  */
4416                 cpr.cp_eax = 1;
4417                 (void) cpuid_insn(cp, &cpr);
4418                 std_ecx = cpr.cp_ecx;
4419                 std_edx = cpr.cp_edx;
4420 
4421                 /*
4422                  * Now get extended feature flags.
4423                  */
4424                 cpr.cp_eax = 0x80000001;
4425                 (void) cpuid_insn(cp, &cpr);
4426                 ext_ecx = cpr.cp_ecx;
4427                 ext_edx = cpr.cp_edx;
4428 
4429                 (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
4430 
4431                 lxpr_uiobuf_printf(uiobuf,
4432                     "processor\t: %d\n"
4433                     "vendor_id\t: %s\n"
4434                     "cpu family\t: %d\n"
4435                     "model\t\t: %d\n"
4436                     "model name\t: %s\n"
4437                     "stepping\t: %d\n"
4438                     "cpu MHz\t\t: %u.%03u\n",
4439                     cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
4440                     cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
4441                     (uint32_t)(cpu_freq_hz / 1000000),
4442                     ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
4443 
4444                 lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
4445                     getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
4446 
4447                 if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
4448                         /*
4449                          * 'siblings' is used for HT-style threads
4450                          */
4451                         lxpr_uiobuf_printf(uiobuf,
4452                             "physical id\t: %lu\n"
4453                             "siblings\t: %u\n",
4454                             pg_plat_hw_instance_id(cp, PGHW_CHIP),
4455                             cpuid_get_ncpu_per_chip(cp));
4456                 }
4457 
4458                 /*
4459                  * Since we're relatively picky about running on older hardware,
4460                  * we can be somewhat cavalier about the answers to these ones.
4461                  *
4462                  * In fact, given the hardware we support, we just say:
4463                  *
4464                  *      fdiv_bug        : no    (if we're on a 64-bit kernel)
4465                  *      hlt_bug         : no
4466                  *      f00f_bug        : no
4467                  *      coma_bug        : no
4468                  *      wp              : yes   (write protect in supervsr mode)
4469                  */
4470                 lxpr_uiobuf_printf(uiobuf,
4471                     "fdiv_bug\t: %s\n"
4472                     "hlt_bug \t: no\n"
4473                     "f00f_bug\t: no\n"
4474                     "coma_bug\t: no\n"
4475                     "fpu\t\t: %s\n"
4476                     "fpu_exception\t: %s\n"
4477                     "cpuid level\t: %d\n"
4478                     "flags\t\t:",
4479 #if defined(__i386)
4480                     fpu_pentium_fdivbug ? "yes" : "no",
4481 #else
4482                     "no",
4483 #endif /* __i386 */
4484                     fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
4485                     maxeax);
4486 
4487                 for (bits = std_edx, fp = intc_edx, i = 0;
4488                     i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
4489                         if ((bits & (1 << i)) != 0 && *fp)
4490                                 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4491 
4492                 /*
4493                  * name additional features where appropriate
4494                  */
4495                 switch (x86_vendor) {
4496                 case X86_VENDOR_Intel:
4497                         for (bits = ext_edx, fp = intc_x_edx, i = 0;
4498                             i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
4499                             fp++, i++)
4500                                 if ((bits & (1 << i)) != 0 && *fp)
4501                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4502                         break;
4503 
4504                 case X86_VENDOR_AMD:
4505                         for (bits = ext_edx, fp = amd_x_edx, i = 0;
4506                             i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
4507                             fp++, i++)
4508                                 if ((bits & (1 << i)) != 0 && *fp)
4509                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4510 
4511                         for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
4512                             i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
4513                             fp++, i++)
4514                                 if ((bits & (1 << i)) != 0 && *fp)
4515                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4516                         break;
4517 
4518                 case X86_VENDOR_TM:
4519                         for (bits = ext_edx, fp = tm_x_edx, i = 0;
4520                             i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
4521                             fp++, i++)
4522                                 if ((bits & (1 << i)) != 0 && *fp)
4523                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4524                         break;
4525                 default:
4526                         break;
4527                 }
4528 
4529                 for (bits = std_ecx, fp = intc_ecx, i = 0;
4530                     i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
4531                         if ((bits & (1 << i)) != 0 && *fp)
4532                                 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4533 
4534                 lxpr_uiobuf_printf(uiobuf, "\n\n");
4535 
4536                 if (pools_enabled)
4537                         cp = cp->cpu_next_part;
4538                 else
4539                         cp = cp->cpu_next;
4540         } while (cp != cpstart);
4541 
4542         mutex_exit(&cpu_lock);
4543 }
4544 
4545 /* ARGSUSED */
4546 static void
4547 lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4548 {
4549         ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
4550         lxpr_uiobuf_seterr(uiobuf, EFAULT);
4551 }
4552 
4553 /*
4554  * Report a list of file systems loaded in the kernel. We only report the ones
4555  * which we support and which may be checked by various components to see if
4556  * they are loaded.
4557  */
4558 static void
4559 lxpr_read_filesystems(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4560 {
4561         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "autofs");
4562         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "cgroup");
4563         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "nfs");
4564         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "proc");
4565         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "sysfs");
4566         lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "tmpfs");
4567 }
4568 
4569 /*
4570  * lxpr_getattr(): Vnode operation for VOP_GETATTR()
4571  */
4572 static int
4573 lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
4574     caller_context_t *ct)
4575 {
4576         register lxpr_node_t *lxpnp = VTOLXP(vp);
4577         lxpr_nodetype_t type = lxpnp->lxpr_type;
4578         extern uint_t nproc;
4579         int error;
4580 
4581         /*
4582          * Return attributes of underlying vnode if ATTR_REAL
4583          *
4584          * but keep fd files with the symlink permissions
4585          */
4586         if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
4587                 vnode_t *rvp = lxpnp->lxpr_realvp;
4588 
4589                 /*
4590                  * withold attribute information to owner or root
4591                  */
4592                 if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
4593                         return (error);
4594                 }
4595 
4596                 /*
4597                  * now its attributes
4598                  */
4599                 if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
4600                         return (error);
4601                 }
4602 
4603                 /*
4604                  * if it's a file in lx /proc/pid/fd/xx then set its
4605                  * mode and keep it looking like a symlink, fifo or socket
4606                  */
4607                 if (type == LXPR_PID_FD_FD) {
4608                         vap->va_mode = lxpnp->lxpr_mode;
4609                         vap->va_type = lxpnp->lxpr_realvp->v_type;
4610                         vap->va_size = 0;
4611                         vap->va_nlink = 1;
4612                 }
4613                 return (0);
4614         }
4615 
4616         /* Default attributes, that may be overridden below */
4617         bzero(vap, sizeof (*vap));
4618         vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
4619         vap->va_nlink = 1;
4620         vap->va_type = vp->v_type;
4621         vap->va_mode = lxpnp->lxpr_mode;
4622         vap->va_fsid = vp->v_vfsp->vfs_dev;
4623         vap->va_blksize = DEV_BSIZE;
4624         vap->va_uid = lxpnp->lxpr_uid;
4625         vap->va_gid = lxpnp->lxpr_gid;
4626         vap->va_nodeid = lxpnp->lxpr_ino;
4627 
4628         switch (type) {
4629         case LXPR_PROCDIR:
4630                 vap->va_nlink = nproc + 2 + PROCDIRFILES;
4631                 vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
4632                 break;
4633         case LXPR_PIDDIR:
4634                 vap->va_nlink = PIDDIRFILES;
4635                 vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
4636                 break;
4637         case LXPR_PID_TASK_IDDIR:
4638                 vap->va_nlink = TIDDIRFILES;
4639                 vap->va_size = TIDDIRFILES * LXPR_SDSIZE;
4640                 break;
4641         case LXPR_SELF:
4642                 vap->va_uid = crgetruid(curproc->p_cred);
4643                 vap->va_gid = crgetrgid(curproc->p_cred);
4644                 break;
4645         case LXPR_PID_FD_FD:
4646         case LXPR_PID_TID_FD_FD:
4647                 /*
4648                  * Restore VLNK type for lstat-type activity.
4649                  * See lxpr_readlink for more details.
4650                  */
4651                 if ((flags & FOLLOW) == 0)
4652                         vap->va_type = VLNK;
4653         default:
4654                 break;
4655         }
4656 
4657         vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
4658         return (0);
4659 }
4660 
4661 /*
4662  * lxpr_access(): Vnode operation for VOP_ACCESS()
4663  */
4664 static int
4665 lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
4666 {
4667         lxpr_node_t *lxpnp = VTOLXP(vp);
4668         lxpr_nodetype_t type = lxpnp->lxpr_type;
4669         int shift = 0;
4670         proc_t *tp;
4671 
4672         /* lx /proc is a read only file system */
4673         if (mode & VWRITE) {
4674                 switch (type) {
4675                 case LXPR_PID_OOM_SCR_ADJ:
4676                 case LXPR_PID_TID_OOM_SCR_ADJ:
4677                 case LXPR_SYS_KERNEL_COREPATT:
4678                 case LXPR_SYS_NET_CORE_SOMAXCON:
4679                 case LXPR_SYS_VM_OVERCOMMIT_MEM:
4680                 case LXPR_SYS_VM_SWAPPINESS:
4681                 case LXPR_PID_FD_FD:
4682                 case LXPR_PID_TID_FD_FD:
4683                         break;
4684                 default:
4685                         return (EROFS);
4686                 }
4687         }
4688 
4689         /*
4690          * If this is a restricted file, check access permissions.
4691          */
4692         switch (type) {
4693         case LXPR_PIDDIR:
4694                 return (0);
4695         case LXPR_PID_CURDIR:
4696         case LXPR_PID_ENV:
4697         case LXPR_PID_EXE:
4698         case LXPR_PID_LIMITS:
4699         case LXPR_PID_MAPS:
4700         case LXPR_PID_MEM:
4701         case LXPR_PID_ROOTDIR:
4702         case LXPR_PID_FDDIR:
4703         case LXPR_PID_FD_FD:
4704         case LXPR_PID_TID_FDDIR:
4705         case LXPR_PID_TID_FD_FD:
4706                 if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
4707                         return (ENOENT);
4708                 if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
4709                     priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
4710                         lxpr_unlock(tp);
4711                         return (EACCES);
4712                 }
4713                 lxpr_unlock(tp);
4714         default:
4715                 break;
4716         }
4717 
4718         if (lxpnp->lxpr_realvp != NULL) {
4719                 /*
4720                  * For these we use the underlying vnode's accessibility.
4721                  */
4722                 return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
4723         }
4724 
4725         /* If user is root allow access regardless of permission bits */
4726         if (secpolicy_proc_access(cr) == 0)
4727                 return (0);
4728 
4729         /*
4730          * Access check is based on only one of owner, group, public.  If not
4731          * owner, then check group.  If not a member of the group, then check
4732          * public access.
4733          */
4734         if (crgetuid(cr) != lxpnp->lxpr_uid) {
4735                 shift += 3;
4736                 if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
4737                         shift += 3;
4738         }
4739 
4740         mode &= ~(lxpnp->lxpr_mode << shift);
4741 
4742         if (mode == 0)
4743                 return (0);
4744 
4745         return (EACCES);
4746 }
4747 
4748 /* ARGSUSED */
4749 static vnode_t *
4750 lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
4751 {
4752         return (NULL);
4753 }
4754 
4755 /*
4756  * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
4757  */
4758 /* ARGSUSED */
4759 static int
4760 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
4761     int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
4762     int *direntflags, pathname_t *realpnp)
4763 {
4764         lxpr_node_t *lxpnp = VTOLXP(dp);
4765         lxpr_nodetype_t type = lxpnp->lxpr_type;
4766         int error;
4767 
4768         ASSERT(dp->v_type == VDIR);
4769         ASSERT(type < LXPR_NFILES);
4770 
4771         /*
4772          * we should never get here because the lookup
4773          * is done on the realvp for these nodes
4774          */
4775         ASSERT(type != LXPR_PID_FD_FD &&
4776             type != LXPR_PID_CURDIR &&
4777             type != LXPR_PID_ROOTDIR);
4778 
4779         /*
4780          * restrict lookup permission to owner or root
4781          */
4782         if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
4783                 return (error);
4784         }
4785 
4786         /*
4787          * Just return the parent vnode if that's where we are trying to go.
4788          */
4789         if (strcmp(comp, "..") == 0) {
4790                 VN_HOLD(lxpnp->lxpr_parent);
4791                 *vpp = lxpnp->lxpr_parent;
4792                 return (0);
4793         }
4794 
4795         /*
4796          * Special handling for directory searches.  Note: null component name
4797          * denotes that the current directory is being searched.
4798          */
4799         if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
4800                 VN_HOLD(dp);
4801                 *vpp = dp;
4802                 return (0);
4803         }
4804 
4805         *vpp = (lxpr_lookup_function[type](dp, comp));
4806         return ((*vpp == NULL) ? ENOENT : 0);
4807 }
4808 
4809 /*
4810  * Do a sequential search on the given directory table
4811  */
4812 static vnode_t *
4813 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
4814     lxpr_dirent_t *dirtab, int dirtablen)
4815 {
4816         lxpr_node_t *lxpnp;
4817         int count;
4818 
4819         for (count = 0; count < dirtablen; count++) {
4820                 if (strcmp(dirtab[count].d_name, comp) == 0) {
4821                         lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
4822                         dp = LXPTOV(lxpnp);
4823                         ASSERT(dp != NULL);
4824                         return (dp);
4825                 }
4826         }
4827         return (NULL);
4828 }
4829 
4830 static vnode_t *
4831 lxpr_lookup_piddir(vnode_t *dp, char *comp)
4832 {
4833         proc_t *p;
4834 
4835         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
4836 
4837         p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
4838         if (p == NULL)
4839                 return (NULL);
4840 
4841         dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
4842 
4843         lxpr_unlock(p);
4844 
4845         return (dp);
4846 }
4847 
4848 /*
4849  * Lookup one of the process's task ID's.
4850  */
4851 static vnode_t *
4852 lxpr_lookup_taskdir(vnode_t *dp, char *comp)
4853 {
4854         lxpr_node_t *dlxpnp = VTOLXP(dp);
4855         lxpr_node_t *lxpnp;
4856         proc_t *p;
4857         pid_t real_pid;
4858         uint_t tid;
4859         int c;
4860         kthread_t *t;
4861 
4862         ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASKDIR);
4863 
4864         /*
4865          * convert the string rendition of the filename to a thread ID
4866          */
4867         tid = 0;
4868         while ((c = *comp++) != '\0') {
4869                 int otid;
4870                 if (c < '0' || c > '9')
4871                         return (NULL);
4872 
4873                 otid = tid;
4874                 tid = 10 * tid + c - '0';
4875                 /* integer overflow */
4876                 if (tid / 10 != otid)
4877                         return (NULL);
4878         }
4879 
4880         /*
4881          * get the proc to work with and lock it
4882          */
4883         real_pid = get_real_pid(dlxpnp->lxpr_pid);
4884         p = lxpr_lock(real_pid);
4885         if ((p == NULL))
4886                 return (NULL);
4887 
4888         /*
4889          * If the process is a zombie or system process
4890          * it can't have any threads.
4891          */
4892         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
4893                 lxpr_unlock(p);
4894                 return (NULL);
4895         }
4896 
4897         if (p->p_brand == &lx_brand) {
4898                 t = lxpr_get_thread(p, tid);
4899         } else {
4900                 /*
4901                  * Only the main thread is visible for non-branded processes.
4902                  */
4903                 t = p->p_tlist;
4904                 if (tid != p->p_pid || t == NULL) {
4905                         t = NULL;
4906                 } else {
4907                         thread_lock(t);
4908                 }
4909         }
4910         if (t == NULL) {
4911                 lxpr_unlock(p);
4912                 return (NULL);
4913         }
4914         thread_unlock(t);
4915 
4916         /*
4917          * Allocate and fill in a new lx /proc taskid node.
4918          * Instead of the last arg being a fd, it is a tid.
4919          */
4920         lxpnp = lxpr_getnode(dp, LXPR_PID_TASK_IDDIR, p, tid);
4921         dp = LXPTOV(lxpnp);
4922         ASSERT(dp != NULL);
4923         lxpr_unlock(p);
4924         return (dp);
4925 }
4926 
4927 /*
4928  * Lookup one of the process's task ID's.
4929  */
4930 static vnode_t *
4931 lxpr_lookup_task_tid_dir(vnode_t *dp, char *comp)
4932 {
4933         lxpr_node_t *dlxpnp = VTOLXP(dp);
4934         lxpr_node_t *lxpnp;
4935         proc_t *p;
4936         pid_t real_pid;
4937         kthread_t *t;
4938         int i;
4939 
4940         ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
4941 
4942         /*
4943          * get the proc to work with and lock it
4944          */
4945         real_pid = get_real_pid(dlxpnp->lxpr_pid);
4946         p = lxpr_lock(real_pid);
4947         if ((p == NULL))
4948                 return (NULL);
4949 
4950         /*
4951          * If the process is a zombie or system process
4952          * it can't have any threads.
4953          */
4954         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
4955                 lxpr_unlock(p);
4956                 return (NULL);
4957         }
4958 
4959         /* need to confirm tid is still there */
4960         t = lxpr_get_thread(p, dlxpnp->lxpr_desc);
4961         if (t == NULL) {
4962                 lxpr_unlock(p);
4963                 return (NULL);
4964         }
4965         thread_unlock(t);
4966 
4967         /*
4968          * allocate and fill in the new lx /proc taskid dir node
4969          */
4970         for (i = 0; i < TIDDIRFILES; i++) {
4971                 if (strcmp(tiddir[i].d_name, comp) == 0) {
4972                         lxpnp = lxpr_getnode(dp, tiddir[i].d_type, p,
4973                             dlxpnp->lxpr_desc);
4974                         dp = LXPTOV(lxpnp);
4975                         ASSERT(dp != NULL);
4976                         lxpr_unlock(p);
4977                         return (dp);
4978                 }
4979         }
4980 
4981         lxpr_unlock(p);
4982         return (NULL);
4983 }
4984 
4985 /*
4986  * Lookup one of the process's open files.
4987  */
4988 static vnode_t *
4989 lxpr_lookup_fddir(vnode_t *dp, char *comp)
4990 {
4991         lxpr_node_t *dlxpnp = VTOLXP(dp);
4992 
4993         ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR ||
4994             dlxpnp->lxpr_type == LXPR_PID_TID_FDDIR);
4995 
4996         return (lxpr_lookup_fdnode(dp, comp));
4997 }
4998 
4999 static vnode_t *
5000 lxpr_lookup_netdir(vnode_t *dp, char *comp)
5001 {
5002         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
5003 
5004         dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
5005 
5006         return (dp);
5007 }
5008 
5009 static vnode_t *
5010 lxpr_lookup_procdir(vnode_t *dp, char *comp)
5011 {
5012         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
5013 
5014         /*
5015          * We know all the names of files & dirs in our file system structure
5016          * except those that are pid names.  These change as pids are created/
5017          * deleted etc., so we just look for a number as the first char to see
5018          * if we are we doing pid lookups.
5019          *
5020          * Don't need to check for "self" as it is implemented as a symlink
5021          */
5022         if (*comp >= '0' && *comp <= '9') {
5023                 pid_t pid = 0;
5024                 lxpr_node_t *lxpnp = NULL;
5025                 proc_t *p;
5026                 int c;
5027 
5028                 while ((c = *comp++) != '\0')
5029                         pid = 10 * pid + c - '0';
5030 
5031                 /*
5032                  * Can't continue if the process is still loading or it doesn't
5033                  * really exist yet (or maybe it just died!)
5034                  */
5035                 p = lxpr_lock(pid);
5036                 if (p == NULL)
5037                         return (NULL);
5038 
5039                 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
5040                         lxpr_unlock(p);
5041                         return (NULL);
5042                 }
5043 
5044                 /*
5045                  * allocate and fill in a new lx /proc node
5046                  */
5047                 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
5048 
5049                 lxpr_unlock(p);
5050 
5051                 dp = LXPTOV(lxpnp);
5052                 ASSERT(dp != NULL);
5053 
5054                 return (dp);
5055         }
5056 
5057         /* Lookup fixed names */
5058         return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
5059 }
5060 
5061 static vnode_t *
5062 lxpr_lookup_sysdir(vnode_t *dp, char *comp)
5063 {
5064         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR);
5065         return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES));
5066 }
5067 
5068 static vnode_t *
5069 lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp)
5070 {
5071         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR);
5072         return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir,
5073             SYS_KERNELDIRFILES));
5074 }
5075 
5076 static vnode_t *
5077 lxpr_lookup_sys_kdir_randdir(vnode_t *dp, char *comp)
5078 {
5079         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
5080         return (lxpr_lookup_common(dp, comp, NULL, sys_randdir,
5081             SYS_RANDDIRFILES));
5082 }
5083 
5084 static vnode_t *
5085 lxpr_lookup_sys_netdir(vnode_t *dp, char *comp)
5086 {
5087         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NETDIR);
5088         return (lxpr_lookup_common(dp, comp, NULL, sys_netdir,
5089             SYS_NETDIRFILES));
5090 }
5091 
5092 static vnode_t *
5093 lxpr_lookup_sys_net_coredir(vnode_t *dp, char *comp)
5094 {
5095         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_COREDIR);
5096         return (lxpr_lookup_common(dp, comp, NULL, sys_net_coredir,
5097             SYS_NET_COREDIRFILES));
5098 }
5099 
5100 static vnode_t *
5101 lxpr_lookup_sys_vmdir(vnode_t *dp, char *comp)
5102 {
5103         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_VMDIR);
5104         return (lxpr_lookup_common(dp, comp, NULL, sys_vmdir,
5105             SYS_VMDIRFILES));
5106 }
5107 
5108 static vnode_t *
5109 lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp)
5110 {
5111         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR);
5112         return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir,
5113             SYS_FSDIRFILES));
5114 }
5115 
5116 static vnode_t *
5117 lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp)
5118 {
5119         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
5120         return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir,
5121             SYS_FS_INOTIFYDIRFILES));
5122 }
5123 
5124 /*
5125  * lxpr_readdir(): Vnode operation for VOP_READDIR()
5126  */
5127 /* ARGSUSED */
5128 static int
5129 lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
5130     caller_context_t *ct, int flags)
5131 {
5132         lxpr_node_t *lxpnp = VTOLXP(dp);
5133         lxpr_nodetype_t type = lxpnp->lxpr_type;
5134         ssize_t uresid;
5135         off_t uoffset;
5136         int error;
5137 
5138         ASSERT(dp->v_type == VDIR);
5139         ASSERT(type < LXPR_NFILES);
5140 
5141         /*
5142          * we should never get here because the readdir
5143          * is done on the realvp for these nodes
5144          */
5145         ASSERT(type != LXPR_PID_FD_FD &&
5146             type != LXPR_PID_CURDIR &&
5147             type != LXPR_PID_ROOTDIR);
5148 
5149         /*
5150          * restrict readdir permission to owner or root
5151          */
5152         if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
5153                 return (error);
5154 
5155         uoffset = uiop->uio_offset;
5156         uresid = uiop->uio_resid;
5157 
5158         /* can't do negative reads */
5159         if (uoffset < 0 || uresid <= 0)
5160                 return (EINVAL);
5161 
5162         /* can't read directory entries that don't exist! */
5163         if (uoffset % LXPR_SDSIZE)
5164                 return (ENOENT);
5165 
5166         return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
5167 }
5168 
5169 /* ARGSUSED */
5170 static int
5171 lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5172 {
5173         return (ENOTDIR);
5174 }
5175 
5176 /*
5177  * This has the common logic for returning directory entries
5178  */
5179 static int
5180 lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
5181     lxpr_dirent_t *dirtab, int dirtablen)
5182 {
5183         /* bp holds one dirent64 structure */
5184         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5185         dirent64_t *dirent = (dirent64_t *)bp;
5186         ssize_t oresid; /* save a copy for testing later */
5187         ssize_t uresid;
5188 
5189         oresid = uiop->uio_resid;
5190 
5191         /* clear out the dirent buffer */
5192         bzero(bp, sizeof (bp));
5193 
5194         /*
5195          * Satisfy user request
5196          */
5197         while ((uresid = uiop->uio_resid) > 0) {
5198                 int dirindex;
5199                 off_t uoffset;
5200                 int reclen;
5201                 int error;
5202 
5203                 uoffset = uiop->uio_offset;
5204                 dirindex  = (uoffset / LXPR_SDSIZE) - 2;
5205 
5206                 if (uoffset == 0) {
5207 
5208                         dirent->d_ino = lxpnp->lxpr_ino;
5209                         dirent->d_name[0] = '.';
5210                         dirent->d_name[1] = '\0';
5211                         reclen = DIRENT64_RECLEN(1);
5212 
5213                 } else if (uoffset == LXPR_SDSIZE) {
5214 
5215                         dirent->d_ino = lxpr_parentinode(lxpnp);
5216                         dirent->d_name[0] = '.';
5217                         dirent->d_name[1] = '.';
5218                         dirent->d_name[2] = '\0';
5219                         reclen = DIRENT64_RECLEN(2);
5220 
5221                 } else if (dirindex >= 0 && dirindex < dirtablen) {
5222                         int slen = strlen(dirtab[dirindex].d_name);
5223 
5224                         dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
5225                             lxpnp->lxpr_pid, 0);
5226 
5227                         VERIFY(slen < LXPNSIZ);
5228                         (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
5229                         reclen = DIRENT64_RECLEN(slen);
5230 
5231                 } else {
5232                         /* Run out of table entries */
5233                         if (eofp) {
5234                                 *eofp = 1;
5235                         }
5236                         return (0);
5237                 }
5238 
5239                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5240                 dirent->d_reclen = (ushort_t)reclen;
5241 
5242                 /*
5243                  * if the size of the data to transfer is greater
5244                  * that that requested then we can't do it this transfer.
5245                  */
5246                 if (reclen > uresid) {
5247                         /*
5248                          * Error if no entries have been returned yet.
5249                          */
5250                         if (uresid == oresid) {
5251                                 return (EINVAL);
5252                         }
5253                         break;
5254                 }
5255 
5256                 /*
5257                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5258                  * by the same amount.  But we want uiop->uio_offset to change
5259                  * in increments of LXPR_SDSIZE, which is different from the
5260                  * number of bytes being returned to the user.  So we set
5261                  * uiop->uio_offset separately, ignoring what uiomove() does.
5262                  */
5263                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5264                     uiop)) != 0)
5265                         return (error);
5266 
5267                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5268         }
5269 
5270         /* Have run out of space, but could have just done last table entry */
5271         if (eofp) {
5272                 *eofp =
5273                     (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
5274         }
5275         return (0);
5276 }
5277 
5278 
5279 static int
5280 lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5281 {
5282         /* bp holds one dirent64 structure */
5283         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5284         dirent64_t *dirent = (dirent64_t *)bp;
5285         ssize_t oresid; /* save a copy for testing later */
5286         ssize_t uresid;
5287         off_t uoffset;
5288         zoneid_t zoneid;
5289         pid_t pid;
5290         int error;
5291         int ceof;
5292 
5293         ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
5294 
5295         oresid = uiop->uio_resid;
5296         zoneid = LXPTOZ(lxpnp)->zone_id;
5297 
5298         /*
5299          * We return directory entries in the order: "." and ".." then the
5300          * unique lxproc files, then the directories corresponding to the
5301          * running processes.  We have defined this as the ordering because
5302          * it allows us to more easily keep track of where we are betwen calls
5303          * to getdents().  If the number of processes changes between calls
5304          * then we can't lose track of where we are in the lxproc files.
5305          */
5306 
5307         /* Do the fixed entries */
5308         error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
5309             PROCDIRFILES);
5310 
5311         /* Finished if we got an error or if we couldn't do all the table */
5312         if (error != 0 || ceof == 0)
5313                 return (error);
5314 
5315         /* clear out the dirent buffer */
5316         bzero(bp, sizeof (bp));
5317 
5318         /* Do the process entries */
5319         while ((uresid = uiop->uio_resid) > 0) {
5320                 proc_t *p;
5321                 int len;
5322                 int reclen;
5323                 int i;
5324 
5325                 uoffset = uiop->uio_offset;
5326 
5327                 /*
5328                  * Stop when entire proc table has been examined.
5329                  */
5330                 i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
5331                 if (i < 0 || i >= v.v_proc) {
5332                         /* Run out of table entries */
5333                         if (eofp) {
5334                                 *eofp = 1;
5335                         }
5336                         return (0);
5337                 }
5338                 mutex_enter(&pidlock);
5339 
5340                 /*
5341                  * Skip indices for which there is no pid_entry, PIDs for
5342                  * which there is no corresponding process, a PID of 0,
5343                  * and anything the security policy doesn't allow
5344                  * us to look at.
5345                  */
5346                 if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
5347                     p->p_pid == 0 ||
5348                     secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
5349                         mutex_exit(&pidlock);
5350                         goto next;
5351                 }
5352                 mutex_exit(&pidlock);
5353 
5354                 /*
5355                  * Convert pid to the Linux default of 1 if we're the zone's
5356                  * init process, or 0 if zsched, otherwise use the value from
5357                  * the proc structure
5358                  */
5359                 if (p->p_pid == curproc->p_zone->zone_proc_initpid) {
5360                         pid = 1;
5361                 } else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) {
5362                         pid = 0;
5363                 } else {
5364                         pid = p->p_pid;
5365                 }
5366 
5367                 /*
5368                  * If this /proc was mounted in the global zone, view
5369                  * all procs; otherwise, only view zone member procs.
5370                  */
5371                 if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
5372                         goto next;
5373                 }
5374 
5375                 ASSERT(p->p_stat != 0);
5376 
5377                 dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
5378                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
5379                 ASSERT(len < LXPNSIZ);
5380                 reclen = DIRENT64_RECLEN(len);
5381 
5382                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5383                 dirent->d_reclen = (ushort_t)reclen;
5384 
5385                 /*
5386                  * if the size of the data to transfer is greater
5387                  * that that requested then we can't do it this transfer.
5388                  */
5389                 if (reclen > uresid) {
5390                         /*
5391                          * Error if no entries have been returned yet.
5392                          */
5393                         if (uresid == oresid)
5394                                 return (EINVAL);
5395                         break;
5396                 }
5397 
5398                 /*
5399                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5400                  * by the same amount.  But we want uiop->uio_offset to change
5401                  * in increments of LXPR_SDSIZE, which is different from the
5402                  * number of bytes being returned to the user.  So we set
5403                  * uiop->uio_offset separately, in the increment of this for
5404                  * the loop, ignoring what uiomove() does.
5405                  */
5406                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5407                     uiop)) != 0)
5408                         return (error);
5409 next:
5410                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5411         }
5412 
5413         if (eofp != NULL) {
5414                 *eofp = (uiop->uio_offset >=
5415                     ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
5416         }
5417 
5418         return (0);
5419 }
5420 
5421 static int
5422 lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5423 {
5424         proc_t *p;
5425         pid_t find_pid;
5426 
5427         ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
5428 
5429         /* can't read its contents if it died */
5430         mutex_enter(&pidlock);
5431 
5432         if (lxpnp->lxpr_pid == 1) {
5433                 find_pid = curproc->p_zone->zone_proc_initpid;
5434         } else if (lxpnp->lxpr_pid == 0) {
5435                 find_pid = curproc->p_zone->zone_zsched->p_pid;
5436         } else {
5437                 find_pid = lxpnp->lxpr_pid;
5438         }
5439         p = prfind(find_pid);
5440 
5441         if (p == NULL || p->p_stat == SIDL) {
5442                 mutex_exit(&pidlock);
5443                 return (ENOENT);
5444         }
5445         mutex_exit(&pidlock);
5446 
5447         return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
5448 }
5449 
5450 static int
5451 lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5452 {
5453         ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
5454         return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
5455 }
5456 
5457 static int
5458 lxpr_readdir_taskdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5459 {
5460         /* bp holds one dirent64 structure */
5461         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5462         dirent64_t *dirent = (dirent64_t *)bp;
5463         ssize_t oresid; /* save a copy for testing later */
5464         ssize_t uresid;
5465         off_t uoffset;
5466         int error;
5467         int ceof;
5468         proc_t *p;
5469         int tiddirsize = -1;
5470         int tasknum;
5471         pid_t real_pid;
5472         kthread_t *t;
5473         boolean_t branded = B_FALSE;
5474 
5475         ASSERT(lxpnp->lxpr_type == LXPR_PID_TASKDIR);
5476 
5477         oresid = uiop->uio_resid;
5478 
5479         real_pid = get_real_pid(lxpnp->lxpr_pid);
5480         p = lxpr_lock(real_pid);
5481 
5482         /* can't read its contents if it died */
5483         if (p == NULL) {
5484                 return (ENOENT);
5485         }
5486         if (p->p_stat == SIDL) {
5487                 lxpr_unlock(p);
5488                 return (ENOENT);
5489         }
5490 
5491         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
5492                 tiddirsize = 0;
5493 
5494         branded = (p->p_brand == &lx_brand);
5495         /*
5496          * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
5497          * going away while we iterate over its threads.
5498          */
5499         mutex_exit(&p->p_lock);
5500 
5501         if (tiddirsize == -1)
5502                 tiddirsize = p->p_lwpcnt;
5503 
5504         /* Do the fixed entries (in this case just "." & "..") */
5505         error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
5506 
5507         /* Finished if we got an error or if we couldn't do all the table */
5508         if (error != 0 || ceof == 0)
5509                 goto out;
5510 
5511         if ((t = p->p_tlist) == NULL) {
5512                 if (eofp != NULL)
5513                         *eofp = 1;
5514                 goto out;
5515         }
5516 
5517         /* clear out the dirent buffer */
5518         bzero(bp, sizeof (bp));
5519 
5520         /*
5521          * Loop until user's request is satisfied or until all thread's have
5522          * been returned.
5523          */
5524         for (tasknum = 0; (uresid = uiop->uio_resid) > 0; tasknum++) {
5525                 int i;
5526                 int reclen;
5527                 int len;
5528                 uint_t emul_tid;
5529                 lx_lwp_data_t *lwpd;
5530 
5531                 uoffset = uiop->uio_offset;
5532 
5533                 /*
5534                  * Stop at the end of the thread list
5535                  */
5536                 i = (uoffset / LXPR_SDSIZE) - 2;
5537                 if (i < 0 || i >= tiddirsize) {
5538                         if (eofp) {
5539                                 *eofp = 1;
5540                         }
5541                         goto out;
5542                 }
5543 
5544                 if (i != tasknum)
5545                         goto next;
5546 
5547                 if (!branded) {
5548                         /*
5549                          * Emulating the goofy linux task model is impossible
5550                          * to do for native processes.  We can compromise by
5551                          * presenting only the main thread to the consumer.
5552                          */
5553                         emul_tid = p->p_pid;
5554                 } else {
5555                         if ((lwpd = ttolxlwp(t)) == NULL) {
5556                                 goto next;
5557                         }
5558                         emul_tid = lwpd->br_pid;
5559                         /*
5560                          * Convert pid to Linux default of 1 if we're the
5561                          * zone's init.
5562                          */
5563                         if (emul_tid == curproc->p_zone->zone_proc_initpid)
5564                                 emul_tid = 1;
5565                 }
5566 
5567                 dirent->d_ino = lxpr_inode(LXPR_PID_TASK_IDDIR, lxpnp->lxpr_pid,
5568                     emul_tid);
5569                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", emul_tid);
5570                 ASSERT(len < LXPNSIZ);
5571                 reclen = DIRENT64_RECLEN(len);
5572 
5573                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5574                 dirent->d_reclen = (ushort_t)reclen;
5575 
5576                 if (reclen > uresid) {
5577                         /*
5578                          * Error if no entries have been returned yet.
5579                          */
5580                         if (uresid == oresid)
5581                                 error = EINVAL;
5582                         goto out;
5583                 }
5584 
5585                 /*
5586                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5587                  * by the same amount.  But we want uiop->uio_offset to change
5588                  * in increments of LXPR_SDSIZE, which is different from the
5589                  * number of bytes being returned to the user.  So we set
5590                  * uiop->uio_offset separately, in the increment of this for
5591                  * the loop, ignoring what uiomove() does.
5592                  */
5593                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5594                     uiop)) != 0)
5595                         goto out;
5596 
5597 next:
5598                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5599 
5600                 if ((t = t->t_forw) == p->p_tlist || !branded) {
5601                         if (eofp != NULL)
5602                                 *eofp = 1;
5603                         goto out;
5604                 }
5605         }
5606 
5607         if (eofp != NULL)
5608                 *eofp = 0;
5609 
5610 out:
5611         mutex_enter(&p->p_lock);
5612         lxpr_unlock(p);
5613         return (error);
5614 }
5615 
5616 static int
5617 lxpr_readdir_task_tid_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5618 {
5619         proc_t *p;
5620         pid_t real_pid;
5621         kthread_t *t;
5622 
5623         ASSERT(lxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
5624 
5625         mutex_enter(&pidlock);
5626 
5627         real_pid = get_real_pid(lxpnp->lxpr_pid);
5628         p = prfind(real_pid);
5629 
5630         /* can't read its contents if it died */
5631         if (p == NULL || p->p_stat == SIDL) {
5632                 mutex_exit(&pidlock);
5633                 return (ENOENT);
5634         }
5635 
5636         mutex_exit(&pidlock);
5637 
5638         /* need to confirm tid is still there */
5639         t = lxpr_get_thread(p, lxpnp->lxpr_desc);
5640         if (t == NULL) {
5641                 /* we can't find this specific thread */
5642                 return (NULL);
5643         }
5644         thread_unlock(t);
5645 
5646         return (lxpr_readdir_common(lxpnp, uiop, eofp, tiddir, TIDDIRFILES));
5647 }
5648 
5649 static int
5650 lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5651 {
5652         /* bp holds one dirent64 structure */
5653         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5654         dirent64_t *dirent = (dirent64_t *)bp;
5655         ssize_t oresid; /* save a copy for testing later */
5656         ssize_t uresid;
5657         off_t uoffset;
5658         int error;
5659         int ceof;
5660         proc_t *p;
5661         int fddirsize = -1;
5662         uf_info_t *fip;
5663 
5664         ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR ||
5665             lxpnp->lxpr_type == LXPR_PID_TID_FDDIR);
5666 
5667         oresid = uiop->uio_resid;
5668 
5669         /* can't read its contents if it died */
5670         p = lxpr_lock(lxpnp->lxpr_pid);
5671         if (p == NULL)
5672                 return (ENOENT);
5673 
5674         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
5675                 fddirsize = 0;
5676 
5677         /*
5678          * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
5679          * going away while we iterate over its fi_list.
5680          */
5681         mutex_exit(&p->p_lock);
5682 
5683         /* Get open file info */
5684         fip = (&(p)->p_user.u_finfo);
5685         mutex_enter(&fip->fi_lock);
5686 
5687         if (fddirsize == -1)
5688                 fddirsize = fip->fi_nfiles;
5689 
5690         /* Do the fixed entries (in this case just "." & "..") */
5691         error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
5692 
5693         /* Finished if we got an error or if we couldn't do all the table */
5694         if (error != 0 || ceof == 0)
5695                 goto out;
5696 
5697         /* clear out the dirent buffer */
5698         bzero(bp, sizeof (bp));
5699 
5700         /*
5701          * Loop until user's request is satisfied or until
5702          * all file descriptors have been examined.
5703          */
5704         for (; (uresid = uiop->uio_resid) > 0;
5705             uiop->uio_offset = uoffset + LXPR_SDSIZE) {
5706                 int reclen;
5707                 int fd;
5708                 int len;
5709 
5710                 uoffset = uiop->uio_offset;
5711 
5712                 /*
5713                  * Stop at the end of the fd list
5714                  */
5715                 fd = (uoffset / LXPR_SDSIZE) - 2;
5716                 if (fd < 0 || fd >= fddirsize) {
5717                         if (eofp) {
5718                                 *eofp = 1;
5719                         }
5720                         goto out;
5721                 }
5722 
5723                 if (fip->fi_list[fd].uf_file == NULL)
5724                         continue;
5725 
5726                 dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
5727                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
5728                 ASSERT(len < LXPNSIZ);
5729                 reclen = DIRENT64_RECLEN(len);
5730 
5731                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5732                 dirent->d_reclen = (ushort_t)reclen;
5733 
5734                 if (reclen > uresid) {
5735                         /*
5736                          * Error if no entries have been returned yet.
5737                          */
5738                         if (uresid == oresid)
5739                                 error = EINVAL;
5740                         goto out;
5741                 }
5742 
5743                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5744                     uiop)) != 0)
5745                         goto out;
5746         }
5747 
5748         if (eofp != NULL) {
5749                 *eofp =
5750                     (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
5751         }
5752 
5753 out:
5754         mutex_exit(&fip->fi_lock);
5755         mutex_enter(&p->p_lock);
5756         lxpr_unlock(p);
5757         return (error);
5758 }
5759 
5760 static int
5761 lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5762 {
5763         ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR);
5764         return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES));
5765 }
5766 
5767 static int
5768 lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5769 {
5770         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR);
5771         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir,
5772             SYS_FSDIRFILES));
5773 }
5774 
5775 static int
5776 lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5777 {
5778         ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
5779         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir,
5780             SYS_FS_INOTIFYDIRFILES));
5781 }
5782 
5783 static int
5784 lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5785 {
5786         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR);
5787         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir,
5788             SYS_KERNELDIRFILES));
5789 }
5790 
5791 static int
5792 lxpr_readdir_sys_kdir_randdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5793 {
5794         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
5795         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_randdir,
5796             SYS_RANDDIRFILES));
5797 }
5798 
5799 static int
5800 lxpr_readdir_sys_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5801 {
5802         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NETDIR);
5803         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_netdir,
5804             SYS_NETDIRFILES));
5805 }
5806 
5807 static int
5808 lxpr_readdir_sys_net_coredir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5809 {
5810         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_COREDIR);
5811         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_coredir,
5812             SYS_NET_COREDIRFILES));
5813 }
5814 
5815 static int
5816 lxpr_readdir_sys_vmdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5817 {
5818         ASSERT(lxpnp->lxpr_type == LXPR_SYS_VMDIR);
5819         return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_vmdir,
5820             SYS_VMDIRFILES));
5821 }
5822 
5823 static int
5824 lxpr_write_sys_net_core_somaxc(lxpr_node_t *lxpnp, struct uio *uio,
5825     struct cred *cr, caller_context_t *ct)
5826 {
5827         int error;
5828         int res = 0;
5829         size_t olen;
5830         char val[16];   /* big enough for a uint numeric string */
5831         netstack_t *ns;
5832         mod_prop_info_t *ptbl = NULL;
5833         mod_prop_info_t *pinfo = NULL;
5834 
5835         ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
5836 
5837         if (uio->uio_loffset != 0)
5838                 return (EINVAL);
5839 
5840         if (uio->uio_resid == 0)
5841                 return (0);
5842 
5843         olen = uio->uio_resid;
5844         if (olen > sizeof (val) - 1)
5845                 return (EINVAL);
5846 
5847         bzero(val, sizeof (val));
5848         error = uiomove(val, olen, UIO_WRITE, uio);
5849         if (error != 0)
5850                 return (error);
5851 
5852         if (val[olen - 1] == '\n')
5853                 val[olen - 1] = '\0';
5854 
5855         if (val[0] == '\0') /* no input */
5856                 return (EINVAL);
5857 
5858         ns = netstack_get_current();
5859         if (ns == NULL)
5860                 return (EINVAL);
5861 
5862         ptbl = ns->netstack_tcp->tcps_propinfo_tbl;
5863         pinfo = mod_prop_lookup(ptbl, "_conn_req_max_q", MOD_PROTO_TCP);
5864         if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, val, 0) != 0)
5865                 res = EINVAL;
5866 
5867         netstack_rele(ns);
5868         return (res);
5869 }
5870 
5871 /* ARGSUSED */
5872 static int
5873 lxpr_write_sys_kernel_corepatt(lxpr_node_t *lxpnp, struct uio *uio,
5874     struct cred *cr, caller_context_t *ct)
5875 {
5876         zone_t *zone = curproc->p_zone;
5877         struct core_globals *cg;
5878         refstr_t *rp, *nrp;
5879         corectl_path_t *ccp;
5880         char val[MAXPATHLEN];
5881         char valtr[MAXPATHLEN];
5882         size_t olen;
5883         int error;
5884 
5885         ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);
5886 
5887         cg = zone_getspecific(core_zone_key, zone);
5888         ASSERT(cg != NULL);
5889 
5890         if (secpolicy_coreadm(cr) != 0)
5891                 return (EPERM);
5892 
5893         if (uio->uio_loffset != 0)
5894                 return (EINVAL);
5895 
5896         if (uio->uio_resid == 0)
5897                 return (0);
5898 
5899         olen = uio->uio_resid;
5900         if (olen > sizeof (val) - 1)
5901                 return (EINVAL);
5902 
5903         bzero(val, sizeof (val));
5904         error = uiomove(val, olen, UIO_WRITE, uio);
5905         if (error != 0)
5906                 return (error);
5907 
5908         if (val[olen - 1] == '\n')
5909                 val[olen - 1] = '\0';
5910 
5911         if (val[0] == '|')
5912                 return (EINVAL);
5913 
5914         if ((error = lxpr_core_path_l2s(val, valtr, sizeof (valtr))) != 0)
5915                 return (error);
5916 
5917         nrp = refstr_alloc(valtr);
5918 
5919         ccp = cg->core_default_path;
5920         mutex_enter(&ccp->ccp_mtx);
5921         rp = ccp->ccp_path;
5922         refstr_hold((ccp->ccp_path = nrp));
5923         cg->core_options |= CC_PROCESS_PATH;
5924         mutex_exit(&ccp->ccp_mtx);
5925 
5926         if (rp != NULL)
5927                 refstr_rele(rp);
5928 
5929         return (0);
5930 }
5931 
5932 /*
5933  * lxpr_readlink(): Vnode operation for VOP_READLINK()
5934  */
5935 /* ARGSUSED */
5936 static int
5937 lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
5938 {
5939         char bp[MAXPATHLEN + 1];
5940         size_t buflen = sizeof (bp);
5941         lxpr_node_t *lxpnp = VTOLXP(vp);
5942         vnode_t *rvp = lxpnp->lxpr_realvp;
5943         pid_t pid;
5944         int error = 0;
5945 
5946         /*
5947          * Linux does something very "clever" for /proc/<pid>/fd/<num> entries.
5948          * Open FDs are represented as symlinks, the link contents
5949          * corresponding to the open resource.  For plain files or devices,
5950          * this isn't absurd since one can dereference the symlink to query
5951          * the underlying resource.  For sockets or pipes, it becomes ugly in a
5952          * hurry.  To maintain this human-readable output, those FD symlinks
5953          * point to bogus targets such as "socket:[<inodenum>]".  This requires
5954          * circumventing vfs since the stat/lstat behavior on those FD entries
5955          * will be unusual. (A stat must retrieve information about the open
5956          * socket or pipe.  It cannot fail because the link contents point to
5957          * an absent file.)
5958          *
5959          * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD
5960          * entries.  This bypasses code paths which would normally
5961          * short-circuit on symlinks and allows us to emulate the vfs behavior
5962          * expected by /proc consumers.
5963          */
5964         if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD)
5965                 return (EINVAL);
5966 
5967         /* Try to produce a symlink name for anything that has a realvp */
5968         if (rvp != NULL) {
5969                 if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
5970                         return (error);
5971                 if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) {
5972                         /*
5973                          * Special handling possible for /proc/<pid>/fd/<num>
5974                          * Generate <type>:[<inode>] links, if allowed.
5975                          */
5976                         if (lxpnp->lxpr_type != LXPR_PID_FD_FD ||
5977                             lxpr_readlink_fdnode(lxpnp, bp, buflen) != 0) {
5978                                 return (error);
5979                         }
5980                 }
5981         } else {
5982                 switch (lxpnp->lxpr_type) {
5983                 case LXPR_SELF:
5984                         /*
5985                          * Convert pid to the Linux default of 1 if we're the
5986                          * zone's init process or 0 if zsched.
5987                          */
5988                         if (curproc->p_pid ==
5989                             curproc->p_zone->zone_proc_initpid) {
5990                                 pid = 1;
5991                         } else if (curproc->p_pid ==
5992                             curproc->p_zone->zone_zsched->p_pid) {
5993                                 pid = 0;
5994                         } else {
5995                                 pid = curproc->p_pid;
5996                         }
5997 
5998                         /*
5999                          * Don't need to check result as every possible int
6000                          * will fit within MAXPATHLEN bytes.
6001                          */
6002                         (void) snprintf(bp, buflen, "%d", pid);
6003                         break;
6004                 case LXPR_PID_CURDIR:
6005                 case LXPR_PID_ROOTDIR:
6006                 case LXPR_PID_EXE:
6007                         return (EACCES);
6008                 default:
6009                         /*
6010                          * Need to return error so that nothing thinks
6011                          * that the symlink is empty and hence "."
6012                          */
6013                         return (EINVAL);
6014                 }
6015         }
6016 
6017         /* copy the link data to user space */
6018         return (uiomove(bp, strlen(bp), UIO_READ, uiop));
6019 }
6020 
6021 
6022 /*
6023  * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
6024  * Vnode is no longer referenced, deallocate the file
6025  * and all its resources.
6026  */
6027 /* ARGSUSED */
6028 static void
6029 lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
6030 {
6031         lxpr_freenode(VTOLXP(vp));
6032 }
6033 
/*
 * lxpr_sync(): Vnode operation for VOP_SYNC()
 * There is nothing to flush, so this always reports success (0).
 */
static int
lxpr_sync()
{
        int status = 0;

        /*
         * Nothing to sync but this function must never fail
         */
        return (status);
}
6045 
6046 /*
6047  * lxpr_cmp(): Vnode operation for VOP_CMP()
6048  */
6049 static int
6050 lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
6051 {
6052         vnode_t *rvp;
6053 
6054         while (vn_matchops(vp1, lxpr_vnodeops) &&
6055             (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
6056                 vp1 = rvp;
6057         }
6058 
6059         while (vn_matchops(vp2, lxpr_vnodeops) &&
6060             (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
6061                 vp2 = rvp;
6062         }
6063 
6064         if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
6065                 return (vp1 == vp2);
6066         return (VOP_CMP(vp1, vp2, ct));
6067 }
6068 
6069 /*
6070  * lxpr_realvp(): Vnode operation for VOP_REALVP()
6071  */
6072 static int
6073 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
6074 {
6075         vnode_t *rvp;
6076 
6077         if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
6078                 vp = rvp;
6079                 if (VOP_REALVP(vp, &rvp, ct) == 0)
6080                         vp = rvp;
6081         }
6082 
6083         *vpp = vp;
6084         return (0);
6085 }
6086 
6087 static int
6088 lxpr_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
6089     caller_context_t *ct)
6090 {
6091         lxpr_node_t     *lxpnp = VTOLXP(vp);
6092         lxpr_nodetype_t type = lxpnp->lxpr_type;
6093 
6094         switch (type) {
6095         case LXPR_SYS_KERNEL_COREPATT:
6096                 return (lxpr_write_sys_kernel_corepatt(lxpnp, uiop, cr, ct));
6097         case LXPR_SYS_NET_CORE_SOMAXCON:
6098                 return (lxpr_write_sys_net_core_somaxc(lxpnp, uiop, cr, ct));
6099 
6100         default:
6101                 /* pretend we wrote the whole thing */
6102                 uiop->uio_offset += uiop->uio_resid;
6103                 uiop->uio_resid = 0;
6104                 return (0);
6105         }
6106 }
6107 
6108 /*
6109  * We need to allow open with O_CREAT for the oom_score_adj file.
6110  */
6111 /*ARGSUSED7*/
6112 static int
6113 lxpr_create(struct vnode *dvp, char *nm, struct vattr *vap,
6114     enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred,
6115     int flag, caller_context_t *ct, vsecattr_t *vsecp)
6116 {
6117         lxpr_node_t *lxpnp = VTOLXP(dvp);
6118         lxpr_nodetype_t type = lxpnp->lxpr_type;
6119         vnode_t *vp = NULL;
6120         int error;
6121 
6122         ASSERT(type < LXPR_NFILES);
6123 
6124         /*
6125          * restrict create permission to owner or root
6126          */
6127         if ((error = lxpr_access(dvp, VEXEC, 0, cred, ct)) != 0) {
6128                 return (error);
6129         }
6130 
6131         if (*nm == '\0')
6132                 return (EPERM);
6133 
6134         if (dvp->v_type != VDIR)
6135                 return (EPERM);
6136 
6137         if (exclusive == EXCL)
6138                 return (EEXIST);
6139 
6140         /*
6141          * We're currently restricting O_CREAT to:
6142          * - /proc/<pid>/fd/<num>
6143          * - /proc/<pid>/oom_score_adj
6144          * - /proc/<pid>/task/<tid>/fd/<num>
6145          * - /proc/<pid>/task/<tid>/oom_score_adj
6146          * - /proc/sys/kernel/core_pattern
6147          * - /proc/sys/net/core/somaxconn
6148          * - /proc/sys/vm/overcommit_memory
6149          * - /proc/sys/vm/swappiness
6150          */
6151         switch (type) {
6152         case LXPR_PIDDIR:
6153         case LXPR_PID_TASK_IDDIR:
6154                 if (strcmp(nm, "oom_score_adj") == 0) {
6155                         proc_t *p;
6156                         p = lxpr_lock(lxpnp->lxpr_pid);
6157                         if (p != NULL) {
6158                                 vp = lxpr_lookup_common(dvp, nm, p, piddir,
6159                                     PIDDIRFILES);
6160                         }
6161                         lxpr_unlock(p);
6162                 }
6163                 break;
6164 
6165         case LXPR_SYS_NET_COREDIR:
6166                 if (strcmp(nm, "somaxconn") == 0) {
6167                         vp = lxpr_lookup_common(dvp, nm, NULL, sys_net_coredir,
6168                             SYS_NET_COREDIRFILES);
6169                 }
6170                 break;
6171 
6172         case LXPR_SYS_KERNELDIR:
6173                 if (strcmp(nm, "core_pattern") == 0) {
6174                         vp = lxpr_lookup_common(dvp, nm, NULL, sys_kerneldir,
6175                             SYS_KERNELDIRFILES);
6176                 }
6177                 break;
6178 
6179         case LXPR_SYS_VMDIR:
6180                 if (strcmp(nm, "overcommit_memory") == 0 ||
6181                     strcmp(nm, "swappiness") == 0) {
6182                         vp = lxpr_lookup_common(dvp, nm, NULL, sys_vmdir,
6183                             SYS_VMDIRFILES);
6184                 }
6185                 break;
6186 
6187         case LXPR_PID_FDDIR:
6188         case LXPR_PID_TID_FDDIR:
6189                 vp = lxpr_lookup_fdnode(dvp, nm);
6190                 break;
6191 
6192         default:
6193                 vp = NULL;
6194                 break;
6195         }
6196 
6197         if (vp != NULL) {
6198                 /* Creating an existing file, allow it for regular files. */
6199                 if (vp->v_type == VDIR)
6200                         return (EISDIR);
6201 
6202                 /* confirm permissions against existing file */
6203                 if ((error = lxpr_access(vp, mode, 0, cred, ct)) != 0) {
6204                         VN_RELE(vp);
6205                         return (error);
6206                 }
6207 
6208                 *vpp = vp;
6209                 return (0);
6210         }
6211 
6212         /*
6213          * Linux proc does not allow creation of addition, non-subsystem
6214          * specific files inside the hierarchy.  ENOENT is tossed when such
6215          * actions are attempted.
6216          */
6217         return (ENOENT);
6218 }