1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2016 Joyent, Inc. 25 */ 26 27 /* 28 * lx_proc -- a Linux-compatible /proc for the LX brand 29 * 30 * We have -- confusingly -- two implementations of Linux /proc. One is to 31 * support native (but Linux-borne) programs that wish to view the native 32 * system through the Linux /proc model; the other -- this one -- is to 33 * support Linux binaries via the LX brand. These two implementations differ 34 * greatly in their aspirations (and their willingness to bend the truth 35 * of the system to accommodate those aspirations); they should not be unified. 
36 */ 37 38 #include <sys/cpupart.h> 39 #include <sys/cpuvar.h> 40 #include <sys/session.h> 41 #include <sys/vmparam.h> 42 #include <sys/mman.h> 43 #include <vm/rm.h> 44 #include <vm/seg_vn.h> 45 #include <sys/sdt.h> 46 #include <lx_signum.h> 47 #include <sys/strlog.h> 48 #include <sys/stropts.h> 49 #include <sys/cmn_err.h> 50 #include <sys/lx_brand.h> 51 #include <lx_auxv.h> 52 #include <sys/x86_archext.h> 53 #include <sys/archsystm.h> 54 #include <sys/fp.h> 55 #include <sys/pool_pset.h> 56 #include <sys/pset.h> 57 #include <sys/zone.h> 58 #include <sys/pghw.h> 59 #include <sys/vfs_opreg.h> 60 #include <sys/param.h> 61 #include <sys/utsname.h> 62 #include <sys/rctl.h> 63 #include <sys/kstat.h> 64 #include <sys/lx_misc.h> 65 #include <sys/brand.h> 66 #include <sys/cred_impl.h> 67 #include <sys/tihdr.h> 68 #include <sys/corectl.h> 69 #include <inet/ip.h> 70 #include <inet/ip_ire.h> 71 #include <inet/ip6.h> 72 #include <inet/ip_if.h> 73 #include <inet/tcp.h> 74 #include <inet/tcp_impl.h> 75 #include <inet/udp_impl.h> 76 #include <inet/ipclassifier.h> 77 #include <sys/socketvar.h> 78 #include <fs/sockfs/socktpi.h> 79 80 /* Dependent on procfs */ 81 extern kthread_t *prchoose(proc_t *); 82 extern int prreadargv(proc_t *, char *, size_t, size_t *); 83 extern int prreadenvv(proc_t *, char *, size_t, size_t *); 84 extern int prreadbuf(proc_t *, uintptr_t, uint8_t *, size_t, size_t *); 85 86 #include "lx_proc.h" 87 88 extern pgcnt_t swapfs_minfree; 89 extern time_t boot_time; 90 91 /* 92 * Pointer to the vnode ops vector for this fs. 
93 * This is instantiated in lxprinit() in lxpr_vfsops.c 94 */ 95 vnodeops_t *lxpr_vnodeops; 96 97 static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *); 98 static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *, 99 caller_context_t *); 100 static int lxpr_create(struct vnode *, char *, struct vattr *, enum vcexcl, 101 int, struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *); 102 static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *); 103 static int lxpr_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *); 104 static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *, 105 caller_context_t *); 106 static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *); 107 static int lxpr_lookup(vnode_t *, char *, vnode_t **, 108 pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *, 109 pathname_t *); 110 static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *, 111 caller_context_t *, int); 112 static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *); 113 static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *); 114 static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *); 115 static int lxpr_sync(void); 116 static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *); 117 118 static vnode_t *lxpr_lookup_procdir(vnode_t *, char *); 119 static vnode_t *lxpr_lookup_piddir(vnode_t *, char *); 120 static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *); 121 static vnode_t *lxpr_lookup_fddir(vnode_t *, char *); 122 static vnode_t *lxpr_lookup_netdir(vnode_t *, char *); 123 static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *); 124 static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *); 125 static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *); 126 static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *); 127 static vnode_t *lxpr_lookup_sys_kdir_randdir(vnode_t *, char *); 128 static vnode_t *lxpr_lookup_sys_netdir(vnode_t *, 
char *); 129 static vnode_t *lxpr_lookup_sys_net_coredir(vnode_t *, char *); 130 static vnode_t *lxpr_lookup_sys_vmdir(vnode_t *, char *); 131 static vnode_t *lxpr_lookup_taskdir(vnode_t *, char *); 132 static vnode_t *lxpr_lookup_task_tid_dir(vnode_t *, char *); 133 134 static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *); 135 static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *); 136 static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *); 137 static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *); 138 static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *); 139 static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *); 140 static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *); 141 static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *); 142 static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *); 143 static int lxpr_readdir_sys_kdir_randdir(lxpr_node_t *, uio_t *, int *); 144 static int lxpr_readdir_sys_netdir(lxpr_node_t *, uio_t *, int *); 145 static int lxpr_readdir_sys_net_coredir(lxpr_node_t *, uio_t *, int *); 146 static int lxpr_readdir_sys_vmdir(lxpr_node_t *, uio_t *, int *); 147 static int lxpr_readdir_taskdir(lxpr_node_t *, uio_t *, int *); 148 static int lxpr_readdir_task_tid_dir(lxpr_node_t *, uio_t *, int *); 149 150 static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *); 151 static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *); 152 static void lxpr_read_cgroups(lxpr_node_t *, lxpr_uiobuf_t *); 153 static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *); 154 static void lxpr_read_diskstats(lxpr_node_t *, lxpr_uiobuf_t *); 155 static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *); 156 static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *); 157 static void lxpr_read_filesystems(lxpr_node_t *, lxpr_uiobuf_t *); 158 static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t); 159 static void lxpr_read_loadavg(lxpr_node_t *, 
lxpr_uiobuf_t *); 160 static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *); 161 static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *); 162 static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *); 163 static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *); 164 static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *); 165 static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *); 166 static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *); 167 168 static void lxpr_read_pid_auxv(lxpr_node_t *, lxpr_uiobuf_t *); 169 static void lxpr_read_pid_cgroup(lxpr_node_t *, lxpr_uiobuf_t *); 170 static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *); 171 static void lxpr_read_pid_comm(lxpr_node_t *, lxpr_uiobuf_t *); 172 static void lxpr_read_pid_env(lxpr_node_t *, lxpr_uiobuf_t *); 173 static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *); 174 static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *); 175 static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *); 176 static void lxpr_read_pid_oom_scr_adj(lxpr_node_t *, lxpr_uiobuf_t *); 177 static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *); 178 static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *); 179 static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *); 180 181 static void lxpr_read_pid_tid_stat(lxpr_node_t *, lxpr_uiobuf_t *); 182 static void lxpr_read_pid_tid_status(lxpr_node_t *, lxpr_uiobuf_t *); 183 184 static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *); 185 static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *); 186 static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *); 187 static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *); 188 static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *); 189 static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *); 190 static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *); 191 static void 
lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *); 192 static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *); 193 static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *); 194 static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *); 195 static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *); 196 static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *); 197 static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *); 198 static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *); 199 static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *); 200 static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *); 201 static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *); 202 static void lxpr_read_net_tcp6(lxpr_node_t *, lxpr_uiobuf_t *); 203 static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *); 204 static void lxpr_read_net_udp6(lxpr_node_t *, lxpr_uiobuf_t *); 205 static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *); 206 static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *, 207 lxpr_uiobuf_t *); 208 static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *, 209 lxpr_uiobuf_t *); 210 static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *, 211 lxpr_uiobuf_t *); 212 static void lxpr_read_sys_kernel_caplcap(lxpr_node_t *, lxpr_uiobuf_t *); 213 static void lxpr_read_sys_kernel_corepatt(lxpr_node_t *, lxpr_uiobuf_t *); 214 static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *); 215 static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *); 216 static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *); 217 static void lxpr_read_sys_kernel_osrel(lxpr_node_t *, lxpr_uiobuf_t *); 218 static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *); 219 static void lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *, lxpr_uiobuf_t *); 220 static void lxpr_read_sys_kernel_sem(lxpr_node_t *, 
lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_shmmni(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_net_core_somaxc(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_vm_minfr_kb(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_vm_nhpages(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_vm_swappiness(lxpr_node_t *, lxpr_uiobuf_t *);

/* Write handlers for the small set of writable lx /proc files. */
static int lxpr_write_sys_net_core_somaxc(lxpr_node_t *, uio_t *, cred_t *,
    caller_context_t *);
static int lxpr_write_sys_kernel_corepatt(lxpr_node_t *, uio_t *, cred_t *,
    caller_context_t *);

/*
 * Simple conversion
 */
#define	btok(x)	((x) >> 10)			/* bytes to kbytes */
#define	ptok(x)	((x) << (PAGESHIFT - 10))	/* pages to kbytes */

/* Fetch the lx brand-specific lwp data hanging off a kthread. */
#define	ttolxlwp(t)	((struct lx_lwp_data *)ttolwpbrand(t))

/* rctl handles, defined in the rctl subsystem, used for limit reporting. */
extern rctl_hndl_t rc_process_semmsl;
extern rctl_hndl_t rc_process_semopm;
extern rctl_hndl_t rc_zone_semmni;

extern rctl_hndl_t rc_zone_msgmni;
extern rctl_hndl_t rc_zone_shmmax;
extern rctl_hndl_t rc_zone_shmmni;
/* NOTE(review): despite the name, this is 4 GiB - 1 (UINT32_MAX). */
#define	FOURGB	4294967295

/*
 * The maximum length of the concatenation of argument vector strings we
 * will return to the user via the branded procfs. Likewise for the env vector.
255 */ 256 int lxpr_maxargvlen = 4096; 257 int lxpr_maxenvvlen = 4096; 258 259 /* 260 * The lx /proc vnode operations vector 261 */ 262 const fs_operation_def_t lxpr_vnodeops_template[] = { 263 VOPNAME_OPEN, { .vop_open = lxpr_open }, 264 VOPNAME_CLOSE, { .vop_close = lxpr_close }, 265 VOPNAME_READ, { .vop_read = lxpr_read }, 266 VOPNAME_WRITE, { .vop_read = lxpr_write }, 267 VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr }, 268 VOPNAME_ACCESS, { .vop_access = lxpr_access }, 269 VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup }, 270 VOPNAME_CREATE, { .vop_create = lxpr_create }, 271 VOPNAME_READDIR, { .vop_readdir = lxpr_readdir }, 272 VOPNAME_READLINK, { .vop_readlink = lxpr_readlink }, 273 VOPNAME_FSYNC, { .error = lxpr_sync }, 274 VOPNAME_SEEK, { .error = lxpr_sync }, 275 VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive }, 276 VOPNAME_CMP, { .vop_cmp = lxpr_cmp }, 277 VOPNAME_REALVP, { .vop_realvp = lxpr_realvp }, 278 NULL, NULL 279 }; 280 281 282 /* 283 * file contents of an lx /proc directory. 284 */ 285 static lxpr_dirent_t lx_procdir[] = { 286 { LXPR_CGROUPS, "cgroups" }, 287 { LXPR_CMDLINE, "cmdline" }, 288 { LXPR_CPUINFO, "cpuinfo" }, 289 { LXPR_DEVICES, "devices" }, 290 { LXPR_DISKSTATS, "diskstats" }, 291 { LXPR_DMA, "dma" }, 292 { LXPR_FILESYSTEMS, "filesystems" }, 293 { LXPR_INTERRUPTS, "interrupts" }, 294 { LXPR_IOPORTS, "ioports" }, 295 { LXPR_KCORE, "kcore" }, 296 { LXPR_KMSG, "kmsg" }, 297 { LXPR_LOADAVG, "loadavg" }, 298 { LXPR_MEMINFO, "meminfo" }, 299 { LXPR_MODULES, "modules" }, 300 { LXPR_MOUNTS, "mounts" }, 301 { LXPR_NETDIR, "net" }, 302 { LXPR_PARTITIONS, "partitions" }, 303 { LXPR_SELF, "self" }, 304 { LXPR_STAT, "stat" }, 305 { LXPR_SWAPS, "swaps" }, 306 { LXPR_SYSDIR, "sys" }, 307 { LXPR_UPTIME, "uptime" }, 308 { LXPR_VERSION, "version" } 309 }; 310 311 #define PROCDIRFILES (sizeof (lx_procdir) / sizeof (lx_procdir[0])) 312 313 /* 314 * Contents of an lx /proc/<pid> directory. 
 */
static lxpr_dirent_t piddir[] = {
	{ LXPR_PID_AUXV,	"auxv" },
	{ LXPR_PID_CGROUP,	"cgroup" },
	{ LXPR_PID_CMDLINE,	"cmdline" },
	{ LXPR_PID_COMM,	"comm" },
	{ LXPR_PID_CPU,		"cpu" },
	{ LXPR_PID_CURDIR,	"cwd" },
	{ LXPR_PID_ENV,		"environ" },
	{ LXPR_PID_EXE,		"exe" },
	{ LXPR_PID_LIMITS,	"limits" },
	{ LXPR_PID_MAPS,	"maps" },
	{ LXPR_PID_MEM,		"mem" },
	{ LXPR_PID_MOUNTINFO,	"mountinfo" },
	{ LXPR_PID_OOM_SCR_ADJ,	"oom_score_adj" },
	{ LXPR_PID_ROOTDIR,	"root" },
	{ LXPR_PID_STAT,	"stat" },
	{ LXPR_PID_STATM,	"statm" },
	{ LXPR_PID_STATUS,	"status" },
	{ LXPR_PID_TASKDIR,	"task" },
	{ LXPR_PID_FDDIR,	"fd" }
};

#define	PIDDIRFILES	(sizeof (piddir) / sizeof (piddir[0]))

/*
 * Contents of an lx /proc/<pid>/task/<tid> directory.
 * Note: several entries deliberately reuse the per-process (LXPR_PID_*)
 * node types rather than per-thread (LXPR_PID_TID_*) ones; only auxv,
 * comm, oom_score_adj, stat and status have thread-specific variants.
 */
static lxpr_dirent_t tiddir[] = {
	{ LXPR_PID_TID_AUXV,	"auxv" },
	{ LXPR_PID_CGROUP,	"cgroup" },
	{ LXPR_PID_CMDLINE,	"cmdline" },
	{ LXPR_PID_TID_COMM,	"comm" },
	{ LXPR_PID_CPU,		"cpu" },
	{ LXPR_PID_CURDIR,	"cwd" },
	{ LXPR_PID_ENV,		"environ" },
	{ LXPR_PID_EXE,		"exe" },
	{ LXPR_PID_LIMITS,	"limits" },
	{ LXPR_PID_MAPS,	"maps" },
	{ LXPR_PID_MEM,		"mem" },
	{ LXPR_PID_MOUNTINFO,	"mountinfo" },
	{ LXPR_PID_TID_OOM_SCR_ADJ,	"oom_score_adj" },
	{ LXPR_PID_ROOTDIR,	"root" },
	{ LXPR_PID_TID_STAT,	"stat" },
	{ LXPR_PID_STATM,	"statm" },
	{ LXPR_PID_TID_STATUS,	"status" },
	{ LXPR_PID_FDDIR,	"fd" }
};

#define	TIDDIRFILES	(sizeof (tiddir) / sizeof (tiddir[0]))

/* Linux RLIM_INFINITY: unsigned 64-bit all-ones. */
#define	LX_RLIM_INFINITY	0xFFFFFFFFFFFFFFFF

/* True if a resource control value represents "unlimited". */
#define	RCTL_INFINITE(x) \
	((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \
	(x->rcv_flagaction & RCTL_GLOBAL_INFINITE))

/*
 * Mapping from a Linux /proc/<pid>/limits row to the native rctl that
 * backs it.  A NULL rlim_rctl means there is no native equivalent.
 */
typedef struct lxpr_rlimtab {
	char	*rlim_name;	/* limit name */
	char	*rlim_unit;	/* limit unit */
	char	*rlim_rctl;	/* rctl source */
} lxpr_rlimtab_t;

/*
 * NOTE(review): rows are presumably emitted in Linux RLIMIT_* order by
 * the limits read handler -- confirm before reordering.
 */
static lxpr_rlimtab_t lxpr_rlimtab[] = {
	{ "Max cpu time",	"seconds",	"process.max-cpu-time" },
	{ "Max file size",	"bytes",	"process.max-file-size" },
	{ "Max data size",	"bytes",	"process.max-data-size" },
	{ "Max stack size",	"bytes",	"process.max-stack-size" },
	{ "Max core file size",	"bytes",	"process.max-core-size" },
	{ "Max resident set",	"bytes",	"zone.max-physical-memory" },
	{ "Max processes",	"processes",	"zone.max-lwps" },
	{ "Max open files",	"files",	"process.max-file-descriptor" },
	{ "Max locked memory",	"bytes",	"zone.max-locked-memory" },
	{ "Max address space",	"bytes",	"process.max-address-space" },
	{ "Max file locks",	"locks",	NULL },
	{ "Max pending signals",	"signals",
		"process.max-sigqueue-size" },
	{ "Max msgqueue size",	"bytes",	"process.max-msg-messages" },
	{ NULL, NULL, NULL }
};


/*
 * contents of lx /proc/net directory
 */
static lxpr_dirent_t netdir[] = {
	{ LXPR_NET_ARP,		"arp" },
	{ LXPR_NET_DEV,		"dev" },
	{ LXPR_NET_DEV_MCAST,	"dev_mcast" },
	{ LXPR_NET_IF_INET6,	"if_inet6" },
	{ LXPR_NET_IGMP,	"igmp" },
	{ LXPR_NET_IP_MR_CACHE,	"ip_mr_cache" },
	{ LXPR_NET_IP_MR_VIF,	"ip_mr_vif" },
	{ LXPR_NET_IPV6_ROUTE,	"ipv6_route" },
	{ LXPR_NET_MCFILTER,	"mcfilter" },
	{ LXPR_NET_NETSTAT,	"netstat" },
	{ LXPR_NET_RAW,		"raw" },
	{ LXPR_NET_ROUTE,	"route" },
	{ LXPR_NET_RPC,		"rpc" },
	{ LXPR_NET_RT_CACHE,	"rt_cache" },
	{ LXPR_NET_SOCKSTAT,	"sockstat" },
	{ LXPR_NET_SNMP,	"snmp" },
	{ LXPR_NET_STAT,	"stat" },
	{ LXPR_NET_TCP,		"tcp" },
	{ LXPR_NET_TCP6,	"tcp6" },
	{ LXPR_NET_UDP,		"udp" },
	{ LXPR_NET_UDP6,	"udp6" },
	{ LXPR_NET_UNIX,	"unix" }
};

#define	NETDIRFILES	(sizeof (netdir) / sizeof (netdir[0]))

/*
 * contents of /proc/sys directory
 */
static lxpr_dirent_t sysdir[] = {
	{ LXPR_SYS_FSDIR,	"fs" },
	{ LXPR_SYS_KERNELDIR,	"kernel" },
	{ LXPR_SYS_NETDIR,	"net" },
	{ LXPR_SYS_VMDIR,	"vm" },
};

#define	SYSDIRFILES	(sizeof (sysdir) / sizeof (sysdir[0]))

/*
 * contents of /proc/sys/fs directory
 */
static lxpr_dirent_t sys_fsdir[] = {
	{ LXPR_SYS_FS_INOTIFYDIR,	"inotify" },
};

#define	SYS_FSDIRFILES	(sizeof (sys_fsdir) / sizeof (sys_fsdir[0]))

/*
 * contents of /proc/sys/fs/inotify directory
 */
static lxpr_dirent_t sys_fs_inotifydir[] = {
	{ LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS,	"max_queued_events" },
	{ LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES,	"max_user_instances" },
	{ LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES,		"max_user_watches" },
};

#define	SYS_FS_INOTIFYDIRFILES \
	(sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0]))

/*
 * contents of /proc/sys/kernel directory
 */
static lxpr_dirent_t sys_kerneldir[] = {
	{ LXPR_SYS_KERNEL_CAPLCAP,	"cap_last_cap" },
	{ LXPR_SYS_KERNEL_COREPATT,	"core_pattern" },
	{ LXPR_SYS_KERNEL_HOSTNAME,	"hostname" },
	{ LXPR_SYS_KERNEL_MSGMNI,	"msgmni" },
	{ LXPR_SYS_KERNEL_NGROUPS_MAX,	"ngroups_max" },
	{ LXPR_SYS_KERNEL_OSREL,	"osrelease" },
	{ LXPR_SYS_KERNEL_PID_MAX,	"pid_max" },
	{ LXPR_SYS_KERNEL_RANDDIR,	"random" },
	{ LXPR_SYS_KERNEL_SEM,		"sem" },
	{ LXPR_SYS_KERNEL_SHMMAX,	"shmmax" },
	{ LXPR_SYS_KERNEL_SHMMNI,	"shmmni" },
	{ LXPR_SYS_KERNEL_THREADS_MAX,	"threads-max" },
};

#define	SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0]))

/*
 * contents of /proc/sys/kernel/random directory
 */
static lxpr_dirent_t sys_randdir[] = {
	{ LXPR_SYS_KERNEL_RAND_BOOTID,	"boot_id" },
};

#define	SYS_RANDDIRFILES (sizeof (sys_randdir) / sizeof (sys_randdir[0]))

/*
 * contents of /proc/sys/net directory
 */
static lxpr_dirent_t sys_netdir[] = {
	{ LXPR_SYS_NET_COREDIR,	"core" },
};

#define	SYS_NETDIRFILES	(sizeof (sys_netdir) / sizeof (sys_netdir[0]))

/*
 * contents of /proc/sys/net/core directory
 */
static
lxpr_dirent_t sys_net_coredir[] = {
	{ LXPR_SYS_NET_CORE_SOMAXCON,	"somaxconn" },
};

#define	SYS_NET_COREDIRFILES \
	(sizeof (sys_net_coredir) / sizeof (sys_net_coredir[0]))

/*
 * contents of /proc/sys/vm directory
 */
static lxpr_dirent_t sys_vmdir[] = {
	{ LXPR_SYS_VM_MINFR_KB,		"min_free_kbytes" },
	{ LXPR_SYS_VM_NHUGEP,		"nr_hugepages" },
	{ LXPR_SYS_VM_OVERCOMMIT_MEM,	"overcommit_memory" },
	{ LXPR_SYS_VM_SWAPPINESS,	"swappiness" },
};

#define	SYS_VMDIRFILES	(sizeof (sys_vmdir) / sizeof (sys_vmdir[0]))

/*
 * lxpr_open(): Vnode operation for VOP_OPEN()
 *
 * Rejects FWRITE opens except on the small whitelist of writable nodes.
 * If the node shadows a real vnode (lxpr_realvp), the open is forwarded
 * to that vnode and *vpp is swapped to it on success.
 */
static int
lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	vnode_t *vp = *vpp;
	lxpr_node_t *lxpnp = VTOLXP(vp);
	lxpr_nodetype_t type = lxpnp->lxpr_type;
	vnode_t *rvp;
	int error = 0;

	if (flag & FWRITE) {
		/* Restrict writes to certain files */
		switch (type) {
		case LXPR_PID_OOM_SCR_ADJ:
		case LXPR_PID_TID_OOM_SCR_ADJ:
		case LXPR_SYS_KERNEL_COREPATT:
		case LXPR_SYS_NET_CORE_SOMAXCON:
		case LXPR_SYS_VM_OVERCOMMIT_MEM:
		case LXPR_SYS_VM_SWAPPINESS:
		case LXPR_PID_FD_FD:
		case LXPR_PID_TID_FD_FD:
			break;
		default:
			return (EPERM);
		}
	}

	/*
	 * If we are opening an underlying file only allow regular files,
	 * fifos or sockets; reject the open for anything else.
	 * Just do it if we are opening the current or root directory.
	 */
	if (lxpnp->lxpr_realvp != NULL) {
		rvp = lxpnp->lxpr_realvp;

		if (type == LXPR_PID_FD_FD && rvp->v_type != VREG &&
		    rvp->v_type != VFIFO && rvp->v_type != VSOCK) {
			error = EACCES;
		} else {
			if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) {
				/*
				 * This flag lets the fifo open know that
				 * we're using proc/fd to open a fd which we
				 * already have open. Otherwise, the fifo might
				 * reject an open if the other end has closed.
				 */
				flag |= FKLYR;
			}
			/*
			 * Need to hold rvp since VOP_OPEN() may release it.
			 */
			VN_HOLD(rvp);
			error = VOP_OPEN(&rvp, flag, cr, ct);
			if (error) {
				VN_RELE(rvp);
			} else {
				/*
				 * Hand the caller the underlying vnode and
				 * drop our reference on the lx /proc node.
				 */
				*vpp = rvp;
				VN_RELE(vp);
			}
		}
	}

	return (error);
}


/*
 * lxpr_close(): Vnode operation for VOP_CLOSE()
 */
/* ARGSUSED */
static int
lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
    caller_context_t *ct)
{
	lxpr_node_t *lxpr = VTOLXP(vp);
	lxpr_nodetype_t type = lxpr->lxpr_type;

	/*
	 * we should never get here because the close is done on the realvp
	 * for these nodes (lxpr_open swapped *vpp to the underlying vnode)
	 */
	ASSERT(type != LXPR_PID_FD_FD &&
	    type != LXPR_PID_CURDIR &&
	    type != LXPR_PID_ROOTDIR &&
	    type != LXPR_PID_EXE);

	return (0);
}

/*
 * Array of read functions, indexed by lx /proc file type.
 * NOTE(review): entry order must match the lxpr_nodetype_t enum
 * (presumably declared in lx_proc.h) -- confirm before editing.
 */
static void (*lxpr_read_function[LXPR_NFILES])() = {
	lxpr_read_isdir,		/* /proc			*/
	lxpr_read_isdir,		/* /proc/<pid>			*/
	lxpr_read_pid_auxv,		/* /proc/<pid>/auxv		*/
	lxpr_read_pid_cgroup,		/* /proc/<pid>/cgroup		*/
	lxpr_read_pid_cmdline,		/* /proc/<pid>/cmdline		*/
	lxpr_read_pid_comm,		/* /proc/<pid>/comm		*/
	lxpr_read_empty,		/* /proc/<pid>/cpu		*/
	lxpr_read_invalid,		/* /proc/<pid>/cwd		*/
	lxpr_read_pid_env,		/* /proc/<pid>/environ		*/
	lxpr_read_invalid,		/* /proc/<pid>/exe		*/
	lxpr_read_pid_limits,		/* /proc/<pid>/limits		*/
	lxpr_read_pid_maps,		/* /proc/<pid>/maps		*/
	lxpr_read_empty,		/* /proc/<pid>/mem		*/
	lxpr_read_pid_mountinfo,	/* /proc/<pid>/mountinfo	*/
	lxpr_read_pid_oom_scr_adj,	/* /proc/<pid>/oom_score_adj	*/
	lxpr_read_invalid,		/* /proc/<pid>/root		*/
	lxpr_read_pid_stat,		/* /proc/<pid>/stat		*/
	lxpr_read_pid_statm,		/* /proc/<pid>/statm		*/
	lxpr_read_pid_status,		/* /proc/<pid>/status		*/
	lxpr_read_isdir,		/* /proc/<pid>/task		*/
	lxpr_read_isdir,		/* /proc/<pid>/task/nn		*/
	lxpr_read_isdir,		/* /proc/<pid>/fd		*/
	lxpr_read_fd,			/* /proc/<pid>/fd/nn		*/
	lxpr_read_pid_auxv,		/* /proc/<pid>/task/<tid>/auxv	*/
	lxpr_read_pid_cgroup,		/* /proc/<pid>/task/<tid>/cgroup */
	lxpr_read_pid_cmdline,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_read_pid_comm,		/* /proc/<pid>/task/<tid>/comm	*/
	lxpr_read_empty,		/* /proc/<pid>/task/<tid>/cpu	*/
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/cwd	*/
	lxpr_read_pid_env,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/exe	*/
	lxpr_read_pid_limits,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_read_pid_maps,		/* /proc/<pid>/task/<tid>/maps	*/
	lxpr_read_empty,		/* /proc/<pid>/task/<tid>/mem	*/
	lxpr_read_pid_mountinfo,	/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_read_pid_oom_scr_adj,	/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/root	*/
	lxpr_read_pid_tid_stat,		/* /proc/<pid>/task/<tid>/stat	*/
	lxpr_read_pid_statm,		/* /proc/<pid>/task/<tid>/statm	*/
	lxpr_read_pid_tid_status,	/* /proc/<pid>/task/<tid>/status */
	lxpr_read_isdir,		/* /proc/<pid>/task/<tid>/fd	*/
	lxpr_read_fd,			/* /proc/<pid>/task/<tid>/fd/nn	*/
	lxpr_read_cgroups,		/* /proc/cgroups	*/
	lxpr_read_empty,		/* /proc/cmdline	*/
	lxpr_read_cpuinfo,		/* /proc/cpuinfo	*/
	lxpr_read_empty,		/* /proc/devices	*/
	lxpr_read_diskstats,		/* /proc/diskstats	*/
	lxpr_read_empty,		/* /proc/dma		*/
	lxpr_read_filesystems,		/* /proc/filesystems	*/
	lxpr_read_empty,		/* /proc/interrupts	*/
	lxpr_read_empty,		/* /proc/ioports	*/
	lxpr_read_empty,		/* /proc/kcore		*/
	lxpr_read_invalid,		/* /proc/kmsg -- see lxpr_read() */
	lxpr_read_loadavg,		/* /proc/loadavg	*/
	lxpr_read_meminfo,		/* /proc/meminfo	*/
	lxpr_read_empty,		/* /proc/modules	*/
	lxpr_read_mounts,		/* /proc/mounts		*/
	lxpr_read_isdir,		/* /proc/net		*/
	lxpr_read_net_arp,		/* /proc/net/arp	*/
	lxpr_read_net_dev,		/* /proc/net/dev	*/
	lxpr_read_net_dev_mcast,	/* /proc/net/dev_mcast	*/
	lxpr_read_net_if_inet6,		/* /proc/net/if_inet6	*/
	lxpr_read_net_igmp,		/* /proc/net/igmp	*/
	lxpr_read_net_ip_mr_cache,	/* /proc/net/ip_mr_cache */
	lxpr_read_net_ip_mr_vif,	/* /proc/net/ip_mr_vif	*/
	lxpr_read_net_ipv6_route,	/* /proc/net/ipv6_route	*/
	lxpr_read_net_mcfilter,		/* /proc/net/mcfilter	*/
	lxpr_read_net_netstat,		/* /proc/net/netstat	*/
	lxpr_read_net_raw,		/* /proc/net/raw	*/
	lxpr_read_net_route,		/* /proc/net/route	*/
	lxpr_read_net_rpc,		/* /proc/net/rpc	*/
	lxpr_read_net_rt_cache,		/* /proc/net/rt_cache	*/
	lxpr_read_net_sockstat,		/* /proc/net/sockstat	*/
	lxpr_read_net_snmp,		/* /proc/net/snmp	*/
	lxpr_read_net_stat,		/* /proc/net/stat	*/
	lxpr_read_net_tcp,		/* /proc/net/tcp	*/
	lxpr_read_net_tcp6,		/* /proc/net/tcp6	*/
	lxpr_read_net_udp,		/* /proc/net/udp	*/
	lxpr_read_net_udp6,		/* /proc/net/udp6	*/
	lxpr_read_net_unix,		/* /proc/net/unix	*/
	lxpr_read_partitions,		/* /proc/partitions	*/
	lxpr_read_invalid,		/* /proc/self		*/
	lxpr_read_stat,			/* /proc/stat		*/
	lxpr_read_swaps,		/* /proc/swaps		*/
	lxpr_read_invalid,		/* /proc/sys		*/
	lxpr_read_invalid,		/* /proc/sys/fs		*/
	lxpr_read_invalid,		/* /proc/sys/fs/inotify	*/
	lxpr_read_sys_fs_inotify_max_queued_events, /* max_queued_events */
	lxpr_read_sys_fs_inotify_max_user_instances, /* max_user_instances */
	lxpr_read_sys_fs_inotify_max_user_watches, /* max_user_watches */
	lxpr_read_invalid,		/* /proc/sys/kernel	*/
	lxpr_read_sys_kernel_caplcap,	/* /proc/sys/kernel/cap_last_cap */
	lxpr_read_sys_kernel_corepatt,	/* /proc/sys/kernel/core_pattern */
	lxpr_read_sys_kernel_hostname,	/* /proc/sys/kernel/hostname */
	lxpr_read_sys_kernel_msgmni,	/* /proc/sys/kernel/msgmni */
	lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */
	lxpr_read_sys_kernel_osrel,	/* /proc/sys/kernel/osrelease */
	lxpr_read_sys_kernel_pid_max,	/* /proc/sys/kernel/pid_max */
	lxpr_read_invalid,		/* /proc/sys/kernel/random */
	lxpr_read_sys_kernel_rand_bootid, /* /proc/sys/kernel/random/boot_id */
	lxpr_read_sys_kernel_sem,	/* /proc/sys/kernel/sem */
	lxpr_read_sys_kernel_shmmax,	/* /proc/sys/kernel/shmmax */
	lxpr_read_sys_kernel_shmmni,	/* /proc/sys/kernel/shmmni */
	lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */
	lxpr_read_invalid,		/* /proc/sys/net	*/
	lxpr_read_invalid,		/* /proc/sys/net/core	*/
	lxpr_read_sys_net_core_somaxc,	/* /proc/sys/net/core/somaxconn */
	lxpr_read_invalid,		/* /proc/sys/vm		*/
	lxpr_read_sys_vm_minfr_kb,	/* /proc/sys/vm/min_free_kbytes */
	lxpr_read_sys_vm_nhpages,	/* /proc/sys/vm/nr_hugepages */
	lxpr_read_sys_vm_overcommit_mem, /* /proc/sys/vm/overcommit_memory */
	lxpr_read_sys_vm_swappiness,	/* /proc/sys/vm/swappiness */
	lxpr_read_uptime,		/* /proc/uptime		*/
	lxpr_read_version,		/* /proc/version	*/
};

/*
 * Array of lookup functions, indexed by lx /proc file type.
 * NOTE(review): must stay in lock-step with lxpr_read_function above.
 */
static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
	lxpr_lookup_procdir,		/* /proc		*/
	lxpr_lookup_piddir,		/* /proc/<pid>		*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/auxv	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cgroup	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cmdline	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/comm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cpu	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cwd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/environ	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/exe	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/limits	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/maps	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/mem	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/mountinfo */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/oom_score_adj */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/root	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/stat	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/statm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/status	*/
	lxpr_lookup_taskdir,		/* /proc/<pid>/task	*/
	lxpr_lookup_task_tid_dir,	/* /proc/<pid>/task/nn	*/
	lxpr_lookup_fddir,		/* /proc/<pid>/fd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/fd/nn	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/auxv	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cgroup */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/comm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cpu	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cwd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/exe	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/maps	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/mem	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/root	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/stat	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/statm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/status */
	lxpr_lookup_fddir,		/* /proc/<pid>/task/<tid>/fd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/fd/nn	*/
	lxpr_lookup_not_a_dir,		/* /proc/cgroups	*/
	lxpr_lookup_not_a_dir,		/* /proc/cmdline	*/
	lxpr_lookup_not_a_dir,		/* /proc/cpuinfo	*/
	lxpr_lookup_not_a_dir,		/* /proc/devices	*/
	lxpr_lookup_not_a_dir,		/* /proc/diskstats	*/
	lxpr_lookup_not_a_dir,		/* /proc/dma		*/
	lxpr_lookup_not_a_dir,		/* /proc/filesystems	*/
	lxpr_lookup_not_a_dir,		/* /proc/interrupts	*/
	lxpr_lookup_not_a_dir,		/* /proc/ioports	*/
	lxpr_lookup_not_a_dir,		/* /proc/kcore		*/
	lxpr_lookup_not_a_dir,		/* /proc/kmsg		*/
	lxpr_lookup_not_a_dir,		/* /proc/loadavg	*/
	lxpr_lookup_not_a_dir,		/* /proc/meminfo	*/
	lxpr_lookup_not_a_dir,		/* /proc/modules	*/
	lxpr_lookup_not_a_dir,		/* /proc/mounts		*/
	lxpr_lookup_netdir,		/* /proc/net		*/
	lxpr_lookup_not_a_dir,		/* /proc/net/arp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/dev	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/dev_mcast	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/if_inet6	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/igmp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/ip_mr_cache */
	lxpr_lookup_not_a_dir,		/* /proc/net/ip_mr_vif	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/ipv6_route	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/mcfilter	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/netstat	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/raw	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/route	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/rpc	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/rt_cache	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/sockstat	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/snmp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/stat	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/tcp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/tcp6	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/udp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/udp6	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/unix	*/
	lxpr_lookup_not_a_dir,		/* /proc/partitions	*/
	lxpr_lookup_not_a_dir,		/* /proc/self		*/
	lxpr_lookup_not_a_dir,		/* /proc/stat		*/
	lxpr_lookup_not_a_dir,		/* /proc/swaps		*/
	lxpr_lookup_sysdir,		/* /proc/sys		*/
	lxpr_lookup_sys_fsdir,		/* /proc/sys/fs		*/
	lxpr_lookup_sys_fs_inotifydir,	/* /proc/sys/fs/inotify	*/
	lxpr_lookup_not_a_dir,		/* .../inotify/max_queued_events */
	lxpr_lookup_not_a_dir,		/* .../inotify/max_user_instances */
	lxpr_lookup_not_a_dir,		/* .../inotify/max_user_watches */
	lxpr_lookup_sys_kerneldir,	/* /proc/sys/kernel	*/
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/cap_last_cap */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/core_pattern */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/hostname */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/msgmni */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/ngroups_max */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/osrelease */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/pid_max */
	lxpr_lookup_sys_kdir_randdir,	/* /proc/sys/kernel/random */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/random/boot_id */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/sem */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/shmmax */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/shmmni */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/threads-max */
	lxpr_lookup_sys_netdir,		/* /proc/sys/net	*/
	lxpr_lookup_sys_net_coredir,	/* /proc/sys/net/core	*/
	lxpr_lookup_not_a_dir,		/* /proc/sys/net/core/somaxconn */
	lxpr_lookup_sys_vmdir,		/* /proc/sys/vm		*/
	lxpr_lookup_not_a_dir,		/* /proc/sys/vm/min_free_kbytes */
	lxpr_lookup_not_a_dir,		/* /proc/sys/vm/nr_hugepages */
	lxpr_lookup_not_a_dir,		/* /proc/sys/vm/overcommit_memory */
	lxpr_lookup_not_a_dir,		/* /proc/sys/vm/swappiness */
	lxpr_lookup_not_a_dir,		/* /proc/uptime		*/
	lxpr_lookup_not_a_dir,		/* /proc/version	*/
};

/*
 * Array of readdir functions, indexed by /proc file type.
 */
static int (*lxpr_readdir_function[LXPR_NFILES])() = {
	lxpr_readdir_procdir,		/* /proc */
	lxpr_readdir_piddir,		/* /proc/<pid> */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/auxv */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cgroup */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cmdline */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/comm */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cpu */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cwd */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/environ */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/exe */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/limits */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/maps */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/mem */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/mountinfo */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/oom_score_adj */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/root */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/stat */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/statm */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/status */
	lxpr_readdir_taskdir,		/* /proc/<pid>/task */
	lxpr_readdir_task_tid_dir,	/* /proc/<pid>/task/nn */
	lxpr_readdir_fddir,		/* /proc/<pid>/fd */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/fd/nn */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/auxv */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/cgroup */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/comm */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/cpu */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/cwd */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/exe */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/maps */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/mem */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/root */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/stat */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/statm */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/status */
	lxpr_readdir_fddir,		/* /proc/<pid>/task/<tid>/fd */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/fd/nn */
	lxpr_readdir_not_a_dir,		/* /proc/cgroups */
	lxpr_readdir_not_a_dir,		/* /proc/cmdline */
	lxpr_readdir_not_a_dir,		/* /proc/cpuinfo */
	lxpr_readdir_not_a_dir,		/* /proc/devices */
	lxpr_readdir_not_a_dir,		/* /proc/diskstats */
	lxpr_readdir_not_a_dir,		/* /proc/dma */
	lxpr_readdir_not_a_dir,		/* /proc/filesystems */
	lxpr_readdir_not_a_dir,		/* /proc/interrupts */
	lxpr_readdir_not_a_dir,		/* /proc/ioports */
	lxpr_readdir_not_a_dir,		/* /proc/kcore */
	lxpr_readdir_not_a_dir,		/* /proc/kmsg */
	lxpr_readdir_not_a_dir,		/* /proc/loadavg */
	lxpr_readdir_not_a_dir,		/* /proc/meminfo */
	lxpr_readdir_not_a_dir,		/* /proc/modules */
	lxpr_readdir_not_a_dir,		/* /proc/mounts */
	lxpr_readdir_netdir,		/* /proc/net */
	lxpr_readdir_not_a_dir,		/* /proc/net/arp */
	lxpr_readdir_not_a_dir,		/* /proc/net/dev */
	lxpr_readdir_not_a_dir,		/* /proc/net/dev_mcast */
	lxpr_readdir_not_a_dir,		/* /proc/net/if_inet6 */
	lxpr_readdir_not_a_dir,		/* /proc/net/igmp */
	lxpr_readdir_not_a_dir,		/* /proc/net/ip_mr_cache */
	lxpr_readdir_not_a_dir,		/* /proc/net/ip_mr_vif */
	lxpr_readdir_not_a_dir,		/* /proc/net/ipv6_route */
	lxpr_readdir_not_a_dir,		/* /proc/net/mcfilter */
	lxpr_readdir_not_a_dir,		/* /proc/net/netstat */
	lxpr_readdir_not_a_dir,		/* /proc/net/raw */
	lxpr_readdir_not_a_dir,		/* /proc/net/route */
	lxpr_readdir_not_a_dir,		/* /proc/net/rpc */
	lxpr_readdir_not_a_dir,		/* /proc/net/rt_cache */
	lxpr_readdir_not_a_dir,		/* /proc/net/sockstat */
	lxpr_readdir_not_a_dir,		/* /proc/net/snmp */
	lxpr_readdir_not_a_dir,		/* /proc/net/stat */
	lxpr_readdir_not_a_dir,		/* /proc/net/tcp */
	lxpr_readdir_not_a_dir,		/* /proc/net/tcp6 */
	lxpr_readdir_not_a_dir,		/* /proc/net/udp */
	lxpr_readdir_not_a_dir,		/* /proc/net/udp6 */
	lxpr_readdir_not_a_dir,		/* /proc/net/unix */
	lxpr_readdir_not_a_dir,		/* /proc/partitions */
	lxpr_readdir_not_a_dir,		/* /proc/self */
	lxpr_readdir_not_a_dir,		/* /proc/stat */
	lxpr_readdir_not_a_dir,		/* /proc/swaps */
	lxpr_readdir_sysdir,		/* /proc/sys */
	lxpr_readdir_sys_fsdir,		/* /proc/sys/fs */
	lxpr_readdir_sys_fs_inotifydir,	/* /proc/sys/fs/inotify */
	lxpr_readdir_not_a_dir,		/* .../inotify/max_queued_events */
	lxpr_readdir_not_a_dir,		/* .../inotify/max_user_instances */
	lxpr_readdir_not_a_dir,		/* .../inotify/max_user_watches */
	lxpr_readdir_sys_kerneldir,	/* /proc/sys/kernel */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/cap_last_cap */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/core_pattern */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/hostname */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/msgmni */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/ngroups_max */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/osrelease */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/pid_max */
	lxpr_readdir_sys_kdir_randdir,	/* /proc/sys/kernel/random */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/random/boot_id */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/sem */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/shmmax */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/shmmni */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/threads-max */
	lxpr_readdir_sys_netdir,	/* /proc/sys/net */
	lxpr_readdir_sys_net_coredir,	/* /proc/sys/net/core */
	lxpr_readdir_not_a_dir,		/* /proc/sys/net/core/somaxconn */
	lxpr_readdir_sys_vmdir,		/* /proc/sys/vm */
	lxpr_readdir_not_a_dir,		/* /proc/sys/vm/min_free_kbytes */
	lxpr_readdir_not_a_dir,
/* /proc/sys/vm/nr_hugepages */ 962 lxpr_readdir_not_a_dir, /* /proc/sys/vm/overcommit_memory */ 963 lxpr_readdir_not_a_dir, /* /proc/sys/vm/swappiness */ 964 lxpr_readdir_not_a_dir, /* /proc/uptime */ 965 lxpr_readdir_not_a_dir, /* /proc/version */ 966 }; 967 968 969 /* 970 * lxpr_read(): Vnode operation for VOP_READ() 971 * 972 * As the format of all the files that can be read in the lx procfs is human 973 * readable and not binary structures there do not have to be different 974 * read variants depending on whether the reading process model is 32 or 64 bits 975 * (at least in general, and certainly the difference is unlikely to be enough 976 * to justify have different routines for 32 and 64 bit reads 977 */ 978 /* ARGSUSED */ 979 static int 980 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 981 caller_context_t *ct) 982 { 983 lxpr_node_t *lxpnp = VTOLXP(vp); 984 lxpr_nodetype_t type = lxpnp->lxpr_type; 985 lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop); 986 int error; 987 988 ASSERT(type < LXPR_NFILES); 989 990 if (type == LXPR_KMSG) { 991 ldi_ident_t li = VTOLXPM(vp)->lxprm_li; 992 ldi_handle_t ldih; 993 struct strioctl str; 994 int rv; 995 996 /* 997 * Open the zone's console device using the layered driver 998 * interface. 999 */ 1000 if ((error = 1001 ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li)) != 0) 1002 return (error); 1003 1004 /* 1005 * Send an ioctl to the underlying console device, letting it 1006 * know we're interested in getting console messages. 
1007 */ 1008 str.ic_cmd = I_CONSLOG; 1009 str.ic_timout = 0; 1010 str.ic_len = 0; 1011 str.ic_dp = NULL; 1012 if ((error = ldi_ioctl(ldih, I_STR, 1013 (intptr_t)&str, FKIOCTL, cr, &rv)) != 0) 1014 return (error); 1015 1016 lxpr_read_kmsg(lxpnp, uiobuf, ldih); 1017 1018 if ((error = ldi_close(ldih, FREAD, cr)) != 0) 1019 return (error); 1020 } else { 1021 lxpr_read_function[type](lxpnp, uiobuf); 1022 } 1023 1024 error = lxpr_uiobuf_flush(uiobuf); 1025 lxpr_uiobuf_free(uiobuf); 1026 1027 return (error); 1028 } 1029 1030 /* 1031 * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty() 1032 * 1033 * Various special case reads: 1034 * - trying to read a directory 1035 * - invalid file (used to mean a file that should be implemented, 1036 * but isn't yet) 1037 * - empty file 1038 * - wait to be able to read a file that will never have anything to read 1039 */ 1040 /* ARGSUSED */ 1041 static void 1042 lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1043 { 1044 lxpr_uiobuf_seterr(uiobuf, EISDIR); 1045 } 1046 1047 /* ARGSUSED */ 1048 static void 1049 lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1050 { 1051 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1052 } 1053 1054 /* ARGSUSED */ 1055 static void 1056 lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1057 { 1058 } 1059 1060 /* 1061 * lxpr_read_pid_auxv(): read process aux vector 1062 */ 1063 static void 1064 lxpr_read_pid_auxv(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1065 { 1066 proc_t *p; 1067 lx_proc_data_t *pd; 1068 lx_elf_data_t *edp = NULL; 1069 int i, cnt; 1070 1071 ASSERT(lxpnp->lxpr_type == LXPR_PID_AUXV || 1072 lxpnp->lxpr_type == LXPR_PID_TID_AUXV); 1073 1074 p = lxpr_lock(lxpnp->lxpr_pid); 1075 1076 if (p == NULL) { 1077 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1078 return; 1079 } 1080 if ((pd = ptolxproc(p)) == NULL) { 1081 /* Emit a single AT_NULL record for non-branded processes */ 1082 auxv_t buf; 1083 1084 bzero(&buf, sizeof (buf)); 1085 lxpr_unlock(p); 1086 
		lxpr_uiobuf_write(uiobuf, (char *)&buf, sizeof (buf));
		return;
	} else {
		edp = &pd->l_elf_data;
	}

	if (p->p_model == DATAMODEL_NATIVE) {
		auxv_t buf[__KERN_NAUXV_IMPL];

		/*
		 * Because a_type is only of size int (not long), the buffer
		 * contents must be zeroed first to ensure cleanliness.
		 */
		bzero(buf, sizeof (buf));
		for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
			if (lx_auxv_stol(&p->p_user.u_auxv[i],
			    &buf[cnt], edp) == 0) {
				cnt++;
			}
			if (p->p_user.u_auxv[i].a_type == AT_NULL) {
				break;
			}
		}
		/*
		 * NOTE(review): this branch writes to the uiobuf before
		 * dropping the proc lock, while the 32-bit branch below
		 * unlocks first — confirm whether that ordering difference
		 * is intentional.
		 */
		lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
		lxpr_unlock(p);
	}
#if defined(_SYSCALL32_IMPL)
	else {
		auxv32_t buf[__KERN_NAUXV_IMPL];

		for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
			auxv_t temp;

			if (lx_auxv_stol(&p->p_user.u_auxv[i],
			    &temp, edp) == 0) {
				/* Narrow each entry to the 32-bit layout. */
				buf[cnt].a_type = (int)temp.a_type;
				buf[cnt].a_un.a_val = (int)temp.a_un.a_val;
				cnt++;
			}
			if (p->p_user.u_auxv[i].a_type == AT_NULL) {
				break;
			}
		}
		lxpr_unlock(p);
		lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
	}
#endif /* defined(_SYSCALL32_IMPL) */
}

/*
 * lxpr_read_pid_cgroup(): read cgroups for process
 */
static void
lxpr_read_pid_cgroup(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_CGROUP ||
	    lxpnp->lxpr_type == LXPR_PID_TID_CGROUP);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	/* basic stub, 3rd field will need to be populated */
	lxpr_uiobuf_printf(uiobuf, "1:name=systemd:/\n");

	lxpr_unlock(p);
}

/*
 * lxpr_copy_cmdline(): copy the Linux-style argv area of the target process
 * into the caller's uio, using the argv/envp bounds recorded in the lx proc
 * data.  Called with p->p_lock held (temporarily dropped around the copies);
 * results and errors are reported through the uiobuf.
 */
static void
lxpr_copy_cmdline(proc_t *p, lx_proc_data_t *pd, lxpr_uiobuf_t *uiobuf)
{
	uio_t *uiop = uiobuf->uiop;
	char *buf = uiobuf->buffer;
	int bsz = uiobuf->buffsize;
	boolean_t env_overflow = B_FALSE;
	uintptr_t pos = pd->l_args_start + uiop->uio_offset;
	uintptr_t estart = pd->l_envs_start;
	uintptr_t eend = pd->l_envs_end;
	size_t chunk, copied;
	int err = 0;

	/* Do not bother with data beyond the end of the envp strings area. */
	if (pos > eend) {
		return;
	}
	mutex_exit(&p->p_lock);

	/*
	 * If the starting or ending bounds are outside the argv strings area,
	 * check to see if the process has overwritten the terminating NULL.
	 * If not, no data needs to be copied from outside the argv area.
	 */
	if (pos >= estart || (pos + uiop->uio_resid) >= estart) {
		uint8_t term;
		if (uread(p, &term, sizeof (term), estart - 1) != 0) {
			err = EFAULT;
		} else if (term != 0) {
			env_overflow = B_TRUE;
		}
	}


	/* Data between astart and estart-1 can be copied freely. */
	while (pos < estart && uiop->uio_resid > 0 && err == 0) {
		chunk = MIN(estart - pos, uiop->uio_resid);
		chunk = MIN(chunk, bsz);

		if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) != 0 ||
		    copied != chunk) {
			err = EFAULT;
			break;
		}
		err = uiomove(buf, copied, UIO_READ, uiop);
		pos += copied;
	}

	/*
	 * Onward from estart, data is copied as a contiguous string.  To
	 * protect env data from potential snooping, only one buffer-sized copy
	 * is allowed to avoid complex seek logic.
	 */
	if (err == 0 && env_overflow && pos == estart && uiop->uio_resid > 0) {
		chunk = MIN(eend - pos, uiop->uio_resid);
		chunk = MIN(chunk, bsz);
		if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) == 0) {
			int len = strnlen(buf, copied);
			if (len > 0) {
				err = uiomove(buf, len, UIO_READ, uiop);
			}
		}
	}

	uiobuf->error = err;
	/* reset any uiobuf state */
	uiobuf->pos = uiobuf->buffer;
	uiobuf->beg = 0;

	/* Reacquire p_lock before returning to the caller. */
	mutex_enter(&p->p_lock);
}

/*
 * lxpr_read_pid_cmdline(): read argument vector from process
 */
static void
lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	char *buf;
	size_t asz = lxpr_maxargvlen, sz;
	lx_proc_data_t *pd;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE ||
	    lxpnp->lxpr_type == LXPR_PID_TID_CMDLINE);

	buf = kmem_alloc(asz, KM_SLEEP);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		kmem_free(buf, asz);
		return;
	}

	if ((pd = ptolxproc(p)) != NULL && pd->l_args_start != 0 &&
	    pd->l_envs_start != 0 && pd->l_envs_end != 0) {
		/* Use Linux-style argv bounds if possible. */
		lxpr_copy_cmdline(p, pd, uiobuf);
	} else {
		/* Fall back to the native procfs argv reader. */
		if (prreadargv(p, buf, asz, &sz) != 0) {
			lxpr_uiobuf_seterr(uiobuf, EINVAL);
		} else {
			lxpr_uiobuf_write(uiobuf, buf, sz);
		}
	}

	lxpr_unlock(p);
	kmem_free(buf, asz);
}

/*
 * lxpr_read_pid_comm(): read command from process
 */
static void
lxpr_read_pid_comm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;

	VERIFY(lxpnp->lxpr_type == LXPR_PID_COMM ||
	    lxpnp->lxpr_type == LXPR_PID_TID_COMM);

	/*
	 * Because prctl(PR_SET_NAME) does not set custom names for threads
	 * (vs processes), there is no need for special handling here.
	 */
	if ((p = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}
	lxpr_uiobuf_printf(uiobuf, "%s\n", p->p_user.u_comm);
	lxpr_unlock(p);
}

/*
 * lxpr_read_pid_env(): read env vector from process
 */
static void
lxpr_read_pid_env(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	char *buf;
	size_t asz = lxpr_maxenvvlen, sz;
	int r;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_ENV);

	buf = kmem_alloc(asz, KM_SLEEP);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		kmem_free(buf, asz);
		return;
	}

	r = prreadenvv(p, buf, asz, &sz);
	lxpr_unlock(p);

	if (r != 0) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
	} else {
		lxpr_uiobuf_write(uiobuf, buf, sz);
	}

	kmem_free(buf, asz);
}

/*
 * lxpr_read_pid_limits(): ulimit file
 */
static void
lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	rctl_qty_t cur, max;
	rctl_val_t *oval, *nval;
	rctl_hndl_t hndl;
	char *kname;
	int i;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS ||
	    lxpnp->lxpr_type == LXPR_PID_TID_LIMITS);

	nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		kmem_free(nval, sizeof (rctl_val_t));
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n",
	    "Limit", "Soft Limit", "Hard Limit", "Units");
	for (i = 0; lxpr_rlimtab[i].rlim_name != NULL; i++) {
		kname = lxpr_rlimtab[i].rlim_rctl;
		/* default to unlimited for resources without an analog */
		cur = RLIM_INFINITY;
		max = RLIM_INFINITY;
		if (kname != NULL) {
			hndl = rctl_hndl_lookup(kname);
			oval = NULL;
			/*
			 * Walk the rctl value list; basic values supply the
			 * soft limit and privileged values the hard limit.
			 */
			while ((hndl != -1) &&
			    rctl_local_get(hndl, oval, nval, p) == 0) {
				oval = nval;
				switch (nval->rcv_privilege) {
				case RCPRIV_BASIC:
					if (!RCTL_INFINITE(nval))
						cur = nval->rcv_value;
					break;
				case RCPRIV_PRIVILEGED:
					if (!RCTL_INFINITE(nval))
						max = nval->rcv_value;
					break;
				}
			}
		}

		lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name);
		if (cur == RLIM_INFINITY || cur == LX_RLIM_INFINITY) {
			lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
		} else {
			lxpr_uiobuf_printf(uiobuf, " %-20lu", cur);
		}
		if (max == RLIM_INFINITY || max == LX_RLIM_INFINITY) {
			lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
		} else {
			lxpr_uiobuf_printf(uiobuf, " %-20lu", max);
		}
		lxpr_uiobuf_printf(uiobuf, " %-10s\n",
		    lxpr_rlimtab[i].rlim_unit);
	}

	lxpr_unlock(p);
	kmem_free(nval, sizeof (rctl_val_t));
}

/*
 * lxpr_read_pid_maps(): memory map file
 */
static void
lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	struct as *as;
	struct seg *seg;
	char *buf;
	int buflen = MAXPATHLEN;
	/* Snapshot of one segment, gathered under the AS lock. */
	struct print_data {
		uintptr_t saddr;
		uintptr_t eaddr;
		int type;
		char prot[5];
		uintptr_t offset;
		vnode_t *vp;
		struct print_data *next;
	} *print_head = NULL;
	struct print_data **print_tail = &print_head;
	struct print_data *pbuf;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS ||
	    lxpnp->lxpr_type == LXPR_PID_TID_MAPS);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	as = p->p_as;

	/* System processes have no user address space to report. */
	if (as == &kas) {
		lxpr_unlock(p);
		return;
	}

	mutex_exit(&p->p_lock);

	/* Iterate over all segments in the address space */
	AS_LOCK_ENTER(as, RW_READER);
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		vnode_t *vp;
		uint_t protbits;

		pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);

		pbuf->saddr = (uintptr_t)seg->s_base;
		pbuf->eaddr = pbuf->saddr + seg->s_size;
		pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);

		/*
		 * Cheat and only use the protection bits of the first page
		 * in the segment
		 */
		(void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
		(void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);

		if (protbits & PROT_READ)	   pbuf->prot[0] = 'r';
		if (protbits & PROT_WRITE)	   pbuf->prot[1] = 'w';
		if (protbits & PROT_EXEC)	   pbuf->prot[2] = 'x';
		if (pbuf->type & MAP_SHARED)	   pbuf->prot[3] = 's';
		else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';

		/* Hold the backing vnode (regular files only) for later. */
		if (seg->s_ops == &segvn_ops &&
		    SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
		    vp != NULL && vp->v_type == VREG) {
			VN_HOLD(vp);
			pbuf->vp = vp;
		} else {
			pbuf->vp = NULL;
		}

		pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr);

		pbuf->next = NULL;
		*print_tail = pbuf;
		print_tail = &pbuf->next;
	}
	AS_LOCK_EXIT(as);
	mutex_enter(&p->p_lock);
	lxpr_unlock(p);

	buf = kmem_alloc(buflen, KM_SLEEP);

	/* print the data we've extracted */
	pbuf = print_head;
	while (pbuf != NULL) {
		struct print_data *pbuf_next;
		vattr_t vattr;

		int maj = 0;
		int min = 0;
		ino_t inode = 0;

		*buf = '\0';
		if (pbuf->vp != NULL) {
			vattr.va_mask = AT_FSID | AT_NODEID;
			if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
			    NULL) == 0) {
				maj = getmajor(vattr.va_fsid);
				min = getminor(vattr.va_fsid);
				inode = vattr.va_nodeid;
			}
			(void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
			VN_RELE(pbuf->vp);
		}

		/*
		 * NOTE(review): p is referenced here after lxpr_unlock()
		 * above — confirm the proc hold semantics make this safe.
		 */
		if (p->p_model == DATAMODEL_LP64) {
			lxpr_uiobuf_printf(uiobuf,
			    "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n",
			    pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
1510 maj, min, inode, *buf != '\0' ? " " : "", buf); 1511 } else { 1512 lxpr_uiobuf_printf(uiobuf, 1513 "%08x-%08x %s %08x %02x:%02x %llu%s%s\n", 1514 (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr, 1515 pbuf->prot, (uint32_t)pbuf->offset, maj, min, 1516 inode, *buf != '\0' ? " " : "", buf); 1517 } 1518 1519 pbuf_next = pbuf->next; 1520 kmem_free(pbuf, sizeof (*pbuf)); 1521 pbuf = pbuf_next; 1522 } 1523 1524 kmem_free(buf, buflen); 1525 } 1526 1527 /* 1528 * lxpr_read_pid_mountinfo(): information about process mount points. e.g.: 1529 * 14 19 0:13 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw 1530 * mntid parid devnums root mntpnt mntopts - fstype mntsrc superopts 1531 * 1532 * We have to make up several of these fields. 1533 */ 1534 static void 1535 lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1536 { 1537 struct vfs *vfsp; 1538 struct vfs *vfslist; 1539 zone_t *zone = LXPTOZ(lxpnp); 1540 struct print_data { 1541 refstr_t *vfs_mntpt; 1542 refstr_t *vfs_resource; 1543 uint_t vfs_flag; 1544 int vfs_fstype; 1545 dev_t vfs_dev; 1546 struct print_data *next; 1547 } *print_head = NULL; 1548 struct print_data **print_tail = &print_head; 1549 struct print_data *printp; 1550 int root_id = 15; /* use a made-up value */ 1551 int mnt_id; 1552 1553 ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO || 1554 lxpnp->lxpr_type == LXPR_PID_TID_MOUNTINFO); 1555 1556 vfs_list_read_lock(); 1557 1558 /* root is the top-level, it does not appear in this output */ 1559 if (zone == global_zone) { 1560 vfsp = vfslist = rootvfs; 1561 } else { 1562 vfsp = vfslist = zone->zone_vfslist; 1563 /* 1564 * If the zone has a root entry, it will be the first in 1565 * the list. If it doesn't, we conjure one up. 1566 */ 1567 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt), 1568 zone->zone_rootpath) != 0) { 1569 struct vfs *tvfsp; 1570 /* 1571 * The root of the zone is not a mount point. The vfs 1572 * we want to report is that of the zone's root vnode. 
1573 */ 1574 tvfsp = zone->zone_rootvp->v_vfsp; 1575 1576 lxpr_uiobuf_printf(uiobuf, 1577 "%d 1 %d:%d / / %s - %s / %s\n", 1578 root_id, 1579 major(tvfsp->vfs_dev), minor(vfsp->vfs_dev), 1580 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw", 1581 vfssw[tvfsp->vfs_fstype].vsw_name, 1582 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw"); 1583 1584 } 1585 if (vfslist == NULL) { 1586 vfs_list_unlock(); 1587 return; 1588 } 1589 } 1590 1591 /* 1592 * Later on we have to do a lookupname, which can end up causing 1593 * another vfs_list_read_lock() to be called. Which can lead to a 1594 * deadlock. To avoid this, we extract the data we need into a local 1595 * list, then we can run this list without holding vfs_list_read_lock() 1596 * We keep the list in the same order as the vfs_list 1597 */ 1598 do { 1599 /* Skip mounts we shouldn't show */ 1600 if (vfsp->vfs_flag & VFS_NOMNTTAB) { 1601 goto nextfs; 1602 } 1603 1604 printp = kmem_alloc(sizeof (*printp), KM_SLEEP); 1605 refstr_hold(vfsp->vfs_mntpt); 1606 printp->vfs_mntpt = vfsp->vfs_mntpt; 1607 refstr_hold(vfsp->vfs_resource); 1608 printp->vfs_resource = vfsp->vfs_resource; 1609 printp->vfs_flag = vfsp->vfs_flag; 1610 printp->vfs_fstype = vfsp->vfs_fstype; 1611 printp->vfs_dev = vfsp->vfs_dev; 1612 printp->next = NULL; 1613 1614 *print_tail = printp; 1615 print_tail = &printp->next; 1616 1617 nextfs: 1618 vfsp = (zone == global_zone) ? 
1619 vfsp->vfs_next : vfsp->vfs_zone_next; 1620 1621 } while (vfsp != vfslist); 1622 1623 vfs_list_unlock(); 1624 1625 mnt_id = root_id + 1; 1626 1627 /* 1628 * now we can run through what we've extracted without holding 1629 * vfs_list_read_lock() 1630 */ 1631 printp = print_head; 1632 while (printp != NULL) { 1633 struct print_data *printp_next; 1634 const char *resource; 1635 char *mntpt; 1636 struct vnode *vp; 1637 int error; 1638 1639 mntpt = (char *)refstr_value(printp->vfs_mntpt); 1640 resource = refstr_value(printp->vfs_resource); 1641 1642 if (mntpt != NULL && mntpt[0] != '\0') 1643 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone); 1644 else 1645 mntpt = "-"; 1646 1647 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 1648 1649 if (error != 0) 1650 goto nextp; 1651 1652 if (!(vp->v_flag & VROOT)) { 1653 VN_RELE(vp); 1654 goto nextp; 1655 } 1656 VN_RELE(vp); 1657 1658 if (resource != NULL && resource[0] != '\0') { 1659 if (resource[0] == '/') { 1660 resource = ZONE_PATH_VISIBLE(resource, zone) ? 1661 ZONE_PATH_TRANSLATE(resource, zone) : mntpt; 1662 } 1663 } else { 1664 resource = "none"; 1665 } 1666 1667 /* 1668 * XXX parent ID is not tracked correctly here. Currently we 1669 * always assume the parent ID is the root ID. 1670 */ 1671 lxpr_uiobuf_printf(uiobuf, 1672 "%d %d %d:%d / %s %s - %s %s %s\n", 1673 mnt_id, root_id, 1674 major(printp->vfs_dev), minor(printp->vfs_dev), 1675 mntpt, 1676 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw", 1677 vfssw[printp->vfs_fstype].vsw_name, 1678 resource, 1679 printp->vfs_flag & VFS_RDONLY ? 
"ro" : "rw"); 1680 1681 nextp: 1682 printp_next = printp->next; 1683 refstr_rele(printp->vfs_mntpt); 1684 refstr_rele(printp->vfs_resource); 1685 kmem_free(printp, sizeof (*printp)); 1686 printp = printp_next; 1687 1688 mnt_id++; 1689 } 1690 } 1691 1692 /* 1693 * lxpr_read_pid_oom_scr_adj(): read oom_score_adj for process 1694 */ 1695 static void 1696 lxpr_read_pid_oom_scr_adj(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1697 { 1698 proc_t *p; 1699 1700 ASSERT(lxpnp->lxpr_type == LXPR_PID_OOM_SCR_ADJ || 1701 lxpnp->lxpr_type == LXPR_PID_TID_OOM_SCR_ADJ); 1702 1703 p = lxpr_lock(lxpnp->lxpr_pid); 1704 if (p == NULL) { 1705 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1706 return; 1707 } 1708 1709 /* always 0 */ 1710 lxpr_uiobuf_printf(uiobuf, "0\n"); 1711 1712 lxpr_unlock(p); 1713 } 1714 1715 1716 /* 1717 * lxpr_read_pid_statm(): memory status file 1718 */ 1719 static void 1720 lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1721 { 1722 proc_t *p; 1723 struct as *as; 1724 size_t vsize; 1725 size_t rss; 1726 1727 ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM || 1728 lxpnp->lxpr_type == LXPR_PID_TID_STATM); 1729 1730 p = lxpr_lock(lxpnp->lxpr_pid); 1731 if (p == NULL) { 1732 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1733 return; 1734 } 1735 1736 as = p->p_as; 1737 1738 mutex_exit(&p->p_lock); 1739 1740 AS_LOCK_ENTER(as, RW_READER); 1741 vsize = btopr(as->a_resvsize); 1742 rss = rm_asrss(as); 1743 AS_LOCK_EXIT(as); 1744 1745 mutex_enter(&p->p_lock); 1746 lxpr_unlock(p); 1747 1748 lxpr_uiobuf_printf(uiobuf, 1749 "%lu %lu %lu %lu %lu %lu %lu\n", 1750 vsize, rss, 0l, rss, 0l, 0l, 0l); 1751 } 1752 1753 /* 1754 * Look for either the main thread (lookup_id is 0) or the specified thread. 1755 * If we're looking for the main thread but the proc does not have one, we 1756 * fallback to using prchoose to get any thread available. 
1757 */ 1758 static kthread_t * 1759 lxpr_get_thread(proc_t *p, uint_t lookup_id) 1760 { 1761 kthread_t *t; 1762 uint_t emul_tid; 1763 lx_lwp_data_t *lwpd; 1764 pid_t pid = p->p_pid; 1765 pid_t init_pid = curproc->p_zone->zone_proc_initpid; 1766 boolean_t branded = (p->p_brand == &lx_brand); 1767 1768 /* get specified thread */ 1769 if ((t = p->p_tlist) == NULL) 1770 return (NULL); 1771 1772 do { 1773 if (lookup_id == 0 && t->t_tid == 1) { 1774 thread_lock(t); 1775 return (t); 1776 } 1777 1778 lwpd = ttolxlwp(t); 1779 if (branded && lwpd != NULL) { 1780 if (pid == init_pid && lookup_id == 1) { 1781 emul_tid = t->t_tid; 1782 } else { 1783 emul_tid = lwpd->br_pid; 1784 } 1785 } else { 1786 /* 1787 * Make only the first (assumed to be main) thread 1788 * visible for non-branded processes. 1789 */ 1790 emul_tid = p->p_pid; 1791 } 1792 if (emul_tid == lookup_id) { 1793 thread_lock(t); 1794 return (t); 1795 } 1796 } while ((t = t->t_forw) != p->p_tlist); 1797 1798 if (lookup_id == 0) 1799 return (prchoose(p)); 1800 return (NULL); 1801 } 1802 1803 /* 1804 * Lookup the real pid for procs 0 or 1. 
1805 */ 1806 static pid_t 1807 get_real_pid(pid_t p) 1808 { 1809 pid_t find_pid; 1810 1811 if (p == 1) { 1812 find_pid = curproc->p_zone->zone_proc_initpid; 1813 } else if (p == 0) { 1814 find_pid = curproc->p_zone->zone_zsched->p_pid; 1815 } else { 1816 find_pid = p; 1817 } 1818 1819 return (find_pid); 1820 } 1821 1822 /* 1823 * pid/tid common code to read status file 1824 */ 1825 static void 1826 lxpr_read_status_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf, 1827 uint_t lookup_id) 1828 { 1829 proc_t *p; 1830 kthread_t *t; 1831 user_t *up; 1832 cred_t *cr; 1833 const gid_t *groups; 1834 int ngroups; 1835 struct as *as; 1836 char *status; 1837 pid_t pid, ppid; 1838 k_sigset_t current, ignore, handle; 1839 int i, lx_sig; 1840 pid_t real_pid; 1841 1842 real_pid = get_real_pid(lxpnp->lxpr_pid); 1843 p = lxpr_lock(real_pid); 1844 if (p == NULL) { 1845 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1846 return; 1847 } 1848 1849 pid = p->p_pid; 1850 1851 /* 1852 * Convert pid to the Linux default of 1 if we're the zone's init 1853 * process or if we're the zone's zsched the pid is 0. 1854 */ 1855 if (pid == curproc->p_zone->zone_proc_initpid) { 1856 pid = 1; 1857 ppid = 0; /* parent pid for init is 0 */ 1858 } else if (pid == curproc->p_zone->zone_zsched->p_pid) { 1859 pid = 0; /* zsched is pid 0 */ 1860 ppid = 0; /* parent pid for zsched is itself */ 1861 } else { 1862 /* 1863 * Make sure not to reference parent PIDs that reside outside 1864 * the zone 1865 */ 1866 ppid = ((p->p_flag & SZONETOP) 1867 ? 
curproc->p_zone->zone_zsched->p_pid : p->p_ppid); 1868 1869 /* 1870 * Convert ppid to the Linux default of 1 if our parent is the 1871 * zone's init process 1872 */ 1873 if (ppid == curproc->p_zone->zone_proc_initpid) 1874 ppid = 1; 1875 } 1876 1877 t = lxpr_get_thread(p, lookup_id); 1878 if (t != NULL) { 1879 switch (t->t_state) { 1880 case TS_SLEEP: 1881 status = "S (sleeping)"; 1882 break; 1883 case TS_RUN: 1884 case TS_ONPROC: 1885 status = "R (running)"; 1886 break; 1887 case TS_ZOMB: 1888 status = "Z (zombie)"; 1889 break; 1890 case TS_STOPPED: 1891 status = "T (stopped)"; 1892 break; 1893 default: 1894 status = "! (unknown)"; 1895 break; 1896 } 1897 thread_unlock(t); 1898 } else { 1899 if (lookup_id != 0) { 1900 /* we can't find this specific thread */ 1901 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1902 lxpr_unlock(p); 1903 return; 1904 } 1905 1906 /* 1907 * there is a hole in the exit code, where a proc can have 1908 * no threads but it is yet to be flagged SZOMB. We will 1909 * assume we are about to become a zombie 1910 */ 1911 status = "Z (zombie)"; 1912 } 1913 1914 up = PTOU(p); 1915 mutex_enter(&p->p_crlock); 1916 crhold(cr = p->p_cred); 1917 mutex_exit(&p->p_crlock); 1918 1919 lxpr_uiobuf_printf(uiobuf, 1920 "Name:\t%s\n" 1921 "State:\t%s\n" 1922 "Tgid:\t%d\n" 1923 "Pid:\t%d\n" 1924 "PPid:\t%d\n" 1925 "TracerPid:\t%d\n" 1926 "Uid:\t%u\t%u\t%u\t%u\n" 1927 "Gid:\t%u\t%u\t%u\t%u\n" 1928 "FDSize:\t%d\n" 1929 "Groups:\t", 1930 up->u_comm, 1931 status, 1932 pid, /* thread group id - same as pid */ 1933 (lookup_id == 0) ? 
pid : lxpnp->lxpr_desc, 1934 ppid, 1935 0, 1936 crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr), 1937 crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr), 1938 p->p_fno_ctl); 1939 1940 1941 ngroups = crgetngroups(cr); 1942 groups = crgetgroups(cr); 1943 for (i = 0; i < ngroups; i++) { 1944 lxpr_uiobuf_printf(uiobuf, 1945 "%u ", 1946 groups[i]); 1947 } 1948 crfree(cr); 1949 1950 as = p->p_as; 1951 if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) { 1952 size_t vsize, nlocked, rss; 1953 1954 mutex_exit(&p->p_lock); 1955 AS_LOCK_ENTER(as, RW_READER); 1956 vsize = as->a_resvsize; 1957 rss = rm_asrss(as); 1958 AS_LOCK_EXIT(as); 1959 mutex_enter(&p->p_lock); 1960 nlocked = p->p_locked_mem; 1961 1962 lxpr_uiobuf_printf(uiobuf, 1963 "\n" 1964 "VmSize:\t%8lu kB\n" 1965 "VmLck:\t%8lu kB\n" 1966 "VmRSS:\t%8lu kB\n" 1967 "VmData:\t%8lu kB\n" 1968 "VmStk:\t%8lu kB\n" 1969 "VmExe:\t%8lu kB\n" 1970 "VmLib:\t%8lu kB", 1971 btok(vsize), 1972 btok(nlocked), 1973 ptok(rss), 1974 0l, 1975 btok(p->p_stksize), 1976 ptok(rss), 1977 0l); 1978 } 1979 1980 lxpr_uiobuf_printf(uiobuf, "\nThreads:\t%u", p->p_lwpcnt); 1981 1982 sigemptyset(¤t); 1983 sigemptyset(&ignore); 1984 sigemptyset(&handle); 1985 1986 for (i = 1; i < NSIG; i++) { 1987 lx_sig = stol_signo[i]; 1988 1989 if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) { 1990 if (sigismember(&p->p_sig, i)) 1991 sigaddset(¤t, lx_sig); 1992 1993 if (up->u_signal[i - 1] == SIG_IGN) 1994 sigaddset(&ignore, lx_sig); 1995 else if (up->u_signal[i - 1] != SIG_DFL) 1996 sigaddset(&handle, lx_sig); 1997 } 1998 } 1999 2000 lxpr_uiobuf_printf(uiobuf, 2001 "\n" 2002 "SigPnd:\t%08x%08x\n" 2003 "SigBlk:\t%08x%08x\n" 2004 "SigIgn:\t%08x%08x\n" 2005 "SigCgt:\t%08x%08x\n" 2006 "CapInh:\t%016x\n" 2007 "CapPrm:\t%016x\n" 2008 "CapEff:\t%016x\n", 2009 current.__sigbits[1], current.__sigbits[0], 2010 0, 0, /* signals blocked on per thread basis */ 2011 ignore.__sigbits[1], ignore.__sigbits[0], 2012 handle.__sigbits[1], handle.__sigbits[0], 
2013 /* Can't do anything with linux capabilities */ 2014 0, 2015 0, 2016 0); 2017 2018 lxpr_uiobuf_printf(uiobuf, 2019 "CapBnd:\t%016llx\n", 2020 /* We report the full capability bounding set */ 2021 0x1fffffffffLL); 2022 2023 lxpr_unlock(p); 2024 } 2025 2026 /* 2027 * lxpr_read_pid_status(): status file 2028 */ 2029 static void 2030 lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2031 { 2032 ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS); 2033 2034 lxpr_read_status_common(lxpnp, uiobuf, 0); 2035 } 2036 2037 /* 2038 * lxpr_read_pid_tid_status(): status file 2039 */ 2040 static void 2041 lxpr_read_pid_tid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2042 { 2043 ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STATUS); 2044 lxpr_read_status_common(lxpnp, uiobuf, lxpnp->lxpr_desc); 2045 } 2046 2047 /* 2048 * pid/tid common code to read stat file 2049 */ 2050 static void 2051 lxpr_read_stat_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf, 2052 uint_t lookup_id) 2053 { 2054 proc_t *p; 2055 kthread_t *t; 2056 struct as *as; 2057 char stat; 2058 pid_t pid, ppid, pgpid, spid; 2059 gid_t psgid; 2060 dev_t psdev; 2061 size_t rss, vsize; 2062 int nice, pri; 2063 caddr_t wchan; 2064 processorid_t cpu; 2065 pid_t real_pid; 2066 2067 real_pid = get_real_pid(lxpnp->lxpr_pid); 2068 p = lxpr_lock(real_pid); 2069 if (p == NULL) { 2070 lxpr_uiobuf_seterr(uiobuf, EINVAL); 2071 return; 2072 } 2073 2074 pid = p->p_pid; 2075 2076 /* 2077 * Set Linux defaults if we're the zone's init process 2078 */ 2079 if (pid == curproc->p_zone->zone_proc_initpid) { 2080 pid = 1; /* PID for init */ 2081 ppid = 0; /* parent PID for init is 0 */ 2082 pgpid = 0; /* process group for init is 0 */ 2083 psgid = (gid_t)-1; /* credential GID for init is -1 */ 2084 spid = 0; /* session id for init is 0 */ 2085 psdev = 0; /* session device for init is 0 */ 2086 } else if (pid == curproc->p_zone->zone_zsched->p_pid) { 2087 pid = 0; /* PID for zsched */ 2088 ppid = 0; /* parent PID for zsched is 0 */ 
		pgpid = 0;		/* process group for zsched is 0 */
		psgid = (gid_t)-1;	/* credential GID for zsched is -1 */
		spid = 0;		/* session id for zsched is 0 */
		psdev = 0;		/* session device for zsched is 0 */
	} else {
		/*
		 * Make sure not to reference parent PIDs that reside outside
		 * the zone
		 */
		ppid = ((p->p_flag & SZONETOP) ?
		    curproc->p_zone->zone_zsched->p_pid : p->p_ppid);

		/*
		 * Convert ppid to the Linux default of 1 if our parent is the
		 * zone's init process
		 */
		if (ppid == curproc->p_zone->zone_proc_initpid)
			ppid = 1;

		pgpid = p->p_pgrp;

		/* Session data is read under both the session locks. */
		mutex_enter(&p->p_splock);
		mutex_enter(&p->p_sessp->s_lock);
		spid = p->p_sessp->s_sid;
		psdev = p->p_sessp->s_dev;
		if (p->p_sessp->s_cred)
			psgid = crgetgid(p->p_sessp->s_cred);
		else
			psgid = crgetgid(p->p_cred);

		mutex_exit(&p->p_sessp->s_lock);
		mutex_exit(&p->p_splock);
	}

	/* Returned thread (if any) is thread-locked; see lxpr_get_thread(). */
	t = lxpr_get_thread(p, lookup_id);
	if (t != NULL) {
		switch (t->t_state) {
		case TS_SLEEP:
			stat = 'S'; break;
		case TS_RUN:
		case TS_ONPROC:
			stat = 'R'; break;
		case TS_ZOMB:
			stat = 'Z'; break;
		case TS_STOPPED:
			stat = 'T'; break;
		default:
			stat = '!'; break;
		}

		if (CL_DONICE(t, NULL, 0, &nice) != 0)
			nice = 0;

		pri = t->t_pri;
		wchan = t->t_wchan;
		cpu = t->t_cpu->cpu_id;
		thread_unlock(t);
	} else {
		if (lookup_id != 0) {
			/* we can't find this specific thread */
			lxpr_uiobuf_seterr(uiobuf, EINVAL);
			lxpr_unlock(p);
			return;
		}

		/* Only zombies have no threads */
		stat = 'Z';
		nice = 0;
		pri = 0;
		wchan = 0;
		cpu = 0;
	}
	as = p->p_as;
	/* Drop p_lock while acquiring the address space lock. */
	mutex_exit(&p->p_lock);
	AS_LOCK_ENTER(as, RW_READER);
	vsize = as->a_resvsize;
	rss = rm_asrss(as);
	AS_LOCK_EXIT(as);
	mutex_enter(&p->p_lock);

	lxpr_uiobuf_printf(uiobuf,
	    "%d (%s) %c %d %d %d %d %d "
	    "%lu %lu %lu %lu %lu "
	    "%lu %lu %ld %ld "
	    "%d %d %d "
	    "%lu "
	    "%lu "
	    "%lu %ld %llu "
	    "%lu %lu %u "
	    "%lu %lu "
	    "%lu %lu %lu %lu "
	    "%lu "
	    "%lu %lu "
	    "%d "
	    "%d"
	    "\n",
	    (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
	    PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
	    0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
	    p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
	    pri, nice, p->p_lwpcnt,
	    0l, /* itrealvalue (time before next SIGALRM) */
	    PTOU(p)->u_ticks,
	    vsize, rss, p->p_vmem_ctl,
	    0l, 0l, USRSTACK, /* startcode, endcode, startstack */
	    0l, 0l, /* kstkesp, kstkeip */
	    0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
	    wchan,
	    0l, 0l, /* nswap, cnswap */
	    0, /* exit_signal */
	    cpu);

	lxpr_unlock(p);
}

/*
 * lxpr_read_pid_stat(): pid stat file
 */
static void
lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);

	lxpr_read_stat_common(lxpnp, uiobuf, 0);
}

/*
 * lxpr_read_pid_tid_stat(): per-thread stat file
 */
static void
lxpr_read_pid_tid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STAT);
	lxpr_read_stat_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
}

/*
 * /proc/net/arp is presented as an empty file.
 */
/* ARGSUSED */
static void
lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * Accumulated per-interface statistics, gathered from the link kstats and
 * used to produce the /proc/net/dev lines.
 */
struct lxpr_ifstat {
	uint64_t rx_bytes;
	uint64_t rx_packets;
	uint64_t rx_errors;
	uint64_t rx_drop;
	uint64_t tx_bytes;
	uint64_t tx_packets;
	uint64_t tx_errors;
	uint64_t tx_drop;
	uint64_t collisions;
	uint64_t rx_multicast;
};

/*
 * Snapshot the data of a kstat into a freshly allocated buffer.
 *
 * The kstat is located either by name (kn->ks_module/ks_instance/ks_name)
 * or by kid (kn->ks_kid), scoped to the caller's zone.  On success a
 * KM_SLEEP-allocated copy of the kstat data is returned, with *size set to
 * the allocation size (for the caller's kmem_free) and *num set to
 * ks_ndata.  Returns NULL on lookup failure, invalid kstat, or snapshot
 * failure.
 */
static void *
lxpr_kstat_read(kstat_t *kn, boolean_t byname, size_t *size, int *num)
{
	kstat_t *kp;
	int
	    i, nrec = 0;
	size_t bufsize;
	void *buf = NULL;

	if (byname == B_TRUE) {
		kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
		    kn->ks_name, getzoneid());
	} else {
		kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
	}
	if (kp == NULL) {
		return (NULL);
	}
	if (kp->ks_flags & KSTAT_FLAG_INVALID) {
		kstat_rele(kp);
		return (NULL);
	}

	bufsize = kp->ks_data_size + 1;
	kstat_rele(kp);

	/*
	 * The kstat in question is released so that kmem_alloc(KM_SLEEP) is
	 * performed without it held. After the alloc, the kstat is reacquired
	 * and its size is checked again. If the buffer is no longer large
	 * enough, the alloc and check are retried once more (two attempts
	 * in total).
	 */
	for (i = 0; i < 2; i++) {
		buf = kmem_alloc(bufsize, KM_SLEEP);

		/* Check if bufsize still appropriate */
		if (byname == B_TRUE) {
			kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
			    kn->ks_name, getzoneid());
		} else {
			kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
		}
		if (kp == NULL || kp->ks_flags & KSTAT_FLAG_INVALID) {
			/* The kstat vanished (or went invalid) in between. */
			if (kp != NULL) {
				kstat_rele(kp);
			}
			kmem_free(buf, bufsize);
			return (NULL);
		}
		KSTAT_ENTER(kp);
		(void) KSTAT_UPDATE(kp, KSTAT_READ);
		if (bufsize < kp->ks_data_size) {
			/* Grew since we sized the buffer; retry. */
			kmem_free(buf, bufsize);
			buf = NULL;
			bufsize = kp->ks_data_size + 1;
			KSTAT_EXIT(kp);
			kstat_rele(kp);
			continue;
		} else {
			if (KSTAT_SNAPSHOT(kp, buf, KSTAT_READ) != 0) {
				kmem_free(buf, bufsize);
				buf = NULL;
			}
			nrec = kp->ks_ndata;
			KSTAT_EXIT(kp);
			kstat_rele(kp);
			break;
		}
	}

	if (buf != NULL) {
		*size = bufsize;
		*num = nrec;
	}
	return (buf);
}

/*
 * Fill *ifs with the rx/tx counters of the interface named by kn.
 * Returns 0 on success, -1 if the kstat could not be snapshotted.
 */
static int
lxpr_kstat_ifstat(kstat_t *kn, struct lxpr_ifstat *ifs)
{
	kstat_named_t *kp;
	int i, num;
	size_t size;

	/*
	 * Search by name instead of by kid since there's a small window to
	 * race against kstats being added/removed.
	 */
	bzero(ifs, sizeof (*ifs));
	kp = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
	if (kp == NULL)
		return (-1);
	for (i = 0; i < num; i++) {
		if (strncmp(kp[i].name, "rbytes64", KSTAT_STRLEN) == 0)
			ifs->rx_bytes = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "ipackets64", KSTAT_STRLEN) == 0)
			ifs->rx_packets = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "ierrors", KSTAT_STRLEN) == 0)
			ifs->rx_errors = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "norcvbuf", KSTAT_STRLEN) == 0)
			ifs->rx_drop = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "multircv", KSTAT_STRLEN) == 0)
			ifs->rx_multicast = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "obytes64", KSTAT_STRLEN) == 0)
			ifs->tx_bytes = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "opackets64", KSTAT_STRLEN) == 0)
			ifs->tx_packets = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "oerrors", KSTAT_STRLEN) == 0)
			ifs->tx_errors = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "noxmtbuf", KSTAT_STRLEN) == 0)
			ifs->tx_drop = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "collisions", KSTAT_STRLEN) == 0)
			ifs->collisions = kp[i].value.ui32;
	}
	kmem_free(kp, size);
	return (0);
}

/*
 * lxpr_read_net_dev(): emulate /proc/net/dev from the link/lo kstats.
 */
/* ARGSUSED */
static void
lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	kstat_t *ksr;
	kstat_t ks0;
	int i, nidx;
	size_t sidx;
	struct lxpr_ifstat ifs;

	lxpr_uiobuf_printf(uiobuf, "Inter-| Receive "
	    " | Transmit\n");
	lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo"
	    " frame compressed multicast|bytes packets errs drop fifo"
	    " colls carrier compressed\n");

	/* kid 0 yields a snapshot of the kstat chain headers. */
	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
	if (ksr == NULL)
		return;

	/* NOTE(review): index 0 appears to be skipped deliberately —
	 * presumably the chain header entry itself; confirm. */
	for (i = 1; i < nidx; i++) {
		if (strncmp(ksr[i].ks_module, "link", KSTAT_STRLEN) == 0 ||
		    strncmp(ksr[i].ks_module, "lo", KSTAT_STRLEN) == 0) {
			if (lxpr_kstat_ifstat(&ksr[i], &ifs) != 0)
				continue;

			/* Overwriting the name is ok in the local snapshot */
			lx_ifname_convert(ksr[i].ks_name, LX_IF_FROMNATIVE);
			lxpr_uiobuf_printf(uiobuf, "%6s: %7llu %7llu %4lu "
			    "%4lu %4u %5u %10u %9lu %8llu %7llu %4lu %4lu %4u "
			    "%5lu %7u %10u\n",
			    ksr[i].ks_name,
			    ifs.rx_bytes, ifs.rx_packets,
			    ifs.rx_errors, ifs.rx_drop,
			    0, 0, 0, ifs.rx_multicast,
			    ifs.tx_bytes, ifs.tx_packets,
			    ifs.tx_errors, ifs.tx_drop,
			    0, ifs.collisions, 0, 0);
		}
	}

	kmem_free(ksr, sidx);
}

/*
 * /proc/net/dev_mcast is presented as an empty file.
 */
/* ARGSUSED */
static void
lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * Render an IPv6 address as 32 lowercase hex digits (no separators) into
 * buf, NUL-terminated.  This matches the Linux /proc/net address format.
 */
static void
lxpr_inet6_out(const in6_addr_t *addr, char buf[33])
{
	const uint8_t *ip = addr->s6_addr;
	char digits[] = "0123456789abcdef";
	int i;
	for (i = 0; i < 16; i++) {
		buf[2 * i] = digits[ip[i] >> 4];
		buf[2 * i + 1] = digits[ip[i] & 0xf];
	}
	buf[32] = '\0';
}

/*
 * lxpr_read_net_if_inet6(): emulate /proc/net/if_inet6 by walking every
 * IPv6 ill/ipif in the current netstack.
 */
/* ARGSUSED */
static void
lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;
	ill_t *ill;
	ipif_t *ipif;
	ill_walk_context_t ctx;
	char ifname[LIFNAMSIZ], ip6out[33];

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx, ipst);

	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			uint_t index = ill->ill_phyint->phyint_ifindex;
			int plen = ip_mask_to_plen_v6(&ipif->ipif_v6net_mask);
			unsigned int scope = lx_ipv6_scope_convert(
			    &ipif->ipif_v6lcl_addr);
			/* Always report PERMANENT flag */
			int flag = 0x80;

			(void) snprintf(ifname, LIFNAMSIZ, "%s", ill->ill_name);
			lx_ifname_convert(ifname, LX_IF_FROMNATIVE);
			lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out);

			lxpr_uiobuf_printf(uiobuf, "%32s %02x %02x %02x %02x"
			    " %8s\n", ip6out, index, plen, scope, flag, ifname);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	netstack_rele(ns);
}

/*
 * /proc/net/igmp is presented as an empty file.
 */
/* ARGSUSED */
static void
lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * /proc/net/ip_mr_cache is presented as an empty file.
 */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * /proc/net/ip_mr_vif is presented as an empty file.
 */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * Emit one /proc/net/ipv6_route line for the given IRE.  Used as the
 * callback for ire_walk_v6().
 */
static void
lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf)
{
	uint32_t flags;
	char name[IFNAMSIZ];
	char ipv6addr[33];

	lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr);
	lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr,
	    ip_mask_to_plen_v6(&ire->ire_mask_v6));

	/* punt on this for now */
	lxpr_uiobuf_printf(uiobuf, "%s %02x ",
	    "00000000000000000000000000000000", 0);

	lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr);
	lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr);

	flags = ire->ire_flags &
	    (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
	/* Linux's RTF_LOCAL equivalent */
	if (ire->ire_metrics.iulp_local)
		flags |= 0x80000000;

	if (ire->ire_ill != NULL) {
		ill_get_name(ire->ire_ill, name, sizeof (name));
		lx_ifname_convert(name, LX_IF_FROMNATIVE);
	} else {
		name[0] = '\0';
	}

	lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n",
	    0, /* metric */
	    ire->ire_refcnt,
	    0,
	    flags,
	    name);
}

/*
 * lxpr_read_net_ipv6_route(): emulate /proc/net/ipv6_route.
 */
/* ARGSUSED */
static void
lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/*
	 * LX branded zones are expected to have exclusive IP stack, hence
	 * using ALL_ZONES as the zoneid filter.
	 */
	ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst);

	netstack_rele(ns);
}

/*
 * /proc/net/mcfilter is presented as an empty file.
 */
/* ARGSUSED */
static void
lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * /proc/net/netstat is presented as an empty file.
 */
/* ARGSUSED */
static void
lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * /proc/net/raw is presented as an empty file.
 */
/* ARGSUSED */
static void
lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* Route types that should not appear in the emulated routing table. */
#define	LXPR_SKIP_ROUTE(type)	\
	(((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \
	IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & type) != 0)

/*
 * Emit one /proc/net/route line for the given IRE.  Used as the callback
 * for ire_walk_v4().
 */
static void
lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf)
{
	uint32_t flags;
	char name[IFNAMSIZ];
	ill_t *ill;
	ire_t *nire;
	ipif_t *ipif;
	ipaddr_t gateway;

	if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0)
		return;

	/* These route flags have direct Linux equivalents */
	flags = ire->ire_flags &
	    (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);

	/*
	 * Search for a suitable IRE for naming purposes.
	 * On Linux, the default route is typically associated with the
	 * interface used to access gateway. The default IRE on Illumos
	 * typically lacks an ill reference but its parent might have one.
	 */
	nire = ire;
	do {
		ill = nire->ire_ill;
		nire = nire->ire_dep_parent;
	} while (ill == NULL && nire != NULL);
	if (ill != NULL) {
		ill_get_name(ill, name, sizeof (name));
		lx_ifname_convert(name, LX_IF_FROMNATIVE);
	} else {
		name[0] = '*';
		name[1] = '\0';
	}

	/*
	 * Linux suppresses the gateway address for directly connected
	 * interface networks. To emulate this behavior, we walk all addresses
	 * of a given route interface. If one matches the gateway, it is
	 * displayed as NULL.
	 */
	gateway = ire->ire_gateway_addr;
	if ((ill = ire->ire_ill) != NULL) {
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_lcl_addr == gateway) {
				gateway = 0;
				break;
			}
		}
	}

	lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
	    "%d\t%08X\t%d\t%u\t%u\n",
	    name,
	    ire->ire_addr,
	    gateway,
	    flags, 0, 0,
	    0, /* priority */
	    ire->ire_mask,
	    0, 0, /* mss, window */
	    ire->ire_metrics.iulp_rtt);
}

/*
 * lxpr_read_net_route(): emulate /proc/net/route.
 */
/* ARGSUSED */
static void
lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;

	lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t"
	    "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/*
	 * LX branded zones are expected to have exclusive IP stack, hence
	 * using ALL_ZONES as the zoneid filter.
2649 */ 2650 ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst); 2651 2652 netstack_rele(ns); 2653 } 2654 2655 /* ARGSUSED */ 2656 static void 2657 lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2658 { 2659 } 2660 2661 /* ARGSUSED */ 2662 static void 2663 lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2664 { 2665 } 2666 2667 /* ARGSUSED */ 2668 static void 2669 lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2670 { 2671 } 2672 2673 typedef struct lxpr_snmp_table { 2674 const char *lst_proto; 2675 const char *lst_fields[]; 2676 } lxpr_snmp_table_t; 2677 2678 static lxpr_snmp_table_t lxpr_snmp_ip = { "ip", 2679 { 2680 "forwarding", "defaultTTL", "inReceives", "inHdrErrors", 2681 "inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards", 2682 "inDelivers", "outRequests", "outDiscards", "outNoRoutes", 2683 "reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs", 2684 "fragFails", "fragCreates", 2685 NULL 2686 } 2687 }; 2688 static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp", 2689 { 2690 "inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds", 2691 "inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps", 2692 "inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps", 2693 "outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds", 2694 "outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos", 2695 "outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks", 2696 "outAddrMaskReps", 2697 NULL 2698 } 2699 }; 2700 static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp", 2701 { 2702 "rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens", 2703 "passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs", 2704 "outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors", 2705 NULL 2706 } 2707 }; 2708 static lxpr_snmp_table_t lxpr_snmp_udp = { "udp", 2709 { 2710 "inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors", 2711 
"sndbufErrors", "inCsumErrors", 2712 NULL 2713 } 2714 }; 2715 2716 static lxpr_snmp_table_t *lxpr_net_snmptab[] = { 2717 &lxpr_snmp_ip, 2718 &lxpr_snmp_icmp, 2719 &lxpr_snmp_tcp, 2720 &lxpr_snmp_udp, 2721 NULL 2722 }; 2723 2724 static void 2725 lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table, 2726 kstat_t *kn) 2727 { 2728 kstat_named_t *klist; 2729 char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN]; 2730 int i, j, num; 2731 size_t size; 2732 2733 klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num); 2734 if (klist == NULL) 2735 return; 2736 2737 /* Print the header line, fields capitalized */ 2738 (void) strncpy(upname, table->lst_proto, KSTAT_STRLEN); 2739 upname[0] = toupper(upname[0]); 2740 lxpr_uiobuf_printf(uiobuf, "%s:", upname); 2741 for (i = 0; table->lst_fields[i] != NULL; i++) { 2742 (void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN); 2743 upfield[0] = toupper(upfield[0]); 2744 lxpr_uiobuf_printf(uiobuf, " %s", upfield); 2745 } 2746 lxpr_uiobuf_printf(uiobuf, "\n%s:", upname); 2747 2748 /* Then loop back through to print the value line. 
*/ 2749 for (i = 0; table->lst_fields[i] != NULL; i++) { 2750 kstat_named_t *kpoint = NULL; 2751 for (j = 0; j < num; j++) { 2752 if (strncmp(klist[j].name, table->lst_fields[i], 2753 KSTAT_STRLEN) == 0) { 2754 kpoint = &klist[j]; 2755 break; 2756 } 2757 } 2758 if (kpoint == NULL) { 2759 /* Output 0 for unknown fields */ 2760 lxpr_uiobuf_printf(uiobuf, " 0"); 2761 } else { 2762 switch (kpoint->data_type) { 2763 case KSTAT_DATA_INT32: 2764 lxpr_uiobuf_printf(uiobuf, " %d", 2765 kpoint->value.i32); 2766 break; 2767 case KSTAT_DATA_UINT32: 2768 lxpr_uiobuf_printf(uiobuf, " %u", 2769 kpoint->value.ui32); 2770 break; 2771 case KSTAT_DATA_INT64: 2772 lxpr_uiobuf_printf(uiobuf, " %ld", 2773 kpoint->value.l); 2774 break; 2775 case KSTAT_DATA_UINT64: 2776 lxpr_uiobuf_printf(uiobuf, " %lu", 2777 kpoint->value.ul); 2778 break; 2779 } 2780 } 2781 } 2782 lxpr_uiobuf_printf(uiobuf, "\n"); 2783 kmem_free(klist, size); 2784 } 2785 2786 /* ARGSUSED */ 2787 static void 2788 lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2789 { 2790 kstat_t *ksr; 2791 kstat_t ks0; 2792 lxpr_snmp_table_t **table = lxpr_net_snmptab; 2793 int i, t, nidx; 2794 size_t sidx; 2795 2796 ks0.ks_kid = 0; 2797 ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx); 2798 if (ksr == NULL) 2799 return; 2800 2801 for (t = 0; table[t] != NULL; t++) { 2802 for (i = 0; i < nidx; i++) { 2803 if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0) 2804 continue; 2805 if (strncmp(ksr[i].ks_name, table[t]->lst_proto, 2806 KSTAT_STRLEN) == 0) { 2807 lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]); 2808 break; 2809 } 2810 } 2811 } 2812 kmem_free(ksr, sidx); 2813 } 2814 2815 /* ARGSUSED */ 2816 static void 2817 lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2818 { 2819 } 2820 2821 static int 2822 lxpr_convert_tcp_state(int st) 2823 { 2824 /* 2825 * Derived from the enum located in the Linux kernel sources: 2826 * include/net/tcp_states.h 2827 */ 2828 switch (st) { 2829 case 
	    TCPS_ESTABLISHED:
		return (1);
	case TCPS_SYN_SENT:
		return (2);
	case TCPS_SYN_RCVD:
		return (3);
	case TCPS_FIN_WAIT_1:
		return (4);
	case TCPS_FIN_WAIT_2:
		return (5);
	case TCPS_TIME_WAIT:
		return (6);
	case TCPS_CLOSED:
		return (7);
	case TCPS_CLOSE_WAIT:
		return (8);
	case TCPS_LAST_ACK:
		return (9);
	case TCPS_LISTEN:
		return (10);
	case TCPS_CLOSING:
		return (11);
	default:
		/* No translation for TCPS_IDLE, TCPS_BOUND or anything else */
		return (0);
	}
}

/*
 * Emit /proc/net/tcp (ipver == IPV4_VERSION) or /proc/net/tcp6 content by
 * walking every TCP conn_t in the current netstack's global hash.
 */
static void
lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
{
	int i, sl = 0;
	connf_t *connfp;
	conn_t *connp;
	netstack_t *ns;
	ip_stack_t *ipst;

	ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
	if (ipver == IPV4_VERSION) {
		lxpr_uiobuf_printf(uiobuf, " sl local_address rem_address "
		    "st tx_queue rx_queue tr tm->when retrnsmt uid timeout "
		    "inode\n");
	} else {
		lxpr_uiobuf_printf(uiobuf, " sl "
		    "local_address "
		    "remote_address "
		    "st tx_queue rx_queue tr tm->when retrnsmt "
		    "uid timeout inode\n");
	}
	/*
	 * Due to differences between the Linux and illumos TCP
	 * implementations, some data will be omitted from the output here.
	 *
	 * Valid fields:
	 *  - local_address
	 *  - remote_address
	 *  - st
	 *  - tx_queue
	 *  - rx_queue
	 *  - uid
	 *  - inode
	 *
	 * Omitted/invalid fields
	 *  - tr
	 *  - tm->when
	 *  - retrnsmt
	 *  - timeout
	 */

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;
		/* ipcl_get_next_conn() handles ref'ing/releasing conns. */
		while ((connp =
		    ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) {
			tcp_t *tcp;
			vattr_t attr;
			sonode_t *so = (sonode_t *)connp->conn_upper_handle;
			vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
			if (connp->conn_ipversion != ipver)
				continue;
			tcp = connp->conn_tcp;
			if (ipver == IPV4_VERSION) {
				lxpr_uiobuf_printf(uiobuf,
				    "%4d: %08X:%04X %08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v4,
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v4,
				    ntohs(connp->conn_fport));
			} else {
				lxpr_uiobuf_printf(uiobuf, "%4d: "
				    "%08X%08X%08X%08X:%04X "
				    "%08X%08X%08X%08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v6.s6_addr32[0],
				    connp->conn_laddr_v6.s6_addr32[1],
				    connp->conn_laddr_v6.s6_addr32[2],
				    connp->conn_laddr_v6.s6_addr32[3],
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v6.s6_addr32[0],
				    connp->conn_faddr_v6.s6_addr32[1],
				    connp->conn_faddr_v6.s6_addr32[2],
				    connp->conn_faddr_v6.s6_addr32[3],
				    ntohs(connp->conn_fport));
			}

			/* fetch the simulated inode for the socket */
			if (vp == NULL ||
			    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
				attr.va_nodeid = 0;

			lxpr_uiobuf_printf(uiobuf,
			    "%02X %08X:%08X %02X:%08X %08X "
			    "%5u %8d %lu %d %p %u %u %u %u %d\n",
			    lxpr_convert_tcp_state(tcp->tcp_state),
			    tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */
			    0, 0, /* tr, when */
			    0, /* per-connection rexmits aren't tracked today */
			    connp->conn_cred->cr_uid,
			    0, /* timeout */
			    /* inode + more */
			    (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0);
		}
	}
	netstack_rele(ns);
}

/*
 * lxpr_read_net_tcp(): emulate /proc/net/tcp.
 */
/* ARGSUSED */
static void
lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_tcp(uiobuf, IPV4_VERSION);
}

/*
 * lxpr_read_net_tcp6(): emulate /proc/net/tcp6.
 */
/* ARGSUSED */
static void
lxpr_read_net_tcp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_tcp(uiobuf, IPV6_VERSION);
}

/*
 * Emit /proc/net/udp (ipver == IPV4_VERSION) or /proc/net/udp6 content by
 * walking every UDP conn_t in the current netstack's global hash.
 */
static void
lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
{
	int i, sl = 0;
	connf_t *connfp;
	conn_t *connp;
	netstack_t *ns;
	ip_stack_t *ipst;

	ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
	if (ipver == IPV4_VERSION) {
		lxpr_uiobuf_printf(uiobuf, " sl local_address rem_address"
		    " st tx_queue rx_queue tr tm->when retrnsmt uid"
		    " timeout inode ref pointer drops\n");
	} else {
		lxpr_uiobuf_printf(uiobuf, " sl "
		    "local_address "
		    "remote_address "
		    "st tx_queue rx_queue tr tm->when retrnsmt "
		    "uid timeout inode ref pointer drops\n");
	}
	/*
	 * Due to differences between the Linux and illumos UDP
	 * implementations, some data will be omitted from the output here.
	 *
	 * Valid fields:
	 *  - local_address
	 *  - remote_address
	 *  - st: limited
	 *  - uid
	 *
	 * Omitted/invalid fields
	 *  - tx_queue
	 *  - rx_queue
	 *  - tr
	 *  - tm->when
	 *  - retrnsmt
	 *  - timeout
	 *  - inode
	 */

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;
		/* ipcl_get_next_conn() handles ref'ing/releasing conns. */
		while ((connp =
		    ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) {
			udp_t *udp;
			int state = 0;
			vattr_t attr;
			sonode_t *so = (sonode_t *)connp->conn_upper_handle;
			vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
			if (connp->conn_ipversion != ipver)
				continue;
			udp = connp->conn_udp;
			if (ipver == IPV4_VERSION) {
				lxpr_uiobuf_printf(uiobuf,
				    "%4d: %08X:%04X %08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v4,
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v4,
				    ntohs(connp->conn_fport));
			} else {
				lxpr_uiobuf_printf(uiobuf, "%4d: "
				    "%08X%08X%08X%08X:%04X "
				    "%08X%08X%08X%08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v6.s6_addr32[0],
				    connp->conn_laddr_v6.s6_addr32[1],
				    connp->conn_laddr_v6.s6_addr32[2],
				    connp->conn_laddr_v6.s6_addr32[3],
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v6.s6_addr32[0],
				    connp->conn_faddr_v6.s6_addr32[1],
				    connp->conn_faddr_v6.s6_addr32[2],
				    connp->conn_faddr_v6.s6_addr32[3],
				    ntohs(connp->conn_fport));
			}

			/* Map the TPI endpoint state onto Linux values. */
			switch (udp->udp_state) {
			case TS_UNBND:
			case TS_IDLE:
				state = 7;
				break;
			case TS_DATA_XFER:
				state = 1;
				break;
			}

			/* fetch the simulated inode for the socket */
			if (vp == NULL ||
			    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
				attr.va_nodeid = 0;

			lxpr_uiobuf_printf(uiobuf,
			    "%02X %08X:%08X %02X:%08X %08X "
			    "%5u %8d %lu %d %p %d\n",
			    state,
			    0, 0, /* rx/tx queue */
			    0, 0, /* tr, when */
			    0, /* retrans */
			    connp->conn_cred->cr_uid,
			    0, /* timeout */
			    /* inode, ref, pointer, drops */
			    (ino_t)attr.va_nodeid, 0, NULL, 0);
		}
	}
	netstack_rele(ns);
}

/*
 * lxpr_read_net_udp(): emulate /proc/net/udp.
 */
/* ARGSUSED */
static void
lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_udp(uiobuf, IPV4_VERSION);
}

/*
 * lxpr_read_net_udp6(): emulate /proc/net/udp6.
 */
/* ARGSUSED */
static void
lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_udp(uiobuf, IPV6_VERSION);
}

/*
 * lxpr_read_net_unix(): emulate /proc/net/unix by walking the global
 * socklist of TPI sockets, restricted to active sonodes in this zone.
 */
/* ARGSUSED */
static void
lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	sonode_t *so;
	zoneid_t zoneid = getzoneid();

	lxpr_uiobuf_printf(uiobuf, "Num RefCount Protocol Flags Type "
	    "St Inode Path\n");

	mutex_enter(&socklist.sl_lock);
	for (so = socklist.sl_list; so != NULL;
	    so = _SOTOTPI(so)->sti_next_so) {
		vnode_t *vp = so->so_vnode;
		vattr_t attr;
		sotpi_info_t *sti;
		const char *name = NULL;
		int status = 0;
		int type = 0;
		int flags = 0;

		/* Only process active sonodes in this zone */
		if (so->so_count == 0 || so->so_zoneid != zoneid)
			continue;

		/*
		 * Grab the inode, if possible.
		 * This must be done before entering so_lock.
3132 */ 3133 if (vp == NULL || 3134 VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0) 3135 attr.va_nodeid = 0; 3136 3137 mutex_enter(&so->so_lock); 3138 sti = _SOTOTPI(so); 3139 3140 if (sti->sti_laddr_sa != NULL && 3141 sti->sti_laddr_len > 0) { 3142 name = sti->sti_laddr_sa->sa_data; 3143 } else if (sti->sti_faddr_sa != NULL && 3144 sti->sti_faddr_len > 0) { 3145 name = sti->sti_faddr_sa->sa_data; 3146 } 3147 3148 /* 3149 * Derived from enum values in Linux kernel source: 3150 * include/uapi/linux/net.h 3151 */ 3152 if ((so->so_state & SS_ISDISCONNECTING) != 0) { 3153 status = 4; 3154 } else if ((so->so_state & SS_ISCONNECTING) != 0) { 3155 status = 2; 3156 } else if ((so->so_state & SS_ISCONNECTED) != 0) { 3157 status = 3; 3158 } else { 3159 status = 1; 3160 /* Add ACC flag for stream-type server sockets */ 3161 if (so->so_type != SOCK_DGRAM && 3162 sti->sti_laddr_sa != NULL) 3163 flags |= 0x10000; 3164 } 3165 3166 /* Convert to Linux type */ 3167 switch (so->so_type) { 3168 case SOCK_DGRAM: 3169 type = 2; 3170 break; 3171 case SOCK_SEQPACKET: 3172 type = 5; 3173 break; 3174 default: 3175 type = 1; 3176 } 3177 3178 lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu", 3179 so, 3180 so->so_count, 3181 0, /* proto, always 0 */ 3182 flags, 3183 type, 3184 status, 3185 (ino_t)attr.va_nodeid); 3186 3187 /* 3188 * Due to shortcomings in the abstract socket emulation, they 3189 * cannot be properly represented here (as @<path>). 3190 * 3191 * This will be the case until they are better implemented. 3192 */ 3193 if (name != NULL) 3194 lxpr_uiobuf_printf(uiobuf, " %s\n", name); 3195 else 3196 lxpr_uiobuf_printf(uiobuf, "\n"); 3197 mutex_exit(&so->so_lock); 3198 } 3199 mutex_exit(&socklist.sl_lock); 3200 } 3201 3202 /* 3203 * lxpr_read_kmsg(): read the contents of the kernel message queue. 
We
 * translate this into the reception of console messages for this zone; each
 * read copies out a single zone console message, or blocks until the next one
 * is produced, unless we're open non-blocking, in which case we return after
 * 1ms.
 */

/* Linux syslog priority prefix prepended to every emitted message. */
#define	LX_KMSG_PRI	"<0>"

static void
lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh)
{
	mblk_t *mp;
	timestruc_t to;
	timestruc_t *tp = NULL;

	ASSERT(lxpnp->lxpr_type == LXPR_KMSG);

	/* Non-blocking readers get a short 1ms timeout instead of NULL. */
	if (lxpr_uiobuf_nonblock(uiobuf)) {
		to.tv_sec = 0;
		to.tv_nsec = 1000000; /* 1msec */
		tp = &to;
	}

	if (ldi_getmsg(lh, &mp, tp) == 0) {
		/*
		 * lx procfs doesn't like successive reads to the same file
		 * descriptor unless we do an explicit rewind each time.
		 */
		lxpr_uiobuf_seek(uiobuf, 0);

		lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
		    mp->b_cont->b_rptr);

		freemsg(mp);
	}
}

/*
 * lxpr_read_loadavg(): read the contents of the "loadavg" file. We do just
 * enough for uptime and other simple lxproc readers to work
 */
extern int nthread;

static void
lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ulong_t avenrun1;
	ulong_t avenrun5;
	ulong_t avenrun15;
	ulong_t avenrun1_cs;
	ulong_t avenrun5_cs;
	ulong_t avenrun15_cs;
	int loadavg[3];
	int *loadbuf;
	cpupart_t *cp;
	zone_t *zone = LXPTOZ(lxpnp);

	uint_t nrunnable = 0;
	rctl_qty_t nlwps;

	ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);

	mutex_enter(&cpu_lock);

	/*
	 * Need to add up values over all CPU partitions. If pools are active,
	 * only report the values of the zone's partition, which by definition
	 * includes the current CPU.
	 */
	if (pool_pset_enabled()) {
		psetid_t psetid = zone_pset_get(curproc->p_zone);

		ASSERT(curproc->p_zone != &zone0);
		cp = CPU->cpu_part;

		nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
		(void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
		loadbuf = &loadavg[0];
	} else {
		cp = cp_list_head;
		do {
			nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
		} while ((cp = cp->cp_next) != cp_list_head);

		loadbuf = zone == global_zone ?
		    &avenrun[0] : zone->zone_avenrun;
	}

	/*
	 * If we're in the non-global zone, we'll report the total number of
	 * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
	 * otherwise will just use nthread (which will include kernel threads,
	 * but should be good enough for lxproc).
	 */
	nlwps = zone == global_zone ? nthread : zone->zone_nlwps;

	mutex_exit(&cpu_lock);

	/* Split each fixed-point average into integer and centi parts. */
	avenrun1 = loadbuf[0] >> FSHIFT;
	avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
	avenrun5 = loadbuf[1] >> FSHIFT;
	avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
	avenrun15 = loadbuf[2] >> FSHIFT;
	avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;

	lxpr_uiobuf_printf(uiobuf,
	    "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
	    avenrun1, avenrun1_cs,
	    avenrun5, avenrun5_cs,
	    avenrun15, avenrun15_cs,
	    nrunnable, nlwps, 0);
}

/*
 * lxpr_read_meminfo(): read the contents of the "meminfo" file.
 */
static void
lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	zone_t *zone = LXPTOZ(lxpnp);
	int global = zone == global_zone;
	long total_mem, free_mem, total_swap, used_swap;

	ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);

	/*
	 * For the global zone, or a zone with no physical-memory cap, report
	 * system-wide figures; otherwise derive them from the zone's cap.
	 */
	if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
		total_mem = physmem * PAGESIZE;
		free_mem = freemem * PAGESIZE;
	} else {
		total_mem = zone->zone_phys_mem_ctl;
		free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
	}

	if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
		total_swap = k_anoninfo.ani_max * PAGESIZE;
		used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
	} else {
		mutex_enter(&zone->zone_mem_lock);
		total_swap = zone->zone_max_swap_ctl;
		used_swap = zone->zone_max_swap;
		mutex_exit(&zone->zone_mem_lock);
	}

	lxpr_uiobuf_printf(uiobuf,
	    "MemTotal: %8lu kB\n"
	    "MemFree: %8lu kB\n"
	    "MemShared: %8u kB\n"
	    "Buffers: %8u kB\n"
	    "Cached: %8u kB\n"
	    "SwapCached:%8u kB\n"
	    "Active: %8u kB\n"
	    "Inactive: %8u kB\n"
	    "HighTotal: %8u kB\n"
	    "HighFree: %8u kB\n"
	    "LowTotal: %8u kB\n"
	    "LowFree: %8u kB\n"
	    "SwapTotal: %8lu kB\n"
	    "SwapFree: %8lu kB\n",
	    btok(total_mem),		/* MemTotal */
	    btok(free_mem),		/* MemFree */
	    0,				/* MemShared */
	    0,				/* Buffers */
	    0,				/* Cached */
	    0,				/* SwapCached */
	    0,				/* Active */
	    0,				/* Inactive */
	    0,				/* HighTotal */
	    0,				/* HighFree */
	    btok(total_mem),		/* LowTotal */
	    btok(free_mem),		/* LowFree */
	    btok(total_swap),		/* SwapTotal */
	    btok(total_swap - used_swap)); /* SwapFree */
}

/*
 * lxpr_read_mounts(): emit a Linux-style mount table for the zone by walking
 * the vfs list (or the zone's sub-list) and reporting each visible mount.
 */
/* ARGSUSED */
static void
lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	struct vfs *vfsp;
	struct vfs *vfslist;
	zone_t *zone = LXPTOZ(lxpnp);
	/* Snapshot of the per-vfs fields we need, taken under the list lock. */
	struct print_data {
		refstr_t *vfs_mntpt;
		refstr_t *vfs_resource;
		uint_t vfs_flag;
		int vfs_fstype;
		struct print_data *next;
	} *print_head = NULL;
	struct print_data **print_tail = &print_head;
	struct print_data *printp;

	vfs_list_read_lock();

	if (zone == global_zone) {
		vfsp = vfslist = rootvfs;
	} else {
		vfsp = vfslist = zone->zone_vfslist;
		/*
		 * If the zone has a root entry, it will be the first in
		 * the list. If it doesn't, we conjure one up.
		 */
		if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
		    zone->zone_rootpath) != 0) {
			struct vfs *tvfsp;
			/*
			 * The root of the zone is not a mount point. The vfs
			 * we want to report is that of the zone's root vnode.
			 */
			tvfsp = zone->zone_rootvp->v_vfsp;

			lxpr_uiobuf_printf(uiobuf,
			    "/ / %s %s 0 0\n",
			    vfssw[tvfsp->vfs_fstype].vsw_name,
			    tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");

		}
		if (vfslist == NULL) {
			vfs_list_unlock();
			return;
		}
	}

	/*
	 * Later on we have to do a lookupname, which can end up causing
	 * another vfs_list_read_lock() to be called. Which can lead to a
	 * deadlock. To avoid this, we extract the data we need into a local
	 * list, then we can run this list without holding vfs_list_read_lock()
	 * We keep the list in the same order as the vfs_list
	 */
	do {
		/* Skip mounts we shouldn't show */
		if (vfsp->vfs_flag & VFS_NOMNTTAB) {
			goto nextfs;
		}

		printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
		/* Hold the refstrs so they outlive dropping the list lock. */
		refstr_hold(vfsp->vfs_mntpt);
		printp->vfs_mntpt = vfsp->vfs_mntpt;
		refstr_hold(vfsp->vfs_resource);
		printp->vfs_resource = vfsp->vfs_resource;
		printp->vfs_flag = vfsp->vfs_flag;
		printp->vfs_fstype = vfsp->vfs_fstype;
		printp->next = NULL;

		*print_tail = printp;
		print_tail = &printp->next;

nextfs:
		vfsp = (zone == global_zone) ?
		    vfsp->vfs_next : vfsp->vfs_zone_next;

	} while (vfsp != vfslist);

	vfs_list_unlock();

	/*
	 * now we can run through what we've extracted without holding
	 * vfs_list_read_lock()
	 */
	printp = print_head;
	while (printp != NULL) {
		struct print_data *printp_next;
		const char *resource;
		char *mntpt;
		struct vnode *vp;
		int error;

		mntpt = (char *)refstr_value(printp->vfs_mntpt);
		resource = refstr_value(printp->vfs_resource);

		if (mntpt != NULL && mntpt[0] != '\0')
			mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
		else
			mntpt = "-";

		error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);

		if (error != 0)
			goto nextp;

		/* Only report actual mount points (root of their vfs). */
		if (!(vp->v_flag & VROOT)) {
			VN_RELE(vp);
			goto nextp;
		}
		VN_RELE(vp);

		if (resource != NULL && resource[0] != '\0') {
			if (resource[0] == '/') {
				resource = ZONE_PATH_VISIBLE(resource, zone) ?
				    ZONE_PATH_TRANSLATE(resource, zone) :
				    mntpt;
			}
		} else {
			resource = "-";
		}

		lxpr_uiobuf_printf(uiobuf,
		    "%s %s %s %s 0 0\n",
		    resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
		    printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");

nextp:
		printp_next = printp->next;
		refstr_rele(printp->vfs_mntpt);
		refstr_rele(printp->vfs_resource);
		kmem_free(printp, sizeof (*printp));
		printp = printp_next;

	}
}

/*
 * lxpr_read_partitions():
 *
 * Over the years, /proc/partitions has been made considerably smaller -- to
 * the point that it really is only major number, minor number, number of
 * blocks (which we report as 0), and partition name.
 *
 * We support this because some things want to see it to make sense of
 * /proc/diskstats, and also because "fdisk -l" and a few other things look
 * here to find all disks on the system.
 */
/* ARGSUSED */
static void
lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{

	kstat_t *ksr;
	kstat_t ks0;
	int nidx, num, i;
	size_t sidx, size;
	zfs_cmd_t *zc;
	nvlist_t *nv = NULL;
	nvpair_t *elem = NULL;
	lxpr_mnt_t *mnt;
	lxpr_zfs_iter_t zfsi;

	ASSERT(lxpnp->lxpr_type == LXPR_PARTITIONS);

	/* Snapshot the kstat chain headers (freed below as ksr/sidx). */
	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);

	if (ksr == NULL)
		return;

	lxpr_uiobuf_printf(uiobuf, "major minor #blocks name\n\n");

	/* One line per disk-class I/O kstat; block count is reported as 0. */
	for (i = 1; i < nidx; i++) {
		kstat_t *ksp = &ksr[i];
		kstat_io_t *kip;

		if (ksp->ks_type != KSTAT_TYPE_IO ||
		    strcmp(ksp->ks_class, "disk") != 0)
			continue;

		if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
		    &size, &num)) == NULL)
			continue;

		if (size < sizeof (kstat_io_t)) {
			kmem_free(kip, size);
			continue;
		}

		lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d %s\n",
		    mod_name_to_major(ksp->ks_module),
		    ksp->ks_instance, 0, ksp->ks_name);

		kmem_free(kip, size);
	}

	kmem_free(ksr, sidx);

	/* If we never got to open the zfs LDI, then stop now. */
	mnt = (lxpr_mnt_t *)lxpnp->lxpr_vnode->v_vfsp->vfs_data;
	if (mnt->lxprm_zfs_isopen == B_FALSE)
		return;

	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);

	if (lxpr_zfs_list_pools(mnt, zc, &nv) != 0)
		goto out;

	/* Also report every zvol in every pool as a partition. */
	while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
		char *pool = nvpair_name(elem);

		bzero(&zfsi, sizeof (lxpr_zfs_iter_t));
		while (lxpr_zfs_next_zvol(mnt, pool, zc, &zfsi) == 0) {
			major_t major;
			minor_t minor;
			if (lxpr_zvol_dev(mnt, zc->zc_name, &major, &minor)
			    != 0)
				continue;

			lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d zvol/dsk/%s\n",
			    major, minor, 0, zc->zc_name);
		}
	}

	nvlist_free(nv);
out:
	kmem_free(zc, sizeof (zfs_cmd_t));
}

/*
 * lxpr_read_diskstats():
 *
 * See the block comment above the per-device output-generating line for the
 * details of the format.
 */
/* ARGSUSED */
static void
lxpr_read_diskstats(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	kstat_t *ksr;
	kstat_t ks0;
	int nidx, num, i;
	size_t sidx, size;

	ASSERT(lxpnp->lxpr_type == LXPR_DISKSTATS);

	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);

	if (ksr == NULL)
		return;

	for (i = 1; i < nidx; i++) {
		kstat_t *ksp = &ksr[i];
		kstat_io_t *kip;

		if (ksp->ks_type != KSTAT_TYPE_IO ||
		    strcmp(ksp->ks_class, "disk") != 0)
			continue;

		if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
		    &size, &num)) == NULL)
			continue;

		if (size < sizeof (kstat_io_t)) {
			kmem_free(kip, size);
			continue;
		}

		/*
		 * /proc/diskstats is defined to have one line of output for
		 * each block device, with each line containing the following
		 * 14 fields:
		 *
		 *	1 - major number
		 *	2 - minor mumber
		 *	3 - device name
		 *	4 - reads completed successfully
		 *	5 - reads merged
		 *	6 - sectors read
		 *	7 - time spent reading (ms)
		 *	8 - writes completed
		 *	9 - writes merged
		 *	10 - sectors written
		 *	11 - time spent writing (ms)
		 *	12 - I/Os currently in progress
		 *	13 - time spent doing I/Os (ms)
		 *	14 - weighted time spent doing I/Os (ms)
		 *
		 * One small hiccup: we don't actually keep track of time
		 * spent reading vs. time spent writing -- we keep track of
		 * time waiting vs. time actually performing I/O. While we
		 * could divide the total time by the I/O mix (making the
		 * obviously wrong assumption that I/O operations all take the
		 * same amount of time), this has the undesirable side-effect
		 * of moving backwards. Instead, we report the total time
		 * (read + write) for all three stats (read, write, total).
		 * This is also a lie of sorts, but it should be more
		 * immediately clear to the user that reads and writes are
		 * each being double-counted as the other.
		 */
		lxpr_uiobuf_printf(uiobuf, "%4d %7d %s "
		    "%llu %llu %llu %llu "
		    "%llu %llu %llu %llu "
		    "%llu %llu %llu\n",
		    mod_name_to_major(ksp->ks_module),
		    ksp->ks_instance, ksp->ks_name,
		    (uint64_t)kip->reads, 0LL,
		    kip->nread / (uint64_t)LXPR_SECTOR_SIZE,
		    (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
		    (uint64_t)kip->writes, 0LL,
		    kip->nwritten / (uint64_t)LXPR_SECTOR_SIZE,
		    (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
		    (uint64_t)(kip->rcnt + kip->wcnt),
		    (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
		    (kip->rlentime + kip->wlentime) /
		    (uint64_t)(NANOSEC / MILLISEC));

		kmem_free(kip, size);
	}

	kmem_free(ksr, sidx);
}

/*
 * lxpr_read_version(): read the contents of the "version" file.
 */
/* ARGSUSED */
static void
lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lx_zone_data_t *lxzd = ztolxzd(LXPTOZ(lxpnp));
	lx_proc_data_t *lxpd = ptolxproc(curproc);
	const char *release = lxzd->lxzd_kernel_release;
	const char *version = lxzd->lxzd_kernel_version;

	/* Use per-process overrides, if specified */
	if (lxpd != NULL && lxpd->l_uname_release[0] != '\0') {
		release = lxpd->l_uname_release;
	}
	if (lxpd != NULL && lxpd->l_uname_version[0] != '\0') {
		version = lxpd->l_uname_version;
	}

	lxpr_uiobuf_printf(uiobuf,
	    "%s version %s (%s version %d.%d.%d) %s\n",
	    LX_UNAME_SYSNAME, release,
#if defined(__GNUC__)
	    "gcc", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
#else
	    "cc", 1, 0, 0,
#endif
	    version);
}

/*
 * lxpr_read_stat(): read the contents of the "stat" file.
 *
 */
/* ARGSUSED */
static void
lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	cpu_t *cp, *cpstart;
	int pools_enabled;
	ulong_t idle_cum = 0;
	ulong_t sys_cum = 0;
	ulong_t user_cum = 0;
	ulong_t irq_cum = 0;
	ulong_t cpu_nrunnable_cum = 0;
	ulong_t w_io_cum = 0;

	ulong_t pgpgin_cum = 0;
	ulong_t pgpgout_cum = 0;
	ulong_t pgswapout_cum = 0;
	ulong_t pgswapin_cum = 0;
	ulong_t intr_cum = 0;
	ulong_t pswitch_cum = 0;
	ulong_t forks_cum = 0;
	hrtime_t msnsecs[NCMSTATES];
	/* is the emulated release > 2.4 */
	boolean_t newer_than24 = lx_kern_release_cmp(LXPTOZ(lxpnp), "2.4") > 0;
	/* temporary variable since scalehrtime modifies data in place */
	hrtime_t tmptime;

	ASSERT(lxpnp->lxpr_type == LXPR_STAT);

	mutex_enter(&cpu_lock);
	pools_enabled = pool_pset_enabled();

	/* Calculate cumulative stats */
	cp = cpstart = CPU->cpu_part->cp_cpulist;
	do {
		int i;

		/*
		 * Don't count CPUs that aren't even in the system
		 * or aren't up yet.
		 *
		 * NOTE(review): this "continue" jumps to the while condition
		 * without advancing cp, so a listed CPU lacking CPU_EXISTS
		 * would spin here forever -- TODO confirm whether that state
		 * is reachable for CPUs on the partition list.
		 */
		if ((cp->cpu_flags & CPU_EXISTS) == 0) {
			continue;
		}

		get_cpu_mstate(cp, msnsecs);

		idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
		sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
		user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);

		pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
		pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
		pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
		pgswapout_cum += CPU_STATS(cp, vm.pgswapout);


		if (newer_than24) {
			cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
			w_io_cum += CPU_STATS(cp, sys.iowait);
			for (i = 0; i < NCMSTATES; i++) {
				tmptime = cp->cpu_intracct[i];
				scalehrtime(&tmptime);
				irq_cum += NSEC_TO_TICK(tmptime);
			}
		}

		for (i = 0; i < PIL_MAX; i++)
			intr_cum += CPU_STATS(cp, sys.intr[i]);

		pswitch_cum += CPU_STATS(cp, sys.pswitch);
		forks_cum += CPU_STATS(cp, sys.sysfork);
		forks_cum += CPU_STATS(cp, sys.sysvfork);

		if (pools_enabled)
			cp = cp->cpu_next_part;
		else
			cp = cp->cpu_next;
	} while (cp != cpstart);

	if (newer_than24) {
		lxpr_uiobuf_printf(uiobuf,
		    "cpu %lu %lu %lu %lu %lu %lu %lu\n",
		    user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
	} else {
		lxpr_uiobuf_printf(uiobuf,
		    "cpu %lu %lu %lu %lu\n",
		    user_cum, 0L, sys_cum, idle_cum);
	}

	/* Do per processor stats */
	do {
		int i;

		ulong_t idle_ticks;
		ulong_t sys_ticks;
		ulong_t user_ticks;
		ulong_t irq_ticks = 0;

		/*
		 * Don't count CPUs that aren't even in the system
		 * or aren't up yet.
		 * (Same continue-without-advance caveat as the loop above.)
		 */
		if ((cp->cpu_flags & CPU_EXISTS) == 0) {
			continue;
		}

		get_cpu_mstate(cp, msnsecs);

		idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
		sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
		user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);

		for (i = 0; i < NCMSTATES; i++) {
			tmptime = cp->cpu_intracct[i];
			scalehrtime(&tmptime);
			irq_ticks += NSEC_TO_TICK(tmptime);
		}

		if (newer_than24) {
			lxpr_uiobuf_printf(uiobuf,
			    "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
			    cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
			    0L, irq_ticks, 0L);
		} else {
			lxpr_uiobuf_printf(uiobuf,
			    "cpu%d %lu %lu %lu %lu\n",
			    cp->cpu_id,
			    user_ticks, 0L, sys_ticks, idle_ticks);
		}

		if (pools_enabled)
			cp = cp->cpu_next_part;
		else
			cp = cp->cpu_next;
	} while (cp != cpstart);

	mutex_exit(&cpu_lock);

	if (newer_than24) {
		lxpr_uiobuf_printf(uiobuf,
		    "page %lu %lu\n"
		    "swap %lu %lu\n"
		    "intr %lu\n"
		    "ctxt %lu\n"
		    "btime %lu\n"
		    "processes %lu\n"
		    "procs_running %lu\n"
		    "procs_blocked %lu\n",
		    pgpgin_cum, pgpgout_cum,
		    pgswapin_cum, pgswapout_cum,
		    intr_cum,
		    pswitch_cum,
		    boot_time,
		    forks_cum,
		    cpu_nrunnable_cum,
		    w_io_cum);
	} else {
		lxpr_uiobuf_printf(uiobuf,
		    "page %lu %lu\n"
		    "swap %lu %lu\n"
		    "intr %lu\n"
		    "ctxt %lu\n"
		    "btime %lu\n"
		    "processes %lu\n",
		    pgpgin_cum, pgpgout_cum,
		    pgswapin_cum, pgswapout_cum,
		    intr_cum,
		    pswitch_cum,
		    boot_time,
		    forks_cum);
	}
}

/*
 * lxpr_read_swaps():
 *
 * We don't support swap files or partitions, but some programs like to look
 * here just to check we have some swap on the system, so we lie and show
 * our entire swap cap as one swap partition.
 *
 * It is important to use formatting identical to the Linux implementation
 * so that consumers do not break. See swap_show() in mm/swapfile.c.
 */
/* ARGSUSED */
static void
lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/*
	 * NOTE(review): this reader uses curzone while its siblings use
	 * LXPTOZ(lxpnp) -- presumably equivalent for a process reading its
	 * own lx /proc; verify against other callers.
	 */
	zone_t *zone = curzone;
	uint64_t totswap, usedswap;

	mutex_enter(&zone->zone_mem_lock);
	/* Uses units of 1 kb (2^10). */
	totswap = zone->zone_max_swap_ctl >> 10;
	usedswap = zone->zone_max_swap >> 10;
	mutex_exit(&zone->zone_mem_lock);

	lxpr_uiobuf_printf(uiobuf,
	    "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
	lxpr_uiobuf_printf(uiobuf, "%-40s%s\t%llu\t%llu\t%d\n",
	    "/dev/swap", "partition", totswap, usedswap, -1);
}

/*
 * inotify tunables exported via /proc.
 */
extern int inotify_maxevents;
extern int inotify_maxinstances;
extern int inotify_maxwatches;

/* /proc/sys/fs/inotify/max_queued_events */
static void
lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp,
    lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS);
	lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents);
}

/* /proc/sys/fs/inotify/max_user_instances */
static void
lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp,
    lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES);
	lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances);
}

/* /proc/sys/fs/inotify/max_user_watches */
static void
lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp,
    lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES);
	lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches);
}

/* /proc/sys/kernel/cap_last_cap: highest valid Linux capability number. */
static void
lxpr_read_sys_kernel_caplcap(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_CAPLCAP);
	lxpr_uiobuf_printf(uiobuf, "%d\n", LX_CAP_MAX_VALID);
}

/*
 * /proc/sys/kernel/core_pattern: translate the zone's native core path into
 * the Linux pattern syntax; emit an empty line when unset or untranslatable.
 */
static void
lxpr_read_sys_kernel_corepatt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	zone_t *zone = curproc->p_zone;
	struct core_globals *cg;
	refstr_t *rp;
	corectl_path_t *ccp;
	char tr[MAXPATHLEN];

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);

	cg = zone_getspecific(core_zone_key, zone);
	ASSERT(cg != NULL);

	/* If core dumps are disabled, return an empty string. */
	if ((cg->core_options & CC_PROCESS_PATH) == 0) {
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	/* Take a hold on the path refstr before dropping the mutex. */
	ccp = cg->core_default_path;
	mutex_enter(&ccp->ccp_mtx);
	if ((rp = ccp->ccp_path) != NULL)
		refstr_hold(rp);
	mutex_exit(&ccp->ccp_mtx);

	if (rp == NULL) {
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	bzero(tr, sizeof (tr));
	if (lxpr_core_path_s2l(refstr_value(rp), tr, sizeof (tr)) != 0) {
		refstr_rele(rp);
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	refstr_rele(rp);
	lxpr_uiobuf_printf(uiobuf, "%s\n", tr);
}

/* /proc/sys/kernel/hostname */
static void
lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME);
	lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename());
}

/* /proc/sys/kernel/msgmni: enforced value of the zone msgmni rctl. */
static void
lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	rctl_qty_t val;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI);

	mutex_enter(&curproc->p_lock);
	val = rctl_enforced_value(rc_zone_msgmni,
	    curproc->p_zone->zone_rctls, curproc);
	mutex_exit(&curproc->p_lock);

	lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
}

/* /proc/sys/kernel/ngroups_max */
static void
lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX);
	lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max);
}

/* /proc/sys/kernel/osrelease: emulated kernel version for lx zones. */
static void
lxpr_read_sys_kernel_osrel(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lx_zone_data_t *br_data;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_OSREL);
	br_data = ztolxzd(curproc->p_zone);
	if (curproc->p_zone->zone_brand == &lx_brand) {
		lxpr_uiobuf_printf(uiobuf, "%s\n",
		    br_data->lxzd_kernel_version);
	} else {
		lxpr_uiobuf_printf(uiobuf, "\n");
	}
}

/* /proc/sys/kernel/pid_max */
static void
lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX);
	lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid);
}

static void
lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/*
	 * This file isn't documented on the Linux proc(5) man page but
	 * according to the blog of the author of systemd/journald (the
	 * consumer), he says:
	 *    boot_id: A random ID that is regenerated on each boot. As such it
	 *    can be used to identify the local machine's current boot. It's
	 *    universally available on any recent Linux kernel. It's a good and
	 *    safe choice if you need to identify a specific boot on a specific
	 *    booted kernel.
	 *
	 * We'll just generate a random ID if necessary. On Linux the format
	 * appears to resemble a uuid but since it is not documented to be a
	 * uuid, we don't worry about that.
	 */
	lx_zone_data_t *br_data;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RAND_BOOTID);

	if (curproc->p_zone->zone_brand != &lx_brand) {
		lxpr_uiobuf_printf(uiobuf, "0\n");
		return;
	}

	br_data = ztolxzd(curproc->p_zone);
	if (br_data->lxzd_bootid[0] == '\0') {
		/*
		 * NOTE(review): this extern declaration appears unused --
		 * random_get_bytes() is what supplies the entropy below.
		 * Lazy generation here is also not visibly serialized;
		 * concurrent first readers may race -- TODO confirm an outer
		 * lock covers this.
		 */
		extern int getrandom(void *, size_t, int);
		int i;

		/* Build a uuid-shaped string: 8-4-4-4-12 hex digit groups. */
		for (i = 0; i < 5; i++) {
			u_longlong_t n;
			char s[32];

			(void) random_get_bytes((uint8_t *)&n, sizeof (n));
			switch (i) {
			case 0: (void) snprintf(s, sizeof (s), "%08llx", n);
				s[8] = '\0';
				break;
			case 4: (void) snprintf(s, sizeof (s), "%012llx", n);
				s[12] = '\0';
				break;
			default: (void) snprintf(s, sizeof (s), "%04llx", n);
				s[4] = '\0';
				break;
			}
			if (i > 0)
				strlcat(br_data->lxzd_bootid, "-",
				    sizeof (br_data->lxzd_bootid));
			strlcat(br_data->lxzd_bootid, s,
			    sizeof (br_data->lxzd_bootid));
		}
	}

	lxpr_uiobuf_printf(uiobuf, "%s\n", br_data->lxzd_bootid);
}

/* /proc/sys/kernel/sem: the four SysV semaphore limits. */
static void
lxpr_read_sys_kernel_sem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *pp = curproc;
	rctl_qty_t vmsl, vopm, vmni, vmns;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SEM);

	mutex_enter(&pp->p_lock);
	vmsl = rctl_enforced_value(rc_process_semmsl, pp->p_rctls, pp);
	vopm = rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp);
	vmni = rctl_enforced_value(rc_zone_semmni, pp->p_zone->zone_rctls, pp);
	mutex_exit(&pp->p_lock);
	/* semmns is derived; saturate on multiplication overflow. */
	vmns = vmsl * vmni;
	if (vmns < vmsl || vmns < vmni) {
		vmns = ULLONG_MAX;
	}
	/*
	 * Format: semmsl semmns semopm semmni
	 *  - semmsl: Limit semaphores in a sempahore set.
	 *  - semmns: Limit semaphores in all semaphore sets
	 *  - semopm: Limit operations in a single semop call
	 *  - semmni: Limit number of semaphore sets
	 */
	lxpr_uiobuf_printf(uiobuf, "%llu\t%llu\t%llu\t%llu\n",
	    vmsl, vmns, vopm, vmni);
}

/* /proc/sys/kernel/shmmax: zone shmmax rctl, capped at 4GB like Linux. */
static void
lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	rctl_qty_t val;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX);

	mutex_enter(&curproc->p_lock);
	val = rctl_enforced_value(rc_zone_shmmax,
	    curproc->p_zone->zone_rctls, curproc);
	mutex_exit(&curproc->p_lock);

	if (val > FOURGB)
		val = FOURGB;

	lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
}

/* /proc/sys/kernel/shmmni: zone shmmni rctl. */
static void
lxpr_read_sys_kernel_shmmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	rctl_qty_t val;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMNI);

	mutex_enter(&curproc->p_lock);
	val = rctl_enforced_value(rc_zone_shmmni,
	    curproc->p_zone->zone_rctls, curproc);
	mutex_exit(&curproc->p_lock);

	/*
	 * NOTE(review): shmmni is a segment count, so this FOURGB clamp looks
	 * copy-pasted from the shmmax reader above; harmless in practice, but
	 * verify it is intentional.
	 */
	if (val > FOURGB)
		val = FOURGB;

	lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
}

/* /proc/sys/kernel/threads-max: the zone's LWP limit. */
static void
lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX);
	/*
	 * NOTE(review): zone_nlwps_ctl is printed with %d; confirm its width
	 * matches int on all supported configurations.
	 */
	lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl);
}

/*
 * /proc/sys/net/core/somaxconn: the stack's listen backlog maximum, falling
 * back to SOMAXCONN when no netstack is available.
 */
static void
lxpr_read_sys_net_core_somaxc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	tcp_stack_t *tcps;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);

	ns = netstack_get_current();
	if (ns == NULL) {
		lxpr_uiobuf_printf(uiobuf, "%d\n", SOMAXCONN);
		return;
	}

	tcps = ns->netstack_tcp;
	lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_conn_req_max_q);
	netstack_rele(ns);
}

/* /proc/sys/vm/min_free_kbytes: not tunable here; always 0. */
static void
lxpr_read_sys_vm_minfr_kb(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MINFR_KB);
	lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
}

/* /proc/sys/vm/nr_hugepages: huge pages not exposed; always 0. */
static void
lxpr_read_sys_vm_nhpages(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_NHUGEP);
	lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
}

/* /proc/sys/vm/overcommit_memory: always 0 (heuristic overcommit). */
static void
lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_OVERCOMMIT_MEM);
	lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
}

/* /proc/sys/vm/swappiness: always 0. */
static void
lxpr_read_sys_vm_swappiness(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_SWAPPINESS);
	lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
}

/*
 * lxpr_read_uptime(): read the contents of the "uptime" file.
 *
 * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
 * Use fixed point arithmetic to get 2 decimal places
 */
/* ARGSUSED */
static void
lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	cpu_t *cp, *cpstart;
	int pools_enabled;
	ulong_t idle_cum = 0;
	ulong_t cpu_count = 0;
	ulong_t idle_s;
	ulong_t idle_cs;
	ulong_t up_s;
	ulong_t up_cs;
	hrtime_t birthtime;
	hrtime_t centi_sec = 10000000; /* 10^7 */

	ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);

	/* Calculate cumulative stats */
	mutex_enter(&cpu_lock);
	pools_enabled = pool_pset_enabled();

	cp = cpstart = CPU->cpu_part->cp_cpulist;
	do {
		/*
		 * Don't count CPUs that aren't even in the system
		 * or aren't up yet.
4282 */ 4283 if ((cp->cpu_flags & CPU_EXISTS) == 0) { 4284 continue; 4285 } 4286 4287 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle); 4288 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait); 4289 cpu_count += 1; 4290 4291 if (pools_enabled) 4292 cp = cp->cpu_next_part; 4293 else 4294 cp = cp->cpu_next; 4295 } while (cp != cpstart); 4296 mutex_exit(&cpu_lock); 4297 4298 /* Getting the Zone zsched process startup time */ 4299 birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart; 4300 up_cs = (gethrtime() - birthtime) / centi_sec; 4301 up_s = up_cs / 100; 4302 up_cs %= 100; 4303 4304 ASSERT(cpu_count > 0); 4305 idle_cum /= cpu_count; 4306 idle_s = idle_cum / hz; 4307 idle_cs = idle_cum % hz; 4308 idle_cs *= 100; 4309 idle_cs /= hz; 4310 4311 lxpr_uiobuf_printf(uiobuf, 4312 "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs); 4313 } 4314 4315 static const char *amd_x_edx[] = { 4316 NULL, NULL, NULL, NULL, 4317 NULL, NULL, NULL, NULL, 4318 NULL, NULL, NULL, "syscall", 4319 NULL, NULL, NULL, NULL, 4320 NULL, NULL, NULL, "mp", 4321 "nx", NULL, "mmxext", NULL, 4322 NULL, NULL, NULL, NULL, 4323 NULL, "lm", "3dnowext", "3dnow" 4324 }; 4325 4326 static const char *amd_x_ecx[] = { 4327 "lahf_lm", NULL, "svm", NULL, 4328 "altmovcr8" 4329 }; 4330 4331 static const char *tm_x_edx[] = { 4332 "recovery", "longrun", NULL, "lrti" 4333 }; 4334 4335 /* 4336 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx." 
 */
static const char *intc_x_edx[] = {
	NULL,	NULL,	NULL,	NULL,
	NULL,	NULL,	NULL,	NULL,
	NULL,	NULL,	NULL,	"syscall",
	NULL,	NULL,	NULL,	NULL,
	NULL,	NULL,	NULL,	NULL,
	"nx",	NULL,	NULL,	NULL,
	NULL,	NULL,	NULL,	NULL,
	NULL,	"lm",	NULL,	NULL
};

/* Standard CPUID leaf 1 %edx feature names, indexed by bit position. */
static const char *intc_edx[] = {
	"fpu",	"vme",	"de",	"pse",
	"tsc",	"msr",	"pae",	"mce",
	"cx8",	"apic",	NULL,	"sep",
	"mtrr",	"pge",	"mca",	"cmov",
	"pat",	"pse36",	"pn",	"clflush",
	NULL,	"dts",	"acpi",	"mmx",
	"fxsr",	"sse",	"sse2",	"ss",
	"ht",	"tm",	"ia64",	"pbe"
};

/*
 * "sse3" on linux is called "pni" (Prescott New Instructions).
 */
static const char *intc_ecx[] = {
	"pni",	NULL,	NULL,	"monitor",
	"ds_cpl",	NULL,	NULL,	"est",
	"tm2",	NULL,	"cid",	NULL,
	NULL,	"cx16",	"xtpr"
};

/*
 * Report a list of each cgroup subsystem supported by our emulated cgroup fs.
 * This needs to exist for systemd to run but for now we don't report any
 * cgroup subsystems as being installed. The commented example below shows
 * how to print a subsystem entry.
 */
static void
lxpr_read_cgroups(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
	    "#subsys_name", "hierarchy", "num_cgroups", "enabled");

	/*
	 * lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
	 *    "cpu,cpuacct", "2", "1", "1");
	 */
}

/*
 * Emulate /proc/cpuinfo: emit one stanza per CPU in the caller's partition,
 * querying CPUID directly for feature flags and mapping them to the Linux
 * flag names in the tables above.
 */
static void
lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	int i;
	uint32_t bits;
	cpu_t *cp, *cpstart;
	int pools_enabled;
	const char **fp;
	char brandstr[CPU_IDSTRLEN];
	struct cpuid_regs cpr;
	int maxeax;
	int std_ecx, std_edx, ext_ecx, ext_edx;

	ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);

	/* cpu_lock keeps the CPU list stable while we walk it. */
	mutex_enter(&cpu_lock);
	pools_enabled = pool_pset_enabled();

	cp = cpstart = CPU->cpu_part->cp_cpulist;
	do {
		/*
		 * This returns the maximum eax value for standard cpuid
		 * functions in eax.
		 */
		cpr.cp_eax = 0;
		(void) cpuid_insn(cp, &cpr);
		maxeax = cpr.cp_eax;

		/*
		 * Get standard x86 feature flags.
		 */
		cpr.cp_eax = 1;
		(void) cpuid_insn(cp, &cpr);
		std_ecx = cpr.cp_ecx;
		std_edx = cpr.cp_edx;

		/*
		 * Now get extended feature flags.
		 */
		cpr.cp_eax = 0x80000001;
		(void) cpuid_insn(cp, &cpr);
		ext_ecx = cpr.cp_ecx;
		ext_edx = cpr.cp_edx;

		(void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);

		lxpr_uiobuf_printf(uiobuf,
		    "processor\t: %d\n"
		    "vendor_id\t: %s\n"
		    "cpu family\t: %d\n"
		    "model\t\t: %d\n"
		    "model name\t: %s\n"
		    "stepping\t: %d\n"
		    "cpu MHz\t\t: %u.%03u\n",
		    cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
		    cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
		    (uint32_t)(cpu_freq_hz / 1000000),
		    ((uint32_t)(cpu_freq_hz / 1000)) % 1000);

		lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
		    getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);

		if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
			/*
			 * 'siblings' is used for HT-style threads
			 */
			lxpr_uiobuf_printf(uiobuf,
			    "physical id\t: %lu\n"
			    "siblings\t: %u\n",
			    pg_plat_hw_instance_id(cp, PGHW_CHIP),
			    cpuid_get_ncpu_per_chip(cp));
		}

		/*
		 * Since we're relatively picky about running on older hardware,
		 * we can be somewhat cavalier about the answers to these ones.
		 *
		 * In fact, given the hardware we support, we just say:
		 *
		 *	fdiv_bug : no	(if we're on a 64-bit kernel)
		 *	hlt_bug : no
		 *	f00f_bug : no
		 *	coma_bug : no
		 *	wp : yes	(write protect in supervsr mode)
		 */
		lxpr_uiobuf_printf(uiobuf,
		    "fdiv_bug\t: %s\n"
		    "hlt_bug \t: no\n"
		    "f00f_bug\t: no\n"
		    "coma_bug\t: no\n"
		    "fpu\t\t: %s\n"
		    "fpu_exception\t: %s\n"
		    "cpuid level\t: %d\n"
		    "flags\t\t:",
#if defined(__i386)
		    fpu_pentium_fdivbug ? "yes" : "no",
#else
		    "no",
#endif /* __i386 */
		    fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
		    maxeax);

		/* Standard leaf-1 %edx flags. */
		for (bits = std_edx, fp = intc_edx, i = 0;
		    i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
			if ((bits & (1 << i)) != 0 && *fp)
				lxpr_uiobuf_printf(uiobuf, " %s", *fp);

		/*
		 * name additional features where appropriate
		 */
		switch (x86_vendor) {
		case X86_VENDOR_Intel:
			for (bits = ext_edx, fp = intc_x_edx, i = 0;
			    i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
			    fp++, i++)
				if ((bits & (1 << i)) != 0 && *fp)
					lxpr_uiobuf_printf(uiobuf, " %s", *fp);
			break;

		case X86_VENDOR_AMD:
			for (bits = ext_edx, fp = amd_x_edx, i = 0;
			    i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
			    fp++, i++)
				if ((bits & (1 << i)) != 0 && *fp)
					lxpr_uiobuf_printf(uiobuf, " %s", *fp);

			for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
			    i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
			    fp++, i++)
				if ((bits & (1 << i)) != 0 && *fp)
					lxpr_uiobuf_printf(uiobuf, " %s", *fp);
			break;

		case X86_VENDOR_TM:
			for (bits = ext_edx, fp = tm_x_edx, i = 0;
			    i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
			    fp++, i++)
				if ((bits & (1 << i)) != 0 && *fp)
					lxpr_uiobuf_printf(uiobuf, " %s", *fp);
			break;
		default:
			break;
		}

		/* Standard leaf-1 %ecx flags. */
		for (bits = std_ecx, fp = intc_ecx, i = 0;
		    i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
			if ((bits & (1 << i)) != 0 && *fp)
				lxpr_uiobuf_printf(uiobuf, " %s", *fp);

		lxpr_uiobuf_printf(uiobuf, "\n\n");

		if (pools_enabled)
			cp = cp->cpu_next_part;
		else
			cp = cp->cpu_next;
	} while (cp != cpstart);

	mutex_exit(&cpu_lock);
}

/*
 * Reads of a /proc/<pid>/fd/<n> entry itself are not meaningful; the
 * contents come from the underlying realvp.
 */
/* ARGSUSED */
static void
lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
	lxpr_uiobuf_seterr(uiobuf, EFAULT);
}

/*
 * Report a list of file systems loaded in the kernel.
We only report the ones
 * which we support and which may be checked by various components to see if
 * they are loaded.
 */
static void
lxpr_read_filesystems(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* All entries are "nodev": none require a backing block device. */
	lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "autofs");
	lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "cgroup");
	lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "nfs");
	lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "proc");
	lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "sysfs");
	lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "tmpfs");
}

/*
 * lxpr_getattr(): Vnode operation for VOP_GETATTR()
 */
static int
lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	register lxpr_node_t *lxpnp = VTOLXP(vp);
	lxpr_nodetype_t type = lxpnp->lxpr_type;
	extern uint_t nproc;
	int error;

	/*
	 * Return attributes of underlying vnode if ATTR_REAL
	 *
	 * but keep fd files with the symlink permissions
	 */
	if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
		vnode_t *rvp = lxpnp->lxpr_realvp;

		/*
		 * withold attribute information to owner or root
		 */
		if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
			return (error);
		}

		/*
		 * now its attributes
		 */
		if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
			return (error);
		}

		/*
		 * if it's a file in lx /proc/pid/fd/xx then set its
		 * mode and keep it looking like a symlink, fifo or socket
		 */
		if (type == LXPR_PID_FD_FD) {
			vap->va_mode = lxpnp->lxpr_mode;
			vap->va_type = lxpnp->lxpr_realvp->v_type;
			vap->va_size = 0;
			vap->va_nlink = 1;
		}
		return (0);
	}

	/* Default attributes, that may be overridden below */
	bzero(vap, sizeof (*vap));
	vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
	vap->va_nlink = 1;
	vap->va_type = vp->v_type;
	vap->va_mode = lxpnp->lxpr_mode;
	vap->va_fsid = vp->v_vfsp->vfs_dev;
	vap->va_blksize = DEV_BSIZE;
	vap->va_uid = lxpnp->lxpr_uid;
	vap->va_gid = lxpnp->lxpr_gid;
	vap->va_nodeid = lxpnp->lxpr_ino;

	switch (type) {
	case LXPR_PROCDIR:
		/* "." and ".." plus the fixed entries plus one per process */
		vap->va_nlink = nproc + 2 + PROCDIRFILES;
		vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
		break;
	case LXPR_PIDDIR:
		vap->va_nlink = PIDDIRFILES;
		vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
		break;
	case LXPR_PID_TASK_IDDIR:
		vap->va_nlink = TIDDIRFILES;
		vap->va_size = TIDDIRFILES * LXPR_SDSIZE;
		break;
	case LXPR_SELF:
		vap->va_uid = crgetruid(curproc->p_cred);
		vap->va_gid = crgetrgid(curproc->p_cred);
		break;
	case LXPR_PID_FD_FD:
	case LXPR_PID_TID_FD_FD:
		/*
		 * Restore VLNK type for lstat-type activity.
		 * See lxpr_readlink for more details.
		 */
		if ((flags & FOLLOW) == 0)
			vap->va_type = VLNK;
		/* FALLTHROUGH */
	default:
		break;
	}

	vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
	return (0);
}

/*
 * lxpr_access(): Vnode operation for VOP_ACCESS()
 */
static int
lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
{
	lxpr_node_t *lxpnp = VTOLXP(vp);
	lxpr_nodetype_t type = lxpnp->lxpr_type;
	int shift = 0;
	proc_t *tp;

	/* lx /proc is a read only file system */
	if (mode & VWRITE) {
		/* The few nodes below accept writes; everything else EROFS. */
		switch (type) {
		case LXPR_PID_OOM_SCR_ADJ:
		case LXPR_PID_TID_OOM_SCR_ADJ:
		case LXPR_SYS_KERNEL_COREPATT:
		case LXPR_SYS_NET_CORE_SOMAXCON:
		case LXPR_SYS_VM_OVERCOMMIT_MEM:
		case LXPR_SYS_VM_SWAPPINESS:
		case LXPR_PID_FD_FD:
		case LXPR_PID_TID_FD_FD:
			break;
		default:
			return (EROFS);
		}
	}

	/*
	 * If this is a restricted file, check access permissions.
	 */
	switch (type) {
	case LXPR_PIDDIR:
		return (0);
	case LXPR_PID_CURDIR:
	case LXPR_PID_ENV:
	case LXPR_PID_EXE:
	case LXPR_PID_LIMITS:
	case LXPR_PID_MAPS:
	case LXPR_PID_MEM:
	case LXPR_PID_ROOTDIR:
	case LXPR_PID_FDDIR:
	case LXPR_PID_FD_FD:
	case LXPR_PID_TID_FDDIR:
	case LXPR_PID_TID_FD_FD:
		if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
			return (ENOENT);
		if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
		    priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
			lxpr_unlock(tp);
			return (EACCES);
		}
		lxpr_unlock(tp);
		/* FALLTHROUGH */
	default:
		break;
	}

	if (lxpnp->lxpr_realvp != NULL) {
		/*
		 * For these we use the underlying vnode's accessibility.
		 */
		return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
	}

	/* If user is root allow access regardless of permission bits */
	if (secpolicy_proc_access(cr) == 0)
		return (0);

	/*
	 * Access check is based on only one of owner, group, public. If not
	 * owner, then check group. If not a member of the group, then check
	 * public access.
4736 */ 4737 if (crgetuid(cr) != lxpnp->lxpr_uid) { 4738 shift += 3; 4739 if (!groupmember((uid_t)lxpnp->lxpr_gid, cr)) 4740 shift += 3; 4741 } 4742 4743 mode &= ~(lxpnp->lxpr_mode << shift); 4744 4745 if (mode == 0) 4746 return (0); 4747 4748 return (EACCES); 4749 } 4750 4751 /* ARGSUSED */ 4752 static vnode_t * 4753 lxpr_lookup_not_a_dir(vnode_t *dp, char *comp) 4754 { 4755 return (NULL); 4756 } 4757 4758 /* 4759 * lxpr_lookup(): Vnode operation for VOP_LOOKUP() 4760 */ 4761 /* ARGSUSED */ 4762 static int 4763 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp, 4764 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 4765 int *direntflags, pathname_t *realpnp) 4766 { 4767 lxpr_node_t *lxpnp = VTOLXP(dp); 4768 lxpr_nodetype_t type = lxpnp->lxpr_type; 4769 int error; 4770 4771 ASSERT(dp->v_type == VDIR); 4772 ASSERT(type < LXPR_NFILES); 4773 4774 /* 4775 * we should never get here because the lookup 4776 * is done on the realvp for these nodes 4777 */ 4778 ASSERT(type != LXPR_PID_FD_FD && 4779 type != LXPR_PID_CURDIR && 4780 type != LXPR_PID_ROOTDIR); 4781 4782 /* 4783 * restrict lookup permission to owner or root 4784 */ 4785 if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) { 4786 return (error); 4787 } 4788 4789 /* 4790 * Just return the parent vnode if that's where we are trying to go. 4791 */ 4792 if (strcmp(comp, "..") == 0) { 4793 VN_HOLD(lxpnp->lxpr_parent); 4794 *vpp = lxpnp->lxpr_parent; 4795 return (0); 4796 } 4797 4798 /* 4799 * Special handling for directory searches. Note: null component name 4800 * denotes that the current directory is being searched. 4801 */ 4802 if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) { 4803 VN_HOLD(dp); 4804 *vpp = dp; 4805 return (0); 4806 } 4807 4808 *vpp = (lxpr_lookup_function[type](dp, comp)); 4809 return ((*vpp == NULL) ? 
ENOENT : 0); 4810 } 4811 4812 /* 4813 * Do a sequential search on the given directory table 4814 */ 4815 static vnode_t * 4816 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p, 4817 lxpr_dirent_t *dirtab, int dirtablen) 4818 { 4819 lxpr_node_t *lxpnp; 4820 int count; 4821 4822 for (count = 0; count < dirtablen; count++) { 4823 if (strcmp(dirtab[count].d_name, comp) == 0) { 4824 lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0); 4825 dp = LXPTOV(lxpnp); 4826 ASSERT(dp != NULL); 4827 return (dp); 4828 } 4829 } 4830 return (NULL); 4831 } 4832 4833 static vnode_t * 4834 lxpr_lookup_piddir(vnode_t *dp, char *comp) 4835 { 4836 proc_t *p; 4837 4838 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR); 4839 4840 p = lxpr_lock(VTOLXP(dp)->lxpr_pid); 4841 if (p == NULL) 4842 return (NULL); 4843 4844 dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES); 4845 4846 lxpr_unlock(p); 4847 4848 return (dp); 4849 } 4850 4851 /* 4852 * Lookup one of the process's task ID's. 4853 */ 4854 static vnode_t * 4855 lxpr_lookup_taskdir(vnode_t *dp, char *comp) 4856 { 4857 lxpr_node_t *dlxpnp = VTOLXP(dp); 4858 lxpr_node_t *lxpnp; 4859 proc_t *p; 4860 pid_t real_pid; 4861 uint_t tid; 4862 int c; 4863 kthread_t *t; 4864 4865 ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASKDIR); 4866 4867 /* 4868 * convert the string rendition of the filename to a thread ID 4869 */ 4870 tid = 0; 4871 while ((c = *comp++) != '\0') { 4872 int otid; 4873 if (c < '0' || c > '9') 4874 return (NULL); 4875 4876 otid = tid; 4877 tid = 10 * tid + c - '0'; 4878 /* integer overflow */ 4879 if (tid / 10 != otid) 4880 return (NULL); 4881 } 4882 4883 /* 4884 * get the proc to work with and lock it 4885 */ 4886 real_pid = get_real_pid(dlxpnp->lxpr_pid); 4887 p = lxpr_lock(real_pid); 4888 if ((p == NULL)) 4889 return (NULL); 4890 4891 /* 4892 * If the process is a zombie or system process 4893 * it can't have any threads. 
4894 */ 4895 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) { 4896 lxpr_unlock(p); 4897 return (NULL); 4898 } 4899 4900 if (p->p_brand == &lx_brand) { 4901 t = lxpr_get_thread(p, tid); 4902 } else { 4903 /* 4904 * Only the main thread is visible for non-branded processes. 4905 */ 4906 t = p->p_tlist; 4907 if (tid != p->p_pid || t == NULL) { 4908 t = NULL; 4909 } else { 4910 thread_lock(t); 4911 } 4912 } 4913 if (t == NULL) { 4914 lxpr_unlock(p); 4915 return (NULL); 4916 } 4917 thread_unlock(t); 4918 4919 /* 4920 * Allocate and fill in a new lx /proc taskid node. 4921 * Instead of the last arg being a fd, it is a tid. 4922 */ 4923 lxpnp = lxpr_getnode(dp, LXPR_PID_TASK_IDDIR, p, tid); 4924 dp = LXPTOV(lxpnp); 4925 ASSERT(dp != NULL); 4926 lxpr_unlock(p); 4927 return (dp); 4928 } 4929 4930 /* 4931 * Lookup one of the process's task ID's. 4932 */ 4933 static vnode_t * 4934 lxpr_lookup_task_tid_dir(vnode_t *dp, char *comp) 4935 { 4936 lxpr_node_t *dlxpnp = VTOLXP(dp); 4937 lxpr_node_t *lxpnp; 4938 proc_t *p; 4939 pid_t real_pid; 4940 kthread_t *t; 4941 int i; 4942 4943 ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASK_IDDIR); 4944 4945 /* 4946 * get the proc to work with and lock it 4947 */ 4948 real_pid = get_real_pid(dlxpnp->lxpr_pid); 4949 p = lxpr_lock(real_pid); 4950 if ((p == NULL)) 4951 return (NULL); 4952 4953 /* 4954 * If the process is a zombie or system process 4955 * it can't have any threads. 
4956 */ 4957 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) { 4958 lxpr_unlock(p); 4959 return (NULL); 4960 } 4961 4962 /* need to confirm tid is still there */ 4963 t = lxpr_get_thread(p, dlxpnp->lxpr_desc); 4964 if (t == NULL) { 4965 lxpr_unlock(p); 4966 return (NULL); 4967 } 4968 thread_unlock(t); 4969 4970 /* 4971 * allocate and fill in the new lx /proc taskid dir node 4972 */ 4973 for (i = 0; i < TIDDIRFILES; i++) { 4974 if (strcmp(tiddir[i].d_name, comp) == 0) { 4975 lxpnp = lxpr_getnode(dp, tiddir[i].d_type, p, 4976 dlxpnp->lxpr_desc); 4977 dp = LXPTOV(lxpnp); 4978 ASSERT(dp != NULL); 4979 lxpr_unlock(p); 4980 return (dp); 4981 } 4982 } 4983 4984 lxpr_unlock(p); 4985 return (NULL); 4986 } 4987 4988 /* 4989 * Lookup one of the process's open files. 4990 */ 4991 static vnode_t * 4992 lxpr_lookup_fddir(vnode_t *dp, char *comp) 4993 { 4994 lxpr_node_t *dlxpnp = VTOLXP(dp); 4995 4996 ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR || 4997 dlxpnp->lxpr_type == LXPR_PID_TID_FDDIR); 4998 4999 return (lxpr_lookup_fdnode(dp, comp)); 5000 } 5001 5002 static vnode_t * 5003 lxpr_lookup_netdir(vnode_t *dp, char *comp) 5004 { 5005 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR); 5006 5007 dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES); 5008 5009 return (dp); 5010 } 5011 5012 static vnode_t * 5013 lxpr_lookup_procdir(vnode_t *dp, char *comp) 5014 { 5015 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR); 5016 5017 /* 5018 * We know all the names of files & dirs in our file system structure 5019 * except those that are pid names. These change as pids are created/ 5020 * deleted etc., so we just look for a number as the first char to see 5021 * if we are we doing pid lookups. 
5022 * 5023 * Don't need to check for "self" as it is implemented as a symlink 5024 */ 5025 if (*comp >= '0' && *comp <= '9') { 5026 pid_t pid = 0; 5027 lxpr_node_t *lxpnp = NULL; 5028 proc_t *p; 5029 int c; 5030 5031 while ((c = *comp++) != '\0') 5032 pid = 10 * pid + c - '0'; 5033 5034 /* 5035 * Can't continue if the process is still loading or it doesn't 5036 * really exist yet (or maybe it just died!) 5037 */ 5038 p = lxpr_lock(pid); 5039 if (p == NULL) 5040 return (NULL); 5041 5042 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) { 5043 lxpr_unlock(p); 5044 return (NULL); 5045 } 5046 5047 /* 5048 * allocate and fill in a new lx /proc node 5049 */ 5050 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0); 5051 5052 lxpr_unlock(p); 5053 5054 dp = LXPTOV(lxpnp); 5055 ASSERT(dp != NULL); 5056 5057 return (dp); 5058 } 5059 5060 /* Lookup fixed names */ 5061 return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES)); 5062 } 5063 5064 static vnode_t * 5065 lxpr_lookup_sysdir(vnode_t *dp, char *comp) 5066 { 5067 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR); 5068 return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES)); 5069 } 5070 5071 static vnode_t * 5072 lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp) 5073 { 5074 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR); 5075 return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir, 5076 SYS_KERNELDIRFILES)); 5077 } 5078 5079 static vnode_t * 5080 lxpr_lookup_sys_kdir_randdir(vnode_t *dp, char *comp) 5081 { 5082 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNEL_RANDDIR); 5083 return (lxpr_lookup_common(dp, comp, NULL, sys_randdir, 5084 SYS_RANDDIRFILES)); 5085 } 5086 5087 static vnode_t * 5088 lxpr_lookup_sys_netdir(vnode_t *dp, char *comp) 5089 { 5090 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NETDIR); 5091 return (lxpr_lookup_common(dp, comp, NULL, sys_netdir, 5092 SYS_NETDIRFILES)); 5093 } 5094 5095 static vnode_t * 5096 lxpr_lookup_sys_net_coredir(vnode_t *dp, char *comp) 5097 { 5098 
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_COREDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_net_coredir,
	    SYS_NET_COREDIRFILES));
}

static vnode_t *
lxpr_lookup_sys_vmdir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_VMDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_vmdir,
	    SYS_VMDIRFILES));
}

static vnode_t *
lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir,
	    SYS_FSDIRFILES));
}

static vnode_t *
lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir,
	    SYS_FS_INOTIFYDIRFILES));
}

/*
 * lxpr_readdir(): Vnode operation for VOP_READDIR()
 *
 * Directory offsets advance in fixed LXPR_SDSIZE steps (one step per
 * entry), independent of the dirent byte counts actually returned.
 */
/* ARGSUSED */
static int
lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
    caller_context_t *ct, int flags)
{
	lxpr_node_t *lxpnp = VTOLXP(dp);
	lxpr_nodetype_t type = lxpnp->lxpr_type;
	ssize_t uresid;
	off_t uoffset;
	int error;

	ASSERT(dp->v_type == VDIR);
	ASSERT(type < LXPR_NFILES);

	/*
	 * we should never get here because the readdir
	 * is done on the realvp for these nodes
	 */
	ASSERT(type != LXPR_PID_FD_FD &&
	    type != LXPR_PID_CURDIR &&
	    type != LXPR_PID_ROOTDIR);

	/*
	 * restrict readdir permission to owner or root
	 */
	if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
		return (error);

	uoffset = uiop->uio_offset;
	uresid = uiop->uio_resid;

	/* can't do negative reads */
	if (uoffset < 0 || uresid <= 0)
		return (EINVAL);

	/* can't read directory entries that don't exist! */
	if (uoffset % LXPR_SDSIZE)
		return (ENOENT);

	return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
}

/* ARGSUSED */
static int
lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	return (ENOTDIR);
}

/*
 * This has the common logic for returning directory entries
 */
static int
lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
    lxpr_dirent_t *dirtab, int dirtablen)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;

	oresid = uiop->uio_resid;

	/* clear out the dirent buffer */
	bzero(bp, sizeof (bp));

	/*
	 * Satisfy user request
	 */
	while ((uresid = uiop->uio_resid) > 0) {
		int dirindex;
		off_t uoffset;
		int reclen;
		int error;

		uoffset = uiop->uio_offset;
		/* Offsets 0 and LXPR_SDSIZE are "." and ".."; rest index */
		/* into dirtab. */
		dirindex  = (uoffset / LXPR_SDSIZE) - 2;

		if (uoffset == 0) {

			dirent->d_ino = lxpnp->lxpr_ino;
			dirent->d_name[0] = '.';
			dirent->d_name[1] = '\0';
			reclen = DIRENT64_RECLEN(1);

		} else if (uoffset == LXPR_SDSIZE) {

			dirent->d_ino = lxpr_parentinode(lxpnp);
			dirent->d_name[0] = '.';
			dirent->d_name[1] = '.';
			dirent->d_name[2] = '\0';
			reclen = DIRENT64_RECLEN(2);

		} else if (dirindex >= 0 && dirindex < dirtablen) {
			int slen = strlen(dirtab[dirindex].d_name);

			dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
			    lxpnp->lxpr_pid, 0);

			VERIFY(slen < LXPNSIZ);
			(void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
			reclen = DIRENT64_RECLEN(slen);

		} else {
			/* Run out of table entries */
			if (eofp) {
				*eofp = 1;
			}
			return (0);
		}

		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		/*
		 * if the size of the data to transfer is greater
		 * that that requested then we can't do it this transfer.
		 */
		if (reclen > uresid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uresid == oresid) {
				return (EINVAL);
			}
			break;
		}

		/*
		 * uiomove() updates both uiop->uio_resid and uiop->uio_offset
		 * by the same amount. But we want uiop->uio_offset to change
		 * in increments of LXPR_SDSIZE, which is different from the
		 * number of bytes being returned to the user. So we set
		 * uiop->uio_offset separately, ignoring what uiomove() does.
		 */
		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
		    uiop)) != 0)
			return (error);

		uiop->uio_offset = uoffset + LXPR_SDSIZE;
	}

	/* Have run out of space, but could have just done last table entry */
	if (eofp) {
		*eofp =
		    (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
	}
	return (0);
}


static int
lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;
	off_t uoffset;
	zoneid_t zoneid;
	pid_t pid;
	int error;
	int ceof;

	ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);

	oresid = uiop->uio_resid;
	zoneid = LXPTOZ(lxpnp)->zone_id;

	/*
	 * We return directory entries in the order: "." and ".." then the
	 * unique lxproc files, then the directories corresponding to the
	 * running processes. We have defined this as the ordering because
	 * it allows us to more easily keep track of where we are between
	 * calls to getdents(). If the number of processes changes between
	 * calls then we can't lose track of where we are in the lxproc files.
	 */

	/* Do the fixed entries */
	error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
	    PROCDIRFILES);

	/* Finished if we got an error or if we couldn't do all the table */
	if (error != 0 || ceof == 0)
		return (error);

	/* clear out the dirent buffer */
	bzero(bp, sizeof (bp));

	/* Do the process entries */
	while ((uresid = uiop->uio_resid) > 0) {
		proc_t *p;
		int len;
		int reclen;
		int i;

		uoffset = uiop->uio_offset;

		/*
		 * Stop when entire proc table has been examined.
		 */
		i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
		if (i < 0 || i >= v.v_proc) {
			/* Run out of table entries */
			if (eofp) {
				*eofp = 1;
			}
			return (0);
		}
		mutex_enter(&pidlock);

		/*
		 * Skip indices for which there is no pid_entry, PIDs for
		 * which there is no corresponding process, a PID of 0,
		 * and anything the security policy doesn't allow
		 * us to look at.
		 */
		if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
		    p->p_pid == 0 ||
		    secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
			mutex_exit(&pidlock);
			goto next;
		}
		mutex_exit(&pidlock);

		/*
		 * Convert pid to the Linux default of 1 if we're the zone's
		 * init process, or 0 if zsched, otherwise use the value from
		 * the proc structure
		 */
		if (p->p_pid == curproc->p_zone->zone_proc_initpid) {
			pid = 1;
		} else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) {
			pid = 0;
		} else {
			pid = p->p_pid;
		}

		/*
		 * If this /proc was mounted in the global zone, view
		 * all procs; otherwise, only view zone member procs.
		 */
		if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
			goto next;
		}

		ASSERT(p->p_stat != 0);

		dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
		len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
		ASSERT(len < LXPNSIZ);
		reclen = DIRENT64_RECLEN(len);

		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		/*
		 * if the size of the data to transfer is greater
		 * that that requested then we can't do it this transfer.
		 */
		if (reclen > uresid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uresid == oresid)
				return (EINVAL);
			break;
		}

		/*
		 * uiomove() updates both uiop->uio_resid and uiop->uio_offset
		 * by the same amount. But we want uiop->uio_offset to change
		 * in increments of LXPR_SDSIZE, which is different from the
		 * number of bytes being returned to the user. So we set
		 * uiop->uio_offset separately, in the increment of this for
		 * the loop, ignoring what uiomove() does.
		 */
		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
		    uiop)) != 0)
			return (error);
next:
		uiop->uio_offset = uoffset + LXPR_SDSIZE;
	}

	if (eofp != NULL) {
		*eofp = (uiop->uio_offset >=
		    ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ?
1 : 0; 5419 } 5420 5421 return (0); 5422 } 5423 5424 static int 5425 lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5426 { 5427 proc_t *p; 5428 pid_t find_pid; 5429 5430 ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR); 5431 5432 /* can't read its contents if it died */ 5433 mutex_enter(&pidlock); 5434 5435 if (lxpnp->lxpr_pid == 1) { 5436 find_pid = curproc->p_zone->zone_proc_initpid; 5437 } else if (lxpnp->lxpr_pid == 0) { 5438 find_pid = curproc->p_zone->zone_zsched->p_pid; 5439 } else { 5440 find_pid = lxpnp->lxpr_pid; 5441 } 5442 p = prfind(find_pid); 5443 5444 if (p == NULL || p->p_stat == SIDL) { 5445 mutex_exit(&pidlock); 5446 return (ENOENT); 5447 } 5448 mutex_exit(&pidlock); 5449 5450 return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES)); 5451 } 5452 5453 static int 5454 lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5455 { 5456 ASSERT(lxpnp->lxpr_type == LXPR_NETDIR); 5457 return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES)); 5458 } 5459 5460 static int 5461 lxpr_readdir_taskdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5462 { 5463 /* bp holds one dirent64 structure */ 5464 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5465 dirent64_t *dirent = (dirent64_t *)bp; 5466 ssize_t oresid; /* save a copy for testing later */ 5467 ssize_t uresid; 5468 off_t uoffset; 5469 int error; 5470 int ceof; 5471 proc_t *p; 5472 int tiddirsize = -1; 5473 int tasknum; 5474 pid_t real_pid; 5475 kthread_t *t; 5476 boolean_t branded = B_FALSE; 5477 5478 ASSERT(lxpnp->lxpr_type == LXPR_PID_TASKDIR); 5479 5480 oresid = uiop->uio_resid; 5481 5482 real_pid = get_real_pid(lxpnp->lxpr_pid); 5483 p = lxpr_lock(real_pid); 5484 5485 /* can't read its contents if it died */ 5486 if (p == NULL) { 5487 return (ENOENT); 5488 } 5489 if (p->p_stat == SIDL) { 5490 lxpr_unlock(p); 5491 return (ENOENT); 5492 } 5493 5494 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) 5495 tiddirsize = 0; 5496 5497 
branded = (p->p_brand == &lx_brand); 5498 /* 5499 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from 5500 * going away while we iterate over its threads. 5501 */ 5502 mutex_exit(&p->p_lock); 5503 5504 if (tiddirsize == -1) 5505 tiddirsize = p->p_lwpcnt; 5506 5507 /* Do the fixed entries (in this case just "." & "..") */ 5508 error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0); 5509 5510 /* Finished if we got an error or if we couldn't do all the table */ 5511 if (error != 0 || ceof == 0) 5512 goto out; 5513 5514 if ((t = p->p_tlist) == NULL) { 5515 if (eofp != NULL) 5516 *eofp = 1; 5517 goto out; 5518 } 5519 5520 /* clear out the dirent buffer */ 5521 bzero(bp, sizeof (bp)); 5522 5523 /* 5524 * Loop until user's request is satisfied or until all thread's have 5525 * been returned. 5526 */ 5527 for (tasknum = 0; (uresid = uiop->uio_resid) > 0; tasknum++) { 5528 int i; 5529 int reclen; 5530 int len; 5531 uint_t emul_tid; 5532 lx_lwp_data_t *lwpd; 5533 5534 uoffset = uiop->uio_offset; 5535 5536 /* 5537 * Stop at the end of the thread list 5538 */ 5539 i = (uoffset / LXPR_SDSIZE) - 2; 5540 if (i < 0 || i >= tiddirsize) { 5541 if (eofp) { 5542 *eofp = 1; 5543 } 5544 goto out; 5545 } 5546 5547 if (i != tasknum) 5548 goto next; 5549 5550 if (!branded) { 5551 /* 5552 * Emulating the goofy linux task model is impossible 5553 * to do for native processes. We can compromise by 5554 * presenting only the main thread to the consumer. 5555 */ 5556 emul_tid = p->p_pid; 5557 } else { 5558 if ((lwpd = ttolxlwp(t)) == NULL) { 5559 goto next; 5560 } 5561 emul_tid = lwpd->br_pid; 5562 /* 5563 * Convert pid to Linux default of 1 if we're the 5564 * zone's init. 
5565 */ 5566 if (emul_tid == curproc->p_zone->zone_proc_initpid) 5567 emul_tid = 1; 5568 } 5569 5570 dirent->d_ino = lxpr_inode(LXPR_PID_TASK_IDDIR, lxpnp->lxpr_pid, 5571 emul_tid); 5572 len = snprintf(dirent->d_name, LXPNSIZ, "%d", emul_tid); 5573 ASSERT(len < LXPNSIZ); 5574 reclen = DIRENT64_RECLEN(len); 5575 5576 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); 5577 dirent->d_reclen = (ushort_t)reclen; 5578 5579 if (reclen > uresid) { 5580 /* 5581 * Error if no entries have been returned yet. 5582 */ 5583 if (uresid == oresid) 5584 error = EINVAL; 5585 goto out; 5586 } 5587 5588 /* 5589 * uiomove() updates both uiop->uio_resid and uiop->uio_offset 5590 * by the same amount. But we want uiop->uio_offset to change 5591 * in increments of LXPR_SDSIZE, which is different from the 5592 * number of bytes being returned to the user. So we set 5593 * uiop->uio_offset separately, in the increment of this for 5594 * the loop, ignoring what uiomove() does. 5595 */ 5596 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5597 uiop)) != 0) 5598 goto out; 5599 5600 next: 5601 uiop->uio_offset = uoffset + LXPR_SDSIZE; 5602 5603 if ((t = t->t_forw) == p->p_tlist || !branded) { 5604 if (eofp != NULL) 5605 *eofp = 1; 5606 goto out; 5607 } 5608 } 5609 5610 if (eofp != NULL) 5611 *eofp = 0; 5612 5613 out: 5614 mutex_enter(&p->p_lock); 5615 lxpr_unlock(p); 5616 return (error); 5617 } 5618 5619 static int 5620 lxpr_readdir_task_tid_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5621 { 5622 proc_t *p; 5623 pid_t real_pid; 5624 kthread_t *t; 5625 5626 ASSERT(lxpnp->lxpr_type == LXPR_PID_TASK_IDDIR); 5627 5628 mutex_enter(&pidlock); 5629 5630 real_pid = get_real_pid(lxpnp->lxpr_pid); 5631 p = prfind(real_pid); 5632 5633 /* can't read its contents if it died */ 5634 if (p == NULL || p->p_stat == SIDL) { 5635 mutex_exit(&pidlock); 5636 return (ENOENT); 5637 } 5638 5639 mutex_exit(&pidlock); 5640 5641 /* need to confirm tid is still there */ 5642 t = lxpr_get_thread(p, 
lxpnp->lxpr_desc); 5643 if (t == NULL) { 5644 /* we can't find this specific thread */ 5645 return (NULL); 5646 } 5647 thread_unlock(t); 5648 5649 return (lxpr_readdir_common(lxpnp, uiop, eofp, tiddir, TIDDIRFILES)); 5650 } 5651 5652 static int 5653 lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5654 { 5655 /* bp holds one dirent64 structure */ 5656 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5657 dirent64_t *dirent = (dirent64_t *)bp; 5658 ssize_t oresid; /* save a copy for testing later */ 5659 ssize_t uresid; 5660 off_t uoffset; 5661 int error; 5662 int ceof; 5663 proc_t *p; 5664 int fddirsize = -1; 5665 uf_info_t *fip; 5666 5667 ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR || 5668 lxpnp->lxpr_type == LXPR_PID_TID_FDDIR); 5669 5670 oresid = uiop->uio_resid; 5671 5672 /* can't read its contents if it died */ 5673 p = lxpr_lock(lxpnp->lxpr_pid); 5674 if (p == NULL) 5675 return (ENOENT); 5676 5677 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) 5678 fddirsize = 0; 5679 5680 /* 5681 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from 5682 * going away while we iterate over its fi_list. 5683 */ 5684 mutex_exit(&p->p_lock); 5685 5686 /* Get open file info */ 5687 fip = (&(p)->p_user.u_finfo); 5688 mutex_enter(&fip->fi_lock); 5689 5690 if (fddirsize == -1) 5691 fddirsize = fip->fi_nfiles; 5692 5693 /* Do the fixed entries (in this case just "." & "..") */ 5694 error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0); 5695 5696 /* Finished if we got an error or if we couldn't do all the table */ 5697 if (error != 0 || ceof == 0) 5698 goto out; 5699 5700 /* clear out the dirent buffer */ 5701 bzero(bp, sizeof (bp)); 5702 5703 /* 5704 * Loop until user's request is satisfied or until 5705 * all file descriptors have been examined. 
5706 */ 5707 for (; (uresid = uiop->uio_resid) > 0; 5708 uiop->uio_offset = uoffset + LXPR_SDSIZE) { 5709 int reclen; 5710 int fd; 5711 int len; 5712 5713 uoffset = uiop->uio_offset; 5714 5715 /* 5716 * Stop at the end of the fd list 5717 */ 5718 fd = (uoffset / LXPR_SDSIZE) - 2; 5719 if (fd < 0 || fd >= fddirsize) { 5720 if (eofp) { 5721 *eofp = 1; 5722 } 5723 goto out; 5724 } 5725 5726 if (fip->fi_list[fd].uf_file == NULL) 5727 continue; 5728 5729 dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd); 5730 len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd); 5731 ASSERT(len < LXPNSIZ); 5732 reclen = DIRENT64_RECLEN(len); 5733 5734 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); 5735 dirent->d_reclen = (ushort_t)reclen; 5736 5737 if (reclen > uresid) { 5738 /* 5739 * Error if no entries have been returned yet. 5740 */ 5741 if (uresid == oresid) 5742 error = EINVAL; 5743 goto out; 5744 } 5745 5746 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5747 uiop)) != 0) 5748 goto out; 5749 } 5750 5751 if (eofp != NULL) { 5752 *eofp = 5753 (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 
1 : 0; 5754 } 5755 5756 out: 5757 mutex_exit(&fip->fi_lock); 5758 mutex_enter(&p->p_lock); 5759 lxpr_unlock(p); 5760 return (error); 5761 } 5762 5763 static int 5764 lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5765 { 5766 ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR); 5767 return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES)); 5768 } 5769 5770 static int 5771 lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5772 { 5773 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR); 5774 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir, 5775 SYS_FSDIRFILES)); 5776 } 5777 5778 static int 5779 lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5780 { 5781 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR); 5782 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir, 5783 SYS_FS_INOTIFYDIRFILES)); 5784 } 5785 5786 static int 5787 lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5788 { 5789 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR); 5790 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir, 5791 SYS_KERNELDIRFILES)); 5792 } 5793 5794 static int 5795 lxpr_readdir_sys_kdir_randdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5796 { 5797 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RANDDIR); 5798 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_randdir, 5799 SYS_RANDDIRFILES)); 5800 } 5801 5802 static int 5803 lxpr_readdir_sys_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5804 { 5805 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NETDIR); 5806 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_netdir, 5807 SYS_NETDIRFILES)); 5808 } 5809 5810 static int 5811 lxpr_readdir_sys_net_coredir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5812 { 5813 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_COREDIR); 5814 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_coredir, 5815 SYS_NET_COREDIRFILES)); 5816 } 5817 5818 static int 5819 lxpr_readdir_sys_vmdir(lxpr_node_t *lxpnp, uio_t *uiop, int 
*eofp) 5820 { 5821 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VMDIR); 5822 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_vmdir, 5823 SYS_VMDIRFILES)); 5824 } 5825 5826 static int 5827 lxpr_write_sys_net_core_somaxc(lxpr_node_t *lxpnp, struct uio *uio, 5828 struct cred *cr, caller_context_t *ct) 5829 { 5830 int error; 5831 int res = 0; 5832 size_t olen; 5833 char val[16]; /* big enough for a uint numeric string */ 5834 netstack_t *ns; 5835 mod_prop_info_t *ptbl = NULL; 5836 mod_prop_info_t *pinfo = NULL; 5837 5838 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON); 5839 5840 if (uio->uio_loffset != 0) 5841 return (EINVAL); 5842 5843 if (uio->uio_resid == 0) 5844 return (0); 5845 5846 olen = uio->uio_resid; 5847 if (olen > sizeof (val) - 1) 5848 return (EINVAL); 5849 5850 bzero(val, sizeof (val)); 5851 error = uiomove(val, olen, UIO_WRITE, uio); 5852 if (error != 0) 5853 return (error); 5854 5855 if (val[olen - 1] == '\n') 5856 val[olen - 1] = '\0'; 5857 5858 if (val[0] == '\0') /* no input */ 5859 return (EINVAL); 5860 5861 ns = netstack_get_current(); 5862 if (ns == NULL) 5863 return (EINVAL); 5864 5865 ptbl = ns->netstack_tcp->tcps_propinfo_tbl; 5866 pinfo = mod_prop_lookup(ptbl, "_conn_req_max_q", MOD_PROTO_TCP); 5867 if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, val, 0) != 0) 5868 res = EINVAL; 5869 5870 netstack_rele(ns); 5871 return (res); 5872 } 5873 5874 /* ARGSUSED */ 5875 static int 5876 lxpr_write_sys_kernel_corepatt(lxpr_node_t *lxpnp, struct uio *uio, 5877 struct cred *cr, caller_context_t *ct) 5878 { 5879 zone_t *zone = curproc->p_zone; 5880 struct core_globals *cg; 5881 refstr_t *rp, *nrp; 5882 corectl_path_t *ccp; 5883 char val[MAXPATHLEN]; 5884 char valtr[MAXPATHLEN]; 5885 size_t olen; 5886 int error; 5887 5888 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT); 5889 5890 cg = zone_getspecific(core_zone_key, zone); 5891 ASSERT(cg != NULL); 5892 5893 if (secpolicy_coreadm(cr) != 0) 5894 return (EPERM); 5895 5896 if (uio->uio_loffset 
!= 0) 5897 return (EINVAL); 5898 5899 if (uio->uio_resid == 0) 5900 return (0); 5901 5902 olen = uio->uio_resid; 5903 if (olen > sizeof (val) - 1) 5904 return (EINVAL); 5905 5906 bzero(val, sizeof (val)); 5907 error = uiomove(val, olen, UIO_WRITE, uio); 5908 if (error != 0) 5909 return (error); 5910 5911 if (val[olen - 1] == '\n') 5912 val[olen - 1] = '\0'; 5913 5914 if (val[0] == '|') 5915 return (EINVAL); 5916 5917 if ((error = lxpr_core_path_l2s(val, valtr, sizeof (valtr))) != 0) 5918 return (error); 5919 5920 nrp = refstr_alloc(valtr); 5921 5922 ccp = cg->core_default_path; 5923 mutex_enter(&ccp->ccp_mtx); 5924 rp = ccp->ccp_path; 5925 refstr_hold((ccp->ccp_path = nrp)); 5926 cg->core_options |= CC_PROCESS_PATH; 5927 mutex_exit(&ccp->ccp_mtx); 5928 5929 if (rp != NULL) 5930 refstr_rele(rp); 5931 5932 return (0); 5933 } 5934 5935 /* 5936 * lxpr_readlink(): Vnode operation for VOP_READLINK() 5937 */ 5938 /* ARGSUSED */ 5939 static int 5940 lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct) 5941 { 5942 char bp[MAXPATHLEN + 1]; 5943 size_t buflen = sizeof (bp); 5944 lxpr_node_t *lxpnp = VTOLXP(vp); 5945 vnode_t *rvp = lxpnp->lxpr_realvp; 5946 pid_t pid; 5947 int error = 0; 5948 5949 /* 5950 * Linux does something very "clever" for /proc/<pid>/fd/<num> entries. 5951 * Open FDs are represented as symlinks, the link contents 5952 * corresponding to the open resource. For plain files or devices, 5953 * this isn't absurd since one can dereference the symlink to query 5954 * the underlying resource. For sockets or pipes, it becomes ugly in a 5955 * hurry. To maintain this human-readable output, those FD symlinks 5956 * point to bogus targets such as "socket:[<inodenum>]". This requires 5957 * circumventing vfs since the stat/lstat behavior on those FD entries 5958 * will be unusual. (A stat must retrieve information about the open 5959 * socket or pipe. It cannot fail because the link contents point to 5960 * an absent file.) 
5961 * 5962 * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD 5963 * entries. This bypasses code paths which would normally 5964 * short-circuit on symlinks and allows us to emulate the vfs behavior 5965 * expected by /proc consumers. 5966 */ 5967 if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD) 5968 return (EINVAL); 5969 5970 /* Try to produce a symlink name for anything that has a realvp */ 5971 if (rvp != NULL) { 5972 if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0) 5973 return (error); 5974 if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) { 5975 /* 5976 * Special handling possible for /proc/<pid>/fd/<num> 5977 * Generate <type>:[<inode>] links, if allowed. 5978 */ 5979 if (lxpnp->lxpr_type != LXPR_PID_FD_FD || 5980 lxpr_readlink_fdnode(lxpnp, bp, buflen) != 0) { 5981 return (error); 5982 } 5983 } 5984 } else { 5985 switch (lxpnp->lxpr_type) { 5986 case LXPR_SELF: 5987 /* 5988 * Convert pid to the Linux default of 1 if we're the 5989 * zone's init process or 0 if zsched. 5990 */ 5991 if (curproc->p_pid == 5992 curproc->p_zone->zone_proc_initpid) { 5993 pid = 1; 5994 } else if (curproc->p_pid == 5995 curproc->p_zone->zone_zsched->p_pid) { 5996 pid = 0; 5997 } else { 5998 pid = curproc->p_pid; 5999 } 6000 6001 /* 6002 * Don't need to check result as every possible int 6003 * will fit within MAXPATHLEN bytes. 6004 */ 6005 (void) snprintf(bp, buflen, "%d", pid); 6006 break; 6007 case LXPR_PID_CURDIR: 6008 case LXPR_PID_ROOTDIR: 6009 case LXPR_PID_EXE: 6010 return (EACCES); 6011 default: 6012 /* 6013 * Need to return error so that nothing thinks 6014 * that the symlink is empty and hence "." 6015 */ 6016 return (EINVAL); 6017 } 6018 } 6019 6020 /* copy the link data to user space */ 6021 return (uiomove(bp, strlen(bp), UIO_READ, uiop)); 6022 } 6023 6024 6025 /* 6026 * lxpr_inactive(): Vnode operation for VOP_INACTIVE() 6027 * Vnode is no longer referenced, deallocate the file 6028 * and all its resources. 
6029 */ 6030 /* ARGSUSED */ 6031 static void 6032 lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 6033 { 6034 lxpr_freenode(VTOLXP(vp)); 6035 } 6036 6037 /* 6038 * lxpr_sync(): Vnode operation for VOP_SYNC() 6039 */ 6040 static int 6041 lxpr_sync() 6042 { 6043 /* 6044 * Nothing to sync but this function must never fail 6045 */ 6046 return (0); 6047 } 6048 6049 /* 6050 * lxpr_cmp(): Vnode operation for VOP_CMP() 6051 */ 6052 static int 6053 lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct) 6054 { 6055 vnode_t *rvp; 6056 6057 while (vn_matchops(vp1, lxpr_vnodeops) && 6058 (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) { 6059 vp1 = rvp; 6060 } 6061 6062 while (vn_matchops(vp2, lxpr_vnodeops) && 6063 (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) { 6064 vp2 = rvp; 6065 } 6066 6067 if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops)) 6068 return (vp1 == vp2); 6069 return (VOP_CMP(vp1, vp2, ct)); 6070 } 6071 6072 /* 6073 * lxpr_realvp(): Vnode operation for VOP_REALVP() 6074 */ 6075 static int 6076 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct) 6077 { 6078 vnode_t *rvp; 6079 6080 if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) { 6081 vp = rvp; 6082 if (VOP_REALVP(vp, &rvp, ct) == 0) 6083 vp = rvp; 6084 } 6085 6086 *vpp = vp; 6087 return (0); 6088 } 6089 6090 static int 6091 lxpr_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 6092 caller_context_t *ct) 6093 { 6094 lxpr_node_t *lxpnp = VTOLXP(vp); 6095 lxpr_nodetype_t type = lxpnp->lxpr_type; 6096 6097 switch (type) { 6098 case LXPR_SYS_KERNEL_COREPATT: 6099 return (lxpr_write_sys_kernel_corepatt(lxpnp, uiop, cr, ct)); 6100 case LXPR_SYS_NET_CORE_SOMAXCON: 6101 return (lxpr_write_sys_net_core_somaxc(lxpnp, uiop, cr, ct)); 6102 6103 default: 6104 /* pretend we wrote the whole thing */ 6105 uiop->uio_offset += uiop->uio_resid; 6106 uiop->uio_resid = 0; 6107 return (0); 6108 } 6109 } 6110 6111 /* 6112 * We need to allow open with O_CREAT for the oom_score_adj file. 
6113 */ 6114 /*ARGSUSED7*/ 6115 static int 6116 lxpr_create(struct vnode *dvp, char *nm, struct vattr *vap, 6117 enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred, 6118 int flag, caller_context_t *ct, vsecattr_t *vsecp) 6119 { 6120 lxpr_node_t *lxpnp = VTOLXP(dvp); 6121 lxpr_nodetype_t type = lxpnp->lxpr_type; 6122 vnode_t *vp = NULL; 6123 int error; 6124 6125 ASSERT(type < LXPR_NFILES); 6126 6127 /* 6128 * restrict create permission to owner or root 6129 */ 6130 if ((error = lxpr_access(dvp, VEXEC, 0, cred, ct)) != 0) { 6131 return (error); 6132 } 6133 6134 if (*nm == '\0') 6135 return (EPERM); 6136 6137 if (dvp->v_type != VDIR) 6138 return (EPERM); 6139 6140 if (exclusive == EXCL) 6141 return (EEXIST); 6142 6143 /* 6144 * We're currently restricting O_CREAT to: 6145 * - /proc/<pid>/fd/<num> 6146 * - /proc/<pid>/oom_score_adj 6147 * - /proc/<pid>/task/<tid>/fd/<num> 6148 * - /proc/<pid>/task/<tid>/oom_score_adj 6149 * - /proc/sys/kernel/core_pattern 6150 * - /proc/sys/net/core/somaxconn 6151 * - /proc/sys/vm/overcommit_memory 6152 * - /proc/sys/vm/swappiness 6153 */ 6154 switch (type) { 6155 case LXPR_PIDDIR: 6156 case LXPR_PID_TASK_IDDIR: 6157 if (strcmp(nm, "oom_score_adj") == 0) { 6158 proc_t *p; 6159 p = lxpr_lock(lxpnp->lxpr_pid); 6160 if (p != NULL) { 6161 vp = lxpr_lookup_common(dvp, nm, p, piddir, 6162 PIDDIRFILES); 6163 } 6164 lxpr_unlock(p); 6165 } 6166 break; 6167 6168 case LXPR_SYS_NET_COREDIR: 6169 if (strcmp(nm, "somaxconn") == 0) { 6170 vp = lxpr_lookup_common(dvp, nm, NULL, sys_net_coredir, 6171 SYS_NET_COREDIRFILES); 6172 } 6173 break; 6174 6175 case LXPR_SYS_KERNELDIR: 6176 if (strcmp(nm, "core_pattern") == 0) { 6177 vp = lxpr_lookup_common(dvp, nm, NULL, sys_kerneldir, 6178 SYS_KERNELDIRFILES); 6179 } 6180 break; 6181 6182 case LXPR_SYS_VMDIR: 6183 if (strcmp(nm, "overcommit_memory") == 0 || 6184 strcmp(nm, "swappiness") == 0) { 6185 vp = lxpr_lookup_common(dvp, nm, NULL, sys_vmdir, 6186 SYS_VMDIRFILES); 6187 } 6188 break; 
6189 6190 case LXPR_PID_FDDIR: 6191 case LXPR_PID_TID_FDDIR: 6192 vp = lxpr_lookup_fdnode(dvp, nm); 6193 break; 6194 6195 default: 6196 vp = NULL; 6197 break; 6198 } 6199 6200 if (vp != NULL) { 6201 /* Creating an existing file, allow it for regular files. */ 6202 if (vp->v_type == VDIR) 6203 return (EISDIR); 6204 6205 /* confirm permissions against existing file */ 6206 if ((error = lxpr_access(vp, mode, 0, cred, ct)) != 0) { 6207 VN_RELE(vp); 6208 return (error); 6209 } 6210 6211 *vpp = vp; 6212 return (0); 6213 } 6214 6215 /* 6216 * Linux proc does not allow creation of addition, non-subsystem 6217 * specific files inside the hierarchy. ENOENT is tossed when such 6218 * actions are attempted. 6219 */ 6220 return (ENOENT); 6221 }