1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2016 Joyent, Inc. 25 */ 26 27 /* 28 * lx_proc -- a Linux-compatible /proc for the LX brand 29 * 30 * We have -- confusingly -- two implementations of Linux /proc. One is to 31 * support native (but Linux-borne) programs that wish to view the native 32 * system through the Linux /proc model; the other -- this one -- is to 33 * support Linux binaries via the LX brand. These two implementations differ 34 * greatly in their aspirations (and their willingness to bend the truth 35 * of the system to accommodate those aspirations); they should not be unified. 36 */ 37 38 #include <sys/cpupart.h> 39 #include <sys/cpuvar.h> 40 #include <sys/session.h> 41 #include <sys/vmparam.h> 42 #include <sys/mman.h> 43 #include <vm/rm.h> 44 #include <vm/seg_vn.h> 45 #include <sys/sdt.h> 46 #include <lx_signum.h> 47 #include <sys/strlog.h> 48 #include <sys/stropts.h> 49 #include <sys/cmn_err.h> 50 #include <sys/lx_brand.h> 51 #include <lx_auxv.h> 52 #include <sys/x86_archext.h> 53 #include <sys/archsystm.h> 54 #include <sys/fp.h> 55 #include <sys/pool_pset.h> 56 #include <sys/pset.h> 57 #include <sys/zone.h> 58 #include <sys/pghw.h> 59 #include <sys/vfs_opreg.h> 60 #include <sys/param.h> 61 #include <sys/utsname.h> 62 #include <sys/rctl.h> 63 #include <sys/kstat.h> 64 #include <sys/lx_misc.h> 65 #include <sys/brand.h> 66 #include <sys/cred_impl.h> 67 #include <sys/tihdr.h> 68 #include <sys/corectl.h> 69 #include <inet/ip.h> 70 #include <inet/ip_ire.h> 71 #include <inet/ip6.h> 72 #include <inet/ip_if.h> 73 #include <inet/tcp.h> 74 #include <inet/tcp_impl.h> 75 #include <inet/udp_impl.h> 76 #include <inet/ipclassifier.h> 77 #include <sys/socketvar.h> 78 #include <fs/sockfs/socktpi.h> 79 80 /* Dependent on procfs */ 81 extern kthread_t *prchoose(proc_t *); 82 extern int prreadargv(proc_t *, char *, size_t, size_t *); 83 extern int prreadenvv(proc_t *, char *, size_t, size_t *); 84 extern int prreadbuf(proc_t *, uintptr_t, uint8_t *, size_t, size_t *); 85 86 #include "lx_proc.h" 87 88 extern pgcnt_t swapfs_minfree; 89 extern time_t boot_time; 90 91 /* 92 * Pointer to the vnode ops vector for this fs. 93 * This is instantiated in lxprinit() in lxpr_vfsops.c 94 */ 95 vnodeops_t *lxpr_vnodeops; 96 97 static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *); 98 static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *, 99 caller_context_t *); 100 static int lxpr_create(struct vnode *, char *, struct vattr *, enum vcexcl, 101 int, struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *); 102 static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *); 103 static int lxpr_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *); 104 static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *, 105 caller_context_t *); 106 static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *); 107 static int lxpr_lookup(vnode_t *, char *, vnode_t **, 108 pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *, 109 pathname_t *); 110 static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *, 111 caller_context_t *, int); 112 static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *); 113 static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *); 114 static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *); 115 static int lxpr_sync(void); 116 static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *); 117 118 static vnode_t *lxpr_lookup_procdir(vnode_t *, char *); 119 static vnode_t *lxpr_lookup_piddir(vnode_t *, char *); 120 static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *); 121 static vnode_t *lxpr_lookup_fddir(vnode_t *, char *); 122 static vnode_t *lxpr_lookup_netdir(vnode_t *, char *); 123 static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *); 124 static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *); 125 static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *); 126 static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *); 127 static vnode_t *lxpr_lookup_sys_kdir_randdir(vnode_t *, char *); 128 static vnode_t *lxpr_lookup_sys_netdir(vnode_t *, char *); 129 static vnode_t *lxpr_lookup_sys_net_coredir(vnode_t *, char *); 130 static vnode_t *lxpr_lookup_sys_vmdir(vnode_t *, char *); 131 static vnode_t *lxpr_lookup_taskdir(vnode_t *, char *); 132 static vnode_t *lxpr_lookup_task_tid_dir(vnode_t *, char *); 133 134 static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *); 135 static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *); 136 static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *); 137 static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *); 138 static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *); 139 static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *); 140 static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *); 141 static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *); 142 static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *); 143 static int lxpr_readdir_sys_kdir_randdir(lxpr_node_t *, uio_t *, int *); 144 static int lxpr_readdir_sys_netdir(lxpr_node_t *, uio_t *, int *); 145 static int lxpr_readdir_sys_net_coredir(lxpr_node_t *, uio_t *, int *); 146 static int lxpr_readdir_sys_vmdir(lxpr_node_t *, uio_t *, int *); 147 static int lxpr_readdir_taskdir(lxpr_node_t *, uio_t *, int *); 148 static int lxpr_readdir_task_tid_dir(lxpr_node_t *, uio_t *, int *); 149 150 static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *); 151 static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *); 152 static void lxpr_read_cgroups(lxpr_node_t *, lxpr_uiobuf_t *); 153 static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *); 154 static void lxpr_read_diskstats(lxpr_node_t *, lxpr_uiobuf_t *); 155 static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *); 156 static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *); 157 static void lxpr_read_filesystems(lxpr_node_t *, lxpr_uiobuf_t *); 158 static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t); 159 static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *); 160 static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *); 161 static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *); 162 static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *); 163 static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *); 164 static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *); 165 static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *); 166 static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *); 167 168 static void lxpr_read_pid_auxv(lxpr_node_t *, lxpr_uiobuf_t *); 169 static void lxpr_read_pid_cgroup(lxpr_node_t *, lxpr_uiobuf_t *); 170 static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *); 171 static void lxpr_read_pid_comm(lxpr_node_t *, lxpr_uiobuf_t *); 172 static void lxpr_read_pid_env(lxpr_node_t *, lxpr_uiobuf_t *); 173 static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *); 174 static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *); 175 static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *); 176 static void lxpr_read_pid_oom_scr_adj(lxpr_node_t *, lxpr_uiobuf_t *); 177 static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *); 178 static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *); 179 static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *); 180 181 static void lxpr_read_pid_tid_stat(lxpr_node_t *, lxpr_uiobuf_t *); 182 static void lxpr_read_pid_tid_status(lxpr_node_t *, lxpr_uiobuf_t *); 183 184 static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *); 185 static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *); 186 static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *); 187 static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *); 188 static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *); 189 static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *); 190 static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *); 191 static void lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *); 192 static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *); 193 static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *); 194 static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *); 195 static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *); 196 static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *); 197 static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *); 198 static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *); 199 static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *); 200 static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *); 201 static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *); 202 static void lxpr_read_net_tcp6(lxpr_node_t *, lxpr_uiobuf_t *); 203 static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *); 204 static void lxpr_read_net_udp6(lxpr_node_t *, lxpr_uiobuf_t *); 205 static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *); 206 static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *, 207 lxpr_uiobuf_t *); 208 static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *, 209 lxpr_uiobuf_t *); 210 static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *, 211 lxpr_uiobuf_t *); 212 static void lxpr_read_sys_kernel_caplcap(lxpr_node_t *, lxpr_uiobuf_t *); 213 static void lxpr_read_sys_kernel_corepatt(lxpr_node_t *, lxpr_uiobuf_t *); 214 static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *); 215 static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *); 216 static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *); 217 static void lxpr_read_sys_kernel_osrel(lxpr_node_t *, lxpr_uiobuf_t *); 218 static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *); 219 static void lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *, lxpr_uiobuf_t *); 220 static void lxpr_read_sys_kernel_sem(lxpr_node_t *, lxpr_uiobuf_t *); 221 static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, lxpr_uiobuf_t *); 222 static void lxpr_read_sys_kernel_shmmni(lxpr_node_t *, lxpr_uiobuf_t *); 223 static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *); 224 static void lxpr_read_sys_net_core_somaxc(lxpr_node_t *, lxpr_uiobuf_t *); 225 static void lxpr_read_sys_vm_minfr_kb(lxpr_node_t *, lxpr_uiobuf_t *); 226 static void lxpr_read_sys_vm_nhpages(lxpr_node_t *, lxpr_uiobuf_t *); 227 static void lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *, lxpr_uiobuf_t *); 228 static void lxpr_read_sys_vm_swappiness(lxpr_node_t *, lxpr_uiobuf_t *); 229 230 static int lxpr_write_sys_net_core_somaxc(lxpr_node_t *, uio_t *, cred_t *, 231 caller_context_t *); 232 static int lxpr_write_sys_kernel_corepatt(lxpr_node_t *, uio_t *, cred_t *, 233 caller_context_t *); 234 235 /* 236 * Simple conversion 237 */ 238 #define btok(x) ((x) >> 10) /* bytes to kbytes */ 239 #define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */ 240 241 #define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t)) 242 243 extern rctl_hndl_t rc_process_semmsl; 244 extern rctl_hndl_t rc_process_semopm; 245 extern rctl_hndl_t rc_zone_semmni; 246 247 extern rctl_hndl_t rc_zone_msgmni; 248 extern rctl_hndl_t rc_zone_shmmax; 249 extern rctl_hndl_t rc_zone_shmmni; 250 #define FOURGB 4294967295 251 252 /* 253 * The maximum length of the concatenation of argument vector strings we 254 * will return to the user via the branded procfs. Likewise for the env vector. 255 */ 256 int lxpr_maxargvlen = 4096; 257 int lxpr_maxenvvlen = 4096; 258 259 /* 260 * The lx /proc vnode operations vector 261 */ 262 const fs_operation_def_t lxpr_vnodeops_template[] = { 263 VOPNAME_OPEN, { .vop_open = lxpr_open }, 264 VOPNAME_CLOSE, { .vop_close = lxpr_close }, 265 VOPNAME_READ, { .vop_read = lxpr_read }, 266 VOPNAME_WRITE, { .vop_read = lxpr_write }, 267 VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr }, 268 VOPNAME_ACCESS, { .vop_access = lxpr_access }, 269 VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup }, 270 VOPNAME_CREATE, { .vop_create = lxpr_create }, 271 VOPNAME_READDIR, { .vop_readdir = lxpr_readdir }, 272 VOPNAME_READLINK, { .vop_readlink = lxpr_readlink }, 273 VOPNAME_FSYNC, { .error = lxpr_sync }, 274 VOPNAME_SEEK, { .error = lxpr_sync }, 275 VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive }, 276 VOPNAME_CMP, { .vop_cmp = lxpr_cmp }, 277 VOPNAME_REALVP, { .vop_realvp = lxpr_realvp }, 278 NULL, NULL 279 }; 280 281 282 /* 283 * file contents of an lx /proc directory. 284 */ 285 static lxpr_dirent_t lx_procdir[] = { 286 { LXPR_CGROUPS, "cgroups" }, 287 { LXPR_CMDLINE, "cmdline" }, 288 { LXPR_CPUINFO, "cpuinfo" }, 289 { LXPR_DEVICES, "devices" }, 290 { LXPR_DISKSTATS, "diskstats" }, 291 { LXPR_DMA, "dma" }, 292 { LXPR_FILESYSTEMS, "filesystems" }, 293 { LXPR_INTERRUPTS, "interrupts" }, 294 { LXPR_IOPORTS, "ioports" }, 295 { LXPR_KCORE, "kcore" }, 296 { LXPR_KMSG, "kmsg" }, 297 { LXPR_LOADAVG, "loadavg" }, 298 { LXPR_MEMINFO, "meminfo" }, 299 { LXPR_MODULES, "modules" }, 300 { LXPR_MOUNTS, "mounts" }, 301 { LXPR_NETDIR, "net" }, 302 { LXPR_PARTITIONS, "partitions" }, 303 { LXPR_SELF, "self" }, 304 { LXPR_STAT, "stat" }, 305 { LXPR_SWAPS, "swaps" }, 306 { LXPR_SYSDIR, "sys" }, 307 { LXPR_UPTIME, "uptime" }, 308 { LXPR_VERSION, "version" } 309 }; 310 311 #define PROCDIRFILES (sizeof (lx_procdir) / sizeof (lx_procdir[0])) 312 313 /* 314 * Contents of an lx /proc/<pid> directory. 315 */ 316 static lxpr_dirent_t piddir[] = { 317 { LXPR_PID_AUXV, "auxv" }, 318 { LXPR_PID_CGROUP, "cgroup" }, 319 { LXPR_PID_CMDLINE, "cmdline" }, 320 { LXPR_PID_COMM, "comm" }, 321 { LXPR_PID_CPU, "cpu" }, 322 { LXPR_PID_CURDIR, "cwd" }, 323 { LXPR_PID_ENV, "environ" }, 324 { LXPR_PID_EXE, "exe" }, 325 { LXPR_PID_LIMITS, "limits" }, 326 { LXPR_PID_MAPS, "maps" }, 327 { LXPR_PID_MEM, "mem" }, 328 { LXPR_PID_MOUNTINFO, "mountinfo" }, 329 { LXPR_PID_OOM_SCR_ADJ, "oom_score_adj" }, 330 { LXPR_PID_ROOTDIR, "root" }, 331 { LXPR_PID_STAT, "stat" }, 332 { LXPR_PID_STATM, "statm" }, 333 { LXPR_PID_STATUS, "status" }, 334 { LXPR_PID_TASKDIR, "task" }, 335 { LXPR_PID_FDDIR, "fd" } 336 }; 337 338 #define PIDDIRFILES (sizeof (piddir) / sizeof (piddir[0])) 339 340 /* 341 * Contents of an lx /proc/<pid>/task/<tid> directory. 342 */ 343 static lxpr_dirent_t tiddir[] = { 344 { LXPR_PID_TID_AUXV, "auxv" }, 345 { LXPR_PID_CGROUP, "cgroup" }, 346 { LXPR_PID_CMDLINE, "cmdline" }, 347 { LXPR_PID_TID_COMM, "comm" }, 348 { LXPR_PID_CPU, "cpu" }, 349 { LXPR_PID_CURDIR, "cwd" }, 350 { LXPR_PID_ENV, "environ" }, 351 { LXPR_PID_EXE, "exe" }, 352 { LXPR_PID_LIMITS, "limits" }, 353 { LXPR_PID_MAPS, "maps" }, 354 { LXPR_PID_MEM, "mem" }, 355 { LXPR_PID_MOUNTINFO, "mountinfo" }, 356 { LXPR_PID_TID_OOM_SCR_ADJ, "oom_score_adj" }, 357 { LXPR_PID_ROOTDIR, "root" }, 358 { LXPR_PID_TID_STAT, "stat" }, 359 { LXPR_PID_STATM, "statm" }, 360 { LXPR_PID_TID_STATUS, "status" }, 361 { LXPR_PID_FDDIR, "fd" } 362 }; 363 364 #define TIDDIRFILES (sizeof (tiddir) / sizeof (tiddir[0])) 365 366 #define LX_RLIM_INFINITY 0xFFFFFFFFFFFFFFFF 367 368 #define RCTL_INFINITE(x) \ 369 ((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \ 370 (x->rcv_flagaction & RCTL_GLOBAL_INFINITE)) 371 372 typedef struct lxpr_rlimtab { 373 char *rlim_name; /* limit name */ 374 char *rlim_unit; /* limit unit */ 375 char *rlim_rctl; /* rctl source */ 376 } lxpr_rlimtab_t; 377 378 static lxpr_rlimtab_t lxpr_rlimtab[] = { 379 { "Max cpu time", "seconds", "process.max-cpu-time" }, 380 { "Max file size", "bytes", "process.max-file-size" }, 381 { "Max data size", "bytes", "process.max-data-size" }, 382 { "Max stack size", "bytes", "process.max-stack-size" }, 383 { "Max core file size", "bytes", "process.max-core-size" }, 384 { "Max resident set", "bytes", "zone.max-physical-memory" }, 385 { "Max processes", "processes", "zone.max-lwps" }, 386 { "Max open files", "files", "process.max-file-descriptor" }, 387 { "Max locked memory", "bytes", "zone.max-locked-memory" }, 388 { "Max address space", "bytes", "process.max-address-space" }, 389 { "Max file locks", "locks", NULL }, 390 { "Max pending signals", "signals", 391 "process.max-sigqueue-size" }, 392 { "Max msgqueue size", "bytes", "process.max-msg-messages" }, 393 { NULL, NULL, NULL } 394 }; 395 396 397 /* 398 * contents of lx /proc/net directory 399 */ 400 static lxpr_dirent_t netdir[] = { 401 { LXPR_NET_ARP, "arp" }, 402 { LXPR_NET_DEV, "dev" }, 403 { LXPR_NET_DEV_MCAST, "dev_mcast" }, 404 { LXPR_NET_IF_INET6, "if_inet6" }, 405 { LXPR_NET_IGMP, "igmp" }, 406 { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" }, 407 { LXPR_NET_IP_MR_VIF, "ip_mr_vif" }, 408 { LXPR_NET_IPV6_ROUTE, "ipv6_route" }, 409 { LXPR_NET_MCFILTER, "mcfilter" }, 410 { LXPR_NET_NETSTAT, "netstat" }, 411 { LXPR_NET_RAW, "raw" }, 412 { LXPR_NET_ROUTE, "route" }, 413 { LXPR_NET_RPC, "rpc" }, 414 { LXPR_NET_RT_CACHE, "rt_cache" }, 415 { LXPR_NET_SOCKSTAT, "sockstat" }, 416 { LXPR_NET_SNMP, "snmp" }, 417 { LXPR_NET_STAT, "stat" }, 418 { LXPR_NET_TCP, "tcp" }, 419 { LXPR_NET_TCP6, "tcp6" }, 420 { LXPR_NET_UDP, "udp" }, 421 { LXPR_NET_UDP6, "udp6" }, 422 { LXPR_NET_UNIX, "unix" } 423 }; 424 425 #define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0])) 426 427 /* 428 * contents of /proc/sys directory 429 */ 430 static lxpr_dirent_t sysdir[] = { 431 { LXPR_SYS_FSDIR, "fs" }, 432 { LXPR_SYS_KERNELDIR, "kernel" }, 433 { LXPR_SYS_NETDIR, "net" }, 434 { LXPR_SYS_VMDIR, "vm" }, 435 }; 436 437 #define SYSDIRFILES (sizeof (sysdir) / sizeof (sysdir[0])) 438 439 /* 440 * contents of /proc/sys/fs directory 441 */ 442 static lxpr_dirent_t sys_fsdir[] = { 443 { LXPR_SYS_FS_INOTIFYDIR, "inotify" }, 444 }; 445 446 #define SYS_FSDIRFILES (sizeof (sys_fsdir) / sizeof (sys_fsdir[0])) 447 448 /* 449 * contents of /proc/sys/fs/inotify directory 450 */ 451 static lxpr_dirent_t sys_fs_inotifydir[] = { 452 { LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" }, 453 { LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES, "max_user_instances" }, 454 { LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES, "max_user_watches" }, 455 }; 456 457 #define SYS_FS_INOTIFYDIRFILES \ 458 (sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0])) 459 460 /* 461 * contents of /proc/sys/kernel directory 462 */ 463 static lxpr_dirent_t sys_kerneldir[] = { 464 { LXPR_SYS_KERNEL_CAPLCAP, "cap_last_cap" }, 465 { LXPR_SYS_KERNEL_COREPATT, "core_pattern" }, 466 { LXPR_SYS_KERNEL_HOSTNAME, "hostname" }, 467 { LXPR_SYS_KERNEL_MSGMNI, "msgmni" }, 468 { LXPR_SYS_KERNEL_NGROUPS_MAX, "ngroups_max" }, 469 { LXPR_SYS_KERNEL_OSREL, "osrelease" }, 470 { LXPR_SYS_KERNEL_PID_MAX, "pid_max" }, 471 { LXPR_SYS_KERNEL_RANDDIR, "random" }, 472 { LXPR_SYS_KERNEL_SEM, "sem" }, 473 { LXPR_SYS_KERNEL_SHMMAX, "shmmax" }, 474 { LXPR_SYS_KERNEL_SHMMNI, "shmmni" }, 475 { LXPR_SYS_KERNEL_THREADS_MAX, "threads-max" }, 476 }; 477 478 #define SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0])) 479 480 /* 481 * contents of /proc/sys/kernel/random directory 482 */ 483 static lxpr_dirent_t sys_randdir[] = { 484 { LXPR_SYS_KERNEL_RAND_BOOTID, "boot_id" }, 485 }; 486 487 #define SYS_RANDDIRFILES (sizeof (sys_randdir) / sizeof (sys_randdir[0])) 488 489 /* 490 * contents of /proc/sys/net directory 491 */ 492 static lxpr_dirent_t sys_netdir[] = { 493 { LXPR_SYS_NET_COREDIR, "core" }, 494 }; 495 496 #define SYS_NETDIRFILES (sizeof (sys_netdir) / sizeof (sys_netdir[0])) 497 498 /* 499 * contents of /proc/sys/net/core directory 500 */ 501 static lxpr_dirent_t sys_net_coredir[] = { 502 { LXPR_SYS_NET_CORE_SOMAXCON, "somaxconn" }, 503 }; 504 505 #define SYS_NET_COREDIRFILES \ 506 (sizeof (sys_net_coredir) / sizeof (sys_net_coredir[0])) 507 508 /* 509 * contents of /proc/sys/vm directory 510 */ 511 static lxpr_dirent_t sys_vmdir[] = { 512 { LXPR_SYS_VM_MINFR_KB, "min_free_kbytes" }, 513 { LXPR_SYS_VM_NHUGEP, "nr_hugepages" }, 514 { LXPR_SYS_VM_OVERCOMMIT_MEM, "overcommit_memory" }, 515 { LXPR_SYS_VM_SWAPPINESS, "swappiness" }, 516 }; 517 518 #define SYS_VMDIRFILES (sizeof (sys_vmdir) / sizeof (sys_vmdir[0])) 519 520 /* 521 * lxpr_open(): Vnode operation for VOP_OPEN() 522 */ 523 static int 524 lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 525 { 526 vnode_t *vp = *vpp; 527 lxpr_node_t *lxpnp = VTOLXP(vp); 528 lxpr_nodetype_t type = lxpnp->lxpr_type; 529 vnode_t *rvp; 530 int error = 0; 531 532 if (flag & FWRITE) { 533 /* Restrict writes to certain files */ 534 switch (type) { 535 case LXPR_PID_OOM_SCR_ADJ: 536 case LXPR_PID_TID_OOM_SCR_ADJ: 537 case LXPR_SYS_KERNEL_COREPATT: 538 case LXPR_SYS_NET_CORE_SOMAXCON: 539 case LXPR_SYS_VM_OVERCOMMIT_MEM: 540 case LXPR_SYS_VM_SWAPPINESS: 541 case LXPR_PID_FD_FD: 542 case LXPR_PID_TID_FD_FD: 543 break; 544 default: 545 return (EPERM); 546 } 547 } 548 549 /* 550 * If we are opening an underlying file only allow regular files, 551 * fifos or sockets; reject the open for anything else. 552 * Just do it if we are opening the current or root directory. 553 */ 554 if (lxpnp->lxpr_realvp != NULL) { 555 rvp = lxpnp->lxpr_realvp; 556 557 if (type == LXPR_PID_FD_FD && rvp->v_type != VREG && 558 rvp->v_type != VFIFO && rvp->v_type != VSOCK) { 559 error = EACCES; 560 } else { 561 if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) { 562 /* 563 * This flag lets the fifo open know that 564 * we're using proc/fd to open a fd which we 565 * already have open. Otherwise, the fifo might 566 * reject an open if the other end has closed. 567 */ 568 flag |= FKLYR; 569 } 570 /* 571 * Need to hold rvp since VOP_OPEN() may release it. 572 */ 573 VN_HOLD(rvp); 574 error = VOP_OPEN(&rvp, flag, cr, ct); 575 if (error) { 576 VN_RELE(rvp); 577 } else { 578 *vpp = rvp; 579 VN_RELE(vp); 580 } 581 } 582 } 583 584 return (error); 585 } 586 587 588 /* 589 * lxpr_close(): Vnode operation for VOP_CLOSE() 590 */ 591 /* ARGSUSED */ 592 static int 593 lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 594 caller_context_t *ct) 595 { 596 lxpr_node_t *lxpr = VTOLXP(vp); 597 lxpr_nodetype_t type = lxpr->lxpr_type; 598 599 /* 600 * we should never get here because the close is done on the realvp 601 * for these nodes 602 */ 603 ASSERT(type != LXPR_PID_FD_FD && 604 type != LXPR_PID_CURDIR && 605 type != LXPR_PID_ROOTDIR && 606 type != LXPR_PID_EXE); 607 608 return (0); 609 } 610 611 static void (*lxpr_read_function[LXPR_NFILES])() = { 612 lxpr_read_isdir, /* /proc */ 613 lxpr_read_isdir, /* /proc/<pid> */ 614 lxpr_read_pid_auxv, /* /proc/<pid>/auxv */ 615 lxpr_read_pid_cgroup, /* /proc/<pid>/cgroup */ 616 lxpr_read_pid_cmdline, /* /proc/<pid>/cmdline */ 617 lxpr_read_pid_comm, /* /proc/<pid>/comm */ 618 lxpr_read_empty, /* /proc/<pid>/cpu */ 619 lxpr_read_invalid, /* /proc/<pid>/cwd */ 620 lxpr_read_pid_env, /* /proc/<pid>/environ */ 621 lxpr_read_invalid, /* /proc/<pid>/exe */ 622 lxpr_read_pid_limits, /* /proc/<pid>/limits */ 623 lxpr_read_pid_maps, /* /proc/<pid>/maps */ 624 lxpr_read_empty, /* /proc/<pid>/mem */ 625 lxpr_read_pid_mountinfo, /* /proc/<pid>/mountinfo */ 626 lxpr_read_pid_oom_scr_adj, /* /proc/<pid>/oom_score_adj */ 627 lxpr_read_invalid, /* /proc/<pid>/root */ 628 lxpr_read_pid_stat, /* /proc/<pid>/stat */ 629 lxpr_read_pid_statm, /* /proc/<pid>/statm */ 630 lxpr_read_pid_status, /* /proc/<pid>/status */ 631 lxpr_read_isdir, /* /proc/<pid>/task */ 632 lxpr_read_isdir, /* /proc/<pid>/task/nn */ 633 lxpr_read_isdir, /* /proc/<pid>/fd */ 634 lxpr_read_fd, /* /proc/<pid>/fd/nn */ 635 lxpr_read_pid_auxv, /* /proc/<pid>/task/<tid>/auxv */ 636 lxpr_read_pid_cgroup, /* /proc/<pid>/task/<tid>/cgroup */ 637 lxpr_read_pid_cmdline, /* /proc/<pid>/task/<tid>/cmdline */ 638 lxpr_read_pid_comm, /* /proc/<pid>/task/<tid>/comm */ 639 lxpr_read_empty, /* /proc/<pid>/task/<tid>/cpu */ 640 lxpr_read_invalid, /* /proc/<pid>/task/<tid>/cwd */ 641 lxpr_read_pid_env, /* /proc/<pid>/task/<tid>/environ */ 642 lxpr_read_invalid, /* /proc/<pid>/task/<tid>/exe */ 643 lxpr_read_pid_limits, /* /proc/<pid>/task/<tid>/limits */ 644 lxpr_read_pid_maps, /* /proc/<pid>/task/<tid>/maps */ 645 lxpr_read_empty, /* /proc/<pid>/task/<tid>/mem */ 646 lxpr_read_pid_mountinfo, /* /proc/<pid>/task/<tid>/mountinfo */ 647 lxpr_read_pid_oom_scr_adj, /* /proc/<pid>/task/<tid>/oom_scr_adj */ 648 lxpr_read_invalid, /* /proc/<pid>/task/<tid>/root */ 649 lxpr_read_pid_tid_stat, /* /proc/<pid>/task/<tid>/stat */ 650 lxpr_read_pid_statm, /* /proc/<pid>/task/<tid>/statm */ 651 lxpr_read_pid_tid_status, /* /proc/<pid>/task/<tid>/status */ 652 lxpr_read_isdir, /* /proc/<pid>/task/<tid>/fd */ 653 lxpr_read_fd, /* /proc/<pid>/task/<tid>/fd/nn */ 654 lxpr_read_cgroups, /* /proc/cgroups */ 655 lxpr_read_empty, /* /proc/cmdline */ 656 lxpr_read_cpuinfo, /* /proc/cpuinfo */ 657 lxpr_read_empty, /* /proc/devices */ 658 lxpr_read_diskstats, /* /proc/diskstats */ 659 lxpr_read_empty, /* /proc/dma */ 660 lxpr_read_filesystems, /* /proc/filesystems */ 661 lxpr_read_empty, /* /proc/interrupts */ 662 lxpr_read_empty, /* /proc/ioports */ 663 lxpr_read_empty, /* /proc/kcore */ 664 lxpr_read_invalid, /* /proc/kmsg -- see lxpr_read() */ 665 lxpr_read_loadavg, /* /proc/loadavg */ 666 lxpr_read_meminfo, /* /proc/meminfo */ 667 lxpr_read_empty, /* /proc/modules */ 668 lxpr_read_mounts, /* /proc/mounts */ 669 lxpr_read_isdir, /* /proc/net */ 670 lxpr_read_net_arp, /* /proc/net/arp */ 671 lxpr_read_net_dev, /* /proc/net/dev */ 672 lxpr_read_net_dev_mcast, /* /proc/net/dev_mcast */ 673 lxpr_read_net_if_inet6, /* /proc/net/if_inet6 */ 674 lxpr_read_net_igmp, /* /proc/net/igmp */ 675 lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */ 676 lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */ 677 lxpr_read_net_ipv6_route, /* /proc/net/ipv6_route */ 678 lxpr_read_net_mcfilter, /* /proc/net/mcfilter */ 679 lxpr_read_net_netstat, /* /proc/net/netstat */ 680 lxpr_read_net_raw, /* /proc/net/raw */ 681 lxpr_read_net_route, /* /proc/net/route */ 682 lxpr_read_net_rpc, /* /proc/net/rpc */ 683 lxpr_read_net_rt_cache, /* /proc/net/rt_cache */ 684 lxpr_read_net_sockstat, /* /proc/net/sockstat */ 685 lxpr_read_net_snmp, /* /proc/net/snmp */ 686 lxpr_read_net_stat, /* /proc/net/stat */ 687 lxpr_read_net_tcp, /* /proc/net/tcp */ 688 lxpr_read_net_tcp6, /* /proc/net/tcp6 */ 689 lxpr_read_net_udp, /* /proc/net/udp */ 690 lxpr_read_net_udp6, /* /proc/net/udp6 */ 691 lxpr_read_net_unix, /* /proc/net/unix */ 692 lxpr_read_partitions, /* /proc/partitions */ 693 lxpr_read_invalid, /* /proc/self */ 694 lxpr_read_stat, /* /proc/stat */ 695 lxpr_read_swaps, /* /proc/swaps */ 696 lxpr_read_invalid, /* /proc/sys */ 697 lxpr_read_invalid, /* /proc/sys/fs */ 698 lxpr_read_invalid, /* /proc/sys/fs/inotify */ 699 lxpr_read_sys_fs_inotify_max_queued_events, /* max_queued_events */ 700 lxpr_read_sys_fs_inotify_max_user_instances, /* max_user_instances */ 701 lxpr_read_sys_fs_inotify_max_user_watches, /* max_user_watches */ 702 lxpr_read_invalid, /* /proc/sys/kernel */ 703 lxpr_read_sys_kernel_caplcap, /* /proc/sys/kernel/cap_last_cap */ 704 lxpr_read_sys_kernel_corepatt, /* /proc/sys/kernel/core_pattern */ 705 lxpr_read_sys_kernel_hostname, /* /proc/sys/kernel/hostname */ 706 lxpr_read_sys_kernel_msgmni, /* /proc/sys/kernel/msgmni */ 707 lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */ 708 lxpr_read_sys_kernel_osrel, /* /proc/sys/kernel/osrelease */ 709 lxpr_read_sys_kernel_pid_max, /* /proc/sys/kernel/pid_max */ 710 lxpr_read_invalid, /* /proc/sys/kernel/random */ 711 lxpr_read_sys_kernel_rand_bootid, /* /proc/sys/kernel/random/boot_id */ 712 lxpr_read_sys_kernel_sem, /* /proc/sys/kernel/sem */ 713 lxpr_read_sys_kernel_shmmax, /* /proc/sys/kernel/shmmax */ 714 lxpr_read_sys_kernel_shmmni, /* /proc/sys/kernel/shmmni */ 715 lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */ 716 lxpr_read_invalid, /* /proc/sys/net */ 717 lxpr_read_invalid, /* /proc/sys/net/core */ 718 lxpr_read_sys_net_core_somaxc, /* /proc/sys/net/core/somaxconn */ 719 lxpr_read_invalid, /* /proc/sys/vm */ 720 lxpr_read_sys_vm_minfr_kb, /* /proc/sys/vm/min_free_kbytes */ 721 lxpr_read_sys_vm_nhpages, /* /proc/sys/vm/nr_hugepages */ 722 lxpr_read_sys_vm_overcommit_mem, /* /proc/sys/vm/overcommit_memory */ 723 lxpr_read_sys_vm_swappiness, /* /proc/sys/vm/swappiness */ 724 lxpr_read_uptime, /* /proc/uptime */ 725 lxpr_read_version, /* /proc/version */ 726 }; 727 728 /* 729 * Array of lookup functions, indexed by lx /proc file type. 730 */ 731 static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = { 732 lxpr_lookup_procdir, /* /proc */ 733 lxpr_lookup_piddir, /* /proc/<pid> */ 734 lxpr_lookup_not_a_dir, /* /proc/<pid>/auxv */ 735 lxpr_lookup_not_a_dir, /* /proc/<pid>/cgroup */ 736 lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */ 737 lxpr_lookup_not_a_dir, /* /proc/<pid>/comm */ 738 lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */ 739 lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */ 740 lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */ 741 lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */ 742 lxpr_lookup_not_a_dir, /* /proc/<pid>/limits */ 743 lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */ 744 lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */ 745 lxpr_lookup_not_a_dir, /* /proc/<pid>/mountinfo */ 746 lxpr_lookup_not_a_dir, /* /proc/<pid>/oom_score_adj */ 747 lxpr_lookup_not_a_dir, /* /proc/<pid>/root */ 748 lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */ 749 lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */ 750 lxpr_lookup_not_a_dir, /* /proc/<pid>/status */ 751 lxpr_lookup_taskdir, /* /proc/<pid>/task */ 752 lxpr_lookup_task_tid_dir, /* /proc/<pid>/task/nn */ 753 lxpr_lookup_fddir, /* /proc/<pid>/fd */ 754 lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */ 755 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/auxv */ 756 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cgroup */ 757 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cmdline */ 758 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/comm */ 759 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cpu */ 760 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cwd */ 761 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/environ */ 762 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/exe */ 763 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/limits */ 764 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/maps */ 765 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/mem */ 766 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/mountinfo */ 767 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/oom_scr_adj */ 768 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/root */ 769 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/stat */ 770 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/statm */ 771 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/status */ 772 lxpr_lookup_fddir, /* /proc/<pid>/task/<tid>/fd */ 773 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/fd/nn */ 774 lxpr_lookup_not_a_dir, /* /proc/cgroups */ 775 lxpr_lookup_not_a_dir, /* /proc/cmdline */ 776 lxpr_lookup_not_a_dir, /* /proc/cpuinfo */ 777 lxpr_lookup_not_a_dir, /* /proc/devices */ 778 lxpr_lookup_not_a_dir, /* /proc/diskstats */ 779 lxpr_lookup_not_a_dir, /* /proc/dma */ 780 lxpr_lookup_not_a_dir, /* /proc/filesystems */ 781 lxpr_lookup_not_a_dir, /* /proc/interrupts */ 782 lxpr_lookup_not_a_dir, /* /proc/ioports */ 783 lxpr_lookup_not_a_dir, /* /proc/kcore */ 784 lxpr_lookup_not_a_dir, /* /proc/kmsg */ 785 lxpr_lookup_not_a_dir, /* /proc/loadavg */ 786 lxpr_lookup_not_a_dir, /* /proc/meminfo */ 787 lxpr_lookup_not_a_dir, /* /proc/modules */ 788 lxpr_lookup_not_a_dir, /* /proc/mounts */ 789 lxpr_lookup_netdir, /* /proc/net */ 790 lxpr_lookup_not_a_dir, /* /proc/net/arp */ 791 lxpr_lookup_not_a_dir, /* /proc/net/dev */ 792 lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */ 793 lxpr_lookup_not_a_dir, /* /proc/net/if_inet6 */ 794 lxpr_lookup_not_a_dir, /* /proc/net/igmp */ 795 lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */ 796 lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */ 797 lxpr_lookup_not_a_dir, /* /proc/net/ipv6_route */ 798 lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */ 799 lxpr_lookup_not_a_dir, /* /proc/net/netstat */ 800 lxpr_lookup_not_a_dir, /* /proc/net/raw */ 801 lxpr_lookup_not_a_dir, /* /proc/net/route */ 802 lxpr_lookup_not_a_dir, /* /proc/net/rpc */ 803 lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */ 804 lxpr_lookup_not_a_dir, /* /proc/net/sockstat */ 805 lxpr_lookup_not_a_dir, /* /proc/net/snmp */ 806 lxpr_lookup_not_a_dir, /* /proc/net/stat */ 807 lxpr_lookup_not_a_dir, /* /proc/net/tcp */ 808 lxpr_lookup_not_a_dir, /* /proc/net/tcp6 */ 809 lxpr_lookup_not_a_dir, /* /proc/net/udp */ 810 lxpr_lookup_not_a_dir, /* /proc/net/udp6 */ 811 lxpr_lookup_not_a_dir, /* /proc/net/unix */ 812 lxpr_lookup_not_a_dir, /* /proc/partitions */ 813 lxpr_lookup_not_a_dir, /* /proc/self */ 814 lxpr_lookup_not_a_dir, /* /proc/stat */ 815 lxpr_lookup_not_a_dir, /* /proc/swaps */ 816 lxpr_lookup_sysdir, /* /proc/sys */ 817 lxpr_lookup_sys_fsdir, /* /proc/sys/fs */ 818 lxpr_lookup_sys_fs_inotifydir, /* /proc/sys/fs/inotify */ 819 lxpr_lookup_not_a_dir, /* .../inotify/max_queued_events */ 820 lxpr_lookup_not_a_dir, /* .../inotify/max_user_instances */ 821 lxpr_lookup_not_a_dir, /* .../inotify/max_user_watches */ 822 lxpr_lookup_sys_kerneldir, /* /proc/sys/kernel */ 823 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/cap_last_cap */ 824 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/core_pattern */ 825 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/hostname */ 826 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/msgmni */ 827 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/ngroups_max */ 828 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/osrelease */ 829 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/pid_max */ 830 lxpr_lookup_sys_kdir_randdir, /* /proc/sys/kernel/random */ 831 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/random/boot_id */ 832 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/sem */ 833 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/shmmax */ 834 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/shmmni */ 835 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/threads-max */ 836 lxpr_lookup_sys_netdir, /* /proc/sys/net */ 837 lxpr_lookup_sys_net_coredir, /* /proc/sys/net/core */ 838 lxpr_lookup_not_a_dir, /* /proc/sys/net/core/somaxconn */ 839 lxpr_lookup_sys_vmdir, /* /proc/sys/vm */ 840 lxpr_lookup_not_a_dir, /* /proc/sys/vm/min_free_kbytes */ 841 lxpr_lookup_not_a_dir, /* /proc/sys/vm/nr_hugepages */ 842 lxpr_lookup_not_a_dir, /* /proc/sys/vm/overcommit_memory */ 843 lxpr_lookup_not_a_dir, /* /proc/sys/vm/swappiness */ 844 lxpr_lookup_not_a_dir, /* /proc/uptime */ 845 lxpr_lookup_not_a_dir, /* /proc/version */ 846 }; 847 848 /* 849 * Array of readdir functions, indexed by /proc file type. 850 */ 851 static int (*lxpr_readdir_function[LXPR_NFILES])() = { 852 lxpr_readdir_procdir, /* /proc */ 853 lxpr_readdir_piddir, /* /proc/<pid> */ 854 lxpr_readdir_not_a_dir, /* /proc/<pid>/auxv */ 855 lxpr_readdir_not_a_dir, /* /proc/<pid>/cgroup */ 856 lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */ 857 lxpr_readdir_not_a_dir, /* /proc/<pid>/comm */ 858 lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */ 859 lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */ 860 lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */ 861 lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */ 862 lxpr_readdir_not_a_dir, /* /proc/<pid>/limits */ 863 lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */ 864 lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */ 865 lxpr_readdir_not_a_dir, /* /proc/<pid>/mountinfo */ 866 lxpr_readdir_not_a_dir, /* /proc/<pid>/oom_score_adj */ 867 lxpr_readdir_not_a_dir, /* /proc/<pid>/root */ 868 lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */ 869 lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */ 870 lxpr_readdir_not_a_dir, /* /proc/<pid>/status */ 871 lxpr_readdir_taskdir, /* /proc/<pid>/task */ 872 lxpr_readdir_task_tid_dir, /* /proc/<pid>/task/nn */ 873 lxpr_readdir_fddir, /* /proc/<pid>/fd */ 874 lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */ 875 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/auxv */ 876 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cgroup */ 877 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cmdline */ 878 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/comm */ 879 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cpu */ 880 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cwd */ 881 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/environ */ 882 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/exe */ 883 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/limits */ 884 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/maps */ 885 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/mem */ 886 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/mountinfo */ 887 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid/oom_scr_adj */ 888 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/root */ 889 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/stat */ 890 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/statm */ 891 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/status */ 892 lxpr_readdir_fddir, /* /proc/<pid>/task/<tid>/fd */ 893 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/fd/nn */ 894 lxpr_readdir_not_a_dir, /* /proc/cgroups */ 895 lxpr_readdir_not_a_dir, /* /proc/cmdline */ 896 lxpr_readdir_not_a_dir, /* /proc/cpuinfo */ 897 lxpr_readdir_not_a_dir, /* /proc/devices */ 898 lxpr_readdir_not_a_dir, /* /proc/diskstats */ 899 lxpr_readdir_not_a_dir, /* /proc/dma */ 900 lxpr_readdir_not_a_dir, /* /proc/filesystems */ 901 lxpr_readdir_not_a_dir, /* /proc/interrupts */ 902 lxpr_readdir_not_a_dir, /* /proc/ioports */ 903 lxpr_readdir_not_a_dir, /* /proc/kcore */ 904 lxpr_readdir_not_a_dir, /* /proc/kmsg */ 905 lxpr_readdir_not_a_dir, /* /proc/loadavg */ 906 lxpr_readdir_not_a_dir, /* /proc/meminfo */ 907 lxpr_readdir_not_a_dir, /* /proc/modules */ 908 lxpr_readdir_not_a_dir, /* /proc/mounts */ 909 lxpr_readdir_netdir, /* /proc/net */ 910 lxpr_readdir_not_a_dir, /* /proc/net/arp */ 911 lxpr_readdir_not_a_dir, /* /proc/net/dev */ 912 lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */ 913 lxpr_readdir_not_a_dir, /* /proc/net/if_inet6 */ 914 lxpr_readdir_not_a_dir, /* /proc/net/igmp */ 915 lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */ 916 lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */ 917 lxpr_readdir_not_a_dir, /* /proc/net/ipv6_route */ 918 lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */ 919 lxpr_readdir_not_a_dir, /* /proc/net/netstat */ 920 lxpr_readdir_not_a_dir, /* /proc/net/raw */ 921 lxpr_readdir_not_a_dir, /* /proc/net/route */ 922 lxpr_readdir_not_a_dir, /* /proc/net/rpc */ 923 lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */ 924 lxpr_readdir_not_a_dir, /* /proc/net/sockstat */ 925 lxpr_readdir_not_a_dir, /* /proc/net/snmp */ 926 lxpr_readdir_not_a_dir, /* /proc/net/stat */ 927 lxpr_readdir_not_a_dir, /* /proc/net/tcp */ 928 lxpr_readdir_not_a_dir, /* /proc/net/tcp6 */ 929 lxpr_readdir_not_a_dir, /* /proc/net/udp */ 930 lxpr_readdir_not_a_dir, /* /proc/net/udp6 */ 931 lxpr_readdir_not_a_dir, /* /proc/net/unix */ 932 lxpr_readdir_not_a_dir, /* /proc/partitions */ 933 lxpr_readdir_not_a_dir, /* /proc/self */ 934 lxpr_readdir_not_a_dir, /* /proc/stat */ 935 lxpr_readdir_not_a_dir, /* /proc/swaps */ 936 lxpr_readdir_sysdir, /* /proc/sys */ 937 lxpr_readdir_sys_fsdir, /* /proc/sys/fs */ 938 lxpr_readdir_sys_fs_inotifydir, /* /proc/sys/fs/inotify */ 939 lxpr_readdir_not_a_dir, /* .../inotify/max_queued_events */ 940 lxpr_readdir_not_a_dir, /* .../inotify/max_user_instances */ 941 lxpr_readdir_not_a_dir, /* .../inotify/max_user_watches */ 942 lxpr_readdir_sys_kerneldir, /* /proc/sys/kernel */ 943 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/cap_last_cap */ 944 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/core_pattern */ 945 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/hostname */ 946 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/msgmni */ 947 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/ngroups_max */ 948 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/osrelease */ 949 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/pid_max */ 950 lxpr_readdir_sys_kdir_randdir, /* /proc/sys/kernel/random */ 951 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/random/boot_id */ 952 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/sem */ 953 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/shmmax */ 954 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/shmmni */ 955 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/threads-max */ 956 lxpr_readdir_sys_netdir, /* /proc/sys/net */ 957 lxpr_readdir_sys_net_coredir, /* /proc/sys/net/core */ 958 lxpr_readdir_not_a_dir, /* /proc/sys/net/core/somaxconn */ 959 lxpr_readdir_sys_vmdir, /* /proc/sys/vm */ 960 lxpr_readdir_not_a_dir, /* /proc/sys/vm/min_free_kbytes */ 961 lxpr_readdir_not_a_dir, /* /proc/sys/vm/nr_hugepages */ 962 lxpr_readdir_not_a_dir, /* /proc/sys/vm/overcommit_memory */ 963 lxpr_readdir_not_a_dir, /* /proc/sys/vm/swappiness */ 964 lxpr_readdir_not_a_dir, /* /proc/uptime */ 965 lxpr_readdir_not_a_dir, /* /proc/version */ 966 }; 967 968 969 /* 970 * lxpr_read(): Vnode operation for VOP_READ() 971 * 972 * As the format of all the files that can be read in the lx procfs is human 973 * readable and not binary structures there do not have to be different 974 * read variants depending on whether the reading process model is 32 or 64 bits 975 * (at least in general, and certainly the difference is unlikely to be enough 976 * to justify have different routines for 32 and 64 bit reads 977 */ 978 /* ARGSUSED */ 979 static int 980 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 981 caller_context_t *ct) 982 { 983 lxpr_node_t *lxpnp = VTOLXP(vp); 984 lxpr_nodetype_t type = lxpnp->lxpr_type; 985 lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop); 986 int error; 987 988 ASSERT(type < LXPR_NFILES); 989 990 if (type == LXPR_KMSG) { 991 ldi_ident_t li = VTOLXPM(vp)->lxprm_li; 992 ldi_handle_t ldih; 993 struct strioctl str; 994 int rv; 995 996 /* 997 * Open the zone's console device using the layered driver 998 * interface. 999 */ 1000 if ((error = 1001 ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li)) != 0) 1002 return (error); 1003 1004 /* 1005 * Send an ioctl to the underlying console device, letting it 1006 * know we're interested in getting console messages. 1007 */ 1008 str.ic_cmd = I_CONSLOG; 1009 str.ic_timout = 0; 1010 str.ic_len = 0; 1011 str.ic_dp = NULL; 1012 if ((error = ldi_ioctl(ldih, I_STR, 1013 (intptr_t)&str, FKIOCTL, cr, &rv)) != 0) 1014 return (error); 1015 1016 lxpr_read_kmsg(lxpnp, uiobuf, ldih); 1017 1018 if ((error = ldi_close(ldih, FREAD, cr)) != 0) 1019 return (error); 1020 } else { 1021 lxpr_read_function[type](lxpnp, uiobuf); 1022 } 1023 1024 error = lxpr_uiobuf_flush(uiobuf); 1025 lxpr_uiobuf_free(uiobuf); 1026 1027 return (error); 1028 } 1029 1030 /* 1031 * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty() 1032 * 1033 * Various special case reads: 1034 * - trying to read a directory 1035 * - invalid file (used to mean a file that should be implemented, 1036 * but isn't yet) 1037 * - empty file 1038 * - wait to be able to read a file that will never have anything to read 1039 */ 1040 /* ARGSUSED */ 1041 static void 1042 lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1043 { 1044 lxpr_uiobuf_seterr(uiobuf, EISDIR); 1045 } 1046 1047 /* ARGSUSED */ 1048 static void 1049 lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1050 { 1051 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1052 } 1053 1054 /* ARGSUSED */ 1055 static void 1056 lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1057 { 1058 } 1059 1060 /* 1061 * lxpr_read_pid_auxv(): read process aux vector 1062 */ 1063 static void 1064 lxpr_read_pid_auxv(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1065 { 1066 proc_t *p; 1067 lx_proc_data_t *pd; 1068 lx_elf_data_t *edp = NULL; 1069 int i, cnt; 1070 1071 ASSERT(lxpnp->lxpr_type == LXPR_PID_AUXV || 1072 lxpnp->lxpr_type == LXPR_PID_TID_AUXV); 1073 1074 p = lxpr_lock(lxpnp->lxpr_pid); 1075 1076 if (p == NULL) { 1077 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1078 return; 1079 } 1080 if ((pd = ptolxproc(p)) == NULL) { 1081 /* Emit a single AT_NULL record for non-branded processes */ 1082 auxv_t buf; 1083 1084 bzero(&buf, sizeof (buf)); 1085 lxpr_unlock(p); 1086 lxpr_uiobuf_write(uiobuf, (char *)&buf, sizeof (buf)); 1087 return; 1088 } else { 1089 edp = &pd->l_elf_data; 1090 } 1091 1092 if (p->p_model == DATAMODEL_NATIVE) { 1093 auxv_t buf[__KERN_NAUXV_IMPL]; 1094 1095 /* 1096 * Because a_type is only of size int (not long), the buffer 1097 * contents must be zeroed first to ensure cleanliness. 1098 */ 1099 bzero(buf, sizeof (buf)); 1100 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) { 1101 if (lx_auxv_stol(&p->p_user.u_auxv[i], 1102 &buf[cnt], edp) == 0) { 1103 cnt++; 1104 } 1105 if (p->p_user.u_auxv[i].a_type == AT_NULL) { 1106 break; 1107 } 1108 } 1109 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0])); 1110 lxpr_unlock(p); 1111 } 1112 #if defined(_SYSCALL32_IMPL) 1113 else { 1114 auxv32_t buf[__KERN_NAUXV_IMPL]; 1115 1116 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) { 1117 auxv_t temp; 1118 1119 if (lx_auxv_stol(&p->p_user.u_auxv[i], 1120 &temp, edp) == 0) { 1121 buf[cnt].a_type = (int)temp.a_type; 1122 buf[cnt].a_un.a_val = (int)temp.a_un.a_val; 1123 cnt++; 1124 } 1125 if (p->p_user.u_auxv[i].a_type == AT_NULL) { 1126 break; 1127 } 1128 } 1129 lxpr_unlock(p); 1130 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0])); 1131 } 1132 #endif /* defined(_SYSCALL32_IMPL) */ 1133 } 1134 1135 /* 1136 * lxpr_read_pid_cgroup(): read cgroups for process 1137 */ 1138 static void 1139 lxpr_read_pid_cgroup(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1140 { 1141 proc_t *p; 1142 1143 ASSERT(lxpnp->lxpr_type == LXPR_PID_CGROUP || 1144 lxpnp->lxpr_type == LXPR_PID_TID_CGROUP); 1145 1146 p = lxpr_lock(lxpnp->lxpr_pid); 1147 if (p == NULL) { 1148 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1149 return; 1150 } 1151 1152 /* basic stub, 3rd field will need to be populated */ 1153 lxpr_uiobuf_printf(uiobuf, "1:name=systemd:/\n"); 1154 1155 lxpr_unlock(p); 1156 } 1157 1158 static void 1159 lxpr_copy_cmdline(proc_t *p, lx_proc_data_t *pd, lxpr_uiobuf_t *uiobuf) 1160 { 1161 uio_t *uiop = uiobuf->uiop; 1162 char *buf = uiobuf->buffer; 1163 int bsz = uiobuf->buffsize; 1164 boolean_t env_overflow = B_FALSE; 1165 uintptr_t pos = pd->l_args_start + uiop->uio_offset; 1166 uintptr_t estart = pd->l_envs_start; 1167 uintptr_t eend = pd->l_envs_end; 1168 size_t chunk, copied; 1169 int err = 0; 1170 1171 /* Do not bother with data beyond the end of the envp strings area. */ 1172 if (pos > eend) { 1173 return; 1174 } 1175 mutex_exit(&p->p_lock); 1176 1177 /* 1178 * If the starting or ending bounds are outside the argv strings area, 1179 * check to see if the process has overwritten the terminating NULL. 1180 * If not, no data needs to be copied from oustide the argv area. 1181 */ 1182 if (pos >= estart || (pos + uiop->uio_resid) >= estart) { 1183 uint8_t term; 1184 if (uread(p, &term, sizeof (term), estart - 1) != 0) { 1185 err = EFAULT; 1186 } else if (term != 0) { 1187 env_overflow = B_TRUE; 1188 } 1189 } 1190 1191 1192 /* Data between astart and estart-1 can be copied freely. */ 1193 while (pos < estart && uiop->uio_resid > 0 && err == 0) { 1194 chunk = MIN(estart - pos, uiop->uio_resid); 1195 chunk = MIN(chunk, bsz); 1196 1197 if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) != 0 || 1198 copied != chunk) { 1199 err = EFAULT; 1200 break; 1201 } 1202 err = uiomove(buf, copied, UIO_READ, uiop); 1203 pos += copied; 1204 } 1205 1206 /* 1207 * Onward from estart, data is copied as a contiguous string. To 1208 * protect env data from potential snooping, only one buffer-sized copy 1209 * is allowed to avoid complex seek logic. 1210 */ 1211 if (err == 0 && env_overflow && pos == estart && uiop->uio_resid > 0) { 1212 chunk = MIN(eend - pos, uiop->uio_resid); 1213 chunk = MIN(chunk, bsz); 1214 if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) == 0) { 1215 int len = strnlen(buf, copied); 1216 if (len > 0) { 1217 err = uiomove(buf, len, UIO_READ, uiop); 1218 } 1219 } 1220 } 1221 1222 uiobuf->error = err; 1223 /* reset any uiobuf state */ 1224 uiobuf->pos = uiobuf->buffer; 1225 uiobuf->beg = 0; 1226 1227 mutex_enter(&p->p_lock); 1228 } 1229 1230 /* 1231 * lxpr_read_pid_cmdline(): read argument vector from process 1232 */ 1233 static void 1234 lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1235 { 1236 proc_t *p; 1237 char *buf; 1238 size_t asz = lxpr_maxargvlen, sz; 1239 lx_proc_data_t *pd; 1240 1241 ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE || 1242 lxpnp->lxpr_type == LXPR_PID_TID_CMDLINE); 1243 1244 buf = kmem_alloc(asz, KM_SLEEP); 1245 1246 p = lxpr_lock(lxpnp->lxpr_pid); 1247 if (p == NULL) { 1248 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1249 kmem_free(buf, asz); 1250 return; 1251 } 1252 1253 if ((pd = ptolxproc(p)) != NULL && pd->l_args_start != 0 && 1254 pd->l_envs_start != 0 && pd->l_envs_end != 0) { 1255 /* Use Linux-style argv bounds if possible. */ 1256 lxpr_copy_cmdline(p, pd, uiobuf); 1257 } else { 1258 if (prreadargv(p, buf, asz, &sz) != 0) { 1259 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1260 } else { 1261 lxpr_uiobuf_write(uiobuf, buf, sz); 1262 } 1263 } 1264 1265 lxpr_unlock(p); 1266 kmem_free(buf, asz); 1267 } 1268 1269 /* 1270 * lxpr_read_pid_comm(): read command from process 1271 */ 1272 static void 1273 lxpr_read_pid_comm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1274 { 1275 proc_t *p; 1276 1277 VERIFY(lxpnp->lxpr_type == LXPR_PID_COMM || 1278 lxpnp->lxpr_type == LXPR_PID_TID_COMM); 1279 1280 /* 1281 * Because prctl(PR_SET_NAME) does not set custom names for threads 1282 * (vs processes), there is no need for special handling here. 1283 */ 1284 if ((p = lxpr_lock(lxpnp->lxpr_pid)) == NULL) { 1285 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1286 return; 1287 } 1288 lxpr_uiobuf_printf(uiobuf, "%s\n", p->p_user.u_comm); 1289 lxpr_unlock(p); 1290 } 1291 1292 /* 1293 * lxpr_read_pid_env(): read env vector from process 1294 */ 1295 static void 1296 lxpr_read_pid_env(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1297 { 1298 proc_t *p; 1299 char *buf; 1300 size_t asz = lxpr_maxenvvlen, sz; 1301 1302 ASSERT(lxpnp->lxpr_type == LXPR_PID_ENV); 1303 1304 buf = kmem_alloc(asz, KM_SLEEP); 1305 1306 p = lxpr_lock(lxpnp->lxpr_pid); 1307 if (p == NULL) { 1308 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1309 kmem_free(buf, asz); 1310 return; 1311 } 1312 1313 if (prreadenvv(p, buf, asz, &sz) != 0) { 1314 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1315 } else { 1316 lxpr_uiobuf_write(uiobuf, buf, sz); 1317 } 1318 1319 lxpr_unlock(p); 1320 kmem_free(buf, asz); 1321 } 1322 1323 /* 1324 * lxpr_read_pid_limits(): ulimit file 1325 */ 1326 static void 1327 lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1328 { 1329 proc_t *p; 1330 rctl_qty_t cur, max; 1331 rctl_val_t *oval, *nval; 1332 rctl_hndl_t hndl; 1333 char *kname; 1334 int i; 1335 1336 ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS || 1337 lxpnp->lxpr_type == LXPR_PID_TID_LIMITS); 1338 1339 nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP); 1340 1341 p = lxpr_lock(lxpnp->lxpr_pid); 1342 if (p == NULL) { 1343 kmem_free(nval, sizeof (rctl_val_t)); 1344 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1345 return; 1346 } 1347 1348 lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n", 1349 "Limit", "Soft Limit", "Hard Limit", "Units"); 1350 for (i = 0; lxpr_rlimtab[i].rlim_name != NULL; i++) { 1351 kname = lxpr_rlimtab[i].rlim_rctl; 1352 /* default to unlimited for resources without an analog */ 1353 cur = RLIM_INFINITY; 1354 max = RLIM_INFINITY; 1355 if (kname != NULL) { 1356 hndl = rctl_hndl_lookup(kname); 1357 oval = NULL; 1358 while ((hndl != -1) && 1359 rctl_local_get(hndl, oval, nval, p) == 0) { 1360 oval = nval; 1361 switch (nval->rcv_privilege) { 1362 case RCPRIV_BASIC: 1363 if (!RCTL_INFINITE(nval)) 1364 cur = nval->rcv_value; 1365 break; 1366 case RCPRIV_PRIVILEGED: 1367 if (!RCTL_INFINITE(nval)) 1368 max = nval->rcv_value; 1369 break; 1370 } 1371 } 1372 } 1373 1374 lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name); 1375 if (cur == RLIM_INFINITY || cur == LX_RLIM_INFINITY) { 1376 lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited"); 1377 } else { 1378 lxpr_uiobuf_printf(uiobuf, " %-20lu", cur); 1379 } 1380 if (max == RLIM_INFINITY || max == LX_RLIM_INFINITY) { 1381 lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited"); 1382 } else { 1383 lxpr_uiobuf_printf(uiobuf, " %-20lu", max); 1384 } 1385 lxpr_uiobuf_printf(uiobuf, " %-10s\n", 1386 lxpr_rlimtab[i].rlim_unit); 1387 } 1388 1389 lxpr_unlock(p); 1390 kmem_free(nval, sizeof (rctl_val_t)); 1391 } 1392 1393 /* 1394 * lxpr_read_pid_maps(): memory map file 1395 */ 1396 static void 1397 lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1398 { 1399 proc_t *p; 1400 struct as *as; 1401 struct seg *seg; 1402 char *buf; 1403 int buflen = MAXPATHLEN; 1404 struct print_data { 1405 uintptr_t saddr; 1406 uintptr_t eaddr; 1407 int type; 1408 char prot[5]; 1409 uintptr_t offset; 1410 vnode_t *vp; 1411 struct print_data *next; 1412 } *print_head = NULL; 1413 struct print_data **print_tail = &print_head; 1414 struct print_data *pbuf; 1415 1416 ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS || 1417 lxpnp->lxpr_type == LXPR_PID_TID_MAPS); 1418 1419 p = lxpr_lock(lxpnp->lxpr_pid); 1420 if (p == NULL) { 1421 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1422 return; 1423 } 1424 1425 as = p->p_as; 1426 1427 if (as == &kas) { 1428 lxpr_unlock(p); 1429 return; 1430 } 1431 1432 mutex_exit(&p->p_lock); 1433 1434 /* Iterate over all segments in the address space */ 1435 AS_LOCK_ENTER(as, RW_READER); 1436 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 1437 vnode_t *vp; 1438 uint_t protbits; 1439 1440 pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP); 1441 1442 pbuf->saddr = (uintptr_t)seg->s_base; 1443 pbuf->eaddr = pbuf->saddr + seg->s_size; 1444 pbuf->type = SEGOP_GETTYPE(seg, seg->s_base); 1445 1446 /* 1447 * Cheat and only use the protection bits of the first page 1448 * in the segment 1449 */ 1450 (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot)); 1451 (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits); 1452 1453 if (protbits & PROT_READ) pbuf->prot[0] = 'r'; 1454 if (protbits & PROT_WRITE) pbuf->prot[1] = 'w'; 1455 if (protbits & PROT_EXEC) pbuf->prot[2] = 'x'; 1456 if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's'; 1457 else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p'; 1458 1459 if (seg->s_ops == &segvn_ops && 1460 SEGOP_GETVP(seg, seg->s_base, &vp) == 0 && 1461 vp != NULL && vp->v_type == VREG) { 1462 VN_HOLD(vp); 1463 pbuf->vp = vp; 1464 } else { 1465 pbuf->vp = NULL; 1466 } 1467 1468 pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr); 1469 1470 pbuf->next = NULL; 1471 *print_tail = pbuf; 1472 print_tail = &pbuf->next; 1473 } 1474 AS_LOCK_EXIT(as); 1475 mutex_enter(&p->p_lock); 1476 lxpr_unlock(p); 1477 1478 buf = kmem_alloc(buflen, KM_SLEEP); 1479 1480 /* print the data we've extracted */ 1481 pbuf = print_head; 1482 while (pbuf != NULL) { 1483 struct print_data *pbuf_next; 1484 vattr_t vattr; 1485 1486 int maj = 0; 1487 int min = 0; 1488 ino_t inode = 0; 1489 1490 *buf = '\0'; 1491 if (pbuf->vp != NULL) { 1492 vattr.va_mask = AT_FSID | AT_NODEID; 1493 if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(), 1494 NULL) == 0) { 1495 maj = getmajor(vattr.va_fsid); 1496 min = getminor(vattr.va_fsid); 1497 inode = vattr.va_nodeid; 1498 } 1499 (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED()); 1500 VN_RELE(pbuf->vp); 1501 } 1502 1503 if (p->p_model == DATAMODEL_LP64) { 1504 lxpr_uiobuf_printf(uiobuf, 1505 "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n", 1506 pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset, 1507 maj, min, inode, *buf != '\0' ? " " : "", buf); 1508 } else { 1509 lxpr_uiobuf_printf(uiobuf, 1510 "%08x-%08x %s %08x %02x:%02x %llu%s%s\n", 1511 (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr, 1512 pbuf->prot, (uint32_t)pbuf->offset, maj, min, 1513 inode, *buf != '\0' ? " " : "", buf); 1514 } 1515 1516 pbuf_next = pbuf->next; 1517 kmem_free(pbuf, sizeof (*pbuf)); 1518 pbuf = pbuf_next; 1519 } 1520 1521 kmem_free(buf, buflen); 1522 } 1523 1524 /* 1525 * lxpr_read_pid_mountinfo(): information about process mount points. e.g.: 1526 * 14 19 0:13 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw 1527 * mntid parid devnums root mntpnt mntopts - fstype mntsrc superopts 1528 * 1529 * We have to make up several of these fields. 1530 */ 1531 static void 1532 lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1533 { 1534 struct vfs *vfsp; 1535 struct vfs *vfslist; 1536 zone_t *zone = LXPTOZ(lxpnp); 1537 struct print_data { 1538 refstr_t *vfs_mntpt; 1539 refstr_t *vfs_resource; 1540 uint_t vfs_flag; 1541 int vfs_fstype; 1542 dev_t vfs_dev; 1543 struct print_data *next; 1544 } *print_head = NULL; 1545 struct print_data **print_tail = &print_head; 1546 struct print_data *printp; 1547 int root_id = 15; /* use a made-up value */ 1548 int mnt_id; 1549 1550 ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO || 1551 lxpnp->lxpr_type == LXPR_PID_TID_MOUNTINFO); 1552 1553 vfs_list_read_lock(); 1554 1555 /* root is the top-level, it does not appear in this output */ 1556 if (zone == global_zone) { 1557 vfsp = vfslist = rootvfs; 1558 } else { 1559 vfsp = vfslist = zone->zone_vfslist; 1560 /* 1561 * If the zone has a root entry, it will be the first in 1562 * the list. If it doesn't, we conjure one up. 1563 */ 1564 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt), 1565 zone->zone_rootpath) != 0) { 1566 struct vfs *tvfsp; 1567 /* 1568 * The root of the zone is not a mount point. The vfs 1569 * we want to report is that of the zone's root vnode. 1570 */ 1571 tvfsp = zone->zone_rootvp->v_vfsp; 1572 1573 lxpr_uiobuf_printf(uiobuf, 1574 "%d 1 %d:%d / / %s - %s / %s\n", 1575 root_id, 1576 major(tvfsp->vfs_dev), minor(vfsp->vfs_dev), 1577 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw", 1578 vfssw[tvfsp->vfs_fstype].vsw_name, 1579 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw"); 1580 1581 } 1582 if (vfslist == NULL) { 1583 vfs_list_unlock(); 1584 return; 1585 } 1586 } 1587 1588 /* 1589 * Later on we have to do a lookupname, which can end up causing 1590 * another vfs_list_read_lock() to be called. Which can lead to a 1591 * deadlock. To avoid this, we extract the data we need into a local 1592 * list, then we can run this list without holding vfs_list_read_lock() 1593 * We keep the list in the same order as the vfs_list 1594 */ 1595 do { 1596 /* Skip mounts we shouldn't show */ 1597 if (vfsp->vfs_flag & VFS_NOMNTTAB) { 1598 goto nextfs; 1599 } 1600 1601 printp = kmem_alloc(sizeof (*printp), KM_SLEEP); 1602 refstr_hold(vfsp->vfs_mntpt); 1603 printp->vfs_mntpt = vfsp->vfs_mntpt; 1604 refstr_hold(vfsp->vfs_resource); 1605 printp->vfs_resource = vfsp->vfs_resource; 1606 printp->vfs_flag = vfsp->vfs_flag; 1607 printp->vfs_fstype = vfsp->vfs_fstype; 1608 printp->vfs_dev = vfsp->vfs_dev; 1609 printp->next = NULL; 1610 1611 *print_tail = printp; 1612 print_tail = &printp->next; 1613 1614 nextfs: 1615 vfsp = (zone == global_zone) ? 1616 vfsp->vfs_next : vfsp->vfs_zone_next; 1617 1618 } while (vfsp != vfslist); 1619 1620 vfs_list_unlock(); 1621 1622 mnt_id = root_id + 1; 1623 1624 /* 1625 * now we can run through what we've extracted without holding 1626 * vfs_list_read_lock() 1627 */ 1628 printp = print_head; 1629 while (printp != NULL) { 1630 struct print_data *printp_next; 1631 const char *resource; 1632 char *mntpt; 1633 struct vnode *vp; 1634 int error; 1635 1636 mntpt = (char *)refstr_value(printp->vfs_mntpt); 1637 resource = refstr_value(printp->vfs_resource); 1638 1639 if (mntpt != NULL && mntpt[0] != '\0') 1640 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone); 1641 else 1642 mntpt = "-"; 1643 1644 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 1645 1646 if (error != 0) 1647 goto nextp; 1648 1649 if (!(vp->v_flag & VROOT)) { 1650 VN_RELE(vp); 1651 goto nextp; 1652 } 1653 VN_RELE(vp); 1654 1655 if (resource != NULL && resource[0] != '\0') { 1656 if (resource[0] == '/') { 1657 resource = ZONE_PATH_VISIBLE(resource, zone) ? 1658 ZONE_PATH_TRANSLATE(resource, zone) : mntpt; 1659 } 1660 } else { 1661 resource = "none"; 1662 } 1663 1664 /* 1665 * XXX parent ID is not tracked correctly here. Currently we 1666 * always assume the parent ID is the root ID. 1667 */ 1668 lxpr_uiobuf_printf(uiobuf, 1669 "%d %d %d:%d / %s %s - %s %s %s\n", 1670 mnt_id, root_id, 1671 major(printp->vfs_dev), minor(printp->vfs_dev), 1672 mntpt, 1673 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw", 1674 vfssw[printp->vfs_fstype].vsw_name, 1675 resource, 1676 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw"); 1677 1678 nextp: 1679 printp_next = printp->next; 1680 refstr_rele(printp->vfs_mntpt); 1681 refstr_rele(printp->vfs_resource); 1682 kmem_free(printp, sizeof (*printp)); 1683 printp = printp_next; 1684 1685 mnt_id++; 1686 } 1687 } 1688 1689 /* 1690 * lxpr_read_pid_oom_scr_adj(): read oom_score_adj for process 1691 */ 1692 static void 1693 lxpr_read_pid_oom_scr_adj(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1694 { 1695 proc_t *p; 1696 1697 ASSERT(lxpnp->lxpr_type == LXPR_PID_OOM_SCR_ADJ || 1698 lxpnp->lxpr_type == LXPR_PID_TID_OOM_SCR_ADJ); 1699 1700 p = lxpr_lock(lxpnp->lxpr_pid); 1701 if (p == NULL) { 1702 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1703 return; 1704 } 1705 1706 /* always 0 */ 1707 lxpr_uiobuf_printf(uiobuf, "0\n"); 1708 1709 lxpr_unlock(p); 1710 } 1711 1712 1713 /* 1714 * lxpr_read_pid_statm(): memory status file 1715 */ 1716 static void 1717 lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1718 { 1719 proc_t *p; 1720 struct as *as; 1721 size_t vsize; 1722 size_t rss; 1723 1724 ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM || 1725 lxpnp->lxpr_type == LXPR_PID_TID_STATM); 1726 1727 p = lxpr_lock(lxpnp->lxpr_pid); 1728 if (p == NULL) { 1729 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1730 return; 1731 } 1732 1733 as = p->p_as; 1734 1735 mutex_exit(&p->p_lock); 1736 1737 AS_LOCK_ENTER(as, RW_READER); 1738 vsize = btopr(as->a_resvsize); 1739 rss = rm_asrss(as); 1740 AS_LOCK_EXIT(as); 1741 1742 mutex_enter(&p->p_lock); 1743 lxpr_unlock(p); 1744 1745 lxpr_uiobuf_printf(uiobuf, 1746 "%lu %lu %lu %lu %lu %lu %lu\n", 1747 vsize, rss, 0l, rss, 0l, 0l, 0l); 1748 } 1749 1750 /* 1751 * Look for either the main thread (lookup_id is 0) or the specified thread. 1752 * If we're looking for the main thread but the proc does not have one, we 1753 * fallback to using prchoose to get any thread available. 1754 */ 1755 static kthread_t * 1756 lxpr_get_thread(proc_t *p, uint_t lookup_id) 1757 { 1758 kthread_t *t; 1759 uint_t emul_tid; 1760 lx_lwp_data_t *lwpd; 1761 pid_t pid = p->p_pid; 1762 pid_t init_pid = curproc->p_zone->zone_proc_initpid; 1763 boolean_t branded = (p->p_brand == &lx_brand); 1764 1765 /* get specified thread */ 1766 if ((t = p->p_tlist) == NULL) 1767 return (NULL); 1768 1769 do { 1770 if (lookup_id == 0 && t->t_tid == 1) { 1771 thread_lock(t); 1772 return (t); 1773 } 1774 1775 lwpd = ttolxlwp(t); 1776 if (branded && lwpd != NULL) { 1777 if (pid == init_pid && lookup_id == 1) { 1778 emul_tid = t->t_tid; 1779 } else { 1780 emul_tid = lwpd->br_pid; 1781 } 1782 } else { 1783 /* 1784 * Make only the first (assumed to be main) thread 1785 * visible for non-branded processes. 1786 */ 1787 emul_tid = p->p_pid; 1788 } 1789 if (emul_tid == lookup_id) { 1790 thread_lock(t); 1791 return (t); 1792 } 1793 } while ((t = t->t_forw) != p->p_tlist); 1794 1795 if (lookup_id == 0) 1796 return (prchoose(p)); 1797 return (NULL); 1798 } 1799 1800 /* 1801 * Lookup the real pid for procs 0 or 1. 1802 */ 1803 static pid_t 1804 get_real_pid(pid_t p) 1805 { 1806 pid_t find_pid; 1807 1808 if (p == 1) { 1809 find_pid = curproc->p_zone->zone_proc_initpid; 1810 } else if (p == 0) { 1811 find_pid = curproc->p_zone->zone_zsched->p_pid; 1812 } else { 1813 find_pid = p; 1814 } 1815 1816 return (find_pid); 1817 } 1818 1819 /* 1820 * pid/tid common code to read status file 1821 */ 1822 static void 1823 lxpr_read_status_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf, 1824 uint_t lookup_id) 1825 { 1826 proc_t *p; 1827 kthread_t *t; 1828 user_t *up; 1829 cred_t *cr; 1830 const gid_t *groups; 1831 int ngroups; 1832 struct as *as; 1833 char *status; 1834 pid_t pid, ppid; 1835 k_sigset_t current, ignore, handle; 1836 int i, lx_sig; 1837 pid_t real_pid; 1838 1839 real_pid = get_real_pid(lxpnp->lxpr_pid); 1840 p = lxpr_lock(real_pid); 1841 if (p == NULL) { 1842 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1843 return; 1844 } 1845 1846 pid = p->p_pid; 1847 1848 /* 1849 * Convert pid to the Linux default of 1 if we're the zone's init 1850 * process or if we're the zone's zsched the pid is 0. 1851 */ 1852 if (pid == curproc->p_zone->zone_proc_initpid) { 1853 pid = 1; 1854 ppid = 0; /* parent pid for init is 0 */ 1855 } else if (pid == curproc->p_zone->zone_zsched->p_pid) { 1856 pid = 0; /* zsched is pid 0 */ 1857 ppid = 0; /* parent pid for zsched is itself */ 1858 } else { 1859 /* 1860 * Make sure not to reference parent PIDs that reside outside 1861 * the zone 1862 */ 1863 ppid = ((p->p_flag & SZONETOP) 1864 ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid); 1865 1866 /* 1867 * Convert ppid to the Linux default of 1 if our parent is the 1868 * zone's init process 1869 */ 1870 if (ppid == curproc->p_zone->zone_proc_initpid) 1871 ppid = 1; 1872 } 1873 1874 t = lxpr_get_thread(p, lookup_id); 1875 if (t != NULL) { 1876 switch (t->t_state) { 1877 case TS_SLEEP: 1878 status = "S (sleeping)"; 1879 break; 1880 case TS_RUN: 1881 case TS_ONPROC: 1882 status = "R (running)"; 1883 break; 1884 case TS_ZOMB: 1885 status = "Z (zombie)"; 1886 break; 1887 case TS_STOPPED: 1888 status = "T (stopped)"; 1889 break; 1890 default: 1891 status = "! (unknown)"; 1892 break; 1893 } 1894 thread_unlock(t); 1895 } else { 1896 if (lookup_id != 0) { 1897 /* we can't find this specific thread */ 1898 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1899 lxpr_unlock(p); 1900 return; 1901 } 1902 1903 /* 1904 * there is a hole in the exit code, where a proc can have 1905 * no threads but it is yet to be flagged SZOMB. We will 1906 * assume we are about to become a zombie 1907 */ 1908 status = "Z (zombie)"; 1909 } 1910 1911 up = PTOU(p); 1912 mutex_enter(&p->p_crlock); 1913 crhold(cr = p->p_cred); 1914 mutex_exit(&p->p_crlock); 1915 1916 lxpr_uiobuf_printf(uiobuf, 1917 "Name:\t%s\n" 1918 "State:\t%s\n" 1919 "Tgid:\t%d\n" 1920 "Pid:\t%d\n" 1921 "PPid:\t%d\n" 1922 "TracerPid:\t%d\n" 1923 "Uid:\t%u\t%u\t%u\t%u\n" 1924 "Gid:\t%u\t%u\t%u\t%u\n" 1925 "FDSize:\t%d\n" 1926 "Groups:\t", 1927 up->u_comm, 1928 status, 1929 pid, /* thread group id - same as pid */ 1930 (lookup_id == 0) ? pid : lxpnp->lxpr_desc, 1931 ppid, 1932 0, 1933 crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr), 1934 crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr), 1935 p->p_fno_ctl); 1936 1937 1938 ngroups = crgetngroups(cr); 1939 groups = crgetgroups(cr); 1940 for (i = 0; i < ngroups; i++) { 1941 lxpr_uiobuf_printf(uiobuf, 1942 "%u ", 1943 groups[i]); 1944 } 1945 crfree(cr); 1946 1947 as = p->p_as; 1948 if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) { 1949 size_t vsize, nlocked, rss; 1950 1951 mutex_exit(&p->p_lock); 1952 AS_LOCK_ENTER(as, RW_READER); 1953 vsize = as->a_resvsize; 1954 rss = rm_asrss(as); 1955 AS_LOCK_EXIT(as); 1956 mutex_enter(&p->p_lock); 1957 nlocked = p->p_locked_mem; 1958 1959 lxpr_uiobuf_printf(uiobuf, 1960 "\n" 1961 "VmSize:\t%8lu kB\n" 1962 "VmLck:\t%8lu kB\n" 1963 "VmRSS:\t%8lu kB\n" 1964 "VmData:\t%8lu kB\n" 1965 "VmStk:\t%8lu kB\n" 1966 "VmExe:\t%8lu kB\n" 1967 "VmLib:\t%8lu kB", 1968 btok(vsize), 1969 btok(nlocked), 1970 ptok(rss), 1971 0l, 1972 btok(p->p_stksize), 1973 ptok(rss), 1974 0l); 1975 } 1976 1977 lxpr_uiobuf_printf(uiobuf, "\nThreads:\t%u", p->p_lwpcnt); 1978 1979 sigemptyset(¤t); 1980 sigemptyset(&ignore); 1981 sigemptyset(&handle); 1982 1983 for (i = 1; i < NSIG; i++) { 1984 lx_sig = stol_signo[i]; 1985 1986 if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) { 1987 if (sigismember(&p->p_sig, i)) 1988 sigaddset(¤t, lx_sig); 1989 1990 if (up->u_signal[i - 1] == SIG_IGN) 1991 sigaddset(&ignore, lx_sig); 1992 else if (up->u_signal[i - 1] != SIG_DFL) 1993 sigaddset(&handle, lx_sig); 1994 } 1995 } 1996 1997 lxpr_uiobuf_printf(uiobuf, 1998 "\n" 1999 "SigPnd:\t%08x%08x\n" 2000 "SigBlk:\t%08x%08x\n" 2001 "SigIgn:\t%08x%08x\n" 2002 "SigCgt:\t%08x%08x\n" 2003 "CapInh:\t%016x\n" 2004 "CapPrm:\t%016x\n" 2005 "CapEff:\t%016x\n", 2006 current.__sigbits[1], current.__sigbits[0], 2007 0, 0, /* signals blocked on per thread basis */ 2008 ignore.__sigbits[1], ignore.__sigbits[0], 2009 handle.__sigbits[1], handle.__sigbits[0], 2010 /* Can't do anything with linux capabilities */ 2011 0, 2012 0, 2013 0); 2014 2015 lxpr_uiobuf_printf(uiobuf, 2016 "CapBnd:\t%016llx\n", 2017 /* We report the full capability bounding set */ 2018 0x1fffffffffLL); 2019 2020 lxpr_unlock(p); 2021 } 2022 2023 /* 2024 * lxpr_read_pid_status(): status file 2025 */ 2026 static void 2027 lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2028 { 2029 ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS); 2030 2031 lxpr_read_status_common(lxpnp, uiobuf, 0); 2032 } 2033 2034 /* 2035 * lxpr_read_pid_tid_status(): status file 2036 */ 2037 static void 2038 lxpr_read_pid_tid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2039 { 2040 ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STATUS); 2041 lxpr_read_status_common(lxpnp, uiobuf, lxpnp->lxpr_desc); 2042 } 2043 2044 /* 2045 * pid/tid common code to read stat file 2046 */ 2047 static void 2048 lxpr_read_stat_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf, 2049 uint_t lookup_id) 2050 { 2051 proc_t *p; 2052 kthread_t *t; 2053 struct as *as; 2054 char stat; 2055 pid_t pid, ppid, pgpid, spid; 2056 gid_t psgid; 2057 dev_t psdev; 2058 size_t rss, vsize; 2059 int nice, pri; 2060 caddr_t wchan; 2061 processorid_t cpu; 2062 pid_t real_pid; 2063 2064 real_pid = get_real_pid(lxpnp->lxpr_pid); 2065 p = lxpr_lock(real_pid); 2066 if (p == NULL) { 2067 lxpr_uiobuf_seterr(uiobuf, EINVAL); 2068 return; 2069 } 2070 2071 pid = p->p_pid; 2072 2073 /* 2074 * Set Linux defaults if we're the zone's init process 2075 */ 2076 if (pid == curproc->p_zone->zone_proc_initpid) { 2077 pid = 1; /* PID for init */ 2078 ppid = 0; /* parent PID for init is 0 */ 2079 pgpid = 0; /* process group for init is 0 */ 2080 psgid = (gid_t)-1; /* credential GID for init is -1 */ 2081 spid = 0; /* session id for init is 0 */ 2082 psdev = 0; /* session device for init is 0 */ 2083 } else if (pid == curproc->p_zone->zone_zsched->p_pid) { 2084 pid = 0; /* PID for zsched */ 2085 ppid = 0; /* parent PID for zsched is 0 */ 2086 pgpid = 0; /* process group for zsched is 0 */ 2087 psgid = (gid_t)-1; /* credential GID for zsched is -1 */ 2088 spid = 0; /* session id for zsched is 0 */ 2089 psdev = 0; /* session device for zsched is 0 */ 2090 } else { 2091 /* 2092 * Make sure not to reference parent PIDs that reside outside 2093 * the zone 2094 */ 2095 ppid = ((p->p_flag & SZONETOP) ? 2096 curproc->p_zone->zone_zsched->p_pid : p->p_ppid); 2097 2098 /* 2099 * Convert ppid to the Linux default of 1 if our parent is the 2100 * zone's init process 2101 */ 2102 if (ppid == curproc->p_zone->zone_proc_initpid) 2103 ppid = 1; 2104 2105 pgpid = p->p_pgrp; 2106 2107 mutex_enter(&p->p_splock); 2108 mutex_enter(&p->p_sessp->s_lock); 2109 spid = p->p_sessp->s_sid; 2110 psdev = p->p_sessp->s_dev; 2111 if (p->p_sessp->s_cred) 2112 psgid = crgetgid(p->p_sessp->s_cred); 2113 else 2114 psgid = crgetgid(p->p_cred); 2115 2116 mutex_exit(&p->p_sessp->s_lock); 2117 mutex_exit(&p->p_splock); 2118 } 2119 2120 t = lxpr_get_thread(p, lookup_id); 2121 if (t != NULL) { 2122 switch (t->t_state) { 2123 case TS_SLEEP: 2124 stat = 'S'; break; 2125 case TS_RUN: 2126 case TS_ONPROC: 2127 stat = 'R'; break; 2128 case TS_ZOMB: 2129 stat = 'Z'; break; 2130 case TS_STOPPED: 2131 stat = 'T'; break; 2132 default: 2133 stat = '!'; break; 2134 } 2135 2136 if (CL_DONICE(t, NULL, 0, &nice) != 0) 2137 nice = 0; 2138 2139 pri = t->t_pri; 2140 wchan = t->t_wchan; 2141 cpu = t->t_cpu->cpu_id; 2142 thread_unlock(t); 2143 } else { 2144 if (lookup_id != 0) { 2145 /* we can't find this specific thread */ 2146 lxpr_uiobuf_seterr(uiobuf, EINVAL); 2147 lxpr_unlock(p); 2148 return; 2149 } 2150 2151 /* Only zombies have no threads */ 2152 stat = 'Z'; 2153 nice = 0; 2154 pri = 0; 2155 wchan = 0; 2156 cpu = 0; 2157 } 2158 as = p->p_as; 2159 mutex_exit(&p->p_lock); 2160 AS_LOCK_ENTER(as, RW_READER); 2161 vsize = as->a_resvsize; 2162 rss = rm_asrss(as); 2163 AS_LOCK_EXIT(as); 2164 mutex_enter(&p->p_lock); 2165 2166 lxpr_uiobuf_printf(uiobuf, 2167 "%d (%s) %c %d %d %d %d %d " 2168 "%lu %lu %lu %lu %lu " 2169 "%lu %lu %ld %ld " 2170 "%d %d %d " 2171 "%lu " 2172 "%lu " 2173 "%lu %ld %llu " 2174 "%lu %lu %u " 2175 "%lu %lu " 2176 "%lu %lu %lu %lu " 2177 "%lu " 2178 "%lu %lu " 2179 "%d " 2180 "%d" 2181 "\n", 2182 (lookup_id == 0) ? pid : lxpnp->lxpr_desc, 2183 PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid, 2184 0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */ 2185 p->p_utime, p->p_stime, p->p_cutime, p->p_cstime, 2186 pri, nice, p->p_lwpcnt, 2187 0l, /* itrealvalue (time before next SIGALRM) */ 2188 PTOU(p)->u_ticks, 2189 vsize, rss, p->p_vmem_ctl, 2190 0l, 0l, USRSTACK, /* startcode, endcode, startstack */ 2191 0l, 0l, /* kstkesp, kstkeip */ 2192 0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */ 2193 wchan, 2194 0l, 0l, /* nswap, cnswap */ 2195 0, /* exit_signal */ 2196 cpu); 2197 2198 lxpr_unlock(p); 2199 } 2200 2201 /* 2202 * lxpr_read_pid_stat(): pid stat file 2203 */ 2204 static void 2205 lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2206 { 2207 ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT); 2208 2209 lxpr_read_stat_common(lxpnp, uiobuf, 0); 2210 } 2211 2212 /* 2213 * lxpr_read_pid_tid_stat(): pid stat file 2214 */ 2215 static void 2216 lxpr_read_pid_tid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2217 { 2218 ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STAT); 2219 lxpr_read_stat_common(lxpnp, uiobuf, lxpnp->lxpr_desc); 2220 } 2221 2222 /* ARGSUSED */ 2223 static void 2224 lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2225 { 2226 } 2227 2228 struct lxpr_ifstat { 2229 uint64_t rx_bytes; 2230 uint64_t rx_packets; 2231 uint64_t rx_errors; 2232 uint64_t rx_drop; 2233 uint64_t tx_bytes; 2234 uint64_t tx_packets; 2235 uint64_t tx_errors; 2236 uint64_t tx_drop; 2237 uint64_t collisions; 2238 uint64_t rx_multicast; 2239 }; 2240 2241 static void * 2242 lxpr_kstat_read(kstat_t *kn, boolean_t byname, size_t *size, int *num) 2243 { 2244 kstat_t *kp; 2245 int i, nrec = 0; 2246 size_t bufsize; 2247 void *buf = NULL; 2248 2249 if (byname == B_TRUE) { 2250 kp = kstat_hold_byname(kn->ks_module, kn->ks_instance, 2251 kn->ks_name, getzoneid()); 2252 } else { 2253 kp = kstat_hold_bykid(kn->ks_kid, getzoneid()); 2254 } 2255 if (kp == NULL) { 2256 return (NULL); 2257 } 2258 if (kp->ks_flags & KSTAT_FLAG_INVALID) { 2259 kstat_rele(kp); 2260 return (NULL); 2261 } 2262 2263 bufsize = kp->ks_data_size + 1; 2264 kstat_rele(kp); 2265 2266 /* 2267 * The kstat in question is released so that kmem_alloc(KM_SLEEP) is 2268 * performed without it held. After the alloc, the kstat is reacquired 2269 * and its size is checked again. If the buffer is no longer large 2270 * enough, the alloc and check are repeated up to three times. 2271 */ 2272 for (i = 0; i < 2; i++) { 2273 buf = kmem_alloc(bufsize, KM_SLEEP); 2274 2275 /* Check if bufsize still appropriate */ 2276 if (byname == B_TRUE) { 2277 kp = kstat_hold_byname(kn->ks_module, kn->ks_instance, 2278 kn->ks_name, getzoneid()); 2279 } else { 2280 kp = kstat_hold_bykid(kn->ks_kid, getzoneid()); 2281 } 2282 if (kp == NULL || kp->ks_flags & KSTAT_FLAG_INVALID) { 2283 if (kp != NULL) { 2284 kstat_rele(kp); 2285 } 2286 kmem_free(buf, bufsize); 2287 return (NULL); 2288 } 2289 KSTAT_ENTER(kp); 2290 (void) KSTAT_UPDATE(kp, KSTAT_READ); 2291 if (bufsize < kp->ks_data_size) { 2292 kmem_free(buf, bufsize); 2293 buf = NULL; 2294 bufsize = kp->ks_data_size + 1; 2295 KSTAT_EXIT(kp); 2296 kstat_rele(kp); 2297 continue; 2298 } else { 2299 if (KSTAT_SNAPSHOT(kp, buf, KSTAT_READ) != 0) { 2300 kmem_free(buf, bufsize); 2301 buf = NULL; 2302 } 2303 nrec = kp->ks_ndata; 2304 KSTAT_EXIT(kp); 2305 kstat_rele(kp); 2306 break; 2307 } 2308 } 2309 2310 if (buf != NULL) { 2311 *size = bufsize; 2312 *num = nrec; 2313 } 2314 return (buf); 2315 } 2316 2317 static int 2318 lxpr_kstat_ifstat(kstat_t *kn, struct lxpr_ifstat *ifs) 2319 { 2320 kstat_named_t *kp; 2321 int i, num; 2322 size_t size; 2323 2324 /* 2325 * Search by name instead of by kid since there's a small window to 2326 * race against kstats being added/removed. 2327 */ 2328 bzero(ifs, sizeof (*ifs)); 2329 kp = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num); 2330 if (kp == NULL) 2331 return (-1); 2332 for (i = 0; i < num; i++) { 2333 if (strncmp(kp[i].name, "rbytes64", KSTAT_STRLEN) == 0) 2334 ifs->rx_bytes = kp[i].value.ui64; 2335 else if (strncmp(kp[i].name, "ipackets64", KSTAT_STRLEN) == 0) 2336 ifs->rx_packets = kp[i].value.ui64; 2337 else if (strncmp(kp[i].name, "ierrors", KSTAT_STRLEN) == 0) 2338 ifs->rx_errors = kp[i].value.ui32; 2339 else if (strncmp(kp[i].name, "norcvbuf", KSTAT_STRLEN) == 0) 2340 ifs->rx_drop = kp[i].value.ui32; 2341 else if (strncmp(kp[i].name, "multircv", KSTAT_STRLEN) == 0) 2342 ifs->rx_multicast = kp[i].value.ui32; 2343 else if (strncmp(kp[i].name, "obytes64", KSTAT_STRLEN) == 0) 2344 ifs->tx_bytes = kp[i].value.ui64; 2345 else if (strncmp(kp[i].name, "opackets64", KSTAT_STRLEN) == 0) 2346 ifs->tx_packets = kp[i].value.ui64; 2347 else if (strncmp(kp[i].name, "oerrors", KSTAT_STRLEN) == 0) 2348 ifs->tx_errors = kp[i].value.ui32; 2349 else if (strncmp(kp[i].name, "noxmtbuf", KSTAT_STRLEN) == 0) 2350 ifs->tx_drop = kp[i].value.ui32; 2351 else if (strncmp(kp[i].name, "collisions", KSTAT_STRLEN) == 0) 2352 ifs->collisions = kp[i].value.ui32; 2353 } 2354 kmem_free(kp, size); 2355 return (0); 2356 } 2357 2358 /* ARGSUSED */ 2359 static void 2360 lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2361 { 2362 kstat_t *ksr; 2363 kstat_t ks0; 2364 int i, nidx; 2365 size_t sidx; 2366 struct lxpr_ifstat ifs; 2367 2368 lxpr_uiobuf_printf(uiobuf, "Inter-| Receive " 2369 " | Transmit\n"); 2370 lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo" 2371 " frame compressed multicast|bytes packets errs drop fifo" 2372 " colls carrier compressed\n"); 2373 2374 ks0.ks_kid = 0; 2375 ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx); 2376 if (ksr == NULL) 2377 return; 2378 2379 for (i = 1; i < nidx; i++) { 2380 if (strncmp(ksr[i].ks_module, "link", KSTAT_STRLEN) == 0 || 2381 strncmp(ksr[i].ks_module, "lo", KSTAT_STRLEN) == 0) { 2382 if (lxpr_kstat_ifstat(&ksr[i], &ifs) != 0) 2383 continue; 2384 2385 /* Overwriting the name is ok in the local snapshot */ 2386 lx_ifname_convert(ksr[i].ks_name, LX_IF_FROMNATIVE); 2387 lxpr_uiobuf_printf(uiobuf, "%6s: %7llu %7llu %4lu " 2388 "%4lu %4u %5u %10u %9lu %8llu %7llu %4lu %4lu %4u " 2389 "%5lu %7u %10u\n", 2390 ksr[i].ks_name, 2391 ifs.rx_bytes, ifs.rx_packets, 2392 ifs.rx_errors, ifs.rx_drop, 2393 0, 0, 0, ifs.rx_multicast, 2394 ifs.tx_bytes, ifs.tx_packets, 2395 ifs.tx_errors, ifs.tx_drop, 2396 0, ifs.collisions, 0, 0); 2397 } 2398 } 2399 2400 kmem_free(ksr, sidx); 2401 } 2402 2403 /* ARGSUSED */ 2404 static void 2405 lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2406 { 2407 } 2408 2409 static void 2410 lxpr_inet6_out(const in6_addr_t *addr, char buf[33]) 2411 { 2412 const uint8_t *ip = addr->s6_addr; 2413 char digits[] = "0123456789abcdef"; 2414 int i; 2415 for (i = 0; i < 16; i++) { 2416 buf[2 * i] = digits[ip[i] >> 4]; 2417 buf[2 * i + 1] = digits[ip[i] & 0xf]; 2418 } 2419 buf[32] = '\0'; 2420 } 2421 2422 /* ARGSUSED */ 2423 static void 2424 lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2425 { 2426 netstack_t *ns; 2427 ip_stack_t *ipst; 2428 ill_t *ill; 2429 ipif_t *ipif; 2430 ill_walk_context_t ctx; 2431 char ifname[LIFNAMSIZ], ip6out[33]; 2432 2433 ns = netstack_get_current(); 2434 if (ns == NULL) 2435 return; 2436 ipst = ns->netstack_ip; 2437 2438 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2439 ill = ILL_START_WALK_V6(&ctx, ipst); 2440 2441 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 2442 for (ipif = ill->ill_ipif; ipif != NULL; 2443 ipif = ipif->ipif_next) { 2444 uint_t index = ill->ill_phyint->phyint_ifindex; 2445 int plen = ip_mask_to_plen_v6(&ipif->ipif_v6net_mask); 2446 unsigned int scope = lx_ipv6_scope_convert( 2447 &ipif->ipif_v6lcl_addr); 2448 /* Always report PERMANENT flag */ 2449 int flag = 0x80; 2450 2451 (void) snprintf(ifname, LIFNAMSIZ, "%s", ill->ill_name); 2452 lx_ifname_convert(ifname, LX_IF_FROMNATIVE); 2453 lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out); 2454 2455 lxpr_uiobuf_printf(uiobuf, "%32s %02x %02x %02x %02x" 2456 " %8s\n", ip6out, index, plen, scope, flag, ifname); 2457 } 2458 } 2459 rw_exit(&ipst->ips_ill_g_lock); 2460 netstack_rele(ns); 2461 } 2462 2463 /* ARGSUSED */ 2464 static void 2465 lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2466 { 2467 } 2468 2469 /* ARGSUSED */ 2470 static void 2471 lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2472 { 2473 } 2474 2475 /* ARGSUSED */ 2476 static void 2477 lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2478 { 2479 } 2480 2481 static void 2482 lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf) 2483 { 2484 uint32_t flags; 2485 char name[IFNAMSIZ]; 2486 char ipv6addr[33]; 2487 2488 lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr); 2489 lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr, 2490 ip_mask_to_plen_v6(&ire->ire_mask_v6)); 2491 2492 /* punt on this for now */ 2493 lxpr_uiobuf_printf(uiobuf, "%s %02x ", 2494 "00000000000000000000000000000000", 0); 2495 2496 lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr); 2497 lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr); 2498 2499 flags = ire->ire_flags & 2500 (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED); 2501 /* Linux's RTF_LOCAL equivalent */ 2502 if (ire->ire_metrics.iulp_local) 2503 flags |= 0x80000000; 2504 2505 if (ire->ire_ill != NULL) { 2506 ill_get_name(ire->ire_ill, name, sizeof (name)); 2507 lx_ifname_convert(name, LX_IF_FROMNATIVE); 2508 } else { 2509 name[0] = '\0'; 2510 } 2511 2512 lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n", 2513 0, /* metric */ 2514 ire->ire_refcnt, 2515 0, 2516 flags, 2517 name); 2518 } 2519 2520 /* ARGSUSED */ 2521 static void 2522 lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2523 { 2524 netstack_t *ns; 2525 ip_stack_t *ipst; 2526 2527 ns = netstack_get_current(); 2528 if (ns == NULL) 2529 return; 2530 ipst = ns->netstack_ip; 2531 2532 /* 2533 * LX branded zones are expected to have exclusive IP stack, hence 2534 * using ALL_ZONES as the zoneid filter. 2535 */ 2536 ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst); 2537 2538 netstack_rele(ns); 2539 } 2540 2541 /* ARGSUSED */ 2542 static void 2543 lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2544 { 2545 } 2546 2547 /* ARGSUSED */ 2548 static void 2549 lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2550 { 2551 } 2552 2553 /* ARGSUSED */ 2554 static void 2555 lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2556 { 2557 } 2558 2559 #define LXPR_SKIP_ROUTE(type) \ 2560 (((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \ 2561 IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & type) != 0) 2562 2563 static void 2564 lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf) 2565 { 2566 uint32_t flags; 2567 char name[IFNAMSIZ]; 2568 ill_t *ill; 2569 ire_t *nire; 2570 ipif_t *ipif; 2571 ipaddr_t gateway; 2572 2573 if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0) 2574 return; 2575 2576 /* These route flags have direct Linux equivalents */ 2577 flags = ire->ire_flags & 2578 (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED); 2579 2580 /* 2581 * Search for a suitable IRE for naming purposes. 2582 * On Linux, the default route is typically associated with the 2583 * interface used to access gateway. The default IRE on Illumos 2584 * typically lacks an ill reference but its parent might have one. 2585 */ 2586 nire = ire; 2587 do { 2588 ill = nire->ire_ill; 2589 nire = nire->ire_dep_parent; 2590 } while (ill == NULL && nire != NULL); 2591 if (ill != NULL) { 2592 ill_get_name(ill, name, sizeof (name)); 2593 lx_ifname_convert(name, LX_IF_FROMNATIVE); 2594 } else { 2595 name[0] = '*'; 2596 name[1] = '\0'; 2597 } 2598 2599 /* 2600 * Linux suppresses the gateway address for directly connected 2601 * interface networks. To emulate this behavior, we walk all addresses 2602 * of a given route interface. If one matches the gateway, it is 2603 * displayed as NULL. 2604 */ 2605 gateway = ire->ire_gateway_addr; 2606 if ((ill = ire->ire_ill) != NULL) { 2607 for (ipif = ill->ill_ipif; ipif != NULL; 2608 ipif = ipif->ipif_next) { 2609 if (ipif->ipif_lcl_addr == gateway) { 2610 gateway = 0; 2611 break; 2612 } 2613 } 2614 } 2615 2616 lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t" 2617 "%d\t%08X\t%d\t%u\t%u\n", 2618 name, 2619 ire->ire_addr, 2620 gateway, 2621 flags, 0, 0, 2622 0, /* priority */ 2623 ire->ire_mask, 2624 0, 0, /* mss, window */ 2625 ire->ire_metrics.iulp_rtt); 2626 } 2627 2628 /* ARGSUSED */ 2629 static void 2630 lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2631 { 2632 netstack_t *ns; 2633 ip_stack_t *ipst; 2634 2635 lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t" 2636 "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n"); 2637 2638 ns = netstack_get_current(); 2639 if (ns == NULL) 2640 return; 2641 ipst = ns->netstack_ip; 2642 2643 /* 2644 * LX branded zones are expected to have exclusive IP stack, hence 2645 * using ALL_ZONES as the zoneid filter. 2646 */ 2647 ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst); 2648 2649 netstack_rele(ns); 2650 } 2651 2652 /* ARGSUSED */ 2653 static void 2654 lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2655 { 2656 } 2657 2658 /* ARGSUSED */ 2659 static void 2660 lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2661 { 2662 } 2663 2664 /* ARGSUSED */ 2665 static void 2666 lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2667 { 2668 } 2669 2670 typedef struct lxpr_snmp_table { 2671 const char *lst_proto; 2672 const char *lst_fields[]; 2673 } lxpr_snmp_table_t; 2674 2675 static lxpr_snmp_table_t lxpr_snmp_ip = { "ip", 2676 { 2677 "forwarding", "defaultTTL", "inReceives", "inHdrErrors", 2678 "inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards", 2679 "inDelivers", "outRequests", "outDiscards", "outNoRoutes", 2680 "reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs", 2681 "fragFails", "fragCreates", 2682 NULL 2683 } 2684 }; 2685 static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp", 2686 { 2687 "inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds", 2688 "inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps", 2689 "inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps", 2690 "outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds", 2691 "outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos", 2692 "outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks", 2693 "outAddrMaskReps", 2694 NULL 2695 } 2696 }; 2697 static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp", 2698 { 2699 "rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens", 2700 "passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs", 2701 "outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors", 2702 NULL 2703 } 2704 }; 2705 static lxpr_snmp_table_t lxpr_snmp_udp = { "udp", 2706 { 2707 "inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors", 2708 "sndbufErrors", "inCsumErrors", 2709 NULL 2710 } 2711 }; 2712 2713 static lxpr_snmp_table_t *lxpr_net_snmptab[] = { 2714 &lxpr_snmp_ip, 2715 &lxpr_snmp_icmp, 2716 &lxpr_snmp_tcp, 2717 &lxpr_snmp_udp, 2718 NULL 2719 }; 2720 2721 static void 2722 lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table, 2723 kstat_t *kn) 2724 { 2725 kstat_named_t *klist; 2726 char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN]; 2727 int i, j, num; 2728 size_t size; 2729 2730 klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num); 2731 if (klist == NULL) 2732 return; 2733 2734 /* Print the header line, fields capitalized */ 2735 (void) strncpy(upname, table->lst_proto, KSTAT_STRLEN); 2736 upname[0] = toupper(upname[0]); 2737 lxpr_uiobuf_printf(uiobuf, "%s:", upname); 2738 for (i = 0; table->lst_fields[i] != NULL; i++) { 2739 (void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN); 2740 upfield[0] = toupper(upfield[0]); 2741 lxpr_uiobuf_printf(uiobuf, " %s", upfield); 2742 } 2743 lxpr_uiobuf_printf(uiobuf, "\n%s:", upname); 2744 2745 /* Then loop back through to print the value line. */ 2746 for (i = 0; table->lst_fields[i] != NULL; i++) { 2747 kstat_named_t *kpoint = NULL; 2748 for (j = 0; j < num; j++) { 2749 if (strncmp(klist[j].name, table->lst_fields[i], 2750 KSTAT_STRLEN) == 0) { 2751 kpoint = &klist[j]; 2752 break; 2753 } 2754 } 2755 if (kpoint == NULL) { 2756 /* Output 0 for unknown fields */ 2757 lxpr_uiobuf_printf(uiobuf, " 0"); 2758 } else { 2759 switch (kpoint->data_type) { 2760 case KSTAT_DATA_INT32: 2761 lxpr_uiobuf_printf(uiobuf, " %d", 2762 kpoint->value.i32); 2763 break; 2764 case KSTAT_DATA_UINT32: 2765 lxpr_uiobuf_printf(uiobuf, " %u", 2766 kpoint->value.ui32); 2767 break; 2768 case KSTAT_DATA_INT64: 2769 lxpr_uiobuf_printf(uiobuf, " %ld", 2770 kpoint->value.l); 2771 break; 2772 case KSTAT_DATA_UINT64: 2773 lxpr_uiobuf_printf(uiobuf, " %lu", 2774 kpoint->value.ul); 2775 break; 2776 } 2777 } 2778 } 2779 lxpr_uiobuf_printf(uiobuf, "\n"); 2780 kmem_free(klist, size); 2781 } 2782 2783 /* ARGSUSED */ 2784 static void 2785 lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2786 { 2787 kstat_t *ksr; 2788 kstat_t ks0; 2789 lxpr_snmp_table_t **table = lxpr_net_snmptab; 2790 int i, t, nidx; 2791 size_t sidx; 2792 2793 ks0.ks_kid = 0; 2794 ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx); 2795 if (ksr == NULL) 2796 return; 2797 2798 for (t = 0; table[t] != NULL; t++) { 2799 for (i = 0; i < nidx; i++) { 2800 if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0) 2801 continue; 2802 if (strncmp(ksr[i].ks_name, table[t]->lst_proto, 2803 KSTAT_STRLEN) == 0) { 2804 lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]); 2805 break; 2806 } 2807 } 2808 } 2809 kmem_free(ksr, sidx); 2810 } 2811 2812 /* ARGSUSED */ 2813 static void 2814 lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2815 { 2816 } 2817 2818 static int 2819 lxpr_convert_tcp_state(int st) 2820 { 2821 /* 2822 * Derived from the enum located in the Linux kernel sources: 2823 * include/net/tcp_states.h 2824 */ 2825 switch (st) { 2826 case TCPS_ESTABLISHED: 2827 return (1); 2828 case TCPS_SYN_SENT: 2829 return (2); 2830 case TCPS_SYN_RCVD: 2831 return (3); 2832 case TCPS_FIN_WAIT_1: 2833 return (4); 2834 case TCPS_FIN_WAIT_2: 2835 return (5); 2836 case TCPS_TIME_WAIT: 2837 return (6); 2838 case TCPS_CLOSED: 2839 return (7); 2840 case TCPS_CLOSE_WAIT: 2841 return (8); 2842 case TCPS_LAST_ACK: 2843 return (9); 2844 case TCPS_LISTEN: 2845 return (10); 2846 case TCPS_CLOSING: 2847 return (11); 2848 default: 2849 /* No translation for TCPS_IDLE, TCPS_BOUND or anything else */ 2850 return (0); 2851 } 2852 } 2853 2854 static void 2855 lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) 2856 { 2857 int i, sl = 0; 2858 connf_t *connfp; 2859 conn_t *connp; 2860 netstack_t *ns; 2861 ip_stack_t *ipst; 2862 2863 ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION); 2864 if (ipver == IPV4_VERSION) { 2865 lxpr_uiobuf_printf(uiobuf, " sl local_address rem_address " 2866 "st tx_queue rx_queue tr tm->when retrnsmt uid timeout " 2867 "inode\n"); 2868 } else { 2869 lxpr_uiobuf_printf(uiobuf, " sl " 2870 "local_address " 2871 "remote_address " 2872 "st tx_queue rx_queue tr tm->when retrnsmt " 2873 "uid timeout inode\n"); 2874 } 2875 /* 2876 * Due to differences between the Linux and illumos TCP 2877 * implementations, some data will be omitted from the output here. 2878 * 2879 * Valid fields: 2880 * - local_address 2881 * - remote_address 2882 * - st 2883 * - tx_queue 2884 * - rx_queue 2885 * - uid 2886 * - inode 2887 * 2888 * Omitted/invalid fields 2889 * - tr 2890 * - tm->when 2891 * - retrnsmt 2892 * - timeout 2893 */ 2894 2895 ns = netstack_get_current(); 2896 if (ns == NULL) 2897 return; 2898 ipst = ns->netstack_ip; 2899 2900 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 2901 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 2902 connp = NULL; 2903 while ((connp = 2904 ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) { 2905 tcp_t *tcp; 2906 vattr_t attr; 2907 sonode_t *so = (sonode_t *)connp->conn_upper_handle; 2908 vnode_t *vp = (so != NULL) ? so->so_vnode : NULL; 2909 if (connp->conn_ipversion != ipver) 2910 continue; 2911 tcp = connp->conn_tcp; 2912 if (ipver == IPV4_VERSION) { 2913 lxpr_uiobuf_printf(uiobuf, 2914 "%4d: %08X:%04X %08X:%04X ", 2915 ++sl, 2916 connp->conn_laddr_v4, 2917 ntohs(connp->conn_lport), 2918 connp->conn_faddr_v4, 2919 ntohs(connp->conn_fport)); 2920 } else { 2921 lxpr_uiobuf_printf(uiobuf, "%4d: " 2922 "%08X%08X%08X%08X:%04X " 2923 "%08X%08X%08X%08X:%04X ", 2924 ++sl, 2925 connp->conn_laddr_v6.s6_addr32[0], 2926 connp->conn_laddr_v6.s6_addr32[1], 2927 connp->conn_laddr_v6.s6_addr32[2], 2928 connp->conn_laddr_v6.s6_addr32[3], 2929 ntohs(connp->conn_lport), 2930 connp->conn_faddr_v6.s6_addr32[0], 2931 connp->conn_faddr_v6.s6_addr32[1], 2932 connp->conn_faddr_v6.s6_addr32[2], 2933 connp->conn_faddr_v6.s6_addr32[3], 2934 ntohs(connp->conn_fport)); 2935 } 2936 2937 /* fetch the simulated inode for the socket */ 2938 if (vp == NULL || 2939 VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0) 2940 attr.va_nodeid = 0; 2941 2942 lxpr_uiobuf_printf(uiobuf, 2943 "%02X %08X:%08X %02X:%08X %08X " 2944 "%5u %8d %lu %d %p %u %u %u %u %d\n", 2945 lxpr_convert_tcp_state(tcp->tcp_state), 2946 tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */ 2947 0, 0, /* tr, when */ 2948 0, /* per-connection rexmits aren't tracked today */ 2949 connp->conn_cred->cr_uid, 2950 0, /* timeout */ 2951 /* inode + more */ 2952 (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0); 2953 } 2954 } 2955 netstack_rele(ns); 2956 } 2957 2958 /* ARGSUSED */ 2959 static void 2960 lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2961 { 2962 lxpr_format_tcp(uiobuf, IPV4_VERSION); 2963 } 2964 2965 /* ARGSUSED */ 2966 static void 2967 lxpr_read_net_tcp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 2968 { 2969 lxpr_format_tcp(uiobuf, IPV6_VERSION); 2970 } 2971 2972 static void 2973 lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver) 2974 { 2975 int i, sl = 0; 2976 connf_t *connfp; 2977 conn_t *connp; 2978 netstack_t *ns; 2979 ip_stack_t *ipst; 2980 2981 ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION); 2982 if (ipver == IPV4_VERSION) { 2983 lxpr_uiobuf_printf(uiobuf, " sl local_address rem_address" 2984 " st tx_queue rx_queue tr tm->when retrnsmt uid" 2985 " timeout inode ref pointer drops\n"); 2986 } else { 2987 lxpr_uiobuf_printf(uiobuf, " sl " 2988 "local_address " 2989 "remote_address " 2990 "st tx_queue rx_queue tr tm->when retrnsmt " 2991 "uid timeout inode ref pointer drops\n"); 2992 } 2993 /* 2994 * Due to differences between the Linux and illumos UDP 2995 * implementations, some data will be omitted from the output here. 2996 * 2997 * Valid fields: 2998 * - local_address 2999 * - remote_address 3000 * - st: limited 3001 * - uid 3002 * 3003 * Omitted/invalid fields 3004 * - tx_queue 3005 * - rx_queue 3006 * - tr 3007 * - tm->when 3008 * - retrnsmt 3009 * - timeout 3010 * - inode 3011 */ 3012 3013 ns = netstack_get_current(); 3014 if (ns == NULL) 3015 return; 3016 ipst = ns->netstack_ip; 3017 3018 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 3019 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 3020 connp = NULL; 3021 while ((connp = 3022 ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) { 3023 udp_t *udp; 3024 int state = 0; 3025 vattr_t attr; 3026 sonode_t *so = (sonode_t *)connp->conn_upper_handle; 3027 vnode_t *vp = (so != NULL) ? so->so_vnode : NULL; 3028 if (connp->conn_ipversion != ipver) 3029 continue; 3030 udp = connp->conn_udp; 3031 if (ipver == IPV4_VERSION) { 3032 lxpr_uiobuf_printf(uiobuf, 3033 "%4d: %08X:%04X %08X:%04X ", 3034 ++sl, 3035 connp->conn_laddr_v4, 3036 ntohs(connp->conn_lport), 3037 connp->conn_faddr_v4, 3038 ntohs(connp->conn_fport)); 3039 } else { 3040 lxpr_uiobuf_printf(uiobuf, "%4d: " 3041 "%08X%08X%08X%08X:%04X " 3042 "%08X%08X%08X%08X:%04X ", 3043 ++sl, 3044 connp->conn_laddr_v6.s6_addr32[0], 3045 connp->conn_laddr_v6.s6_addr32[1], 3046 connp->conn_laddr_v6.s6_addr32[2], 3047 connp->conn_laddr_v6.s6_addr32[3], 3048 ntohs(connp->conn_lport), 3049 connp->conn_faddr_v6.s6_addr32[0], 3050 connp->conn_faddr_v6.s6_addr32[1], 3051 connp->conn_faddr_v6.s6_addr32[2], 3052 connp->conn_faddr_v6.s6_addr32[3], 3053 ntohs(connp->conn_fport)); 3054 } 3055 3056 switch (udp->udp_state) { 3057 case TS_UNBND: 3058 case TS_IDLE: 3059 state = 7; 3060 break; 3061 case TS_DATA_XFER: 3062 state = 1; 3063 break; 3064 } 3065 3066 /* fetch the simulated inode for the socket */ 3067 if (vp == NULL || 3068 VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0) 3069 attr.va_nodeid = 0; 3070 3071 lxpr_uiobuf_printf(uiobuf, 3072 "%02X %08X:%08X %02X:%08X %08X " 3073 "%5u %8d %lu %d %p %d\n", 3074 state, 3075 0, 0, /* rx/tx queue */ 3076 0, 0, /* tr, when */ 3077 0, /* retrans */ 3078 connp->conn_cred->cr_uid, 3079 0, /* timeout */ 3080 /* inode, ref, pointer, drops */ 3081 (ino_t)attr.va_nodeid, 0, NULL, 0); 3082 } 3083 } 3084 netstack_rele(ns); 3085 } 3086 3087 /* ARGSUSED */ 3088 static void 3089 lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3090 { 3091 lxpr_format_udp(uiobuf, IPV4_VERSION); 3092 } 3093 3094 /* ARGSUSED */ 3095 static void 3096 lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3097 { 3098 lxpr_format_udp(uiobuf, IPV6_VERSION); 3099 } 3100 3101 /* ARGSUSED */ 3102 static void 3103 lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3104 { 3105 sonode_t *so; 3106 zoneid_t zoneid = getzoneid(); 3107 3108 lxpr_uiobuf_printf(uiobuf, "Num RefCount Protocol Flags Type " 3109 "St Inode Path\n"); 3110 3111 mutex_enter(&socklist.sl_lock); 3112 for (so = socklist.sl_list; so != NULL; 3113 so = _SOTOTPI(so)->sti_next_so) { 3114 vnode_t *vp = so->so_vnode; 3115 vattr_t attr; 3116 sotpi_info_t *sti; 3117 const char *name = NULL; 3118 int status = 0; 3119 int type = 0; 3120 int flags = 0; 3121 3122 /* Only process active sonodes in this zone */ 3123 if (so->so_count == 0 || so->so_zoneid != zoneid) 3124 continue; 3125 3126 /* 3127 * Grab the inode, if possible. 3128 * This must be done before entering so_lock. 3129 */ 3130 if (vp == NULL || 3131 VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0) 3132 attr.va_nodeid = 0; 3133 3134 mutex_enter(&so->so_lock); 3135 sti = _SOTOTPI(so); 3136 3137 if (sti->sti_laddr_sa != NULL && 3138 sti->sti_laddr_len > 0) { 3139 name = sti->sti_laddr_sa->sa_data; 3140 } else if (sti->sti_faddr_sa != NULL && 3141 sti->sti_faddr_len > 0) { 3142 name = sti->sti_faddr_sa->sa_data; 3143 } 3144 3145 /* 3146 * Derived from enum values in Linux kernel source: 3147 * include/uapi/linux/net.h 3148 */ 3149 if ((so->so_state & SS_ISDISCONNECTING) != 0) { 3150 status = 4; 3151 } else if ((so->so_state & SS_ISCONNECTING) != 0) { 3152 status = 2; 3153 } else if ((so->so_state & SS_ISCONNECTED) != 0) { 3154 status = 3; 3155 } else { 3156 status = 1; 3157 /* Add ACC flag for stream-type server sockets */ 3158 if (so->so_type != SOCK_DGRAM && 3159 sti->sti_laddr_sa != NULL) 3160 flags |= 0x10000; 3161 } 3162 3163 /* Convert to Linux type */ 3164 switch (so->so_type) { 3165 case SOCK_DGRAM: 3166 type = 2; 3167 break; 3168 case SOCK_SEQPACKET: 3169 type = 5; 3170 break; 3171 default: 3172 type = 1; 3173 } 3174 3175 lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu", 3176 so, 3177 so->so_count, 3178 0, /* proto, always 0 */ 3179 flags, 3180 type, 3181 status, 3182 (ino_t)attr.va_nodeid); 3183 3184 /* 3185 * Due to shortcomings in the abstract socket emulation, they 3186 * cannot be properly represented here (as @<path>). 3187 * 3188 * This will be the case until they are better implemented. 3189 */ 3190 if (name != NULL) 3191 lxpr_uiobuf_printf(uiobuf, " %s\n", name); 3192 else 3193 lxpr_uiobuf_printf(uiobuf, "\n"); 3194 mutex_exit(&so->so_lock); 3195 } 3196 mutex_exit(&socklist.sl_lock); 3197 } 3198 3199 /* 3200 * lxpr_read_kmsg(): read the contents of the kernel message queue. We 3201 * translate this into the reception of console messages for this zone; each 3202 * read copies out a single zone console message, or blocks until the next one 3203 * is produced, unless we're open non-blocking, in which case we return after 3204 * 1ms. 3205 */ 3206 3207 #define LX_KMSG_PRI "<0>" 3208 3209 static void 3210 lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh) 3211 { 3212 mblk_t *mp; 3213 timestruc_t to; 3214 timestruc_t *tp = NULL; 3215 3216 ASSERT(lxpnp->lxpr_type == LXPR_KMSG); 3217 3218 if (lxpr_uiobuf_nonblock(uiobuf)) { 3219 to.tv_sec = 0; 3220 to.tv_nsec = 1000000; /* 1msec */ 3221 tp = &to; 3222 } 3223 3224 if (ldi_getmsg(lh, &mp, tp) == 0) { 3225 /* 3226 * lx procfs doesn't like successive reads to the same file 3227 * descriptor unless we do an explicit rewind each time. 3228 */ 3229 lxpr_uiobuf_seek(uiobuf, 0); 3230 3231 lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI, 3232 mp->b_cont->b_rptr); 3233 3234 freemsg(mp); 3235 } 3236 } 3237 3238 /* 3239 * lxpr_read_loadavg(): read the contents of the "loadavg" file. We do just 3240 * enough for uptime and other simple lxproc readers to work 3241 */ 3242 extern int nthread; 3243 3244 static void 3245 lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3246 { 3247 ulong_t avenrun1; 3248 ulong_t avenrun5; 3249 ulong_t avenrun15; 3250 ulong_t avenrun1_cs; 3251 ulong_t avenrun5_cs; 3252 ulong_t avenrun15_cs; 3253 int loadavg[3]; 3254 int *loadbuf; 3255 cpupart_t *cp; 3256 zone_t *zone = LXPTOZ(lxpnp); 3257 3258 uint_t nrunnable = 0; 3259 rctl_qty_t nlwps; 3260 3261 ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG); 3262 3263 mutex_enter(&cpu_lock); 3264 3265 /* 3266 * Need to add up values over all CPU partitions. If pools are active, 3267 * only report the values of the zone's partition, which by definition 3268 * includes the current CPU. 3269 */ 3270 if (pool_pset_enabled()) { 3271 psetid_t psetid = zone_pset_get(curproc->p_zone); 3272 3273 ASSERT(curproc->p_zone != &zone0); 3274 cp = CPU->cpu_part; 3275 3276 nrunnable = cp->cp_nrunning + cp->cp_nrunnable; 3277 (void) cpupart_get_loadavg(psetid, &loadavg[0], 3); 3278 loadbuf = &loadavg[0]; 3279 } else { 3280 cp = cp_list_head; 3281 do { 3282 nrunnable += cp->cp_nrunning + cp->cp_nrunnable; 3283 } while ((cp = cp->cp_next) != cp_list_head); 3284 3285 loadbuf = zone == global_zone ? 3286 &avenrun[0] : zone->zone_avenrun; 3287 } 3288 3289 /* 3290 * If we're in the non-global zone, we'll report the total number of 3291 * LWPs in the zone for the "nproc" parameter of /proc/loadavg, 3292 * otherwise will just use nthread (which will include kernel threads, 3293 * but should be good enough for lxproc). 3294 */ 3295 nlwps = zone == global_zone ? nthread : zone->zone_nlwps; 3296 3297 mutex_exit(&cpu_lock); 3298 3299 avenrun1 = loadbuf[0] >> FSHIFT; 3300 avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT; 3301 avenrun5 = loadbuf[1] >> FSHIFT; 3302 avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT; 3303 avenrun15 = loadbuf[2] >> FSHIFT; 3304 avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT; 3305 3306 lxpr_uiobuf_printf(uiobuf, 3307 "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n", 3308 avenrun1, avenrun1_cs, 3309 avenrun5, avenrun5_cs, 3310 avenrun15, avenrun15_cs, 3311 nrunnable, nlwps, 0); 3312 } 3313 3314 /* 3315 * lxpr_read_meminfo(): read the contents of the "meminfo" file. 3316 */ 3317 static void 3318 lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3319 { 3320 zone_t *zone = LXPTOZ(lxpnp); 3321 int global = zone == global_zone; 3322 long total_mem, free_mem, total_swap, used_swap; 3323 3324 ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO); 3325 3326 if (global || zone->zone_phys_mem_ctl == UINT64_MAX) { 3327 total_mem = physmem * PAGESIZE; 3328 free_mem = freemem * PAGESIZE; 3329 } else { 3330 total_mem = zone->zone_phys_mem_ctl; 3331 free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem; 3332 } 3333 3334 if (global || zone->zone_max_swap_ctl == UINT64_MAX) { 3335 total_swap = k_anoninfo.ani_max * PAGESIZE; 3336 used_swap = k_anoninfo.ani_phys_resv * PAGESIZE; 3337 } else { 3338 mutex_enter(&zone->zone_mem_lock); 3339 total_swap = zone->zone_max_swap_ctl; 3340 used_swap = zone->zone_max_swap; 3341 mutex_exit(&zone->zone_mem_lock); 3342 } 3343 3344 lxpr_uiobuf_printf(uiobuf, 3345 "MemTotal: %8lu kB\n" 3346 "MemFree: %8lu kB\n" 3347 "MemShared: %8u kB\n" 3348 "Buffers: %8u kB\n" 3349 "Cached: %8u kB\n" 3350 "SwapCached:%8u kB\n" 3351 "Active: %8u kB\n" 3352 "Inactive: %8u kB\n" 3353 "HighTotal: %8u kB\n" 3354 "HighFree: %8u kB\n" 3355 "LowTotal: %8u kB\n" 3356 "LowFree: %8u kB\n" 3357 "SwapTotal: %8lu kB\n" 3358 "SwapFree: %8lu kB\n", 3359 btok(total_mem), /* MemTotal */ 3360 btok(free_mem), /* MemFree */ 3361 0, /* MemShared */ 3362 0, /* Buffers */ 3363 0, /* Cached */ 3364 0, /* SwapCached */ 3365 0, /* Active */ 3366 0, /* Inactive */ 3367 0, /* HighTotal */ 3368 0, /* HighFree */ 3369 btok(total_mem), /* LowTotal */ 3370 btok(free_mem), /* LowFree */ 3371 btok(total_swap), /* SwapTotal */ 3372 btok(total_swap - used_swap)); /* SwapFree */ 3373 } 3374 3375 /* 3376 * lxpr_read_mounts(): 3377 */ 3378 /* ARGSUSED */ 3379 static void 3380 lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3381 { 3382 struct vfs *vfsp; 3383 struct vfs *vfslist; 3384 zone_t *zone = LXPTOZ(lxpnp); 3385 struct print_data { 3386 refstr_t *vfs_mntpt; 3387 refstr_t *vfs_resource; 3388 uint_t vfs_flag; 3389 int vfs_fstype; 3390 struct print_data *next; 3391 } *print_head = NULL; 3392 struct print_data **print_tail = &print_head; 3393 struct print_data *printp; 3394 3395 vfs_list_read_lock(); 3396 3397 if (zone == global_zone) { 3398 vfsp = vfslist = rootvfs; 3399 } else { 3400 vfsp = vfslist = zone->zone_vfslist; 3401 /* 3402 * If the zone has a root entry, it will be the first in 3403 * the list. If it doesn't, we conjure one up. 3404 */ 3405 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt), 3406 zone->zone_rootpath) != 0) { 3407 struct vfs *tvfsp; 3408 /* 3409 * The root of the zone is not a mount point. The vfs 3410 * we want to report is that of the zone's root vnode. 3411 */ 3412 tvfsp = zone->zone_rootvp->v_vfsp; 3413 3414 lxpr_uiobuf_printf(uiobuf, 3415 "/ / %s %s 0 0\n", 3416 vfssw[tvfsp->vfs_fstype].vsw_name, 3417 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw"); 3418 3419 } 3420 if (vfslist == NULL) { 3421 vfs_list_unlock(); 3422 return; 3423 } 3424 } 3425 3426 /* 3427 * Later on we have to do a lookupname, which can end up causing 3428 * another vfs_list_read_lock() to be called. Which can lead to a 3429 * deadlock. To avoid this, we extract the data we need into a local 3430 * list, then we can run this list without holding vfs_list_read_lock() 3431 * We keep the list in the same order as the vfs_list 3432 */ 3433 do { 3434 /* Skip mounts we shouldn't show */ 3435 if (vfsp->vfs_flag & VFS_NOMNTTAB) { 3436 goto nextfs; 3437 } 3438 3439 printp = kmem_alloc(sizeof (*printp), KM_SLEEP); 3440 refstr_hold(vfsp->vfs_mntpt); 3441 printp->vfs_mntpt = vfsp->vfs_mntpt; 3442 refstr_hold(vfsp->vfs_resource); 3443 printp->vfs_resource = vfsp->vfs_resource; 3444 printp->vfs_flag = vfsp->vfs_flag; 3445 printp->vfs_fstype = vfsp->vfs_fstype; 3446 printp->next = NULL; 3447 3448 *print_tail = printp; 3449 print_tail = &printp->next; 3450 3451 nextfs: 3452 vfsp = (zone == global_zone) ? 3453 vfsp->vfs_next : vfsp->vfs_zone_next; 3454 3455 } while (vfsp != vfslist); 3456 3457 vfs_list_unlock(); 3458 3459 /* 3460 * now we can run through what we've extracted without holding 3461 * vfs_list_read_lock() 3462 */ 3463 printp = print_head; 3464 while (printp != NULL) { 3465 struct print_data *printp_next; 3466 const char *resource; 3467 char *mntpt; 3468 struct vnode *vp; 3469 int error; 3470 3471 mntpt = (char *)refstr_value(printp->vfs_mntpt); 3472 resource = refstr_value(printp->vfs_resource); 3473 3474 if (mntpt != NULL && mntpt[0] != '\0') 3475 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone); 3476 else 3477 mntpt = "-"; 3478 3479 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 3480 3481 if (error != 0) 3482 goto nextp; 3483 3484 if (!(vp->v_flag & VROOT)) { 3485 VN_RELE(vp); 3486 goto nextp; 3487 } 3488 VN_RELE(vp); 3489 3490 if (resource != NULL && resource[0] != '\0') { 3491 if (resource[0] == '/') { 3492 resource = ZONE_PATH_VISIBLE(resource, zone) ? 3493 ZONE_PATH_TRANSLATE(resource, zone) : 3494 mntpt; 3495 } 3496 } else { 3497 resource = "-"; 3498 } 3499 3500 lxpr_uiobuf_printf(uiobuf, 3501 "%s %s %s %s 0 0\n", 3502 resource, mntpt, vfssw[printp->vfs_fstype].vsw_name, 3503 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw"); 3504 3505 nextp: 3506 printp_next = printp->next; 3507 refstr_rele(printp->vfs_mntpt); 3508 refstr_rele(printp->vfs_resource); 3509 kmem_free(printp, sizeof (*printp)); 3510 printp = printp_next; 3511 3512 } 3513 } 3514 3515 /* 3516 * lxpr_read_partitions(): 3517 * 3518 * Over the years, /proc/partitions has been made considerably smaller -- to 3519 * the point that it really is only major number, minor number, number of 3520 * blocks (which we report as 0), and partition name. 3521 * 3522 * We support this because some things want to see it to make sense of 3523 * /proc/diskstats, and also because "fdisk -l" and a few other things look 3524 * here to find all disks on the system. 3525 */ 3526 /* ARGSUSED */ 3527 static void 3528 lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3529 { 3530 3531 kstat_t *ksr; 3532 kstat_t ks0; 3533 int nidx, num, i; 3534 size_t sidx, size; 3535 zfs_cmd_t *zc; 3536 nvlist_t *nv = NULL; 3537 nvpair_t *elem = NULL; 3538 lxpr_mnt_t *mnt; 3539 lxpr_zfs_iter_t zfsi; 3540 3541 ASSERT(lxpnp->lxpr_type == LXPR_PARTITIONS); 3542 3543 ks0.ks_kid = 0; 3544 ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx); 3545 3546 if (ksr == NULL) 3547 return; 3548 3549 lxpr_uiobuf_printf(uiobuf, "major minor #blocks name\n\n"); 3550 3551 for (i = 1; i < nidx; i++) { 3552 kstat_t *ksp = &ksr[i]; 3553 kstat_io_t *kip; 3554 3555 if (ksp->ks_type != KSTAT_TYPE_IO || 3556 strcmp(ksp->ks_class, "disk") != 0) 3557 continue; 3558 3559 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE, 3560 &size, &num)) == NULL) 3561 continue; 3562 3563 if (size < sizeof (kstat_io_t)) { 3564 kmem_free(kip, size); 3565 continue; 3566 } 3567 3568 lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d %s\n", 3569 mod_name_to_major(ksp->ks_module), 3570 ksp->ks_instance, 0, ksp->ks_name); 3571 3572 kmem_free(kip, size); 3573 } 3574 3575 kmem_free(ksr, sidx); 3576 3577 /* If we never got to open the zfs LDI, then stop now. */ 3578 mnt = (lxpr_mnt_t *)lxpnp->lxpr_vnode->v_vfsp->vfs_data; 3579 if (mnt->lxprm_zfs_isopen == B_FALSE) 3580 return; 3581 3582 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); 3583 3584 if (lxpr_zfs_list_pools(mnt, zc, &nv) != 0) 3585 goto out; 3586 3587 while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) { 3588 char *pool = nvpair_name(elem); 3589 3590 bzero(&zfsi, sizeof (lxpr_zfs_iter_t)); 3591 while (lxpr_zfs_next_zvol(mnt, pool, zc, &zfsi) == 0) { 3592 major_t major; 3593 minor_t minor; 3594 if (lxpr_zvol_dev(mnt, zc->zc_name, &major, &minor) 3595 != 0) 3596 continue; 3597 3598 lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d zvol/dsk/%s\n", 3599 major, minor, 0, zc->zc_name); 3600 } 3601 } 3602 3603 nvlist_free(nv); 3604 out: 3605 kmem_free(zc, sizeof (zfs_cmd_t)); 3606 } 3607 3608 /* 3609 * lxpr_read_diskstats(): 3610 * 3611 * See the block comment above the per-device output-generating line for the 3612 * details of the format. 3613 */ 3614 /* ARGSUSED */ 3615 static void 3616 lxpr_read_diskstats(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3617 { 3618 kstat_t *ksr; 3619 kstat_t ks0; 3620 int nidx, num, i; 3621 size_t sidx, size; 3622 3623 ASSERT(lxpnp->lxpr_type == LXPR_DISKSTATS); 3624 3625 ks0.ks_kid = 0; 3626 ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx); 3627 3628 if (ksr == NULL) 3629 return; 3630 3631 for (i = 1; i < nidx; i++) { 3632 kstat_t *ksp = &ksr[i]; 3633 kstat_io_t *kip; 3634 3635 if (ksp->ks_type != KSTAT_TYPE_IO || 3636 strcmp(ksp->ks_class, "disk") != 0) 3637 continue; 3638 3639 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE, 3640 &size, &num)) == NULL) 3641 continue; 3642 3643 if (size < sizeof (kstat_io_t)) { 3644 kmem_free(kip, size); 3645 continue; 3646 } 3647 3648 /* 3649 * /proc/diskstats is defined to have one line of output for 3650 * each block device, with each line containing the following 3651 * 14 fields: 3652 * 3653 * 1 - major number 3654 * 2 - minor mumber 3655 * 3 - device name 3656 * 4 - reads completed successfully 3657 * 5 - reads merged 3658 * 6 - sectors read 3659 * 7 - time spent reading (ms) 3660 * 8 - writes completed 3661 * 9 - writes merged 3662 * 10 - sectors written 3663 * 11 - time spent writing (ms) 3664 * 12 - I/Os currently in progress 3665 * 13 - time spent doing I/Os (ms) 3666 * 14 - weighted time spent doing I/Os (ms) 3667 * 3668 * One small hiccup: we don't actually keep track of time 3669 * spent reading vs. time spent writing -- we keep track of 3670 * time waiting vs. time actually performing I/O. While we 3671 * could divide the total time by the I/O mix (making the 3672 * obviously wrong assumption that I/O operations all take the 3673 * same amount of time), this has the undesirable side-effect 3674 * of moving backwards. Instead, we report the total time 3675 * (read + write) for all three stats (read, write, total). 3676 * This is also a lie of sorts, but it should be more 3677 * immediately clear to the user that reads and writes are 3678 * each being double-counted as the other. 3679 */ 3680 lxpr_uiobuf_printf(uiobuf, "%4d %7d %s " 3681 "%llu %llu %llu %llu " 3682 "%llu %llu %llu %llu " 3683 "%llu %llu %llu\n", 3684 mod_name_to_major(ksp->ks_module), 3685 ksp->ks_instance, ksp->ks_name, 3686 (uint64_t)kip->reads, 0LL, 3687 kip->nread / (uint64_t)LXPR_SECTOR_SIZE, 3688 (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC), 3689 (uint64_t)kip->writes, 0LL, 3690 kip->nwritten / (uint64_t)LXPR_SECTOR_SIZE, 3691 (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC), 3692 (uint64_t)(kip->rcnt + kip->wcnt), 3693 (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC), 3694 (kip->rlentime + kip->wlentime) / 3695 (uint64_t)(NANOSEC / MILLISEC)); 3696 3697 kmem_free(kip, size); 3698 } 3699 3700 kmem_free(ksr, sidx); 3701 } 3702 3703 /* 3704 * lxpr_read_version(): read the contents of the "version" file. 3705 */ 3706 /* ARGSUSED */ 3707 static void 3708 lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3709 { 3710 lx_zone_data_t *lxzd = ztolxzd(LXPTOZ(lxpnp)); 3711 lx_proc_data_t *lxpd = ptolxproc(curproc); 3712 const char *release = lxzd->lxzd_kernel_release; 3713 const char *version = lxzd->lxzd_kernel_version; 3714 3715 /* Use per-process overrides, if specified */ 3716 if (lxpd != NULL && lxpd->l_uname_release[0] != '\0') { 3717 release = lxpd->l_uname_release; 3718 } 3719 if (lxpd != NULL && lxpd->l_uname_version[0] != '\0') { 3720 version = lxpd->l_uname_version; 3721 } 3722 3723 lxpr_uiobuf_printf(uiobuf, 3724 "%s version %s (%s version %d.%d.%d) %s\n", 3725 LX_UNAME_SYSNAME, release, 3726 #if defined(__GNUC__) 3727 "gcc", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__, 3728 #else 3729 "cc", 1, 0, 0, 3730 #endif 3731 version); 3732 } 3733 3734 /* 3735 * lxpr_read_stat(): read the contents of the "stat" file. 3736 * 3737 */ 3738 /* ARGSUSED */ 3739 static void 3740 lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3741 { 3742 cpu_t *cp, *cpstart; 3743 int pools_enabled; 3744 ulong_t idle_cum = 0; 3745 ulong_t sys_cum = 0; 3746 ulong_t user_cum = 0; 3747 ulong_t irq_cum = 0; 3748 ulong_t cpu_nrunnable_cum = 0; 3749 ulong_t w_io_cum = 0; 3750 3751 ulong_t pgpgin_cum = 0; 3752 ulong_t pgpgout_cum = 0; 3753 ulong_t pgswapout_cum = 0; 3754 ulong_t pgswapin_cum = 0; 3755 ulong_t intr_cum = 0; 3756 ulong_t pswitch_cum = 0; 3757 ulong_t forks_cum = 0; 3758 hrtime_t msnsecs[NCMSTATES]; 3759 /* is the emulated release > 2.4 */ 3760 boolean_t newer_than24 = lx_kern_release_cmp(LXPTOZ(lxpnp), "2.4") > 0; 3761 /* temporary variable since scalehrtime modifies data in place */ 3762 hrtime_t tmptime; 3763 3764 ASSERT(lxpnp->lxpr_type == LXPR_STAT); 3765 3766 mutex_enter(&cpu_lock); 3767 pools_enabled = pool_pset_enabled(); 3768 3769 /* Calculate cumulative stats */ 3770 cp = cpstart = CPU->cpu_part->cp_cpulist; 3771 do { 3772 int i; 3773 3774 /* 3775 * Don't count CPUs that aren't even in the system 3776 * or aren't up yet. 3777 */ 3778 if ((cp->cpu_flags & CPU_EXISTS) == 0) { 3779 continue; 3780 } 3781 3782 get_cpu_mstate(cp, msnsecs); 3783 3784 idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]); 3785 sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); 3786 user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]); 3787 3788 pgpgin_cum += CPU_STATS(cp, vm.pgpgin); 3789 pgpgout_cum += CPU_STATS(cp, vm.pgpgout); 3790 pgswapin_cum += CPU_STATS(cp, vm.pgswapin); 3791 pgswapout_cum += CPU_STATS(cp, vm.pgswapout); 3792 3793 3794 if (newer_than24) { 3795 cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable; 3796 w_io_cum += CPU_STATS(cp, sys.iowait); 3797 for (i = 0; i < NCMSTATES; i++) { 3798 tmptime = cp->cpu_intracct[i]; 3799 scalehrtime(&tmptime); 3800 irq_cum += NSEC_TO_TICK(tmptime); 3801 } 3802 } 3803 3804 for (i = 0; i < PIL_MAX; i++) 3805 intr_cum += CPU_STATS(cp, sys.intr[i]); 3806 3807 pswitch_cum += CPU_STATS(cp, sys.pswitch); 3808 forks_cum += CPU_STATS(cp, sys.sysfork); 3809 forks_cum += CPU_STATS(cp, sys.sysvfork); 3810 3811 if (pools_enabled) 3812 cp = cp->cpu_next_part; 3813 else 3814 cp = cp->cpu_next; 3815 } while (cp != cpstart); 3816 3817 if (newer_than24) { 3818 lxpr_uiobuf_printf(uiobuf, 3819 "cpu %lu %lu %lu %lu %lu %lu %lu\n", 3820 user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L); 3821 } else { 3822 lxpr_uiobuf_printf(uiobuf, 3823 "cpu %lu %lu %lu %lu\n", 3824 user_cum, 0L, sys_cum, idle_cum); 3825 } 3826 3827 /* Do per processor stats */ 3828 do { 3829 int i; 3830 3831 ulong_t idle_ticks; 3832 ulong_t sys_ticks; 3833 ulong_t user_ticks; 3834 ulong_t irq_ticks = 0; 3835 3836 /* 3837 * Don't count CPUs that aren't even in the system 3838 * or aren't up yet. 3839 */ 3840 if ((cp->cpu_flags & CPU_EXISTS) == 0) { 3841 continue; 3842 } 3843 3844 get_cpu_mstate(cp, msnsecs); 3845 3846 idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]); 3847 sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); 3848 user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]); 3849 3850 for (i = 0; i < NCMSTATES; i++) { 3851 tmptime = cp->cpu_intracct[i]; 3852 scalehrtime(&tmptime); 3853 irq_ticks += NSEC_TO_TICK(tmptime); 3854 } 3855 3856 if (newer_than24) { 3857 lxpr_uiobuf_printf(uiobuf, 3858 "cpu%d %lu %lu %lu %lu %lu %lu %lu\n", 3859 cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks, 3860 0L, irq_ticks, 0L); 3861 } else { 3862 lxpr_uiobuf_printf(uiobuf, 3863 "cpu%d %lu %lu %lu %lu\n", 3864 cp->cpu_id, 3865 user_ticks, 0L, sys_ticks, idle_ticks); 3866 } 3867 3868 if (pools_enabled) 3869 cp = cp->cpu_next_part; 3870 else 3871 cp = cp->cpu_next; 3872 } while (cp != cpstart); 3873 3874 mutex_exit(&cpu_lock); 3875 3876 if (newer_than24) { 3877 lxpr_uiobuf_printf(uiobuf, 3878 "page %lu %lu\n" 3879 "swap %lu %lu\n" 3880 "intr %lu\n" 3881 "ctxt %lu\n" 3882 "btime %lu\n" 3883 "processes %lu\n" 3884 "procs_running %lu\n" 3885 "procs_blocked %lu\n", 3886 pgpgin_cum, pgpgout_cum, 3887 pgswapin_cum, pgswapout_cum, 3888 intr_cum, 3889 pswitch_cum, 3890 boot_time, 3891 forks_cum, 3892 cpu_nrunnable_cum, 3893 w_io_cum); 3894 } else { 3895 lxpr_uiobuf_printf(uiobuf, 3896 "page %lu %lu\n" 3897 "swap %lu %lu\n" 3898 "intr %lu\n" 3899 "ctxt %lu\n" 3900 "btime %lu\n" 3901 "processes %lu\n", 3902 pgpgin_cum, pgpgout_cum, 3903 pgswapin_cum, pgswapout_cum, 3904 intr_cum, 3905 pswitch_cum, 3906 boot_time, 3907 forks_cum); 3908 } 3909 } 3910 3911 /* 3912 * lxpr_read_swaps(): 3913 * 3914 * We don't support swap files or partitions, but some programs like to look 3915 * here just to check we have some swap on the system, so we lie and show 3916 * our entire swap cap as one swap partition. 3917 * 3918 * It is important to use formatting identical to the Linux implementation 3919 * so that consumers do not break. See swap_show() in mm/swapfile.c. 3920 */ 3921 /* ARGSUSED */ 3922 static void 3923 lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3924 { 3925 zone_t *zone = curzone; 3926 uint64_t totswap, usedswap; 3927 3928 mutex_enter(&zone->zone_mem_lock); 3929 /* Uses units of 1 kb (2^10). */ 3930 totswap = zone->zone_max_swap_ctl >> 10; 3931 usedswap = zone->zone_max_swap >> 10; 3932 mutex_exit(&zone->zone_mem_lock); 3933 3934 lxpr_uiobuf_printf(uiobuf, 3935 "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); 3936 lxpr_uiobuf_printf(uiobuf, "%-40s%s\t%llu\t%llu\t%d\n", 3937 "/dev/swap", "partition", totswap, usedswap, -1); 3938 } 3939 3940 /* 3941 * inotify tunables exported via /proc. 3942 */ 3943 extern int inotify_maxevents; 3944 extern int inotify_maxinstances; 3945 extern int inotify_maxwatches; 3946 3947 static void 3948 lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp, 3949 lxpr_uiobuf_t *uiobuf) 3950 { 3951 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS); 3952 lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents); 3953 } 3954 3955 static void 3956 lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp, 3957 lxpr_uiobuf_t *uiobuf) 3958 { 3959 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES); 3960 lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances); 3961 } 3962 3963 static void 3964 lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp, 3965 lxpr_uiobuf_t *uiobuf) 3966 { 3967 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES); 3968 lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches); 3969 } 3970 3971 static void 3972 lxpr_read_sys_kernel_caplcap(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3973 { 3974 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_CAPLCAP); 3975 lxpr_uiobuf_printf(uiobuf, "%d\n", LX_CAP_MAX_VALID); 3976 } 3977 3978 static void 3979 lxpr_read_sys_kernel_corepatt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3980 { 3981 zone_t *zone = curproc->p_zone; 3982 struct core_globals *cg; 3983 refstr_t *rp; 3984 corectl_path_t *ccp; 3985 char tr[MAXPATHLEN]; 3986 3987 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT); 3988 3989 cg = zone_getspecific(core_zone_key, zone); 3990 ASSERT(cg != NULL); 3991 3992 /* If core dumps are disabled, return an empty string. */ 3993 if ((cg->core_options & CC_PROCESS_PATH) == 0) { 3994 lxpr_uiobuf_printf(uiobuf, "\n"); 3995 return; 3996 } 3997 3998 ccp = cg->core_default_path; 3999 mutex_enter(&ccp->ccp_mtx); 4000 if ((rp = ccp->ccp_path) != NULL) 4001 refstr_hold(rp); 4002 mutex_exit(&ccp->ccp_mtx); 4003 4004 if (rp == NULL) { 4005 lxpr_uiobuf_printf(uiobuf, "\n"); 4006 return; 4007 } 4008 4009 bzero(tr, sizeof (tr)); 4010 if (lxpr_core_path_s2l(refstr_value(rp), tr, sizeof (tr)) != 0) { 4011 refstr_rele(rp); 4012 lxpr_uiobuf_printf(uiobuf, "\n"); 4013 return; 4014 } 4015 4016 refstr_rele(rp); 4017 lxpr_uiobuf_printf(uiobuf, "%s\n", tr); 4018 } 4019 4020 static void 4021 lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4022 { 4023 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME); 4024 lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename()); 4025 } 4026 4027 static void 4028 lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4029 { 4030 rctl_qty_t val; 4031 4032 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI); 4033 4034 mutex_enter(&curproc->p_lock); 4035 val = rctl_enforced_value(rc_zone_msgmni, 4036 curproc->p_zone->zone_rctls, curproc); 4037 mutex_exit(&curproc->p_lock); 4038 4039 lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val); 4040 } 4041 4042 static void 4043 lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4044 { 4045 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX); 4046 lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max); 4047 } 4048 4049 static void 4050 lxpr_read_sys_kernel_osrel(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4051 { 4052 lx_zone_data_t *br_data; 4053 4054 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_OSREL); 4055 br_data = ztolxzd(curproc->p_zone); 4056 if (curproc->p_zone->zone_brand == &lx_brand) { 4057 lxpr_uiobuf_printf(uiobuf, "%s\n", 4058 br_data->lxzd_kernel_version); 4059 } else { 4060 lxpr_uiobuf_printf(uiobuf, "\n"); 4061 } 4062 } 4063 4064 static void 4065 lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4066 { 4067 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX); 4068 lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid); 4069 } 4070 4071 static void 4072 lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4073 { 4074 /* 4075 * This file isn't documented on the Linux proc(5) man page but 4076 * according to the blog of the author of systemd/journald (the 4077 * consumer), he says: 4078 * boot_id: A random ID that is regenerated on each boot. As such it 4079 * can be used to identify the local machine's current boot. It's 4080 * universally available on any recent Linux kernel. It's a good and 4081 * safe choice if you need to identify a specific boot on a specific 4082 * booted kernel. 4083 * 4084 * We'll just generate a random ID if necessary. On Linux the format 4085 * appears to resemble a uuid but since it is not documented to be a 4086 * uuid, we don't worry about that. 4087 */ 4088 lx_zone_data_t *br_data; 4089 4090 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RAND_BOOTID); 4091 4092 if (curproc->p_zone->zone_brand != &lx_brand) { 4093 lxpr_uiobuf_printf(uiobuf, "0\n"); 4094 return; 4095 } 4096 4097 br_data = ztolxzd(curproc->p_zone); 4098 if (br_data->lxzd_bootid[0] == '\0') { 4099 extern int getrandom(void *, size_t, int); 4100 int i; 4101 4102 for (i = 0; i < 5; i++) { 4103 u_longlong_t n; 4104 char s[32]; 4105 4106 (void) random_get_bytes((uint8_t *)&n, sizeof (n)); 4107 switch (i) { 4108 case 0: (void) snprintf(s, sizeof (s), "%08llx", n); 4109 s[8] = '\0'; 4110 break; 4111 case 4: (void) snprintf(s, sizeof (s), "%012llx", n); 4112 s[12] = '\0'; 4113 break; 4114 default: (void) snprintf(s, sizeof (s), "%04llx", n); 4115 s[4] = '\0'; 4116 break; 4117 } 4118 if (i > 0) 4119 strlcat(br_data->lxzd_bootid, "-", 4120 sizeof (br_data->lxzd_bootid)); 4121 strlcat(br_data->lxzd_bootid, s, 4122 sizeof (br_data->lxzd_bootid)); 4123 } 4124 } 4125 4126 lxpr_uiobuf_printf(uiobuf, "%s\n", br_data->lxzd_bootid); 4127 } 4128 4129 static void 4130 lxpr_read_sys_kernel_sem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4131 { 4132 proc_t *pp = curproc; 4133 rctl_qty_t vmsl, vopm, vmni, vmns; 4134 4135 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SEM); 4136 4137 mutex_enter(&pp->p_lock); 4138 vmsl = rctl_enforced_value(rc_process_semmsl, pp->p_rctls, pp); 4139 vopm = rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp); 4140 vmni = rctl_enforced_value(rc_zone_semmni, pp->p_zone->zone_rctls, pp); 4141 mutex_exit(&pp->p_lock); 4142 vmns = vmsl * vmni; 4143 if (vmns < vmsl || vmns < vmni) { 4144 vmns = ULLONG_MAX; 4145 } 4146 /* 4147 * Format: semmsl semmns semopm semmni 4148 * - semmsl: Limit semaphores in a sempahore set. 4149 * - semmns: Limit semaphores in all semaphore sets 4150 * - semopm: Limit operations in a single semop call 4151 * - semmni: Limit number of semaphore sets 4152 */ 4153 lxpr_uiobuf_printf(uiobuf, "%llu\t%llu\t%llu\t%llu\n", 4154 vmsl, vmns, vopm, vmni); 4155 } 4156 4157 static void 4158 lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4159 { 4160 rctl_qty_t val; 4161 4162 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX); 4163 4164 mutex_enter(&curproc->p_lock); 4165 val = rctl_enforced_value(rc_zone_shmmax, 4166 curproc->p_zone->zone_rctls, curproc); 4167 mutex_exit(&curproc->p_lock); 4168 4169 if (val > FOURGB) 4170 val = FOURGB; 4171 4172 lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val); 4173 } 4174 4175 static void 4176 lxpr_read_sys_kernel_shmmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4177 { 4178 rctl_qty_t val; 4179 4180 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMNI); 4181 4182 mutex_enter(&curproc->p_lock); 4183 val = rctl_enforced_value(rc_zone_shmmni, 4184 curproc->p_zone->zone_rctls, curproc); 4185 mutex_exit(&curproc->p_lock); 4186 4187 if (val > FOURGB) 4188 val = FOURGB; 4189 4190 lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val); 4191 } 4192 4193 static void 4194 lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4195 { 4196 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX); 4197 lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl); 4198 } 4199 4200 static void 4201 lxpr_read_sys_net_core_somaxc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4202 { 4203 netstack_t *ns; 4204 tcp_stack_t *tcps; 4205 4206 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON); 4207 4208 ns = netstack_get_current(); 4209 if (ns == NULL) { 4210 lxpr_uiobuf_printf(uiobuf, "%d\n", SOMAXCONN); 4211 return; 4212 } 4213 4214 tcps = ns->netstack_tcp; 4215 lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_conn_req_max_q); 4216 netstack_rele(ns); 4217 } 4218 4219 static void 4220 lxpr_read_sys_vm_minfr_kb(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4221 { 4222 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MINFR_KB); 4223 lxpr_uiobuf_printf(uiobuf, "%d\n", 0); 4224 } 4225 4226 static void 4227 lxpr_read_sys_vm_nhpages(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4228 { 4229 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_NHUGEP); 4230 lxpr_uiobuf_printf(uiobuf, "%d\n", 0); 4231 } 4232 4233 static void 4234 lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4235 { 4236 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_OVERCOMMIT_MEM); 4237 lxpr_uiobuf_printf(uiobuf, "%d\n", 0); 4238 } 4239 4240 static void 4241 lxpr_read_sys_vm_swappiness(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4242 { 4243 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_SWAPPINESS); 4244 lxpr_uiobuf_printf(uiobuf, "%d\n", 0); 4245 } 4246 4247 /* 4248 * lxpr_read_uptime(): read the contents of the "uptime" file. 4249 * 4250 * format is: "%.2lf, %.2lf",uptime_secs, idle_secs 4251 * Use fixed point arithmetic to get 2 decimal places 4252 */ 4253 /* ARGSUSED */ 4254 static void 4255 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4256 { 4257 cpu_t *cp, *cpstart; 4258 int pools_enabled; 4259 ulong_t idle_cum = 0; 4260 ulong_t cpu_count = 0; 4261 ulong_t idle_s; 4262 ulong_t idle_cs; 4263 ulong_t up_s; 4264 ulong_t up_cs; 4265 hrtime_t birthtime; 4266 hrtime_t centi_sec = 10000000; /* 10^7 */ 4267 4268 ASSERT(lxpnp->lxpr_type == LXPR_UPTIME); 4269 4270 /* Calculate cumulative stats */ 4271 mutex_enter(&cpu_lock); 4272 pools_enabled = pool_pset_enabled(); 4273 4274 cp = cpstart = CPU->cpu_part->cp_cpulist; 4275 do { 4276 /* 4277 * Don't count CPUs that aren't even in the system 4278 * or aren't up yet. 4279 */ 4280 if ((cp->cpu_flags & CPU_EXISTS) == 0) { 4281 continue; 4282 } 4283 4284 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle); 4285 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait); 4286 cpu_count += 1; 4287 4288 if (pools_enabled) 4289 cp = cp->cpu_next_part; 4290 else 4291 cp = cp->cpu_next; 4292 } while (cp != cpstart); 4293 mutex_exit(&cpu_lock); 4294 4295 /* Getting the Zone zsched process startup time */ 4296 birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart; 4297 up_cs = (gethrtime() - birthtime) / centi_sec; 4298 up_s = up_cs / 100; 4299 up_cs %= 100; 4300 4301 ASSERT(cpu_count > 0); 4302 idle_cum /= cpu_count; 4303 idle_s = idle_cum / hz; 4304 idle_cs = idle_cum % hz; 4305 idle_cs *= 100; 4306 idle_cs /= hz; 4307 4308 lxpr_uiobuf_printf(uiobuf, 4309 "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs); 4310 } 4311 4312 static const char *amd_x_edx[] = { 4313 NULL, NULL, NULL, NULL, 4314 NULL, NULL, NULL, NULL, 4315 NULL, NULL, NULL, "syscall", 4316 NULL, NULL, NULL, NULL, 4317 NULL, NULL, NULL, "mp", 4318 "nx", NULL, "mmxext", NULL, 4319 NULL, NULL, NULL, NULL, 4320 NULL, "lm", "3dnowext", "3dnow" 4321 }; 4322 4323 static const char *amd_x_ecx[] = { 4324 "lahf_lm", NULL, "svm", NULL, 4325 "altmovcr8" 4326 }; 4327 4328 static const char *tm_x_edx[] = { 4329 "recovery", "longrun", NULL, "lrti" 4330 }; 4331 4332 /* 4333 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx." 4334 */ 4335 static const char *intc_x_edx[] = { 4336 NULL, NULL, NULL, NULL, 4337 NULL, NULL, NULL, NULL, 4338 NULL, NULL, NULL, "syscall", 4339 NULL, NULL, NULL, NULL, 4340 NULL, NULL, NULL, NULL, 4341 "nx", NULL, NULL, NULL, 4342 NULL, NULL, NULL, NULL, 4343 NULL, "lm", NULL, NULL 4344 }; 4345 4346 static const char *intc_edx[] = { 4347 "fpu", "vme", "de", "pse", 4348 "tsc", "msr", "pae", "mce", 4349 "cx8", "apic", NULL, "sep", 4350 "mtrr", "pge", "mca", "cmov", 4351 "pat", "pse36", "pn", "clflush", 4352 NULL, "dts", "acpi", "mmx", 4353 "fxsr", "sse", "sse2", "ss", 4354 "ht", "tm", "ia64", "pbe" 4355 }; 4356 4357 /* 4358 * "sse3" on linux is called "pni" (Prescott New Instructions). 4359 */ 4360 static const char *intc_ecx[] = { 4361 "pni", NULL, NULL, "monitor", 4362 "ds_cpl", NULL, NULL, "est", 4363 "tm2", NULL, "cid", NULL, 4364 NULL, "cx16", "xtpr" 4365 }; 4366 4367 /* 4368 * Report a list of each cgroup subsystem supported by our emulated cgroup fs. 4369 * This needs to exist for systemd to run but for now we don't report any 4370 * cgroup subsystems as being installed. The commented example below shows 4371 * how to print a subsystem entry. 4372 */ 4373 static void 4374 lxpr_read_cgroups(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4375 { 4376 lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n", 4377 "#subsys_name", "hierarchy", "num_cgroups", "enabled"); 4378 4379 /* 4380 * lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n", 4381 * "cpu,cpuacct", "2", "1", "1"); 4382 */ 4383 } 4384 4385 static void 4386 lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4387 { 4388 int i; 4389 uint32_t bits; 4390 cpu_t *cp, *cpstart; 4391 int pools_enabled; 4392 const char **fp; 4393 char brandstr[CPU_IDSTRLEN]; 4394 struct cpuid_regs cpr; 4395 int maxeax; 4396 int std_ecx, std_edx, ext_ecx, ext_edx; 4397 4398 ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO); 4399 4400 mutex_enter(&cpu_lock); 4401 pools_enabled = pool_pset_enabled(); 4402 4403 cp = cpstart = CPU->cpu_part->cp_cpulist; 4404 do { 4405 /* 4406 * This returns the maximum eax value for standard cpuid 4407 * functions in eax. 4408 */ 4409 cpr.cp_eax = 0; 4410 (void) cpuid_insn(cp, &cpr); 4411 maxeax = cpr.cp_eax; 4412 4413 /* 4414 * Get standard x86 feature flags. 4415 */ 4416 cpr.cp_eax = 1; 4417 (void) cpuid_insn(cp, &cpr); 4418 std_ecx = cpr.cp_ecx; 4419 std_edx = cpr.cp_edx; 4420 4421 /* 4422 * Now get extended feature flags. 4423 */ 4424 cpr.cp_eax = 0x80000001; 4425 (void) cpuid_insn(cp, &cpr); 4426 ext_ecx = cpr.cp_ecx; 4427 ext_edx = cpr.cp_edx; 4428 4429 (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN); 4430 4431 lxpr_uiobuf_printf(uiobuf, 4432 "processor\t: %d\n" 4433 "vendor_id\t: %s\n" 4434 "cpu family\t: %d\n" 4435 "model\t\t: %d\n" 4436 "model name\t: %s\n" 4437 "stepping\t: %d\n" 4438 "cpu MHz\t\t: %u.%03u\n", 4439 cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp), 4440 cpuid_getmodel(cp), brandstr, cpuid_getstep(cp), 4441 (uint32_t)(cpu_freq_hz / 1000000), 4442 ((uint32_t)(cpu_freq_hz / 1000)) % 1000); 4443 4444 lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n", 4445 getl2cacheinfo(cp, NULL, NULL, NULL) / 1024); 4446 4447 if (is_x86_feature(x86_featureset, X86FSET_HTT)) { 4448 /* 4449 * 'siblings' is used for HT-style threads 4450 */ 4451 lxpr_uiobuf_printf(uiobuf, 4452 "physical id\t: %lu\n" 4453 "siblings\t: %u\n", 4454 pg_plat_hw_instance_id(cp, PGHW_CHIP), 4455 cpuid_get_ncpu_per_chip(cp)); 4456 } 4457 4458 /* 4459 * Since we're relatively picky about running on older hardware, 4460 * we can be somewhat cavalier about the answers to these ones. 4461 * 4462 * In fact, given the hardware we support, we just say: 4463 * 4464 * fdiv_bug : no (if we're on a 64-bit kernel) 4465 * hlt_bug : no 4466 * f00f_bug : no 4467 * coma_bug : no 4468 * wp : yes (write protect in supervsr mode) 4469 */ 4470 lxpr_uiobuf_printf(uiobuf, 4471 "fdiv_bug\t: %s\n" 4472 "hlt_bug \t: no\n" 4473 "f00f_bug\t: no\n" 4474 "coma_bug\t: no\n" 4475 "fpu\t\t: %s\n" 4476 "fpu_exception\t: %s\n" 4477 "cpuid level\t: %d\n" 4478 "flags\t\t:", 4479 #if defined(__i386) 4480 fpu_pentium_fdivbug ? "yes" : "no", 4481 #else 4482 "no", 4483 #endif /* __i386 */ 4484 fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no", 4485 maxeax); 4486 4487 for (bits = std_edx, fp = intc_edx, i = 0; 4488 i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++) 4489 if ((bits & (1 << i)) != 0 && *fp) 4490 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4491 4492 /* 4493 * name additional features where appropriate 4494 */ 4495 switch (x86_vendor) { 4496 case X86_VENDOR_Intel: 4497 for (bits = ext_edx, fp = intc_x_edx, i = 0; 4498 i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]); 4499 fp++, i++) 4500 if ((bits & (1 << i)) != 0 && *fp) 4501 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4502 break; 4503 4504 case X86_VENDOR_AMD: 4505 for (bits = ext_edx, fp = amd_x_edx, i = 0; 4506 i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]); 4507 fp++, i++) 4508 if ((bits & (1 << i)) != 0 && *fp) 4509 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4510 4511 for (bits = ext_ecx, fp = amd_x_ecx, i = 0; 4512 i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]); 4513 fp++, i++) 4514 if ((bits & (1 << i)) != 0 && *fp) 4515 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4516 break; 4517 4518 case X86_VENDOR_TM: 4519 for (bits = ext_edx, fp = tm_x_edx, i = 0; 4520 i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]); 4521 fp++, i++) 4522 if ((bits & (1 << i)) != 0 && *fp) 4523 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4524 break; 4525 default: 4526 break; 4527 } 4528 4529 for (bits = std_ecx, fp = intc_ecx, i = 0; 4530 i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++) 4531 if ((bits & (1 << i)) != 0 && *fp) 4532 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4533 4534 lxpr_uiobuf_printf(uiobuf, "\n\n"); 4535 4536 if (pools_enabled) 4537 cp = cp->cpu_next_part; 4538 else 4539 cp = cp->cpu_next; 4540 } while (cp != cpstart); 4541 4542 mutex_exit(&cpu_lock); 4543 } 4544 4545 /* ARGSUSED */ 4546 static void 4547 lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4548 { 4549 ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD); 4550 lxpr_uiobuf_seterr(uiobuf, EFAULT); 4551 } 4552 4553 /* 4554 * Report a list of file systems loaded in the kernel. We only report the ones 4555 * which we support and which may be checked by various components to see if 4556 * they are loaded. 4557 */ 4558 static void 4559 lxpr_read_filesystems(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4560 { 4561 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "autofs"); 4562 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "cgroup"); 4563 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "nfs"); 4564 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "proc"); 4565 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "sysfs"); 4566 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "tmpfs"); 4567 } 4568 4569 /* 4570 * lxpr_getattr(): Vnode operation for VOP_GETATTR() 4571 */ 4572 static int 4573 lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 4574 caller_context_t *ct) 4575 { 4576 register lxpr_node_t *lxpnp = VTOLXP(vp); 4577 lxpr_nodetype_t type = lxpnp->lxpr_type; 4578 extern uint_t nproc; 4579 int error; 4580 4581 /* 4582 * Return attributes of underlying vnode if ATTR_REAL 4583 * 4584 * but keep fd files with the symlink permissions 4585 */ 4586 if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) { 4587 vnode_t *rvp = lxpnp->lxpr_realvp; 4588 4589 /* 4590 * withold attribute information to owner or root 4591 */ 4592 if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) { 4593 return (error); 4594 } 4595 4596 /* 4597 * now its attributes 4598 */ 4599 if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) { 4600 return (error); 4601 } 4602 4603 /* 4604 * if it's a file in lx /proc/pid/fd/xx then set its 4605 * mode and keep it looking like a symlink, fifo or socket 4606 */ 4607 if (type == LXPR_PID_FD_FD) { 4608 vap->va_mode = lxpnp->lxpr_mode; 4609 vap->va_type = lxpnp->lxpr_realvp->v_type; 4610 vap->va_size = 0; 4611 vap->va_nlink = 1; 4612 } 4613 return (0); 4614 } 4615 4616 /* Default attributes, that may be overridden below */ 4617 bzero(vap, sizeof (*vap)); 4618 vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time; 4619 vap->va_nlink = 1; 4620 vap->va_type = vp->v_type; 4621 vap->va_mode = lxpnp->lxpr_mode; 4622 vap->va_fsid = vp->v_vfsp->vfs_dev; 4623 vap->va_blksize = DEV_BSIZE; 4624 vap->va_uid = lxpnp->lxpr_uid; 4625 vap->va_gid = lxpnp->lxpr_gid; 4626 vap->va_nodeid = lxpnp->lxpr_ino; 4627 4628 switch (type) { 4629 case LXPR_PROCDIR: 4630 vap->va_nlink = nproc + 2 + PROCDIRFILES; 4631 vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE; 4632 break; 4633 case LXPR_PIDDIR: 4634 vap->va_nlink = PIDDIRFILES; 4635 vap->va_size = PIDDIRFILES * LXPR_SDSIZE; 4636 break; 4637 case LXPR_PID_TASK_IDDIR: 4638 vap->va_nlink = TIDDIRFILES; 4639 vap->va_size = TIDDIRFILES * LXPR_SDSIZE; 4640 break; 4641 case LXPR_SELF: 4642 vap->va_uid = crgetruid(curproc->p_cred); 4643 vap->va_gid = crgetrgid(curproc->p_cred); 4644 break; 4645 case LXPR_PID_FD_FD: 4646 case LXPR_PID_TID_FD_FD: 4647 /* 4648 * Restore VLNK type for lstat-type activity. 4649 * See lxpr_readlink for more details. 4650 */ 4651 if ((flags & FOLLOW) == 0) 4652 vap->va_type = VLNK; 4653 default: 4654 break; 4655 } 4656 4657 vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size); 4658 return (0); 4659 } 4660 4661 /* 4662 * lxpr_access(): Vnode operation for VOP_ACCESS() 4663 */ 4664 static int 4665 lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct) 4666 { 4667 lxpr_node_t *lxpnp = VTOLXP(vp); 4668 lxpr_nodetype_t type = lxpnp->lxpr_type; 4669 int shift = 0; 4670 proc_t *tp; 4671 4672 /* lx /proc is a read only file system */ 4673 if (mode & VWRITE) { 4674 switch (type) { 4675 case LXPR_PID_OOM_SCR_ADJ: 4676 case LXPR_PID_TID_OOM_SCR_ADJ: 4677 case LXPR_SYS_KERNEL_COREPATT: 4678 case LXPR_SYS_NET_CORE_SOMAXCON: 4679 case LXPR_SYS_VM_OVERCOMMIT_MEM: 4680 case LXPR_SYS_VM_SWAPPINESS: 4681 case LXPR_PID_FD_FD: 4682 case LXPR_PID_TID_FD_FD: 4683 break; 4684 default: 4685 return (EROFS); 4686 } 4687 } 4688 4689 /* 4690 * If this is a restricted file, check access permissions. 4691 */ 4692 switch (type) { 4693 case LXPR_PIDDIR: 4694 return (0); 4695 case LXPR_PID_CURDIR: 4696 case LXPR_PID_ENV: 4697 case LXPR_PID_EXE: 4698 case LXPR_PID_LIMITS: 4699 case LXPR_PID_MAPS: 4700 case LXPR_PID_MEM: 4701 case LXPR_PID_ROOTDIR: 4702 case LXPR_PID_FDDIR: 4703 case LXPR_PID_FD_FD: 4704 case LXPR_PID_TID_FDDIR: 4705 case LXPR_PID_TID_FD_FD: 4706 if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL) 4707 return (ENOENT); 4708 if (tp != curproc && secpolicy_proc_access(cr) != 0 && 4709 priv_proc_cred_perm(cr, tp, NULL, mode) != 0) { 4710 lxpr_unlock(tp); 4711 return (EACCES); 4712 } 4713 lxpr_unlock(tp); 4714 default: 4715 break; 4716 } 4717 4718 if (lxpnp->lxpr_realvp != NULL) { 4719 /* 4720 * For these we use the underlying vnode's accessibility. 4721 */ 4722 return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct)); 4723 } 4724 4725 /* If user is root allow access regardless of permission bits */ 4726 if (secpolicy_proc_access(cr) == 0) 4727 return (0); 4728 4729 /* 4730 * Access check is based on only one of owner, group, public. If not 4731 * owner, then check group. If not a member of the group, then check 4732 * public access. 4733 */ 4734 if (crgetuid(cr) != lxpnp->lxpr_uid) { 4735 shift += 3; 4736 if (!groupmember((uid_t)lxpnp->lxpr_gid, cr)) 4737 shift += 3; 4738 } 4739 4740 mode &= ~(lxpnp->lxpr_mode << shift); 4741 4742 if (mode == 0) 4743 return (0); 4744 4745 return (EACCES); 4746 } 4747 4748 /* ARGSUSED */ 4749 static vnode_t * 4750 lxpr_lookup_not_a_dir(vnode_t *dp, char *comp) 4751 { 4752 return (NULL); 4753 } 4754 4755 /* 4756 * lxpr_lookup(): Vnode operation for VOP_LOOKUP() 4757 */ 4758 /* ARGSUSED */ 4759 static int 4760 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp, 4761 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 4762 int *direntflags, pathname_t *realpnp) 4763 { 4764 lxpr_node_t *lxpnp = VTOLXP(dp); 4765 lxpr_nodetype_t type = lxpnp->lxpr_type; 4766 int error; 4767 4768 ASSERT(dp->v_type == VDIR); 4769 ASSERT(type < LXPR_NFILES); 4770 4771 /* 4772 * we should never get here because the lookup 4773 * is done on the realvp for these nodes 4774 */ 4775 ASSERT(type != LXPR_PID_FD_FD && 4776 type != LXPR_PID_CURDIR && 4777 type != LXPR_PID_ROOTDIR); 4778 4779 /* 4780 * restrict lookup permission to owner or root 4781 */ 4782 if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) { 4783 return (error); 4784 } 4785 4786 /* 4787 * Just return the parent vnode if that's where we are trying to go. 4788 */ 4789 if (strcmp(comp, "..") == 0) { 4790 VN_HOLD(lxpnp->lxpr_parent); 4791 *vpp = lxpnp->lxpr_parent; 4792 return (0); 4793 } 4794 4795 /* 4796 * Special handling for directory searches. Note: null component name 4797 * denotes that the current directory is being searched. 4798 */ 4799 if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) { 4800 VN_HOLD(dp); 4801 *vpp = dp; 4802 return (0); 4803 } 4804 4805 *vpp = (lxpr_lookup_function[type](dp, comp)); 4806 return ((*vpp == NULL) ? ENOENT : 0); 4807 } 4808 4809 /* 4810 * Do a sequential search on the given directory table 4811 */ 4812 static vnode_t * 4813 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p, 4814 lxpr_dirent_t *dirtab, int dirtablen) 4815 { 4816 lxpr_node_t *lxpnp; 4817 int count; 4818 4819 for (count = 0; count < dirtablen; count++) { 4820 if (strcmp(dirtab[count].d_name, comp) == 0) { 4821 lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0); 4822 dp = LXPTOV(lxpnp); 4823 ASSERT(dp != NULL); 4824 return (dp); 4825 } 4826 } 4827 return (NULL); 4828 } 4829 4830 static vnode_t * 4831 lxpr_lookup_piddir(vnode_t *dp, char *comp) 4832 { 4833 proc_t *p; 4834 4835 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR); 4836 4837 p = lxpr_lock(VTOLXP(dp)->lxpr_pid); 4838 if (p == NULL) 4839 return (NULL); 4840 4841 dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES); 4842 4843 lxpr_unlock(p); 4844 4845 return (dp); 4846 } 4847 4848 /* 4849 * Lookup one of the process's task ID's. 4850 */ 4851 static vnode_t * 4852 lxpr_lookup_taskdir(vnode_t *dp, char *comp) 4853 { 4854 lxpr_node_t *dlxpnp = VTOLXP(dp); 4855 lxpr_node_t *lxpnp; 4856 proc_t *p; 4857 pid_t real_pid; 4858 uint_t tid; 4859 int c; 4860 kthread_t *t; 4861 4862 ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASKDIR); 4863 4864 /* 4865 * convert the string rendition of the filename to a thread ID 4866 */ 4867 tid = 0; 4868 while ((c = *comp++) != '\0') { 4869 int otid; 4870 if (c < '0' || c > '9') 4871 return (NULL); 4872 4873 otid = tid; 4874 tid = 10 * tid + c - '0'; 4875 /* integer overflow */ 4876 if (tid / 10 != otid) 4877 return (NULL); 4878 } 4879 4880 /* 4881 * get the proc to work with and lock it 4882 */ 4883 real_pid = get_real_pid(dlxpnp->lxpr_pid); 4884 p = lxpr_lock(real_pid); 4885 if ((p == NULL)) 4886 return (NULL); 4887 4888 /* 4889 * If the process is a zombie or system process 4890 * it can't have any threads. 4891 */ 4892 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) { 4893 lxpr_unlock(p); 4894 return (NULL); 4895 } 4896 4897 if (p->p_brand == &lx_brand) { 4898 t = lxpr_get_thread(p, tid); 4899 } else { 4900 /* 4901 * Only the main thread is visible for non-branded processes. 4902 */ 4903 t = p->p_tlist; 4904 if (tid != p->p_pid || t == NULL) { 4905 t = NULL; 4906 } else { 4907 thread_lock(t); 4908 } 4909 } 4910 if (t == NULL) { 4911 lxpr_unlock(p); 4912 return (NULL); 4913 } 4914 thread_unlock(t); 4915 4916 /* 4917 * Allocate and fill in a new lx /proc taskid node. 4918 * Instead of the last arg being a fd, it is a tid. 4919 */ 4920 lxpnp = lxpr_getnode(dp, LXPR_PID_TASK_IDDIR, p, tid); 4921 dp = LXPTOV(lxpnp); 4922 ASSERT(dp != NULL); 4923 lxpr_unlock(p); 4924 return (dp); 4925 } 4926 4927 /* 4928 * Lookup one of the process's task ID's. 4929 */ 4930 static vnode_t * 4931 lxpr_lookup_task_tid_dir(vnode_t *dp, char *comp) 4932 { 4933 lxpr_node_t *dlxpnp = VTOLXP(dp); 4934 lxpr_node_t *lxpnp; 4935 proc_t *p; 4936 pid_t real_pid; 4937 kthread_t *t; 4938 int i; 4939 4940 ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASK_IDDIR); 4941 4942 /* 4943 * get the proc to work with and lock it 4944 */ 4945 real_pid = get_real_pid(dlxpnp->lxpr_pid); 4946 p = lxpr_lock(real_pid); 4947 if ((p == NULL)) 4948 return (NULL); 4949 4950 /* 4951 * If the process is a zombie or system process 4952 * it can't have any threads. 4953 */ 4954 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) { 4955 lxpr_unlock(p); 4956 return (NULL); 4957 } 4958 4959 /* need to confirm tid is still there */ 4960 t = lxpr_get_thread(p, dlxpnp->lxpr_desc); 4961 if (t == NULL) { 4962 lxpr_unlock(p); 4963 return (NULL); 4964 } 4965 thread_unlock(t); 4966 4967 /* 4968 * allocate and fill in the new lx /proc taskid dir node 4969 */ 4970 for (i = 0; i < TIDDIRFILES; i++) { 4971 if (strcmp(tiddir[i].d_name, comp) == 0) { 4972 lxpnp = lxpr_getnode(dp, tiddir[i].d_type, p, 4973 dlxpnp->lxpr_desc); 4974 dp = LXPTOV(lxpnp); 4975 ASSERT(dp != NULL); 4976 lxpr_unlock(p); 4977 return (dp); 4978 } 4979 } 4980 4981 lxpr_unlock(p); 4982 return (NULL); 4983 } 4984 4985 /* 4986 * Lookup one of the process's open files. 4987 */ 4988 static vnode_t * 4989 lxpr_lookup_fddir(vnode_t *dp, char *comp) 4990 { 4991 lxpr_node_t *dlxpnp = VTOLXP(dp); 4992 4993 ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR || 4994 dlxpnp->lxpr_type == LXPR_PID_TID_FDDIR); 4995 4996 return (lxpr_lookup_fdnode(dp, comp)); 4997 } 4998 4999 static vnode_t * 5000 lxpr_lookup_netdir(vnode_t *dp, char *comp) 5001 { 5002 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR); 5003 5004 dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES); 5005 5006 return (dp); 5007 } 5008 5009 static vnode_t * 5010 lxpr_lookup_procdir(vnode_t *dp, char *comp) 5011 { 5012 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR); 5013 5014 /* 5015 * We know all the names of files & dirs in our file system structure 5016 * except those that are pid names. These change as pids are created/ 5017 * deleted etc., so we just look for a number as the first char to see 5018 * if we are we doing pid lookups. 5019 * 5020 * Don't need to check for "self" as it is implemented as a symlink 5021 */ 5022 if (*comp >= '0' && *comp <= '9') { 5023 pid_t pid = 0; 5024 lxpr_node_t *lxpnp = NULL; 5025 proc_t *p; 5026 int c; 5027 5028 while ((c = *comp++) != '\0') 5029 pid = 10 * pid + c - '0'; 5030 5031 /* 5032 * Can't continue if the process is still loading or it doesn't 5033 * really exist yet (or maybe it just died!) 5034 */ 5035 p = lxpr_lock(pid); 5036 if (p == NULL) 5037 return (NULL); 5038 5039 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) { 5040 lxpr_unlock(p); 5041 return (NULL); 5042 } 5043 5044 /* 5045 * allocate and fill in a new lx /proc node 5046 */ 5047 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0); 5048 5049 lxpr_unlock(p); 5050 5051 dp = LXPTOV(lxpnp); 5052 ASSERT(dp != NULL); 5053 5054 return (dp); 5055 } 5056 5057 /* Lookup fixed names */ 5058 return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES)); 5059 } 5060 5061 static vnode_t * 5062 lxpr_lookup_sysdir(vnode_t *dp, char *comp) 5063 { 5064 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR); 5065 return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES)); 5066 } 5067 5068 static vnode_t * 5069 lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp) 5070 { 5071 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR); 5072 return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir, 5073 SYS_KERNELDIRFILES)); 5074 } 5075 5076 static vnode_t * 5077 lxpr_lookup_sys_kdir_randdir(vnode_t *dp, char *comp) 5078 { 5079 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNEL_RANDDIR); 5080 return (lxpr_lookup_common(dp, comp, NULL, sys_randdir, 5081 SYS_RANDDIRFILES)); 5082 } 5083 5084 static vnode_t * 5085 lxpr_lookup_sys_netdir(vnode_t *dp, char *comp) 5086 { 5087 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NETDIR); 5088 return (lxpr_lookup_common(dp, comp, NULL, sys_netdir, 5089 SYS_NETDIRFILES)); 5090 } 5091 5092 static vnode_t * 5093 lxpr_lookup_sys_net_coredir(vnode_t *dp, char *comp) 5094 { 5095 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_COREDIR); 5096 return (lxpr_lookup_common(dp, comp, NULL, sys_net_coredir, 5097 SYS_NET_COREDIRFILES)); 5098 } 5099 5100 static vnode_t * 5101 lxpr_lookup_sys_vmdir(vnode_t *dp, char *comp) 5102 { 5103 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_VMDIR); 5104 return (lxpr_lookup_common(dp, comp, NULL, sys_vmdir, 5105 SYS_VMDIRFILES)); 5106 } 5107 5108 static vnode_t * 5109 lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp) 5110 { 5111 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR); 5112 return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir, 5113 SYS_FSDIRFILES)); 5114 } 5115 5116 static vnode_t * 5117 lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp) 5118 { 5119 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR); 5120 return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir, 5121 SYS_FS_INOTIFYDIRFILES)); 5122 } 5123 5124 /* 5125 * lxpr_readdir(): Vnode operation for VOP_READDIR() 5126 */ 5127 /* ARGSUSED */ 5128 static int 5129 lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp, 5130 caller_context_t *ct, int flags) 5131 { 5132 lxpr_node_t *lxpnp = VTOLXP(dp); 5133 lxpr_nodetype_t type = lxpnp->lxpr_type; 5134 ssize_t uresid; 5135 off_t uoffset; 5136 int error; 5137 5138 ASSERT(dp->v_type == VDIR); 5139 ASSERT(type < LXPR_NFILES); 5140 5141 /* 5142 * we should never get here because the readdir 5143 * is done on the realvp for these nodes 5144 */ 5145 ASSERT(type != LXPR_PID_FD_FD && 5146 type != LXPR_PID_CURDIR && 5147 type != LXPR_PID_ROOTDIR); 5148 5149 /* 5150 * restrict readdir permission to owner or root 5151 */ 5152 if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0) 5153 return (error); 5154 5155 uoffset = uiop->uio_offset; 5156 uresid = uiop->uio_resid; 5157 5158 /* can't do negative reads */ 5159 if (uoffset < 0 || uresid <= 0) 5160 return (EINVAL); 5161 5162 /* can't read directory entries that don't exist! */ 5163 if (uoffset % LXPR_SDSIZE) 5164 return (ENOENT); 5165 5166 return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp)); 5167 } 5168 5169 /* ARGSUSED */ 5170 static int 5171 lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5172 { 5173 return (ENOTDIR); 5174 } 5175 5176 /* 5177 * This has the common logic for returning directory entries 5178 */ 5179 static int 5180 lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp, 5181 lxpr_dirent_t *dirtab, int dirtablen) 5182 { 5183 /* bp holds one dirent64 structure */ 5184 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5185 dirent64_t *dirent = (dirent64_t *)bp; 5186 ssize_t oresid; /* save a copy for testing later */ 5187 ssize_t uresid; 5188 5189 oresid = uiop->uio_resid; 5190 5191 /* clear out the dirent buffer */ 5192 bzero(bp, sizeof (bp)); 5193 5194 /* 5195 * Satisfy user request 5196 */ 5197 while ((uresid = uiop->uio_resid) > 0) { 5198 int dirindex; 5199 off_t uoffset; 5200 int reclen; 5201 int error; 5202 5203 uoffset = uiop->uio_offset; 5204 dirindex = (uoffset / LXPR_SDSIZE) - 2; 5205 5206 if (uoffset == 0) { 5207 5208 dirent->d_ino = lxpnp->lxpr_ino; 5209 dirent->d_name[0] = '.'; 5210 dirent->d_name[1] = '\0'; 5211 reclen = DIRENT64_RECLEN(1); 5212 5213 } else if (uoffset == LXPR_SDSIZE) { 5214 5215 dirent->d_ino = lxpr_parentinode(lxpnp); 5216 dirent->d_name[0] = '.'; 5217 dirent->d_name[1] = '.'; 5218 dirent->d_name[2] = '\0'; 5219 reclen = DIRENT64_RECLEN(2); 5220 5221 } else if (dirindex >= 0 && dirindex < dirtablen) { 5222 int slen = strlen(dirtab[dirindex].d_name); 5223 5224 dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type, 5225 lxpnp->lxpr_pid, 0); 5226 5227 VERIFY(slen < LXPNSIZ); 5228 (void) strcpy(dirent->d_name, dirtab[dirindex].d_name); 5229 reclen = DIRENT64_RECLEN(slen); 5230 5231 } else { 5232 /* Run out of table entries */ 5233 if (eofp) { 5234 *eofp = 1; 5235 } 5236 return (0); 5237 } 5238 5239 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); 5240 dirent->d_reclen = (ushort_t)reclen; 5241 5242 /* 5243 * if the size of the data to transfer is greater 5244 * that that requested then we can't do it this transfer. 5245 */ 5246 if (reclen > uresid) { 5247 /* 5248 * Error if no entries have been returned yet. 5249 */ 5250 if (uresid == oresid) { 5251 return (EINVAL); 5252 } 5253 break; 5254 } 5255 5256 /* 5257 * uiomove() updates both uiop->uio_resid and uiop->uio_offset 5258 * by the same amount. But we want uiop->uio_offset to change 5259 * in increments of LXPR_SDSIZE, which is different from the 5260 * number of bytes being returned to the user. So we set 5261 * uiop->uio_offset separately, ignoring what uiomove() does. 5262 */ 5263 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5264 uiop)) != 0) 5265 return (error); 5266 5267 uiop->uio_offset = uoffset + LXPR_SDSIZE; 5268 } 5269 5270 /* Have run out of space, but could have just done last table entry */ 5271 if (eofp) { 5272 *eofp = 5273 (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0; 5274 } 5275 return (0); 5276 } 5277 5278 5279 static int 5280 lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5281 { 5282 /* bp holds one dirent64 structure */ 5283 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5284 dirent64_t *dirent = (dirent64_t *)bp; 5285 ssize_t oresid; /* save a copy for testing later */ 5286 ssize_t uresid; 5287 off_t uoffset; 5288 zoneid_t zoneid; 5289 pid_t pid; 5290 int error; 5291 int ceof; 5292 5293 ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR); 5294 5295 oresid = uiop->uio_resid; 5296 zoneid = LXPTOZ(lxpnp)->zone_id; 5297 5298 /* 5299 * We return directory entries in the order: "." and ".." then the 5300 * unique lxproc files, then the directories corresponding to the 5301 * running processes. We have defined this as the ordering because 5302 * it allows us to more easily keep track of where we are betwen calls 5303 * to getdents(). If the number of processes changes between calls 5304 * then we can't lose track of where we are in the lxproc files. 5305 */ 5306 5307 /* Do the fixed entries */ 5308 error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir, 5309 PROCDIRFILES); 5310 5311 /* Finished if we got an error or if we couldn't do all the table */ 5312 if (error != 0 || ceof == 0) 5313 return (error); 5314 5315 /* clear out the dirent buffer */ 5316 bzero(bp, sizeof (bp)); 5317 5318 /* Do the process entries */ 5319 while ((uresid = uiop->uio_resid) > 0) { 5320 proc_t *p; 5321 int len; 5322 int reclen; 5323 int i; 5324 5325 uoffset = uiop->uio_offset; 5326 5327 /* 5328 * Stop when entire proc table has been examined. 5329 */ 5330 i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES; 5331 if (i < 0 || i >= v.v_proc) { 5332 /* Run out of table entries */ 5333 if (eofp) { 5334 *eofp = 1; 5335 } 5336 return (0); 5337 } 5338 mutex_enter(&pidlock); 5339 5340 /* 5341 * Skip indices for which there is no pid_entry, PIDs for 5342 * which there is no corresponding process, a PID of 0, 5343 * and anything the security policy doesn't allow 5344 * us to look at. 5345 */ 5346 if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL || 5347 p->p_pid == 0 || 5348 secpolicy_basic_procinfo(CRED(), p, curproc) != 0) { 5349 mutex_exit(&pidlock); 5350 goto next; 5351 } 5352 mutex_exit(&pidlock); 5353 5354 /* 5355 * Convert pid to the Linux default of 1 if we're the zone's 5356 * init process, or 0 if zsched, otherwise use the value from 5357 * the proc structure 5358 */ 5359 if (p->p_pid == curproc->p_zone->zone_proc_initpid) { 5360 pid = 1; 5361 } else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) { 5362 pid = 0; 5363 } else { 5364 pid = p->p_pid; 5365 } 5366 5367 /* 5368 * If this /proc was mounted in the global zone, view 5369 * all procs; otherwise, only view zone member procs. 5370 */ 5371 if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) { 5372 goto next; 5373 } 5374 5375 ASSERT(p->p_stat != 0); 5376 5377 dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0); 5378 len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid); 5379 ASSERT(len < LXPNSIZ); 5380 reclen = DIRENT64_RECLEN(len); 5381 5382 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); 5383 dirent->d_reclen = (ushort_t)reclen; 5384 5385 /* 5386 * if the size of the data to transfer is greater 5387 * that that requested then we can't do it this transfer. 5388 */ 5389 if (reclen > uresid) { 5390 /* 5391 * Error if no entries have been returned yet. 5392 */ 5393 if (uresid == oresid) 5394 return (EINVAL); 5395 break; 5396 } 5397 5398 /* 5399 * uiomove() updates both uiop->uio_resid and uiop->uio_offset 5400 * by the same amount. But we want uiop->uio_offset to change 5401 * in increments of LXPR_SDSIZE, which is different from the 5402 * number of bytes being returned to the user. So we set 5403 * uiop->uio_offset separately, in the increment of this for 5404 * the loop, ignoring what uiomove() does. 5405 */ 5406 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5407 uiop)) != 0) 5408 return (error); 5409 next: 5410 uiop->uio_offset = uoffset + LXPR_SDSIZE; 5411 } 5412 5413 if (eofp != NULL) { 5414 *eofp = (uiop->uio_offset >= 5415 ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0; 5416 } 5417 5418 return (0); 5419 } 5420 5421 static int 5422 lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5423 { 5424 proc_t *p; 5425 pid_t find_pid; 5426 5427 ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR); 5428 5429 /* can't read its contents if it died */ 5430 mutex_enter(&pidlock); 5431 5432 if (lxpnp->lxpr_pid == 1) { 5433 find_pid = curproc->p_zone->zone_proc_initpid; 5434 } else if (lxpnp->lxpr_pid == 0) { 5435 find_pid = curproc->p_zone->zone_zsched->p_pid; 5436 } else { 5437 find_pid = lxpnp->lxpr_pid; 5438 } 5439 p = prfind(find_pid); 5440 5441 if (p == NULL || p->p_stat == SIDL) { 5442 mutex_exit(&pidlock); 5443 return (ENOENT); 5444 } 5445 mutex_exit(&pidlock); 5446 5447 return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES)); 5448 } 5449 5450 static int 5451 lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5452 { 5453 ASSERT(lxpnp->lxpr_type == LXPR_NETDIR); 5454 return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES)); 5455 } 5456 5457 static int 5458 lxpr_readdir_taskdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5459 { 5460 /* bp holds one dirent64 structure */ 5461 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5462 dirent64_t *dirent = (dirent64_t *)bp; 5463 ssize_t oresid; /* save a copy for testing later */ 5464 ssize_t uresid; 5465 off_t uoffset; 5466 int error; 5467 int ceof; 5468 proc_t *p; 5469 int tiddirsize = -1; 5470 int tasknum; 5471 pid_t real_pid; 5472 kthread_t *t; 5473 boolean_t branded = B_FALSE; 5474 5475 ASSERT(lxpnp->lxpr_type == LXPR_PID_TASKDIR); 5476 5477 oresid = uiop->uio_resid; 5478 5479 real_pid = get_real_pid(lxpnp->lxpr_pid); 5480 p = lxpr_lock(real_pid); 5481 5482 /* can't read its contents if it died */ 5483 if (p == NULL) { 5484 return (ENOENT); 5485 } 5486 if (p->p_stat == SIDL) { 5487 lxpr_unlock(p); 5488 return (ENOENT); 5489 } 5490 5491 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) 5492 tiddirsize = 0; 5493 5494 branded = (p->p_brand == &lx_brand); 5495 /* 5496 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from 5497 * going away while we iterate over its threads. 5498 */ 5499 mutex_exit(&p->p_lock); 5500 5501 if (tiddirsize == -1) 5502 tiddirsize = p->p_lwpcnt; 5503 5504 /* Do the fixed entries (in this case just "." & "..") */ 5505 error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0); 5506 5507 /* Finished if we got an error or if we couldn't do all the table */ 5508 if (error != 0 || ceof == 0) 5509 goto out; 5510 5511 if ((t = p->p_tlist) == NULL) { 5512 if (eofp != NULL) 5513 *eofp = 1; 5514 goto out; 5515 } 5516 5517 /* clear out the dirent buffer */ 5518 bzero(bp, sizeof (bp)); 5519 5520 /* 5521 * Loop until user's request is satisfied or until all thread's have 5522 * been returned. 5523 */ 5524 for (tasknum = 0; (uresid = uiop->uio_resid) > 0; tasknum++) { 5525 int i; 5526 int reclen; 5527 int len; 5528 uint_t emul_tid; 5529 lx_lwp_data_t *lwpd; 5530 5531 uoffset = uiop->uio_offset; 5532 5533 /* 5534 * Stop at the end of the thread list 5535 */ 5536 i = (uoffset / LXPR_SDSIZE) - 2; 5537 if (i < 0 || i >= tiddirsize) { 5538 if (eofp) { 5539 *eofp = 1; 5540 } 5541 goto out; 5542 } 5543 5544 if (i != tasknum) 5545 goto next; 5546 5547 if (!branded) { 5548 /* 5549 * Emulating the goofy linux task model is impossible 5550 * to do for native processes. We can compromise by 5551 * presenting only the main thread to the consumer. 5552 */ 5553 emul_tid = p->p_pid; 5554 } else { 5555 if ((lwpd = ttolxlwp(t)) == NULL) { 5556 goto next; 5557 } 5558 emul_tid = lwpd->br_pid; 5559 /* 5560 * Convert pid to Linux default of 1 if we're the 5561 * zone's init. 5562 */ 5563 if (emul_tid == curproc->p_zone->zone_proc_initpid) 5564 emul_tid = 1; 5565 } 5566 5567 dirent->d_ino = lxpr_inode(LXPR_PID_TASK_IDDIR, lxpnp->lxpr_pid, 5568 emul_tid); 5569 len = snprintf(dirent->d_name, LXPNSIZ, "%d", emul_tid); 5570 ASSERT(len < LXPNSIZ); 5571 reclen = DIRENT64_RECLEN(len); 5572 5573 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); 5574 dirent->d_reclen = (ushort_t)reclen; 5575 5576 if (reclen > uresid) { 5577 /* 5578 * Error if no entries have been returned yet. 5579 */ 5580 if (uresid == oresid) 5581 error = EINVAL; 5582 goto out; 5583 } 5584 5585 /* 5586 * uiomove() updates both uiop->uio_resid and uiop->uio_offset 5587 * by the same amount. But we want uiop->uio_offset to change 5588 * in increments of LXPR_SDSIZE, which is different from the 5589 * number of bytes being returned to the user. So we set 5590 * uiop->uio_offset separately, in the increment of this for 5591 * the loop, ignoring what uiomove() does. 5592 */ 5593 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5594 uiop)) != 0) 5595 goto out; 5596 5597 next: 5598 uiop->uio_offset = uoffset + LXPR_SDSIZE; 5599 5600 if ((t = t->t_forw) == p->p_tlist || !branded) { 5601 if (eofp != NULL) 5602 *eofp = 1; 5603 goto out; 5604 } 5605 } 5606 5607 if (eofp != NULL) 5608 *eofp = 0; 5609 5610 out: 5611 mutex_enter(&p->p_lock); 5612 lxpr_unlock(p); 5613 return (error); 5614 } 5615 5616 static int 5617 lxpr_readdir_task_tid_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5618 { 5619 proc_t *p; 5620 pid_t real_pid; 5621 kthread_t *t; 5622 5623 ASSERT(lxpnp->lxpr_type == LXPR_PID_TASK_IDDIR); 5624 5625 mutex_enter(&pidlock); 5626 5627 real_pid = get_real_pid(lxpnp->lxpr_pid); 5628 p = prfind(real_pid); 5629 5630 /* can't read its contents if it died */ 5631 if (p == NULL || p->p_stat == SIDL) { 5632 mutex_exit(&pidlock); 5633 return (ENOENT); 5634 } 5635 5636 mutex_exit(&pidlock); 5637 5638 /* need to confirm tid is still there */ 5639 t = lxpr_get_thread(p, lxpnp->lxpr_desc); 5640 if (t == NULL) { 5641 /* we can't find this specific thread */ 5642 return (NULL); 5643 } 5644 thread_unlock(t); 5645 5646 return (lxpr_readdir_common(lxpnp, uiop, eofp, tiddir, TIDDIRFILES)); 5647 } 5648 5649 static int 5650 lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5651 { 5652 /* bp holds one dirent64 structure */ 5653 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5654 dirent64_t *dirent = (dirent64_t *)bp; 5655 ssize_t oresid; /* save a copy for testing later */ 5656 ssize_t uresid; 5657 off_t uoffset; 5658 int error; 5659 int ceof; 5660 proc_t *p; 5661 int fddirsize = -1; 5662 uf_info_t *fip; 5663 5664 ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR || 5665 lxpnp->lxpr_type == LXPR_PID_TID_FDDIR); 5666 5667 oresid = uiop->uio_resid; 5668 5669 /* can't read its contents if it died */ 5670 p = lxpr_lock(lxpnp->lxpr_pid); 5671 if (p == NULL) 5672 return (ENOENT); 5673 5674 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) 5675 fddirsize = 0; 5676 5677 /* 5678 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from 5679 * going away while we iterate over its fi_list. 5680 */ 5681 mutex_exit(&p->p_lock); 5682 5683 /* Get open file info */ 5684 fip = (&(p)->p_user.u_finfo); 5685 mutex_enter(&fip->fi_lock); 5686 5687 if (fddirsize == -1) 5688 fddirsize = fip->fi_nfiles; 5689 5690 /* Do the fixed entries (in this case just "." & "..") */ 5691 error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0); 5692 5693 /* Finished if we got an error or if we couldn't do all the table */ 5694 if (error != 0 || ceof == 0) 5695 goto out; 5696 5697 /* clear out the dirent buffer */ 5698 bzero(bp, sizeof (bp)); 5699 5700 /* 5701 * Loop until user's request is satisfied or until 5702 * all file descriptors have been examined. 5703 */ 5704 for (; (uresid = uiop->uio_resid) > 0; 5705 uiop->uio_offset = uoffset + LXPR_SDSIZE) { 5706 int reclen; 5707 int fd; 5708 int len; 5709 5710 uoffset = uiop->uio_offset; 5711 5712 /* 5713 * Stop at the end of the fd list 5714 */ 5715 fd = (uoffset / LXPR_SDSIZE) - 2; 5716 if (fd < 0 || fd >= fddirsize) { 5717 if (eofp) { 5718 *eofp = 1; 5719 } 5720 goto out; 5721 } 5722 5723 if (fip->fi_list[fd].uf_file == NULL) 5724 continue; 5725 5726 dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd); 5727 len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd); 5728 ASSERT(len < LXPNSIZ); 5729 reclen = DIRENT64_RECLEN(len); 5730 5731 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); 5732 dirent->d_reclen = (ushort_t)reclen; 5733 5734 if (reclen > uresid) { 5735 /* 5736 * Error if no entries have been returned yet. 5737 */ 5738 if (uresid == oresid) 5739 error = EINVAL; 5740 goto out; 5741 } 5742 5743 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5744 uiop)) != 0) 5745 goto out; 5746 } 5747 5748 if (eofp != NULL) { 5749 *eofp = 5750 (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0; 5751 } 5752 5753 out: 5754 mutex_exit(&fip->fi_lock); 5755 mutex_enter(&p->p_lock); 5756 lxpr_unlock(p); 5757 return (error); 5758 } 5759 5760 static int 5761 lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5762 { 5763 ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR); 5764 return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES)); 5765 } 5766 5767 static int 5768 lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5769 { 5770 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR); 5771 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir, 5772 SYS_FSDIRFILES)); 5773 } 5774 5775 static int 5776 lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5777 { 5778 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR); 5779 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir, 5780 SYS_FS_INOTIFYDIRFILES)); 5781 } 5782 5783 static int 5784 lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5785 { 5786 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR); 5787 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir, 5788 SYS_KERNELDIRFILES)); 5789 } 5790 5791 static int 5792 lxpr_readdir_sys_kdir_randdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5793 { 5794 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RANDDIR); 5795 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_randdir, 5796 SYS_RANDDIRFILES)); 5797 } 5798 5799 static int 5800 lxpr_readdir_sys_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5801 { 5802 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NETDIR); 5803 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_netdir, 5804 SYS_NETDIRFILES)); 5805 } 5806 5807 static int 5808 lxpr_readdir_sys_net_coredir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5809 { 5810 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_COREDIR); 5811 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_coredir, 5812 SYS_NET_COREDIRFILES)); 5813 } 5814 5815 static int 5816 lxpr_readdir_sys_vmdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5817 { 5818 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VMDIR); 5819 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_vmdir, 5820 SYS_VMDIRFILES)); 5821 } 5822 5823 static int 5824 lxpr_write_sys_net_core_somaxc(lxpr_node_t *lxpnp, struct uio *uio, 5825 struct cred *cr, caller_context_t *ct) 5826 { 5827 int error; 5828 int res = 0; 5829 size_t olen; 5830 char val[16]; /* big enough for a uint numeric string */ 5831 netstack_t *ns; 5832 mod_prop_info_t *ptbl = NULL; 5833 mod_prop_info_t *pinfo = NULL; 5834 5835 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON); 5836 5837 if (uio->uio_loffset != 0) 5838 return (EINVAL); 5839 5840 if (uio->uio_resid == 0) 5841 return (0); 5842 5843 olen = uio->uio_resid; 5844 if (olen > sizeof (val) - 1) 5845 return (EINVAL); 5846 5847 bzero(val, sizeof (val)); 5848 error = uiomove(val, olen, UIO_WRITE, uio); 5849 if (error != 0) 5850 return (error); 5851 5852 if (val[olen - 1] == '\n') 5853 val[olen - 1] = '\0'; 5854 5855 if (val[0] == '\0') /* no input */ 5856 return (EINVAL); 5857 5858 ns = netstack_get_current(); 5859 if (ns == NULL) 5860 return (EINVAL); 5861 5862 ptbl = ns->netstack_tcp->tcps_propinfo_tbl; 5863 pinfo = mod_prop_lookup(ptbl, "_conn_req_max_q", MOD_PROTO_TCP); 5864 if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, val, 0) != 0) 5865 res = EINVAL; 5866 5867 netstack_rele(ns); 5868 return (res); 5869 } 5870 5871 /* ARGSUSED */ 5872 static int 5873 lxpr_write_sys_kernel_corepatt(lxpr_node_t *lxpnp, struct uio *uio, 5874 struct cred *cr, caller_context_t *ct) 5875 { 5876 zone_t *zone = curproc->p_zone; 5877 struct core_globals *cg; 5878 refstr_t *rp, *nrp; 5879 corectl_path_t *ccp; 5880 char val[MAXPATHLEN]; 5881 char valtr[MAXPATHLEN]; 5882 size_t olen; 5883 int error; 5884 5885 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT); 5886 5887 cg = zone_getspecific(core_zone_key, zone); 5888 ASSERT(cg != NULL); 5889 5890 if (secpolicy_coreadm(cr) != 0) 5891 return (EPERM); 5892 5893 if (uio->uio_loffset != 0) 5894 return (EINVAL); 5895 5896 if (uio->uio_resid == 0) 5897 return (0); 5898 5899 olen = uio->uio_resid; 5900 if (olen > sizeof (val) - 1) 5901 return (EINVAL); 5902 5903 bzero(val, sizeof (val)); 5904 error = uiomove(val, olen, UIO_WRITE, uio); 5905 if (error != 0) 5906 return (error); 5907 5908 if (val[olen - 1] == '\n') 5909 val[olen - 1] = '\0'; 5910 5911 if (val[0] == '|') 5912 return (EINVAL); 5913 5914 if ((error = lxpr_core_path_l2s(val, valtr, sizeof (valtr))) != 0) 5915 return (error); 5916 5917 nrp = refstr_alloc(valtr); 5918 5919 ccp = cg->core_default_path; 5920 mutex_enter(&ccp->ccp_mtx); 5921 rp = ccp->ccp_path; 5922 refstr_hold((ccp->ccp_path = nrp)); 5923 cg->core_options |= CC_PROCESS_PATH; 5924 mutex_exit(&ccp->ccp_mtx); 5925 5926 if (rp != NULL) 5927 refstr_rele(rp); 5928 5929 return (0); 5930 } 5931 5932 /* 5933 * lxpr_readlink(): Vnode operation for VOP_READLINK() 5934 */ 5935 /* ARGSUSED */ 5936 static int 5937 lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct) 5938 { 5939 char bp[MAXPATHLEN + 1]; 5940 size_t buflen = sizeof (bp); 5941 lxpr_node_t *lxpnp = VTOLXP(vp); 5942 vnode_t *rvp = lxpnp->lxpr_realvp; 5943 pid_t pid; 5944 int error = 0; 5945 5946 /* 5947 * Linux does something very "clever" for /proc/<pid>/fd/<num> entries. 5948 * Open FDs are represented as symlinks, the link contents 5949 * corresponding to the open resource. For plain files or devices, 5950 * this isn't absurd since one can dereference the symlink to query 5951 * the underlying resource. For sockets or pipes, it becomes ugly in a 5952 * hurry. To maintain this human-readable output, those FD symlinks 5953 * point to bogus targets such as "socket:[<inodenum>]". This requires 5954 * circumventing vfs since the stat/lstat behavior on those FD entries 5955 * will be unusual. (A stat must retrieve information about the open 5956 * socket or pipe. It cannot fail because the link contents point to 5957 * an absent file.) 5958 * 5959 * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD 5960 * entries. This bypasses code paths which would normally 5961 * short-circuit on symlinks and allows us to emulate the vfs behavior 5962 * expected by /proc consumers. 5963 */ 5964 if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD) 5965 return (EINVAL); 5966 5967 /* Try to produce a symlink name for anything that has a realvp */ 5968 if (rvp != NULL) { 5969 if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0) 5970 return (error); 5971 if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) { 5972 /* 5973 * Special handling possible for /proc/<pid>/fd/<num> 5974 * Generate <type>:[<inode>] links, if allowed. 5975 */ 5976 if (lxpnp->lxpr_type != LXPR_PID_FD_FD || 5977 lxpr_readlink_fdnode(lxpnp, bp, buflen) != 0) { 5978 return (error); 5979 } 5980 } 5981 } else { 5982 switch (lxpnp->lxpr_type) { 5983 case LXPR_SELF: 5984 /* 5985 * Convert pid to the Linux default of 1 if we're the 5986 * zone's init process or 0 if zsched. 5987 */ 5988 if (curproc->p_pid == 5989 curproc->p_zone->zone_proc_initpid) { 5990 pid = 1; 5991 } else if (curproc->p_pid == 5992 curproc->p_zone->zone_zsched->p_pid) { 5993 pid = 0; 5994 } else { 5995 pid = curproc->p_pid; 5996 } 5997 5998 /* 5999 * Don't need to check result as every possible int 6000 * will fit within MAXPATHLEN bytes. 6001 */ 6002 (void) snprintf(bp, buflen, "%d", pid); 6003 break; 6004 case LXPR_PID_CURDIR: 6005 case LXPR_PID_ROOTDIR: 6006 case LXPR_PID_EXE: 6007 return (EACCES); 6008 default: 6009 /* 6010 * Need to return error so that nothing thinks 6011 * that the symlink is empty and hence "." 6012 */ 6013 return (EINVAL); 6014 } 6015 } 6016 6017 /* copy the link data to user space */ 6018 return (uiomove(bp, strlen(bp), UIO_READ, uiop)); 6019 } 6020 6021 6022 /* 6023 * lxpr_inactive(): Vnode operation for VOP_INACTIVE() 6024 * Vnode is no longer referenced, deallocate the file 6025 * and all its resources. 6026 */ 6027 /* ARGSUSED */ 6028 static void 6029 lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 6030 { 6031 lxpr_freenode(VTOLXP(vp)); 6032 } 6033 6034 /* 6035 * lxpr_sync(): Vnode operation for VOP_SYNC() 6036 */ 6037 static int 6038 lxpr_sync() 6039 { 6040 /* 6041 * Nothing to sync but this function must never fail 6042 */ 6043 return (0); 6044 } 6045 6046 /* 6047 * lxpr_cmp(): Vnode operation for VOP_CMP() 6048 */ 6049 static int 6050 lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct) 6051 { 6052 vnode_t *rvp; 6053 6054 while (vn_matchops(vp1, lxpr_vnodeops) && 6055 (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) { 6056 vp1 = rvp; 6057 } 6058 6059 while (vn_matchops(vp2, lxpr_vnodeops) && 6060 (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) { 6061 vp2 = rvp; 6062 } 6063 6064 if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops)) 6065 return (vp1 == vp2); 6066 return (VOP_CMP(vp1, vp2, ct)); 6067 } 6068 6069 /* 6070 * lxpr_realvp(): Vnode operation for VOP_REALVP() 6071 */ 6072 static int 6073 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct) 6074 { 6075 vnode_t *rvp; 6076 6077 if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) { 6078 vp = rvp; 6079 if (VOP_REALVP(vp, &rvp, ct) == 0) 6080 vp = rvp; 6081 } 6082 6083 *vpp = vp; 6084 return (0); 6085 } 6086 6087 static int 6088 lxpr_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 6089 caller_context_t *ct) 6090 { 6091 lxpr_node_t *lxpnp = VTOLXP(vp); 6092 lxpr_nodetype_t type = lxpnp->lxpr_type; 6093 6094 switch (type) { 6095 case LXPR_SYS_KERNEL_COREPATT: 6096 return (lxpr_write_sys_kernel_corepatt(lxpnp, uiop, cr, ct)); 6097 case LXPR_SYS_NET_CORE_SOMAXCON: 6098 return (lxpr_write_sys_net_core_somaxc(lxpnp, uiop, cr, ct)); 6099 6100 default: 6101 /* pretend we wrote the whole thing */ 6102 uiop->uio_offset += uiop->uio_resid; 6103 uiop->uio_resid = 0; 6104 return (0); 6105 } 6106 } 6107 6108 /* 6109 * We need to allow open with O_CREAT for the oom_score_adj file. 6110 */ 6111 /*ARGSUSED7*/ 6112 static int 6113 lxpr_create(struct vnode *dvp, char *nm, struct vattr *vap, 6114 enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred, 6115 int flag, caller_context_t *ct, vsecattr_t *vsecp) 6116 { 6117 lxpr_node_t *lxpnp = VTOLXP(dvp); 6118 lxpr_nodetype_t type = lxpnp->lxpr_type; 6119 vnode_t *vp = NULL; 6120 int error; 6121 6122 ASSERT(type < LXPR_NFILES); 6123 6124 /* 6125 * restrict create permission to owner or root 6126 */ 6127 if ((error = lxpr_access(dvp, VEXEC, 0, cred, ct)) != 0) { 6128 return (error); 6129 } 6130 6131 if (*nm == '\0') 6132 return (EPERM); 6133 6134 if (dvp->v_type != VDIR) 6135 return (EPERM); 6136 6137 if (exclusive == EXCL) 6138 return (EEXIST); 6139 6140 /* 6141 * We're currently restricting O_CREAT to: 6142 * - /proc/<pid>/fd/<num> 6143 * - /proc/<pid>/oom_score_adj 6144 * - /proc/<pid>/task/<tid>/fd/<num> 6145 * - /proc/<pid>/task/<tid>/oom_score_adj 6146 * - /proc/sys/kernel/core_pattern 6147 * - /proc/sys/net/core/somaxconn 6148 * - /proc/sys/vm/overcommit_memory 6149 * - /proc/sys/vm/swappiness 6150 */ 6151 switch (type) { 6152 case LXPR_PIDDIR: 6153 case LXPR_PID_TASK_IDDIR: 6154 if (strcmp(nm, "oom_score_adj") == 0) { 6155 proc_t *p; 6156 p = lxpr_lock(lxpnp->lxpr_pid); 6157 if (p != NULL) { 6158 vp = lxpr_lookup_common(dvp, nm, p, piddir, 6159 PIDDIRFILES); 6160 } 6161 lxpr_unlock(p); 6162 } 6163 break; 6164 6165 case LXPR_SYS_NET_COREDIR: 6166 if (strcmp(nm, "somaxconn") == 0) { 6167 vp = lxpr_lookup_common(dvp, nm, NULL, sys_net_coredir, 6168 SYS_NET_COREDIRFILES); 6169 } 6170 break; 6171 6172 case LXPR_SYS_KERNELDIR: 6173 if (strcmp(nm, "core_pattern") == 0) { 6174 vp = lxpr_lookup_common(dvp, nm, NULL, sys_kerneldir, 6175 SYS_KERNELDIRFILES); 6176 } 6177 break; 6178 6179 case LXPR_SYS_VMDIR: 6180 if (strcmp(nm, "overcommit_memory") == 0 || 6181 strcmp(nm, "swappiness") == 0) { 6182 vp = lxpr_lookup_common(dvp, nm, NULL, sys_vmdir, 6183 SYS_VMDIRFILES); 6184 } 6185 break; 6186 6187 case LXPR_PID_FDDIR: 6188 case LXPR_PID_TID_FDDIR: 6189 vp = lxpr_lookup_fdnode(dvp, nm); 6190 break; 6191 6192 default: 6193 vp = NULL; 6194 break; 6195 } 6196 6197 if (vp != NULL) { 6198 /* Creating an existing file, allow it for regular files. */ 6199 if (vp->v_type == VDIR) 6200 return (EISDIR); 6201 6202 /* confirm permissions against existing file */ 6203 if ((error = lxpr_access(vp, mode, 0, cred, ct)) != 0) { 6204 VN_RELE(vp); 6205 return (error); 6206 } 6207 6208 *vpp = vp; 6209 return (0); 6210 } 6211 6212 /* 6213 * Linux proc does not allow creation of addition, non-subsystem 6214 * specific files inside the hierarchy. ENOENT is tossed when such 6215 * actions are attempted. 6216 */ 6217 return (ENOENT); 6218 }