1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2016 Joyent, Inc. 25 */ 26 27 /* 28 * lx_proc -- a Linux-compatible /proc for the LX brand 29 * 30 * We have -- confusingly -- two implementations of Linux /proc. One is to 31 * support native (but Linux-borne) programs that wish to view the native 32 * system through the Linux /proc model; the other -- this one -- is to 33 * support Linux binaries via the LX brand. These two implementations differ 34 * greatly in their aspirations (and their willingness to bend the truth 35 * of the system to accommodate those aspirations); they should not be unified. 
36 */ 37 38 #include <sys/cpupart.h> 39 #include <sys/cpuvar.h> 40 #include <sys/session.h> 41 #include <sys/vmparam.h> 42 #include <sys/mman.h> 43 #include <vm/rm.h> 44 #include <vm/seg_vn.h> 45 #include <sys/sdt.h> 46 #include <lx_signum.h> 47 #include <sys/strlog.h> 48 #include <sys/stropts.h> 49 #include <sys/cmn_err.h> 50 #include <sys/lx_brand.h> 51 #include <lx_auxv.h> 52 #include <sys/x86_archext.h> 53 #include <sys/archsystm.h> 54 #include <sys/fp.h> 55 #include <sys/pool_pset.h> 56 #include <sys/pset.h> 57 #include <sys/zone.h> 58 #include <sys/pghw.h> 59 #include <sys/vfs_opreg.h> 60 #include <sys/param.h> 61 #include <sys/utsname.h> 62 #include <sys/rctl.h> 63 #include <sys/kstat.h> 64 #include <sys/lx_misc.h> 65 #include <sys/brand.h> 66 #include <sys/cred_impl.h> 67 #include <sys/tihdr.h> 68 #include <sys/corectl.h> 69 #include <inet/ip.h> 70 #include <inet/ip_ire.h> 71 #include <inet/ip6.h> 72 #include <inet/ip_if.h> 73 #include <inet/tcp.h> 74 #include <inet/tcp_impl.h> 75 #include <inet/udp_impl.h> 76 #include <inet/ipclassifier.h> 77 #include <sys/socketvar.h> 78 #include <fs/sockfs/socktpi.h> 79 80 /* Dependent on procfs */ 81 extern kthread_t *prchoose(proc_t *); 82 extern int prreadargv(proc_t *, char *, size_t, size_t *); 83 extern int prreadenvv(proc_t *, char *, size_t, size_t *); 84 extern int prreadbuf(proc_t *, uintptr_t, uint8_t *, size_t, size_t *); 85 86 #include "lx_proc.h" 87 88 extern pgcnt_t swapfs_minfree; 89 extern time_t boot_time; 90 91 /* 92 * Pointer to the vnode ops vector for this fs. 
93 * This is instantiated in lxprinit() in lxpr_vfsops.c 94 */ 95 vnodeops_t *lxpr_vnodeops; 96 97 static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *); 98 static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *, 99 caller_context_t *); 100 static int lxpr_create(struct vnode *, char *, struct vattr *, enum vcexcl, 101 int, struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *); 102 static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *); 103 static int lxpr_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *); 104 static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *, 105 caller_context_t *); 106 static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *); 107 static int lxpr_lookup(vnode_t *, char *, vnode_t **, 108 pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *, 109 pathname_t *); 110 static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *, 111 caller_context_t *, int); 112 static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *); 113 static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *); 114 static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *); 115 static int lxpr_sync(void); 116 static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *); 117 118 static vnode_t *lxpr_lookup_procdir(vnode_t *, char *); 119 static vnode_t *lxpr_lookup_piddir(vnode_t *, char *); 120 static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *); 121 static vnode_t *lxpr_lookup_fddir(vnode_t *, char *); 122 static vnode_t *lxpr_lookup_netdir(vnode_t *, char *); 123 static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *); 124 static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *); 125 static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *); 126 static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *); 127 static vnode_t *lxpr_lookup_sys_kdir_randdir(vnode_t *, char *); 128 static vnode_t *lxpr_lookup_sys_netdir(vnode_t *, 
char *); 129 static vnode_t *lxpr_lookup_sys_net_coredir(vnode_t *, char *); 130 static vnode_t *lxpr_lookup_sys_vmdir(vnode_t *, char *); 131 static vnode_t *lxpr_lookup_taskdir(vnode_t *, char *); 132 static vnode_t *lxpr_lookup_task_tid_dir(vnode_t *, char *); 133 134 static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *); 135 static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *); 136 static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *); 137 static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *); 138 static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *); 139 static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *); 140 static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *); 141 static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *); 142 static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *); 143 static int lxpr_readdir_sys_kdir_randdir(lxpr_node_t *, uio_t *, int *); 144 static int lxpr_readdir_sys_netdir(lxpr_node_t *, uio_t *, int *); 145 static int lxpr_readdir_sys_net_coredir(lxpr_node_t *, uio_t *, int *); 146 static int lxpr_readdir_sys_vmdir(lxpr_node_t *, uio_t *, int *); 147 static int lxpr_readdir_taskdir(lxpr_node_t *, uio_t *, int *); 148 static int lxpr_readdir_task_tid_dir(lxpr_node_t *, uio_t *, int *); 149 150 static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *); 151 static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *); 152 static void lxpr_read_cgroups(lxpr_node_t *, lxpr_uiobuf_t *); 153 static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *); 154 static void lxpr_read_diskstats(lxpr_node_t *, lxpr_uiobuf_t *); 155 static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *); 156 static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *); 157 static void lxpr_read_filesystems(lxpr_node_t *, lxpr_uiobuf_t *); 158 static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t); 159 static void lxpr_read_loadavg(lxpr_node_t *, 
lxpr_uiobuf_t *); 160 static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *); 161 static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *); 162 static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *); 163 static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *); 164 static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *); 165 static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *); 166 static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *); 167 168 static void lxpr_read_pid_auxv(lxpr_node_t *, lxpr_uiobuf_t *); 169 static void lxpr_read_pid_cgroup(lxpr_node_t *, lxpr_uiobuf_t *); 170 static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *); 171 static void lxpr_read_pid_comm(lxpr_node_t *, lxpr_uiobuf_t *); 172 static void lxpr_read_pid_env(lxpr_node_t *, lxpr_uiobuf_t *); 173 static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *); 174 static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *); 175 static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *); 176 static void lxpr_read_pid_oom_scr_adj(lxpr_node_t *, lxpr_uiobuf_t *); 177 static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *); 178 static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *); 179 static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *); 180 181 static void lxpr_read_pid_tid_stat(lxpr_node_t *, lxpr_uiobuf_t *); 182 static void lxpr_read_pid_tid_status(lxpr_node_t *, lxpr_uiobuf_t *); 183 184 static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *); 185 static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *); 186 static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *); 187 static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *); 188 static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *); 189 static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *); 190 static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *); 191 static void 
lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *); 192 static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *); 193 static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *); 194 static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *); 195 static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *); 196 static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *); 197 static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *); 198 static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *); 199 static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *); 200 static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *); 201 static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *); 202 static void lxpr_read_net_tcp6(lxpr_node_t *, lxpr_uiobuf_t *); 203 static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *); 204 static void lxpr_read_net_udp6(lxpr_node_t *, lxpr_uiobuf_t *); 205 static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *); 206 static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *, 207 lxpr_uiobuf_t *); 208 static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *, 209 lxpr_uiobuf_t *); 210 static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *, 211 lxpr_uiobuf_t *); 212 static void lxpr_read_sys_kernel_caplcap(lxpr_node_t *, lxpr_uiobuf_t *); 213 static void lxpr_read_sys_kernel_corepatt(lxpr_node_t *, lxpr_uiobuf_t *); 214 static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *); 215 static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *); 216 static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *); 217 static void lxpr_read_sys_kernel_osrel(lxpr_node_t *, lxpr_uiobuf_t *); 218 static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *); 219 static void lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *, lxpr_uiobuf_t *); 220 static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, 
    lxpr_uiobuf_t *);
static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_net_core_somaxc(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_vm_minfr_kb(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_vm_nhpages(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_sys_vm_swappiness(lxpr_node_t *, lxpr_uiobuf_t *);

static int lxpr_write_sys_net_core_somaxc(lxpr_node_t *, uio_t *, cred_t *,
    caller_context_t *);
static int lxpr_write_sys_kernel_corepatt(lxpr_node_t *, uio_t *, cred_t *,
    caller_context_t *);

/*
 * Simple conversion
 */
#define	btok(x)	((x) >> 10)			/* bytes to kbytes */
#define	ptok(x)	((x) << (PAGESHIFT - 10))	/* pages to kbytes */
	/* NOTE(review): ptok assumes PAGESHIFT >= 10 -- true on x86 */

/* Fetch the brand-private lwp data hanging off a kthread. */
#define	ttolxlwp(t)	((struct lx_lwp_data *)ttolwpbrand(t))

extern rctl_hndl_t rc_zone_msgmni;
extern rctl_hndl_t rc_zone_shmmax;
/* Despite the name, this is 2^32 - 1 (UINT32_MAX), not 4GB. */
#define	FOURGB	4294967295

/*
 * The maximum length of the concatenation of argument vector strings we
 * will return to the user via the branded procfs. Likewise for the env vector.
248 */ 249 int lxpr_maxargvlen = 4096; 250 int lxpr_maxenvvlen = 4096; 251 252 /* 253 * The lx /proc vnode operations vector 254 */ 255 const fs_operation_def_t lxpr_vnodeops_template[] = { 256 VOPNAME_OPEN, { .vop_open = lxpr_open }, 257 VOPNAME_CLOSE, { .vop_close = lxpr_close }, 258 VOPNAME_READ, { .vop_read = lxpr_read }, 259 VOPNAME_WRITE, { .vop_read = lxpr_write }, 260 VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr }, 261 VOPNAME_ACCESS, { .vop_access = lxpr_access }, 262 VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup }, 263 VOPNAME_CREATE, { .vop_create = lxpr_create }, 264 VOPNAME_READDIR, { .vop_readdir = lxpr_readdir }, 265 VOPNAME_READLINK, { .vop_readlink = lxpr_readlink }, 266 VOPNAME_FSYNC, { .error = lxpr_sync }, 267 VOPNAME_SEEK, { .error = lxpr_sync }, 268 VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive }, 269 VOPNAME_CMP, { .vop_cmp = lxpr_cmp }, 270 VOPNAME_REALVP, { .vop_realvp = lxpr_realvp }, 271 NULL, NULL 272 }; 273 274 275 /* 276 * file contents of an lx /proc directory. 277 */ 278 static lxpr_dirent_t lx_procdir[] = { 279 { LXPR_CGROUPS, "cgroups" }, 280 { LXPR_CMDLINE, "cmdline" }, 281 { LXPR_CPUINFO, "cpuinfo" }, 282 { LXPR_DEVICES, "devices" }, 283 { LXPR_DISKSTATS, "diskstats" }, 284 { LXPR_DMA, "dma" }, 285 { LXPR_FILESYSTEMS, "filesystems" }, 286 { LXPR_INTERRUPTS, "interrupts" }, 287 { LXPR_IOPORTS, "ioports" }, 288 { LXPR_KCORE, "kcore" }, 289 { LXPR_KMSG, "kmsg" }, 290 { LXPR_LOADAVG, "loadavg" }, 291 { LXPR_MEMINFO, "meminfo" }, 292 { LXPR_MODULES, "modules" }, 293 { LXPR_MOUNTS, "mounts" }, 294 { LXPR_NETDIR, "net" }, 295 { LXPR_PARTITIONS, "partitions" }, 296 { LXPR_SELF, "self" }, 297 { LXPR_STAT, "stat" }, 298 { LXPR_SWAPS, "swaps" }, 299 { LXPR_SYSDIR, "sys" }, 300 { LXPR_UPTIME, "uptime" }, 301 { LXPR_VERSION, "version" } 302 }; 303 304 #define PROCDIRFILES (sizeof (lx_procdir) / sizeof (lx_procdir[0])) 305 306 /* 307 * Contents of an lx /proc/<pid> directory. 
308 */ 309 static lxpr_dirent_t piddir[] = { 310 { LXPR_PID_AUXV, "auxv" }, 311 { LXPR_PID_CGROUP, "cgroup" }, 312 { LXPR_PID_CMDLINE, "cmdline" }, 313 { LXPR_PID_COMM, "comm" }, 314 { LXPR_PID_CPU, "cpu" }, 315 { LXPR_PID_CURDIR, "cwd" }, 316 { LXPR_PID_ENV, "environ" }, 317 { LXPR_PID_EXE, "exe" }, 318 { LXPR_PID_LIMITS, "limits" }, 319 { LXPR_PID_MAPS, "maps" }, 320 { LXPR_PID_MEM, "mem" }, 321 { LXPR_PID_MOUNTINFO, "mountinfo" }, 322 { LXPR_PID_OOM_SCR_ADJ, "oom_score_adj" }, 323 { LXPR_PID_ROOTDIR, "root" }, 324 { LXPR_PID_STAT, "stat" }, 325 { LXPR_PID_STATM, "statm" }, 326 { LXPR_PID_STATUS, "status" }, 327 { LXPR_PID_TASKDIR, "task" }, 328 { LXPR_PID_FDDIR, "fd" } 329 }; 330 331 #define PIDDIRFILES (sizeof (piddir) / sizeof (piddir[0])) 332 333 /* 334 * Contents of an lx /proc/<pid>/task/<tid> directory. 335 */ 336 static lxpr_dirent_t tiddir[] = { 337 { LXPR_PID_TID_AUXV, "auxv" }, 338 { LXPR_PID_CGROUP, "cgroup" }, 339 { LXPR_PID_CMDLINE, "cmdline" }, 340 { LXPR_PID_TID_COMM, "comm" }, 341 { LXPR_PID_CPU, "cpu" }, 342 { LXPR_PID_CURDIR, "cwd" }, 343 { LXPR_PID_ENV, "environ" }, 344 { LXPR_PID_EXE, "exe" }, 345 { LXPR_PID_LIMITS, "limits" }, 346 { LXPR_PID_MAPS, "maps" }, 347 { LXPR_PID_MEM, "mem" }, 348 { LXPR_PID_MOUNTINFO, "mountinfo" }, 349 { LXPR_PID_TID_OOM_SCR_ADJ, "oom_score_adj" }, 350 { LXPR_PID_ROOTDIR, "root" }, 351 { LXPR_PID_TID_STAT, "stat" }, 352 { LXPR_PID_STATM, "statm" }, 353 { LXPR_PID_TID_STATUS, "status" }, 354 { LXPR_PID_FDDIR, "fd" } 355 }; 356 357 #define TIDDIRFILES (sizeof (tiddir) / sizeof (tiddir[0])) 358 359 #define LX_RLIM_INFINITY 0xFFFFFFFFFFFFFFFF 360 361 #define RCTL_INFINITE(x) \ 362 ((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \ 363 (x->rcv_flagaction & RCTL_GLOBAL_INFINITE)) 364 365 typedef struct lxpr_rlimtab { 366 char *rlim_name; /* limit name */ 367 char *rlim_unit; /* limit unit */ 368 char *rlim_rctl; /* rctl source */ 369 } lxpr_rlimtab_t; 370 371 static lxpr_rlimtab_t lxpr_rlimtab[] = { 372 { "Max cpu time", 
"seconds", "process.max-cpu-time" }, 373 { "Max file size", "bytes", "process.max-file-size" }, 374 { "Max data size", "bytes", "process.max-data-size" }, 375 { "Max stack size", "bytes", "process.max-stack-size" }, 376 { "Max core file size", "bytes", "process.max-core-size" }, 377 { "Max resident set", "bytes", "zone.max-physical-memory" }, 378 { "Max processes", "processes", "zone.max-lwps" }, 379 { "Max open files", "files", "process.max-file-descriptor" }, 380 { "Max locked memory", "bytes", "zone.max-locked-memory" }, 381 { "Max address space", "bytes", "process.max-address-space" }, 382 { "Max file locks", "locks", NULL }, 383 { "Max pending signals", "signals", 384 "process.max-sigqueue-size" }, 385 { "Max msgqueue size", "bytes", "process.max-msg-messages" }, 386 { NULL, NULL, NULL } 387 }; 388 389 390 /* 391 * contents of lx /proc/net directory 392 */ 393 static lxpr_dirent_t netdir[] = { 394 { LXPR_NET_ARP, "arp" }, 395 { LXPR_NET_DEV, "dev" }, 396 { LXPR_NET_DEV_MCAST, "dev_mcast" }, 397 { LXPR_NET_IF_INET6, "if_inet6" }, 398 { LXPR_NET_IGMP, "igmp" }, 399 { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" }, 400 { LXPR_NET_IP_MR_VIF, "ip_mr_vif" }, 401 { LXPR_NET_IPV6_ROUTE, "ipv6_route" }, 402 { LXPR_NET_MCFILTER, "mcfilter" }, 403 { LXPR_NET_NETSTAT, "netstat" }, 404 { LXPR_NET_RAW, "raw" }, 405 { LXPR_NET_ROUTE, "route" }, 406 { LXPR_NET_RPC, "rpc" }, 407 { LXPR_NET_RT_CACHE, "rt_cache" }, 408 { LXPR_NET_SOCKSTAT, "sockstat" }, 409 { LXPR_NET_SNMP, "snmp" }, 410 { LXPR_NET_STAT, "stat" }, 411 { LXPR_NET_TCP, "tcp" }, 412 { LXPR_NET_TCP6, "tcp6" }, 413 { LXPR_NET_UDP, "udp" }, 414 { LXPR_NET_UDP6, "udp6" }, 415 { LXPR_NET_UNIX, "unix" } 416 }; 417 418 #define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0])) 419 420 /* 421 * contents of /proc/sys directory 422 */ 423 static lxpr_dirent_t sysdir[] = { 424 { LXPR_SYS_FSDIR, "fs" }, 425 { LXPR_SYS_KERNELDIR, "kernel" }, 426 { LXPR_SYS_NETDIR, "net" }, 427 { LXPR_SYS_VMDIR, "vm" }, 428 }; 429 430 #define SYSDIRFILES 
(sizeof (sysdir) / sizeof (sysdir[0])) 431 432 /* 433 * contents of /proc/sys/fs directory 434 */ 435 static lxpr_dirent_t sys_fsdir[] = { 436 { LXPR_SYS_FS_INOTIFYDIR, "inotify" }, 437 }; 438 439 #define SYS_FSDIRFILES (sizeof (sys_fsdir) / sizeof (sys_fsdir[0])) 440 441 /* 442 * contents of /proc/sys/fs/inotify directory 443 */ 444 static lxpr_dirent_t sys_fs_inotifydir[] = { 445 { LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" }, 446 { LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES, "max_user_instances" }, 447 { LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES, "max_user_watches" }, 448 }; 449 450 #define SYS_FS_INOTIFYDIRFILES \ 451 (sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0])) 452 453 /* 454 * contents of /proc/sys/kernel directory 455 */ 456 static lxpr_dirent_t sys_kerneldir[] = { 457 { LXPR_SYS_KERNEL_CAPLCAP, "cap_last_cap" }, 458 { LXPR_SYS_KERNEL_COREPATT, "core_pattern" }, 459 { LXPR_SYS_KERNEL_HOSTNAME, "hostname" }, 460 { LXPR_SYS_KERNEL_MSGMNI, "msgmni" }, 461 { LXPR_SYS_KERNEL_NGROUPS_MAX, "ngroups_max" }, 462 { LXPR_SYS_KERNEL_OSREL, "osrelease" }, 463 { LXPR_SYS_KERNEL_PID_MAX, "pid_max" }, 464 { LXPR_SYS_KERNEL_RANDDIR, "random" }, 465 { LXPR_SYS_KERNEL_SHMMAX, "shmmax" }, 466 { LXPR_SYS_KERNEL_THREADS_MAX, "threads-max" }, 467 }; 468 469 #define SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0])) 470 471 /* 472 * contents of /proc/sys/kernel/random directory 473 */ 474 static lxpr_dirent_t sys_randdir[] = { 475 { LXPR_SYS_KERNEL_RAND_BOOTID, "boot_id" }, 476 }; 477 478 #define SYS_RANDDIRFILES (sizeof (sys_randdir) / sizeof (sys_randdir[0])) 479 480 /* 481 * contents of /proc/sys/net directory 482 */ 483 static lxpr_dirent_t sys_netdir[] = { 484 { LXPR_SYS_NET_COREDIR, "core" }, 485 }; 486 487 #define SYS_NETDIRFILES (sizeof (sys_netdir) / sizeof (sys_netdir[0])) 488 489 /* 490 * contents of /proc/sys/net/core directory 491 */ 492 static lxpr_dirent_t sys_net_coredir[] = { 493 { LXPR_SYS_NET_CORE_SOMAXCON, "somaxconn" 
}, 494 }; 495 496 #define SYS_NET_COREDIRFILES \ 497 (sizeof (sys_net_coredir) / sizeof (sys_net_coredir[0])) 498 499 /* 500 * contents of /proc/sys/vm directory 501 */ 502 static lxpr_dirent_t sys_vmdir[] = { 503 { LXPR_SYS_VM_MINFR_KB, "min_free_kbytes" }, 504 { LXPR_SYS_VM_NHUGEP, "nr_hugepages" }, 505 { LXPR_SYS_VM_OVERCOMMIT_MEM, "overcommit_memory" }, 506 { LXPR_SYS_VM_SWAPPINESS, "swappiness" }, 507 }; 508 509 #define SYS_VMDIRFILES (sizeof (sys_vmdir) / sizeof (sys_vmdir[0])) 510 511 /* 512 * lxpr_open(): Vnode operation for VOP_OPEN() 513 */ 514 static int 515 lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 516 { 517 vnode_t *vp = *vpp; 518 lxpr_node_t *lxpnp = VTOLXP(vp); 519 lxpr_nodetype_t type = lxpnp->lxpr_type; 520 vnode_t *rvp; 521 int error = 0; 522 523 if (flag & FWRITE) { 524 /* Restrict writes to certain files */ 525 switch (type) { 526 case LXPR_PID_OOM_SCR_ADJ: 527 case LXPR_PID_TID_OOM_SCR_ADJ: 528 case LXPR_SYS_KERNEL_COREPATT: 529 case LXPR_SYS_NET_CORE_SOMAXCON: 530 case LXPR_SYS_VM_OVERCOMMIT_MEM: 531 case LXPR_SYS_VM_SWAPPINESS: 532 case LXPR_PID_FD_FD: 533 case LXPR_PID_TID_FD_FD: 534 break; 535 default: 536 return (EPERM); 537 } 538 } 539 540 /* 541 * If we are opening an underlying file only allow regular files, 542 * fifos or sockets; reject the open for anything else. 543 * Just do it if we are opening the current or root directory. 544 */ 545 if (lxpnp->lxpr_realvp != NULL) { 546 rvp = lxpnp->lxpr_realvp; 547 548 if (type == LXPR_PID_FD_FD && rvp->v_type != VREG && 549 rvp->v_type != VFIFO && rvp->v_type != VSOCK) { 550 error = EACCES; 551 } else { 552 if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) { 553 /* 554 * This flag lets the fifo open know that 555 * we're using proc/fd to open a fd which we 556 * already have open. Otherwise, the fifo might 557 * reject an open if the other end has closed. 558 */ 559 flag |= FKLYR; 560 } 561 /* 562 * Need to hold rvp since VOP_OPEN() may release it. 
563 */ 564 VN_HOLD(rvp); 565 error = VOP_OPEN(&rvp, flag, cr, ct); 566 if (error) { 567 VN_RELE(rvp); 568 } else { 569 *vpp = rvp; 570 VN_RELE(vp); 571 } 572 } 573 } 574 575 return (error); 576 } 577 578 579 /* 580 * lxpr_close(): Vnode operation for VOP_CLOSE() 581 */ 582 /* ARGSUSED */ 583 static int 584 lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 585 caller_context_t *ct) 586 { 587 lxpr_node_t *lxpr = VTOLXP(vp); 588 lxpr_nodetype_t type = lxpr->lxpr_type; 589 590 /* 591 * we should never get here because the close is done on the realvp 592 * for these nodes 593 */ 594 ASSERT(type != LXPR_PID_FD_FD && 595 type != LXPR_PID_CURDIR && 596 type != LXPR_PID_ROOTDIR && 597 type != LXPR_PID_EXE); 598 599 return (0); 600 } 601 602 static void (*lxpr_read_function[LXPR_NFILES])() = { 603 lxpr_read_isdir, /* /proc */ 604 lxpr_read_isdir, /* /proc/<pid> */ 605 lxpr_read_pid_auxv, /* /proc/<pid>/auxv */ 606 lxpr_read_pid_cgroup, /* /proc/<pid>/cgroup */ 607 lxpr_read_pid_cmdline, /* /proc/<pid>/cmdline */ 608 lxpr_read_pid_comm, /* /proc/<pid>/comm */ 609 lxpr_read_empty, /* /proc/<pid>/cpu */ 610 lxpr_read_invalid, /* /proc/<pid>/cwd */ 611 lxpr_read_pid_env, /* /proc/<pid>/environ */ 612 lxpr_read_invalid, /* /proc/<pid>/exe */ 613 lxpr_read_pid_limits, /* /proc/<pid>/limits */ 614 lxpr_read_pid_maps, /* /proc/<pid>/maps */ 615 lxpr_read_empty, /* /proc/<pid>/mem */ 616 lxpr_read_pid_mountinfo, /* /proc/<pid>/mountinfo */ 617 lxpr_read_pid_oom_scr_adj, /* /proc/<pid>/oom_score_adj */ 618 lxpr_read_invalid, /* /proc/<pid>/root */ 619 lxpr_read_pid_stat, /* /proc/<pid>/stat */ 620 lxpr_read_pid_statm, /* /proc/<pid>/statm */ 621 lxpr_read_pid_status, /* /proc/<pid>/status */ 622 lxpr_read_isdir, /* /proc/<pid>/task */ 623 lxpr_read_isdir, /* /proc/<pid>/task/nn */ 624 lxpr_read_isdir, /* /proc/<pid>/fd */ 625 lxpr_read_fd, /* /proc/<pid>/fd/nn */ 626 lxpr_read_pid_auxv, /* /proc/<pid>/task/<tid>/auxv */ 627 lxpr_read_pid_cgroup, /* 
					   /proc/<pid>/task/<tid>/cgroup */
	lxpr_read_pid_cmdline,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_read_pid_comm,		/* /proc/<pid>/task/<tid>/comm	*/
	lxpr_read_empty,		/* /proc/<pid>/task/<tid>/cpu	*/
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/cwd	*/
	lxpr_read_pid_env,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/exe	*/
	lxpr_read_pid_limits,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_read_pid_maps,		/* /proc/<pid>/task/<tid>/maps	*/
	lxpr_read_empty,		/* /proc/<pid>/task/<tid>/mem	*/
	lxpr_read_pid_mountinfo,	/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_read_pid_oom_scr_adj,	/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/root	*/
	lxpr_read_pid_tid_stat,		/* /proc/<pid>/task/<tid>/stat	*/
	lxpr_read_pid_statm,		/* /proc/<pid>/task/<tid>/statm	*/
	lxpr_read_pid_tid_status,	/* /proc/<pid>/task/<tid>/status */
	lxpr_read_isdir,		/* /proc/<pid>/task/<tid>/fd	*/
	lxpr_read_fd,			/* /proc/<pid>/task/<tid>/fd/nn	*/
	lxpr_read_cgroups,		/* /proc/cgroups	*/
	lxpr_read_empty,		/* /proc/cmdline	*/
	lxpr_read_cpuinfo,		/* /proc/cpuinfo	*/
	lxpr_read_empty,		/* /proc/devices	*/
	lxpr_read_diskstats,		/* /proc/diskstats	*/
	lxpr_read_empty,		/* /proc/dma		*/
	lxpr_read_filesystems,		/* /proc/filesystems	*/
	lxpr_read_empty,		/* /proc/interrupts	*/
	lxpr_read_empty,		/* /proc/ioports	*/
	lxpr_read_empty,		/* /proc/kcore		*/
	lxpr_read_invalid,		/* /proc/kmsg -- see lxpr_read() */
	lxpr_read_loadavg,		/* /proc/loadavg	*/
	lxpr_read_meminfo,		/* /proc/meminfo	*/
	lxpr_read_empty,		/* /proc/modules	*/
	lxpr_read_mounts,		/* /proc/mounts		*/
	lxpr_read_isdir,		/* /proc/net		*/
	lxpr_read_net_arp,		/* /proc/net/arp	*/
	lxpr_read_net_dev,		/* /proc/net/dev	*/
	lxpr_read_net_dev_mcast,	/* /proc/net/dev_mcast	*/
	lxpr_read_net_if_inet6,		/* /proc/net/if_inet6	*/
	lxpr_read_net_igmp,		/* /proc/net/igmp	*/
	lxpr_read_net_ip_mr_cache,	/* /proc/net/ip_mr_cache */
	lxpr_read_net_ip_mr_vif,	/* /proc/net/ip_mr_vif	*/
	lxpr_read_net_ipv6_route,	/* /proc/net/ipv6_route	*/
	lxpr_read_net_mcfilter,		/* /proc/net/mcfilter	*/
	lxpr_read_net_netstat,		/* /proc/net/netstat	*/
	lxpr_read_net_raw,		/* /proc/net/raw	*/
	lxpr_read_net_route,		/* /proc/net/route	*/
	lxpr_read_net_rpc,		/* /proc/net/rpc	*/
	lxpr_read_net_rt_cache,		/* /proc/net/rt_cache	*/
	lxpr_read_net_sockstat,		/* /proc/net/sockstat	*/
	lxpr_read_net_snmp,		/* /proc/net/snmp	*/
	lxpr_read_net_stat,		/* /proc/net/stat	*/
	lxpr_read_net_tcp,		/* /proc/net/tcp	*/
	lxpr_read_net_tcp6,		/* /proc/net/tcp6	*/
	lxpr_read_net_udp,		/* /proc/net/udp	*/
	lxpr_read_net_udp6,		/* /proc/net/udp6	*/
	lxpr_read_net_unix,		/* /proc/net/unix	*/
	lxpr_read_partitions,		/* /proc/partitions	*/
	lxpr_read_invalid,		/* /proc/self		*/
	lxpr_read_stat,			/* /proc/stat		*/
	lxpr_read_swaps,		/* /proc/swaps		*/
	lxpr_read_invalid,		/* /proc/sys		*/
	lxpr_read_invalid,		/* /proc/sys/fs		*/
	lxpr_read_invalid,		/* /proc/sys/fs/inotify	*/
	lxpr_read_sys_fs_inotify_max_queued_events,
				/* /proc/sys/fs/inotify/max_queued_events */
	lxpr_read_sys_fs_inotify_max_user_instances,
				/* /proc/sys/fs/inotify/max_user_instances */
	lxpr_read_sys_fs_inotify_max_user_watches,
				/* /proc/sys/fs/inotify/max_user_watches */
	lxpr_read_invalid,		/* /proc/sys/kernel	*/
	lxpr_read_sys_kernel_caplcap,	/* /proc/sys/kernel/cap_last_cap */
	lxpr_read_sys_kernel_corepatt,	/* /proc/sys/kernel/core_pattern */
	lxpr_read_sys_kernel_hostname,	/* /proc/sys/kernel/hostname */
	lxpr_read_sys_kernel_msgmni,	/* /proc/sys/kernel/msgmni */
	lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */
	lxpr_read_sys_kernel_osrel,	/* /proc/sys/kernel/osrelease */
	lxpr_read_sys_kernel_pid_max,	/* /proc/sys/kernel/pid_max */
	lxpr_read_invalid,		/* /proc/sys/kernel/random */
	lxpr_read_sys_kernel_rand_bootid, /* /proc/sys/kernel/random/boot_id */
	lxpr_read_sys_kernel_shmmax,	/* /proc/sys/kernel/shmmax */
	lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */
	lxpr_read_invalid,		/* /proc/sys/net	*/
	lxpr_read_invalid,		/* /proc/sys/net/core	*/
	lxpr_read_sys_net_core_somaxc,	/* /proc/sys/net/core/somaxconn */
	lxpr_read_invalid,		/* /proc/sys/vm		*/
	lxpr_read_sys_vm_minfr_kb,	/* /proc/sys/vm/min_free_kbytes */
	lxpr_read_sys_vm_nhpages,	/* /proc/sys/vm/nr_hugepages */
	lxpr_read_sys_vm_overcommit_mem, /* /proc/sys/vm/overcommit_memory */
	lxpr_read_sys_vm_swappiness,	/* /proc/sys/vm/swappiness */
	lxpr_read_uptime,		/* /proc/uptime		*/
	lxpr_read_version,		/* /proc/version	*/
};

/*
 * Array of lookup functions, indexed by lx /proc file type.
 * NOTE: entry order must match the lxpr_nodetype_t enumeration exactly,
 * and must stay parallel with lxpr_read_function above.
 */
static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
	lxpr_lookup_procdir,		/* /proc		*/
	lxpr_lookup_piddir,		/* /proc/<pid>		*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/auxv	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cgroup	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cmdline	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/comm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cpu	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cwd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/environ	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/exe	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/limits	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/maps	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/mem	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/mountinfo */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/oom_score_adj */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/root	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/stat	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/statm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/status	*/
	lxpr_lookup_taskdir,		/* /proc/<pid>/task	*/
	lxpr_lookup_task_tid_dir,	/* /proc/<pid>/task/nn	*/
	lxpr_lookup_fddir,		/* /proc/<pid>/fd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/fd/nn	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/auxv	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cgroup */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/comm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cpu	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cwd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/exe	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/maps	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/mem	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/root	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/stat	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/statm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/status */
	lxpr_lookup_fddir,		/* /proc/<pid>/task/<tid>/fd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/fd/nn	*/
	lxpr_lookup_not_a_dir,		/* /proc/cgroups	*/
	lxpr_lookup_not_a_dir,		/* /proc/cmdline	*/
	lxpr_lookup_not_a_dir,		/* /proc/cpuinfo	*/
	lxpr_lookup_not_a_dir,		/* /proc/devices	*/
	lxpr_lookup_not_a_dir,		/* /proc/diskstats	*/
	lxpr_lookup_not_a_dir,		/* /proc/dma		*/
	lxpr_lookup_not_a_dir,		/* /proc/filesystems	*/
	lxpr_lookup_not_a_dir,		/* /proc/interrupts	*/
	lxpr_lookup_not_a_dir,		/* /proc/ioports	*/
	lxpr_lookup_not_a_dir,		/* /proc/kcore		*/
	lxpr_lookup_not_a_dir,		/* /proc/kmsg		*/
	lxpr_lookup_not_a_dir,		/* /proc/loadavg	*/
	lxpr_lookup_not_a_dir,		/* /proc/meminfo	*/
	lxpr_lookup_not_a_dir,		/* /proc/modules	*/
	lxpr_lookup_not_a_dir,		/* /proc/mounts		*/
	lxpr_lookup_netdir,		/* /proc/net		*/
	lxpr_lookup_not_a_dir,		/* /proc/net/arp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/dev	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/dev_mcast	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/if_inet6	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/igmp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/ip_mr_cache */
	lxpr_lookup_not_a_dir,		/* /proc/net/ip_mr_vif	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/ipv6_route	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/mcfilter	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/netstat	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/raw	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/route	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/rpc	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/rt_cache	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/sockstat	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/snmp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/stat	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/tcp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/tcp6	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/udp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/udp6	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/unix	*/
	lxpr_lookup_not_a_dir,		/* /proc/partitions	*/
	lxpr_lookup_not_a_dir,		/* /proc/self		*/
	lxpr_lookup_not_a_dir,		/* /proc/stat		*/
	lxpr_lookup_not_a_dir,		/* /proc/swaps		*/
	lxpr_lookup_sysdir,		/* /proc/sys		*/
	lxpr_lookup_sys_fsdir,		/* /proc/sys/fs		*/
	lxpr_lookup_sys_fs_inotifydir,	/* /proc/sys/fs/inotify	*/
	lxpr_lookup_not_a_dir,		/* .../inotify/max_queued_events */
	lxpr_lookup_not_a_dir,		/* .../inotify/max_user_instances */
	lxpr_lookup_not_a_dir,		/* .../inotify/max_user_watches	*/
	lxpr_lookup_sys_kerneldir,	/* /proc/sys/kernel	*/
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/cap_last_cap */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/core_pattern */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/hostname */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/msgmni */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/ngroups_max	*/
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/osrelease */
	lxpr_lookup_not_a_dir,		/*
/proc/sys/kernel/pid_max */ 819 lxpr_lookup_sys_kdir_randdir, /* /proc/sys/kernel/random */ 820 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/random/boot_id */ 821 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/shmmax */ 822 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/threads-max */ 823 lxpr_lookup_sys_netdir, /* /proc/sys/net */ 824 lxpr_lookup_sys_net_coredir, /* /proc/sys/net/core */ 825 lxpr_lookup_not_a_dir, /* /proc/sys/net/core/somaxconn */ 826 lxpr_lookup_sys_vmdir, /* /proc/sys/vm */ 827 lxpr_lookup_not_a_dir, /* /proc/sys/vm/min_free_kbytes */ 828 lxpr_lookup_not_a_dir, /* /proc/sys/vm/nr_hugepages */ 829 lxpr_lookup_not_a_dir, /* /proc/sys/vm/overcommit_memory */ 830 lxpr_lookup_not_a_dir, /* /proc/sys/vm/swappiness */ 831 lxpr_lookup_not_a_dir, /* /proc/uptime */ 832 lxpr_lookup_not_a_dir, /* /proc/version */ 833 }; 834 835 /* 836 * Array of readdir functions, indexed by /proc file type. 837 */ 838 static int (*lxpr_readdir_function[LXPR_NFILES])() = { 839 lxpr_readdir_procdir, /* /proc */ 840 lxpr_readdir_piddir, /* /proc/<pid> */ 841 lxpr_readdir_not_a_dir, /* /proc/<pid>/auxv */ 842 lxpr_readdir_not_a_dir, /* /proc/<pid>/cgroup */ 843 lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */ 844 lxpr_readdir_not_a_dir, /* /proc/<pid>/comm */ 845 lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */ 846 lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */ 847 lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */ 848 lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */ 849 lxpr_readdir_not_a_dir, /* /proc/<pid>/limits */ 850 lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */ 851 lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */ 852 lxpr_readdir_not_a_dir, /* /proc/<pid>/mountinfo */ 853 lxpr_readdir_not_a_dir, /* /proc/<pid>/oom_score_adj */ 854 lxpr_readdir_not_a_dir, /* /proc/<pid>/root */ 855 lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */ 856 lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */ 857 lxpr_readdir_not_a_dir, /* /proc/<pid>/status */ 858 lxpr_readdir_taskdir, /* /proc/<pid>/task */ 859 
lxpr_readdir_task_tid_dir, /* /proc/<pid>/task/nn */ 860 lxpr_readdir_fddir, /* /proc/<pid>/fd */ 861 lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */ 862 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/auxv */ 863 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cgroup */ 864 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cmdline */ 865 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/comm */ 866 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cpu */ 867 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cwd */ 868 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/environ */ 869 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/exe */ 870 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/limits */ 871 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/maps */ 872 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/mem */ 873 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/mountinfo */ 874 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid/oom_scr_adj */ 875 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/root */ 876 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/stat */ 877 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/statm */ 878 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/status */ 879 lxpr_readdir_fddir, /* /proc/<pid>/task/<tid>/fd */ 880 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/fd/nn */ 881 lxpr_readdir_not_a_dir, /* /proc/cgroups */ 882 lxpr_readdir_not_a_dir, /* /proc/cmdline */ 883 lxpr_readdir_not_a_dir, /* /proc/cpuinfo */ 884 lxpr_readdir_not_a_dir, /* /proc/devices */ 885 lxpr_readdir_not_a_dir, /* /proc/diskstats */ 886 lxpr_readdir_not_a_dir, /* /proc/dma */ 887 lxpr_readdir_not_a_dir, /* /proc/filesystems */ 888 lxpr_readdir_not_a_dir, /* /proc/interrupts */ 889 lxpr_readdir_not_a_dir, /* /proc/ioports */ 890 lxpr_readdir_not_a_dir, /* /proc/kcore */ 891 lxpr_readdir_not_a_dir, /* /proc/kmsg */ 892 lxpr_readdir_not_a_dir, /* /proc/loadavg */ 893 lxpr_readdir_not_a_dir, /* /proc/meminfo */ 894 lxpr_readdir_not_a_dir, /* 
/proc/modules */ 895 lxpr_readdir_not_a_dir, /* /proc/mounts */ 896 lxpr_readdir_netdir, /* /proc/net */ 897 lxpr_readdir_not_a_dir, /* /proc/net/arp */ 898 lxpr_readdir_not_a_dir, /* /proc/net/dev */ 899 lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */ 900 lxpr_readdir_not_a_dir, /* /proc/net/if_inet6 */ 901 lxpr_readdir_not_a_dir, /* /proc/net/igmp */ 902 lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */ 903 lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */ 904 lxpr_readdir_not_a_dir, /* /proc/net/ipv6_route */ 905 lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */ 906 lxpr_readdir_not_a_dir, /* /proc/net/netstat */ 907 lxpr_readdir_not_a_dir, /* /proc/net/raw */ 908 lxpr_readdir_not_a_dir, /* /proc/net/route */ 909 lxpr_readdir_not_a_dir, /* /proc/net/rpc */ 910 lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */ 911 lxpr_readdir_not_a_dir, /* /proc/net/sockstat */ 912 lxpr_readdir_not_a_dir, /* /proc/net/snmp */ 913 lxpr_readdir_not_a_dir, /* /proc/net/stat */ 914 lxpr_readdir_not_a_dir, /* /proc/net/tcp */ 915 lxpr_readdir_not_a_dir, /* /proc/net/tcp6 */ 916 lxpr_readdir_not_a_dir, /* /proc/net/udp */ 917 lxpr_readdir_not_a_dir, /* /proc/net/udp6 */ 918 lxpr_readdir_not_a_dir, /* /proc/net/unix */ 919 lxpr_readdir_not_a_dir, /* /proc/partitions */ 920 lxpr_readdir_not_a_dir, /* /proc/self */ 921 lxpr_readdir_not_a_dir, /* /proc/stat */ 922 lxpr_readdir_not_a_dir, /* /proc/swaps */ 923 lxpr_readdir_sysdir, /* /proc/sys */ 924 lxpr_readdir_sys_fsdir, /* /proc/sys/fs */ 925 lxpr_readdir_sys_fs_inotifydir, /* /proc/sys/fs/inotify */ 926 lxpr_readdir_not_a_dir, /* .../inotify/max_queued_events */ 927 lxpr_readdir_not_a_dir, /* .../inotify/max_user_instances */ 928 lxpr_readdir_not_a_dir, /* .../inotify/max_user_watches */ 929 lxpr_readdir_sys_kerneldir, /* /proc/sys/kernel */ 930 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/cap_last_cap */ 931 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/core_pattern */ 932 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/hostname */ 933 
lxpr_readdir_not_a_dir, /* /proc/sys/kernel/msgmni */ 934 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/ngroups_max */ 935 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/osrelease */ 936 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/pid_max */ 937 lxpr_readdir_sys_kdir_randdir, /* /proc/sys/kernel/random */ 938 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/random/boot_id */ 939 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/shmmax */ 940 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/threads-max */ 941 lxpr_readdir_sys_netdir, /* /proc/sys/net */ 942 lxpr_readdir_sys_net_coredir, /* /proc/sys/net/core */ 943 lxpr_readdir_not_a_dir, /* /proc/sys/net/core/somaxconn */ 944 lxpr_readdir_sys_vmdir, /* /proc/sys/vm */ 945 lxpr_readdir_not_a_dir, /* /proc/sys/vm/min_free_kbytes */ 946 lxpr_readdir_not_a_dir, /* /proc/sys/vm/nr_hugepages */ 947 lxpr_readdir_not_a_dir, /* /proc/sys/vm/overcommit_memory */ 948 lxpr_readdir_not_a_dir, /* /proc/sys/vm/swappiness */ 949 lxpr_readdir_not_a_dir, /* /proc/uptime */ 950 lxpr_readdir_not_a_dir, /* /proc/version */ 951 }; 952 953 954 /* 955 * lxpr_read(): Vnode operation for VOP_READ() 956 * 957 * As the format of all the files that can be read in the lx procfs is human 958 * readable and not binary structures there do not have to be different 959 * read variants depending on whether the reading process model is 32 or 64 bits 960 * (at least in general, and certainly the difference is unlikely to be enough 961 * to justify have different routines for 32 and 64 bit reads 962 */ 963 /* ARGSUSED */ 964 static int 965 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 966 caller_context_t *ct) 967 { 968 lxpr_node_t *lxpnp = VTOLXP(vp); 969 lxpr_nodetype_t type = lxpnp->lxpr_type; 970 lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop); 971 int error; 972 973 ASSERT(type < LXPR_NFILES); 974 975 if (type == LXPR_KMSG) { 976 ldi_ident_t li = VTOLXPM(vp)->lxprm_li; 977 ldi_handle_t ldih; 978 struct strioctl str; 979 int rv; 980 981 /* 982 * Open the 
zone's console device using the layered driver 983 * interface. 984 */ 985 if ((error = 986 ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li)) != 0) 987 return (error); 988 989 /* 990 * Send an ioctl to the underlying console device, letting it 991 * know we're interested in getting console messages. 992 */ 993 str.ic_cmd = I_CONSLOG; 994 str.ic_timout = 0; 995 str.ic_len = 0; 996 str.ic_dp = NULL; 997 if ((error = ldi_ioctl(ldih, I_STR, 998 (intptr_t)&str, FKIOCTL, cr, &rv)) != 0) 999 return (error); 1000 1001 lxpr_read_kmsg(lxpnp, uiobuf, ldih); 1002 1003 if ((error = ldi_close(ldih, FREAD, cr)) != 0) 1004 return (error); 1005 } else { 1006 lxpr_read_function[type](lxpnp, uiobuf); 1007 } 1008 1009 error = lxpr_uiobuf_flush(uiobuf); 1010 lxpr_uiobuf_free(uiobuf); 1011 1012 return (error); 1013 } 1014 1015 /* 1016 * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty() 1017 * 1018 * Various special case reads: 1019 * - trying to read a directory 1020 * - invalid file (used to mean a file that should be implemented, 1021 * but isn't yet) 1022 * - empty file 1023 * - wait to be able to read a file that will never have anything to read 1024 */ 1025 /* ARGSUSED */ 1026 static void 1027 lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1028 { 1029 lxpr_uiobuf_seterr(uiobuf, EISDIR); 1030 } 1031 1032 /* ARGSUSED */ 1033 static void 1034 lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1035 { 1036 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1037 } 1038 1039 /* ARGSUSED */ 1040 static void 1041 lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1042 { 1043 } 1044 1045 /* 1046 * lxpr_read_pid_auxv(): read process aux vector 1047 */ 1048 static void 1049 lxpr_read_pid_auxv(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1050 { 1051 proc_t *p; 1052 lx_proc_data_t *pd; 1053 lx_elf_data_t *edp = NULL; 1054 int i, cnt; 1055 1056 ASSERT(lxpnp->lxpr_type == LXPR_PID_AUXV || 1057 lxpnp->lxpr_type == LXPR_PID_TID_AUXV); 1058 1059 p = lxpr_lock(lxpnp->lxpr_pid); 
1060 1061 if (p == NULL) { 1062 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1063 return; 1064 } 1065 if ((pd = ptolxproc(p)) == NULL) { 1066 /* Emit a single AT_NULL record for non-branded processes */ 1067 auxv_t buf; 1068 1069 bzero(&buf, sizeof (buf)); 1070 lxpr_unlock(p); 1071 lxpr_uiobuf_write(uiobuf, (char *)&buf, sizeof (buf)); 1072 return; 1073 } else { 1074 edp = &pd->l_elf_data; 1075 } 1076 1077 if (p->p_model == DATAMODEL_NATIVE) { 1078 auxv_t buf[__KERN_NAUXV_IMPL]; 1079 1080 /* 1081 * Because a_type is only of size int (not long), the buffer 1082 * contents must be zeroed first to ensure cleanliness. 1083 */ 1084 bzero(buf, sizeof (buf)); 1085 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) { 1086 if (lx_auxv_stol(&p->p_user.u_auxv[i], 1087 &buf[cnt], edp) == 0) { 1088 cnt++; 1089 } 1090 if (p->p_user.u_auxv[i].a_type == AT_NULL) { 1091 break; 1092 } 1093 } 1094 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0])); 1095 lxpr_unlock(p); 1096 } 1097 #if defined(_SYSCALL32_IMPL) 1098 else { 1099 auxv32_t buf[__KERN_NAUXV_IMPL]; 1100 1101 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) { 1102 auxv_t temp; 1103 1104 if (lx_auxv_stol(&p->p_user.u_auxv[i], 1105 &temp, edp) == 0) { 1106 buf[cnt].a_type = (int)temp.a_type; 1107 buf[cnt].a_un.a_val = (int)temp.a_un.a_val; 1108 cnt++; 1109 } 1110 if (p->p_user.u_auxv[i].a_type == AT_NULL) { 1111 break; 1112 } 1113 } 1114 lxpr_unlock(p); 1115 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0])); 1116 } 1117 #endif /* defined(_SYSCALL32_IMPL) */ 1118 } 1119 1120 /* 1121 * lxpr_read_pid_cgroup(): read cgroups for process 1122 */ 1123 static void 1124 lxpr_read_pid_cgroup(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1125 { 1126 proc_t *p; 1127 1128 ASSERT(lxpnp->lxpr_type == LXPR_PID_CGROUP || 1129 lxpnp->lxpr_type == LXPR_PID_TID_CGROUP); 1130 1131 p = lxpr_lock(lxpnp->lxpr_pid); 1132 if (p == NULL) { 1133 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1134 return; 1135 } 1136 1137 /* basic stub, 3rd field will need 
to be populated */ 1138 lxpr_uiobuf_printf(uiobuf, "1:name=systemd:/\n"); 1139 1140 lxpr_unlock(p); 1141 } 1142 1143 static void 1144 lxpr_copy_cmdline(proc_t *p, lx_proc_data_t *pd, lxpr_uiobuf_t *uiobuf) 1145 { 1146 uio_t *uiop = uiobuf->uiop; 1147 char *buf = uiobuf->buffer; 1148 int bsz = uiobuf->buffsize; 1149 boolean_t env_overflow = B_FALSE; 1150 uintptr_t pos = pd->l_args_start + uiop->uio_offset; 1151 uintptr_t estart = pd->l_envs_start; 1152 uintptr_t eend = pd->l_envs_end; 1153 size_t chunk, copied; 1154 int err = 0; 1155 1156 /* Do not bother with data beyond the end of the envp strings area. */ 1157 if (pos > eend) { 1158 return; 1159 } 1160 mutex_exit(&p->p_lock); 1161 1162 /* 1163 * If the starting or ending bounds are outside the argv strings area, 1164 * check to see if the process has overwritten the terminating NULL. 1165 * If not, no data needs to be copied from oustide the argv area. 1166 */ 1167 if (pos >= estart || (pos + uiop->uio_resid) >= estart) { 1168 uint8_t term; 1169 if (uread(p, &term, sizeof (term), estart - 1) != 0) { 1170 err = EFAULT; 1171 } else if (term != 0) { 1172 env_overflow = B_TRUE; 1173 } 1174 } 1175 1176 1177 /* Data between astart and estart-1 can be copied freely. */ 1178 while (pos < estart && uiop->uio_resid > 0 && err == 0) { 1179 chunk = MIN(estart - pos, uiop->uio_resid); 1180 chunk = MIN(chunk, bsz); 1181 1182 if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) != 0 || 1183 copied != chunk) { 1184 err = EFAULT; 1185 break; 1186 } 1187 err = uiomove(buf, copied, UIO_READ, uiop); 1188 pos += copied; 1189 } 1190 1191 /* 1192 * Onward from estart, data is copied as a contiguous string. To 1193 * protect env data from potential snooping, only one buffer-sized copy 1194 * is allowed to avoid complex seek logic. 
1195 */ 1196 if (err == 0 && env_overflow && pos == estart && uiop->uio_resid > 0) { 1197 chunk = MIN(eend - pos, uiop->uio_resid); 1198 chunk = MIN(chunk, bsz); 1199 if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) == 0) { 1200 int len = strnlen(buf, copied); 1201 if (len > 0) { 1202 err = uiomove(buf, len, UIO_READ, uiop); 1203 } 1204 } 1205 } 1206 1207 uiobuf->error = err; 1208 /* reset any uiobuf state */ 1209 uiobuf->pos = uiobuf->buffer; 1210 uiobuf->beg = 0; 1211 1212 mutex_enter(&p->p_lock); 1213 } 1214 1215 /* 1216 * lxpr_read_pid_cmdline(): read argument vector from process 1217 */ 1218 static void 1219 lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1220 { 1221 proc_t *p; 1222 char *buf; 1223 size_t asz = lxpr_maxargvlen, sz; 1224 lx_proc_data_t *pd; 1225 1226 ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE || 1227 lxpnp->lxpr_type == LXPR_PID_TID_CMDLINE); 1228 1229 buf = kmem_alloc(asz, KM_SLEEP); 1230 1231 p = lxpr_lock(lxpnp->lxpr_pid); 1232 if (p == NULL) { 1233 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1234 kmem_free(buf, asz); 1235 return; 1236 } 1237 1238 if ((pd = ptolxproc(p)) != NULL && pd->l_args_start != 0 && 1239 pd->l_envs_start != 0 && pd->l_envs_end != 0) { 1240 /* Use Linux-style argv bounds if possible. */ 1241 lxpr_copy_cmdline(p, pd, uiobuf); 1242 } else { 1243 if (prreadargv(p, buf, asz, &sz) != 0) { 1244 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1245 } else { 1246 lxpr_uiobuf_write(uiobuf, buf, sz); 1247 } 1248 } 1249 1250 lxpr_unlock(p); 1251 kmem_free(buf, asz); 1252 } 1253 1254 /* 1255 * lxpr_read_pid_comm(): read command from process 1256 */ 1257 static void 1258 lxpr_read_pid_comm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1259 { 1260 proc_t *p; 1261 1262 VERIFY(lxpnp->lxpr_type == LXPR_PID_COMM || 1263 lxpnp->lxpr_type == LXPR_PID_TID_COMM); 1264 1265 /* 1266 * Because prctl(PR_SET_NAME) does not set custom names for threads 1267 * (vs processes), there is no need for special handling here. 
1268 */ 1269 if ((p = lxpr_lock(lxpnp->lxpr_pid)) == NULL) { 1270 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1271 return; 1272 } 1273 lxpr_uiobuf_printf(uiobuf, "%s\n", p->p_user.u_comm); 1274 lxpr_unlock(p); 1275 } 1276 1277 /* 1278 * lxpr_read_pid_env(): read env vector from process 1279 */ 1280 static void 1281 lxpr_read_pid_env(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1282 { 1283 proc_t *p; 1284 char *buf; 1285 size_t asz = lxpr_maxenvvlen, sz; 1286 1287 ASSERT(lxpnp->lxpr_type == LXPR_PID_ENV); 1288 1289 buf = kmem_alloc(asz, KM_SLEEP); 1290 1291 p = lxpr_lock(lxpnp->lxpr_pid); 1292 if (p == NULL) { 1293 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1294 kmem_free(buf, asz); 1295 return; 1296 } 1297 1298 if (prreadenvv(p, buf, asz, &sz) != 0) { 1299 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1300 } else { 1301 lxpr_uiobuf_write(uiobuf, buf, sz); 1302 } 1303 1304 lxpr_unlock(p); 1305 kmem_free(buf, asz); 1306 } 1307 1308 /* 1309 * lxpr_read_pid_limits(): ulimit file 1310 */ 1311 static void 1312 lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1313 { 1314 proc_t *p; 1315 rctl_qty_t cur, max; 1316 rctl_val_t *oval, *nval; 1317 rctl_hndl_t hndl; 1318 char *kname; 1319 int i; 1320 1321 ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS || 1322 lxpnp->lxpr_type == LXPR_PID_TID_LIMITS); 1323 1324 nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP); 1325 1326 p = lxpr_lock(lxpnp->lxpr_pid); 1327 if (p == NULL) { 1328 kmem_free(nval, sizeof (rctl_val_t)); 1329 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1330 return; 1331 } 1332 1333 lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n", 1334 "Limit", "Soft Limit", "Hard Limit", "Units"); 1335 for (i = 0; lxpr_rlimtab[i].rlim_name != NULL; i++) { 1336 kname = lxpr_rlimtab[i].rlim_rctl; 1337 /* default to unlimited for resources without an analog */ 1338 cur = RLIM_INFINITY; 1339 max = RLIM_INFINITY; 1340 if (kname != NULL) { 1341 hndl = rctl_hndl_lookup(kname); 1342 oval = NULL; 1343 while ((hndl != -1) && 1344 rctl_local_get(hndl, oval, nval, 
p) == 0) { 1345 oval = nval; 1346 switch (nval->rcv_privilege) { 1347 case RCPRIV_BASIC: 1348 if (!RCTL_INFINITE(nval)) 1349 cur = nval->rcv_value; 1350 break; 1351 case RCPRIV_PRIVILEGED: 1352 if (!RCTL_INFINITE(nval)) 1353 max = nval->rcv_value; 1354 break; 1355 } 1356 } 1357 } 1358 1359 lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name); 1360 if (cur == RLIM_INFINITY || cur == LX_RLIM_INFINITY) { 1361 lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited"); 1362 } else { 1363 lxpr_uiobuf_printf(uiobuf, " %-20lu", cur); 1364 } 1365 if (max == RLIM_INFINITY || max == LX_RLIM_INFINITY) { 1366 lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited"); 1367 } else { 1368 lxpr_uiobuf_printf(uiobuf, " %-20lu", max); 1369 } 1370 lxpr_uiobuf_printf(uiobuf, " %-10s\n", 1371 lxpr_rlimtab[i].rlim_unit); 1372 } 1373 1374 lxpr_unlock(p); 1375 kmem_free(nval, sizeof (rctl_val_t)); 1376 } 1377 1378 /* 1379 * lxpr_read_pid_maps(): memory map file 1380 */ 1381 static void 1382 lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1383 { 1384 proc_t *p; 1385 struct as *as; 1386 struct seg *seg; 1387 char *buf; 1388 int buflen = MAXPATHLEN; 1389 struct print_data { 1390 uintptr_t saddr; 1391 uintptr_t eaddr; 1392 int type; 1393 char prot[5]; 1394 uintptr_t offset; 1395 vnode_t *vp; 1396 struct print_data *next; 1397 } *print_head = NULL; 1398 struct print_data **print_tail = &print_head; 1399 struct print_data *pbuf; 1400 1401 ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS || 1402 lxpnp->lxpr_type == LXPR_PID_TID_MAPS); 1403 1404 p = lxpr_lock(lxpnp->lxpr_pid); 1405 if (p == NULL) { 1406 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1407 return; 1408 } 1409 1410 as = p->p_as; 1411 1412 if (as == &kas) { 1413 lxpr_unlock(p); 1414 return; 1415 } 1416 1417 mutex_exit(&p->p_lock); 1418 1419 /* Iterate over all segments in the address space */ 1420 AS_LOCK_ENTER(as, RW_READER); 1421 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 1422 vnode_t *vp; 1423 uint_t protbits; 1424 
1425 pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP); 1426 1427 pbuf->saddr = (uintptr_t)seg->s_base; 1428 pbuf->eaddr = pbuf->saddr + seg->s_size; 1429 pbuf->type = SEGOP_GETTYPE(seg, seg->s_base); 1430 1431 /* 1432 * Cheat and only use the protection bits of the first page 1433 * in the segment 1434 */ 1435 (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot)); 1436 (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits); 1437 1438 if (protbits & PROT_READ) pbuf->prot[0] = 'r'; 1439 if (protbits & PROT_WRITE) pbuf->prot[1] = 'w'; 1440 if (protbits & PROT_EXEC) pbuf->prot[2] = 'x'; 1441 if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's'; 1442 else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p'; 1443 1444 if (seg->s_ops == &segvn_ops && 1445 SEGOP_GETVP(seg, seg->s_base, &vp) == 0 && 1446 vp != NULL && vp->v_type == VREG) { 1447 VN_HOLD(vp); 1448 pbuf->vp = vp; 1449 } else { 1450 pbuf->vp = NULL; 1451 } 1452 1453 pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr); 1454 1455 pbuf->next = NULL; 1456 *print_tail = pbuf; 1457 print_tail = &pbuf->next; 1458 } 1459 AS_LOCK_EXIT(as); 1460 mutex_enter(&p->p_lock); 1461 lxpr_unlock(p); 1462 1463 buf = kmem_alloc(buflen, KM_SLEEP); 1464 1465 /* print the data we've extracted */ 1466 pbuf = print_head; 1467 while (pbuf != NULL) { 1468 struct print_data *pbuf_next; 1469 vattr_t vattr; 1470 1471 int maj = 0; 1472 int min = 0; 1473 ino_t inode = 0; 1474 1475 *buf = '\0'; 1476 if (pbuf->vp != NULL) { 1477 vattr.va_mask = AT_FSID | AT_NODEID; 1478 if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(), 1479 NULL) == 0) { 1480 maj = getmajor(vattr.va_fsid); 1481 min = getminor(vattr.va_fsid); 1482 inode = vattr.va_nodeid; 1483 } 1484 (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED()); 1485 VN_RELE(pbuf->vp); 1486 } 1487 1488 if (p->p_model == DATAMODEL_LP64) { 1489 lxpr_uiobuf_printf(uiobuf, 1490 "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n", 1491 pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset, 1492 maj, min, inode, *buf != '\0' 
? " " : "", buf); 1493 } else { 1494 lxpr_uiobuf_printf(uiobuf, 1495 "%08x-%08x %s %08x %02x:%02x %llu%s%s\n", 1496 (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr, 1497 pbuf->prot, (uint32_t)pbuf->offset, maj, min, 1498 inode, *buf != '\0' ? " " : "", buf); 1499 } 1500 1501 pbuf_next = pbuf->next; 1502 kmem_free(pbuf, sizeof (*pbuf)); 1503 pbuf = pbuf_next; 1504 } 1505 1506 kmem_free(buf, buflen); 1507 } 1508 1509 /* 1510 * lxpr_read_pid_mountinfo(): information about process mount points. e.g.: 1511 * 14 19 0:13 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw 1512 * mntid parid devnums root mntpnt mntopts - fstype mntsrc superopts 1513 * 1514 * We have to make up several of these fields. 1515 */ 1516 static void 1517 lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1518 { 1519 struct vfs *vfsp; 1520 struct vfs *vfslist; 1521 zone_t *zone = LXPTOZ(lxpnp); 1522 struct print_data { 1523 refstr_t *vfs_mntpt; 1524 refstr_t *vfs_resource; 1525 uint_t vfs_flag; 1526 int vfs_fstype; 1527 dev_t vfs_dev; 1528 struct print_data *next; 1529 } *print_head = NULL; 1530 struct print_data **print_tail = &print_head; 1531 struct print_data *printp; 1532 int root_id = 15; /* use a made-up value */ 1533 int mnt_id; 1534 1535 ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO || 1536 lxpnp->lxpr_type == LXPR_PID_TID_MOUNTINFO); 1537 1538 vfs_list_read_lock(); 1539 1540 /* root is the top-level, it does not appear in this output */ 1541 if (zone == global_zone) { 1542 vfsp = vfslist = rootvfs; 1543 } else { 1544 vfsp = vfslist = zone->zone_vfslist; 1545 /* 1546 * If the zone has a root entry, it will be the first in 1547 * the list. If it doesn't, we conjure one up. 1548 */ 1549 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt), 1550 zone->zone_rootpath) != 0) { 1551 struct vfs *tvfsp; 1552 /* 1553 * The root of the zone is not a mount point. The vfs 1554 * we want to report is that of the zone's root vnode. 
1555 */ 1556 tvfsp = zone->zone_rootvp->v_vfsp; 1557 1558 lxpr_uiobuf_printf(uiobuf, 1559 "%d 1 %d:%d / / %s - %s / %s\n", 1560 root_id, 1561 major(tvfsp->vfs_dev), minor(vfsp->vfs_dev), 1562 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw", 1563 vfssw[tvfsp->vfs_fstype].vsw_name, 1564 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw"); 1565 1566 } 1567 if (vfslist == NULL) { 1568 vfs_list_unlock(); 1569 return; 1570 } 1571 } 1572 1573 /* 1574 * Later on we have to do a lookupname, which can end up causing 1575 * another vfs_list_read_lock() to be called. Which can lead to a 1576 * deadlock. To avoid this, we extract the data we need into a local 1577 * list, then we can run this list without holding vfs_list_read_lock() 1578 * We keep the list in the same order as the vfs_list 1579 */ 1580 do { 1581 /* Skip mounts we shouldn't show */ 1582 if (vfsp->vfs_flag & VFS_NOMNTTAB) { 1583 goto nextfs; 1584 } 1585 1586 printp = kmem_alloc(sizeof (*printp), KM_SLEEP); 1587 refstr_hold(vfsp->vfs_mntpt); 1588 printp->vfs_mntpt = vfsp->vfs_mntpt; 1589 refstr_hold(vfsp->vfs_resource); 1590 printp->vfs_resource = vfsp->vfs_resource; 1591 printp->vfs_flag = vfsp->vfs_flag; 1592 printp->vfs_fstype = vfsp->vfs_fstype; 1593 printp->vfs_dev = vfsp->vfs_dev; 1594 printp->next = NULL; 1595 1596 *print_tail = printp; 1597 print_tail = &printp->next; 1598 1599 nextfs: 1600 vfsp = (zone == global_zone) ? 
1601 vfsp->vfs_next : vfsp->vfs_zone_next; 1602 1603 } while (vfsp != vfslist); 1604 1605 vfs_list_unlock(); 1606 1607 mnt_id = root_id + 1; 1608 1609 /* 1610 * now we can run through what we've extracted without holding 1611 * vfs_list_read_lock() 1612 */ 1613 printp = print_head; 1614 while (printp != NULL) { 1615 struct print_data *printp_next; 1616 const char *resource; 1617 char *mntpt; 1618 struct vnode *vp; 1619 int error; 1620 1621 mntpt = (char *)refstr_value(printp->vfs_mntpt); 1622 resource = refstr_value(printp->vfs_resource); 1623 1624 if (mntpt != NULL && mntpt[0] != '\0') 1625 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone); 1626 else 1627 mntpt = "-"; 1628 1629 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 1630 1631 if (error != 0) 1632 goto nextp; 1633 1634 if (!(vp->v_flag & VROOT)) { 1635 VN_RELE(vp); 1636 goto nextp; 1637 } 1638 VN_RELE(vp); 1639 1640 if (resource != NULL && resource[0] != '\0') { 1641 if (resource[0] == '/') { 1642 resource = ZONE_PATH_VISIBLE(resource, zone) ? 1643 ZONE_PATH_TRANSLATE(resource, zone) : mntpt; 1644 } 1645 } else { 1646 resource = "none"; 1647 } 1648 1649 /* 1650 * XXX parent ID is not tracked correctly here. Currently we 1651 * always assume the parent ID is the root ID. 1652 */ 1653 lxpr_uiobuf_printf(uiobuf, 1654 "%d %d %d:%d / %s %s - %s %s %s\n", 1655 mnt_id, root_id, 1656 major(printp->vfs_dev), minor(printp->vfs_dev), 1657 mntpt, 1658 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw", 1659 vfssw[printp->vfs_fstype].vsw_name, 1660 resource, 1661 printp->vfs_flag & VFS_RDONLY ? 
"ro" : "rw"); 1662 1663 nextp: 1664 printp_next = printp->next; 1665 refstr_rele(printp->vfs_mntpt); 1666 refstr_rele(printp->vfs_resource); 1667 kmem_free(printp, sizeof (*printp)); 1668 printp = printp_next; 1669 1670 mnt_id++; 1671 } 1672 } 1673 1674 /* 1675 * lxpr_read_pid_oom_scr_adj(): read oom_score_adj for process 1676 */ 1677 static void 1678 lxpr_read_pid_oom_scr_adj(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1679 { 1680 proc_t *p; 1681 1682 ASSERT(lxpnp->lxpr_type == LXPR_PID_OOM_SCR_ADJ || 1683 lxpnp->lxpr_type == LXPR_PID_TID_OOM_SCR_ADJ); 1684 1685 p = lxpr_lock(lxpnp->lxpr_pid); 1686 if (p == NULL) { 1687 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1688 return; 1689 } 1690 1691 /* always 0 */ 1692 lxpr_uiobuf_printf(uiobuf, "0\n"); 1693 1694 lxpr_unlock(p); 1695 } 1696 1697 1698 /* 1699 * lxpr_read_pid_statm(): memory status file 1700 */ 1701 static void 1702 lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1703 { 1704 proc_t *p; 1705 struct as *as; 1706 size_t vsize; 1707 size_t rss; 1708 1709 ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM || 1710 lxpnp->lxpr_type == LXPR_PID_TID_STATM); 1711 1712 p = lxpr_lock(lxpnp->lxpr_pid); 1713 if (p == NULL) { 1714 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1715 return; 1716 } 1717 1718 as = p->p_as; 1719 1720 mutex_exit(&p->p_lock); 1721 1722 AS_LOCK_ENTER(as, RW_READER); 1723 vsize = btopr(as->a_resvsize); 1724 rss = rm_asrss(as); 1725 AS_LOCK_EXIT(as); 1726 1727 mutex_enter(&p->p_lock); 1728 lxpr_unlock(p); 1729 1730 lxpr_uiobuf_printf(uiobuf, 1731 "%lu %lu %lu %lu %lu %lu %lu\n", 1732 vsize, rss, 0l, rss, 0l, 0l, 0l); 1733 } 1734 1735 /* 1736 * Look for either the main thread (lookup_id is 0) or the specified thread. 1737 * If we're looking for the main thread but the proc does not have one, we 1738 * fallback to using prchoose to get any thread available. 
1739 */ 1740 static kthread_t * 1741 lxpr_get_thread(proc_t *p, uint_t lookup_id) 1742 { 1743 kthread_t *t; 1744 uint_t emul_tid; 1745 lx_lwp_data_t *lwpd; 1746 pid_t pid = p->p_pid; 1747 pid_t init_pid = curproc->p_zone->zone_proc_initpid; 1748 boolean_t branded = (p->p_brand == &lx_brand); 1749 1750 /* get specified thread */ 1751 if ((t = p->p_tlist) == NULL) 1752 return (NULL); 1753 1754 do { 1755 if (lookup_id == 0 && t->t_tid == 1) { 1756 thread_lock(t); 1757 return (t); 1758 } 1759 1760 lwpd = ttolxlwp(t); 1761 if (branded && lwpd != NULL) { 1762 if (pid == init_pid && lookup_id == 1) { 1763 emul_tid = t->t_tid; 1764 } else { 1765 emul_tid = lwpd->br_pid; 1766 } 1767 } else { 1768 /* 1769 * Make only the first (assumed to be main) thread 1770 * visible for non-branded processes. 1771 */ 1772 emul_tid = p->p_pid; 1773 } 1774 if (emul_tid == lookup_id) { 1775 thread_lock(t); 1776 return (t); 1777 } 1778 } while ((t = t->t_forw) != p->p_tlist); 1779 1780 if (lookup_id == 0) 1781 return (prchoose(p)); 1782 return (NULL); 1783 } 1784 1785 /* 1786 * Lookup the real pid for procs 0 or 1. 
 */
static pid_t
get_real_pid(pid_t p)
{
	pid_t find_pid;

	/* Linux pid 1 maps to the zone's init; Linux pid 0 maps to zsched */
	if (p == 1) {
		find_pid = curproc->p_zone->zone_proc_initpid;
	} else if (p == 0) {
		find_pid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		find_pid = p;
	}

	return (find_pid);
}

/*
 * pid/tid common code to read status file
 */
static void
lxpr_read_status_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
    uint_t lookup_id)
{
	proc_t *p;
	kthread_t *t;
	user_t *up;
	cred_t *cr;
	const gid_t *groups;
	int ngroups;
	struct as *as;
	char *status;
	pid_t pid, ppid;
	k_sigset_t current, ignore, handle;
	int i, lx_sig;
	pid_t real_pid;

	/* Translate the emulated Linux pid into the native pid and lock it */
	real_pid = get_real_pid(lxpnp->lxpr_pid);
	p = lxpr_lock(real_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	pid = p->p_pid;

	/*
	 * Convert pid to the Linux default of 1 if we're the zone's init
	 * process or if we're the zone's zsched the pid is 0.
	 */
	if (pid == curproc->p_zone->zone_proc_initpid) {
		pid = 1;
		ppid = 0;	/* parent pid for init is 0 */
	} else if (pid == curproc->p_zone->zone_zsched->p_pid) {
		pid = 0;	/* zsched is pid 0 */
		ppid = 0;	/* parent pid for zsched is itself */
	} else {
		/*
		 * Make sure not to reference parent PIDs that reside outside
		 * the zone
		 */
		ppid = ((p->p_flag & SZONETOP)
		    ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);

		/*
		 * Convert ppid to the Linux default of 1 if our parent is the
		 * zone's init process
		 */
		if (ppid == curproc->p_zone->zone_proc_initpid)
			ppid = 1;
	}

	t = lxpr_get_thread(p, lookup_id);
	if (t != NULL) {
		/* Map the native thread state onto Linux status strings */
		switch (t->t_state) {
		case TS_SLEEP:
			status = "S (sleeping)";
			break;
		case TS_RUN:
		case TS_ONPROC:
			status = "R (running)";
			break;
		case TS_ZOMB:
			status = "Z (zombie)";
			break;
		case TS_STOPPED:
			status = "T (stopped)";
			break;
		default:
			status = "! (unknown)";
			break;
		}
		thread_unlock(t);
	} else {
		if (lookup_id != 0) {
			/* we can't find this specific thread */
			lxpr_uiobuf_seterr(uiobuf, EINVAL);
			lxpr_unlock(p);
			return;
		}

		/*
		 * there is a hole in the exit code, where a proc can have
		 * no threads but it is yet to be flagged SZOMB. We will
		 * assume we are about to become a zombie
		 */
		status = "Z (zombie)";
	}

	up = PTOU(p);
	/* Take a hold on the credentials so they stay valid after p_crlock */
	mutex_enter(&p->p_crlock);
	crhold(cr = p->p_cred);
	mutex_exit(&p->p_crlock);

	lxpr_uiobuf_printf(uiobuf,
	    "Name:\t%s\n"
	    "State:\t%s\n"
	    "Tgid:\t%d\n"
	    "Pid:\t%d\n"
	    "PPid:\t%d\n"
	    "TracerPid:\t%d\n"
	    "Uid:\t%u\t%u\t%u\t%u\n"
	    "Gid:\t%u\t%u\t%u\t%u\n"
	    "FDSize:\t%d\n"
	    "Groups:\t",
	    up->u_comm,
	    status,
	    pid, /* thread group id - same as pid */
	    (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
	    ppid,
	    0,
	    crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
	    crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
	    p->p_fno_ctl);


	ngroups = crgetngroups(cr);
	groups = crgetgroups(cr);
	for (i = 0; i < ngroups; i++) {
		lxpr_uiobuf_printf(uiobuf,
		    "%u ",
		    groups[i]);
	}
	crfree(cr);

	as = p->p_as;
	/* Only report memory figures for live, non-system processes */
	if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
		size_t vsize, nlocked, rss;

		/*
		 * Drop p_lock while taking the address-space lock to honor
		 * the lock ordering between the two.
		 */
		mutex_exit(&p->p_lock);
		AS_LOCK_ENTER(as, RW_READER);
		vsize = as->a_resvsize;
		rss = rm_asrss(as);
		AS_LOCK_EXIT(as);
		mutex_enter(&p->p_lock);
		nlocked = p->p_locked_mem;

		lxpr_uiobuf_printf(uiobuf,
		    "\n"
		    "VmSize:\t%8lu kB\n"
		    "VmLck:\t%8lu kB\n"
		    "VmRSS:\t%8lu kB\n"
		    "VmData:\t%8lu kB\n"
		    "VmStk:\t%8lu kB\n"
		    "VmExe:\t%8lu kB\n"
		    "VmLib:\t%8lu kB",
		    btok(vsize),
		    btok(nlocked),
		    ptok(rss),
		    0l,
		    btok(p->p_stksize),
		    ptok(rss),
		    0l);
	}

	lxpr_uiobuf_printf(uiobuf, "\nThreads:\t%u", p->p_lwpcnt);

	sigemptyset(&current);
	sigemptyset(&ignore);
	sigemptyset(&handle);

	/* Translate native signal numbers/dispositions into Linux sets */
	for (i = 1; i < NSIG; i++) {
		lx_sig = stol_signo[i];

		if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
			if (sigismember(&p->p_sig, i))
				sigaddset(&current, lx_sig);

			if (up->u_signal[i - 1] == SIG_IGN)
				sigaddset(&ignore, lx_sig);
			else if (up->u_signal[i - 1] != SIG_DFL)
				sigaddset(&handle, lx_sig);
		}
	}

	lxpr_uiobuf_printf(uiobuf,
	    "\n"
	    "SigPnd:\t%08x%08x\n"
	    "SigBlk:\t%08x%08x\n"
	    "SigIgn:\t%08x%08x\n"
	    "SigCgt:\t%08x%08x\n"
	    "CapInh:\t%016x\n"
	    "CapPrm:\t%016x\n"
	    "CapEff:\t%016x\n",
	    current.__sigbits[1], current.__sigbits[0],
	    0, 0, /* signals blocked on per thread basis */
	    ignore.__sigbits[1], ignore.__sigbits[0],
	    handle.__sigbits[1], handle.__sigbits[0],
	    /* Can't do anything with linux capabilities */
	    0,
	    0,
	    0);

	lxpr_uiobuf_printf(uiobuf,
	    "CapBnd:\t%016llx\n",
	    /* We report the full capability bounding set */
	    0x1fffffffffLL);

	lxpr_unlock(p);
}

/*
 * lxpr_read_pid_status(): status file
 */
static void
lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);

	/* Process-level view: thread id 0 selects "any" thread */
	lxpr_read_status_common(lxpnp, uiobuf, 0);
}

/*
 * lxpr_read_pid_tid_status(): status file
 */
static void
lxpr_read_pid_tid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STATUS);
	lxpr_read_status_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
}

/*
 * pid/tid common code to read stat file
 */
static void
lxpr_read_stat_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
    uint_t lookup_id)
{
	proc_t *p;
	kthread_t *t;
	struct as *as;
	char stat;
	pid_t pid, ppid, pgpid, spid;
	gid_t psgid;
	dev_t psdev;
	size_t rss, vsize;
	int nice, pri;
	caddr_t wchan;
	processorid_t cpu;
	pid_t real_pid;

	/* Translate the emulated Linux pid into the native pid and lock it */
	real_pid = get_real_pid(lxpnp->lxpr_pid);
	p = lxpr_lock(real_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	pid = p->p_pid;

	/*
	 * Set Linux defaults if we're the zone's init process
	 */
	if (pid == curproc->p_zone->zone_proc_initpid) {
		pid = 1;		/* PID for init */
		ppid = 0;		/* parent PID for init is 0 */
		pgpid = 0;		/* process group for init is 0 */
		psgid = (gid_t)-1;	/* credential GID for init is -1 */
		spid = 0;		/* session id for init is 0 */
		psdev = 0;		/* session device for init is 0 */
	} else if (pid == curproc->p_zone->zone_zsched->p_pid) {
		pid = 0;		/* PID for zsched */
		ppid = 0;		/* parent PID for zsched is 0 */
		pgpid = 0;		/* process group for zsched is 0 */
		psgid = (gid_t)-1;	/* credential GID for zsched is -1 */
		spid = 0;		/* session id for zsched is 0 */
		psdev = 0;		/* session device for zsched is 0 */
	} else {
		/*
		 * Make sure not to reference parent PIDs that reside outside
		 * the zone
		 */
		ppid = ((p->p_flag & SZONETOP) ?
		    curproc->p_zone->zone_zsched->p_pid : p->p_ppid);

		/*
		 * Convert ppid to the Linux default of 1 if our parent is the
		 * zone's init process
		 */
		if (ppid == curproc->p_zone->zone_proc_initpid)
			ppid = 1;

		pgpid = p->p_pgrp;

		/* Session fields are protected by p_splock and s_lock */
		mutex_enter(&p->p_splock);
		mutex_enter(&p->p_sessp->s_lock);
		spid = p->p_sessp->s_sid;
		psdev = p->p_sessp->s_dev;
		if (p->p_sessp->s_cred)
			psgid = crgetgid(p->p_sessp->s_cred);
		else
			psgid = crgetgid(p->p_cred);

		mutex_exit(&p->p_sessp->s_lock);
		mutex_exit(&p->p_splock);
	}

	t = lxpr_get_thread(p, lookup_id);
	if (t != NULL) {
		/* Single-letter Linux state codes */
		switch (t->t_state) {
		case TS_SLEEP:
			stat = 'S'; break;
		case TS_RUN:
		case TS_ONPROC:
			stat = 'R'; break;
		case TS_ZOMB:
			stat = 'Z'; break;
		case TS_STOPPED:
			stat = 'T'; break;
		default:
			stat = '!'; break;
		}

		/* Ask the scheduling class for the nice value; 0 on failure */
		if (CL_DONICE(t, NULL, 0, &nice) != 0)
			nice = 0;

		pri = t->t_pri;
		wchan = t->t_wchan;
		cpu = t->t_cpu->cpu_id;
		thread_unlock(t);
	} else {
		if (lookup_id != 0) {
			/* we can't find this specific thread */
			lxpr_uiobuf_seterr(uiobuf, EINVAL);
			lxpr_unlock(p);
			return;
		}

		/* Only zombies have no threads */
		stat = 'Z';
		nice = 0;
		pri = 0;
		wchan = 0;
		cpu = 0;
	}
	as = p->p_as;
	/* Drop p_lock while acquiring the address-space lock (lock order) */
	mutex_exit(&p->p_lock);
	AS_LOCK_ENTER(as, RW_READER);
	vsize = as->a_resvsize;
	rss = rm_asrss(as);
	AS_LOCK_EXIT(as);
	mutex_enter(&p->p_lock);

	lxpr_uiobuf_printf(uiobuf,
	    "%d (%s) %c %d %d %d %d %d "
	    "%lu %lu %lu %lu %lu "
	    "%lu %lu %ld %ld "
	    "%d %d %d "
	    "%lu "
	    "%lu "
	    "%lu %ld %llu "
	    "%lu %lu %u "
	    "%lu %lu "
	    "%lu %lu %lu %lu "
	    "%lu "
	    "%lu %lu "
	    "%d "
	    "%d"
	    "\n",
	    (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
	    PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
	    0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
	    p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
	    pri, nice, p->p_lwpcnt,
	    0l, /* itrealvalue (time before next SIGALRM) */
	    PTOU(p)->u_ticks,
	    vsize, rss, p->p_vmem_ctl,
	    0l, 0l, USRSTACK, /* startcode, endcode, startstack */
	    0l, 0l, /* kstkesp, kstkeip */
	    0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
	    wchan,
	    0l, 0l, /* nswap, cnswap */
	    0, /* exit_signal */
	    cpu);

	lxpr_unlock(p);
}

/*
 * lxpr_read_pid_stat(): pid stat file
 */
static void
lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);

	lxpr_read_stat_common(lxpnp, uiobuf, 0);
}

/*
 * lxpr_read_pid_tid_stat(): pid stat file
 */
static void
lxpr_read_pid_tid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STAT);
	lxpr_read_stat_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
}

/*
 * lxpr_read_net_arp(): stubbed-out /proc/net/arp -- emits no content.
 */
/* ARGSUSED */
static void
lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* Per-interface counter snapshot used to build /proc/net/dev output */
struct lxpr_ifstat {
	uint64_t rx_bytes;
	uint64_t rx_packets;
	uint64_t rx_errors;
	uint64_t rx_drop;
	uint64_t tx_bytes;
	uint64_t tx_packets;
	uint64_t tx_errors;
	uint64_t tx_drop;
	uint64_t collisions;
	uint64_t rx_multicast;
};

/*
 * Snapshot the data of a kstat, looked up either by name (ks_module,
 * ks_instance, ks_name taken from kn) or by kid (ks_kid taken from kn).
 * On success, returns a kmem_alloc'd buffer holding the snapshot and sets
 * *size to the allocation size (needed for the eventual kmem_free) and
 * *num to the record count.  Returns NULL on failure.
 */
static void *
lxpr_kstat_read(kstat_t *kn, boolean_t byname, size_t *size, int *num)
{
	kstat_t *kp;
	int i, nrec = 0;
	size_t bufsize;
	void *buf = NULL;

	if (byname == B_TRUE) {
		kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
		    kn->ks_name, getzoneid());
	} else {
		kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
	}
	if (kp == NULL) {
		return (NULL);
	}
	if (kp->ks_flags & KSTAT_FLAG_INVALID) {
		kstat_rele(kp);
		return (NULL);
	}

	bufsize = kp->ks_data_size + 1;
	kstat_rele(kp);

	/*
	 * The kstat in question is released so that kmem_alloc(KM_SLEEP) is
	 * performed without it held. After the alloc, the kstat is reacquired
	 * and its size is checked again. If the buffer is no longer large
	 * enough, the alloc and check are repeated up to two times.
	 */
	for (i = 0; i < 2; i++) {
		buf = kmem_alloc(bufsize, KM_SLEEP);

		/* Check if bufsize still appropriate */
		if (byname == B_TRUE) {
			kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
			    kn->ks_name, getzoneid());
		} else {
			kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
		}
		if (kp == NULL || kp->ks_flags & KSTAT_FLAG_INVALID) {
			/* The kstat vanished (or was invalidated) meanwhile */
			if (kp != NULL) {
				kstat_rele(kp);
			}
			kmem_free(buf, bufsize);
			return (NULL);
		}
		KSTAT_ENTER(kp);
		(void) KSTAT_UPDATE(kp, KSTAT_READ);
		if (bufsize < kp->ks_data_size) {
			/* Data grew; resize the buffer and try once more */
			kmem_free(buf, bufsize);
			buf = NULL;
			bufsize = kp->ks_data_size + 1;
			KSTAT_EXIT(kp);
			kstat_rele(kp);
			continue;
		} else {
			if (KSTAT_SNAPSHOT(kp, buf, KSTAT_READ) != 0) {
				kmem_free(buf, bufsize);
				buf = NULL;
			}
			nrec = kp->ks_ndata;
			KSTAT_EXIT(kp);
			kstat_rele(kp);
			break;
		}
	}

	if (buf != NULL) {
		*size = bufsize;
		*num = nrec;
	}
	return (buf);
}

/*
 * Fill *ifs from a link kstat's named statistics.  Returns 0 on success,
 * -1 if the kstat could not be snapshotted.
 */
static int
lxpr_kstat_ifstat(kstat_t *kn, struct lxpr_ifstat *ifs)
{
	kstat_named_t *kp;
	int i, num;
	size_t size;

	/*
	 * Search by name instead of by kid since there's a small window to
	 * race against kstats being added/removed.
	 */
	bzero(ifs, sizeof (*ifs));
	kp = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
	if (kp == NULL)
		return (-1);
	for (i = 0; i < num; i++) {
		if (strncmp(kp[i].name, "rbytes64", KSTAT_STRLEN) == 0)
			ifs->rx_bytes = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "ipackets64", KSTAT_STRLEN) == 0)
			ifs->rx_packets = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "ierrors", KSTAT_STRLEN) == 0)
			ifs->rx_errors = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "norcvbuf", KSTAT_STRLEN) == 0)
			ifs->rx_drop = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "multircv", KSTAT_STRLEN) == 0)
			ifs->rx_multicast = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "obytes64", KSTAT_STRLEN) == 0)
			ifs->tx_bytes = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "opackets64", KSTAT_STRLEN) == 0)
			ifs->tx_packets = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "oerrors", KSTAT_STRLEN) == 0)
			ifs->tx_errors = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "noxmtbuf", KSTAT_STRLEN) == 0)
			ifs->tx_drop = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "collisions", KSTAT_STRLEN) == 0)
			ifs->collisions = kp[i].value.ui32;
	}
	kmem_free(kp, size);
	return (0);
}

/*
 * lxpr_read_net_dev(): emulate /proc/net/dev by walking the kstat chain
 * and reporting counters for entries in the "link" and "lo" modules.
 */
/* ARGSUSED */
static void
lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	kstat_t *ksr;
	kstat_t ks0;
	int i, nidx;
	size_t sidx;
	struct lxpr_ifstat ifs;

	lxpr_uiobuf_printf(uiobuf, "Inter-|   Receive                   "
	    "                             |  Transmit\n");
	lxpr_uiobuf_printf(uiobuf, " face |bytes    packets errs drop fifo"
	    " frame compressed multicast|bytes    packets errs drop fifo"
	    " colls carrier compressed\n");

	/*
	 * ks_kid 0 requests a snapshot of the whole kstat chain; record 0
	 * is skipped in the loop below -- presumably the chain header.
	 * NOTE(review): confirm against the kstat framework.
	 */
	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
	if (ksr == NULL)
		return;

	for (i = 1; i < nidx; i++) {
		if (strncmp(ksr[i].ks_module, "link", KSTAT_STRLEN) == 0 ||
		    strncmp(ksr[i].ks_module, "lo", KSTAT_STRLEN) == 0) {
			if (lxpr_kstat_ifstat(&ksr[i], &ifs) != 0)
				continue;

			/* Overwriting the name is ok in the local snapshot */
			lx_ifname_convert(ksr[i].ks_name, LX_IF_FROMNATIVE);
			lxpr_uiobuf_printf(uiobuf, "%6s: %7llu %7llu %4lu "
			    "%4lu %4u %5u %10u %9lu %8llu %7llu %4lu %4lu %4u "
			    "%5lu %7u %10u\n",
			    ksr[i].ks_name,
			    ifs.rx_bytes, ifs.rx_packets,
			    ifs.rx_errors, ifs.rx_drop,
			    0, 0, 0, ifs.rx_multicast,
			    ifs.tx_bytes, ifs.tx_packets,
			    ifs.tx_errors, ifs.tx_drop,
			    0, ifs.collisions, 0, 0);
		}
	}

	kmem_free(ksr, sidx);
}

/*
 * lxpr_read_net_dev_mcast(): stubbed-out /proc/net/dev_mcast.
 */
/* ARGSUSED */
static void
lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * Render an IPv6 address as 32 lowercase hex digits with no separators,
 * NUL-terminated, in the style used by Linux /proc/net files.
 */
static void
lxpr_inet6_out(const in6_addr_t *addr, char buf[33])
{
	const uint8_t *ip = addr->s6_addr;
	char digits[] = "0123456789abcdef";
	int i;
	for (i = 0; i < 16; i++) {
		buf[2 * i] = digits[ip[i] >> 4];
		buf[2 * i + 1] = digits[ip[i] & 0xf];
	}
	buf[32] = '\0';
}

/*
 * lxpr_read_net_if_inet6(): emulate /proc/net/if_inet6 by walking every
 * IPv6 ill/ipif in the current netstack.
 */
/* ARGSUSED */
static void
lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;
	ill_t *ill;
	ipif_t *ipif;
	ill_walk_context_t ctx;
	char ifname[LIFNAMSIZ], ip6out[33];

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/* Hold the global ill lock for the duration of the walk */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx, ipst);

	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			uint_t index = ill->ill_phyint->phyint_ifindex;
			int plen = ip_mask_to_plen_v6(&ipif->ipif_v6net_mask);
			unsigned int scope = lx_ipv6_scope_convert(
			    &ipif->ipif_v6lcl_addr);
			/* Always report PERMANENT flag */
			int flag = 0x80;

			(void) snprintf(ifname, LIFNAMSIZ, "%s", ill->ill_name);
			lx_ifname_convert(ifname, LX_IF_FROMNATIVE);
			lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out);

			lxpr_uiobuf_printf(uiobuf, "%32s %02x %02x %02x %02x"
			    " %8s\n", ip6out, index, plen, scope, flag, ifname);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	netstack_rele(ns);
}

/*
 * lxpr_read_net_igmp(): stubbed-out /proc/net/igmp.
 */
/* ARGSUSED */
static void
lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * lxpr_read_net_ip_mr_cache(): stubbed-out /proc/net/ip_mr_cache.
 */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * lxpr_read_net_ip_mr_vif(): stubbed-out /proc/net/ip_mr_vif.
 */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * Emit one /proc/net/ipv6_route line for the given IRE.  Invoked from
 * ire_walk_v6() with uiobuf passed through as the walker argument.
 */
static void
lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf)
{
	uint32_t flags;
	char name[IFNAMSIZ];
	char ipv6addr[33];

	lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr);
	lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr,
	    ip_mask_to_plen_v6(&ire->ire_mask_v6));

	/* punt on this for now */
	lxpr_uiobuf_printf(uiobuf, "%s %02x ",
	    "00000000000000000000000000000000", 0);

	lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr);
	lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr);

	/* These route flags have direct Linux equivalents */
	flags = ire->ire_flags &
	    (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
	/* Linux's RTF_LOCAL equivalent */
	if (ire->ire_metrics.iulp_local)
		flags |= 0x80000000;

	if (ire->ire_ill != NULL) {
		ill_get_name(ire->ire_ill, name, sizeof (name));
		lx_ifname_convert(name, LX_IF_FROMNATIVE);
	} else {
		name[0] = '\0';
	}

	lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n",
	    0, /* metric */
	    ire->ire_refcnt,
	    0,
	    flags,
	    name);
}

/* ARGSUSED */
static void
lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/*
	 * LX branded zones are expected to have exclusive IP stack, hence
	 * using ALL_ZONES as the zoneid filter.
	 */
	ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst);

	netstack_rele(ns);
}

/*
 * lxpr_read_net_mcfilter(): stubbed-out /proc/net/mcfilter.
 */
/* ARGSUSED */
static void
lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * lxpr_read_net_netstat(): stubbed-out /proc/net/netstat.
 */
/* ARGSUSED */
static void
lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * lxpr_read_net_raw(): stubbed-out /proc/net/raw.
 */
/* ARGSUSED */
static void
lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* IRE types that are not reported via /proc/net/route */
#define	LXPR_SKIP_ROUTE(type)	\
	(((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \
	IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & type) != 0)

/*
 * Emit one /proc/net/route line for the given IRE.  Invoked from
 * ire_walk_v4() with uiobuf passed through as the walker argument.
 */
static void
lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf)
{
	uint32_t flags;
	char name[IFNAMSIZ];
	ill_t *ill;
	ire_t *nire;
	ipif_t *ipif;
	ipaddr_t gateway;

	if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0)
		return;

	/* These route flags have direct Linux equivalents */
	flags = ire->ire_flags &
	    (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);

	/*
	 * Search for a suitable IRE for naming purposes.
	 * On Linux, the default route is typically associated with the
	 * interface used to access gateway. The default IRE on Illumos
	 * typically lacks an ill reference but its parent might have one.
	 */
	nire = ire;
	do {
		ill = nire->ire_ill;
		nire = nire->ire_dep_parent;
	} while (ill == NULL && nire != NULL);
	if (ill != NULL) {
		ill_get_name(ill, name, sizeof (name));
		lx_ifname_convert(name, LX_IF_FROMNATIVE);
	} else {
		/* No interface found anywhere in the dependency chain */
		name[0] = '*';
		name[1] = '\0';
	}

	/*
	 * Linux suppresses the gateway address for directly connected
	 * interface networks. To emulate this behavior, we walk all addresses
	 * of a given route interface. If one matches the gateway, it is
	 * displayed as NULL.
	 */
	gateway = ire->ire_gateway_addr;
	if ((ill = ire->ire_ill) != NULL) {
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_lcl_addr == gateway) {
				gateway = 0;
				break;
			}
		}
	}

	lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
	    "%d\t%08X\t%d\t%u\t%u\n",
	    name,
	    ire->ire_addr,
	    gateway,
	    flags, 0, 0,
	    0, /* priority */
	    ire->ire_mask,
	    0, 0, /* mss, window */
	    ire->ire_metrics.iulp_rtt);
}

/*
 * lxpr_read_net_route(): emulate /proc/net/route (IPv4 routing table).
 */
/* ARGSUSED */
static void
lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;

	lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t"
	    "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/*
	 * LX branded zones are expected to have exclusive IP stack, hence
	 * using ALL_ZONES as the zoneid filter.
	 */
	ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst);

	netstack_rele(ns);
}

/*
 * lxpr_read_net_rpc(): stubbed-out /proc/net/rpc.
 */
/* ARGSUSED */
static void
lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * lxpr_read_net_rt_cache(): stubbed-out /proc/net/rt_cache.
 */
/* ARGSUSED */
static void
lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * lxpr_read_net_sockstat(): stubbed-out /proc/net/sockstat.
 */
/* ARGSUSED */
static void
lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * Mapping of a mib2 kstat (lst_proto) to the ordered, NULL-terminated list
 * of named statistics (lst_fields) reported for it in /proc/net/snmp.
 */
typedef struct lxpr_snmp_table {
	const char *lst_proto;
	const char *lst_fields[];
} lxpr_snmp_table_t;

static lxpr_snmp_table_t lxpr_snmp_ip = { "ip",
	{
	"forwarding", "defaultTTL", "inReceives", "inHdrErrors",
	"inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards",
	"inDelivers", "outRequests", "outDiscards", "outNoRoutes",
	"reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs",
	"fragFails", "fragCreates",
	NULL
	}
};
static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp",
	{
	"inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds",
	"inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps",
	"inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps",
	"outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds",
	"outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos",
	"outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks",
	"outAddrMaskReps",
	NULL
	}
};
static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp",
	{
	"rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens",
	"passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs",
	"outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors",
	NULL
	}
};
static lxpr_snmp_table_t lxpr_snmp_udp = { "udp",
	{
	"inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors",
	"sndbufErrors", "inCsumErrors",
	NULL
	}
};

/* NULL-terminated list of protocols reported by /proc/net/snmp */
static lxpr_snmp_table_t *lxpr_net_snmptab[] = {
	&lxpr_snmp_ip,
	&lxpr_snmp_icmp,
	&lxpr_snmp_tcp,
	&lxpr_snmp_udp,
	NULL
};

/*
 * Print one protocol's header line and value line for /proc/net/snmp from
 * the given mib2 kstat.  Fields not found in the kstat are printed as 0.
 */
static void
lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table,
    kstat_t *kn)
{
	kstat_named_t *klist;
	char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN];
	int i, j, num;
	size_t size;

	klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
	if (klist == NULL)
		return;

	/*
	 * Print the header line, fields capitalized
	 *
	 * NOTE(review): strncpy does not NUL-terminate when the source hits
	 * KSTAT_STRLEN; this assumes all proto/field names are shorter than
	 * KSTAT_STRLEN -- confirm against the tables above.
	 */
	(void) strncpy(upname, table->lst_proto, KSTAT_STRLEN);
	upname[0] = toupper(upname[0]);
	lxpr_uiobuf_printf(uiobuf, "%s:", upname);
	for (i = 0; table->lst_fields[i] != NULL; i++) {
		(void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN);
		upfield[0] = toupper(upfield[0]);
		lxpr_uiobuf_printf(uiobuf, " %s", upfield);
	}
	lxpr_uiobuf_printf(uiobuf, "\n%s:", upname);

	/* Then loop back through to print the value line.
	 */
	for (i = 0; table->lst_fields[i] != NULL; i++) {
		kstat_named_t *kpoint = NULL;
		/* Linear search the snapshot for the matching named stat */
		for (j = 0; j < num; j++) {
			if (strncmp(klist[j].name, table->lst_fields[i],
			    KSTAT_STRLEN) == 0) {
				kpoint = &klist[j];
				break;
			}
		}
		if (kpoint == NULL) {
			/* Output 0 for unknown fields */
			lxpr_uiobuf_printf(uiobuf, " 0");
		} else {
			switch (kpoint->data_type) {
			case KSTAT_DATA_INT32:
				lxpr_uiobuf_printf(uiobuf, " %d",
				    kpoint->value.i32);
				break;
			case KSTAT_DATA_UINT32:
				lxpr_uiobuf_printf(uiobuf, " %u",
				    kpoint->value.ui32);
				break;
			case KSTAT_DATA_INT64:
				lxpr_uiobuf_printf(uiobuf, " %ld",
				    kpoint->value.l);
				break;
			case KSTAT_DATA_UINT64:
				lxpr_uiobuf_printf(uiobuf, " %lu",
				    kpoint->value.ul);
				break;
			}
		}
	}
	lxpr_uiobuf_printf(uiobuf, "\n");
	kmem_free(klist, size);
}

/*
 * lxpr_read_net_snmp(): emulate /proc/net/snmp from the mib2 kstats.
 */
/* ARGSUSED */
static void
lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	kstat_t *ksr;
	kstat_t ks0;
	lxpr_snmp_table_t **table = lxpr_net_snmptab;
	int i, t, nidx;
	size_t sidx;

	/* Snapshot the whole kstat chain (kid 0), as in lxpr_read_net_dev */
	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
	if (ksr == NULL)
		return;

	/* For each protocol table, find its mib2-class kstat and print it */
	for (t = 0; table[t] != NULL; t++) {
		for (i = 0; i < nidx; i++) {
			if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0)
				continue;
			if (strncmp(ksr[i].ks_name, table[t]->lst_proto,
			    KSTAT_STRLEN) == 0) {
				lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]);
				break;
			}
		}
	}
	kmem_free(ksr, sidx);
}

/*
 * lxpr_read_net_stat(): stubbed-out /proc/net/stat.
 */
/* ARGSUSED */
static void
lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * Map an illumos TCP state to its Linux numeric equivalent.
 */
static int
lxpr_convert_tcp_state(int st)
{
	/*
	 * Derived from the enum located in the Linux kernel sources:
	 * include/net/tcp_states.h
	 */
	switch (st) {
	case TCPS_ESTABLISHED:
		return (1);
	case TCPS_SYN_SENT:
		return (2);
	case TCPS_SYN_RCVD:
		return (3);
	case TCPS_FIN_WAIT_1:
		return (4);
	case TCPS_FIN_WAIT_2:
		return (5);
	case TCPS_TIME_WAIT:
		return (6);
	case TCPS_CLOSED:
		return (7);
	case TCPS_CLOSE_WAIT:
		return (8);
	case TCPS_LAST_ACK:
		return (9);
	case TCPS_LISTEN:
		return (10);
	case TCPS_CLOSING:
		return (11);
	default:
		/* No translation for TCPS_IDLE, TCPS_BOUND or anything else */
		return (0);
	}
}

/*
 * Emit /proc/net/tcp (IPv4) or /proc/net/tcp6 (IPv6) by walking the
 * global conn hash of the current netstack.
 */
static void
lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
{
	int i, sl = 0;
	connf_t *connfp;
	conn_t *connp;
	netstack_t *ns;
	ip_stack_t *ipst;

	ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
	if (ipver == IPV4_VERSION) {
		lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address   "
		    "st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout "
		    "inode\n");
	} else {
		lxpr_uiobuf_printf(uiobuf, "  sl  "
		    "local_address                         "
		    "remote_address                        "
		    "st tx_queue rx_queue tr tm->when retrnsmt   "
		    "uid  timeout inode\n");
	}
	/*
	 * Due to differences between the Linux and illumos TCP
	 * implementations, some data will be omitted from the output here.
	 *
	 * Valid fields:
	 *  - local_address
	 *  - remote_address
	 *  - st
	 *  - tx_queue
	 *  - rx_queue
	 *  - uid
	 *  - inode
	 *
	 * Omitted/invalid fields
	 *  - tr
	 *  - tm->when
	 *  - retrnsmt
	 *  - timeout
	 */

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/* Walk every fanout bucket; ipcl_get_next_conn manages conn refs */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;
		while ((connp =
		    ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) {
			tcp_t *tcp;
			vattr_t attr;
			sonode_t *so = (sonode_t *)connp->conn_upper_handle;
			vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
			if (connp->conn_ipversion != ipver)
				continue;
			tcp = connp->conn_tcp;
			if (ipver == IPV4_VERSION) {
				lxpr_uiobuf_printf(uiobuf,
				    "%4d: %08X:%04X %08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v4,
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v4,
				    ntohs(connp->conn_fport));
			} else {
				lxpr_uiobuf_printf(uiobuf, "%4d: "
				    "%08X%08X%08X%08X:%04X "
				    "%08X%08X%08X%08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v6.s6_addr32[0],
				    connp->conn_laddr_v6.s6_addr32[1],
				    connp->conn_laddr_v6.s6_addr32[2],
				    connp->conn_laddr_v6.s6_addr32[3],
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v6.s6_addr32[0],
				    connp->conn_faddr_v6.s6_addr32[1],
				    connp->conn_faddr_v6.s6_addr32[2],
				    connp->conn_faddr_v6.s6_addr32[3],
				    ntohs(connp->conn_fport));
			}

			/* fetch the simulated inode for the socket */
			if (vp == NULL ||
			    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
				attr.va_nodeid = 0;

			lxpr_uiobuf_printf(uiobuf,
			    "%02X %08X:%08X %02X:%08X %08X "
			    "%5u %8d %lu %d %p %u %u %u %u %d\n",
			    lxpr_convert_tcp_state(tcp->tcp_state),
			    tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */
			    0, 0, /* tr, when */
			    0, /* per-connection rexmits aren't tracked today */
			    connp->conn_cred->cr_uid,
			    0, /* timeout */
			    /* inode + more */
			    (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0);
		}
	}
	netstack_rele(ns);
}

/*
 * lxpr_read_net_tcp(): /proc/net/tcp -- IPv4 TCP connections.
 */
/* ARGSUSED */
static void
lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_tcp(uiobuf, IPV4_VERSION);
}

/*
 * lxpr_read_net_tcp6(): /proc/net/tcp6 -- IPv6 TCP connections.
 */
/* ARGSUSED */
static void
lxpr_read_net_tcp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_tcp(uiobuf, IPV6_VERSION);
}

/*
 * Emit /proc/net/udp (IPv4) or /proc/net/udp6 (IPv6) by walking the
 * global conn hash of the current netstack.
 */
static void
lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
{
	int i, sl = 0;
	connf_t *connfp;
	conn_t *connp;
	netstack_t *ns;
	ip_stack_t *ipst;

	ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
	if (ipver == IPV4_VERSION) {
		lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address"
		    "   st tx_queue rx_queue tr tm->when retrnsmt   uid"
		    "  timeout inode ref pointer drops\n");
	} else {
		lxpr_uiobuf_printf(uiobuf, "  sl  "
		    "local_address                         "
		    "remote_address                        "
		    "st tx_queue rx_queue tr tm->when retrnsmt   "
		    "uid  timeout inode ref pointer drops\n");
	}
	/*
	 * Due to differences between the Linux and illumos UDP
	 * implementations, some data will be omitted from the output here.
	 *
	 * Valid fields:
	 *  - local_address
	 *  - remote_address
	 *  - st: limited
	 *  - uid
	 *
	 * Omitted/invalid fields
	 *  - tx_queue
	 *  - rx_queue
	 *  - tr
	 *  - tm->when
	 *  - retrnsmt
	 *  - timeout
	 *  - inode
	 */

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/* Walk every fanout bucket; ipcl_get_next_conn manages conn refs */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;
		while ((connp =
		    ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) {
			udp_t *udp;
			int state = 0;
			vattr_t attr;
			sonode_t *so = (sonode_t *)connp->conn_upper_handle;
			vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
			if (connp->conn_ipversion != ipver)
				continue;
			udp = connp->conn_udp;
			if (ipver == IPV4_VERSION) {
				lxpr_uiobuf_printf(uiobuf,
				    "%4d: %08X:%04X %08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v4,
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v4,
				    ntohs(connp->conn_fport));
			} else {
				lxpr_uiobuf_printf(uiobuf, "%4d: "
				    "%08X%08X%08X%08X:%04X "
				    "%08X%08X%08X%08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v6.s6_addr32[0],
				    connp->conn_laddr_v6.s6_addr32[1],
				    connp->conn_laddr_v6.s6_addr32[2],
				    connp->conn_laddr_v6.s6_addr32[3],
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v6.s6_addr32[0],
				    connp->conn_faddr_v6.s6_addr32[1],
				    connp->conn_faddr_v6.s6_addr32[2],
				    connp->conn_faddr_v6.s6_addr32[3],
				    ntohs(connp->conn_fport));
			}

			/*
			 * Map the TPI endpoint state onto the Linux numeric
			 * states used above (7 == closed, 1 == established).
			 */
			switch (udp->udp_state) {
			case TS_UNBND:
			case TS_IDLE:
				state = 7;
				break;
			case TS_DATA_XFER:
				state = 1;
				break;
			}

			/* fetch the simulated inode for the socket */
			if (vp == NULL ||
			    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
				attr.va_nodeid = 0;

			lxpr_uiobuf_printf(uiobuf,
			    "%02X %08X:%08X %02X:%08X %08X "
			    "%5u %8d %lu %d %p %d\n",
			    state,
			    0, 0, /* rx/tx queue */
			    0, 0, /* tr, when */
			    0, /* retrans */
			    connp->conn_cred->cr_uid,
			    0, /* timeout */
			    /* inode, ref, pointer, drops */
			    (ino_t)attr.va_nodeid, 0, NULL, 0);
		}
	}
	netstack_rele(ns);
}

/*
 * lxpr_read_net_udp(): /proc/net/udp -- IPv4 UDP endpoints.
 */
/* ARGSUSED */
static void
lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_udp(uiobuf, IPV4_VERSION);
}

/*
 * lxpr_read_net_udp6(): /proc/net/udp6 -- IPv6 UDP endpoints.
 */
/* ARGSUSED */
static void
lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_udp(uiobuf, IPV6_VERSION);
}

/*
 * lxpr_read_net_unix(): emulate /proc/net/unix by walking the global
 * socklist of TPI sockets, reporting only those in the current zone.
 */
/* ARGSUSED */
static void
lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	sonode_t *so;
	zoneid_t zoneid = getzoneid();

	lxpr_uiobuf_printf(uiobuf, "Num       RefCount Protocol Flags    Type "
	    "St Inode Path\n");

	mutex_enter(&socklist.sl_lock);
	for (so = socklist.sl_list; so != NULL;
	    so = _SOTOTPI(so)->sti_next_so) {
		vnode_t *vp = so->so_vnode;
		vattr_t attr;
		sotpi_info_t *sti;
		const char *name = NULL;
		int status = 0;
		int type = 0;
		int flags = 0;

		/* Only process active sonodes in this zone */
		if (so->so_count == 0 || so->so_zoneid != zoneid)
			continue;

		/*
		 * Grab the inode, if possible.
		 * This must be done before entering so_lock.
3114 */ 3115 if (vp == NULL || 3116 VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0) 3117 attr.va_nodeid = 0; 3118 3119 mutex_enter(&so->so_lock); 3120 sti = _SOTOTPI(so); 3121 3122 if (sti->sti_laddr_sa != NULL && 3123 sti->sti_laddr_len > 0) { 3124 name = sti->sti_laddr_sa->sa_data; 3125 } else if (sti->sti_faddr_sa != NULL && 3126 sti->sti_faddr_len > 0) { 3127 name = sti->sti_faddr_sa->sa_data; 3128 } 3129 3130 /* 3131 * Derived from enum values in Linux kernel source: 3132 * include/uapi/linux/net.h 3133 */ 3134 if ((so->so_state & SS_ISDISCONNECTING) != 0) { 3135 status = 4; 3136 } else if ((so->so_state & SS_ISCONNECTING) != 0) { 3137 status = 2; 3138 } else if ((so->so_state & SS_ISCONNECTED) != 0) { 3139 status = 3; 3140 } else { 3141 status = 1; 3142 /* Add ACC flag for stream-type server sockets */ 3143 if (so->so_type != SOCK_DGRAM && 3144 sti->sti_laddr_sa != NULL) 3145 flags |= 0x10000; 3146 } 3147 3148 /* Convert to Linux type */ 3149 switch (so->so_type) { 3150 case SOCK_DGRAM: 3151 type = 2; 3152 break; 3153 case SOCK_SEQPACKET: 3154 type = 5; 3155 break; 3156 default: 3157 type = 1; 3158 } 3159 3160 lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu", 3161 so, 3162 so->so_count, 3163 0, /* proto, always 0 */ 3164 flags, 3165 type, 3166 status, 3167 (ino_t)attr.va_nodeid); 3168 3169 /* 3170 * Due to shortcomings in the abstract socket emulation, they 3171 * cannot be properly represented here (as @<path>). 3172 * 3173 * This will be the case until they are better implemented. 3174 */ 3175 if (name != NULL) 3176 lxpr_uiobuf_printf(uiobuf, " %s\n", name); 3177 else 3178 lxpr_uiobuf_printf(uiobuf, "\n"); 3179 mutex_exit(&so->so_lock); 3180 } 3181 mutex_exit(&socklist.sl_lock); 3182 } 3183 3184 /* 3185 * lxpr_read_kmsg(): read the contents of the kernel message queue. 
We 3186 * translate this into the reception of console messages for this zone; each 3187 * read copies out a single zone console message, or blocks until the next one 3188 * is produced, unless we're open non-blocking, in which case we return after 3189 * 1ms. 3190 */ 3191 3192 #define LX_KMSG_PRI "<0>" 3193 3194 static void 3195 lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh) 3196 { 3197 mblk_t *mp; 3198 timestruc_t to; 3199 timestruc_t *tp = NULL; 3200 3201 ASSERT(lxpnp->lxpr_type == LXPR_KMSG); 3202 3203 if (lxpr_uiobuf_nonblock(uiobuf)) { 3204 to.tv_sec = 0; 3205 to.tv_nsec = 1000000; /* 1msec */ 3206 tp = &to; 3207 } 3208 3209 if (ldi_getmsg(lh, &mp, tp) == 0) { 3210 /* 3211 * lx procfs doesn't like successive reads to the same file 3212 * descriptor unless we do an explicit rewind each time. 3213 */ 3214 lxpr_uiobuf_seek(uiobuf, 0); 3215 3216 lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI, 3217 mp->b_cont->b_rptr); 3218 3219 freemsg(mp); 3220 } 3221 } 3222 3223 /* 3224 * lxpr_read_loadavg(): read the contents of the "loadavg" file. We do just 3225 * enough for uptime and other simple lxproc readers to work 3226 */ 3227 extern int nthread; 3228 3229 static void 3230 lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3231 { 3232 ulong_t avenrun1; 3233 ulong_t avenrun5; 3234 ulong_t avenrun15; 3235 ulong_t avenrun1_cs; 3236 ulong_t avenrun5_cs; 3237 ulong_t avenrun15_cs; 3238 int loadavg[3]; 3239 int *loadbuf; 3240 cpupart_t *cp; 3241 zone_t *zone = LXPTOZ(lxpnp); 3242 3243 uint_t nrunnable = 0; 3244 rctl_qty_t nlwps; 3245 3246 ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG); 3247 3248 mutex_enter(&cpu_lock); 3249 3250 /* 3251 * Need to add up values over all CPU partitions. If pools are active, 3252 * only report the values of the zone's partition, which by definition 3253 * includes the current CPU. 
3254 */ 3255 if (pool_pset_enabled()) { 3256 psetid_t psetid = zone_pset_get(curproc->p_zone); 3257 3258 ASSERT(curproc->p_zone != &zone0); 3259 cp = CPU->cpu_part; 3260 3261 nrunnable = cp->cp_nrunning + cp->cp_nrunnable; 3262 (void) cpupart_get_loadavg(psetid, &loadavg[0], 3); 3263 loadbuf = &loadavg[0]; 3264 } else { 3265 cp = cp_list_head; 3266 do { 3267 nrunnable += cp->cp_nrunning + cp->cp_nrunnable; 3268 } while ((cp = cp->cp_next) != cp_list_head); 3269 3270 loadbuf = zone == global_zone ? 3271 &avenrun[0] : zone->zone_avenrun; 3272 } 3273 3274 /* 3275 * If we're in the non-global zone, we'll report the total number of 3276 * LWPs in the zone for the "nproc" parameter of /proc/loadavg, 3277 * otherwise will just use nthread (which will include kernel threads, 3278 * but should be good enough for lxproc). 3279 */ 3280 nlwps = zone == global_zone ? nthread : zone->zone_nlwps; 3281 3282 mutex_exit(&cpu_lock); 3283 3284 avenrun1 = loadbuf[0] >> FSHIFT; 3285 avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT; 3286 avenrun5 = loadbuf[1] >> FSHIFT; 3287 avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT; 3288 avenrun15 = loadbuf[2] >> FSHIFT; 3289 avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT; 3290 3291 lxpr_uiobuf_printf(uiobuf, 3292 "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n", 3293 avenrun1, avenrun1_cs, 3294 avenrun5, avenrun5_cs, 3295 avenrun15, avenrun15_cs, 3296 nrunnable, nlwps, 0); 3297 } 3298 3299 /* 3300 * lxpr_read_meminfo(): read the contents of the "meminfo" file. 
 */
static void
lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	zone_t *zone = LXPTOZ(lxpnp);
	int global = zone == global_zone;
	long total_mem, free_mem, total_swap, used_swap;

	ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);

	/* Uncapped zones see system-wide figures; capped zones see the cap */
	if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
		total_mem = physmem * PAGESIZE;
		free_mem = freemem * PAGESIZE;
	} else {
		total_mem = zone->zone_phys_mem_ctl;
		free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
	}

	if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
		total_swap = k_anoninfo.ani_max * PAGESIZE;
		used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
	} else {
		mutex_enter(&zone->zone_mem_lock);
		total_swap = zone->zone_max_swap_ctl;
		used_swap = zone->zone_max_swap;
		mutex_exit(&zone->zone_mem_lock);
	}

	/* Fields with no illumos equivalent are reported as 0 */
	lxpr_uiobuf_printf(uiobuf,
	    "MemTotal:  %8lu kB\n"
	    "MemFree:   %8lu kB\n"
	    "MemShared: %8u kB\n"
	    "Buffers:   %8u kB\n"
	    "Cached:    %8u kB\n"
	    "SwapCached:%8u kB\n"
	    "Active:    %8u kB\n"
	    "Inactive:  %8u kB\n"
	    "HighTotal: %8u kB\n"
	    "HighFree:  %8u kB\n"
	    "LowTotal:  %8u kB\n"
	    "LowFree:   %8u kB\n"
	    "SwapTotal: %8lu kB\n"
	    "SwapFree:  %8lu kB\n",
	    btok(total_mem),		/* MemTotal */
	    btok(free_mem),		/* MemFree */
	    0,				/* MemShared */
	    0,				/* Buffers */
	    0,				/* Cached */
	    0,				/* SwapCached */
	    0,				/* Active */
	    0,				/* Inactive */
	    0,				/* HighTotal */
	    0,				/* HighFree */
	    btok(total_mem),		/* LowTotal */
	    btok(free_mem),		/* LowFree */
	    btok(total_swap),		/* SwapTotal */
	    btok(total_swap - used_swap)); /* SwapFree */
}

/*
 * lxpr_read_mounts(): emit a Linux-style mount table for this zone's
 * visible filesystems.
 */
/* ARGSUSED */
static void
lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	struct vfs *vfsp;
	struct vfs *vfslist;
	zone_t *zone = LXPTOZ(lxpnp);
	struct print_data {
		refstr_t *vfs_mntpt;
		refstr_t *vfs_resource;
		uint_t vfs_flag;
		int vfs_fstype;
		struct print_data *next;
	} *print_head = NULL;
	struct print_data **print_tail = &print_head;
	struct print_data *printp;

	vfs_list_read_lock();

	if (zone == global_zone) {
		vfsp = vfslist = rootvfs;
	} else {
		vfsp = vfslist = zone->zone_vfslist;
		/*
		 * If the zone has a root entry, it will be the first in
		 * the list.  If it doesn't, we conjure one up.
		 */
		if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
		    zone->zone_rootpath) != 0) {
			struct vfs *tvfsp;
			/*
			 * The root of the zone is not a mount point.  The vfs
			 * we want to report is that of the zone's root vnode.
			 */
			tvfsp = zone->zone_rootvp->v_vfsp;

			lxpr_uiobuf_printf(uiobuf,
			    "/ / %s %s 0 0\n",
			    vfssw[tvfsp->vfs_fstype].vsw_name,
			    tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");

		}
		if (vfslist == NULL) {
			vfs_list_unlock();
			return;
		}
	}

	/*
	 * Later on we have to do a lookupname, which can end up causing
	 * another vfs_list_read_lock() to be called.  Which can lead to a
	 * deadlock.  To avoid this, we extract the data we need into a local
	 * list, then we can run this list without holding
	 * vfs_list_read_lock().  We keep the list in the same order as the
	 * vfs_list.
	 */
	do {
		/* Skip mounts we shouldn't show */
		if (vfsp->vfs_flag & VFS_NOMNTTAB) {
			goto nextfs;
		}

		printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
		/* Hold the refstrs so they survive dropping the list lock */
		refstr_hold(vfsp->vfs_mntpt);
		printp->vfs_mntpt = vfsp->vfs_mntpt;
		refstr_hold(vfsp->vfs_resource);
		printp->vfs_resource = vfsp->vfs_resource;
		printp->vfs_flag = vfsp->vfs_flag;
		printp->vfs_fstype = vfsp->vfs_fstype;
		printp->next = NULL;

		*print_tail = printp;
		print_tail = &printp->next;

nextfs:
		vfsp = (zone == global_zone) ?
		    vfsp->vfs_next : vfsp->vfs_zone_next;

	} while (vfsp != vfslist);

	vfs_list_unlock();

	/*
	 * now we can run through what we've extracted without holding
	 * vfs_list_read_lock()
	 */
	printp = print_head;
	while (printp != NULL) {
		struct print_data *printp_next;
		const char *resource;
		char *mntpt;
		struct vnode *vp;
		int error;

		mntpt = (char *)refstr_value(printp->vfs_mntpt);
		resource = refstr_value(printp->vfs_resource);

		if (mntpt != NULL && mntpt[0] != '\0')
			mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
		else
			mntpt = "-";

		error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);

		if (error != 0)
			goto nextp;

		/* Only report real mount points (roots of their vfs) */
		if (!(vp->v_flag & VROOT)) {
			VN_RELE(vp);
			goto nextp;
		}
		VN_RELE(vp);

		if (resource != NULL && resource[0] != '\0') {
			if (resource[0] == '/') {
				resource = ZONE_PATH_VISIBLE(resource, zone) ?
				    ZONE_PATH_TRANSLATE(resource, zone) :
				    mntpt;
			}
		} else {
			resource = "-";
		}

		lxpr_uiobuf_printf(uiobuf,
		    "%s %s %s %s 0 0\n",
		    resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
		    printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");

nextp:
		printp_next = printp->next;
		refstr_rele(printp->vfs_mntpt);
		refstr_rele(printp->vfs_resource);
		kmem_free(printp, sizeof (*printp));
		printp = printp_next;

	}
}

/*
 * lxpr_read_partitions():
 *
 * Over the years, /proc/partitions has been made considerably smaller -- to
 * the point that it really is only major number, minor number, number of
 * blocks (which we report as 0), and partition name.
 *
 * We support this because some things want to see it to make sense of
 * /proc/diskstats, and also because "fdisk -l" and a few other things look
 * here to find all disks on the system.
 */
/* ARGSUSED */
static void
lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{

	kstat_t *ksr;
	kstat_t ks0;
	int nidx, num, i;
	size_t sidx, size;
	zfs_cmd_t *zc;
	nvlist_t *nv = NULL;
	nvpair_t *elem = NULL;
	lxpr_mnt_t *mnt;
	lxpr_zfs_iter_t zfsi;

	ASSERT(lxpnp->lxpr_type == LXPR_PARTITIONS);

	/* Snapshot the kstat chain headers (ks_kid 0 = full chain) */
	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);

	if (ksr == NULL)
		return;

	lxpr_uiobuf_printf(uiobuf, "major minor  #blocks  name\n\n");

	/* One line per disk-class I/O kstat; block count reported as 0 */
	for (i = 1; i < nidx; i++) {
		kstat_t *ksp = &ksr[i];
		kstat_io_t *kip;

		if (ksp->ks_type != KSTAT_TYPE_IO ||
		    strcmp(ksp->ks_class, "disk") != 0)
			continue;

		if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
		    &size, &num)) == NULL)
			continue;

		/* Guard against a short/truncated kstat read */
		if (size < sizeof (kstat_io_t)) {
			kmem_free(kip, size);
			continue;
		}

		lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d %s\n",
		    mod_name_to_major(ksp->ks_module),
		    ksp->ks_instance, 0, ksp->ks_name);

		kmem_free(kip, size);
	}

	kmem_free(ksr, sidx);

	/* If we never got to open the zfs LDI, then stop now. */
	mnt = (lxpr_mnt_t *)lxpnp->lxpr_vnode->v_vfsp->vfs_data;
	if (mnt->lxprm_zfs_isopen == B_FALSE)
		return;

	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);

	if (lxpr_zfs_list_pools(mnt, zc, &nv) != 0)
		goto out;

	/* Also report every zvol in every pool as a partition */
	while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
		char *pool = nvpair_name(elem);

		bzero(&zfsi, sizeof (lxpr_zfs_iter_t));
		while (lxpr_zfs_next_zvol(mnt, pool, zc, &zfsi) == 0) {
			major_t major;
			minor_t minor;
			if (lxpr_zvol_dev(mnt, zc->zc_name, &major, &minor)
			    != 0)
				continue;

			lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d zvol/dsk/%s\n",
			    major, minor, 0, zc->zc_name);
		}
	}

	nvlist_free(nv);
out:
	kmem_free(zc, sizeof (zfs_cmd_t));
}

/*
 * lxpr_read_diskstats():
 *
 * See the block comment above the per-device output-generating line for the
 * details of the format.
 */
/* ARGSUSED */
static void
lxpr_read_diskstats(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	kstat_t *ksr;
	kstat_t ks0;
	int nidx, num, i;
	size_t sidx, size;

	ASSERT(lxpnp->lxpr_type == LXPR_DISKSTATS);

	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);

	if (ksr == NULL)
		return;

	for (i = 1; i < nidx; i++) {
		kstat_t *ksp = &ksr[i];
		kstat_io_t *kip;

		if (ksp->ks_type != KSTAT_TYPE_IO ||
		    strcmp(ksp->ks_class, "disk") != 0)
			continue;

		if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
		    &size, &num)) == NULL)
			continue;

		if (size < sizeof (kstat_io_t)) {
			kmem_free(kip, size);
			continue;
		}

		/*
		 * /proc/diskstats is defined to have one line of output for
		 * each block device, with each line containing the following
		 * 14 fields:
		 *
		 *	1 - major number
		 *	2 - minor mumber
		 *	3 - device name
		 *	4 - reads completed successfully
		 *	5 - reads merged
		 *	6 - sectors read
		 *	7 - time spent reading (ms)
		 *	8 - writes completed
		 *	9 - writes merged
		 *	10 - sectors written
		 *	11 - time spent writing (ms)
		 *	12 - I/Os currently in progress
		 *	13 - time spent doing I/Os (ms)
		 *	14 - weighted time spent doing I/Os (ms)
		 *
		 * One small hiccup:  we don't actually keep track of time
		 * spent reading vs. time spent writing -- we keep track of
		 * time waiting vs. time actually performing I/O.  While we
		 * could divide the total time by the I/O mix (making the
		 * obviously wrong assumption that I/O operations all take the
		 * same amount of time), this has the undesirable side-effect
		 * of moving backwards.  Instead, we report the total time
		 * (read + write) for all three stats (read, write, total).
		 * This is also a lie of sorts, but it should be more
		 * immediately clear to the user that reads and writes are
		 * each being double-counted as the other.
		 */
		lxpr_uiobuf_printf(uiobuf, "%4d %7d %s "
		    "%llu %llu %llu %llu "
		    "%llu %llu %llu %llu "
		    "%llu %llu %llu\n",
		    mod_name_to_major(ksp->ks_module),
		    ksp->ks_instance, ksp->ks_name,
		    (uint64_t)kip->reads, 0LL,
		    kip->nread / (uint64_t)LXPR_SECTOR_SIZE,
		    (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
		    (uint64_t)kip->writes, 0LL,
		    kip->nwritten / (uint64_t)LXPR_SECTOR_SIZE,
		    (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
		    (uint64_t)(kip->rcnt + kip->wcnt),
		    (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
		    (kip->rlentime + kip->wlentime) /
		    (uint64_t)(NANOSEC / MILLISEC));

		kmem_free(kip, size);
	}

	kmem_free(ksr, sidx);
}

/*
 * lxpr_read_version(): read the contents of the "version" file.
 */
/* ARGSUSED */
static void
lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lx_zone_data_t *lxzd = ztolxzd(LXPTOZ(lxpnp));
	lx_proc_data_t *lxpd = ptolxproc(curproc);
	const char *release = lxzd->lxzd_kernel_release;
	const char *version = lxzd->lxzd_kernel_version;

	/* Use per-process overrides, if specified */
	if (lxpd != NULL && lxpd->l_uname_release[0] != '\0') {
		release = lxpd->l_uname_release;
	}
	if (lxpd != NULL && lxpd->l_uname_version[0] != '\0') {
		version = lxpd->l_uname_version;
	}

	lxpr_uiobuf_printf(uiobuf,
	    "%s version %s (%s version %d.%d.%d) %s\n",
	    LX_UNAME_SYSNAME, release,
#if defined(__GNUC__)
	    "gcc", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
#else
	    "cc", 1, 0, 0,
#endif
	    version);
}

/*
 * lxpr_read_stat(): read the contents of the "stat" file.
 *
 */
/* ARGSUSED */
static void
lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	cpu_t *cp, *cpstart;
	int pools_enabled;
	ulong_t idle_cum = 0;
	ulong_t sys_cum = 0;
	ulong_t user_cum = 0;
	ulong_t irq_cum = 0;
	ulong_t cpu_nrunnable_cum = 0;
	ulong_t w_io_cum = 0;

	ulong_t pgpgin_cum = 0;
	ulong_t pgpgout_cum = 0;
	ulong_t pgswapout_cum = 0;
	ulong_t pgswapin_cum = 0;
	ulong_t intr_cum = 0;
	ulong_t pswitch_cum = 0;
	ulong_t forks_cum = 0;
	hrtime_t msnsecs[NCMSTATES];
	/* is the emulated release > 2.4 */
	boolean_t newer_than24 = lx_kern_release_cmp(LXPTOZ(lxpnp), "2.4") > 0;
	/* temporary variable since scalehrtime modifies data in place */
	hrtime_t tmptime;

	ASSERT(lxpnp->lxpr_type == LXPR_STAT);

	mutex_enter(&cpu_lock);
	pools_enabled = pool_pset_enabled();

	/* Calculate cumulative stats */
	cp = cpstart = CPU->cpu_part->cp_cpulist;
	do {
		int i;

		/*
		 * Don't count CPUs that aren't even in the system
		 * or aren't up yet.
		 */
		if ((cp->cpu_flags & CPU_EXISTS) == 0) {
			continue;
		}

		get_cpu_mstate(cp, msnsecs);

		idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
		sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
		user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);

		pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
		pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
		pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
		pgswapout_cum += CPU_STATS(cp, vm.pgswapout);


		/* Fields only present in the >2.4-format "cpu" line */
		if (newer_than24) {
			cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
			w_io_cum += CPU_STATS(cp, sys.iowait);
			for (i = 0; i < NCMSTATES; i++) {
				tmptime = cp->cpu_intracct[i];
				scalehrtime(&tmptime);
				irq_cum += NSEC_TO_TICK(tmptime);
			}
		}

		for (i = 0; i < PIL_MAX; i++)
			intr_cum += CPU_STATS(cp, sys.intr[i]);

		pswitch_cum += CPU_STATS(cp, sys.pswitch);
		forks_cum += CPU_STATS(cp, sys.sysfork);
		forks_cum += CPU_STATS(cp, sys.sysvfork);

		if (pools_enabled)
			cp = cp->cpu_next_part;
		else
			cp = cp->cpu_next;
	} while (cp != cpstart);

	if (newer_than24) {
		lxpr_uiobuf_printf(uiobuf,
		    "cpu %lu %lu %lu %lu %lu %lu %lu\n",
		    user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
	} else {
		lxpr_uiobuf_printf(uiobuf,
		    "cpu %lu %lu %lu %lu\n",
		    user_cum, 0L, sys_cum, idle_cum);
	}

	/* Do per processor stats */
	do {
		int i;

		ulong_t idle_ticks;
		ulong_t sys_ticks;
		ulong_t user_ticks;
		ulong_t irq_ticks = 0;

		/*
		 * Don't count CPUs that aren't even in the system
		 * or aren't up yet.
		 */
		if ((cp->cpu_flags & CPU_EXISTS) == 0) {
			continue;
		}

		get_cpu_mstate(cp, msnsecs);

		idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
		sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
		user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);

		for (i = 0; i < NCMSTATES; i++) {
			tmptime = cp->cpu_intracct[i];
			scalehrtime(&tmptime);
			irq_ticks += NSEC_TO_TICK(tmptime);
		}

		if (newer_than24) {
			lxpr_uiobuf_printf(uiobuf,
			    "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
			    cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
			    0L, irq_ticks, 0L);
		} else {
			lxpr_uiobuf_printf(uiobuf,
			    "cpu%d %lu %lu %lu %lu\n",
			    cp->cpu_id,
			    user_ticks, 0L, sys_ticks, idle_ticks);
		}

		if (pools_enabled)
			cp = cp->cpu_next_part;
		else
			cp = cp->cpu_next;
	} while (cp != cpstart);

	mutex_exit(&cpu_lock);

	if (newer_than24) {
		lxpr_uiobuf_printf(uiobuf,
		    "page %lu %lu\n"
		    "swap %lu %lu\n"
		    "intr %lu\n"
		    "ctxt %lu\n"
		    "btime %lu\n"
		    "processes %lu\n"
		    "procs_running %lu\n"
		    "procs_blocked %lu\n",
		    pgpgin_cum, pgpgout_cum,
		    pgswapin_cum, pgswapout_cum,
		    intr_cum,
		    pswitch_cum,
		    boot_time,
		    forks_cum,
		    cpu_nrunnable_cum,
		    w_io_cum);
	} else {
		lxpr_uiobuf_printf(uiobuf,
		    "page %lu %lu\n"
		    "swap %lu %lu\n"
		    "intr %lu\n"
		    "ctxt %lu\n"
		    "btime %lu\n"
		    "processes %lu\n",
		    pgpgin_cum, pgpgout_cum,
		    pgswapin_cum, pgswapout_cum,
		    intr_cum,
		    pswitch_cum,
		    boot_time,
		    forks_cum);
	}
}

/*
 * lxpr_read_swaps():
 *
 * We don't support swap files or partitions, but some programs like to look
 * here just to check we have some swap on the system, so we lie and show
 * our entire swap cap as one swap partition.
 *
 * It is important to use formatting identical to the Linux implementation
 * so that consumers do not break. See swap_show() in mm/swapfile.c.
 */
/* ARGSUSED */
static void
lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	zone_t *zone = curzone;
	uint64_t totswap, usedswap;

	mutex_enter(&zone->zone_mem_lock);
	/* Uses units of 1 kb (2^10). */
	totswap = zone->zone_max_swap_ctl >> 10;
	usedswap = zone->zone_max_swap >> 10;
	mutex_exit(&zone->zone_mem_lock);

	lxpr_uiobuf_printf(uiobuf,
	    "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
	lxpr_uiobuf_printf(uiobuf, "%-40s%s\t%llu\t%llu\t%d\n",
	    "/dev/swap", "partition", totswap, usedswap, -1);
}

/*
 * inotify tunables exported via /proc.
 */
extern int inotify_maxevents;
extern int inotify_maxinstances;
extern int inotify_maxwatches;

/* /proc/sys/fs/inotify/max_queued_events */
static void
lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp,
    lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS);
	lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents);
}

/* /proc/sys/fs/inotify/max_user_instances */
static void
lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp,
    lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES);
	lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances);
}

/* /proc/sys/fs/inotify/max_user_watches */
static void
lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp,
    lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES);
	lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches);
}

/* /proc/sys/kernel/cap_last_cap: highest valid Linux capability number */
static void
lxpr_read_sys_kernel_caplcap(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_CAPLCAP);
	lxpr_uiobuf_printf(uiobuf, "%d\n", LX_CAP_MAX_VALID);
}

/*
 * /proc/sys/kernel/core_pattern: translate the zone's native core path
 * into the Linux core-pattern syntax; empty if core dumps are disabled.
 */
static void
lxpr_read_sys_kernel_corepatt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	zone_t *zone = curproc->p_zone;
	struct core_globals *cg;
	refstr_t *rp;
	corectl_path_t *ccp;
	char tr[MAXPATHLEN];

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);

	cg = zone_getspecific(core_zone_key, zone);
	ASSERT(cg != NULL);

	/* If core dumps are disabled, return an empty string. */
	if ((cg->core_options & CC_PROCESS_PATH) == 0) {
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	/* Hold the path refstr under ccp_mtx before using it */
	ccp = cg->core_default_path;
	mutex_enter(&ccp->ccp_mtx);
	if ((rp = ccp->ccp_path) != NULL)
		refstr_hold(rp);
	mutex_exit(&ccp->ccp_mtx);

	if (rp == NULL) {
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	bzero(tr, sizeof (tr));
	if (lxpr_core_path_s2l(refstr_value(rp), tr, sizeof (tr)) != 0) {
		refstr_rele(rp);
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	refstr_rele(rp);
	lxpr_uiobuf_printf(uiobuf, "%s\n", tr);
}

/* /proc/sys/kernel/hostname */
static void
lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME);
	lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename());
}

/* /proc/sys/kernel/msgmni: zone's enforced message-queue rctl value */
static void
lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	rctl_qty_t val;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI);

	mutex_enter(&curproc->p_lock);
	val = rctl_enforced_value(rc_zone_msgmni,
	    curproc->p_zone->zone_rctls, curproc);
	mutex_exit(&curproc->p_lock);

	lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
}

/* /proc/sys/kernel/ngroups_max */
static void
lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX);
	lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max);
}

static void
lxpr_read_sys_kernel_osrel(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4036 { 4037 lx_zone_data_t *br_data; 4038 4039 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_OSREL); 4040 br_data = ztolxzd(curproc->p_zone); 4041 if (curproc->p_zone->zone_brand == &lx_brand) { 4042 lxpr_uiobuf_printf(uiobuf, "%s\n", 4043 br_data->lxzd_kernel_version); 4044 } else { 4045 lxpr_uiobuf_printf(uiobuf, "\n"); 4046 } 4047 } 4048 4049 static void 4050 lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4051 { 4052 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX); 4053 lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid); 4054 } 4055 4056 static void 4057 lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4058 { 4059 /* 4060 * This file isn't documented on the Linux proc(5) man page but 4061 * according to the blog of the author of systemd/journald (the 4062 * consumer), he says: 4063 * boot_id: A random ID that is regenerated on each boot. As such it 4064 * can be used to identify the local machine's current boot. It's 4065 * universally available on any recent Linux kernel. It's a good and 4066 * safe choice if you need to identify a specific boot on a specific 4067 * booted kernel. 4068 * 4069 * We'll just generate a random ID if necessary. On Linux the format 4070 * appears to resemble a uuid but since it is not documented to be a 4071 * uuid, we don't worry about that. 
4072 */ 4073 lx_zone_data_t *br_data; 4074 4075 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RAND_BOOTID); 4076 4077 if (curproc->p_zone->zone_brand != &lx_brand) { 4078 lxpr_uiobuf_printf(uiobuf, "0\n"); 4079 return; 4080 } 4081 4082 br_data = ztolxzd(curproc->p_zone); 4083 if (br_data->lxzd_bootid[0] == '\0') { 4084 extern int getrandom(void *, size_t, int); 4085 int i; 4086 4087 for (i = 0; i < 5; i++) { 4088 u_longlong_t n; 4089 char s[32]; 4090 4091 (void) random_get_bytes((uint8_t *)&n, sizeof (n)); 4092 switch (i) { 4093 case 0: (void) snprintf(s, sizeof (s), "%08llx", n); 4094 s[8] = '\0'; 4095 break; 4096 case 4: (void) snprintf(s, sizeof (s), "%012llx", n); 4097 s[12] = '\0'; 4098 break; 4099 default: (void) snprintf(s, sizeof (s), "%04llx", n); 4100 s[4] = '\0'; 4101 break; 4102 } 4103 if (i > 0) 4104 strlcat(br_data->lxzd_bootid, "-", 4105 sizeof (br_data->lxzd_bootid)); 4106 strlcat(br_data->lxzd_bootid, s, 4107 sizeof (br_data->lxzd_bootid)); 4108 } 4109 } 4110 4111 lxpr_uiobuf_printf(uiobuf, "%s\n", br_data->lxzd_bootid); 4112 } 4113 4114 static void 4115 lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4116 { 4117 rctl_qty_t val; 4118 4119 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX); 4120 4121 mutex_enter(&curproc->p_lock); 4122 val = rctl_enforced_value(rc_zone_shmmax, 4123 curproc->p_zone->zone_rctls, curproc); 4124 mutex_exit(&curproc->p_lock); 4125 4126 if (val > FOURGB) 4127 val = FOURGB; 4128 4129 lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val); 4130 } 4131 4132 static void 4133 lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4134 { 4135 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX); 4136 lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl); 4137 } 4138 4139 static void 4140 lxpr_read_sys_net_core_somaxc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4141 { 4142 netstack_t *ns; 4143 tcp_stack_t *tcps; 4144 4145 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON); 
4146 4147 ns = netstack_get_current(); 4148 if (ns == NULL) { 4149 lxpr_uiobuf_printf(uiobuf, "%d\n", SOMAXCONN); 4150 return; 4151 } 4152 4153 tcps = ns->netstack_tcp; 4154 lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_conn_req_max_q); 4155 netstack_rele(ns); 4156 } 4157 4158 static void 4159 lxpr_read_sys_vm_minfr_kb(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4160 { 4161 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MINFR_KB); 4162 lxpr_uiobuf_printf(uiobuf, "%d\n", 0); 4163 } 4164 4165 static void 4166 lxpr_read_sys_vm_nhpages(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4167 { 4168 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_NHUGEP); 4169 lxpr_uiobuf_printf(uiobuf, "%d\n", 0); 4170 } 4171 4172 static void 4173 lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4174 { 4175 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_OVERCOMMIT_MEM); 4176 lxpr_uiobuf_printf(uiobuf, "%d\n", 0); 4177 } 4178 4179 static void 4180 lxpr_read_sys_vm_swappiness(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4181 { 4182 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_SWAPPINESS); 4183 lxpr_uiobuf_printf(uiobuf, "%d\n", 0); 4184 } 4185 4186 /* 4187 * lxpr_read_uptime(): read the contents of the "uptime" file. 
4188 * 4189 * format is: "%.2lf, %.2lf",uptime_secs, idle_secs 4190 * Use fixed point arithmetic to get 2 decimal places 4191 */ 4192 /* ARGSUSED */ 4193 static void 4194 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4195 { 4196 cpu_t *cp, *cpstart; 4197 int pools_enabled; 4198 ulong_t idle_cum = 0; 4199 ulong_t cpu_count = 0; 4200 ulong_t idle_s; 4201 ulong_t idle_cs; 4202 ulong_t up_s; 4203 ulong_t up_cs; 4204 hrtime_t birthtime; 4205 hrtime_t centi_sec = 10000000; /* 10^7 */ 4206 4207 ASSERT(lxpnp->lxpr_type == LXPR_UPTIME); 4208 4209 /* Calculate cumulative stats */ 4210 mutex_enter(&cpu_lock); 4211 pools_enabled = pool_pset_enabled(); 4212 4213 cp = cpstart = CPU->cpu_part->cp_cpulist; 4214 do { 4215 /* 4216 * Don't count CPUs that aren't even in the system 4217 * or aren't up yet. 4218 */ 4219 if ((cp->cpu_flags & CPU_EXISTS) == 0) { 4220 continue; 4221 } 4222 4223 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle); 4224 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait); 4225 cpu_count += 1; 4226 4227 if (pools_enabled) 4228 cp = cp->cpu_next_part; 4229 else 4230 cp = cp->cpu_next; 4231 } while (cp != cpstart); 4232 mutex_exit(&cpu_lock); 4233 4234 /* Getting the Zone zsched process startup time */ 4235 birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart; 4236 up_cs = (gethrtime() - birthtime) / centi_sec; 4237 up_s = up_cs / 100; 4238 up_cs %= 100; 4239 4240 ASSERT(cpu_count > 0); 4241 idle_cum /= cpu_count; 4242 idle_s = idle_cum / hz; 4243 idle_cs = idle_cum % hz; 4244 idle_cs *= 100; 4245 idle_cs /= hz; 4246 4247 lxpr_uiobuf_printf(uiobuf, 4248 "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs); 4249 } 4250 4251 static const char *amd_x_edx[] = { 4252 NULL, NULL, NULL, NULL, 4253 NULL, NULL, NULL, NULL, 4254 NULL, NULL, NULL, "syscall", 4255 NULL, NULL, NULL, NULL, 4256 NULL, NULL, NULL, "mp", 4257 "nx", NULL, "mmxext", NULL, 4258 NULL, NULL, NULL, NULL, 4259 NULL, "lm", "3dnowext", "3dnow" 4260 }; 4261 4262 static const char *amd_x_ecx[] = { 4263 
"lahf_lm", NULL, "svm", NULL, 4264 "altmovcr8" 4265 }; 4266 4267 static const char *tm_x_edx[] = { 4268 "recovery", "longrun", NULL, "lrti" 4269 }; 4270 4271 /* 4272 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx." 4273 */ 4274 static const char *intc_x_edx[] = { 4275 NULL, NULL, NULL, NULL, 4276 NULL, NULL, NULL, NULL, 4277 NULL, NULL, NULL, "syscall", 4278 NULL, NULL, NULL, NULL, 4279 NULL, NULL, NULL, NULL, 4280 "nx", NULL, NULL, NULL, 4281 NULL, NULL, NULL, NULL, 4282 NULL, "lm", NULL, NULL 4283 }; 4284 4285 static const char *intc_edx[] = { 4286 "fpu", "vme", "de", "pse", 4287 "tsc", "msr", "pae", "mce", 4288 "cx8", "apic", NULL, "sep", 4289 "mtrr", "pge", "mca", "cmov", 4290 "pat", "pse36", "pn", "clflush", 4291 NULL, "dts", "acpi", "mmx", 4292 "fxsr", "sse", "sse2", "ss", 4293 "ht", "tm", "ia64", "pbe" 4294 }; 4295 4296 /* 4297 * "sse3" on linux is called "pni" (Prescott New Instructions). 4298 */ 4299 static const char *intc_ecx[] = { 4300 "pni", NULL, NULL, "monitor", 4301 "ds_cpl", NULL, NULL, "est", 4302 "tm2", NULL, "cid", NULL, 4303 NULL, "cx16", "xtpr" 4304 }; 4305 4306 /* 4307 * Report a list of each cgroup subsystem supported by our emulated cgroup fs. 4308 * This needs to exist for systemd to run but for now we don't report any 4309 * cgroup subsystems as being installed. The commented example below shows 4310 * how to print a subsystem entry. 
4311 */ 4312 static void 4313 lxpr_read_cgroups(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4314 { 4315 lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n", 4316 "#subsys_name", "hierarchy", "num_cgroups", "enabled"); 4317 4318 /* 4319 * lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n", 4320 * "cpu,cpuacct", "2", "1", "1"); 4321 */ 4322 } 4323 4324 static void 4325 lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4326 { 4327 int i; 4328 uint32_t bits; 4329 cpu_t *cp, *cpstart; 4330 int pools_enabled; 4331 const char **fp; 4332 char brandstr[CPU_IDSTRLEN]; 4333 struct cpuid_regs cpr; 4334 int maxeax; 4335 int std_ecx, std_edx, ext_ecx, ext_edx; 4336 4337 ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO); 4338 4339 mutex_enter(&cpu_lock); 4340 pools_enabled = pool_pset_enabled(); 4341 4342 cp = cpstart = CPU->cpu_part->cp_cpulist; 4343 do { 4344 /* 4345 * This returns the maximum eax value for standard cpuid 4346 * functions in eax. 4347 */ 4348 cpr.cp_eax = 0; 4349 (void) cpuid_insn(cp, &cpr); 4350 maxeax = cpr.cp_eax; 4351 4352 /* 4353 * Get standard x86 feature flags. 4354 */ 4355 cpr.cp_eax = 1; 4356 (void) cpuid_insn(cp, &cpr); 4357 std_ecx = cpr.cp_ecx; 4358 std_edx = cpr.cp_edx; 4359 4360 /* 4361 * Now get extended feature flags. 
4362 */ 4363 cpr.cp_eax = 0x80000001; 4364 (void) cpuid_insn(cp, &cpr); 4365 ext_ecx = cpr.cp_ecx; 4366 ext_edx = cpr.cp_edx; 4367 4368 (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN); 4369 4370 lxpr_uiobuf_printf(uiobuf, 4371 "processor\t: %d\n" 4372 "vendor_id\t: %s\n" 4373 "cpu family\t: %d\n" 4374 "model\t\t: %d\n" 4375 "model name\t: %s\n" 4376 "stepping\t: %d\n" 4377 "cpu MHz\t\t: %u.%03u\n", 4378 cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp), 4379 cpuid_getmodel(cp), brandstr, cpuid_getstep(cp), 4380 (uint32_t)(cpu_freq_hz / 1000000), 4381 ((uint32_t)(cpu_freq_hz / 1000)) % 1000); 4382 4383 lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n", 4384 getl2cacheinfo(cp, NULL, NULL, NULL) / 1024); 4385 4386 if (is_x86_feature(x86_featureset, X86FSET_HTT)) { 4387 /* 4388 * 'siblings' is used for HT-style threads 4389 */ 4390 lxpr_uiobuf_printf(uiobuf, 4391 "physical id\t: %lu\n" 4392 "siblings\t: %u\n", 4393 pg_plat_hw_instance_id(cp, PGHW_CHIP), 4394 cpuid_get_ncpu_per_chip(cp)); 4395 } 4396 4397 /* 4398 * Since we're relatively picky about running on older hardware, 4399 * we can be somewhat cavalier about the answers to these ones. 4400 * 4401 * In fact, given the hardware we support, we just say: 4402 * 4403 * fdiv_bug : no (if we're on a 64-bit kernel) 4404 * hlt_bug : no 4405 * f00f_bug : no 4406 * coma_bug : no 4407 * wp : yes (write protect in supervsr mode) 4408 */ 4409 lxpr_uiobuf_printf(uiobuf, 4410 "fdiv_bug\t: %s\n" 4411 "hlt_bug \t: no\n" 4412 "f00f_bug\t: no\n" 4413 "coma_bug\t: no\n" 4414 "fpu\t\t: %s\n" 4415 "fpu_exception\t: %s\n" 4416 "cpuid level\t: %d\n" 4417 "flags\t\t:", 4418 #if defined(__i386) 4419 fpu_pentium_fdivbug ? "yes" : "no", 4420 #else 4421 "no", 4422 #endif /* __i386 */ 4423 fpu_exists ? "yes" : "no", fpu_exists ? 
"yes" : "no", 4424 maxeax); 4425 4426 for (bits = std_edx, fp = intc_edx, i = 0; 4427 i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++) 4428 if ((bits & (1 << i)) != 0 && *fp) 4429 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4430 4431 /* 4432 * name additional features where appropriate 4433 */ 4434 switch (x86_vendor) { 4435 case X86_VENDOR_Intel: 4436 for (bits = ext_edx, fp = intc_x_edx, i = 0; 4437 i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]); 4438 fp++, i++) 4439 if ((bits & (1 << i)) != 0 && *fp) 4440 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4441 break; 4442 4443 case X86_VENDOR_AMD: 4444 for (bits = ext_edx, fp = amd_x_edx, i = 0; 4445 i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]); 4446 fp++, i++) 4447 if ((bits & (1 << i)) != 0 && *fp) 4448 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4449 4450 for (bits = ext_ecx, fp = amd_x_ecx, i = 0; 4451 i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]); 4452 fp++, i++) 4453 if ((bits & (1 << i)) != 0 && *fp) 4454 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4455 break; 4456 4457 case X86_VENDOR_TM: 4458 for (bits = ext_edx, fp = tm_x_edx, i = 0; 4459 i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]); 4460 fp++, i++) 4461 if ((bits & (1 << i)) != 0 && *fp) 4462 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4463 break; 4464 default: 4465 break; 4466 } 4467 4468 for (bits = std_ecx, fp = intc_ecx, i = 0; 4469 i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++) 4470 if ((bits & (1 << i)) != 0 && *fp) 4471 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4472 4473 lxpr_uiobuf_printf(uiobuf, "\n\n"); 4474 4475 if (pools_enabled) 4476 cp = cp->cpu_next_part; 4477 else 4478 cp = cp->cpu_next; 4479 } while (cp != cpstart); 4480 4481 mutex_exit(&cpu_lock); 4482 } 4483 4484 /* ARGSUSED */ 4485 static void 4486 lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4487 { 4488 ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD); 4489 lxpr_uiobuf_seterr(uiobuf, EFAULT); 4490 } 4491 4492 /* 4493 * Report a list of file systems loaded in the kernel. 
We only report the ones 4494 * which we support and which may be checked by various components to see if 4495 * they are loaded. 4496 */ 4497 static void 4498 lxpr_read_filesystems(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4499 { 4500 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "autofs"); 4501 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "cgroup"); 4502 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "nfs"); 4503 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "proc"); 4504 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "sysfs"); 4505 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "tmpfs"); 4506 } 4507 4508 /* 4509 * lxpr_getattr(): Vnode operation for VOP_GETATTR() 4510 */ 4511 static int 4512 lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 4513 caller_context_t *ct) 4514 { 4515 register lxpr_node_t *lxpnp = VTOLXP(vp); 4516 lxpr_nodetype_t type = lxpnp->lxpr_type; 4517 extern uint_t nproc; 4518 int error; 4519 4520 /* 4521 * Return attributes of underlying vnode if ATTR_REAL 4522 * 4523 * but keep fd files with the symlink permissions 4524 */ 4525 if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) { 4526 vnode_t *rvp = lxpnp->lxpr_realvp; 4527 4528 /* 4529 * withold attribute information to owner or root 4530 */ 4531 if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) { 4532 return (error); 4533 } 4534 4535 /* 4536 * now its attributes 4537 */ 4538 if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) { 4539 return (error); 4540 } 4541 4542 /* 4543 * if it's a file in lx /proc/pid/fd/xx then set its 4544 * mode and keep it looking like a symlink, fifo or socket 4545 */ 4546 if (type == LXPR_PID_FD_FD) { 4547 vap->va_mode = lxpnp->lxpr_mode; 4548 vap->va_type = lxpnp->lxpr_realvp->v_type; 4549 vap->va_size = 0; 4550 vap->va_nlink = 1; 4551 } 4552 return (0); 4553 } 4554 4555 /* Default attributes, that may be overridden below */ 4556 bzero(vap, sizeof (*vap)); 4557 vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time; 4558 
vap->va_nlink = 1; 4559 vap->va_type = vp->v_type; 4560 vap->va_mode = lxpnp->lxpr_mode; 4561 vap->va_fsid = vp->v_vfsp->vfs_dev; 4562 vap->va_blksize = DEV_BSIZE; 4563 vap->va_uid = lxpnp->lxpr_uid; 4564 vap->va_gid = lxpnp->lxpr_gid; 4565 vap->va_nodeid = lxpnp->lxpr_ino; 4566 4567 switch (type) { 4568 case LXPR_PROCDIR: 4569 vap->va_nlink = nproc + 2 + PROCDIRFILES; 4570 vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE; 4571 break; 4572 case LXPR_PIDDIR: 4573 vap->va_nlink = PIDDIRFILES; 4574 vap->va_size = PIDDIRFILES * LXPR_SDSIZE; 4575 break; 4576 case LXPR_PID_TASK_IDDIR: 4577 vap->va_nlink = TIDDIRFILES; 4578 vap->va_size = TIDDIRFILES * LXPR_SDSIZE; 4579 break; 4580 case LXPR_SELF: 4581 vap->va_uid = crgetruid(curproc->p_cred); 4582 vap->va_gid = crgetrgid(curproc->p_cred); 4583 break; 4584 case LXPR_PID_FD_FD: 4585 case LXPR_PID_TID_FD_FD: 4586 /* 4587 * Restore VLNK type for lstat-type activity. 4588 * See lxpr_readlink for more details. 4589 */ 4590 if ((flags & FOLLOW) == 0) 4591 vap->va_type = VLNK; 4592 default: 4593 break; 4594 } 4595 4596 vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size); 4597 return (0); 4598 } 4599 4600 /* 4601 * lxpr_access(): Vnode operation for VOP_ACCESS() 4602 */ 4603 static int 4604 lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct) 4605 { 4606 lxpr_node_t *lxpnp = VTOLXP(vp); 4607 lxpr_nodetype_t type = lxpnp->lxpr_type; 4608 int shift = 0; 4609 proc_t *tp; 4610 4611 /* lx /proc is a read only file system */ 4612 if (mode & VWRITE) { 4613 switch (type) { 4614 case LXPR_PID_OOM_SCR_ADJ: 4615 case LXPR_PID_TID_OOM_SCR_ADJ: 4616 case LXPR_SYS_KERNEL_COREPATT: 4617 case LXPR_SYS_NET_CORE_SOMAXCON: 4618 case LXPR_SYS_VM_OVERCOMMIT_MEM: 4619 case LXPR_SYS_VM_SWAPPINESS: 4620 case LXPR_PID_FD_FD: 4621 case LXPR_PID_TID_FD_FD: 4622 break; 4623 default: 4624 return (EROFS); 4625 } 4626 } 4627 4628 /* 4629 * If this is a restricted file, check access permissions. 
4630 */ 4631 switch (type) { 4632 case LXPR_PIDDIR: 4633 return (0); 4634 case LXPR_PID_CURDIR: 4635 case LXPR_PID_ENV: 4636 case LXPR_PID_EXE: 4637 case LXPR_PID_LIMITS: 4638 case LXPR_PID_MAPS: 4639 case LXPR_PID_MEM: 4640 case LXPR_PID_ROOTDIR: 4641 case LXPR_PID_FDDIR: 4642 case LXPR_PID_FD_FD: 4643 case LXPR_PID_TID_FDDIR: 4644 case LXPR_PID_TID_FD_FD: 4645 if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL) 4646 return (ENOENT); 4647 if (tp != curproc && secpolicy_proc_access(cr) != 0 && 4648 priv_proc_cred_perm(cr, tp, NULL, mode) != 0) { 4649 lxpr_unlock(tp); 4650 return (EACCES); 4651 } 4652 lxpr_unlock(tp); 4653 default: 4654 break; 4655 } 4656 4657 if (lxpnp->lxpr_realvp != NULL) { 4658 /* 4659 * For these we use the underlying vnode's accessibility. 4660 */ 4661 return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct)); 4662 } 4663 4664 /* If user is root allow access regardless of permission bits */ 4665 if (secpolicy_proc_access(cr) == 0) 4666 return (0); 4667 4668 /* 4669 * Access check is based on only one of owner, group, public. If not 4670 * owner, then check group. If not a member of the group, then check 4671 * public access. 
4672 */ 4673 if (crgetuid(cr) != lxpnp->lxpr_uid) { 4674 shift += 3; 4675 if (!groupmember((uid_t)lxpnp->lxpr_gid, cr)) 4676 shift += 3; 4677 } 4678 4679 mode &= ~(lxpnp->lxpr_mode << shift); 4680 4681 if (mode == 0) 4682 return (0); 4683 4684 return (EACCES); 4685 } 4686 4687 /* ARGSUSED */ 4688 static vnode_t * 4689 lxpr_lookup_not_a_dir(vnode_t *dp, char *comp) 4690 { 4691 return (NULL); 4692 } 4693 4694 /* 4695 * lxpr_lookup(): Vnode operation for VOP_LOOKUP() 4696 */ 4697 /* ARGSUSED */ 4698 static int 4699 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp, 4700 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 4701 int *direntflags, pathname_t *realpnp) 4702 { 4703 lxpr_node_t *lxpnp = VTOLXP(dp); 4704 lxpr_nodetype_t type = lxpnp->lxpr_type; 4705 int error; 4706 4707 ASSERT(dp->v_type == VDIR); 4708 ASSERT(type < LXPR_NFILES); 4709 4710 /* 4711 * we should never get here because the lookup 4712 * is done on the realvp for these nodes 4713 */ 4714 ASSERT(type != LXPR_PID_FD_FD && 4715 type != LXPR_PID_CURDIR && 4716 type != LXPR_PID_ROOTDIR); 4717 4718 /* 4719 * restrict lookup permission to owner or root 4720 */ 4721 if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) { 4722 return (error); 4723 } 4724 4725 /* 4726 * Just return the parent vnode if that's where we are trying to go. 4727 */ 4728 if (strcmp(comp, "..") == 0) { 4729 VN_HOLD(lxpnp->lxpr_parent); 4730 *vpp = lxpnp->lxpr_parent; 4731 return (0); 4732 } 4733 4734 /* 4735 * Special handling for directory searches. Note: null component name 4736 * denotes that the current directory is being searched. 4737 */ 4738 if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) { 4739 VN_HOLD(dp); 4740 *vpp = dp; 4741 return (0); 4742 } 4743 4744 *vpp = (lxpr_lookup_function[type](dp, comp)); 4745 return ((*vpp == NULL) ? 
ENOENT : 0); 4746 } 4747 4748 /* 4749 * Do a sequential search on the given directory table 4750 */ 4751 static vnode_t * 4752 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p, 4753 lxpr_dirent_t *dirtab, int dirtablen) 4754 { 4755 lxpr_node_t *lxpnp; 4756 int count; 4757 4758 for (count = 0; count < dirtablen; count++) { 4759 if (strcmp(dirtab[count].d_name, comp) == 0) { 4760 lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0); 4761 dp = LXPTOV(lxpnp); 4762 ASSERT(dp != NULL); 4763 return (dp); 4764 } 4765 } 4766 return (NULL); 4767 } 4768 4769 static vnode_t * 4770 lxpr_lookup_piddir(vnode_t *dp, char *comp) 4771 { 4772 proc_t *p; 4773 4774 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR); 4775 4776 p = lxpr_lock(VTOLXP(dp)->lxpr_pid); 4777 if (p == NULL) 4778 return (NULL); 4779 4780 dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES); 4781 4782 lxpr_unlock(p); 4783 4784 return (dp); 4785 } 4786 4787 /* 4788 * Lookup one of the process's task ID's. 4789 */ 4790 static vnode_t * 4791 lxpr_lookup_taskdir(vnode_t *dp, char *comp) 4792 { 4793 lxpr_node_t *dlxpnp = VTOLXP(dp); 4794 lxpr_node_t *lxpnp; 4795 proc_t *p; 4796 pid_t real_pid; 4797 uint_t tid; 4798 int c; 4799 kthread_t *t; 4800 4801 ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASKDIR); 4802 4803 /* 4804 * convert the string rendition of the filename to a thread ID 4805 */ 4806 tid = 0; 4807 while ((c = *comp++) != '\0') { 4808 int otid; 4809 if (c < '0' || c > '9') 4810 return (NULL); 4811 4812 otid = tid; 4813 tid = 10 * tid + c - '0'; 4814 /* integer overflow */ 4815 if (tid / 10 != otid) 4816 return (NULL); 4817 } 4818 4819 /* 4820 * get the proc to work with and lock it 4821 */ 4822 real_pid = get_real_pid(dlxpnp->lxpr_pid); 4823 p = lxpr_lock(real_pid); 4824 if ((p == NULL)) 4825 return (NULL); 4826 4827 /* 4828 * If the process is a zombie or system process 4829 * it can't have any threads. 
4830 */ 4831 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) { 4832 lxpr_unlock(p); 4833 return (NULL); 4834 } 4835 4836 if (p->p_brand == &lx_brand) { 4837 t = lxpr_get_thread(p, tid); 4838 } else { 4839 /* 4840 * Only the main thread is visible for non-branded processes. 4841 */ 4842 t = p->p_tlist; 4843 if (tid != p->p_pid || t == NULL) { 4844 t = NULL; 4845 } else { 4846 thread_lock(t); 4847 } 4848 } 4849 if (t == NULL) { 4850 lxpr_unlock(p); 4851 return (NULL); 4852 } 4853 thread_unlock(t); 4854 4855 /* 4856 * Allocate and fill in a new lx /proc taskid node. 4857 * Instead of the last arg being a fd, it is a tid. 4858 */ 4859 lxpnp = lxpr_getnode(dp, LXPR_PID_TASK_IDDIR, p, tid); 4860 dp = LXPTOV(lxpnp); 4861 ASSERT(dp != NULL); 4862 lxpr_unlock(p); 4863 return (dp); 4864 } 4865 4866 /* 4867 * Lookup one of the process's task ID's. 4868 */ 4869 static vnode_t * 4870 lxpr_lookup_task_tid_dir(vnode_t *dp, char *comp) 4871 { 4872 lxpr_node_t *dlxpnp = VTOLXP(dp); 4873 lxpr_node_t *lxpnp; 4874 proc_t *p; 4875 pid_t real_pid; 4876 kthread_t *t; 4877 int i; 4878 4879 ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASK_IDDIR); 4880 4881 /* 4882 * get the proc to work with and lock it 4883 */ 4884 real_pid = get_real_pid(dlxpnp->lxpr_pid); 4885 p = lxpr_lock(real_pid); 4886 if ((p == NULL)) 4887 return (NULL); 4888 4889 /* 4890 * If the process is a zombie or system process 4891 * it can't have any threads. 
4892 */ 4893 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) { 4894 lxpr_unlock(p); 4895 return (NULL); 4896 } 4897 4898 /* need to confirm tid is still there */ 4899 t = lxpr_get_thread(p, dlxpnp->lxpr_desc); 4900 if (t == NULL) { 4901 lxpr_unlock(p); 4902 return (NULL); 4903 } 4904 thread_unlock(t); 4905 4906 /* 4907 * allocate and fill in the new lx /proc taskid dir node 4908 */ 4909 for (i = 0; i < TIDDIRFILES; i++) { 4910 if (strcmp(tiddir[i].d_name, comp) == 0) { 4911 lxpnp = lxpr_getnode(dp, tiddir[i].d_type, p, 4912 dlxpnp->lxpr_desc); 4913 dp = LXPTOV(lxpnp); 4914 ASSERT(dp != NULL); 4915 lxpr_unlock(p); 4916 return (dp); 4917 } 4918 } 4919 4920 lxpr_unlock(p); 4921 return (NULL); 4922 } 4923 4924 /* 4925 * Lookup one of the process's open files. 4926 */ 4927 static vnode_t * 4928 lxpr_lookup_fddir(vnode_t *dp, char *comp) 4929 { 4930 lxpr_node_t *dlxpnp = VTOLXP(dp); 4931 4932 ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR || 4933 dlxpnp->lxpr_type == LXPR_PID_TID_FDDIR); 4934 4935 return (lxpr_lookup_fdnode(dp, comp)); 4936 } 4937 4938 static vnode_t * 4939 lxpr_lookup_netdir(vnode_t *dp, char *comp) 4940 { 4941 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR); 4942 4943 dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES); 4944 4945 return (dp); 4946 } 4947 4948 static vnode_t * 4949 lxpr_lookup_procdir(vnode_t *dp, char *comp) 4950 { 4951 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR); 4952 4953 /* 4954 * We know all the names of files & dirs in our file system structure 4955 * except those that are pid names. These change as pids are created/ 4956 * deleted etc., so we just look for a number as the first char to see 4957 * if we are we doing pid lookups. 
4958 * 4959 * Don't need to check for "self" as it is implemented as a symlink 4960 */ 4961 if (*comp >= '0' && *comp <= '9') { 4962 pid_t pid = 0; 4963 lxpr_node_t *lxpnp = NULL; 4964 proc_t *p; 4965 int c; 4966 4967 while ((c = *comp++) != '\0') 4968 pid = 10 * pid + c - '0'; 4969 4970 /* 4971 * Can't continue if the process is still loading or it doesn't 4972 * really exist yet (or maybe it just died!) 4973 */ 4974 p = lxpr_lock(pid); 4975 if (p == NULL) 4976 return (NULL); 4977 4978 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) { 4979 lxpr_unlock(p); 4980 return (NULL); 4981 } 4982 4983 /* 4984 * allocate and fill in a new lx /proc node 4985 */ 4986 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0); 4987 4988 lxpr_unlock(p); 4989 4990 dp = LXPTOV(lxpnp); 4991 ASSERT(dp != NULL); 4992 4993 return (dp); 4994 } 4995 4996 /* Lookup fixed names */ 4997 return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES)); 4998 } 4999 5000 static vnode_t * 5001 lxpr_lookup_sysdir(vnode_t *dp, char *comp) 5002 { 5003 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR); 5004 return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES)); 5005 } 5006 5007 static vnode_t * 5008 lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp) 5009 { 5010 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR); 5011 return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir, 5012 SYS_KERNELDIRFILES)); 5013 } 5014 5015 static vnode_t * 5016 lxpr_lookup_sys_kdir_randdir(vnode_t *dp, char *comp) 5017 { 5018 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNEL_RANDDIR); 5019 return (lxpr_lookup_common(dp, comp, NULL, sys_randdir, 5020 SYS_RANDDIRFILES)); 5021 } 5022 5023 static vnode_t * 5024 lxpr_lookup_sys_netdir(vnode_t *dp, char *comp) 5025 { 5026 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NETDIR); 5027 return (lxpr_lookup_common(dp, comp, NULL, sys_netdir, 5028 SYS_NETDIRFILES)); 5029 } 5030 5031 static vnode_t * 5032 lxpr_lookup_sys_net_coredir(vnode_t *dp, char *comp) 5033 { 5034 
ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_COREDIR); 5035 return (lxpr_lookup_common(dp, comp, NULL, sys_net_coredir, 5036 SYS_NET_COREDIRFILES)); 5037 } 5038 5039 static vnode_t * 5040 lxpr_lookup_sys_vmdir(vnode_t *dp, char *comp) 5041 { 5042 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_VMDIR); 5043 return (lxpr_lookup_common(dp, comp, NULL, sys_vmdir, 5044 SYS_VMDIRFILES)); 5045 } 5046 5047 static vnode_t * 5048 lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp) 5049 { 5050 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR); 5051 return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir, 5052 SYS_FSDIRFILES)); 5053 } 5054 5055 static vnode_t * 5056 lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp) 5057 { 5058 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR); 5059 return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir, 5060 SYS_FS_INOTIFYDIRFILES)); 5061 } 5062 5063 /* 5064 * lxpr_readdir(): Vnode operation for VOP_READDIR() 5065 */ 5066 /* ARGSUSED */ 5067 static int 5068 lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp, 5069 caller_context_t *ct, int flags) 5070 { 5071 lxpr_node_t *lxpnp = VTOLXP(dp); 5072 lxpr_nodetype_t type = lxpnp->lxpr_type; 5073 ssize_t uresid; 5074 off_t uoffset; 5075 int error; 5076 5077 ASSERT(dp->v_type == VDIR); 5078 ASSERT(type < LXPR_NFILES); 5079 5080 /* 5081 * we should never get here because the readdir 5082 * is done on the realvp for these nodes 5083 */ 5084 ASSERT(type != LXPR_PID_FD_FD && 5085 type != LXPR_PID_CURDIR && 5086 type != LXPR_PID_ROOTDIR); 5087 5088 /* 5089 * restrict readdir permission to owner or root 5090 */ 5091 if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0) 5092 return (error); 5093 5094 uoffset = uiop->uio_offset; 5095 uresid = uiop->uio_resid; 5096 5097 /* can't do negative reads */ 5098 if (uoffset < 0 || uresid <= 0) 5099 return (EINVAL); 5100 5101 /* can't read directory entries that don't exist! 
*/ 5102 if (uoffset % LXPR_SDSIZE) 5103 return (ENOENT); 5104 5105 return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp)); 5106 } 5107 5108 /* ARGSUSED */ 5109 static int 5110 lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5111 { 5112 return (ENOTDIR); 5113 } 5114 5115 /* 5116 * This has the common logic for returning directory entries 5117 */ 5118 static int 5119 lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp, 5120 lxpr_dirent_t *dirtab, int dirtablen) 5121 { 5122 /* bp holds one dirent64 structure */ 5123 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5124 dirent64_t *dirent = (dirent64_t *)bp; 5125 ssize_t oresid; /* save a copy for testing later */ 5126 ssize_t uresid; 5127 5128 oresid = uiop->uio_resid; 5129 5130 /* clear out the dirent buffer */ 5131 bzero(bp, sizeof (bp)); 5132 5133 /* 5134 * Satisfy user request 5135 */ 5136 while ((uresid = uiop->uio_resid) > 0) { 5137 int dirindex; 5138 off_t uoffset; 5139 int reclen; 5140 int error; 5141 5142 uoffset = uiop->uio_offset; 5143 dirindex = (uoffset / LXPR_SDSIZE) - 2; 5144 5145 if (uoffset == 0) { 5146 5147 dirent->d_ino = lxpnp->lxpr_ino; 5148 dirent->d_name[0] = '.'; 5149 dirent->d_name[1] = '\0'; 5150 reclen = DIRENT64_RECLEN(1); 5151 5152 } else if (uoffset == LXPR_SDSIZE) { 5153 5154 dirent->d_ino = lxpr_parentinode(lxpnp); 5155 dirent->d_name[0] = '.'; 5156 dirent->d_name[1] = '.'; 5157 dirent->d_name[2] = '\0'; 5158 reclen = DIRENT64_RECLEN(2); 5159 5160 } else if (dirindex >= 0 && dirindex < dirtablen) { 5161 int slen = strlen(dirtab[dirindex].d_name); 5162 5163 dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type, 5164 lxpnp->lxpr_pid, 0); 5165 5166 VERIFY(slen < LXPNSIZ); 5167 (void) strcpy(dirent->d_name, dirtab[dirindex].d_name); 5168 reclen = DIRENT64_RECLEN(slen); 5169 5170 } else { 5171 /* Run out of table entries */ 5172 if (eofp) { 5173 *eofp = 1; 5174 } 5175 return (0); 5176 } 5177 5178 dirent->d_off = (off64_t)(uoffset + 
LXPR_SDSIZE); 5179 dirent->d_reclen = (ushort_t)reclen; 5180 5181 /* 5182 * if the size of the data to transfer is greater 5183 * that that requested then we can't do it this transfer. 5184 */ 5185 if (reclen > uresid) { 5186 /* 5187 * Error if no entries have been returned yet. 5188 */ 5189 if (uresid == oresid) { 5190 return (EINVAL); 5191 } 5192 break; 5193 } 5194 5195 /* 5196 * uiomove() updates both uiop->uio_resid and uiop->uio_offset 5197 * by the same amount. But we want uiop->uio_offset to change 5198 * in increments of LXPR_SDSIZE, which is different from the 5199 * number of bytes being returned to the user. So we set 5200 * uiop->uio_offset separately, ignoring what uiomove() does. 5201 */ 5202 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5203 uiop)) != 0) 5204 return (error); 5205 5206 uiop->uio_offset = uoffset + LXPR_SDSIZE; 5207 } 5208 5209 /* Have run out of space, but could have just done last table entry */ 5210 if (eofp) { 5211 *eofp = 5212 (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0; 5213 } 5214 return (0); 5215 } 5216 5217 5218 static int 5219 lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5220 { 5221 /* bp holds one dirent64 structure */ 5222 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5223 dirent64_t *dirent = (dirent64_t *)bp; 5224 ssize_t oresid; /* save a copy for testing later */ 5225 ssize_t uresid; 5226 off_t uoffset; 5227 zoneid_t zoneid; 5228 pid_t pid; 5229 int error; 5230 int ceof; 5231 5232 ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR); 5233 5234 oresid = uiop->uio_resid; 5235 zoneid = LXPTOZ(lxpnp)->zone_id; 5236 5237 /* 5238 * We return directory entries in the order: "." and ".." then the 5239 * unique lxproc files, then the directories corresponding to the 5240 * running processes. We have defined this as the ordering because 5241 * it allows us to more easily keep track of where we are betwen calls 5242 * to getdents(). 
If the number of processes changes between calls 5243 * then we can't lose track of where we are in the lxproc files. 5244 */ 5245 5246 /* Do the fixed entries */ 5247 error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir, 5248 PROCDIRFILES); 5249 5250 /* Finished if we got an error or if we couldn't do all the table */ 5251 if (error != 0 || ceof == 0) 5252 return (error); 5253 5254 /* clear out the dirent buffer */ 5255 bzero(bp, sizeof (bp)); 5256 5257 /* Do the process entries */ 5258 while ((uresid = uiop->uio_resid) > 0) { 5259 proc_t *p; 5260 int len; 5261 int reclen; 5262 int i; 5263 5264 uoffset = uiop->uio_offset; 5265 5266 /* 5267 * Stop when entire proc table has been examined. 5268 */ 5269 i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES; 5270 if (i < 0 || i >= v.v_proc) { 5271 /* Run out of table entries */ 5272 if (eofp) { 5273 *eofp = 1; 5274 } 5275 return (0); 5276 } 5277 mutex_enter(&pidlock); 5278 5279 /* 5280 * Skip indices for which there is no pid_entry, PIDs for 5281 * which there is no corresponding process, a PID of 0, 5282 * and anything the security policy doesn't allow 5283 * us to look at. 5284 */ 5285 if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL || 5286 p->p_pid == 0 || 5287 secpolicy_basic_procinfo(CRED(), p, curproc) != 0) { 5288 mutex_exit(&pidlock); 5289 goto next; 5290 } 5291 mutex_exit(&pidlock); 5292 5293 /* 5294 * Convert pid to the Linux default of 1 if we're the zone's 5295 * init process, or 0 if zsched, otherwise use the value from 5296 * the proc structure 5297 */ 5298 if (p->p_pid == curproc->p_zone->zone_proc_initpid) { 5299 pid = 1; 5300 } else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) { 5301 pid = 0; 5302 } else { 5303 pid = p->p_pid; 5304 } 5305 5306 /* 5307 * If this /proc was mounted in the global zone, view 5308 * all procs; otherwise, only view zone member procs. 
5309 */ 5310 if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) { 5311 goto next; 5312 } 5313 5314 ASSERT(p->p_stat != 0); 5315 5316 dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0); 5317 len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid); 5318 ASSERT(len < LXPNSIZ); 5319 reclen = DIRENT64_RECLEN(len); 5320 5321 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); 5322 dirent->d_reclen = (ushort_t)reclen; 5323 5324 /* 5325 * if the size of the data to transfer is greater 5326 * that that requested then we can't do it this transfer. 5327 */ 5328 if (reclen > uresid) { 5329 /* 5330 * Error if no entries have been returned yet. 5331 */ 5332 if (uresid == oresid) 5333 return (EINVAL); 5334 break; 5335 } 5336 5337 /* 5338 * uiomove() updates both uiop->uio_resid and uiop->uio_offset 5339 * by the same amount. But we want uiop->uio_offset to change 5340 * in increments of LXPR_SDSIZE, which is different from the 5341 * number of bytes being returned to the user. So we set 5342 * uiop->uio_offset separately, in the increment of this for 5343 * the loop, ignoring what uiomove() does. 5344 */ 5345 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5346 uiop)) != 0) 5347 return (error); 5348 next: 5349 uiop->uio_offset = uoffset + LXPR_SDSIZE; 5350 } 5351 5352 if (eofp != NULL) { 5353 *eofp = (uiop->uio_offset >= 5354 ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 
1 : 0; 5355 } 5356 5357 return (0); 5358 } 5359 5360 static int 5361 lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5362 { 5363 proc_t *p; 5364 pid_t find_pid; 5365 5366 ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR); 5367 5368 /* can't read its contents if it died */ 5369 mutex_enter(&pidlock); 5370 5371 if (lxpnp->lxpr_pid == 1) { 5372 find_pid = curproc->p_zone->zone_proc_initpid; 5373 } else if (lxpnp->lxpr_pid == 0) { 5374 find_pid = curproc->p_zone->zone_zsched->p_pid; 5375 } else { 5376 find_pid = lxpnp->lxpr_pid; 5377 } 5378 p = prfind(find_pid); 5379 5380 if (p == NULL || p->p_stat == SIDL) { 5381 mutex_exit(&pidlock); 5382 return (ENOENT); 5383 } 5384 mutex_exit(&pidlock); 5385 5386 return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES)); 5387 } 5388 5389 static int 5390 lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5391 { 5392 ASSERT(lxpnp->lxpr_type == LXPR_NETDIR); 5393 return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES)); 5394 } 5395 5396 static int 5397 lxpr_readdir_taskdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5398 { 5399 /* bp holds one dirent64 structure */ 5400 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5401 dirent64_t *dirent = (dirent64_t *)bp; 5402 ssize_t oresid; /* save a copy for testing later */ 5403 ssize_t uresid; 5404 off_t uoffset; 5405 int error; 5406 int ceof; 5407 proc_t *p; 5408 int tiddirsize = -1; 5409 int tasknum; 5410 pid_t real_pid; 5411 kthread_t *t; 5412 boolean_t branded = B_FALSE; 5413 5414 ASSERT(lxpnp->lxpr_type == LXPR_PID_TASKDIR); 5415 5416 oresid = uiop->uio_resid; 5417 5418 real_pid = get_real_pid(lxpnp->lxpr_pid); 5419 p = lxpr_lock(real_pid); 5420 5421 /* can't read its contents if it died */ 5422 if (p == NULL) { 5423 return (ENOENT); 5424 } 5425 if (p->p_stat == SIDL) { 5426 lxpr_unlock(p); 5427 return (ENOENT); 5428 } 5429 5430 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) 5431 tiddirsize = 0; 5432 5433 
branded = (p->p_brand == &lx_brand); 5434 /* 5435 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from 5436 * going away while we iterate over its threads. 5437 */ 5438 mutex_exit(&p->p_lock); 5439 5440 if (tiddirsize == -1) 5441 tiddirsize = p->p_lwpcnt; 5442 5443 /* Do the fixed entries (in this case just "." & "..") */ 5444 error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0); 5445 5446 /* Finished if we got an error or if we couldn't do all the table */ 5447 if (error != 0 || ceof == 0) 5448 goto out; 5449 5450 if ((t = p->p_tlist) == NULL) { 5451 if (eofp != NULL) 5452 *eofp = 1; 5453 goto out; 5454 } 5455 5456 /* clear out the dirent buffer */ 5457 bzero(bp, sizeof (bp)); 5458 5459 /* 5460 * Loop until user's request is satisfied or until all thread's have 5461 * been returned. 5462 */ 5463 for (tasknum = 0; (uresid = uiop->uio_resid) > 0; tasknum++) { 5464 int i; 5465 int reclen; 5466 int len; 5467 uint_t emul_tid; 5468 lx_lwp_data_t *lwpd; 5469 5470 uoffset = uiop->uio_offset; 5471 5472 /* 5473 * Stop at the end of the thread list 5474 */ 5475 i = (uoffset / LXPR_SDSIZE) - 2; 5476 if (i < 0 || i >= tiddirsize) { 5477 if (eofp) { 5478 *eofp = 1; 5479 } 5480 goto out; 5481 } 5482 5483 if (i != tasknum) 5484 goto next; 5485 5486 if (!branded) { 5487 /* 5488 * Emulating the goofy linux task model is impossible 5489 * to do for native processes. We can compromise by 5490 * presenting only the main thread to the consumer. 5491 */ 5492 emul_tid = p->p_pid; 5493 } else { 5494 if ((lwpd = ttolxlwp(t)) == NULL) { 5495 goto next; 5496 } 5497 emul_tid = lwpd->br_pid; 5498 /* 5499 * Convert pid to Linux default of 1 if we're the 5500 * zone's init. 
5501 */ 5502 if (emul_tid == curproc->p_zone->zone_proc_initpid) 5503 emul_tid = 1; 5504 } 5505 5506 dirent->d_ino = lxpr_inode(LXPR_PID_TASK_IDDIR, lxpnp->lxpr_pid, 5507 emul_tid); 5508 len = snprintf(dirent->d_name, LXPNSIZ, "%d", emul_tid); 5509 ASSERT(len < LXPNSIZ); 5510 reclen = DIRENT64_RECLEN(len); 5511 5512 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); 5513 dirent->d_reclen = (ushort_t)reclen; 5514 5515 if (reclen > uresid) { 5516 /* 5517 * Error if no entries have been returned yet. 5518 */ 5519 if (uresid == oresid) 5520 error = EINVAL; 5521 goto out; 5522 } 5523 5524 /* 5525 * uiomove() updates both uiop->uio_resid and uiop->uio_offset 5526 * by the same amount. But we want uiop->uio_offset to change 5527 * in increments of LXPR_SDSIZE, which is different from the 5528 * number of bytes being returned to the user. So we set 5529 * uiop->uio_offset separately, in the increment of this for 5530 * the loop, ignoring what uiomove() does. 5531 */ 5532 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5533 uiop)) != 0) 5534 goto out; 5535 5536 next: 5537 uiop->uio_offset = uoffset + LXPR_SDSIZE; 5538 5539 if ((t = t->t_forw) == p->p_tlist || !branded) { 5540 if (eofp != NULL) 5541 *eofp = 1; 5542 goto out; 5543 } 5544 } 5545 5546 if (eofp != NULL) 5547 *eofp = 0; 5548 5549 out: 5550 mutex_enter(&p->p_lock); 5551 lxpr_unlock(p); 5552 return (error); 5553 } 5554 5555 static int 5556 lxpr_readdir_task_tid_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5557 { 5558 proc_t *p; 5559 pid_t real_pid; 5560 kthread_t *t; 5561 5562 ASSERT(lxpnp->lxpr_type == LXPR_PID_TASK_IDDIR); 5563 5564 mutex_enter(&pidlock); 5565 5566 real_pid = get_real_pid(lxpnp->lxpr_pid); 5567 p = prfind(real_pid); 5568 5569 /* can't read its contents if it died */ 5570 if (p == NULL || p->p_stat == SIDL) { 5571 mutex_exit(&pidlock); 5572 return (ENOENT); 5573 } 5574 5575 mutex_exit(&pidlock); 5576 5577 /* need to confirm tid is still there */ 5578 t = lxpr_get_thread(p, 
lxpnp->lxpr_desc); 5579 if (t == NULL) { 5580 /* we can't find this specific thread */ 5581 return (NULL); 5582 } 5583 thread_unlock(t); 5584 5585 return (lxpr_readdir_common(lxpnp, uiop, eofp, tiddir, TIDDIRFILES)); 5586 } 5587 5588 static int 5589 lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5590 { 5591 /* bp holds one dirent64 structure */ 5592 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5593 dirent64_t *dirent = (dirent64_t *)bp; 5594 ssize_t oresid; /* save a copy for testing later */ 5595 ssize_t uresid; 5596 off_t uoffset; 5597 int error; 5598 int ceof; 5599 proc_t *p; 5600 int fddirsize = -1; 5601 uf_info_t *fip; 5602 5603 ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR || 5604 lxpnp->lxpr_type == LXPR_PID_TID_FDDIR); 5605 5606 oresid = uiop->uio_resid; 5607 5608 /* can't read its contents if it died */ 5609 p = lxpr_lock(lxpnp->lxpr_pid); 5610 if (p == NULL) 5611 return (ENOENT); 5612 5613 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) 5614 fddirsize = 0; 5615 5616 /* 5617 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from 5618 * going away while we iterate over its fi_list. 5619 */ 5620 mutex_exit(&p->p_lock); 5621 5622 /* Get open file info */ 5623 fip = (&(p)->p_user.u_finfo); 5624 mutex_enter(&fip->fi_lock); 5625 5626 if (fddirsize == -1) 5627 fddirsize = fip->fi_nfiles; 5628 5629 /* Do the fixed entries (in this case just "." & "..") */ 5630 error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0); 5631 5632 /* Finished if we got an error or if we couldn't do all the table */ 5633 if (error != 0 || ceof == 0) 5634 goto out; 5635 5636 /* clear out the dirent buffer */ 5637 bzero(bp, sizeof (bp)); 5638 5639 /* 5640 * Loop until user's request is satisfied or until 5641 * all file descriptors have been examined. 
5642 */ 5643 for (; (uresid = uiop->uio_resid) > 0; 5644 uiop->uio_offset = uoffset + LXPR_SDSIZE) { 5645 int reclen; 5646 int fd; 5647 int len; 5648 5649 uoffset = uiop->uio_offset; 5650 5651 /* 5652 * Stop at the end of the fd list 5653 */ 5654 fd = (uoffset / LXPR_SDSIZE) - 2; 5655 if (fd < 0 || fd >= fddirsize) { 5656 if (eofp) { 5657 *eofp = 1; 5658 } 5659 goto out; 5660 } 5661 5662 if (fip->fi_list[fd].uf_file == NULL) 5663 continue; 5664 5665 dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd); 5666 len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd); 5667 ASSERT(len < LXPNSIZ); 5668 reclen = DIRENT64_RECLEN(len); 5669 5670 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); 5671 dirent->d_reclen = (ushort_t)reclen; 5672 5673 if (reclen > uresid) { 5674 /* 5675 * Error if no entries have been returned yet. 5676 */ 5677 if (uresid == oresid) 5678 error = EINVAL; 5679 goto out; 5680 } 5681 5682 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5683 uiop)) != 0) 5684 goto out; 5685 } 5686 5687 if (eofp != NULL) { 5688 *eofp = 5689 (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 
1 : 0; 5690 } 5691 5692 out: 5693 mutex_exit(&fip->fi_lock); 5694 mutex_enter(&p->p_lock); 5695 lxpr_unlock(p); 5696 return (error); 5697 } 5698 5699 static int 5700 lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5701 { 5702 ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR); 5703 return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES)); 5704 } 5705 5706 static int 5707 lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5708 { 5709 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR); 5710 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir, 5711 SYS_FSDIRFILES)); 5712 } 5713 5714 static int 5715 lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5716 { 5717 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR); 5718 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir, 5719 SYS_FS_INOTIFYDIRFILES)); 5720 } 5721 5722 static int 5723 lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5724 { 5725 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR); 5726 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir, 5727 SYS_KERNELDIRFILES)); 5728 } 5729 5730 static int 5731 lxpr_readdir_sys_kdir_randdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5732 { 5733 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RANDDIR); 5734 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_randdir, 5735 SYS_RANDDIRFILES)); 5736 } 5737 5738 static int 5739 lxpr_readdir_sys_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5740 { 5741 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NETDIR); 5742 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_netdir, 5743 SYS_NETDIRFILES)); 5744 } 5745 5746 static int 5747 lxpr_readdir_sys_net_coredir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5748 { 5749 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_COREDIR); 5750 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_coredir, 5751 SYS_NET_COREDIRFILES)); 5752 } 5753 5754 static int 5755 lxpr_readdir_sys_vmdir(lxpr_node_t *lxpnp, uio_t *uiop, int 
*eofp) 5756 { 5757 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VMDIR); 5758 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_vmdir, 5759 SYS_VMDIRFILES)); 5760 } 5761 5762 static int 5763 lxpr_write_sys_net_core_somaxc(lxpr_node_t *lxpnp, struct uio *uio, 5764 struct cred *cr, caller_context_t *ct) 5765 { 5766 int error; 5767 int res = 0; 5768 size_t olen; 5769 char val[16]; /* big enough for a uint numeric string */ 5770 netstack_t *ns; 5771 mod_prop_info_t *ptbl = NULL; 5772 mod_prop_info_t *pinfo = NULL; 5773 5774 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON); 5775 5776 if (uio->uio_loffset != 0) 5777 return (EINVAL); 5778 5779 if (uio->uio_resid == 0) 5780 return (0); 5781 5782 olen = uio->uio_resid; 5783 if (olen > sizeof (val) - 1) 5784 return (EINVAL); 5785 5786 bzero(val, sizeof (val)); 5787 error = uiomove(val, olen, UIO_WRITE, uio); 5788 if (error != 0) 5789 return (error); 5790 5791 if (val[olen - 1] == '\n') 5792 val[olen - 1] = '\0'; 5793 5794 if (val[0] == '\0') /* no input */ 5795 return (EINVAL); 5796 5797 ns = netstack_get_current(); 5798 if (ns == NULL) 5799 return (EINVAL); 5800 5801 ptbl = ns->netstack_tcp->tcps_propinfo_tbl; 5802 pinfo = mod_prop_lookup(ptbl, "_conn_req_max_q", MOD_PROTO_TCP); 5803 if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, val, 0) != 0) 5804 res = EINVAL; 5805 5806 netstack_rele(ns); 5807 return (res); 5808 } 5809 5810 /* ARGSUSED */ 5811 static int 5812 lxpr_write_sys_kernel_corepatt(lxpr_node_t *lxpnp, struct uio *uio, 5813 struct cred *cr, caller_context_t *ct) 5814 { 5815 zone_t *zone = curproc->p_zone; 5816 struct core_globals *cg; 5817 refstr_t *rp, *nrp; 5818 corectl_path_t *ccp; 5819 char val[MAXPATHLEN]; 5820 char valtr[MAXPATHLEN]; 5821 size_t olen; 5822 int error; 5823 5824 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT); 5825 5826 cg = zone_getspecific(core_zone_key, zone); 5827 ASSERT(cg != NULL); 5828 5829 if (secpolicy_coreadm(cr) != 0) 5830 return (EPERM); 5831 5832 if (uio->uio_loffset 
!= 0) 5833 return (EINVAL); 5834 5835 if (uio->uio_resid == 0) 5836 return (0); 5837 5838 olen = uio->uio_resid; 5839 if (olen > sizeof (val) - 1) 5840 return (EINVAL); 5841 5842 bzero(val, sizeof (val)); 5843 error = uiomove(val, olen, UIO_WRITE, uio); 5844 if (error != 0) 5845 return (error); 5846 5847 if (val[olen - 1] == '\n') 5848 val[olen - 1] = '\0'; 5849 5850 if (val[0] == '|') 5851 return (EINVAL); 5852 5853 if ((error = lxpr_core_path_l2s(val, valtr, sizeof (valtr))) != 0) 5854 return (error); 5855 5856 nrp = refstr_alloc(valtr); 5857 5858 ccp = cg->core_default_path; 5859 mutex_enter(&ccp->ccp_mtx); 5860 rp = ccp->ccp_path; 5861 refstr_hold((ccp->ccp_path = nrp)); 5862 cg->core_options |= CC_PROCESS_PATH; 5863 mutex_exit(&ccp->ccp_mtx); 5864 5865 if (rp != NULL) 5866 refstr_rele(rp); 5867 5868 return (0); 5869 } 5870 5871 /* 5872 * lxpr_readlink(): Vnode operation for VOP_READLINK() 5873 */ 5874 /* ARGSUSED */ 5875 static int 5876 lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct) 5877 { 5878 char bp[MAXPATHLEN + 1]; 5879 size_t buflen = sizeof (bp); 5880 lxpr_node_t *lxpnp = VTOLXP(vp); 5881 vnode_t *rvp = lxpnp->lxpr_realvp; 5882 pid_t pid; 5883 int error = 0; 5884 5885 /* 5886 * Linux does something very "clever" for /proc/<pid>/fd/<num> entries. 5887 * Open FDs are represented as symlinks, the link contents 5888 * corresponding to the open resource. For plain files or devices, 5889 * this isn't absurd since one can dereference the symlink to query 5890 * the underlying resource. For sockets or pipes, it becomes ugly in a 5891 * hurry. To maintain this human-readable output, those FD symlinks 5892 * point to bogus targets such as "socket:[<inodenum>]". This requires 5893 * circumventing vfs since the stat/lstat behavior on those FD entries 5894 * will be unusual. (A stat must retrieve information about the open 5895 * socket or pipe. It cannot fail because the link contents point to 5896 * an absent file.) 
5897 * 5898 * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD 5899 * entries. This bypasses code paths which would normally 5900 * short-circuit on symlinks and allows us to emulate the vfs behavior 5901 * expected by /proc consumers. 5902 */ 5903 if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD) 5904 return (EINVAL); 5905 5906 /* Try to produce a symlink name for anything that has a realvp */ 5907 if (rvp != NULL) { 5908 if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0) 5909 return (error); 5910 if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) { 5911 /* 5912 * Special handling possible for /proc/<pid>/fd/<num> 5913 * Generate <type>:[<inode>] links, if allowed. 5914 */ 5915 if (lxpnp->lxpr_type != LXPR_PID_FD_FD || 5916 lxpr_readlink_fdnode(lxpnp, bp, buflen) != 0) { 5917 return (error); 5918 } 5919 } 5920 } else { 5921 switch (lxpnp->lxpr_type) { 5922 case LXPR_SELF: 5923 /* 5924 * Convert pid to the Linux default of 1 if we're the 5925 * zone's init process or 0 if zsched. 5926 */ 5927 if (curproc->p_pid == 5928 curproc->p_zone->zone_proc_initpid) { 5929 pid = 1; 5930 } else if (curproc->p_pid == 5931 curproc->p_zone->zone_zsched->p_pid) { 5932 pid = 0; 5933 } else { 5934 pid = curproc->p_pid; 5935 } 5936 5937 /* 5938 * Don't need to check result as every possible int 5939 * will fit within MAXPATHLEN bytes. 5940 */ 5941 (void) snprintf(bp, buflen, "%d", pid); 5942 break; 5943 case LXPR_PID_CURDIR: 5944 case LXPR_PID_ROOTDIR: 5945 case LXPR_PID_EXE: 5946 return (EACCES); 5947 default: 5948 /* 5949 * Need to return error so that nothing thinks 5950 * that the symlink is empty and hence "." 5951 */ 5952 return (EINVAL); 5953 } 5954 } 5955 5956 /* copy the link data to user space */ 5957 return (uiomove(bp, strlen(bp), UIO_READ, uiop)); 5958 } 5959 5960 5961 /* 5962 * lxpr_inactive(): Vnode operation for VOP_INACTIVE() 5963 * Vnode is no longer referenced, deallocate the file 5964 * and all its resources. 
5965 */ 5966 /* ARGSUSED */ 5967 static void 5968 lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 5969 { 5970 lxpr_freenode(VTOLXP(vp)); 5971 } 5972 5973 /* 5974 * lxpr_sync(): Vnode operation for VOP_SYNC() 5975 */ 5976 static int 5977 lxpr_sync() 5978 { 5979 /* 5980 * Nothing to sync but this function must never fail 5981 */ 5982 return (0); 5983 } 5984 5985 /* 5986 * lxpr_cmp(): Vnode operation for VOP_CMP() 5987 */ 5988 static int 5989 lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct) 5990 { 5991 vnode_t *rvp; 5992 5993 while (vn_matchops(vp1, lxpr_vnodeops) && 5994 (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) { 5995 vp1 = rvp; 5996 } 5997 5998 while (vn_matchops(vp2, lxpr_vnodeops) && 5999 (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) { 6000 vp2 = rvp; 6001 } 6002 6003 if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops)) 6004 return (vp1 == vp2); 6005 return (VOP_CMP(vp1, vp2, ct)); 6006 } 6007 6008 /* 6009 * lxpr_realvp(): Vnode operation for VOP_REALVP() 6010 */ 6011 static int 6012 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct) 6013 { 6014 vnode_t *rvp; 6015 6016 if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) { 6017 vp = rvp; 6018 if (VOP_REALVP(vp, &rvp, ct) == 0) 6019 vp = rvp; 6020 } 6021 6022 *vpp = vp; 6023 return (0); 6024 } 6025 6026 static int 6027 lxpr_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 6028 caller_context_t *ct) 6029 { 6030 lxpr_node_t *lxpnp = VTOLXP(vp); 6031 lxpr_nodetype_t type = lxpnp->lxpr_type; 6032 6033 switch (type) { 6034 case LXPR_SYS_KERNEL_COREPATT: 6035 return (lxpr_write_sys_kernel_corepatt(lxpnp, uiop, cr, ct)); 6036 case LXPR_SYS_NET_CORE_SOMAXCON: 6037 return (lxpr_write_sys_net_core_somaxc(lxpnp, uiop, cr, ct)); 6038 6039 default: 6040 /* pretend we wrote the whole thing */ 6041 uiop->uio_offset += uiop->uio_resid; 6042 uiop->uio_resid = 0; 6043 return (0); 6044 } 6045 } 6046 6047 /* 6048 * We need to allow open with O_CREAT for the oom_score_adj file. 
6049 */ 6050 /*ARGSUSED7*/ 6051 static int 6052 lxpr_create(struct vnode *dvp, char *nm, struct vattr *vap, 6053 enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred, 6054 int flag, caller_context_t *ct, vsecattr_t *vsecp) 6055 { 6056 lxpr_node_t *lxpnp = VTOLXP(dvp); 6057 lxpr_nodetype_t type = lxpnp->lxpr_type; 6058 vnode_t *vp = NULL; 6059 int error; 6060 6061 ASSERT(type < LXPR_NFILES); 6062 6063 /* 6064 * restrict create permission to owner or root 6065 */ 6066 if ((error = lxpr_access(dvp, VEXEC, 0, cred, ct)) != 0) { 6067 return (error); 6068 } 6069 6070 if (*nm == '\0') 6071 return (EPERM); 6072 6073 if (dvp->v_type != VDIR) 6074 return (EPERM); 6075 6076 if (exclusive == EXCL) 6077 return (EEXIST); 6078 6079 /* 6080 * We're currently restricting O_CREAT to: 6081 * - /proc/<pid>/fd/<num> 6082 * - /proc/<pid>/oom_score_adj 6083 * - /proc/<pid>/task/<tid>/fd/<num> 6084 * - /proc/<pid>/task/<tid>/oom_score_adj 6085 * - /proc/sys/kernel/core_pattern 6086 * - /proc/sys/net/core/somaxconn 6087 * - /proc/sys/vm/overcommit_memory 6088 * - /proc/sys/vm/swappiness 6089 */ 6090 switch (type) { 6091 case LXPR_PIDDIR: 6092 case LXPR_PID_TASK_IDDIR: 6093 if (strcmp(nm, "oom_score_adj") == 0) { 6094 proc_t *p; 6095 p = lxpr_lock(lxpnp->lxpr_pid); 6096 if (p != NULL) { 6097 vp = lxpr_lookup_common(dvp, nm, p, piddir, 6098 PIDDIRFILES); 6099 } 6100 lxpr_unlock(p); 6101 } 6102 break; 6103 6104 case LXPR_SYS_NET_COREDIR: 6105 if (strcmp(nm, "somaxconn") == 0) { 6106 vp = lxpr_lookup_common(dvp, nm, NULL, sys_net_coredir, 6107 SYS_NET_COREDIRFILES); 6108 } 6109 break; 6110 6111 case LXPR_SYS_KERNELDIR: 6112 if (strcmp(nm, "core_pattern") == 0) { 6113 vp = lxpr_lookup_common(dvp, nm, NULL, sys_kerneldir, 6114 SYS_KERNELDIRFILES); 6115 } 6116 break; 6117 6118 case LXPR_SYS_VMDIR: 6119 if (strcmp(nm, "overcommit_memory") == 0 || 6120 strcmp(nm, "swappiness") == 0) { 6121 vp = lxpr_lookup_common(dvp, nm, NULL, sys_vmdir, 6122 SYS_VMDIRFILES); 6123 } 6124 break; 
6125 6126 case LXPR_PID_FDDIR: 6127 case LXPR_PID_TID_FDDIR: 6128 vp = lxpr_lookup_fdnode(dvp, nm); 6129 break; 6130 6131 default: 6132 vp = NULL; 6133 break; 6134 } 6135 6136 if (vp != NULL) { 6137 /* Creating an existing file, allow it for regular files. */ 6138 if (vp->v_type == VDIR) 6139 return (EISDIR); 6140 6141 /* confirm permissions against existing file */ 6142 if ((error = lxpr_access(vp, mode, 0, cred, ct)) != 0) { 6143 VN_RELE(vp); 6144 return (error); 6145 } 6146 6147 *vpp = vp; 6148 return (0); 6149 } 6150 6151 /* 6152 * Linux proc does not allow creation of addition, non-subsystem 6153 * specific files inside the hierarchy. ENOENT is tossed when such 6154 * actions are attempted. 6155 */ 6156 return (ENOENT); 6157 }