1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2016 Joyent, Inc. 25 */ 26 27 /* 28 * lx_proc -- a Linux-compatible /proc for the LX brand 29 * 30 * We have -- confusingly -- two implementations of Linux /proc. One is to 31 * support native (but Linux-borne) programs that wish to view the native 32 * system through the Linux /proc model; the other -- this one -- is to 33 * support Linux binaries via the LX brand. These two implementations differ 34 * greatly in their aspirations (and their willingness to bend the truth 35 * of the system to accommodate those aspirations); they should not be unified. 
36 */ 37 38 #include <sys/cpupart.h> 39 #include <sys/cpuvar.h> 40 #include <sys/session.h> 41 #include <sys/vmparam.h> 42 #include <sys/mman.h> 43 #include <vm/rm.h> 44 #include <vm/seg_vn.h> 45 #include <sys/sdt.h> 46 #include <lx_signum.h> 47 #include <sys/strlog.h> 48 #include <sys/stropts.h> 49 #include <sys/cmn_err.h> 50 #include <sys/lx_brand.h> 51 #include <lx_auxv.h> 52 #include <sys/x86_archext.h> 53 #include <sys/archsystm.h> 54 #include <sys/fp.h> 55 #include <sys/pool_pset.h> 56 #include <sys/pset.h> 57 #include <sys/zone.h> 58 #include <sys/pghw.h> 59 #include <sys/vfs_opreg.h> 60 #include <sys/param.h> 61 #include <sys/utsname.h> 62 #include <sys/rctl.h> 63 #include <sys/kstat.h> 64 #include <sys/lx_misc.h> 65 #include <sys/brand.h> 66 #include <sys/cred_impl.h> 67 #include <sys/tihdr.h> 68 #include <sys/corectl.h> 69 #include <inet/ip.h> 70 #include <inet/ip_ire.h> 71 #include <inet/ip6.h> 72 #include <inet/ip_if.h> 73 #include <inet/tcp.h> 74 #include <inet/tcp_impl.h> 75 #include <inet/udp_impl.h> 76 #include <inet/ipclassifier.h> 77 #include <sys/socketvar.h> 78 #include <fs/sockfs/socktpi.h> 79 80 /* Dependent on procfs */ 81 extern kthread_t *prchoose(proc_t *); 82 extern int prreadargv(proc_t *, char *, size_t, size_t *); 83 extern int prreadenvv(proc_t *, char *, size_t, size_t *); 84 extern int prreadbuf(proc_t *, uintptr_t, uint8_t *, size_t, size_t *); 85 86 #include "lx_proc.h" 87 88 extern pgcnt_t swapfs_minfree; 89 extern time_t boot_time; 90 91 /* 92 * Pointer to the vnode ops vector for this fs. 
93 * This is instantiated in lxprinit() in lxpr_vfsops.c 94 */ 95 vnodeops_t *lxpr_vnodeops; 96 97 static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *); 98 static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *, 99 caller_context_t *); 100 static int lxpr_create(struct vnode *, char *, struct vattr *, enum vcexcl, 101 int, struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *); 102 static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *); 103 static int lxpr_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *); 104 static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *, 105 caller_context_t *); 106 static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *); 107 static int lxpr_lookup(vnode_t *, char *, vnode_t **, 108 pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *, 109 pathname_t *); 110 static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *, 111 caller_context_t *, int); 112 static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *); 113 static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *); 114 static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *); 115 static int lxpr_sync(void); 116 static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *); 117 118 static vnode_t *lxpr_lookup_procdir(vnode_t *, char *); 119 static vnode_t *lxpr_lookup_piddir(vnode_t *, char *); 120 static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *); 121 static vnode_t *lxpr_lookup_fddir(vnode_t *, char *); 122 static vnode_t *lxpr_lookup_netdir(vnode_t *, char *); 123 static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *); 124 static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *); 125 static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *); 126 static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *); 127 static vnode_t *lxpr_lookup_sys_kdir_randdir(vnode_t *, char *); 128 static vnode_t *lxpr_lookup_sys_netdir(vnode_t *, 
char *); 129 static vnode_t *lxpr_lookup_sys_net_coredir(vnode_t *, char *); 130 static vnode_t *lxpr_lookup_sys_vmdir(vnode_t *, char *); 131 static vnode_t *lxpr_lookup_taskdir(vnode_t *, char *); 132 static vnode_t *lxpr_lookup_task_tid_dir(vnode_t *, char *); 133 134 static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *); 135 static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *); 136 static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *); 137 static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *); 138 static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *); 139 static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *); 140 static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *); 141 static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *); 142 static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *); 143 static int lxpr_readdir_sys_kdir_randdir(lxpr_node_t *, uio_t *, int *); 144 static int lxpr_readdir_sys_netdir(lxpr_node_t *, uio_t *, int *); 145 static int lxpr_readdir_sys_net_coredir(lxpr_node_t *, uio_t *, int *); 146 static int lxpr_readdir_sys_vmdir(lxpr_node_t *, uio_t *, int *); 147 static int lxpr_readdir_taskdir(lxpr_node_t *, uio_t *, int *); 148 static int lxpr_readdir_task_tid_dir(lxpr_node_t *, uio_t *, int *); 149 150 static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *); 151 static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *); 152 static void lxpr_read_cgroups(lxpr_node_t *, lxpr_uiobuf_t *); 153 static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *); 154 static void lxpr_read_diskstats(lxpr_node_t *, lxpr_uiobuf_t *); 155 static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *); 156 static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *); 157 static void lxpr_read_filesystems(lxpr_node_t *, lxpr_uiobuf_t *); 158 static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t); 159 static void lxpr_read_loadavg(lxpr_node_t *, 
lxpr_uiobuf_t *); 160 static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *); 161 static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *); 162 static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *); 163 static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *); 164 static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *); 165 static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *); 166 static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *); 167 168 static void lxpr_read_pid_auxv(lxpr_node_t *, lxpr_uiobuf_t *); 169 static void lxpr_read_pid_cgroup(lxpr_node_t *, lxpr_uiobuf_t *); 170 static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *); 171 static void lxpr_read_pid_comm(lxpr_node_t *, lxpr_uiobuf_t *); 172 static void lxpr_read_pid_env(lxpr_node_t *, lxpr_uiobuf_t *); 173 static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *); 174 static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *); 175 static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *); 176 static void lxpr_read_pid_oom_scr_adj(lxpr_node_t *, lxpr_uiobuf_t *); 177 static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *); 178 static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *); 179 static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *); 180 181 static void lxpr_read_pid_tid_stat(lxpr_node_t *, lxpr_uiobuf_t *); 182 static void lxpr_read_pid_tid_status(lxpr_node_t *, lxpr_uiobuf_t *); 183 184 static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *); 185 static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *); 186 static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *); 187 static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *); 188 static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *); 189 static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *); 190 static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *); 191 static void 
lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *); 192 static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *); 193 static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *); 194 static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *); 195 static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *); 196 static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *); 197 static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *); 198 static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *); 199 static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *); 200 static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *); 201 static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *); 202 static void lxpr_read_net_tcp6(lxpr_node_t *, lxpr_uiobuf_t *); 203 static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *); 204 static void lxpr_read_net_udp6(lxpr_node_t *, lxpr_uiobuf_t *); 205 static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *); 206 static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *, 207 lxpr_uiobuf_t *); 208 static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *, 209 lxpr_uiobuf_t *); 210 static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *, 211 lxpr_uiobuf_t *); 212 static void lxpr_read_sys_kernel_caplcap(lxpr_node_t *, lxpr_uiobuf_t *); 213 static void lxpr_read_sys_kernel_corepatt(lxpr_node_t *, lxpr_uiobuf_t *); 214 static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *); 215 static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *); 216 static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *); 217 static void lxpr_read_sys_kernel_osrel(lxpr_node_t *, lxpr_uiobuf_t *); 218 static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *); 219 static void lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *, lxpr_uiobuf_t *); 220 static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, 
lxpr_uiobuf_t *); 221 static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *); 222 static void lxpr_read_sys_net_core_somaxc(lxpr_node_t *, lxpr_uiobuf_t *); 223 static void lxpr_read_sys_vm_minfr_kb(lxpr_node_t *, lxpr_uiobuf_t *); 224 static void lxpr_read_sys_vm_nhpages(lxpr_node_t *, lxpr_uiobuf_t *); 225 static void lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *, lxpr_uiobuf_t *); 226 static void lxpr_read_sys_vm_swappiness(lxpr_node_t *, lxpr_uiobuf_t *); 227 228 static int lxpr_write_sys_net_core_somaxc(lxpr_node_t *, uio_t *, cred_t *, 229 caller_context_t *); 230 static int lxpr_write_sys_kernel_corepatt(lxpr_node_t *, uio_t *, cred_t *, 231 caller_context_t *); 232 233 /* 234 * Simple conversion 235 */ 236 #define btok(x) ((x) >> 10) /* bytes to kbytes */ 237 #define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */ 238 239 #define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t)) 240 241 extern rctl_hndl_t rc_zone_msgmni; 242 extern rctl_hndl_t rc_zone_shmmax; 243 #define FOURGB 4294967295 244 245 /* 246 * The maximum length of the concatenation of argument vector strings we 247 * will return to the user via the branded procfs. Likewise for the env vector. 
248 */ 249 int lxpr_maxargvlen = 4096; 250 int lxpr_maxenvvlen = 4096; 251 252 /* 253 * The lx /proc vnode operations vector 254 */ 255 const fs_operation_def_t lxpr_vnodeops_template[] = { 256 VOPNAME_OPEN, { .vop_open = lxpr_open }, 257 VOPNAME_CLOSE, { .vop_close = lxpr_close }, 258 VOPNAME_READ, { .vop_read = lxpr_read }, 259 VOPNAME_WRITE, { .vop_read = lxpr_write }, 260 VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr }, 261 VOPNAME_ACCESS, { .vop_access = lxpr_access }, 262 VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup }, 263 VOPNAME_CREATE, { .vop_create = lxpr_create }, 264 VOPNAME_READDIR, { .vop_readdir = lxpr_readdir }, 265 VOPNAME_READLINK, { .vop_readlink = lxpr_readlink }, 266 VOPNAME_FSYNC, { .error = lxpr_sync }, 267 VOPNAME_SEEK, { .error = lxpr_sync }, 268 VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive }, 269 VOPNAME_CMP, { .vop_cmp = lxpr_cmp }, 270 VOPNAME_REALVP, { .vop_realvp = lxpr_realvp }, 271 NULL, NULL 272 }; 273 274 275 /* 276 * file contents of an lx /proc directory. 277 */ 278 static lxpr_dirent_t lx_procdir[] = { 279 { LXPR_CGROUPS, "cgroups" }, 280 { LXPR_CMDLINE, "cmdline" }, 281 { LXPR_CPUINFO, "cpuinfo" }, 282 { LXPR_DEVICES, "devices" }, 283 { LXPR_DISKSTATS, "diskstats" }, 284 { LXPR_DMA, "dma" }, 285 { LXPR_FILESYSTEMS, "filesystems" }, 286 { LXPR_INTERRUPTS, "interrupts" }, 287 { LXPR_IOPORTS, "ioports" }, 288 { LXPR_KCORE, "kcore" }, 289 { LXPR_KMSG, "kmsg" }, 290 { LXPR_LOADAVG, "loadavg" }, 291 { LXPR_MEMINFO, "meminfo" }, 292 { LXPR_MODULES, "modules" }, 293 { LXPR_MOUNTS, "mounts" }, 294 { LXPR_NETDIR, "net" }, 295 { LXPR_PARTITIONS, "partitions" }, 296 { LXPR_SELF, "self" }, 297 { LXPR_STAT, "stat" }, 298 { LXPR_SWAPS, "swaps" }, 299 { LXPR_SYSDIR, "sys" }, 300 { LXPR_UPTIME, "uptime" }, 301 { LXPR_VERSION, "version" } 302 }; 303 304 #define PROCDIRFILES (sizeof (lx_procdir) / sizeof (lx_procdir[0])) 305 306 /* 307 * Contents of an lx /proc/<pid> directory. 
 */
static lxpr_dirent_t piddir[] = {
	{ LXPR_PID_AUXV,	"auxv" },
	{ LXPR_PID_CGROUP,	"cgroup" },
	{ LXPR_PID_CMDLINE,	"cmdline" },
	{ LXPR_PID_COMM,	"comm" },
	{ LXPR_PID_CPU,		"cpu" },
	{ LXPR_PID_CURDIR,	"cwd" },
	{ LXPR_PID_ENV,		"environ" },
	{ LXPR_PID_EXE,		"exe" },
	{ LXPR_PID_LIMITS,	"limits" },
	{ LXPR_PID_MAPS,	"maps" },
	{ LXPR_PID_MEM,		"mem" },
	{ LXPR_PID_MOUNTINFO,	"mountinfo" },
	{ LXPR_PID_OOM_SCR_ADJ,	"oom_score_adj" },
	{ LXPR_PID_ROOTDIR,	"root" },
	{ LXPR_PID_STAT,	"stat" },
	{ LXPR_PID_STATM,	"statm" },
	{ LXPR_PID_STATUS,	"status" },
	{ LXPR_PID_TASKDIR,	"task" },
	{ LXPR_PID_FDDIR,	"fd" }
};

#define	PIDDIRFILES	(sizeof (piddir) / sizeof (piddir[0]))

/*
 * Contents of an lx /proc/<pid>/task/<tid> directory.
 *
 * Differs from piddir above in using the per-thread (TID) node types
 * for the files that need thread-specific handling (auxv, comm,
 * oom_score_adj, stat, status), and in having no "task" subdirectory.
 */
static lxpr_dirent_t tiddir[] = {
	{ LXPR_PID_TID_AUXV,	"auxv" },
	{ LXPR_PID_CGROUP,	"cgroup" },
	{ LXPR_PID_CMDLINE,	"cmdline" },
	{ LXPR_PID_TID_COMM,	"comm" },
	{ LXPR_PID_CPU,		"cpu" },
	{ LXPR_PID_CURDIR,	"cwd" },
	{ LXPR_PID_ENV,		"environ" },
	{ LXPR_PID_EXE,		"exe" },
	{ LXPR_PID_LIMITS,	"limits" },
	{ LXPR_PID_MAPS,	"maps" },
	{ LXPR_PID_MEM,		"mem" },
	{ LXPR_PID_MOUNTINFO,	"mountinfo" },
	{ LXPR_PID_TID_OOM_SCR_ADJ,	"oom_score_adj" },
	{ LXPR_PID_ROOTDIR,	"root" },
	{ LXPR_PID_TID_STAT,	"stat" },
	{ LXPR_PID_STATM,	"statm" },
	{ LXPR_PID_TID_STATUS,	"status" },
	{ LXPR_PID_FDDIR,	"fd" }
};

#define	TIDDIRFILES	(sizeof (tiddir) / sizeof (tiddir[0]))

/* All-ones 64-bit value, i.e. the Linux RLIM64_INFINITY sentinel. */
#define	LX_RLIM_INFINITY	0xFFFFFFFFFFFFFFFF

/* True when an rctl value represents an unlimited ("infinite") limit. */
#define	RCTL_INFINITE(x) \
	((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \
	(x->rcv_flagaction & RCTL_GLOBAL_INFINITE))

/*
 * One row of the Linux /proc/<pid>/limits table: the Linux limit name,
 * its unit string, and the rctl that backs it here.  A NULL rlim_rctl
 * means there is no corresponding rctl on this system.
 */
typedef struct lxpr_rlimtab {
	char	*rlim_name;	/* limit name */
	char	*rlim_unit;	/* limit unit */
	char	*rlim_rctl;	/* rctl source */
} lxpr_rlimtab_t;

static lxpr_rlimtab_t lxpr_rlimtab[] = {
	{ "Max cpu time",	"seconds",	"process.max-cpu-time" },
	{ "Max file size",	"bytes",	"process.max-file-size" },
	{ "Max data size",	"bytes",	"process.max-data-size" },
	{ "Max stack size",	"bytes",	"process.max-stack-size" },
	{ "Max core file size",	"bytes",	"process.max-core-size" },
	{ "Max resident set",	"bytes",	"zone.max-physical-memory" },
	{ "Max processes",	"processes",	"zone.max-lwps" },
	{ "Max open files",	"files",	"process.max-file-descriptor" },
	{ "Max locked memory",	"bytes",	"zone.max-locked-memory" },
	{ "Max address space",	"bytes",	"process.max-address-space" },
	{ "Max file locks",	"locks",	NULL },
	{ "Max pending signals",	"signals",
	    "process.max-sigqueue-size" },
	{ "Max msgqueue size",	"bytes",	"process.max-msg-messages" },
	{ NULL, NULL, NULL }
};


/*
 * contents of lx /proc/net directory
 */
static lxpr_dirent_t netdir[] = {
	{ LXPR_NET_ARP,		"arp" },
	{ LXPR_NET_DEV,		"dev" },
	{ LXPR_NET_DEV_MCAST,	"dev_mcast" },
	{ LXPR_NET_IF_INET6,	"if_inet6" },
	{ LXPR_NET_IGMP,	"igmp" },
	{ LXPR_NET_IP_MR_CACHE,	"ip_mr_cache" },
	{ LXPR_NET_IP_MR_VIF,	"ip_mr_vif" },
	{ LXPR_NET_IPV6_ROUTE,	"ipv6_route" },
	{ LXPR_NET_MCFILTER,	"mcfilter" },
	{ LXPR_NET_NETSTAT,	"netstat" },
	{ LXPR_NET_RAW,		"raw" },
	{ LXPR_NET_ROUTE,	"route" },
	{ LXPR_NET_RPC,		"rpc" },
	{ LXPR_NET_RT_CACHE,	"rt_cache" },
	{ LXPR_NET_SOCKSTAT,	"sockstat" },
	{ LXPR_NET_SNMP,	"snmp" },
	{ LXPR_NET_STAT,	"stat" },
	{ LXPR_NET_TCP,		"tcp" },
	{ LXPR_NET_TCP6,	"tcp6" },
	{ LXPR_NET_UDP,		"udp" },
	{ LXPR_NET_UDP6,	"udp6" },
	{ LXPR_NET_UNIX,	"unix" }
};

#define	NETDIRFILES	(sizeof (netdir) / sizeof (netdir[0]))

/*
 * contents of /proc/sys directory
 */
static lxpr_dirent_t sysdir[] = {
	{ LXPR_SYS_FSDIR,	"fs" },
	{ LXPR_SYS_KERNELDIR,	"kernel" },
	{ LXPR_SYS_NETDIR,	"net" },
	{ LXPR_SYS_VMDIR,	"vm" },
};

#define	SYSDIRFILES	(sizeof (sysdir) / sizeof (sysdir[0]))

/*
 * contents of /proc/sys/fs directory
 */
static lxpr_dirent_t sys_fsdir[] = {
	{ LXPR_SYS_FS_INOTIFYDIR,	"inotify" },
};

#define	SYS_FSDIRFILES	(sizeof (sys_fsdir) / sizeof (sys_fsdir[0]))

/*
 * contents of /proc/sys/fs/inotify directory
 */
static lxpr_dirent_t sys_fs_inotifydir[] = {
	{ LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS,	"max_queued_events" },
	{ LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES,	"max_user_instances" },
	{ LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES,		"max_user_watches" },
};

#define	SYS_FS_INOTIFYDIRFILES \
	(sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0]))

/*
 * contents of /proc/sys/kernel directory
 */
static lxpr_dirent_t sys_kerneldir[] = {
	{ LXPR_SYS_KERNEL_CAPLCAP,	"cap_last_cap" },
	{ LXPR_SYS_KERNEL_COREPATT,	"core_pattern" },
	{ LXPR_SYS_KERNEL_HOSTNAME,	"hostname" },
	{ LXPR_SYS_KERNEL_MSGMNI,	"msgmni" },
	{ LXPR_SYS_KERNEL_NGROUPS_MAX,	"ngroups_max" },
	{ LXPR_SYS_KERNEL_OSREL,	"osrelease" },
	{ LXPR_SYS_KERNEL_PID_MAX,	"pid_max" },
	{ LXPR_SYS_KERNEL_RANDDIR,	"random" },
	{ LXPR_SYS_KERNEL_SHMMAX,	"shmmax" },
	{ LXPR_SYS_KERNEL_THREADS_MAX,	"threads-max" },
};

#define	SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0]))

/*
 * contents of /proc/sys/kernel/random directory
 */
static lxpr_dirent_t sys_randdir[] = {
	{ LXPR_SYS_KERNEL_RAND_BOOTID,	"boot_id" },
};

#define	SYS_RANDDIRFILES (sizeof (sys_randdir) / sizeof (sys_randdir[0]))

/*
 * contents of /proc/sys/net directory
 */
static lxpr_dirent_t sys_netdir[] = {
	{ LXPR_SYS_NET_COREDIR,		"core" },
};

#define	SYS_NETDIRFILES	(sizeof (sys_netdir) / sizeof (sys_netdir[0]))

/*
 * contents of /proc/sys/net/core directory
 */
static lxpr_dirent_t sys_net_coredir[] = {
	{ LXPR_SYS_NET_CORE_SOMAXCON,	"somaxconn" },
};

#define	SYS_NET_COREDIRFILES \
	(sizeof (sys_net_coredir) / sizeof (sys_net_coredir[0]))

/*
 * contents of /proc/sys/vm directory
 */
static lxpr_dirent_t sys_vmdir[] = {
	{ LXPR_SYS_VM_MINFR_KB,		"min_free_kbytes" },
	{ LXPR_SYS_VM_NHUGEP,		"nr_hugepages" },
	{ LXPR_SYS_VM_OVERCOMMIT_MEM,	"overcommit_memory" },
	{ LXPR_SYS_VM_SWAPPINESS,	"swappiness" },
};

#define	SYS_VMDIRFILES (sizeof (sys_vmdir) / sizeof (sys_vmdir[0]))

/*
 * lxpr_open(): Vnode operation for VOP_OPEN()
 *
 * Opens for write are only allowed on the node types that have a write
 * handler; anything else fails with EPERM.  For nodes that shadow a
 * real vnode (fd entries, cwd, root), the open is forwarded to that
 * underlying vnode.
 */
static int
lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	vnode_t		*vp = *vpp;
	lxpr_node_t	*lxpnp = VTOLXP(vp);
	lxpr_nodetype_t	type = lxpnp->lxpr_type;
	vnode_t		*rvp;
	int		error = 0;

	if (flag & FWRITE) {
		/* Restrict writes to certain files */
		switch (type) {
		case LXPR_PID_OOM_SCR_ADJ:
		case LXPR_PID_TID_OOM_SCR_ADJ:
		case LXPR_SYS_KERNEL_COREPATT:
		case LXPR_SYS_NET_CORE_SOMAXCON:
		case LXPR_SYS_VM_OVERCOMMIT_MEM:
		case LXPR_SYS_VM_SWAPPINESS:
		case LXPR_PID_FD_FD:
		case LXPR_PID_TID_FD_FD:
			break;
		default:
			return (EPERM);
		}
	}

	/*
	 * If we are opening an underlying file only allow regular files,
	 * fifos or sockets; reject the open for anything else.
	 * Just do it if we are opening the current or root directory.
	 */
	if (lxpnp->lxpr_realvp != NULL) {
		rvp = lxpnp->lxpr_realvp;

		if (type == LXPR_PID_FD_FD && rvp->v_type != VREG &&
		    rvp->v_type != VFIFO && rvp->v_type != VSOCK) {
			error = EACCES;
		} else {
			if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) {
				/*
				 * This flag lets the fifo open know that
				 * we're using proc/fd to open a fd which we
				 * already have open. Otherwise, the fifo might
				 * reject an open if the other end has closed.
				 */
				flag |= FKLYR;
			}
			/*
			 * Need to hold rvp since VOP_OPEN() may release it.
			 */
			VN_HOLD(rvp);
			error = VOP_OPEN(&rvp, flag, cr, ct);
			if (error) {
				VN_RELE(rvp);
			} else {
				/* Hand the underlying vnode back to the caller. */
				*vpp = rvp;
				VN_RELE(vp);
			}
		}
	}

	return (error);
}


/*
 * lxpr_close(): Vnode operation for VOP_CLOSE()
 */
/* ARGSUSED */
static int
lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
    caller_context_t *ct)
{
	lxpr_node_t	*lxpr = VTOLXP(vp);
	lxpr_nodetype_t	type = lxpr->lxpr_type;

	/*
	 * we should never get here because the close is done on the realvp
	 * for these nodes
	 */
	ASSERT(type != LXPR_PID_FD_FD &&
	    type != LXPR_PID_CURDIR &&
	    type != LXPR_PID_ROOTDIR &&
	    type != LXPR_PID_EXE);

	return (0);
}

/*
 * Array of read functions, indexed by lx /proc file type.
 *
 * NOTE(review): the entry order here (and in lxpr_lookup_function and
 * lxpr_readdir_function below) presumably must match the lxpr_nodetype_t
 * enumeration -- confirm against lx_proc.h before adding or reordering
 * entries.
 */
static void (*lxpr_read_function[LXPR_NFILES])() = {
	lxpr_read_isdir,		/* /proc		*/
	lxpr_read_isdir,		/* /proc/<pid>		*/
	lxpr_read_pid_auxv,		/* /proc/<pid>/auxv	*/
	lxpr_read_pid_cgroup,		/* /proc/<pid>/cgroup	*/
	lxpr_read_pid_cmdline,		/* /proc/<pid>/cmdline	*/
	lxpr_read_pid_comm,		/* /proc/<pid>/comm	*/
	lxpr_read_empty,		/* /proc/<pid>/cpu	*/
	lxpr_read_invalid,		/* /proc/<pid>/cwd	*/
	lxpr_read_pid_env,		/* /proc/<pid>/environ	*/
	lxpr_read_invalid,		/* /proc/<pid>/exe	*/
	lxpr_read_pid_limits,		/* /proc/<pid>/limits	*/
	lxpr_read_pid_maps,		/* /proc/<pid>/maps	*/
	lxpr_read_empty,		/* /proc/<pid>/mem	*/
	lxpr_read_pid_mountinfo,	/* /proc/<pid>/mountinfo */
	lxpr_read_pid_oom_scr_adj,	/* /proc/<pid>/oom_score_adj */
	lxpr_read_invalid,		/* /proc/<pid>/root	*/
	lxpr_read_pid_stat,		/* /proc/<pid>/stat	*/
	lxpr_read_pid_statm,		/* /proc/<pid>/statm	*/
	lxpr_read_pid_status,		/* /proc/<pid>/status	*/
	lxpr_read_isdir,		/* /proc/<pid>/task	*/
	lxpr_read_isdir,		/* /proc/<pid>/task/nn	*/
	lxpr_read_isdir,		/* /proc/<pid>/fd	*/
	lxpr_read_fd,			/* /proc/<pid>/fd/nn	*/
	lxpr_read_pid_auxv,		/* /proc/<pid>/task/<tid>/auxv	*/
	lxpr_read_pid_cgroup,		/* /proc/<pid>/task/<tid>/cgroup */
	lxpr_read_pid_cmdline,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_read_pid_comm,		/* /proc/<pid>/task/<tid>/comm	*/
	lxpr_read_empty,		/* /proc/<pid>/task/<tid>/cpu	*/
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/cwd	*/
	lxpr_read_pid_env,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/exe	*/
	lxpr_read_pid_limits,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_read_pid_maps,		/* /proc/<pid>/task/<tid>/maps	*/
	lxpr_read_empty,		/* /proc/<pid>/task/<tid>/mem	*/
	lxpr_read_pid_mountinfo,	/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_read_pid_oom_scr_adj,	/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/root	*/
	lxpr_read_pid_tid_stat,		/* /proc/<pid>/task/<tid>/stat	*/
	lxpr_read_pid_statm,		/* /proc/<pid>/task/<tid>/statm	*/
	lxpr_read_pid_tid_status,	/* /proc/<pid>/task/<tid>/status */
	lxpr_read_isdir,		/* /proc/<pid>/task/<tid>/fd	*/
	lxpr_read_fd,			/* /proc/<pid>/task/<tid>/fd/nn	*/
	lxpr_read_cgroups,		/* /proc/cgroups	*/
	lxpr_read_empty,		/* /proc/cmdline	*/
	lxpr_read_cpuinfo,		/* /proc/cpuinfo	*/
	lxpr_read_empty,		/* /proc/devices	*/
	lxpr_read_diskstats,		/* /proc/diskstats	*/
	lxpr_read_empty,		/* /proc/dma		*/
	lxpr_read_filesystems,		/* /proc/filesystems	*/
	lxpr_read_empty,		/* /proc/interrupts	*/
	lxpr_read_empty,		/* /proc/ioports	*/
	lxpr_read_empty,		/* /proc/kcore		*/
	lxpr_read_invalid,		/* /proc/kmsg -- see lxpr_read() */
	lxpr_read_loadavg,		/* /proc/loadavg	*/
	lxpr_read_meminfo,		/* /proc/meminfo	*/
	lxpr_read_empty,		/* /proc/modules	*/
	lxpr_read_mounts,		/* /proc/mounts		*/
	lxpr_read_isdir,		/* /proc/net		*/
	lxpr_read_net_arp,		/* /proc/net/arp	*/
	lxpr_read_net_dev,		/* /proc/net/dev	*/
	lxpr_read_net_dev_mcast,	/* /proc/net/dev_mcast	*/
	lxpr_read_net_if_inet6,		/* /proc/net/if_inet6	*/
	lxpr_read_net_igmp,		/* /proc/net/igmp	*/
	lxpr_read_net_ip_mr_cache,	/* /proc/net/ip_mr_cache */
	lxpr_read_net_ip_mr_vif,	/* /proc/net/ip_mr_vif	*/
	lxpr_read_net_ipv6_route,	/* /proc/net/ipv6_route	*/
	lxpr_read_net_mcfilter,		/* /proc/net/mcfilter	*/
	lxpr_read_net_netstat,		/* /proc/net/netstat	*/
	lxpr_read_net_raw,		/* /proc/net/raw	*/
	lxpr_read_net_route,		/* /proc/net/route	*/
	lxpr_read_net_rpc,		/* /proc/net/rpc	*/
	lxpr_read_net_rt_cache,		/* /proc/net/rt_cache	*/
	lxpr_read_net_sockstat,		/* /proc/net/sockstat	*/
	lxpr_read_net_snmp,		/* /proc/net/snmp	*/
	lxpr_read_net_stat,		/* /proc/net/stat	*/
	lxpr_read_net_tcp,		/* /proc/net/tcp	*/
	lxpr_read_net_tcp6,		/* /proc/net/tcp6	*/
	lxpr_read_net_udp,		/* /proc/net/udp	*/
	lxpr_read_net_udp6,		/* /proc/net/udp6	*/
	lxpr_read_net_unix,		/* /proc/net/unix	*/
	lxpr_read_partitions,		/* /proc/partitions	*/
	lxpr_read_invalid,		/* /proc/self		*/
	lxpr_read_stat,			/* /proc/stat		*/
	lxpr_read_swaps,		/* /proc/swaps		*/
	lxpr_read_invalid,		/* /proc/sys		*/
	lxpr_read_invalid,		/* /proc/sys/fs		*/
	lxpr_read_invalid,		/* /proc/sys/fs/inotify	*/
	lxpr_read_sys_fs_inotify_max_queued_events, /* max_queued_events */
	lxpr_read_sys_fs_inotify_max_user_instances, /* max_user_instances */
	lxpr_read_sys_fs_inotify_max_user_watches, /* max_user_watches */
	lxpr_read_invalid,		/* /proc/sys/kernel	*/
	lxpr_read_sys_kernel_caplcap,	/* /proc/sys/kernel/cap_last_cap */
	lxpr_read_sys_kernel_corepatt,	/* /proc/sys/kernel/core_pattern */
	lxpr_read_sys_kernel_hostname,	/* /proc/sys/kernel/hostname */
	lxpr_read_sys_kernel_msgmni,	/* /proc/sys/kernel/msgmni */
	lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */
	lxpr_read_sys_kernel_osrel,	/* /proc/sys/kernel/osrelease */
	lxpr_read_sys_kernel_pid_max,	/* /proc/sys/kernel/pid_max */
	lxpr_read_invalid,		/* /proc/sys/kernel/random */
	lxpr_read_sys_kernel_rand_bootid, /* /proc/sys/kernel/random/boot_id */
	lxpr_read_sys_kernel_shmmax,	/* /proc/sys/kernel/shmmax */
	lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */
	lxpr_read_invalid,		/* /proc/sys/net	*/
	lxpr_read_invalid,		/* /proc/sys/net/core	*/
	lxpr_read_sys_net_core_somaxc,	/* /proc/sys/net/core/somaxconn */
	lxpr_read_invalid,		/* /proc/sys/vm		*/
	lxpr_read_sys_vm_minfr_kb,	/* /proc/sys/vm/min_free_kbytes */
	lxpr_read_sys_vm_nhpages,	/* /proc/sys/vm/nr_hugepages */
	lxpr_read_sys_vm_overcommit_mem, /* /proc/sys/vm/overcommit_memory */
	lxpr_read_sys_vm_swappiness,	/* /proc/sys/vm/swappiness */
	lxpr_read_uptime,		/* /proc/uptime		*/
	lxpr_read_version,		/* /proc/version	*/
};

/*
 * Array of lookup functions, indexed by lx /proc file type.
 */
static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
	lxpr_lookup_procdir,		/* /proc		*/
	lxpr_lookup_piddir,		/* /proc/<pid>		*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/auxv	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cgroup	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cmdline	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/comm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cpu	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cwd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/environ	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/exe	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/limits	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/maps	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/mem	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/mountinfo */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/oom_score_adj */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/root	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/stat	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/statm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/status	*/
	lxpr_lookup_taskdir,		/* /proc/<pid>/task	*/
	lxpr_lookup_task_tid_dir,	/* /proc/<pid>/task/nn	*/
	lxpr_lookup_fddir,		/* /proc/<pid>/fd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/fd/nn	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/auxv	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cgroup */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/comm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cpu	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cwd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/exe	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/maps	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/mem	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/root	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/stat	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/statm	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/status */
	lxpr_lookup_fddir,		/* /proc/<pid>/task/<tid>/fd	*/
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/fd/nn	*/
	lxpr_lookup_not_a_dir,		/* /proc/cgroups	*/
	lxpr_lookup_not_a_dir,		/* /proc/cmdline	*/
	lxpr_lookup_not_a_dir,		/* /proc/cpuinfo	*/
	lxpr_lookup_not_a_dir,		/* /proc/devices	*/
	lxpr_lookup_not_a_dir,		/* /proc/diskstats	*/
	lxpr_lookup_not_a_dir,		/* /proc/dma		*/
	lxpr_lookup_not_a_dir,		/* /proc/filesystems	*/
	lxpr_lookup_not_a_dir,		/* /proc/interrupts	*/
	lxpr_lookup_not_a_dir,		/* /proc/ioports	*/
	lxpr_lookup_not_a_dir,		/* /proc/kcore		*/
	lxpr_lookup_not_a_dir,		/* /proc/kmsg		*/
	lxpr_lookup_not_a_dir,		/* /proc/loadavg	*/
	lxpr_lookup_not_a_dir,		/* /proc/meminfo	*/
	lxpr_lookup_not_a_dir,		/* /proc/modules	*/
	lxpr_lookup_not_a_dir,		/* /proc/mounts		*/
	lxpr_lookup_netdir,		/* /proc/net		*/
	lxpr_lookup_not_a_dir,		/* /proc/net/arp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/dev	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/dev_mcast	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/if_inet6	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/igmp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/ip_mr_cache */
	lxpr_lookup_not_a_dir,		/* /proc/net/ip_mr_vif	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/ipv6_route	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/mcfilter	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/netstat	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/raw	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/route	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/rpc	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/rt_cache	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/sockstat	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/snmp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/stat	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/tcp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/tcp6	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/udp	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/udp6	*/
	lxpr_lookup_not_a_dir,		/* /proc/net/unix	*/
	lxpr_lookup_not_a_dir,		/* /proc/partitions	*/
	lxpr_lookup_not_a_dir,		/* /proc/self		*/
	lxpr_lookup_not_a_dir,		/* /proc/stat		*/
	lxpr_lookup_not_a_dir,		/* /proc/swaps		*/
	lxpr_lookup_sysdir,		/* /proc/sys		*/
	lxpr_lookup_sys_fsdir,		/* /proc/sys/fs		*/
	lxpr_lookup_sys_fs_inotifydir,	/* /proc/sys/fs/inotify	*/
	lxpr_lookup_not_a_dir,		/* .../inotify/max_queued_events */
	lxpr_lookup_not_a_dir,		/* .../inotify/max_user_instances */
	lxpr_lookup_not_a_dir,		/* .../inotify/max_user_watches */
	lxpr_lookup_sys_kerneldir,	/* /proc/sys/kernel	*/
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/cap_last_cap */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/core_pattern */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/hostname */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/msgmni */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/ngroups_max */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/osrelease */
	lxpr_lookup_not_a_dir,		/*
/proc/sys/kernel/pid_max */ 819 lxpr_lookup_sys_kdir_randdir, /* /proc/sys/kernel/random */ 820 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/random/boot_id */ 821 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/shmmax */ 822 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/threads-max */ 823 lxpr_lookup_sys_netdir, /* /proc/sys/net */ 824 lxpr_lookup_sys_net_coredir, /* /proc/sys/net/core */ 825 lxpr_lookup_not_a_dir, /* /proc/sys/net/core/somaxconn */ 826 lxpr_lookup_sys_vmdir, /* /proc/sys/vm */ 827 lxpr_lookup_not_a_dir, /* /proc/sys/vm/min_free_kbytes */ 828 lxpr_lookup_not_a_dir, /* /proc/sys/vm/nr_hugepages */ 829 lxpr_lookup_not_a_dir, /* /proc/sys/vm/overcommit_memory */ 830 lxpr_lookup_not_a_dir, /* /proc/sys/vm/swappiness */ 831 lxpr_lookup_not_a_dir, /* /proc/uptime */ 832 lxpr_lookup_not_a_dir, /* /proc/version */ 833 }; 834 835 /* 836 * Array of readdir functions, indexed by /proc file type. 837 */ 838 static int (*lxpr_readdir_function[LXPR_NFILES])() = { 839 lxpr_readdir_procdir, /* /proc */ 840 lxpr_readdir_piddir, /* /proc/<pid> */ 841 lxpr_readdir_not_a_dir, /* /proc/<pid>/auxv */ 842 lxpr_readdir_not_a_dir, /* /proc/<pid>/cgroup */ 843 lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */ 844 lxpr_readdir_not_a_dir, /* /proc/<pid>/comm */ 845 lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */ 846 lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */ 847 lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */ 848 lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */ 849 lxpr_readdir_not_a_dir, /* /proc/<pid>/limits */ 850 lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */ 851 lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */ 852 lxpr_readdir_not_a_dir, /* /proc/<pid>/mountinfo */ 853 lxpr_readdir_not_a_dir, /* /proc/<pid>/oom_score_adj */ 854 lxpr_readdir_not_a_dir, /* /proc/<pid>/root */ 855 lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */ 856 lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */ 857 lxpr_readdir_not_a_dir, /* /proc/<pid>/status */ 858 lxpr_readdir_taskdir, /* /proc/<pid>/task */ 859 
lxpr_readdir_task_tid_dir, /* /proc/<pid>/task/nn */ 860 lxpr_readdir_fddir, /* /proc/<pid>/fd */ 861 lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */ 862 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/auxv */ 863 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cgroup */ 864 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cmdline */ 865 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/comm */ 866 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cpu */ 867 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cwd */ 868 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/environ */ 869 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/exe */ 870 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/limits */ 871 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/maps */ 872 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/mem */ 873 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/mountinfo */ 874 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid/oom_scr_adj */ 875 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/root */ 876 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/stat */ 877 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/statm */ 878 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/status */ 879 lxpr_readdir_fddir, /* /proc/<pid>/task/<tid>/fd */ 880 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/fd/nn */ 881 lxpr_readdir_not_a_dir, /* /proc/cgroups */ 882 lxpr_readdir_not_a_dir, /* /proc/cmdline */ 883 lxpr_readdir_not_a_dir, /* /proc/cpuinfo */ 884 lxpr_readdir_not_a_dir, /* /proc/devices */ 885 lxpr_readdir_not_a_dir, /* /proc/diskstats */ 886 lxpr_readdir_not_a_dir, /* /proc/dma */ 887 lxpr_readdir_not_a_dir, /* /proc/filesystems */ 888 lxpr_readdir_not_a_dir, /* /proc/interrupts */ 889 lxpr_readdir_not_a_dir, /* /proc/ioports */ 890 lxpr_readdir_not_a_dir, /* /proc/kcore */ 891 lxpr_readdir_not_a_dir, /* /proc/kmsg */ 892 lxpr_readdir_not_a_dir, /* /proc/loadavg */ 893 lxpr_readdir_not_a_dir, /* /proc/meminfo */ 894 lxpr_readdir_not_a_dir, /* 
/proc/modules */ 895 lxpr_readdir_not_a_dir, /* /proc/mounts */ 896 lxpr_readdir_netdir, /* /proc/net */ 897 lxpr_readdir_not_a_dir, /* /proc/net/arp */ 898 lxpr_readdir_not_a_dir, /* /proc/net/dev */ 899 lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */ 900 lxpr_readdir_not_a_dir, /* /proc/net/if_inet6 */ 901 lxpr_readdir_not_a_dir, /* /proc/net/igmp */ 902 lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */ 903 lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */ 904 lxpr_readdir_not_a_dir, /* /proc/net/ipv6_route */ 905 lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */ 906 lxpr_readdir_not_a_dir, /* /proc/net/netstat */ 907 lxpr_readdir_not_a_dir, /* /proc/net/raw */ 908 lxpr_readdir_not_a_dir, /* /proc/net/route */ 909 lxpr_readdir_not_a_dir, /* /proc/net/rpc */ 910 lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */ 911 lxpr_readdir_not_a_dir, /* /proc/net/sockstat */ 912 lxpr_readdir_not_a_dir, /* /proc/net/snmp */ 913 lxpr_readdir_not_a_dir, /* /proc/net/stat */ 914 lxpr_readdir_not_a_dir, /* /proc/net/tcp */ 915 lxpr_readdir_not_a_dir, /* /proc/net/tcp6 */ 916 lxpr_readdir_not_a_dir, /* /proc/net/udp */ 917 lxpr_readdir_not_a_dir, /* /proc/net/udp6 */ 918 lxpr_readdir_not_a_dir, /* /proc/net/unix */ 919 lxpr_readdir_not_a_dir, /* /proc/partitions */ 920 lxpr_readdir_not_a_dir, /* /proc/self */ 921 lxpr_readdir_not_a_dir, /* /proc/stat */ 922 lxpr_readdir_not_a_dir, /* /proc/swaps */ 923 lxpr_readdir_sysdir, /* /proc/sys */ 924 lxpr_readdir_sys_fsdir, /* /proc/sys/fs */ 925 lxpr_readdir_sys_fs_inotifydir, /* /proc/sys/fs/inotify */ 926 lxpr_readdir_not_a_dir, /* .../inotify/max_queued_events */ 927 lxpr_readdir_not_a_dir, /* .../inotify/max_user_instances */ 928 lxpr_readdir_not_a_dir, /* .../inotify/max_user_watches */ 929 lxpr_readdir_sys_kerneldir, /* /proc/sys/kernel */ 930 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/cap_last_cap */ 931 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/core_pattern */ 932 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/hostname */ 933 
lxpr_readdir_not_a_dir, /* /proc/sys/kernel/msgmni */ 934 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/ngroups_max */ 935 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/osrelease */ 936 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/pid_max */ 937 lxpr_readdir_sys_kdir_randdir, /* /proc/sys/kernel/random */ 938 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/random/boot_id */ 939 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/shmmax */ 940 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/threads-max */ 941 lxpr_readdir_sys_netdir, /* /proc/sys/net */ 942 lxpr_readdir_sys_net_coredir, /* /proc/sys/net/core */ 943 lxpr_readdir_not_a_dir, /* /proc/sys/net/core/somaxconn */ 944 lxpr_readdir_sys_vmdir, /* /proc/sys/vm */ 945 lxpr_readdir_not_a_dir, /* /proc/sys/vm/min_free_kbytes */ 946 lxpr_readdir_not_a_dir, /* /proc/sys/vm/nr_hugepages */ 947 lxpr_readdir_not_a_dir, /* /proc/sys/vm/overcommit_memory */ 948 lxpr_readdir_not_a_dir, /* /proc/sys/vm/swappiness */ 949 lxpr_readdir_not_a_dir, /* /proc/uptime */ 950 lxpr_readdir_not_a_dir, /* /proc/version */ 951 }; 952 953 954 /* 955 * lxpr_read(): Vnode operation for VOP_READ() 956 * 957 * As the format of all the files that can be read in the lx procfs is human 958 * readable and not binary structures there do not have to be different 959 * read variants depending on whether the reading process model is 32 or 64 bits 960 * (at least in general, and certainly the difference is unlikely to be enough 961 * to justify have different routines for 32 and 64 bit reads 962 */ 963 /* ARGSUSED */ 964 static int 965 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 966 caller_context_t *ct) 967 { 968 lxpr_node_t *lxpnp = VTOLXP(vp); 969 lxpr_nodetype_t type = lxpnp->lxpr_type; 970 lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop); 971 int error; 972 973 ASSERT(type < LXPR_NFILES); 974 975 if (type == LXPR_KMSG) { 976 ldi_ident_t li = VTOLXPM(vp)->lxprm_li; 977 ldi_handle_t ldih; 978 struct strioctl str; 979 int rv; 980 981 /* 982 * Open the 
zone's console device using the layered driver 983 * interface. 984 */ 985 if ((error = 986 ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li)) != 0) 987 return (error); 988 989 /* 990 * Send an ioctl to the underlying console device, letting it 991 * know we're interested in getting console messages. 992 */ 993 str.ic_cmd = I_CONSLOG; 994 str.ic_timout = 0; 995 str.ic_len = 0; 996 str.ic_dp = NULL; 997 if ((error = ldi_ioctl(ldih, I_STR, 998 (intptr_t)&str, FKIOCTL, cr, &rv)) != 0) 999 return (error); 1000 1001 lxpr_read_kmsg(lxpnp, uiobuf, ldih); 1002 1003 if ((error = ldi_close(ldih, FREAD, cr)) != 0) 1004 return (error); 1005 } else { 1006 lxpr_read_function[type](lxpnp, uiobuf); 1007 } 1008 1009 error = lxpr_uiobuf_flush(uiobuf); 1010 lxpr_uiobuf_free(uiobuf); 1011 1012 return (error); 1013 } 1014 1015 /* 1016 * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty() 1017 * 1018 * Various special case reads: 1019 * - trying to read a directory 1020 * - invalid file (used to mean a file that should be implemented, 1021 * but isn't yet) 1022 * - empty file 1023 * - wait to be able to read a file that will never have anything to read 1024 */ 1025 /* ARGSUSED */ 1026 static void 1027 lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1028 { 1029 lxpr_uiobuf_seterr(uiobuf, EISDIR); 1030 } 1031 1032 /* ARGSUSED */ 1033 static void 1034 lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1035 { 1036 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1037 } 1038 1039 /* ARGSUSED */ 1040 static void 1041 lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1042 { 1043 } 1044 1045 /* 1046 * lxpr_read_pid_auxv(): read process aux vector 1047 */ 1048 static void 1049 lxpr_read_pid_auxv(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1050 { 1051 proc_t *p; 1052 lx_proc_data_t *pd; 1053 lx_elf_data_t *edp = NULL; 1054 int i, cnt; 1055 1056 ASSERT(lxpnp->lxpr_type == LXPR_PID_AUXV || 1057 lxpnp->lxpr_type == LXPR_PID_TID_AUXV); 1058 1059 p = lxpr_lock(lxpnp->lxpr_pid); 
1060 1061 if (p == NULL) { 1062 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1063 return; 1064 } 1065 if ((pd = ptolxproc(p)) == NULL) { 1066 /* Emit a single AT_NULL record for non-branded processes */ 1067 auxv_t buf; 1068 1069 bzero(&buf, sizeof (buf)); 1070 lxpr_unlock(p); 1071 lxpr_uiobuf_write(uiobuf, (char *)&buf, sizeof (buf)); 1072 return; 1073 } else { 1074 edp = &pd->l_elf_data; 1075 } 1076 1077 if (p->p_model == DATAMODEL_NATIVE) { 1078 auxv_t buf[__KERN_NAUXV_IMPL]; 1079 1080 /* 1081 * Because a_type is only of size int (not long), the buffer 1082 * contents must be zeroed first to ensure cleanliness. 1083 */ 1084 bzero(buf, sizeof (buf)); 1085 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) { 1086 if (lx_auxv_stol(&p->p_user.u_auxv[i], 1087 &buf[cnt], edp) == 0) { 1088 cnt++; 1089 } 1090 if (p->p_user.u_auxv[i].a_type == AT_NULL) { 1091 break; 1092 } 1093 } 1094 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0])); 1095 lxpr_unlock(p); 1096 } 1097 #if defined(_SYSCALL32_IMPL) 1098 else { 1099 auxv32_t buf[__KERN_NAUXV_IMPL]; 1100 1101 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) { 1102 auxv_t temp; 1103 1104 if (lx_auxv_stol(&p->p_user.u_auxv[i], 1105 &temp, edp) == 0) { 1106 buf[cnt].a_type = (int)temp.a_type; 1107 buf[cnt].a_un.a_val = (int)temp.a_un.a_val; 1108 cnt++; 1109 } 1110 if (p->p_user.u_auxv[i].a_type == AT_NULL) { 1111 break; 1112 } 1113 } 1114 lxpr_unlock(p); 1115 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0])); 1116 } 1117 #endif /* defined(_SYSCALL32_IMPL) */ 1118 } 1119 1120 /* 1121 * lxpr_read_pid_cgroup(): read cgroups for process 1122 */ 1123 static void 1124 lxpr_read_pid_cgroup(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1125 { 1126 proc_t *p; 1127 1128 ASSERT(lxpnp->lxpr_type == LXPR_PID_CGROUP || 1129 lxpnp->lxpr_type == LXPR_PID_TID_CGROUP); 1130 1131 p = lxpr_lock(lxpnp->lxpr_pid); 1132 if (p == NULL) { 1133 lxpr_uiobuf_seterr(uiobuf, EINVAL); 1134 return; 1135 } 1136 1137 /* basic stub, 3rd field will need 
	 to be populated */
	lxpr_uiobuf_printf(uiobuf, "1:name=systemd:/\n");

	lxpr_unlock(p);
}

/*
 * lxpr_copy_cmdline(): copy the argv strings (and, if the process has
 * overwritten the terminating NUL of argv, one buffer's worth of the envp
 * strings) directly out of the target's address space into the caller's
 * uio.  Uses the Linux-style bounds recorded in the lx proc data
 * (l_args_start / l_envs_start / l_envs_end).  Drops p_lock around the
 * copyout and re-acquires it before returning.
 */
static void
lxpr_copy_cmdline(proc_t *p, lx_proc_data_t *pd, lxpr_uiobuf_t *uiobuf)
{
	uio_t *uiop = uiobuf->uiop;
	char *buf = uiobuf->buffer;
	int bsz = uiobuf->buffsize;
	boolean_t env_overflow = B_FALSE;
	uintptr_t pos = pd->l_args_start + uiop->uio_offset;
	uintptr_t estart = pd->l_envs_start;
	uintptr_t eend = pd->l_envs_end;
	size_t chunk, copied;
	int err = 0;

	/* Do not bother with data beyond the end of the envp strings area. */
	if (pos > eend) {
		return;
	}
	mutex_exit(&p->p_lock);

	/*
	 * If the starting or ending bounds are outside the argv strings area,
	 * check to see if the process has overwritten the terminating NULL.
	 * If not, no data needs to be copied from outside the argv area.
	 */
	if (pos >= estart || (pos + uiop->uio_resid) >= estart) {
		uint8_t term;
		if (uread(p, &term, sizeof (term), estart - 1) != 0) {
			err = EFAULT;
		} else if (term != 0) {
			env_overflow = B_TRUE;
		}
	}

	/* Data between astart and estart-1 can be copied freely. */
	while (pos < estart && uiop->uio_resid > 0 && err == 0) {
		chunk = MIN(estart - pos, uiop->uio_resid);
		chunk = MIN(chunk, bsz);

		if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) != 0 ||
		    copied != chunk) {
			err = EFAULT;
			break;
		}
		err = uiomove(buf, copied, UIO_READ, uiop);
		pos += copied;
	}

	/*
	 * Onward from estart, data is copied as a contiguous string.  To
	 * protect env data from potential snooping, only one buffer-sized
	 * copy is allowed to avoid complex seek logic.
	 */
	if (err == 0 && env_overflow && pos == estart && uiop->uio_resid > 0) {
		chunk = MIN(eend - pos, uiop->uio_resid);
		chunk = MIN(chunk, bsz);
		if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) == 0) {
			int len = strnlen(buf, copied);
			if (len > 0) {
				err = uiomove(buf, len, UIO_READ, uiop);
			}
		}
	}

	uiobuf->error = err;
	/* reset any uiobuf state */
	uiobuf->pos = uiobuf->buffer;
	uiobuf->beg = 0;

	mutex_enter(&p->p_lock);
}

/*
 * lxpr_read_pid_cmdline(): read argument vector from process
 */
static void
lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	char *buf;
	size_t asz = lxpr_maxargvlen, sz;
	lx_proc_data_t *pd;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE ||
	    lxpnp->lxpr_type == LXPR_PID_TID_CMDLINE);

	buf = kmem_alloc(asz, KM_SLEEP);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		kmem_free(buf, asz);
		return;
	}

	if ((pd = ptolxproc(p)) != NULL && pd->l_args_start != 0 &&
	    pd->l_envs_start != 0 && pd->l_envs_end != 0) {
		/* Use Linux-style argv bounds if possible. */
		lxpr_copy_cmdline(p, pd, uiobuf);
	} else {
		/* Fall back to the native procfs argv reader. */
		if (prreadargv(p, buf, asz, &sz) != 0) {
			lxpr_uiobuf_seterr(uiobuf, EINVAL);
		} else {
			lxpr_uiobuf_write(uiobuf, buf, sz);
		}
	}

	lxpr_unlock(p);
	kmem_free(buf, asz);
}

/*
 * lxpr_read_pid_comm(): read command from process
 */
static void
lxpr_read_pid_comm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;

	VERIFY(lxpnp->lxpr_type == LXPR_PID_COMM ||
	    lxpnp->lxpr_type == LXPR_PID_TID_COMM);

	/*
	 * Because prctl(PR_SET_NAME) does not set custom names for threads
	 * (vs processes), there is no need for special handling here.
	 */
	if ((p = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}
	lxpr_uiobuf_printf(uiobuf, "%s\n", p->p_user.u_comm);
	lxpr_unlock(p);
}

/*
 * lxpr_read_pid_env(): read env vector from process
 */
static void
lxpr_read_pid_env(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	char *buf;
	size_t asz = lxpr_maxenvvlen, sz;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_ENV);

	buf = kmem_alloc(asz, KM_SLEEP);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		kmem_free(buf, asz);
		return;
	}

	if (prreadenvv(p, buf, asz, &sz) != 0) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
	} else {
		lxpr_uiobuf_write(uiobuf, buf, sz);
	}

	lxpr_unlock(p);
	kmem_free(buf, asz);
}

/*
 * lxpr_read_pid_limits(): ulimit file
 *
 * Walks lxpr_rlimtab[], mapping each Linux limit name to its rctl analog
 * (if any) and printing the soft (RCPRIV_BASIC) and hard
 * (RCPRIV_PRIVILEGED) values, or "unlimited".
 */
static void
lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	rctl_qty_t cur, max;
	rctl_val_t *oval, *nval;
	rctl_hndl_t hndl;
	char *kname;
	int i;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS ||
	    lxpnp->lxpr_type == LXPR_PID_TID_LIMITS);

	nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		kmem_free(nval, sizeof (rctl_val_t));
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n",
	    "Limit", "Soft Limit", "Hard Limit", "Units");
	for (i = 0; lxpr_rlimtab[i].rlim_name != NULL; i++) {
		kname = lxpr_rlimtab[i].rlim_rctl;
		/* default to unlimited for resources without an analog */
		cur = RLIM_INFINITY;
		max = RLIM_INFINITY;
		if (kname != NULL) {
			hndl = rctl_hndl_lookup(kname);
			oval = NULL;
			/*
			 * Iterate the rctl value chain; oval carries the
			 * previous value as the cursor for rctl_local_get.
			 */
			while ((hndl != -1) &&
			    rctl_local_get(hndl, oval, nval,
			    p) == 0) {
				oval = nval;
				switch (nval->rcv_privilege) {
				case RCPRIV_BASIC:
					if (!RCTL_INFINITE(nval))
						cur = nval->rcv_value;
					break;
				case RCPRIV_PRIVILEGED:
					if (!RCTL_INFINITE(nval))
						max = nval->rcv_value;
					break;
				}
			}
		}

		lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name);
		if (cur == RLIM_INFINITY || cur == LX_RLIM_INFINITY) {
			lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
		} else {
			lxpr_uiobuf_printf(uiobuf, " %-20lu", cur);
		}
		if (max == RLIM_INFINITY || max == LX_RLIM_INFINITY) {
			lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
		} else {
			lxpr_uiobuf_printf(uiobuf, " %-20lu", max);
		}
		lxpr_uiobuf_printf(uiobuf, " %-10s\n",
		    lxpr_rlimtab[i].rlim_unit);
	}

	lxpr_unlock(p);
	kmem_free(nval, sizeof (rctl_val_t));
}

/*
 * lxpr_read_pid_maps(): memory map file
 *
 * Snapshots the segments of the target address space into a local list
 * (so no locks are held during the vnode path lookups), then formats one
 * line per segment in the Linux /proc/<pid>/maps style.
 */
static void
lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	struct as *as;
	struct seg *seg;
	char *buf;
	int buflen = MAXPATHLEN;
	struct print_data {
		uintptr_t saddr;
		uintptr_t eaddr;
		int type;
		char prot[5];
		uintptr_t offset;
		vnode_t *vp;
		struct print_data *next;
	} *print_head = NULL;
	struct print_data **print_tail = &print_head;
	struct print_data *pbuf;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS ||
	    lxpnp->lxpr_type == LXPR_PID_TID_MAPS);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	as = p->p_as;

	/* system processes have no mappings to report */
	if (as == &kas) {
		lxpr_unlock(p);
		return;
	}

	mutex_exit(&p->p_lock);

	/* Iterate over all segments in the address space */
	AS_LOCK_ENTER(as, RW_READER);
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		vnode_t *vp;
		uint_t protbits;

		pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);

		pbuf->saddr = (uintptr_t)seg->s_base;
		pbuf->eaddr = pbuf->saddr + seg->s_size;
		pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);

		/*
		 * Cheat and only use the protection bits of the first page
		 * in the segment
		 */
		(void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
		(void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);

		if (protbits & PROT_READ) pbuf->prot[0] = 'r';
		if (protbits & PROT_WRITE) pbuf->prot[1] = 'w';
		if (protbits & PROT_EXEC) pbuf->prot[2] = 'x';
		if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's';
		else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';

		if (seg->s_ops == &segvn_ops &&
		    SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
		    vp != NULL && vp->v_type == VREG) {
			VN_HOLD(vp);
			pbuf->vp = vp;
		} else {
			pbuf->vp = NULL;
		}

		pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr);

		pbuf->next = NULL;
		*print_tail = pbuf;
		print_tail = &pbuf->next;
	}
	AS_LOCK_EXIT(as);
	mutex_enter(&p->p_lock);
	lxpr_unlock(p);

	buf = kmem_alloc(buflen, KM_SLEEP);

	/* print the data we've extracted */
	pbuf = print_head;
	while (pbuf != NULL) {
		struct print_data *pbuf_next;
		vattr_t vattr;

		int maj = 0;
		int min = 0;
		ino_t inode = 0;

		*buf = '\0';
		if (pbuf->vp != NULL) {
			vattr.va_mask = AT_FSID | AT_NODEID;
			if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
			    NULL) == 0) {
				maj = getmajor(vattr.va_fsid);
				min = getminor(vattr.va_fsid);
				inode = vattr.va_nodeid;
			}
			(void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
			VN_RELE(pbuf->vp);
		}

		/*
		 * NOTE(review): p is dereferenced here for p_model after
		 * lxpr_unlock(p) above — presumably safe for the duration of
		 * this call, but worth confirming against lxpr_lock/unlock
		 * lifetime semantics.
		 */
		if (p->p_model == DATAMODEL_LP64) {
			lxpr_uiobuf_printf(uiobuf,
			    "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n",
			    pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
			    maj, min, inode, *buf != '\0'
? " " : "", buf); 1493 } else { 1494 lxpr_uiobuf_printf(uiobuf, 1495 "%08x-%08x %s %08x %02x:%02x %llu%s%s\n", 1496 (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr, 1497 pbuf->prot, (uint32_t)pbuf->offset, maj, min, 1498 inode, *buf != '\0' ? " " : "", buf); 1499 } 1500 1501 pbuf_next = pbuf->next; 1502 kmem_free(pbuf, sizeof (*pbuf)); 1503 pbuf = pbuf_next; 1504 } 1505 1506 kmem_free(buf, buflen); 1507 } 1508 1509 /* 1510 * lxpr_read_pid_mountinfo(): information about process mount points. e.g.: 1511 * 14 19 0:13 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw 1512 * mntid parid devnums root mntpnt mntopts - fstype mntsrc superopts 1513 * 1514 * We have to make up several of these fields. 1515 */ 1516 static void 1517 lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 1518 { 1519 struct vfs *vfsp; 1520 struct vfs *vfslist; 1521 zone_t *zone = LXPTOZ(lxpnp); 1522 struct print_data { 1523 refstr_t *vfs_mntpt; 1524 refstr_t *vfs_resource; 1525 uint_t vfs_flag; 1526 int vfs_fstype; 1527 dev_t vfs_dev; 1528 struct print_data *next; 1529 } *print_head = NULL; 1530 struct print_data **print_tail = &print_head; 1531 struct print_data *printp; 1532 int root_id = 15; /* use a made-up value */ 1533 int mnt_id; 1534 1535 ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO || 1536 lxpnp->lxpr_type == LXPR_PID_TID_MOUNTINFO); 1537 1538 vfs_list_read_lock(); 1539 1540 /* root is the top-level, it does not appear in this output */ 1541 if (zone == global_zone) { 1542 vfsp = vfslist = rootvfs; 1543 } else { 1544 vfsp = vfslist = zone->zone_vfslist; 1545 /* 1546 * If the zone has a root entry, it will be the first in 1547 * the list. If it doesn't, we conjure one up. 1548 */ 1549 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt), 1550 zone->zone_rootpath) != 0) { 1551 struct vfs *tvfsp; 1552 /* 1553 * The root of the zone is not a mount point. The vfs 1554 * we want to report is that of the zone's root vnode. 
1555 */ 1556 tvfsp = zone->zone_rootvp->v_vfsp; 1557 1558 lxpr_uiobuf_printf(uiobuf, 1559 "%d 1 %d:%d / / %s - %s / %s\n", 1560 root_id, 1561 major(tvfsp->vfs_dev), minor(vfsp->vfs_dev), 1562 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw", 1563 vfssw[tvfsp->vfs_fstype].vsw_name, 1564 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw"); 1565 1566 } 1567 if (vfslist == NULL) { 1568 vfs_list_unlock(); 1569 return; 1570 } 1571 } 1572 1573 /* 1574 * Later on we have to do a lookupname, which can end up causing 1575 * another vfs_list_read_lock() to be called. Which can lead to a 1576 * deadlock. To avoid this, we extract the data we need into a local 1577 * list, then we can run this list without holding vfs_list_read_lock() 1578 * We keep the list in the same order as the vfs_list 1579 */ 1580 do { 1581 /* Skip mounts we shouldn't show */ 1582 if (vfsp->vfs_flag & VFS_NOMNTTAB) { 1583 goto nextfs; 1584 } 1585 1586 printp = kmem_alloc(sizeof (*printp), KM_SLEEP); 1587 refstr_hold(vfsp->vfs_mntpt); 1588 printp->vfs_mntpt = vfsp->vfs_mntpt; 1589 refstr_hold(vfsp->vfs_resource); 1590 printp->vfs_resource = vfsp->vfs_resource; 1591 printp->vfs_flag = vfsp->vfs_flag; 1592 printp->vfs_fstype = vfsp->vfs_fstype; 1593 printp->vfs_dev = vfsp->vfs_dev; 1594 printp->next = NULL; 1595 1596 *print_tail = printp; 1597 print_tail = &printp->next; 1598 1599 nextfs: 1600 vfsp = (zone == global_zone) ? 
1601 vfsp->vfs_next : vfsp->vfs_zone_next; 1602 1603 } while (vfsp != vfslist); 1604 1605 vfs_list_unlock(); 1606 1607 mnt_id = root_id + 1; 1608 1609 /* 1610 * now we can run through what we've extracted without holding 1611 * vfs_list_read_lock() 1612 */ 1613 printp = print_head; 1614 while (printp != NULL) { 1615 struct print_data *printp_next; 1616 const char *resource; 1617 char *mntpt; 1618 struct vnode *vp; 1619 int error; 1620 1621 mntpt = (char *)refstr_value(printp->vfs_mntpt); 1622 resource = refstr_value(printp->vfs_resource); 1623 1624 if (mntpt != NULL && mntpt[0] != '\0') 1625 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone); 1626 else 1627 mntpt = "-"; 1628 1629 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 1630 1631 if (error != 0) 1632 goto nextp; 1633 1634 if (!(vp->v_flag & VROOT)) { 1635 VN_RELE(vp); 1636 goto nextp; 1637 } 1638 VN_RELE(vp); 1639 1640 if (resource != NULL && resource[0] != '\0') { 1641 if (resource[0] == '/') { 1642 resource = ZONE_PATH_VISIBLE(resource, zone) ? 1643 ZONE_PATH_TRANSLATE(resource, zone) : mntpt; 1644 } 1645 } else { 1646 resource = "none"; 1647 } 1648 1649 /* 1650 * XXX parent ID is not tracked correctly here. Currently we 1651 * always assume the parent ID is the root ID. 1652 */ 1653 lxpr_uiobuf_printf(uiobuf, 1654 "%d %d %d:%d / %s %s - %s %s %s\n", 1655 mnt_id, root_id, 1656 major(printp->vfs_dev), minor(printp->vfs_dev), 1657 mntpt, 1658 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw", 1659 vfssw[printp->vfs_fstype].vsw_name, 1660 resource, 1661 printp->vfs_flag & VFS_RDONLY ? 
		    "ro" : "rw");

nextp:
		printp_next = printp->next;
		refstr_rele(printp->vfs_mntpt);
		refstr_rele(printp->vfs_resource);
		kmem_free(printp, sizeof (*printp));
		printp = printp_next;

		mnt_id++;
	}
}

/*
 * lxpr_read_pid_oom_scr_adj(): read oom_score_adj for process
 */
static void
lxpr_read_pid_oom_scr_adj(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_OOM_SCR_ADJ ||
	    lxpnp->lxpr_type == LXPR_PID_TID_OOM_SCR_ADJ);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	/* always 0 */
	lxpr_uiobuf_printf(uiobuf, "0\n");

	lxpr_unlock(p);
}


/*
 * lxpr_read_pid_statm(): memory status file
 *
 * Prints the seven Linux statm fields; only size (total) and resident are
 * populated here, the rest are reported as 0.
 */
static void
lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	struct as *as;
	size_t vsize;
	size_t rss;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM ||
	    lxpnp->lxpr_type == LXPR_PID_TID_STATM);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	as = p->p_as;

	/* drop p_lock while taking the (sleeping) AS lock */
	mutex_exit(&p->p_lock);

	AS_LOCK_ENTER(as, RW_READER);
	vsize = btopr(as->a_resvsize);
	rss = rm_asrss(as);
	AS_LOCK_EXIT(as);

	mutex_enter(&p->p_lock);
	lxpr_unlock(p);

	lxpr_uiobuf_printf(uiobuf,
	    "%lu %lu %lu %lu %lu %lu %lu\n",
	    vsize, rss, 0l, rss, 0l, 0l, 0l);
}

/*
 * Look for either the main thread (lookup_id is 0) or the specified thread.
 * If we're looking for the main thread but the proc does not have one, we
 * fallback to using prchoose to get any thread available.
 *
 * NOTE(review): on success the returned thread has been thread_lock()ed —
 * callers are responsible for thread_unlock().  The prchoose() fallback is
 * presumed to return its thread locked as well — confirm against procfs.
 */
static kthread_t *
lxpr_get_thread(proc_t *p, uint_t lookup_id)
{
	kthread_t *t;
	uint_t emul_tid;
	lx_lwp_data_t *lwpd;
	pid_t pid = p->p_pid;
	pid_t init_pid = curproc->p_zone->zone_proc_initpid;
	boolean_t branded = (p->p_brand == &lx_brand);

	/* get specified thread */
	if ((t = p->p_tlist) == NULL)
		return (NULL);

	do {
		/* lookup_id == 0 means "the main thread" */
		if (lookup_id == 0 && t->t_tid == 1) {
			thread_lock(t);
			return (t);
		}

		lwpd = ttolxlwp(t);
		if (branded && lwpd != NULL) {
			if (pid == init_pid && lookup_id == 1) {
				emul_tid = t->t_tid;
			} else {
				emul_tid = lwpd->br_pid;
			}
		} else {
			/*
			 * Make only the first (assumed to be main) thread
			 * visible for non-branded processes.
			 */
			emul_tid = p->p_pid;
		}
		if (emul_tid == lookup_id) {
			thread_lock(t);
			return (t);
		}
	} while ((t = t->t_forw) != p->p_tlist);

	if (lookup_id == 0)
		return (prchoose(p));
	return (NULL);
}

/*
 * Lookup the real pid for procs 0 or 1.
 */
static pid_t
get_real_pid(pid_t p)
{
	pid_t find_pid;

	/* Linux pid 1 is the zone's init; Linux pid 0 is zsched. */
	if (p == 1) {
		find_pid = curproc->p_zone->zone_proc_initpid;
	} else if (p == 0) {
		find_pid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		find_pid = p;
	}

	return (find_pid);
}

/*
 * pid/tid common code to read status file
 */
static void
lxpr_read_status_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
    uint_t lookup_id)
{
	proc_t *p;
	kthread_t *t;
	user_t *up;
	cred_t *cr;
	const gid_t *groups;
	int ngroups;
	struct as *as;
	char *status;
	pid_t pid, ppid;
	k_sigset_t current, ignore, handle;
	int i, lx_sig;
	pid_t real_pid;

	real_pid = get_real_pid(lxpnp->lxpr_pid);
	p = lxpr_lock(real_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	pid = p->p_pid;

	/*
	 * Convert pid to the Linux default of 1 if we're the zone's init
	 * process or if we're the zone's zsched the pid is 0.
	 */
	if (pid == curproc->p_zone->zone_proc_initpid) {
		pid = 1;
		ppid = 0;	/* parent pid for init is 0 */
	} else if (pid == curproc->p_zone->zone_zsched->p_pid) {
		pid = 0;	/* zsched is pid 0 */
		ppid = 0;	/* parent pid for zsched is itself */
	} else {
		/*
		 * Make sure not to reference parent PIDs that reside outside
		 * the zone
		 */
		ppid = ((p->p_flag & SZONETOP)
		    ?
		    curproc->p_zone->zone_zsched->p_pid : p->p_ppid);

		/*
		 * Convert ppid to the Linux default of 1 if our parent is the
		 * zone's init process
		 */
		if (ppid == curproc->p_zone->zone_proc_initpid)
			ppid = 1;
	}

	t = lxpr_get_thread(p, lookup_id);
	if (t != NULL) {
		switch (t->t_state) {
		case TS_SLEEP:
			status = "S (sleeping)";
			break;
		case TS_RUN:
		case TS_ONPROC:
			status = "R (running)";
			break;
		case TS_ZOMB:
			status = "Z (zombie)";
			break;
		case TS_STOPPED:
			status = "T (stopped)";
			break;
		default:
			status = "! (unknown)";
			break;
		}
		thread_unlock(t);
	} else {
		if (lookup_id != 0) {
			/* we can't find this specific thread */
			lxpr_uiobuf_seterr(uiobuf, EINVAL);
			lxpr_unlock(p);
			return;
		}

		/*
		 * there is a hole in the exit code, where a proc can have
		 * no threads but it is yet to be flagged SZOMB. We will
		 * assume we are about to become a zombie
		 */
		status = "Z (zombie)";
	}

	up = PTOU(p);
	mutex_enter(&p->p_crlock);
	crhold(cr = p->p_cred);
	mutex_exit(&p->p_crlock);

	lxpr_uiobuf_printf(uiobuf,
	    "Name:\t%s\n"
	    "State:\t%s\n"
	    "Tgid:\t%d\n"
	    "Pid:\t%d\n"
	    "PPid:\t%d\n"
	    "TracerPid:\t%d\n"
	    "Uid:\t%u\t%u\t%u\t%u\n"
	    "Gid:\t%u\t%u\t%u\t%u\n"
	    "FDSize:\t%d\n"
	    "Groups:\t",
	    up->u_comm,
	    status,
	    pid, /* thread group id - same as pid */
	    (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
	    ppid,
	    0,
	    /* real, effective, saved and fs uids; fsuid reported as euid */
	    crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
	    crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
	    p->p_fno_ctl);


	ngroups = crgetngroups(cr);
	groups = crgetgroups(cr);
	for (i = 0; i < ngroups; i++) {
		lxpr_uiobuf_printf(uiobuf,
		    "%u ",
		    groups[i]);
	}
	crfree(cr);

	as = p->p_as;
	if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
		size_t vsize, nlocked, rss;

		/* p_lock must be dropped while taking the as lock */
		mutex_exit(&p->p_lock);
		AS_LOCK_ENTER(as, RW_READER);
		vsize = as->a_resvsize;
		rss = rm_asrss(as);
		AS_LOCK_EXIT(as);
		mutex_enter(&p->p_lock);
		nlocked = p->p_locked_mem;

		lxpr_uiobuf_printf(uiobuf,
		    "\n"
		    "VmSize:\t%8lu kB\n"
		    "VmLck:\t%8lu kB\n"
		    "VmRSS:\t%8lu kB\n"
		    "VmData:\t%8lu kB\n"
		    "VmStk:\t%8lu kB\n"
		    "VmExe:\t%8lu kB\n"
		    "VmLib:\t%8lu kB",
		    btok(vsize),
		    btok(nlocked),
		    ptok(rss),
		    0l,
		    btok(p->p_stksize),
		    ptok(rss),
		    0l);
	}

	lxpr_uiobuf_printf(uiobuf, "\nThreads:\t%u", p->p_lwpcnt);

	sigemptyset(&current);
	sigemptyset(&ignore);
	sigemptyset(&handle);

	/* Translate native signal state/dispositions to Linux signal sets */
	for (i = 1; i < NSIG; i++) {
		lx_sig = stol_signo[i];

		if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
			if (sigismember(&p->p_sig, i))
				sigaddset(&current, lx_sig);

			if (up->u_signal[i - 1] == SIG_IGN)
				sigaddset(&ignore, lx_sig);
			else if (up->u_signal[i - 1] != SIG_DFL)
				sigaddset(&handle, lx_sig);
		}
	}

	lxpr_uiobuf_printf(uiobuf,
	    "\n"
	    "SigPnd:\t%08x%08x\n"
	    "SigBlk:\t%08x%08x\n"
	    "SigIgn:\t%08x%08x\n"
	    "SigCgt:\t%08x%08x\n"
	    "CapInh:\t%016x\n"
	    "CapPrm:\t%016x\n"
	    "CapEff:\t%016x\n",
	    current.__sigbits[1], current.__sigbits[0],
	    0, 0, /* signals blocked on per thread basis */
	    ignore.__sigbits[1], ignore.__sigbits[0],
	    handle.__sigbits[1], handle.__sigbits[0],
	    /* Can't do anything with linux capabilities */
	    0,
	    0,
	    0);

	lxpr_uiobuf_printf(uiobuf,
	    "CapBnd:\t%016llx\n",
	    /* We report the full capability bounding set */
	    0x1fffffffffLL);

	lxpr_unlock(p);
}

/*
 * lxpr_read_pid_status(): status file
 */
static void
lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);

	lxpr_read_status_common(lxpnp, uiobuf, 0);
}

/*
 * lxpr_read_pid_tid_status(): status file
 */
static void
lxpr_read_pid_tid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STATUS);
	lxpr_read_status_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
}

/*
 * pid/tid common code to read stat file
 */
static void
lxpr_read_stat_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
    uint_t lookup_id)
{
	proc_t *p;
	kthread_t *t;
	struct as *as;
	char stat;
	pid_t pid, ppid, pgpid, spid;
	gid_t psgid;
	dev_t psdev;
	size_t rss, vsize;
	int nice, pri;
	caddr_t wchan;
	processorid_t cpu;
	pid_t real_pid;

	real_pid = get_real_pid(lxpnp->lxpr_pid);
	p = lxpr_lock(real_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	pid = p->p_pid;

	/*
	 * Set Linux defaults if we're the zone's init process
	 */
	if (pid == curproc->p_zone->zone_proc_initpid) {
		pid = 1;		/* PID for init */
		ppid = 0;		/* parent PID for init is 0 */
		pgpid = 0;		/* process group for init is 0 */
		psgid = (gid_t)-1;	/* credential GID for init is -1 */
		spid = 0;		/* session id for init is 0 */
		psdev = 0;		/* session device for init is 0 */
	} else if (pid == curproc->p_zone->zone_zsched->p_pid) {
		pid = 0;		/* PID for zsched */
		ppid = 0;		/* parent PID for zsched is 0 */
		pgpid = 0;		/* process group for zsched is 0 */
		psgid = (gid_t)-1;	/* credential GID for zsched is -1 */
		spid = 0;		/* session id for zsched is 0 */
		psdev = 0;		/* session device for zsched is 0 */
	} else {
		/*
		 * Make sure not to reference parent PIDs that reside outside
		 * the zone
		 */
		ppid = ((p->p_flag & SZONETOP) ?
		    curproc->p_zone->zone_zsched->p_pid : p->p_ppid);

		/*
		 * Convert ppid to the Linux default of 1 if our parent is the
		 * zone's init process
		 */
		if (ppid == curproc->p_zone->zone_proc_initpid)
			ppid = 1;

		pgpid = p->p_pgrp;

		/* Session id/device/gid are read under the session locks */
		mutex_enter(&p->p_splock);
		mutex_enter(&p->p_sessp->s_lock);
		spid = p->p_sessp->s_sid;
		psdev = p->p_sessp->s_dev;
		if (p->p_sessp->s_cred)
			psgid = crgetgid(p->p_sessp->s_cred);
		else
			psgid = crgetgid(p->p_cred);

		mutex_exit(&p->p_sessp->s_lock);
		mutex_exit(&p->p_splock);
	}

	t = lxpr_get_thread(p, lookup_id);
	if (t != NULL) {
		switch (t->t_state) {
		case TS_SLEEP:
			stat = 'S'; break;
		case TS_RUN:
		case TS_ONPROC:
			stat = 'R'; break;
		case TS_ZOMB:
			stat = 'Z'; break;
		case TS_STOPPED:
			stat = 'T'; break;
		default:
			stat = '!'; break;
		}

		if (CL_DONICE(t, NULL, 0, &nice) != 0)
			nice = 0;

		pri = t->t_pri;
		wchan = t->t_wchan;
		cpu = t->t_cpu->cpu_id;
		thread_unlock(t);
	} else {
		if (lookup_id != 0) {
			/* we can't find this specific thread */
			lxpr_uiobuf_seterr(uiobuf, EINVAL);
			lxpr_unlock(p);
			return;
		}

		/* Only zombies have no threads */
		stat = 'Z';
		nice = 0;
		pri = 0;
		wchan = 0;
		cpu = 0;
	}
	as = p->p_as;
	/* p_lock must be dropped while taking the as lock */
	mutex_exit(&p->p_lock);
	AS_LOCK_ENTER(as, RW_READER);
	vsize = as->a_resvsize;
	rss = rm_asrss(as);
	AS_LOCK_EXIT(as);
	mutex_enter(&p->p_lock);

	lxpr_uiobuf_printf(uiobuf,
	    "%d (%s) %c %d %d %d %d %d "
	    "%lu %lu %lu %lu %lu "
	    "%lu %lu %ld %ld "
	    "%d %d %d "
	    "%lu "
	    "%lu "
	    "%lu %ld %llu "
	    "%lu %lu %u "
	    "%lu %lu "
	    "%lu %lu %lu %lu "
	    "%lu "
	    "%lu %lu "
	    "%d "
	    "%d"
	    "\n",
	    (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
	    PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
	    0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
	    p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
	    pri, nice, p->p_lwpcnt,
	    0l, /* itrealvalue (time before next SIGALRM) */
	    PTOU(p)->u_ticks,
	    vsize, rss, p->p_vmem_ctl,
	    0l, 0l, USRSTACK, /* startcode, endcode, startstack */
	    0l, 0l, /* kstkesp, kstkeip */
	    0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
	    wchan,
	    0l, 0l, /* nswap, cnswap */
	    0, /* exit_signal */
	    cpu);

	lxpr_unlock(p);
}

/*
 * lxpr_read_pid_stat(): pid stat file
 */
static void
lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);

	lxpr_read_stat_common(lxpnp, uiobuf, 0);
}

/*
 * lxpr_read_pid_tid_stat(): pid stat file
 */
static void
lxpr_read_pid_tid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STAT);
	lxpr_read_stat_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
}

/* lxpr_read_net_arp(): /proc/net/arp is presented empty */
/* ARGSUSED */
static void
lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* Per-interface statistics gathered from a link/lo kstat */
struct lxpr_ifstat {
	uint64_t rx_bytes;
	uint64_t rx_packets;
	uint64_t rx_errors;
	uint64_t rx_drop;
	uint64_t tx_bytes;
	uint64_t tx_packets;
	uint64_t tx_errors;
	uint64_t tx_drop;
	uint64_t collisions;
	uint64_t rx_multicast;
};

/*
 * Snapshot the data of the kstat identified by "kn", either by name
 * (byname == B_TRUE) or by kid.  On success returns a kmem_alloc'd buffer
 * which the caller must kmem_free(buf, *size); *num is set to ks_ndata.
 * Returns NULL on failure.
 */
static void *
lxpr_kstat_read(kstat_t *kn, boolean_t byname, size_t *size, int *num)
{
	kstat_t *kp;
	int i, nrec = 0;
	size_t bufsize;
	void *buf = NULL;

	if (byname == B_TRUE) {
		kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
		    kn->ks_name, getzoneid());
	} else {
		kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
	}
	if (kp == NULL) {
		return (NULL);
	}
	if (kp->ks_flags & KSTAT_FLAG_INVALID) {
		kstat_rele(kp);
		return (NULL);
	}

	bufsize = kp->ks_data_size + 1;
	kstat_rele(kp);

	/*
	 * The kstat in question is released so that kmem_alloc(KM_SLEEP) is
	 * performed without it held.  After the alloc, the kstat is reacquired
	 * and its size is checked again.  If the buffer is no longer large
	 * enough, the alloc and check are repeated up to two times.
	 */
	for (i = 0; i < 2; i++) {
		buf = kmem_alloc(bufsize, KM_SLEEP);

		/* Check if bufsize still appropriate */
		if (byname == B_TRUE) {
			kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
			    kn->ks_name, getzoneid());
		} else {
			kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
		}
		if (kp == NULL || kp->ks_flags & KSTAT_FLAG_INVALID) {
			if (kp != NULL) {
				kstat_rele(kp);
			}
			kmem_free(buf, bufsize);
			return (NULL);
		}
		KSTAT_ENTER(kp);
		(void) KSTAT_UPDATE(kp, KSTAT_READ);
		if (bufsize < kp->ks_data_size) {
			/* Grew while unheld; resize and retry */
			kmem_free(buf, bufsize);
			buf = NULL;
			bufsize = kp->ks_data_size + 1;
			KSTAT_EXIT(kp);
			kstat_rele(kp);
			continue;
		} else {
			if (KSTAT_SNAPSHOT(kp, buf, KSTAT_READ) != 0) {
				kmem_free(buf, bufsize);
				buf = NULL;
			}
			nrec = kp->ks_ndata;
			KSTAT_EXIT(kp);
			kstat_rele(kp);
			break;
		}
	}

	if (buf != NULL) {
		*size = bufsize;
		*num = nrec;
	}
	return (buf);
}

/*
 * Fill "ifs" from the named interface kstat; returns 0 on success,
 * -1 if the kstat could not be read.
 */
static int
lxpr_kstat_ifstat(kstat_t *kn, struct lxpr_ifstat *ifs)
{
	kstat_named_t *kp;
	int i, num;
	size_t size;

	/*
	 * Search by
	 * name instead of by kid since there's a small window to
	 * race against kstats being added/removed.
	 */
	bzero(ifs, sizeof (*ifs));
	kp = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
	if (kp == NULL)
		return (-1);
	for (i = 0; i < num; i++) {
		if (strncmp(kp[i].name, "rbytes64", KSTAT_STRLEN) == 0)
			ifs->rx_bytes = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "ipackets64", KSTAT_STRLEN) == 0)
			ifs->rx_packets = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "ierrors", KSTAT_STRLEN) == 0)
			ifs->rx_errors = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "norcvbuf", KSTAT_STRLEN) == 0)
			ifs->rx_drop = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "multircv", KSTAT_STRLEN) == 0)
			ifs->rx_multicast = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "obytes64", KSTAT_STRLEN) == 0)
			ifs->tx_bytes = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "opackets64", KSTAT_STRLEN) == 0)
			ifs->tx_packets = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "oerrors", KSTAT_STRLEN) == 0)
			ifs->tx_errors = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "noxmtbuf", KSTAT_STRLEN) == 0)
			ifs->tx_drop = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "collisions", KSTAT_STRLEN) == 0)
			ifs->collisions = kp[i].value.ui32;
	}
	kmem_free(kp, size);
	return (0);
}

/*
 * lxpr_read_net_dev(): emulate /proc/net/dev by walking the kstat chain
 * and emitting a line for every "link" or "lo" module instance.
 */
/* ARGSUSED */
static void
lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	kstat_t *ksr;
	kstat_t ks0;
	int i, nidx;
	size_t sidx;
	struct lxpr_ifstat ifs;

	lxpr_uiobuf_printf(uiobuf, "Inter-|   Receive                   "
	    "                             |  Transmit\n");
	lxpr_uiobuf_printf(uiobuf, " face |bytes    packets errs drop fifo"
	    " frame compressed multicast|bytes    packets errs drop fifo"
	    " colls carrier compressed\n");

	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
	if (ksr == NULL)
		return;

	for (i = 1; i < nidx; i++) {
		if (strncmp(ksr[i].ks_module, "link", KSTAT_STRLEN) == 0 ||
		    strncmp(ksr[i].ks_module, "lo", KSTAT_STRLEN) == 0) {
			if (lxpr_kstat_ifstat(&ksr[i], &ifs) != 0)
				continue;

			/* Overwriting the name is ok in the local snapshot */
			lx_ifname_convert(ksr[i].ks_name, LX_IF_FROMNATIVE);
			lxpr_uiobuf_printf(uiobuf, "%6s: %7llu %7llu %4lu "
			    "%4lu %4u %5u %10u %9lu %8llu %7llu %4lu %4lu %4u "
			    "%5lu %7u %10u\n",
			    ksr[i].ks_name,
			    ifs.rx_bytes, ifs.rx_packets,
			    ifs.rx_errors, ifs.rx_drop,
			    0, 0, 0, ifs.rx_multicast,
			    ifs.tx_bytes, ifs.tx_packets,
			    ifs.tx_errors, ifs.tx_drop,
			    0, ifs.collisions, 0, 0);
		}
	}

	kmem_free(ksr, sidx);
}

/* lxpr_read_net_dev_mcast(): /proc/net/dev_mcast is presented empty */
/* ARGSUSED */
static void
lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * Render an IPv6 address as the 32 lowercase hex digits used by Linux
 * /proc/net files; buf must hold at least 33 bytes (NUL-terminated).
 */
static void
lxpr_inet6_out(const in6_addr_t *addr, char buf[33])
{
	const uint8_t *ip = addr->s6_addr;
	char digits[] = "0123456789abcdef";
	int i;
	for (i = 0; i < 16; i++) {
		buf[2 * i] = digits[ip[i] >> 4];
		buf[2 * i + 1] = digits[ip[i] & 0xf];
	}
	buf[32] = '\0';
}

/*
 * lxpr_read_net_if_inet6(): emulate /proc/net/if_inet6 by walking all
 * V6 ills/ipifs in the current netstack.
 */
/* ARGSUSED */
static void
lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;
	ill_t *ill;
	ipif_t *ipif;
	ill_walk_context_t ctx;
	char ifname[LIFNAMSIZ], ip6out[33];

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx, ipst);

	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			uint_t index = ill->ill_phyint->phyint_ifindex;
			int plen = ip_mask_to_plen_v6(&ipif->ipif_v6net_mask);
			unsigned int scope = lx_ipv6_scope_convert(
			    &ipif->ipif_v6lcl_addr);
			/* Always report PERMANENT flag */
			int flag = 0x80;

			(void) snprintf(ifname, LIFNAMSIZ, "%s", ill->ill_name);
			lx_ifname_convert(ifname, LX_IF_FROMNATIVE);
			lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out);

			lxpr_uiobuf_printf(uiobuf, "%32s %02x %02x %02x %02x"
			    " %8s\n", ip6out, index, plen, scope, flag, ifname);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	netstack_rele(ns);
}

/* lxpr_read_net_igmp(): /proc/net/igmp is presented empty */
/* ARGSUSED */
static void
lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* lxpr_read_net_ip_mr_cache(): /proc/net/ip_mr_cache is presented empty */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* lxpr_read_net_ip_mr_vif(): /proc/net/ip_mr_vif is presented empty */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* Emit one /proc/net/ipv6_route line for the given IRE */
static void
lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf)
{
	uint32_t flags;
	char name[IFNAMSIZ];
	char ipv6addr[33];

	lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr);
	lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr,
	    ip_mask_to_plen_v6(&ire->ire_mask_v6));

	/* punt on this for now */
	lxpr_uiobuf_printf(uiobuf, "%s %02x ",
	    "00000000000000000000000000000000", 0);

	lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr);
	lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr);

	flags = ire->ire_flags &
	    (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
	/* Linux's RTF_LOCAL equivalent */
	if (ire->ire_metrics.iulp_local)
		flags |= 0x80000000;

	if (ire->ire_ill != NULL) {
		ill_get_name(ire->ire_ill, name, sizeof (name));
		lx_ifname_convert(name, LX_IF_FROMNATIVE);
	} else {
		name[0] = '\0';
	}

	lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n",
	    0, /* metric */
	    ire->ire_refcnt,
	    0,
	    flags,
	    name);
}

/* ARGSUSED */
static void
lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/*
	 * LX branded zones are expected to have exclusive IP stack, hence
	 * using ALL_ZONES as the zoneid filter.
	 */
	ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst);

	netstack_rele(ns);
}

/* lxpr_read_net_mcfilter(): /proc/net/mcfilter is presented empty */
/* ARGSUSED */
static void
lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* lxpr_read_net_netstat(): /proc/net/netstat is presented empty */
/* ARGSUSED */
static void
lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* lxpr_read_net_raw(): /proc/net/raw is presented empty */
/* ARGSUSED */
static void
lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* IRE types which are not reported via /proc/net/route */
#define	LXPR_SKIP_ROUTE(type)	\
	(((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \
	IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & type) != 0)

/* Emit one /proc/net/route line for the given IRE */
static void
lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf)
{
	uint32_t flags;
	char name[IFNAMSIZ];
	ill_t *ill;
	ire_t *nire;
	ipif_t *ipif;
	ipaddr_t gateway;

	if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0)
		return;

	/* These route flags have direct Linux equivalents */
	flags = ire->ire_flags &
	    (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);

	/*
	 * Search for a suitable IRE for naming purposes.
	 * On Linux, the default route is typically associated with the
	 * interface used to access gateway.  The default IRE on Illumos
	 * typically lacks an ill reference but its parent might have one.
	 */
	nire = ire;
	do {
		ill = nire->ire_ill;
		nire = nire->ire_dep_parent;
	} while (ill == NULL && nire != NULL);
	if (ill != NULL) {
		ill_get_name(ill, name, sizeof (name));
		lx_ifname_convert(name, LX_IF_FROMNATIVE);
	} else {
		name[0] = '*';
		name[1] = '\0';
	}

	/*
	 * Linux suppresses the gateway address for directly connected
	 * interface networks.  To emulate this behavior, we walk all addresses
	 * of a given route interface.  If one matches the gateway, it is
	 * displayed as NULL.
	 */
	gateway = ire->ire_gateway_addr;
	if ((ill = ire->ire_ill) != NULL) {
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_lcl_addr == gateway) {
				gateway = 0;
				break;
			}
		}
	}

	lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
	    "%d\t%08X\t%d\t%u\t%u\n",
	    name,
	    ire->ire_addr,
	    gateway,
	    flags, 0, 0,
	    0, /* priority */
	    ire->ire_mask,
	    0, 0, /* mss, window */
	    ire->ire_metrics.iulp_rtt);
}

/*
 * lxpr_read_net_route(): emulate /proc/net/route by walking the V4
 * routing table of the current netstack.
 */
/* ARGSUSED */
static void
lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;

	lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t"
	    "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/*
	 * LX branded zones are expected to have exclusive IP stack, hence
	 * using ALL_ZONES as the zoneid filter.
	 */
	ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst);

	netstack_rele(ns);
}

/* lxpr_read_net_rpc(): /proc/net/rpc is presented empty */
/* ARGSUSED */
static void
lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* lxpr_read_net_rt_cache(): /proc/net/rt_cache is presented empty */
/* ARGSUSED */
static void
lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* lxpr_read_net_sockstat(): /proc/net/sockstat is presented empty */
/* ARGSUSED */
static void
lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * Mapping of a mib2 kstat (by protocol name) to the ordered list of field
 * names reported for it in /proc/net/snmp.
 */
typedef struct lxpr_snmp_table {
	const char *lst_proto;
	const char *lst_fields[];
} lxpr_snmp_table_t;

static lxpr_snmp_table_t lxpr_snmp_ip = { "ip",
	{
	"forwarding", "defaultTTL", "inReceives", "inHdrErrors",
	"inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards",
	"inDelivers", "outRequests", "outDiscards", "outNoRoutes",
	"reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs",
	"fragFails", "fragCreates",
	NULL
	}
};
static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp",
	{
	"inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds",
	"inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps",
	"inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps",
	"outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds",
	"outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos",
	"outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks",
	"outAddrMaskReps",
	NULL
	}
};
static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp",
	{
	"rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens",
	"passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs",
	"outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors",
	NULL
	}
};
static lxpr_snmp_table_t lxpr_snmp_udp = { "udp",
	{
	"inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors",
	"sndbufErrors", "inCsumErrors",
	NULL
	}
};

/* NULL-terminated list of protocols reported in /proc/net/snmp */
static lxpr_snmp_table_t *lxpr_net_snmptab[] = {
	&lxpr_snmp_ip,
	&lxpr_snmp_icmp,
	&lxpr_snmp_tcp,
	&lxpr_snmp_udp,
	NULL
};

/*
 * Print the two /proc/net/snmp lines (capitalized header, then values)
 * for one protocol table, sourcing values from the given mib2 kstat.
 */
static void
lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table,
    kstat_t *kn)
{
	kstat_named_t *klist;
	char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN];
	int i, j, num;
	size_t size;

	klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
	if (klist == NULL)
		return;

	/* Print the header line, fields capitalized */
	(void) strncpy(upname, table->lst_proto, KSTAT_STRLEN);
	upname[0] = toupper(upname[0]);
	lxpr_uiobuf_printf(uiobuf, "%s:", upname);
	for (i = 0; table->lst_fields[i] != NULL; i++) {
		(void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN);
		upfield[0] = toupper(upfield[0]);
		lxpr_uiobuf_printf(uiobuf, " %s", upfield);
	}
	lxpr_uiobuf_printf(uiobuf, "\n%s:", upname);

	/* Then loop back through to print the value line.
	 */
	for (i = 0; table->lst_fields[i] != NULL; i++) {
		kstat_named_t *kpoint = NULL;
		for (j = 0; j < num; j++) {
			if (strncmp(klist[j].name, table->lst_fields[i],
			    KSTAT_STRLEN) == 0) {
				kpoint = &klist[j];
				break;
			}
		}
		if (kpoint == NULL) {
			/* Output 0 for unknown fields */
			lxpr_uiobuf_printf(uiobuf, " 0");
		} else {
			switch (kpoint->data_type) {
			case KSTAT_DATA_INT32:
				lxpr_uiobuf_printf(uiobuf, " %d",
				    kpoint->value.i32);
				break;
			case KSTAT_DATA_UINT32:
				lxpr_uiobuf_printf(uiobuf, " %u",
				    kpoint->value.ui32);
				break;
			case KSTAT_DATA_INT64:
				lxpr_uiobuf_printf(uiobuf, " %ld",
				    kpoint->value.l);
				break;
			case KSTAT_DATA_UINT64:
				lxpr_uiobuf_printf(uiobuf, " %lu",
				    kpoint->value.ul);
				break;
			}
		}
	}
	lxpr_uiobuf_printf(uiobuf, "\n");
	kmem_free(klist, size);
}

/*
 * lxpr_read_net_snmp(): emulate /proc/net/snmp by matching "mib2" class
 * kstats against the protocol tables above.
 */
/* ARGSUSED */
static void
lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	kstat_t *ksr;
	kstat_t ks0;
	lxpr_snmp_table_t **table = lxpr_net_snmptab;
	int i, t, nidx;
	size_t sidx;

	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
	if (ksr == NULL)
		return;

	for (t = 0; table[t] != NULL; t++) {
		for (i = 0; i < nidx; i++) {
			if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0)
				continue;
			if (strncmp(ksr[i].ks_name, table[t]->lst_proto,
			    KSTAT_STRLEN) == 0) {
				lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]);
				break;
			}
		}
	}
	kmem_free(ksr, sidx);
}

/* lxpr_read_net_stat(): /proc/net/stat is presented empty */
/* ARGSUSED */
static void
lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/* Map an illumos TCP state to its Linux numeric equivalent */
static int
lxpr_convert_tcp_state(int st)
{
	/*
	 * Derived from the enum located in the Linux kernel sources:
	 * include/net/tcp_states.h
	 */
	switch (st) {
	case TCPS_ESTABLISHED:
		return (1);
	case TCPS_SYN_SENT:
		return (2);
	case TCPS_SYN_RCVD:
		return (3);
	case TCPS_FIN_WAIT_1:
		return (4);
	case TCPS_FIN_WAIT_2:
		return (5);
	case TCPS_TIME_WAIT:
		return (6);
	case TCPS_CLOSED:
		return (7);
	case TCPS_CLOSE_WAIT:
		return (8);
	case TCPS_LAST_ACK:
		return (9);
	case TCPS_LISTEN:
		return (10);
	case TCPS_CLOSING:
		return (11);
	default:
		/* No translation for TCPS_IDLE, TCPS_BOUND or anything else */
		return (0);
	}
}

/*
 * Common code for /proc/net/tcp and /proc/net/tcp6: walk the conn hash
 * of the current netstack and print one line per TCP conn of "ipver".
 */
static void
lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
{
	int i, sl = 0;
	connf_t *connfp;
	conn_t *connp;
	netstack_t *ns;
	ip_stack_t *ipst;

	ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
	if (ipver == IPV4_VERSION) {
		lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address   "
		    "st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout "
		    "inode\n");
	} else {
		lxpr_uiobuf_printf(uiobuf, "  sl  "
		    "local_address                         "
		    "remote_address                        "
		    "st tx_queue rx_queue tr tm->when retrnsmt   "
		    "uid  timeout inode\n");
	}
	/*
	 * Due to differences between the Linux and illumos TCP
	 * implementations, some data will be omitted from the output here.
	 *
	 * Valid fields:
	 *  - local_address
	 *  - remote_address
	 *  - st
	 *  - tx_queue
	 *  - rx_queue
	 *  - uid
	 *  - inode
	 *
	 * Omitted/invalid fields
	 *  - tr
	 *  - tm->when
	 *  - retrnsmt
	 *  - timeout
	 */

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;
		while ((connp =
		    ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) {
			tcp_t *tcp;
			vattr_t attr;
			sonode_t *so = (sonode_t *)connp->conn_upper_handle;
			vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
			if (connp->conn_ipversion != ipver)
				continue;
			tcp = connp->conn_tcp;
			if (ipver == IPV4_VERSION) {
				lxpr_uiobuf_printf(uiobuf,
				    "%4d: %08X:%04X %08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v4,
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v4,
				    ntohs(connp->conn_fport));
			} else {
				lxpr_uiobuf_printf(uiobuf, "%4d: "
				    "%08X%08X%08X%08X:%04X "
				    "%08X%08X%08X%08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v6.s6_addr32[0],
				    connp->conn_laddr_v6.s6_addr32[1],
				    connp->conn_laddr_v6.s6_addr32[2],
				    connp->conn_laddr_v6.s6_addr32[3],
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v6.s6_addr32[0],
				    connp->conn_faddr_v6.s6_addr32[1],
				    connp->conn_faddr_v6.s6_addr32[2],
				    connp->conn_faddr_v6.s6_addr32[3],
				    ntohs(connp->conn_fport));
			}

			/* fetch the simulated inode for the socket */
			if (vp == NULL ||
			    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
				attr.va_nodeid = 0;

			lxpr_uiobuf_printf(uiobuf,
			    "%02X %08X:%08X %02X:%08X %08X "
			    "%5u %8d %lu %d %p %u %u %u %u %d\n",
			    lxpr_convert_tcp_state(tcp->tcp_state),
			    tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */
			    0, 0, /* tr, when */
			    0, /* per-connection rexmits aren't tracked
			    today */
			    connp->conn_cred->cr_uid,
			    0, /* timeout */
			    /* inode + more */
			    (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0);
		}
	}
	netstack_rele(ns);
}

/* lxpr_read_net_tcp(): /proc/net/tcp */
/* ARGSUSED */
static void
lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_tcp(uiobuf, IPV4_VERSION);
}

/* lxpr_read_net_tcp6(): /proc/net/tcp6 */
/* ARGSUSED */
static void
lxpr_read_net_tcp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_tcp(uiobuf, IPV6_VERSION);
}

/*
 * Common code for /proc/net/udp and /proc/net/udp6: walk the conn hash
 * of the current netstack and print one line per UDP conn of "ipver".
 */
static void
lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
{
	int i, sl = 0;
	connf_t *connfp;
	conn_t *connp;
	netstack_t *ns;
	ip_stack_t *ipst;

	ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
	if (ipver == IPV4_VERSION) {
		lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address"
		    "   st tx_queue rx_queue tr tm->when retrnsmt   uid"
		    "  timeout inode ref pointer drops\n");
	} else {
		lxpr_uiobuf_printf(uiobuf, "  sl  "
		    "local_address                         "
		    "remote_address                        "
		    "st tx_queue rx_queue tr tm->when retrnsmt   "
		    "uid  timeout inode ref pointer drops\n");
	}
	/*
	 * Due to differences between the Linux and illumos UDP
	 * implementations, some data will be omitted from the output here.
	 *
	 * Valid fields:
	 *  - local_address
	 *  - remote_address
	 *  - st: limited
	 *  - uid
	 *
	 * Omitted/invalid fields
	 *  - tx_queue
	 *  - rx_queue
	 *  - tr
	 *  - tm->when
	 *  - retrnsmt
	 *  - timeout
	 *  - inode
	 */

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;
		while ((connp =
		    ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) {
			udp_t *udp;
			int state = 0;
			vattr_t attr;
			sonode_t *so = (sonode_t *)connp->conn_upper_handle;
			vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
			if (connp->conn_ipversion != ipver)
				continue;
			udp = connp->conn_udp;
			if (ipver == IPV4_VERSION) {
				lxpr_uiobuf_printf(uiobuf,
				    "%4d: %08X:%04X %08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v4,
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v4,
				    ntohs(connp->conn_fport));
			} else {
				lxpr_uiobuf_printf(uiobuf, "%4d: "
				    "%08X%08X%08X%08X:%04X "
				    "%08X%08X%08X%08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v6.s6_addr32[0],
				    connp->conn_laddr_v6.s6_addr32[1],
				    connp->conn_laddr_v6.s6_addr32[2],
				    connp->conn_laddr_v6.s6_addr32[3],
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v6.s6_addr32[0],
				    connp->conn_faddr_v6.s6_addr32[1],
				    connp->conn_faddr_v6.s6_addr32[2],
				    connp->conn_faddr_v6.s6_addr32[3],
				    ntohs(connp->conn_fport));
			}

			/* Map TPI state to the Linux state number */
			switch (udp->udp_state) {
			case TS_UNBND:
			case TS_IDLE:
				state = 7;
				break;
			case TS_DATA_XFER:
				state = 1;
				break;
			}

			/* fetch the simulated inode for the socket */
			if (vp == NULL ||
			    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
				attr.va_nodeid = 0;

			lxpr_uiobuf_printf(uiobuf,
			    "%02X %08X:%08X %02X:%08X %08X "
			    "%5u %8d %lu %d %p %d\n",
			    state,
			    0, 0, /* rx/tx queue */
			    0, 0, /* tr, when */
			    0, /* retrans */
			    connp->conn_cred->cr_uid,
			    0, /* timeout */
			    /* inode, ref, pointer, drops */
			    (ino_t)attr.va_nodeid, 0, NULL, 0);
		}
	}
	netstack_rele(ns);
}

/* lxpr_read_net_udp(): /proc/net/udp */
/* ARGSUSED */
static void
lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_udp(uiobuf, IPV4_VERSION);
}

/* lxpr_read_net_udp6(): /proc/net/udp6 */
/* ARGSUSED */
static void
lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_udp(uiobuf, IPV6_VERSION);
}

/*
 * lxpr_read_net_unix(): emulate /proc/net/unix by walking the global
 * socklist, reporting active AF_UNIX sonodes in the current zone.
 */
/* ARGSUSED */
static void
lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	sonode_t *so;
	zoneid_t zoneid = getzoneid();

	lxpr_uiobuf_printf(uiobuf, "Num       RefCount Protocol Flags    Type "
	    "St    Inode Path\n");

	mutex_enter(&socklist.sl_lock);
	for (so = socklist.sl_list; so != NULL;
	    so = _SOTOTPI(so)->sti_next_so) {
		vnode_t *vp = so->so_vnode;
		vattr_t attr;
		sotpi_info_t *sti;
		const char *name = NULL;
		int status = 0;
		int type = 0;
		int flags = 0;

		/* Only process active sonodes in this zone */
		if (so->so_count == 0 || so->so_zoneid != zoneid)
			continue;

		/*
		 * Grab the inode, if possible.
		 * This must be done before entering so_lock.
		 */
		if (vp == NULL ||
		    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
			attr.va_nodeid = 0;

		mutex_enter(&so->so_lock);
		sti = _SOTOTPI(so);

		/* Prefer the local (bound) address, else the foreign one */
		if (sti->sti_laddr_sa != NULL &&
		    sti->sti_laddr_len > 0) {
			name = sti->sti_laddr_sa->sa_data;
		} else if (sti->sti_faddr_sa != NULL &&
		    sti->sti_faddr_len > 0) {
			name = sti->sti_faddr_sa->sa_data;
		}

		/*
		 * Derived from enum values in Linux kernel source:
		 * include/uapi/linux/net.h
		 */
		if ((so->so_state & SS_ISDISCONNECTING) != 0) {
			status = 4;
		} else if ((so->so_state & SS_ISCONNECTING) != 0) {
			status = 2;
		} else if ((so->so_state & SS_ISCONNECTED) != 0) {
			status = 3;
		} else {
			status = 1;
			/* Add ACC flag for stream-type server sockets */
			if (so->so_type != SOCK_DGRAM &&
			    sti->sti_laddr_sa != NULL)
				flags |= 0x10000;
		}

		/* Convert to Linux type */
		switch (so->so_type) {
		case SOCK_DGRAM:
			type = 2;
			break;
		case SOCK_SEQPACKET:
			type = 5;
			break;
		default:
			type = 1;
		}

		lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu",
		    so,
		    so->so_count,
		    0, /* proto, always 0 */
		    flags,
		    type,
		    status,
		    (ino_t)attr.va_nodeid);

		/*
		 * Due to shortcomings in the abstract socket emulation, they
		 * cannot be properly represented here (as @<path>).
		 *
		 * This will be the case until they are better implemented.
		 */
		if (name != NULL)
			lxpr_uiobuf_printf(uiobuf, " %s\n", name);
		else
			lxpr_uiobuf_printf(uiobuf, "\n");
		mutex_exit(&so->so_lock);
	}
	mutex_exit(&socklist.sl_lock);
}

/*
 * lxpr_read_kmsg(): read the contents of the kernel message queue.
We 3186 * translate this into the reception of console messages for this zone; each 3187 * read copies out a single zone console message, or blocks until the next one 3188 * is produced, unless we're open non-blocking, in which case we return after 3189 * 1ms. 3190 */ 3191 3192 #define LX_KMSG_PRI "<0>" 3193 3194 static void 3195 lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh) 3196 { 3197 mblk_t *mp; 3198 timestruc_t to; 3199 timestruc_t *tp = NULL; 3200 3201 ASSERT(lxpnp->lxpr_type == LXPR_KMSG); 3202 3203 if (lxpr_uiobuf_nonblock(uiobuf)) { 3204 to.tv_sec = 0; 3205 to.tv_nsec = 1000000; /* 1msec */ 3206 tp = &to; 3207 } 3208 3209 if (ldi_getmsg(lh, &mp, tp) == 0) { 3210 /* 3211 * lx procfs doesn't like successive reads to the same file 3212 * descriptor unless we do an explicit rewind each time. 3213 */ 3214 lxpr_uiobuf_seek(uiobuf, 0); 3215 3216 lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI, 3217 mp->b_cont->b_rptr); 3218 3219 freemsg(mp); 3220 } 3221 } 3222 3223 /* 3224 * lxpr_read_loadavg(): read the contents of the "loadavg" file. We do just 3225 * enough for uptime and other simple lxproc readers to work 3226 */ 3227 extern int nthread; 3228 3229 static void 3230 lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3231 { 3232 ulong_t avenrun1; 3233 ulong_t avenrun5; 3234 ulong_t avenrun15; 3235 ulong_t avenrun1_cs; 3236 ulong_t avenrun5_cs; 3237 ulong_t avenrun15_cs; 3238 int loadavg[3]; 3239 int *loadbuf; 3240 cpupart_t *cp; 3241 zone_t *zone = LXPTOZ(lxpnp); 3242 3243 uint_t nrunnable = 0; 3244 rctl_qty_t nlwps; 3245 3246 ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG); 3247 3248 mutex_enter(&cpu_lock); 3249 3250 /* 3251 * Need to add up values over all CPU partitions. If pools are active, 3252 * only report the values of the zone's partition, which by definition 3253 * includes the current CPU. 
3254 */ 3255 if (pool_pset_enabled()) { 3256 psetid_t psetid = zone_pset_get(curproc->p_zone); 3257 3258 ASSERT(curproc->p_zone != &zone0); 3259 cp = CPU->cpu_part; 3260 3261 nrunnable = cp->cp_nrunning + cp->cp_nrunnable; 3262 (void) cpupart_get_loadavg(psetid, &loadavg[0], 3); 3263 loadbuf = &loadavg[0]; 3264 } else { 3265 cp = cp_list_head; 3266 do { 3267 nrunnable += cp->cp_nrunning + cp->cp_nrunnable; 3268 } while ((cp = cp->cp_next) != cp_list_head); 3269 3270 loadbuf = zone == global_zone ? 3271 &avenrun[0] : zone->zone_avenrun; 3272 } 3273 3274 /* 3275 * If we're in the non-global zone, we'll report the total number of 3276 * LWPs in the zone for the "nproc" parameter of /proc/loadavg, 3277 * otherwise will just use nthread (which will include kernel threads, 3278 * but should be good enough for lxproc). 3279 */ 3280 nlwps = zone == global_zone ? nthread : zone->zone_nlwps; 3281 3282 mutex_exit(&cpu_lock); 3283 3284 avenrun1 = loadbuf[0] >> FSHIFT; 3285 avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT; 3286 avenrun5 = loadbuf[1] >> FSHIFT; 3287 avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT; 3288 avenrun15 = loadbuf[2] >> FSHIFT; 3289 avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT; 3290 3291 lxpr_uiobuf_printf(uiobuf, 3292 "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n", 3293 avenrun1, avenrun1_cs, 3294 avenrun5, avenrun5_cs, 3295 avenrun15, avenrun15_cs, 3296 nrunnable, nlwps, 0); 3297 } 3298 3299 /* 3300 * lxpr_read_meminfo(): read the contents of the "meminfo" file. 
 */
static void
lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	zone_t *zone = LXPTOZ(lxpnp);
	int global = zone == global_zone;
	long total_mem, free_mem, total_swap, used_swap;

	ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);

	/* A cap of UINT64_MAX means "uncapped": report system-wide values */
	if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
		total_mem = physmem * PAGESIZE;
		free_mem = freemem * PAGESIZE;
	} else {
		total_mem = zone->zone_phys_mem_ctl;
		free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
	}

	if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
		total_swap = k_anoninfo.ani_max * PAGESIZE;
		used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
	} else {
		mutex_enter(&zone->zone_mem_lock);
		total_swap = zone->zone_max_swap_ctl;
		used_swap = zone->zone_max_swap;
		mutex_exit(&zone->zone_mem_lock);
	}

	/* Fields we do not track (shared, buffers, cache, ...) read as 0 */
	lxpr_uiobuf_printf(uiobuf,
	    "MemTotal: %8lu kB\n"
	    "MemFree: %8lu kB\n"
	    "MemShared: %8u kB\n"
	    "Buffers: %8u kB\n"
	    "Cached: %8u kB\n"
	    "SwapCached:%8u kB\n"
	    "Active: %8u kB\n"
	    "Inactive: %8u kB\n"
	    "HighTotal: %8u kB\n"
	    "HighFree: %8u kB\n"
	    "LowTotal: %8u kB\n"
	    "LowFree: %8u kB\n"
	    "SwapTotal: %8lu kB\n"
	    "SwapFree: %8lu kB\n",
	    btok(total_mem),				/* MemTotal */
	    btok(free_mem),				/* MemFree */
	    0,						/* MemShared */
	    0,						/* Buffers */
	    0,						/* Cached */
	    0,						/* SwapCached */
	    0,						/* Active */
	    0,						/* Inactive */
	    0,						/* HighTotal */
	    0,						/* HighFree */
	    btok(total_mem),				/* LowTotal */
	    btok(free_mem),				/* LowFree */
	    btok(total_swap),				/* SwapTotal */
	    btok(total_swap - used_swap));		/* SwapFree */
}

/*
 * lxpr_read_mounts(): emit /proc/mounts for this zone.  The mount list is
 * snapshotted under vfs_list_read_lock() and then printed after the lock is
 * dropped, because printing requires lookupname() which may itself take the
 * vfs list lock (see the deadlock-avoidance comment below).
 */
/* ARGSUSED */
static void
lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	struct vfs *vfsp;
	struct vfs *vfslist;
	zone_t *zone = LXPTOZ(lxpnp);
	/* Private snapshot of the per-mount fields we will print */
	struct print_data {
		refstr_t *vfs_mntpt;
		refstr_t *vfs_resource;
		uint_t vfs_flag;
		int vfs_fstype;
		struct print_data *next;
	} *print_head = NULL;
	struct print_data **print_tail = &print_head;
	struct print_data *printp;

	vfs_list_read_lock();

	if (zone == global_zone) {
		vfsp = vfslist = rootvfs;
	} else {
		vfsp = vfslist = zone->zone_vfslist;
		/*
		 * If the zone has a root entry, it will be the first in
		 * the list. If it doesn't, we conjure one up.
		 */
		if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
		    zone->zone_rootpath) != 0) {
			struct vfs *tvfsp;
			/*
			 * The root of the zone is not a mount point. The vfs
			 * we want to report is that of the zone's root vnode.
			 */
			tvfsp = zone->zone_rootvp->v_vfsp;

			lxpr_uiobuf_printf(uiobuf,
			    "/ / %s %s 0 0\n",
			    vfssw[tvfsp->vfs_fstype].vsw_name,
			    tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");

		}
		if (vfslist == NULL) {
			vfs_list_unlock();
			return;
		}
	}

	/*
	 * Later on we have to do a lookupname, which can end up causing
	 * another vfs_list_read_lock() to be called. Which can lead to a
	 * deadlock. To avoid this, we extract the data we need into a local
	 * list, then we can run this list without holding vfs_list_read_lock()
	 * We keep the list in the same order as the vfs_list
	 */
	do {
		/* Skip mounts we shouldn't show */
		if (vfsp->vfs_flag & VFS_NOMNTTAB) {
			goto nextfs;
		}

		printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
		/* Hold the refstrs so they survive after the lock is dropped */
		refstr_hold(vfsp->vfs_mntpt);
		printp->vfs_mntpt = vfsp->vfs_mntpt;
		refstr_hold(vfsp->vfs_resource);
		printp->vfs_resource = vfsp->vfs_resource;
		printp->vfs_flag = vfsp->vfs_flag;
		printp->vfs_fstype = vfsp->vfs_fstype;
		printp->next = NULL;

		*print_tail = printp;
		print_tail = &printp->next;

nextfs:
		vfsp = (zone == global_zone) ?
		    vfsp->vfs_next : vfsp->vfs_zone_next;

	} while (vfsp != vfslist);

	vfs_list_unlock();

	/*
	 * now we can run through what we've extracted without holding
	 * vfs_list_read_lock()
	 */
	printp = print_head;
	while (printp != NULL) {
		struct print_data *printp_next;
		const char *resource;
		char *mntpt;
		struct vnode *vp;
		int error;

		mntpt = (char *)refstr_value(printp->vfs_mntpt);
		resource = refstr_value(printp->vfs_resource);

		if (mntpt != NULL && mntpt[0] != '\0')
			mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
		else
			mntpt = "-";

		error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);

		if (error != 0)
			goto nextp;

		/* Only report paths that are actually mount points */
		if (!(vp->v_flag & VROOT)) {
			VN_RELE(vp);
			goto nextp;
		}
		VN_RELE(vp);

		if (resource != NULL && resource[0] != '\0') {
			if (resource[0] == '/') {
				resource = ZONE_PATH_VISIBLE(resource, zone) ?
				    ZONE_PATH_TRANSLATE(resource, zone) :
				    mntpt;
			}
		} else {
			resource = "-";
		}

		lxpr_uiobuf_printf(uiobuf,
		    "%s %s %s %s 0 0\n",
		    resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
		    printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");

nextp:
		printp_next = printp->next;
		refstr_rele(printp->vfs_mntpt);
		refstr_rele(printp->vfs_resource);
		kmem_free(printp, sizeof (*printp));
		printp = printp_next;

	}
}

/*
 * lxpr_read_partitions():
 *
 * Over the years, /proc/partitions has been made considerably smaller -- to
 * the point that it really is only major number, minor number, number of
 * blocks (which we report as 0), and partition name.
 *
 * We support this because some things want to see it to make sense of
 * /proc/diskstats, and also because "fdisk -l" and a few other things look
 * here to find all disks on the system.
 */
/* ARGSUSED */
static void
lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{

	kstat_t *ksr;
	kstat_t ks0;
	int nidx, num, i;
	size_t sidx, size;
	zfs_cmd_t *zc;
	nvlist_t *nv = NULL;
	nvpair_t *elem = NULL;
	lxpr_mnt_t *mnt;
	lxpr_zfs_iter_t zfsi;

	ASSERT(lxpnp->lxpr_type == LXPR_PARTITIONS);

	/* Snapshot the kstat chain headers; ksr[0] is the chain header */
	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);

	if (ksr == NULL)
		return;

	lxpr_uiobuf_printf(uiobuf, "major minor #blocks name\n\n");

	for (i = 1; i < nidx; i++) {
		kstat_t *ksp = &ksr[i];
		kstat_io_t *kip;

		/* Only disk-class I/O kstats represent partitions here */
		if (ksp->ks_type != KSTAT_TYPE_IO ||
		    strcmp(ksp->ks_class, "disk") != 0)
			continue;

		if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
		    &size, &num)) == NULL)
			continue;

		if (size < sizeof (kstat_io_t)) {
			kmem_free(kip, size);
			continue;
		}

		/* #blocks is deliberately reported as 0; see block comment */
		lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d %s\n",
		    mod_name_to_major(ksp->ks_module),
		    ksp->ks_instance, 0, ksp->ks_name);

		kmem_free(kip, size);
	}

	kmem_free(ksr, sidx);

	/* If we never got to open the zfs LDI, then stop now. */
	mnt = (lxpr_mnt_t *)lxpnp->lxpr_vnode->v_vfsp->vfs_data;
	if (mnt->lxprm_zfs_isopen == B_FALSE)
		return;

	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);

	if (lxpr_zfs_list_pools(mnt, zc, &nv) != 0)
		goto out;

	/* Also list each zvol in every pool as a partition */
	while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
		char *pool = nvpair_name(elem);

		bzero(&zfsi, sizeof (lxpr_zfs_iter_t));
		while (lxpr_zfs_next_zvol(mnt, pool, zc, &zfsi) == 0) {
			major_t major;
			minor_t minor;
			if (lxpr_zvol_dev(mnt, zc->zc_name, &major, &minor)
			    != 0)
				continue;

			lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d zvol/dsk/%s\n",
			    major, minor, 0, zc->zc_name);
		}
	}

	nvlist_free(nv);
out:
	kmem_free(zc, sizeof (zfs_cmd_t));
}

/*
 * lxpr_read_diskstats():
 *
 * See the block comment above the per-device output-generating line for the
 * details of the format.
 */
/* ARGSUSED */
static void
lxpr_read_diskstats(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	kstat_t *ksr;
	kstat_t ks0;
	int nidx, num, i;
	size_t sidx, size;

	ASSERT(lxpnp->lxpr_type == LXPR_DISKSTATS);

	/* Snapshot the kstat chain headers; ksr[0] is the chain header */
	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);

	if (ksr == NULL)
		return;

	for (i = 1; i < nidx; i++) {
		kstat_t *ksp = &ksr[i];
		kstat_io_t *kip;

		if (ksp->ks_type != KSTAT_TYPE_IO ||
		    strcmp(ksp->ks_class, "disk") != 0)
			continue;

		if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
		    &size, &num)) == NULL)
			continue;

		if (size < sizeof (kstat_io_t)) {
			kmem_free(kip, size);
			continue;
		}

		/*
		 * /proc/diskstats is defined to have one line of output for
		 * each block device, with each line containing the following
		 * 14 fields:
		 *
		 *	1 - major number
		 *	2 - minor mumber
		 *	3 - device name
		 *	4 - reads completed successfully
		 *	5 - reads merged
		 *	6 - sectors read
		 *	7 - time spent reading (ms)
		 *	8 - writes completed
		 *	9 - writes merged
		 *	10 - sectors written
		 *	11 - time spent writing (ms)
		 *	12 - I/Os currently in progress
		 *	13 - time spent doing I/Os (ms)
		 *	14 - weighted time spent doing I/Os (ms)
		 *
		 * One small hiccup: we don't actually keep track of time
		 * spent reading vs. time spent writing -- we keep track of
		 * time waiting vs. time actually performing I/O. While we
		 * could divide the total time by the I/O mix (making the
		 * obviously wrong assumption that I/O operations all take the
		 * same amount of time), this has the undesirable side-effect
		 * of moving backwards. Instead, we report the total time
		 * (read + write) for all three stats (read, write, total).
		 * This is also a lie of sorts, but it should be more
		 * immediately clear to the user that reads and writes are
		 * each being double-counted as the other.
		 */
		lxpr_uiobuf_printf(uiobuf, "%4d %7d %s "
		    "%llu %llu %llu %llu "
		    "%llu %llu %llu %llu "
		    "%llu %llu %llu\n",
		    mod_name_to_major(ksp->ks_module),
		    ksp->ks_instance, ksp->ks_name,
		    (uint64_t)kip->reads, 0LL,
		    kip->nread / (uint64_t)LXPR_SECTOR_SIZE,
		    (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
		    (uint64_t)kip->writes, 0LL,
		    kip->nwritten / (uint64_t)LXPR_SECTOR_SIZE,
		    (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
		    (uint64_t)(kip->rcnt + kip->wcnt),
		    (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
		    (kip->rlentime + kip->wlentime) /
		    (uint64_t)(NANOSEC / MILLISEC));

		kmem_free(kip, size);
	}

	kmem_free(ksr, sidx);
}

/*
 * lxpr_read_version(): read the contents of the "version" file.
3690 */ 3691 /* ARGSUSED */ 3692 static void 3693 lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3694 { 3695 lx_zone_data_t *lxzd = ztolxzd(LXPTOZ(lxpnp)); 3696 lx_proc_data_t *lxpd = ptolxproc(curproc); 3697 const char *release = lxzd->lxzd_kernel_release; 3698 const char *version = lxzd->lxzd_kernel_version; 3699 3700 /* Use per-process overrides, if specified */ 3701 if (lxpd != NULL && lxpd->l_uname_release[0] != '\0') { 3702 release = lxpd->l_uname_release; 3703 } 3704 if (lxpd != NULL && lxpd->l_uname_version[0] != '\0') { 3705 version = lxpd->l_uname_version; 3706 } 3707 3708 lxpr_uiobuf_printf(uiobuf, 3709 "%s version %s (%s version %d.%d.%d) %s\n", 3710 LX_UNAME_SYSNAME, release, 3711 #if defined(__GNUC__) 3712 "gcc", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__, 3713 #else 3714 "cc", 1, 0, 0, 3715 #endif 3716 version); 3717 } 3718 3719 /* 3720 * lxpr_read_stat(): read the contents of the "stat" file. 3721 * 3722 */ 3723 /* ARGSUSED */ 3724 static void 3725 lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 3726 { 3727 cpu_t *cp, *cpstart; 3728 int pools_enabled; 3729 ulong_t idle_cum = 0; 3730 ulong_t sys_cum = 0; 3731 ulong_t user_cum = 0; 3732 ulong_t irq_cum = 0; 3733 ulong_t cpu_nrunnable_cum = 0; 3734 ulong_t w_io_cum = 0; 3735 3736 ulong_t pgpgin_cum = 0; 3737 ulong_t pgpgout_cum = 0; 3738 ulong_t pgswapout_cum = 0; 3739 ulong_t pgswapin_cum = 0; 3740 ulong_t intr_cum = 0; 3741 ulong_t pswitch_cum = 0; 3742 ulong_t forks_cum = 0; 3743 hrtime_t msnsecs[NCMSTATES]; 3744 /* is the emulated release > 2.4 */ 3745 boolean_t newer_than24 = lx_kern_release_cmp(LXPTOZ(lxpnp), "2.4") > 0; 3746 /* temporary variable since scalehrtime modifies data in place */ 3747 hrtime_t tmptime; 3748 3749 ASSERT(lxpnp->lxpr_type == LXPR_STAT); 3750 3751 mutex_enter(&cpu_lock); 3752 pools_enabled = pool_pset_enabled(); 3753 3754 /* Calculate cumulative stats */ 3755 cp = cpstart = CPU->cpu_part->cp_cpulist; 3756 do { 3757 int i; 3758 3759 /* 3760 * 
Don't count CPUs that aren't even in the system 3761 * or aren't up yet. 3762 */ 3763 if ((cp->cpu_flags & CPU_EXISTS) == 0) { 3764 continue; 3765 } 3766 3767 get_cpu_mstate(cp, msnsecs); 3768 3769 idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]); 3770 sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); 3771 user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]); 3772 3773 pgpgin_cum += CPU_STATS(cp, vm.pgpgin); 3774 pgpgout_cum += CPU_STATS(cp, vm.pgpgout); 3775 pgswapin_cum += CPU_STATS(cp, vm.pgswapin); 3776 pgswapout_cum += CPU_STATS(cp, vm.pgswapout); 3777 3778 3779 if (newer_than24) { 3780 cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable; 3781 w_io_cum += CPU_STATS(cp, sys.iowait); 3782 for (i = 0; i < NCMSTATES; i++) { 3783 tmptime = cp->cpu_intracct[i]; 3784 scalehrtime(&tmptime); 3785 irq_cum += NSEC_TO_TICK(tmptime); 3786 } 3787 } 3788 3789 for (i = 0; i < PIL_MAX; i++) 3790 intr_cum += CPU_STATS(cp, sys.intr[i]); 3791 3792 pswitch_cum += CPU_STATS(cp, sys.pswitch); 3793 forks_cum += CPU_STATS(cp, sys.sysfork); 3794 forks_cum += CPU_STATS(cp, sys.sysvfork); 3795 3796 if (pools_enabled) 3797 cp = cp->cpu_next_part; 3798 else 3799 cp = cp->cpu_next; 3800 } while (cp != cpstart); 3801 3802 if (newer_than24) { 3803 lxpr_uiobuf_printf(uiobuf, 3804 "cpu %lu %lu %lu %lu %lu %lu %lu\n", 3805 user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L); 3806 } else { 3807 lxpr_uiobuf_printf(uiobuf, 3808 "cpu %lu %lu %lu %lu\n", 3809 user_cum, 0L, sys_cum, idle_cum); 3810 } 3811 3812 /* Do per processor stats */ 3813 do { 3814 int i; 3815 3816 ulong_t idle_ticks; 3817 ulong_t sys_ticks; 3818 ulong_t user_ticks; 3819 ulong_t irq_ticks = 0; 3820 3821 /* 3822 * Don't count CPUs that aren't even in the system 3823 * or aren't up yet. 
3824 */ 3825 if ((cp->cpu_flags & CPU_EXISTS) == 0) { 3826 continue; 3827 } 3828 3829 get_cpu_mstate(cp, msnsecs); 3830 3831 idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]); 3832 sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); 3833 user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]); 3834 3835 for (i = 0; i < NCMSTATES; i++) { 3836 tmptime = cp->cpu_intracct[i]; 3837 scalehrtime(&tmptime); 3838 irq_ticks += NSEC_TO_TICK(tmptime); 3839 } 3840 3841 if (newer_than24) { 3842 lxpr_uiobuf_printf(uiobuf, 3843 "cpu%d %lu %lu %lu %lu %lu %lu %lu\n", 3844 cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks, 3845 0L, irq_ticks, 0L); 3846 } else { 3847 lxpr_uiobuf_printf(uiobuf, 3848 "cpu%d %lu %lu %lu %lu\n", 3849 cp->cpu_id, 3850 user_ticks, 0L, sys_ticks, idle_ticks); 3851 } 3852 3853 if (pools_enabled) 3854 cp = cp->cpu_next_part; 3855 else 3856 cp = cp->cpu_next; 3857 } while (cp != cpstart); 3858 3859 mutex_exit(&cpu_lock); 3860 3861 if (newer_than24) { 3862 lxpr_uiobuf_printf(uiobuf, 3863 "page %lu %lu\n" 3864 "swap %lu %lu\n" 3865 "intr %lu\n" 3866 "ctxt %lu\n" 3867 "btime %lu\n" 3868 "processes %lu\n" 3869 "procs_running %lu\n" 3870 "procs_blocked %lu\n", 3871 pgpgin_cum, pgpgout_cum, 3872 pgswapin_cum, pgswapout_cum, 3873 intr_cum, 3874 pswitch_cum, 3875 boot_time, 3876 forks_cum, 3877 cpu_nrunnable_cum, 3878 w_io_cum); 3879 } else { 3880 lxpr_uiobuf_printf(uiobuf, 3881 "page %lu %lu\n" 3882 "swap %lu %lu\n" 3883 "intr %lu\n" 3884 "ctxt %lu\n" 3885 "btime %lu\n" 3886 "processes %lu\n", 3887 pgpgin_cum, pgpgout_cum, 3888 pgswapin_cum, pgswapout_cum, 3889 intr_cum, 3890 pswitch_cum, 3891 boot_time, 3892 forks_cum); 3893 } 3894 } 3895 3896 /* 3897 * lxpr_read_swaps(): 3898 * 3899 * We don't support swap files or partitions, but some programs like to look 3900 * here just to check we have some swap on the system, so we lie and show 3901 * our entire swap cap as one swap partition. 
 */
/* ARGSUSED */
static void
lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	zone_t *zone = curzone;
	uint64_t totswap, usedswap;

	mutex_enter(&zone->zone_mem_lock);
	/* Uses units of 1 kb (2^10). */
	totswap = zone->zone_max_swap_ctl >> 10;
	usedswap = zone->zone_max_swap >> 10;
	mutex_exit(&zone->zone_mem_lock);

	lxpr_uiobuf_printf(uiobuf,
	    "Filename "
	    "Type Size Used Priority\n");
	lxpr_uiobuf_printf(uiobuf, "%-40s%-16s%-8llu%-8llu%-8d\n",
	    "/dev/swap", "partition", totswap, usedswap, -1);
}

/*
 * inotify tunables exported via /proc.
 */
extern int inotify_maxevents;
extern int inotify_maxinstances;
extern int inotify_maxwatches;

/* /proc/sys/fs/inotify/max_queued_events */
static void
lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp,
    lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS);
	lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents);
}

/* /proc/sys/fs/inotify/max_user_instances */
static void
lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp,
    lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES);
	lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances);
}

/* /proc/sys/fs/inotify/max_user_watches */
static void
lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp,
    lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES);
	lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches);
}

/* /proc/sys/kernel/cap_last_cap: highest Linux capability we emulate */
static void
lxpr_read_sys_kernel_caplcap(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_CAPLCAP);
	lxpr_uiobuf_printf(uiobuf, "%d\n", LX_CAP_MAX_VALID);
}

/*
 * /proc/sys/kernel/core_pattern: translate the zone's native core path
 * into the Linux core-pattern syntax (empty line when cores are disabled
 * or the path cannot be translated).
 */
static void
lxpr_read_sys_kernel_corepatt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	zone_t *zone = curproc->p_zone;
	struct core_globals *cg;
	refstr_t *rp;
	corectl_path_t *ccp;
	char tr[MAXPATHLEN];

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);

	cg = zone_getspecific(core_zone_key, zone);
	ASSERT(cg != NULL);

	/* If core dumps are disabled, return an empty string. */
	if ((cg->core_options & CC_PROCESS_PATH) == 0) {
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	/* Take a hold on the path refstr before dropping ccp_mtx */
	ccp = cg->core_default_path;
	mutex_enter(&ccp->ccp_mtx);
	if ((rp = ccp->ccp_path) != NULL)
		refstr_hold(rp);
	mutex_exit(&ccp->ccp_mtx);

	if (rp == NULL) {
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	bzero(tr, sizeof (tr));
	if (lxpr_core_path_s2l(refstr_value(rp), tr, sizeof (tr)) != 0) {
		refstr_rele(rp);
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	refstr_rele(rp);
	lxpr_uiobuf_printf(uiobuf, "%s\n", tr);
}

/* /proc/sys/kernel/hostname */
static void
lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME);
	lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename());
}

/* /proc/sys/kernel/msgmni: zone's enforced message-queue rctl value */
static void
lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	rctl_qty_t val;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI);

	mutex_enter(&curproc->p_lock);
	val = rctl_enforced_value(rc_zone_msgmni,
	    curproc->p_zone->zone_rctls, curproc);
	mutex_exit(&curproc->p_lock);

	lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
}

/* /proc/sys/kernel/ngroups_max */
static void
lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX);
	lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max);
}

/* /proc/sys/kernel/osrelease: emulated kernel version for lx zones */
static void
lxpr_read_sys_kernel_osrel(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lx_zone_data_t *br_data;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_OSREL);
	br_data = ztolxzd(curproc->p_zone);
	if (curproc->p_zone->zone_brand == &lx_brand) {
		lxpr_uiobuf_printf(uiobuf, "%s\n",
		    br_data->lxzd_kernel_version);
	} else {
		lxpr_uiobuf_printf(uiobuf, "\n");
	}
}

/* /proc/sys/kernel/pid_max */
static void
lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX);
	lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid);
}

static void
lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/*
	 * This file isn't documented on the Linux proc(5) man page but
	 * according to the blog of the author of systemd/journald (the
	 * consumer), he says:
	 *    boot_id: A random ID that is regenerated on each boot. As such it
	 *    can be used to identify the local machine's current boot. It's
	 *    universally available on any recent Linux kernel. It's a good and
	 *    safe choice if you need to identify a specific boot on a specific
	 *    booted kernel.
	 *
	 * We'll just generate a random ID if necessary. On Linux the format
	 * appears to resemble a uuid but since it is not documented to be a
	 * uuid, we don't worry about that.
	 */
	lx_zone_data_t *br_data;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RAND_BOOTID);

	/* Non-lx zones get a constant placeholder ID */
	if (curproc->p_zone->zone_brand != &lx_brand) {
		lxpr_uiobuf_printf(uiobuf, "0\n");
		return;
	}

	br_data = ztolxzd(curproc->p_zone);
	if (br_data->lxzd_bootid[0] == '\0') {
		/*
		 * NOTE(review): this extern declaration appears to be unused
		 * in this function (random_get_bytes() is what is called
		 * below) -- candidate for removal; confirm no other use.
		 */
		extern int getrandom(void *, size_t, int);
		int i;

		/*
		 * Build a uuid-shaped string: five dash-separated groups of
		 * 8, 4, 4, 4 and 12 hex digits respectively.
		 */
		for (i = 0; i < 5; i++) {
			u_longlong_t n;
			char s[32];

			(void) random_get_bytes((uint8_t *)&n, sizeof (n));
			switch (i) {
			case 0: (void) snprintf(s, sizeof (s), "%08llx", n);
				s[8] = '\0';
				break;
			case 4: (void) snprintf(s, sizeof (s), "%012llx", n);
				s[12] = '\0';
				break;
			default: (void) snprintf(s, sizeof (s), "%04llx", n);
				s[4] = '\0';
				break;
			}
			if (i > 0)
				strlcat(br_data->lxzd_bootid, "-",
				    sizeof (br_data->lxzd_bootid));
			strlcat(br_data->lxzd_bootid, s,
			    sizeof (br_data->lxzd_bootid));
		}
	}

	lxpr_uiobuf_printf(uiobuf, "%s\n", br_data->lxzd_bootid);
}

/* /proc/sys/kernel/shmmax: zone rctl value, clamped to 4GB as Linux does */
static void
lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	rctl_qty_t val;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX);

	mutex_enter(&curproc->p_lock);
	val = rctl_enforced_value(rc_zone_shmmax,
	    curproc->p_zone->zone_rctls, curproc);
	mutex_exit(&curproc->p_lock);

	/* Value is printed with %u below, so cap it at 4GB */
	if (val > FOURGB)
		val = FOURGB;

	lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
}

/* /proc/sys/kernel/threads-max: the zone's LWP cap */
static void
lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX);
	lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl);
}

/*
 * /proc/sys/net/core/somaxconn: the netstack's TCP listen backlog maximum,
 * or the SOMAXCONN default when no netstack is current.
 */
static void
lxpr_read_sys_net_core_somaxc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	tcp_stack_t *tcps;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);

	ns = netstack_get_current();
	if (ns == NULL) {
		lxpr_uiobuf_printf(uiobuf, "%d\n", SOMAXCONN);
		return;
	}

	tcps = ns->netstack_tcp;
	lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_conn_req_max_q);
	netstack_rele(ns);
}

/* /proc/sys/vm/min_free_kbytes: not tracked, reported as 0 */
static void
lxpr_read_sys_vm_minfr_kb(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MINFR_KB);
	lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
}

/* /proc/sys/vm/nr_hugepages: huge pages are not exposed, reported as 0 */
static void
lxpr_read_sys_vm_nhpages(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_NHUGEP);
	lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
}

/* /proc/sys/vm/overcommit_memory: always the Linux default (0) */
static void
lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_OVERCOMMIT_MEM);
	lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
}

/* /proc/sys/vm/swappiness: no equivalent tunable, reported as 0 */
static void
lxpr_read_sys_vm_swappiness(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_SWAPPINESS);
	lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
}

/*
 * lxpr_read_uptime(): read the contents of the "uptime" file.
4186 * 4187 * format is: "%.2lf, %.2lf",uptime_secs, idle_secs 4188 * Use fixed point arithmetic to get 2 decimal places 4189 */ 4190 /* ARGSUSED */ 4191 static void 4192 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4193 { 4194 cpu_t *cp, *cpstart; 4195 int pools_enabled; 4196 ulong_t idle_cum = 0; 4197 ulong_t cpu_count = 0; 4198 ulong_t idle_s; 4199 ulong_t idle_cs; 4200 ulong_t up_s; 4201 ulong_t up_cs; 4202 hrtime_t birthtime; 4203 hrtime_t centi_sec = 10000000; /* 10^7 */ 4204 4205 ASSERT(lxpnp->lxpr_type == LXPR_UPTIME); 4206 4207 /* Calculate cumulative stats */ 4208 mutex_enter(&cpu_lock); 4209 pools_enabled = pool_pset_enabled(); 4210 4211 cp = cpstart = CPU->cpu_part->cp_cpulist; 4212 do { 4213 /* 4214 * Don't count CPUs that aren't even in the system 4215 * or aren't up yet. 4216 */ 4217 if ((cp->cpu_flags & CPU_EXISTS) == 0) { 4218 continue; 4219 } 4220 4221 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle); 4222 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait); 4223 cpu_count += 1; 4224 4225 if (pools_enabled) 4226 cp = cp->cpu_next_part; 4227 else 4228 cp = cp->cpu_next; 4229 } while (cp != cpstart); 4230 mutex_exit(&cpu_lock); 4231 4232 /* Getting the Zone zsched process startup time */ 4233 birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart; 4234 up_cs = (gethrtime() - birthtime) / centi_sec; 4235 up_s = up_cs / 100; 4236 up_cs %= 100; 4237 4238 ASSERT(cpu_count > 0); 4239 idle_cum /= cpu_count; 4240 idle_s = idle_cum / hz; 4241 idle_cs = idle_cum % hz; 4242 idle_cs *= 100; 4243 idle_cs /= hz; 4244 4245 lxpr_uiobuf_printf(uiobuf, 4246 "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs); 4247 } 4248 4249 static const char *amd_x_edx[] = { 4250 NULL, NULL, NULL, NULL, 4251 NULL, NULL, NULL, NULL, 4252 NULL, NULL, NULL, "syscall", 4253 NULL, NULL, NULL, NULL, 4254 NULL, NULL, NULL, "mp", 4255 "nx", NULL, "mmxext", NULL, 4256 NULL, NULL, NULL, NULL, 4257 NULL, "lm", "3dnowext", "3dnow" 4258 }; 4259 4260 static const char *amd_x_ecx[] = { 4261 
"lahf_lm", NULL, "svm", NULL, 4262 "altmovcr8" 4263 }; 4264 4265 static const char *tm_x_edx[] = { 4266 "recovery", "longrun", NULL, "lrti" 4267 }; 4268 4269 /* 4270 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx." 4271 */ 4272 static const char *intc_x_edx[] = { 4273 NULL, NULL, NULL, NULL, 4274 NULL, NULL, NULL, NULL, 4275 NULL, NULL, NULL, "syscall", 4276 NULL, NULL, NULL, NULL, 4277 NULL, NULL, NULL, NULL, 4278 "nx", NULL, NULL, NULL, 4279 NULL, NULL, NULL, NULL, 4280 NULL, "lm", NULL, NULL 4281 }; 4282 4283 static const char *intc_edx[] = { 4284 "fpu", "vme", "de", "pse", 4285 "tsc", "msr", "pae", "mce", 4286 "cx8", "apic", NULL, "sep", 4287 "mtrr", "pge", "mca", "cmov", 4288 "pat", "pse36", "pn", "clflush", 4289 NULL, "dts", "acpi", "mmx", 4290 "fxsr", "sse", "sse2", "ss", 4291 "ht", "tm", "ia64", "pbe" 4292 }; 4293 4294 /* 4295 * "sse3" on linux is called "pni" (Prescott New Instructions). 4296 */ 4297 static const char *intc_ecx[] = { 4298 "pni", NULL, NULL, "monitor", 4299 "ds_cpl", NULL, NULL, "est", 4300 "tm2", NULL, "cid", NULL, 4301 NULL, "cx16", "xtpr" 4302 }; 4303 4304 /* 4305 * Report a list of each cgroup subsystem supported by our emulated cgroup fs. 4306 * This needs to exist for systemd to run but for now we don't report any 4307 * cgroup subsystems as being installed. The commented example below shows 4308 * how to print a subsystem entry. 
4309 */ 4310 static void 4311 lxpr_read_cgroups(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4312 { 4313 lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n", 4314 "#subsys_name", "hierarchy", "num_cgroups", "enabled"); 4315 4316 /* 4317 * lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n", 4318 * "cpu,cpuacct", "2", "1", "1"); 4319 */ 4320 } 4321 4322 static void 4323 lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4324 { 4325 int i; 4326 uint32_t bits; 4327 cpu_t *cp, *cpstart; 4328 int pools_enabled; 4329 const char **fp; 4330 char brandstr[CPU_IDSTRLEN]; 4331 struct cpuid_regs cpr; 4332 int maxeax; 4333 int std_ecx, std_edx, ext_ecx, ext_edx; 4334 4335 ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO); 4336 4337 mutex_enter(&cpu_lock); 4338 pools_enabled = pool_pset_enabled(); 4339 4340 cp = cpstart = CPU->cpu_part->cp_cpulist; 4341 do { 4342 /* 4343 * This returns the maximum eax value for standard cpuid 4344 * functions in eax. 4345 */ 4346 cpr.cp_eax = 0; 4347 (void) cpuid_insn(cp, &cpr); 4348 maxeax = cpr.cp_eax; 4349 4350 /* 4351 * Get standard x86 feature flags. 4352 */ 4353 cpr.cp_eax = 1; 4354 (void) cpuid_insn(cp, &cpr); 4355 std_ecx = cpr.cp_ecx; 4356 std_edx = cpr.cp_edx; 4357 4358 /* 4359 * Now get extended feature flags. 
4360 */ 4361 cpr.cp_eax = 0x80000001; 4362 (void) cpuid_insn(cp, &cpr); 4363 ext_ecx = cpr.cp_ecx; 4364 ext_edx = cpr.cp_edx; 4365 4366 (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN); 4367 4368 lxpr_uiobuf_printf(uiobuf, 4369 "processor\t: %d\n" 4370 "vendor_id\t: %s\n" 4371 "cpu family\t: %d\n" 4372 "model\t\t: %d\n" 4373 "model name\t: %s\n" 4374 "stepping\t: %d\n" 4375 "cpu MHz\t\t: %u.%03u\n", 4376 cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp), 4377 cpuid_getmodel(cp), brandstr, cpuid_getstep(cp), 4378 (uint32_t)(cpu_freq_hz / 1000000), 4379 ((uint32_t)(cpu_freq_hz / 1000)) % 1000); 4380 4381 lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n", 4382 getl2cacheinfo(cp, NULL, NULL, NULL) / 1024); 4383 4384 if (is_x86_feature(x86_featureset, X86FSET_HTT)) { 4385 /* 4386 * 'siblings' is used for HT-style threads 4387 */ 4388 lxpr_uiobuf_printf(uiobuf, 4389 "physical id\t: %lu\n" 4390 "siblings\t: %u\n", 4391 pg_plat_hw_instance_id(cp, PGHW_CHIP), 4392 cpuid_get_ncpu_per_chip(cp)); 4393 } 4394 4395 /* 4396 * Since we're relatively picky about running on older hardware, 4397 * we can be somewhat cavalier about the answers to these ones. 4398 * 4399 * In fact, given the hardware we support, we just say: 4400 * 4401 * fdiv_bug : no (if we're on a 64-bit kernel) 4402 * hlt_bug : no 4403 * f00f_bug : no 4404 * coma_bug : no 4405 * wp : yes (write protect in supervsr mode) 4406 */ 4407 lxpr_uiobuf_printf(uiobuf, 4408 "fdiv_bug\t: %s\n" 4409 "hlt_bug \t: no\n" 4410 "f00f_bug\t: no\n" 4411 "coma_bug\t: no\n" 4412 "fpu\t\t: %s\n" 4413 "fpu_exception\t: %s\n" 4414 "cpuid level\t: %d\n" 4415 "flags\t\t:", 4416 #if defined(__i386) 4417 fpu_pentium_fdivbug ? "yes" : "no", 4418 #else 4419 "no", 4420 #endif /* __i386 */ 4421 fpu_exists ? "yes" : "no", fpu_exists ? 
"yes" : "no", 4422 maxeax); 4423 4424 for (bits = std_edx, fp = intc_edx, i = 0; 4425 i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++) 4426 if ((bits & (1 << i)) != 0 && *fp) 4427 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4428 4429 /* 4430 * name additional features where appropriate 4431 */ 4432 switch (x86_vendor) { 4433 case X86_VENDOR_Intel: 4434 for (bits = ext_edx, fp = intc_x_edx, i = 0; 4435 i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]); 4436 fp++, i++) 4437 if ((bits & (1 << i)) != 0 && *fp) 4438 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4439 break; 4440 4441 case X86_VENDOR_AMD: 4442 for (bits = ext_edx, fp = amd_x_edx, i = 0; 4443 i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]); 4444 fp++, i++) 4445 if ((bits & (1 << i)) != 0 && *fp) 4446 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4447 4448 for (bits = ext_ecx, fp = amd_x_ecx, i = 0; 4449 i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]); 4450 fp++, i++) 4451 if ((bits & (1 << i)) != 0 && *fp) 4452 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4453 break; 4454 4455 case X86_VENDOR_TM: 4456 for (bits = ext_edx, fp = tm_x_edx, i = 0; 4457 i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]); 4458 fp++, i++) 4459 if ((bits & (1 << i)) != 0 && *fp) 4460 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4461 break; 4462 default: 4463 break; 4464 } 4465 4466 for (bits = std_ecx, fp = intc_ecx, i = 0; 4467 i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++) 4468 if ((bits & (1 << i)) != 0 && *fp) 4469 lxpr_uiobuf_printf(uiobuf, " %s", *fp); 4470 4471 lxpr_uiobuf_printf(uiobuf, "\n\n"); 4472 4473 if (pools_enabled) 4474 cp = cp->cpu_next_part; 4475 else 4476 cp = cp->cpu_next; 4477 } while (cp != cpstart); 4478 4479 mutex_exit(&cpu_lock); 4480 } 4481 4482 /* ARGSUSED */ 4483 static void 4484 lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4485 { 4486 ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD); 4487 lxpr_uiobuf_seterr(uiobuf, EFAULT); 4488 } 4489 4490 /* 4491 * Report a list of file systems loaded in the kernel. 
We only report the ones 4492 * which we support and which may be checked by various components to see if 4493 * they are loaded. 4494 */ 4495 static void 4496 lxpr_read_filesystems(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf) 4497 { 4498 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "autofs"); 4499 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "cgroup"); 4500 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "nfs"); 4501 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "proc"); 4502 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "sysfs"); 4503 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "tmpfs"); 4504 } 4505 4506 /* 4507 * lxpr_getattr(): Vnode operation for VOP_GETATTR() 4508 */ 4509 static int 4510 lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 4511 caller_context_t *ct) 4512 { 4513 register lxpr_node_t *lxpnp = VTOLXP(vp); 4514 lxpr_nodetype_t type = lxpnp->lxpr_type; 4515 extern uint_t nproc; 4516 int error; 4517 4518 /* 4519 * Return attributes of underlying vnode if ATTR_REAL 4520 * 4521 * but keep fd files with the symlink permissions 4522 */ 4523 if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) { 4524 vnode_t *rvp = lxpnp->lxpr_realvp; 4525 4526 /* 4527 * withold attribute information to owner or root 4528 */ 4529 if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) { 4530 return (error); 4531 } 4532 4533 /* 4534 * now its attributes 4535 */ 4536 if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) { 4537 return (error); 4538 } 4539 4540 /* 4541 * if it's a file in lx /proc/pid/fd/xx then set its 4542 * mode and keep it looking like a symlink, fifo or socket 4543 */ 4544 if (type == LXPR_PID_FD_FD) { 4545 vap->va_mode = lxpnp->lxpr_mode; 4546 vap->va_type = lxpnp->lxpr_realvp->v_type; 4547 vap->va_size = 0; 4548 vap->va_nlink = 1; 4549 } 4550 return (0); 4551 } 4552 4553 /* Default attributes, that may be overridden below */ 4554 bzero(vap, sizeof (*vap)); 4555 vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time; 4556 
vap->va_nlink = 1; 4557 vap->va_type = vp->v_type; 4558 vap->va_mode = lxpnp->lxpr_mode; 4559 vap->va_fsid = vp->v_vfsp->vfs_dev; 4560 vap->va_blksize = DEV_BSIZE; 4561 vap->va_uid = lxpnp->lxpr_uid; 4562 vap->va_gid = lxpnp->lxpr_gid; 4563 vap->va_nodeid = lxpnp->lxpr_ino; 4564 4565 switch (type) { 4566 case LXPR_PROCDIR: 4567 vap->va_nlink = nproc + 2 + PROCDIRFILES; 4568 vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE; 4569 break; 4570 case LXPR_PIDDIR: 4571 vap->va_nlink = PIDDIRFILES; 4572 vap->va_size = PIDDIRFILES * LXPR_SDSIZE; 4573 break; 4574 case LXPR_PID_TASK_IDDIR: 4575 vap->va_nlink = TIDDIRFILES; 4576 vap->va_size = TIDDIRFILES * LXPR_SDSIZE; 4577 break; 4578 case LXPR_SELF: 4579 vap->va_uid = crgetruid(curproc->p_cred); 4580 vap->va_gid = crgetrgid(curproc->p_cred); 4581 break; 4582 case LXPR_PID_FD_FD: 4583 case LXPR_PID_TID_FD_FD: 4584 /* 4585 * Restore VLNK type for lstat-type activity. 4586 * See lxpr_readlink for more details. 4587 */ 4588 if ((flags & FOLLOW) == 0) 4589 vap->va_type = VLNK; 4590 default: 4591 break; 4592 } 4593 4594 vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size); 4595 return (0); 4596 } 4597 4598 /* 4599 * lxpr_access(): Vnode operation for VOP_ACCESS() 4600 */ 4601 static int 4602 lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct) 4603 { 4604 lxpr_node_t *lxpnp = VTOLXP(vp); 4605 lxpr_nodetype_t type = lxpnp->lxpr_type; 4606 int shift = 0; 4607 proc_t *tp; 4608 4609 /* lx /proc is a read only file system */ 4610 if (mode & VWRITE) { 4611 switch (type) { 4612 case LXPR_PID_OOM_SCR_ADJ: 4613 case LXPR_PID_TID_OOM_SCR_ADJ: 4614 case LXPR_SYS_KERNEL_COREPATT: 4615 case LXPR_SYS_NET_CORE_SOMAXCON: 4616 case LXPR_SYS_VM_OVERCOMMIT_MEM: 4617 case LXPR_SYS_VM_SWAPPINESS: 4618 case LXPR_PID_FD_FD: 4619 case LXPR_PID_TID_FD_FD: 4620 break; 4621 default: 4622 return (EROFS); 4623 } 4624 } 4625 4626 /* 4627 * If this is a restricted file, check access permissions. 
4628 */ 4629 switch (type) { 4630 case LXPR_PIDDIR: 4631 return (0); 4632 case LXPR_PID_CURDIR: 4633 case LXPR_PID_ENV: 4634 case LXPR_PID_EXE: 4635 case LXPR_PID_LIMITS: 4636 case LXPR_PID_MAPS: 4637 case LXPR_PID_MEM: 4638 case LXPR_PID_ROOTDIR: 4639 case LXPR_PID_FDDIR: 4640 case LXPR_PID_FD_FD: 4641 case LXPR_PID_TID_FDDIR: 4642 case LXPR_PID_TID_FD_FD: 4643 if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL) 4644 return (ENOENT); 4645 if (tp != curproc && secpolicy_proc_access(cr) != 0 && 4646 priv_proc_cred_perm(cr, tp, NULL, mode) != 0) { 4647 lxpr_unlock(tp); 4648 return (EACCES); 4649 } 4650 lxpr_unlock(tp); 4651 default: 4652 break; 4653 } 4654 4655 if (lxpnp->lxpr_realvp != NULL) { 4656 /* 4657 * For these we use the underlying vnode's accessibility. 4658 */ 4659 return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct)); 4660 } 4661 4662 /* If user is root allow access regardless of permission bits */ 4663 if (secpolicy_proc_access(cr) == 0) 4664 return (0); 4665 4666 /* 4667 * Access check is based on only one of owner, group, public. If not 4668 * owner, then check group. If not a member of the group, then check 4669 * public access. 
4670 */ 4671 if (crgetuid(cr) != lxpnp->lxpr_uid) { 4672 shift += 3; 4673 if (!groupmember((uid_t)lxpnp->lxpr_gid, cr)) 4674 shift += 3; 4675 } 4676 4677 mode &= ~(lxpnp->lxpr_mode << shift); 4678 4679 if (mode == 0) 4680 return (0); 4681 4682 return (EACCES); 4683 } 4684 4685 /* ARGSUSED */ 4686 static vnode_t * 4687 lxpr_lookup_not_a_dir(vnode_t *dp, char *comp) 4688 { 4689 return (NULL); 4690 } 4691 4692 /* 4693 * lxpr_lookup(): Vnode operation for VOP_LOOKUP() 4694 */ 4695 /* ARGSUSED */ 4696 static int 4697 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp, 4698 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 4699 int *direntflags, pathname_t *realpnp) 4700 { 4701 lxpr_node_t *lxpnp = VTOLXP(dp); 4702 lxpr_nodetype_t type = lxpnp->lxpr_type; 4703 int error; 4704 4705 ASSERT(dp->v_type == VDIR); 4706 ASSERT(type < LXPR_NFILES); 4707 4708 /* 4709 * we should never get here because the lookup 4710 * is done on the realvp for these nodes 4711 */ 4712 ASSERT(type != LXPR_PID_FD_FD && 4713 type != LXPR_PID_CURDIR && 4714 type != LXPR_PID_ROOTDIR); 4715 4716 /* 4717 * restrict lookup permission to owner or root 4718 */ 4719 if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) { 4720 return (error); 4721 } 4722 4723 /* 4724 * Just return the parent vnode if that's where we are trying to go. 4725 */ 4726 if (strcmp(comp, "..") == 0) { 4727 VN_HOLD(lxpnp->lxpr_parent); 4728 *vpp = lxpnp->lxpr_parent; 4729 return (0); 4730 } 4731 4732 /* 4733 * Special handling for directory searches. Note: null component name 4734 * denotes that the current directory is being searched. 4735 */ 4736 if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) { 4737 VN_HOLD(dp); 4738 *vpp = dp; 4739 return (0); 4740 } 4741 4742 *vpp = (lxpr_lookup_function[type](dp, comp)); 4743 return ((*vpp == NULL) ? 
ENOENT : 0); 4744 } 4745 4746 /* 4747 * Do a sequential search on the given directory table 4748 */ 4749 static vnode_t * 4750 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p, 4751 lxpr_dirent_t *dirtab, int dirtablen) 4752 { 4753 lxpr_node_t *lxpnp; 4754 int count; 4755 4756 for (count = 0; count < dirtablen; count++) { 4757 if (strcmp(dirtab[count].d_name, comp) == 0) { 4758 lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0); 4759 dp = LXPTOV(lxpnp); 4760 ASSERT(dp != NULL); 4761 return (dp); 4762 } 4763 } 4764 return (NULL); 4765 } 4766 4767 static vnode_t * 4768 lxpr_lookup_piddir(vnode_t *dp, char *comp) 4769 { 4770 proc_t *p; 4771 4772 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR); 4773 4774 p = lxpr_lock(VTOLXP(dp)->lxpr_pid); 4775 if (p == NULL) 4776 return (NULL); 4777 4778 dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES); 4779 4780 lxpr_unlock(p); 4781 4782 return (dp); 4783 } 4784 4785 /* 4786 * Lookup one of the process's task ID's. 4787 */ 4788 static vnode_t * 4789 lxpr_lookup_taskdir(vnode_t *dp, char *comp) 4790 { 4791 lxpr_node_t *dlxpnp = VTOLXP(dp); 4792 lxpr_node_t *lxpnp; 4793 proc_t *p; 4794 pid_t real_pid; 4795 uint_t tid; 4796 int c; 4797 kthread_t *t; 4798 4799 ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASKDIR); 4800 4801 /* 4802 * convert the string rendition of the filename to a thread ID 4803 */ 4804 tid = 0; 4805 while ((c = *comp++) != '\0') { 4806 int otid; 4807 if (c < '0' || c > '9') 4808 return (NULL); 4809 4810 otid = tid; 4811 tid = 10 * tid + c - '0'; 4812 /* integer overflow */ 4813 if (tid / 10 != otid) 4814 return (NULL); 4815 } 4816 4817 /* 4818 * get the proc to work with and lock it 4819 */ 4820 real_pid = get_real_pid(dlxpnp->lxpr_pid); 4821 p = lxpr_lock(real_pid); 4822 if ((p == NULL)) 4823 return (NULL); 4824 4825 /* 4826 * If the process is a zombie or system process 4827 * it can't have any threads. 
4828 */ 4829 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) { 4830 lxpr_unlock(p); 4831 return (NULL); 4832 } 4833 4834 if (p->p_brand == &lx_brand) { 4835 t = lxpr_get_thread(p, tid); 4836 } else { 4837 /* 4838 * Only the main thread is visible for non-branded processes. 4839 */ 4840 t = p->p_tlist; 4841 if (tid != p->p_pid || t == NULL) { 4842 t = NULL; 4843 } else { 4844 thread_lock(t); 4845 } 4846 } 4847 if (t == NULL) { 4848 lxpr_unlock(p); 4849 return (NULL); 4850 } 4851 thread_unlock(t); 4852 4853 /* 4854 * Allocate and fill in a new lx /proc taskid node. 4855 * Instead of the last arg being a fd, it is a tid. 4856 */ 4857 lxpnp = lxpr_getnode(dp, LXPR_PID_TASK_IDDIR, p, tid); 4858 dp = LXPTOV(lxpnp); 4859 ASSERT(dp != NULL); 4860 lxpr_unlock(p); 4861 return (dp); 4862 } 4863 4864 /* 4865 * Lookup one of the process's task ID's. 4866 */ 4867 static vnode_t * 4868 lxpr_lookup_task_tid_dir(vnode_t *dp, char *comp) 4869 { 4870 lxpr_node_t *dlxpnp = VTOLXP(dp); 4871 lxpr_node_t *lxpnp; 4872 proc_t *p; 4873 pid_t real_pid; 4874 kthread_t *t; 4875 int i; 4876 4877 ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASK_IDDIR); 4878 4879 /* 4880 * get the proc to work with and lock it 4881 */ 4882 real_pid = get_real_pid(dlxpnp->lxpr_pid); 4883 p = lxpr_lock(real_pid); 4884 if ((p == NULL)) 4885 return (NULL); 4886 4887 /* 4888 * If the process is a zombie or system process 4889 * it can't have any threads. 
4890 */ 4891 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) { 4892 lxpr_unlock(p); 4893 return (NULL); 4894 } 4895 4896 /* need to confirm tid is still there */ 4897 t = lxpr_get_thread(p, dlxpnp->lxpr_desc); 4898 if (t == NULL) { 4899 lxpr_unlock(p); 4900 return (NULL); 4901 } 4902 thread_unlock(t); 4903 4904 /* 4905 * allocate and fill in the new lx /proc taskid dir node 4906 */ 4907 for (i = 0; i < TIDDIRFILES; i++) { 4908 if (strcmp(tiddir[i].d_name, comp) == 0) { 4909 lxpnp = lxpr_getnode(dp, tiddir[i].d_type, p, 4910 dlxpnp->lxpr_desc); 4911 dp = LXPTOV(lxpnp); 4912 ASSERT(dp != NULL); 4913 lxpr_unlock(p); 4914 return (dp); 4915 } 4916 } 4917 4918 lxpr_unlock(p); 4919 return (NULL); 4920 } 4921 4922 /* 4923 * Lookup one of the process's open files. 4924 */ 4925 static vnode_t * 4926 lxpr_lookup_fddir(vnode_t *dp, char *comp) 4927 { 4928 lxpr_node_t *dlxpnp = VTOLXP(dp); 4929 4930 ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR || 4931 dlxpnp->lxpr_type == LXPR_PID_TID_FDDIR); 4932 4933 return (lxpr_lookup_fdnode(dp, comp)); 4934 } 4935 4936 static vnode_t * 4937 lxpr_lookup_netdir(vnode_t *dp, char *comp) 4938 { 4939 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR); 4940 4941 dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES); 4942 4943 return (dp); 4944 } 4945 4946 static vnode_t * 4947 lxpr_lookup_procdir(vnode_t *dp, char *comp) 4948 { 4949 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR); 4950 4951 /* 4952 * We know all the names of files & dirs in our file system structure 4953 * except those that are pid names. These change as pids are created/ 4954 * deleted etc., so we just look for a number as the first char to see 4955 * if we are we doing pid lookups. 
4956 * 4957 * Don't need to check for "self" as it is implemented as a symlink 4958 */ 4959 if (*comp >= '0' && *comp <= '9') { 4960 pid_t pid = 0; 4961 lxpr_node_t *lxpnp = NULL; 4962 proc_t *p; 4963 int c; 4964 4965 while ((c = *comp++) != '\0') 4966 pid = 10 * pid + c - '0'; 4967 4968 /* 4969 * Can't continue if the process is still loading or it doesn't 4970 * really exist yet (or maybe it just died!) 4971 */ 4972 p = lxpr_lock(pid); 4973 if (p == NULL) 4974 return (NULL); 4975 4976 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) { 4977 lxpr_unlock(p); 4978 return (NULL); 4979 } 4980 4981 /* 4982 * allocate and fill in a new lx /proc node 4983 */ 4984 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0); 4985 4986 lxpr_unlock(p); 4987 4988 dp = LXPTOV(lxpnp); 4989 ASSERT(dp != NULL); 4990 4991 return (dp); 4992 } 4993 4994 /* Lookup fixed names */ 4995 return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES)); 4996 } 4997 4998 static vnode_t * 4999 lxpr_lookup_sysdir(vnode_t *dp, char *comp) 5000 { 5001 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR); 5002 return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES)); 5003 } 5004 5005 static vnode_t * 5006 lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp) 5007 { 5008 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR); 5009 return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir, 5010 SYS_KERNELDIRFILES)); 5011 } 5012 5013 static vnode_t * 5014 lxpr_lookup_sys_kdir_randdir(vnode_t *dp, char *comp) 5015 { 5016 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNEL_RANDDIR); 5017 return (lxpr_lookup_common(dp, comp, NULL, sys_randdir, 5018 SYS_RANDDIRFILES)); 5019 } 5020 5021 static vnode_t * 5022 lxpr_lookup_sys_netdir(vnode_t *dp, char *comp) 5023 { 5024 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NETDIR); 5025 return (lxpr_lookup_common(dp, comp, NULL, sys_netdir, 5026 SYS_NETDIRFILES)); 5027 } 5028 5029 static vnode_t * 5030 lxpr_lookup_sys_net_coredir(vnode_t *dp, char *comp) 5031 { 5032 
ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_COREDIR); 5033 return (lxpr_lookup_common(dp, comp, NULL, sys_net_coredir, 5034 SYS_NET_COREDIRFILES)); 5035 } 5036 5037 static vnode_t * 5038 lxpr_lookup_sys_vmdir(vnode_t *dp, char *comp) 5039 { 5040 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_VMDIR); 5041 return (lxpr_lookup_common(dp, comp, NULL, sys_vmdir, 5042 SYS_VMDIRFILES)); 5043 } 5044 5045 static vnode_t * 5046 lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp) 5047 { 5048 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR); 5049 return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir, 5050 SYS_FSDIRFILES)); 5051 } 5052 5053 static vnode_t * 5054 lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp) 5055 { 5056 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR); 5057 return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir, 5058 SYS_FS_INOTIFYDIRFILES)); 5059 } 5060 5061 /* 5062 * lxpr_readdir(): Vnode operation for VOP_READDIR() 5063 */ 5064 /* ARGSUSED */ 5065 static int 5066 lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp, 5067 caller_context_t *ct, int flags) 5068 { 5069 lxpr_node_t *lxpnp = VTOLXP(dp); 5070 lxpr_nodetype_t type = lxpnp->lxpr_type; 5071 ssize_t uresid; 5072 off_t uoffset; 5073 int error; 5074 5075 ASSERT(dp->v_type == VDIR); 5076 ASSERT(type < LXPR_NFILES); 5077 5078 /* 5079 * we should never get here because the readdir 5080 * is done on the realvp for these nodes 5081 */ 5082 ASSERT(type != LXPR_PID_FD_FD && 5083 type != LXPR_PID_CURDIR && 5084 type != LXPR_PID_ROOTDIR); 5085 5086 /* 5087 * restrict readdir permission to owner or root 5088 */ 5089 if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0) 5090 return (error); 5091 5092 uoffset = uiop->uio_offset; 5093 uresid = uiop->uio_resid; 5094 5095 /* can't do negative reads */ 5096 if (uoffset < 0 || uresid <= 0) 5097 return (EINVAL); 5098 5099 /* can't read directory entries that don't exist! 
*/ 5100 if (uoffset % LXPR_SDSIZE) 5101 return (ENOENT); 5102 5103 return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp)); 5104 } 5105 5106 /* ARGSUSED */ 5107 static int 5108 lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5109 { 5110 return (ENOTDIR); 5111 } 5112 5113 /* 5114 * This has the common logic for returning directory entries 5115 */ 5116 static int 5117 lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp, 5118 lxpr_dirent_t *dirtab, int dirtablen) 5119 { 5120 /* bp holds one dirent64 structure */ 5121 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5122 dirent64_t *dirent = (dirent64_t *)bp; 5123 ssize_t oresid; /* save a copy for testing later */ 5124 ssize_t uresid; 5125 5126 oresid = uiop->uio_resid; 5127 5128 /* clear out the dirent buffer */ 5129 bzero(bp, sizeof (bp)); 5130 5131 /* 5132 * Satisfy user request 5133 */ 5134 while ((uresid = uiop->uio_resid) > 0) { 5135 int dirindex; 5136 off_t uoffset; 5137 int reclen; 5138 int error; 5139 5140 uoffset = uiop->uio_offset; 5141 dirindex = (uoffset / LXPR_SDSIZE) - 2; 5142 5143 if (uoffset == 0) { 5144 5145 dirent->d_ino = lxpnp->lxpr_ino; 5146 dirent->d_name[0] = '.'; 5147 dirent->d_name[1] = '\0'; 5148 reclen = DIRENT64_RECLEN(1); 5149 5150 } else if (uoffset == LXPR_SDSIZE) { 5151 5152 dirent->d_ino = lxpr_parentinode(lxpnp); 5153 dirent->d_name[0] = '.'; 5154 dirent->d_name[1] = '.'; 5155 dirent->d_name[2] = '\0'; 5156 reclen = DIRENT64_RECLEN(2); 5157 5158 } else if (dirindex >= 0 && dirindex < dirtablen) { 5159 int slen = strlen(dirtab[dirindex].d_name); 5160 5161 dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type, 5162 lxpnp->lxpr_pid, 0); 5163 5164 VERIFY(slen < LXPNSIZ); 5165 (void) strcpy(dirent->d_name, dirtab[dirindex].d_name); 5166 reclen = DIRENT64_RECLEN(slen); 5167 5168 } else { 5169 /* Run out of table entries */ 5170 if (eofp) { 5171 *eofp = 1; 5172 } 5173 return (0); 5174 } 5175 5176 dirent->d_off = (off64_t)(uoffset + 
LXPR_SDSIZE); 5177 dirent->d_reclen = (ushort_t)reclen; 5178 5179 /* 5180 * if the size of the data to transfer is greater 5181 * that that requested then we can't do it this transfer. 5182 */ 5183 if (reclen > uresid) { 5184 /* 5185 * Error if no entries have been returned yet. 5186 */ 5187 if (uresid == oresid) { 5188 return (EINVAL); 5189 } 5190 break; 5191 } 5192 5193 /* 5194 * uiomove() updates both uiop->uio_resid and uiop->uio_offset 5195 * by the same amount. But we want uiop->uio_offset to change 5196 * in increments of LXPR_SDSIZE, which is different from the 5197 * number of bytes being returned to the user. So we set 5198 * uiop->uio_offset separately, ignoring what uiomove() does. 5199 */ 5200 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5201 uiop)) != 0) 5202 return (error); 5203 5204 uiop->uio_offset = uoffset + LXPR_SDSIZE; 5205 } 5206 5207 /* Have run out of space, but could have just done last table entry */ 5208 if (eofp) { 5209 *eofp = 5210 (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0; 5211 } 5212 return (0); 5213 } 5214 5215 5216 static int 5217 lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5218 { 5219 /* bp holds one dirent64 structure */ 5220 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5221 dirent64_t *dirent = (dirent64_t *)bp; 5222 ssize_t oresid; /* save a copy for testing later */ 5223 ssize_t uresid; 5224 off_t uoffset; 5225 zoneid_t zoneid; 5226 pid_t pid; 5227 int error; 5228 int ceof; 5229 5230 ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR); 5231 5232 oresid = uiop->uio_resid; 5233 zoneid = LXPTOZ(lxpnp)->zone_id; 5234 5235 /* 5236 * We return directory entries in the order: "." and ".." then the 5237 * unique lxproc files, then the directories corresponding to the 5238 * running processes. We have defined this as the ordering because 5239 * it allows us to more easily keep track of where we are betwen calls 5240 * to getdents(). 
If the number of processes changes between calls 5241 * then we can't lose track of where we are in the lxproc files. 5242 */ 5243 5244 /* Do the fixed entries */ 5245 error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir, 5246 PROCDIRFILES); 5247 5248 /* Finished if we got an error or if we couldn't do all the table */ 5249 if (error != 0 || ceof == 0) 5250 return (error); 5251 5252 /* clear out the dirent buffer */ 5253 bzero(bp, sizeof (bp)); 5254 5255 /* Do the process entries */ 5256 while ((uresid = uiop->uio_resid) > 0) { 5257 proc_t *p; 5258 int len; 5259 int reclen; 5260 int i; 5261 5262 uoffset = uiop->uio_offset; 5263 5264 /* 5265 * Stop when entire proc table has been examined. 5266 */ 5267 i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES; 5268 if (i < 0 || i >= v.v_proc) { 5269 /* Run out of table entries */ 5270 if (eofp) { 5271 *eofp = 1; 5272 } 5273 return (0); 5274 } 5275 mutex_enter(&pidlock); 5276 5277 /* 5278 * Skip indices for which there is no pid_entry, PIDs for 5279 * which there is no corresponding process, a PID of 0, 5280 * and anything the security policy doesn't allow 5281 * us to look at. 5282 */ 5283 if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL || 5284 p->p_pid == 0 || 5285 secpolicy_basic_procinfo(CRED(), p, curproc) != 0) { 5286 mutex_exit(&pidlock); 5287 goto next; 5288 } 5289 mutex_exit(&pidlock); 5290 5291 /* 5292 * Convert pid to the Linux default of 1 if we're the zone's 5293 * init process, or 0 if zsched, otherwise use the value from 5294 * the proc structure 5295 */ 5296 if (p->p_pid == curproc->p_zone->zone_proc_initpid) { 5297 pid = 1; 5298 } else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) { 5299 pid = 0; 5300 } else { 5301 pid = p->p_pid; 5302 } 5303 5304 /* 5305 * If this /proc was mounted in the global zone, view 5306 * all procs; otherwise, only view zone member procs. 
5307 */ 5308 if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) { 5309 goto next; 5310 } 5311 5312 ASSERT(p->p_stat != 0); 5313 5314 dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0); 5315 len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid); 5316 ASSERT(len < LXPNSIZ); 5317 reclen = DIRENT64_RECLEN(len); 5318 5319 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); 5320 dirent->d_reclen = (ushort_t)reclen; 5321 5322 /* 5323 * if the size of the data to transfer is greater 5324 * that that requested then we can't do it this transfer. 5325 */ 5326 if (reclen > uresid) { 5327 /* 5328 * Error if no entries have been returned yet. 5329 */ 5330 if (uresid == oresid) 5331 return (EINVAL); 5332 break; 5333 } 5334 5335 /* 5336 * uiomove() updates both uiop->uio_resid and uiop->uio_offset 5337 * by the same amount. But we want uiop->uio_offset to change 5338 * in increments of LXPR_SDSIZE, which is different from the 5339 * number of bytes being returned to the user. So we set 5340 * uiop->uio_offset separately, in the increment of this for 5341 * the loop, ignoring what uiomove() does. 5342 */ 5343 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5344 uiop)) != 0) 5345 return (error); 5346 next: 5347 uiop->uio_offset = uoffset + LXPR_SDSIZE; 5348 } 5349 5350 if (eofp != NULL) { 5351 *eofp = (uiop->uio_offset >= 5352 ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 
	    1 : 0;
	}

	return (0);
}

/*
 * lxpr_readdir_piddir(): return directory entries for a pid directory,
 * first verifying that the process still exists.
 */
static int
lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	proc_t *p;
	pid_t find_pid;

	ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);

	/* can't read its contents if it died */
	mutex_enter(&pidlock);

	/*
	 * Map the emulated Linux pids back to real processes: pid 1 is the
	 * zone's init process and pid 0 is zsched (the inverse of the
	 * mapping applied in lxpr_readdir_procdir).
	 */
	if (lxpnp->lxpr_pid == 1) {
		find_pid = curproc->p_zone->zone_proc_initpid;
	} else if (lxpnp->lxpr_pid == 0) {
		find_pid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		find_pid = lxpnp->lxpr_pid;
	}
	p = prfind(find_pid);

	if (p == NULL || p->p_stat == SIDL) {
		mutex_exit(&pidlock);
		return (ENOENT);
	}
	mutex_exit(&pidlock);

	return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
}

/*
 * lxpr_readdir_netdir(): return the fixed entries of the net directory.
 */
static int
lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
}

/*
 * lxpr_readdir_taskdir(): return directory entries for the task directory,
 * one entry per visible thread of the process.
 */
static int
lxpr_readdir_taskdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;
	off_t uoffset;
	int error;
	int ceof;
	proc_t *p;
	int tiddirsize = -1;
	int tasknum;
	pid_t real_pid;
	kthread_t *t;
	boolean_t branded = B_FALSE;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_TASKDIR);

	oresid = uiop->uio_resid;

	real_pid = get_real_pid(lxpnp->lxpr_pid);
	p = lxpr_lock(real_pid);

	/* can't read its contents if it died */
	if (p == NULL) {
		return (ENOENT);
	}
	if (p->p_stat == SIDL) {
		lxpr_unlock(p);
		return (ENOENT);
	}

	/* Zombie and system processes have no visible threads. */
	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
		tiddirsize = 0;

branded = (p->p_brand == &lx_brand); 5432 /* 5433 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from 5434 * going away while we iterate over its threads. 5435 */ 5436 mutex_exit(&p->p_lock); 5437 5438 if (tiddirsize == -1) 5439 tiddirsize = p->p_lwpcnt; 5440 5441 /* Do the fixed entries (in this case just "." & "..") */ 5442 error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0); 5443 5444 /* Finished if we got an error or if we couldn't do all the table */ 5445 if (error != 0 || ceof == 0) 5446 goto out; 5447 5448 if ((t = p->p_tlist) == NULL) { 5449 if (eofp != NULL) 5450 *eofp = 1; 5451 goto out; 5452 } 5453 5454 /* clear out the dirent buffer */ 5455 bzero(bp, sizeof (bp)); 5456 5457 /* 5458 * Loop until user's request is satisfied or until all thread's have 5459 * been returned. 5460 */ 5461 for (tasknum = 0; (uresid = uiop->uio_resid) > 0; tasknum++) { 5462 int i; 5463 int reclen; 5464 int len; 5465 uint_t emul_tid; 5466 lx_lwp_data_t *lwpd; 5467 5468 uoffset = uiop->uio_offset; 5469 5470 /* 5471 * Stop at the end of the thread list 5472 */ 5473 i = (uoffset / LXPR_SDSIZE) - 2; 5474 if (i < 0 || i >= tiddirsize) { 5475 if (eofp) { 5476 *eofp = 1; 5477 } 5478 goto out; 5479 } 5480 5481 if (i != tasknum) 5482 goto next; 5483 5484 if (!branded) { 5485 /* 5486 * Emulating the goofy linux task model is impossible 5487 * to do for native processes. We can compromise by 5488 * presenting only the main thread to the consumer. 5489 */ 5490 emul_tid = p->p_pid; 5491 } else { 5492 if ((lwpd = ttolxlwp(t)) == NULL) { 5493 goto next; 5494 } 5495 emul_tid = lwpd->br_pid; 5496 /* 5497 * Convert pid to Linux default of 1 if we're the 5498 * zone's init. 
5499 */ 5500 if (emul_tid == curproc->p_zone->zone_proc_initpid) 5501 emul_tid = 1; 5502 } 5503 5504 dirent->d_ino = lxpr_inode(LXPR_PID_TASK_IDDIR, lxpnp->lxpr_pid, 5505 emul_tid); 5506 len = snprintf(dirent->d_name, LXPNSIZ, "%d", emul_tid); 5507 ASSERT(len < LXPNSIZ); 5508 reclen = DIRENT64_RECLEN(len); 5509 5510 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); 5511 dirent->d_reclen = (ushort_t)reclen; 5512 5513 if (reclen > uresid) { 5514 /* 5515 * Error if no entries have been returned yet. 5516 */ 5517 if (uresid == oresid) 5518 error = EINVAL; 5519 goto out; 5520 } 5521 5522 /* 5523 * uiomove() updates both uiop->uio_resid and uiop->uio_offset 5524 * by the same amount. But we want uiop->uio_offset to change 5525 * in increments of LXPR_SDSIZE, which is different from the 5526 * number of bytes being returned to the user. So we set 5527 * uiop->uio_offset separately, in the increment of this for 5528 * the loop, ignoring what uiomove() does. 5529 */ 5530 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5531 uiop)) != 0) 5532 goto out; 5533 5534 next: 5535 uiop->uio_offset = uoffset + LXPR_SDSIZE; 5536 5537 if ((t = t->t_forw) == p->p_tlist || !branded) { 5538 if (eofp != NULL) 5539 *eofp = 1; 5540 goto out; 5541 } 5542 } 5543 5544 if (eofp != NULL) 5545 *eofp = 0; 5546 5547 out: 5548 mutex_enter(&p->p_lock); 5549 lxpr_unlock(p); 5550 return (error); 5551 } 5552 5553 static int 5554 lxpr_readdir_task_tid_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5555 { 5556 proc_t *p; 5557 pid_t real_pid; 5558 kthread_t *t; 5559 5560 ASSERT(lxpnp->lxpr_type == LXPR_PID_TASK_IDDIR); 5561 5562 mutex_enter(&pidlock); 5563 5564 real_pid = get_real_pid(lxpnp->lxpr_pid); 5565 p = prfind(real_pid); 5566 5567 /* can't read its contents if it died */ 5568 if (p == NULL || p->p_stat == SIDL) { 5569 mutex_exit(&pidlock); 5570 return (ENOENT); 5571 } 5572 5573 mutex_exit(&pidlock); 5574 5575 /* need to confirm tid is still there */ 5576 t = lxpr_get_thread(p, 
lxpnp->lxpr_desc); 5577 if (t == NULL) { 5578 /* we can't find this specific thread */ 5579 return (NULL); 5580 } 5581 thread_unlock(t); 5582 5583 return (lxpr_readdir_common(lxpnp, uiop, eofp, tiddir, TIDDIRFILES)); 5584 } 5585 5586 static int 5587 lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5588 { 5589 /* bp holds one dirent64 structure */ 5590 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)]; 5591 dirent64_t *dirent = (dirent64_t *)bp; 5592 ssize_t oresid; /* save a copy for testing later */ 5593 ssize_t uresid; 5594 off_t uoffset; 5595 int error; 5596 int ceof; 5597 proc_t *p; 5598 int fddirsize = -1; 5599 uf_info_t *fip; 5600 5601 ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR || 5602 lxpnp->lxpr_type == LXPR_PID_TID_FDDIR); 5603 5604 oresid = uiop->uio_resid; 5605 5606 /* can't read its contents if it died */ 5607 p = lxpr_lock(lxpnp->lxpr_pid); 5608 if (p == NULL) 5609 return (ENOENT); 5610 5611 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) 5612 fddirsize = 0; 5613 5614 /* 5615 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from 5616 * going away while we iterate over its fi_list. 5617 */ 5618 mutex_exit(&p->p_lock); 5619 5620 /* Get open file info */ 5621 fip = (&(p)->p_user.u_finfo); 5622 mutex_enter(&fip->fi_lock); 5623 5624 if (fddirsize == -1) 5625 fddirsize = fip->fi_nfiles; 5626 5627 /* Do the fixed entries (in this case just "." & "..") */ 5628 error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0); 5629 5630 /* Finished if we got an error or if we couldn't do all the table */ 5631 if (error != 0 || ceof == 0) 5632 goto out; 5633 5634 /* clear out the dirent buffer */ 5635 bzero(bp, sizeof (bp)); 5636 5637 /* 5638 * Loop until user's request is satisfied or until 5639 * all file descriptors have been examined. 
5640 */ 5641 for (; (uresid = uiop->uio_resid) > 0; 5642 uiop->uio_offset = uoffset + LXPR_SDSIZE) { 5643 int reclen; 5644 int fd; 5645 int len; 5646 5647 uoffset = uiop->uio_offset; 5648 5649 /* 5650 * Stop at the end of the fd list 5651 */ 5652 fd = (uoffset / LXPR_SDSIZE) - 2; 5653 if (fd < 0 || fd >= fddirsize) { 5654 if (eofp) { 5655 *eofp = 1; 5656 } 5657 goto out; 5658 } 5659 5660 if (fip->fi_list[fd].uf_file == NULL) 5661 continue; 5662 5663 dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd); 5664 len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd); 5665 ASSERT(len < LXPNSIZ); 5666 reclen = DIRENT64_RECLEN(len); 5667 5668 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE); 5669 dirent->d_reclen = (ushort_t)reclen; 5670 5671 if (reclen > uresid) { 5672 /* 5673 * Error if no entries have been returned yet. 5674 */ 5675 if (uresid == oresid) 5676 error = EINVAL; 5677 goto out; 5678 } 5679 5680 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, 5681 uiop)) != 0) 5682 goto out; 5683 } 5684 5685 if (eofp != NULL) { 5686 *eofp = 5687 (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 
1 : 0; 5688 } 5689 5690 out: 5691 mutex_exit(&fip->fi_lock); 5692 mutex_enter(&p->p_lock); 5693 lxpr_unlock(p); 5694 return (error); 5695 } 5696 5697 static int 5698 lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5699 { 5700 ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR); 5701 return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES)); 5702 } 5703 5704 static int 5705 lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5706 { 5707 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR); 5708 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir, 5709 SYS_FSDIRFILES)); 5710 } 5711 5712 static int 5713 lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5714 { 5715 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR); 5716 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir, 5717 SYS_FS_INOTIFYDIRFILES)); 5718 } 5719 5720 static int 5721 lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5722 { 5723 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR); 5724 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir, 5725 SYS_KERNELDIRFILES)); 5726 } 5727 5728 static int 5729 lxpr_readdir_sys_kdir_randdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5730 { 5731 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RANDDIR); 5732 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_randdir, 5733 SYS_RANDDIRFILES)); 5734 } 5735 5736 static int 5737 lxpr_readdir_sys_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5738 { 5739 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NETDIR); 5740 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_netdir, 5741 SYS_NETDIRFILES)); 5742 } 5743 5744 static int 5745 lxpr_readdir_sys_net_coredir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp) 5746 { 5747 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_COREDIR); 5748 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_coredir, 5749 SYS_NET_COREDIRFILES)); 5750 } 5751 5752 static int 5753 lxpr_readdir_sys_vmdir(lxpr_node_t *lxpnp, uio_t *uiop, int 
    *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VMDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_vmdir,
	    SYS_VMDIRFILES));
}

/*
 * Write handler for /proc/sys/net/core/somaxconn: parse the user's value
 * and apply it to the zone's TCP stack as the "_conn_req_max_q" property.
 */
static int
lxpr_write_sys_net_core_somaxc(lxpr_node_t *lxpnp, struct uio *uio,
    struct cred *cr, caller_context_t *ct)
{
	int error;
	int res = 0;
	size_t olen;
	char val[16];	/* big enough for a uint numeric string */
	netstack_t *ns;
	mod_prop_info_t *ptbl = NULL;
	mod_prop_info_t *pinfo = NULL;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);

	/* whole value must be written in a single write at offset 0 */
	if (uio->uio_loffset != 0)
		return (EINVAL);

	if (uio->uio_resid == 0)
		return (0);

	olen = uio->uio_resid;
	if (olen > sizeof (val) - 1)
		return (EINVAL);

	bzero(val, sizeof (val));
	error = uiomove(val, olen, UIO_WRITE, uio);
	if (error != 0)
		return (error);

	/* strip a single trailing newline, as Linux sysctl writers send one */
	if (val[olen - 1] == '\n')
		val[olen - 1] = '\0';

	if (val[0] == '\0') /* no input */
		return (EINVAL);

	ns = netstack_get_current();
	if (ns == NULL)
		return (EINVAL);

	/* map somaxconn onto the TCP connection backlog property */
	ptbl = ns->netstack_tcp->tcps_propinfo_tbl;
	pinfo = mod_prop_lookup(ptbl, "_conn_req_max_q", MOD_PROTO_TCP);
	if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, val, 0) != 0)
		res = EINVAL;

	netstack_rele(ns);
	return (res);
}

/*
 * Write handler for /proc/sys/kernel/core_pattern: translate the Linux
 * core pattern to its native form and install it as the zone's default
 * core file path.  Requires core administration privilege.
 */
/* ARGSUSED */
static int
lxpr_write_sys_kernel_corepatt(lxpr_node_t *lxpnp, struct uio *uio,
    struct cred *cr, caller_context_t *ct)
{
	zone_t *zone = curproc->p_zone;
	struct core_globals *cg;
	refstr_t *rp, *nrp;
	corectl_path_t *ccp;
	char val[MAXPATHLEN];
	char valtr[MAXPATHLEN];
	size_t olen;
	int error;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);

	cg = zone_getspecific(core_zone_key, zone);
	ASSERT(cg != NULL);

	if (secpolicy_coreadm(cr) != 0)
		return (EPERM);

	/* whole value must be written in a single write at offset 0 */
	if (uio->uio_loffset
	    != 0)
		return (EINVAL);

	if (uio->uio_resid == 0)
		return (0);

	olen = uio->uio_resid;
	if (olen > sizeof (val) - 1)
		return (EINVAL);

	bzero(val, sizeof (val));
	error = uiomove(val, olen, UIO_WRITE, uio);
	if (error != 0)
		return (error);

	/* strip a single trailing newline */
	if (val[olen - 1] == '\n')
		val[olen - 1] = '\0';

	/* pipe-to-helper core patterns are not supported */
	if (val[0] == '|')
		return (EINVAL);

	/* translate Linux %-tokens into the native core path syntax */
	if ((error = lxpr_core_path_l2s(val, valtr, sizeof (valtr))) != 0)
		return (error);

	nrp = refstr_alloc(valtr);

	ccp = cg->core_default_path;
	mutex_enter(&ccp->ccp_mtx);
	/* swap in the new path under ccp_mtx; release the old one after */
	rp = ccp->ccp_path;
	refstr_hold((ccp->ccp_path = nrp));
	cg->core_options |= CC_PROCESS_PATH;
	mutex_exit(&ccp->ccp_mtx);

	if (rp != NULL)
		refstr_rele(rp);

	return (0);
}

/*
 * lxpr_readlink(): Vnode operation for VOP_READLINK()
 */
/* ARGSUSED */
static int
lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
{
	char bp[MAXPATHLEN + 1];
	size_t buflen = sizeof (bp);
	lxpr_node_t *lxpnp = VTOLXP(vp);
	vnode_t *rvp = lxpnp->lxpr_realvp;
	pid_t pid;
	int error = 0;

	/*
	 * Linux does something very "clever" for /proc/<pid>/fd/<num> entries.
	 * Open FDs are represented as symlinks, the link contents
	 * corresponding to the open resource.  For plain files or devices,
	 * this isn't absurd since one can dereference the symlink to query
	 * the underlying resource.  For sockets or pipes, it becomes ugly in a
	 * hurry.  To maintain this human-readable output, those FD symlinks
	 * point to bogus targets such as "socket:[<inodenum>]".  This requires
	 * circumventing vfs since the stat/lstat behavior on those FD entries
	 * will be unusual.  (A stat must retrieve information about the open
	 * socket or pipe.  It cannot fail because the link contents point to
	 * an absent file.)
	 *
	 * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD
	 * entries.  This bypasses code paths which would normally
	 * short-circuit on symlinks and allows us to emulate the vfs behavior
	 * expected by /proc consumers.
	 */
	if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD)
		return (EINVAL);

	/* Try to produce a symlink name for anything that has a realvp */
	if (rvp != NULL) {
		if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
			return (error);
		if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) {
			/*
			 * Special handling possible for /proc/<pid>/fd/<num>
			 * Generate <type>:[<inode>] links, if allowed.
			 */
			if (lxpnp->lxpr_type != LXPR_PID_FD_FD ||
			    lxpr_readlink_fdnode(lxpnp, bp, buflen) != 0) {
				return (error);
			}
		}
	} else {
		switch (lxpnp->lxpr_type) {
		case LXPR_SELF:
			/*
			 * Convert pid to the Linux default of 1 if we're the
			 * zone's init process or 0 if zsched.
			 */
			if (curproc->p_pid ==
			    curproc->p_zone->zone_proc_initpid) {
				pid = 1;
			} else if (curproc->p_pid ==
			    curproc->p_zone->zone_zsched->p_pid) {
				pid = 0;
			} else {
				pid = curproc->p_pid;
			}

			/*
			 * Don't need to check result as every possible int
			 * will fit within MAXPATHLEN bytes.
			 */
			(void) snprintf(bp, buflen, "%d", pid);
			break;
		case LXPR_PID_CURDIR:
		case LXPR_PID_ROOTDIR:
		case LXPR_PID_EXE:
			return (EACCES);
		default:
			/*
			 * Need to return error so that nothing thinks
			 * that the symlink is empty and hence "."
			 */
			return (EINVAL);
		}
	}

	/* copy the link data to user space */
	return (uiomove(bp, strlen(bp), UIO_READ, uiop));
}


/*
 * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
 * Vnode is no longer referenced, deallocate the file
 * and all its resources.
 */
/* ARGSUSED */
static void
lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	lxpr_freenode(VTOLXP(vp));
}

/*
 * lxpr_sync(): Vnode operation for VOP_SYNC()
 */
static int
lxpr_sync()
{
	/*
	 * Nothing to sync but this function must never fail
	 */
	return (0);
}

/*
 * lxpr_cmp(): Vnode operation for VOP_CMP()
 * Chase lxpr_realvp chains on both vnodes before comparing, so that an
 * lx_proc vnode shadowing a real file compares equal to that file.
 */
static int
lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
{
	vnode_t *rvp;

	while (vn_matchops(vp1, lxpr_vnodeops) &&
	    (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
		vp1 = rvp;
	}

	while (vn_matchops(vp2, lxpr_vnodeops) &&
	    (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
		vp2 = rvp;
	}

	/* if either side is still an lx_proc vnode, compare identities */
	if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
		return (vp1 == vp2);
	return (VOP_CMP(vp1, vp2, ct));
}

/*
 * lxpr_realvp(): Vnode operation for VOP_REALVP()
 */
static int
lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
{
	vnode_t *rvp;

	if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
		vp = rvp;
		/* the underlying vnode may itself have a realvp */
		if (VOP_REALVP(vp, &rvp, ct) == 0)
			vp = rvp;
	}

	*vpp = vp;
	return (0);
}

/*
 * lxpr_write(): Vnode operation for VOP_WRITE()
 * Dispatch writes to the few writable lx_proc files; writes to anything
 * else are silently accepted and discarded.
 */
static int
lxpr_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
    caller_context_t *ct)
{
	lxpr_node_t *lxpnp = VTOLXP(vp);
	lxpr_nodetype_t type = lxpnp->lxpr_type;

	switch (type) {
	case LXPR_SYS_KERNEL_COREPATT:
		return (lxpr_write_sys_kernel_corepatt(lxpnp, uiop, cr, ct));
	case LXPR_SYS_NET_CORE_SOMAXCON:
		return (lxpr_write_sys_net_core_somaxc(lxpnp, uiop, cr, ct));

	default:
		/* pretend we wrote the whole thing */
		uiop->uio_offset += uiop->uio_resid;
		uiop->uio_resid = 0;
		return (0);
	}
}

/*
 * We need to allow open with O_CREAT for the oom_score_adj file.
6047 */ 6048 /*ARGSUSED7*/ 6049 static int 6050 lxpr_create(struct vnode *dvp, char *nm, struct vattr *vap, 6051 enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred, 6052 int flag, caller_context_t *ct, vsecattr_t *vsecp) 6053 { 6054 lxpr_node_t *lxpnp = VTOLXP(dvp); 6055 lxpr_nodetype_t type = lxpnp->lxpr_type; 6056 vnode_t *vp = NULL; 6057 int error; 6058 6059 ASSERT(type < LXPR_NFILES); 6060 6061 /* 6062 * restrict create permission to owner or root 6063 */ 6064 if ((error = lxpr_access(dvp, VEXEC, 0, cred, ct)) != 0) { 6065 return (error); 6066 } 6067 6068 if (*nm == '\0') 6069 return (EPERM); 6070 6071 if (dvp->v_type != VDIR) 6072 return (EPERM); 6073 6074 if (exclusive == EXCL) 6075 return (EEXIST); 6076 6077 /* 6078 * We're currently restricting O_CREAT to: 6079 * - /proc/<pid>/fd/<num> 6080 * - /proc/<pid>/oom_score_adj 6081 * - /proc/<pid>/task/<tid>/fd/<num> 6082 * - /proc/<pid>/task/<tid>/oom_score_adj 6083 * - /proc/sys/kernel/core_pattern 6084 * - /proc/sys/net/core/somaxconn 6085 * - /proc/sys/vm/overcommit_memory 6086 * - /proc/sys/vm/swappiness 6087 */ 6088 switch (type) { 6089 case LXPR_PIDDIR: 6090 case LXPR_PID_TASK_IDDIR: 6091 if (strcmp(nm, "oom_score_adj") == 0) { 6092 proc_t *p; 6093 p = lxpr_lock(lxpnp->lxpr_pid); 6094 if (p != NULL) { 6095 vp = lxpr_lookup_common(dvp, nm, p, piddir, 6096 PIDDIRFILES); 6097 } 6098 lxpr_unlock(p); 6099 } 6100 break; 6101 6102 case LXPR_SYS_NET_COREDIR: 6103 if (strcmp(nm, "somaxconn") == 0) { 6104 vp = lxpr_lookup_common(dvp, nm, NULL, sys_net_coredir, 6105 SYS_NET_COREDIRFILES); 6106 } 6107 break; 6108 6109 case LXPR_SYS_KERNELDIR: 6110 if (strcmp(nm, "core_pattern") == 0) { 6111 vp = lxpr_lookup_common(dvp, nm, NULL, sys_kerneldir, 6112 SYS_KERNELDIRFILES); 6113 } 6114 break; 6115 6116 case LXPR_SYS_VMDIR: 6117 if (strcmp(nm, "overcommit_memory") == 0 || 6118 strcmp(nm, "swappiness") == 0) { 6119 vp = lxpr_lookup_common(dvp, nm, NULL, sys_vmdir, 6120 SYS_VMDIRFILES); 6121 } 6122 break; 
6123 6124 case LXPR_PID_FDDIR: 6125 case LXPR_PID_TID_FDDIR: 6126 vp = lxpr_lookup_fdnode(dvp, nm); 6127 break; 6128 6129 default: 6130 vp = NULL; 6131 break; 6132 } 6133 6134 if (vp != NULL) { 6135 /* Creating an existing file, allow it for regular files. */ 6136 if (vp->v_type == VDIR) 6137 return (EISDIR); 6138 6139 /* confirm permissions against existing file */ 6140 if ((error = lxpr_access(vp, mode, 0, cred, ct)) != 0) { 6141 VN_RELE(vp); 6142 return (error); 6143 } 6144 6145 *vpp = vp; 6146 return (0); 6147 } 6148 6149 /* 6150 * Linux proc does not allow creation of addition, non-subsystem 6151 * specific files inside the hierarchy. ENOENT is tossed when such 6152 * actions are attempted. 6153 */ 6154 return (ENOENT); 6155 }