1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2016 Joyent, Inc.
25 */
26
27 /*
28 * lx_proc -- a Linux-compatible /proc for the LX brand
29 *
30 * We have -- confusingly -- two implementations of Linux /proc. One is to
31 * support native (but Linux-borne) programs that wish to view the native
32 * system through the Linux /proc model; the other -- this one -- is to
33 * support Linux binaries via the LX brand. These two implementations differ
34 * greatly in their aspirations (and their willingness to bend the truth
35 * of the system to accommodate those aspirations); they should not be unified.
36 */
37
38 #include <sys/cpupart.h>
39 #include <sys/cpuvar.h>
40 #include <sys/session.h>
41 #include <sys/vmparam.h>
42 #include <sys/mman.h>
43 #include <vm/rm.h>
44 #include <vm/seg_vn.h>
45 #include <sys/sdt.h>
46 #include <lx_signum.h>
47 #include <sys/strlog.h>
48 #include <sys/stropts.h>
49 #include <sys/cmn_err.h>
50 #include <sys/lx_brand.h>
51 #include <lx_auxv.h>
52 #include <sys/x86_archext.h>
53 #include <sys/archsystm.h>
54 #include <sys/fp.h>
55 #include <sys/pool_pset.h>
56 #include <sys/pset.h>
57 #include <sys/zone.h>
58 #include <sys/pghw.h>
59 #include <sys/vfs_opreg.h>
60 #include <sys/param.h>
61 #include <sys/utsname.h>
62 #include <sys/rctl.h>
63 #include <sys/kstat.h>
64 #include <sys/lx_misc.h>
65 #include <sys/brand.h>
66 #include <sys/cred_impl.h>
67 #include <sys/tihdr.h>
68 #include <sys/corectl.h>
69 #include <inet/ip.h>
70 #include <inet/ip_ire.h>
71 #include <inet/ip6.h>
72 #include <inet/ip_if.h>
73 #include <inet/tcp.h>
74 #include <inet/tcp_impl.h>
75 #include <inet/udp_impl.h>
76 #include <inet/ipclassifier.h>
77 #include <sys/socketvar.h>
78 #include <fs/sockfs/socktpi.h>
79
/*
 * Dependent on procfs: these routines are supplied by the native procfs
 * implementation and reused here to read process state (argument vector,
 * environment vector, raw address-space contents) on behalf of lx /proc.
 */
extern kthread_t *prchoose(proc_t *);
extern int prreadargv(proc_t *, char *, size_t, size_t *);
extern int prreadenvv(proc_t *, char *, size_t, size_t *);
extern int prreadbuf(proc_t *, uintptr_t, uint8_t *, size_t, size_t *);

#include "lx_proc.h"

/* Globals owned elsewhere in the kernel; read-only here. */
extern pgcnt_t swapfs_minfree;
extern time_t boot_time;

/*
 * Pointer to the vnode ops vector for this fs.
 * This is instantiated in lxprinit() in lxpr_vfsops.c
 */
vnodeops_t *lxpr_vnodeops;
96
97 static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
98 static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
99 caller_context_t *);
100 static int lxpr_create(struct vnode *, char *, struct vattr *, enum vcexcl,
101 int, struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *);
102 static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
103 static int lxpr_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
104 static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
105 caller_context_t *);
106 static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
107 static int lxpr_lookup(vnode_t *, char *, vnode_t **,
108 pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
109 pathname_t *);
110 static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
111 caller_context_t *, int);
112 static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
113 static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
114 static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
115 static int lxpr_sync(void);
116 static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
117
118 static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
119 static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
120 static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
121 static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
122 static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
123 static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *);
124 static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *);
125 static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *);
126 static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *);
127 static vnode_t *lxpr_lookup_sys_kdir_randdir(vnode_t *, char *);
128 static vnode_t *lxpr_lookup_sys_netdir(vnode_t *, char *);
129 static vnode_t *lxpr_lookup_sys_net_coredir(vnode_t *, char *);
130 static vnode_t *lxpr_lookup_sys_vmdir(vnode_t *, char *);
131 static vnode_t *lxpr_lookup_taskdir(vnode_t *, char *);
132 static vnode_t *lxpr_lookup_task_tid_dir(vnode_t *, char *);
133
134 static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
135 static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
136 static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
137 static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
138 static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
139 static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *);
140 static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *);
141 static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *);
142 static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *);
143 static int lxpr_readdir_sys_kdir_randdir(lxpr_node_t *, uio_t *, int *);
144 static int lxpr_readdir_sys_netdir(lxpr_node_t *, uio_t *, int *);
145 static int lxpr_readdir_sys_net_coredir(lxpr_node_t *, uio_t *, int *);
146 static int lxpr_readdir_sys_vmdir(lxpr_node_t *, uio_t *, int *);
147 static int lxpr_readdir_taskdir(lxpr_node_t *, uio_t *, int *);
148 static int lxpr_readdir_task_tid_dir(lxpr_node_t *, uio_t *, int *);
149
150 static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
151 static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
152 static void lxpr_read_cgroups(lxpr_node_t *, lxpr_uiobuf_t *);
153 static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
154 static void lxpr_read_diskstats(lxpr_node_t *, lxpr_uiobuf_t *);
155 static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
156 static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
157 static void lxpr_read_filesystems(lxpr_node_t *, lxpr_uiobuf_t *);
158 static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t);
159 static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
160 static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
161 static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
162 static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
163 static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
164 static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *);
165 static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
166 static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
167
168 static void lxpr_read_pid_auxv(lxpr_node_t *, lxpr_uiobuf_t *);
169 static void lxpr_read_pid_cgroup(lxpr_node_t *, lxpr_uiobuf_t *);
170 static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
171 static void lxpr_read_pid_comm(lxpr_node_t *, lxpr_uiobuf_t *);
172 static void lxpr_read_pid_env(lxpr_node_t *, lxpr_uiobuf_t *);
173 static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *);
174 static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
175 static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *);
176 static void lxpr_read_pid_oom_scr_adj(lxpr_node_t *, lxpr_uiobuf_t *);
177 static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
178 static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
179 static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
180
181 static void lxpr_read_pid_tid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
182 static void lxpr_read_pid_tid_status(lxpr_node_t *, lxpr_uiobuf_t *);
183
184 static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
185 static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
186 static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
187 static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *);
188 static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
189 static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
190 static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
191 static void lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *);
192 static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
193 static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
194 static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
195 static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
196 static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
197 static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
198 static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
199 static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
200 static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
201 static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
202 static void lxpr_read_net_tcp6(lxpr_node_t *, lxpr_uiobuf_t *);
203 static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
204 static void lxpr_read_net_udp6(lxpr_node_t *, lxpr_uiobuf_t *);
205 static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
206 static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *,
207 lxpr_uiobuf_t *);
208 static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *,
209 lxpr_uiobuf_t *);
210 static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *,
211 lxpr_uiobuf_t *);
212 static void lxpr_read_sys_kernel_caplcap(lxpr_node_t *, lxpr_uiobuf_t *);
213 static void lxpr_read_sys_kernel_corepatt(lxpr_node_t *, lxpr_uiobuf_t *);
214 static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *);
215 static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *);
216 static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *);
217 static void lxpr_read_sys_kernel_osrel(lxpr_node_t *, lxpr_uiobuf_t *);
218 static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *);
219 static void lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *, lxpr_uiobuf_t *);
220 static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, lxpr_uiobuf_t *);
221 static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *);
222 static void lxpr_read_sys_net_core_somaxc(lxpr_node_t *, lxpr_uiobuf_t *);
223 static void lxpr_read_sys_vm_minfr_kb(lxpr_node_t *, lxpr_uiobuf_t *);
224 static void lxpr_read_sys_vm_nhpages(lxpr_node_t *, lxpr_uiobuf_t *);
225 static void lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *, lxpr_uiobuf_t *);
226 static void lxpr_read_sys_vm_swappiness(lxpr_node_t *, lxpr_uiobuf_t *);
227
228 static int lxpr_write_sys_net_core_somaxc(lxpr_node_t *, uio_t *, cred_t *,
229 caller_context_t *);
230 static int lxpr_write_sys_kernel_corepatt(lxpr_node_t *, uio_t *, cred_t *,
231 caller_context_t *);
232
233 /*
234 * Simple conversion
235 */
236 #define btok(x) ((x) >> 10) /* bytes to kbytes */
237 #define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */
238
239 #define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t))
240
241 extern rctl_hndl_t rc_zone_msgmni;
242 extern rctl_hndl_t rc_zone_shmmax;
243 #define FOURGB 4294967295
244
245 /*
246 * The maximum length of the concatenation of argument vector strings we
247 * will return to the user via the branded procfs. Likewise for the env vector.
248 */
249 int lxpr_maxargvlen = 4096;
250 int lxpr_maxenvvlen = 4096;
251
252 /*
253 * The lx /proc vnode operations vector
254 */
255 const fs_operation_def_t lxpr_vnodeops_template[] = {
256 VOPNAME_OPEN, { .vop_open = lxpr_open },
257 VOPNAME_CLOSE, { .vop_close = lxpr_close },
258 VOPNAME_READ, { .vop_read = lxpr_read },
259 VOPNAME_WRITE, { .vop_read = lxpr_write },
260 VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr },
261 VOPNAME_ACCESS, { .vop_access = lxpr_access },
262 VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup },
263 VOPNAME_CREATE, { .vop_create = lxpr_create },
264 VOPNAME_READDIR, { .vop_readdir = lxpr_readdir },
265 VOPNAME_READLINK, { .vop_readlink = lxpr_readlink },
266 VOPNAME_FSYNC, { .error = lxpr_sync },
267 VOPNAME_SEEK, { .error = lxpr_sync },
268 VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive },
269 VOPNAME_CMP, { .vop_cmp = lxpr_cmp },
270 VOPNAME_REALVP, { .vop_realvp = lxpr_realvp },
271 NULL, NULL
272 };
273
274
275 /*
276 * file contents of an lx /proc directory.
277 */
/*
 * File contents of an lx /proc directory.  Each entry pairs the node-type
 * constant (LXPR_*) with the Linux-visible file name.
 */
static lxpr_dirent_t lx_procdir[] = {
	{ LXPR_CGROUPS,		"cgroups" },
	{ LXPR_CMDLINE,		"cmdline" },
	{ LXPR_CPUINFO,		"cpuinfo" },
	{ LXPR_DEVICES,		"devices" },
	{ LXPR_DISKSTATS,	"diskstats" },
	{ LXPR_DMA,		"dma" },
	{ LXPR_FILESYSTEMS,	"filesystems" },
	{ LXPR_INTERRUPTS,	"interrupts" },
	{ LXPR_IOPORTS,		"ioports" },
	{ LXPR_KCORE,		"kcore" },
	{ LXPR_KMSG,		"kmsg" },
	{ LXPR_LOADAVG,		"loadavg" },
	{ LXPR_MEMINFO,		"meminfo" },
	{ LXPR_MODULES,		"modules" },
	{ LXPR_MOUNTS,		"mounts" },
	{ LXPR_NETDIR,		"net" },
	{ LXPR_PARTITIONS,	"partitions" },
	{ LXPR_SELF,		"self" },
	{ LXPR_STAT,		"stat" },
	{ LXPR_SWAPS,		"swaps" },
	{ LXPR_SYSDIR,		"sys" },
	{ LXPR_UPTIME,		"uptime" },
	{ LXPR_VERSION,		"version" }
};

#define	PROCDIRFILES	(sizeof (lx_procdir) / sizeof (lx_procdir[0]))
305
306 /*
307 * Contents of an lx /proc/<pid> directory.
308 */
/*
 * Contents of an lx /proc/<pid> directory: node type paired with the
 * Linux-visible entry name.
 */
static lxpr_dirent_t piddir[] = {
	{ LXPR_PID_AUXV,	"auxv" },
	{ LXPR_PID_CGROUP,	"cgroup" },
	{ LXPR_PID_CMDLINE,	"cmdline" },
	{ LXPR_PID_COMM,	"comm" },
	{ LXPR_PID_CPU,		"cpu" },
	{ LXPR_PID_CURDIR,	"cwd" },
	{ LXPR_PID_ENV,		"environ" },
	{ LXPR_PID_EXE,		"exe" },
	{ LXPR_PID_LIMITS,	"limits" },
	{ LXPR_PID_MAPS,	"maps" },
	{ LXPR_PID_MEM,		"mem" },
	{ LXPR_PID_MOUNTINFO,	"mountinfo" },
	{ LXPR_PID_OOM_SCR_ADJ,	"oom_score_adj" },
	{ LXPR_PID_ROOTDIR,	"root" },
	{ LXPR_PID_STAT,	"stat" },
	{ LXPR_PID_STATM,	"statm" },
	{ LXPR_PID_STATUS,	"status" },
	{ LXPR_PID_TASKDIR,	"task" },
	{ LXPR_PID_FDDIR,	"fd" }
};

#define	PIDDIRFILES	(sizeof (piddir) / sizeof (piddir[0]))
332
333 /*
334 * Contents of an lx /proc/<pid>/task/<tid> directory.
335 */
/*
 * Contents of an lx /proc/<pid>/task/<tid> directory.  Mirrors piddir,
 * except that per-thread files (auxv, comm, oom_score_adj, stat, status)
 * use the LXPR_PID_TID_* node types.
 */
static lxpr_dirent_t tiddir[] = {
	{ LXPR_PID_TID_AUXV,	"auxv" },
	{ LXPR_PID_CGROUP,	"cgroup" },
	{ LXPR_PID_CMDLINE,	"cmdline" },
	{ LXPR_PID_TID_COMM,	"comm" },
	{ LXPR_PID_CPU,		"cpu" },
	{ LXPR_PID_CURDIR,	"cwd" },
	{ LXPR_PID_ENV,		"environ" },
	{ LXPR_PID_EXE,		"exe" },
	{ LXPR_PID_LIMITS,	"limits" },
	{ LXPR_PID_MAPS,	"maps" },
	{ LXPR_PID_MEM,		"mem" },
	{ LXPR_PID_MOUNTINFO,	"mountinfo" },
	{ LXPR_PID_TID_OOM_SCR_ADJ,	"oom_score_adj" },
	{ LXPR_PID_ROOTDIR,	"root" },
	{ LXPR_PID_TID_STAT,	"stat" },
	{ LXPR_PID_STATM,	"statm" },
	{ LXPR_PID_TID_STATUS,	"status" },
	{ LXPR_PID_FDDIR,	"fd" }
};

#define	TIDDIRFILES	(sizeof (tiddir) / sizeof (tiddir[0]))
358
/* Linux RLIM_INFINITY: all-ones in a 64-bit rlimit value. */
#define	LX_RLIM_INFINITY	0xFFFFFFFFFFFFFFFF

/*
 * An rctl value is "infinite" for /proc/<pid>/limits purposes when it is
 * both the local maximum and globally flagged infinite.
 */
#define	RCTL_INFINITE(x) \
	((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \
	(x->rcv_flagaction & RCTL_GLOBAL_INFINITE))

/* One row of the /proc/<pid>/limits table. */
typedef struct lxpr_rlimtab {
	char	*rlim_name;	/* limit name */
	char	*rlim_unit;	/* limit unit */
	char	*rlim_rctl;	/* rctl source */
} lxpr_rlimtab_t;
370
371 static lxpr_rlimtab_t lxpr_rlimtab[] = {
372 { "Max cpu time", "seconds", "process.max-cpu-time" },
373 { "Max file size", "bytes", "process.max-file-size" },
374 { "Max data size", "bytes", "process.max-data-size" },
375 { "Max stack size", "bytes", "process.max-stack-size" },
376 { "Max core file size", "bytes", "process.max-core-size" },
377 { "Max resident set", "bytes", "zone.max-physical-memory" },
378 { "Max processes", "processes", "zone.max-lwps" },
379 { "Max open files", "files", "process.max-file-descriptor" },
380 { "Max locked memory", "bytes", "zone.max-locked-memory" },
381 { "Max address space", "bytes", "process.max-address-space" },
382 { "Max file locks", "locks", NULL },
383 { "Max pending signals", "signals",
384 "process.max-sigqueue-size" },
385 { "Max msgqueue size", "bytes", "process.max-msg-messages" },
386 { NULL, NULL, NULL }
387 };
388
389
390 /*
391 * contents of lx /proc/net directory
392 */
393 static lxpr_dirent_t netdir[] = {
394 { LXPR_NET_ARP, "arp" },
395 { LXPR_NET_DEV, "dev" },
396 { LXPR_NET_DEV_MCAST, "dev_mcast" },
397 { LXPR_NET_IF_INET6, "if_inet6" },
398 { LXPR_NET_IGMP, "igmp" },
399 { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
400 { LXPR_NET_IP_MR_VIF, "ip_mr_vif" },
401 { LXPR_NET_IPV6_ROUTE, "ipv6_route" },
402 { LXPR_NET_MCFILTER, "mcfilter" },
403 { LXPR_NET_NETSTAT, "netstat" },
404 { LXPR_NET_RAW, "raw" },
405 { LXPR_NET_ROUTE, "route" },
406 { LXPR_NET_RPC, "rpc" },
407 { LXPR_NET_RT_CACHE, "rt_cache" },
408 { LXPR_NET_SOCKSTAT, "sockstat" },
409 { LXPR_NET_SNMP, "snmp" },
410 { LXPR_NET_STAT, "stat" },
411 { LXPR_NET_TCP, "tcp" },
412 { LXPR_NET_TCP6, "tcp6" },
413 { LXPR_NET_UDP, "udp" },
414 { LXPR_NET_UDP6, "udp6" },
415 { LXPR_NET_UNIX, "unix" }
416 };
417
418 #define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0]))
419
420 /*
421 * contents of /proc/sys directory
422 */
423 static lxpr_dirent_t sysdir[] = {
424 { LXPR_SYS_FSDIR, "fs" },
425 { LXPR_SYS_KERNELDIR, "kernel" },
426 { LXPR_SYS_NETDIR, "net" },
427 { LXPR_SYS_VMDIR, "vm" },
428 };
429
430 #define SYSDIRFILES (sizeof (sysdir) / sizeof (sysdir[0]))
431
432 /*
433 * contents of /proc/sys/fs directory
434 */
435 static lxpr_dirent_t sys_fsdir[] = {
436 { LXPR_SYS_FS_INOTIFYDIR, "inotify" },
437 };
438
439 #define SYS_FSDIRFILES (sizeof (sys_fsdir) / sizeof (sys_fsdir[0]))
440
441 /*
442 * contents of /proc/sys/fs/inotify directory
443 */
444 static lxpr_dirent_t sys_fs_inotifydir[] = {
445 { LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" },
446 { LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES, "max_user_instances" },
447 { LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES, "max_user_watches" },
448 };
449
450 #define SYS_FS_INOTIFYDIRFILES \
451 (sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0]))
452
453 /*
454 * contents of /proc/sys/kernel directory
455 */
456 static lxpr_dirent_t sys_kerneldir[] = {
457 { LXPR_SYS_KERNEL_CAPLCAP, "cap_last_cap" },
458 { LXPR_SYS_KERNEL_COREPATT, "core_pattern" },
459 { LXPR_SYS_KERNEL_HOSTNAME, "hostname" },
460 { LXPR_SYS_KERNEL_MSGMNI, "msgmni" },
461 { LXPR_SYS_KERNEL_NGROUPS_MAX, "ngroups_max" },
462 { LXPR_SYS_KERNEL_OSREL, "osrelease" },
463 { LXPR_SYS_KERNEL_PID_MAX, "pid_max" },
464 { LXPR_SYS_KERNEL_RANDDIR, "random" },
465 { LXPR_SYS_KERNEL_SHMMAX, "shmmax" },
466 { LXPR_SYS_KERNEL_THREADS_MAX, "threads-max" },
467 };
468
469 #define SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0]))
470
471 /*
472 * contents of /proc/sys/kernel/random directory
473 */
474 static lxpr_dirent_t sys_randdir[] = {
475 { LXPR_SYS_KERNEL_RAND_BOOTID, "boot_id" },
476 };
477
478 #define SYS_RANDDIRFILES (sizeof (sys_randdir) / sizeof (sys_randdir[0]))
479
480 /*
481 * contents of /proc/sys/net directory
482 */
483 static lxpr_dirent_t sys_netdir[] = {
484 { LXPR_SYS_NET_COREDIR, "core" },
485 };
486
487 #define SYS_NETDIRFILES (sizeof (sys_netdir) / sizeof (sys_netdir[0]))
488
489 /*
490 * contents of /proc/sys/net/core directory
491 */
492 static lxpr_dirent_t sys_net_coredir[] = {
493 { LXPR_SYS_NET_CORE_SOMAXCON, "somaxconn" },
494 };
495
496 #define SYS_NET_COREDIRFILES \
497 (sizeof (sys_net_coredir) / sizeof (sys_net_coredir[0]))
498
499 /*
500 * contents of /proc/sys/vm directory
501 */
502 static lxpr_dirent_t sys_vmdir[] = {
503 { LXPR_SYS_VM_MINFR_KB, "min_free_kbytes" },
504 { LXPR_SYS_VM_NHUGEP, "nr_hugepages" },
505 { LXPR_SYS_VM_OVERCOMMIT_MEM, "overcommit_memory" },
506 { LXPR_SYS_VM_SWAPPINESS, "swappiness" },
507 };
508
509 #define SYS_VMDIRFILES (sizeof (sys_vmdir) / sizeof (sys_vmdir[0]))
510
511 /*
512 * lxpr_open(): Vnode operation for VOP_OPEN()
513 */
514 static int
515 lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
516 {
517 vnode_t *vp = *vpp;
518 lxpr_node_t *lxpnp = VTOLXP(vp);
519 lxpr_nodetype_t type = lxpnp->lxpr_type;
520 vnode_t *rvp;
521 int error = 0;
522
523 if (flag & FWRITE) {
524 /* Restrict writes to certain files */
525 switch (type) {
526 case LXPR_PID_OOM_SCR_ADJ:
527 case LXPR_PID_TID_OOM_SCR_ADJ:
528 case LXPR_SYS_KERNEL_COREPATT:
529 case LXPR_SYS_NET_CORE_SOMAXCON:
530 case LXPR_SYS_VM_OVERCOMMIT_MEM:
531 case LXPR_SYS_VM_SWAPPINESS:
532 case LXPR_PID_FD_FD:
533 case LXPR_PID_TID_FD_FD:
534 break;
535 default:
536 return (EPERM);
537 }
538 }
539
540 /*
541 * If we are opening an underlying file only allow regular files,
542 * fifos or sockets; reject the open for anything else.
543 * Just do it if we are opening the current or root directory.
544 */
545 if (lxpnp->lxpr_realvp != NULL) {
546 rvp = lxpnp->lxpr_realvp;
547
548 if (type == LXPR_PID_FD_FD && rvp->v_type != VREG &&
549 rvp->v_type != VFIFO && rvp->v_type != VSOCK) {
550 error = EACCES;
551 } else {
552 if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) {
553 /*
554 * This flag lets the fifo open know that
555 * we're using proc/fd to open a fd which we
556 * already have open. Otherwise, the fifo might
557 * reject an open if the other end has closed.
558 */
559 flag |= FKLYR;
560 }
561 /*
562 * Need to hold rvp since VOP_OPEN() may release it.
563 */
564 VN_HOLD(rvp);
565 error = VOP_OPEN(&rvp, flag, cr, ct);
566 if (error) {
567 VN_RELE(rvp);
568 } else {
569 *vpp = rvp;
570 VN_RELE(vp);
571 }
572 }
573 }
574
575 return (error);
576 }
577
578
579 /*
580 * lxpr_close(): Vnode operation for VOP_CLOSE()
581 */
582 /* ARGSUSED */
583 static int
584 lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
585 caller_context_t *ct)
586 {
587 lxpr_node_t *lxpr = VTOLXP(vp);
588 lxpr_nodetype_t type = lxpr->lxpr_type;
589
590 /*
591 * we should never get here because the close is done on the realvp
592 * for these nodes
593 */
594 ASSERT(type != LXPR_PID_FD_FD &&
595 type != LXPR_PID_CURDIR &&
596 type != LXPR_PID_ROOTDIR &&
597 type != LXPR_PID_EXE);
598
599 return (0);
600 }
601
/*
 * Array of read functions, indexed by lx /proc file type.  The array is
 * positional: entry order must match the lxpr_nodetype_t enumeration
 * (see lx_proc.h) exactly, so entries may not be reordered, inserted, or
 * removed without a matching enum change.
 */
static void (*lxpr_read_function[LXPR_NFILES])() = {
	lxpr_read_isdir,		/* /proc			*/
	lxpr_read_isdir,		/* /proc/<pid>			*/
	lxpr_read_pid_auxv,		/* /proc/<pid>/auxv		*/
	lxpr_read_pid_cgroup,		/* /proc/<pid>/cgroup		*/
	lxpr_read_pid_cmdline,		/* /proc/<pid>/cmdline		*/
	lxpr_read_pid_comm,		/* /proc/<pid>/comm		*/
	lxpr_read_empty,		/* /proc/<pid>/cpu		*/
	lxpr_read_invalid,		/* /proc/<pid>/cwd		*/
	lxpr_read_pid_env,		/* /proc/<pid>/environ		*/
	lxpr_read_invalid,		/* /proc/<pid>/exe		*/
	lxpr_read_pid_limits,		/* /proc/<pid>/limits		*/
	lxpr_read_pid_maps,		/* /proc/<pid>/maps		*/
	lxpr_read_empty,		/* /proc/<pid>/mem		*/
	lxpr_read_pid_mountinfo,	/* /proc/<pid>/mountinfo	*/
	lxpr_read_pid_oom_scr_adj,	/* /proc/<pid>/oom_score_adj	*/
	lxpr_read_invalid,		/* /proc/<pid>/root		*/
	lxpr_read_pid_stat,		/* /proc/<pid>/stat		*/
	lxpr_read_pid_statm,		/* /proc/<pid>/statm		*/
	lxpr_read_pid_status,		/* /proc/<pid>/status		*/
	lxpr_read_isdir,		/* /proc/<pid>/task		*/
	lxpr_read_isdir,		/* /proc/<pid>/task/nn		*/
	lxpr_read_isdir,		/* /proc/<pid>/fd		*/
	lxpr_read_fd,			/* /proc/<pid>/fd/nn		*/
	lxpr_read_pid_auxv,		/* /proc/<pid>/task/<tid>/auxv	*/
	lxpr_read_pid_cgroup,		/* /proc/<pid>/task/<tid>/cgroup */
	lxpr_read_pid_cmdline,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_read_pid_comm,		/* /proc/<pid>/task/<tid>/comm	*/
	lxpr_read_empty,		/* /proc/<pid>/task/<tid>/cpu	*/
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/cwd	*/
	lxpr_read_pid_env,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/exe	*/
	lxpr_read_pid_limits,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_read_pid_maps,		/* /proc/<pid>/task/<tid>/maps	*/
	lxpr_read_empty,		/* /proc/<pid>/task/<tid>/mem	*/
	lxpr_read_pid_mountinfo,	/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_read_pid_oom_scr_adj,	/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/root	*/
	lxpr_read_pid_tid_stat,		/* /proc/<pid>/task/<tid>/stat	*/
	lxpr_read_pid_statm,		/* /proc/<pid>/task/<tid>/statm	*/
	lxpr_read_pid_tid_status,	/* /proc/<pid>/task/<tid>/status */
	lxpr_read_isdir,		/* /proc/<pid>/task/<tid>/fd	*/
	lxpr_read_fd,			/* /proc/<pid>/task/<tid>/fd/nn	*/
	lxpr_read_cgroups,		/* /proc/cgroups	*/
	lxpr_read_empty,		/* /proc/cmdline	*/
	lxpr_read_cpuinfo,		/* /proc/cpuinfo	*/
	lxpr_read_empty,		/* /proc/devices	*/
	lxpr_read_diskstats,		/* /proc/diskstats	*/
	lxpr_read_empty,		/* /proc/dma		*/
	lxpr_read_filesystems,		/* /proc/filesystems	*/
	lxpr_read_empty,		/* /proc/interrupts	*/
	lxpr_read_empty,		/* /proc/ioports	*/
	lxpr_read_empty,		/* /proc/kcore		*/
	lxpr_read_invalid,		/* /proc/kmsg -- see lxpr_read() */
	lxpr_read_loadavg,		/* /proc/loadavg	*/
	lxpr_read_meminfo,		/* /proc/meminfo	*/
	lxpr_read_empty,		/* /proc/modules	*/
	lxpr_read_mounts,		/* /proc/mounts		*/
	lxpr_read_isdir,		/* /proc/net		*/
	lxpr_read_net_arp,		/* /proc/net/arp	*/
	lxpr_read_net_dev,		/* /proc/net/dev	*/
	lxpr_read_net_dev_mcast,	/* /proc/net/dev_mcast	*/
	lxpr_read_net_if_inet6,		/* /proc/net/if_inet6	*/
	lxpr_read_net_igmp,		/* /proc/net/igmp	*/
	lxpr_read_net_ip_mr_cache,	/* /proc/net/ip_mr_cache */
	lxpr_read_net_ip_mr_vif,	/* /proc/net/ip_mr_vif	*/
	lxpr_read_net_ipv6_route,	/* /proc/net/ipv6_route	*/
	lxpr_read_net_mcfilter,		/* /proc/net/mcfilter	*/
	lxpr_read_net_netstat,		/* /proc/net/netstat	*/
	lxpr_read_net_raw,		/* /proc/net/raw	*/
	lxpr_read_net_route,		/* /proc/net/route	*/
	lxpr_read_net_rpc,		/* /proc/net/rpc	*/
	lxpr_read_net_rt_cache,		/* /proc/net/rt_cache	*/
	lxpr_read_net_sockstat,		/* /proc/net/sockstat	*/
	lxpr_read_net_snmp,		/* /proc/net/snmp	*/
	lxpr_read_net_stat,		/* /proc/net/stat	*/
	lxpr_read_net_tcp,		/* /proc/net/tcp	*/
	lxpr_read_net_tcp6,		/* /proc/net/tcp6	*/
	lxpr_read_net_udp,		/* /proc/net/udp	*/
	lxpr_read_net_udp6,		/* /proc/net/udp6	*/
	lxpr_read_net_unix,		/* /proc/net/unix	*/
	lxpr_read_partitions,		/* /proc/partitions	*/
	lxpr_read_invalid,		/* /proc/self		*/
	lxpr_read_stat,			/* /proc/stat		*/
	lxpr_read_swaps,		/* /proc/swaps		*/
	lxpr_read_invalid,		/* /proc/sys		*/
	lxpr_read_invalid,		/* /proc/sys/fs		*/
	lxpr_read_invalid,		/* /proc/sys/fs/inotify	*/
	lxpr_read_sys_fs_inotify_max_queued_events, /* max_queued_events */
	lxpr_read_sys_fs_inotify_max_user_instances, /* max_user_instances */
	lxpr_read_sys_fs_inotify_max_user_watches, /* max_user_watches */
	lxpr_read_invalid,		/* /proc/sys/kernel	*/
	lxpr_read_sys_kernel_caplcap,	/* /proc/sys/kernel/cap_last_cap */
	lxpr_read_sys_kernel_corepatt,	/* /proc/sys/kernel/core_pattern */
	lxpr_read_sys_kernel_hostname,	/* /proc/sys/kernel/hostname */
	lxpr_read_sys_kernel_msgmni,	/* /proc/sys/kernel/msgmni */
	lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */
	lxpr_read_sys_kernel_osrel,	/* /proc/sys/kernel/osrelease */
	lxpr_read_sys_kernel_pid_max,	/* /proc/sys/kernel/pid_max */
	lxpr_read_invalid,		/* /proc/sys/kernel/random */
	lxpr_read_sys_kernel_rand_bootid, /* /proc/sys/kernel/random/boot_id */
	lxpr_read_sys_kernel_shmmax,	/* /proc/sys/kernel/shmmax */
	lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */
	lxpr_read_invalid,		/* /proc/sys/net	*/
	lxpr_read_invalid,		/* /proc/sys/net/core	*/
	lxpr_read_sys_net_core_somaxc,	/* /proc/sys/net/core/somaxconn */
	lxpr_read_invalid,		/* /proc/sys/vm	*/
	lxpr_read_sys_vm_minfr_kb,	/* /proc/sys/vm/min_free_kbytes */
	lxpr_read_sys_vm_nhpages,	/* /proc/sys/vm/nr_hugepages */
	lxpr_read_sys_vm_overcommit_mem, /* /proc/sys/vm/overcommit_memory */
	lxpr_read_sys_vm_swappiness,	/* /proc/sys/vm/swappiness */
	lxpr_read_uptime,		/* /proc/uptime		*/
	lxpr_read_version,		/* /proc/version	*/
};
716
717 /*
718 * Array of lookup functions, indexed by lx /proc file type.
719 */
720 static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
721 lxpr_lookup_procdir, /* /proc */
722 lxpr_lookup_piddir, /* /proc/<pid> */
723 lxpr_lookup_not_a_dir, /* /proc/<pid>/auxv */
724 lxpr_lookup_not_a_dir, /* /proc/<pid>/cgroup */
725 lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */
726 lxpr_lookup_not_a_dir, /* /proc/<pid>/comm */
727 lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */
728 lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */
729 lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */
730 lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */
731 lxpr_lookup_not_a_dir, /* /proc/<pid>/limits */
732 lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */
733 lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */
734 lxpr_lookup_not_a_dir, /* /proc/<pid>/mountinfo */
735 lxpr_lookup_not_a_dir, /* /proc/<pid>/oom_score_adj */
736 lxpr_lookup_not_a_dir, /* /proc/<pid>/root */
737 lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */
738 lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */
739 lxpr_lookup_not_a_dir, /* /proc/<pid>/status */
740 lxpr_lookup_taskdir, /* /proc/<pid>/task */
741 lxpr_lookup_task_tid_dir, /* /proc/<pid>/task/nn */
742 lxpr_lookup_fddir, /* /proc/<pid>/fd */
743 lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */
744 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/auxv */
745 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cgroup */
746 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cmdline */
747 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/comm */
748 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cpu */
749 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cwd */
750 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/environ */
751 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/exe */
752 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/limits */
753 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/maps */
754 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/mem */
755 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/mountinfo */
756 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/oom_scr_adj */
757 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/root */
758 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/stat */
759 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/statm */
760 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/status */
761 lxpr_lookup_fddir, /* /proc/<pid>/task/<tid>/fd */
762 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/fd/nn */
763 lxpr_lookup_not_a_dir, /* /proc/cgroups */
764 lxpr_lookup_not_a_dir, /* /proc/cmdline */
765 lxpr_lookup_not_a_dir, /* /proc/cpuinfo */
766 lxpr_lookup_not_a_dir, /* /proc/devices */
767 lxpr_lookup_not_a_dir, /* /proc/diskstats */
768 lxpr_lookup_not_a_dir, /* /proc/dma */
769 lxpr_lookup_not_a_dir, /* /proc/filesystems */
770 lxpr_lookup_not_a_dir, /* /proc/interrupts */
771 lxpr_lookup_not_a_dir, /* /proc/ioports */
772 lxpr_lookup_not_a_dir, /* /proc/kcore */
773 lxpr_lookup_not_a_dir, /* /proc/kmsg */
774 lxpr_lookup_not_a_dir, /* /proc/loadavg */
775 lxpr_lookup_not_a_dir, /* /proc/meminfo */
776 lxpr_lookup_not_a_dir, /* /proc/modules */
777 lxpr_lookup_not_a_dir, /* /proc/mounts */
778 lxpr_lookup_netdir, /* /proc/net */
779 lxpr_lookup_not_a_dir, /* /proc/net/arp */
780 lxpr_lookup_not_a_dir, /* /proc/net/dev */
781 lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */
782 lxpr_lookup_not_a_dir, /* /proc/net/if_inet6 */
783 lxpr_lookup_not_a_dir, /* /proc/net/igmp */
784 lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */
785 lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */
786 lxpr_lookup_not_a_dir, /* /proc/net/ipv6_route */
787 lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */
788 lxpr_lookup_not_a_dir, /* /proc/net/netstat */
789 lxpr_lookup_not_a_dir, /* /proc/net/raw */
790 lxpr_lookup_not_a_dir, /* /proc/net/route */
791 lxpr_lookup_not_a_dir, /* /proc/net/rpc */
792 lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */
793 lxpr_lookup_not_a_dir, /* /proc/net/sockstat */
794 lxpr_lookup_not_a_dir, /* /proc/net/snmp */
795 lxpr_lookup_not_a_dir, /* /proc/net/stat */
796 lxpr_lookup_not_a_dir, /* /proc/net/tcp */
797 lxpr_lookup_not_a_dir, /* /proc/net/tcp6 */
798 lxpr_lookup_not_a_dir, /* /proc/net/udp */
799 lxpr_lookup_not_a_dir, /* /proc/net/udp6 */
800 lxpr_lookup_not_a_dir, /* /proc/net/unix */
801 lxpr_lookup_not_a_dir, /* /proc/partitions */
802 lxpr_lookup_not_a_dir, /* /proc/self */
803 lxpr_lookup_not_a_dir, /* /proc/stat */
804 lxpr_lookup_not_a_dir, /* /proc/swaps */
805 lxpr_lookup_sysdir, /* /proc/sys */
806 lxpr_lookup_sys_fsdir, /* /proc/sys/fs */
807 lxpr_lookup_sys_fs_inotifydir, /* /proc/sys/fs/inotify */
808 lxpr_lookup_not_a_dir, /* .../inotify/max_queued_events */
809 lxpr_lookup_not_a_dir, /* .../inotify/max_user_instances */
810 lxpr_lookup_not_a_dir, /* .../inotify/max_user_watches */
811 lxpr_lookup_sys_kerneldir, /* /proc/sys/kernel */
812 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/cap_last_cap */
813 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/core_pattern */
814 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/hostname */
815 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/msgmni */
816 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/ngroups_max */
817 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/osrelease */
818 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/pid_max */
819 lxpr_lookup_sys_kdir_randdir, /* /proc/sys/kernel/random */
820 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/random/boot_id */
821 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/shmmax */
822 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/threads-max */
823 lxpr_lookup_sys_netdir, /* /proc/sys/net */
824 lxpr_lookup_sys_net_coredir, /* /proc/sys/net/core */
825 lxpr_lookup_not_a_dir, /* /proc/sys/net/core/somaxconn */
826 lxpr_lookup_sys_vmdir, /* /proc/sys/vm */
827 lxpr_lookup_not_a_dir, /* /proc/sys/vm/min_free_kbytes */
828 lxpr_lookup_not_a_dir, /* /proc/sys/vm/nr_hugepages */
829 lxpr_lookup_not_a_dir, /* /proc/sys/vm/overcommit_memory */
830 lxpr_lookup_not_a_dir, /* /proc/sys/vm/swappiness */
831 lxpr_lookup_not_a_dir, /* /proc/uptime */
832 lxpr_lookup_not_a_dir, /* /proc/version */
833 };
834
835 /*
836 * Array of readdir functions, indexed by /proc file type.
837 */
static int (*lxpr_readdir_function[LXPR_NFILES])() = {
	/*
	 * NOTE: entries must stay in lock-step with the lxpr_nodetype_t
	 * enumeration; directory node types dispatch to a real readdir
	 * routine, all plain-file types use lxpr_readdir_not_a_dir.
	 */
	lxpr_readdir_procdir,		/* /proc			*/
	lxpr_readdir_piddir,		/* /proc/<pid>			*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/auxv		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cgroup		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cmdline		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/comm		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cpu		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cwd		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/environ		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/exe		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/limits		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/maps		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/mem		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/mountinfo	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/oom_score_adj	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/root		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/stat		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/statm		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/status		*/
	lxpr_readdir_taskdir,		/* /proc/<pid>/task		*/
	lxpr_readdir_task_tid_dir,	/* /proc/<pid>/task/nn		*/
	lxpr_readdir_fddir,		/* /proc/<pid>/fd		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/fd/nn		*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/auxv	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/cgroup */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/comm	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/cpu	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/cwd	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/exe	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/maps	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/mem	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/root	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/stat	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/statm	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/status */
	lxpr_readdir_fddir,		/* /proc/<pid>/task/<tid>/fd	*/
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/fd/nn	*/
	lxpr_readdir_not_a_dir,		/* /proc/cgroups		*/
	lxpr_readdir_not_a_dir,		/* /proc/cmdline		*/
	lxpr_readdir_not_a_dir,		/* /proc/cpuinfo		*/
	lxpr_readdir_not_a_dir,		/* /proc/devices		*/
	lxpr_readdir_not_a_dir,		/* /proc/diskstats		*/
	lxpr_readdir_not_a_dir,		/* /proc/dma			*/
	lxpr_readdir_not_a_dir,		/* /proc/filesystems		*/
	lxpr_readdir_not_a_dir,		/* /proc/interrupts		*/
	lxpr_readdir_not_a_dir,		/* /proc/ioports		*/
	lxpr_readdir_not_a_dir,		/* /proc/kcore			*/
	lxpr_readdir_not_a_dir,		/* /proc/kmsg			*/
	lxpr_readdir_not_a_dir,		/* /proc/loadavg		*/
	lxpr_readdir_not_a_dir,		/* /proc/meminfo		*/
	lxpr_readdir_not_a_dir,		/* /proc/modules		*/
	lxpr_readdir_not_a_dir,		/* /proc/mounts			*/
	lxpr_readdir_netdir,		/* /proc/net			*/
	lxpr_readdir_not_a_dir,		/* /proc/net/arp		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/dev		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/dev_mcast		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/if_inet6		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/igmp		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/ip_mr_cache	*/
	lxpr_readdir_not_a_dir,		/* /proc/net/ip_mr_vif		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/ipv6_route		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/mcfilter		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/netstat		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/raw		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/route		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/rpc		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/rt_cache		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/sockstat		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/snmp		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/stat		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/tcp		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/tcp6		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/udp		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/udp6		*/
	lxpr_readdir_not_a_dir,		/* /proc/net/unix		*/
	lxpr_readdir_not_a_dir,		/* /proc/partitions		*/
	lxpr_readdir_not_a_dir,		/* /proc/self			*/
	lxpr_readdir_not_a_dir,		/* /proc/stat			*/
	lxpr_readdir_not_a_dir,		/* /proc/swaps			*/
	lxpr_readdir_sysdir,		/* /proc/sys			*/
	lxpr_readdir_sys_fsdir,		/* /proc/sys/fs			*/
	lxpr_readdir_sys_fs_inotifydir,	/* /proc/sys/fs/inotify		*/
	lxpr_readdir_not_a_dir,		/* .../inotify/max_queued_events */
	lxpr_readdir_not_a_dir,		/* .../inotify/max_user_instances */
	lxpr_readdir_not_a_dir,		/* .../inotify/max_user_watches	*/
	lxpr_readdir_sys_kerneldir,	/* /proc/sys/kernel		*/
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/cap_last_cap */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/core_pattern */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/hostname	*/
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/msgmni	*/
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/ngroups_max	*/
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/osrelease	*/
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/pid_max	*/
	lxpr_readdir_sys_kdir_randdir,	/* /proc/sys/kernel/random	*/
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/random/boot_id */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/shmmax	*/
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/threads-max	*/
	lxpr_readdir_sys_netdir,	/* /proc/sys/net		*/
	lxpr_readdir_sys_net_coredir,	/* /proc/sys/net/core		*/
	lxpr_readdir_not_a_dir,		/* /proc/sys/net/core/somaxconn	*/
	lxpr_readdir_sys_vmdir,		/* /proc/sys/vm			*/
	lxpr_readdir_not_a_dir,		/* /proc/sys/vm/min_free_kbytes	*/
	lxpr_readdir_not_a_dir,		/* /proc/sys/vm/nr_hugepages	*/
	lxpr_readdir_not_a_dir,		/* /proc/sys/vm/overcommit_memory */
	lxpr_readdir_not_a_dir,		/* /proc/sys/vm/swappiness	*/
	lxpr_readdir_not_a_dir,		/* /proc/uptime			*/
	lxpr_readdir_not_a_dir,		/* /proc/version		*/
};
952
953
954 /*
955 * lxpr_read(): Vnode operation for VOP_READ()
956 *
957 * As the format of all the files that can be read in the lx procfs is human
958 * readable and not binary structures there do not have to be different
959 * read variants depending on whether the reading process model is 32 or 64 bits
960 * (at least in general, and certainly the difference is unlikely to be enough
961 * to justify have different routines for 32 and 64 bit reads
962 */
963 /* ARGSUSED */
964 static int
965 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
966 caller_context_t *ct)
967 {
968 lxpr_node_t *lxpnp = VTOLXP(vp);
969 lxpr_nodetype_t type = lxpnp->lxpr_type;
970 lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
971 int error;
972
973 ASSERT(type < LXPR_NFILES);
974
975 if (type == LXPR_KMSG) {
976 ldi_ident_t li = VTOLXPM(vp)->lxprm_li;
977 ldi_handle_t ldih;
978 struct strioctl str;
979 int rv;
980
981 /*
982 * Open the zone's console device using the layered driver
983 * interface.
984 */
985 if ((error =
986 ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li)) != 0)
987 return (error);
988
989 /*
990 * Send an ioctl to the underlying console device, letting it
991 * know we're interested in getting console messages.
992 */
993 str.ic_cmd = I_CONSLOG;
994 str.ic_timout = 0;
995 str.ic_len = 0;
996 str.ic_dp = NULL;
997 if ((error = ldi_ioctl(ldih, I_STR,
998 (intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
999 return (error);
1000
1001 lxpr_read_kmsg(lxpnp, uiobuf, ldih);
1002
1003 if ((error = ldi_close(ldih, FREAD, cr)) != 0)
1004 return (error);
1005 } else {
1006 lxpr_read_function[type](lxpnp, uiobuf);
1007 }
1008
1009 error = lxpr_uiobuf_flush(uiobuf);
1010 lxpr_uiobuf_free(uiobuf);
1011
1012 return (error);
1013 }
1014
1015 /*
1016 * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
1017 *
1018 * Various special case reads:
1019 * - trying to read a directory
1020 * - invalid file (used to mean a file that should be implemented,
1021 * but isn't yet)
1022 * - empty file
1023 * - wait to be able to read a file that will never have anything to read
1024 */
/* ARGSUSED */
static void
lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Reads of directory nodes always fail with EISDIR. */
	lxpr_uiobuf_seterr(uiobuf, EISDIR);
}
1031
/* ARGSUSED */
static void
lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Placeholder for files not yet implemented: report EINVAL. */
	lxpr_uiobuf_seterr(uiobuf, EINVAL);
}
1038
/* ARGSUSED */
static void
lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Deliberately empty: the file always reads as zero bytes (EOF). */
}
1044
1045 /*
1046 * lxpr_read_pid_auxv(): read process aux vector
1047 */
1048 static void
1049 lxpr_read_pid_auxv(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1050 {
1051 proc_t *p;
1052 lx_proc_data_t *pd;
1053 lx_elf_data_t *edp = NULL;
1054 int i, cnt;
1055
1056 ASSERT(lxpnp->lxpr_type == LXPR_PID_AUXV ||
1057 lxpnp->lxpr_type == LXPR_PID_TID_AUXV);
1058
1059 p = lxpr_lock(lxpnp->lxpr_pid);
1060
1061 if (p == NULL) {
1062 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1063 return;
1064 }
1065 if ((pd = ptolxproc(p)) == NULL) {
1066 /* Emit a single AT_NULL record for non-branded processes */
1067 auxv_t buf;
1068
1069 bzero(&buf, sizeof (buf));
1070 lxpr_unlock(p);
1071 lxpr_uiobuf_write(uiobuf, (char *)&buf, sizeof (buf));
1072 return;
1073 } else {
1074 edp = &pd->l_elf_data;
1075 }
1076
1077 if (p->p_model == DATAMODEL_NATIVE) {
1078 auxv_t buf[__KERN_NAUXV_IMPL];
1079
1080 /*
1081 * Because a_type is only of size int (not long), the buffer
1082 * contents must be zeroed first to ensure cleanliness.
1083 */
1084 bzero(buf, sizeof (buf));
1085 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
1086 if (lx_auxv_stol(&p->p_user.u_auxv[i],
1087 &buf[cnt], edp) == 0) {
1088 cnt++;
1089 }
1090 if (p->p_user.u_auxv[i].a_type == AT_NULL) {
1091 break;
1092 }
1093 }
1094 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
1095 lxpr_unlock(p);
1096 }
1097 #if defined(_SYSCALL32_IMPL)
1098 else {
1099 auxv32_t buf[__KERN_NAUXV_IMPL];
1100
1101 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
1102 auxv_t temp;
1103
1104 if (lx_auxv_stol(&p->p_user.u_auxv[i],
1105 &temp, edp) == 0) {
1106 buf[cnt].a_type = (int)temp.a_type;
1107 buf[cnt].a_un.a_val = (int)temp.a_un.a_val;
1108 cnt++;
1109 }
1110 if (p->p_user.u_auxv[i].a_type == AT_NULL) {
1111 break;
1112 }
1113 }
1114 lxpr_unlock(p);
1115 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
1116 }
1117 #endif /* defined(_SYSCALL32_IMPL) */
1118 }
1119
1120 /*
1121 * lxpr_read_pid_cgroup(): read cgroups for process
1122 */
1123 static void
1124 lxpr_read_pid_cgroup(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1125 {
1126 proc_t *p;
1127
1128 ASSERT(lxpnp->lxpr_type == LXPR_PID_CGROUP ||
1129 lxpnp->lxpr_type == LXPR_PID_TID_CGROUP);
1130
1131 p = lxpr_lock(lxpnp->lxpr_pid);
1132 if (p == NULL) {
1133 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1134 return;
1135 }
1136
1137 /* basic stub, 3rd field will need to be populated */
1138 lxpr_uiobuf_printf(uiobuf, "1:name=systemd:/\n");
1139
1140 lxpr_unlock(p);
1141 }
1142
/*
 * Copy out the argv strings for a branded process using the Linux-style
 * bounds recorded in its lx_proc_data (l_args_start/l_envs_start/l_envs_end).
 * Linux allows a process to overwrite the NUL that separates argv from envp
 * (e.g. via prctl(PR_SET_MM)); when that has happened, a single buffer-sized
 * chunk of the env area is appended so tools like ps see the rewritten name.
 *
 * Called with p->p_lock held (via lxpr_lock in the caller); the lock is
 * dropped around the uread/prreadbuf/uiomove calls and reacquired before
 * returning.  Errors are stored directly in uiobuf->error.
 */
static void
lxpr_copy_cmdline(proc_t *p, lx_proc_data_t *pd, lxpr_uiobuf_t *uiobuf)
{
	uio_t *uiop = uiobuf->uiop;
	char *buf = uiobuf->buffer;
	int bsz = uiobuf->buffsize;
	boolean_t env_overflow = B_FALSE;
	uintptr_t pos = pd->l_args_start + uiop->uio_offset;
	uintptr_t estart = pd->l_envs_start;
	uintptr_t eend = pd->l_envs_end;
	size_t chunk, copied;
	int err = 0;

	/* Do not bother with data beyond the end of the envp strings area. */
	if (pos > eend) {
		return;
	}
	/* Drop p_lock across the (potentially faulting) copy operations. */
	mutex_exit(&p->p_lock);

	/*
	 * If the starting or ending bounds are outside the argv strings area,
	 * check to see if the process has overwritten the terminating NULL.
	 * If not, no data needs to be copied from outside the argv area.
	 */
	if (pos >= estart || (pos + uiop->uio_resid) >= estart) {
		uint8_t term;
		if (uread(p, &term, sizeof (term), estart - 1) != 0) {
			err = EFAULT;
		} else if (term != 0) {
			env_overflow = B_TRUE;
		}
	}


	/* Data between astart and estart-1 can be copied freely. */
	while (pos < estart && uiop->uio_resid > 0 && err == 0) {
		chunk = MIN(estart - pos, uiop->uio_resid);
		chunk = MIN(chunk, bsz);

		if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) != 0 ||
		    copied != chunk) {
			err = EFAULT;
			break;
		}
		err = uiomove(buf, copied, UIO_READ, uiop);
		pos += copied;
	}

	/*
	 * Onward from estart, data is copied as a contiguous string. To
	 * protect env data from potential snooping, only one buffer-sized copy
	 * is allowed to avoid complex seek logic.
	 */
	if (err == 0 && env_overflow && pos == estart && uiop->uio_resid > 0) {
		chunk = MIN(eend - pos, uiop->uio_resid);
		chunk = MIN(chunk, bsz);
		if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) == 0) {
			/* Only copy up to the first NUL in the env area. */
			int len = strnlen(buf, copied);
			if (len > 0) {
				err = uiomove(buf, len, UIO_READ, uiop);
			}
		}
	}

	uiobuf->error = err;
	/* reset any uiobuf state (we bypassed it by writing via uiop) */
	uiobuf->pos = uiobuf->buffer;
	uiobuf->beg = 0;

	/* Reacquire p_lock; lxpr_unlock in the caller expects it held. */
	mutex_enter(&p->p_lock);
}
1214
1215 /*
1216 * lxpr_read_pid_cmdline(): read argument vector from process
1217 */
1218 static void
1219 lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1220 {
1221 proc_t *p;
1222 char *buf;
1223 size_t asz = lxpr_maxargvlen, sz;
1224 lx_proc_data_t *pd;
1225
1226 ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE ||
1227 lxpnp->lxpr_type == LXPR_PID_TID_CMDLINE);
1228
1229 buf = kmem_alloc(asz, KM_SLEEP);
1230
1231 p = lxpr_lock(lxpnp->lxpr_pid);
1232 if (p == NULL) {
1233 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1234 kmem_free(buf, asz);
1235 return;
1236 }
1237
1238 if ((pd = ptolxproc(p)) != NULL && pd->l_args_start != 0 &&
1239 pd->l_envs_start != 0 && pd->l_envs_end != 0) {
1240 /* Use Linux-style argv bounds if possible. */
1241 lxpr_copy_cmdline(p, pd, uiobuf);
1242 } else {
1243 if (prreadargv(p, buf, asz, &sz) != 0) {
1244 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1245 } else {
1246 lxpr_uiobuf_write(uiobuf, buf, sz);
1247 }
1248 }
1249
1250 lxpr_unlock(p);
1251 kmem_free(buf, asz);
1252 }
1253
1254 /*
1255 * lxpr_read_pid_comm(): read command from process
1256 */
1257 static void
1258 lxpr_read_pid_comm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1259 {
1260 proc_t *p;
1261
1262 VERIFY(lxpnp->lxpr_type == LXPR_PID_COMM ||
1263 lxpnp->lxpr_type == LXPR_PID_TID_COMM);
1264
1265 /*
1266 * Because prctl(PR_SET_NAME) does not set custom names for threads
1267 * (vs processes), there is no need for special handling here.
1268 */
1269 if ((p = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
1270 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1271 return;
1272 }
1273 lxpr_uiobuf_printf(uiobuf, "%s\n", p->p_user.u_comm);
1274 lxpr_unlock(p);
1275 }
1276
1277 /*
1278 * lxpr_read_pid_env(): read env vector from process
1279 */
1280 static void
1281 lxpr_read_pid_env(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1282 {
1283 proc_t *p;
1284 char *buf;
1285 size_t asz = lxpr_maxenvvlen, sz;
1286
1287 ASSERT(lxpnp->lxpr_type == LXPR_PID_ENV);
1288
1289 buf = kmem_alloc(asz, KM_SLEEP);
1290
1291 p = lxpr_lock(lxpnp->lxpr_pid);
1292 if (p == NULL) {
1293 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1294 kmem_free(buf, asz);
1295 return;
1296 }
1297
1298 if (prreadenvv(p, buf, asz, &sz) != 0) {
1299 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1300 } else {
1301 lxpr_uiobuf_write(uiobuf, buf, sz);
1302 }
1303
1304 lxpr_unlock(p);
1305 kmem_free(buf, asz);
1306 }
1307
/*
 * lxpr_read_pid_limits(): ulimit file
 *
 * Emits a Linux-style limits table.  Each Linux limit name is mapped to an
 * illumos rctl (via lxpr_rlimtab); limits with no rctl analog are reported
 * as "unlimited".
 */
static void
lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	rctl_qty_t cur, max;
	rctl_val_t *oval, *nval;	/* rctl iteration cursor + result */
	rctl_hndl_t hndl;
	char *kname;
	int i;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS ||
	    lxpnp->lxpr_type == LXPR_PID_TID_LIMITS);

	nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		kmem_free(nval, sizeof (rctl_val_t));
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n",
	    "Limit", "Soft Limit", "Hard Limit", "Units");
	for (i = 0; lxpr_rlimtab[i].rlim_name != NULL; i++) {
		kname = lxpr_rlimtab[i].rlim_rctl;
		/* default to unlimited for resources without an analog */
		cur = RLIM_INFINITY;
		max = RLIM_INFINITY;
		if (kname != NULL) {
			hndl = rctl_hndl_lookup(kname);
			oval = NULL;
			/*
			 * Walk the rctl's value list: oval == NULL fetches
			 * the first value, and thereafter oval and nval
			 * intentionally alias the same buffer so each call
			 * advances past the previously returned value.
			 */
			while ((hndl != -1) &&
			    rctl_local_get(hndl, oval, nval, p) == 0) {
				oval = nval;
				switch (nval->rcv_privilege) {
				case RCPRIV_BASIC:
					/* basic value -> Linux soft limit */
					if (!RCTL_INFINITE(nval))
						cur = nval->rcv_value;
					break;
				case RCPRIV_PRIVILEGED:
					/* privileged -> Linux hard limit */
					if (!RCTL_INFINITE(nval))
						max = nval->rcv_value;
					break;
				}
			}
		}

		lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name);
		if (cur == RLIM_INFINITY || cur == LX_RLIM_INFINITY) {
			lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
		} else {
			lxpr_uiobuf_printf(uiobuf, " %-20lu", cur);
		}
		if (max == RLIM_INFINITY || max == LX_RLIM_INFINITY) {
			lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
		} else {
			lxpr_uiobuf_printf(uiobuf, " %-20lu", max);
		}
		lxpr_uiobuf_printf(uiobuf, " %-10s\n",
		    lxpr_rlimtab[i].rlim_unit);
	}

	lxpr_unlock(p);
	kmem_free(nval, sizeof (rctl_val_t));
}
1377
/*
 * lxpr_read_pid_maps(): memory map file
 *
 * Runs in two phases: first walk the address space (under the AS lock, with
 * p_lock dropped) and snapshot each segment into a private list; then, with
 * all process/AS locks released, format and emit the list.  The split avoids
 * holding locks across VOP_GETATTR/vnodetopath, which may block.
 */
static void
lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	struct as *as;
	struct seg *seg;
	char *buf;
	int buflen = MAXPATHLEN;
	struct print_data {
		uintptr_t saddr;	/* segment start address */
		uintptr_t eaddr;	/* segment end address */
		int type;		/* MAP_SHARED/MAP_PRIVATE bits */
		char prot[5];		/* "rwxp"-style protection string */
		uintptr_t offset;	/* mapping offset within vp */
		vnode_t *vp;		/* held vnode, or NULL if anonymous */
		struct print_data *next;
	} *print_head = NULL;
	struct print_data **print_tail = &print_head;
	struct print_data *pbuf;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS ||
	    lxpnp->lxpr_type == LXPR_PID_TID_MAPS);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	as = p->p_as;

	/* System (kernel) processes have no user mappings to report. */
	if (as == &kas) {
		lxpr_unlock(p);
		return;
	}

	/* Drop p_lock while walking the address space. */
	mutex_exit(&p->p_lock);

	/* Iterate over all segments in the address space */
	AS_LOCK_ENTER(as, RW_READER);
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		vnode_t *vp;
		uint_t protbits;

		pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);

		pbuf->saddr = (uintptr_t)seg->s_base;
		pbuf->eaddr = pbuf->saddr + seg->s_size;
		pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);

		/*
		 * Cheat and only use the protection bits of the first page
		 * in the segment
		 */
		(void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
		(void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);

		if (protbits & PROT_READ)	pbuf->prot[0] = 'r';
		if (protbits & PROT_WRITE)	pbuf->prot[1] = 'w';
		if (protbits & PROT_EXEC)	pbuf->prot[2] = 'x';
		if (pbuf->type & MAP_SHARED)	pbuf->prot[3] = 's';
		else if (pbuf->type & MAP_PRIVATE)	pbuf->prot[3] = 'p';

		/*
		 * Hold the backing vnode (if any regular file) so it stays
		 * valid until the print phase releases it.
		 */
		if (seg->s_ops == &segvn_ops &&
		    SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
		    vp != NULL && vp->v_type == VREG) {
			VN_HOLD(vp);
			pbuf->vp = vp;
		} else {
			pbuf->vp = NULL;
		}

		pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr);

		pbuf->next = NULL;
		*print_tail = pbuf;
		print_tail = &pbuf->next;
	}
	AS_LOCK_EXIT(as);
	/* Reacquire p_lock so lxpr_unlock sees the expected state. */
	mutex_enter(&p->p_lock);
	lxpr_unlock(p);

	buf = kmem_alloc(buflen, KM_SLEEP);

	/* print the data we've extracted */
	pbuf = print_head;
	while (pbuf != NULL) {
		struct print_data *pbuf_next;
		vattr_t vattr;

		int maj = 0;
		int min = 0;
		ino_t inode = 0;

		*buf = '\0';
		if (pbuf->vp != NULL) {
			/* Look up device numbers and inode for the mapping. */
			vattr.va_mask = AT_FSID | AT_NODEID;
			if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
			    NULL) == 0) {
				maj = getmajor(vattr.va_fsid);
				min = getminor(vattr.va_fsid);
				inode = vattr.va_nodeid;
			}
			/* Best-effort path; buf stays empty on failure. */
			(void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
			VN_RELE(pbuf->vp);
		}

		if (p->p_model == DATAMODEL_LP64) {
			lxpr_uiobuf_printf(uiobuf,
			    "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n",
			    pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
			    maj, min, inode, *buf != '\0' ? " " : "", buf);
		} else {
			lxpr_uiobuf_printf(uiobuf,
			    "%08x-%08x %s %08x %02x:%02x %llu%s%s\n",
			    (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr,
			    pbuf->prot, (uint32_t)pbuf->offset, maj, min,
			    inode, *buf != '\0' ? " " : "", buf);
		}

		pbuf_next = pbuf->next;
		kmem_free(pbuf, sizeof (*pbuf));
		pbuf = pbuf_next;
	}

	kmem_free(buf, buflen);
}
1508
1509 /*
1510 * lxpr_read_pid_mountinfo(): information about process mount points. e.g.:
1511 * 14 19 0:13 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
1512 * mntid parid devnums root mntpnt mntopts - fstype mntsrc superopts
1513 *
1514 * We have to make up several of these fields.
1515 */
1516 static void
1517 lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1518 {
1519 struct vfs *vfsp;
1520 struct vfs *vfslist;
1521 zone_t *zone = LXPTOZ(lxpnp);
1522 struct print_data {
1523 refstr_t *vfs_mntpt;
1524 refstr_t *vfs_resource;
1525 uint_t vfs_flag;
1526 int vfs_fstype;
1527 dev_t vfs_dev;
1528 struct print_data *next;
1529 } *print_head = NULL;
1530 struct print_data **print_tail = &print_head;
1531 struct print_data *printp;
1532 int root_id = 15; /* use a made-up value */
1533 int mnt_id;
1534
1535 ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO ||
1536 lxpnp->lxpr_type == LXPR_PID_TID_MOUNTINFO);
1537
1538 vfs_list_read_lock();
1539
1540 /* root is the top-level, it does not appear in this output */
1541 if (zone == global_zone) {
1542 vfsp = vfslist = rootvfs;
1543 } else {
1544 vfsp = vfslist = zone->zone_vfslist;
1545 /*
1546 * If the zone has a root entry, it will be the first in
1547 * the list. If it doesn't, we conjure one up.
1548 */
1549 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
1550 zone->zone_rootpath) != 0) {
1551 struct vfs *tvfsp;
1552 /*
1553 * The root of the zone is not a mount point. The vfs
1554 * we want to report is that of the zone's root vnode.
1555 */
1556 tvfsp = zone->zone_rootvp->v_vfsp;
1557
1558 lxpr_uiobuf_printf(uiobuf,
1559 "%d 1 %d:%d / / %s - %s / %s\n",
1560 root_id,
1561 major(tvfsp->vfs_dev), minor(vfsp->vfs_dev),
1562 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1563 vfssw[tvfsp->vfs_fstype].vsw_name,
1564 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1565
1566 }
1567 if (vfslist == NULL) {
1568 vfs_list_unlock();
1569 return;
1570 }
1571 }
1572
1573 /*
1574 * Later on we have to do a lookupname, which can end up causing
1575 * another vfs_list_read_lock() to be called. Which can lead to a
1576 * deadlock. To avoid this, we extract the data we need into a local
1577 * list, then we can run this list without holding vfs_list_read_lock()
1578 * We keep the list in the same order as the vfs_list
1579 */
1580 do {
1581 /* Skip mounts we shouldn't show */
1582 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
1583 goto nextfs;
1584 }
1585
1586 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
1587 refstr_hold(vfsp->vfs_mntpt);
1588 printp->vfs_mntpt = vfsp->vfs_mntpt;
1589 refstr_hold(vfsp->vfs_resource);
1590 printp->vfs_resource = vfsp->vfs_resource;
1591 printp->vfs_flag = vfsp->vfs_flag;
1592 printp->vfs_fstype = vfsp->vfs_fstype;
1593 printp->vfs_dev = vfsp->vfs_dev;
1594 printp->next = NULL;
1595
1596 *print_tail = printp;
1597 print_tail = &printp->next;
1598
1599 nextfs:
1600 vfsp = (zone == global_zone) ?
1601 vfsp->vfs_next : vfsp->vfs_zone_next;
1602
1603 } while (vfsp != vfslist);
1604
1605 vfs_list_unlock();
1606
1607 mnt_id = root_id + 1;
1608
1609 /*
1610 * now we can run through what we've extracted without holding
1611 * vfs_list_read_lock()
1612 */
1613 printp = print_head;
1614 while (printp != NULL) {
1615 struct print_data *printp_next;
1616 const char *resource;
1617 char *mntpt;
1618 struct vnode *vp;
1619 int error;
1620
1621 mntpt = (char *)refstr_value(printp->vfs_mntpt);
1622 resource = refstr_value(printp->vfs_resource);
1623
1624 if (mntpt != NULL && mntpt[0] != '\0')
1625 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
1626 else
1627 mntpt = "-";
1628
1629 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
1630
1631 if (error != 0)
1632 goto nextp;
1633
1634 if (!(vp->v_flag & VROOT)) {
1635 VN_RELE(vp);
1636 goto nextp;
1637 }
1638 VN_RELE(vp);
1639
1640 if (resource != NULL && resource[0] != '\0') {
1641 if (resource[0] == '/') {
1642 resource = ZONE_PATH_VISIBLE(resource, zone) ?
1643 ZONE_PATH_TRANSLATE(resource, zone) : mntpt;
1644 }
1645 } else {
1646 resource = "none";
1647 }
1648
1649 /*
1650 * XXX parent ID is not tracked correctly here. Currently we
1651 * always assume the parent ID is the root ID.
1652 */
1653 lxpr_uiobuf_printf(uiobuf,
1654 "%d %d %d:%d / %s %s - %s %s %s\n",
1655 mnt_id, root_id,
1656 major(printp->vfs_dev), minor(printp->vfs_dev),
1657 mntpt,
1658 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1659 vfssw[printp->vfs_fstype].vsw_name,
1660 resource,
1661 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1662
1663 nextp:
1664 printp_next = printp->next;
1665 refstr_rele(printp->vfs_mntpt);
1666 refstr_rele(printp->vfs_resource);
1667 kmem_free(printp, sizeof (*printp));
1668 printp = printp_next;
1669
1670 mnt_id++;
1671 }
1672 }
1673
1674 /*
1675 * lxpr_read_pid_oom_scr_adj(): read oom_score_adj for process
1676 */
1677 static void
1678 lxpr_read_pid_oom_scr_adj(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1679 {
1680 proc_t *p;
1681
1682 ASSERT(lxpnp->lxpr_type == LXPR_PID_OOM_SCR_ADJ ||
1683 lxpnp->lxpr_type == LXPR_PID_TID_OOM_SCR_ADJ);
1684
1685 p = lxpr_lock(lxpnp->lxpr_pid);
1686 if (p == NULL) {
1687 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1688 return;
1689 }
1690
1691 /* always 0 */
1692 lxpr_uiobuf_printf(uiobuf, "0\n");
1693
1694 lxpr_unlock(p);
1695 }
1696
1697
/*
 * lxpr_read_pid_statm(): memory status file
 *
 * Reports size and resident pages for the process.  Only the first, second
 * and fourth fields are real; the rest are zero-filled placeholders
 * (Linux format: size resident shared text lib data dt).
 */
static void
lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	struct as *as;
	size_t vsize;
	size_t rss;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM ||
	    lxpnp->lxpr_type == LXPR_PID_TID_STATM);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	as = p->p_as;

	/* Drop p_lock while taking the AS lock to query sizes. */
	mutex_exit(&p->p_lock);

	AS_LOCK_ENTER(as, RW_READER);
	vsize = btopr(as->a_resvsize);	/* reserved size, in pages */
	rss = rm_asrss(as);		/* resident set size, in pages */
	AS_LOCK_EXIT(as);

	/* Reacquire p_lock before releasing the process. */
	mutex_enter(&p->p_lock);
	lxpr_unlock(p);

	lxpr_uiobuf_printf(uiobuf,
	    "%lu %lu %lu %lu %lu %lu %lu\n",
	    vsize, rss, 0l, rss, 0l, 0l, 0l);
}
1734
/*
 * Look for either the main thread (lookup_id is 0) or the specified thread.
 * If we're looking for the main thread but the proc does not have one, we
 * fallback to using prchoose to get any thread available.
 *
 * On success the thread is returned with its thread lock held (the hit
 * paths take thread_lock() explicitly; NOTE(review): the prchoose()
 * fallback is presumed to return a locked thread as well -- callers
 * thread_unlock() whatever is returned).  Returns NULL if no matching
 * thread exists.
 */
static kthread_t *
lxpr_get_thread(proc_t *p, uint_t lookup_id)
{
	kthread_t *t;
	uint_t emul_tid;	/* Linux-visible tid for candidate thread */
	lx_lwp_data_t *lwpd;
	pid_t pid = p->p_pid;
	pid_t init_pid = curproc->p_zone->zone_proc_initpid;
	boolean_t branded = (p->p_brand == &lx_brand);

	/* get specified thread */
	if ((t = p->p_tlist) == NULL)
		return (NULL);

	do {
		/* Main-thread lookup: the first native tid qualifies. */
		if (lookup_id == 0 && t->t_tid == 1) {
			thread_lock(t);
			return (t);
		}

		lwpd = ttolxlwp(t);
		if (branded && lwpd != NULL) {
			/*
			 * Branded threads are identified by their emulated
			 * Linux pid, except on init where tid 1 maps to the
			 * native thread id.
			 */
			if (pid == init_pid && lookup_id == 1) {
				emul_tid = t->t_tid;
			} else {
				emul_tid = lwpd->br_pid;
			}
		} else {
			/*
			 * Make only the first (assumed to be main) thread
			 * visible for non-branded processes.
			 */
			emul_tid = p->p_pid;
		}
		if (emul_tid == lookup_id) {
			thread_lock(t);
			return (t);
		}
	} while ((t = t->t_forw) != p->p_tlist);

	/* No tid 1 found for a main-thread lookup: take any thread. */
	if (lookup_id == 0)
		return (prchoose(p));
	return (NULL);
}
1784
1785 /*
1786 * Lookup the real pid for procs 0 or 1.
1787 */
1788 static pid_t
1789 get_real_pid(pid_t p)
1790 {
1791 pid_t find_pid;
1792
1793 if (p == 1) {
1794 find_pid = curproc->p_zone->zone_proc_initpid;
1795 } else if (p == 0) {
1796 find_pid = curproc->p_zone->zone_zsched->p_pid;
1797 } else {
1798 find_pid = p;
1799 }
1800
1801 return (find_pid);
1802 }
1803
1804 /*
1805 * pid/tid common code to read status file
1806 */
1807 static void
1808 lxpr_read_status_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
1809 uint_t lookup_id)
1810 {
1811 proc_t *p;
1812 kthread_t *t;
1813 user_t *up;
1814 cred_t *cr;
1815 const gid_t *groups;
1816 int ngroups;
1817 struct as *as;
1818 char *status;
1819 pid_t pid, ppid;
1820 k_sigset_t current, ignore, handle;
1821 int i, lx_sig;
1822 pid_t real_pid;
1823
1824 real_pid = get_real_pid(lxpnp->lxpr_pid);
1825 p = lxpr_lock(real_pid);
1826 if (p == NULL) {
1827 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1828 return;
1829 }
1830
1831 pid = p->p_pid;
1832
1833 /*
1834 * Convert pid to the Linux default of 1 if we're the zone's init
1835 * process or if we're the zone's zsched the pid is 0.
1836 */
1837 if (pid == curproc->p_zone->zone_proc_initpid) {
1838 pid = 1;
1839 ppid = 0; /* parent pid for init is 0 */
1840 } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
1841 pid = 0; /* zsched is pid 0 */
1842 ppid = 0; /* parent pid for zsched is itself */
1843 } else {
1844 /*
1845 * Make sure not to reference parent PIDs that reside outside
1846 * the zone
1847 */
1848 ppid = ((p->p_flag & SZONETOP)
1849 ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
1850
1851 /*
1852 * Convert ppid to the Linux default of 1 if our parent is the
1853 * zone's init process
1854 */
1855 if (ppid == curproc->p_zone->zone_proc_initpid)
1856 ppid = 1;
1857 }
1858
1859 t = lxpr_get_thread(p, lookup_id);
1860 if (t != NULL) {
1861 switch (t->t_state) {
1862 case TS_SLEEP:
1863 status = "S (sleeping)";
1864 break;
1865 case TS_RUN:
1866 case TS_ONPROC:
1867 status = "R (running)";
1868 break;
1869 case TS_ZOMB:
1870 status = "Z (zombie)";
1871 break;
1872 case TS_STOPPED:
1873 status = "T (stopped)";
1874 break;
1875 default:
1876 status = "! (unknown)";
1877 break;
1878 }
1879 thread_unlock(t);
1880 } else {
1881 if (lookup_id != 0) {
1882 /* we can't find this specific thread */
1883 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1884 lxpr_unlock(p);
1885 return;
1886 }
1887
1888 /*
1889 * there is a hole in the exit code, where a proc can have
1890 * no threads but it is yet to be flagged SZOMB. We will
1891 * assume we are about to become a zombie
1892 */
1893 status = "Z (zombie)";
1894 }
1895
1896 up = PTOU(p);
1897 mutex_enter(&p->p_crlock);
1898 crhold(cr = p->p_cred);
1899 mutex_exit(&p->p_crlock);
1900
1901 lxpr_uiobuf_printf(uiobuf,
1902 "Name:\t%s\n"
1903 "State:\t%s\n"
1904 "Tgid:\t%d\n"
1905 "Pid:\t%d\n"
1906 "PPid:\t%d\n"
1907 "TracerPid:\t%d\n"
1908 "Uid:\t%u\t%u\t%u\t%u\n"
1909 "Gid:\t%u\t%u\t%u\t%u\n"
1910 "FDSize:\t%d\n"
1911 "Groups:\t",
1912 up->u_comm,
1913 status,
1914 pid, /* thread group id - same as pid */
1915 (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
1916 ppid,
1917 0,
1918 crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
1919 crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
1920 p->p_fno_ctl);
1921
1922
1923 ngroups = crgetngroups(cr);
1924 groups = crgetgroups(cr);
1925 for (i = 0; i < ngroups; i++) {
1926 lxpr_uiobuf_printf(uiobuf,
1927 "%u ",
1928 groups[i]);
1929 }
1930 crfree(cr);
1931
1932 as = p->p_as;
1933 if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
1934 size_t vsize, nlocked, rss;
1935
1936 mutex_exit(&p->p_lock);
1937 AS_LOCK_ENTER(as, RW_READER);
1938 vsize = as->a_resvsize;
1939 rss = rm_asrss(as);
1940 AS_LOCK_EXIT(as);
1941 mutex_enter(&p->p_lock);
1942 nlocked = p->p_locked_mem;
1943
1944 lxpr_uiobuf_printf(uiobuf,
1945 "\n"
1946 "VmSize:\t%8lu kB\n"
1947 "VmLck:\t%8lu kB\n"
1948 "VmRSS:\t%8lu kB\n"
1949 "VmData:\t%8lu kB\n"
1950 "VmStk:\t%8lu kB\n"
1951 "VmExe:\t%8lu kB\n"
1952 "VmLib:\t%8lu kB",
1953 btok(vsize),
1954 btok(nlocked),
1955 ptok(rss),
1956 0l,
1957 btok(p->p_stksize),
1958 ptok(rss),
1959 0l);
1960 }
1961
1962 lxpr_uiobuf_printf(uiobuf, "\nThreads:\t%u", p->p_lwpcnt);
1963
1964 sigemptyset(¤t);
1965 sigemptyset(&ignore);
1966 sigemptyset(&handle);
1967
1968 for (i = 1; i < NSIG; i++) {
1969 lx_sig = stol_signo[i];
1970
1971 if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
1972 if (sigismember(&p->p_sig, i))
1973 sigaddset(¤t, lx_sig);
1974
1975 if (up->u_signal[i - 1] == SIG_IGN)
1976 sigaddset(&ignore, lx_sig);
1977 else if (up->u_signal[i - 1] != SIG_DFL)
1978 sigaddset(&handle, lx_sig);
1979 }
1980 }
1981
1982 lxpr_uiobuf_printf(uiobuf,
1983 "\n"
1984 "SigPnd:\t%08x%08x\n"
1985 "SigBlk:\t%08x%08x\n"
1986 "SigIgn:\t%08x%08x\n"
1987 "SigCgt:\t%08x%08x\n"
1988 "CapInh:\t%016x\n"
1989 "CapPrm:\t%016x\n"
1990 "CapEff:\t%016x\n",
1991 current.__sigbits[1], current.__sigbits[0],
1992 0, 0, /* signals blocked on per thread basis */
1993 ignore.__sigbits[1], ignore.__sigbits[0],
1994 handle.__sigbits[1], handle.__sigbits[0],
1995 /* Can't do anything with linux capabilities */
1996 0,
1997 0,
1998 0);
1999
2000 lxpr_uiobuf_printf(uiobuf,
2001 "CapBnd:\t%016llx\n",
2002 /* We report the full capability bounding set */
2003 0x1fffffffffLL);
2004
2005 lxpr_unlock(p);
2006 }
2007
2008 /*
2009 * lxpr_read_pid_status(): status file
2010 */
2011 static void
2012 lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2013 {
2014 ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
2015
2016 lxpr_read_status_common(lxpnp, uiobuf, 0);
2017 }
2018
2019 /*
2020 * lxpr_read_pid_tid_status(): status file
2021 */
2022 static void
2023 lxpr_read_pid_tid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2024 {
2025 ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STATUS);
2026 lxpr_read_status_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
2027 }
2028
2029 /*
2030 * pid/tid common code to read stat file
2031 */
2032 static void
2033 lxpr_read_stat_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
2034 uint_t lookup_id)
2035 {
2036 proc_t *p;
2037 kthread_t *t;
2038 struct as *as;
2039 char stat;
2040 pid_t pid, ppid, pgpid, spid;
2041 gid_t psgid;
2042 dev_t psdev;
2043 size_t rss, vsize;
2044 int nice, pri;
2045 caddr_t wchan;
2046 processorid_t cpu;
2047 pid_t real_pid;
2048
2049 real_pid = get_real_pid(lxpnp->lxpr_pid);
2050 p = lxpr_lock(real_pid);
2051 if (p == NULL) {
2052 lxpr_uiobuf_seterr(uiobuf, EINVAL);
2053 return;
2054 }
2055
2056 pid = p->p_pid;
2057
2058 /*
2059 * Set Linux defaults if we're the zone's init process
2060 */
2061 if (pid == curproc->p_zone->zone_proc_initpid) {
2062 pid = 1; /* PID for init */
2063 ppid = 0; /* parent PID for init is 0 */
2064 pgpid = 0; /* process group for init is 0 */
2065 psgid = (gid_t)-1; /* credential GID for init is -1 */
2066 spid = 0; /* session id for init is 0 */
2067 psdev = 0; /* session device for init is 0 */
2068 } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
2069 pid = 0; /* PID for zsched */
2070 ppid = 0; /* parent PID for zsched is 0 */
2071 pgpid = 0; /* process group for zsched is 0 */
2072 psgid = (gid_t)-1; /* credential GID for zsched is -1 */
2073 spid = 0; /* session id for zsched is 0 */
2074 psdev = 0; /* session device for zsched is 0 */
2075 } else {
2076 /*
2077 * Make sure not to reference parent PIDs that reside outside
2078 * the zone
2079 */
2080 ppid = ((p->p_flag & SZONETOP) ?
2081 curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
2082
2083 /*
2084 * Convert ppid to the Linux default of 1 if our parent is the
2085 * zone's init process
2086 */
2087 if (ppid == curproc->p_zone->zone_proc_initpid)
2088 ppid = 1;
2089
2090 pgpid = p->p_pgrp;
2091
2092 mutex_enter(&p->p_splock);
2093 mutex_enter(&p->p_sessp->s_lock);
2094 spid = p->p_sessp->s_sid;
2095 psdev = p->p_sessp->s_dev;
2096 if (p->p_sessp->s_cred)
2097 psgid = crgetgid(p->p_sessp->s_cred);
2098 else
2099 psgid = crgetgid(p->p_cred);
2100
2101 mutex_exit(&p->p_sessp->s_lock);
2102 mutex_exit(&p->p_splock);
2103 }
2104
2105 t = lxpr_get_thread(p, lookup_id);
2106 if (t != NULL) {
2107 switch (t->t_state) {
2108 case TS_SLEEP:
2109 stat = 'S'; break;
2110 case TS_RUN:
2111 case TS_ONPROC:
2112 stat = 'R'; break;
2113 case TS_ZOMB:
2114 stat = 'Z'; break;
2115 case TS_STOPPED:
2116 stat = 'T'; break;
2117 default:
2118 stat = '!'; break;
2119 }
2120
2121 if (CL_DONICE(t, NULL, 0, &nice) != 0)
2122 nice = 0;
2123
2124 pri = t->t_pri;
2125 wchan = t->t_wchan;
2126 cpu = t->t_cpu->cpu_id;
2127 thread_unlock(t);
2128 } else {
2129 if (lookup_id != 0) {
2130 /* we can't find this specific thread */
2131 lxpr_uiobuf_seterr(uiobuf, EINVAL);
2132 lxpr_unlock(p);
2133 return;
2134 }
2135
2136 /* Only zombies have no threads */
2137 stat = 'Z';
2138 nice = 0;
2139 pri = 0;
2140 wchan = 0;
2141 cpu = 0;
2142 }
2143 as = p->p_as;
2144 mutex_exit(&p->p_lock);
2145 AS_LOCK_ENTER(as, RW_READER);
2146 vsize = as->a_resvsize;
2147 rss = rm_asrss(as);
2148 AS_LOCK_EXIT(as);
2149 mutex_enter(&p->p_lock);
2150
2151 lxpr_uiobuf_printf(uiobuf,
2152 "%d (%s) %c %d %d %d %d %d "
2153 "%lu %lu %lu %lu %lu "
2154 "%lu %lu %ld %ld "
2155 "%d %d %d "
2156 "%lu "
2157 "%lu "
2158 "%lu %ld %llu "
2159 "%lu %lu %u "
2160 "%lu %lu "
2161 "%lu %lu %lu %lu "
2162 "%lu "
2163 "%lu %lu "
2164 "%d "
2165 "%d"
2166 "\n",
2167 (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
2168 PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
2169 0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
2170 p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
2171 pri, nice, p->p_lwpcnt,
2172 0l, /* itrealvalue (time before next SIGALRM) */
2173 PTOU(p)->u_ticks,
2174 vsize, rss, p->p_vmem_ctl,
2175 0l, 0l, USRSTACK, /* startcode, endcode, startstack */
2176 0l, 0l, /* kstkesp, kstkeip */
2177 0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
2178 wchan,
2179 0l, 0l, /* nswap, cnswap */
2180 0, /* exit_signal */
2181 cpu);
2182
2183 lxpr_unlock(p);
2184 }
2185
2186 /*
2187 * lxpr_read_pid_stat(): pid stat file
2188 */
2189 static void
2190 lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2191 {
2192 ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
2193
2194 lxpr_read_stat_common(lxpnp, uiobuf, 0);
2195 }
2196
2197 /*
2198 * lxpr_read_pid_tid_stat(): pid stat file
2199 */
2200 static void
2201 lxpr_read_pid_tid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2202 {
2203 ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STAT);
2204 lxpr_read_stat_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
2205 }
2206
/* ARGSUSED */
static void
lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Unimplemented stub: /proc/net/arp reads back empty. */
}
2212
/*
 * Per-interface counters gathered from a link's named kstats, used to
 * build each line of /proc/net/dev (see lxpr_kstat_ifstat).
 */
struct lxpr_ifstat {
	uint64_t rx_bytes;
	uint64_t rx_packets;
	uint64_t rx_errors;
	uint64_t rx_drop;
	uint64_t tx_bytes;
	uint64_t tx_packets;
	uint64_t tx_errors;
	uint64_t tx_drop;
	uint64_t collisions;
	uint64_t rx_multicast;
};
2225
/*
 * Snapshot a kstat into a freshly kmem_alloc()ed buffer.  The kstat is
 * located either by name (byname == B_TRUE, using ks_module/ks_instance/
 * ks_name from kn) or by kid.  On success returns the buffer and fills in
 * *size (allocation size, needed for the caller's later kmem_free) and
 * *num (record count); returns NULL on any failure.
 */
static void *
lxpr_kstat_read(kstat_t *kn, boolean_t byname, size_t *size, int *num)
{
	kstat_t *kp;
	int i, nrec = 0;
	size_t bufsize;
	void *buf = NULL;

	if (byname == B_TRUE) {
		kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
		    kn->ks_name, getzoneid());
	} else {
		kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
	}
	if (kp == NULL) {
		return (NULL);
	}
	if (kp->ks_flags & KSTAT_FLAG_INVALID) {
		kstat_rele(kp);
		return (NULL);
	}

	/* +1 so a zero ks_data_size still yields a valid allocation */
	bufsize = kp->ks_data_size + 1;
	kstat_rele(kp);

	/*
	 * The kstat in question is released so that kmem_alloc(KM_SLEEP) is
	 * performed without it held.  After the alloc, the kstat is
	 * reacquired and its size is checked again.  If the buffer is no
	 * longer large enough, the alloc and check are retried once more
	 * (the loop below makes at most two attempts).
	 */
	for (i = 0; i < 2; i++) {
		buf = kmem_alloc(bufsize, KM_SLEEP);

		/* Check if bufsize still appropriate */
		if (byname == B_TRUE) {
			kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
			    kn->ks_name, getzoneid());
		} else {
			kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
		}
		if (kp == NULL || kp->ks_flags & KSTAT_FLAG_INVALID) {
			if (kp != NULL) {
				kstat_rele(kp);
			}
			kmem_free(buf, bufsize);
			return (NULL);
		}
		KSTAT_ENTER(kp);
		(void) KSTAT_UPDATE(kp, KSTAT_READ);
		if (bufsize < kp->ks_data_size) {
			/* Grew since the unheld alloc; retry with new size. */
			kmem_free(buf, bufsize);
			buf = NULL;
			bufsize = kp->ks_data_size + 1;
			KSTAT_EXIT(kp);
			kstat_rele(kp);
			continue;
		} else {
			if (KSTAT_SNAPSHOT(kp, buf, KSTAT_READ) != 0) {
				kmem_free(buf, bufsize);
				buf = NULL;
			}
			nrec = kp->ks_ndata;
			KSTAT_EXIT(kp);
			kstat_rele(kp);
			break;
		}
	}

	/* Only report size/count when a snapshot was actually captured. */
	if (buf != NULL) {
		*size = bufsize;
		*num = nrec;
	}
	return (buf);
}
2301
2302 static int
2303 lxpr_kstat_ifstat(kstat_t *kn, struct lxpr_ifstat *ifs)
2304 {
2305 kstat_named_t *kp;
2306 int i, num;
2307 size_t size;
2308
2309 /*
2310 * Search by name instead of by kid since there's a small window to
2311 * race against kstats being added/removed.
2312 */
2313 bzero(ifs, sizeof (*ifs));
2314 kp = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
2315 if (kp == NULL)
2316 return (-1);
2317 for (i = 0; i < num; i++) {
2318 if (strncmp(kp[i].name, "rbytes64", KSTAT_STRLEN) == 0)
2319 ifs->rx_bytes = kp[i].value.ui64;
2320 else if (strncmp(kp[i].name, "ipackets64", KSTAT_STRLEN) == 0)
2321 ifs->rx_packets = kp[i].value.ui64;
2322 else if (strncmp(kp[i].name, "ierrors", KSTAT_STRLEN) == 0)
2323 ifs->rx_errors = kp[i].value.ui32;
2324 else if (strncmp(kp[i].name, "norcvbuf", KSTAT_STRLEN) == 0)
2325 ifs->rx_drop = kp[i].value.ui32;
2326 else if (strncmp(kp[i].name, "multircv", KSTAT_STRLEN) == 0)
2327 ifs->rx_multicast = kp[i].value.ui32;
2328 else if (strncmp(kp[i].name, "obytes64", KSTAT_STRLEN) == 0)
2329 ifs->tx_bytes = kp[i].value.ui64;
2330 else if (strncmp(kp[i].name, "opackets64", KSTAT_STRLEN) == 0)
2331 ifs->tx_packets = kp[i].value.ui64;
2332 else if (strncmp(kp[i].name, "oerrors", KSTAT_STRLEN) == 0)
2333 ifs->tx_errors = kp[i].value.ui32;
2334 else if (strncmp(kp[i].name, "noxmtbuf", KSTAT_STRLEN) == 0)
2335 ifs->tx_drop = kp[i].value.ui32;
2336 else if (strncmp(kp[i].name, "collisions", KSTAT_STRLEN) == 0)
2337 ifs->collisions = kp[i].value.ui32;
2338 }
2339 kmem_free(kp, size);
2340 return (0);
2341 }
2342
/* ARGSUSED */
/*
 * Generate /proc/net/dev: a two-line header followed by one line of
 * rx/tx counters per link (or loopback) interface.  Fields Linux tracks
 * but we do not (fifo, frame, compressed, carrier) are reported as 0.
 */
static void
lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	kstat_t *ksr;
	kstat_t ks0;
	int i, nidx;
	size_t sidx;
	struct lxpr_ifstat ifs;

	lxpr_uiobuf_printf(uiobuf, "Inter-| Receive "
	    " | Transmit\n");
	lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo"
	    " frame compressed multicast|bytes packets errs drop fifo"
	    " colls carrier compressed\n");

	/*
	 * NOTE(review): kid 0 appears to snapshot the whole kstat header
	 * chain (same pattern as lxpr_read_net_snmp) — confirm.
	 */
	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
	if (ksr == NULL)
		return;

	/* Record 0 is skipped; scan the rest for link/loopback modules. */
	for (i = 1; i < nidx; i++) {
		if (strncmp(ksr[i].ks_module, "link", KSTAT_STRLEN) == 0 ||
		    strncmp(ksr[i].ks_module, "lo", KSTAT_STRLEN) == 0) {
			if (lxpr_kstat_ifstat(&ksr[i], &ifs) != 0)
				continue;

			/* Overwriting the name is ok in the local snapshot */
			lx_ifname_convert(ksr[i].ks_name, LX_IF_FROMNATIVE);
			lxpr_uiobuf_printf(uiobuf, "%6s: %7llu %7llu %4lu "
			    "%4lu %4u %5u %10u %9lu %8llu %7llu %4lu %4lu %4u "
			    "%5lu %7u %10u\n",
			    ksr[i].ks_name,
			    ifs.rx_bytes, ifs.rx_packets,
			    ifs.rx_errors, ifs.rx_drop,
			    0, 0, 0, ifs.rx_multicast,
			    ifs.tx_bytes, ifs.tx_packets,
			    ifs.tx_errors, ifs.tx_drop,
			    0, ifs.collisions, 0, 0);
		}
	}

	kmem_free(ksr, sidx);
}
2387
/* ARGSUSED */
static void
lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Unimplemented stub: /proc/net/dev_mcast reads back empty. */
}
2393
2394 static void
2395 lxpr_inet6_out(const in6_addr_t *addr, char buf[33])
2396 {
2397 const uint8_t *ip = addr->s6_addr;
2398 char digits[] = "0123456789abcdef";
2399 int i;
2400 for (i = 0; i < 16; i++) {
2401 buf[2 * i] = digits[ip[i] >> 4];
2402 buf[2 * i + 1] = digits[ip[i] & 0xf];
2403 }
2404 buf[32] = '\0';
2405 }
2406
/* ARGSUSED */
/*
 * Generate /proc/net/if_inet6: one line per configured IPv6 address,
 * giving the address, interface index, prefix length, scope, flags, and
 * the Linux-converted interface name.
 */
static void
lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;
	ill_t *ill;
	ipif_t *ipif;
	ill_walk_context_t ctx;
	char ifname[LIFNAMSIZ], ip6out[33];

	/* No current netstack (e.g. during teardown) means nothing to show */
	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/* Walk all V6 ills and each of their address ipifs under the lock. */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx, ipst);

	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			uint_t index = ill->ill_phyint->phyint_ifindex;
			int plen = ip_mask_to_plen_v6(&ipif->ipif_v6net_mask);
			unsigned int scope = lx_ipv6_scope_convert(
			    &ipif->ipif_v6lcl_addr);
			/* Always report PERMANENT flag */
			int flag = 0x80;

			(void) snprintf(ifname, LIFNAMSIZ, "%s", ill->ill_name);
			lx_ifname_convert(ifname, LX_IF_FROMNATIVE);
			lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out);

			lxpr_uiobuf_printf(uiobuf, "%32s %02x %02x %02x %02x"
			    " %8s\n", ip6out, index, plen, scope, flag, ifname);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	netstack_rele(ns);
}
2447
/* ARGSUSED */
static void
lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Unimplemented stub: /proc/net/igmp reads back empty. */
}
2453
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Unimplemented stub: /proc/net/ip_mr_cache reads back empty. */
}
2459
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Unimplemented stub: /proc/net/ip_mr_vif reads back empty. */
}
2465
/*
 * Emit one /proc/net/ipv6_route line for an IPv6 IRE; used as the
 * callback for ire_walk_v6() in lxpr_read_net_ipv6_route.
 */
static void
lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf)
{
	uint32_t flags;
	char name[IFNAMSIZ];
	char ipv6addr[33];

	/* Destination address and prefix length */
	lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr);
	lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr,
	    ip_mask_to_plen_v6(&ire->ire_mask_v6));

	/* punt on this for now */
	lxpr_uiobuf_printf(uiobuf, "%s %02x ",
	    "00000000000000000000000000000000", 0);

	lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr);
	lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr);

	/* Route flags with direct Linux equivalents */
	flags = ire->ire_flags &
	    (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
	/* Linux's RTF_LOCAL equivalent */
	if (ire->ire_metrics.iulp_local)
		flags |= 0x80000000;

	if (ire->ire_ill != NULL) {
		ill_get_name(ire->ire_ill, name, sizeof (name));
		lx_ifname_convert(name, LX_IF_FROMNATIVE);
	} else {
		name[0] = '\0';
	}

	lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n",
	    0, /* metric */
	    ire->ire_refcnt,
	    0,
	    flags,
	    name);
}
2504
/* ARGSUSED */
/*
 * Generate /proc/net/ipv6_route by walking the current netstack's IPv6
 * IREs with lxpr_format_route_ipv6 as the per-IRE formatter.
 */
static void
lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;

	/* No current netstack (e.g. during teardown) means nothing to show */
	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/*
	 * LX branded zones are expected to have exclusive IP stack, hence
	 * using ALL_ZONES as the zoneid filter.
	 */
	ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst);

	netstack_rele(ns);
}
2525
/* ARGSUSED */
static void
lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Unimplemented stub: /proc/net/mcfilter reads back empty. */
}
2531
/* ARGSUSED */
static void
lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Unimplemented stub: /proc/net/netstat reads back empty. */
}
2537
/* ARGSUSED */
static void
lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Unimplemented stub: /proc/net/raw reads back empty. */
}
2543
/*
 * IRE types excluded from the /proc/net/route output: interface clones,
 * broadcast/multicast entries, no-route markers, and loopback/local routes.
 */
#define LXPR_SKIP_ROUTE(type)	\
	(((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \
	IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & type) != 0)
2547
/*
 * Emit one /proc/net/route line for an IPv4 IRE; used as the callback
 * for ire_walk_v4() in lxpr_read_net_route.  Routes matched by
 * LXPR_SKIP_ROUTE and hidden test IREs are suppressed.
 */
static void
lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf)
{
	uint32_t flags;
	char name[IFNAMSIZ];
	ill_t *ill;
	ire_t *nire;
	ipif_t *ipif;
	ipaddr_t gateway;

	if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0)
		return;

	/* These route flags have direct Linux equivalents */
	flags = ire->ire_flags &
	    (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);

	/*
	 * Search for a suitable IRE for naming purposes.
	 * On Linux, the default route is typically associated with the
	 * interface used to access gateway. The default IRE on Illumos
	 * typically lacks an ill reference but its parent might have one.
	 */
	nire = ire;
	do {
		ill = nire->ire_ill;
		nire = nire->ire_dep_parent;
	} while (ill == NULL && nire != NULL);
	if (ill != NULL) {
		ill_get_name(ill, name, sizeof (name));
		lx_ifname_convert(name, LX_IF_FROMNATIVE);
	} else {
		/* No interface found anywhere in the chain */
		name[0] = '*';
		name[1] = '\0';
	}

	/*
	 * Linux suppresses the gateway address for directly connected
	 * interface networks. To emulate this behavior, we walk all addresses
	 * of a given route interface. If one matches the gateway, it is
	 * displayed as NULL.
	 */
	gateway = ire->ire_gateway_addr;
	if ((ill = ire->ire_ill) != NULL) {
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_lcl_addr == gateway) {
				gateway = 0;
				break;
			}
		}
	}

	lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
	    "%d\t%08X\t%d\t%u\t%u\n",
	    name,
	    ire->ire_addr,
	    gateway,
	    flags, 0, 0,
	    0, /* priority */
	    ire->ire_mask,
	    0, 0, /* mss, window */
	    ire->ire_metrics.iulp_rtt);
}
2612
/* ARGSUSED */
/*
 * Generate /proc/net/route: a header line followed by one line per IPv4
 * IRE, formatted by lxpr_format_route_ipv4.
 */
static void
lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;

	lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t"
	    "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");

	/* No current netstack (e.g. during teardown) means nothing to show */
	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/*
	 * LX branded zones are expected to have exclusive IP stack, hence
	 * using ALL_ZONES as the zoneid filter.
	 */
	ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst);

	netstack_rele(ns);
}
2636
/* ARGSUSED */
static void
lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Unimplemented stub: /proc/net/rpc reads back empty. */
}
2642
/* ARGSUSED */
static void
lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Unimplemented stub: /proc/net/rt_cache reads back empty. */
}
2648
/* ARGSUSED */
static void
lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Unimplemented stub: /proc/net/sockstat reads back empty. */
}
2654
/*
 * Describes one protocol section of /proc/net/snmp: the mib2 kstat name
 * (lst_proto) and a NULL-terminated list of kstat field names to emit,
 * in the column order Linux uses.
 */
typedef struct lxpr_snmp_table {
	const char *lst_proto;
	const char *lst_fields[];
} lxpr_snmp_table_t;

static lxpr_snmp_table_t lxpr_snmp_ip = { "ip",
	{
	"forwarding", "defaultTTL", "inReceives", "inHdrErrors",
	"inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards",
	"inDelivers", "outRequests", "outDiscards", "outNoRoutes",
	"reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs",
	"fragFails", "fragCreates",
	NULL
	}
};
static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp",
	{
	"inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds",
	"inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps",
	"inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps",
	"outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds",
	"outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos",
	"outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks",
	"outAddrMaskReps",
	NULL
	}
};
static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp",
	{
	"rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens",
	"passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs",
	"outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors",
	NULL
	}
};
static lxpr_snmp_table_t lxpr_snmp_udp = { "udp",
	{
	"inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors",
	"sndbufErrors", "inCsumErrors",
	NULL
	}
};

/* NULL-terminated list of the sections emitted by lxpr_read_net_snmp. */
static lxpr_snmp_table_t *lxpr_net_snmptab[] = {
	&lxpr_snmp_ip,
	&lxpr_snmp_icmp,
	&lxpr_snmp_tcp,
	&lxpr_snmp_udp,
	NULL
};
2705
/*
 * Print one protocol section of /proc/net/snmp from the given mib2 kstat:
 * a header line listing the table's field names (capitalized), followed
 * by a value line with the matching kstat values, 0 for any field the
 * kstat does not carry.
 */
static void
lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table,
    kstat_t *kn)
{
	kstat_named_t *klist;
	char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN];
	int i, j, num;
	size_t size;

	klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
	if (klist == NULL)
		return;

	/*
	 * Print the header line, fields capitalized.
	 * NOTE(review): strncpy does not NUL-terminate when the source
	 * fills KSTAT_STRLEN; current proto/field names are shorter than
	 * that — confirm if new tables are added.
	 */
	(void) strncpy(upname, table->lst_proto, KSTAT_STRLEN);
	upname[0] = toupper(upname[0]);
	lxpr_uiobuf_printf(uiobuf, "%s:", upname);
	for (i = 0; table->lst_fields[i] != NULL; i++) {
		(void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN);
		upfield[0] = toupper(upfield[0]);
		lxpr_uiobuf_printf(uiobuf, " %s", upfield);
	}
	lxpr_uiobuf_printf(uiobuf, "\n%s:", upname);

	/* Then loop back through to print the value line. */
	for (i = 0; table->lst_fields[i] != NULL; i++) {
		kstat_named_t *kpoint = NULL;
		/* Linear search of the snapshot for this field's kstat. */
		for (j = 0; j < num; j++) {
			if (strncmp(klist[j].name, table->lst_fields[i],
			    KSTAT_STRLEN) == 0) {
				kpoint = &klist[j];
				break;
			}
		}
		if (kpoint == NULL) {
			/* Output 0 for unknown fields */
			lxpr_uiobuf_printf(uiobuf, " 0");
		} else {
			switch (kpoint->data_type) {
			case KSTAT_DATA_INT32:
				lxpr_uiobuf_printf(uiobuf, " %d",
				    kpoint->value.i32);
				break;
			case KSTAT_DATA_UINT32:
				lxpr_uiobuf_printf(uiobuf, " %u",
				    kpoint->value.ui32);
				break;
			case KSTAT_DATA_INT64:
				/*
				 * NOTE(review): %ld/%lu assume a 64-bit
				 * long for 64-bit kstat values — confirm
				 * for any 32-bit build.
				 */
				lxpr_uiobuf_printf(uiobuf, " %ld",
				    kpoint->value.l);
				break;
			case KSTAT_DATA_UINT64:
				lxpr_uiobuf_printf(uiobuf, " %lu",
				    kpoint->value.ul);
				break;
			}
		}
	}
	lxpr_uiobuf_printf(uiobuf, "\n");
	kmem_free(klist, size);
}
2767
/* ARGSUSED */
/*
 * Generate /proc/net/snmp: for each protocol table in lxpr_net_snmptab,
 * locate the matching "mib2"-class kstat by name and print its
 * header/value section via lxpr_kstat_print_tab.
 */
static void
lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	kstat_t *ksr;
	kstat_t ks0;
	lxpr_snmp_table_t **table = lxpr_net_snmptab;
	int i, t, nidx;
	size_t sidx;

	/* Snapshot the kstat headers (kid 0), as in lxpr_read_net_dev. */
	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
	if (ksr == NULL)
		return;

	for (t = 0; table[t] != NULL; t++) {
		for (i = 0; i < nidx; i++) {
			if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0)
				continue;
			if (strncmp(ksr[i].ks_name, table[t]->lst_proto,
			    KSTAT_STRLEN) == 0) {
				lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]);
				break;
			}
		}
	}
	kmem_free(ksr, sidx);
}
2796
/* ARGSUSED */
static void
lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Unimplemented stub: /proc/net/stat reads back empty. */
}
2802
2803 static int
2804 lxpr_convert_tcp_state(int st)
2805 {
2806 /*
2807 * Derived from the enum located in the Linux kernel sources:
2808 * include/net/tcp_states.h
2809 */
2810 switch (st) {
2811 case TCPS_ESTABLISHED:
2812 return (1);
2813 case TCPS_SYN_SENT:
2814 return (2);
2815 case TCPS_SYN_RCVD:
2816 return (3);
2817 case TCPS_FIN_WAIT_1:
2818 return (4);
2819 case TCPS_FIN_WAIT_2:
2820 return (5);
2821 case TCPS_TIME_WAIT:
2822 return (6);
2823 case TCPS_CLOSED:
2824 return (7);
2825 case TCPS_CLOSE_WAIT:
2826 return (8);
2827 case TCPS_LAST_ACK:
2828 return (9);
2829 case TCPS_LISTEN:
2830 return (10);
2831 case TCPS_CLOSING:
2832 return (11);
2833 default:
2834 /* No translation for TCPS_IDLE, TCPS_BOUND or anything else */
2835 return (0);
2836 }
2837 }
2838
/*
 * Emit /proc/net/tcp (ipver == IPV4_VERSION) or /proc/net/tcp6 by walking
 * the current netstack's global conn hash and printing every TCP conn_t
 * of the requested IP version.
 */
static void
lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
{
	int i, sl = 0;
	connf_t *connfp;
	conn_t *connp;
	netstack_t *ns;
	ip_stack_t *ipst;

	ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
	if (ipver == IPV4_VERSION) {
		lxpr_uiobuf_printf(uiobuf, " sl local_address rem_address "
		    "st tx_queue rx_queue tr tm->when retrnsmt uid timeout "
		    "inode\n");
	} else {
		lxpr_uiobuf_printf(uiobuf, " sl "
		    "local_address "
		    "remote_address "
		    "st tx_queue rx_queue tr tm->when retrnsmt "
		    "uid timeout inode\n");
	}
	/*
	 * Due to differences between the Linux and illumos TCP
	 * implementations, some data will be omitted from the output here.
	 *
	 * Valid fields:
	 * - local_address
	 * - remote_address
	 * - st
	 * - tx_queue
	 * - rx_queue
	 * - uid
	 * - inode
	 *
	 * Omitted/invalid fields
	 * - tr
	 * - tm->when
	 * - retrnsmt
	 * - timeout
	 */

	/* No current netstack (e.g. during teardown) means nothing to show */
	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;
		while ((connp =
		    ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) {
			tcp_t *tcp;
			vattr_t attr;
			sonode_t *so = (sonode_t *)connp->conn_upper_handle;
			vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
			if (connp->conn_ipversion != ipver)
				continue;
			tcp = connp->conn_tcp;
			if (ipver == IPV4_VERSION) {
				lxpr_uiobuf_printf(uiobuf,
				    "%4d: %08X:%04X %08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v4,
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v4,
				    ntohs(connp->conn_fport));
			} else {
				lxpr_uiobuf_printf(uiobuf, "%4d: "
				    "%08X%08X%08X%08X:%04X "
				    "%08X%08X%08X%08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v6.s6_addr32[0],
				    connp->conn_laddr_v6.s6_addr32[1],
				    connp->conn_laddr_v6.s6_addr32[2],
				    connp->conn_laddr_v6.s6_addr32[3],
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v6.s6_addr32[0],
				    connp->conn_faddr_v6.s6_addr32[1],
				    connp->conn_faddr_v6.s6_addr32[2],
				    connp->conn_faddr_v6.s6_addr32[3],
				    ntohs(connp->conn_fport));
			}

			/* fetch the simulated inode for the socket */
			if (vp == NULL ||
			    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
				attr.va_nodeid = 0;

			lxpr_uiobuf_printf(uiobuf,
			    "%02X %08X:%08X %02X:%08X %08X "
			    "%5u %8d %lu %d %p %u %u %u %u %d\n",
			    lxpr_convert_tcp_state(tcp->tcp_state),
			    tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */
			    0, 0, /* tr, when */
			    0, /* per-connection rexmits aren't tracked today */
			    connp->conn_cred->cr_uid,
			    0, /* timeout */
			    /* inode + more */
			    (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0);
		}
	}
	netstack_rele(ns);
}
2942
/* ARGSUSED */
/* Generate /proc/net/tcp (IPv4 connections). */
static void
lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_tcp(uiobuf, IPV4_VERSION);
}
2949
/* ARGSUSED */
/* Generate /proc/net/tcp6 (IPv6 connections). */
static void
lxpr_read_net_tcp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_tcp(uiobuf, IPV6_VERSION);
}
2956
/*
 * lxpr_format_udp(): common worker for /proc/net/udp and /proc/net/udp6.
 * Walks the global IP connection hash of the current netstack and emits one
 * Linux-format line for each UDP conn_t of the requested IP version.
 */
static void
lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
{
	int i, sl = 0;		/* sl: running slot number in the output */
	connf_t *connfp;
	conn_t *connp;
	netstack_t *ns;
	ip_stack_t *ipst;

	ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
	/* The v4 and v6 files differ only in the width of the header. */
	if (ipver == IPV4_VERSION) {
		lxpr_uiobuf_printf(uiobuf, " sl local_address rem_address"
		    " st tx_queue rx_queue tr tm->when retrnsmt uid"
		    " timeout inode ref pointer drops\n");
	} else {
		lxpr_uiobuf_printf(uiobuf, " sl "
		    "local_address "
		    "remote_address "
		    "st tx_queue rx_queue tr tm->when retrnsmt "
		    "uid timeout inode ref pointer drops\n");
	}
	/*
	 * Due to differences between the Linux and illumos UDP
	 * implementations, some data will be omitted from the output here.
	 *
	 * Valid fields:
	 * - local_address
	 * - remote_address
	 * - st: limited
	 * - uid
	 *
	 * Omitted/invalid fields
	 * - tx_queue
	 * - rx_queue
	 * - tr
	 * - tm->when
	 * - retrnsmt
	 * - timeout
	 * - inode
	 */

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;
		/*
		 * ipcl_get_next_conn() handles the hold/release of each
		 * conn_t as we iterate, so no explicit refcounting here.
		 */
		while ((connp =
		    ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) {
			udp_t *udp;
			int state = 0;
			vattr_t attr;
			sonode_t *so = (sonode_t *)connp->conn_upper_handle;
			vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
			if (connp->conn_ipversion != ipver)
				continue;
			udp = connp->conn_udp;
			if (ipver == IPV4_VERSION) {
				lxpr_uiobuf_printf(uiobuf,
				    "%4d: %08X:%04X %08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v4,
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v4,
				    ntohs(connp->conn_fport));
			} else {
				lxpr_uiobuf_printf(uiobuf, "%4d: "
				    "%08X%08X%08X%08X:%04X "
				    "%08X%08X%08X%08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v6.s6_addr32[0],
				    connp->conn_laddr_v6.s6_addr32[1],
				    connp->conn_laddr_v6.s6_addr32[2],
				    connp->conn_laddr_v6.s6_addr32[3],
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v6.s6_addr32[0],
				    connp->conn_faddr_v6.s6_addr32[1],
				    connp->conn_faddr_v6.s6_addr32[2],
				    connp->conn_faddr_v6.s6_addr32[3],
				    ntohs(connp->conn_fport));
			}

			/*
			 * Map the TPI state to the nearest Linux socket
			 * state number (7 and 1 appear to correspond to
			 * Linux TCP_CLOSE and TCP_ESTABLISHED -- confirm
			 * against include/net/tcp_states.h).
			 */
			switch (udp->udp_state) {
			case TS_UNBND:
			case TS_IDLE:
				state = 7;
				break;
			case TS_DATA_XFER:
				state = 1;
				break;
			}

			/* fetch the simulated inode for the socket */
			if (vp == NULL ||
			    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
				attr.va_nodeid = 0;

			lxpr_uiobuf_printf(uiobuf,
			    "%02X %08X:%08X %02X:%08X %08X "
			    "%5u %8d %lu %d %p %d\n",
			    state,
			    0, 0, /* rx/tx queue */
			    0, 0, /* tr, when */
			    0, /* retrans */
			    connp->conn_cred->cr_uid,
			    0, /* timeout */
			    /* inode, ref, pointer, drops */
			    (ino_t)attr.va_nodeid, 0, NULL, 0);
		}
	}
	netstack_rele(ns);
}
3071
3072 /* ARGSUSED */
static void
lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* /proc/net/udp: the IPv4 UDP socket table. */
	lxpr_format_udp(uiobuf, IPV4_VERSION);
}
3078
3079 /* ARGSUSED */
static void
lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* /proc/net/udp6: the IPv6 UDP socket table. */
	lxpr_format_udp(uiobuf, IPV6_VERSION);
}
3085
3086 /* ARGSUSED */
/*
 * lxpr_read_net_unix(): emit /proc/net/unix by walking the global socket
 * list and reporting each active AF_UNIX (sotpi) socket in this zone in
 * Linux format.
 */
static void
lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	sonode_t *so;
	zoneid_t zoneid = getzoneid();

	lxpr_uiobuf_printf(uiobuf, "Num RefCount Protocol Flags Type "
	    "St Inode Path\n");

	mutex_enter(&socklist.sl_lock);
	for (so = socklist.sl_list; so != NULL;
	    so = _SOTOTPI(so)->sti_next_so) {
		vnode_t *vp = so->so_vnode;
		vattr_t attr;
		sotpi_info_t *sti;
		const char *name = NULL;
		int status = 0;
		int type = 0;
		int flags = 0;

		/* Only process active sonodes in this zone */
		if (so->so_count == 0 || so->so_zoneid != zoneid)
			continue;

		/*
		 * Grab the inode, if possible.
		 * This must be done before entering so_lock.
		 */
		if (vp == NULL ||
		    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
			attr.va_nodeid = 0;

		mutex_enter(&so->so_lock);
		sti = _SOTOTPI(so);

		/* Prefer the bound (local) path; fall back to the peer's. */
		if (sti->sti_laddr_sa != NULL &&
		    sti->sti_laddr_len > 0) {
			name = sti->sti_laddr_sa->sa_data;
		} else if (sti->sti_faddr_sa != NULL &&
		    sti->sti_faddr_len > 0) {
			name = sti->sti_faddr_sa->sa_data;
		}

		/*
		 * Derived from enum values in Linux kernel source:
		 * include/uapi/linux/net.h
		 */
		if ((so->so_state & SS_ISDISCONNECTING) != 0) {
			status = 4;
		} else if ((so->so_state & SS_ISCONNECTING) != 0) {
			status = 2;
		} else if ((so->so_state & SS_ISCONNECTED) != 0) {
			status = 3;
		} else {
			status = 1;
			/* Add ACC flag for stream-type server sockets */
			if (so->so_type != SOCK_DGRAM &&
			    sti->sti_laddr_sa != NULL)
				flags |= 0x10000;
		}

		/* Convert to Linux type */
		switch (so->so_type) {
		case SOCK_DGRAM:
			type = 2;
			break;
		case SOCK_SEQPACKET:
			type = 5;
			break;
		default:
			type = 1;
		}

		lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu",
		    so,
		    so->so_count,
		    0, /* proto, always 0 */
		    flags,
		    type,
		    status,
		    (ino_t)attr.va_nodeid);

		/*
		 * Due to shortcomings in the abstract socket emulation, they
		 * cannot be properly represented here (as @<path>).
		 *
		 * This will be the case until they are better implemented.
		 */
		if (name != NULL)
			lxpr_uiobuf_printf(uiobuf, " %s\n", name);
		else
			lxpr_uiobuf_printf(uiobuf, "\n");
		mutex_exit(&so->so_lock);
	}
	mutex_exit(&socklist.sl_lock);
}
3183
3184 /*
3185 * lxpr_read_kmsg(): read the contents of the kernel message queue. We
3186 * translate this into the reception of console messages for this zone; each
3187 * read copies out a single zone console message, or blocks until the next one
3188 * is produced, unless we're open non-blocking, in which case we return after
3189 * 1ms.
3190 */
3191
3192 #define LX_KMSG_PRI "<0>"
3193
static void
lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh)
{
	mblk_t *mp;
	timestruc_t to;
	timestruc_t *tp = NULL;	/* NULL timeout: block until a message */

	ASSERT(lxpnp->lxpr_type == LXPR_KMSG);

	/* Non-blocking reads wait at most 1ms for a console message. */
	if (lxpr_uiobuf_nonblock(uiobuf)) {
		to.tv_sec = 0;
		to.tv_nsec = 1000000; /* 1msec */
		tp = &to;
	}

	if (ldi_getmsg(lh, &mp, tp) == 0) {
		/*
		 * lx procfs doesn't like successive reads to the same file
		 * descriptor unless we do an explicit rewind each time.
		 */
		lxpr_uiobuf_seek(uiobuf, 0);

		/* Prefix each message with a Linux-style priority tag. */
		lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
		    mp->b_cont->b_rptr);

		freemsg(mp);
	}
}
3222
3223 /*
3224 * lxpr_read_loadavg(): read the contents of the "loadavg" file. We do just
3225 * enough for uptime and other simple lxproc readers to work
3226 */
3227 extern int nthread;
3228
static void
lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Integer and centisecond parts of the 1/5/15 minute averages. */
	ulong_t avenrun1;
	ulong_t avenrun5;
	ulong_t avenrun15;
	ulong_t avenrun1_cs;
	ulong_t avenrun5_cs;
	ulong_t avenrun15_cs;
	int loadavg[3];
	int *loadbuf;
	cpupart_t *cp;
	zone_t *zone = LXPTOZ(lxpnp);

	uint_t nrunnable = 0;
	rctl_qty_t nlwps;

	ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);

	/* cpu_lock protects the partition list traversed below. */
	mutex_enter(&cpu_lock);

	/*
	 * Need to add up values over all CPU partitions. If pools are active,
	 * only report the values of the zone's partition, which by definition
	 * includes the current CPU.
	 */
	if (pool_pset_enabled()) {
		psetid_t psetid = zone_pset_get(curproc->p_zone);

		ASSERT(curproc->p_zone != &zone0);
		cp = CPU->cpu_part;

		nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
		(void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
		loadbuf = &loadavg[0];
	} else {
		cp = cp_list_head;
		do {
			nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
		} while ((cp = cp->cp_next) != cp_list_head);

		loadbuf = zone == global_zone ?
		    &avenrun[0] : zone->zone_avenrun;
	}

	/*
	 * If we're in the non-global zone, we'll report the total number of
	 * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
	 * otherwise will just use nthread (which will include kernel threads,
	 * but should be good enough for lxproc).
	 */
	nlwps = zone == global_zone ? nthread : zone->zone_nlwps;

	mutex_exit(&cpu_lock);

	/* Convert fixed-point (FSCALE) averages to "N.NN" components. */
	avenrun1 = loadbuf[0] >> FSHIFT;
	avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
	avenrun5 = loadbuf[1] >> FSHIFT;
	avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
	avenrun15 = loadbuf[2] >> FSHIFT;
	avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;

	/* Final 0 is the "most recent PID" field, which we do not track. */
	lxpr_uiobuf_printf(uiobuf,
	    "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
	    avenrun1, avenrun1_cs,
	    avenrun5, avenrun5_cs,
	    avenrun15, avenrun15_cs,
	    nrunnable, nlwps, 0);
}
3298
3299 /*
3300 * lxpr_read_meminfo(): read the contents of the "meminfo" file.
3301 */
static void
lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	zone_t *zone = LXPTOZ(lxpnp);
	int global = zone == global_zone;
	long total_mem, free_mem, total_swap, used_swap;

	ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);

	/*
	 * Report zone memory caps when set; otherwise fall back to the
	 * system-wide figures (UINT64_MAX means "uncapped").
	 */
	if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
		total_mem = physmem * PAGESIZE;
		free_mem = freemem * PAGESIZE;
	} else {
		total_mem = zone->zone_phys_mem_ctl;
		free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
	}

	if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
		total_swap = k_anoninfo.ani_max * PAGESIZE;
		used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
	} else {
		mutex_enter(&zone->zone_mem_lock);
		total_swap = zone->zone_max_swap_ctl;
		used_swap = zone->zone_max_swap;
		mutex_exit(&zone->zone_mem_lock);
	}

	/* Fields we do not track are reported as zero. */
	lxpr_uiobuf_printf(uiobuf,
	    "MemTotal: %8lu kB\n"
	    "MemFree: %8lu kB\n"
	    "MemShared: %8u kB\n"
	    "Buffers: %8u kB\n"
	    "Cached: %8u kB\n"
	    "SwapCached:%8u kB\n"
	    "Active: %8u kB\n"
	    "Inactive: %8u kB\n"
	    "HighTotal: %8u kB\n"
	    "HighFree: %8u kB\n"
	    "LowTotal: %8u kB\n"
	    "LowFree: %8u kB\n"
	    "SwapTotal: %8lu kB\n"
	    "SwapFree: %8lu kB\n",
	    btok(total_mem),		/* MemTotal */
	    btok(free_mem),		/* MemFree */
	    0,				/* MemShared */
	    0,				/* Buffers */
	    0,				/* Cached */
	    0,				/* SwapCached */
	    0,				/* Active */
	    0,				/* Inactive */
	    0,				/* HighTotal */
	    0,				/* HighFree */
	    btok(total_mem),		/* LowTotal */
	    btok(free_mem),		/* LowFree */
	    btok(total_swap),		/* SwapTotal */
	    btok(total_swap - used_swap)); /* SwapFree */
}
3359
3360 /*
3361 * lxpr_read_mounts():
3362 */
3363 /* ARGSUSED */
static void
lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	struct vfs *vfsp;
	struct vfs *vfslist;
	zone_t *zone = LXPTOZ(lxpnp);
	/* Snapshot of the per-vfs data we need, taken under the vfs lock. */
	struct print_data {
		refstr_t *vfs_mntpt;
		refstr_t *vfs_resource;
		uint_t vfs_flag;
		int vfs_fstype;
		struct print_data *next;
	} *print_head = NULL;
	struct print_data **print_tail = &print_head;
	struct print_data *printp;

	vfs_list_read_lock();

	if (zone == global_zone) {
		vfsp = vfslist = rootvfs;
	} else {
		vfsp = vfslist = zone->zone_vfslist;
		/*
		 * If the zone has a root entry, it will be the first in
		 * the list. If it doesn't, we conjure one up.
		 */
		if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
		    zone->zone_rootpath) != 0) {
			struct vfs *tvfsp;
			/*
			 * The root of the zone is not a mount point. The vfs
			 * we want to report is that of the zone's root vnode.
			 */
			tvfsp = zone->zone_rootvp->v_vfsp;

			lxpr_uiobuf_printf(uiobuf,
			    "/ / %s %s 0 0\n",
			    vfssw[tvfsp->vfs_fstype].vsw_name,
			    tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");

		}
		if (vfslist == NULL) {
			vfs_list_unlock();
			return;
		}
	}

	/*
	 * Later on we have to do a lookupname, which can end up causing
	 * another vfs_list_read_lock() to be called. Which can lead to a
	 * deadlock. To avoid this, we extract the data we need into a local
	 * list, then we can run this list without holding vfs_list_read_lock()
	 * We keep the list in the same order as the vfs_list
	 */
	do {
		/* Skip mounts we shouldn't show */
		if (vfsp->vfs_flag & VFS_NOMNTTAB) {
			goto nextfs;
		}

		/* Hold the refstrs so they outlive the vfs list lock. */
		printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
		refstr_hold(vfsp->vfs_mntpt);
		printp->vfs_mntpt = vfsp->vfs_mntpt;
		refstr_hold(vfsp->vfs_resource);
		printp->vfs_resource = vfsp->vfs_resource;
		printp->vfs_flag = vfsp->vfs_flag;
		printp->vfs_fstype = vfsp->vfs_fstype;
		printp->next = NULL;

		*print_tail = printp;
		print_tail = &printp->next;

nextfs:
		vfsp = (zone == global_zone) ?
		    vfsp->vfs_next : vfsp->vfs_zone_next;

	} while (vfsp != vfslist);

	vfs_list_unlock();

	/*
	 * now we can run through what we've extracted without holding
	 * vfs_list_read_lock()
	 */
	printp = print_head;
	while (printp != NULL) {
		struct print_data *printp_next;
		const char *resource;
		char *mntpt;
		struct vnode *vp;
		int error;

		mntpt = (char *)refstr_value(printp->vfs_mntpt);
		resource = refstr_value(printp->vfs_resource);

		/* Translate the mount point into the zone's namespace. */
		if (mntpt != NULL && mntpt[0] != '\0')
			mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
		else
			mntpt = "-";

		error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);

		if (error != 0)
			goto nextp;

		/* Only report paths that are actually mount points. */
		if (!(vp->v_flag & VROOT)) {
			VN_RELE(vp);
			goto nextp;
		}
		VN_RELE(vp);

		if (resource != NULL && resource[0] != '\0') {
			if (resource[0] == '/') {
				resource = ZONE_PATH_VISIBLE(resource, zone) ?
				    ZONE_PATH_TRANSLATE(resource, zone) :
				    mntpt;
			}
		} else {
			resource = "-";
		}

		lxpr_uiobuf_printf(uiobuf,
		    "%s %s %s %s 0 0\n",
		    resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
		    printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");

nextp:
		/* Release the snapshot entry whether or not it was printed. */
		printp_next = printp->next;
		refstr_rele(printp->vfs_mntpt);
		refstr_rele(printp->vfs_resource);
		kmem_free(printp, sizeof (*printp));
		printp = printp_next;

	}
}
3499
3500 /*
3501 * lxpr_read_partitions():
3502 *
3503 * Over the years, /proc/partitions has been made considerably smaller -- to
3504 * the point that it really is only major number, minor number, number of
3505 * blocks (which we report as 0), and partition name.
3506 *
3507 * We support this because some things want to see it to make sense of
3508 * /proc/diskstats, and also because "fdisk -l" and a few other things look
3509 * here to find all disks on the system.
3510 */
3511 /* ARGSUSED */
static void
lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{

	kstat_t *ksr;
	kstat_t ks0;		/* probe kstat used to snapshot the chain */
	int nidx, num, i;
	size_t sidx, size;
	zfs_cmd_t *zc;
	nvlist_t *nv = NULL;
	nvpair_t *elem = NULL;
	lxpr_mnt_t *mnt;
	lxpr_zfs_iter_t zfsi;

	ASSERT(lxpnp->lxpr_type == LXPR_PARTITIONS);

	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);

	if (ksr == NULL)
		return;

	lxpr_uiobuf_printf(uiobuf, "major minor #blocks name\n\n");

	/* First: one line per disk-class I/O kstat (index 0 is ks0 itself). */
	for (i = 1; i < nidx; i++) {
		kstat_t *ksp = &ksr[i];
		kstat_io_t *kip;

		if (ksp->ks_type != KSTAT_TYPE_IO ||
		    strcmp(ksp->ks_class, "disk") != 0)
			continue;

		if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
		    &size, &num)) == NULL)
			continue;

		if (size < sizeof (kstat_io_t)) {
			kmem_free(kip, size);
			continue;
		}

		/* Block count is reported as 0; see the comment above. */
		lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d %s\n",
		    mod_name_to_major(ksp->ks_module),
		    ksp->ks_instance, 0, ksp->ks_name);

		kmem_free(kip, size);
	}

	kmem_free(ksr, sidx);

	/* If we never got to open the zfs LDI, then stop now. */
	mnt = (lxpr_mnt_t *)lxpnp->lxpr_vnode->v_vfsp->vfs_data;
	if (mnt->lxprm_zfs_isopen == B_FALSE)
		return;

	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);

	if (lxpr_zfs_list_pools(mnt, zc, &nv) != 0)
		goto out;

	/* Second: one line per zvol across all visible pools. */
	while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
		char *pool = nvpair_name(elem);

		bzero(&zfsi, sizeof (lxpr_zfs_iter_t));
		while (lxpr_zfs_next_zvol(mnt, pool, zc, &zfsi) == 0) {
			major_t major;
			minor_t minor;
			if (lxpr_zvol_dev(mnt, zc->zc_name, &major, &minor)
			    != 0)
				continue;

			lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d zvol/dsk/%s\n",
			    major, minor, 0, zc->zc_name);
		}
	}

	nvlist_free(nv);
out:
	kmem_free(zc, sizeof (zfs_cmd_t));
}
3592
3593 /*
3594 * lxpr_read_diskstats():
3595 *
3596 * See the block comment above the per-device output-generating line for the
3597 * details of the format.
3598 */
3599 /* ARGSUSED */
static void
lxpr_read_diskstats(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	kstat_t *ksr;
	kstat_t ks0;		/* probe kstat used to snapshot the chain */
	int nidx, num, i;
	size_t sidx, size;

	ASSERT(lxpnp->lxpr_type == LXPR_DISKSTATS);

	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);

	if (ksr == NULL)
		return;

	/* Index 0 is the probe kstat itself; real entries start at 1. */
	for (i = 1; i < nidx; i++) {
		kstat_t *ksp = &ksr[i];
		kstat_io_t *kip;

		if (ksp->ks_type != KSTAT_TYPE_IO ||
		    strcmp(ksp->ks_class, "disk") != 0)
			continue;

		if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
		    &size, &num)) == NULL)
			continue;

		if (size < sizeof (kstat_io_t)) {
			kmem_free(kip, size);
			continue;
		}

		/*
		 * /proc/diskstats is defined to have one line of output for
		 * each block device, with each line containing the following
		 * 14 fields:
		 *
		 *	1 - major number
		 *	2 - minor mumber
		 *	3 - device name
		 *	4 - reads completed successfully
		 *	5 - reads merged
		 *	6 - sectors read
		 *	7 - time spent reading (ms)
		 *	8 - writes completed
		 *	9 - writes merged
		 *	10 - sectors written
		 *	11 - time spent writing (ms)
		 *	12 - I/Os currently in progress
		 *	13 - time spent doing I/Os (ms)
		 *	14 - weighted time spent doing I/Os (ms)
		 *
		 * One small hiccup: we don't actually keep track of time
		 * spent reading vs. time spent writing -- we keep track of
		 * time waiting vs. time actually performing I/O. While we
		 * could divide the total time by the I/O mix (making the
		 * obviously wrong assumption that I/O operations all take the
		 * same amount of time), this has the undesirable side-effect
		 * of moving backwards. Instead, we report the total time
		 * (read + write) for all three stats (read, write, total).
		 * This is also a lie of sorts, but it should be more
		 * immediately clear to the user that reads and writes are
		 * each being double-counted as the other.
		 */
		lxpr_uiobuf_printf(uiobuf, "%4d %7d %s "
		    "%llu %llu %llu %llu "
		    "%llu %llu %llu %llu "
		    "%llu %llu %llu\n",
		    mod_name_to_major(ksp->ks_module),
		    ksp->ks_instance, ksp->ks_name,
		    (uint64_t)kip->reads, 0LL,
		    kip->nread / (uint64_t)LXPR_SECTOR_SIZE,
		    (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
		    (uint64_t)kip->writes, 0LL,
		    kip->nwritten / (uint64_t)LXPR_SECTOR_SIZE,
		    (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
		    (uint64_t)(kip->rcnt + kip->wcnt),
		    (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
		    (kip->rlentime + kip->wlentime) /
		    (uint64_t)(NANOSEC / MILLISEC));

		kmem_free(kip, size);
	}

	kmem_free(ksr, sidx);
}
3687
3688 /*
3689 * lxpr_read_version(): read the contents of the "version" file.
3690 */
3691 /* ARGSUSED */
static void
lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lx_zone_data_t *lxzd = ztolxzd(LXPTOZ(lxpnp));
	lx_proc_data_t *lxpd = ptolxproc(curproc);
	const char *release = lxzd->lxzd_kernel_release;
	const char *version = lxzd->lxzd_kernel_version;

	/* Use per-process overrides, if specified */
	if (lxpd != NULL && lxpd->l_uname_release[0] != '\0') {
		release = lxpd->l_uname_release;
	}
	if (lxpd != NULL && lxpd->l_uname_version[0] != '\0') {
		version = lxpd->l_uname_version;
	}

	/* Report the compiler this module was built with, if known. */
	lxpr_uiobuf_printf(uiobuf,
	    "%s version %s (%s version %d.%d.%d) %s\n",
	    LX_UNAME_SYSNAME, release,
#if defined(__GNUC__)
	    "gcc", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
#else
	    "cc", 1, 0, 0,
#endif
	    version);
}
3718
3719 /*
3720 * lxpr_read_stat(): read the contents of the "stat" file.
3721 *
3722 */
3723 /* ARGSUSED */
3724 static void
3725 lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3726 {
3727 cpu_t *cp, *cpstart;
3728 int pools_enabled;
3729 ulong_t idle_cum = 0;
3730 ulong_t sys_cum = 0;
3731 ulong_t user_cum = 0;
3732 ulong_t irq_cum = 0;
3733 ulong_t cpu_nrunnable_cum = 0;
3734 ulong_t w_io_cum = 0;
3735
3736 ulong_t pgpgin_cum = 0;
3737 ulong_t pgpgout_cum = 0;
3738 ulong_t pgswapout_cum = 0;
3739 ulong_t pgswapin_cum = 0;
3740 ulong_t intr_cum = 0;
3741 ulong_t pswitch_cum = 0;
3742 ulong_t forks_cum = 0;
3743 hrtime_t msnsecs[NCMSTATES];
3744 /* is the emulated release > 2.4 */
3745 boolean_t newer_than24 = lx_kern_release_cmp(LXPTOZ(lxpnp), "2.4") > 0;
3746 /* temporary variable since scalehrtime modifies data in place */
3747 hrtime_t tmptime;
3748
3749 ASSERT(lxpnp->lxpr_type == LXPR_STAT);
3750
3751 mutex_enter(&cpu_lock);
3752 pools_enabled = pool_pset_enabled();
3753
3754 /* Calculate cumulative stats */
3755 cp = cpstart = CPU->cpu_part->cp_cpulist;
3756 do {
3757 int i;
3758
3759 /*
3760 * Don't count CPUs that aren't even in the system
3761 * or aren't up yet.
3762 */
3763 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3764 continue;
3765 }
3766
3767 get_cpu_mstate(cp, msnsecs);
3768
3769 idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3770 sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3771 user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
3772
3773 pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
3774 pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
3775 pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
3776 pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
3777
3778
3779 if (newer_than24) {
3780 cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
3781 w_io_cum += CPU_STATS(cp, sys.iowait);
3782 for (i = 0; i < NCMSTATES; i++) {
3783 tmptime = cp->cpu_intracct[i];
3784 scalehrtime(&tmptime);
3785 irq_cum += NSEC_TO_TICK(tmptime);
3786 }
3787 }
3788
3789 for (i = 0; i < PIL_MAX; i++)
3790 intr_cum += CPU_STATS(cp, sys.intr[i]);
3791
3792 pswitch_cum += CPU_STATS(cp, sys.pswitch);
3793 forks_cum += CPU_STATS(cp, sys.sysfork);
3794 forks_cum += CPU_STATS(cp, sys.sysvfork);
3795
3796 if (pools_enabled)
3797 cp = cp->cpu_next_part;
3798 else
3799 cp = cp->cpu_next;
3800 } while (cp != cpstart);
3801
3802 if (newer_than24) {
3803 lxpr_uiobuf_printf(uiobuf,
3804 "cpu %lu %lu %lu %lu %lu %lu %lu\n",
3805 user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
3806 } else {
3807 lxpr_uiobuf_printf(uiobuf,
3808 "cpu %lu %lu %lu %lu\n",
3809 user_cum, 0L, sys_cum, idle_cum);
3810 }
3811
3812 /* Do per processor stats */
3813 do {
3814 int i;
3815
3816 ulong_t idle_ticks;
3817 ulong_t sys_ticks;
3818 ulong_t user_ticks;
3819 ulong_t irq_ticks = 0;
3820
3821 /*
3822 * Don't count CPUs that aren't even in the system
3823 * or aren't up yet.
3824 */
3825 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3826 continue;
3827 }
3828
3829 get_cpu_mstate(cp, msnsecs);
3830
3831 idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3832 sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3833 user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
3834
3835 for (i = 0; i < NCMSTATES; i++) {
3836 tmptime = cp->cpu_intracct[i];
3837 scalehrtime(&tmptime);
3838 irq_ticks += NSEC_TO_TICK(tmptime);
3839 }
3840
3841 if (newer_than24) {
3842 lxpr_uiobuf_printf(uiobuf,
3843 "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
3844 cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
3845 0L, irq_ticks, 0L);
3846 } else {
3847 lxpr_uiobuf_printf(uiobuf,
3848 "cpu%d %lu %lu %lu %lu\n",
3849 cp->cpu_id,
3850 user_ticks, 0L, sys_ticks, idle_ticks);
3851 }
3852
3853 if (pools_enabled)
3854 cp = cp->cpu_next_part;
3855 else
3856 cp = cp->cpu_next;
3857 } while (cp != cpstart);
3858
3859 mutex_exit(&cpu_lock);
3860
3861 if (newer_than24) {
3862 lxpr_uiobuf_printf(uiobuf,
3863 "page %lu %lu\n"
3864 "swap %lu %lu\n"
3865 "intr %lu\n"
3866 "ctxt %lu\n"
3867 "btime %lu\n"
3868 "processes %lu\n"
3869 "procs_running %lu\n"
3870 "procs_blocked %lu\n",
3871 pgpgin_cum, pgpgout_cum,
3872 pgswapin_cum, pgswapout_cum,
3873 intr_cum,
3874 pswitch_cum,
3875 boot_time,
3876 forks_cum,
3877 cpu_nrunnable_cum,
3878 w_io_cum);
3879 } else {
3880 lxpr_uiobuf_printf(uiobuf,
3881 "page %lu %lu\n"
3882 "swap %lu %lu\n"
3883 "intr %lu\n"
3884 "ctxt %lu\n"
3885 "btime %lu\n"
3886 "processes %lu\n",
3887 pgpgin_cum, pgpgout_cum,
3888 pgswapin_cum, pgswapout_cum,
3889 intr_cum,
3890 pswitch_cum,
3891 boot_time,
3892 forks_cum);
3893 }
3894 }
3895
3896 /*
3897 * lxpr_read_swaps():
3898 *
3899 * We don't support swap files or partitions, but some programs like to look
3900 * here just to check we have some swap on the system, so we lie and show
3901 * our entire swap cap as one swap partition.
3902 */
3903 /* ARGSUSED */
static void
lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	zone_t *zone = curzone;
	uint64_t totswap, usedswap;

	/* zone_mem_lock guards the swap accounting fields. */
	mutex_enter(&zone->zone_mem_lock);
	/* Uses units of 1 kb (2^10). */
	totswap = zone->zone_max_swap_ctl >> 10;
	usedswap = zone->zone_max_swap >> 10;
	mutex_exit(&zone->zone_mem_lock);

	/* Present the whole swap cap as a single fake partition. */
	lxpr_uiobuf_printf(uiobuf,
	    "Filename "
	    "Type Size Used Priority\n");
	lxpr_uiobuf_printf(uiobuf, "%-40s%-16s%-8llu%-8llu%-8d\n",
	    "/dev/swap", "partition", totswap, usedswap, -1);
}
3922
3923 /*
3924 * inotify tunables exported via /proc.
3925 */
3926 extern int inotify_maxevents;
3927 extern int inotify_maxinstances;
3928 extern int inotify_maxwatches;
3929
/* /proc/sys/fs/inotify/max_queued_events: native inotify tunable. */
static void
lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp,
    lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS);
	lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents);
}
3937
/* /proc/sys/fs/inotify/max_user_instances: native inotify tunable. */
static void
lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp,
    lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES);
	lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances);
}
3945
/* /proc/sys/fs/inotify/max_user_watches: native inotify tunable. */
static void
lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp,
    lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES);
	lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches);
}
3953
/* /proc/sys/kernel/cap_last_cap: highest Linux capability we emulate. */
static void
lxpr_read_sys_kernel_caplcap(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_CAPLCAP);
	lxpr_uiobuf_printf(uiobuf, "%d\n", LX_CAP_MAX_VALID);
}
3960
/*
 * /proc/sys/kernel/core_pattern: report the zone's default core path,
 * translated from the native corectl(2) pattern into Linux form.  An empty
 * line is emitted when core dumps are disabled, no path is set, or the
 * pattern cannot be translated.
 */
static void
lxpr_read_sys_kernel_corepatt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	zone_t *zone = curproc->p_zone;
	struct core_globals *cg;
	refstr_t *rp;
	corectl_path_t *ccp;
	char tr[MAXPATHLEN];

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);

	cg = zone_getspecific(core_zone_key, zone);
	ASSERT(cg != NULL);

	/* If core dumps are disabled, return an empty string. */
	if ((cg->core_options & CC_PROCESS_PATH) == 0) {
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	/* Hold the path refstr so it stays valid after dropping ccp_mtx. */
	ccp = cg->core_default_path;
	mutex_enter(&ccp->ccp_mtx);
	if ((rp = ccp->ccp_path) != NULL)
		refstr_hold(rp);
	mutex_exit(&ccp->ccp_mtx);

	if (rp == NULL) {
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	bzero(tr, sizeof (tr));
	if (lxpr_core_path_s2l(refstr_value(rp), tr, sizeof (tr)) != 0) {
		refstr_rele(rp);
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	refstr_rele(rp);
	lxpr_uiobuf_printf(uiobuf, "%s\n", tr);
}
4002
/* /proc/sys/kernel/hostname: the zone's node name. */
static void
lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME);
	lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename());
}
4009
/* /proc/sys/kernel/msgmni: zone's enforced SysV message-queue limit. */
static void
lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	rctl_qty_t val;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI);

	/* p_lock is required to evaluate the rctl. */
	mutex_enter(&curproc->p_lock);
	val = rctl_enforced_value(rc_zone_msgmni,
	    curproc->p_zone->zone_rctls, curproc);
	mutex_exit(&curproc->p_lock);

	lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
}
4024
/* /proc/sys/kernel/ngroups_max: native supplementary-group limit. */
static void
lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX);
	lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max);
}
4031
/*
 * /proc/sys/kernel/osrelease: the emulated kernel version string for lx
 * zones; empty for non-lx zones.
 */
static void
lxpr_read_sys_kernel_osrel(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lx_zone_data_t *br_data;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_OSREL);
	br_data = ztolxzd(curproc->p_zone);
	if (curproc->p_zone->zone_brand == &lx_brand) {
		lxpr_uiobuf_printf(uiobuf, "%s\n",
		    br_data->lxzd_kernel_version);
	} else {
		lxpr_uiobuf_printf(uiobuf, "\n");
	}
}
4046
/* /proc/sys/kernel/pid_max: native maximum pid value. */
static void
lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX);
	lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid);
}
4053
4054 static void
4055 lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4056 {
4057 /*
4058 * This file isn't documented on the Linux proc(5) man page but
4059 * according to the blog of the author of systemd/journald (the
4060 * consumer), he says:
4061 * boot_id: A random ID that is regenerated on each boot. As such it
4062 * can be used to identify the local machine's current boot. It's
4063 * universally available on any recent Linux kernel. It's a good and
4064 * safe choice if you need to identify a specific boot on a specific
4065 * booted kernel.
4066 *
4067 * We'll just generate a random ID if necessary. On Linux the format
4068 * appears to resemble a uuid but since it is not documented to be a
4069 * uuid, we don't worry about that.
4070 */
4071 lx_zone_data_t *br_data;
4072
4073 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RAND_BOOTID);
4074
4075 if (curproc->p_zone->zone_brand != &lx_brand) {
4076 lxpr_uiobuf_printf(uiobuf, "0\n");
4077 return;
4078 }
4079
4080 br_data = ztolxzd(curproc->p_zone);
4081 if (br_data->lxzd_bootid[0] == '\0') {
4082 extern int getrandom(void *, size_t, int);
4083 int i;
4084
4085 for (i = 0; i < 5; i++) {
4086 u_longlong_t n;
4087 char s[32];
4088
4089 (void) random_get_bytes((uint8_t *)&n, sizeof (n));
4090 switch (i) {
4091 case 0: (void) snprintf(s, sizeof (s), "%08llx", n);
4092 s[8] = '\0';
4093 break;
4094 case 4: (void) snprintf(s, sizeof (s), "%012llx", n);
4095 s[12] = '\0';
4096 break;
4097 default: (void) snprintf(s, sizeof (s), "%04llx", n);
4098 s[4] = '\0';
4099 break;
4100 }
4101 if (i > 0)
4102 strlcat(br_data->lxzd_bootid, "-",
4103 sizeof (br_data->lxzd_bootid));
4104 strlcat(br_data->lxzd_bootid, s,
4105 sizeof (br_data->lxzd_bootid));
4106 }
4107 }
4108
4109 lxpr_uiobuf_printf(uiobuf, "%s\n", br_data->lxzd_bootid);
4110 }
4111
/*
 * /proc/sys/kernel/shmmax: the zone's enforced SysV shared-memory limit,
 * clamped to 4GB since the value is emitted as a 32-bit quantity.
 */
static void
lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	rctl_qty_t val;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX);

	/* p_lock is required to evaluate the rctl. */
	mutex_enter(&curproc->p_lock);
	val = rctl_enforced_value(rc_zone_shmmax,
	    curproc->p_zone->zone_rctls, curproc);
	mutex_exit(&curproc->p_lock);

	if (val > FOURGB)
		val = FOURGB;

	lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
}
4129
/*
 * /proc/sys/kernel/threads-max: the zone's LWP cap.
 * NOTE(review): zone_nlwps_ctl appears to be an rctl quantity wider than
 * int while the format is %d -- confirm the types agree on all platforms.
 */
static void
lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX);
	lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl);
}
4136
4137 static void
4138 lxpr_read_sys_net_core_somaxc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4139 {
4140 netstack_t *ns;
4141 tcp_stack_t *tcps;
4142
4143 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
4144
4145 ns = netstack_get_current();
4146 if (ns == NULL) {
4147 lxpr_uiobuf_printf(uiobuf, "%d\n", SOMAXCONN);
4148 return;
4149 }
4150
4151 tcps = ns->netstack_tcp;
4152 lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_conn_req_max_q);
4153 netstack_rele(ns);
4154 }
4155
4156 static void
4157 lxpr_read_sys_vm_minfr_kb(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4158 {
4159 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MINFR_KB);
4160 lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4161 }
4162
4163 static void
4164 lxpr_read_sys_vm_nhpages(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4165 {
4166 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_NHUGEP);
4167 lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4168 }
4169
4170 static void
4171 lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4172 {
4173 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_OVERCOMMIT_MEM);
4174 lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4175 }
4176
4177 static void
4178 lxpr_read_sys_vm_swappiness(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4179 {
4180 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_SWAPPINESS);
4181 lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4182 }
4183
4184 /*
4185 * lxpr_read_uptime(): read the contents of the "uptime" file.
4186 *
4187 * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
4188 * Use fixed point arithmetic to get 2 decimal places
4189 */
4190 /* ARGSUSED */
4191 static void
4192 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4193 {
4194 cpu_t *cp, *cpstart;
4195 int pools_enabled;
4196 ulong_t idle_cum = 0;
4197 ulong_t cpu_count = 0;
4198 ulong_t idle_s;
4199 ulong_t idle_cs;
4200 ulong_t up_s;
4201 ulong_t up_cs;
4202 hrtime_t birthtime;
4203 hrtime_t centi_sec = 10000000; /* 10^7 */
4204
4205 ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
4206
4207 /* Calculate cumulative stats */
4208 mutex_enter(&cpu_lock);
4209 pools_enabled = pool_pset_enabled();
4210
4211 cp = cpstart = CPU->cpu_part->cp_cpulist;
4212 do {
4213 /*
4214 * Don't count CPUs that aren't even in the system
4215 * or aren't up yet.
4216 */
4217 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
4218 continue;
4219 }
4220
4221 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
4222 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
4223 cpu_count += 1;
4224
4225 if (pools_enabled)
4226 cp = cp->cpu_next_part;
4227 else
4228 cp = cp->cpu_next;
4229 } while (cp != cpstart);
4230 mutex_exit(&cpu_lock);
4231
4232 /* Getting the Zone zsched process startup time */
4233 birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
4234 up_cs = (gethrtime() - birthtime) / centi_sec;
4235 up_s = up_cs / 100;
4236 up_cs %= 100;
4237
4238 ASSERT(cpu_count > 0);
4239 idle_cum /= cpu_count;
4240 idle_s = idle_cum / hz;
4241 idle_cs = idle_cum % hz;
4242 idle_cs *= 100;
4243 idle_cs /= hz;
4244
4245 lxpr_uiobuf_printf(uiobuf,
4246 "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
4247 }
4248
/*
 * AMD extended feature names (cpuid leaf 0x80000001, %edx), indexed by bit
 * number.  NULL entries are bits for which we print no flag name.
 */
static const char *amd_x_edx[] = {
	NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, "syscall",
	NULL, NULL, NULL, NULL,
	NULL, NULL, NULL, "mp",
	"nx", NULL, "mmxext", NULL,
	NULL, NULL, NULL, NULL,
	NULL, "lm", "3dnowext", "3dnow"
};
4259
/*
 * AMD extended feature names (cpuid leaf 0x80000001, %ecx), indexed by bit.
 */
static const char *amd_x_ecx[] = {
	"lahf_lm", NULL, "svm", NULL,
	"altmovcr8"
};
4264
/*
 * Transmeta extended feature names (cpuid leaf 0x80000001, %edx), by bit.
 */
static const char *tm_x_edx[] = {
	"recovery", "longrun", NULL, "lrti"
};
4268
4269 /*
4270 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
4271 */
4272 static const char *intc_x_edx[] = {
4273 NULL, NULL, NULL, NULL,
4274 NULL, NULL, NULL, NULL,
4275 NULL, NULL, NULL, "syscall",
4276 NULL, NULL, NULL, NULL,
4277 NULL, NULL, NULL, NULL,
4278 "nx", NULL, NULL, NULL,
4279 NULL, NULL, NULL, NULL,
4280 NULL, "lm", NULL, NULL
4281 };
4282
/*
 * Standard feature names (cpuid leaf 1, %edx), indexed by bit number.
 */
static const char *intc_edx[] = {
	"fpu", "vme", "de", "pse",
	"tsc", "msr", "pae", "mce",
	"cx8", "apic", NULL, "sep",
	"mtrr", "pge", "mca", "cmov",
	"pat", "pse36", "pn", "clflush",
	NULL, "dts", "acpi", "mmx",
	"fxsr", "sse", "sse2", "ss",
	"ht", "tm", "ia64", "pbe"
};
4293
4294 /*
4295 * "sse3" on linux is called "pni" (Prescott New Instructions).
4296 */
4297 static const char *intc_ecx[] = {
4298 "pni", NULL, NULL, "monitor",
4299 "ds_cpl", NULL, NULL, "est",
4300 "tm2", NULL, "cid", NULL,
4301 NULL, "cx16", "xtpr"
4302 };
4303
4304 /*
4305 * Report a list of each cgroup subsystem supported by our emulated cgroup fs.
4306 * This needs to exist for systemd to run but for now we don't report any
4307 * cgroup subsystems as being installed. The commented example below shows
4308 * how to print a subsystem entry.
4309 */
4310 static void
4311 lxpr_read_cgroups(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4312 {
4313 lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
4314 "#subsys_name", "hierarchy", "num_cgroups", "enabled");
4315
4316 /*
4317 * lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
4318 * "cpu,cpuacct", "2", "1", "1");
4319 */
4320 }
4321
4322 static void
4323 lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4324 {
4325 int i;
4326 uint32_t bits;
4327 cpu_t *cp, *cpstart;
4328 int pools_enabled;
4329 const char **fp;
4330 char brandstr[CPU_IDSTRLEN];
4331 struct cpuid_regs cpr;
4332 int maxeax;
4333 int std_ecx, std_edx, ext_ecx, ext_edx;
4334
4335 ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
4336
4337 mutex_enter(&cpu_lock);
4338 pools_enabled = pool_pset_enabled();
4339
4340 cp = cpstart = CPU->cpu_part->cp_cpulist;
4341 do {
4342 /*
4343 * This returns the maximum eax value for standard cpuid
4344 * functions in eax.
4345 */
4346 cpr.cp_eax = 0;
4347 (void) cpuid_insn(cp, &cpr);
4348 maxeax = cpr.cp_eax;
4349
4350 /*
4351 * Get standard x86 feature flags.
4352 */
4353 cpr.cp_eax = 1;
4354 (void) cpuid_insn(cp, &cpr);
4355 std_ecx = cpr.cp_ecx;
4356 std_edx = cpr.cp_edx;
4357
4358 /*
4359 * Now get extended feature flags.
4360 */
4361 cpr.cp_eax = 0x80000001;
4362 (void) cpuid_insn(cp, &cpr);
4363 ext_ecx = cpr.cp_ecx;
4364 ext_edx = cpr.cp_edx;
4365
4366 (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
4367
4368 lxpr_uiobuf_printf(uiobuf,
4369 "processor\t: %d\n"
4370 "vendor_id\t: %s\n"
4371 "cpu family\t: %d\n"
4372 "model\t\t: %d\n"
4373 "model name\t: %s\n"
4374 "stepping\t: %d\n"
4375 "cpu MHz\t\t: %u.%03u\n",
4376 cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
4377 cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
4378 (uint32_t)(cpu_freq_hz / 1000000),
4379 ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
4380
4381 lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
4382 getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
4383
4384 if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
4385 /*
4386 * 'siblings' is used for HT-style threads
4387 */
4388 lxpr_uiobuf_printf(uiobuf,
4389 "physical id\t: %lu\n"
4390 "siblings\t: %u\n",
4391 pg_plat_hw_instance_id(cp, PGHW_CHIP),
4392 cpuid_get_ncpu_per_chip(cp));
4393 }
4394
4395 /*
4396 * Since we're relatively picky about running on older hardware,
4397 * we can be somewhat cavalier about the answers to these ones.
4398 *
4399 * In fact, given the hardware we support, we just say:
4400 *
4401 * fdiv_bug : no (if we're on a 64-bit kernel)
4402 * hlt_bug : no
4403 * f00f_bug : no
4404 * coma_bug : no
4405 * wp : yes (write protect in supervsr mode)
4406 */
4407 lxpr_uiobuf_printf(uiobuf,
4408 "fdiv_bug\t: %s\n"
4409 "hlt_bug \t: no\n"
4410 "f00f_bug\t: no\n"
4411 "coma_bug\t: no\n"
4412 "fpu\t\t: %s\n"
4413 "fpu_exception\t: %s\n"
4414 "cpuid level\t: %d\n"
4415 "flags\t\t:",
4416 #if defined(__i386)
4417 fpu_pentium_fdivbug ? "yes" : "no",
4418 #else
4419 "no",
4420 #endif /* __i386 */
4421 fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
4422 maxeax);
4423
4424 for (bits = std_edx, fp = intc_edx, i = 0;
4425 i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
4426 if ((bits & (1 << i)) != 0 && *fp)
4427 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4428
4429 /*
4430 * name additional features where appropriate
4431 */
4432 switch (x86_vendor) {
4433 case X86_VENDOR_Intel:
4434 for (bits = ext_edx, fp = intc_x_edx, i = 0;
4435 i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
4436 fp++, i++)
4437 if ((bits & (1 << i)) != 0 && *fp)
4438 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4439 break;
4440
4441 case X86_VENDOR_AMD:
4442 for (bits = ext_edx, fp = amd_x_edx, i = 0;
4443 i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
4444 fp++, i++)
4445 if ((bits & (1 << i)) != 0 && *fp)
4446 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4447
4448 for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
4449 i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
4450 fp++, i++)
4451 if ((bits & (1 << i)) != 0 && *fp)
4452 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4453 break;
4454
4455 case X86_VENDOR_TM:
4456 for (bits = ext_edx, fp = tm_x_edx, i = 0;
4457 i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
4458 fp++, i++)
4459 if ((bits & (1 << i)) != 0 && *fp)
4460 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4461 break;
4462 default:
4463 break;
4464 }
4465
4466 for (bits = std_ecx, fp = intc_ecx, i = 0;
4467 i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
4468 if ((bits & (1 << i)) != 0 && *fp)
4469 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4470
4471 lxpr_uiobuf_printf(uiobuf, "\n\n");
4472
4473 if (pools_enabled)
4474 cp = cp->cpu_next_part;
4475 else
4476 cp = cp->cpu_next;
4477 } while (cp != cpstart);
4478
4479 mutex_exit(&cpu_lock);
4480 }
4481
/*
 * Reading a /proc/<pid>/fd/<n> node directly is always rejected with EFAULT.
 * NOTE(review): presumably actual data access for these entries is serviced
 * through the underlying realvp rather than this handler -- confirm.
 */
/* ARGSUSED */
static void
lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
	lxpr_uiobuf_seterr(uiobuf, EFAULT);
}
4489
4490 /*
4491 * Report a list of file systems loaded in the kernel. We only report the ones
4492 * which we support and which may be checked by various components to see if
4493 * they are loaded.
4494 */
4495 static void
4496 lxpr_read_filesystems(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4497 {
4498 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "autofs");
4499 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "cgroup");
4500 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "nfs");
4501 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "proc");
4502 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "sysfs");
4503 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "tmpfs");
4504 }
4505
4506 /*
4507 * lxpr_getattr(): Vnode operation for VOP_GETATTR()
4508 */
4509 static int
4510 lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
4511 caller_context_t *ct)
4512 {
4513 register lxpr_node_t *lxpnp = VTOLXP(vp);
4514 lxpr_nodetype_t type = lxpnp->lxpr_type;
4515 extern uint_t nproc;
4516 int error;
4517
4518 /*
4519 * Return attributes of underlying vnode if ATTR_REAL
4520 *
4521 * but keep fd files with the symlink permissions
4522 */
4523 if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
4524 vnode_t *rvp = lxpnp->lxpr_realvp;
4525
4526 /*
4527 * withold attribute information to owner or root
4528 */
4529 if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
4530 return (error);
4531 }
4532
4533 /*
4534 * now its attributes
4535 */
4536 if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
4537 return (error);
4538 }
4539
4540 /*
4541 * if it's a file in lx /proc/pid/fd/xx then set its
4542 * mode and keep it looking like a symlink, fifo or socket
4543 */
4544 if (type == LXPR_PID_FD_FD) {
4545 vap->va_mode = lxpnp->lxpr_mode;
4546 vap->va_type = lxpnp->lxpr_realvp->v_type;
4547 vap->va_size = 0;
4548 vap->va_nlink = 1;
4549 }
4550 return (0);
4551 }
4552
4553 /* Default attributes, that may be overridden below */
4554 bzero(vap, sizeof (*vap));
4555 vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
4556 vap->va_nlink = 1;
4557 vap->va_type = vp->v_type;
4558 vap->va_mode = lxpnp->lxpr_mode;
4559 vap->va_fsid = vp->v_vfsp->vfs_dev;
4560 vap->va_blksize = DEV_BSIZE;
4561 vap->va_uid = lxpnp->lxpr_uid;
4562 vap->va_gid = lxpnp->lxpr_gid;
4563 vap->va_nodeid = lxpnp->lxpr_ino;
4564
4565 switch (type) {
4566 case LXPR_PROCDIR:
4567 vap->va_nlink = nproc + 2 + PROCDIRFILES;
4568 vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
4569 break;
4570 case LXPR_PIDDIR:
4571 vap->va_nlink = PIDDIRFILES;
4572 vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
4573 break;
4574 case LXPR_PID_TASK_IDDIR:
4575 vap->va_nlink = TIDDIRFILES;
4576 vap->va_size = TIDDIRFILES * LXPR_SDSIZE;
4577 break;
4578 case LXPR_SELF:
4579 vap->va_uid = crgetruid(curproc->p_cred);
4580 vap->va_gid = crgetrgid(curproc->p_cred);
4581 break;
4582 case LXPR_PID_FD_FD:
4583 case LXPR_PID_TID_FD_FD:
4584 /*
4585 * Restore VLNK type for lstat-type activity.
4586 * See lxpr_readlink for more details.
4587 */
4588 if ((flags & FOLLOW) == 0)
4589 vap->va_type = VLNK;
4590 default:
4591 break;
4592 }
4593
4594 vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
4595 return (0);
4596 }
4597
4598 /*
4599 * lxpr_access(): Vnode operation for VOP_ACCESS()
4600 */
4601 static int
4602 lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
4603 {
4604 lxpr_node_t *lxpnp = VTOLXP(vp);
4605 lxpr_nodetype_t type = lxpnp->lxpr_type;
4606 int shift = 0;
4607 proc_t *tp;
4608
4609 /* lx /proc is a read only file system */
4610 if (mode & VWRITE) {
4611 switch (type) {
4612 case LXPR_PID_OOM_SCR_ADJ:
4613 case LXPR_PID_TID_OOM_SCR_ADJ:
4614 case LXPR_SYS_KERNEL_COREPATT:
4615 case LXPR_SYS_NET_CORE_SOMAXCON:
4616 case LXPR_SYS_VM_OVERCOMMIT_MEM:
4617 case LXPR_SYS_VM_SWAPPINESS:
4618 case LXPR_PID_FD_FD:
4619 case LXPR_PID_TID_FD_FD:
4620 break;
4621 default:
4622 return (EROFS);
4623 }
4624 }
4625
4626 /*
4627 * If this is a restricted file, check access permissions.
4628 */
4629 switch (type) {
4630 case LXPR_PIDDIR:
4631 return (0);
4632 case LXPR_PID_CURDIR:
4633 case LXPR_PID_ENV:
4634 case LXPR_PID_EXE:
4635 case LXPR_PID_LIMITS:
4636 case LXPR_PID_MAPS:
4637 case LXPR_PID_MEM:
4638 case LXPR_PID_ROOTDIR:
4639 case LXPR_PID_FDDIR:
4640 case LXPR_PID_FD_FD:
4641 case LXPR_PID_TID_FDDIR:
4642 case LXPR_PID_TID_FD_FD:
4643 if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
4644 return (ENOENT);
4645 if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
4646 priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
4647 lxpr_unlock(tp);
4648 return (EACCES);
4649 }
4650 lxpr_unlock(tp);
4651 default:
4652 break;
4653 }
4654
4655 if (lxpnp->lxpr_realvp != NULL) {
4656 /*
4657 * For these we use the underlying vnode's accessibility.
4658 */
4659 return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
4660 }
4661
4662 /* If user is root allow access regardless of permission bits */
4663 if (secpolicy_proc_access(cr) == 0)
4664 return (0);
4665
4666 /*
4667 * Access check is based on only one of owner, group, public. If not
4668 * owner, then check group. If not a member of the group, then check
4669 * public access.
4670 */
4671 if (crgetuid(cr) != lxpnp->lxpr_uid) {
4672 shift += 3;
4673 if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
4674 shift += 3;
4675 }
4676
4677 mode &= ~(lxpnp->lxpr_mode << shift);
4678
4679 if (mode == 0)
4680 return (0);
4681
4682 return (EACCES);
4683 }
4684
/*
 * Lookup handler for nodes that are not directories; always fails.
 */
/* ARGSUSED */
static vnode_t *
lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
{
	return (NULL);
}
4691
4692 /*
4693 * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
4694 */
4695 /* ARGSUSED */
4696 static int
4697 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
4698 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
4699 int *direntflags, pathname_t *realpnp)
4700 {
4701 lxpr_node_t *lxpnp = VTOLXP(dp);
4702 lxpr_nodetype_t type = lxpnp->lxpr_type;
4703 int error;
4704
4705 ASSERT(dp->v_type == VDIR);
4706 ASSERT(type < LXPR_NFILES);
4707
4708 /*
4709 * we should never get here because the lookup
4710 * is done on the realvp for these nodes
4711 */
4712 ASSERT(type != LXPR_PID_FD_FD &&
4713 type != LXPR_PID_CURDIR &&
4714 type != LXPR_PID_ROOTDIR);
4715
4716 /*
4717 * restrict lookup permission to owner or root
4718 */
4719 if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
4720 return (error);
4721 }
4722
4723 /*
4724 * Just return the parent vnode if that's where we are trying to go.
4725 */
4726 if (strcmp(comp, "..") == 0) {
4727 VN_HOLD(lxpnp->lxpr_parent);
4728 *vpp = lxpnp->lxpr_parent;
4729 return (0);
4730 }
4731
4732 /*
4733 * Special handling for directory searches. Note: null component name
4734 * denotes that the current directory is being searched.
4735 */
4736 if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
4737 VN_HOLD(dp);
4738 *vpp = dp;
4739 return (0);
4740 }
4741
4742 *vpp = (lxpr_lookup_function[type](dp, comp));
4743 return ((*vpp == NULL) ? ENOENT : 0);
4744 }
4745
4746 /*
4747 * Do a sequential search on the given directory table
4748 */
4749 static vnode_t *
4750 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
4751 lxpr_dirent_t *dirtab, int dirtablen)
4752 {
4753 lxpr_node_t *lxpnp;
4754 int count;
4755
4756 for (count = 0; count < dirtablen; count++) {
4757 if (strcmp(dirtab[count].d_name, comp) == 0) {
4758 lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
4759 dp = LXPTOV(lxpnp);
4760 ASSERT(dp != NULL);
4761 return (dp);
4762 }
4763 }
4764 return (NULL);
4765 }
4766
4767 static vnode_t *
4768 lxpr_lookup_piddir(vnode_t *dp, char *comp)
4769 {
4770 proc_t *p;
4771
4772 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
4773
4774 p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
4775 if (p == NULL)
4776 return (NULL);
4777
4778 dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
4779
4780 lxpr_unlock(p);
4781
4782 return (dp);
4783 }
4784
4785 /*
4786 * Lookup one of the process's task ID's.
4787 */
4788 static vnode_t *
4789 lxpr_lookup_taskdir(vnode_t *dp, char *comp)
4790 {
4791 lxpr_node_t *dlxpnp = VTOLXP(dp);
4792 lxpr_node_t *lxpnp;
4793 proc_t *p;
4794 pid_t real_pid;
4795 uint_t tid;
4796 int c;
4797 kthread_t *t;
4798
4799 ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASKDIR);
4800
4801 /*
4802 * convert the string rendition of the filename to a thread ID
4803 */
4804 tid = 0;
4805 while ((c = *comp++) != '\0') {
4806 int otid;
4807 if (c < '0' || c > '9')
4808 return (NULL);
4809
4810 otid = tid;
4811 tid = 10 * tid + c - '0';
4812 /* integer overflow */
4813 if (tid / 10 != otid)
4814 return (NULL);
4815 }
4816
4817 /*
4818 * get the proc to work with and lock it
4819 */
4820 real_pid = get_real_pid(dlxpnp->lxpr_pid);
4821 p = lxpr_lock(real_pid);
4822 if ((p == NULL))
4823 return (NULL);
4824
4825 /*
4826 * If the process is a zombie or system process
4827 * it can't have any threads.
4828 */
4829 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
4830 lxpr_unlock(p);
4831 return (NULL);
4832 }
4833
4834 if (p->p_brand == &lx_brand) {
4835 t = lxpr_get_thread(p, tid);
4836 } else {
4837 /*
4838 * Only the main thread is visible for non-branded processes.
4839 */
4840 t = p->p_tlist;
4841 if (tid != p->p_pid || t == NULL) {
4842 t = NULL;
4843 } else {
4844 thread_lock(t);
4845 }
4846 }
4847 if (t == NULL) {
4848 lxpr_unlock(p);
4849 return (NULL);
4850 }
4851 thread_unlock(t);
4852
4853 /*
4854 * Allocate and fill in a new lx /proc taskid node.
4855 * Instead of the last arg being a fd, it is a tid.
4856 */
4857 lxpnp = lxpr_getnode(dp, LXPR_PID_TASK_IDDIR, p, tid);
4858 dp = LXPTOV(lxpnp);
4859 ASSERT(dp != NULL);
4860 lxpr_unlock(p);
4861 return (dp);
4862 }
4863
4864 /*
4865 * Lookup one of the process's task ID's.
4866 */
4867 static vnode_t *
4868 lxpr_lookup_task_tid_dir(vnode_t *dp, char *comp)
4869 {
4870 lxpr_node_t *dlxpnp = VTOLXP(dp);
4871 lxpr_node_t *lxpnp;
4872 proc_t *p;
4873 pid_t real_pid;
4874 kthread_t *t;
4875 int i;
4876
4877 ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
4878
4879 /*
4880 * get the proc to work with and lock it
4881 */
4882 real_pid = get_real_pid(dlxpnp->lxpr_pid);
4883 p = lxpr_lock(real_pid);
4884 if ((p == NULL))
4885 return (NULL);
4886
4887 /*
4888 * If the process is a zombie or system process
4889 * it can't have any threads.
4890 */
4891 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
4892 lxpr_unlock(p);
4893 return (NULL);
4894 }
4895
4896 /* need to confirm tid is still there */
4897 t = lxpr_get_thread(p, dlxpnp->lxpr_desc);
4898 if (t == NULL) {
4899 lxpr_unlock(p);
4900 return (NULL);
4901 }
4902 thread_unlock(t);
4903
4904 /*
4905 * allocate and fill in the new lx /proc taskid dir node
4906 */
4907 for (i = 0; i < TIDDIRFILES; i++) {
4908 if (strcmp(tiddir[i].d_name, comp) == 0) {
4909 lxpnp = lxpr_getnode(dp, tiddir[i].d_type, p,
4910 dlxpnp->lxpr_desc);
4911 dp = LXPTOV(lxpnp);
4912 ASSERT(dp != NULL);
4913 lxpr_unlock(p);
4914 return (dp);
4915 }
4916 }
4917
4918 lxpr_unlock(p);
4919 return (NULL);
4920 }
4921
4922 /*
4923 * Lookup one of the process's open files.
4924 */
4925 static vnode_t *
4926 lxpr_lookup_fddir(vnode_t *dp, char *comp)
4927 {
4928 lxpr_node_t *dlxpnp = VTOLXP(dp);
4929
4930 ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR ||
4931 dlxpnp->lxpr_type == LXPR_PID_TID_FDDIR);
4932
4933 return (lxpr_lookup_fdnode(dp, comp));
4934 }
4935
4936 static vnode_t *
4937 lxpr_lookup_netdir(vnode_t *dp, char *comp)
4938 {
4939 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
4940
4941 dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
4942
4943 return (dp);
4944 }
4945
4946 static vnode_t *
4947 lxpr_lookup_procdir(vnode_t *dp, char *comp)
4948 {
4949 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
4950
4951 /*
4952 * We know all the names of files & dirs in our file system structure
4953 * except those that are pid names. These change as pids are created/
4954 * deleted etc., so we just look for a number as the first char to see
4955 * if we are we doing pid lookups.
4956 *
4957 * Don't need to check for "self" as it is implemented as a symlink
4958 */
4959 if (*comp >= '0' && *comp <= '9') {
4960 pid_t pid = 0;
4961 lxpr_node_t *lxpnp = NULL;
4962 proc_t *p;
4963 int c;
4964
4965 while ((c = *comp++) != '\0')
4966 pid = 10 * pid + c - '0';
4967
4968 /*
4969 * Can't continue if the process is still loading or it doesn't
4970 * really exist yet (or maybe it just died!)
4971 */
4972 p = lxpr_lock(pid);
4973 if (p == NULL)
4974 return (NULL);
4975
4976 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
4977 lxpr_unlock(p);
4978 return (NULL);
4979 }
4980
4981 /*
4982 * allocate and fill in a new lx /proc node
4983 */
4984 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
4985
4986 lxpr_unlock(p);
4987
4988 dp = LXPTOV(lxpnp);
4989 ASSERT(dp != NULL);
4990
4991 return (dp);
4992 }
4993
4994 /* Lookup fixed names */
4995 return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
4996 }
4997
/*
 * Lookup an entry in the fixed /proc/sys directory table.
 */
static vnode_t *
lxpr_lookup_sysdir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES));
}
5004
/*
 * Lookup an entry in the fixed /proc/sys/kernel directory table.
 */
static vnode_t *
lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir,
	    SYS_KERNELDIRFILES));
}
5012
/*
 * Lookup an entry in the fixed /proc/sys/kernel/random directory table.
 */
static vnode_t *
lxpr_lookup_sys_kdir_randdir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_randdir,
	    SYS_RANDDIRFILES));
}
5020
/*
 * Lookup an entry in the fixed /proc/sys/net directory table.
 */
static vnode_t *
lxpr_lookup_sys_netdir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NETDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_netdir,
	    SYS_NETDIRFILES));
}
5028
/*
 * Lookup an entry in the fixed /proc/sys/net/core directory table.
 */
static vnode_t *
lxpr_lookup_sys_net_coredir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_COREDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_net_coredir,
	    SYS_NET_COREDIRFILES));
}
5036
/*
 * Lookup an entry in the fixed /proc/sys/vm directory table.
 */
static vnode_t *
lxpr_lookup_sys_vmdir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_VMDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_vmdir,
	    SYS_VMDIRFILES));
}
5044
/*
 * Lookup an entry in the fixed /proc/sys/fs directory table.
 */
static vnode_t *
lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir,
	    SYS_FSDIRFILES));
}
5052
/*
 * Lookup an entry in the fixed /proc/sys/fs/inotify directory table.
 */
static vnode_t *
lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir,
	    SYS_FS_INOTIFYDIRFILES));
}
5060
5061 /*
5062 * lxpr_readdir(): Vnode operation for VOP_READDIR()
5063 */
5064 /* ARGSUSED */
5065 static int
5066 lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
5067 caller_context_t *ct, int flags)
5068 {
5069 lxpr_node_t *lxpnp = VTOLXP(dp);
5070 lxpr_nodetype_t type = lxpnp->lxpr_type;
5071 ssize_t uresid;
5072 off_t uoffset;
5073 int error;
5074
5075 ASSERT(dp->v_type == VDIR);
5076 ASSERT(type < LXPR_NFILES);
5077
5078 /*
5079 * we should never get here because the readdir
5080 * is done on the realvp for these nodes
5081 */
5082 ASSERT(type != LXPR_PID_FD_FD &&
5083 type != LXPR_PID_CURDIR &&
5084 type != LXPR_PID_ROOTDIR);
5085
5086 /*
5087 * restrict readdir permission to owner or root
5088 */
5089 if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
5090 return (error);
5091
5092 uoffset = uiop->uio_offset;
5093 uresid = uiop->uio_resid;
5094
5095 /* can't do negative reads */
5096 if (uoffset < 0 || uresid <= 0)
5097 return (EINVAL);
5098
5099 /* can't read directory entries that don't exist! */
5100 if (uoffset % LXPR_SDSIZE)
5101 return (ENOENT);
5102
5103 return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
5104 }
5105
/*
 * Readdir handler for nodes that are not directories; always fails.
 */
/* ARGSUSED */
static int
lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	return (ENOTDIR);
}
5112
5113 /*
5114 * This has the common logic for returning directory entries
5115 */
5116 static int
5117 lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
5118 lxpr_dirent_t *dirtab, int dirtablen)
5119 {
5120 /* bp holds one dirent64 structure */
5121 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5122 dirent64_t *dirent = (dirent64_t *)bp;
5123 ssize_t oresid; /* save a copy for testing later */
5124 ssize_t uresid;
5125
5126 oresid = uiop->uio_resid;
5127
5128 /* clear out the dirent buffer */
5129 bzero(bp, sizeof (bp));
5130
5131 /*
5132 * Satisfy user request
5133 */
5134 while ((uresid = uiop->uio_resid) > 0) {
5135 int dirindex;
5136 off_t uoffset;
5137 int reclen;
5138 int error;
5139
5140 uoffset = uiop->uio_offset;
5141 dirindex = (uoffset / LXPR_SDSIZE) - 2;
5142
5143 if (uoffset == 0) {
5144
5145 dirent->d_ino = lxpnp->lxpr_ino;
5146 dirent->d_name[0] = '.';
5147 dirent->d_name[1] = '\0';
5148 reclen = DIRENT64_RECLEN(1);
5149
5150 } else if (uoffset == LXPR_SDSIZE) {
5151
5152 dirent->d_ino = lxpr_parentinode(lxpnp);
5153 dirent->d_name[0] = '.';
5154 dirent->d_name[1] = '.';
5155 dirent->d_name[2] = '\0';
5156 reclen = DIRENT64_RECLEN(2);
5157
5158 } else if (dirindex >= 0 && dirindex < dirtablen) {
5159 int slen = strlen(dirtab[dirindex].d_name);
5160
5161 dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
5162 lxpnp->lxpr_pid, 0);
5163
5164 VERIFY(slen < LXPNSIZ);
5165 (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
5166 reclen = DIRENT64_RECLEN(slen);
5167
5168 } else {
5169 /* Run out of table entries */
5170 if (eofp) {
5171 *eofp = 1;
5172 }
5173 return (0);
5174 }
5175
5176 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5177 dirent->d_reclen = (ushort_t)reclen;
5178
5179 /*
5180 * if the size of the data to transfer is greater
5181 * that that requested then we can't do it this transfer.
5182 */
5183 if (reclen > uresid) {
5184 /*
5185 * Error if no entries have been returned yet.
5186 */
5187 if (uresid == oresid) {
5188 return (EINVAL);
5189 }
5190 break;
5191 }
5192
5193 /*
5194 * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5195 * by the same amount. But we want uiop->uio_offset to change
5196 * in increments of LXPR_SDSIZE, which is different from the
5197 * number of bytes being returned to the user. So we set
5198 * uiop->uio_offset separately, ignoring what uiomove() does.
5199 */
5200 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5201 uiop)) != 0)
5202 return (error);
5203
5204 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5205 }
5206
5207 /* Have run out of space, but could have just done last table entry */
5208 if (eofp) {
5209 *eofp =
5210 (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
5211 }
5212 return (0);
5213 }
5214
5215
/*
 * Emit the entries of the top-level lx /proc directory: "." and "..", the
 * fixed lxproc files, then one directory per visible process (with pid
 * translation for the zone's init and zsched).
 */
static int
lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;
	off_t uoffset;
	zoneid_t zoneid;
	pid_t pid;
	int error;
	int ceof;

	ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);

	oresid = uiop->uio_resid;
	zoneid = LXPTOZ(lxpnp)->zone_id;

	/*
	 * We return directory entries in the order: "." and ".." then the
	 * unique lxproc files, then the directories corresponding to the
	 * running processes. We have defined this as the ordering because
	 * it allows us to more easily keep track of where we are between calls
	 * to getdents(). If the number of processes changes between calls
	 * then we can't lose track of where we are in the lxproc files.
	 */

	/* Do the fixed entries */
	error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
	    PROCDIRFILES);

	/* Finished if we got an error or if we couldn't do all the table */
	if (error != 0 || ceof == 0)
		return (error);

	/* clear out the dirent buffer */
	bzero(bp, sizeof (bp));

	/* Do the process entries */
	while ((uresid = uiop->uio_resid) > 0) {
		proc_t *p;
		int len;
		int reclen;
		int i;

		uoffset = uiop->uio_offset;

		/*
		 * Stop when entire proc table has been examined.
		 */
		i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
		if (i < 0 || i >= v.v_proc) {
			/* Run out of table entries */
			if (eofp) {
				*eofp = 1;
			}
			return (0);
		}
		mutex_enter(&pidlock);

		/*
		 * Skip indices for which there is no pid_entry, PIDs for
		 * which there is no corresponding process, a PID of 0,
		 * and anything the security policy doesn't allow
		 * us to look at.
		 */
		if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
		    p->p_pid == 0 ||
		    secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
			mutex_exit(&pidlock);
			goto next;
		}
		/*
		 * NOTE(review): p is dereferenced below after pidlock is
		 * dropped -- presumably acceptable for this best-effort
		 * listing, but confirm against the native procfs pattern.
		 */
		mutex_exit(&pidlock);

		/*
		 * Convert pid to the Linux default of 1 if we're the zone's
		 * init process, or 0 if zsched, otherwise use the value from
		 * the proc structure
		 */
		if (p->p_pid == curproc->p_zone->zone_proc_initpid) {
			pid = 1;
		} else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) {
			pid = 0;
		} else {
			pid = p->p_pid;
		}

		/*
		 * If this /proc was mounted in the global zone, view
		 * all procs; otherwise, only view zone member procs.
		 */
		if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
			goto next;
		}

		ASSERT(p->p_stat != 0);

		dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
		len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
		ASSERT(len < LXPNSIZ);
		reclen = DIRENT64_RECLEN(len);

		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		/*
		 * if the size of the data to transfer is greater
		 * than that requested then we can't do it this transfer.
		 */
		if (reclen > uresid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uresid == oresid)
				return (EINVAL);
			break;
		}

		/*
		 * uiomove() updates both uiop->uio_resid and uiop->uio_offset
		 * by the same amount. But we want uiop->uio_offset to change
		 * in increments of LXPR_SDSIZE, which is different from the
		 * number of bytes being returned to the user. So we set
		 * uiop->uio_offset separately, in the increment of this for
		 * the loop, ignoring what uiomove() does.
		 */
		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
		    uiop)) != 0)
			return (error);
next:
		uiop->uio_offset = uoffset + LXPR_SDSIZE;
	}

	if (eofp != NULL) {
		*eofp = (uiop->uio_offset >=
		    ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
	}

	return (0);
}
5357
5358 static int
5359 lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5360 {
5361 proc_t *p;
5362 pid_t find_pid;
5363
5364 ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
5365
5366 /* can't read its contents if it died */
5367 mutex_enter(&pidlock);
5368
5369 if (lxpnp->lxpr_pid == 1) {
5370 find_pid = curproc->p_zone->zone_proc_initpid;
5371 } else if (lxpnp->lxpr_pid == 0) {
5372 find_pid = curproc->p_zone->zone_zsched->p_pid;
5373 } else {
5374 find_pid = lxpnp->lxpr_pid;
5375 }
5376 p = prfind(find_pid);
5377
5378 if (p == NULL || p->p_stat == SIDL) {
5379 mutex_exit(&pidlock);
5380 return (ENOENT);
5381 }
5382 mutex_exit(&pidlock);
5383
5384 return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
5385 }
5386
5387 static int
5388 lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5389 {
5390 ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
5391 return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
5392 }
5393
/*
 * Read the contents of /proc/<pid>/task: "." and ".." plus one entry per
 * thread of the process.  For branded (lx) processes each lwp appears
 * under its emulated Linux tid; for native processes only a single entry
 * is presented (named by the process id), since the Linux task model
 * cannot be faithfully emulated for them.
 */
static int
lxpr_readdir_taskdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;
	off_t uoffset;
	int error;
	int ceof;
	proc_t *p;
	int tiddirsize = -1;	/* -1 means "not yet determined" */
	int tasknum;
	pid_t real_pid;
	kthread_t *t;
	boolean_t branded = B_FALSE;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_TASKDIR);

	oresid = uiop->uio_resid;

	real_pid = get_real_pid(lxpnp->lxpr_pid);
	p = lxpr_lock(real_pid);

	/* can't read its contents if it died */
	if (p == NULL) {
		return (ENOENT);
	}
	if (p->p_stat == SIDL) {
		lxpr_unlock(p);
		return (ENOENT);
	}

	/* Zombies, system processes and kernel-as processes have no tasks. */
	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
		tiddirsize = 0;

	branded = (p->p_brand == &lx_brand);
	/*
	 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
	 * going away while we iterate over its threads.
	 */
	mutex_exit(&p->p_lock);

	if (tiddirsize == -1)
		tiddirsize = p->p_lwpcnt;

	/* Do the fixed entries (in this case just "." & "..") */
	error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);

	/* Finished if we got an error or if we couldn't do all the table */
	if (error != 0 || ceof == 0)
		goto out;

	if ((t = p->p_tlist) == NULL) {
		if (eofp != NULL)
			*eofp = 1;
		goto out;
	}

	/* clear out the dirent buffer */
	bzero(bp, sizeof (bp));

	/*
	 * Loop until user's request is satisfied or until all thread's have
	 * been returned.  The thread pointer (t) and the entry index
	 * (tasknum) advance in lock-step; "i", derived from the uio offset,
	 * selects which entry the caller wants next.
	 */
	for (tasknum = 0; (uresid = uiop->uio_resid) > 0; tasknum++) {
		int i;
		int reclen;
		int len;
		uint_t emul_tid;
		lx_lwp_data_t *lwpd;

		uoffset = uiop->uio_offset;

		/*
		 * Stop at the end of the thread list
		 */
		i = (uoffset / LXPR_SDSIZE) - 2;
		if (i < 0 || i >= tiddirsize) {
			if (eofp) {
				*eofp = 1;
			}
			goto out;
		}

		/*
		 * Not yet in sync with the requested offset (e.g. after a
		 * seek); skip to "next" so both t and tasknum advance.
		 */
		if (i != tasknum)
			goto next;

		if (!branded) {
			/*
			 * Emulating the goofy linux task model is impossible
			 * to do for native processes. We can compromise by
			 * presenting only the main thread to the consumer.
			 */
			emul_tid = p->p_pid;
		} else {
			if ((lwpd = ttolxlwp(t)) == NULL) {
				goto next;
			}
			emul_tid = lwpd->br_pid;
			/*
			 * Convert pid to Linux default of 1 if we're the
			 * zone's init.
			 */
			if (emul_tid == curproc->p_zone->zone_proc_initpid)
				emul_tid = 1;
		}

		dirent->d_ino = lxpr_inode(LXPR_PID_TASK_IDDIR, lxpnp->lxpr_pid,
		    emul_tid);
		len = snprintf(dirent->d_name, LXPNSIZ, "%d", emul_tid);
		ASSERT(len < LXPNSIZ);
		reclen = DIRENT64_RECLEN(len);

		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		if (reclen > uresid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uresid == oresid)
				error = EINVAL;
			goto out;
		}

		/*
		 * uiomove() updates both uiop->uio_resid and uiop->uio_offset
		 * by the same amount. But we want uiop->uio_offset to change
		 * in increments of LXPR_SDSIZE, which is different from the
		 * number of bytes being returned to the user. So we set
		 * uiop->uio_offset separately, in the increment of this for
		 * the loop, ignoring what uiomove() does.
		 */
		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
		    uiop)) != 0)
			goto out;

	next:
		uiop->uio_offset = uoffset + LXPR_SDSIZE;

		/*
		 * Advance to the next thread; native (non-branded) processes
		 * expose only the single entry emitted above.
		 */
		if ((t = t->t_forw) == p->p_tlist || !branded) {
			if (eofp != NULL)
				*eofp = 1;
			goto out;
		}
	}

	if (eofp != NULL)
		*eofp = 0;

out:
	/* Reacquire p_lock so lxpr_unlock() can release P_PR_LOCK. */
	mutex_enter(&p->p_lock);
	lxpr_unlock(p);
	return (error);
}
5552
5553 static int
5554 lxpr_readdir_task_tid_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5555 {
5556 proc_t *p;
5557 pid_t real_pid;
5558 kthread_t *t;
5559
5560 ASSERT(lxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
5561
5562 mutex_enter(&pidlock);
5563
5564 real_pid = get_real_pid(lxpnp->lxpr_pid);
5565 p = prfind(real_pid);
5566
5567 /* can't read its contents if it died */
5568 if (p == NULL || p->p_stat == SIDL) {
5569 mutex_exit(&pidlock);
5570 return (ENOENT);
5571 }
5572
5573 mutex_exit(&pidlock);
5574
5575 /* need to confirm tid is still there */
5576 t = lxpr_get_thread(p, lxpnp->lxpr_desc);
5577 if (t == NULL) {
5578 /* we can't find this specific thread */
5579 return (NULL);
5580 }
5581 thread_unlock(t);
5582
5583 return (lxpr_readdir_common(lxpnp, uiop, eofp, tiddir, TIDDIRFILES));
5584 }
5585
/*
 * Read the contents of /proc/<pid>/fd (or /proc/<pid>/task/<tid>/fd):
 * "." and ".." plus one numeric entry for each open file descriptor of
 * the process.
 */
static int
lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;
	off_t uoffset;
	int error;
	int ceof;
	proc_t *p;
	int fddirsize = -1;	/* -1 means "not yet determined" */
	uf_info_t *fip;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR ||
	    lxpnp->lxpr_type == LXPR_PID_TID_FDDIR);

	oresid = uiop->uio_resid;

	/* can't read its contents if it died */
	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL)
		return (ENOENT);

	/* Zombies, system processes and kernel-as processes have no fds. */
	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
		fddirsize = 0;

	/*
	 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
	 * going away while we iterate over its fi_list.
	 */
	mutex_exit(&p->p_lock);

	/* Get open file info */
	fip = (&(p)->p_user.u_finfo);
	mutex_enter(&fip->fi_lock);

	if (fddirsize == -1)
		fddirsize = fip->fi_nfiles;

	/* Do the fixed entries (in this case just "." & "..") */
	error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);

	/* Finished if we got an error or if we couldn't do all the table */
	if (error != 0 || ceof == 0)
		goto out;

	/* clear out the dirent buffer */
	bzero(bp, sizeof (bp));

	/*
	 * Loop until user's request is satisfied or until
	 * all file descriptors have been examined.
	 *
	 * The for() increment resets uio_offset to uoffset + LXPR_SDSIZE
	 * after each iteration, overriding the byte-count advance that
	 * uiomove() applied (same scheme as the other readdir loops).
	 */
	for (; (uresid = uiop->uio_resid) > 0;
	    uiop->uio_offset = uoffset + LXPR_SDSIZE) {
		int reclen;
		int fd;
		int len;

		uoffset = uiop->uio_offset;

		/*
		 * Stop at the end of the fd list
		 */
		fd = (uoffset / LXPR_SDSIZE) - 2;
		if (fd < 0 || fd >= fddirsize) {
			if (eofp) {
				*eofp = 1;
			}
			goto out;
		}

		/* Skip slots that have no open file. */
		if (fip->fi_list[fd].uf_file == NULL)
			continue;

		dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
		len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
		ASSERT(len < LXPNSIZ);
		reclen = DIRENT64_RECLEN(len);

		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		if (reclen > uresid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uresid == oresid)
				error = EINVAL;
			goto out;
		}

		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
		    uiop)) != 0)
			goto out;
	}

	if (eofp != NULL) {
		*eofp =
		    (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
	}

out:
	mutex_exit(&fip->fi_lock);
	/* Reacquire p_lock so lxpr_unlock() can release P_PR_LOCK. */
	mutex_enter(&p->p_lock);
	lxpr_unlock(p);
	return (error);
}
5696
5697 static int
5698 lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5699 {
5700 ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR);
5701 return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES));
5702 }
5703
5704 static int
5705 lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5706 {
5707 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR);
5708 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir,
5709 SYS_FSDIRFILES));
5710 }
5711
5712 static int
5713 lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5714 {
5715 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
5716 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir,
5717 SYS_FS_INOTIFYDIRFILES));
5718 }
5719
5720 static int
5721 lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5722 {
5723 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR);
5724 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir,
5725 SYS_KERNELDIRFILES));
5726 }
5727
5728 static int
5729 lxpr_readdir_sys_kdir_randdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5730 {
5731 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
5732 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_randdir,
5733 SYS_RANDDIRFILES));
5734 }
5735
5736 static int
5737 lxpr_readdir_sys_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5738 {
5739 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NETDIR);
5740 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_netdir,
5741 SYS_NETDIRFILES));
5742 }
5743
5744 static int
5745 lxpr_readdir_sys_net_coredir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5746 {
5747 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_COREDIR);
5748 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_coredir,
5749 SYS_NET_COREDIRFILES));
5750 }
5751
5752 static int
5753 lxpr_readdir_sys_vmdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5754 {
5755 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VMDIR);
5756 return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_vmdir,
5757 SYS_VMDIRFILES));
5758 }
5759
5760 static int
5761 lxpr_write_sys_net_core_somaxc(lxpr_node_t *lxpnp, struct uio *uio,
5762 struct cred *cr, caller_context_t *ct)
5763 {
5764 int error;
5765 int res = 0;
5766 size_t olen;
5767 char val[16]; /* big enough for a uint numeric string */
5768 netstack_t *ns;
5769 mod_prop_info_t *ptbl = NULL;
5770 mod_prop_info_t *pinfo = NULL;
5771
5772 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
5773
5774 if (uio->uio_loffset != 0)
5775 return (EINVAL);
5776
5777 if (uio->uio_resid == 0)
5778 return (0);
5779
5780 olen = uio->uio_resid;
5781 if (olen > sizeof (val) - 1)
5782 return (EINVAL);
5783
5784 bzero(val, sizeof (val));
5785 error = uiomove(val, olen, UIO_WRITE, uio);
5786 if (error != 0)
5787 return (error);
5788
5789 if (val[olen - 1] == '\n')
5790 val[olen - 1] = '\0';
5791
5792 if (val[0] == '\0') /* no input */
5793 return (EINVAL);
5794
5795 ns = netstack_get_current();
5796 if (ns == NULL)
5797 return (EINVAL);
5798
5799 ptbl = ns->netstack_tcp->tcps_propinfo_tbl;
5800 pinfo = mod_prop_lookup(ptbl, "_conn_req_max_q", MOD_PROTO_TCP);
5801 if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, val, 0) != 0)
5802 res = EINVAL;
5803
5804 netstack_rele(ns);
5805 return (res);
5806 }
5807
/*
 * Handle a write to /proc/sys/kernel/core_pattern: translate the value
 * with lxpr_core_path_l2s() and install it as the zone's default core
 * file path.  Requires coreadm privilege.
 */
/* ARGSUSED */
static int
lxpr_write_sys_kernel_corepatt(lxpr_node_t *lxpnp, struct uio *uio,
    struct cred *cr, caller_context_t *ct)
{
	zone_t *zone = curproc->p_zone;
	struct core_globals *cg;
	refstr_t *rp, *nrp;
	corectl_path_t *ccp;
	char val[MAXPATHLEN];
	char valtr[MAXPATHLEN];
	size_t olen;
	int error;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);

	cg = zone_getspecific(core_zone_key, zone);
	ASSERT(cg != NULL);

	if (secpolicy_coreadm(cr) != 0)
		return (EPERM);

	/* Only whole-value writes starting at offset 0 are supported. */
	if (uio->uio_loffset != 0)
		return (EINVAL);

	if (uio->uio_resid == 0)
		return (0);

	olen = uio->uio_resid;
	if (olen > sizeof (val) - 1)
		return (EINVAL);

	bzero(val, sizeof (val));
	error = uiomove(val, olen, UIO_WRITE, uio);
	if (error != 0)
		return (error);

	/* Strip a single trailing newline. */
	if (val[olen - 1] == '\n')
		val[olen - 1] = '\0';

	/* Patterns beginning with '|' (Linux pipe-to-helper) are rejected. */
	if (val[0] == '|')
		return (EINVAL);

	if ((error = lxpr_core_path_l2s(val, valtr, sizeof (valtr))) != 0)
		return (error);

	nrp = refstr_alloc(valtr);

	ccp = cg->core_default_path;
	mutex_enter(&ccp->ccp_mtx);
	/*
	 * Swap the new path in under ccp_mtx, taking a hold on it; the old
	 * refstr is released only after the mutex is dropped.
	 */
	rp = ccp->ccp_path;
	refstr_hold((ccp->ccp_path = nrp));
	cg->core_options |= CC_PROCESS_PATH;
	mutex_exit(&ccp->ccp_mtx);

	if (rp != NULL)
		refstr_rele(rp);

	return (0);
}
5868
5869 /*
5870 * lxpr_readlink(): Vnode operation for VOP_READLINK()
5871 */
5872 /* ARGSUSED */
5873 static int
5874 lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
5875 {
5876 char bp[MAXPATHLEN + 1];
5877 size_t buflen = sizeof (bp);
5878 lxpr_node_t *lxpnp = VTOLXP(vp);
5879 vnode_t *rvp = lxpnp->lxpr_realvp;
5880 pid_t pid;
5881 int error = 0;
5882
5883 /*
5884 * Linux does something very "clever" for /proc/<pid>/fd/<num> entries.
5885 * Open FDs are represented as symlinks, the link contents
5886 * corresponding to the open resource. For plain files or devices,
5887 * this isn't absurd since one can dereference the symlink to query
5888 * the underlying resource. For sockets or pipes, it becomes ugly in a
5889 * hurry. To maintain this human-readable output, those FD symlinks
5890 * point to bogus targets such as "socket:[<inodenum>]". This requires
5891 * circumventing vfs since the stat/lstat behavior on those FD entries
5892 * will be unusual. (A stat must retrieve information about the open
5893 * socket or pipe. It cannot fail because the link contents point to
5894 * an absent file.)
5895 *
5896 * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD
5897 * entries. This bypasses code paths which would normally
5898 * short-circuit on symlinks and allows us to emulate the vfs behavior
5899 * expected by /proc consumers.
5900 */
5901 if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD)
5902 return (EINVAL);
5903
5904 /* Try to produce a symlink name for anything that has a realvp */
5905 if (rvp != NULL) {
5906 if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
5907 return (error);
5908 if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) {
5909 /*
5910 * Special handling possible for /proc/<pid>/fd/<num>
5911 * Generate <type>:[<inode>] links, if allowed.
5912 */
5913 if (lxpnp->lxpr_type != LXPR_PID_FD_FD ||
5914 lxpr_readlink_fdnode(lxpnp, bp, buflen) != 0) {
5915 return (error);
5916 }
5917 }
5918 } else {
5919 switch (lxpnp->lxpr_type) {
5920 case LXPR_SELF:
5921 /*
5922 * Convert pid to the Linux default of 1 if we're the
5923 * zone's init process or 0 if zsched.
5924 */
5925 if (curproc->p_pid ==
5926 curproc->p_zone->zone_proc_initpid) {
5927 pid = 1;
5928 } else if (curproc->p_pid ==
5929 curproc->p_zone->zone_zsched->p_pid) {
5930 pid = 0;
5931 } else {
5932 pid = curproc->p_pid;
5933 }
5934
5935 /*
5936 * Don't need to check result as every possible int
5937 * will fit within MAXPATHLEN bytes.
5938 */
5939 (void) snprintf(bp, buflen, "%d", pid);
5940 break;
5941 case LXPR_PID_CURDIR:
5942 case LXPR_PID_ROOTDIR:
5943 case LXPR_PID_EXE:
5944 return (EACCES);
5945 default:
5946 /*
5947 * Need to return error so that nothing thinks
5948 * that the symlink is empty and hence "."
5949 */
5950 return (EINVAL);
5951 }
5952 }
5953
5954 /* copy the link data to user space */
5955 return (uiomove(bp, strlen(bp), UIO_READ, uiop));
5956 }
5957
5958
5959 /*
5960 * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
5961 * Vnode is no longer referenced, deallocate the file
5962 * and all its resources.
5963 */
5964 /* ARGSUSED */
5965 static void
5966 lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
5967 {
5968 lxpr_freenode(VTOLXP(vp));
5969 }
5970
5971 /*
5972 * lxpr_sync(): Vnode operation for VOP_SYNC()
5973 */
5974 static int
5975 lxpr_sync()
5976 {
5977 /*
5978 * Nothing to sync but this function must never fail
5979 */
5980 return (0);
5981 }
5982
5983 /*
5984 * lxpr_cmp(): Vnode operation for VOP_CMP()
5985 */
5986 static int
5987 lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
5988 {
5989 vnode_t *rvp;
5990
5991 while (vn_matchops(vp1, lxpr_vnodeops) &&
5992 (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
5993 vp1 = rvp;
5994 }
5995
5996 while (vn_matchops(vp2, lxpr_vnodeops) &&
5997 (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
5998 vp2 = rvp;
5999 }
6000
6001 if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
6002 return (vp1 == vp2);
6003 return (VOP_CMP(vp1, vp2, ct));
6004 }
6005
6006 /*
6007 * lxpr_realvp(): Vnode operation for VOP_REALVP()
6008 */
6009 static int
6010 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
6011 {
6012 vnode_t *rvp;
6013
6014 if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
6015 vp = rvp;
6016 if (VOP_REALVP(vp, &rvp, ct) == 0)
6017 vp = rvp;
6018 }
6019
6020 *vpp = vp;
6021 return (0);
6022 }
6023
6024 static int
6025 lxpr_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
6026 caller_context_t *ct)
6027 {
6028 lxpr_node_t *lxpnp = VTOLXP(vp);
6029 lxpr_nodetype_t type = lxpnp->lxpr_type;
6030
6031 switch (type) {
6032 case LXPR_SYS_KERNEL_COREPATT:
6033 return (lxpr_write_sys_kernel_corepatt(lxpnp, uiop, cr, ct));
6034 case LXPR_SYS_NET_CORE_SOMAXCON:
6035 return (lxpr_write_sys_net_core_somaxc(lxpnp, uiop, cr, ct));
6036
6037 default:
6038 /* pretend we wrote the whole thing */
6039 uiop->uio_offset += uiop->uio_resid;
6040 uiop->uio_resid = 0;
6041 return (0);
6042 }
6043 }
6044
6045 /*
6046 * We need to allow open with O_CREAT for the oom_score_adj file.
6047 */
6048 /*ARGSUSED7*/
6049 static int
6050 lxpr_create(struct vnode *dvp, char *nm, struct vattr *vap,
6051 enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred,
6052 int flag, caller_context_t *ct, vsecattr_t *vsecp)
6053 {
6054 lxpr_node_t *lxpnp = VTOLXP(dvp);
6055 lxpr_nodetype_t type = lxpnp->lxpr_type;
6056 vnode_t *vp = NULL;
6057 int error;
6058
6059 ASSERT(type < LXPR_NFILES);
6060
6061 /*
6062 * restrict create permission to owner or root
6063 */
6064 if ((error = lxpr_access(dvp, VEXEC, 0, cred, ct)) != 0) {
6065 return (error);
6066 }
6067
6068 if (*nm == '\0')
6069 return (EPERM);
6070
6071 if (dvp->v_type != VDIR)
6072 return (EPERM);
6073
6074 if (exclusive == EXCL)
6075 return (EEXIST);
6076
6077 /*
6078 * We're currently restricting O_CREAT to:
6079 * - /proc/<pid>/fd/<num>
6080 * - /proc/<pid>/oom_score_adj
6081 * - /proc/<pid>/task/<tid>/fd/<num>
6082 * - /proc/<pid>/task/<tid>/oom_score_adj
6083 * - /proc/sys/kernel/core_pattern
6084 * - /proc/sys/net/core/somaxconn
6085 * - /proc/sys/vm/overcommit_memory
6086 * - /proc/sys/vm/swappiness
6087 */
6088 switch (type) {
6089 case LXPR_PIDDIR:
6090 case LXPR_PID_TASK_IDDIR:
6091 if (strcmp(nm, "oom_score_adj") == 0) {
6092 proc_t *p;
6093 p = lxpr_lock(lxpnp->lxpr_pid);
6094 if (p != NULL) {
6095 vp = lxpr_lookup_common(dvp, nm, p, piddir,
6096 PIDDIRFILES);
6097 }
6098 lxpr_unlock(p);
6099 }
6100 break;
6101
6102 case LXPR_SYS_NET_COREDIR:
6103 if (strcmp(nm, "somaxconn") == 0) {
6104 vp = lxpr_lookup_common(dvp, nm, NULL, sys_net_coredir,
6105 SYS_NET_COREDIRFILES);
6106 }
6107 break;
6108
6109 case LXPR_SYS_KERNELDIR:
6110 if (strcmp(nm, "core_pattern") == 0) {
6111 vp = lxpr_lookup_common(dvp, nm, NULL, sys_kerneldir,
6112 SYS_KERNELDIRFILES);
6113 }
6114 break;
6115
6116 case LXPR_SYS_VMDIR:
6117 if (strcmp(nm, "overcommit_memory") == 0 ||
6118 strcmp(nm, "swappiness") == 0) {
6119 vp = lxpr_lookup_common(dvp, nm, NULL, sys_vmdir,
6120 SYS_VMDIRFILES);
6121 }
6122 break;
6123
6124 case LXPR_PID_FDDIR:
6125 case LXPR_PID_TID_FDDIR:
6126 vp = lxpr_lookup_fdnode(dvp, nm);
6127 break;
6128
6129 default:
6130 vp = NULL;
6131 break;
6132 }
6133
6134 if (vp != NULL) {
6135 /* Creating an existing file, allow it for regular files. */
6136 if (vp->v_type == VDIR)
6137 return (EISDIR);
6138
6139 /* confirm permissions against existing file */
6140 if ((error = lxpr_access(vp, mode, 0, cred, ct)) != 0) {
6141 VN_RELE(vp);
6142 return (error);
6143 }
6144
6145 *vpp = vp;
6146 return (0);
6147 }
6148
6149 /*
6150 * Linux proc does not allow creation of addition, non-subsystem
6151 * specific files inside the hierarchy. ENOENT is tossed when such
6152 * actions are attempted.
6153 */
6154 return (ENOENT);
6155 }