Print this page
OS-5083 lx_proc /proc/swaps needs to try harder
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
+++ new/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright 2016 Joyent, Inc.
25 25 */
26 26
27 27 /*
28 28 * lx_proc -- a Linux-compatible /proc for the LX brand
29 29 *
30 30 * We have -- confusingly -- two implementations of Linux /proc. One is to
31 31 * support native (but Linux-borne) programs that wish to view the native
32 32 * system through the Linux /proc model; the other -- this one -- is to
33 33 * support Linux binaries via the LX brand. These two implementations differ
34 34 * greatly in their aspirations (and their willingness to bend the truth
35 35 * of the system to accommodate those aspirations); they should not be unified.
36 36 */
37 37
38 38 #include <sys/cpupart.h>
39 39 #include <sys/cpuvar.h>
40 40 #include <sys/session.h>
41 41 #include <sys/vmparam.h>
42 42 #include <sys/mman.h>
43 43 #include <vm/rm.h>
44 44 #include <vm/seg_vn.h>
45 45 #include <sys/sdt.h>
46 46 #include <lx_signum.h>
47 47 #include <sys/strlog.h>
48 48 #include <sys/stropts.h>
49 49 #include <sys/cmn_err.h>
50 50 #include <sys/lx_brand.h>
51 51 #include <lx_auxv.h>
52 52 #include <sys/x86_archext.h>
53 53 #include <sys/archsystm.h>
54 54 #include <sys/fp.h>
55 55 #include <sys/pool_pset.h>
56 56 #include <sys/pset.h>
57 57 #include <sys/zone.h>
58 58 #include <sys/pghw.h>
59 59 #include <sys/vfs_opreg.h>
60 60 #include <sys/param.h>
61 61 #include <sys/utsname.h>
62 62 #include <sys/rctl.h>
63 63 #include <sys/kstat.h>
64 64 #include <sys/lx_misc.h>
65 65 #include <sys/brand.h>
66 66 #include <sys/cred_impl.h>
67 67 #include <sys/tihdr.h>
68 68 #include <sys/corectl.h>
69 69 #include <inet/ip.h>
70 70 #include <inet/ip_ire.h>
71 71 #include <inet/ip6.h>
72 72 #include <inet/ip_if.h>
73 73 #include <inet/tcp.h>
74 74 #include <inet/tcp_impl.h>
75 75 #include <inet/udp_impl.h>
76 76 #include <inet/ipclassifier.h>
77 77 #include <sys/socketvar.h>
78 78 #include <fs/sockfs/socktpi.h>
79 79
80 80 /* Dependent on procfs */
81 81 extern kthread_t *prchoose(proc_t *);
82 82 extern int prreadargv(proc_t *, char *, size_t, size_t *);
83 83 extern int prreadenvv(proc_t *, char *, size_t, size_t *);
84 84 extern int prreadbuf(proc_t *, uintptr_t, uint8_t *, size_t, size_t *);
85 85
86 86 #include "lx_proc.h"
87 87
88 88 extern pgcnt_t swapfs_minfree;
89 89 extern time_t boot_time;
90 90
91 91 /*
92 92 * Pointer to the vnode ops vector for this fs.
93 93 * This is instantiated in lxprinit() in lxpr_vfsops.c
94 94 */
95 95 vnodeops_t *lxpr_vnodeops;
96 96
97 97 static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
98 98 static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
99 99 caller_context_t *);
100 100 static int lxpr_create(struct vnode *, char *, struct vattr *, enum vcexcl,
101 101 int, struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *);
102 102 static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
103 103 static int lxpr_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
104 104 static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
105 105 caller_context_t *);
106 106 static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
107 107 static int lxpr_lookup(vnode_t *, char *, vnode_t **,
108 108 pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
109 109 pathname_t *);
110 110 static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
111 111 caller_context_t *, int);
112 112 static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
113 113 static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
114 114 static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
115 115 static int lxpr_sync(void);
116 116 static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
117 117
118 118 static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
119 119 static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
120 120 static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
121 121 static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
122 122 static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
123 123 static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *);
124 124 static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *);
125 125 static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *);
126 126 static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *);
127 127 static vnode_t *lxpr_lookup_sys_kdir_randdir(vnode_t *, char *);
128 128 static vnode_t *lxpr_lookup_sys_netdir(vnode_t *, char *);
129 129 static vnode_t *lxpr_lookup_sys_net_coredir(vnode_t *, char *);
130 130 static vnode_t *lxpr_lookup_sys_vmdir(vnode_t *, char *);
131 131 static vnode_t *lxpr_lookup_taskdir(vnode_t *, char *);
132 132 static vnode_t *lxpr_lookup_task_tid_dir(vnode_t *, char *);
133 133
134 134 static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
135 135 static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
136 136 static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
137 137 static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
138 138 static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
139 139 static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *);
140 140 static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *);
141 141 static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *);
142 142 static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *);
143 143 static int lxpr_readdir_sys_kdir_randdir(lxpr_node_t *, uio_t *, int *);
144 144 static int lxpr_readdir_sys_netdir(lxpr_node_t *, uio_t *, int *);
145 145 static int lxpr_readdir_sys_net_coredir(lxpr_node_t *, uio_t *, int *);
146 146 static int lxpr_readdir_sys_vmdir(lxpr_node_t *, uio_t *, int *);
147 147 static int lxpr_readdir_taskdir(lxpr_node_t *, uio_t *, int *);
148 148 static int lxpr_readdir_task_tid_dir(lxpr_node_t *, uio_t *, int *);
149 149
150 150 static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
151 151 static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
152 152 static void lxpr_read_cgroups(lxpr_node_t *, lxpr_uiobuf_t *);
153 153 static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
154 154 static void lxpr_read_diskstats(lxpr_node_t *, lxpr_uiobuf_t *);
155 155 static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
156 156 static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
157 157 static void lxpr_read_filesystems(lxpr_node_t *, lxpr_uiobuf_t *);
158 158 static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t);
159 159 static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
160 160 static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
161 161 static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
162 162 static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
163 163 static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
164 164 static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *);
165 165 static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
166 166 static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
167 167
168 168 static void lxpr_read_pid_auxv(lxpr_node_t *, lxpr_uiobuf_t *);
169 169 static void lxpr_read_pid_cgroup(lxpr_node_t *, lxpr_uiobuf_t *);
170 170 static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
171 171 static void lxpr_read_pid_comm(lxpr_node_t *, lxpr_uiobuf_t *);
172 172 static void lxpr_read_pid_env(lxpr_node_t *, lxpr_uiobuf_t *);
173 173 static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *);
174 174 static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
175 175 static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *);
176 176 static void lxpr_read_pid_oom_scr_adj(lxpr_node_t *, lxpr_uiobuf_t *);
177 177 static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
178 178 static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
179 179 static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
180 180
181 181 static void lxpr_read_pid_tid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
182 182 static void lxpr_read_pid_tid_status(lxpr_node_t *, lxpr_uiobuf_t *);
183 183
184 184 static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
185 185 static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
186 186 static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
187 187 static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *);
188 188 static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
189 189 static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
190 190 static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
191 191 static void lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *);
192 192 static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
193 193 static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
194 194 static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
195 195 static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
196 196 static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
197 197 static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
198 198 static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
199 199 static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
200 200 static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
201 201 static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
202 202 static void lxpr_read_net_tcp6(lxpr_node_t *, lxpr_uiobuf_t *);
203 203 static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
204 204 static void lxpr_read_net_udp6(lxpr_node_t *, lxpr_uiobuf_t *);
205 205 static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
206 206 static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *,
207 207 lxpr_uiobuf_t *);
208 208 static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *,
209 209 lxpr_uiobuf_t *);
210 210 static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *,
211 211 lxpr_uiobuf_t *);
212 212 static void lxpr_read_sys_kernel_caplcap(lxpr_node_t *, lxpr_uiobuf_t *);
213 213 static void lxpr_read_sys_kernel_corepatt(lxpr_node_t *, lxpr_uiobuf_t *);
214 214 static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *);
215 215 static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *);
216 216 static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *);
217 217 static void lxpr_read_sys_kernel_osrel(lxpr_node_t *, lxpr_uiobuf_t *);
218 218 static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *);
219 219 static void lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *, lxpr_uiobuf_t *);
220 220 static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, lxpr_uiobuf_t *);
221 221 static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *);
222 222 static void lxpr_read_sys_net_core_somaxc(lxpr_node_t *, lxpr_uiobuf_t *);
223 223 static void lxpr_read_sys_vm_minfr_kb(lxpr_node_t *, lxpr_uiobuf_t *);
224 224 static void lxpr_read_sys_vm_nhpages(lxpr_node_t *, lxpr_uiobuf_t *);
225 225 static void lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *, lxpr_uiobuf_t *);
226 226 static void lxpr_read_sys_vm_swappiness(lxpr_node_t *, lxpr_uiobuf_t *);
227 227
228 228 static int lxpr_write_sys_net_core_somaxc(lxpr_node_t *, uio_t *, cred_t *,
229 229 caller_context_t *);
230 230 static int lxpr_write_sys_kernel_corepatt(lxpr_node_t *, uio_t *, cred_t *,
231 231 caller_context_t *);
232 232
/*
 * Simple conversion
 */
#define	btok(x)	((x) >> 10)			/* bytes to kbytes */
#define	ptok(x)	((x) << (PAGESHIFT - 10))	/* pages to kbytes */

/* Fetch the lx brand-specific lwp data hanging off a kthread. */
#define	ttolxlwp(t)	((struct lx_lwp_data *)ttolwpbrand(t))

extern rctl_hndl_t rc_zone_msgmni;
extern rctl_hndl_t rc_zone_shmmax;
/* NOTE(review): despite the name this is 4GB - 1 (UINT32_MAX). */
#define	FOURGB	4294967295

/*
 * The maximum length of the concatenation of argument vector strings we
 * will return to the user via the branded procfs. Likewise for the env vector.
 */
int lxpr_maxargvlen = 4096;
int lxpr_maxenvvlen = 4096;
251 251
252 252 /*
253 253 * The lx /proc vnode operations vector
254 254 */
255 255 const fs_operation_def_t lxpr_vnodeops_template[] = {
256 256 VOPNAME_OPEN, { .vop_open = lxpr_open },
257 257 VOPNAME_CLOSE, { .vop_close = lxpr_close },
258 258 VOPNAME_READ, { .vop_read = lxpr_read },
259 259 VOPNAME_WRITE, { .vop_read = lxpr_write },
260 260 VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr },
261 261 VOPNAME_ACCESS, { .vop_access = lxpr_access },
262 262 VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup },
263 263 VOPNAME_CREATE, { .vop_create = lxpr_create },
264 264 VOPNAME_READDIR, { .vop_readdir = lxpr_readdir },
265 265 VOPNAME_READLINK, { .vop_readlink = lxpr_readlink },
266 266 VOPNAME_FSYNC, { .error = lxpr_sync },
267 267 VOPNAME_SEEK, { .error = lxpr_sync },
268 268 VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive },
269 269 VOPNAME_CMP, { .vop_cmp = lxpr_cmp },
270 270 VOPNAME_REALVP, { .vop_realvp = lxpr_realvp },
271 271 NULL, NULL
272 272 };
273 273
274 274
/*
 * file contents of an lx /proc directory.
 * Each entry pairs an lxpr_nodetype_t with the name presented to Linux
 * userland; the table drives both lookup and readdir of /proc itself.
 */
static lxpr_dirent_t lx_procdir[] = {
	{ LXPR_CGROUPS,		"cgroups" },
	{ LXPR_CMDLINE,		"cmdline" },
	{ LXPR_CPUINFO,		"cpuinfo" },
	{ LXPR_DEVICES,		"devices" },
	{ LXPR_DISKSTATS,	"diskstats" },
	{ LXPR_DMA,		"dma" },
	{ LXPR_FILESYSTEMS,	"filesystems" },
	{ LXPR_INTERRUPTS,	"interrupts" },
	{ LXPR_IOPORTS,		"ioports" },
	{ LXPR_KCORE,		"kcore" },
	{ LXPR_KMSG,		"kmsg" },
	{ LXPR_LOADAVG,		"loadavg" },
	{ LXPR_MEMINFO,		"meminfo" },
	{ LXPR_MODULES,		"modules" },
	{ LXPR_MOUNTS,		"mounts" },
	{ LXPR_NETDIR,		"net" },
	{ LXPR_PARTITIONS,	"partitions" },
	{ LXPR_SELF,		"self" },
	{ LXPR_STAT,		"stat" },
	{ LXPR_SWAPS,		"swaps" },
	{ LXPR_SYSDIR,		"sys" },
	{ LXPR_UPTIME,		"uptime" },
	{ LXPR_VERSION,		"version" }
};

#define	PROCDIRFILES	(sizeof (lx_procdir) / sizeof (lx_procdir[0]))
305 305
/*
 * Contents of an lx /proc/<pid> directory.
 * Node types here are the per-process (not per-thread) variants; compare
 * with tiddir below, which substitutes LXPR_PID_TID_* types where the
 * thread-level file differs.
 */
static lxpr_dirent_t piddir[] = {
	{ LXPR_PID_AUXV,	"auxv" },
	{ LXPR_PID_CGROUP,	"cgroup" },
	{ LXPR_PID_CMDLINE,	"cmdline" },
	{ LXPR_PID_COMM,	"comm" },
	{ LXPR_PID_CPU,		"cpu" },
	{ LXPR_PID_CURDIR,	"cwd" },
	{ LXPR_PID_ENV,		"environ" },
	{ LXPR_PID_EXE,		"exe" },
	{ LXPR_PID_LIMITS,	"limits" },
	{ LXPR_PID_MAPS,	"maps" },
	{ LXPR_PID_MEM,		"mem" },
	{ LXPR_PID_MOUNTINFO,	"mountinfo" },
	{ LXPR_PID_OOM_SCR_ADJ,	"oom_score_adj" },
	{ LXPR_PID_ROOTDIR,	"root" },
	{ LXPR_PID_STAT,	"stat" },
	{ LXPR_PID_STATM,	"statm" },
	{ LXPR_PID_STATUS,	"status" },
	{ LXPR_PID_TASKDIR,	"task" },
	{ LXPR_PID_FDDIR,	"fd" }
};

#define	PIDDIRFILES	(sizeof (piddir) / sizeof (piddir[0]))
332 332
/*
 * Contents of an lx /proc/<pid>/task/<tid> directory.
 * Mirrors piddir, but auxv/comm/oom_score_adj/stat/status use the
 * LXPR_PID_TID_* node types so their read functions can report
 * thread-level rather than process-level data.
 */
static lxpr_dirent_t tiddir[] = {
	{ LXPR_PID_TID_AUXV,	"auxv" },
	{ LXPR_PID_CGROUP,	"cgroup" },
	{ LXPR_PID_CMDLINE,	"cmdline" },
	{ LXPR_PID_TID_COMM,	"comm" },
	{ LXPR_PID_CPU,		"cpu" },
	{ LXPR_PID_CURDIR,	"cwd" },
	{ LXPR_PID_ENV,		"environ" },
	{ LXPR_PID_EXE,		"exe" },
	{ LXPR_PID_LIMITS,	"limits" },
	{ LXPR_PID_MAPS,	"maps" },
	{ LXPR_PID_MEM,		"mem" },
	{ LXPR_PID_MOUNTINFO,	"mountinfo" },
	{ LXPR_PID_TID_OOM_SCR_ADJ,	"oom_score_adj" },
	{ LXPR_PID_ROOTDIR,	"root" },
	{ LXPR_PID_TID_STAT,	"stat" },
	{ LXPR_PID_STATM,	"statm" },
	{ LXPR_PID_TID_STATUS,	"status" },
	{ LXPR_PID_FDDIR,	"fd" }
};

#define	TIDDIRFILES	(sizeof (tiddir) / sizeof (tiddir[0]))
358 358
359 359 #define LX_RLIM_INFINITY 0xFFFFFFFFFFFFFFFF
360 360
361 361 #define RCTL_INFINITE(x) \
362 362 ((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \
363 363 (x->rcv_flagaction & RCTL_GLOBAL_INFINITE))
364 364
365 365 typedef struct lxpr_rlimtab {
366 366 char *rlim_name; /* limit name */
367 367 char *rlim_unit; /* limit unit */
368 368 char *rlim_rctl; /* rctl source */
369 369 } lxpr_rlimtab_t;
370 370
371 371 static lxpr_rlimtab_t lxpr_rlimtab[] = {
372 372 { "Max cpu time", "seconds", "process.max-cpu-time" },
373 373 { "Max file size", "bytes", "process.max-file-size" },
374 374 { "Max data size", "bytes", "process.max-data-size" },
375 375 { "Max stack size", "bytes", "process.max-stack-size" },
376 376 { "Max core file size", "bytes", "process.max-core-size" },
377 377 { "Max resident set", "bytes", "zone.max-physical-memory" },
378 378 { "Max processes", "processes", "zone.max-lwps" },
379 379 { "Max open files", "files", "process.max-file-descriptor" },
380 380 { "Max locked memory", "bytes", "zone.max-locked-memory" },
381 381 { "Max address space", "bytes", "process.max-address-space" },
382 382 { "Max file locks", "locks", NULL },
383 383 { "Max pending signals", "signals",
384 384 "process.max-sigqueue-size" },
385 385 { "Max msgqueue size", "bytes", "process.max-msg-messages" },
386 386 { NULL, NULL, NULL }
387 387 };
388 388
389 389
/*
 * contents of lx /proc/net directory
 * Every file Linux userland commonly probes is present; several are
 * emulated as empty (see the corresponding lxpr_read_net_* functions).
 */
static lxpr_dirent_t netdir[] = {
	{ LXPR_NET_ARP,		"arp" },
	{ LXPR_NET_DEV,		"dev" },
	{ LXPR_NET_DEV_MCAST,	"dev_mcast" },
	{ LXPR_NET_IF_INET6,	"if_inet6" },
	{ LXPR_NET_IGMP,	"igmp" },
	{ LXPR_NET_IP_MR_CACHE,	"ip_mr_cache" },
	{ LXPR_NET_IP_MR_VIF,	"ip_mr_vif" },
	{ LXPR_NET_IPV6_ROUTE,	"ipv6_route" },
	{ LXPR_NET_MCFILTER,	"mcfilter" },
	{ LXPR_NET_NETSTAT,	"netstat" },
	{ LXPR_NET_RAW,		"raw" },
	{ LXPR_NET_ROUTE,	"route" },
	{ LXPR_NET_RPC,		"rpc" },
	{ LXPR_NET_RT_CACHE,	"rt_cache" },
	{ LXPR_NET_SOCKSTAT,	"sockstat" },
	{ LXPR_NET_SNMP,	"snmp" },
	{ LXPR_NET_STAT,	"stat" },
	{ LXPR_NET_TCP,		"tcp" },
	{ LXPR_NET_TCP6,	"tcp6" },
	{ LXPR_NET_UDP,		"udp" },
	{ LXPR_NET_UDP6,	"udp6" },
	{ LXPR_NET_UNIX,	"unix" }
};

#define	NETDIRFILES	(sizeof (netdir) / sizeof (netdir[0]))
419 419
/*
 * contents of /proc/sys directory
 */
static lxpr_dirent_t sysdir[] = {
	{ LXPR_SYS_FSDIR,	"fs" },
	{ LXPR_SYS_KERNELDIR,	"kernel" },
	{ LXPR_SYS_NETDIR,	"net" },
	{ LXPR_SYS_VMDIR,	"vm" },
};

#define	SYSDIRFILES	(sizeof (sysdir) / sizeof (sysdir[0]))

/*
 * contents of /proc/sys/fs directory
 */
static lxpr_dirent_t sys_fsdir[] = {
	{ LXPR_SYS_FS_INOTIFYDIR,	"inotify" },
};

#define	SYS_FSDIRFILES	(sizeof (sys_fsdir) / sizeof (sys_fsdir[0]))

/*
 * contents of /proc/sys/fs/inotify directory
 */
static lxpr_dirent_t sys_fs_inotifydir[] = {
	{ LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS,	"max_queued_events" },
	{ LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES,	"max_user_instances" },
	{ LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES,		"max_user_watches" },
};

#define	SYS_FS_INOTIFYDIRFILES \
	(sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0]))

/*
 * contents of /proc/sys/kernel directory
 * Note the file names are the Linux spellings ("threads-max" uses a
 * hyphen, "osrelease" has no underscore).
 */
static lxpr_dirent_t sys_kerneldir[] = {
	{ LXPR_SYS_KERNEL_CAPLCAP,	"cap_last_cap" },
	{ LXPR_SYS_KERNEL_COREPATT,	"core_pattern" },
	{ LXPR_SYS_KERNEL_HOSTNAME,	"hostname" },
	{ LXPR_SYS_KERNEL_MSGMNI,	"msgmni" },
	{ LXPR_SYS_KERNEL_NGROUPS_MAX,	"ngroups_max" },
	{ LXPR_SYS_KERNEL_OSREL,	"osrelease" },
	{ LXPR_SYS_KERNEL_PID_MAX,	"pid_max" },
	{ LXPR_SYS_KERNEL_RANDDIR,	"random" },
	{ LXPR_SYS_KERNEL_SHMMAX,	"shmmax" },
	{ LXPR_SYS_KERNEL_THREADS_MAX,	"threads-max" },
};

#define	SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0]))

/*
 * contents of /proc/sys/kernel/random directory
 */
static lxpr_dirent_t sys_randdir[] = {
	{ LXPR_SYS_KERNEL_RAND_BOOTID,	"boot_id" },
};

#define	SYS_RANDDIRFILES (sizeof (sys_randdir) / sizeof (sys_randdir[0]))

/*
 * contents of /proc/sys/net directory
 */
static lxpr_dirent_t sys_netdir[] = {
	{ LXPR_SYS_NET_COREDIR,		"core" },
};

#define	SYS_NETDIRFILES (sizeof (sys_netdir) / sizeof (sys_netdir[0]))

/*
 * contents of /proc/sys/net/core directory
 */
static lxpr_dirent_t sys_net_coredir[] = {
	{ LXPR_SYS_NET_CORE_SOMAXCON,	"somaxconn" },
};

#define	SYS_NET_COREDIRFILES \
	(sizeof (sys_net_coredir) / sizeof (sys_net_coredir[0]))

/*
 * contents of /proc/sys/vm directory
 */
static lxpr_dirent_t sys_vmdir[] = {
	{ LXPR_SYS_VM_MINFR_KB,		"min_free_kbytes" },
	{ LXPR_SYS_VM_NHUGEP,		"nr_hugepages" },
	{ LXPR_SYS_VM_OVERCOMMIT_MEM,	"overcommit_memory" },
	{ LXPR_SYS_VM_SWAPPINESS,	"swappiness" },
};

#define	SYS_VMDIRFILES (sizeof (sys_vmdir) / sizeof (sys_vmdir[0]))
510 510
511 511 /*
512 512 * lxpr_open(): Vnode operation for VOP_OPEN()
513 513 */
514 514 static int
515 515 lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
516 516 {
517 517 vnode_t *vp = *vpp;
518 518 lxpr_node_t *lxpnp = VTOLXP(vp);
519 519 lxpr_nodetype_t type = lxpnp->lxpr_type;
520 520 vnode_t *rvp;
521 521 int error = 0;
522 522
523 523 if (flag & FWRITE) {
524 524 /* Restrict writes to certain files */
525 525 switch (type) {
526 526 case LXPR_PID_OOM_SCR_ADJ:
527 527 case LXPR_PID_TID_OOM_SCR_ADJ:
528 528 case LXPR_SYS_KERNEL_COREPATT:
529 529 case LXPR_SYS_NET_CORE_SOMAXCON:
530 530 case LXPR_SYS_VM_OVERCOMMIT_MEM:
531 531 case LXPR_SYS_VM_SWAPPINESS:
532 532 case LXPR_PID_FD_FD:
533 533 case LXPR_PID_TID_FD_FD:
534 534 break;
535 535 default:
536 536 return (EPERM);
537 537 }
538 538 }
539 539
540 540 /*
541 541 * If we are opening an underlying file only allow regular files,
542 542 * fifos or sockets; reject the open for anything else.
543 543 * Just do it if we are opening the current or root directory.
544 544 */
545 545 if (lxpnp->lxpr_realvp != NULL) {
546 546 rvp = lxpnp->lxpr_realvp;
547 547
548 548 if (type == LXPR_PID_FD_FD && rvp->v_type != VREG &&
549 549 rvp->v_type != VFIFO && rvp->v_type != VSOCK) {
550 550 error = EACCES;
551 551 } else {
552 552 if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) {
553 553 /*
554 554 * This flag lets the fifo open know that
555 555 * we're using proc/fd to open a fd which we
556 556 * already have open. Otherwise, the fifo might
557 557 * reject an open if the other end has closed.
558 558 */
559 559 flag |= FKLYR;
560 560 }
561 561 /*
562 562 * Need to hold rvp since VOP_OPEN() may release it.
563 563 */
564 564 VN_HOLD(rvp);
565 565 error = VOP_OPEN(&rvp, flag, cr, ct);
566 566 if (error) {
567 567 VN_RELE(rvp);
568 568 } else {
569 569 *vpp = rvp;
570 570 VN_RELE(vp);
571 571 }
572 572 }
573 573 }
574 574
575 575 return (error);
576 576 }
577 577
578 578
579 579 /*
580 580 * lxpr_close(): Vnode operation for VOP_CLOSE()
581 581 */
582 582 /* ARGSUSED */
583 583 static int
584 584 lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
585 585 caller_context_t *ct)
586 586 {
587 587 lxpr_node_t *lxpr = VTOLXP(vp);
588 588 lxpr_nodetype_t type = lxpr->lxpr_type;
589 589
590 590 /*
591 591 * we should never get here because the close is done on the realvp
592 592 * for these nodes
593 593 */
594 594 ASSERT(type != LXPR_PID_FD_FD &&
595 595 type != LXPR_PID_CURDIR &&
596 596 type != LXPR_PID_ROOTDIR &&
597 597 type != LXPR_PID_EXE);
598 598
599 599 return (0);
600 600 }
601 601
/*
 * Array of read functions, indexed by lx /proc file type.
 * NOTE(review): entry order appears to mirror the lxpr_nodetype_t
 * enumeration (see lx_proc.h) -- when adding a node type, insert its read
 * function at the matching position; confirm against the enum definition.
 */
static void (*lxpr_read_function[LXPR_NFILES])() = {
	lxpr_read_isdir,		/* /proc			*/
	lxpr_read_isdir,		/* /proc/<pid>			*/
	lxpr_read_pid_auxv,		/* /proc/<pid>/auxv		*/
	lxpr_read_pid_cgroup,		/* /proc/<pid>/cgroup		*/
	lxpr_read_pid_cmdline,		/* /proc/<pid>/cmdline		*/
	lxpr_read_pid_comm,		/* /proc/<pid>/comm		*/
	lxpr_read_empty,		/* /proc/<pid>/cpu		*/
	lxpr_read_invalid,		/* /proc/<pid>/cwd		*/
	lxpr_read_pid_env,		/* /proc/<pid>/environ		*/
	lxpr_read_invalid,		/* /proc/<pid>/exe		*/
	lxpr_read_pid_limits,		/* /proc/<pid>/limits		*/
	lxpr_read_pid_maps,		/* /proc/<pid>/maps		*/
	lxpr_read_empty,		/* /proc/<pid>/mem		*/
	lxpr_read_pid_mountinfo,	/* /proc/<pid>/mountinfo	*/
	lxpr_read_pid_oom_scr_adj,	/* /proc/<pid>/oom_score_adj	*/
	lxpr_read_invalid,		/* /proc/<pid>/root		*/
	lxpr_read_pid_stat,		/* /proc/<pid>/stat		*/
	lxpr_read_pid_statm,		/* /proc/<pid>/statm		*/
	lxpr_read_pid_status,		/* /proc/<pid>/status		*/
	lxpr_read_isdir,		/* /proc/<pid>/task		*/
	lxpr_read_isdir,		/* /proc/<pid>/task/nn		*/
	lxpr_read_isdir,		/* /proc/<pid>/fd		*/
	lxpr_read_fd,			/* /proc/<pid>/fd/nn		*/
	lxpr_read_pid_auxv,		/* /proc/<pid>/task/<tid>/auxv	*/
	lxpr_read_pid_cgroup,		/* /proc/<pid>/task/<tid>/cgroup */
	lxpr_read_pid_cmdline,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_read_pid_comm,		/* /proc/<pid>/task/<tid>/comm	*/
	lxpr_read_empty,		/* /proc/<pid>/task/<tid>/cpu	*/
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/cwd	*/
	lxpr_read_pid_env,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/exe	*/
	lxpr_read_pid_limits,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_read_pid_maps,		/* /proc/<pid>/task/<tid>/maps	*/
	lxpr_read_empty,		/* /proc/<pid>/task/<tid>/mem	*/
	lxpr_read_pid_mountinfo,	/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_read_pid_oom_scr_adj,	/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/root	*/
	lxpr_read_pid_tid_stat,		/* /proc/<pid>/task/<tid>/stat	*/
	lxpr_read_pid_statm,		/* /proc/<pid>/task/<tid>/statm	*/
	lxpr_read_pid_tid_status,	/* /proc/<pid>/task/<tid>/status */
	lxpr_read_isdir,		/* /proc/<pid>/task/<tid>/fd	*/
	lxpr_read_fd,			/* /proc/<pid>/task/<tid>/fd/nn	*/
	lxpr_read_cgroups,		/* /proc/cgroups	*/
	lxpr_read_empty,		/* /proc/cmdline	*/
	lxpr_read_cpuinfo,		/* /proc/cpuinfo	*/
	lxpr_read_empty,		/* /proc/devices	*/
	lxpr_read_diskstats,		/* /proc/diskstats	*/
	lxpr_read_empty,		/* /proc/dma		*/
	lxpr_read_filesystems,		/* /proc/filesystems	*/
	lxpr_read_empty,		/* /proc/interrupts	*/
	lxpr_read_empty,		/* /proc/ioports	*/
	lxpr_read_empty,		/* /proc/kcore		*/
	lxpr_read_invalid,		/* /proc/kmsg -- see lxpr_read() */
	lxpr_read_loadavg,		/* /proc/loadavg	*/
	lxpr_read_meminfo,		/* /proc/meminfo	*/
	lxpr_read_empty,		/* /proc/modules	*/
	lxpr_read_mounts,		/* /proc/mounts		*/
	lxpr_read_isdir,		/* /proc/net		*/
	lxpr_read_net_arp,		/* /proc/net/arp	*/
	lxpr_read_net_dev,		/* /proc/net/dev	*/
	lxpr_read_net_dev_mcast,	/* /proc/net/dev_mcast	*/
	lxpr_read_net_if_inet6,		/* /proc/net/if_inet6	*/
	lxpr_read_net_igmp,		/* /proc/net/igmp	*/
	lxpr_read_net_ip_mr_cache,	/* /proc/net/ip_mr_cache */
	lxpr_read_net_ip_mr_vif,	/* /proc/net/ip_mr_vif	*/
	lxpr_read_net_ipv6_route,	/* /proc/net/ipv6_route	*/
	lxpr_read_net_mcfilter,		/* /proc/net/mcfilter	*/
	lxpr_read_net_netstat,		/* /proc/net/netstat	*/
	lxpr_read_net_raw,		/* /proc/net/raw	*/
	lxpr_read_net_route,		/* /proc/net/route	*/
	lxpr_read_net_rpc,		/* /proc/net/rpc	*/
	lxpr_read_net_rt_cache,		/* /proc/net/rt_cache	*/
	lxpr_read_net_sockstat,		/* /proc/net/sockstat	*/
	lxpr_read_net_snmp,		/* /proc/net/snmp	*/
	lxpr_read_net_stat,		/* /proc/net/stat	*/
	lxpr_read_net_tcp,		/* /proc/net/tcp	*/
	lxpr_read_net_tcp6,		/* /proc/net/tcp6	*/
	lxpr_read_net_udp,		/* /proc/net/udp	*/
	lxpr_read_net_udp6,		/* /proc/net/udp6	*/
	lxpr_read_net_unix,		/* /proc/net/unix	*/
	lxpr_read_partitions,		/* /proc/partitions	*/
	lxpr_read_invalid,		/* /proc/self		*/
	lxpr_read_stat,			/* /proc/stat		*/
	lxpr_read_swaps,		/* /proc/swaps		*/
	lxpr_read_invalid,		/* /proc/sys		*/
	lxpr_read_invalid,		/* /proc/sys/fs		*/
	lxpr_read_invalid,		/* /proc/sys/fs/inotify	*/
	lxpr_read_sys_fs_inotify_max_queued_events, /* max_queued_events */
	lxpr_read_sys_fs_inotify_max_user_instances, /* max_user_instances */
	lxpr_read_sys_fs_inotify_max_user_watches, /* max_user_watches */
	lxpr_read_invalid,		/* /proc/sys/kernel	*/
	lxpr_read_sys_kernel_caplcap,	/* /proc/sys/kernel/cap_last_cap */
	lxpr_read_sys_kernel_corepatt,	/* /proc/sys/kernel/core_pattern */
	lxpr_read_sys_kernel_hostname,	/* /proc/sys/kernel/hostname */
	lxpr_read_sys_kernel_msgmni,	/* /proc/sys/kernel/msgmni */
	lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */
	lxpr_read_sys_kernel_osrel,	/* /proc/sys/kernel/osrelease */
	lxpr_read_sys_kernel_pid_max,	/* /proc/sys/kernel/pid_max */
	lxpr_read_invalid,		/* /proc/sys/kernel/random */
	lxpr_read_sys_kernel_rand_bootid, /* /proc/sys/kernel/random/boot_id */
	lxpr_read_sys_kernel_shmmax,	/* /proc/sys/kernel/shmmax */
	lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */
	lxpr_read_invalid,		/* /proc/sys/net	*/
	lxpr_read_invalid,		/* /proc/sys/net/core	*/
	lxpr_read_sys_net_core_somaxc,	/* /proc/sys/net/core/somaxconn */
	lxpr_read_invalid,		/* /proc/sys/vm		*/
	lxpr_read_sys_vm_minfr_kb,	/* /proc/sys/vm/min_free_kbytes */
	lxpr_read_sys_vm_nhpages,	/* /proc/sys/vm/nr_hugepages */
	lxpr_read_sys_vm_overcommit_mem, /* /proc/sys/vm/overcommit_memory */
	lxpr_read_sys_vm_swappiness,	/* /proc/sys/vm/swappiness */
	lxpr_read_uptime,		/* /proc/uptime		*/
	lxpr_read_version,		/* /proc/version	*/
};
716 716
717 717 /*
718 718 * Array of lookup functions, indexed by lx /proc file type.
719 719 */
720 720 static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
721 721 lxpr_lookup_procdir, /* /proc */
722 722 lxpr_lookup_piddir, /* /proc/<pid> */
723 723 lxpr_lookup_not_a_dir, /* /proc/<pid>/auxv */
724 724 lxpr_lookup_not_a_dir, /* /proc/<pid>/cgroup */
725 725 lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */
726 726 lxpr_lookup_not_a_dir, /* /proc/<pid>/comm */
727 727 lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */
728 728 lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */
729 729 lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */
730 730 lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */
731 731 lxpr_lookup_not_a_dir, /* /proc/<pid>/limits */
732 732 lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */
733 733 lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */
734 734 lxpr_lookup_not_a_dir, /* /proc/<pid>/mountinfo */
735 735 lxpr_lookup_not_a_dir, /* /proc/<pid>/oom_score_adj */
736 736 lxpr_lookup_not_a_dir, /* /proc/<pid>/root */
737 737 lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */
738 738 lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */
739 739 lxpr_lookup_not_a_dir, /* /proc/<pid>/status */
740 740 lxpr_lookup_taskdir, /* /proc/<pid>/task */
741 741 lxpr_lookup_task_tid_dir, /* /proc/<pid>/task/nn */
742 742 lxpr_lookup_fddir, /* /proc/<pid>/fd */
743 743 lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */
744 744 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/auxv */
745 745 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cgroup */
746 746 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cmdline */
747 747 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/comm */
748 748 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cpu */
749 749 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cwd */
750 750 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/environ */
751 751 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/exe */
752 752 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/limits */
753 753 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/maps */
754 754 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/mem */
755 755 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/mountinfo */
756 756 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/oom_scr_adj */
757 757 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/root */
758 758 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/stat */
759 759 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/statm */
760 760 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/status */
761 761 lxpr_lookup_fddir, /* /proc/<pid>/task/<tid>/fd */
762 762 lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/fd/nn */
763 763 lxpr_lookup_not_a_dir, /* /proc/cgroups */
764 764 lxpr_lookup_not_a_dir, /* /proc/cmdline */
765 765 lxpr_lookup_not_a_dir, /* /proc/cpuinfo */
766 766 lxpr_lookup_not_a_dir, /* /proc/devices */
767 767 lxpr_lookup_not_a_dir, /* /proc/diskstats */
768 768 lxpr_lookup_not_a_dir, /* /proc/dma */
769 769 lxpr_lookup_not_a_dir, /* /proc/filesystems */
770 770 lxpr_lookup_not_a_dir, /* /proc/interrupts */
771 771 lxpr_lookup_not_a_dir, /* /proc/ioports */
772 772 lxpr_lookup_not_a_dir, /* /proc/kcore */
773 773 lxpr_lookup_not_a_dir, /* /proc/kmsg */
774 774 lxpr_lookup_not_a_dir, /* /proc/loadavg */
775 775 lxpr_lookup_not_a_dir, /* /proc/meminfo */
776 776 lxpr_lookup_not_a_dir, /* /proc/modules */
777 777 lxpr_lookup_not_a_dir, /* /proc/mounts */
778 778 lxpr_lookup_netdir, /* /proc/net */
779 779 lxpr_lookup_not_a_dir, /* /proc/net/arp */
780 780 lxpr_lookup_not_a_dir, /* /proc/net/dev */
781 781 lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */
782 782 lxpr_lookup_not_a_dir, /* /proc/net/if_inet6 */
783 783 lxpr_lookup_not_a_dir, /* /proc/net/igmp */
784 784 lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */
785 785 lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */
786 786 lxpr_lookup_not_a_dir, /* /proc/net/ipv6_route */
787 787 lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */
788 788 lxpr_lookup_not_a_dir, /* /proc/net/netstat */
789 789 lxpr_lookup_not_a_dir, /* /proc/net/raw */
790 790 lxpr_lookup_not_a_dir, /* /proc/net/route */
791 791 lxpr_lookup_not_a_dir, /* /proc/net/rpc */
792 792 lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */
793 793 lxpr_lookup_not_a_dir, /* /proc/net/sockstat */
794 794 lxpr_lookup_not_a_dir, /* /proc/net/snmp */
795 795 lxpr_lookup_not_a_dir, /* /proc/net/stat */
796 796 lxpr_lookup_not_a_dir, /* /proc/net/tcp */
797 797 lxpr_lookup_not_a_dir, /* /proc/net/tcp6 */
798 798 lxpr_lookup_not_a_dir, /* /proc/net/udp */
799 799 lxpr_lookup_not_a_dir, /* /proc/net/udp6 */
800 800 lxpr_lookup_not_a_dir, /* /proc/net/unix */
801 801 lxpr_lookup_not_a_dir, /* /proc/partitions */
802 802 lxpr_lookup_not_a_dir, /* /proc/self */
803 803 lxpr_lookup_not_a_dir, /* /proc/stat */
804 804 lxpr_lookup_not_a_dir, /* /proc/swaps */
805 805 lxpr_lookup_sysdir, /* /proc/sys */
806 806 lxpr_lookup_sys_fsdir, /* /proc/sys/fs */
807 807 lxpr_lookup_sys_fs_inotifydir, /* /proc/sys/fs/inotify */
808 808 lxpr_lookup_not_a_dir, /* .../inotify/max_queued_events */
809 809 lxpr_lookup_not_a_dir, /* .../inotify/max_user_instances */
810 810 lxpr_lookup_not_a_dir, /* .../inotify/max_user_watches */
811 811 lxpr_lookup_sys_kerneldir, /* /proc/sys/kernel */
812 812 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/cap_last_cap */
813 813 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/core_pattern */
814 814 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/hostname */
815 815 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/msgmni */
816 816 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/ngroups_max */
817 817 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/osrelease */
818 818 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/pid_max */
819 819 lxpr_lookup_sys_kdir_randdir, /* /proc/sys/kernel/random */
820 820 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/random/boot_id */
821 821 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/shmmax */
822 822 lxpr_lookup_not_a_dir, /* /proc/sys/kernel/threads-max */
823 823 lxpr_lookup_sys_netdir, /* /proc/sys/net */
824 824 lxpr_lookup_sys_net_coredir, /* /proc/sys/net/core */
825 825 lxpr_lookup_not_a_dir, /* /proc/sys/net/core/somaxconn */
826 826 lxpr_lookup_sys_vmdir, /* /proc/sys/vm */
827 827 lxpr_lookup_not_a_dir, /* /proc/sys/vm/min_free_kbytes */
828 828 lxpr_lookup_not_a_dir, /* /proc/sys/vm/nr_hugepages */
829 829 lxpr_lookup_not_a_dir, /* /proc/sys/vm/overcommit_memory */
830 830 lxpr_lookup_not_a_dir, /* /proc/sys/vm/swappiness */
831 831 lxpr_lookup_not_a_dir, /* /proc/uptime */
832 832 lxpr_lookup_not_a_dir, /* /proc/version */
833 833 };
834 834
835 835 /*
836 836 * Array of readdir functions, indexed by /proc file type.
837 837 */
838 838 static int (*lxpr_readdir_function[LXPR_NFILES])() = {
839 839 lxpr_readdir_procdir, /* /proc */
840 840 lxpr_readdir_piddir, /* /proc/<pid> */
841 841 lxpr_readdir_not_a_dir, /* /proc/<pid>/auxv */
842 842 lxpr_readdir_not_a_dir, /* /proc/<pid>/cgroup */
843 843 lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */
844 844 lxpr_readdir_not_a_dir, /* /proc/<pid>/comm */
845 845 lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */
846 846 lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */
847 847 lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */
848 848 lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */
849 849 lxpr_readdir_not_a_dir, /* /proc/<pid>/limits */
850 850 lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */
851 851 lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */
852 852 lxpr_readdir_not_a_dir, /* /proc/<pid>/mountinfo */
853 853 lxpr_readdir_not_a_dir, /* /proc/<pid>/oom_score_adj */
854 854 lxpr_readdir_not_a_dir, /* /proc/<pid>/root */
855 855 lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */
856 856 lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */
857 857 lxpr_readdir_not_a_dir, /* /proc/<pid>/status */
858 858 lxpr_readdir_taskdir, /* /proc/<pid>/task */
859 859 lxpr_readdir_task_tid_dir, /* /proc/<pid>/task/nn */
860 860 lxpr_readdir_fddir, /* /proc/<pid>/fd */
861 861 lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */
862 862 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/auxv */
863 863 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cgroup */
864 864 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cmdline */
865 865 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/comm */
866 866 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cpu */
867 867 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cwd */
868 868 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/environ */
869 869 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/exe */
870 870 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/limits */
871 871 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/maps */
872 872 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/mem */
873 873 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/mountinfo */
874 874 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid/oom_scr_adj */
875 875 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/root */
876 876 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/stat */
877 877 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/statm */
878 878 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/status */
879 879 lxpr_readdir_fddir, /* /proc/<pid>/task/<tid>/fd */
880 880 lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/fd/nn */
881 881 lxpr_readdir_not_a_dir, /* /proc/cgroups */
882 882 lxpr_readdir_not_a_dir, /* /proc/cmdline */
883 883 lxpr_readdir_not_a_dir, /* /proc/cpuinfo */
884 884 lxpr_readdir_not_a_dir, /* /proc/devices */
885 885 lxpr_readdir_not_a_dir, /* /proc/diskstats */
886 886 lxpr_readdir_not_a_dir, /* /proc/dma */
887 887 lxpr_readdir_not_a_dir, /* /proc/filesystems */
888 888 lxpr_readdir_not_a_dir, /* /proc/interrupts */
889 889 lxpr_readdir_not_a_dir, /* /proc/ioports */
890 890 lxpr_readdir_not_a_dir, /* /proc/kcore */
891 891 lxpr_readdir_not_a_dir, /* /proc/kmsg */
892 892 lxpr_readdir_not_a_dir, /* /proc/loadavg */
893 893 lxpr_readdir_not_a_dir, /* /proc/meminfo */
894 894 lxpr_readdir_not_a_dir, /* /proc/modules */
895 895 lxpr_readdir_not_a_dir, /* /proc/mounts */
896 896 lxpr_readdir_netdir, /* /proc/net */
897 897 lxpr_readdir_not_a_dir, /* /proc/net/arp */
898 898 lxpr_readdir_not_a_dir, /* /proc/net/dev */
899 899 lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */
900 900 lxpr_readdir_not_a_dir, /* /proc/net/if_inet6 */
901 901 lxpr_readdir_not_a_dir, /* /proc/net/igmp */
902 902 lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */
903 903 lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */
904 904 lxpr_readdir_not_a_dir, /* /proc/net/ipv6_route */
905 905 lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */
906 906 lxpr_readdir_not_a_dir, /* /proc/net/netstat */
907 907 lxpr_readdir_not_a_dir, /* /proc/net/raw */
908 908 lxpr_readdir_not_a_dir, /* /proc/net/route */
909 909 lxpr_readdir_not_a_dir, /* /proc/net/rpc */
910 910 lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */
911 911 lxpr_readdir_not_a_dir, /* /proc/net/sockstat */
912 912 lxpr_readdir_not_a_dir, /* /proc/net/snmp */
913 913 lxpr_readdir_not_a_dir, /* /proc/net/stat */
914 914 lxpr_readdir_not_a_dir, /* /proc/net/tcp */
915 915 lxpr_readdir_not_a_dir, /* /proc/net/tcp6 */
916 916 lxpr_readdir_not_a_dir, /* /proc/net/udp */
917 917 lxpr_readdir_not_a_dir, /* /proc/net/udp6 */
918 918 lxpr_readdir_not_a_dir, /* /proc/net/unix */
919 919 lxpr_readdir_not_a_dir, /* /proc/partitions */
920 920 lxpr_readdir_not_a_dir, /* /proc/self */
921 921 lxpr_readdir_not_a_dir, /* /proc/stat */
922 922 lxpr_readdir_not_a_dir, /* /proc/swaps */
923 923 lxpr_readdir_sysdir, /* /proc/sys */
924 924 lxpr_readdir_sys_fsdir, /* /proc/sys/fs */
925 925 lxpr_readdir_sys_fs_inotifydir, /* /proc/sys/fs/inotify */
926 926 lxpr_readdir_not_a_dir, /* .../inotify/max_queued_events */
927 927 lxpr_readdir_not_a_dir, /* .../inotify/max_user_instances */
928 928 lxpr_readdir_not_a_dir, /* .../inotify/max_user_watches */
929 929 lxpr_readdir_sys_kerneldir, /* /proc/sys/kernel */
930 930 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/cap_last_cap */
931 931 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/core_pattern */
932 932 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/hostname */
933 933 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/msgmni */
934 934 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/ngroups_max */
935 935 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/osrelease */
936 936 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/pid_max */
937 937 lxpr_readdir_sys_kdir_randdir, /* /proc/sys/kernel/random */
938 938 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/random/boot_id */
939 939 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/shmmax */
940 940 lxpr_readdir_not_a_dir, /* /proc/sys/kernel/threads-max */
941 941 lxpr_readdir_sys_netdir, /* /proc/sys/net */
942 942 lxpr_readdir_sys_net_coredir, /* /proc/sys/net/core */
943 943 lxpr_readdir_not_a_dir, /* /proc/sys/net/core/somaxconn */
944 944 lxpr_readdir_sys_vmdir, /* /proc/sys/vm */
945 945 lxpr_readdir_not_a_dir, /* /proc/sys/vm/min_free_kbytes */
946 946 lxpr_readdir_not_a_dir, /* /proc/sys/vm/nr_hugepages */
947 947 lxpr_readdir_not_a_dir, /* /proc/sys/vm/overcommit_memory */
948 948 lxpr_readdir_not_a_dir, /* /proc/sys/vm/swappiness */
949 949 lxpr_readdir_not_a_dir, /* /proc/uptime */
950 950 lxpr_readdir_not_a_dir, /* /proc/version */
951 951 };
952 952
953 953
954 954 /*
955 955 * lxpr_read(): Vnode operation for VOP_READ()
956 956 *
957 957 * As the format of all the files that can be read in the lx procfs is human
958 958 * readable and not binary structures there do not have to be different
959 959 * read variants depending on whether the reading process model is 32 or 64 bits
960 960 * (at least in general, and certainly the difference is unlikely to be enough
961 961 * to justify have different routines for 32 and 64 bit reads
962 962 */
963 963 /* ARGSUSED */
964 964 static int
965 965 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
966 966 caller_context_t *ct)
967 967 {
968 968 lxpr_node_t *lxpnp = VTOLXP(vp);
969 969 lxpr_nodetype_t type = lxpnp->lxpr_type;
970 970 lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
971 971 int error;
972 972
973 973 ASSERT(type < LXPR_NFILES);
974 974
975 975 if (type == LXPR_KMSG) {
976 976 ldi_ident_t li = VTOLXPM(vp)->lxprm_li;
977 977 ldi_handle_t ldih;
978 978 struct strioctl str;
979 979 int rv;
980 980
981 981 /*
982 982 * Open the zone's console device using the layered driver
983 983 * interface.
984 984 */
985 985 if ((error =
986 986 ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li)) != 0)
987 987 return (error);
988 988
989 989 /*
990 990 * Send an ioctl to the underlying console device, letting it
991 991 * know we're interested in getting console messages.
992 992 */
993 993 str.ic_cmd = I_CONSLOG;
994 994 str.ic_timout = 0;
995 995 str.ic_len = 0;
996 996 str.ic_dp = NULL;
997 997 if ((error = ldi_ioctl(ldih, I_STR,
998 998 (intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
999 999 return (error);
1000 1000
1001 1001 lxpr_read_kmsg(lxpnp, uiobuf, ldih);
1002 1002
1003 1003 if ((error = ldi_close(ldih, FREAD, cr)) != 0)
1004 1004 return (error);
1005 1005 } else {
1006 1006 lxpr_read_function[type](lxpnp, uiobuf);
1007 1007 }
1008 1008
1009 1009 error = lxpr_uiobuf_flush(uiobuf);
1010 1010 lxpr_uiobuf_free(uiobuf);
1011 1011
1012 1012 return (error);
1013 1013 }
1014 1014
1015 1015 /*
1016 1016 * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
1017 1017 *
1018 1018 * Various special case reads:
1019 1019 * - trying to read a directory
1020 1020 * - invalid file (used to mean a file that should be implemented,
1021 1021 * but isn't yet)
1022 1022 * - empty file
1023 1023 * - wait to be able to read a file that will never have anything to read
1024 1024 */
/* ARGSUSED */
static void
lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Read handler for directory nodes: always fail with EISDIR. */
	lxpr_uiobuf_seterr(uiobuf, EISDIR);
}
1031 1031
/* ARGSUSED */
static void
lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/*
	 * Placeholder for files not yet implemented (see the table of read
	 * functions): reads fail with EINVAL.
	 */
	lxpr_uiobuf_seterr(uiobuf, EINVAL);
}
1038 1038
/* ARGSUSED */
static void
lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Deliberately produce no output: the file always reads as empty. */
}
1044 1044
1045 1045 /*
1046 1046 * lxpr_read_pid_auxv(): read process aux vector
1047 1047 */
1048 1048 static void
1049 1049 lxpr_read_pid_auxv(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1050 1050 {
1051 1051 proc_t *p;
1052 1052 lx_proc_data_t *pd;
1053 1053 lx_elf_data_t *edp = NULL;
1054 1054 int i, cnt;
1055 1055
1056 1056 ASSERT(lxpnp->lxpr_type == LXPR_PID_AUXV ||
1057 1057 lxpnp->lxpr_type == LXPR_PID_TID_AUXV);
1058 1058
1059 1059 p = lxpr_lock(lxpnp->lxpr_pid);
1060 1060
1061 1061 if (p == NULL) {
1062 1062 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1063 1063 return;
1064 1064 }
1065 1065 if ((pd = ptolxproc(p)) == NULL) {
1066 1066 /* Emit a single AT_NULL record for non-branded processes */
1067 1067 auxv_t buf;
1068 1068
1069 1069 bzero(&buf, sizeof (buf));
1070 1070 lxpr_unlock(p);
1071 1071 lxpr_uiobuf_write(uiobuf, (char *)&buf, sizeof (buf));
1072 1072 return;
1073 1073 } else {
1074 1074 edp = &pd->l_elf_data;
1075 1075 }
1076 1076
1077 1077 if (p->p_model == DATAMODEL_NATIVE) {
1078 1078 auxv_t buf[__KERN_NAUXV_IMPL];
1079 1079
1080 1080 /*
1081 1081 * Because a_type is only of size int (not long), the buffer
1082 1082 * contents must be zeroed first to ensure cleanliness.
1083 1083 */
1084 1084 bzero(buf, sizeof (buf));
1085 1085 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
1086 1086 if (lx_auxv_stol(&p->p_user.u_auxv[i],
1087 1087 &buf[cnt], edp) == 0) {
1088 1088 cnt++;
1089 1089 }
1090 1090 if (p->p_user.u_auxv[i].a_type == AT_NULL) {
1091 1091 break;
1092 1092 }
1093 1093 }
1094 1094 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
1095 1095 lxpr_unlock(p);
1096 1096 }
1097 1097 #if defined(_SYSCALL32_IMPL)
1098 1098 else {
1099 1099 auxv32_t buf[__KERN_NAUXV_IMPL];
1100 1100
1101 1101 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
1102 1102 auxv_t temp;
1103 1103
1104 1104 if (lx_auxv_stol(&p->p_user.u_auxv[i],
1105 1105 &temp, edp) == 0) {
1106 1106 buf[cnt].a_type = (int)temp.a_type;
1107 1107 buf[cnt].a_un.a_val = (int)temp.a_un.a_val;
1108 1108 cnt++;
1109 1109 }
1110 1110 if (p->p_user.u_auxv[i].a_type == AT_NULL) {
1111 1111 break;
1112 1112 }
1113 1113 }
1114 1114 lxpr_unlock(p);
1115 1115 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
1116 1116 }
1117 1117 #endif /* defined(_SYSCALL32_IMPL) */
1118 1118 }
1119 1119
1120 1120 /*
1121 1121 * lxpr_read_pid_cgroup(): read cgroups for process
1122 1122 */
1123 1123 static void
1124 1124 lxpr_read_pid_cgroup(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1125 1125 {
1126 1126 proc_t *p;
1127 1127
1128 1128 ASSERT(lxpnp->lxpr_type == LXPR_PID_CGROUP ||
1129 1129 lxpnp->lxpr_type == LXPR_PID_TID_CGROUP);
1130 1130
1131 1131 p = lxpr_lock(lxpnp->lxpr_pid);
1132 1132 if (p == NULL) {
1133 1133 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1134 1134 return;
1135 1135 }
1136 1136
1137 1137 /* basic stub, 3rd field will need to be populated */
1138 1138 lxpr_uiobuf_printf(uiobuf, "1:name=systemd:/\n");
1139 1139
1140 1140 lxpr_unlock(p);
1141 1141 }
1142 1142
/*
 * lxpr_copy_cmdline(): copy the argv area of a process directly into the
 * caller's uio, honoring the Linux-style argv/env boundaries recorded in the
 * brand data (l_args_start / l_envs_start / l_envs_end).
 *
 * Called with p->p_lock held (via lxpr_lock in the caller); the lock is
 * dropped around the uread/prreadbuf/uiomove calls, which may fault, and
 * reacquired before returning.  On return the uiobuf's internal state is
 * reset since this routine bypasses it and writes to the uio directly.
 */
static void
lxpr_copy_cmdline(proc_t *p, lx_proc_data_t *pd, lxpr_uiobuf_t *uiobuf)
{
	uio_t *uiop = uiobuf->uiop;
	char *buf = uiobuf->buffer;
	int bsz = uiobuf->buffsize;
	boolean_t env_overflow = B_FALSE;
	uintptr_t pos = pd->l_args_start + uiop->uio_offset;
	uintptr_t estart = pd->l_envs_start;
	uintptr_t eend = pd->l_envs_end;
	size_t chunk, copied;
	int err = 0;

	/* Do not bother with data beyond the end of the envp strings area. */
	if (pos > eend) {
		return;
	}
	mutex_exit(&p->p_lock);

	/*
	 * If the starting or ending bounds are outside the argv strings area,
	 * check to see if the process has overwritten the terminating NULL.
	 * If not, no data needs to be copied from oustide the argv area.
	 */
	if (pos >= estart || (pos + uiop->uio_resid) >= estart) {
		uint8_t term;
		if (uread(p, &term, sizeof (term), estart - 1) != 0) {
			err = EFAULT;
		} else if (term != 0) {
			env_overflow = B_TRUE;
		}
	}


	/* Data between astart and estart-1 can be copied freely. */
	while (pos < estart && uiop->uio_resid > 0 && err == 0) {
		chunk = MIN(estart - pos, uiop->uio_resid);
		chunk = MIN(chunk, bsz);

		if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) != 0 ||
		    copied != chunk) {
			err = EFAULT;
			break;
		}
		err = uiomove(buf, copied, UIO_READ, uiop);
		pos += copied;
	}

	/*
	 * Onward from estart, data is copied as a contiguous string. To
	 * protect env data from potential snooping, only one buffer-sized copy
	 * is allowed to avoid complex seek logic.
	 */
	if (err == 0 && env_overflow && pos == estart && uiop->uio_resid > 0) {
		chunk = MIN(eend - pos, uiop->uio_resid);
		chunk = MIN(chunk, bsz);
		if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) == 0) {
			/* Stop at the first NUL; never expose trailing env. */
			int len = strnlen(buf, copied);
			if (len > 0) {
				err = uiomove(buf, len, UIO_READ, uiop);
			}
		}
	}

	uiobuf->error = err;
	/* reset any uiobuf state */
	uiobuf->pos = uiobuf->buffer;
	uiobuf->beg = 0;

	mutex_enter(&p->p_lock);
}
1214 1214
1215 1215 /*
1216 1216 * lxpr_read_pid_cmdline(): read argument vector from process
1217 1217 */
1218 1218 static void
1219 1219 lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1220 1220 {
1221 1221 proc_t *p;
1222 1222 char *buf;
1223 1223 size_t asz = lxpr_maxargvlen, sz;
1224 1224 lx_proc_data_t *pd;
1225 1225
1226 1226 ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE ||
1227 1227 lxpnp->lxpr_type == LXPR_PID_TID_CMDLINE);
1228 1228
1229 1229 buf = kmem_alloc(asz, KM_SLEEP);
1230 1230
1231 1231 p = lxpr_lock(lxpnp->lxpr_pid);
1232 1232 if (p == NULL) {
1233 1233 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1234 1234 kmem_free(buf, asz);
1235 1235 return;
1236 1236 }
1237 1237
1238 1238 if ((pd = ptolxproc(p)) != NULL && pd->l_args_start != 0 &&
1239 1239 pd->l_envs_start != 0 && pd->l_envs_end != 0) {
1240 1240 /* Use Linux-style argv bounds if possible. */
1241 1241 lxpr_copy_cmdline(p, pd, uiobuf);
1242 1242 } else {
1243 1243 if (prreadargv(p, buf, asz, &sz) != 0) {
1244 1244 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1245 1245 } else {
1246 1246 lxpr_uiobuf_write(uiobuf, buf, sz);
1247 1247 }
1248 1248 }
1249 1249
1250 1250 lxpr_unlock(p);
1251 1251 kmem_free(buf, asz);
1252 1252 }
1253 1253
1254 1254 /*
1255 1255 * lxpr_read_pid_comm(): read command from process
1256 1256 */
1257 1257 static void
1258 1258 lxpr_read_pid_comm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1259 1259 {
1260 1260 proc_t *p;
1261 1261
1262 1262 VERIFY(lxpnp->lxpr_type == LXPR_PID_COMM ||
1263 1263 lxpnp->lxpr_type == LXPR_PID_TID_COMM);
1264 1264
1265 1265 /*
1266 1266 * Because prctl(PR_SET_NAME) does not set custom names for threads
1267 1267 * (vs processes), there is no need for special handling here.
1268 1268 */
1269 1269 if ((p = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
1270 1270 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1271 1271 return;
1272 1272 }
1273 1273 lxpr_uiobuf_printf(uiobuf, "%s\n", p->p_user.u_comm);
1274 1274 lxpr_unlock(p);
1275 1275 }
1276 1276
1277 1277 /*
1278 1278 * lxpr_read_pid_env(): read env vector from process
1279 1279 */
1280 1280 static void
1281 1281 lxpr_read_pid_env(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1282 1282 {
1283 1283 proc_t *p;
1284 1284 char *buf;
1285 1285 size_t asz = lxpr_maxenvvlen, sz;
1286 1286
1287 1287 ASSERT(lxpnp->lxpr_type == LXPR_PID_ENV);
1288 1288
1289 1289 buf = kmem_alloc(asz, KM_SLEEP);
1290 1290
1291 1291 p = lxpr_lock(lxpnp->lxpr_pid);
1292 1292 if (p == NULL) {
1293 1293 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1294 1294 kmem_free(buf, asz);
1295 1295 return;
1296 1296 }
1297 1297
1298 1298 if (prreadenvv(p, buf, asz, &sz) != 0) {
1299 1299 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1300 1300 } else {
1301 1301 lxpr_uiobuf_write(uiobuf, buf, sz);
1302 1302 }
1303 1303
1304 1304 lxpr_unlock(p);
1305 1305 kmem_free(buf, asz);
1306 1306 }
1307 1307
1308 1308 /*
1309 1309 * lxpr_read_pid_limits(): ulimit file
1310 1310 */
1311 1311 static void
1312 1312 lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1313 1313 {
1314 1314 proc_t *p;
1315 1315 rctl_qty_t cur, max;
1316 1316 rctl_val_t *oval, *nval;
1317 1317 rctl_hndl_t hndl;
1318 1318 char *kname;
1319 1319 int i;
1320 1320
1321 1321 ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS ||
1322 1322 lxpnp->lxpr_type == LXPR_PID_TID_LIMITS);
1323 1323
1324 1324 nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP);
1325 1325
1326 1326 p = lxpr_lock(lxpnp->lxpr_pid);
1327 1327 if (p == NULL) {
1328 1328 kmem_free(nval, sizeof (rctl_val_t));
1329 1329 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1330 1330 return;
1331 1331 }
1332 1332
1333 1333 lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n",
1334 1334 "Limit", "Soft Limit", "Hard Limit", "Units");
1335 1335 for (i = 0; lxpr_rlimtab[i].rlim_name != NULL; i++) {
1336 1336 kname = lxpr_rlimtab[i].rlim_rctl;
1337 1337 /* default to unlimited for resources without an analog */
1338 1338 cur = RLIM_INFINITY;
1339 1339 max = RLIM_INFINITY;
1340 1340 if (kname != NULL) {
1341 1341 hndl = rctl_hndl_lookup(kname);
1342 1342 oval = NULL;
1343 1343 while ((hndl != -1) &&
1344 1344 rctl_local_get(hndl, oval, nval, p) == 0) {
1345 1345 oval = nval;
1346 1346 switch (nval->rcv_privilege) {
1347 1347 case RCPRIV_BASIC:
1348 1348 if (!RCTL_INFINITE(nval))
1349 1349 cur = nval->rcv_value;
1350 1350 break;
1351 1351 case RCPRIV_PRIVILEGED:
1352 1352 if (!RCTL_INFINITE(nval))
1353 1353 max = nval->rcv_value;
1354 1354 break;
1355 1355 }
1356 1356 }
1357 1357 }
1358 1358
1359 1359 lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name);
1360 1360 if (cur == RLIM_INFINITY || cur == LX_RLIM_INFINITY) {
1361 1361 lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
1362 1362 } else {
1363 1363 lxpr_uiobuf_printf(uiobuf, " %-20lu", cur);
1364 1364 }
1365 1365 if (max == RLIM_INFINITY || max == LX_RLIM_INFINITY) {
1366 1366 lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
1367 1367 } else {
1368 1368 lxpr_uiobuf_printf(uiobuf, " %-20lu", max);
1369 1369 }
1370 1370 lxpr_uiobuf_printf(uiobuf, " %-10s\n",
1371 1371 lxpr_rlimtab[i].rlim_unit);
1372 1372 }
1373 1373
1374 1374 lxpr_unlock(p);
1375 1375 kmem_free(nval, sizeof (rctl_val_t));
1376 1376 }
1377 1377
1378 1378 /*
1379 1379 * lxpr_read_pid_maps(): memory map file
1380 1380 */
1381 1381 static void
1382 1382 lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1383 1383 {
1384 1384 proc_t *p;
1385 1385 struct as *as;
1386 1386 struct seg *seg;
1387 1387 char *buf;
1388 1388 int buflen = MAXPATHLEN;
1389 1389 struct print_data {
1390 1390 uintptr_t saddr;
1391 1391 uintptr_t eaddr;
1392 1392 int type;
1393 1393 char prot[5];
1394 1394 uintptr_t offset;
1395 1395 vnode_t *vp;
1396 1396 struct print_data *next;
1397 1397 } *print_head = NULL;
1398 1398 struct print_data **print_tail = &print_head;
1399 1399 struct print_data *pbuf;
1400 1400
1401 1401 ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS ||
1402 1402 lxpnp->lxpr_type == LXPR_PID_TID_MAPS);
1403 1403
1404 1404 p = lxpr_lock(lxpnp->lxpr_pid);
1405 1405 if (p == NULL) {
1406 1406 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1407 1407 return;
1408 1408 }
1409 1409
1410 1410 as = p->p_as;
1411 1411
1412 1412 if (as == &kas) {
1413 1413 lxpr_unlock(p);
1414 1414 return;
1415 1415 }
1416 1416
1417 1417 mutex_exit(&p->p_lock);
1418 1418
1419 1419 /* Iterate over all segments in the address space */
1420 1420 AS_LOCK_ENTER(as, RW_READER);
1421 1421 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1422 1422 vnode_t *vp;
1423 1423 uint_t protbits;
1424 1424
1425 1425 pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
1426 1426
1427 1427 pbuf->saddr = (uintptr_t)seg->s_base;
1428 1428 pbuf->eaddr = pbuf->saddr + seg->s_size;
1429 1429 pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
1430 1430
1431 1431 /*
1432 1432 * Cheat and only use the protection bits of the first page
1433 1433 * in the segment
1434 1434 */
1435 1435 (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
1436 1436 (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
1437 1437
1438 1438 if (protbits & PROT_READ) pbuf->prot[0] = 'r';
1439 1439 if (protbits & PROT_WRITE) pbuf->prot[1] = 'w';
1440 1440 if (protbits & PROT_EXEC) pbuf->prot[2] = 'x';
1441 1441 if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's';
1442 1442 else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
1443 1443
1444 1444 if (seg->s_ops == &segvn_ops &&
1445 1445 SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
1446 1446 vp != NULL && vp->v_type == VREG) {
1447 1447 VN_HOLD(vp);
1448 1448 pbuf->vp = vp;
1449 1449 } else {
1450 1450 pbuf->vp = NULL;
1451 1451 }
1452 1452
1453 1453 pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr);
1454 1454
1455 1455 pbuf->next = NULL;
1456 1456 *print_tail = pbuf;
1457 1457 print_tail = &pbuf->next;
1458 1458 }
1459 1459 AS_LOCK_EXIT(as);
1460 1460 mutex_enter(&p->p_lock);
1461 1461 lxpr_unlock(p);
1462 1462
1463 1463 buf = kmem_alloc(buflen, KM_SLEEP);
1464 1464
1465 1465 /* print the data we've extracted */
1466 1466 pbuf = print_head;
1467 1467 while (pbuf != NULL) {
1468 1468 struct print_data *pbuf_next;
1469 1469 vattr_t vattr;
1470 1470
1471 1471 int maj = 0;
1472 1472 int min = 0;
1473 1473 ino_t inode = 0;
1474 1474
1475 1475 *buf = '\0';
1476 1476 if (pbuf->vp != NULL) {
1477 1477 vattr.va_mask = AT_FSID | AT_NODEID;
1478 1478 if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
1479 1479 NULL) == 0) {
1480 1480 maj = getmajor(vattr.va_fsid);
1481 1481 min = getminor(vattr.va_fsid);
1482 1482 inode = vattr.va_nodeid;
1483 1483 }
1484 1484 (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
1485 1485 VN_RELE(pbuf->vp);
1486 1486 }
1487 1487
1488 1488 if (p->p_model == DATAMODEL_LP64) {
1489 1489 lxpr_uiobuf_printf(uiobuf,
1490 1490 "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n",
1491 1491 pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
1492 1492 maj, min, inode, *buf != '\0' ? " " : "", buf);
1493 1493 } else {
1494 1494 lxpr_uiobuf_printf(uiobuf,
1495 1495 "%08x-%08x %s %08x %02x:%02x %llu%s%s\n",
1496 1496 (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr,
1497 1497 pbuf->prot, (uint32_t)pbuf->offset, maj, min,
1498 1498 inode, *buf != '\0' ? " " : "", buf);
1499 1499 }
1500 1500
1501 1501 pbuf_next = pbuf->next;
1502 1502 kmem_free(pbuf, sizeof (*pbuf));
1503 1503 pbuf = pbuf_next;
1504 1504 }
1505 1505
1506 1506 kmem_free(buf, buflen);
1507 1507 }
1508 1508
1509 1509 /*
1510 1510 * lxpr_read_pid_mountinfo(): information about process mount points. e.g.:
1511 1511 * 14 19 0:13 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
1512 1512 * mntid parid devnums root mntpnt mntopts - fstype mntsrc superopts
1513 1513 *
1514 1514 * We have to make up several of these fields.
1515 1515 */
1516 1516 static void
1517 1517 lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1518 1518 {
1519 1519 struct vfs *vfsp;
1520 1520 struct vfs *vfslist;
1521 1521 zone_t *zone = LXPTOZ(lxpnp);
1522 1522 struct print_data {
1523 1523 refstr_t *vfs_mntpt;
1524 1524 refstr_t *vfs_resource;
1525 1525 uint_t vfs_flag;
1526 1526 int vfs_fstype;
1527 1527 dev_t vfs_dev;
1528 1528 struct print_data *next;
1529 1529 } *print_head = NULL;
1530 1530 struct print_data **print_tail = &print_head;
1531 1531 struct print_data *printp;
1532 1532 int root_id = 15; /* use a made-up value */
1533 1533 int mnt_id;
1534 1534
1535 1535 ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO ||
1536 1536 lxpnp->lxpr_type == LXPR_PID_TID_MOUNTINFO);
1537 1537
1538 1538 vfs_list_read_lock();
1539 1539
1540 1540 /* root is the top-level, it does not appear in this output */
1541 1541 if (zone == global_zone) {
1542 1542 vfsp = vfslist = rootvfs;
1543 1543 } else {
1544 1544 vfsp = vfslist = zone->zone_vfslist;
1545 1545 /*
1546 1546 * If the zone has a root entry, it will be the first in
1547 1547 * the list. If it doesn't, we conjure one up.
1548 1548 */
1549 1549 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
1550 1550 zone->zone_rootpath) != 0) {
1551 1551 struct vfs *tvfsp;
1552 1552 /*
1553 1553 * The root of the zone is not a mount point. The vfs
1554 1554 * we want to report is that of the zone's root vnode.
1555 1555 */
1556 1556 tvfsp = zone->zone_rootvp->v_vfsp;
1557 1557
1558 1558 lxpr_uiobuf_printf(uiobuf,
1559 1559 "%d 1 %d:%d / / %s - %s / %s\n",
1560 1560 root_id,
1561 1561 major(tvfsp->vfs_dev), minor(vfsp->vfs_dev),
1562 1562 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1563 1563 vfssw[tvfsp->vfs_fstype].vsw_name,
1564 1564 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1565 1565
1566 1566 }
1567 1567 if (vfslist == NULL) {
1568 1568 vfs_list_unlock();
1569 1569 return;
1570 1570 }
1571 1571 }
1572 1572
1573 1573 /*
1574 1574 * Later on we have to do a lookupname, which can end up causing
1575 1575 * another vfs_list_read_lock() to be called. Which can lead to a
1576 1576 * deadlock. To avoid this, we extract the data we need into a local
1577 1577 * list, then we can run this list without holding vfs_list_read_lock()
1578 1578 * We keep the list in the same order as the vfs_list
1579 1579 */
1580 1580 do {
1581 1581 /* Skip mounts we shouldn't show */
1582 1582 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
1583 1583 goto nextfs;
1584 1584 }
1585 1585
1586 1586 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
1587 1587 refstr_hold(vfsp->vfs_mntpt);
1588 1588 printp->vfs_mntpt = vfsp->vfs_mntpt;
1589 1589 refstr_hold(vfsp->vfs_resource);
1590 1590 printp->vfs_resource = vfsp->vfs_resource;
1591 1591 printp->vfs_flag = vfsp->vfs_flag;
1592 1592 printp->vfs_fstype = vfsp->vfs_fstype;
1593 1593 printp->vfs_dev = vfsp->vfs_dev;
1594 1594 printp->next = NULL;
1595 1595
1596 1596 *print_tail = printp;
1597 1597 print_tail = &printp->next;
1598 1598
1599 1599 nextfs:
1600 1600 vfsp = (zone == global_zone) ?
1601 1601 vfsp->vfs_next : vfsp->vfs_zone_next;
1602 1602
1603 1603 } while (vfsp != vfslist);
1604 1604
1605 1605 vfs_list_unlock();
1606 1606
1607 1607 mnt_id = root_id + 1;
1608 1608
1609 1609 /*
1610 1610 * now we can run through what we've extracted without holding
1611 1611 * vfs_list_read_lock()
1612 1612 */
1613 1613 printp = print_head;
1614 1614 while (printp != NULL) {
1615 1615 struct print_data *printp_next;
1616 1616 const char *resource;
1617 1617 char *mntpt;
1618 1618 struct vnode *vp;
1619 1619 int error;
1620 1620
1621 1621 mntpt = (char *)refstr_value(printp->vfs_mntpt);
1622 1622 resource = refstr_value(printp->vfs_resource);
1623 1623
1624 1624 if (mntpt != NULL && mntpt[0] != '\0')
1625 1625 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
1626 1626 else
1627 1627 mntpt = "-";
1628 1628
1629 1629 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
1630 1630
1631 1631 if (error != 0)
1632 1632 goto nextp;
1633 1633
1634 1634 if (!(vp->v_flag & VROOT)) {
1635 1635 VN_RELE(vp);
1636 1636 goto nextp;
1637 1637 }
1638 1638 VN_RELE(vp);
1639 1639
1640 1640 if (resource != NULL && resource[0] != '\0') {
1641 1641 if (resource[0] == '/') {
1642 1642 resource = ZONE_PATH_VISIBLE(resource, zone) ?
1643 1643 ZONE_PATH_TRANSLATE(resource, zone) : mntpt;
1644 1644 }
1645 1645 } else {
1646 1646 resource = "none";
1647 1647 }
1648 1648
1649 1649 /*
1650 1650 * XXX parent ID is not tracked correctly here. Currently we
1651 1651 * always assume the parent ID is the root ID.
1652 1652 */
1653 1653 lxpr_uiobuf_printf(uiobuf,
1654 1654 "%d %d %d:%d / %s %s - %s %s %s\n",
1655 1655 mnt_id, root_id,
1656 1656 major(printp->vfs_dev), minor(printp->vfs_dev),
1657 1657 mntpt,
1658 1658 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1659 1659 vfssw[printp->vfs_fstype].vsw_name,
1660 1660 resource,
1661 1661 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1662 1662
1663 1663 nextp:
1664 1664 printp_next = printp->next;
1665 1665 refstr_rele(printp->vfs_mntpt);
1666 1666 refstr_rele(printp->vfs_resource);
1667 1667 kmem_free(printp, sizeof (*printp));
1668 1668 printp = printp_next;
1669 1669
1670 1670 mnt_id++;
1671 1671 }
1672 1672 }
1673 1673
1674 1674 /*
1675 1675 * lxpr_read_pid_oom_scr_adj(): read oom_score_adj for process
1676 1676 */
1677 1677 static void
1678 1678 lxpr_read_pid_oom_scr_adj(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1679 1679 {
1680 1680 proc_t *p;
1681 1681
1682 1682 ASSERT(lxpnp->lxpr_type == LXPR_PID_OOM_SCR_ADJ ||
1683 1683 lxpnp->lxpr_type == LXPR_PID_TID_OOM_SCR_ADJ);
1684 1684
1685 1685 p = lxpr_lock(lxpnp->lxpr_pid);
1686 1686 if (p == NULL) {
1687 1687 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1688 1688 return;
1689 1689 }
1690 1690
1691 1691 /* always 0 */
1692 1692 lxpr_uiobuf_printf(uiobuf, "0\n");
1693 1693
1694 1694 lxpr_unlock(p);
1695 1695 }
1696 1696
1697 1697
1698 1698 /*
1699 1699 * lxpr_read_pid_statm(): memory status file
1700 1700 */
1701 1701 static void
1702 1702 lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1703 1703 {
1704 1704 proc_t *p;
1705 1705 struct as *as;
1706 1706 size_t vsize;
1707 1707 size_t rss;
1708 1708
1709 1709 ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM ||
1710 1710 lxpnp->lxpr_type == LXPR_PID_TID_STATM);
1711 1711
1712 1712 p = lxpr_lock(lxpnp->lxpr_pid);
1713 1713 if (p == NULL) {
1714 1714 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1715 1715 return;
1716 1716 }
1717 1717
1718 1718 as = p->p_as;
1719 1719
1720 1720 mutex_exit(&p->p_lock);
1721 1721
1722 1722 AS_LOCK_ENTER(as, RW_READER);
1723 1723 vsize = btopr(as->a_resvsize);
1724 1724 rss = rm_asrss(as);
1725 1725 AS_LOCK_EXIT(as);
1726 1726
1727 1727 mutex_enter(&p->p_lock);
1728 1728 lxpr_unlock(p);
1729 1729
1730 1730 lxpr_uiobuf_printf(uiobuf,
1731 1731 "%lu %lu %lu %lu %lu %lu %lu\n",
1732 1732 vsize, rss, 0l, rss, 0l, 0l, 0l);
1733 1733 }
1734 1734
1735 1735 /*
1736 1736 * Look for either the main thread (lookup_id is 0) or the specified thread.
1737 1737 * If we're looking for the main thread but the proc does not have one, we
1738 1738 * fallback to using prchoose to get any thread available.
1739 1739 */
1740 1740 static kthread_t *
1741 1741 lxpr_get_thread(proc_t *p, uint_t lookup_id)
1742 1742 {
1743 1743 kthread_t *t;
1744 1744 uint_t emul_tid;
1745 1745 lx_lwp_data_t *lwpd;
1746 1746 pid_t pid = p->p_pid;
1747 1747 pid_t init_pid = curproc->p_zone->zone_proc_initpid;
1748 1748 boolean_t branded = (p->p_brand == &lx_brand);
1749 1749
1750 1750 /* get specified thread */
1751 1751 if ((t = p->p_tlist) == NULL)
1752 1752 return (NULL);
1753 1753
1754 1754 do {
1755 1755 if (lookup_id == 0 && t->t_tid == 1) {
1756 1756 thread_lock(t);
1757 1757 return (t);
1758 1758 }
1759 1759
1760 1760 lwpd = ttolxlwp(t);
1761 1761 if (branded && lwpd != NULL) {
1762 1762 if (pid == init_pid && lookup_id == 1) {
1763 1763 emul_tid = t->t_tid;
1764 1764 } else {
1765 1765 emul_tid = lwpd->br_pid;
1766 1766 }
1767 1767 } else {
1768 1768 /*
1769 1769 * Make only the first (assumed to be main) thread
1770 1770 * visible for non-branded processes.
1771 1771 */
1772 1772 emul_tid = p->p_pid;
1773 1773 }
1774 1774 if (emul_tid == lookup_id) {
1775 1775 thread_lock(t);
1776 1776 return (t);
1777 1777 }
1778 1778 } while ((t = t->t_forw) != p->p_tlist);
1779 1779
1780 1780 if (lookup_id == 0)
1781 1781 return (prchoose(p));
1782 1782 return (NULL);
1783 1783 }
1784 1784
1785 1785 /*
1786 1786 * Lookup the real pid for procs 0 or 1.
1787 1787 */
1788 1788 static pid_t
1789 1789 get_real_pid(pid_t p)
1790 1790 {
1791 1791 pid_t find_pid;
1792 1792
1793 1793 if (p == 1) {
1794 1794 find_pid = curproc->p_zone->zone_proc_initpid;
1795 1795 } else if (p == 0) {
1796 1796 find_pid = curproc->p_zone->zone_zsched->p_pid;
1797 1797 } else {
1798 1798 find_pid = p;
1799 1799 }
1800 1800
1801 1801 return (find_pid);
1802 1802 }
1803 1803
1804 1804 /*
1805 1805 * pid/tid common code to read status file
1806 1806 */
1807 1807 static void
1808 1808 lxpr_read_status_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
1809 1809 uint_t lookup_id)
1810 1810 {
1811 1811 proc_t *p;
1812 1812 kthread_t *t;
1813 1813 user_t *up;
1814 1814 cred_t *cr;
1815 1815 const gid_t *groups;
1816 1816 int ngroups;
1817 1817 struct as *as;
1818 1818 char *status;
1819 1819 pid_t pid, ppid;
1820 1820 k_sigset_t current, ignore, handle;
1821 1821 int i, lx_sig;
1822 1822 pid_t real_pid;
1823 1823
1824 1824 real_pid = get_real_pid(lxpnp->lxpr_pid);
1825 1825 p = lxpr_lock(real_pid);
1826 1826 if (p == NULL) {
1827 1827 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1828 1828 return;
1829 1829 }
1830 1830
1831 1831 pid = p->p_pid;
1832 1832
1833 1833 /*
1834 1834 * Convert pid to the Linux default of 1 if we're the zone's init
1835 1835 * process or if we're the zone's zsched the pid is 0.
1836 1836 */
1837 1837 if (pid == curproc->p_zone->zone_proc_initpid) {
1838 1838 pid = 1;
1839 1839 ppid = 0; /* parent pid for init is 0 */
1840 1840 } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
1841 1841 pid = 0; /* zsched is pid 0 */
1842 1842 ppid = 0; /* parent pid for zsched is itself */
1843 1843 } else {
1844 1844 /*
1845 1845 * Make sure not to reference parent PIDs that reside outside
1846 1846 * the zone
1847 1847 */
1848 1848 ppid = ((p->p_flag & SZONETOP)
1849 1849 ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
1850 1850
1851 1851 /*
1852 1852 * Convert ppid to the Linux default of 1 if our parent is the
1853 1853 * zone's init process
1854 1854 */
1855 1855 if (ppid == curproc->p_zone->zone_proc_initpid)
1856 1856 ppid = 1;
1857 1857 }
1858 1858
1859 1859 t = lxpr_get_thread(p, lookup_id);
1860 1860 if (t != NULL) {
1861 1861 switch (t->t_state) {
1862 1862 case TS_SLEEP:
1863 1863 status = "S (sleeping)";
1864 1864 break;
1865 1865 case TS_RUN:
1866 1866 case TS_ONPROC:
1867 1867 status = "R (running)";
1868 1868 break;
1869 1869 case TS_ZOMB:
1870 1870 status = "Z (zombie)";
1871 1871 break;
1872 1872 case TS_STOPPED:
1873 1873 status = "T (stopped)";
1874 1874 break;
1875 1875 default:
1876 1876 status = "! (unknown)";
1877 1877 break;
1878 1878 }
1879 1879 thread_unlock(t);
1880 1880 } else {
1881 1881 if (lookup_id != 0) {
1882 1882 /* we can't find this specific thread */
1883 1883 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1884 1884 lxpr_unlock(p);
1885 1885 return;
1886 1886 }
1887 1887
1888 1888 /*
1889 1889 * there is a hole in the exit code, where a proc can have
1890 1890 * no threads but it is yet to be flagged SZOMB. We will
1891 1891 * assume we are about to become a zombie
1892 1892 */
1893 1893 status = "Z (zombie)";
1894 1894 }
1895 1895
1896 1896 up = PTOU(p);
1897 1897 mutex_enter(&p->p_crlock);
1898 1898 crhold(cr = p->p_cred);
1899 1899 mutex_exit(&p->p_crlock);
1900 1900
1901 1901 lxpr_uiobuf_printf(uiobuf,
1902 1902 "Name:\t%s\n"
1903 1903 "State:\t%s\n"
1904 1904 "Tgid:\t%d\n"
1905 1905 "Pid:\t%d\n"
1906 1906 "PPid:\t%d\n"
1907 1907 "TracerPid:\t%d\n"
1908 1908 "Uid:\t%u\t%u\t%u\t%u\n"
1909 1909 "Gid:\t%u\t%u\t%u\t%u\n"
1910 1910 "FDSize:\t%d\n"
1911 1911 "Groups:\t",
1912 1912 up->u_comm,
1913 1913 status,
1914 1914 pid, /* thread group id - same as pid */
1915 1915 (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
1916 1916 ppid,
1917 1917 0,
1918 1918 crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
1919 1919 crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
1920 1920 p->p_fno_ctl);
1921 1921
1922 1922
1923 1923 ngroups = crgetngroups(cr);
1924 1924 groups = crgetgroups(cr);
1925 1925 for (i = 0; i < ngroups; i++) {
1926 1926 lxpr_uiobuf_printf(uiobuf,
1927 1927 "%u ",
1928 1928 groups[i]);
1929 1929 }
1930 1930 crfree(cr);
1931 1931
1932 1932 as = p->p_as;
1933 1933 if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
1934 1934 size_t vsize, nlocked, rss;
1935 1935
1936 1936 mutex_exit(&p->p_lock);
1937 1937 AS_LOCK_ENTER(as, RW_READER);
1938 1938 vsize = as->a_resvsize;
1939 1939 rss = rm_asrss(as);
1940 1940 AS_LOCK_EXIT(as);
1941 1941 mutex_enter(&p->p_lock);
1942 1942 nlocked = p->p_locked_mem;
1943 1943
1944 1944 lxpr_uiobuf_printf(uiobuf,
1945 1945 "\n"
1946 1946 "VmSize:\t%8lu kB\n"
1947 1947 "VmLck:\t%8lu kB\n"
1948 1948 "VmRSS:\t%8lu kB\n"
1949 1949 "VmData:\t%8lu kB\n"
1950 1950 "VmStk:\t%8lu kB\n"
1951 1951 "VmExe:\t%8lu kB\n"
1952 1952 "VmLib:\t%8lu kB",
1953 1953 btok(vsize),
1954 1954 btok(nlocked),
1955 1955 ptok(rss),
1956 1956 0l,
1957 1957 btok(p->p_stksize),
1958 1958 ptok(rss),
1959 1959 0l);
1960 1960 }
1961 1961
1962 1962 lxpr_uiobuf_printf(uiobuf, "\nThreads:\t%u", p->p_lwpcnt);
1963 1963
1964 1964 sigemptyset(¤t);
1965 1965 sigemptyset(&ignore);
1966 1966 sigemptyset(&handle);
1967 1967
1968 1968 for (i = 1; i < NSIG; i++) {
1969 1969 lx_sig = stol_signo[i];
1970 1970
1971 1971 if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
1972 1972 if (sigismember(&p->p_sig, i))
1973 1973 sigaddset(¤t, lx_sig);
1974 1974
1975 1975 if (up->u_signal[i - 1] == SIG_IGN)
1976 1976 sigaddset(&ignore, lx_sig);
1977 1977 else if (up->u_signal[i - 1] != SIG_DFL)
1978 1978 sigaddset(&handle, lx_sig);
1979 1979 }
1980 1980 }
1981 1981
1982 1982 lxpr_uiobuf_printf(uiobuf,
1983 1983 "\n"
1984 1984 "SigPnd:\t%08x%08x\n"
1985 1985 "SigBlk:\t%08x%08x\n"
1986 1986 "SigIgn:\t%08x%08x\n"
1987 1987 "SigCgt:\t%08x%08x\n"
1988 1988 "CapInh:\t%016x\n"
1989 1989 "CapPrm:\t%016x\n"
1990 1990 "CapEff:\t%016x\n",
1991 1991 current.__sigbits[1], current.__sigbits[0],
1992 1992 0, 0, /* signals blocked on per thread basis */
1993 1993 ignore.__sigbits[1], ignore.__sigbits[0],
1994 1994 handle.__sigbits[1], handle.__sigbits[0],
1995 1995 /* Can't do anything with linux capabilities */
1996 1996 0,
1997 1997 0,
1998 1998 0);
1999 1999
2000 2000 lxpr_uiobuf_printf(uiobuf,
2001 2001 "CapBnd:\t%016llx\n",
2002 2002 /* We report the full capability bounding set */
2003 2003 0x1fffffffffLL);
2004 2004
2005 2005 lxpr_unlock(p);
2006 2006 }
2007 2007
2008 2008 /*
2009 2009 * lxpr_read_pid_status(): status file
2010 2010 */
2011 2011 static void
2012 2012 lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2013 2013 {
2014 2014 ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
2015 2015
2016 2016 lxpr_read_status_common(lxpnp, uiobuf, 0);
2017 2017 }
2018 2018
2019 2019 /*
2020 2020 * lxpr_read_pid_tid_status(): status file
2021 2021 */
2022 2022 static void
2023 2023 lxpr_read_pid_tid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2024 2024 {
2025 2025 ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STATUS);
2026 2026 lxpr_read_status_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
2027 2027 }
2028 2028
2029 2029 /*
2030 2030 * pid/tid common code to read stat file
2031 2031 */
2032 2032 static void
2033 2033 lxpr_read_stat_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
2034 2034 uint_t lookup_id)
2035 2035 {
2036 2036 proc_t *p;
2037 2037 kthread_t *t;
2038 2038 struct as *as;
2039 2039 char stat;
2040 2040 pid_t pid, ppid, pgpid, spid;
2041 2041 gid_t psgid;
2042 2042 dev_t psdev;
2043 2043 size_t rss, vsize;
2044 2044 int nice, pri;
2045 2045 caddr_t wchan;
2046 2046 processorid_t cpu;
2047 2047 pid_t real_pid;
2048 2048
2049 2049 real_pid = get_real_pid(lxpnp->lxpr_pid);
2050 2050 p = lxpr_lock(real_pid);
2051 2051 if (p == NULL) {
2052 2052 lxpr_uiobuf_seterr(uiobuf, EINVAL);
2053 2053 return;
2054 2054 }
2055 2055
2056 2056 pid = p->p_pid;
2057 2057
2058 2058 /*
2059 2059 * Set Linux defaults if we're the zone's init process
2060 2060 */
2061 2061 if (pid == curproc->p_zone->zone_proc_initpid) {
2062 2062 pid = 1; /* PID for init */
2063 2063 ppid = 0; /* parent PID for init is 0 */
2064 2064 pgpid = 0; /* process group for init is 0 */
2065 2065 psgid = (gid_t)-1; /* credential GID for init is -1 */
2066 2066 spid = 0; /* session id for init is 0 */
2067 2067 psdev = 0; /* session device for init is 0 */
2068 2068 } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
2069 2069 pid = 0; /* PID for zsched */
2070 2070 ppid = 0; /* parent PID for zsched is 0 */
2071 2071 pgpid = 0; /* process group for zsched is 0 */
2072 2072 psgid = (gid_t)-1; /* credential GID for zsched is -1 */
2073 2073 spid = 0; /* session id for zsched is 0 */
2074 2074 psdev = 0; /* session device for zsched is 0 */
2075 2075 } else {
2076 2076 /*
2077 2077 * Make sure not to reference parent PIDs that reside outside
2078 2078 * the zone
2079 2079 */
2080 2080 ppid = ((p->p_flag & SZONETOP) ?
2081 2081 curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
2082 2082
2083 2083 /*
2084 2084 * Convert ppid to the Linux default of 1 if our parent is the
2085 2085 * zone's init process
2086 2086 */
2087 2087 if (ppid == curproc->p_zone->zone_proc_initpid)
2088 2088 ppid = 1;
2089 2089
2090 2090 pgpid = p->p_pgrp;
2091 2091
2092 2092 mutex_enter(&p->p_splock);
2093 2093 mutex_enter(&p->p_sessp->s_lock);
2094 2094 spid = p->p_sessp->s_sid;
2095 2095 psdev = p->p_sessp->s_dev;
2096 2096 if (p->p_sessp->s_cred)
2097 2097 psgid = crgetgid(p->p_sessp->s_cred);
2098 2098 else
2099 2099 psgid = crgetgid(p->p_cred);
2100 2100
2101 2101 mutex_exit(&p->p_sessp->s_lock);
2102 2102 mutex_exit(&p->p_splock);
2103 2103 }
2104 2104
2105 2105 t = lxpr_get_thread(p, lookup_id);
2106 2106 if (t != NULL) {
2107 2107 switch (t->t_state) {
2108 2108 case TS_SLEEP:
2109 2109 stat = 'S'; break;
2110 2110 case TS_RUN:
2111 2111 case TS_ONPROC:
2112 2112 stat = 'R'; break;
2113 2113 case TS_ZOMB:
2114 2114 stat = 'Z'; break;
2115 2115 case TS_STOPPED:
2116 2116 stat = 'T'; break;
2117 2117 default:
2118 2118 stat = '!'; break;
2119 2119 }
2120 2120
2121 2121 if (CL_DONICE(t, NULL, 0, &nice) != 0)
2122 2122 nice = 0;
2123 2123
2124 2124 pri = t->t_pri;
2125 2125 wchan = t->t_wchan;
2126 2126 cpu = t->t_cpu->cpu_id;
2127 2127 thread_unlock(t);
2128 2128 } else {
2129 2129 if (lookup_id != 0) {
2130 2130 /* we can't find this specific thread */
2131 2131 lxpr_uiobuf_seterr(uiobuf, EINVAL);
2132 2132 lxpr_unlock(p);
2133 2133 return;
2134 2134 }
2135 2135
2136 2136 /* Only zombies have no threads */
2137 2137 stat = 'Z';
2138 2138 nice = 0;
2139 2139 pri = 0;
2140 2140 wchan = 0;
2141 2141 cpu = 0;
2142 2142 }
2143 2143 as = p->p_as;
2144 2144 mutex_exit(&p->p_lock);
2145 2145 AS_LOCK_ENTER(as, RW_READER);
2146 2146 vsize = as->a_resvsize;
2147 2147 rss = rm_asrss(as);
2148 2148 AS_LOCK_EXIT(as);
2149 2149 mutex_enter(&p->p_lock);
2150 2150
2151 2151 lxpr_uiobuf_printf(uiobuf,
2152 2152 "%d (%s) %c %d %d %d %d %d "
2153 2153 "%lu %lu %lu %lu %lu "
2154 2154 "%lu %lu %ld %ld "
2155 2155 "%d %d %d "
2156 2156 "%lu "
2157 2157 "%lu "
2158 2158 "%lu %ld %llu "
2159 2159 "%lu %lu %u "
2160 2160 "%lu %lu "
2161 2161 "%lu %lu %lu %lu "
2162 2162 "%lu "
2163 2163 "%lu %lu "
2164 2164 "%d "
2165 2165 "%d"
2166 2166 "\n",
2167 2167 (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
2168 2168 PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
2169 2169 0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
2170 2170 p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
2171 2171 pri, nice, p->p_lwpcnt,
2172 2172 0l, /* itrealvalue (time before next SIGALRM) */
2173 2173 PTOU(p)->u_ticks,
2174 2174 vsize, rss, p->p_vmem_ctl,
2175 2175 0l, 0l, USRSTACK, /* startcode, endcode, startstack */
2176 2176 0l, 0l, /* kstkesp, kstkeip */
2177 2177 0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
2178 2178 wchan,
2179 2179 0l, 0l, /* nswap, cnswap */
2180 2180 0, /* exit_signal */
2181 2181 cpu);
2182 2182
2183 2183 lxpr_unlock(p);
2184 2184 }
2185 2185
2186 2186 /*
2187 2187 * lxpr_read_pid_stat(): pid stat file
2188 2188 */
2189 2189 static void
2190 2190 lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2191 2191 {
2192 2192 ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
2193 2193
2194 2194 lxpr_read_stat_common(lxpnp, uiobuf, 0);
2195 2195 }
2196 2196
2197 2197 /*
2198 2198 * lxpr_read_pid_tid_stat(): pid stat file
2199 2199 */
2200 2200 static void
2201 2201 lxpr_read_pid_tid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2202 2202 {
2203 2203 ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STAT);
2204 2204 lxpr_read_stat_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
2205 2205 }
2206 2206
2207 2207 /* ARGSUSED */
2208 2208 static void
2209 2209 lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2210 2210 {
2211 2211 }
2212 2212
/*
 * Per-interface statistics gathered from the link kstats (see
 * lxpr_kstat_ifstat() for the kstat-name to field mapping) and used
 * to render /proc/net/dev rows.
 */
struct lxpr_ifstat {
	uint64_t rx_bytes;	/* "rbytes64" */
	uint64_t rx_packets;	/* "ipackets64" */
	uint64_t rx_errors;	/* "ierrors" */
	uint64_t rx_drop;	/* "norcvbuf" */
	uint64_t tx_bytes;	/* "obytes64" */
	uint64_t tx_packets;	/* "opackets64" */
	uint64_t tx_errors;	/* "oerrors" */
	uint64_t tx_drop;	/* "noxmtbuf" */
	uint64_t collisions;	/* "collisions" */
	uint64_t rx_multicast;	/* "multircv" */
};
2225 2225
/*
 * Snapshot the data of a kstat identified either by module/instance/name
 * (byname == B_TRUE) or by kid.  On success returns a KM_SLEEP-allocated
 * copy of the kstat data, setting *size to the allocation size (for the
 * caller's kmem_free) and *num to the record count.  Returns NULL if the
 * kstat cannot be found, is invalid, or the snapshot fails.
 */
static void *
lxpr_kstat_read(kstat_t *kn, boolean_t byname, size_t *size, int *num)
{
	kstat_t *kp;
	int i, nrec = 0;
	size_t bufsize;
	void *buf = NULL;

	if (byname == B_TRUE) {
		kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
		    kn->ks_name, getzoneid());
	} else {
		kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
	}
	if (kp == NULL) {
		return (NULL);
	}
	if (kp->ks_flags & KSTAT_FLAG_INVALID) {
		kstat_rele(kp);
		return (NULL);
	}

	bufsize = kp->ks_data_size + 1;
	kstat_rele(kp);

	/*
	 * The kstat in question is released so that kmem_alloc(KM_SLEEP) is
	 * performed without it held. After the alloc, the kstat is reacquired
	 * and its size is checked again. If the buffer is no longer large
	 * enough, the alloc and check are repeated once more (the loop
	 * makes two attempts in total).
	 */
	for (i = 0; i < 2; i++) {
		buf = kmem_alloc(bufsize, KM_SLEEP);

		/* Check if bufsize still appropriate */
		if (byname == B_TRUE) {
			kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
			    kn->ks_name, getzoneid());
		} else {
			kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
		}
		if (kp == NULL || kp->ks_flags & KSTAT_FLAG_INVALID) {
			if (kp != NULL) {
				kstat_rele(kp);
			}
			kmem_free(buf, bufsize);
			return (NULL);
		}
		KSTAT_ENTER(kp);
		(void) KSTAT_UPDATE(kp, KSTAT_READ);
		if (bufsize < kp->ks_data_size) {
			/* Grew since our last look; retry with a new size. */
			kmem_free(buf, bufsize);
			buf = NULL;
			bufsize = kp->ks_data_size + 1;
			KSTAT_EXIT(kp);
			kstat_rele(kp);
			continue;
		} else {
			if (KSTAT_SNAPSHOT(kp, buf, KSTAT_READ) != 0) {
				kmem_free(buf, bufsize);
				buf = NULL;
			}
			nrec = kp->ks_ndata;
			KSTAT_EXIT(kp);
			kstat_rele(kp);
			break;
		}
	}

	if (buf != NULL) {
		*size = bufsize;
		*num = nrec;
	}
	return (buf);
}
2301 2301
/*
 * Fill *ifs with the interface statistics for the link named by kn.
 * Returns 0 on success, -1 if the kstat could not be read.  Fields with
 * no matching kstat record are left zeroed.
 */
static int
lxpr_kstat_ifstat(kstat_t *kn, struct lxpr_ifstat *ifs)
{
	kstat_named_t *kp;
	int i, num;
	size_t size;

	/*
	 * Search by name instead of by kid since there's a small window to
	 * race against kstats being added/removed.
	 */
	bzero(ifs, sizeof (*ifs));
	kp = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
	if (kp == NULL)
		return (-1);
	/* Map each named record onto the corresponding lxpr_ifstat field. */
	for (i = 0; i < num; i++) {
		if (strncmp(kp[i].name, "rbytes64", KSTAT_STRLEN) == 0)
			ifs->rx_bytes = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "ipackets64", KSTAT_STRLEN) == 0)
			ifs->rx_packets = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "ierrors", KSTAT_STRLEN) == 0)
			ifs->rx_errors = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "norcvbuf", KSTAT_STRLEN) == 0)
			ifs->rx_drop = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "multircv", KSTAT_STRLEN) == 0)
			ifs->rx_multicast = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "obytes64", KSTAT_STRLEN) == 0)
			ifs->tx_bytes = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "opackets64", KSTAT_STRLEN) == 0)
			ifs->tx_packets = kp[i].value.ui64;
		else if (strncmp(kp[i].name, "oerrors", KSTAT_STRLEN) == 0)
			ifs->tx_errors = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "noxmtbuf", KSTAT_STRLEN) == 0)
			ifs->tx_drop = kp[i].value.ui32;
		else if (strncmp(kp[i].name, "collisions", KSTAT_STRLEN) == 0)
			ifs->collisions = kp[i].value.ui32;
	}
	kmem_free(kp, size);
	return (0);
}
2342 2342
/* ARGSUSED */
/*
 * lxpr_read_net_dev(): emit /proc/net/dev-style per-interface statistics.
 *
 * Reads the kstat header list (kid 0), then for each "link" or "lo"
 * module entry pulls the interface stats and prints one row.  Fields
 * illumos does not track (fifo, frame, compressed, carrier) print as 0.
 */
static void
lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	kstat_t *ksr;
	kstat_t ks0;
	int i, nidx;
	size_t sidx;
	struct lxpr_ifstat ifs;

	lxpr_uiobuf_printf(uiobuf, "Inter-|   Receive                   "
	    "                             |  Transmit\n");
	lxpr_uiobuf_printf(uiobuf, " face |bytes    packets errs drop fifo"
	    " frame compressed multicast|bytes    packets errs drop fifo"
	    " colls carrier compressed\n");

	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
	if (ksr == NULL)
		return;

	for (i = 1; i < nidx; i++) {
		if (strncmp(ksr[i].ks_module, "link", KSTAT_STRLEN) == 0 ||
		    strncmp(ksr[i].ks_module, "lo", KSTAT_STRLEN) == 0) {
			if (lxpr_kstat_ifstat(&ksr[i], &ifs) != 0)
				continue;

			/* Overwriting the name is ok in the local snapshot */
			lx_ifname_convert(ksr[i].ks_name, LX_IF_FROMNATIVE);
			lxpr_uiobuf_printf(uiobuf, "%6s: %7llu %7llu %4lu "
			    "%4lu %4u %5u %10u %9lu %8llu %7llu %4lu %4lu %4u "
			    "%5lu %7u %10u\n",
			    ksr[i].ks_name,
			    ifs.rx_bytes, ifs.rx_packets,
			    ifs.rx_errors, ifs.rx_drop,
			    0, 0, 0, ifs.rx_multicast,
			    ifs.tx_bytes, ifs.tx_packets,
			    ifs.tx_errors, ifs.tx_drop,
			    0, ifs.collisions, 0, 0);
		}
	}

	kmem_free(ksr, sidx);
}
2387 2387
/* ARGSUSED */
/*
 * lxpr_read_net_dev_mcast(): /proc/net/dev_mcast — not implemented,
 * reads as empty.
 */
static void
lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2393 2393
2394 2394 static void
2395 2395 lxpr_inet6_out(const in6_addr_t *addr, char buf[33])
2396 2396 {
2397 2397 const uint8_t *ip = addr->s6_addr;
2398 2398 char digits[] = "0123456789abcdef";
2399 2399 int i;
2400 2400 for (i = 0; i < 16; i++) {
2401 2401 buf[2 * i] = digits[ip[i] >> 4];
2402 2402 buf[2 * i + 1] = digits[ip[i] & 0xf];
2403 2403 }
2404 2404 buf[32] = '\0';
2405 2405 }
2406 2406
/* ARGSUSED */
/*
 * lxpr_read_net_if_inet6(): emit /proc/net/if_inet6 content.
 *
 * Walks every IPv6 ill/ipif in the current netstack under the IP stack's
 * global ill lock, printing one line per address:
 *   <addr> <ifindex> <prefixlen> <scope> <flags> <ifname>
 * The PERMANENT flag (0x80) is always reported.
 */
static void
lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;
	ill_t *ill;
	ipif_t *ipif;
	ill_walk_context_t ctx;
	char ifname[LIFNAMSIZ], ip6out[33];

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx, ipst);

	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			uint_t index = ill->ill_phyint->phyint_ifindex;
			int plen = ip_mask_to_plen_v6(&ipif->ipif_v6net_mask);
			unsigned int scope = lx_ipv6_scope_convert(
			    &ipif->ipif_v6lcl_addr);
			/* Always report PERMANENT flag */
			int flag = 0x80;

			(void) snprintf(ifname, LIFNAMSIZ, "%s", ill->ill_name);
			lx_ifname_convert(ifname, LX_IF_FROMNATIVE);
			lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out);

			lxpr_uiobuf_printf(uiobuf, "%32s %02x %02x %02x %02x"
			    " %8s\n", ip6out, index, plen, scope, flag, ifname);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	netstack_rele(ns);
}
2447 2447
/* ARGSUSED */
/*
 * lxpr_read_net_igmp(): /proc/net/igmp — not implemented, reads as empty.
 */
static void
lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2453 2453
/* ARGSUSED */
/*
 * lxpr_read_net_ip_mr_cache(): /proc/net/ip_mr_cache — not implemented,
 * reads as empty.
 */
static void
lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2459 2459
/* ARGSUSED */
/*
 * lxpr_read_net_ip_mr_vif(): /proc/net/ip_mr_vif — not implemented,
 * reads as empty.
 */
static void
lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2465 2465
/*
 * Emit one /proc/net/ipv6_route line for the given IRE.  Used as the
 * callback for ire_walk_v6() (see lxpr_read_net_ipv6_route); the second
 * argument is the lxpr_uiobuf_t passed through the walk.
 */
static void
lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf)
{
	uint32_t flags;
	char name[IFNAMSIZ];
	char ipv6addr[33];

	/* destination address and prefix length */
	lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr);
	lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr,
	    ip_mask_to_plen_v6(&ire->ire_mask_v6));

	/* punt on this for now (source address/prefix reported as zero) */
	lxpr_uiobuf_printf(uiobuf, "%s %02x ",
	    "00000000000000000000000000000000", 0);

	lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr);
	lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr);

	/* These route flags have direct Linux equivalents. */
	flags = ire->ire_flags &
	    (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
	/* Linux's RTF_LOCAL equivalent */
	if (ire->ire_metrics.iulp_local)
		flags |= 0x80000000;

	if (ire->ire_ill != NULL) {
		ill_get_name(ire->ire_ill, name, sizeof (name));
		lx_ifname_convert(name, LX_IF_FROMNATIVE);
	} else {
		name[0] = '\0';
	}

	lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n",
	    0, /* metric */
	    ire->ire_refcnt,
	    0,
	    flags,
	    name);
}
2504 2504
/* ARGSUSED */
/*
 * lxpr_read_net_ipv6_route(): emit /proc/net/ipv6_route by walking the
 * current netstack's IPv6 IREs with lxpr_format_route_ipv6().
 */
static void
lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/*
	 * LX branded zones are expected to have exclusive IP stack, hence
	 * using ALL_ZONES as the zoneid filter.
	 */
	ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst);

	netstack_rele(ns);
}
2525 2525
/* ARGSUSED */
/*
 * lxpr_read_net_mcfilter(): /proc/net/mcfilter — not implemented,
 * reads as empty.
 */
static void
lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2531 2531
/* ARGSUSED */
/*
 * lxpr_read_net_netstat(): /proc/net/netstat — not implemented,
 * reads as empty.
 */
static void
lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2537 2537
/* ARGSUSED */
/*
 * lxpr_read_net_raw(): /proc/net/raw — not implemented, reads as empty.
 */
static void
lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2543 2543
/*
 * IRE types that are omitted from /proc/net/route: cloned entries,
 * broadcast/multicast, unroutable, loopback and local routes.
 */
#define	LXPR_SKIP_ROUTE(type)	\
	(((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \
	IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & type) != 0)
2547 2547
/*
 * Emit one /proc/net/route line for the given IPv4 IRE.  Used as the
 * callback for the IRE walk in lxpr_read_net_route(); hidden routes and
 * those matching LXPR_SKIP_ROUTE are suppressed.
 */
static void
lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf)
{
	uint32_t flags;
	char name[IFNAMSIZ];
	ill_t *ill;
	ire_t *nire;
	ipif_t *ipif;
	ipaddr_t gateway;

	if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0)
		return;

	/* These route flags have direct Linux equivalents */
	flags = ire->ire_flags &
	    (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);

	/*
	 * Search for a suitable IRE for naming purposes.
	 * On Linux, the default route is typically associated with the
	 * interface used to access gateway. The default IRE on Illumos
	 * typically lacks an ill reference but its parent might have one.
	 */
	nire = ire;
	do {
		ill = nire->ire_ill;
		nire = nire->ire_dep_parent;
	} while (ill == NULL && nire != NULL);
	if (ill != NULL) {
		ill_get_name(ill, name, sizeof (name));
		lx_ifname_convert(name, LX_IF_FROMNATIVE);
	} else {
		name[0] = '*';
		name[1] = '\0';
	}

	/*
	 * Linux suppresses the gateway address for directly connected
	 * interface networks. To emulate this behavior, we walk all addresses
	 * of a given route interface. If one matches the gateway, it is
	 * displayed as NULL.
	 */
	gateway = ire->ire_gateway_addr;
	if ((ill = ire->ire_ill) != NULL) {
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_lcl_addr == gateway) {
				gateway = 0;
				break;
			}
		}
	}

	lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
	    "%d\t%08X\t%d\t%u\t%u\n",
	    name,
	    ire->ire_addr,
	    gateway,
	    flags, 0, 0,
	    0, /* priority */
	    ire->ire_mask,
	    0, 0, /* mss, window */
	    ire->ire_metrics.iulp_rtt);
}
2612 2612
2613 2613 /* ARGSUSED */
2614 2614 static void
2615 2615 lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2616 2616 {
2617 2617 netstack_t *ns;
2618 2618 ip_stack_t *ipst;
2619 2619
2620 2620 lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t"
2621 2621 "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
2622 2622
2623 2623 ns = netstack_get_current();
2624 2624 if (ns == NULL)
2625 2625 return;
2626 2626 ipst = ns->netstack_ip;
2627 2627
2628 2628 /*
2629 2629 * LX branded zones are expected to have exclusive IP stack, hence
2630 2630 * using ALL_ZONES as the zoneid filter.
2631 2631 */
2632 2632 ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst);
2633 2633
2634 2634 netstack_rele(ns);
2635 2635 }
2636 2636
/* ARGSUSED */
static void
lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Deliberately empty: /proc/net/rpc reads as a 0-byte file. */
}
2642 2642
/* ARGSUSED */
static void
lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Deliberately empty: /proc/net/rt_cache reads as a 0-byte file. */
}
2648 2648
/* ARGSUSED */
static void
lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Deliberately empty: /proc/net/sockstat reads as a 0-byte file. */
}
2654 2654
/*
 * Mapping for one /proc/net/snmp protocol section: the protocol's mib2
 * kstat name plus the ordered, NULL-terminated list of field names to
 * emit. Field order here defines the output column order.
 */
typedef struct lxpr_snmp_table {
	const char *lst_proto;		/* mib2 kstat name, e.g. "tcp" */
	const char *lst_fields[];	/* NULL-terminated field names */
} lxpr_snmp_table_t;

static lxpr_snmp_table_t lxpr_snmp_ip = { "ip",
	{
	"forwarding", "defaultTTL", "inReceives", "inHdrErrors",
	"inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards",
	"inDelivers", "outRequests", "outDiscards", "outNoRoutes",
	"reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs",
	"fragFails", "fragCreates",
	NULL
	}
};
static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp",
	{
	"inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds",
	"inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps",
	"inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps",
	"outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds",
	"outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos",
	"outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks",
	"outAddrMaskReps",
	NULL
	}
};
static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp",
	{
	"rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens",
	"passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs",
	"outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors",
	NULL
	}
};
static lxpr_snmp_table_t lxpr_snmp_udp = { "udp",
	{
	"inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors",
	"sndbufErrors", "inCsumErrors",
	NULL
	}
};

/* NULL-terminated list of protocol sections, in /proc/net/snmp order. */
static lxpr_snmp_table_t *lxpr_net_snmptab[] = {
	&lxpr_snmp_ip,
	&lxpr_snmp_icmp,
	&lxpr_snmp_tcp,
	&lxpr_snmp_udp,
	NULL
};
2705 2705
2706 2706 static void
2707 2707 lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table,
2708 2708 kstat_t *kn)
2709 2709 {
2710 2710 kstat_named_t *klist;
2711 2711 char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN];
2712 2712 int i, j, num;
2713 2713 size_t size;
2714 2714
2715 2715 klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
2716 2716 if (klist == NULL)
2717 2717 return;
2718 2718
2719 2719 /* Print the header line, fields capitalized */
2720 2720 (void) strncpy(upname, table->lst_proto, KSTAT_STRLEN);
2721 2721 upname[0] = toupper(upname[0]);
2722 2722 lxpr_uiobuf_printf(uiobuf, "%s:", upname);
2723 2723 for (i = 0; table->lst_fields[i] != NULL; i++) {
2724 2724 (void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN);
2725 2725 upfield[0] = toupper(upfield[0]);
2726 2726 lxpr_uiobuf_printf(uiobuf, " %s", upfield);
2727 2727 }
2728 2728 lxpr_uiobuf_printf(uiobuf, "\n%s:", upname);
2729 2729
2730 2730 /* Then loop back through to print the value line. */
2731 2731 for (i = 0; table->lst_fields[i] != NULL; i++) {
2732 2732 kstat_named_t *kpoint = NULL;
2733 2733 for (j = 0; j < num; j++) {
2734 2734 if (strncmp(klist[j].name, table->lst_fields[i],
2735 2735 KSTAT_STRLEN) == 0) {
2736 2736 kpoint = &klist[j];
2737 2737 break;
2738 2738 }
2739 2739 }
2740 2740 if (kpoint == NULL) {
2741 2741 /* Output 0 for unknown fields */
2742 2742 lxpr_uiobuf_printf(uiobuf, " 0");
2743 2743 } else {
2744 2744 switch (kpoint->data_type) {
2745 2745 case KSTAT_DATA_INT32:
2746 2746 lxpr_uiobuf_printf(uiobuf, " %d",
2747 2747 kpoint->value.i32);
2748 2748 break;
2749 2749 case KSTAT_DATA_UINT32:
2750 2750 lxpr_uiobuf_printf(uiobuf, " %u",
2751 2751 kpoint->value.ui32);
2752 2752 break;
2753 2753 case KSTAT_DATA_INT64:
2754 2754 lxpr_uiobuf_printf(uiobuf, " %ld",
2755 2755 kpoint->value.l);
2756 2756 break;
2757 2757 case KSTAT_DATA_UINT64:
2758 2758 lxpr_uiobuf_printf(uiobuf, " %lu",
2759 2759 kpoint->value.ul);
2760 2760 break;
2761 2761 }
2762 2762 }
2763 2763 }
2764 2764 lxpr_uiobuf_printf(uiobuf, "\n");
2765 2765 kmem_free(klist, size);
2766 2766 }
2767 2767
2768 2768 /* ARGSUSED */
2769 2769 static void
2770 2770 lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2771 2771 {
2772 2772 kstat_t *ksr;
2773 2773 kstat_t ks0;
2774 2774 lxpr_snmp_table_t **table = lxpr_net_snmptab;
2775 2775 int i, t, nidx;
2776 2776 size_t sidx;
2777 2777
2778 2778 ks0.ks_kid = 0;
2779 2779 ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
2780 2780 if (ksr == NULL)
2781 2781 return;
2782 2782
2783 2783 for (t = 0; table[t] != NULL; t++) {
2784 2784 for (i = 0; i < nidx; i++) {
2785 2785 if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0)
2786 2786 continue;
2787 2787 if (strncmp(ksr[i].ks_name, table[t]->lst_proto,
2788 2788 KSTAT_STRLEN) == 0) {
2789 2789 lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]);
2790 2790 break;
2791 2791 }
2792 2792 }
2793 2793 }
2794 2794 kmem_free(ksr, sidx);
2795 2795 }
2796 2796
/* ARGSUSED */
static void
lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Deliberately empty: /proc/net/stat reads as a 0-byte file. */
}
2802 2802
2803 2803 static int
2804 2804 lxpr_convert_tcp_state(int st)
2805 2805 {
2806 2806 /*
2807 2807 * Derived from the enum located in the Linux kernel sources:
2808 2808 * include/net/tcp_states.h
2809 2809 */
2810 2810 switch (st) {
2811 2811 case TCPS_ESTABLISHED:
2812 2812 return (1);
2813 2813 case TCPS_SYN_SENT:
2814 2814 return (2);
2815 2815 case TCPS_SYN_RCVD:
2816 2816 return (3);
2817 2817 case TCPS_FIN_WAIT_1:
2818 2818 return (4);
2819 2819 case TCPS_FIN_WAIT_2:
2820 2820 return (5);
2821 2821 case TCPS_TIME_WAIT:
2822 2822 return (6);
2823 2823 case TCPS_CLOSED:
2824 2824 return (7);
2825 2825 case TCPS_CLOSE_WAIT:
2826 2826 return (8);
2827 2827 case TCPS_LAST_ACK:
2828 2828 return (9);
2829 2829 case TCPS_LISTEN:
2830 2830 return (10);
2831 2831 case TCPS_CLOSING:
2832 2832 return (11);
2833 2833 default:
2834 2834 /* No translation for TCPS_IDLE, TCPS_BOUND or anything else */
2835 2835 return (0);
2836 2836 }
2837 2837 }
2838 2838
/*
 * Emit the body of /proc/net/tcp (ipver == IPV4_VERSION) or
 * /proc/net/tcp6 (ipver == IPV6_VERSION): one line per TCP conn_t in
 * the zone's netstack, walked via the global conn hash fanout.
 */
static void
lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
{
	int i, sl = 0;		/* sl: 1-based slot number in the output */
	connf_t *connfp;
	conn_t *connp;
	netstack_t *ns;
	ip_stack_t *ipst;

	ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
	if (ipver == IPV4_VERSION) {
		lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address   "
		    "st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout "
		    "inode\n");
	} else {
		lxpr_uiobuf_printf(uiobuf, "  sl  "
		    "local_address                         "
		    "remote_address                        "
		    "st tx_queue rx_queue tr tm->when retrnsmt   "
		    "uid  timeout inode\n");
	}
	/*
	 * Due to differences between the Linux and illumos TCP
	 * implementations, some data will be omitted from the output here.
	 *
	 * Valid fields:
	 *  - local_address
	 *  - remote_address
	 *  - st
	 *  - tx_queue
	 *  - rx_queue
	 *  - uid
	 *  - inode
	 *
	 * Omitted/invalid fields
	 *  - tr
	 *  - tm->when
	 *  - retrnsmt
	 *  - timeout
	 */

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;
		/* ipcl_get_next_conn() handles conn refcounting as we walk */
		while ((connp =
		    ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) {
			tcp_t *tcp;
			vattr_t attr;
			sonode_t *so = (sonode_t *)connp->conn_upper_handle;
			vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
			/* Skip conns of the other IP version */
			if (connp->conn_ipversion != ipver)
				continue;
			tcp = connp->conn_tcp;
			if (ipver == IPV4_VERSION) {
				lxpr_uiobuf_printf(uiobuf,
				    "%4d: %08X:%04X %08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v4,
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v4,
				    ntohs(connp->conn_fport));
			} else {
				lxpr_uiobuf_printf(uiobuf, "%4d: "
				    "%08X%08X%08X%08X:%04X "
				    "%08X%08X%08X%08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v6.s6_addr32[0],
				    connp->conn_laddr_v6.s6_addr32[1],
				    connp->conn_laddr_v6.s6_addr32[2],
				    connp->conn_laddr_v6.s6_addr32[3],
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v6.s6_addr32[0],
				    connp->conn_faddr_v6.s6_addr32[1],
				    connp->conn_faddr_v6.s6_addr32[2],
				    connp->conn_faddr_v6.s6_addr32[3],
				    ntohs(connp->conn_fport));
			}

			/* fetch the simulated inode for the socket */
			if (vp == NULL ||
			    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
				attr.va_nodeid = 0;

			lxpr_uiobuf_printf(uiobuf,
			    "%02X %08X:%08X %02X:%08X %08X "
			    "%5u %8d %lu %d %p %u %u %u %u %d\n",
			    lxpr_convert_tcp_state(tcp->tcp_state),
			    tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */
			    0, 0, /* tr, when */
			    0, /* per-connection rexmits aren't tracked today */
			    connp->conn_cred->cr_uid,
			    0, /* timeout */
			    /* inode + more */
			    (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0);
		}
	}
	netstack_rele(ns);
}
2942 2942
/* Generate /proc/net/tcp (IPv4 connections). */
/* ARGSUSED */
static void
lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_tcp(uiobuf, IPV4_VERSION);
}
2949 2949
/* Generate /proc/net/tcp6 (IPv6 connections). */
/* ARGSUSED */
static void
lxpr_read_net_tcp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_tcp(uiobuf, IPV6_VERSION);
}
2956 2956
/*
 * Emit the body of /proc/net/udp (ipver == IPV4_VERSION) or
 * /proc/net/udp6 (ipver == IPV6_VERSION): one line per UDP conn_t in
 * the zone's netstack, walked via the global conn hash fanout.
 */
static void
lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
{
	int i, sl = 0;		/* sl: 1-based slot number in the output */
	connf_t *connfp;
	conn_t *connp;
	netstack_t *ns;
	ip_stack_t *ipst;

	ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
	if (ipver == IPV4_VERSION) {
		lxpr_uiobuf_printf(uiobuf, "  sl  local_address rem_address"
		    "   st tx_queue rx_queue tr tm->when retrnsmt   uid"
		    "  timeout inode ref pointer drops\n");
	} else {
		lxpr_uiobuf_printf(uiobuf, "  sl  "
		    "local_address                         "
		    "remote_address                        "
		    "st tx_queue rx_queue tr tm->when retrnsmt   "
		    "uid  timeout inode ref pointer drops\n");
	}
	/*
	 * Due to differences between the Linux and illumos UDP
	 * implementations, some data will be omitted from the output here.
	 *
	 * Valid fields:
	 *  - local_address
	 *  - remote_address
	 *  - st: limited
	 *  - uid
	 *
	 * Omitted/invalid fields
	 *  - tx_queue
	 *  - rx_queue
	 *  - tr
	 *  - tm->when
	 *  - retrnsmt
	 *  - timeout
	 *  - inode
	 */

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;
		/* ipcl_get_next_conn() handles conn refcounting as we walk */
		while ((connp =
		    ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) {
			udp_t *udp;
			int state = 0;
			vattr_t attr;
			sonode_t *so = (sonode_t *)connp->conn_upper_handle;
			vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
			/* Skip conns of the other IP version */
			if (connp->conn_ipversion != ipver)
				continue;
			udp = connp->conn_udp;
			if (ipver == IPV4_VERSION) {
				lxpr_uiobuf_printf(uiobuf,
				    "%4d: %08X:%04X %08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v4,
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v4,
				    ntohs(connp->conn_fport));
			} else {
				lxpr_uiobuf_printf(uiobuf, "%4d: "
				    "%08X%08X%08X%08X:%04X "
				    "%08X%08X%08X%08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v6.s6_addr32[0],
				    connp->conn_laddr_v6.s6_addr32[1],
				    connp->conn_laddr_v6.s6_addr32[2],
				    connp->conn_laddr_v6.s6_addr32[3],
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v6.s6_addr32[0],
				    connp->conn_faddr_v6.s6_addr32[1],
				    connp->conn_faddr_v6.s6_addr32[2],
				    connp->conn_faddr_v6.s6_addr32[3],
				    ntohs(connp->conn_fport));
			}

			/* Map TPI state to the Linux values (7=close, 1=estab) */
			switch (udp->udp_state) {
			case TS_UNBND:
			case TS_IDLE:
				state = 7;
				break;
			case TS_DATA_XFER:
				state = 1;
				break;
			}

			/* fetch the simulated inode for the socket */
			if (vp == NULL ||
			    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
				attr.va_nodeid = 0;

			lxpr_uiobuf_printf(uiobuf,
			    "%02X %08X:%08X %02X:%08X %08X "
			    "%5u %8d %lu %d %p %d\n",
			    state,
			    0, 0, /* rx/tx queue */
			    0, 0, /* tr, when */
			    0, /* retrans */
			    connp->conn_cred->cr_uid,
			    0, /* timeout */
			    /* inode, ref, pointer, drops */
			    (ino_t)attr.va_nodeid, 0, NULL, 0);
		}
	}
	netstack_rele(ns);
}
3071 3071
/* Generate /proc/net/udp (IPv4 sockets). */
/* ARGSUSED */
static void
lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_udp(uiobuf, IPV4_VERSION);
}
3078 3078
/* Generate /proc/net/udp6 (IPv6 sockets). */
/* ARGSUSED */
static void
lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_udp(uiobuf, IPV6_VERSION);
}
3085 3085
/*
 * Generate /proc/net/unix: one line per active AF_UNIX sonode in this
 * zone, walked from the global socklist under sl_lock.
 */
/* ARGSUSED */
static void
lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	sonode_t *so;
	zoneid_t zoneid = getzoneid();

	lxpr_uiobuf_printf(uiobuf, "Num       RefCount Protocol Flags    Type "
	    "St Inode Path\n");

	mutex_enter(&socklist.sl_lock);
	for (so = socklist.sl_list; so != NULL;
	    so = _SOTOTPI(so)->sti_next_so) {
		vnode_t *vp = so->so_vnode;
		vattr_t attr;
		sotpi_info_t *sti;
		const char *name = NULL;
		int status = 0;
		int type = 0;
		int flags = 0;

		/* Only process active sonodes in this zone */
		if (so->so_count == 0 || so->so_zoneid != zoneid)
			continue;

		/*
		 * Grab the inode, if possible.
		 * This must be done before entering so_lock.
		 */
		if (vp == NULL ||
		    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
			attr.va_nodeid = 0;

		mutex_enter(&so->so_lock);
		sti = _SOTOTPI(so);

		/* Prefer the bound (local) path; fall back to the peer's. */
		if (sti->sti_laddr_sa != NULL &&
		    sti->sti_laddr_len > 0) {
			name = sti->sti_laddr_sa->sa_data;
		} else if (sti->sti_faddr_sa != NULL &&
		    sti->sti_faddr_len > 0) {
			name = sti->sti_faddr_sa->sa_data;
		}

		/*
		 * Derived from enum values in Linux kernel source:
		 * include/uapi/linux/net.h
		 */
		if ((so->so_state & SS_ISDISCONNECTING) != 0) {
			status = 4;
		} else if ((so->so_state & SS_ISCONNECTING) != 0) {
			status = 2;
		} else if ((so->so_state & SS_ISCONNECTED) != 0) {
			status = 3;
		} else {
			status = 1;
			/* Add ACC flag for stream-type server sockets */
			if (so->so_type != SOCK_DGRAM &&
			    sti->sti_laddr_sa != NULL)
				flags |= 0x10000;
		}

		/* Convert to Linux type */
		switch (so->so_type) {
		case SOCK_DGRAM:
			type = 2;
			break;
		case SOCK_SEQPACKET:
			type = 5;
			break;
		default:
			type = 1;
		}

		lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu",
		    so,
		    so->so_count,
		    0, /* proto, always 0 */
		    flags,
		    type,
		    status,
		    (ino_t)attr.va_nodeid);

		/*
		 * Due to shortcomings in the abstract socket emulation, they
		 * cannot be properly represented here (as @<path>).
		 *
		 * This will be the case until they are better implemented.
		 */
		if (name != NULL)
			lxpr_uiobuf_printf(uiobuf, " %s\n", name);
		else
			lxpr_uiobuf_printf(uiobuf, "\n");
		mutex_exit(&so->so_lock);
	}
	mutex_exit(&socklist.sl_lock);
}
3183 3183
3184 3184 /*
3185 3185 * lxpr_read_kmsg(): read the contents of the kernel message queue. We
3186 3186 * translate this into the reception of console messages for this zone; each
3187 3187 * read copies out a single zone console message, or blocks until the next one
3188 3188 * is produced, unless we're open non-blocking, in which case we return after
3189 3189 * 1ms.
3190 3190 */
3191 3191
3192 3192 #define LX_KMSG_PRI "<0>"
3193 3193
3194 3194 static void
3195 3195 lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh)
3196 3196 {
3197 3197 mblk_t *mp;
3198 3198 timestruc_t to;
3199 3199 timestruc_t *tp = NULL;
3200 3200
3201 3201 ASSERT(lxpnp->lxpr_type == LXPR_KMSG);
3202 3202
3203 3203 if (lxpr_uiobuf_nonblock(uiobuf)) {
3204 3204 to.tv_sec = 0;
3205 3205 to.tv_nsec = 1000000; /* 1msec */
3206 3206 tp = &to;
3207 3207 }
3208 3208
3209 3209 if (ldi_getmsg(lh, &mp, tp) == 0) {
3210 3210 /*
3211 3211 * lx procfs doesn't like successive reads to the same file
3212 3212 * descriptor unless we do an explicit rewind each time.
3213 3213 */
3214 3214 lxpr_uiobuf_seek(uiobuf, 0);
3215 3215
3216 3216 lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
3217 3217 mp->b_cont->b_rptr);
3218 3218
3219 3219 freemsg(mp);
3220 3220 }
3221 3221 }
3222 3222
3223 3223 /*
3224 3224 * lxpr_read_loadavg(): read the contents of the "loadavg" file. We do just
3225 3225 * enough for uptime and other simple lxproc readers to work
3226 3226 */
3227 3227 extern int nthread;
3228 3228
3229 3229 static void
3230 3230 lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3231 3231 {
3232 3232 ulong_t avenrun1;
3233 3233 ulong_t avenrun5;
3234 3234 ulong_t avenrun15;
3235 3235 ulong_t avenrun1_cs;
3236 3236 ulong_t avenrun5_cs;
3237 3237 ulong_t avenrun15_cs;
3238 3238 int loadavg[3];
3239 3239 int *loadbuf;
3240 3240 cpupart_t *cp;
3241 3241 zone_t *zone = LXPTOZ(lxpnp);
3242 3242
3243 3243 uint_t nrunnable = 0;
3244 3244 rctl_qty_t nlwps;
3245 3245
3246 3246 ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
3247 3247
3248 3248 mutex_enter(&cpu_lock);
3249 3249
3250 3250 /*
3251 3251 * Need to add up values over all CPU partitions. If pools are active,
3252 3252 * only report the values of the zone's partition, which by definition
3253 3253 * includes the current CPU.
3254 3254 */
3255 3255 if (pool_pset_enabled()) {
3256 3256 psetid_t psetid = zone_pset_get(curproc->p_zone);
3257 3257
3258 3258 ASSERT(curproc->p_zone != &zone0);
3259 3259 cp = CPU->cpu_part;
3260 3260
3261 3261 nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
3262 3262 (void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
3263 3263 loadbuf = &loadavg[0];
3264 3264 } else {
3265 3265 cp = cp_list_head;
3266 3266 do {
3267 3267 nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
3268 3268 } while ((cp = cp->cp_next) != cp_list_head);
3269 3269
3270 3270 loadbuf = zone == global_zone ?
3271 3271 &avenrun[0] : zone->zone_avenrun;
3272 3272 }
3273 3273
3274 3274 /*
3275 3275 * If we're in the non-global zone, we'll report the total number of
3276 3276 * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
3277 3277 * otherwise will just use nthread (which will include kernel threads,
3278 3278 * but should be good enough for lxproc).
3279 3279 */
3280 3280 nlwps = zone == global_zone ? nthread : zone->zone_nlwps;
3281 3281
3282 3282 mutex_exit(&cpu_lock);
3283 3283
3284 3284 avenrun1 = loadbuf[0] >> FSHIFT;
3285 3285 avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
3286 3286 avenrun5 = loadbuf[1] >> FSHIFT;
3287 3287 avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
3288 3288 avenrun15 = loadbuf[2] >> FSHIFT;
3289 3289 avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
3290 3290
3291 3291 lxpr_uiobuf_printf(uiobuf,
3292 3292 "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
3293 3293 avenrun1, avenrun1_cs,
3294 3294 avenrun5, avenrun5_cs,
3295 3295 avenrun15, avenrun15_cs,
3296 3296 nrunnable, nlwps, 0);
3297 3297 }
3298 3298
3299 3299 /*
3300 3300 * lxpr_read_meminfo(): read the contents of the "meminfo" file.
3301 3301 */
3302 3302 static void
3303 3303 lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3304 3304 {
3305 3305 zone_t *zone = LXPTOZ(lxpnp);
3306 3306 int global = zone == global_zone;
3307 3307 long total_mem, free_mem, total_swap, used_swap;
3308 3308
3309 3309 ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
3310 3310
3311 3311 if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
3312 3312 total_mem = physmem * PAGESIZE;
3313 3313 free_mem = freemem * PAGESIZE;
3314 3314 } else {
3315 3315 total_mem = zone->zone_phys_mem_ctl;
3316 3316 free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
3317 3317 }
3318 3318
3319 3319 if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
3320 3320 total_swap = k_anoninfo.ani_max * PAGESIZE;
3321 3321 used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
3322 3322 } else {
3323 3323 mutex_enter(&zone->zone_mem_lock);
3324 3324 total_swap = zone->zone_max_swap_ctl;
3325 3325 used_swap = zone->zone_max_swap;
3326 3326 mutex_exit(&zone->zone_mem_lock);
3327 3327 }
3328 3328
3329 3329 lxpr_uiobuf_printf(uiobuf,
3330 3330 "MemTotal: %8lu kB\n"
3331 3331 "MemFree: %8lu kB\n"
3332 3332 "MemShared: %8u kB\n"
3333 3333 "Buffers: %8u kB\n"
3334 3334 "Cached: %8u kB\n"
3335 3335 "SwapCached:%8u kB\n"
3336 3336 "Active: %8u kB\n"
3337 3337 "Inactive: %8u kB\n"
3338 3338 "HighTotal: %8u kB\n"
3339 3339 "HighFree: %8u kB\n"
3340 3340 "LowTotal: %8u kB\n"
3341 3341 "LowFree: %8u kB\n"
3342 3342 "SwapTotal: %8lu kB\n"
3343 3343 "SwapFree: %8lu kB\n",
3344 3344 btok(total_mem), /* MemTotal */
3345 3345 btok(free_mem), /* MemFree */
3346 3346 0, /* MemShared */
3347 3347 0, /* Buffers */
3348 3348 0, /* Cached */
3349 3349 0, /* SwapCached */
3350 3350 0, /* Active */
3351 3351 0, /* Inactive */
3352 3352 0, /* HighTotal */
3353 3353 0, /* HighFree */
3354 3354 btok(total_mem), /* LowTotal */
3355 3355 btok(free_mem), /* LowFree */
3356 3356 btok(total_swap), /* SwapTotal */
3357 3357 btok(total_swap - used_swap)); /* SwapFree */
3358 3358 }
3359 3359
3360 3360 /*
3361 3361 * lxpr_read_mounts():
3362 3362 */
3363 3363 /* ARGSUSED */
3364 3364 static void
3365 3365 lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3366 3366 {
3367 3367 struct vfs *vfsp;
3368 3368 struct vfs *vfslist;
3369 3369 zone_t *zone = LXPTOZ(lxpnp);
3370 3370 struct print_data {
3371 3371 refstr_t *vfs_mntpt;
3372 3372 refstr_t *vfs_resource;
3373 3373 uint_t vfs_flag;
3374 3374 int vfs_fstype;
3375 3375 struct print_data *next;
3376 3376 } *print_head = NULL;
3377 3377 struct print_data **print_tail = &print_head;
3378 3378 struct print_data *printp;
3379 3379
3380 3380 vfs_list_read_lock();
3381 3381
3382 3382 if (zone == global_zone) {
3383 3383 vfsp = vfslist = rootvfs;
3384 3384 } else {
3385 3385 vfsp = vfslist = zone->zone_vfslist;
3386 3386 /*
3387 3387 * If the zone has a root entry, it will be the first in
3388 3388 * the list. If it doesn't, we conjure one up.
3389 3389 */
3390 3390 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
3391 3391 zone->zone_rootpath) != 0) {
3392 3392 struct vfs *tvfsp;
3393 3393 /*
3394 3394 * The root of the zone is not a mount point. The vfs
3395 3395 * we want to report is that of the zone's root vnode.
3396 3396 */
3397 3397 tvfsp = zone->zone_rootvp->v_vfsp;
3398 3398
3399 3399 lxpr_uiobuf_printf(uiobuf,
3400 3400 "/ / %s %s 0 0\n",
3401 3401 vfssw[tvfsp->vfs_fstype].vsw_name,
3402 3402 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
3403 3403
3404 3404 }
3405 3405 if (vfslist == NULL) {
3406 3406 vfs_list_unlock();
3407 3407 return;
3408 3408 }
3409 3409 }
3410 3410
3411 3411 /*
3412 3412 * Later on we have to do a lookupname, which can end up causing
3413 3413 * another vfs_list_read_lock() to be called. Which can lead to a
3414 3414 * deadlock. To avoid this, we extract the data we need into a local
3415 3415 * list, then we can run this list without holding vfs_list_read_lock()
3416 3416 * We keep the list in the same order as the vfs_list
3417 3417 */
3418 3418 do {
3419 3419 /* Skip mounts we shouldn't show */
3420 3420 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
3421 3421 goto nextfs;
3422 3422 }
3423 3423
3424 3424 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
3425 3425 refstr_hold(vfsp->vfs_mntpt);
3426 3426 printp->vfs_mntpt = vfsp->vfs_mntpt;
3427 3427 refstr_hold(vfsp->vfs_resource);
3428 3428 printp->vfs_resource = vfsp->vfs_resource;
3429 3429 printp->vfs_flag = vfsp->vfs_flag;
3430 3430 printp->vfs_fstype = vfsp->vfs_fstype;
3431 3431 printp->next = NULL;
3432 3432
3433 3433 *print_tail = printp;
3434 3434 print_tail = &printp->next;
3435 3435
3436 3436 nextfs:
3437 3437 vfsp = (zone == global_zone) ?
3438 3438 vfsp->vfs_next : vfsp->vfs_zone_next;
3439 3439
3440 3440 } while (vfsp != vfslist);
3441 3441
3442 3442 vfs_list_unlock();
3443 3443
3444 3444 /*
3445 3445 * now we can run through what we've extracted without holding
3446 3446 * vfs_list_read_lock()
3447 3447 */
3448 3448 printp = print_head;
3449 3449 while (printp != NULL) {
3450 3450 struct print_data *printp_next;
3451 3451 const char *resource;
3452 3452 char *mntpt;
3453 3453 struct vnode *vp;
3454 3454 int error;
3455 3455
3456 3456 mntpt = (char *)refstr_value(printp->vfs_mntpt);
3457 3457 resource = refstr_value(printp->vfs_resource);
3458 3458
3459 3459 if (mntpt != NULL && mntpt[0] != '\0')
3460 3460 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
3461 3461 else
3462 3462 mntpt = "-";
3463 3463
3464 3464 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
3465 3465
3466 3466 if (error != 0)
3467 3467 goto nextp;
3468 3468
3469 3469 if (!(vp->v_flag & VROOT)) {
3470 3470 VN_RELE(vp);
3471 3471 goto nextp;
3472 3472 }
3473 3473 VN_RELE(vp);
3474 3474
3475 3475 if (resource != NULL && resource[0] != '\0') {
3476 3476 if (resource[0] == '/') {
3477 3477 resource = ZONE_PATH_VISIBLE(resource, zone) ?
3478 3478 ZONE_PATH_TRANSLATE(resource, zone) :
3479 3479 mntpt;
3480 3480 }
3481 3481 } else {
3482 3482 resource = "-";
3483 3483 }
3484 3484
3485 3485 lxpr_uiobuf_printf(uiobuf,
3486 3486 "%s %s %s %s 0 0\n",
3487 3487 resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
3488 3488 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
3489 3489
3490 3490 nextp:
3491 3491 printp_next = printp->next;
3492 3492 refstr_rele(printp->vfs_mntpt);
3493 3493 refstr_rele(printp->vfs_resource);
3494 3494 kmem_free(printp, sizeof (*printp));
3495 3495 printp = printp_next;
3496 3496
3497 3497 }
3498 3498 }
3499 3499
3500 3500 /*
3501 3501 * lxpr_read_partitions():
3502 3502 *
3503 3503 * Over the years, /proc/partitions has been made considerably smaller -- to
3504 3504 * the point that it really is only major number, minor number, number of
3505 3505 * blocks (which we report as 0), and partition name.
3506 3506 *
3507 3507 * We support this because some things want to see it to make sense of
3508 3508 * /proc/diskstats, and also because "fdisk -l" and a few other things look
3509 3509 * here to find all disks on the system.
3510 3510 */
3511 3511 /* ARGSUSED */
3512 3512 static void
3513 3513 lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3514 3514 {
3515 3515
3516 3516 kstat_t *ksr;
3517 3517 kstat_t ks0;
3518 3518 int nidx, num, i;
3519 3519 size_t sidx, size;
3520 3520 zfs_cmd_t *zc;
3521 3521 nvlist_t *nv = NULL;
3522 3522 nvpair_t *elem = NULL;
3523 3523 lxpr_mnt_t *mnt;
3524 3524 lxpr_zfs_iter_t zfsi;
3525 3525
3526 3526 ASSERT(lxpnp->lxpr_type == LXPR_PARTITIONS);
3527 3527
3528 3528 ks0.ks_kid = 0;
3529 3529 ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
3530 3530
3531 3531 if (ksr == NULL)
3532 3532 return;
3533 3533
3534 3534 lxpr_uiobuf_printf(uiobuf, "major minor #blocks name\n\n");
3535 3535
3536 3536 for (i = 1; i < nidx; i++) {
3537 3537 kstat_t *ksp = &ksr[i];
3538 3538 kstat_io_t *kip;
3539 3539
3540 3540 if (ksp->ks_type != KSTAT_TYPE_IO ||
3541 3541 strcmp(ksp->ks_class, "disk") != 0)
3542 3542 continue;
3543 3543
3544 3544 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
3545 3545 &size, &num)) == NULL)
3546 3546 continue;
3547 3547
3548 3548 if (size < sizeof (kstat_io_t)) {
3549 3549 kmem_free(kip, size);
3550 3550 continue;
3551 3551 }
3552 3552
3553 3553 lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d %s\n",
3554 3554 mod_name_to_major(ksp->ks_module),
3555 3555 ksp->ks_instance, 0, ksp->ks_name);
3556 3556
3557 3557 kmem_free(kip, size);
3558 3558 }
3559 3559
3560 3560 kmem_free(ksr, sidx);
3561 3561
3562 3562 /* If we never got to open the zfs LDI, then stop now. */
3563 3563 mnt = (lxpr_mnt_t *)lxpnp->lxpr_vnode->v_vfsp->vfs_data;
3564 3564 if (mnt->lxprm_zfs_isopen == B_FALSE)
3565 3565 return;
3566 3566
3567 3567 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3568 3568
3569 3569 if (lxpr_zfs_list_pools(mnt, zc, &nv) != 0)
3570 3570 goto out;
3571 3571
3572 3572 while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
3573 3573 char *pool = nvpair_name(elem);
3574 3574
3575 3575 bzero(&zfsi, sizeof (lxpr_zfs_iter_t));
3576 3576 while (lxpr_zfs_next_zvol(mnt, pool, zc, &zfsi) == 0) {
3577 3577 major_t major;
3578 3578 minor_t minor;
3579 3579 if (lxpr_zvol_dev(mnt, zc->zc_name, &major, &minor)
3580 3580 != 0)
3581 3581 continue;
3582 3582
3583 3583 lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d zvol/dsk/%s\n",
3584 3584 major, minor, 0, zc->zc_name);
3585 3585 }
3586 3586 }
3587 3587
3588 3588 nvlist_free(nv);
3589 3589 out:
3590 3590 kmem_free(zc, sizeof (zfs_cmd_t));
3591 3591 }
3592 3592
3593 3593 /*
3594 3594 * lxpr_read_diskstats():
3595 3595 *
3596 3596 * See the block comment above the per-device output-generating line for the
3597 3597 * details of the format.
3598 3598 */
3599 3599 /* ARGSUSED */
3600 3600 static void
3601 3601 lxpr_read_diskstats(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3602 3602 {
3603 3603 kstat_t *ksr;
3604 3604 kstat_t ks0;
3605 3605 int nidx, num, i;
3606 3606 size_t sidx, size;
3607 3607
3608 3608 ASSERT(lxpnp->lxpr_type == LXPR_DISKSTATS);
3609 3609
3610 3610 ks0.ks_kid = 0;
3611 3611 ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
3612 3612
3613 3613 if (ksr == NULL)
3614 3614 return;
3615 3615
3616 3616 for (i = 1; i < nidx; i++) {
3617 3617 kstat_t *ksp = &ksr[i];
3618 3618 kstat_io_t *kip;
3619 3619
3620 3620 if (ksp->ks_type != KSTAT_TYPE_IO ||
3621 3621 strcmp(ksp->ks_class, "disk") != 0)
3622 3622 continue;
3623 3623
3624 3624 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
3625 3625 &size, &num)) == NULL)
3626 3626 continue;
3627 3627
3628 3628 if (size < sizeof (kstat_io_t)) {
3629 3629 kmem_free(kip, size);
3630 3630 continue;
3631 3631 }
3632 3632
3633 3633 /*
3634 3634 * /proc/diskstats is defined to have one line of output for
3635 3635 * each block device, with each line containing the following
3636 3636 * 14 fields:
3637 3637 *
3638 3638 * 1 - major number
3639 3639	 * 2 - minor number
3640 3640 * 3 - device name
3641 3641 * 4 - reads completed successfully
3642 3642 * 5 - reads merged
3643 3643 * 6 - sectors read
3644 3644 * 7 - time spent reading (ms)
3645 3645 * 8 - writes completed
3646 3646 * 9 - writes merged
3647 3647 * 10 - sectors written
3648 3648 * 11 - time spent writing (ms)
3649 3649 * 12 - I/Os currently in progress
3650 3650 * 13 - time spent doing I/Os (ms)
3651 3651 * 14 - weighted time spent doing I/Os (ms)
3652 3652 *
3653 3653 * One small hiccup: we don't actually keep track of time
3654 3654 * spent reading vs. time spent writing -- we keep track of
3655 3655 * time waiting vs. time actually performing I/O. While we
3656 3656 * could divide the total time by the I/O mix (making the
3657 3657 * obviously wrong assumption that I/O operations all take the
3658 3658 * same amount of time), this has the undesirable side-effect
3659 3659 * of moving backwards. Instead, we report the total time
3660 3660 * (read + write) for all three stats (read, write, total).
3661 3661 * This is also a lie of sorts, but it should be more
3662 3662 * immediately clear to the user that reads and writes are
3663 3663 * each being double-counted as the other.
3664 3664 */
3665 3665 lxpr_uiobuf_printf(uiobuf, "%4d %7d %s "
3666 3666 "%llu %llu %llu %llu "
3667 3667 "%llu %llu %llu %llu "
3668 3668 "%llu %llu %llu\n",
3669 3669 mod_name_to_major(ksp->ks_module),
3670 3670 ksp->ks_instance, ksp->ks_name,
3671 3671 (uint64_t)kip->reads, 0LL,
3672 3672 kip->nread / (uint64_t)LXPR_SECTOR_SIZE,
3673 3673 (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3674 3674 (uint64_t)kip->writes, 0LL,
3675 3675 kip->nwritten / (uint64_t)LXPR_SECTOR_SIZE,
3676 3676 (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3677 3677 (uint64_t)(kip->rcnt + kip->wcnt),
3678 3678 (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3679 3679 (kip->rlentime + kip->wlentime) /
3680 3680 (uint64_t)(NANOSEC / MILLISEC));
3681 3681
3682 3682 kmem_free(kip, size);
3683 3683 }
3684 3684
3685 3685 kmem_free(ksr, sidx);
3686 3686 }
3687 3687
3688 3688 /*
3689 3689 * lxpr_read_version(): read the contents of the "version" file.
3690 3690 */
3691 3691 /* ARGSUSED */
3692 3692 static void
3693 3693 lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3694 3694 {
3695 3695 lx_zone_data_t *lxzd = ztolxzd(LXPTOZ(lxpnp));
3696 3696 lx_proc_data_t *lxpd = ptolxproc(curproc);
3697 3697 const char *release = lxzd->lxzd_kernel_release;
3698 3698 const char *version = lxzd->lxzd_kernel_version;
3699 3699
3700 3700 /* Use per-process overrides, if specified */
3701 3701 if (lxpd != NULL && lxpd->l_uname_release[0] != '\0') {
3702 3702 release = lxpd->l_uname_release;
3703 3703 }
3704 3704 if (lxpd != NULL && lxpd->l_uname_version[0] != '\0') {
3705 3705 version = lxpd->l_uname_version;
3706 3706 }
3707 3707
3708 3708 lxpr_uiobuf_printf(uiobuf,
3709 3709 "%s version %s (%s version %d.%d.%d) %s\n",
3710 3710 LX_UNAME_SYSNAME, release,
3711 3711 #if defined(__GNUC__)
3712 3712 "gcc", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
3713 3713 #else
3714 3714 "cc", 1, 0, 0,
3715 3715 #endif
3716 3716 version);
3717 3717 }
3718 3718
3719 3719 /*
3720 3720 * lxpr_read_stat(): read the contents of the "stat" file.
3721 3721 *
3722 3722 */
3723 3723 /* ARGSUSED */
3724 3724 static void
3725 3725 lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3726 3726 {
3727 3727 cpu_t *cp, *cpstart;
3728 3728 int pools_enabled;
3729 3729 ulong_t idle_cum = 0;
3730 3730 ulong_t sys_cum = 0;
3731 3731 ulong_t user_cum = 0;
3732 3732 ulong_t irq_cum = 0;
3733 3733 ulong_t cpu_nrunnable_cum = 0;
3734 3734 ulong_t w_io_cum = 0;
3735 3735
3736 3736 ulong_t pgpgin_cum = 0;
3737 3737 ulong_t pgpgout_cum = 0;
3738 3738 ulong_t pgswapout_cum = 0;
3739 3739 ulong_t pgswapin_cum = 0;
3740 3740 ulong_t intr_cum = 0;
3741 3741 ulong_t pswitch_cum = 0;
3742 3742 ulong_t forks_cum = 0;
3743 3743 hrtime_t msnsecs[NCMSTATES];
3744 3744 /* is the emulated release > 2.4 */
3745 3745 boolean_t newer_than24 = lx_kern_release_cmp(LXPTOZ(lxpnp), "2.4") > 0;
3746 3746 /* temporary variable since scalehrtime modifies data in place */
3747 3747 hrtime_t tmptime;
3748 3748
3749 3749 ASSERT(lxpnp->lxpr_type == LXPR_STAT);
3750 3750
3751 3751 mutex_enter(&cpu_lock);
3752 3752 pools_enabled = pool_pset_enabled();
3753 3753
3754 3754 /* Calculate cumulative stats */
3755 3755 cp = cpstart = CPU->cpu_part->cp_cpulist;
3756 3756 do {
3757 3757 int i;
3758 3758
3759 3759 /*
3760 3760 * Don't count CPUs that aren't even in the system
3761 3761 * or aren't up yet.
3762 3762 */
3763 3763 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3764 3764 continue;
3765 3765 }
3766 3766
3767 3767 get_cpu_mstate(cp, msnsecs);
3768 3768
3769 3769 idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3770 3770 sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3771 3771 user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
3772 3772
3773 3773 pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
3774 3774 pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
3775 3775 pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
3776 3776 pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
3777 3777
3778 3778
3779 3779 if (newer_than24) {
3780 3780 cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
3781 3781 w_io_cum += CPU_STATS(cp, sys.iowait);
3782 3782 for (i = 0; i < NCMSTATES; i++) {
3783 3783 tmptime = cp->cpu_intracct[i];
3784 3784 scalehrtime(&tmptime);
3785 3785 irq_cum += NSEC_TO_TICK(tmptime);
3786 3786 }
3787 3787 }
3788 3788
3789 3789 for (i = 0; i < PIL_MAX; i++)
3790 3790 intr_cum += CPU_STATS(cp, sys.intr[i]);
3791 3791
3792 3792 pswitch_cum += CPU_STATS(cp, sys.pswitch);
3793 3793 forks_cum += CPU_STATS(cp, sys.sysfork);
3794 3794 forks_cum += CPU_STATS(cp, sys.sysvfork);
3795 3795
3796 3796 if (pools_enabled)
3797 3797 cp = cp->cpu_next_part;
3798 3798 else
3799 3799 cp = cp->cpu_next;
3800 3800 } while (cp != cpstart);
3801 3801
3802 3802 if (newer_than24) {
3803 3803 lxpr_uiobuf_printf(uiobuf,
3804 3804 "cpu %lu %lu %lu %lu %lu %lu %lu\n",
3805 3805 user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
3806 3806 } else {
3807 3807 lxpr_uiobuf_printf(uiobuf,
3808 3808 "cpu %lu %lu %lu %lu\n",
3809 3809 user_cum, 0L, sys_cum, idle_cum);
3810 3810 }
3811 3811
3812 3812 /* Do per processor stats */
3813 3813 do {
3814 3814 int i;
3815 3815
3816 3816 ulong_t idle_ticks;
3817 3817 ulong_t sys_ticks;
3818 3818 ulong_t user_ticks;
3819 3819 ulong_t irq_ticks = 0;
3820 3820
3821 3821 /*
3822 3822 * Don't count CPUs that aren't even in the system
3823 3823 * or aren't up yet.
3824 3824 */
3825 3825 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3826 3826 continue;
3827 3827 }
3828 3828
3829 3829 get_cpu_mstate(cp, msnsecs);
3830 3830
3831 3831 idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3832 3832 sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3833 3833 user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
3834 3834
3835 3835 for (i = 0; i < NCMSTATES; i++) {
3836 3836 tmptime = cp->cpu_intracct[i];
3837 3837 scalehrtime(&tmptime);
3838 3838 irq_ticks += NSEC_TO_TICK(tmptime);
3839 3839 }
3840 3840
3841 3841 if (newer_than24) {
3842 3842 lxpr_uiobuf_printf(uiobuf,
3843 3843 "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
3844 3844 cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
3845 3845 0L, irq_ticks, 0L);
3846 3846 } else {
3847 3847 lxpr_uiobuf_printf(uiobuf,
3848 3848 "cpu%d %lu %lu %lu %lu\n",
3849 3849 cp->cpu_id,
3850 3850 user_ticks, 0L, sys_ticks, idle_ticks);
3851 3851 }
3852 3852
3853 3853 if (pools_enabled)
3854 3854 cp = cp->cpu_next_part;
3855 3855 else
3856 3856 cp = cp->cpu_next;
3857 3857 } while (cp != cpstart);
3858 3858
3859 3859 mutex_exit(&cpu_lock);
3860 3860
3861 3861 if (newer_than24) {
3862 3862 lxpr_uiobuf_printf(uiobuf,
3863 3863 "page %lu %lu\n"
3864 3864 "swap %lu %lu\n"
3865 3865 "intr %lu\n"
3866 3866 "ctxt %lu\n"
3867 3867 "btime %lu\n"
3868 3868 "processes %lu\n"
3869 3869 "procs_running %lu\n"
3870 3870 "procs_blocked %lu\n",
3871 3871 pgpgin_cum, pgpgout_cum,
3872 3872 pgswapin_cum, pgswapout_cum,
3873 3873 intr_cum,
3874 3874 pswitch_cum,
3875 3875 boot_time,
3876 3876 forks_cum,
3877 3877 cpu_nrunnable_cum,
3878 3878 w_io_cum);
3879 3879 } else {
3880 3880 lxpr_uiobuf_printf(uiobuf,
3881 3881 "page %lu %lu\n"
3882 3882 "swap %lu %lu\n"
3883 3883 "intr %lu\n"
3884 3884 "ctxt %lu\n"
3885 3885 "btime %lu\n"
3886 3886 "processes %lu\n",
3887 3887 pgpgin_cum, pgpgout_cum,
3888 3888 pgswapin_cum, pgswapout_cum,
3889 3889 intr_cum,
3890 3890 pswitch_cum,
3891 3891 boot_time,
↓ open down ↓ |
3891 lines elided |
↑ open up ↑ |
3892 3892 forks_cum);
3893 3893 }
3894 3894 }
3895 3895
3896 3896 /*
3897 3897 * lxpr_read_swaps():
3898 3898 *
3899 3899 * We don't support swap files or partitions, but some programs like to look
3900 3900 * here just to check we have some swap on the system, so we lie and show
3901 3901 * our entire swap cap as one swap partition.
3902 + *
3903 + * It is important to use formatting identical to the Linux implementation
3904 + * so that consumers do not break. See swap_show() in mm/swapfile.c.
3902 3905 */
3903 3906 /* ARGSUSED */
3904 3907 static void
3905 3908 lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3906 3909 {
3907 3910 zone_t *zone = curzone;
3908 3911 uint64_t totswap, usedswap;
3909 3912
3910 3913 mutex_enter(&zone->zone_mem_lock);
3911 3914 /* Uses units of 1 kb (2^10). */
3912 3915 totswap = zone->zone_max_swap_ctl >> 10;
3913 3916 usedswap = zone->zone_max_swap >> 10;
3914 3917 mutex_exit(&zone->zone_mem_lock);
3915 3918
3916 3919 lxpr_uiobuf_printf(uiobuf,
3917 - "Filename "
3918 - "Type Size Used Priority\n");
3919 - lxpr_uiobuf_printf(uiobuf, "%-40s%-16s%-8llu%-8llu%-8d\n",
3920 + "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
3921 + lxpr_uiobuf_printf(uiobuf, "%-40s%s\t%llu\t%llu\t%d\n",
3920 3922 "/dev/swap", "partition", totswap, usedswap, -1);
3921 3923 }
3922 3924
3923 3925 /*
3924 3926 * inotify tunables exported via /proc.
3925 3927 */
3926 3928 extern int inotify_maxevents;
3927 3929 extern int inotify_maxinstances;
3928 3930 extern int inotify_maxwatches;
3929 3931
3930 3932 static void
3931 3933 lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp,
3932 3934 lxpr_uiobuf_t *uiobuf)
3933 3935 {
3934 3936 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS);
3935 3937 lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents);
3936 3938 }
3937 3939
3938 3940 static void
3939 3941 lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp,
3940 3942 lxpr_uiobuf_t *uiobuf)
3941 3943 {
3942 3944 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES);
3943 3945 lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances);
3944 3946 }
3945 3947
3946 3948 static void
3947 3949 lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp,
3948 3950 lxpr_uiobuf_t *uiobuf)
3949 3951 {
3950 3952 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES);
3951 3953 lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches);
3952 3954 }
3953 3955
3954 3956 static void
3955 3957 lxpr_read_sys_kernel_caplcap(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3956 3958 {
3957 3959 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_CAPLCAP);
3958 3960 lxpr_uiobuf_printf(uiobuf, "%d\n", LX_CAP_MAX_VALID);
3959 3961 }
3960 3962
3961 3963 static void
3962 3964 lxpr_read_sys_kernel_corepatt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3963 3965 {
3964 3966 zone_t *zone = curproc->p_zone;
3965 3967 struct core_globals *cg;
3966 3968 refstr_t *rp;
3967 3969 corectl_path_t *ccp;
3968 3970 char tr[MAXPATHLEN];
3969 3971
3970 3972 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);
3971 3973
3972 3974 cg = zone_getspecific(core_zone_key, zone);
3973 3975 ASSERT(cg != NULL);
3974 3976
3975 3977 /* If core dumps are disabled, return an empty string. */
3976 3978 if ((cg->core_options & CC_PROCESS_PATH) == 0) {
3977 3979 lxpr_uiobuf_printf(uiobuf, "\n");
3978 3980 return;
3979 3981 }
3980 3982
3981 3983 ccp = cg->core_default_path;
3982 3984 mutex_enter(&ccp->ccp_mtx);
3983 3985 if ((rp = ccp->ccp_path) != NULL)
3984 3986 refstr_hold(rp);
3985 3987 mutex_exit(&ccp->ccp_mtx);
3986 3988
3987 3989 if (rp == NULL) {
3988 3990 lxpr_uiobuf_printf(uiobuf, "\n");
3989 3991 return;
3990 3992 }
3991 3993
3992 3994 bzero(tr, sizeof (tr));
3993 3995 if (lxpr_core_path_s2l(refstr_value(rp), tr, sizeof (tr)) != 0) {
3994 3996 refstr_rele(rp);
3995 3997 lxpr_uiobuf_printf(uiobuf, "\n");
3996 3998 return;
3997 3999 }
3998 4000
3999 4001 refstr_rele(rp);
4000 4002 lxpr_uiobuf_printf(uiobuf, "%s\n", tr);
4001 4003 }
4002 4004
4003 4005 static void
4004 4006 lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4005 4007 {
4006 4008 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME);
4007 4009 lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename());
4008 4010 }
4009 4011
4010 4012 static void
4011 4013 lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4012 4014 {
4013 4015 rctl_qty_t val;
4014 4016
4015 4017 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI);
4016 4018
4017 4019 mutex_enter(&curproc->p_lock);
4018 4020 val = rctl_enforced_value(rc_zone_msgmni,
4019 4021 curproc->p_zone->zone_rctls, curproc);
4020 4022 mutex_exit(&curproc->p_lock);
4021 4023
4022 4024 lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4023 4025 }
4024 4026
4025 4027 static void
4026 4028 lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4027 4029 {
4028 4030 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX);
4029 4031 lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max);
4030 4032 }
4031 4033
4032 4034 static void
4033 4035 lxpr_read_sys_kernel_osrel(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4034 4036 {
4035 4037 lx_zone_data_t *br_data;
4036 4038
4037 4039 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_OSREL);
4038 4040 br_data = ztolxzd(curproc->p_zone);
4039 4041 if (curproc->p_zone->zone_brand == &lx_brand) {
4040 4042 lxpr_uiobuf_printf(uiobuf, "%s\n",
4041 4043 br_data->lxzd_kernel_version);
4042 4044 } else {
4043 4045 lxpr_uiobuf_printf(uiobuf, "\n");
4044 4046 }
4045 4047 }
4046 4048
4047 4049 static void
4048 4050 lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4049 4051 {
4050 4052 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX);
4051 4053 lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid);
4052 4054 }
4053 4055
4054 4056 static void
4055 4057 lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4056 4058 {
4057 4059 /*
4058 4060 * This file isn't documented on the Linux proc(5) man page but
4059 4061 * according to the blog of the author of systemd/journald (the
4060 4062 * consumer), he says:
4061 4063 * boot_id: A random ID that is regenerated on each boot. As such it
4062 4064 * can be used to identify the local machine's current boot. It's
4063 4065 * universally available on any recent Linux kernel. It's a good and
4064 4066 * safe choice if you need to identify a specific boot on a specific
4065 4067 * booted kernel.
4066 4068 *
4067 4069 * We'll just generate a random ID if necessary. On Linux the format
4068 4070 * appears to resemble a uuid but since it is not documented to be a
4069 4071 * uuid, we don't worry about that.
4070 4072 */
4071 4073 lx_zone_data_t *br_data;
4072 4074
4073 4075 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RAND_BOOTID);
4074 4076
4075 4077 if (curproc->p_zone->zone_brand != &lx_brand) {
4076 4078 lxpr_uiobuf_printf(uiobuf, "0\n");
4077 4079 return;
4078 4080 }
4079 4081
4080 4082 br_data = ztolxzd(curproc->p_zone);
4081 4083 if (br_data->lxzd_bootid[0] == '\0') {
4082 4084 extern int getrandom(void *, size_t, int);
4083 4085 int i;
4084 4086
4085 4087 for (i = 0; i < 5; i++) {
4086 4088 u_longlong_t n;
4087 4089 char s[32];
4088 4090
4089 4091 (void) random_get_bytes((uint8_t *)&n, sizeof (n));
4090 4092 switch (i) {
4091 4093 case 0: (void) snprintf(s, sizeof (s), "%08llx", n);
4092 4094 s[8] = '\0';
4093 4095 break;
4094 4096 case 4: (void) snprintf(s, sizeof (s), "%012llx", n);
4095 4097 s[12] = '\0';
4096 4098 break;
4097 4099 default: (void) snprintf(s, sizeof (s), "%04llx", n);
4098 4100 s[4] = '\0';
4099 4101 break;
4100 4102 }
4101 4103 if (i > 0)
4102 4104 strlcat(br_data->lxzd_bootid, "-",
4103 4105 sizeof (br_data->lxzd_bootid));
4104 4106 strlcat(br_data->lxzd_bootid, s,
4105 4107 sizeof (br_data->lxzd_bootid));
4106 4108 }
4107 4109 }
4108 4110
4109 4111 lxpr_uiobuf_printf(uiobuf, "%s\n", br_data->lxzd_bootid);
4110 4112 }
4111 4113
4112 4114 static void
4113 4115 lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4114 4116 {
4115 4117 rctl_qty_t val;
4116 4118
4117 4119 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX);
4118 4120
4119 4121 mutex_enter(&curproc->p_lock);
4120 4122 val = rctl_enforced_value(rc_zone_shmmax,
4121 4123 curproc->p_zone->zone_rctls, curproc);
4122 4124 mutex_exit(&curproc->p_lock);
4123 4125
4124 4126 if (val > FOURGB)
4125 4127 val = FOURGB;
4126 4128
4127 4129 lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4128 4130 }
4129 4131
4130 4132 static void
4131 4133 lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4132 4134 {
4133 4135 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX);
4134 4136 lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl);
4135 4137 }
4136 4138
4137 4139 static void
4138 4140 lxpr_read_sys_net_core_somaxc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4139 4141 {
4140 4142 netstack_t *ns;
4141 4143 tcp_stack_t *tcps;
4142 4144
4143 4145 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
4144 4146
4145 4147 ns = netstack_get_current();
4146 4148 if (ns == NULL) {
4147 4149 lxpr_uiobuf_printf(uiobuf, "%d\n", SOMAXCONN);
4148 4150 return;
4149 4151 }
4150 4152
4151 4153 tcps = ns->netstack_tcp;
4152 4154 lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_conn_req_max_q);
4153 4155 netstack_rele(ns);
4154 4156 }
4155 4157
4156 4158 static void
4157 4159 lxpr_read_sys_vm_minfr_kb(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4158 4160 {
4159 4161 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MINFR_KB);
4160 4162 lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4161 4163 }
4162 4164
4163 4165 static void
4164 4166 lxpr_read_sys_vm_nhpages(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4165 4167 {
4166 4168 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_NHUGEP);
4167 4169 lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4168 4170 }
4169 4171
4170 4172 static void
4171 4173 lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4172 4174 {
4173 4175 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_OVERCOMMIT_MEM);
4174 4176 lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4175 4177 }
4176 4178
4177 4179 static void
4178 4180 lxpr_read_sys_vm_swappiness(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4179 4181 {
4180 4182 ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_SWAPPINESS);
4181 4183 lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
4182 4184 }
4183 4185
4184 4186 /*
4185 4187 * lxpr_read_uptime(): read the contents of the "uptime" file.
4186 4188 *
4187 4189 * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
4188 4190 * Use fixed point arithmetic to get 2 decimal places
4189 4191 */
4190 4192 /* ARGSUSED */
4191 4193 static void
4192 4194 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4193 4195 {
4194 4196 cpu_t *cp, *cpstart;
4195 4197 int pools_enabled;
4196 4198 ulong_t idle_cum = 0;
4197 4199 ulong_t cpu_count = 0;
4198 4200 ulong_t idle_s;
4199 4201 ulong_t idle_cs;
4200 4202 ulong_t up_s;
4201 4203 ulong_t up_cs;
4202 4204 hrtime_t birthtime;
4203 4205 hrtime_t centi_sec = 10000000; /* 10^7 */
4204 4206
4205 4207 ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
4206 4208
4207 4209 /* Calculate cumulative stats */
4208 4210 mutex_enter(&cpu_lock);
4209 4211 pools_enabled = pool_pset_enabled();
4210 4212
4211 4213 cp = cpstart = CPU->cpu_part->cp_cpulist;
4212 4214 do {
4213 4215 /*
4214 4216 * Don't count CPUs that aren't even in the system
4215 4217 * or aren't up yet.
4216 4218 */
4217 4219 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
4218 4220 continue;
4219 4221 }
4220 4222
4221 4223 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
4222 4224 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
4223 4225 cpu_count += 1;
4224 4226
4225 4227 if (pools_enabled)
4226 4228 cp = cp->cpu_next_part;
4227 4229 else
4228 4230 cp = cp->cpu_next;
4229 4231 } while (cp != cpstart);
4230 4232 mutex_exit(&cpu_lock);
4231 4233
4232 4234 /* Getting the Zone zsched process startup time */
4233 4235 birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
4234 4236 up_cs = (gethrtime() - birthtime) / centi_sec;
4235 4237 up_s = up_cs / 100;
4236 4238 up_cs %= 100;
4237 4239
4238 4240 ASSERT(cpu_count > 0);
4239 4241 idle_cum /= cpu_count;
4240 4242 idle_s = idle_cum / hz;
4241 4243 idle_cs = idle_cum % hz;
4242 4244 idle_cs *= 100;
4243 4245 idle_cs /= hz;
4244 4246
4245 4247 lxpr_uiobuf_printf(uiobuf,
4246 4248 "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
4247 4249 }
4248 4250
4249 4251 static const char *amd_x_edx[] = {
4250 4252 NULL, NULL, NULL, NULL,
4251 4253 NULL, NULL, NULL, NULL,
4252 4254 NULL, NULL, NULL, "syscall",
4253 4255 NULL, NULL, NULL, NULL,
4254 4256 NULL, NULL, NULL, "mp",
4255 4257 "nx", NULL, "mmxext", NULL,
4256 4258 NULL, NULL, NULL, NULL,
4257 4259 NULL, "lm", "3dnowext", "3dnow"
4258 4260 };
4259 4261
4260 4262 static const char *amd_x_ecx[] = {
4261 4263 "lahf_lm", NULL, "svm", NULL,
4262 4264 "altmovcr8"
4263 4265 };
4264 4266
4265 4267 static const char *tm_x_edx[] = {
4266 4268 "recovery", "longrun", NULL, "lrti"
4267 4269 };
4268 4270
4269 4271 /*
4270 4272 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
4271 4273 */
4272 4274 static const char *intc_x_edx[] = {
4273 4275 NULL, NULL, NULL, NULL,
4274 4276 NULL, NULL, NULL, NULL,
4275 4277 NULL, NULL, NULL, "syscall",
4276 4278 NULL, NULL, NULL, NULL,
4277 4279 NULL, NULL, NULL, NULL,
4278 4280 "nx", NULL, NULL, NULL,
4279 4281 NULL, NULL, NULL, NULL,
4280 4282 NULL, "lm", NULL, NULL
4281 4283 };
4282 4284
4283 4285 static const char *intc_edx[] = {
4284 4286 "fpu", "vme", "de", "pse",
4285 4287 "tsc", "msr", "pae", "mce",
4286 4288 "cx8", "apic", NULL, "sep",
4287 4289 "mtrr", "pge", "mca", "cmov",
4288 4290 "pat", "pse36", "pn", "clflush",
4289 4291 NULL, "dts", "acpi", "mmx",
4290 4292 "fxsr", "sse", "sse2", "ss",
4291 4293 "ht", "tm", "ia64", "pbe"
4292 4294 };
4293 4295
4294 4296 /*
4295 4297 * "sse3" on linux is called "pni" (Prescott New Instructions).
4296 4298 */
4297 4299 static const char *intc_ecx[] = {
4298 4300 "pni", NULL, NULL, "monitor",
4299 4301 "ds_cpl", NULL, NULL, "est",
4300 4302 "tm2", NULL, "cid", NULL,
4301 4303 NULL, "cx16", "xtpr"
4302 4304 };
4303 4305
4304 4306 /*
4305 4307 * Report a list of each cgroup subsystem supported by our emulated cgroup fs.
4306 4308 * This needs to exist for systemd to run but for now we don't report any
4307 4309 * cgroup subsystems as being installed. The commented example below shows
4308 4310 * how to print a subsystem entry.
4309 4311 */
4310 4312 static void
4311 4313 lxpr_read_cgroups(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4312 4314 {
4313 4315 lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
4314 4316 "#subsys_name", "hierarchy", "num_cgroups", "enabled");
4315 4317
4316 4318 /*
4317 4319 * lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
4318 4320 * "cpu,cpuacct", "2", "1", "1");
4319 4321 */
4320 4322 }
4321 4323
4322 4324 static void
4323 4325 lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4324 4326 {
4325 4327 int i;
4326 4328 uint32_t bits;
4327 4329 cpu_t *cp, *cpstart;
4328 4330 int pools_enabled;
4329 4331 const char **fp;
4330 4332 char brandstr[CPU_IDSTRLEN];
4331 4333 struct cpuid_regs cpr;
4332 4334 int maxeax;
4333 4335 int std_ecx, std_edx, ext_ecx, ext_edx;
4334 4336
4335 4337 ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
4336 4338
4337 4339 mutex_enter(&cpu_lock);
4338 4340 pools_enabled = pool_pset_enabled();
4339 4341
4340 4342 cp = cpstart = CPU->cpu_part->cp_cpulist;
4341 4343 do {
4342 4344 /*
4343 4345 * This returns the maximum eax value for standard cpuid
4344 4346 * functions in eax.
4345 4347 */
4346 4348 cpr.cp_eax = 0;
4347 4349 (void) cpuid_insn(cp, &cpr);
4348 4350 maxeax = cpr.cp_eax;
4349 4351
4350 4352 /*
4351 4353 * Get standard x86 feature flags.
4352 4354 */
4353 4355 cpr.cp_eax = 1;
4354 4356 (void) cpuid_insn(cp, &cpr);
4355 4357 std_ecx = cpr.cp_ecx;
4356 4358 std_edx = cpr.cp_edx;
4357 4359
4358 4360 /*
4359 4361 * Now get extended feature flags.
4360 4362 */
4361 4363 cpr.cp_eax = 0x80000001;
4362 4364 (void) cpuid_insn(cp, &cpr);
4363 4365 ext_ecx = cpr.cp_ecx;
4364 4366 ext_edx = cpr.cp_edx;
4365 4367
4366 4368 (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
4367 4369
4368 4370 lxpr_uiobuf_printf(uiobuf,
4369 4371 "processor\t: %d\n"
4370 4372 "vendor_id\t: %s\n"
4371 4373 "cpu family\t: %d\n"
4372 4374 "model\t\t: %d\n"
4373 4375 "model name\t: %s\n"
4374 4376 "stepping\t: %d\n"
4375 4377 "cpu MHz\t\t: %u.%03u\n",
4376 4378 cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
4377 4379 cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
4378 4380 (uint32_t)(cpu_freq_hz / 1000000),
4379 4381 ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
4380 4382
4381 4383 lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
4382 4384 getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
4383 4385
4384 4386 if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
4385 4387 /*
4386 4388 * 'siblings' is used for HT-style threads
4387 4389 */
4388 4390 lxpr_uiobuf_printf(uiobuf,
4389 4391 "physical id\t: %lu\n"
4390 4392 "siblings\t: %u\n",
4391 4393 pg_plat_hw_instance_id(cp, PGHW_CHIP),
4392 4394 cpuid_get_ncpu_per_chip(cp));
4393 4395 }
4394 4396
4395 4397 /*
4396 4398 * Since we're relatively picky about running on older hardware,
4397 4399 * we can be somewhat cavalier about the answers to these ones.
4398 4400 *
4399 4401 * In fact, given the hardware we support, we just say:
4400 4402 *
4401 4403 * fdiv_bug : no (if we're on a 64-bit kernel)
4402 4404 * hlt_bug : no
4403 4405 * f00f_bug : no
4404 4406 * coma_bug : no
4405 4407	 * wp : yes (write protect in supervisor mode)
4406 4408 */
4407 4409 lxpr_uiobuf_printf(uiobuf,
4408 4410 "fdiv_bug\t: %s\n"
4409 4411 "hlt_bug \t: no\n"
4410 4412 "f00f_bug\t: no\n"
4411 4413 "coma_bug\t: no\n"
4412 4414 "fpu\t\t: %s\n"
4413 4415 "fpu_exception\t: %s\n"
4414 4416 "cpuid level\t: %d\n"
4415 4417 "flags\t\t:",
4416 4418 #if defined(__i386)
4417 4419 fpu_pentium_fdivbug ? "yes" : "no",
4418 4420 #else
4419 4421 "no",
4420 4422 #endif /* __i386 */
4421 4423 fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
4422 4424 maxeax);
4423 4425
4424 4426 for (bits = std_edx, fp = intc_edx, i = 0;
4425 4427 i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
4426 4428 if ((bits & (1 << i)) != 0 && *fp)
4427 4429 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4428 4430
4429 4431 /*
4430 4432 * name additional features where appropriate
4431 4433 */
4432 4434 switch (x86_vendor) {
4433 4435 case X86_VENDOR_Intel:
4434 4436 for (bits = ext_edx, fp = intc_x_edx, i = 0;
4435 4437 i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
4436 4438 fp++, i++)
4437 4439 if ((bits & (1 << i)) != 0 && *fp)
4438 4440 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4439 4441 break;
4440 4442
4441 4443 case X86_VENDOR_AMD:
4442 4444 for (bits = ext_edx, fp = amd_x_edx, i = 0;
4443 4445 i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
4444 4446 fp++, i++)
4445 4447 if ((bits & (1 << i)) != 0 && *fp)
4446 4448 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4447 4449
4448 4450 for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
4449 4451 i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
4450 4452 fp++, i++)
4451 4453 if ((bits & (1 << i)) != 0 && *fp)
4452 4454 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4453 4455 break;
4454 4456
4455 4457 case X86_VENDOR_TM:
4456 4458 for (bits = ext_edx, fp = tm_x_edx, i = 0;
4457 4459 i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
4458 4460 fp++, i++)
4459 4461 if ((bits & (1 << i)) != 0 && *fp)
4460 4462 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4461 4463 break;
4462 4464 default:
4463 4465 break;
4464 4466 }
4465 4467
4466 4468 for (bits = std_ecx, fp = intc_ecx, i = 0;
4467 4469 i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
4468 4470 if ((bits & (1 << i)) != 0 && *fp)
4469 4471 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
4470 4472
4471 4473 lxpr_uiobuf_printf(uiobuf, "\n\n");
4472 4474
4473 4475 if (pools_enabled)
4474 4476 cp = cp->cpu_next_part;
4475 4477 else
4476 4478 cp = cp->cpu_next;
4477 4479 } while (cp != cpstart);
4478 4480
4479 4481 mutex_exit(&cpu_lock);
4480 4482 }
4481 4483
4482 4484 /* ARGSUSED */
4483 4485 static void
4484 4486 lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4485 4487 {
4486 4488 ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
4487 4489 lxpr_uiobuf_seterr(uiobuf, EFAULT);
4488 4490 }
4489 4491
4490 4492 /*
4491 4493 * Report a list of file systems loaded in the kernel. We only report the ones
4492 4494 * which we support and which may be checked by various components to see if
4493 4495 * they are loaded.
4494 4496 */
4495 4497 static void
4496 4498 lxpr_read_filesystems(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4497 4499 {
4498 4500 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "autofs");
4499 4501 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "cgroup");
4500 4502 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "nfs");
4501 4503 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "proc");
4502 4504 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "sysfs");
4503 4505 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "tmpfs");
4504 4506 }
4505 4507
/*
 * lxpr_getattr(): Vnode operation for VOP_GETATTR()
 *
 * For ATTR_REAL requests on nodes backed by a real vnode, delegate to the
 * underlying file system (keeping fd entries looking like symlinks/fifos/
 * sockets).  Otherwise synthesize attributes from the lxpr node itself.
 */
static int
lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	register lxpr_node_t *lxpnp = VTOLXP(vp);
	lxpr_nodetype_t type = lxpnp->lxpr_type;
	extern uint_t nproc;
	int error;

	/*
	 * Return attributes of underlying vnode if ATTR_REAL
	 *
	 * but keep fd files with the symlink permissions
	 */
	if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
		vnode_t *rvp = lxpnp->lxpr_realvp;

		/*
		 * withold attribute information to owner or root
		 */
		if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
			return (error);
		}

		/*
		 * now its attributes
		 */
		if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
			return (error);
		}

		/*
		 * if it's a file in lx /proc/pid/fd/xx then set its
		 * mode and keep it looking like a symlink, fifo or socket
		 */
		if (type == LXPR_PID_FD_FD) {
			vap->va_mode = lxpnp->lxpr_mode;
			vap->va_type = lxpnp->lxpr_realvp->v_type;
			vap->va_size = 0;
			vap->va_nlink = 1;
		}
		return (0);
	}

	/* Default attributes, that may be overridden below */
	bzero(vap, sizeof (*vap));
	vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
	vap->va_nlink = 1;
	vap->va_type = vp->v_type;
	vap->va_mode = lxpnp->lxpr_mode;
	vap->va_fsid = vp->v_vfsp->vfs_dev;
	vap->va_blksize = DEV_BSIZE;
	vap->va_uid = lxpnp->lxpr_uid;
	vap->va_gid = lxpnp->lxpr_gid;
	vap->va_nodeid = lxpnp->lxpr_ino;

	switch (type) {
	case LXPR_PROCDIR:
		/* "." + ".." + fixed entries + one directory per process */
		vap->va_nlink = nproc + 2 + PROCDIRFILES;
		vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
		break;
	case LXPR_PIDDIR:
		vap->va_nlink = PIDDIRFILES;
		vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
		break;
	case LXPR_PID_TASK_IDDIR:
		vap->va_nlink = TIDDIRFILES;
		vap->va_size = TIDDIRFILES * LXPR_SDSIZE;
		break;
	case LXPR_SELF:
		/* /proc/self is owned by the (real) reader, not the node */
		vap->va_uid = crgetruid(curproc->p_cred);
		vap->va_gid = crgetrgid(curproc->p_cred);
		break;
	case LXPR_PID_FD_FD:
	case LXPR_PID_TID_FD_FD:
		/*
		 * Restore VLNK type for lstat-type activity.
		 * See lxpr_readlink for more details.
		 */
		if ((flags & FOLLOW) == 0)
			vap->va_type = VLNK;
		/* FALLTHROUGH */
	default:
		break;
	}

	vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
	return (0);
}
4597 4599
/*
 * lxpr_access(): Vnode operation for VOP_ACCESS()
 *
 * Enforces the (mostly) read-only nature of lx /proc, restricts the
 * process-private nodes to the owning process or a privileged caller,
 * then falls back to conventional owner/group/other mode-bit checks.
 */
static int
lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
{
	lxpr_node_t *lxpnp = VTOLXP(vp);
	lxpr_nodetype_t type = lxpnp->lxpr_type;
	int shift = 0;
	proc_t *tp;

	/* lx /proc is a read only file system */
	if (mode & VWRITE) {
		switch (type) {
		case LXPR_PID_OOM_SCR_ADJ:
		case LXPR_PID_TID_OOM_SCR_ADJ:
		case LXPR_SYS_KERNEL_COREPATT:
		case LXPR_SYS_NET_CORE_SOMAXCON:
		case LXPR_SYS_VM_OVERCOMMIT_MEM:
		case LXPR_SYS_VM_SWAPPINESS:
		case LXPR_PID_FD_FD:
		case LXPR_PID_TID_FD_FD:
			/* the only nodes writes are allowed to reach */
			break;
		default:
			return (EROFS);
		}
	}

	/*
	 * If this is a restricted file, check access permissions.
	 */
	switch (type) {
	case LXPR_PIDDIR:
		return (0);
	case LXPR_PID_CURDIR:
	case LXPR_PID_ENV:
	case LXPR_PID_EXE:
	case LXPR_PID_LIMITS:
	case LXPR_PID_MAPS:
	case LXPR_PID_MEM:
	case LXPR_PID_ROOTDIR:
	case LXPR_PID_FDDIR:
	case LXPR_PID_FD_FD:
	case LXPR_PID_TID_FDDIR:
	case LXPR_PID_TID_FD_FD:
		/*
		 * Process-private nodes: the process itself gets through;
		 * everyone else needs proc-access privilege or per-cred
		 * permission.
		 */
		if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
			return (ENOENT);
		if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
		    priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
			lxpr_unlock(tp);
			return (EACCES);
		}
		lxpr_unlock(tp);
		/* FALLTHROUGH */
	default:
		break;
	}

	if (lxpnp->lxpr_realvp != NULL) {
		/*
		 * For these we use the underlying vnode's accessibility.
		 */
		return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
	}

	/* If user is root allow access regardless of permission bits */
	if (secpolicy_proc_access(cr) == 0)
		return (0);

	/*
	 * Access check is based on only one of owner, group, public. If not
	 * owner, then check group. If not a member of the group, then check
	 * public access.
	 */
	if (crgetuid(cr) != lxpnp->lxpr_uid) {
		shift += 3;
		if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
			shift += 3;
	}

	/* clear the bits granted by the applicable permission class */
	mode &= ~(lxpnp->lxpr_mode << shift);

	if (mode == 0)
		return (0);

	return (EACCES);
}
4684 4686
/* ARGSUSED */
/*
 * Lookup stub installed for node types that are not directories; always
 * fails (NULL maps to ENOENT in lxpr_lookup()).
 */
static vnode_t *
lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
{
	return (NULL);
}
4691 4693
/*
 * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
 *
 * Handles "." and ".." itself after an access check, then dispatches the
 * per-nodetype lookup function.  A NULL result from the dispatch table is
 * mapped to ENOENT.
 */
/* ARGSUSED */
static int
lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *direntflags, pathname_t *realpnp)
{
	lxpr_node_t *lxpnp = VTOLXP(dp);
	lxpr_nodetype_t type = lxpnp->lxpr_type;
	int error;

	ASSERT(dp->v_type == VDIR);
	ASSERT(type < LXPR_NFILES);

	/*
	 * we should never get here because the lookup
	 * is done on the realvp for these nodes
	 */
	ASSERT(type != LXPR_PID_FD_FD &&
	    type != LXPR_PID_CURDIR &&
	    type != LXPR_PID_ROOTDIR);

	/*
	 * restrict lookup permission to owner or root
	 */
	if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
		return (error);
	}

	/*
	 * Just return the parent vnode if that's where we are trying to go.
	 */
	if (strcmp(comp, "..") == 0) {
		VN_HOLD(lxpnp->lxpr_parent);
		*vpp = lxpnp->lxpr_parent;
		return (0);
	}

	/*
	 * Special handling for directory searches. Note: null component name
	 * denotes that the current directory is being searched.
	 */
	if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
		VN_HOLD(dp);
		*vpp = dp;
		return (0);
	}

	*vpp = (lxpr_lookup_function[type](dp, comp));
	return ((*vpp == NULL) ? ENOENT : 0);
}
4745 4747
4746 4748 /*
4747 4749 * Do a sequential search on the given directory table
4748 4750 */
4749 4751 static vnode_t *
4750 4752 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
4751 4753 lxpr_dirent_t *dirtab, int dirtablen)
4752 4754 {
4753 4755 lxpr_node_t *lxpnp;
4754 4756 int count;
4755 4757
4756 4758 for (count = 0; count < dirtablen; count++) {
4757 4759 if (strcmp(dirtab[count].d_name, comp) == 0) {
4758 4760 lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
4759 4761 dp = LXPTOV(lxpnp);
4760 4762 ASSERT(dp != NULL);
4761 4763 return (dp);
4762 4764 }
4763 4765 }
4764 4766 return (NULL);
4765 4767 }
4766 4768
4767 4769 static vnode_t *
4768 4770 lxpr_lookup_piddir(vnode_t *dp, char *comp)
4769 4771 {
4770 4772 proc_t *p;
4771 4773
4772 4774 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
4773 4775
4774 4776 p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
4775 4777 if (p == NULL)
4776 4778 return (NULL);
4777 4779
4778 4780 dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
4779 4781
4780 4782 lxpr_unlock(p);
4781 4783
4782 4784 return (dp);
4783 4785 }
4784 4786
/*
 * Lookup one of the process's task ID's.
 *
 * The component must be a pure decimal thread ID.  For lx-branded
 * processes any of the emulated TIDs may be looked up; for native
 * processes only the main thread (TID == PID) is exposed.
 */
static vnode_t *
lxpr_lookup_taskdir(vnode_t *dp, char *comp)
{
	lxpr_node_t *dlxpnp = VTOLXP(dp);
	lxpr_node_t *lxpnp;
	proc_t *p;
	pid_t real_pid;
	uint_t tid;
	int c;
	kthread_t *t;

	ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASKDIR);

	/*
	 * convert the string rendition of the filename to a thread ID,
	 * rejecting non-digit characters and decimal overflow
	 */
	tid = 0;
	while ((c = *comp++) != '\0') {
		int otid;
		if (c < '0' || c > '9')
			return (NULL);

		otid = tid;
		tid = 10 * tid + c - '0';
		/* integer overflow */
		if (tid / 10 != otid)
			return (NULL);
	}

	/*
	 * get the proc to work with and lock it
	 */
	real_pid = get_real_pid(dlxpnp->lxpr_pid);
	p = lxpr_lock(real_pid);
	if ((p == NULL))
		return (NULL);

	/*
	 * If the process is a zombie or system process
	 * it can't have any threads.
	 */
	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
		lxpr_unlock(p);
		return (NULL);
	}

	if (p->p_brand == &lx_brand) {
		/*
		 * NOTE(review): lxpr_get_thread() appears to return the
		 * thread with its thread lock held (it is dropped below) --
		 * confirm against its definition.
		 */
		t = lxpr_get_thread(p, tid);
	} else {
		/*
		 * Only the main thread is visible for non-branded processes.
		 */
		t = p->p_tlist;
		if (tid != p->p_pid || t == NULL) {
			t = NULL;
		} else {
			thread_lock(t);
		}
	}
	if (t == NULL) {
		lxpr_unlock(p);
		return (NULL);
	}
	/* The thread only had to exist; we don't need it locked further. */
	thread_unlock(t);

	/*
	 * Allocate and fill in a new lx /proc taskid node.
	 * Instead of the last arg being a fd, it is a tid.
	 */
	lxpnp = lxpr_getnode(dp, LXPR_PID_TASK_IDDIR, p, tid);
	dp = LXPTOV(lxpnp);
	ASSERT(dp != NULL);
	lxpr_unlock(p);
	return (dp);
}
4863 4865
/*
 * Lookup a component within a /proc/<pid>/task/<tid> directory.
 *
 * Re-validates that both the process and the specific thread still exist
 * before building a node from the fixed tiddir table.
 */
static vnode_t *
lxpr_lookup_task_tid_dir(vnode_t *dp, char *comp)
{
	lxpr_node_t *dlxpnp = VTOLXP(dp);
	lxpr_node_t *lxpnp;
	proc_t *p;
	pid_t real_pid;
	kthread_t *t;
	int i;

	ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);

	/*
	 * get the proc to work with and lock it
	 */
	real_pid = get_real_pid(dlxpnp->lxpr_pid);
	p = lxpr_lock(real_pid);
	if ((p == NULL))
		return (NULL);

	/*
	 * If the process is a zombie or system process
	 * it can't have any threads.
	 */
	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
		lxpr_unlock(p);
		return (NULL);
	}

	/* need to confirm tid is still there */
	t = lxpr_get_thread(p, dlxpnp->lxpr_desc);
	if (t == NULL) {
		lxpr_unlock(p);
		return (NULL);
	}
	/* thread returned locked by lxpr_get_thread(); existence suffices */
	thread_unlock(t);

	/*
	 * allocate and fill in the new lx /proc taskid dir node;
	 * the tid (lxpr_desc) is carried through in place of an fd
	 */
	for (i = 0; i < TIDDIRFILES; i++) {
		if (strcmp(tiddir[i].d_name, comp) == 0) {
			lxpnp = lxpr_getnode(dp, tiddir[i].d_type, p,
			    dlxpnp->lxpr_desc);
			dp = LXPTOV(lxpnp);
			ASSERT(dp != NULL);
			lxpr_unlock(p);
			return (dp);
		}
	}

	lxpr_unlock(p);
	return (NULL);
}
4921 4923
4922 4924 /*
4923 4925 * Lookup one of the process's open files.
4924 4926 */
4925 4927 static vnode_t *
4926 4928 lxpr_lookup_fddir(vnode_t *dp, char *comp)
4927 4929 {
4928 4930 lxpr_node_t *dlxpnp = VTOLXP(dp);
4929 4931
4930 4932 ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR ||
4931 4933 dlxpnp->lxpr_type == LXPR_PID_TID_FDDIR);
4932 4934
4933 4935 return (lxpr_lookup_fdnode(dp, comp));
4934 4936 }
4935 4937
4936 4938 static vnode_t *
4937 4939 lxpr_lookup_netdir(vnode_t *dp, char *comp)
4938 4940 {
4939 4941 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
4940 4942
4941 4943 dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
4942 4944
4943 4945 return (dp);
4944 4946 }
4945 4947
4946 4948 static vnode_t *
4947 4949 lxpr_lookup_procdir(vnode_t *dp, char *comp)
4948 4950 {
4949 4951 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
4950 4952
4951 4953 /*
4952 4954 * We know all the names of files & dirs in our file system structure
4953 4955 * except those that are pid names. These change as pids are created/
4954 4956 * deleted etc., so we just look for a number as the first char to see
4955 4957 * if we are we doing pid lookups.
4956 4958 *
4957 4959 * Don't need to check for "self" as it is implemented as a symlink
4958 4960 */
4959 4961 if (*comp >= '0' && *comp <= '9') {
4960 4962 pid_t pid = 0;
4961 4963 lxpr_node_t *lxpnp = NULL;
4962 4964 proc_t *p;
4963 4965 int c;
4964 4966
4965 4967 while ((c = *comp++) != '\0')
4966 4968 pid = 10 * pid + c - '0';
4967 4969
4968 4970 /*
4969 4971 * Can't continue if the process is still loading or it doesn't
4970 4972 * really exist yet (or maybe it just died!)
4971 4973 */
4972 4974 p = lxpr_lock(pid);
4973 4975 if (p == NULL)
4974 4976 return (NULL);
4975 4977
4976 4978 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
4977 4979 lxpr_unlock(p);
4978 4980 return (NULL);
4979 4981 }
4980 4982
4981 4983 /*
4982 4984 * allocate and fill in a new lx /proc node
4983 4985 */
4984 4986 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
4985 4987
4986 4988 lxpr_unlock(p);
4987 4989
4988 4990 dp = LXPTOV(lxpnp);
4989 4991 ASSERT(dp != NULL);
4990 4992
4991 4993 return (dp);
4992 4994 }
4993 4995
4994 4996 /* Lookup fixed names */
4995 4997 return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
4996 4998 }
4997 4999
/* Lookup a fixed entry under /proc/sys. */
static vnode_t *
lxpr_lookup_sysdir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES));
}
5004 5006
/* Lookup a fixed entry under /proc/sys/kernel. */
static vnode_t *
lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir,
	    SYS_KERNELDIRFILES));
}
5012 5014
/* Lookup a fixed entry under /proc/sys/kernel/random. */
static vnode_t *
lxpr_lookup_sys_kdir_randdir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_randdir,
	    SYS_RANDDIRFILES));
}
5020 5022
/* Lookup a fixed entry under /proc/sys/net. */
static vnode_t *
lxpr_lookup_sys_netdir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NETDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_netdir,
	    SYS_NETDIRFILES));
}
5028 5030
/* Lookup a fixed entry under /proc/sys/net/core. */
static vnode_t *
lxpr_lookup_sys_net_coredir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_COREDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_net_coredir,
	    SYS_NET_COREDIRFILES));
}
5036 5038
/* Lookup a fixed entry under /proc/sys/vm. */
static vnode_t *
lxpr_lookup_sys_vmdir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_VMDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_vmdir,
	    SYS_VMDIRFILES));
}
5044 5046
/* Lookup a fixed entry under /proc/sys/fs. */
static vnode_t *
lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir,
	    SYS_FSDIRFILES));
}
5052 5054
/* Lookup a fixed entry under /proc/sys/fs/inotify. */
static vnode_t *
lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp)
{
	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
	return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir,
	    SYS_FS_INOTIFYDIRFILES));
}
5060 5062
/*
 * lxpr_readdir(): Vnode operation for VOP_READDIR()
 *
 * Validates the request (read permission, non-negative offset, offset
 * aligned on an LXPR_SDSIZE slot boundary) and dispatches the
 * per-nodetype readdir handler.
 */
/* ARGSUSED */
static int
lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
    caller_context_t *ct, int flags)
{
	lxpr_node_t *lxpnp = VTOLXP(dp);
	lxpr_nodetype_t type = lxpnp->lxpr_type;
	ssize_t uresid;
	off_t uoffset;
	int error;

	ASSERT(dp->v_type == VDIR);
	ASSERT(type < LXPR_NFILES);

	/*
	 * we should never get here because the readdir
	 * is done on the realvp for these nodes
	 */
	ASSERT(type != LXPR_PID_FD_FD &&
	    type != LXPR_PID_CURDIR &&
	    type != LXPR_PID_ROOTDIR);

	/*
	 * restrict readdir permission to owner or root
	 */
	if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
		return (error);

	uoffset = uiop->uio_offset;
	uresid = uiop->uio_resid;

	/* can't do negative reads */
	if (uoffset < 0 || uresid <= 0)
		return (EINVAL);

	/* can't read directory entries that don't exist! */
	if (uoffset % LXPR_SDSIZE)
		return (ENOENT);

	return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
}
5105 5107
/* ARGSUSED */
/* Readdir stub for node types that are not directories. */
static int
lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	return (ENOTDIR);
}
5112 5114
/*
 * This has the common logic for returning directory entries.
 *
 * Directory positions are dense LXPR_SDSIZE-byte slots: slot 0 is ".",
 * slot 1 is "..", and slot N+2 is dirtab[N].  uio_offset is advanced by
 * exactly one slot per emitted entry -- independent of the dirent64 byte
 * count copied to the user -- so the position remains stable across
 * successive getdents() calls even though record lengths vary.
 *
 * Returns 0 on success, setting *eofp (if non-NULL) when the table has
 * been exhausted; EINVAL if the very first entry does not fit in the
 * caller's buffer; or an error from uiomove().
 */
static int
lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
    lxpr_dirent_t *dirtab, int dirtablen)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;

	oresid = uiop->uio_resid;

	/* clear out the dirent buffer */
	bzero(bp, sizeof (bp));

	/*
	 * Satisfy user request
	 */
	while ((uresid = uiop->uio_resid) > 0) {
		int dirindex;
		off_t uoffset;
		int reclen;
		int error;

		uoffset = uiop->uio_offset;
		/* slots 0 and 1 are "." and ".."; table entries follow */
		dirindex  = (uoffset / LXPR_SDSIZE) - 2;

		if (uoffset == 0) {

			dirent->d_ino = lxpnp->lxpr_ino;
			dirent->d_name[0] = '.';
			dirent->d_name[1] = '\0';
			reclen = DIRENT64_RECLEN(1);

		} else if (uoffset == LXPR_SDSIZE) {

			dirent->d_ino = lxpr_parentinode(lxpnp);
			dirent->d_name[0] = '.';
			dirent->d_name[1] = '.';
			dirent->d_name[2] = '\0';
			reclen = DIRENT64_RECLEN(2);

		} else if (dirindex >= 0 && dirindex < dirtablen) {
			int slen = strlen(dirtab[dirindex].d_name);

			dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
			    lxpnp->lxpr_pid, 0);

			VERIFY(slen < LXPNSIZ);
			(void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
			reclen = DIRENT64_RECLEN(slen);

		} else {
			/* Run out of table entries */
			if (eofp) {
				*eofp = 1;
			}
			return (0);
		}

		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		/*
		 * if the size of the data to transfer is greater
		 * that that requested then we can't do it this transfer.
		 */
		if (reclen > uresid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uresid == oresid) {
				return (EINVAL);
			}
			break;
		}

		/*
		 * uiomove() updates both uiop->uio_resid and uiop->uio_offset
		 * by the same amount. But we want uiop->uio_offset to change
		 * in increments of LXPR_SDSIZE, which is different from the
		 * number of bytes being returned to the user. So we set
		 * uiop->uio_offset separately, ignoring what uiomove() does.
		 */
		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
		    uiop)) != 0)
			return (error);

		uiop->uio_offset = uoffset + LXPR_SDSIZE;
	}

	/* Have run out of space, but could have just done last table entry */
	if (eofp) {
		*eofp =
		    (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
	}
	return (0);
}
5214 5216
5215 5217
/*
 * Readdir handler for the lx /proc root directory.
 *
 * Emits "." and ".." and the fixed lx_procdir entries first (via
 * lxpr_readdir_common()), then one numeric directory per visible
 * process.  Fixed entries come first so that changes in the process
 * table between getdents() calls cannot shift their offsets.
 */
static int
lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;
	off_t uoffset;
	zoneid_t zoneid;
	pid_t pid;
	int error;
	int ceof;

	ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);

	oresid = uiop->uio_resid;
	zoneid = LXPTOZ(lxpnp)->zone_id;

	/*
	 * We return directory entries in the order: "." and ".." then the
	 * unique lxproc files, then the directories corresponding to the
	 * running processes. We have defined this as the ordering because
	 * it allows us to more easily keep track of where we are betwen calls
	 * to getdents(). If the number of processes changes between calls
	 * then we can't lose track of where we are in the lxproc files.
	 */

	/* Do the fixed entries */
	error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
	    PROCDIRFILES);

	/* Finished if we got an error or if we couldn't do all the table */
	if (error != 0 || ceof == 0)
		return (error);

	/* clear out the dirent buffer */
	bzero(bp, sizeof (bp));

	/* Do the process entries */
	while ((uresid = uiop->uio_resid) > 0) {
		proc_t *p;
		int len;
		int reclen;
		int i;

		uoffset = uiop->uio_offset;

		/*
		 * Stop when entire proc table has been examined.
		 */
		i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
		if (i < 0 || i >= v.v_proc) {
			/* Run out of table entries */
			if (eofp) {
				*eofp = 1;
			}
			return (0);
		}
		mutex_enter(&pidlock);

		/*
		 * Skip indices for which there is no pid_entry, PIDs for
		 * which there is no corresponding process, a PID of 0,
		 * and anything the security policy doesn't allow
		 * us to look at.
		 */
		if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
		    p->p_pid == 0 ||
		    secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
			mutex_exit(&pidlock);
			goto next;
		}
		mutex_exit(&pidlock);

		/*
		 * NOTE(review): p is dereferenced below after pidlock has
		 * been dropped, with no hold taken on the process --
		 * presumably tolerable for readdir, but confirm it cannot
		 * race with process teardown.
		 */

		/*
		 * Convert pid to the Linux default of 1 if we're the zone's
		 * init process, or 0 if zsched, otherwise use the value from
		 * the proc structure
		 */
		if (p->p_pid == curproc->p_zone->zone_proc_initpid) {
			pid = 1;
		} else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) {
			pid = 0;
		} else {
			pid = p->p_pid;
		}

		/*
		 * If this /proc was mounted in the global zone, view
		 * all procs; otherwise, only view zone member procs.
		 */
		if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
			goto next;
		}

		ASSERT(p->p_stat != 0);

		dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
		len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
		ASSERT(len < LXPNSIZ);
		reclen = DIRENT64_RECLEN(len);

		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		/*
		 * if the size of the data to transfer is greater
		 * that that requested then we can't do it this transfer.
		 */
		if (reclen > uresid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uresid == oresid)
				return (EINVAL);
			break;
		}

		/*
		 * uiomove() updates both uiop->uio_resid and uiop->uio_offset
		 * by the same amount. But we want uiop->uio_offset to change
		 * in increments of LXPR_SDSIZE, which is different from the
		 * number of bytes being returned to the user. So we set
		 * uiop->uio_offset separately, in the increment of this for
		 * the loop, ignoring what uiomove() does.
		 */
		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
		    uiop)) != 0)
			return (error);
next:
		uiop->uio_offset = uoffset + LXPR_SDSIZE;
	}

	if (eofp != NULL) {
		*eofp = (uiop->uio_offset >=
		    ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
	}

	return (0);
}
5357 5359
/*
 * Readdir handler for a /proc/<pid> directory.
 *
 * Translates the Linux-facing pid back to the native one (1 -> zone init,
 * 0 -> zsched), verifies the process still exists, and then emits the
 * fixed piddir table.
 */
static int
lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	proc_t *p;
	pid_t find_pid;

	ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);

	/* can't read its contents if it died */
	mutex_enter(&pidlock);

	if (lxpnp->lxpr_pid == 1) {
		find_pid = curproc->p_zone->zone_proc_initpid;
	} else if (lxpnp->lxpr_pid == 0) {
		find_pid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		find_pid = lxpnp->lxpr_pid;
	}
	p = prfind(find_pid);

	if (p == NULL || p->p_stat == SIDL) {
		mutex_exit(&pidlock);
		return (ENOENT);
	}
	mutex_exit(&pidlock);

	return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
}
5386 5388
/* Readdir handler for /proc/net: just the fixed netdir table. */
static int
lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
}
5393 5395
/*
 * Readdir handler for /proc/<pid>/task.
 *
 * Emits "." and ".." and then one entry per thread.  For lx-branded
 * processes the emulated (br_pid) thread IDs are reported; native
 * processes expose only their main thread.  The process is kept
 * P_PR_LOCK'd (but with p_lock dropped) while the thread list is walked.
 */
static int
lxpr_readdir_taskdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;
	off_t uoffset;
	int error;
	int ceof;
	proc_t *p;
	int tiddirsize = -1;
	int tasknum;
	pid_t real_pid;
	kthread_t *t;
	boolean_t branded = B_FALSE;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_TASKDIR);

	oresid = uiop->uio_resid;

	real_pid = get_real_pid(lxpnp->lxpr_pid);
	p = lxpr_lock(real_pid);

	/* can't read its contents if it died */
	if (p == NULL) {
		return (ENOENT);
	}
	if (p->p_stat == SIDL) {
		lxpr_unlock(p);
		return (ENOENT);
	}

	/* zombies and system processes have no visible threads */
	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
		tiddirsize = 0;

	branded = (p->p_brand == &lx_brand);
	/*
	 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
	 * going away while we iterate over its threads.
	 */
	mutex_exit(&p->p_lock);

	if (tiddirsize == -1)
		tiddirsize = p->p_lwpcnt;

	/* Do the fixed entries (in this case just "." & "..") */
	error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);

	/* Finished if we got an error or if we couldn't do all the table */
	if (error != 0 || ceof == 0)
		goto out;

	if ((t = p->p_tlist) == NULL) {
		if (eofp != NULL)
			*eofp = 1;
		goto out;
	}

	/* clear out the dirent buffer */
	bzero(bp, sizeof (bp));

	/*
	 * Loop until user's request is satisfied or until all thread's have
	 * been returned.
	 */
	for (tasknum = 0; (uresid = uiop->uio_resid) > 0; tasknum++) {
		int i;
		int reclen;
		int len;
		uint_t emul_tid;
		lx_lwp_data_t *lwpd;

		uoffset = uiop->uio_offset;

		/*
		 * Stop at the end of the thread list
		 */
		i = (uoffset / LXPR_SDSIZE) - 2;
		if (i < 0 || i >= tiddirsize) {
			if (eofp) {
				*eofp = 1;
			}
			goto out;
		}

		/* resuming mid-list: skip slots already consumed */
		if (i != tasknum)
			goto next;

		if (!branded) {
			/*
			 * Emulating the goofy linux task model is impossible
			 * to do for native processes. We can compromise by
			 * presenting only the main thread to the consumer.
			 */
			emul_tid = p->p_pid;
		} else {
			if ((lwpd = ttolxlwp(t)) == NULL) {
				goto next;
			}
			emul_tid = lwpd->br_pid;
			/*
			 * Convert pid to Linux default of 1 if we're the
			 * zone's init.
			 */
			if (emul_tid == curproc->p_zone->zone_proc_initpid)
				emul_tid = 1;
		}

		dirent->d_ino = lxpr_inode(LXPR_PID_TASK_IDDIR, lxpnp->lxpr_pid,
		    emul_tid);
		len = snprintf(dirent->d_name, LXPNSIZ, "%d", emul_tid);
		ASSERT(len < LXPNSIZ);
		reclen = DIRENT64_RECLEN(len);

		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		if (reclen > uresid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uresid == oresid)
				error = EINVAL;
			goto out;
		}

		/*
		 * uiomove() updates both uiop->uio_resid and uiop->uio_offset
		 * by the same amount. But we want uiop->uio_offset to change
		 * in increments of LXPR_SDSIZE, which is different from the
		 * number of bytes being returned to the user. So we set
		 * uiop->uio_offset separately, in the increment of this for
		 * the loop, ignoring what uiomove() does.
		 */
		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
		    uiop)) != 0)
			goto out;

next:
		uiop->uio_offset = uoffset + LXPR_SDSIZE;

		/* native processes expose one thread; stop after wrap-around */
		if ((t = t->t_forw) == p->p_tlist || !branded) {
			if (eofp != NULL)
				*eofp = 1;
			goto out;
		}
	}

	if (eofp != NULL)
		*eofp = 0;

out:
	/* re-take p_lock so lxpr_unlock() can drop P_PR_LOCK cleanly */
	mutex_enter(&p->p_lock);
	lxpr_unlock(p);
	return (error);
}
5552 5554
5553 5555 static int
5554 5556 lxpr_readdir_task_tid_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5555 5557 {
5556 5558 proc_t *p;
5557 5559 pid_t real_pid;
5558 5560 kthread_t *t;
5559 5561
5560 5562 ASSERT(lxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
5561 5563
5562 5564 mutex_enter(&pidlock);
5563 5565
5564 5566 real_pid = get_real_pid(lxpnp->lxpr_pid);
5565 5567 p = prfind(real_pid);
5566 5568
5567 5569 /* can't read its contents if it died */
5568 5570 if (p == NULL || p->p_stat == SIDL) {
5569 5571 mutex_exit(&pidlock);
5570 5572 return (ENOENT);
5571 5573 }
5572 5574
5573 5575 mutex_exit(&pidlock);
5574 5576
5575 5577 /* need to confirm tid is still there */
5576 5578 t = lxpr_get_thread(p, lxpnp->lxpr_desc);
5577 5579 if (t == NULL) {
5578 5580 /* we can't find this specific thread */
5579 5581 return (NULL);
5580 5582 }
5581 5583 thread_unlock(t);
5582 5584
5583 5585 return (lxpr_readdir_common(lxpnp, uiop, eofp, tiddir, TIDDIRFILES));
5584 5586 }
5585 5587
/*
 * Readdir for /proc/<pid>/fd (and /proc/<pid>/task/<tid>/fd): emit one
 * dirent per open file descriptor, after the fixed "." and ".." entries.
 *
 * The directory offset is encoded in units of LXPR_SDSIZE: slots 0 and 1
 * are "." and "..", slot N+2 corresponds to fd N.  uio_offset is advanced
 * by LXPR_SDSIZE per slot in the loop increment, independent of the byte
 * count uiomove() consumes.
 */
static int
lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;
	off_t uoffset;
	int error;
	int ceof;
	proc_t *p;
	int fddirsize = -1;
	uf_info_t *fip;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR ||
	    lxpnp->lxpr_type == LXPR_PID_TID_FDDIR);

	oresid = uiop->uio_resid;

	/* can't read its contents if it died */
	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL)
		return (ENOENT);

	/* Zombies and system processes have no fd table to show */
	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
		fddirsize = 0;

	/*
	 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
	 * going away while we iterate over its fi_list.
	 */
	mutex_exit(&p->p_lock);

	/* Get open file info */
	fip = (&(p)->p_user.u_finfo);
	mutex_enter(&fip->fi_lock);

	/* Not a zombie/system proc: size the listing from the fd table */
	if (fddirsize == -1)
		fddirsize = fip->fi_nfiles;

	/* Do the fixed entries (in this case just "." & "..") */
	error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);

	/* Finished if we got an error or if we couldn't do all the table */
	if (error != 0 || ceof == 0)
		goto out;

	/* clear out the dirent buffer */
	bzero(bp, sizeof (bp));

	/*
	 * Loop until user's request is satisfied or until
	 * all file descriptors have been examined.
	 */
	for (; (uresid = uiop->uio_resid) > 0;
	    uiop->uio_offset = uoffset + LXPR_SDSIZE) {
		int reclen;
		int fd;
		int len;

		uoffset = uiop->uio_offset;

		/*
		 * Stop at the end of the fd list
		 */
		fd = (uoffset / LXPR_SDSIZE) - 2;
		if (fd < 0 || fd >= fddirsize) {
			if (eofp) {
				*eofp = 1;
			}
			goto out;
		}

		/* Skip closed slots; the loop increment advances the offset */
		if (fip->fi_list[fd].uf_file == NULL)
			continue;

		dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
		len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
		ASSERT(len < LXPNSIZ);
		reclen = DIRENT64_RECLEN(len);

		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		if (reclen > uresid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uresid == oresid)
				error = EINVAL;
			goto out;
		}

		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
		    uiop)) != 0)
			goto out;
	}

	/* Ran out of user buffer; at EOF only if we passed the last fd slot */
	if (eofp != NULL) {
		*eofp =
		    (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
	}

out:
	/* Reacquire p_lock so lxpr_unlock() can drop P_PR_LOCK safely */
	mutex_exit(&fip->fi_lock);
	mutex_enter(&p->p_lock);
	lxpr_unlock(p);
	return (error);
}
5696 5698
/* Readdir for /proc/sys: fixed table of entries, no dynamic content. */
static int
lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES));
}
5703 5705
/* Readdir for /proc/sys/fs: fixed table of entries. */
static int
lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir,
	    SYS_FSDIRFILES));
}
5711 5713
/* Readdir for /proc/sys/fs/inotify: fixed table of entries. */
static int
lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir,
	    SYS_FS_INOTIFYDIRFILES));
}
5719 5721
/* Readdir for /proc/sys/kernel: fixed table of entries. */
static int
lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir,
	    SYS_KERNELDIRFILES));
}
5727 5729
/* Readdir for /proc/sys/kernel/random: fixed table of entries. */
static int
lxpr_readdir_sys_kdir_randdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_randdir,
	    SYS_RANDDIRFILES));
}
5735 5737
/* Readdir for /proc/sys/net: fixed table of entries. */
static int
lxpr_readdir_sys_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_NETDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_netdir,
	    SYS_NETDIRFILES));
}
5743 5745
/* Readdir for /proc/sys/net/core: fixed table of entries. */
static int
lxpr_readdir_sys_net_coredir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_COREDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_coredir,
	    SYS_NET_COREDIRFILES));
}
5751 5753
/* Readdir for /proc/sys/vm: fixed table of entries. */
static int
lxpr_readdir_sys_vmdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VMDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_vmdir,
	    SYS_VMDIRFILES));
}
5759 5761
/*
 * Handle a write to /proc/sys/net/core/somaxconn by mapping the value onto
 * the native tcp "_conn_req_max_q" property of the current netstack.
 * Accepts only a single whole-value write starting at offset 0; returns
 * EINVAL for malformed input or if the property update fails.
 */
/* ARGSUSED */
static int
lxpr_write_sys_net_core_somaxc(lxpr_node_t *lxpnp, struct uio *uio,
    struct cred *cr, caller_context_t *ct)
{
	int error;
	int res = 0;
	size_t olen;
	char val[16];	/* big enough for a uint numeric string */
	netstack_t *ns;
	mod_prop_info_t *ptbl = NULL;
	mod_prop_info_t *pinfo = NULL;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);

	/* Partial/offset writes are not supported */
	if (uio->uio_loffset != 0)
		return (EINVAL);

	if (uio->uio_resid == 0)
		return (0);

	/* Reject anything that cannot fit (with NUL) in our buffer */
	olen = uio->uio_resid;
	if (olen > sizeof (val) - 1)
		return (EINVAL);

	bzero(val, sizeof (val));
	error = uiomove(val, olen, UIO_WRITE, uio);
	if (error != 0)
		return (error);

	/* Strip a single trailing newline, as echo(1) would append */
	if (val[olen - 1] == '\n')
		val[olen - 1] = '\0';

	if (val[0] == '\0') /* no input */
		return (EINVAL);

	ns = netstack_get_current();
	if (ns == NULL)
		return (EINVAL);

	/* Set the value through the tcp module property framework */
	ptbl = ns->netstack_tcp->tcps_propinfo_tbl;
	pinfo = mod_prop_lookup(ptbl, "_conn_req_max_q", MOD_PROTO_TCP);
	if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, val, 0) != 0)
		res = EINVAL;

	netstack_rele(ns);
	return (res);
}
5807 5809
/*
 * Handle a write to /proc/sys/kernel/core_pattern: translate the Linux
 * core pattern into the native form and install it as the zone's default
 * core path.  Requires coreadm privilege; pipe patterns ("|...") are not
 * supported.  Only a single whole-value write at offset 0 is accepted.
 */
/* ARGSUSED */
static int
lxpr_write_sys_kernel_corepatt(lxpr_node_t *lxpnp, struct uio *uio,
    struct cred *cr, caller_context_t *ct)
{
	zone_t *zone = curproc->p_zone;
	struct core_globals *cg;
	refstr_t *rp, *nrp;
	corectl_path_t *ccp;
	char val[MAXPATHLEN];
	char valtr[MAXPATHLEN];
	size_t olen;
	int error;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);

	cg = zone_getspecific(core_zone_key, zone);
	ASSERT(cg != NULL);

	/* Changing core configuration requires the coreadm privilege */
	if (secpolicy_coreadm(cr) != 0)
		return (EPERM);

	if (uio->uio_loffset != 0)
		return (EINVAL);

	if (uio->uio_resid == 0)
		return (0);

	olen = uio->uio_resid;
	if (olen > sizeof (val) - 1)
		return (EINVAL);

	bzero(val, sizeof (val));
	error = uiomove(val, olen, UIO_WRITE, uio);
	if (error != 0)
		return (error);

	/* Strip a single trailing newline, as echo(1) would append */
	if (val[olen - 1] == '\n')
		val[olen - 1] = '\0';

	/* Linux pipe-to-program patterns are not emulated */
	if (val[0] == '|')
		return (EINVAL);

	/* Translate Linux %-tokens into the native core path format */
	if ((error = lxpr_core_path_l2s(val, valtr, sizeof (valtr))) != 0)
		return (error);

	nrp = refstr_alloc(valtr);

	/* Swap in the new path under ccp_mtx; release the old one after */
	ccp = cg->core_default_path;
	mutex_enter(&ccp->ccp_mtx);
	rp = ccp->ccp_path;
	refstr_hold((ccp->ccp_path = nrp));
	cg->core_options |= CC_PROCESS_PATH;
	mutex_exit(&ccp->ccp_mtx);

	if (rp != NULL)
		refstr_rele(rp);

	return (0);
}
5868 5870
/*
 * lxpr_readlink(): Vnode operation for VOP_READLINK()
 *
 * Produces the link contents for lx proc symlinks (and the pseudo-symlink
 * fd entries), copying the result to the caller via uiomove().
 */
/* ARGSUSED */
static int
lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
{
	char bp[MAXPATHLEN + 1];
	size_t buflen = sizeof (bp);
	lxpr_node_t *lxpnp = VTOLXP(vp);
	vnode_t *rvp = lxpnp->lxpr_realvp;
	pid_t pid;
	int error = 0;

	/*
	 * Linux does something very "clever" for /proc/<pid>/fd/<num> entries.
	 * Open FDs are represented as symlinks, the link contents
	 * corresponding to the open resource. For plain files or devices,
	 * this isn't absurd since one can dereference the symlink to query
	 * the underlying resource. For sockets or pipes, it becomes ugly in a
	 * hurry. To maintain this human-readable output, those FD symlinks
	 * point to bogus targets such as "socket:[<inodenum>]". This requires
	 * circumventing vfs since the stat/lstat behavior on those FD entries
	 * will be unusual. (A stat must retrieve information about the open
	 * socket or pipe. It cannot fail because the link contents point to
	 * an absent file.)
	 *
	 * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD
	 * entries. This bypasses code paths which would normally
	 * short-circuit on symlinks and allows us to emulate the vfs behavior
	 * expected by /proc consumers.
	 */
	if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD)
		return (EINVAL);

	/* Try to produce a symlink name for anything that has a realvp */
	if (rvp != NULL) {
		if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
			return (error);
		if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) {
			/*
			 * Special handling possible for /proc/<pid>/fd/<num>
			 * Generate <type>:[<inode>] links, if allowed.
			 */
			if (lxpnp->lxpr_type != LXPR_PID_FD_FD ||
			    lxpr_readlink_fdnode(lxpnp, bp, buflen) != 0) {
				return (error);
			}
		}
	} else {
		switch (lxpnp->lxpr_type) {
		case LXPR_SELF:
			/*
			 * Convert pid to the Linux default of 1 if we're the
			 * zone's init process or 0 if zsched.
			 */
			if (curproc->p_pid ==
			    curproc->p_zone->zone_proc_initpid) {
				pid = 1;
			} else if (curproc->p_pid ==
			    curproc->p_zone->zone_zsched->p_pid) {
				pid = 0;
			} else {
				pid = curproc->p_pid;
			}

			/*
			 * Don't need to check result as every possible int
			 * will fit within MAXPATHLEN bytes.
			 */
			(void) snprintf(bp, buflen, "%d", pid);
			break;
		case LXPR_PID_CURDIR:
		case LXPR_PID_ROOTDIR:
		case LXPR_PID_EXE:
			/* No realvp to resolve: deny rather than fabricate */
			return (EACCES);
		default:
			/*
			 * Need to return error so that nothing thinks
			 * that the symlink is empty and hence "."
			 */
			return (EINVAL);
		}
	}

	/* copy the link data to user space */
	return (uiomove(bp, strlen(bp), UIO_READ, uiop));
}
5957 5959
5958 5960
/*
 * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
 * Vnode is no longer referenced, deallocate the file
 * and all its resources.
 */
/* ARGSUSED */
static void
lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	lxpr_freenode(VTOLXP(vp));
}
5970 5972
/*
 * lxpr_sync(): Vnode operation for VOP_SYNC()
 *
 * NOTE: the empty parameter list is deliberate; this is installed in the
 * vnode ops table and must accept whatever arguments VOP_SYNC passes.
 */
static int
lxpr_sync()
{
	/*
	 * Nothing to sync but this function must never fail
	 */
	return (0);
}
5982 5984
/*
 * lxpr_cmp(): Vnode operation for VOP_CMP()
 *
 * Compare two vnodes for equality, first unwrapping any lx proc vnodes to
 * the real vnodes they shadow so that an lx proc node compares equal to
 * the underlying file it represents.
 */
static int
lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
{
	vnode_t *rvp;

	/* Chase lxpr_realvp chains down to a non-lxproc vnode, if any */
	while (vn_matchops(vp1, lxpr_vnodeops) &&
	    (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
		vp1 = rvp;
	}

	while (vn_matchops(vp2, lxpr_vnodeops) &&
	    (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
		vp2 = rvp;
	}

	/* If either side is still an lx proc vnode, compare by identity */
	if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
		return (vp1 == vp2);
	return (VOP_CMP(vp1, vp2, ct));
}
6005 6007
6006 6008 /*
6007 6009 * lxpr_realvp(): Vnode operation for VOP_REALVP()
6008 6010 */
6009 6011 static int
6010 6012 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
6011 6013 {
6012 6014 vnode_t *rvp;
6013 6015
6014 6016 if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
6015 6017 vp = rvp;
6016 6018 if (VOP_REALVP(vp, &rvp, ct) == 0)
6017 6019 vp = rvp;
6018 6020 }
6019 6021
6020 6022 *vpp = vp;
6021 6023 return (0);
6022 6024 }
6023 6025
/*
 * lxpr_write(): Vnode operation for VOP_WRITE()
 *
 * Only a few lx proc files accept writes; all others silently discard the
 * data (claiming full success) so that Linux programs which blindly poke
 * /proc tunables do not fail.
 */
static int
lxpr_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
    caller_context_t *ct)
{
	lxpr_node_t *lxpnp = VTOLXP(vp);
	lxpr_nodetype_t type = lxpnp->lxpr_type;

	switch (type) {
	case LXPR_SYS_KERNEL_COREPATT:
		return (lxpr_write_sys_kernel_corepatt(lxpnp, uiop, cr, ct));
	case LXPR_SYS_NET_CORE_SOMAXCON:
		return (lxpr_write_sys_net_core_somaxc(lxpnp, uiop, cr, ct));

	default:
		/* pretend we wrote the whole thing */
		uiop->uio_offset += uiop->uio_resid;
		uiop->uio_resid = 0;
		return (0);
	}
}
6044 6046
6045 6047 /*
6046 6048 * We need to allow open with O_CREAT for the oom_score_adj file.
6047 6049 */
6048 6050 /*ARGSUSED7*/
6049 6051 static int
6050 6052 lxpr_create(struct vnode *dvp, char *nm, struct vattr *vap,
6051 6053 enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred,
6052 6054 int flag, caller_context_t *ct, vsecattr_t *vsecp)
6053 6055 {
6054 6056 lxpr_node_t *lxpnp = VTOLXP(dvp);
6055 6057 lxpr_nodetype_t type = lxpnp->lxpr_type;
6056 6058 vnode_t *vp = NULL;
6057 6059 int error;
6058 6060
6059 6061 ASSERT(type < LXPR_NFILES);
6060 6062
6061 6063 /*
6062 6064 * restrict create permission to owner or root
6063 6065 */
6064 6066 if ((error = lxpr_access(dvp, VEXEC, 0, cred, ct)) != 0) {
6065 6067 return (error);
6066 6068 }
6067 6069
6068 6070 if (*nm == '\0')
6069 6071 return (EPERM);
6070 6072
6071 6073 if (dvp->v_type != VDIR)
6072 6074 return (EPERM);
6073 6075
6074 6076 if (exclusive == EXCL)
6075 6077 return (EEXIST);
6076 6078
6077 6079 /*
6078 6080 * We're currently restricting O_CREAT to:
6079 6081 * - /proc/<pid>/fd/<num>
6080 6082 * - /proc/<pid>/oom_score_adj
6081 6083 * - /proc/<pid>/task/<tid>/fd/<num>
6082 6084 * - /proc/<pid>/task/<tid>/oom_score_adj
6083 6085 * - /proc/sys/kernel/core_pattern
6084 6086 * - /proc/sys/net/core/somaxconn
6085 6087 * - /proc/sys/vm/overcommit_memory
6086 6088 * - /proc/sys/vm/swappiness
6087 6089 */
6088 6090 switch (type) {
6089 6091 case LXPR_PIDDIR:
6090 6092 case LXPR_PID_TASK_IDDIR:
6091 6093 if (strcmp(nm, "oom_score_adj") == 0) {
6092 6094 proc_t *p;
6093 6095 p = lxpr_lock(lxpnp->lxpr_pid);
6094 6096 if (p != NULL) {
6095 6097 vp = lxpr_lookup_common(dvp, nm, p, piddir,
6096 6098 PIDDIRFILES);
6097 6099 }
6098 6100 lxpr_unlock(p);
6099 6101 }
6100 6102 break;
6101 6103
6102 6104 case LXPR_SYS_NET_COREDIR:
6103 6105 if (strcmp(nm, "somaxconn") == 0) {
6104 6106 vp = lxpr_lookup_common(dvp, nm, NULL, sys_net_coredir,
6105 6107 SYS_NET_COREDIRFILES);
6106 6108 }
6107 6109 break;
6108 6110
6109 6111 case LXPR_SYS_KERNELDIR:
6110 6112 if (strcmp(nm, "core_pattern") == 0) {
6111 6113 vp = lxpr_lookup_common(dvp, nm, NULL, sys_kerneldir,
6112 6114 SYS_KERNELDIRFILES);
6113 6115 }
6114 6116 break;
6115 6117
6116 6118 case LXPR_SYS_VMDIR:
6117 6119 if (strcmp(nm, "overcommit_memory") == 0 ||
6118 6120 strcmp(nm, "swappiness") == 0) {
6119 6121 vp = lxpr_lookup_common(dvp, nm, NULL, sys_vmdir,
6120 6122 SYS_VMDIRFILES);
6121 6123 }
6122 6124 break;
6123 6125
6124 6126 case LXPR_PID_FDDIR:
6125 6127 case LXPR_PID_TID_FDDIR:
6126 6128 vp = lxpr_lookup_fdnode(dvp, nm);
6127 6129 break;
6128 6130
6129 6131 default:
6130 6132 vp = NULL;
6131 6133 break;
6132 6134 }
6133 6135
6134 6136 if (vp != NULL) {
6135 6137 /* Creating an existing file, allow it for regular files. */
6136 6138 if (vp->v_type == VDIR)
6137 6139 return (EISDIR);
6138 6140
6139 6141 /* confirm permissions against existing file */
6140 6142 if ((error = lxpr_access(vp, mode, 0, cred, ct)) != 0) {
6141 6143 VN_RELE(vp);
6142 6144 return (error);
6143 6145 }
6144 6146
6145 6147 *vpp = vp;
6146 6148 return (0);
6147 6149 }
6148 6150
6149 6151 /*
6150 6152 * Linux proc does not allow creation of addition, non-subsystem
6151 6153 * specific files inside the hierarchy. ENOENT is tossed when such
6152 6154 * actions are attempted.
6153 6155 */
6154 6156 return (ENOENT);
6155 6157 }
↓ open down ↓ |
2226 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX