Code review (webrev) capture: proposed fix for a kernel panic in
lxpr_read_pid_env, in the LX-brand procfs implementation
(usr/src/uts/common/brand/lx/procfs/lx_prvnops.c).
--- old/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
+++ new/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 * Copyright 2016 Joyent, Inc.
25 25 */
26 26
27 27 /*
28 28 * lx_proc -- a Linux-compatible /proc for the LX brand
29 29 *
30 30 * We have -- confusingly -- two implementations of Linux /proc. One is to
31 31 * support native (but Linux-borne) programs that wish to view the native
32 32 * system through the Linux /proc model; the other -- this one -- is to
33 33 * support Linux binaries via the LX brand. These two implementations differ
34 34 * greatly in their aspirations (and their willingness to bend the truth
35 35 * of the system to accommodate those aspirations); they should not be unified.
36 36 */
37 37
38 38 #include <sys/cpupart.h>
39 39 #include <sys/cpuvar.h>
40 40 #include <sys/session.h>
41 41 #include <sys/vmparam.h>
42 42 #include <sys/mman.h>
43 43 #include <vm/rm.h>
44 44 #include <vm/seg_vn.h>
45 45 #include <sys/sdt.h>
46 46 #include <lx_signum.h>
47 47 #include <sys/strlog.h>
48 48 #include <sys/stropts.h>
49 49 #include <sys/cmn_err.h>
50 50 #include <sys/lx_brand.h>
51 51 #include <lx_auxv.h>
52 52 #include <sys/x86_archext.h>
53 53 #include <sys/archsystm.h>
54 54 #include <sys/fp.h>
55 55 #include <sys/pool_pset.h>
56 56 #include <sys/pset.h>
57 57 #include <sys/zone.h>
58 58 #include <sys/pghw.h>
59 59 #include <sys/vfs_opreg.h>
60 60 #include <sys/param.h>
61 61 #include <sys/utsname.h>
62 62 #include <sys/rctl.h>
63 63 #include <sys/kstat.h>
64 64 #include <sys/lx_misc.h>
65 65 #include <sys/brand.h>
66 66 #include <sys/cred_impl.h>
67 67 #include <sys/tihdr.h>
68 68 #include <sys/corectl.h>
69 69 #include <inet/ip.h>
70 70 #include <inet/ip_ire.h>
71 71 #include <inet/ip6.h>
72 72 #include <inet/ip_if.h>
73 73 #include <inet/tcp.h>
74 74 #include <inet/tcp_impl.h>
75 75 #include <inet/udp_impl.h>
76 76 #include <inet/ipclassifier.h>
77 77 #include <sys/socketvar.h>
78 78 #include <fs/sockfs/socktpi.h>
79 79
80 80 /* Dependent on procfs */
81 81 extern kthread_t *prchoose(proc_t *);
82 82 extern int prreadargv(proc_t *, char *, size_t, size_t *);
83 83 extern int prreadenvv(proc_t *, char *, size_t, size_t *);
84 84 extern int prreadbuf(proc_t *, uintptr_t, uint8_t *, size_t, size_t *);
85 85
86 86 #include "lx_proc.h"
87 87
88 88 extern pgcnt_t swapfs_minfree;
89 89 extern time_t boot_time;
90 90
91 91 /*
92 92 * Pointer to the vnode ops vector for this fs.
93 93 * This is instantiated in lxprinit() in lxpr_vfsops.c
94 94 */
95 95 vnodeops_t *lxpr_vnodeops;
96 96
97 97 static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
98 98 static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
99 99 caller_context_t *);
100 100 static int lxpr_create(struct vnode *, char *, struct vattr *, enum vcexcl,
101 101 int, struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *);
102 102 static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
103 103 static int lxpr_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
104 104 static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
105 105 caller_context_t *);
106 106 static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
107 107 static int lxpr_lookup(vnode_t *, char *, vnode_t **,
108 108 pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
109 109 pathname_t *);
110 110 static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
111 111 caller_context_t *, int);
112 112 static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
113 113 static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
114 114 static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
115 115 static int lxpr_sync(void);
116 116 static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
117 117
118 118 static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
119 119 static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
120 120 static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
121 121 static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
122 122 static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
123 123 static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *);
124 124 static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *);
125 125 static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *);
126 126 static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *);
127 127 static vnode_t *lxpr_lookup_sys_kdir_randdir(vnode_t *, char *);
128 128 static vnode_t *lxpr_lookup_sys_netdir(vnode_t *, char *);
129 129 static vnode_t *lxpr_lookup_sys_net_coredir(vnode_t *, char *);
130 130 static vnode_t *lxpr_lookup_sys_vmdir(vnode_t *, char *);
131 131 static vnode_t *lxpr_lookup_taskdir(vnode_t *, char *);
132 132 static vnode_t *lxpr_lookup_task_tid_dir(vnode_t *, char *);
133 133
134 134 static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
135 135 static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
136 136 static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
137 137 static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
138 138 static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
139 139 static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *);
140 140 static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *);
141 141 static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *);
142 142 static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *);
143 143 static int lxpr_readdir_sys_kdir_randdir(lxpr_node_t *, uio_t *, int *);
144 144 static int lxpr_readdir_sys_netdir(lxpr_node_t *, uio_t *, int *);
145 145 static int lxpr_readdir_sys_net_coredir(lxpr_node_t *, uio_t *, int *);
146 146 static int lxpr_readdir_sys_vmdir(lxpr_node_t *, uio_t *, int *);
147 147 static int lxpr_readdir_taskdir(lxpr_node_t *, uio_t *, int *);
148 148 static int lxpr_readdir_task_tid_dir(lxpr_node_t *, uio_t *, int *);
149 149
150 150 static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
151 151 static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
152 152 static void lxpr_read_cgroups(lxpr_node_t *, lxpr_uiobuf_t *);
153 153 static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
154 154 static void lxpr_read_diskstats(lxpr_node_t *, lxpr_uiobuf_t *);
155 155 static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
156 156 static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
157 157 static void lxpr_read_filesystems(lxpr_node_t *, lxpr_uiobuf_t *);
158 158 static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t);
159 159 static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
160 160 static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
161 161 static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
162 162 static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
163 163 static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
164 164 static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *);
165 165 static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
166 166 static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
167 167
168 168 static void lxpr_read_pid_auxv(lxpr_node_t *, lxpr_uiobuf_t *);
169 169 static void lxpr_read_pid_cgroup(lxpr_node_t *, lxpr_uiobuf_t *);
170 170 static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
171 171 static void lxpr_read_pid_comm(lxpr_node_t *, lxpr_uiobuf_t *);
172 172 static void lxpr_read_pid_env(lxpr_node_t *, lxpr_uiobuf_t *);
173 173 static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *);
174 174 static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
175 175 static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *);
176 176 static void lxpr_read_pid_oom_scr_adj(lxpr_node_t *, lxpr_uiobuf_t *);
177 177 static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
178 178 static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
179 179 static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
180 180
181 181 static void lxpr_read_pid_tid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
182 182 static void lxpr_read_pid_tid_status(lxpr_node_t *, lxpr_uiobuf_t *);
183 183
184 184 static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
185 185 static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
186 186 static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
187 187 static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *);
188 188 static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
189 189 static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
190 190 static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
191 191 static void lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *);
192 192 static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
193 193 static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
194 194 static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
195 195 static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
196 196 static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
197 197 static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
198 198 static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
199 199 static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
200 200 static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
201 201 static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
202 202 static void lxpr_read_net_tcp6(lxpr_node_t *, lxpr_uiobuf_t *);
203 203 static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
204 204 static void lxpr_read_net_udp6(lxpr_node_t *, lxpr_uiobuf_t *);
205 205 static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
206 206 static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *,
207 207 lxpr_uiobuf_t *);
208 208 static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *,
209 209 lxpr_uiobuf_t *);
210 210 static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *,
211 211 lxpr_uiobuf_t *);
212 212 static void lxpr_read_sys_kernel_caplcap(lxpr_node_t *, lxpr_uiobuf_t *);
213 213 static void lxpr_read_sys_kernel_corepatt(lxpr_node_t *, lxpr_uiobuf_t *);
214 214 static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *);
215 215 static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *);
216 216 static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *);
217 217 static void lxpr_read_sys_kernel_osrel(lxpr_node_t *, lxpr_uiobuf_t *);
218 218 static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *);
219 219 static void lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *, lxpr_uiobuf_t *);
220 220 static void lxpr_read_sys_kernel_sem(lxpr_node_t *, lxpr_uiobuf_t *);
221 221 static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, lxpr_uiobuf_t *);
222 222 static void lxpr_read_sys_kernel_shmmni(lxpr_node_t *, lxpr_uiobuf_t *);
223 223 static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *);
224 224 static void lxpr_read_sys_net_core_somaxc(lxpr_node_t *, lxpr_uiobuf_t *);
225 225 static void lxpr_read_sys_vm_minfr_kb(lxpr_node_t *, lxpr_uiobuf_t *);
226 226 static void lxpr_read_sys_vm_nhpages(lxpr_node_t *, lxpr_uiobuf_t *);
227 227 static void lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *, lxpr_uiobuf_t *);
228 228 static void lxpr_read_sys_vm_swappiness(lxpr_node_t *, lxpr_uiobuf_t *);
229 229
230 230 static int lxpr_write_sys_net_core_somaxc(lxpr_node_t *, uio_t *, cred_t *,
231 231 caller_context_t *);
232 232 static int lxpr_write_sys_kernel_corepatt(lxpr_node_t *, uio_t *, cred_t *,
233 233 caller_context_t *);
234 234
/*
 * Simple conversion
 */
#define	btok(x)	((x) >> 10)			/* bytes to kbytes */
#define	ptok(x)	((x) << (PAGESHIFT - 10))	/* pages to kbytes */
						/* (assumes PAGESHIFT >= 10) */

/* Fetch the lx brand-specific lwp data hanging off a kthread. */
#define	ttolxlwp(t)	((struct lx_lwp_data *)ttolwpbrand(t))

/*
 * rctl handles backing the Linux-visible IPC/semaphore limits; defined in
 * the resource-control subsystem elsewhere in the kernel.
 */
extern rctl_hndl_t rc_process_semmsl;
extern rctl_hndl_t rc_process_semopm;
extern rctl_hndl_t rc_zone_semmni;

extern rctl_hndl_t rc_zone_msgmni;
extern rctl_hndl_t rc_zone_shmmax;
extern rctl_hndl_t rc_zone_shmmni;
/* 2^32 - 1; used as a 4GB clamp/ceiling value */
#define	FOURGB	4294967295

/*
 * The maximum length of the concatenation of argument vector strings we
 * will return to the user via the branded procfs. Likewise for the env vector.
 */
int lxpr_maxargvlen = 4096;
int lxpr_maxenvvlen = 4096;
258 258
259 259 /*
260 260 * The lx /proc vnode operations vector
261 261 */
262 262 const fs_operation_def_t lxpr_vnodeops_template[] = {
263 263 VOPNAME_OPEN, { .vop_open = lxpr_open },
264 264 VOPNAME_CLOSE, { .vop_close = lxpr_close },
265 265 VOPNAME_READ, { .vop_read = lxpr_read },
266 266 VOPNAME_WRITE, { .vop_read = lxpr_write },
267 267 VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr },
268 268 VOPNAME_ACCESS, { .vop_access = lxpr_access },
269 269 VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup },
270 270 VOPNAME_CREATE, { .vop_create = lxpr_create },
271 271 VOPNAME_READDIR, { .vop_readdir = lxpr_readdir },
272 272 VOPNAME_READLINK, { .vop_readlink = lxpr_readlink },
273 273 VOPNAME_FSYNC, { .error = lxpr_sync },
274 274 VOPNAME_SEEK, { .error = lxpr_sync },
275 275 VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive },
276 276 VOPNAME_CMP, { .vop_cmp = lxpr_cmp },
277 277 VOPNAME_REALVP, { .vop_realvp = lxpr_realvp },
278 278 NULL, NULL
279 279 };
280 280
281 281
/*
 * file contents of an lx /proc directory.
 *
 * NOTE(review): each entry pairs an lxpr_nodetype_t with its Linux-visible
 * name; presumably consumed by lxpr_lookup_procdir()/lxpr_readdir_procdir()
 * -- confirm against those functions.
 */
static lxpr_dirent_t lx_procdir[] = {
	{ LXPR_CGROUPS,		"cgroups" },
	{ LXPR_CMDLINE,		"cmdline" },
	{ LXPR_CPUINFO,		"cpuinfo" },
	{ LXPR_DEVICES,		"devices" },
	{ LXPR_DISKSTATS,	"diskstats" },
	{ LXPR_DMA,		"dma" },
	{ LXPR_FILESYSTEMS,	"filesystems" },
	{ LXPR_INTERRUPTS,	"interrupts" },
	{ LXPR_IOPORTS,		"ioports" },
	{ LXPR_KCORE,		"kcore" },
	{ LXPR_KMSG,		"kmsg" },
	{ LXPR_LOADAVG,		"loadavg" },
	{ LXPR_MEMINFO,		"meminfo" },
	{ LXPR_MODULES,		"modules" },
	{ LXPR_MOUNTS,		"mounts" },
	{ LXPR_NETDIR,		"net" },
	{ LXPR_PARTITIONS,	"partitions" },
	{ LXPR_SELF,		"self" },
	{ LXPR_STAT,		"stat" },
	{ LXPR_SWAPS,		"swaps" },
	{ LXPR_SYSDIR,		"sys" },
	{ LXPR_UPTIME,		"uptime" },
	{ LXPR_VERSION,		"version" }
};

/* Number of entries in lx_procdir (compile-time array length). */
#define	PROCDIRFILES	(sizeof (lx_procdir) / sizeof (lx_procdir[0]))
312 312
/*
 * Contents of an lx /proc/<pid> directory.
 *
 * NOTE(review): mostly alphabetical, but "fd" is deliberately(?) last --
 * do not assume the table is sorted when adding entries; confirm how the
 * lookup/readdir code scans it.
 */
static lxpr_dirent_t piddir[] = {
	{ LXPR_PID_AUXV,	"auxv" },
	{ LXPR_PID_CGROUP,	"cgroup" },
	{ LXPR_PID_CMDLINE,	"cmdline" },
	{ LXPR_PID_COMM,	"comm" },
	{ LXPR_PID_CPU,		"cpu" },
	{ LXPR_PID_CURDIR,	"cwd" },
	{ LXPR_PID_ENV,		"environ" },
	{ LXPR_PID_EXE,		"exe" },
	{ LXPR_PID_LIMITS,	"limits" },
	{ LXPR_PID_MAPS,	"maps" },
	{ LXPR_PID_MEM,		"mem" },
	{ LXPR_PID_MOUNTINFO,	"mountinfo" },
	{ LXPR_PID_OOM_SCR_ADJ,	"oom_score_adj" },
	{ LXPR_PID_ROOTDIR,	"root" },
	{ LXPR_PID_STAT,	"stat" },
	{ LXPR_PID_STATM,	"statm" },
	{ LXPR_PID_STATUS,	"status" },
	{ LXPR_PID_TASKDIR,	"task" },
	{ LXPR_PID_FDDIR,	"fd" }
};

/* Number of entries in piddir (compile-time array length). */
#define	PIDDIRFILES	(sizeof (piddir) / sizeof (piddir[0]))
339 339
/*
 * Contents of an lx /proc/<pid>/task/<tid> directory.
 *
 * Parallels piddir above, but per-thread files (auxv, comm, oom_score_adj,
 * stat, status) use the LXPR_PID_TID_* node types instead.
 */
static lxpr_dirent_t tiddir[] = {
	{ LXPR_PID_TID_AUXV,	"auxv" },
	{ LXPR_PID_CGROUP,	"cgroup" },
	{ LXPR_PID_CMDLINE,	"cmdline" },
	{ LXPR_PID_TID_COMM,	"comm" },
	{ LXPR_PID_CPU,		"cpu" },
	{ LXPR_PID_CURDIR,	"cwd" },
	{ LXPR_PID_ENV,		"environ" },
	{ LXPR_PID_EXE,		"exe" },
	{ LXPR_PID_LIMITS,	"limits" },
	{ LXPR_PID_MAPS,	"maps" },
	{ LXPR_PID_MEM,		"mem" },
	{ LXPR_PID_MOUNTINFO,	"mountinfo" },
	{ LXPR_PID_TID_OOM_SCR_ADJ,	"oom_score_adj" },
	{ LXPR_PID_ROOTDIR,	"root" },
	{ LXPR_PID_TID_STAT,	"stat" },
	{ LXPR_PID_STATM,	"statm" },
	{ LXPR_PID_TID_STATUS,	"status" },
	{ LXPR_PID_FDDIR,	"fd" }
};

/* Number of entries in tiddir (compile-time array length). */
#define	TIDDIRFILES	(sizeof (tiddir) / sizeof (tiddir[0]))

/* Linux RLIM_INFINITY: all-ones 64-bit rlimit value */
#define	LX_RLIM_INFINITY	0xFFFFFFFFFFFFFFFF

/*
 * True when an rctl value represents "no limit".
 * NOTE(review): the macro argument x is not parenthesized, so callers must
 * pass a simple lvalue/pointer expression -- consider ((x)->rcv_flagaction)
 * if this is ever used with a more complex argument.
 */
#define	RCTL_INFINITE(x) \
	((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \
	(x->rcv_flagaction & RCTL_GLOBAL_INFINITE))
371 371
/*
 * Table mapping Linux /proc/<pid>/limits rows to the native rctl that
 * backs each one.  A NULL rlim_rctl means there is no native source and
 * (presumably) an unlimited/synthetic value is reported -- confirm in
 * lxpr_read_pid_limits().  The table is terminated by an all-NULL entry.
 */
typedef struct lxpr_rlimtab {
	char	*rlim_name;	/* limit name */
	char	*rlim_unit;	/* limit unit */
	char	*rlim_rctl;	/* rctl source */
} lxpr_rlimtab_t;

static lxpr_rlimtab_t lxpr_rlimtab[] = {
	{ "Max cpu time",	"seconds",	"process.max-cpu-time" },
	{ "Max file size",	"bytes",	"process.max-file-size" },
	{ "Max data size",	"bytes",	"process.max-data-size" },
	{ "Max stack size",	"bytes",	"process.max-stack-size" },
	{ "Max core file size",	"bytes",	"process.max-core-size" },
	{ "Max resident set",	"bytes",	"zone.max-physical-memory" },
	{ "Max processes",	"processes",	"zone.max-lwps" },
	{ "Max open files",	"files",	"process.max-file-descriptor" },
	{ "Max locked memory",	"bytes",	"zone.max-locked-memory" },
	{ "Max address space",	"bytes",	"process.max-address-space" },
	{ "Max file locks",	"locks",	NULL },
	{ "Max pending signals",	"signals",
	    "process.max-sigqueue-size" },
	{ "Max msgqueue size",	"bytes",	"process.max-msg-messages" },
	{ NULL, NULL, NULL }	/* terminator sentinel */
};
395 395
396 396
/*
 * contents of lx /proc/net directory
 */
static lxpr_dirent_t netdir[] = {
	{ LXPR_NET_ARP,		"arp" },
	{ LXPR_NET_DEV,		"dev" },
	{ LXPR_NET_DEV_MCAST,	"dev_mcast" },
	{ LXPR_NET_IF_INET6,	"if_inet6" },
	{ LXPR_NET_IGMP,	"igmp" },
	{ LXPR_NET_IP_MR_CACHE,	"ip_mr_cache" },
	{ LXPR_NET_IP_MR_VIF,	"ip_mr_vif" },
	{ LXPR_NET_IPV6_ROUTE,	"ipv6_route" },
	{ LXPR_NET_MCFILTER,	"mcfilter" },
	{ LXPR_NET_NETSTAT,	"netstat" },
	{ LXPR_NET_RAW,		"raw" },
	{ LXPR_NET_ROUTE,	"route" },
	{ LXPR_NET_RPC,		"rpc" },
	{ LXPR_NET_RT_CACHE,	"rt_cache" },
	{ LXPR_NET_SOCKSTAT,	"sockstat" },
	{ LXPR_NET_SNMP,	"snmp" },
	{ LXPR_NET_STAT,	"stat" },
	{ LXPR_NET_TCP,		"tcp" },
	{ LXPR_NET_TCP6,	"tcp6" },
	{ LXPR_NET_UDP,		"udp" },
	{ LXPR_NET_UDP6,	"udp6" },
	{ LXPR_NET_UNIX,	"unix" }
};

/* Number of entries in netdir (compile-time array length). */
#define	NETDIRFILES	(sizeof (netdir) / sizeof (netdir[0]))
426 426
/*
 * contents of /proc/sys directory
 */
static lxpr_dirent_t sysdir[] = {
	{ LXPR_SYS_FSDIR,	"fs" },
	{ LXPR_SYS_KERNELDIR,	"kernel" },
	{ LXPR_SYS_NETDIR,	"net" },
	{ LXPR_SYS_VMDIR,	"vm" },
};

#define	SYSDIRFILES	(sizeof (sysdir) / sizeof (sysdir[0]))

/*
 * contents of /proc/sys/fs directory
 */
static lxpr_dirent_t sys_fsdir[] = {
	{ LXPR_SYS_FS_INOTIFYDIR,	"inotify" },
};

#define	SYS_FSDIRFILES	(sizeof (sys_fsdir) / sizeof (sys_fsdir[0]))

/*
 * contents of /proc/sys/fs/inotify directory
 */
static lxpr_dirent_t sys_fs_inotifydir[] = {
	{ LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS,	"max_queued_events" },
	{ LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES,	"max_user_instances" },
	{ LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES,		"max_user_watches" },
};

#define	SYS_FS_INOTIFYDIRFILES \
	(sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0]))

/*
 * contents of /proc/sys/kernel directory
 */
static lxpr_dirent_t sys_kerneldir[] = {
	{ LXPR_SYS_KERNEL_CAPLCAP,	"cap_last_cap" },
	{ LXPR_SYS_KERNEL_COREPATT,	"core_pattern" },
	{ LXPR_SYS_KERNEL_HOSTNAME,	"hostname" },
	{ LXPR_SYS_KERNEL_MSGMNI,	"msgmni" },
	{ LXPR_SYS_KERNEL_NGROUPS_MAX,	"ngroups_max" },
	{ LXPR_SYS_KERNEL_OSREL,	"osrelease" },
	{ LXPR_SYS_KERNEL_PID_MAX,	"pid_max" },
	{ LXPR_SYS_KERNEL_RANDDIR,	"random" },
	{ LXPR_SYS_KERNEL_SEM,		"sem" },
	{ LXPR_SYS_KERNEL_SHMMAX,	"shmmax" },
	{ LXPR_SYS_KERNEL_SHMMNI,	"shmmni" },
	{ LXPR_SYS_KERNEL_THREADS_MAX,	"threads-max" },
};

#define	SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0]))

/*
 * contents of /proc/sys/kernel/random directory
 */
static lxpr_dirent_t sys_randdir[] = {
	{ LXPR_SYS_KERNEL_RAND_BOOTID,	"boot_id" },
};

#define	SYS_RANDDIRFILES (sizeof (sys_randdir) / sizeof (sys_randdir[0]))

/*
 * contents of /proc/sys/net directory
 */
static lxpr_dirent_t sys_netdir[] = {
	{ LXPR_SYS_NET_COREDIR,	"core" },
};

#define	SYS_NETDIRFILES	(sizeof (sys_netdir) / sizeof (sys_netdir[0]))

/*
 * contents of /proc/sys/net/core directory
 */
static lxpr_dirent_t sys_net_coredir[] = {
	{ LXPR_SYS_NET_CORE_SOMAXCON,	"somaxconn" },
};

#define	SYS_NET_COREDIRFILES \
	(sizeof (sys_net_coredir) / sizeof (sys_net_coredir[0]))

/*
 * contents of /proc/sys/vm directory
 */
static lxpr_dirent_t sys_vmdir[] = {
	{ LXPR_SYS_VM_MINFR_KB,		"min_free_kbytes" },
	{ LXPR_SYS_VM_NHUGEP,		"nr_hugepages" },
	{ LXPR_SYS_VM_OVERCOMMIT_MEM,	"overcommit_memory" },
	{ LXPR_SYS_VM_SWAPPINESS,	"swappiness" },
};

#define	SYS_VMDIRFILES	(sizeof (sys_vmdir) / sizeof (sys_vmdir[0]))
519 519
/*
 * lxpr_open(): Vnode operation for VOP_OPEN()
 *
 * Rejects write opens of anything but the explicitly writable files, and
 * for nodes that shadow a real vnode (fd entries, cwd, root) swaps the
 * caller's vnode for the underlying one so subsequent operations go
 * straight to it.
 */
static int
lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	vnode_t		*vp = *vpp;
	lxpr_node_t	*lxpnp = VTOLXP(vp);
	lxpr_nodetype_t	type = lxpnp->lxpr_type;
	vnode_t		*rvp;
	int		error = 0;

	if (flag & FWRITE) {
		/* Restrict writes to certain files */
		switch (type) {
		case LXPR_PID_OOM_SCR_ADJ:
		case LXPR_PID_TID_OOM_SCR_ADJ:
		case LXPR_SYS_KERNEL_COREPATT:
		case LXPR_SYS_NET_CORE_SOMAXCON:
		case LXPR_SYS_VM_OVERCOMMIT_MEM:
		case LXPR_SYS_VM_SWAPPINESS:
		case LXPR_PID_FD_FD:
		case LXPR_PID_TID_FD_FD:
			break;
		default:
			return (EPERM);
		}
	}

	/*
	 * If we are opening an underlying file only allow regular files,
	 * fifos or sockets; reject the open for anything else.
	 * Just do it if we are opening the current or root directory.
	 */
	if (lxpnp->lxpr_realvp != NULL) {
		rvp = lxpnp->lxpr_realvp;

		if (type == LXPR_PID_FD_FD && rvp->v_type != VREG &&
		    rvp->v_type != VFIFO && rvp->v_type != VSOCK) {
			error = EACCES;
		} else {
			if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) {
				/*
				 * This flag lets the fifo open know that
				 * we're using proc/fd to open a fd which we
				 * already have open. Otherwise, the fifo might
				 * reject an open if the other end has closed.
				 */
				flag |= FKLYR;
			}
			/*
			 * Need to hold rvp since VOP_OPEN() may release it.
			 */
			VN_HOLD(rvp);
			error = VOP_OPEN(&rvp, flag, cr, ct);
			if (error) {
				/* open failed: drop the hold we just took */
				VN_RELE(rvp);
			} else {
				/*
				 * Success: hand the caller the real vnode
				 * (which we hold) and release our lx /proc
				 * node.  Ordering matters here -- *vpp must
				 * be swapped before vp is released.
				 */
				*vpp = rvp;
				VN_RELE(vp);
			}
		}
	}

	return (error);
}
586 586
587 587
588 588 /*
589 589 * lxpr_close(): Vnode operation for VOP_CLOSE()
590 590 */
591 591 /* ARGSUSED */
592 592 static int
593 593 lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
594 594 caller_context_t *ct)
595 595 {
596 596 lxpr_node_t *lxpr = VTOLXP(vp);
597 597 lxpr_nodetype_t type = lxpr->lxpr_type;
598 598
599 599 /*
600 600 * we should never get here because the close is done on the realvp
601 601 * for these nodes
602 602 */
603 603 ASSERT(type != LXPR_PID_FD_FD &&
604 604 type != LXPR_PID_CURDIR &&
605 605 type != LXPR_PID_ROOTDIR &&
606 606 type != LXPR_PID_EXE);
607 607
608 608 return (0);
609 609 }
610 610
/*
 * Array of read functions, indexed by lx /proc file type.
 *
 * NOTE(review): this table is positional -- the order of entries must match
 * the lxpr_nodetype_t enumeration (presumably declared in lx_proc.h) exactly,
 * with one entry per node type (LXPR_NFILES total).  An out-of-order or
 * missing entry silently dispatches the wrong read handler; verify both
 * lists together whenever a node type is added.
 */
static void (*lxpr_read_function[LXPR_NFILES])() = {
	lxpr_read_isdir,		/* /proc			*/
	lxpr_read_isdir,		/* /proc/<pid>			*/
	lxpr_read_pid_auxv,		/* /proc/<pid>/auxv		*/
	lxpr_read_pid_cgroup,		/* /proc/<pid>/cgroup		*/
	lxpr_read_pid_cmdline,		/* /proc/<pid>/cmdline		*/
	lxpr_read_pid_comm,		/* /proc/<pid>/comm		*/
	lxpr_read_empty,		/* /proc/<pid>/cpu		*/
	lxpr_read_invalid,		/* /proc/<pid>/cwd		*/
	lxpr_read_pid_env,		/* /proc/<pid>/environ		*/
	lxpr_read_invalid,		/* /proc/<pid>/exe		*/
	lxpr_read_pid_limits,		/* /proc/<pid>/limits		*/
	lxpr_read_pid_maps,		/* /proc/<pid>/maps		*/
	lxpr_read_empty,		/* /proc/<pid>/mem		*/
	lxpr_read_pid_mountinfo,	/* /proc/<pid>/mountinfo	*/
	lxpr_read_pid_oom_scr_adj,	/* /proc/<pid>/oom_score_adj	*/
	lxpr_read_invalid,		/* /proc/<pid>/root		*/
	lxpr_read_pid_stat,		/* /proc/<pid>/stat		*/
	lxpr_read_pid_statm,		/* /proc/<pid>/statm		*/
	lxpr_read_pid_status,		/* /proc/<pid>/status		*/
	lxpr_read_isdir,		/* /proc/<pid>/task		*/
	lxpr_read_isdir,		/* /proc/<pid>/task/nn		*/
	lxpr_read_isdir,		/* /proc/<pid>/fd		*/
	lxpr_read_fd,			/* /proc/<pid>/fd/nn		*/
	lxpr_read_pid_auxv,		/* /proc/<pid>/task/<tid>/auxv	*/
	lxpr_read_pid_cgroup,		/* /proc/<pid>/task/<tid>/cgroup */
	lxpr_read_pid_cmdline,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_read_pid_comm,		/* /proc/<pid>/task/<tid>/comm	*/
	lxpr_read_empty,		/* /proc/<pid>/task/<tid>/cpu	*/
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/cwd	*/
	lxpr_read_pid_env,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/exe	*/
	lxpr_read_pid_limits,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_read_pid_maps,		/* /proc/<pid>/task/<tid>/maps	*/
	lxpr_read_empty,		/* /proc/<pid>/task/<tid>/mem	*/
	lxpr_read_pid_mountinfo,	/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_read_pid_oom_scr_adj,	/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_read_invalid,		/* /proc/<pid>/task/<tid>/root	*/
	lxpr_read_pid_tid_stat,		/* /proc/<pid>/task/<tid>/stat	*/
	lxpr_read_pid_statm,		/* /proc/<pid>/task/<tid>/statm	*/
	lxpr_read_pid_tid_status,	/* /proc/<pid>/task/<tid>/status */
	lxpr_read_isdir,		/* /proc/<pid>/task/<tid>/fd	*/
	lxpr_read_fd,			/* /proc/<pid>/task/<tid>/fd/nn	*/
	lxpr_read_cgroups,		/* /proc/cgroups		*/
	lxpr_read_empty,		/* /proc/cmdline		*/
	lxpr_read_cpuinfo,		/* /proc/cpuinfo		*/
	lxpr_read_empty,		/* /proc/devices		*/
	lxpr_read_diskstats,		/* /proc/diskstats		*/
	lxpr_read_empty,		/* /proc/dma			*/
	lxpr_read_filesystems,		/* /proc/filesystems		*/
	lxpr_read_empty,		/* /proc/interrupts		*/
	lxpr_read_empty,		/* /proc/ioports		*/
	lxpr_read_empty,		/* /proc/kcore			*/
	lxpr_read_invalid,		/* /proc/kmsg -- see lxpr_read() */
	lxpr_read_loadavg,		/* /proc/loadavg		*/
	lxpr_read_meminfo,		/* /proc/meminfo		*/
	lxpr_read_empty,		/* /proc/modules		*/
	lxpr_read_mounts,		/* /proc/mounts			*/
	lxpr_read_isdir,		/* /proc/net			*/
	lxpr_read_net_arp,		/* /proc/net/arp		*/
	lxpr_read_net_dev,		/* /proc/net/dev		*/
	lxpr_read_net_dev_mcast,	/* /proc/net/dev_mcast		*/
	lxpr_read_net_if_inet6,		/* /proc/net/if_inet6		*/
	lxpr_read_net_igmp,		/* /proc/net/igmp		*/
	lxpr_read_net_ip_mr_cache,	/* /proc/net/ip_mr_cache	*/
	lxpr_read_net_ip_mr_vif,	/* /proc/net/ip_mr_vif		*/
	lxpr_read_net_ipv6_route,	/* /proc/net/ipv6_route		*/
	lxpr_read_net_mcfilter,		/* /proc/net/mcfilter		*/
	lxpr_read_net_netstat,		/* /proc/net/netstat		*/
	lxpr_read_net_raw,		/* /proc/net/raw		*/
	lxpr_read_net_route,		/* /proc/net/route		*/
	lxpr_read_net_rpc,		/* /proc/net/rpc		*/
	lxpr_read_net_rt_cache,		/* /proc/net/rt_cache		*/
	lxpr_read_net_sockstat,		/* /proc/net/sockstat		*/
	lxpr_read_net_snmp,		/* /proc/net/snmp		*/
	lxpr_read_net_stat,		/* /proc/net/stat		*/
	lxpr_read_net_tcp,		/* /proc/net/tcp		*/
	lxpr_read_net_tcp6,		/* /proc/net/tcp6		*/
	lxpr_read_net_udp,		/* /proc/net/udp		*/
	lxpr_read_net_udp6,		/* /proc/net/udp6		*/
	lxpr_read_net_unix,		/* /proc/net/unix		*/
	lxpr_read_partitions,		/* /proc/partitions		*/
	lxpr_read_invalid,		/* /proc/self			*/
	lxpr_read_stat,			/* /proc/stat			*/
	lxpr_read_swaps,		/* /proc/swaps			*/
	lxpr_read_invalid,		/* /proc/sys			*/
	lxpr_read_invalid,		/* /proc/sys/fs			*/
	lxpr_read_invalid,		/* /proc/sys/fs/inotify		*/
	lxpr_read_sys_fs_inotify_max_queued_events, /* max_queued_events */
	lxpr_read_sys_fs_inotify_max_user_instances, /* max_user_instances */
	lxpr_read_sys_fs_inotify_max_user_watches, /* max_user_watches	*/
	lxpr_read_invalid,		/* /proc/sys/kernel		*/
	lxpr_read_sys_kernel_caplcap,	/* /proc/sys/kernel/cap_last_cap */
	lxpr_read_sys_kernel_corepatt,	/* /proc/sys/kernel/core_pattern */
	lxpr_read_sys_kernel_hostname,	/* /proc/sys/kernel/hostname	*/
	lxpr_read_sys_kernel_msgmni,	/* /proc/sys/kernel/msgmni	*/
	lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */
	lxpr_read_sys_kernel_osrel,	/* /proc/sys/kernel/osrelease	*/
	lxpr_read_sys_kernel_pid_max,	/* /proc/sys/kernel/pid_max	*/
	lxpr_read_invalid,		/* /proc/sys/kernel/random	*/
	lxpr_read_sys_kernel_rand_bootid, /* /proc/sys/kernel/random/boot_id */
	lxpr_read_sys_kernel_sem,	/* /proc/sys/kernel/sem		*/
	lxpr_read_sys_kernel_shmmax,	/* /proc/sys/kernel/shmmax	*/
	lxpr_read_sys_kernel_shmmni,	/* /proc/sys/kernel/shmmni	*/
	lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */
	lxpr_read_invalid,		/* /proc/sys/net		*/
	lxpr_read_invalid,		/* /proc/sys/net/core		*/
	lxpr_read_sys_net_core_somaxc,	/* /proc/sys/net/core/somaxconn	*/
	lxpr_read_invalid,		/* /proc/sys/vm			*/
	lxpr_read_sys_vm_minfr_kb,	/* /proc/sys/vm/min_free_kbytes	*/
	lxpr_read_sys_vm_nhpages,	/* /proc/sys/vm/nr_hugepages	*/
	lxpr_read_sys_vm_overcommit_mem, /* /proc/sys/vm/overcommit_memory */
	lxpr_read_sys_vm_swappiness,	/* /proc/sys/vm/swappiness	*/
	lxpr_read_uptime,		/* /proc/uptime			*/
	lxpr_read_version,		/* /proc/version		*/
};
727 727
/*
 * Array of lookup functions, indexed by lx /proc file type.
 *
 * NOTE(review): the order of these entries must match the lxpr_nodetype_t
 * enumeration exactly (and stay in lock-step with the read and readdir
 * tables), since the node type is used directly as the array index.
 */
static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
	lxpr_lookup_procdir,		/* /proc */
	lxpr_lookup_piddir,		/* /proc/<pid> */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/auxv */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cgroup */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cmdline */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/comm */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cpu */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cwd */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/environ */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/exe */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/limits */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/maps */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/mem */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/mountinfo */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/oom_score_adj */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/root */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/stat */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/statm */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/status */
	lxpr_lookup_taskdir,		/* /proc/<pid>/task */
	lxpr_lookup_task_tid_dir,	/* /proc/<pid>/task/nn */
	lxpr_lookup_fddir,		/* /proc/<pid>/fd */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/fd/nn */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/auxv */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cgroup */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/comm */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cpu */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/cwd */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/exe */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/maps */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/mem */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/root */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/stat */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/statm */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/status */
	lxpr_lookup_fddir,		/* /proc/<pid>/task/<tid>/fd */
	lxpr_lookup_not_a_dir,		/* /proc/<pid>/task/<tid>/fd/nn */
	lxpr_lookup_not_a_dir,		/* /proc/cgroups */
	lxpr_lookup_not_a_dir,		/* /proc/cmdline */
	lxpr_lookup_not_a_dir,		/* /proc/cpuinfo */
	lxpr_lookup_not_a_dir,		/* /proc/devices */
	lxpr_lookup_not_a_dir,		/* /proc/diskstats */
	lxpr_lookup_not_a_dir,		/* /proc/dma */
	lxpr_lookup_not_a_dir,		/* /proc/filesystems */
	lxpr_lookup_not_a_dir,		/* /proc/interrupts */
	lxpr_lookup_not_a_dir,		/* /proc/ioports */
	lxpr_lookup_not_a_dir,		/* /proc/kcore */
	lxpr_lookup_not_a_dir,		/* /proc/kmsg */
	lxpr_lookup_not_a_dir,		/* /proc/loadavg */
	lxpr_lookup_not_a_dir,		/* /proc/meminfo */
	lxpr_lookup_not_a_dir,		/* /proc/modules */
	lxpr_lookup_not_a_dir,		/* /proc/mounts */
	lxpr_lookup_netdir,		/* /proc/net */
	lxpr_lookup_not_a_dir,		/* /proc/net/arp */
	lxpr_lookup_not_a_dir,		/* /proc/net/dev */
	lxpr_lookup_not_a_dir,		/* /proc/net/dev_mcast */
	lxpr_lookup_not_a_dir,		/* /proc/net/if_inet6 */
	lxpr_lookup_not_a_dir,		/* /proc/net/igmp */
	lxpr_lookup_not_a_dir,		/* /proc/net/ip_mr_cache */
	lxpr_lookup_not_a_dir,		/* /proc/net/ip_mr_vif */
	lxpr_lookup_not_a_dir,		/* /proc/net/ipv6_route */
	lxpr_lookup_not_a_dir,		/* /proc/net/mcfilter */
	lxpr_lookup_not_a_dir,		/* /proc/net/netstat */
	lxpr_lookup_not_a_dir,		/* /proc/net/raw */
	lxpr_lookup_not_a_dir,		/* /proc/net/route */
	lxpr_lookup_not_a_dir,		/* /proc/net/rpc */
	lxpr_lookup_not_a_dir,		/* /proc/net/rt_cache */
	lxpr_lookup_not_a_dir,		/* /proc/net/sockstat */
	lxpr_lookup_not_a_dir,		/* /proc/net/snmp */
	lxpr_lookup_not_a_dir,		/* /proc/net/stat */
	lxpr_lookup_not_a_dir,		/* /proc/net/tcp */
	lxpr_lookup_not_a_dir,		/* /proc/net/tcp6 */
	lxpr_lookup_not_a_dir,		/* /proc/net/udp */
	lxpr_lookup_not_a_dir,		/* /proc/net/udp6 */
	lxpr_lookup_not_a_dir,		/* /proc/net/unix */
	lxpr_lookup_not_a_dir,		/* /proc/partitions */
	lxpr_lookup_not_a_dir,		/* /proc/self */
	lxpr_lookup_not_a_dir,		/* /proc/stat */
	lxpr_lookup_not_a_dir,		/* /proc/swaps */
	lxpr_lookup_sysdir,		/* /proc/sys */
	lxpr_lookup_sys_fsdir,		/* /proc/sys/fs */
	lxpr_lookup_sys_fs_inotifydir,	/* /proc/sys/fs/inotify */
	lxpr_lookup_not_a_dir,		/* .../inotify/max_queued_events */
	lxpr_lookup_not_a_dir,		/* .../inotify/max_user_instances */
	lxpr_lookup_not_a_dir,		/* .../inotify/max_user_watches */
	lxpr_lookup_sys_kerneldir,	/* /proc/sys/kernel */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/cap_last_cap */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/core_pattern */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/hostname */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/msgmni */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/ngroups_max */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/osrelease */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/pid_max */
	lxpr_lookup_sys_kdir_randdir,	/* /proc/sys/kernel/random */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/random/boot_id */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/sem */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/shmmax */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/shmmni */
	lxpr_lookup_not_a_dir,		/* /proc/sys/kernel/threads-max */
	lxpr_lookup_sys_netdir,		/* /proc/sys/net */
	lxpr_lookup_sys_net_coredir,	/* /proc/sys/net/core */
	lxpr_lookup_not_a_dir,		/* /proc/sys/net/core/somaxconn */
	lxpr_lookup_sys_vmdir,		/* /proc/sys/vm */
	lxpr_lookup_not_a_dir,		/* /proc/sys/vm/min_free_kbytes */
	lxpr_lookup_not_a_dir,		/* /proc/sys/vm/nr_hugepages */
	lxpr_lookup_not_a_dir,		/* /proc/sys/vm/overcommit_memory */
	lxpr_lookup_not_a_dir,		/* /proc/sys/vm/swappiness */
	lxpr_lookup_not_a_dir,		/* /proc/uptime */
	lxpr_lookup_not_a_dir,		/* /proc/version */
};
847 847
/*
 * Array of readdir functions, indexed by /proc file type.
 *
 * NOTE(review): the order of these entries must match the lxpr_nodetype_t
 * enumeration exactly (and stay in lock-step with the read and lookup
 * tables), since the node type is used directly as the array index.
 */
static int (*lxpr_readdir_function[LXPR_NFILES])() = {
	lxpr_readdir_procdir,		/* /proc */
	lxpr_readdir_piddir,		/* /proc/<pid> */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/auxv */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cgroup */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cmdline */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/comm */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cpu */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cwd */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/environ */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/exe */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/limits */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/maps */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/mem */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/mountinfo */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/oom_score_adj */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/root */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/stat */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/statm */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/status */
	lxpr_readdir_taskdir,		/* /proc/<pid>/task */
	lxpr_readdir_task_tid_dir,	/* /proc/<pid>/task/nn */
	lxpr_readdir_fddir,		/* /proc/<pid>/fd */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/fd/nn */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/auxv */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/cgroup */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/cmdline */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/comm */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/cpu */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/cwd */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/environ */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/exe */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/limits */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/maps */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/mem */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/mountinfo */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/oom_scr_adj */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/root */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/stat */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/statm */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/status */
	lxpr_readdir_fddir,		/* /proc/<pid>/task/<tid>/fd */
	lxpr_readdir_not_a_dir,		/* /proc/<pid>/task/<tid>/fd/nn */
	lxpr_readdir_not_a_dir,		/* /proc/cgroups */
	lxpr_readdir_not_a_dir,		/* /proc/cmdline */
	lxpr_readdir_not_a_dir,		/* /proc/cpuinfo */
	lxpr_readdir_not_a_dir,		/* /proc/devices */
	lxpr_readdir_not_a_dir,		/* /proc/diskstats */
	lxpr_readdir_not_a_dir,		/* /proc/dma */
	lxpr_readdir_not_a_dir,		/* /proc/filesystems */
	lxpr_readdir_not_a_dir,		/* /proc/interrupts */
	lxpr_readdir_not_a_dir,		/* /proc/ioports */
	lxpr_readdir_not_a_dir,		/* /proc/kcore */
	lxpr_readdir_not_a_dir,		/* /proc/kmsg */
	lxpr_readdir_not_a_dir,		/* /proc/loadavg */
	lxpr_readdir_not_a_dir,		/* /proc/meminfo */
	lxpr_readdir_not_a_dir,		/* /proc/modules */
	lxpr_readdir_not_a_dir,		/* /proc/mounts */
	lxpr_readdir_netdir,		/* /proc/net */
	lxpr_readdir_not_a_dir,		/* /proc/net/arp */
	lxpr_readdir_not_a_dir,		/* /proc/net/dev */
	lxpr_readdir_not_a_dir,		/* /proc/net/dev_mcast */
	lxpr_readdir_not_a_dir,		/* /proc/net/if_inet6 */
	lxpr_readdir_not_a_dir,		/* /proc/net/igmp */
	lxpr_readdir_not_a_dir,		/* /proc/net/ip_mr_cache */
	lxpr_readdir_not_a_dir,		/* /proc/net/ip_mr_vif */
	lxpr_readdir_not_a_dir,		/* /proc/net/ipv6_route */
	lxpr_readdir_not_a_dir,		/* /proc/net/mcfilter */
	lxpr_readdir_not_a_dir,		/* /proc/net/netstat */
	lxpr_readdir_not_a_dir,		/* /proc/net/raw */
	lxpr_readdir_not_a_dir,		/* /proc/net/route */
	lxpr_readdir_not_a_dir,		/* /proc/net/rpc */
	lxpr_readdir_not_a_dir,		/* /proc/net/rt_cache */
	lxpr_readdir_not_a_dir,		/* /proc/net/sockstat */
	lxpr_readdir_not_a_dir,		/* /proc/net/snmp */
	lxpr_readdir_not_a_dir,		/* /proc/net/stat */
	lxpr_readdir_not_a_dir,		/* /proc/net/tcp */
	lxpr_readdir_not_a_dir,		/* /proc/net/tcp6 */
	lxpr_readdir_not_a_dir,		/* /proc/net/udp */
	lxpr_readdir_not_a_dir,		/* /proc/net/udp6 */
	lxpr_readdir_not_a_dir,		/* /proc/net/unix */
	lxpr_readdir_not_a_dir,		/* /proc/partitions */
	lxpr_readdir_not_a_dir,		/* /proc/self */
	lxpr_readdir_not_a_dir,		/* /proc/stat */
	lxpr_readdir_not_a_dir,		/* /proc/swaps */
	lxpr_readdir_sysdir,		/* /proc/sys */
	lxpr_readdir_sys_fsdir,		/* /proc/sys/fs */
	lxpr_readdir_sys_fs_inotifydir,	/* /proc/sys/fs/inotify */
	lxpr_readdir_not_a_dir,		/* .../inotify/max_queued_events */
	lxpr_readdir_not_a_dir,		/* .../inotify/max_user_instances */
	lxpr_readdir_not_a_dir,		/* .../inotify/max_user_watches */
	lxpr_readdir_sys_kerneldir,	/* /proc/sys/kernel */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/cap_last_cap */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/core_pattern */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/hostname */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/msgmni */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/ngroups_max */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/osrelease */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/pid_max */
	lxpr_readdir_sys_kdir_randdir,	/* /proc/sys/kernel/random */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/random/boot_id */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/sem */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/shmmax */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/shmmni */
	lxpr_readdir_not_a_dir,		/* /proc/sys/kernel/threads-max */
	lxpr_readdir_sys_netdir,	/* /proc/sys/net */
	lxpr_readdir_sys_net_coredir,	/* /proc/sys/net/core */
	lxpr_readdir_not_a_dir,		/* /proc/sys/net/core/somaxconn */
	lxpr_readdir_sys_vmdir,		/* /proc/sys/vm */
	lxpr_readdir_not_a_dir,		/* /proc/sys/vm/min_free_kbytes */
	lxpr_readdir_not_a_dir,		/* /proc/sys/vm/nr_hugepages */
	lxpr_readdir_not_a_dir,		/* /proc/sys/vm/overcommit_memory */
	lxpr_readdir_not_a_dir,		/* /proc/sys/vm/swappiness */
	lxpr_readdir_not_a_dir,		/* /proc/uptime */
	lxpr_readdir_not_a_dir,		/* /proc/version */
};
967 967
968 968
969 969 /*
970 970 * lxpr_read(): Vnode operation for VOP_READ()
971 971 *
972 972 * As the format of all the files that can be read in the lx procfs is human
973 973 * readable and not binary structures there do not have to be different
974 974 * read variants depending on whether the reading process model is 32 or 64 bits
975 975 * (at least in general, and certainly the difference is unlikely to be enough
976 976 * to justify have different routines for 32 and 64 bit reads
977 977 */
978 978 /* ARGSUSED */
979 979 static int
980 980 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
981 981 caller_context_t *ct)
982 982 {
983 983 lxpr_node_t *lxpnp = VTOLXP(vp);
984 984 lxpr_nodetype_t type = lxpnp->lxpr_type;
985 985 lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
986 986 int error;
987 987
988 988 ASSERT(type < LXPR_NFILES);
989 989
990 990 if (type == LXPR_KMSG) {
991 991 ldi_ident_t li = VTOLXPM(vp)->lxprm_li;
992 992 ldi_handle_t ldih;
993 993 struct strioctl str;
994 994 int rv;
995 995
996 996 /*
997 997 * Open the zone's console device using the layered driver
998 998 * interface.
999 999 */
1000 1000 if ((error =
1001 1001 ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li)) != 0)
1002 1002 return (error);
1003 1003
1004 1004 /*
1005 1005 * Send an ioctl to the underlying console device, letting it
1006 1006 * know we're interested in getting console messages.
1007 1007 */
1008 1008 str.ic_cmd = I_CONSLOG;
1009 1009 str.ic_timout = 0;
1010 1010 str.ic_len = 0;
1011 1011 str.ic_dp = NULL;
1012 1012 if ((error = ldi_ioctl(ldih, I_STR,
1013 1013 (intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
1014 1014 return (error);
1015 1015
1016 1016 lxpr_read_kmsg(lxpnp, uiobuf, ldih);
1017 1017
1018 1018 if ((error = ldi_close(ldih, FREAD, cr)) != 0)
1019 1019 return (error);
1020 1020 } else {
1021 1021 lxpr_read_function[type](lxpnp, uiobuf);
1022 1022 }
1023 1023
1024 1024 error = lxpr_uiobuf_flush(uiobuf);
1025 1025 lxpr_uiobuf_free(uiobuf);
1026 1026
1027 1027 return (error);
1028 1028 }
1029 1029
/*
 * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
 *
 * Various special case reads:
 * - trying to read a directory
 * - invalid file (used to mean a file that should be implemented,
 *   but isn't yet)
 * - empty file
 * - wait to be able to read a file that will never have anything to read
 */
/* ARGSUSED */
static void
lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Reading a directory node always fails with EISDIR. */
	lxpr_uiobuf_seterr(uiobuf, EISDIR);
}
1046 1046
/* ARGSUSED */
static void
lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Catch-all read handler for nodes with no read semantics. */
	lxpr_uiobuf_seterr(uiobuf, EINVAL);
}
1053 1053
/* ARGSUSED */
static void
lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Intentionally empty: the file always reads as zero bytes (EOF). */
}
1059 1059
1060 1060 /*
1061 1061 * lxpr_read_pid_auxv(): read process aux vector
1062 1062 */
1063 1063 static void
1064 1064 lxpr_read_pid_auxv(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1065 1065 {
1066 1066 proc_t *p;
1067 1067 lx_proc_data_t *pd;
1068 1068 lx_elf_data_t *edp = NULL;
1069 1069 int i, cnt;
1070 1070
1071 1071 ASSERT(lxpnp->lxpr_type == LXPR_PID_AUXV ||
1072 1072 lxpnp->lxpr_type == LXPR_PID_TID_AUXV);
1073 1073
1074 1074 p = lxpr_lock(lxpnp->lxpr_pid);
1075 1075
1076 1076 if (p == NULL) {
1077 1077 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1078 1078 return;
1079 1079 }
1080 1080 if ((pd = ptolxproc(p)) == NULL) {
1081 1081 /* Emit a single AT_NULL record for non-branded processes */
1082 1082 auxv_t buf;
1083 1083
1084 1084 bzero(&buf, sizeof (buf));
1085 1085 lxpr_unlock(p);
1086 1086 lxpr_uiobuf_write(uiobuf, (char *)&buf, sizeof (buf));
1087 1087 return;
1088 1088 } else {
1089 1089 edp = &pd->l_elf_data;
1090 1090 }
1091 1091
1092 1092 if (p->p_model == DATAMODEL_NATIVE) {
1093 1093 auxv_t buf[__KERN_NAUXV_IMPL];
1094 1094
1095 1095 /*
1096 1096 * Because a_type is only of size int (not long), the buffer
1097 1097 * contents must be zeroed first to ensure cleanliness.
1098 1098 */
1099 1099 bzero(buf, sizeof (buf));
1100 1100 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
1101 1101 if (lx_auxv_stol(&p->p_user.u_auxv[i],
1102 1102 &buf[cnt], edp) == 0) {
1103 1103 cnt++;
1104 1104 }
1105 1105 if (p->p_user.u_auxv[i].a_type == AT_NULL) {
1106 1106 break;
1107 1107 }
1108 1108 }
1109 1109 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
1110 1110 lxpr_unlock(p);
1111 1111 }
1112 1112 #if defined(_SYSCALL32_IMPL)
1113 1113 else {
1114 1114 auxv32_t buf[__KERN_NAUXV_IMPL];
1115 1115
1116 1116 for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
1117 1117 auxv_t temp;
1118 1118
1119 1119 if (lx_auxv_stol(&p->p_user.u_auxv[i],
1120 1120 &temp, edp) == 0) {
1121 1121 buf[cnt].a_type = (int)temp.a_type;
1122 1122 buf[cnt].a_un.a_val = (int)temp.a_un.a_val;
1123 1123 cnt++;
1124 1124 }
1125 1125 if (p->p_user.u_auxv[i].a_type == AT_NULL) {
1126 1126 break;
1127 1127 }
1128 1128 }
1129 1129 lxpr_unlock(p);
1130 1130 lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
1131 1131 }
1132 1132 #endif /* defined(_SYSCALL32_IMPL) */
1133 1133 }
1134 1134
1135 1135 /*
1136 1136 * lxpr_read_pid_cgroup(): read cgroups for process
1137 1137 */
1138 1138 static void
1139 1139 lxpr_read_pid_cgroup(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1140 1140 {
1141 1141 proc_t *p;
1142 1142
1143 1143 ASSERT(lxpnp->lxpr_type == LXPR_PID_CGROUP ||
1144 1144 lxpnp->lxpr_type == LXPR_PID_TID_CGROUP);
1145 1145
1146 1146 p = lxpr_lock(lxpnp->lxpr_pid);
1147 1147 if (p == NULL) {
1148 1148 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1149 1149 return;
1150 1150 }
1151 1151
1152 1152 /* basic stub, 3rd field will need to be populated */
1153 1153 lxpr_uiobuf_printf(uiobuf, "1:name=systemd:/\n");
1154 1154
1155 1155 lxpr_unlock(p);
1156 1156 }
1157 1157
/*
 * Copy the target process's argv strings into the uiobuf, using the
 * Linux-style bounds (l_args_start / l_envs_start / l_envs_end) recorded
 * in the brand data.  If the process has overwritten the NUL that
 * terminates the argv area (detected by probing the byte at estart - 1),
 * up to one buffer's worth of the following env area is appended as a
 * contiguous string.
 *
 * Entered with p->p_lock held (it is explicitly dropped below and
 * re-acquired before returning, so faulting reads of the target's address
 * space never happen under the lock).
 */
static void
lxpr_copy_cmdline(proc_t *p, lx_proc_data_t *pd, lxpr_uiobuf_t *uiobuf)
{
	uio_t *uiop = uiobuf->uiop;
	char *buf = uiobuf->buffer;
	int bsz = uiobuf->buffsize;
	boolean_t env_overflow = B_FALSE;
	/* Target-process virtual addresses, offset by the caller's seek. */
	uintptr_t pos = pd->l_args_start + uiop->uio_offset;
	uintptr_t estart = pd->l_envs_start;
	uintptr_t eend = pd->l_envs_end;
	size_t chunk, copied;
	int err = 0;

	/* Do not bother with data beyond the end of the envp strings area. */
	if (pos > eend) {
		return;
	}
	/* Drop p_lock across the uread/prreadbuf/uiomove calls below. */
	mutex_exit(&p->p_lock);

	/*
	 * If the starting or ending bounds are outside the argv strings area,
	 * check to see if the process has overwritten the terminating NULL.
	 * If not, no data needs to be copied from oustide the argv area.
	 */
	if (pos >= estart || (pos + uiop->uio_resid) >= estart) {
		uint8_t term;
		if (uread(p, &term, sizeof (term), estart - 1) != 0) {
			err = EFAULT;
		} else if (term != 0) {
			env_overflow = B_TRUE;
		}
	}


	/* Data between astart and estart-1 can be copied freely. */
	while (pos < estart && uiop->uio_resid > 0 && err == 0) {
		/* Bounded by the argv area, the request, and our buffer. */
		chunk = MIN(estart - pos, uiop->uio_resid);
		chunk = MIN(chunk, bsz);

		if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) != 0 ||
		    copied != chunk) {
			err = EFAULT;
			break;
		}
		err = uiomove(buf, copied, UIO_READ, uiop);
		pos += copied;
	}

	/*
	 * Onward from estart, data is copied as a contiguous string. To
	 * protect env data from potential snooping, only one buffer-sized copy
	 * is allowed to avoid complex seek logic.
	 */
	if (err == 0 && env_overflow && pos == estart && uiop->uio_resid > 0) {
		chunk = MIN(eend - pos, uiop->uio_resid);
		chunk = MIN(chunk, bsz);
		if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) == 0) {
			/* Stop at the first NUL; do not expose beyond it. */
			int len = strnlen(buf, copied);
			if (len > 0) {
				err = uiomove(buf, len, UIO_READ, uiop);
			}
		}
	}

	uiobuf->error = err;
	/* reset any uiobuf state */
	uiobuf->pos = uiobuf->buffer;
	uiobuf->beg = 0;

	/* Re-acquire p_lock for the caller (lxpr_unlock expects it held). */
	mutex_enter(&p->p_lock);
}
1229 1229
/*
 * lxpr_read_pid_cmdline(): read argument vector from process
 *
 * Prefers the Linux-style argv bounds recorded in the brand data (via
 * lxpr_copy_cmdline); falls back to prreadargv() when those are absent.
 */
static void
lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	char *buf;
	size_t asz = lxpr_maxargvlen, sz;
	lx_proc_data_t *pd;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE ||
	    lxpnp->lxpr_type == LXPR_PID_TID_CMDLINE);

	/*
	 * Allocated before lxpr_lock() is taken, presumably because
	 * KM_SLEEP may block -- NOTE(review): confirm; it is also freed
	 * unused on the lxpr_copy_cmdline path.
	 */
	buf = kmem_alloc(asz, KM_SLEEP);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		kmem_free(buf, asz);
		return;
	}

	if ((pd = ptolxproc(p)) != NULL && pd->l_args_start != 0 &&
	    pd->l_envs_start != 0 && pd->l_envs_end != 0) {
		/* Use Linux-style argv bounds if possible. */
		lxpr_copy_cmdline(p, pd, uiobuf);
	} else {
		if (prreadargv(p, buf, asz, &sz) != 0) {
			lxpr_uiobuf_seterr(uiobuf, EINVAL);
		} else {
			lxpr_uiobuf_write(uiobuf, buf, sz);
		}
	}

	lxpr_unlock(p);
	kmem_free(buf, asz);
}
1268 1268
1269 1269 /*
1270 1270 * lxpr_read_pid_comm(): read command from process
1271 1271 */
1272 1272 static void
1273 1273 lxpr_read_pid_comm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1274 1274 {
1275 1275 proc_t *p;
1276 1276
1277 1277 VERIFY(lxpnp->lxpr_type == LXPR_PID_COMM ||
1278 1278 lxpnp->lxpr_type == LXPR_PID_TID_COMM);
1279 1279
1280 1280 /*
1281 1281 * Because prctl(PR_SET_NAME) does not set custom names for threads
1282 1282 * (vs processes), there is no need for special handling here.
1283 1283 */
1284 1284 if ((p = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
1285 1285 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1286 1286 return;
1287 1287 }
1288 1288 lxpr_uiobuf_printf(uiobuf, "%s\n", p->p_user.u_comm);
1289 1289 lxpr_unlock(p);
1290 1290 }
↓ open down ↓ |
1290 lines elided |
↑ open up ↑ |
1291 1291
/*
 * lxpr_read_pid_env(): read env vector from process
 */
static void
lxpr_read_pid_env(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	char *buf;
	size_t asz = lxpr_maxenvvlen, sz;
	int r;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_ENV);

	/* Allocated before lxpr_lock() is taken (KM_SLEEP may block). */
	buf = kmem_alloc(asz, KM_SLEEP);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		kmem_free(buf, asz);
		return;
	}

	/*
	 * Capture the env strings into the local buffer, then drop the
	 * process lock BEFORE touching the uiobuf.  Writing to the uiobuf
	 * while still holding the lock (as the pre-fix code did) was the
	 * source of the "kernel panic in lxpr_read_pid_env" this change
	 * addresses.
	 */
	r = prreadenvv(p, buf, asz, &sz);
	lxpr_unlock(p);

	if (r != 0) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
	} else {
		lxpr_uiobuf_write(uiobuf, buf, sz);
	}

	kmem_free(buf, asz);
}
1322 1325
/*
 * lxpr_read_pid_limits(): ulimit file
 *
 * Renders the table in /proc/<pid>/limits format by walking lxpr_rlimtab
 * and, for each entry with a native rctl analog, iterating that rctl's
 * values: RCPRIV_BASIC supplies the soft limit and RCPRIV_PRIVILEGED the
 * hard limit.  Resources without an analog report "unlimited".
 */
static void
lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	proc_t *p;
	rctl_qty_t cur, max;
	rctl_val_t *oval, *nval;
	rctl_hndl_t hndl;
	char *kname;
	int i;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS ||
	    lxpnp->lxpr_type == LXPR_PID_TID_LIMITS);

	/* Allocated before lxpr_lock() is taken (KM_SLEEP may block). */
	nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP);

	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL) {
		kmem_free(nval, sizeof (rctl_val_t));
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n",
	    "Limit", "Soft Limit", "Hard Limit", "Units");
	for (i = 0; lxpr_rlimtab[i].rlim_name != NULL; i++) {
		kname = lxpr_rlimtab[i].rlim_rctl;
		/* default to unlimited for resources without an analog */
		cur = RLIM_INFINITY;
		max = RLIM_INFINITY;
		if (kname != NULL) {
			hndl = rctl_hndl_lookup(kname);
			/* NULL oval requests the first value in the rctl. */
			oval = NULL;
			while ((hndl != -1) &&
			    rctl_local_get(hndl, oval, nval, p) == 0) {
				oval = nval;
				switch (nval->rcv_privilege) {
				case RCPRIV_BASIC:
					if (!RCTL_INFINITE(nval))
						cur = nval->rcv_value;
					break;
				case RCPRIV_PRIVILEGED:
					if (!RCTL_INFINITE(nval))
						max = nval->rcv_value;
					break;
				}
			}
		}

		lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name);
		/* Both native and Linux "infinite" encodings mean unlimited. */
		if (cur == RLIM_INFINITY || cur == LX_RLIM_INFINITY) {
			lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
		} else {
			lxpr_uiobuf_printf(uiobuf, " %-20lu", cur);
		}
		if (max == RLIM_INFINITY || max == LX_RLIM_INFINITY) {
			lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
		} else {
			lxpr_uiobuf_printf(uiobuf, " %-20lu", max);
		}
		lxpr_uiobuf_printf(uiobuf, " %-10s\n",
		    lxpr_rlimtab[i].rlim_unit);
	}

	lxpr_unlock(p);
	kmem_free(nval, sizeof (rctl_val_t));
}
1392 1395
1393 1396 /*
1394 1397 * lxpr_read_pid_maps(): memory map file
1395 1398 */
1396 1399 static void
1397 1400 lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1398 1401 {
1399 1402 proc_t *p;
1400 1403 struct as *as;
1401 1404 struct seg *seg;
1402 1405 char *buf;
1403 1406 int buflen = MAXPATHLEN;
1404 1407 struct print_data {
1405 1408 uintptr_t saddr;
1406 1409 uintptr_t eaddr;
1407 1410 int type;
1408 1411 char prot[5];
1409 1412 uintptr_t offset;
1410 1413 vnode_t *vp;
1411 1414 struct print_data *next;
1412 1415 } *print_head = NULL;
1413 1416 struct print_data **print_tail = &print_head;
1414 1417 struct print_data *pbuf;
1415 1418
1416 1419 ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS ||
1417 1420 lxpnp->lxpr_type == LXPR_PID_TID_MAPS);
1418 1421
1419 1422 p = lxpr_lock(lxpnp->lxpr_pid);
1420 1423 if (p == NULL) {
1421 1424 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1422 1425 return;
1423 1426 }
1424 1427
1425 1428 as = p->p_as;
1426 1429
1427 1430 if (as == &kas) {
1428 1431 lxpr_unlock(p);
1429 1432 return;
1430 1433 }
1431 1434
1432 1435 mutex_exit(&p->p_lock);
1433 1436
1434 1437 /* Iterate over all segments in the address space */
1435 1438 AS_LOCK_ENTER(as, RW_READER);
1436 1439 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1437 1440 vnode_t *vp;
1438 1441 uint_t protbits;
1439 1442
1440 1443 pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
1441 1444
1442 1445 pbuf->saddr = (uintptr_t)seg->s_base;
1443 1446 pbuf->eaddr = pbuf->saddr + seg->s_size;
1444 1447 pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
1445 1448
1446 1449 /*
1447 1450 * Cheat and only use the protection bits of the first page
1448 1451 * in the segment
1449 1452 */
1450 1453 (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
1451 1454 (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
1452 1455
1453 1456 if (protbits & PROT_READ) pbuf->prot[0] = 'r';
1454 1457 if (protbits & PROT_WRITE) pbuf->prot[1] = 'w';
1455 1458 if (protbits & PROT_EXEC) pbuf->prot[2] = 'x';
1456 1459 if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's';
1457 1460 else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
1458 1461
1459 1462 if (seg->s_ops == &segvn_ops &&
1460 1463 SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
1461 1464 vp != NULL && vp->v_type == VREG) {
1462 1465 VN_HOLD(vp);
1463 1466 pbuf->vp = vp;
1464 1467 } else {
1465 1468 pbuf->vp = NULL;
1466 1469 }
1467 1470
1468 1471 pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr);
1469 1472
1470 1473 pbuf->next = NULL;
1471 1474 *print_tail = pbuf;
1472 1475 print_tail = &pbuf->next;
1473 1476 }
1474 1477 AS_LOCK_EXIT(as);
1475 1478 mutex_enter(&p->p_lock);
1476 1479 lxpr_unlock(p);
1477 1480
1478 1481 buf = kmem_alloc(buflen, KM_SLEEP);
1479 1482
1480 1483 /* print the data we've extracted */
1481 1484 pbuf = print_head;
1482 1485 while (pbuf != NULL) {
1483 1486 struct print_data *pbuf_next;
1484 1487 vattr_t vattr;
1485 1488
1486 1489 int maj = 0;
1487 1490 int min = 0;
1488 1491 ino_t inode = 0;
1489 1492
1490 1493 *buf = '\0';
1491 1494 if (pbuf->vp != NULL) {
1492 1495 vattr.va_mask = AT_FSID | AT_NODEID;
1493 1496 if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
1494 1497 NULL) == 0) {
1495 1498 maj = getmajor(vattr.va_fsid);
1496 1499 min = getminor(vattr.va_fsid);
1497 1500 inode = vattr.va_nodeid;
1498 1501 }
1499 1502 (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
1500 1503 VN_RELE(pbuf->vp);
1501 1504 }
1502 1505
1503 1506 if (p->p_model == DATAMODEL_LP64) {
1504 1507 lxpr_uiobuf_printf(uiobuf,
1505 1508 "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n",
1506 1509 pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
1507 1510 maj, min, inode, *buf != '\0' ? " " : "", buf);
1508 1511 } else {
1509 1512 lxpr_uiobuf_printf(uiobuf,
1510 1513 "%08x-%08x %s %08x %02x:%02x %llu%s%s\n",
1511 1514 (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr,
1512 1515 pbuf->prot, (uint32_t)pbuf->offset, maj, min,
1513 1516 inode, *buf != '\0' ? " " : "", buf);
1514 1517 }
1515 1518
1516 1519 pbuf_next = pbuf->next;
1517 1520 kmem_free(pbuf, sizeof (*pbuf));
1518 1521 pbuf = pbuf_next;
1519 1522 }
1520 1523
1521 1524 kmem_free(buf, buflen);
1522 1525 }
1523 1526
1524 1527 /*
1525 1528 * lxpr_read_pid_mountinfo(): information about process mount points. e.g.:
1526 1529 * 14 19 0:13 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
1527 1530 * mntid parid devnums root mntpnt mntopts - fstype mntsrc superopts
1528 1531 *
1529 1532 * We have to make up several of these fields.
1530 1533 */
1531 1534 static void
1532 1535 lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1533 1536 {
1534 1537 struct vfs *vfsp;
1535 1538 struct vfs *vfslist;
1536 1539 zone_t *zone = LXPTOZ(lxpnp);
1537 1540 struct print_data {
1538 1541 refstr_t *vfs_mntpt;
1539 1542 refstr_t *vfs_resource;
1540 1543 uint_t vfs_flag;
1541 1544 int vfs_fstype;
1542 1545 dev_t vfs_dev;
1543 1546 struct print_data *next;
1544 1547 } *print_head = NULL;
1545 1548 struct print_data **print_tail = &print_head;
1546 1549 struct print_data *printp;
1547 1550 int root_id = 15; /* use a made-up value */
1548 1551 int mnt_id;
1549 1552
1550 1553 ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO ||
1551 1554 lxpnp->lxpr_type == LXPR_PID_TID_MOUNTINFO);
1552 1555
1553 1556 vfs_list_read_lock();
1554 1557
1555 1558 /* root is the top-level, it does not appear in this output */
1556 1559 if (zone == global_zone) {
1557 1560 vfsp = vfslist = rootvfs;
1558 1561 } else {
1559 1562 vfsp = vfslist = zone->zone_vfslist;
1560 1563 /*
1561 1564 * If the zone has a root entry, it will be the first in
1562 1565 * the list. If it doesn't, we conjure one up.
1563 1566 */
1564 1567 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
1565 1568 zone->zone_rootpath) != 0) {
1566 1569 struct vfs *tvfsp;
1567 1570 /*
1568 1571 * The root of the zone is not a mount point. The vfs
1569 1572 * we want to report is that of the zone's root vnode.
1570 1573 */
1571 1574 tvfsp = zone->zone_rootvp->v_vfsp;
1572 1575
1573 1576 lxpr_uiobuf_printf(uiobuf,
1574 1577 "%d 1 %d:%d / / %s - %s / %s\n",
1575 1578 root_id,
1576 1579 major(tvfsp->vfs_dev), minor(vfsp->vfs_dev),
1577 1580 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1578 1581 vfssw[tvfsp->vfs_fstype].vsw_name,
1579 1582 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1580 1583
1581 1584 }
1582 1585 if (vfslist == NULL) {
1583 1586 vfs_list_unlock();
1584 1587 return;
1585 1588 }
1586 1589 }
1587 1590
1588 1591 /*
1589 1592 * Later on we have to do a lookupname, which can end up causing
1590 1593 * another vfs_list_read_lock() to be called. Which can lead to a
1591 1594 * deadlock. To avoid this, we extract the data we need into a local
1592 1595 * list, then we can run this list without holding vfs_list_read_lock()
1593 1596 * We keep the list in the same order as the vfs_list
1594 1597 */
1595 1598 do {
1596 1599 /* Skip mounts we shouldn't show */
1597 1600 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
1598 1601 goto nextfs;
1599 1602 }
1600 1603
1601 1604 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
1602 1605 refstr_hold(vfsp->vfs_mntpt);
1603 1606 printp->vfs_mntpt = vfsp->vfs_mntpt;
1604 1607 refstr_hold(vfsp->vfs_resource);
1605 1608 printp->vfs_resource = vfsp->vfs_resource;
1606 1609 printp->vfs_flag = vfsp->vfs_flag;
1607 1610 printp->vfs_fstype = vfsp->vfs_fstype;
1608 1611 printp->vfs_dev = vfsp->vfs_dev;
1609 1612 printp->next = NULL;
1610 1613
1611 1614 *print_tail = printp;
1612 1615 print_tail = &printp->next;
1613 1616
1614 1617 nextfs:
1615 1618 vfsp = (zone == global_zone) ?
1616 1619 vfsp->vfs_next : vfsp->vfs_zone_next;
1617 1620
1618 1621 } while (vfsp != vfslist);
1619 1622
1620 1623 vfs_list_unlock();
1621 1624
1622 1625 mnt_id = root_id + 1;
1623 1626
1624 1627 /*
1625 1628 * now we can run through what we've extracted without holding
1626 1629 * vfs_list_read_lock()
1627 1630 */
1628 1631 printp = print_head;
1629 1632 while (printp != NULL) {
1630 1633 struct print_data *printp_next;
1631 1634 const char *resource;
1632 1635 char *mntpt;
1633 1636 struct vnode *vp;
1634 1637 int error;
1635 1638
1636 1639 mntpt = (char *)refstr_value(printp->vfs_mntpt);
1637 1640 resource = refstr_value(printp->vfs_resource);
1638 1641
1639 1642 if (mntpt != NULL && mntpt[0] != '\0')
1640 1643 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
1641 1644 else
1642 1645 mntpt = "-";
1643 1646
1644 1647 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
1645 1648
1646 1649 if (error != 0)
1647 1650 goto nextp;
1648 1651
1649 1652 if (!(vp->v_flag & VROOT)) {
1650 1653 VN_RELE(vp);
1651 1654 goto nextp;
1652 1655 }
1653 1656 VN_RELE(vp);
1654 1657
1655 1658 if (resource != NULL && resource[0] != '\0') {
1656 1659 if (resource[0] == '/') {
1657 1660 resource = ZONE_PATH_VISIBLE(resource, zone) ?
1658 1661 ZONE_PATH_TRANSLATE(resource, zone) : mntpt;
1659 1662 }
1660 1663 } else {
1661 1664 resource = "none";
1662 1665 }
1663 1666
1664 1667 /*
1665 1668 * XXX parent ID is not tracked correctly here. Currently we
1666 1669 * always assume the parent ID is the root ID.
1667 1670 */
1668 1671 lxpr_uiobuf_printf(uiobuf,
1669 1672 "%d %d %d:%d / %s %s - %s %s %s\n",
1670 1673 mnt_id, root_id,
1671 1674 major(printp->vfs_dev), minor(printp->vfs_dev),
1672 1675 mntpt,
1673 1676 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw",
1674 1677 vfssw[printp->vfs_fstype].vsw_name,
1675 1678 resource,
1676 1679 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1677 1680
1678 1681 nextp:
1679 1682 printp_next = printp->next;
1680 1683 refstr_rele(printp->vfs_mntpt);
1681 1684 refstr_rele(printp->vfs_resource);
1682 1685 kmem_free(printp, sizeof (*printp));
1683 1686 printp = printp_next;
1684 1687
1685 1688 mnt_id++;
1686 1689 }
1687 1690 }
1688 1691
1689 1692 /*
1690 1693 * lxpr_read_pid_oom_scr_adj(): read oom_score_adj for process
1691 1694 */
1692 1695 static void
1693 1696 lxpr_read_pid_oom_scr_adj(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1694 1697 {
1695 1698 proc_t *p;
1696 1699
1697 1700 ASSERT(lxpnp->lxpr_type == LXPR_PID_OOM_SCR_ADJ ||
1698 1701 lxpnp->lxpr_type == LXPR_PID_TID_OOM_SCR_ADJ);
1699 1702
1700 1703 p = lxpr_lock(lxpnp->lxpr_pid);
1701 1704 if (p == NULL) {
1702 1705 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1703 1706 return;
1704 1707 }
1705 1708
1706 1709 /* always 0 */
1707 1710 lxpr_uiobuf_printf(uiobuf, "0\n");
1708 1711
1709 1712 lxpr_unlock(p);
1710 1713 }
1711 1714
1712 1715
1713 1716 /*
1714 1717 * lxpr_read_pid_statm(): memory status file
1715 1718 */
1716 1719 static void
1717 1720 lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1718 1721 {
1719 1722 proc_t *p;
1720 1723 struct as *as;
1721 1724 size_t vsize;
1722 1725 size_t rss;
1723 1726
1724 1727 ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM ||
1725 1728 lxpnp->lxpr_type == LXPR_PID_TID_STATM);
1726 1729
1727 1730 p = lxpr_lock(lxpnp->lxpr_pid);
1728 1731 if (p == NULL) {
1729 1732 lxpr_uiobuf_seterr(uiobuf, EINVAL);
1730 1733 return;
1731 1734 }
1732 1735
1733 1736 as = p->p_as;
1734 1737
1735 1738 mutex_exit(&p->p_lock);
1736 1739
1737 1740 AS_LOCK_ENTER(as, RW_READER);
1738 1741 vsize = btopr(as->a_resvsize);
1739 1742 rss = rm_asrss(as);
1740 1743 AS_LOCK_EXIT(as);
1741 1744
1742 1745 mutex_enter(&p->p_lock);
1743 1746 lxpr_unlock(p);
1744 1747
1745 1748 lxpr_uiobuf_printf(uiobuf,
1746 1749 "%lu %lu %lu %lu %lu %lu %lu\n",
1747 1750 vsize, rss, 0l, rss, 0l, 0l, 0l);
1748 1751 }
1749 1752
1750 1753 /*
1751 1754 * Look for either the main thread (lookup_id is 0) or the specified thread.
1752 1755 * If we're looking for the main thread but the proc does not have one, we
1753 1756 * fallback to using prchoose to get any thread available.
1754 1757 */
static kthread_t *
lxpr_get_thread(proc_t *p, uint_t lookup_id)
{
	kthread_t *t;
	uint_t emul_tid;
	lx_lwp_data_t *lwpd;
	pid_t pid = p->p_pid;
	pid_t init_pid = curproc->p_zone->zone_proc_initpid;
	boolean_t branded = (p->p_brand == &lx_brand);

	/* get specified thread */
	if ((t = p->p_tlist) == NULL)
		return (NULL);

	do {
		/* lookup_id of 0 means the main thread (native tid 1) */
		if (lookup_id == 0 && t->t_tid == 1) {
			/* a non-NULL return is thread-locked by us */
			thread_lock(t);
			return (t);
		}

		lwpd = ttolxlwp(t);
		if (branded && lwpd != NULL) {
			/*
			 * For the zone's init process, tid 1 matches on the
			 * native thread id; all other lookups match on the
			 * emulated Linux pid carried in the lwp brand data.
			 */
			if (pid == init_pid && lookup_id == 1) {
				emul_tid = t->t_tid;
			} else {
				emul_tid = lwpd->br_pid;
			}
		} else {
			/*
			 * Make only the first (assumed to be main) thread
			 * visible for non-branded processes.
			 */
			emul_tid = p->p_pid;
		}
		if (emul_tid == lookup_id) {
			thread_lock(t);
			return (t);
		}
	} while ((t = t->t_forw) != p->p_tlist);

	/*
	 * No main thread found above; fall back to any available thread.
	 * NOTE(review): presumably prchoose() also returns its chosen
	 * thread with thread_lock held, matching the paths above -- confirm.
	 */
	if (lookup_id == 0)
		return (prchoose(p));
	return (NULL);
}
1799 1802
1800 1803 /*
1801 1804 * Lookup the real pid for procs 0 or 1.
1802 1805 */
1803 1806 static pid_t
1804 1807 get_real_pid(pid_t p)
1805 1808 {
1806 1809 pid_t find_pid;
1807 1810
1808 1811 if (p == 1) {
1809 1812 find_pid = curproc->p_zone->zone_proc_initpid;
1810 1813 } else if (p == 0) {
1811 1814 find_pid = curproc->p_zone->zone_zsched->p_pid;
1812 1815 } else {
1813 1816 find_pid = p;
1814 1817 }
1815 1818
1816 1819 return (find_pid);
1817 1820 }
1818 1821
1819 1822 /*
1820 1823 * pid/tid common code to read status file
1821 1824 */
static void
lxpr_read_status_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
    uint_t lookup_id)
{
	proc_t *p;
	kthread_t *t;
	user_t *up;
	cred_t *cr;
	const gid_t *groups;
	int ngroups;
	struct as *as;
	char *status;
	pid_t pid, ppid;
	k_sigset_t current, ignore, handle;
	int i, lx_sig;
	pid_t real_pid;

	/* translate emulated pids 0/1 to the backing native processes */
	real_pid = get_real_pid(lxpnp->lxpr_pid);
	p = lxpr_lock(real_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	pid = p->p_pid;

	/*
	 * Convert pid to the Linux default of 1 if we're the zone's init
	 * process or if we're the zone's zsched the pid is 0.
	 */
	if (pid == curproc->p_zone->zone_proc_initpid) {
		pid = 1;
		ppid = 0;	/* parent pid for init is 0 */
	} else if (pid == curproc->p_zone->zone_zsched->p_pid) {
		pid = 0;	/* zsched is pid 0 */
		ppid = 0;	/* parent pid for zsched is itself */
	} else {
		/*
		 * Make sure not to reference parent PIDs that reside outside
		 * the zone
		 */
		ppid = ((p->p_flag & SZONETOP)
		    ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);

		/*
		 * Convert ppid to the Linux default of 1 if our parent is the
		 * zone's init process
		 */
		if (ppid == curproc->p_zone->zone_proc_initpid)
			ppid = 1;
	}

	/* returned thread (if any) is thread-locked; unlock after use */
	t = lxpr_get_thread(p, lookup_id);
	if (t != NULL) {
		switch (t->t_state) {
		case TS_SLEEP:
			status = "S (sleeping)";
			break;
		case TS_RUN:
		case TS_ONPROC:
			status = "R (running)";
			break;
		case TS_ZOMB:
			status = "Z (zombie)";
			break;
		case TS_STOPPED:
			status = "T (stopped)";
			break;
		default:
			status = "! (unknown)";
			break;
		}
		thread_unlock(t);
	} else {
		if (lookup_id != 0) {
			/* we can't find this specific thread */
			lxpr_uiobuf_seterr(uiobuf, EINVAL);
			lxpr_unlock(p);
			return;
		}

		/*
		 * there is a hole in the exit code, where a proc can have
		 * no threads but it is yet to be flagged SZOMB. We will
		 * assume we are about to become a zombie
		 */
		status = "Z (zombie)";
	}

	/* take a credential hold so cr stays valid after dropping p_crlock */
	up = PTOU(p);
	mutex_enter(&p->p_crlock);
	crhold(cr = p->p_cred);
	mutex_exit(&p->p_crlock);

	lxpr_uiobuf_printf(uiobuf,
	    "Name:\t%s\n"
	    "State:\t%s\n"
	    "Tgid:\t%d\n"
	    "Pid:\t%d\n"
	    "PPid:\t%d\n"
	    "TracerPid:\t%d\n"
	    "Uid:\t%u\t%u\t%u\t%u\n"
	    "Gid:\t%u\t%u\t%u\t%u\n"
	    "FDSize:\t%d\n"
	    "Groups:\t",
	    up->u_comm,
	    status,
	    pid, /* thread group id - same as pid */
	    (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
	    ppid,
	    0,
	    crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
	    crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
	    p->p_fno_ctl);


	ngroups = crgetngroups(cr);
	groups = crgetgroups(cr);
	for (i = 0; i < ngroups; i++) {
		lxpr_uiobuf_printf(uiobuf,
		    "%u ",
		    groups[i]);
	}
	crfree(cr);

	/* memory details only make sense for live, non-system processes */
	as = p->p_as;
	if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
		size_t vsize, nlocked, rss;

		/* drop p_lock before taking the AS lock (lock ordering) */
		mutex_exit(&p->p_lock);
		AS_LOCK_ENTER(as, RW_READER);
		vsize = as->a_resvsize;
		rss = rm_asrss(as);
		AS_LOCK_EXIT(as);
		mutex_enter(&p->p_lock);
		nlocked = p->p_locked_mem;

		lxpr_uiobuf_printf(uiobuf,
		    "\n"
		    "VmSize:\t%8lu kB\n"
		    "VmLck:\t%8lu kB\n"
		    "VmRSS:\t%8lu kB\n"
		    "VmData:\t%8lu kB\n"
		    "VmStk:\t%8lu kB\n"
		    "VmExe:\t%8lu kB\n"
		    "VmLib:\t%8lu kB",
		    btok(vsize),
		    btok(nlocked),
		    ptok(rss),
		    0l,
		    btok(p->p_stksize),
		    ptok(rss),
		    0l);
	}

	lxpr_uiobuf_printf(uiobuf, "\nThreads:\t%u", p->p_lwpcnt);

	/* translate native signal state into Linux signal numbers */
	sigemptyset(&current);
	sigemptyset(&ignore);
	sigemptyset(&handle);

	for (i = 1; i < NSIG; i++) {
		lx_sig = stol_signo[i];

		if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
			if (sigismember(&p->p_sig, i))
				sigaddset(&current, lx_sig);

			if (up->u_signal[i - 1] == SIG_IGN)
				sigaddset(&ignore, lx_sig);
			else if (up->u_signal[i - 1] != SIG_DFL)
				sigaddset(&handle, lx_sig);
		}
	}

	lxpr_uiobuf_printf(uiobuf,
	    "\n"
	    "SigPnd:\t%08x%08x\n"
	    "SigBlk:\t%08x%08x\n"
	    "SigIgn:\t%08x%08x\n"
	    "SigCgt:\t%08x%08x\n"
	    "CapInh:\t%016x\n"
	    "CapPrm:\t%016x\n"
	    "CapEff:\t%016x\n",
	    current.__sigbits[1], current.__sigbits[0],
	    0, 0, /* signals blocked on per thread basis */
	    ignore.__sigbits[1], ignore.__sigbits[0],
	    handle.__sigbits[1], handle.__sigbits[0],
	    /* Can't do anything with linux capabilities */
	    0,
	    0,
	    0);

	lxpr_uiobuf_printf(uiobuf,
	    "CapBnd:\t%016llx\n",
	    /* We report the full capability bounding set */
	    0x1fffffffffLL);

	lxpr_unlock(p);
}
2022 2025
2023 2026 /*
2024 2027 * lxpr_read_pid_status(): status file
2025 2028 */
static void
lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);

	/* lookup_id 0 selects the process's main thread */
	lxpr_read_status_common(lxpnp, uiobuf, 0);
}
2033 2036
2034 2037 /*
2035 2038 * lxpr_read_pid_tid_status(): status file
2036 2039 */
static void
lxpr_read_pid_tid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STATUS);
	/* lxpr_desc carries the emulated thread id to look up */
	lxpr_read_status_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
}
2043 2046
2044 2047 /*
2045 2048 * pid/tid common code to read stat file
2046 2049 */
static void
lxpr_read_stat_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
    uint_t lookup_id)
{
	proc_t *p;
	kthread_t *t;
	struct as *as;
	char stat;
	pid_t pid, ppid, pgpid, spid;
	gid_t psgid;
	dev_t psdev;
	size_t rss, vsize;
	int nice, pri;
	caddr_t wchan;
	processorid_t cpu;
	pid_t real_pid;

	/* translate emulated pids 0/1 to the backing native processes */
	real_pid = get_real_pid(lxpnp->lxpr_pid);
	p = lxpr_lock(real_pid);
	if (p == NULL) {
		lxpr_uiobuf_seterr(uiobuf, EINVAL);
		return;
	}

	pid = p->p_pid;

	/*
	 * Set Linux defaults if we're the zone's init process
	 */
	if (pid == curproc->p_zone->zone_proc_initpid) {
		pid = 1;		/* PID for init */
		ppid = 0;		/* parent PID for init is 0 */
		pgpid = 0;		/* process group for init is 0 */
		psgid = (gid_t)-1;	/* credential GID for init is -1 */
		spid = 0;		/* session id for init is 0 */
		psdev = 0;		/* session device for init is 0 */
	} else if (pid == curproc->p_zone->zone_zsched->p_pid) {
		pid = 0;		/* PID for zsched */
		ppid = 0;		/* parent PID for zsched is 0 */
		pgpid = 0;		/* process group for zsched is 0 */
		psgid = (gid_t)-1;	/* credential GID for zsched is -1 */
		spid = 0;		/* session id for zsched is 0 */
		psdev = 0;		/* session device for zsched is 0 */
	} else {
		/*
		 * Make sure not to reference parent PIDs that reside outside
		 * the zone
		 */
		ppid = ((p->p_flag & SZONETOP) ?
		    curproc->p_zone->zone_zsched->p_pid : p->p_ppid);

		/*
		 * Convert ppid to the Linux default of 1 if our parent is the
		 * zone's init process
		 */
		if (ppid == curproc->p_zone->zone_proc_initpid)
			ppid = 1;

		pgpid = p->p_pgrp;

		/* session details are read under the session locks */
		mutex_enter(&p->p_splock);
		mutex_enter(&p->p_sessp->s_lock);
		spid = p->p_sessp->s_sid;
		psdev = p->p_sessp->s_dev;
		if (p->p_sessp->s_cred)
			psgid = crgetgid(p->p_sessp->s_cred);
		else
			psgid = crgetgid(p->p_cred);

		mutex_exit(&p->p_sessp->s_lock);
		mutex_exit(&p->p_splock);
	}

	/* returned thread (if any) is thread-locked; unlock after use */
	t = lxpr_get_thread(p, lookup_id);
	if (t != NULL) {
		switch (t->t_state) {
		case TS_SLEEP:
			stat = 'S'; break;
		case TS_RUN:
		case TS_ONPROC:
			stat = 'R'; break;
		case TS_ZOMB:
			stat = 'Z'; break;
		case TS_STOPPED:
			stat = 'T'; break;
		default:
			stat = '!'; break;
		}

		if (CL_DONICE(t, NULL, 0, &nice) != 0)
			nice = 0;

		pri = t->t_pri;
		wchan = t->t_wchan;
		cpu = t->t_cpu->cpu_id;
		thread_unlock(t);
	} else {
		if (lookup_id != 0) {
			/* we can't find this specific thread */
			lxpr_uiobuf_seterr(uiobuf, EINVAL);
			lxpr_unlock(p);
			return;
		}

		/* Only zombies have no threads */
		stat = 'Z';
		nice = 0;
		pri = 0;
		wchan = 0;
		cpu = 0;
	}
	/* drop p_lock before taking the AS lock (lock ordering) */
	as = p->p_as;
	mutex_exit(&p->p_lock);
	AS_LOCK_ENTER(as, RW_READER);
	vsize = as->a_resvsize;
	rss = rm_asrss(as);
	AS_LOCK_EXIT(as);
	mutex_enter(&p->p_lock);

	/* emit the Linux /proc/<pid>/stat single-line record */
	lxpr_uiobuf_printf(uiobuf,
	    "%d (%s) %c %d %d %d %d %d "
	    "%lu %lu %lu %lu %lu "
	    "%lu %lu %ld %ld "
	    "%d %d %d "
	    "%lu "
	    "%lu "
	    "%lu %ld %llu "
	    "%lu %lu %u "
	    "%lu %lu "
	    "%lu %lu %lu %lu "
	    "%lu "
	    "%lu %lu "
	    "%d "
	    "%d"
	    "\n",
	    (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
	    PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
	    0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
	    p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
	    pri, nice, p->p_lwpcnt,
	    0l, /* itrealvalue (time before next SIGALRM) */
	    PTOU(p)->u_ticks,
	    vsize, rss, p->p_vmem_ctl,
	    0l, 0l, USRSTACK, /* startcode, endcode, startstack */
	    0l, 0l, /* kstkesp, kstkeip */
	    0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
	    wchan,
	    0l, 0l, /* nswap, cnswap */
	    0, /* exit_signal */
	    cpu);

	lxpr_unlock(p);
}
2200 2203
2201 2204 /*
2202 2205 * lxpr_read_pid_stat(): pid stat file
2203 2206 */
static void
lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);

	/* lookup_id 0 selects the process's main thread */
	lxpr_read_stat_common(lxpnp, uiobuf, 0);
}
2211 2214
2212 2215 /*
2213 2216 * lxpr_read_pid_tid_stat(): pid stat file
2214 2217 */
static void
lxpr_read_pid_tid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STAT);
	/* lxpr_desc carries the emulated thread id to look up */
	lxpr_read_stat_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
}
2221 2224
2222 2225 /* ARGSUSED */
static void
lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Deliberately empty: /proc/net/arp is presented as an empty file. */
}
2227 2230
/*
 * Per-interface statistics snapshot, filled from an interface's named
 * kstats by lxpr_kstat_ifstat() and formatted for /proc/net/dev.
 */
struct lxpr_ifstat {
	uint64_t rx_bytes;	/* "rbytes64" */
	uint64_t rx_packets;	/* "ipackets64" */
	uint64_t rx_errors;	/* "ierrors" */
	uint64_t rx_drop;	/* "norcvbuf" */
	uint64_t tx_bytes;	/* "obytes64" */
	uint64_t tx_packets;	/* "opackets64" */
	uint64_t tx_errors;	/* "oerrors" */
	uint64_t tx_drop;	/* "noxmtbuf" */
	uint64_t collisions;	/* "collisions" */
	uint64_t rx_multicast;	/* "multircv" */
};
2240 2243
static void *
lxpr_kstat_read(kstat_t *kn, boolean_t byname, size_t *size, int *num)
{
	kstat_t *kp;
	int i, nrec = 0;
	size_t bufsize;
	void *buf = NULL;

	/* locate the kstat either by module/instance/name or by kid */
	if (byname == B_TRUE) {
		kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
		    kn->ks_name, getzoneid());
	} else {
		kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
	}
	if (kp == NULL) {
		return (NULL);
	}
	if (kp->ks_flags & KSTAT_FLAG_INVALID) {
		kstat_rele(kp);
		return (NULL);
	}

	bufsize = kp->ks_data_size + 1;
	kstat_rele(kp);

	/*
	 * The kstat in question is released so that kmem_alloc(KM_SLEEP) is
	 * performed without it held. After the alloc, the kstat is reacquired
	 * and its size is checked again. If the buffer is no longer large
	 * enough, the alloc and check are retried once (two passes total)
	 * before giving up and returning NULL.
	 */
	for (i = 0; i < 2; i++) {
		buf = kmem_alloc(bufsize, KM_SLEEP);

		/* Check if bufsize still appropriate */
		if (byname == B_TRUE) {
			kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
			    kn->ks_name, getzoneid());
		} else {
			kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
		}
		if (kp == NULL || kp->ks_flags & KSTAT_FLAG_INVALID) {
			/* kstat went away or became invalid in the window */
			if (kp != NULL) {
				kstat_rele(kp);
			}
			kmem_free(buf, bufsize);
			return (NULL);
		}
		KSTAT_ENTER(kp);
		(void) KSTAT_UPDATE(kp, KSTAT_READ);
		if (bufsize < kp->ks_data_size) {
			/* data grew; free and retry with the new size */
			kmem_free(buf, bufsize);
			buf = NULL;
			bufsize = kp->ks_data_size + 1;
			KSTAT_EXIT(kp);
			kstat_rele(kp);
			continue;
		} else {
			if (KSTAT_SNAPSHOT(kp, buf, KSTAT_READ) != 0) {
				/* snapshot failed; report failure via NULL */
				kmem_free(buf, bufsize);
				buf = NULL;
			}
			nrec = kp->ks_ndata;
			KSTAT_EXIT(kp);
			kstat_rele(kp);
			break;
		}
	}

	/* on success, report the buffer size and record count to the caller */
	if (buf != NULL) {
		*size = bufsize;
		*num = nrec;
	}
	return (buf);
}
2316 2319
2317 2320 static int
2318 2321 lxpr_kstat_ifstat(kstat_t *kn, struct lxpr_ifstat *ifs)
2319 2322 {
2320 2323 kstat_named_t *kp;
2321 2324 int i, num;
2322 2325 size_t size;
2323 2326
2324 2327 /*
2325 2328 * Search by name instead of by kid since there's a small window to
2326 2329 * race against kstats being added/removed.
2327 2330 */
2328 2331 bzero(ifs, sizeof (*ifs));
2329 2332 kp = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
2330 2333 if (kp == NULL)
2331 2334 return (-1);
2332 2335 for (i = 0; i < num; i++) {
2333 2336 if (strncmp(kp[i].name, "rbytes64", KSTAT_STRLEN) == 0)
2334 2337 ifs->rx_bytes = kp[i].value.ui64;
2335 2338 else if (strncmp(kp[i].name, "ipackets64", KSTAT_STRLEN) == 0)
2336 2339 ifs->rx_packets = kp[i].value.ui64;
2337 2340 else if (strncmp(kp[i].name, "ierrors", KSTAT_STRLEN) == 0)
2338 2341 ifs->rx_errors = kp[i].value.ui32;
2339 2342 else if (strncmp(kp[i].name, "norcvbuf", KSTAT_STRLEN) == 0)
2340 2343 ifs->rx_drop = kp[i].value.ui32;
2341 2344 else if (strncmp(kp[i].name, "multircv", KSTAT_STRLEN) == 0)
2342 2345 ifs->rx_multicast = kp[i].value.ui32;
2343 2346 else if (strncmp(kp[i].name, "obytes64", KSTAT_STRLEN) == 0)
2344 2347 ifs->tx_bytes = kp[i].value.ui64;
2345 2348 else if (strncmp(kp[i].name, "opackets64", KSTAT_STRLEN) == 0)
2346 2349 ifs->tx_packets = kp[i].value.ui64;
2347 2350 else if (strncmp(kp[i].name, "oerrors", KSTAT_STRLEN) == 0)
2348 2351 ifs->tx_errors = kp[i].value.ui32;
2349 2352 else if (strncmp(kp[i].name, "noxmtbuf", KSTAT_STRLEN) == 0)
2350 2353 ifs->tx_drop = kp[i].value.ui32;
2351 2354 else if (strncmp(kp[i].name, "collisions", KSTAT_STRLEN) == 0)
2352 2355 ifs->collisions = kp[i].value.ui32;
2353 2356 }
2354 2357 kmem_free(kp, size);
2355 2358 return (0);
2356 2359 }
2357 2360
2358 2361 /* ARGSUSED */
2359 2362 static void
2360 2363 lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2361 2364 {
2362 2365 kstat_t *ksr;
2363 2366 kstat_t ks0;
2364 2367 int i, nidx;
2365 2368 size_t sidx;
2366 2369 struct lxpr_ifstat ifs;
2367 2370
2368 2371 lxpr_uiobuf_printf(uiobuf, "Inter-| Receive "
2369 2372 " | Transmit\n");
2370 2373 lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo"
2371 2374 " frame compressed multicast|bytes packets errs drop fifo"
2372 2375 " colls carrier compressed\n");
2373 2376
2374 2377 ks0.ks_kid = 0;
2375 2378 ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
2376 2379 if (ksr == NULL)
2377 2380 return;
2378 2381
2379 2382 for (i = 1; i < nidx; i++) {
2380 2383 if (strncmp(ksr[i].ks_module, "link", KSTAT_STRLEN) == 0 ||
2381 2384 strncmp(ksr[i].ks_module, "lo", KSTAT_STRLEN) == 0) {
2382 2385 if (lxpr_kstat_ifstat(&ksr[i], &ifs) != 0)
2383 2386 continue;
2384 2387
2385 2388 /* Overwriting the name is ok in the local snapshot */
2386 2389 lx_ifname_convert(ksr[i].ks_name, LX_IF_FROMNATIVE);
2387 2390 lxpr_uiobuf_printf(uiobuf, "%6s: %7llu %7llu %4lu "
2388 2391 "%4lu %4u %5u %10u %9lu %8llu %7llu %4lu %4lu %4u "
2389 2392 "%5lu %7u %10u\n",
2390 2393 ksr[i].ks_name,
2391 2394 ifs.rx_bytes, ifs.rx_packets,
2392 2395 ifs.rx_errors, ifs.rx_drop,
2393 2396 0, 0, 0, ifs.rx_multicast,
2394 2397 ifs.tx_bytes, ifs.tx_packets,
2395 2398 ifs.tx_errors, ifs.tx_drop,
2396 2399 0, ifs.collisions, 0, 0);
2397 2400 }
2398 2401 }
2399 2402
2400 2403 kmem_free(ksr, sidx);
2401 2404 }
2402 2405
2403 2406 /* ARGSUSED */
static void
lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Deliberately empty: /proc/net/dev_mcast is an empty file. */
}
2408 2411
2409 2412 static void
2410 2413 lxpr_inet6_out(const in6_addr_t *addr, char buf[33])
2411 2414 {
2412 2415 const uint8_t *ip = addr->s6_addr;
2413 2416 char digits[] = "0123456789abcdef";
2414 2417 int i;
2415 2418 for (i = 0; i < 16; i++) {
2416 2419 buf[2 * i] = digits[ip[i] >> 4];
2417 2420 buf[2 * i + 1] = digits[ip[i] & 0xf];
2418 2421 }
2419 2422 buf[32] = '\0';
2420 2423 }
2421 2424
2422 2425 /* ARGSUSED */
2423 2426 static void
2424 2427 lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2425 2428 {
2426 2429 netstack_t *ns;
2427 2430 ip_stack_t *ipst;
2428 2431 ill_t *ill;
2429 2432 ipif_t *ipif;
2430 2433 ill_walk_context_t ctx;
2431 2434 char ifname[LIFNAMSIZ], ip6out[33];
2432 2435
2433 2436 ns = netstack_get_current();
2434 2437 if (ns == NULL)
2435 2438 return;
2436 2439 ipst = ns->netstack_ip;
2437 2440
2438 2441 rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2439 2442 ill = ILL_START_WALK_V6(&ctx, ipst);
2440 2443
2441 2444 for (; ill != NULL; ill = ill_next(&ctx, ill)) {
2442 2445 for (ipif = ill->ill_ipif; ipif != NULL;
2443 2446 ipif = ipif->ipif_next) {
2444 2447 uint_t index = ill->ill_phyint->phyint_ifindex;
2445 2448 int plen = ip_mask_to_plen_v6(&ipif->ipif_v6net_mask);
2446 2449 unsigned int scope = lx_ipv6_scope_convert(
2447 2450 &ipif->ipif_v6lcl_addr);
2448 2451 /* Always report PERMANENT flag */
2449 2452 int flag = 0x80;
2450 2453
2451 2454 (void) snprintf(ifname, LIFNAMSIZ, "%s", ill->ill_name);
2452 2455 lx_ifname_convert(ifname, LX_IF_FROMNATIVE);
2453 2456 lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out);
2454 2457
2455 2458 lxpr_uiobuf_printf(uiobuf, "%32s %02x %02x %02x %02x"
2456 2459 " %8s\n", ip6out, index, plen, scope, flag, ifname);
2457 2460 }
2458 2461 }
2459 2462 rw_exit(&ipst->ips_ill_g_lock);
2460 2463 netstack_rele(ns);
2461 2464 }
2462 2465
2463 2466 /* ARGSUSED */
static void
lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Deliberately empty: /proc/net/igmp is an empty file. */
}
2468 2471
2469 2472 /* ARGSUSED */
static void
lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Deliberately empty: /proc/net/ip_mr_cache is an empty file. */
}
2474 2477
2475 2478 /* ARGSUSED */
static void
lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Deliberately empty: /proc/net/ip_mr_vif is an empty file. */
}
2480 2483
static void
lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf)
{
	uint32_t flags;
	char name[IFNAMSIZ];
	char ipv6addr[33];

	/* destination address and prefix length */
	lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr);
	lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr,
	    ip_mask_to_plen_v6(&ire->ire_mask_v6));

	/* punt on this for now */
	lxpr_uiobuf_printf(uiobuf, "%s %02x ",
	    "00000000000000000000000000000000", 0);

	/* next-hop (gateway) address */
	lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr);
	lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr);

	/* these route flags have direct Linux equivalents */
	flags = ire->ire_flags &
	    (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
	/* Linux's RTF_LOCAL equivalent */
	if (ire->ire_metrics.iulp_local)
		flags |= 0x80000000;

	/* interface name, converted to the Linux-style name; may be empty */
	if (ire->ire_ill != NULL) {
		ill_get_name(ire->ire_ill, name, sizeof (name));
		lx_ifname_convert(name, LX_IF_FROMNATIVE);
	} else {
		name[0] = '\0';
	}

	lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n",
	    0, /* metric */
	    ire->ire_refcnt,
	    0,
	    flags,
	    name);
}
2519 2522
2520 2523 /* ARGSUSED */
static void
lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	netstack_t *ns;
	ip_stack_t *ipst;

	/* silently produce no output if no netstack is available */
	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	/*
	 * LX branded zones are expected to have exclusive IP stack, hence
	 * using ALL_ZONES as the zoneid filter.
	 */
	ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst);

	netstack_rele(ns);
}
2540 2543
2541 2544 /* ARGSUSED */
static void
lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Deliberately empty: /proc/net/mcfilter is an empty file. */
}
2546 2549
2547 2550 /* ARGSUSED */
2548 2551 static void
2549 2552 lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2550 2553 {
2551 2554 }
2552 2555
2553 2556 /* ARGSUSED */
2554 2557 static void
2555 2558 lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2556 2559 {
2557 2560 }
2558 2561
/*
 * IRE types that are not reported in the emulated /proc/net/route output:
 * interface clones, broadcast/multicast entries, the no-route sentinel,
 * and loopback/local routes.
 */
#define	LXPR_SKIP_ROUTE(type)	\
	(((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \
	IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & type) != 0)
2562 2565
/*
 * Format one IPv4 IRE as a line of /proc/net/route.  Used as the callback
 * for ire_walk_v4().  Routes matching LXPR_SKIP_ROUTE() or marked
 * testhidden are suppressed entirely.
 */
static void
lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf)
{
	uint32_t flags;
	char name[IFNAMSIZ];
	ill_t *ill;
	ire_t *nire;
	ipif_t *ipif;
	ipaddr_t gateway;

	if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0)
		return;

	/* These route flags have direct Linux equivalents */
	flags = ire->ire_flags &
	    (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);

	/*
	 * Search for a suitable IRE for naming purposes.
	 * On Linux, the default route is typically associated with the
	 * interface used to access gateway. The default IRE on Illumos
	 * typically lacks an ill reference but its parent might have one.
	 */
	nire = ire;
	do {
		ill = nire->ire_ill;
		nire = nire->ire_dep_parent;
	} while (ill == NULL && nire != NULL);
	if (ill != NULL) {
		ill_get_name(ill, name, sizeof (name));
		lx_ifname_convert(name, LX_IF_FROMNATIVE);
	} else {
		/* Linux shows "*" for routes without an interface */
		name[0] = '*';
		name[1] = '\0';
	}

	/*
	 * Linux suppresses the gateway address for directly connected
	 * interface networks. To emulate this behavior, we walk all addresses
	 * of a given route interface. If one matches the gateway, it is
	 * displayed as NULL.
	 */
	gateway = ire->ire_gateway_addr;
	if ((ill = ire->ire_ill) != NULL) {
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_lcl_addr == gateway) {
				gateway = 0;
				break;
			}
		}
	}

	/* Fields we cannot derive (metric, use, mss, window) print as 0 */
	lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
	    "%d\t%08X\t%d\t%u\t%u\n",
	    name,
	    ire->ire_addr,
	    gateway,
	    flags, 0, 0,
	    0, /* priority */
	    ire->ire_mask,
	    0, 0, /* mss, window */
	    ire->ire_metrics.iulp_rtt);
}
2627 2630
2628 2631 /* ARGSUSED */
2629 2632 static void
2630 2633 lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2631 2634 {
2632 2635 netstack_t *ns;
2633 2636 ip_stack_t *ipst;
2634 2637
2635 2638 lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t"
2636 2639 "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
2637 2640
2638 2641 ns = netstack_get_current();
2639 2642 if (ns == NULL)
2640 2643 return;
2641 2644 ipst = ns->netstack_ip;
2642 2645
2643 2646 /*
2644 2647 * LX branded zones are expected to have exclusive IP stack, hence
2645 2648 * using ALL_ZONES as the zoneid filter.
2646 2649 */
2647 2650 ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst);
2648 2651
2649 2652 netstack_rele(ns);
2650 2653 }
2651 2654
/*
 * Stub: /proc/net/rpc is presented as an empty file.
 */
/* ARGSUSED */
static void
lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * Stub: /proc/net/rt_cache is presented as an empty file.
 */
/* ARGSUSED */
static void
lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}

/*
 * Stub: /proc/net/sockstat is presented as an empty file.
 */
/* ARGSUSED */
static void
lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2669 2672
/*
 * Describes one section of /proc/net/snmp: a protocol name (matched
 * against mib2 kstat names) and a NULL-terminated list of kstat field
 * names to print, in Linux column order.
 */
typedef struct lxpr_snmp_table {
	const char *lst_proto;	/* protocol / mib2 kstat name, e.g. "ip" */
	const char *lst_fields[];	/* field names, NULL-terminated */
} lxpr_snmp_table_t;
2674 2677
/* /proc/net/snmp "Ip:" section fields, in Linux column order. */
static lxpr_snmp_table_t lxpr_snmp_ip = { "ip",
{
	"forwarding", "defaultTTL", "inReceives", "inHdrErrors",
	"inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards",
	"inDelivers", "outRequests", "outDiscards", "outNoRoutes",
	"reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs",
	"fragFails", "fragCreates",
	NULL
}
};
/* /proc/net/snmp "Icmp:" section fields. */
static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp",
{
	"inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds",
	"inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps",
	"inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps",
	"outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds",
	"outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos",
	"outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks",
	"outAddrMaskReps",
	NULL
}
};
/* /proc/net/snmp "Tcp:" section fields. */
static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp",
{
	"rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens",
	"passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs",
	"outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors",
	NULL
}
};
/* /proc/net/snmp "Udp:" section fields. */
static lxpr_snmp_table_t lxpr_snmp_udp = { "udp",
{
	"inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors",
	"sndbufErrors", "inCsumErrors",
	NULL
}
};

/* NULL-terminated list of all sections, in /proc/net/snmp output order. */
static lxpr_snmp_table_t *lxpr_net_snmptab[] = {
	&lxpr_snmp_ip,
	&lxpr_snmp_icmp,
	&lxpr_snmp_tcp,
	&lxpr_snmp_udp,
	NULL
};
2720 2723
2721 2724 static void
2722 2725 lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table,
2723 2726 kstat_t *kn)
2724 2727 {
2725 2728 kstat_named_t *klist;
2726 2729 char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN];
2727 2730 int i, j, num;
2728 2731 size_t size;
2729 2732
2730 2733 klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
2731 2734 if (klist == NULL)
2732 2735 return;
2733 2736
2734 2737 /* Print the header line, fields capitalized */
2735 2738 (void) strncpy(upname, table->lst_proto, KSTAT_STRLEN);
2736 2739 upname[0] = toupper(upname[0]);
2737 2740 lxpr_uiobuf_printf(uiobuf, "%s:", upname);
2738 2741 for (i = 0; table->lst_fields[i] != NULL; i++) {
2739 2742 (void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN);
2740 2743 upfield[0] = toupper(upfield[0]);
2741 2744 lxpr_uiobuf_printf(uiobuf, " %s", upfield);
2742 2745 }
2743 2746 lxpr_uiobuf_printf(uiobuf, "\n%s:", upname);
2744 2747
2745 2748 /* Then loop back through to print the value line. */
2746 2749 for (i = 0; table->lst_fields[i] != NULL; i++) {
2747 2750 kstat_named_t *kpoint = NULL;
2748 2751 for (j = 0; j < num; j++) {
2749 2752 if (strncmp(klist[j].name, table->lst_fields[i],
2750 2753 KSTAT_STRLEN) == 0) {
2751 2754 kpoint = &klist[j];
2752 2755 break;
2753 2756 }
2754 2757 }
2755 2758 if (kpoint == NULL) {
2756 2759 /* Output 0 for unknown fields */
2757 2760 lxpr_uiobuf_printf(uiobuf, " 0");
2758 2761 } else {
2759 2762 switch (kpoint->data_type) {
2760 2763 case KSTAT_DATA_INT32:
2761 2764 lxpr_uiobuf_printf(uiobuf, " %d",
2762 2765 kpoint->value.i32);
2763 2766 break;
2764 2767 case KSTAT_DATA_UINT32:
2765 2768 lxpr_uiobuf_printf(uiobuf, " %u",
2766 2769 kpoint->value.ui32);
2767 2770 break;
2768 2771 case KSTAT_DATA_INT64:
2769 2772 lxpr_uiobuf_printf(uiobuf, " %ld",
2770 2773 kpoint->value.l);
2771 2774 break;
2772 2775 case KSTAT_DATA_UINT64:
2773 2776 lxpr_uiobuf_printf(uiobuf, " %lu",
2774 2777 kpoint->value.ul);
2775 2778 break;
2776 2779 }
2777 2780 }
2778 2781 }
2779 2782 lxpr_uiobuf_printf(uiobuf, "\n");
2780 2783 kmem_free(klist, size);
2781 2784 }
2782 2785
2783 2786 /* ARGSUSED */
2784 2787 static void
2785 2788 lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2786 2789 {
2787 2790 kstat_t *ksr;
2788 2791 kstat_t ks0;
2789 2792 lxpr_snmp_table_t **table = lxpr_net_snmptab;
2790 2793 int i, t, nidx;
2791 2794 size_t sidx;
2792 2795
2793 2796 ks0.ks_kid = 0;
2794 2797 ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
2795 2798 if (ksr == NULL)
2796 2799 return;
2797 2800
2798 2801 for (t = 0; table[t] != NULL; t++) {
2799 2802 for (i = 0; i < nidx; i++) {
2800 2803 if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0)
2801 2804 continue;
2802 2805 if (strncmp(ksr[i].ks_name, table[t]->lst_proto,
2803 2806 KSTAT_STRLEN) == 0) {
2804 2807 lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]);
2805 2808 break;
2806 2809 }
2807 2810 }
2808 2811 }
2809 2812 kmem_free(ksr, sidx);
2810 2813 }
2811 2814
/*
 * Stub: /proc/net/stat is presented as an empty file.
 */
/* ARGSUSED */
static void
lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
2817 2820
2818 2821 static int
2819 2822 lxpr_convert_tcp_state(int st)
2820 2823 {
2821 2824 /*
2822 2825 * Derived from the enum located in the Linux kernel sources:
2823 2826 * include/net/tcp_states.h
2824 2827 */
2825 2828 switch (st) {
2826 2829 case TCPS_ESTABLISHED:
2827 2830 return (1);
2828 2831 case TCPS_SYN_SENT:
2829 2832 return (2);
2830 2833 case TCPS_SYN_RCVD:
2831 2834 return (3);
2832 2835 case TCPS_FIN_WAIT_1:
2833 2836 return (4);
2834 2837 case TCPS_FIN_WAIT_2:
2835 2838 return (5);
2836 2839 case TCPS_TIME_WAIT:
2837 2840 return (6);
2838 2841 case TCPS_CLOSED:
2839 2842 return (7);
2840 2843 case TCPS_CLOSE_WAIT:
2841 2844 return (8);
2842 2845 case TCPS_LAST_ACK:
2843 2846 return (9);
2844 2847 case TCPS_LISTEN:
2845 2848 return (10);
2846 2849 case TCPS_CLOSING:
2847 2850 return (11);
2848 2851 default:
2849 2852 /* No translation for TCPS_IDLE, TCPS_BOUND or anything else */
2850 2853 return (0);
2851 2854 }
2852 2855 }
2853 2856
/*
 * Emit /proc/net/tcp (ipver == IPV4_VERSION) or /proc/net/tcp6
 * (IPV6_VERSION): a header line plus one line per TCP conn_t in the
 * current netstack's global connection hash.
 */
static void
lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
{
	int i, sl = 0;
	connf_t *connfp;
	conn_t *connp;
	netstack_t *ns;
	ip_stack_t *ipst;

	ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
	if (ipver == IPV4_VERSION) {
		lxpr_uiobuf_printf(uiobuf, " sl local_address rem_address "
		    "st tx_queue rx_queue tr tm->when retrnsmt uid timeout "
		    "inode\n");
	} else {
		lxpr_uiobuf_printf(uiobuf, " sl "
		    "local_address "
		    "remote_address "
		    "st tx_queue rx_queue tr tm->when retrnsmt "
		    "uid timeout inode\n");
	}
	/*
	 * Due to differences between the Linux and illumos TCP
	 * implementations, some data will be omitted from the output here.
	 *
	 * Valid fields:
	 * - local_address
	 * - remote_address
	 * - st
	 * - tx_queue
	 * - rx_queue
	 * - uid
	 * - inode
	 *
	 * Omitted/invalid fields
	 * - tr
	 * - tm->when
	 * - retrnsmt
	 * - timeout
	 */

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;
		/*
		 * ipcl_get_next_conn() advances the walk and manages conn
		 * references, so "continue" here is safe — the next loop
		 * iteration releases the skipped conn.
		 */
		while ((connp =
		    ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) {
			tcp_t *tcp;
			vattr_t attr;
			sonode_t *so = (sonode_t *)connp->conn_upper_handle;
			vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
			if (connp->conn_ipversion != ipver)
				continue;
			tcp = connp->conn_tcp;
			if (ipver == IPV4_VERSION) {
				lxpr_uiobuf_printf(uiobuf,
				    "%4d: %08X:%04X %08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v4,
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v4,
				    ntohs(connp->conn_fport));
			} else {
				lxpr_uiobuf_printf(uiobuf, "%4d: "
				    "%08X%08X%08X%08X:%04X "
				    "%08X%08X%08X%08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v6.s6_addr32[0],
				    connp->conn_laddr_v6.s6_addr32[1],
				    connp->conn_laddr_v6.s6_addr32[2],
				    connp->conn_laddr_v6.s6_addr32[3],
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v6.s6_addr32[0],
				    connp->conn_faddr_v6.s6_addr32[1],
				    connp->conn_faddr_v6.s6_addr32[2],
				    connp->conn_faddr_v6.s6_addr32[3],
				    ntohs(connp->conn_fport));
			}

			/* fetch the simulated inode for the socket */
			if (vp == NULL ||
			    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
				attr.va_nodeid = 0;

			lxpr_uiobuf_printf(uiobuf,
			    "%02X %08X:%08X %02X:%08X %08X "
			    "%5u %8d %lu %d %p %u %u %u %u %d\n",
			    lxpr_convert_tcp_state(tcp->tcp_state),
			    tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */
			    0, 0, /* tr, when */
			    0, /* per-connection rexmits aren't tracked today */
			    connp->conn_cred->cr_uid,
			    0, /* timeout */
			    /* inode + more */
			    (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0);
		}
	}
	netstack_rele(ns);
}
2957 2960
/* Emit /proc/net/tcp: the IPv4 TCP connection table. */
/* ARGSUSED */
static void
lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_tcp(uiobuf, IPV4_VERSION);
}

/* Emit /proc/net/tcp6: the IPv6 TCP connection table. */
/* ARGSUSED */
static void
lxpr_read_net_tcp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_tcp(uiobuf, IPV6_VERSION);
}
2971 2974
/*
 * Emit /proc/net/udp (ipver == IPV4_VERSION) or /proc/net/udp6
 * (IPV6_VERSION): a header line plus one line per UDP conn_t in the
 * current netstack's global connection hash.
 */
static void
lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
{
	int i, sl = 0;
	connf_t *connfp;
	conn_t *connp;
	netstack_t *ns;
	ip_stack_t *ipst;

	ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
	if (ipver == IPV4_VERSION) {
		lxpr_uiobuf_printf(uiobuf, " sl local_address rem_address"
		    " st tx_queue rx_queue tr tm->when retrnsmt uid"
		    " timeout inode ref pointer drops\n");
	} else {
		lxpr_uiobuf_printf(uiobuf, " sl "
		    "local_address "
		    "remote_address "
		    "st tx_queue rx_queue tr tm->when retrnsmt "
		    "uid timeout inode ref pointer drops\n");
	}
	/*
	 * Due to differences between the Linux and illumos UDP
	 * implementations, some data will be omitted from the output here.
	 *
	 * Valid fields:
	 * - local_address
	 * - remote_address
	 * - st: limited
	 * - uid
	 *
	 * Omitted/invalid fields
	 * - tx_queue
	 * - rx_queue
	 * - tr
	 * - tm->when
	 * - retrnsmt
	 * - timeout
	 * - inode
	 */

	ns = netstack_get_current();
	if (ns == NULL)
		return;
	ipst = ns->netstack_ip;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;
		while ((connp =
		    ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) {
			udp_t *udp;
			int state = 0;
			vattr_t attr;
			sonode_t *so = (sonode_t *)connp->conn_upper_handle;
			vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
			if (connp->conn_ipversion != ipver)
				continue;
			udp = connp->conn_udp;
			if (ipver == IPV4_VERSION) {
				lxpr_uiobuf_printf(uiobuf,
				    "%4d: %08X:%04X %08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v4,
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v4,
				    ntohs(connp->conn_fport));
			} else {
				lxpr_uiobuf_printf(uiobuf, "%4d: "
				    "%08X%08X%08X%08X:%04X "
				    "%08X%08X%08X%08X:%04X ",
				    ++sl,
				    connp->conn_laddr_v6.s6_addr32[0],
				    connp->conn_laddr_v6.s6_addr32[1],
				    connp->conn_laddr_v6.s6_addr32[2],
				    connp->conn_laddr_v6.s6_addr32[3],
				    ntohs(connp->conn_lport),
				    connp->conn_faddr_v6.s6_addr32[0],
				    connp->conn_faddr_v6.s6_addr32[1],
				    connp->conn_faddr_v6.s6_addr32[2],
				    connp->conn_faddr_v6.s6_addr32[3],
				    ntohs(connp->conn_fport));
			}

			/* Map TPI state to the closest Linux state number */
			switch (udp->udp_state) {
			case TS_UNBND:
			case TS_IDLE:
				state = 7;
				break;
			case TS_DATA_XFER:
				state = 1;
				break;
			}

			/* fetch the simulated inode for the socket */
			if (vp == NULL ||
			    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
				attr.va_nodeid = 0;

			lxpr_uiobuf_printf(uiobuf,
			    "%02X %08X:%08X %02X:%08X %08X "
			    "%5u %8d %lu %d %p %d\n",
			    state,
			    0, 0, /* rx/tx queue */
			    0, 0, /* tr, when */
			    0, /* retrans */
			    connp->conn_cred->cr_uid,
			    0, /* timeout */
			    /* inode, ref, pointer, drops */
			    (ino_t)attr.va_nodeid, 0, NULL, 0);
		}
	}
	netstack_rele(ns);
}
3086 3089
/* Emit /proc/net/udp: the IPv4 UDP socket table. */
/* ARGSUSED */
static void
lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_udp(uiobuf, IPV4_VERSION);
}

/* Emit /proc/net/udp6: the IPv6 UDP socket table. */
/* ARGSUSED */
static void
lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	lxpr_format_udp(uiobuf, IPV6_VERSION);
}
3100 3103
/*
 * Emit /proc/net/unix by walking the global socket list and printing one
 * line per active sonode belonging to the current zone.
 */
/* ARGSUSED */
static void
lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	sonode_t *so;
	zoneid_t zoneid = getzoneid();

	lxpr_uiobuf_printf(uiobuf, "Num RefCount Protocol Flags Type "
	    "St Inode Path\n");

	mutex_enter(&socklist.sl_lock);
	for (so = socklist.sl_list; so != NULL;
	    so = _SOTOTPI(so)->sti_next_so) {
		vnode_t *vp = so->so_vnode;
		vattr_t attr;
		sotpi_info_t *sti;
		const char *name = NULL;
		int status = 0;
		int type = 0;
		int flags = 0;

		/* Only process active sonodes in this zone */
		if (so->so_count == 0 || so->so_zoneid != zoneid)
			continue;

		/*
		 * Grab the inode, if possible.
		 * This must be done before entering so_lock.
		 */
		if (vp == NULL ||
		    VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
			attr.va_nodeid = 0;

		mutex_enter(&so->so_lock);
		sti = _SOTOTPI(so);

		/*
		 * Prefer the local address; fall back to the peer address.
		 * NOTE(review): sa_data is printed with %s below, assuming
		 * the stored sockaddr path is NUL-terminated — confirm.
		 */
		if (sti->sti_laddr_sa != NULL &&
		    sti->sti_laddr_len > 0) {
			name = sti->sti_laddr_sa->sa_data;
		} else if (sti->sti_faddr_sa != NULL &&
		    sti->sti_faddr_len > 0) {
			name = sti->sti_faddr_sa->sa_data;
		}

		/*
		 * Derived from enum values in Linux kernel source:
		 * include/uapi/linux/net.h
		 */
		if ((so->so_state & SS_ISDISCONNECTING) != 0) {
			status = 4;
		} else if ((so->so_state & SS_ISCONNECTING) != 0) {
			status = 2;
		} else if ((so->so_state & SS_ISCONNECTED) != 0) {
			status = 3;
		} else {
			status = 1;
			/* Add ACC flag for stream-type server sockets */
			if (so->so_type != SOCK_DGRAM &&
			    sti->sti_laddr_sa != NULL)
				flags |= 0x10000;
		}

		/* Convert to Linux type */
		switch (so->so_type) {
		case SOCK_DGRAM:
			type = 2;
			break;
		case SOCK_SEQPACKET:
			type = 5;
			break;
		default:
			type = 1;
		}

		lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu",
		    so,
		    so->so_count,
		    0, /* proto, always 0 */
		    flags,
		    type,
		    status,
		    (ino_t)attr.va_nodeid);

		/*
		 * Due to shortcomings in the abstract socket emulation, they
		 * cannot be properly represented here (as @<path>).
		 *
		 * This will be the case until they are better implemented.
		 */
		if (name != NULL)
			lxpr_uiobuf_printf(uiobuf, " %s\n", name);
		else
			lxpr_uiobuf_printf(uiobuf, "\n");
		mutex_exit(&so->so_lock);
	}
	mutex_exit(&socklist.sl_lock);
}
3198 3201
3199 3202 /*
3200 3203 * lxpr_read_kmsg(): read the contents of the kernel message queue. We
3201 3204 * translate this into the reception of console messages for this zone; each
3202 3205 * read copies out a single zone console message, or blocks until the next one
3203 3206 * is produced, unless we're open non-blocking, in which case we return after
3204 3207 * 1ms.
3205 3208 */
3206 3209
#define	LX_KMSG_PRI	"<0>"	/* Linux printk priority-0 prefix */
3208 3211
3209 3212 static void
3210 3213 lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh)
3211 3214 {
3212 3215 mblk_t *mp;
3213 3216 timestruc_t to;
3214 3217 timestruc_t *tp = NULL;
3215 3218
3216 3219 ASSERT(lxpnp->lxpr_type == LXPR_KMSG);
3217 3220
3218 3221 if (lxpr_uiobuf_nonblock(uiobuf)) {
3219 3222 to.tv_sec = 0;
3220 3223 to.tv_nsec = 1000000; /* 1msec */
3221 3224 tp = &to;
3222 3225 }
3223 3226
3224 3227 if (ldi_getmsg(lh, &mp, tp) == 0) {
3225 3228 /*
3226 3229 * lx procfs doesn't like successive reads to the same file
3227 3230 * descriptor unless we do an explicit rewind each time.
3228 3231 */
3229 3232 lxpr_uiobuf_seek(uiobuf, 0);
3230 3233
3231 3234 lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
3232 3235 mp->b_cont->b_rptr);
3233 3236
3234 3237 freemsg(mp);
3235 3238 }
3236 3239 }
3237 3240
3238 3241 /*
3239 3242 * lxpr_read_loadavg(): read the contents of the "loadavg" file. We do just
3240 3243 * enough for uptime and other simple lxproc readers to work
3241 3244 */
3242 3245 extern int nthread;
3243 3246
static void
lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ulong_t avenrun1;
	ulong_t avenrun5;
	ulong_t avenrun15;
	ulong_t avenrun1_cs;	/* centisecond (two-decimal) components */
	ulong_t avenrun5_cs;
	ulong_t avenrun15_cs;
	int loadavg[3];
	int *loadbuf;
	cpupart_t *cp;
	zone_t *zone = LXPTOZ(lxpnp);

	uint_t nrunnable = 0;
	rctl_qty_t nlwps;

	ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);

	/* cpu_lock stabilizes the partition list while we sum run queues */
	mutex_enter(&cpu_lock);

	/*
	 * Need to add up values over all CPU partitions. If pools are active,
	 * only report the values of the zone's partition, which by definition
	 * includes the current CPU.
	 */
	if (pool_pset_enabled()) {
		psetid_t psetid = zone_pset_get(curproc->p_zone);

		ASSERT(curproc->p_zone != &zone0);
		cp = CPU->cpu_part;

		nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
		(void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
		loadbuf = &loadavg[0];
	} else {
		cp = cp_list_head;
		do {
			nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
		} while ((cp = cp->cp_next) != cp_list_head);

		loadbuf = zone == global_zone ?
		    &avenrun[0] : zone->zone_avenrun;
	}

	/*
	 * If we're in the non-global zone, we'll report the total number of
	 * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
	 * otherwise will just use nthread (which will include kernel threads,
	 * but should be good enough for lxproc).
	 */
	nlwps = zone == global_zone ? nthread : zone->zone_nlwps;

	mutex_exit(&cpu_lock);

	/* Split each fixed-point average into integer and 1/100 parts */
	avenrun1 = loadbuf[0] >> FSHIFT;
	avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
	avenrun5 = loadbuf[1] >> FSHIFT;
	avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
	avenrun15 = loadbuf[2] >> FSHIFT;
	avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;

	/* Trailing 0 stands in for the "last pid" field */
	lxpr_uiobuf_printf(uiobuf,
	    "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
	    avenrun1, avenrun1_cs,
	    avenrun5, avenrun5_cs,
	    avenrun15, avenrun15_cs,
	    nrunnable, nlwps, 0);
}
3313 3316
3314 3317 /*
3315 3318 * lxpr_read_meminfo(): read the contents of the "meminfo" file.
3316 3319 */
3317 3320 static void
3318 3321 lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3319 3322 {
3320 3323 zone_t *zone = LXPTOZ(lxpnp);
3321 3324 int global = zone == global_zone;
3322 3325 long total_mem, free_mem, total_swap, used_swap;
3323 3326
3324 3327 ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
3325 3328
3326 3329 if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
3327 3330 total_mem = physmem * PAGESIZE;
3328 3331 free_mem = freemem * PAGESIZE;
3329 3332 } else {
3330 3333 total_mem = zone->zone_phys_mem_ctl;
3331 3334 free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
3332 3335 }
3333 3336
3334 3337 if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
3335 3338 total_swap = k_anoninfo.ani_max * PAGESIZE;
3336 3339 used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
3337 3340 } else {
3338 3341 mutex_enter(&zone->zone_mem_lock);
3339 3342 total_swap = zone->zone_max_swap_ctl;
3340 3343 used_swap = zone->zone_max_swap;
3341 3344 mutex_exit(&zone->zone_mem_lock);
3342 3345 }
3343 3346
3344 3347 lxpr_uiobuf_printf(uiobuf,
3345 3348 "MemTotal: %8lu kB\n"
3346 3349 "MemFree: %8lu kB\n"
3347 3350 "MemShared: %8u kB\n"
3348 3351 "Buffers: %8u kB\n"
3349 3352 "Cached: %8u kB\n"
3350 3353 "SwapCached:%8u kB\n"
3351 3354 "Active: %8u kB\n"
3352 3355 "Inactive: %8u kB\n"
3353 3356 "HighTotal: %8u kB\n"
3354 3357 "HighFree: %8u kB\n"
3355 3358 "LowTotal: %8u kB\n"
3356 3359 "LowFree: %8u kB\n"
3357 3360 "SwapTotal: %8lu kB\n"
3358 3361 "SwapFree: %8lu kB\n",
3359 3362 btok(total_mem), /* MemTotal */
3360 3363 btok(free_mem), /* MemFree */
3361 3364 0, /* MemShared */
3362 3365 0, /* Buffers */
3363 3366 0, /* Cached */
3364 3367 0, /* SwapCached */
3365 3368 0, /* Active */
3366 3369 0, /* Inactive */
3367 3370 0, /* HighTotal */
3368 3371 0, /* HighFree */
3369 3372 btok(total_mem), /* LowTotal */
3370 3373 btok(free_mem), /* LowFree */
3371 3374 btok(total_swap), /* SwapTotal */
3372 3375 btok(total_swap - used_swap)); /* SwapFree */
3373 3376 }
3374 3377
3375 3378 /*
3376 3379 * lxpr_read_mounts():
3377 3380 */
3378 3381 /* ARGSUSED */
3379 3382 static void
3380 3383 lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3381 3384 {
3382 3385 struct vfs *vfsp;
3383 3386 struct vfs *vfslist;
3384 3387 zone_t *zone = LXPTOZ(lxpnp);
3385 3388 struct print_data {
3386 3389 refstr_t *vfs_mntpt;
3387 3390 refstr_t *vfs_resource;
3388 3391 uint_t vfs_flag;
3389 3392 int vfs_fstype;
3390 3393 struct print_data *next;
3391 3394 } *print_head = NULL;
3392 3395 struct print_data **print_tail = &print_head;
3393 3396 struct print_data *printp;
3394 3397
3395 3398 vfs_list_read_lock();
3396 3399
3397 3400 if (zone == global_zone) {
3398 3401 vfsp = vfslist = rootvfs;
3399 3402 } else {
3400 3403 vfsp = vfslist = zone->zone_vfslist;
3401 3404 /*
3402 3405 * If the zone has a root entry, it will be the first in
3403 3406 * the list. If it doesn't, we conjure one up.
3404 3407 */
3405 3408 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
3406 3409 zone->zone_rootpath) != 0) {
3407 3410 struct vfs *tvfsp;
3408 3411 /*
3409 3412 * The root of the zone is not a mount point. The vfs
3410 3413 * we want to report is that of the zone's root vnode.
3411 3414 */
3412 3415 tvfsp = zone->zone_rootvp->v_vfsp;
3413 3416
3414 3417 lxpr_uiobuf_printf(uiobuf,
3415 3418 "/ / %s %s 0 0\n",
3416 3419 vfssw[tvfsp->vfs_fstype].vsw_name,
3417 3420 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
3418 3421
3419 3422 }
3420 3423 if (vfslist == NULL) {
3421 3424 vfs_list_unlock();
3422 3425 return;
3423 3426 }
3424 3427 }
3425 3428
3426 3429 /*
3427 3430 * Later on we have to do a lookupname, which can end up causing
3428 3431 * another vfs_list_read_lock() to be called. Which can lead to a
3429 3432 * deadlock. To avoid this, we extract the data we need into a local
3430 3433 * list, then we can run this list without holding vfs_list_read_lock()
3431 3434 * We keep the list in the same order as the vfs_list
3432 3435 */
3433 3436 do {
3434 3437 /* Skip mounts we shouldn't show */
3435 3438 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
3436 3439 goto nextfs;
3437 3440 }
3438 3441
3439 3442 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
3440 3443 refstr_hold(vfsp->vfs_mntpt);
3441 3444 printp->vfs_mntpt = vfsp->vfs_mntpt;
3442 3445 refstr_hold(vfsp->vfs_resource);
3443 3446 printp->vfs_resource = vfsp->vfs_resource;
3444 3447 printp->vfs_flag = vfsp->vfs_flag;
3445 3448 printp->vfs_fstype = vfsp->vfs_fstype;
3446 3449 printp->next = NULL;
3447 3450
3448 3451 *print_tail = printp;
3449 3452 print_tail = &printp->next;
3450 3453
3451 3454 nextfs:
3452 3455 vfsp = (zone == global_zone) ?
3453 3456 vfsp->vfs_next : vfsp->vfs_zone_next;
3454 3457
3455 3458 } while (vfsp != vfslist);
3456 3459
3457 3460 vfs_list_unlock();
3458 3461
3459 3462 /*
3460 3463 * now we can run through what we've extracted without holding
3461 3464 * vfs_list_read_lock()
3462 3465 */
3463 3466 printp = print_head;
3464 3467 while (printp != NULL) {
3465 3468 struct print_data *printp_next;
3466 3469 const char *resource;
3467 3470 char *mntpt;
3468 3471 struct vnode *vp;
3469 3472 int error;
3470 3473
3471 3474 mntpt = (char *)refstr_value(printp->vfs_mntpt);
3472 3475 resource = refstr_value(printp->vfs_resource);
3473 3476
3474 3477 if (mntpt != NULL && mntpt[0] != '\0')
3475 3478 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
3476 3479 else
3477 3480 mntpt = "-";
3478 3481
3479 3482 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
3480 3483
3481 3484 if (error != 0)
3482 3485 goto nextp;
3483 3486
3484 3487 if (!(vp->v_flag & VROOT)) {
3485 3488 VN_RELE(vp);
3486 3489 goto nextp;
3487 3490 }
3488 3491 VN_RELE(vp);
3489 3492
3490 3493 if (resource != NULL && resource[0] != '\0') {
3491 3494 if (resource[0] == '/') {
3492 3495 resource = ZONE_PATH_VISIBLE(resource, zone) ?
3493 3496 ZONE_PATH_TRANSLATE(resource, zone) :
3494 3497 mntpt;
3495 3498 }
3496 3499 } else {
3497 3500 resource = "-";
3498 3501 }
3499 3502
3500 3503 lxpr_uiobuf_printf(uiobuf,
3501 3504 "%s %s %s %s 0 0\n",
3502 3505 resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
3503 3506 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
3504 3507
3505 3508 nextp:
3506 3509 printp_next = printp->next;
3507 3510 refstr_rele(printp->vfs_mntpt);
3508 3511 refstr_rele(printp->vfs_resource);
3509 3512 kmem_free(printp, sizeof (*printp));
3510 3513 printp = printp_next;
3511 3514
3512 3515 }
3513 3516 }
3514 3517
3515 3518 /*
3516 3519 * lxpr_read_partitions():
3517 3520 *
3518 3521 * Over the years, /proc/partitions has been made considerably smaller -- to
3519 3522 * the point that it really is only major number, minor number, number of
3520 3523 * blocks (which we report as 0), and partition name.
3521 3524 *
3522 3525 * We support this because some things want to see it to make sense of
3523 3526 * /proc/diskstats, and also because "fdisk -l" and a few other things look
3524 3527 * here to find all disks on the system.
3525 3528 */
/* ARGSUSED */
static void
lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{

	kstat_t *ksr;
	kstat_t ks0;
	int nidx, num, i;
	size_t sidx, size;
	zfs_cmd_t *zc;
	nvlist_t *nv = NULL;
	nvpair_t *elem = NULL;
	lxpr_mnt_t *mnt;
	lxpr_zfs_iter_t zfsi;

	ASSERT(lxpnp->lxpr_type == LXPR_PARTITIONS);

	/*
	 * Take a snapshot of all kstat headers in the system; sidx/nidx
	 * receive the snapshot's byte size and entry count and must be used
	 * to free it below.
	 */
	ks0.ks_kid = 0;
	ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);

	if (ksr == NULL)
		return;

	lxpr_uiobuf_printf(uiobuf, "major minor #blocks name\n\n");

	/* Entry 0 is the snapshot header itself, so start at 1. */
	for (i = 1; i < nidx; i++) {
		kstat_t *ksp = &ksr[i];
		kstat_io_t *kip;

		/* Only disk-class I/O kstats represent partitions/disks. */
		if (ksp->ks_type != KSTAT_TYPE_IO ||
		    strcmp(ksp->ks_class, "disk") != 0)
			continue;

		if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
		    &size, &num)) == NULL)
			continue;

		/* Discard data too small to be a valid kstat_io_t. */
		if (size < sizeof (kstat_io_t)) {
			kmem_free(kip, size);
			continue;
		}

		/* Block count is reported as 0 (see block comment above). */
		lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d %s\n",
		    mod_name_to_major(ksp->ks_module),
		    ksp->ks_instance, 0, ksp->ks_name);

		kmem_free(kip, size);
	}

	kmem_free(ksr, sidx);

	/* If we never got to open the zfs LDI, then stop now. */
	mnt = (lxpr_mnt_t *)lxpnp->lxpr_vnode->v_vfsp->vfs_data;
	if (mnt->lxprm_zfs_isopen == B_FALSE)
		return;

	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);

	if (lxpr_zfs_list_pools(mnt, zc, &nv) != 0)
		goto out;

	/* Walk every pool, then every zvol within each pool. */
	while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
		char *pool = nvpair_name(elem);

		bzero(&zfsi, sizeof (lxpr_zfs_iter_t));
		while (lxpr_zfs_next_zvol(mnt, pool, zc, &zfsi) == 0) {
			major_t major;
			minor_t minor;
			/* Skip zvols whose device node cannot be resolved. */
			if (lxpr_zvol_dev(mnt, zc->zc_name, &major, &minor)
			    != 0)
				continue;

			lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d zvol/dsk/%s\n",
			    major, minor, 0, zc->zc_name);
		}
	}

	nvlist_free(nv);
out:
	kmem_free(zc, sizeof (zfs_cmd_t));
}
3607 3610
3608 3611 /*
3609 3612 * lxpr_read_diskstats():
3610 3613 *
3611 3614 * See the block comment above the per-device output-generating line for the
3612 3615 * details of the format.
3613 3616 */
3614 3617 /* ARGSUSED */
3615 3618 static void
3616 3619 lxpr_read_diskstats(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3617 3620 {
3618 3621 kstat_t *ksr;
3619 3622 kstat_t ks0;
3620 3623 int nidx, num, i;
3621 3624 size_t sidx, size;
3622 3625
3623 3626 ASSERT(lxpnp->lxpr_type == LXPR_DISKSTATS);
3624 3627
3625 3628 ks0.ks_kid = 0;
3626 3629 ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
3627 3630
3628 3631 if (ksr == NULL)
3629 3632 return;
3630 3633
3631 3634 for (i = 1; i < nidx; i++) {
3632 3635 kstat_t *ksp = &ksr[i];
3633 3636 kstat_io_t *kip;
3634 3637
3635 3638 if (ksp->ks_type != KSTAT_TYPE_IO ||
3636 3639 strcmp(ksp->ks_class, "disk") != 0)
3637 3640 continue;
3638 3641
3639 3642 if ((kip = (kstat_io_t *)lxpr_kstat_read(ksp, B_TRUE,
3640 3643 &size, &num)) == NULL)
3641 3644 continue;
3642 3645
3643 3646 if (size < sizeof (kstat_io_t)) {
3644 3647 kmem_free(kip, size);
3645 3648 continue;
3646 3649 }
3647 3650
3648 3651 /*
3649 3652 * /proc/diskstats is defined to have one line of output for
3650 3653 * each block device, with each line containing the following
3651 3654 * 14 fields:
3652 3655 *
3653 3656 * 1 - major number
3654 3657 * 2 - minor mumber
3655 3658 * 3 - device name
3656 3659 * 4 - reads completed successfully
3657 3660 * 5 - reads merged
3658 3661 * 6 - sectors read
3659 3662 * 7 - time spent reading (ms)
3660 3663 * 8 - writes completed
3661 3664 * 9 - writes merged
3662 3665 * 10 - sectors written
3663 3666 * 11 - time spent writing (ms)
3664 3667 * 12 - I/Os currently in progress
3665 3668 * 13 - time spent doing I/Os (ms)
3666 3669 * 14 - weighted time spent doing I/Os (ms)
3667 3670 *
3668 3671 * One small hiccup: we don't actually keep track of time
3669 3672 * spent reading vs. time spent writing -- we keep track of
3670 3673 * time waiting vs. time actually performing I/O. While we
3671 3674 * could divide the total time by the I/O mix (making the
3672 3675 * obviously wrong assumption that I/O operations all take the
3673 3676 * same amount of time), this has the undesirable side-effect
3674 3677 * of moving backwards. Instead, we report the total time
3675 3678 * (read + write) for all three stats (read, write, total).
3676 3679 * This is also a lie of sorts, but it should be more
3677 3680 * immediately clear to the user that reads and writes are
3678 3681 * each being double-counted as the other.
3679 3682 */
3680 3683 lxpr_uiobuf_printf(uiobuf, "%4d %7d %s "
3681 3684 "%llu %llu %llu %llu "
3682 3685 "%llu %llu %llu %llu "
3683 3686 "%llu %llu %llu\n",
3684 3687 mod_name_to_major(ksp->ks_module),
3685 3688 ksp->ks_instance, ksp->ks_name,
3686 3689 (uint64_t)kip->reads, 0LL,
3687 3690 kip->nread / (uint64_t)LXPR_SECTOR_SIZE,
3688 3691 (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3689 3692 (uint64_t)kip->writes, 0LL,
3690 3693 kip->nwritten / (uint64_t)LXPR_SECTOR_SIZE,
3691 3694 (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3692 3695 (uint64_t)(kip->rcnt + kip->wcnt),
3693 3696 (kip->rtime + kip->wtime) / (uint64_t)(NANOSEC / MILLISEC),
3694 3697 (kip->rlentime + kip->wlentime) /
3695 3698 (uint64_t)(NANOSEC / MILLISEC));
3696 3699
3697 3700 kmem_free(kip, size);
3698 3701 }
3699 3702
3700 3703 kmem_free(ksr, sidx);
3701 3704 }
3702 3705
3703 3706 /*
3704 3707 * lxpr_read_version(): read the contents of the "version" file.
3705 3708 */
3706 3709 /* ARGSUSED */
3707 3710 static void
3708 3711 lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3709 3712 {
3710 3713 lx_zone_data_t *lxzd = ztolxzd(LXPTOZ(lxpnp));
3711 3714 lx_proc_data_t *lxpd = ptolxproc(curproc);
3712 3715 const char *release = lxzd->lxzd_kernel_release;
3713 3716 const char *version = lxzd->lxzd_kernel_version;
3714 3717
3715 3718 /* Use per-process overrides, if specified */
3716 3719 if (lxpd != NULL && lxpd->l_uname_release[0] != '\0') {
3717 3720 release = lxpd->l_uname_release;
3718 3721 }
3719 3722 if (lxpd != NULL && lxpd->l_uname_version[0] != '\0') {
3720 3723 version = lxpd->l_uname_version;
3721 3724 }
3722 3725
3723 3726 lxpr_uiobuf_printf(uiobuf,
3724 3727 "%s version %s (%s version %d.%d.%d) %s\n",
3725 3728 LX_UNAME_SYSNAME, release,
3726 3729 #if defined(__GNUC__)
3727 3730 "gcc", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
3728 3731 #else
3729 3732 "cc", 1, 0, 0,
3730 3733 #endif
3731 3734 version);
3732 3735 }
3733 3736
3734 3737 /*
3735 3738 * lxpr_read_stat(): read the contents of the "stat" file.
3736 3739 *
3737 3740 */
3738 3741 /* ARGSUSED */
3739 3742 static void
3740 3743 lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3741 3744 {
3742 3745 cpu_t *cp, *cpstart;
3743 3746 int pools_enabled;
3744 3747 ulong_t idle_cum = 0;
3745 3748 ulong_t sys_cum = 0;
3746 3749 ulong_t user_cum = 0;
3747 3750 ulong_t irq_cum = 0;
3748 3751 ulong_t cpu_nrunnable_cum = 0;
3749 3752 ulong_t w_io_cum = 0;
3750 3753
3751 3754 ulong_t pgpgin_cum = 0;
3752 3755 ulong_t pgpgout_cum = 0;
3753 3756 ulong_t pgswapout_cum = 0;
3754 3757 ulong_t pgswapin_cum = 0;
3755 3758 ulong_t intr_cum = 0;
3756 3759 ulong_t pswitch_cum = 0;
3757 3760 ulong_t forks_cum = 0;
3758 3761 hrtime_t msnsecs[NCMSTATES];
3759 3762 /* is the emulated release > 2.4 */
3760 3763 boolean_t newer_than24 = lx_kern_release_cmp(LXPTOZ(lxpnp), "2.4") > 0;
3761 3764 /* temporary variable since scalehrtime modifies data in place */
3762 3765 hrtime_t tmptime;
3763 3766
3764 3767 ASSERT(lxpnp->lxpr_type == LXPR_STAT);
3765 3768
3766 3769 mutex_enter(&cpu_lock);
3767 3770 pools_enabled = pool_pset_enabled();
3768 3771
3769 3772 /* Calculate cumulative stats */
3770 3773 cp = cpstart = CPU->cpu_part->cp_cpulist;
3771 3774 do {
3772 3775 int i;
3773 3776
3774 3777 /*
3775 3778 * Don't count CPUs that aren't even in the system
3776 3779 * or aren't up yet.
3777 3780 */
3778 3781 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3779 3782 continue;
3780 3783 }
3781 3784
3782 3785 get_cpu_mstate(cp, msnsecs);
3783 3786
3784 3787 idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3785 3788 sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3786 3789 user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
3787 3790
3788 3791 pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
3789 3792 pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
3790 3793 pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
3791 3794 pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
3792 3795
3793 3796
3794 3797 if (newer_than24) {
3795 3798 cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
3796 3799 w_io_cum += CPU_STATS(cp, sys.iowait);
3797 3800 for (i = 0; i < NCMSTATES; i++) {
3798 3801 tmptime = cp->cpu_intracct[i];
3799 3802 scalehrtime(&tmptime);
3800 3803 irq_cum += NSEC_TO_TICK(tmptime);
3801 3804 }
3802 3805 }
3803 3806
3804 3807 for (i = 0; i < PIL_MAX; i++)
3805 3808 intr_cum += CPU_STATS(cp, sys.intr[i]);
3806 3809
3807 3810 pswitch_cum += CPU_STATS(cp, sys.pswitch);
3808 3811 forks_cum += CPU_STATS(cp, sys.sysfork);
3809 3812 forks_cum += CPU_STATS(cp, sys.sysvfork);
3810 3813
3811 3814 if (pools_enabled)
3812 3815 cp = cp->cpu_next_part;
3813 3816 else
3814 3817 cp = cp->cpu_next;
3815 3818 } while (cp != cpstart);
3816 3819
3817 3820 if (newer_than24) {
3818 3821 lxpr_uiobuf_printf(uiobuf,
3819 3822 "cpu %lu %lu %lu %lu %lu %lu %lu\n",
3820 3823 user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
3821 3824 } else {
3822 3825 lxpr_uiobuf_printf(uiobuf,
3823 3826 "cpu %lu %lu %lu %lu\n",
3824 3827 user_cum, 0L, sys_cum, idle_cum);
3825 3828 }
3826 3829
3827 3830 /* Do per processor stats */
3828 3831 do {
3829 3832 int i;
3830 3833
3831 3834 ulong_t idle_ticks;
3832 3835 ulong_t sys_ticks;
3833 3836 ulong_t user_ticks;
3834 3837 ulong_t irq_ticks = 0;
3835 3838
3836 3839 /*
3837 3840 * Don't count CPUs that aren't even in the system
3838 3841 * or aren't up yet.
3839 3842 */
3840 3843 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
3841 3844 continue;
3842 3845 }
3843 3846
3844 3847 get_cpu_mstate(cp, msnsecs);
3845 3848
3846 3849 idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
3847 3850 sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
3848 3851 user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
3849 3852
3850 3853 for (i = 0; i < NCMSTATES; i++) {
3851 3854 tmptime = cp->cpu_intracct[i];
3852 3855 scalehrtime(&tmptime);
3853 3856 irq_ticks += NSEC_TO_TICK(tmptime);
3854 3857 }
3855 3858
3856 3859 if (newer_than24) {
3857 3860 lxpr_uiobuf_printf(uiobuf,
3858 3861 "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
3859 3862 cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
3860 3863 0L, irq_ticks, 0L);
3861 3864 } else {
3862 3865 lxpr_uiobuf_printf(uiobuf,
3863 3866 "cpu%d %lu %lu %lu %lu\n",
3864 3867 cp->cpu_id,
3865 3868 user_ticks, 0L, sys_ticks, idle_ticks);
3866 3869 }
3867 3870
3868 3871 if (pools_enabled)
3869 3872 cp = cp->cpu_next_part;
3870 3873 else
3871 3874 cp = cp->cpu_next;
3872 3875 } while (cp != cpstart);
3873 3876
3874 3877 mutex_exit(&cpu_lock);
3875 3878
3876 3879 if (newer_than24) {
3877 3880 lxpr_uiobuf_printf(uiobuf,
3878 3881 "page %lu %lu\n"
3879 3882 "swap %lu %lu\n"
3880 3883 "intr %lu\n"
3881 3884 "ctxt %lu\n"
3882 3885 "btime %lu\n"
3883 3886 "processes %lu\n"
3884 3887 "procs_running %lu\n"
3885 3888 "procs_blocked %lu\n",
3886 3889 pgpgin_cum, pgpgout_cum,
3887 3890 pgswapin_cum, pgswapout_cum,
3888 3891 intr_cum,
3889 3892 pswitch_cum,
3890 3893 boot_time,
3891 3894 forks_cum,
3892 3895 cpu_nrunnable_cum,
3893 3896 w_io_cum);
3894 3897 } else {
3895 3898 lxpr_uiobuf_printf(uiobuf,
3896 3899 "page %lu %lu\n"
3897 3900 "swap %lu %lu\n"
3898 3901 "intr %lu\n"
3899 3902 "ctxt %lu\n"
3900 3903 "btime %lu\n"
3901 3904 "processes %lu\n",
3902 3905 pgpgin_cum, pgpgout_cum,
3903 3906 pgswapin_cum, pgswapout_cum,
3904 3907 intr_cum,
3905 3908 pswitch_cum,
3906 3909 boot_time,
3907 3910 forks_cum);
3908 3911 }
3909 3912 }
3910 3913
3911 3914 /*
3912 3915 * lxpr_read_swaps():
3913 3916 *
3914 3917 * We don't support swap files or partitions, but some programs like to look
3915 3918 * here just to check we have some swap on the system, so we lie and show
3916 3919 * our entire swap cap as one swap partition.
3917 3920 *
3918 3921 * It is important to use formatting identical to the Linux implementation
3919 3922 * so that consumers do not break. See swap_show() in mm/swapfile.c.
3920 3923 */
3921 3924 /* ARGSUSED */
3922 3925 static void
3923 3926 lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3924 3927 {
3925 3928 zone_t *zone = curzone;
3926 3929 uint64_t totswap, usedswap;
3927 3930
3928 3931 mutex_enter(&zone->zone_mem_lock);
3929 3932 /* Uses units of 1 kb (2^10). */
3930 3933 totswap = zone->zone_max_swap_ctl >> 10;
3931 3934 usedswap = zone->zone_max_swap >> 10;
3932 3935 mutex_exit(&zone->zone_mem_lock);
3933 3936
3934 3937 lxpr_uiobuf_printf(uiobuf,
3935 3938 "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
3936 3939 lxpr_uiobuf_printf(uiobuf, "%-40s%s\t%llu\t%llu\t%d\n",
3937 3940 "/dev/swap", "partition", totswap, usedswap, -1);
3938 3941 }
3939 3942
3940 3943 /*
3941 3944 * inotify tunables exported via /proc.
3942 3945 */
3943 3946 extern int inotify_maxevents;
3944 3947 extern int inotify_maxinstances;
3945 3948 extern int inotify_maxwatches;
3946 3949
3947 3950 static void
3948 3951 lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp,
3949 3952 lxpr_uiobuf_t *uiobuf)
3950 3953 {
3951 3954 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS);
3952 3955 lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents);
3953 3956 }
3954 3957
3955 3958 static void
3956 3959 lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp,
3957 3960 lxpr_uiobuf_t *uiobuf)
3958 3961 {
3959 3962 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES);
3960 3963 lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances);
3961 3964 }
3962 3965
3963 3966 static void
3964 3967 lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp,
3965 3968 lxpr_uiobuf_t *uiobuf)
3966 3969 {
3967 3970 ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES);
3968 3971 lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches);
3969 3972 }
3970 3973
3971 3974 static void
3972 3975 lxpr_read_sys_kernel_caplcap(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
3973 3976 {
3974 3977 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_CAPLCAP);
3975 3978 lxpr_uiobuf_printf(uiobuf, "%d\n", LX_CAP_MAX_VALID);
3976 3979 }
3977 3980
/*
 * Report the zone's core-dump path, translated into Linux core_pattern
 * syntax.  An empty line is emitted when core dumps are disabled or the
 * path cannot be translated.
 */
static void
lxpr_read_sys_kernel_corepatt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	zone_t *zone = curproc->p_zone;
	struct core_globals *cg;
	refstr_t *rp;
	corectl_path_t *ccp;
	char tr[MAXPATHLEN];

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);

	cg = zone_getspecific(core_zone_key, zone);
	ASSERT(cg != NULL);

	/* If core dumps are disabled, return an empty string. */
	if ((cg->core_options & CC_PROCESS_PATH) == 0) {
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	/* Take a hold on the configured path while under the ccp lock. */
	ccp = cg->core_default_path;
	mutex_enter(&ccp->ccp_mtx);
	if ((rp = ccp->ccp_path) != NULL)
		refstr_hold(rp);
	mutex_exit(&ccp->ccp_mtx);

	if (rp == NULL) {
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	/* Translate the native core path tokens to Linux pattern syntax. */
	bzero(tr, sizeof (tr));
	if (lxpr_core_path_s2l(refstr_value(rp), tr, sizeof (tr)) != 0) {
		/* Translation failed; drop the hold and report empty. */
		refstr_rele(rp);
		lxpr_uiobuf_printf(uiobuf, "\n");
		return;
	}

	refstr_rele(rp);
	lxpr_uiobuf_printf(uiobuf, "%s\n", tr);
}
4019 4022
4020 4023 static void
4021 4024 lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4022 4025 {
4023 4026 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME);
4024 4027 lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename());
4025 4028 }
4026 4029
4027 4030 static void
4028 4031 lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4029 4032 {
4030 4033 rctl_qty_t val;
4031 4034
4032 4035 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI);
4033 4036
4034 4037 mutex_enter(&curproc->p_lock);
4035 4038 val = rctl_enforced_value(rc_zone_msgmni,
4036 4039 curproc->p_zone->zone_rctls, curproc);
4037 4040 mutex_exit(&curproc->p_lock);
4038 4041
4039 4042 lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4040 4043 }
4041 4044
4042 4045 static void
4043 4046 lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4044 4047 {
4045 4048 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX);
4046 4049 lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max);
4047 4050 }
4048 4051
4049 4052 static void
4050 4053 lxpr_read_sys_kernel_osrel(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4051 4054 {
4052 4055 lx_zone_data_t *br_data;
4053 4056
4054 4057 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_OSREL);
4055 4058 br_data = ztolxzd(curproc->p_zone);
4056 4059 if (curproc->p_zone->zone_brand == &lx_brand) {
4057 4060 lxpr_uiobuf_printf(uiobuf, "%s\n",
4058 4061 br_data->lxzd_kernel_version);
4059 4062 } else {
4060 4063 lxpr_uiobuf_printf(uiobuf, "\n");
4061 4064 }
4062 4065 }
4063 4066
4064 4067 static void
4065 4068 lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4066 4069 {
4067 4070 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX);
4068 4071 lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid);
4069 4072 }
4070 4073
4071 4074 static void
4072 4075 lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4073 4076 {
4074 4077 /*
4075 4078 * This file isn't documented on the Linux proc(5) man page but
4076 4079 * according to the blog of the author of systemd/journald (the
4077 4080 * consumer), he says:
4078 4081 * boot_id: A random ID that is regenerated on each boot. As such it
4079 4082 * can be used to identify the local machine's current boot. It's
4080 4083 * universally available on any recent Linux kernel. It's a good and
4081 4084 * safe choice if you need to identify a specific boot on a specific
4082 4085 * booted kernel.
4083 4086 *
4084 4087 * We'll just generate a random ID if necessary. On Linux the format
4085 4088 * appears to resemble a uuid but since it is not documented to be a
4086 4089 * uuid, we don't worry about that.
4087 4090 */
4088 4091 lx_zone_data_t *br_data;
4089 4092
4090 4093 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RAND_BOOTID);
4091 4094
4092 4095 if (curproc->p_zone->zone_brand != &lx_brand) {
4093 4096 lxpr_uiobuf_printf(uiobuf, "0\n");
4094 4097 return;
4095 4098 }
4096 4099
4097 4100 br_data = ztolxzd(curproc->p_zone);
4098 4101 if (br_data->lxzd_bootid[0] == '\0') {
4099 4102 extern int getrandom(void *, size_t, int);
4100 4103 int i;
4101 4104
4102 4105 for (i = 0; i < 5; i++) {
4103 4106 u_longlong_t n;
4104 4107 char s[32];
4105 4108
4106 4109 (void) random_get_bytes((uint8_t *)&n, sizeof (n));
4107 4110 switch (i) {
4108 4111 case 0: (void) snprintf(s, sizeof (s), "%08llx", n);
4109 4112 s[8] = '\0';
4110 4113 break;
4111 4114 case 4: (void) snprintf(s, sizeof (s), "%012llx", n);
4112 4115 s[12] = '\0';
4113 4116 break;
4114 4117 default: (void) snprintf(s, sizeof (s), "%04llx", n);
4115 4118 s[4] = '\0';
4116 4119 break;
4117 4120 }
4118 4121 if (i > 0)
4119 4122 strlcat(br_data->lxzd_bootid, "-",
4120 4123 sizeof (br_data->lxzd_bootid));
4121 4124 strlcat(br_data->lxzd_bootid, s,
4122 4125 sizeof (br_data->lxzd_bootid));
4123 4126 }
4124 4127 }
4125 4128
4126 4129 lxpr_uiobuf_printf(uiobuf, "%s\n", br_data->lxzd_bootid);
4127 4130 }
4128 4131
4129 4132 static void
4130 4133 lxpr_read_sys_kernel_sem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4131 4134 {
4132 4135 proc_t *pp = curproc;
4133 4136 rctl_qty_t vmsl, vopm, vmni, vmns;
4134 4137
4135 4138 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SEM);
4136 4139
4137 4140 mutex_enter(&pp->p_lock);
4138 4141 vmsl = rctl_enforced_value(rc_process_semmsl, pp->p_rctls, pp);
4139 4142 vopm = rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp);
4140 4143 vmni = rctl_enforced_value(rc_zone_semmni, pp->p_zone->zone_rctls, pp);
4141 4144 mutex_exit(&pp->p_lock);
4142 4145 vmns = vmsl * vmni;
4143 4146 if (vmns < vmsl || vmns < vmni) {
4144 4147 vmns = ULLONG_MAX;
4145 4148 }
4146 4149 /*
4147 4150 * Format: semmsl semmns semopm semmni
4148 4151 * - semmsl: Limit semaphores in a sempahore set.
4149 4152 * - semmns: Limit semaphores in all semaphore sets
4150 4153 * - semopm: Limit operations in a single semop call
4151 4154 * - semmni: Limit number of semaphore sets
4152 4155 */
4153 4156 lxpr_uiobuf_printf(uiobuf, "%llu\t%llu\t%llu\t%llu\n",
4154 4157 vmsl, vmns, vopm, vmni);
4155 4158 }
4156 4159
4157 4160 static void
4158 4161 lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4159 4162 {
4160 4163 rctl_qty_t val;
4161 4164
4162 4165 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX);
4163 4166
4164 4167 mutex_enter(&curproc->p_lock);
4165 4168 val = rctl_enforced_value(rc_zone_shmmax,
4166 4169 curproc->p_zone->zone_rctls, curproc);
4167 4170 mutex_exit(&curproc->p_lock);
4168 4171
4169 4172 if (val > FOURGB)
4170 4173 val = FOURGB;
4171 4174
4172 4175 lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4173 4176 }
4174 4177
4175 4178 static void
4176 4179 lxpr_read_sys_kernel_shmmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4177 4180 {
4178 4181 rctl_qty_t val;
4179 4182
4180 4183 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMNI);
4181 4184
4182 4185 mutex_enter(&curproc->p_lock);
4183 4186 val = rctl_enforced_value(rc_zone_shmmni,
4184 4187 curproc->p_zone->zone_rctls, curproc);
4185 4188 mutex_exit(&curproc->p_lock);
4186 4189
4187 4190 if (val > FOURGB)
4188 4191 val = FOURGB;
4189 4192
4190 4193 lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
4191 4194 }
4192 4195
4193 4196 static void
4194 4197 lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4195 4198 {
4196 4199 ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX);
4197 4200 lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl);
4198 4201 }
4199 4202
4200 4203 static void
4201 4204 lxpr_read_sys_net_core_somaxc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4202 4205 {
4203 4206 netstack_t *ns;
4204 4207 tcp_stack_t *tcps;
4205 4208
4206 4209 ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
4207 4210
4208 4211 ns = netstack_get_current();
4209 4212 if (ns == NULL) {
4210 4213 lxpr_uiobuf_printf(uiobuf, "%d\n", SOMAXCONN);
4211 4214 return;
4212 4215 }
4213 4216
4214 4217 tcps = ns->netstack_tcp;
4215 4218 lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_conn_req_max_q);
4216 4219 netstack_rele(ns);
4217 4220 }
4218 4221
static void
lxpr_read_sys_vm_minfr_kb(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MINFR_KB);
	/* vm.min_free_kbytes is not emulated; report a fixed 0. */
	lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
}
4225 4228
static void
lxpr_read_sys_vm_nhpages(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_NHUGEP);
	/* Huge pages are not emulated; report a fixed 0. */
	lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
}
4232 4235
static void
lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_OVERCOMMIT_MEM);
	/* Overcommit policy is not emulated; 0 is the Linux default. */
	lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
}
4239 4242
static void
lxpr_read_sys_vm_swappiness(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_SWAPPINESS);
	/* Swappiness has no native analogue; report a fixed 0. */
	lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
}
4246 4249
4247 4250 /*
4248 4251 * lxpr_read_uptime(): read the contents of the "uptime" file.
4249 4252 *
4250 4253 * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
4251 4254 * Use fixed point arithmetic to get 2 decimal places
4252 4255 */
4253 4256 /* ARGSUSED */
4254 4257 static void
4255 4258 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4256 4259 {
4257 4260 cpu_t *cp, *cpstart;
4258 4261 int pools_enabled;
4259 4262 ulong_t idle_cum = 0;
4260 4263 ulong_t cpu_count = 0;
4261 4264 ulong_t idle_s;
4262 4265 ulong_t idle_cs;
4263 4266 ulong_t up_s;
4264 4267 ulong_t up_cs;
4265 4268 hrtime_t birthtime;
4266 4269 hrtime_t centi_sec = 10000000; /* 10^7 */
4267 4270
4268 4271 ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
4269 4272
4270 4273 /* Calculate cumulative stats */
4271 4274 mutex_enter(&cpu_lock);
4272 4275 pools_enabled = pool_pset_enabled();
4273 4276
4274 4277 cp = cpstart = CPU->cpu_part->cp_cpulist;
4275 4278 do {
4276 4279 /*
4277 4280 * Don't count CPUs that aren't even in the system
4278 4281 * or aren't up yet.
4279 4282 */
4280 4283 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
4281 4284 continue;
4282 4285 }
4283 4286
4284 4287 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
4285 4288 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
4286 4289 cpu_count += 1;
4287 4290
4288 4291 if (pools_enabled)
4289 4292 cp = cp->cpu_next_part;
4290 4293 else
4291 4294 cp = cp->cpu_next;
4292 4295 } while (cp != cpstart);
4293 4296 mutex_exit(&cpu_lock);
4294 4297
4295 4298 /* Getting the Zone zsched process startup time */
4296 4299 birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
4297 4300 up_cs = (gethrtime() - birthtime) / centi_sec;
4298 4301 up_s = up_cs / 100;
4299 4302 up_cs %= 100;
4300 4303
4301 4304 ASSERT(cpu_count > 0);
4302 4305 idle_cum /= cpu_count;
4303 4306 idle_s = idle_cum / hz;
4304 4307 idle_cs = idle_cum % hz;
4305 4308 idle_cs *= 100;
4306 4309 idle_cs /= hz;
4307 4310
4308 4311 lxpr_uiobuf_printf(uiobuf,
4309 4312 "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
4310 4313 }
4311 4314
/*
 * Tables mapping cpuid feature bits to the Linux flag names printed in
 * /proc/cpuinfo.  In each table, index == bit number within the relevant
 * cpuid result register; NULL entries are bits we do not report.
 */

/* AMD extended features: cpuid 0x80000001 %edx. */
static const char *amd_x_edx[] = {
	NULL,	NULL,	NULL,	NULL,
	NULL,	NULL,	NULL,	NULL,
	NULL,	NULL,	NULL,	"syscall",
	NULL,	NULL,	NULL,	NULL,
	NULL,	NULL,	NULL,	"mp",
	"nx",	NULL,	"mmxext",	NULL,
	NULL,	NULL,	NULL,	NULL,
	NULL,	"lm",	"3dnowext",	"3dnow"
};

/* AMD extended features: cpuid 0x80000001 %ecx. */
static const char *amd_x_ecx[] = {
	"lahf_lm",	NULL,	"svm",	NULL,
	"altmovcr8"
};

/* Transmeta extended features: cpuid 0x80000001 %edx. */
static const char *tm_x_edx[] = {
	"recovery",	"longrun",	NULL,	"lrti"
};

/*
 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
 */
static const char *intc_x_edx[] = {
	NULL,	NULL,	NULL,	NULL,
	NULL,	NULL,	NULL,	NULL,
	NULL,	NULL,	NULL,	"syscall",
	NULL,	NULL,	NULL,	NULL,
	NULL,	NULL,	NULL,	NULL,
	"nx",	NULL,	NULL,	NULL,
	NULL,	NULL,	NULL,	NULL,
	NULL,	"lm",	NULL,	NULL
};

/* Standard features: cpuid 1 %edx. */
static const char *intc_edx[] = {
	"fpu",	"vme",	"de",	"pse",
	"tsc",	"msr",	"pae",	"mce",
	"cx8",	"apic",	NULL,	"sep",
	"mtrr",	"pge",	"mca",	"cmov",
	"pat",	"pse36",	"pn",	"clflush",
	NULL,	"dts",	"acpi",	"mmx",
	"fxsr",	"sse",	"sse2",	"ss",
	"ht",	"tm",	"ia64",	"pbe"
};

/*
 * "sse3" on linux is called "pni" (Prescott New Instructions).
 */
/* Standard features: cpuid 1 %ecx. */
static const char *intc_ecx[] = {
	"pni",	NULL,	NULL,	"monitor",
	"ds_cpl",	NULL,	NULL,	"est",
	"tm2",	NULL,	"cid",	NULL,
	NULL,	"cx16",	"xtpr"
};
4366 4369
4367 4370 /*
4368 4371 * Report a list of each cgroup subsystem supported by our emulated cgroup fs.
4369 4372 * This needs to exist for systemd to run but for now we don't report any
4370 4373 * cgroup subsystems as being installed. The commented example below shows
4371 4374 * how to print a subsystem entry.
4372 4375 */
static void
lxpr_read_cgroups(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	/* Emit only the header line; no subsystems are reported yet. */
	lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
	    "#subsys_name", "hierarchy", "num_cgroups", "enabled");

	/*
	 * Example of how to report one installed subsystem:
	 * lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
	 *    "cpu,cpuacct", "2", "1", "1");
	 */
}
4384 4387
/*
 * Generate /proc/cpuinfo: one stanza per CPU in the current processor
 * partition, each built from live cpuid queries against that CPU.
 */
static void
lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	int i;
	uint32_t bits;
	cpu_t *cp, *cpstart;
	int pools_enabled;
	const char **fp;
	char brandstr[CPU_IDSTRLEN];
	struct cpuid_regs cpr;
	int maxeax;
	int std_ecx, std_edx, ext_ecx, ext_edx;

	ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);

	/* Hold cpu_lock so the partition's CPU list is stable. */
	mutex_enter(&cpu_lock);
	pools_enabled = pool_pset_enabled();

	cp = cpstart = CPU->cpu_part->cp_cpulist;
	do {
		/*
		 * This returns the maximum eax value for standard cpuid
		 * functions in eax.
		 */
		cpr.cp_eax = 0;
		(void) cpuid_insn(cp, &cpr);
		maxeax = cpr.cp_eax;

		/*
		 * Get standard x86 feature flags.
		 */
		cpr.cp_eax = 1;
		(void) cpuid_insn(cp, &cpr);
		std_ecx = cpr.cp_ecx;
		std_edx = cpr.cp_edx;

		/*
		 * Now get extended feature flags.
		 */
		cpr.cp_eax = 0x80000001;
		(void) cpuid_insn(cp, &cpr);
		ext_ecx = cpr.cp_ecx;
		ext_edx = cpr.cp_edx;

		(void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);

		lxpr_uiobuf_printf(uiobuf,
		    "processor\t: %d\n"
		    "vendor_id\t: %s\n"
		    "cpu family\t: %d\n"
		    "model\t\t: %d\n"
		    "model name\t: %s\n"
		    "stepping\t: %d\n"
		    "cpu MHz\t\t: %u.%03u\n",
		    cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
		    cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
		    (uint32_t)(cpu_freq_hz / 1000000),
		    ((uint32_t)(cpu_freq_hz / 1000)) % 1000);

		lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
		    getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);

		if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
			/*
			 * 'siblings' is used for HT-style threads
			 */
			lxpr_uiobuf_printf(uiobuf,
			    "physical id\t: %lu\n"
			    "siblings\t: %u\n",
			    pg_plat_hw_instance_id(cp, PGHW_CHIP),
			    cpuid_get_ncpu_per_chip(cp));
		}

		/*
		 * Since we're relatively picky about running on older
		 * hardware, we can be somewhat cavalier about the answers to
		 * these ones.
		 *
		 * In fact, given the hardware we support, we just say:
		 *
		 *	fdiv_bug : no	(if we're on a 64-bit kernel)
		 *	hlt_bug : no
		 *	f00f_bug : no
		 *	coma_bug : no
		 *	wp : yes	(write protect in supervsr mode)
		 */
		lxpr_uiobuf_printf(uiobuf,
		    "fdiv_bug\t: %s\n"
		    "hlt_bug \t: no\n"
		    "f00f_bug\t: no\n"
		    "coma_bug\t: no\n"
		    "fpu\t\t: %s\n"
		    "fpu_exception\t: %s\n"
		    "cpuid level\t: %d\n"
		    "flags\t\t:",
#if defined(__i386)
		    fpu_pentium_fdivbug ? "yes" : "no",
#else
		    "no",
#endif /* __i386 */
		    fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
		    maxeax);

		/* Print the standard %edx feature flags we have names for. */
		for (bits = std_edx, fp = intc_edx, i = 0;
		    i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
			if ((bits & (1 << i)) != 0 && *fp)
				lxpr_uiobuf_printf(uiobuf, " %s", *fp);

		/*
		 * name additional features where appropriate
		 */
		switch (x86_vendor) {
		case X86_VENDOR_Intel:
			for (bits = ext_edx, fp = intc_x_edx, i = 0;
			    i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
			    fp++, i++)
				if ((bits & (1 << i)) != 0 && *fp)
					lxpr_uiobuf_printf(uiobuf, " %s", *fp);
			break;

		case X86_VENDOR_AMD:
			for (bits = ext_edx, fp = amd_x_edx, i = 0;
			    i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
			    fp++, i++)
				if ((bits & (1 << i)) != 0 && *fp)
					lxpr_uiobuf_printf(uiobuf, " %s", *fp);

			for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
			    i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
			    fp++, i++)
				if ((bits & (1 << i)) != 0 && *fp)
					lxpr_uiobuf_printf(uiobuf, " %s", *fp);
			break;

		case X86_VENDOR_TM:
			for (bits = ext_edx, fp = tm_x_edx, i = 0;
			    i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
			    fp++, i++)
				if ((bits & (1 << i)) != 0 && *fp)
					lxpr_uiobuf_printf(uiobuf, " %s", *fp);
			break;
		default:
			break;
		}

		/* Finally, the standard %ecx feature flags. */
		for (bits = std_ecx, fp = intc_ecx, i = 0;
		    i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
			if ((bits & (1 << i)) != 0 && *fp)
				lxpr_uiobuf_printf(uiobuf, " %s", *fp);

		lxpr_uiobuf_printf(uiobuf, "\n\n");

		if (pools_enabled)
			cp = cp->cpu_next_part;
		else
			cp = cp->cpu_next;
	} while (cp != cpstart);

	mutex_exit(&cpu_lock);
}
4544 4547
/*
 * Read handler for an open-fd entry (/proc/<pid>/fd/<n>).  These nodes are
 * normally accessed through their underlying realvp; a direct read of the
 * lx procfs node itself is not supported.
 */
/* ARGSUSED */
static void
lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
	ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
	/* Flag the request as faulted; callers see EFAULT. */
	lxpr_uiobuf_seterr(uiobuf, EFAULT);
}
4552 4555
4553 4556 /*
4554 4557 * Report a list of file systems loaded in the kernel. We only report the ones
4555 4558 * which we support and which may be checked by various components to see if
4556 4559 * they are loaded.
4557 4560 */
4558 4561 static void
4559 4562 lxpr_read_filesystems(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
4560 4563 {
4561 4564 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "autofs");
4562 4565 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "cgroup");
4563 4566 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "nfs");
4564 4567 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "proc");
4565 4568 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "sysfs");
4566 4569 lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "tmpfs");
4567 4570 }
4568 4571
4569 4572 /*
4570 4573 * lxpr_getattr(): Vnode operation for VOP_GETATTR()
4571 4574 */
4572 4575 static int
4573 4576 lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
4574 4577 caller_context_t *ct)
4575 4578 {
4576 4579 register lxpr_node_t *lxpnp = VTOLXP(vp);
4577 4580 lxpr_nodetype_t type = lxpnp->lxpr_type;
4578 4581 extern uint_t nproc;
4579 4582 int error;
4580 4583
4581 4584 /*
4582 4585 * Return attributes of underlying vnode if ATTR_REAL
4583 4586 *
4584 4587 * but keep fd files with the symlink permissions
4585 4588 */
4586 4589 if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
4587 4590 vnode_t *rvp = lxpnp->lxpr_realvp;
4588 4591
4589 4592 /*
4590 4593 * withold attribute information to owner or root
4591 4594 */
4592 4595 if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
4593 4596 return (error);
4594 4597 }
4595 4598
4596 4599 /*
4597 4600 * now its attributes
4598 4601 */
4599 4602 if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
4600 4603 return (error);
4601 4604 }
4602 4605
4603 4606 /*
4604 4607 * if it's a file in lx /proc/pid/fd/xx then set its
4605 4608 * mode and keep it looking like a symlink, fifo or socket
4606 4609 */
4607 4610 if (type == LXPR_PID_FD_FD) {
4608 4611 vap->va_mode = lxpnp->lxpr_mode;
4609 4612 vap->va_type = lxpnp->lxpr_realvp->v_type;
4610 4613 vap->va_size = 0;
4611 4614 vap->va_nlink = 1;
4612 4615 }
4613 4616 return (0);
4614 4617 }
4615 4618
4616 4619 /* Default attributes, that may be overridden below */
4617 4620 bzero(vap, sizeof (*vap));
4618 4621 vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
4619 4622 vap->va_nlink = 1;
4620 4623 vap->va_type = vp->v_type;
4621 4624 vap->va_mode = lxpnp->lxpr_mode;
4622 4625 vap->va_fsid = vp->v_vfsp->vfs_dev;
4623 4626 vap->va_blksize = DEV_BSIZE;
4624 4627 vap->va_uid = lxpnp->lxpr_uid;
4625 4628 vap->va_gid = lxpnp->lxpr_gid;
4626 4629 vap->va_nodeid = lxpnp->lxpr_ino;
4627 4630
4628 4631 switch (type) {
4629 4632 case LXPR_PROCDIR:
4630 4633 vap->va_nlink = nproc + 2 + PROCDIRFILES;
4631 4634 vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
4632 4635 break;
4633 4636 case LXPR_PIDDIR:
4634 4637 vap->va_nlink = PIDDIRFILES;
4635 4638 vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
4636 4639 break;
4637 4640 case LXPR_PID_TASK_IDDIR:
4638 4641 vap->va_nlink = TIDDIRFILES;
4639 4642 vap->va_size = TIDDIRFILES * LXPR_SDSIZE;
4640 4643 break;
4641 4644 case LXPR_SELF:
4642 4645 vap->va_uid = crgetruid(curproc->p_cred);
4643 4646 vap->va_gid = crgetrgid(curproc->p_cred);
4644 4647 break;
4645 4648 case LXPR_PID_FD_FD:
4646 4649 case LXPR_PID_TID_FD_FD:
4647 4650 /*
4648 4651 * Restore VLNK type for lstat-type activity.
4649 4652 * See lxpr_readlink for more details.
4650 4653 */
4651 4654 if ((flags & FOLLOW) == 0)
4652 4655 vap->va_type = VLNK;
4653 4656 default:
4654 4657 break;
4655 4658 }
4656 4659
4657 4660 vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
4658 4661 return (0);
4659 4662 }
4660 4663
4661 4664 /*
4662 4665 * lxpr_access(): Vnode operation for VOP_ACCESS()
4663 4666 */
4664 4667 static int
4665 4668 lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
4666 4669 {
4667 4670 lxpr_node_t *lxpnp = VTOLXP(vp);
4668 4671 lxpr_nodetype_t type = lxpnp->lxpr_type;
4669 4672 int shift = 0;
4670 4673 proc_t *tp;
4671 4674
4672 4675 /* lx /proc is a read only file system */
4673 4676 if (mode & VWRITE) {
4674 4677 switch (type) {
4675 4678 case LXPR_PID_OOM_SCR_ADJ:
4676 4679 case LXPR_PID_TID_OOM_SCR_ADJ:
4677 4680 case LXPR_SYS_KERNEL_COREPATT:
4678 4681 case LXPR_SYS_NET_CORE_SOMAXCON:
4679 4682 case LXPR_SYS_VM_OVERCOMMIT_MEM:
4680 4683 case LXPR_SYS_VM_SWAPPINESS:
4681 4684 case LXPR_PID_FD_FD:
4682 4685 case LXPR_PID_TID_FD_FD:
4683 4686 break;
4684 4687 default:
4685 4688 return (EROFS);
4686 4689 }
4687 4690 }
4688 4691
4689 4692 /*
4690 4693 * If this is a restricted file, check access permissions.
4691 4694 */
4692 4695 switch (type) {
4693 4696 case LXPR_PIDDIR:
4694 4697 return (0);
4695 4698 case LXPR_PID_CURDIR:
4696 4699 case LXPR_PID_ENV:
4697 4700 case LXPR_PID_EXE:
4698 4701 case LXPR_PID_LIMITS:
4699 4702 case LXPR_PID_MAPS:
4700 4703 case LXPR_PID_MEM:
4701 4704 case LXPR_PID_ROOTDIR:
4702 4705 case LXPR_PID_FDDIR:
4703 4706 case LXPR_PID_FD_FD:
4704 4707 case LXPR_PID_TID_FDDIR:
4705 4708 case LXPR_PID_TID_FD_FD:
4706 4709 if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
4707 4710 return (ENOENT);
4708 4711 if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
4709 4712 priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
4710 4713 lxpr_unlock(tp);
4711 4714 return (EACCES);
4712 4715 }
4713 4716 lxpr_unlock(tp);
4714 4717 default:
4715 4718 break;
4716 4719 }
4717 4720
4718 4721 if (lxpnp->lxpr_realvp != NULL) {
4719 4722 /*
4720 4723 * For these we use the underlying vnode's accessibility.
4721 4724 */
4722 4725 return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
4723 4726 }
4724 4727
4725 4728 /* If user is root allow access regardless of permission bits */
4726 4729 if (secpolicy_proc_access(cr) == 0)
4727 4730 return (0);
4728 4731
4729 4732 /*
4730 4733 * Access check is based on only one of owner, group, public. If not
4731 4734 * owner, then check group. If not a member of the group, then check
4732 4735 * public access.
4733 4736 */
4734 4737 if (crgetuid(cr) != lxpnp->lxpr_uid) {
4735 4738 shift += 3;
4736 4739 if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
4737 4740 shift += 3;
4738 4741 }
4739 4742
4740 4743 mode &= ~(lxpnp->lxpr_mode << shift);
4741 4744
4742 4745 if (mode == 0)
4743 4746 return (0);
4744 4747
4745 4748 return (EACCES);
4746 4749 }
4747 4750
/*
 * Placeholder lookup function for non-directory node types; a lookup in
 * anything that isn't a directory always fails.
 */
/* ARGSUSED */
static vnode_t *
lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
{
	return (NULL);
}
4754 4757
4755 4758 /*
4756 4759 * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
4757 4760 */
4758 4761 /* ARGSUSED */
4759 4762 static int
4760 4763 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
4761 4764 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
4762 4765 int *direntflags, pathname_t *realpnp)
4763 4766 {
4764 4767 lxpr_node_t *lxpnp = VTOLXP(dp);
4765 4768 lxpr_nodetype_t type = lxpnp->lxpr_type;
4766 4769 int error;
4767 4770
4768 4771 ASSERT(dp->v_type == VDIR);
4769 4772 ASSERT(type < LXPR_NFILES);
4770 4773
4771 4774 /*
4772 4775 * we should never get here because the lookup
4773 4776 * is done on the realvp for these nodes
4774 4777 */
4775 4778 ASSERT(type != LXPR_PID_FD_FD &&
4776 4779 type != LXPR_PID_CURDIR &&
4777 4780 type != LXPR_PID_ROOTDIR);
4778 4781
4779 4782 /*
4780 4783 * restrict lookup permission to owner or root
4781 4784 */
4782 4785 if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
4783 4786 return (error);
4784 4787 }
4785 4788
4786 4789 /*
4787 4790 * Just return the parent vnode if that's where we are trying to go.
4788 4791 */
4789 4792 if (strcmp(comp, "..") == 0) {
4790 4793 VN_HOLD(lxpnp->lxpr_parent);
4791 4794 *vpp = lxpnp->lxpr_parent;
4792 4795 return (0);
4793 4796 }
4794 4797
4795 4798 /*
4796 4799 * Special handling for directory searches. Note: null component name
4797 4800 * denotes that the current directory is being searched.
4798 4801 */
4799 4802 if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
4800 4803 VN_HOLD(dp);
4801 4804 *vpp = dp;
4802 4805 return (0);
4803 4806 }
4804 4807
4805 4808 *vpp = (lxpr_lookup_function[type](dp, comp));
4806 4809 return ((*vpp == NULL) ? ENOENT : 0);
4807 4810 }
4808 4811
4809 4812 /*
4810 4813 * Do a sequential search on the given directory table
4811 4814 */
4812 4815 static vnode_t *
4813 4816 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
4814 4817 lxpr_dirent_t *dirtab, int dirtablen)
4815 4818 {
4816 4819 lxpr_node_t *lxpnp;
4817 4820 int count;
4818 4821
4819 4822 for (count = 0; count < dirtablen; count++) {
4820 4823 if (strcmp(dirtab[count].d_name, comp) == 0) {
4821 4824 lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
4822 4825 dp = LXPTOV(lxpnp);
4823 4826 ASSERT(dp != NULL);
4824 4827 return (dp);
4825 4828 }
4826 4829 }
4827 4830 return (NULL);
4828 4831 }
4829 4832
4830 4833 static vnode_t *
4831 4834 lxpr_lookup_piddir(vnode_t *dp, char *comp)
4832 4835 {
4833 4836 proc_t *p;
4834 4837
4835 4838 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
4836 4839
4837 4840 p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
4838 4841 if (p == NULL)
4839 4842 return (NULL);
4840 4843
4841 4844 dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
4842 4845
4843 4846 lxpr_unlock(p);
4844 4847
4845 4848 return (dp);
4846 4849 }
4847 4850
4848 4851 /*
4849 4852 * Lookup one of the process's task ID's.
4850 4853 */
4851 4854 static vnode_t *
4852 4855 lxpr_lookup_taskdir(vnode_t *dp, char *comp)
4853 4856 {
4854 4857 lxpr_node_t *dlxpnp = VTOLXP(dp);
4855 4858 lxpr_node_t *lxpnp;
4856 4859 proc_t *p;
4857 4860 pid_t real_pid;
4858 4861 uint_t tid;
4859 4862 int c;
4860 4863 kthread_t *t;
4861 4864
4862 4865 ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASKDIR);
4863 4866
4864 4867 /*
4865 4868 * convert the string rendition of the filename to a thread ID
4866 4869 */
4867 4870 tid = 0;
4868 4871 while ((c = *comp++) != '\0') {
4869 4872 int otid;
4870 4873 if (c < '0' || c > '9')
4871 4874 return (NULL);
4872 4875
4873 4876 otid = tid;
4874 4877 tid = 10 * tid + c - '0';
4875 4878 /* integer overflow */
4876 4879 if (tid / 10 != otid)
4877 4880 return (NULL);
4878 4881 }
4879 4882
4880 4883 /*
4881 4884 * get the proc to work with and lock it
4882 4885 */
4883 4886 real_pid = get_real_pid(dlxpnp->lxpr_pid);
4884 4887 p = lxpr_lock(real_pid);
4885 4888 if ((p == NULL))
4886 4889 return (NULL);
4887 4890
4888 4891 /*
4889 4892 * If the process is a zombie or system process
4890 4893 * it can't have any threads.
4891 4894 */
4892 4895 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
4893 4896 lxpr_unlock(p);
4894 4897 return (NULL);
4895 4898 }
4896 4899
4897 4900 if (p->p_brand == &lx_brand) {
4898 4901 t = lxpr_get_thread(p, tid);
4899 4902 } else {
4900 4903 /*
4901 4904 * Only the main thread is visible for non-branded processes.
4902 4905 */
4903 4906 t = p->p_tlist;
4904 4907 if (tid != p->p_pid || t == NULL) {
4905 4908 t = NULL;
4906 4909 } else {
4907 4910 thread_lock(t);
4908 4911 }
4909 4912 }
4910 4913 if (t == NULL) {
4911 4914 lxpr_unlock(p);
4912 4915 return (NULL);
4913 4916 }
4914 4917 thread_unlock(t);
4915 4918
4916 4919 /*
4917 4920 * Allocate and fill in a new lx /proc taskid node.
4918 4921 * Instead of the last arg being a fd, it is a tid.
4919 4922 */
4920 4923 lxpnp = lxpr_getnode(dp, LXPR_PID_TASK_IDDIR, p, tid);
4921 4924 dp = LXPTOV(lxpnp);
4922 4925 ASSERT(dp != NULL);
4923 4926 lxpr_unlock(p);
4924 4927 return (dp);
4925 4928 }
4926 4929
4927 4930 /*
4928 4931 * Lookup one of the process's task ID's.
4929 4932 */
4930 4933 static vnode_t *
4931 4934 lxpr_lookup_task_tid_dir(vnode_t *dp, char *comp)
4932 4935 {
4933 4936 lxpr_node_t *dlxpnp = VTOLXP(dp);
4934 4937 lxpr_node_t *lxpnp;
4935 4938 proc_t *p;
4936 4939 pid_t real_pid;
4937 4940 kthread_t *t;
4938 4941 int i;
4939 4942
4940 4943 ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
4941 4944
4942 4945 /*
4943 4946 * get the proc to work with and lock it
4944 4947 */
4945 4948 real_pid = get_real_pid(dlxpnp->lxpr_pid);
4946 4949 p = lxpr_lock(real_pid);
4947 4950 if ((p == NULL))
4948 4951 return (NULL);
4949 4952
4950 4953 /*
4951 4954 * If the process is a zombie or system process
4952 4955 * it can't have any threads.
4953 4956 */
4954 4957 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
4955 4958 lxpr_unlock(p);
4956 4959 return (NULL);
4957 4960 }
4958 4961
4959 4962 /* need to confirm tid is still there */
4960 4963 t = lxpr_get_thread(p, dlxpnp->lxpr_desc);
4961 4964 if (t == NULL) {
4962 4965 lxpr_unlock(p);
4963 4966 return (NULL);
4964 4967 }
4965 4968 thread_unlock(t);
4966 4969
4967 4970 /*
4968 4971 * allocate and fill in the new lx /proc taskid dir node
4969 4972 */
4970 4973 for (i = 0; i < TIDDIRFILES; i++) {
4971 4974 if (strcmp(tiddir[i].d_name, comp) == 0) {
4972 4975 lxpnp = lxpr_getnode(dp, tiddir[i].d_type, p,
4973 4976 dlxpnp->lxpr_desc);
4974 4977 dp = LXPTOV(lxpnp);
4975 4978 ASSERT(dp != NULL);
4976 4979 lxpr_unlock(p);
4977 4980 return (dp);
4978 4981 }
4979 4982 }
4980 4983
4981 4984 lxpr_unlock(p);
4982 4985 return (NULL);
4983 4986 }
4984 4987
4985 4988 /*
4986 4989 * Lookup one of the process's open files.
4987 4990 */
4988 4991 static vnode_t *
4989 4992 lxpr_lookup_fddir(vnode_t *dp, char *comp)
4990 4993 {
4991 4994 lxpr_node_t *dlxpnp = VTOLXP(dp);
4992 4995
4993 4996 ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR ||
4994 4997 dlxpnp->lxpr_type == LXPR_PID_TID_FDDIR);
4995 4998
4996 4999 return (lxpr_lookup_fdnode(dp, comp));
4997 5000 }
4998 5001
4999 5002 static vnode_t *
5000 5003 lxpr_lookup_netdir(vnode_t *dp, char *comp)
5001 5004 {
5002 5005 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
5003 5006
5004 5007 dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
5005 5008
5006 5009 return (dp);
5007 5010 }
5008 5011
5009 5012 static vnode_t *
5010 5013 lxpr_lookup_procdir(vnode_t *dp, char *comp)
5011 5014 {
5012 5015 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
5013 5016
5014 5017 /*
5015 5018 * We know all the names of files & dirs in our file system structure
5016 5019 * except those that are pid names. These change as pids are created/
5017 5020 * deleted etc., so we just look for a number as the first char to see
5018 5021 * if we are we doing pid lookups.
5019 5022 *
5020 5023 * Don't need to check for "self" as it is implemented as a symlink
5021 5024 */
5022 5025 if (*comp >= '0' && *comp <= '9') {
5023 5026 pid_t pid = 0;
5024 5027 lxpr_node_t *lxpnp = NULL;
5025 5028 proc_t *p;
5026 5029 int c;
5027 5030
5028 5031 while ((c = *comp++) != '\0')
5029 5032 pid = 10 * pid + c - '0';
5030 5033
5031 5034 /*
5032 5035 * Can't continue if the process is still loading or it doesn't
5033 5036 * really exist yet (or maybe it just died!)
5034 5037 */
5035 5038 p = lxpr_lock(pid);
5036 5039 if (p == NULL)
5037 5040 return (NULL);
5038 5041
5039 5042 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
5040 5043 lxpr_unlock(p);
5041 5044 return (NULL);
5042 5045 }
5043 5046
5044 5047 /*
5045 5048 * allocate and fill in a new lx /proc node
5046 5049 */
5047 5050 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
5048 5051
5049 5052 lxpr_unlock(p);
5050 5053
5051 5054 dp = LXPTOV(lxpnp);
5052 5055 ASSERT(dp != NULL);
5053 5056
5054 5057 return (dp);
5055 5058 }
5056 5059
5057 5060 /* Lookup fixed names */
5058 5061 return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
5059 5062 }
5060 5063
5061 5064 static vnode_t *
5062 5065 lxpr_lookup_sysdir(vnode_t *dp, char *comp)
5063 5066 {
5064 5067 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR);
5065 5068 return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES));
5066 5069 }
5067 5070
5068 5071 static vnode_t *
5069 5072 lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp)
5070 5073 {
5071 5074 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR);
5072 5075 return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir,
5073 5076 SYS_KERNELDIRFILES));
5074 5077 }
5075 5078
5076 5079 static vnode_t *
5077 5080 lxpr_lookup_sys_kdir_randdir(vnode_t *dp, char *comp)
5078 5081 {
5079 5082 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
5080 5083 return (lxpr_lookup_common(dp, comp, NULL, sys_randdir,
5081 5084 SYS_RANDDIRFILES));
5082 5085 }
5083 5086
5084 5087 static vnode_t *
5085 5088 lxpr_lookup_sys_netdir(vnode_t *dp, char *comp)
5086 5089 {
5087 5090 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NETDIR);
5088 5091 return (lxpr_lookup_common(dp, comp, NULL, sys_netdir,
5089 5092 SYS_NETDIRFILES));
5090 5093 }
5091 5094
5092 5095 static vnode_t *
5093 5096 lxpr_lookup_sys_net_coredir(vnode_t *dp, char *comp)
5094 5097 {
5095 5098 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_COREDIR);
5096 5099 return (lxpr_lookup_common(dp, comp, NULL, sys_net_coredir,
5097 5100 SYS_NET_COREDIRFILES));
5098 5101 }
5099 5102
5100 5103 static vnode_t *
5101 5104 lxpr_lookup_sys_vmdir(vnode_t *dp, char *comp)
5102 5105 {
5103 5106 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_VMDIR);
5104 5107 return (lxpr_lookup_common(dp, comp, NULL, sys_vmdir,
5105 5108 SYS_VMDIRFILES));
5106 5109 }
5107 5110
5108 5111 static vnode_t *
5109 5112 lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp)
5110 5113 {
5111 5114 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR);
5112 5115 return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir,
5113 5116 SYS_FSDIRFILES));
5114 5117 }
5115 5118
5116 5119 static vnode_t *
5117 5120 lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp)
5118 5121 {
5119 5122 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
5120 5123 return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir,
5121 5124 SYS_FS_INOTIFYDIRFILES));
5122 5125 }
5123 5126
5124 5127 /*
5125 5128 * lxpr_readdir(): Vnode operation for VOP_READDIR()
5126 5129 */
5127 5130 /* ARGSUSED */
5128 5131 static int
5129 5132 lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
5130 5133 caller_context_t *ct, int flags)
5131 5134 {
5132 5135 lxpr_node_t *lxpnp = VTOLXP(dp);
5133 5136 lxpr_nodetype_t type = lxpnp->lxpr_type;
5134 5137 ssize_t uresid;
5135 5138 off_t uoffset;
5136 5139 int error;
5137 5140
5138 5141 ASSERT(dp->v_type == VDIR);
5139 5142 ASSERT(type < LXPR_NFILES);
5140 5143
5141 5144 /*
5142 5145 * we should never get here because the readdir
5143 5146 * is done on the realvp for these nodes
5144 5147 */
5145 5148 ASSERT(type != LXPR_PID_FD_FD &&
5146 5149 type != LXPR_PID_CURDIR &&
5147 5150 type != LXPR_PID_ROOTDIR);
5148 5151
5149 5152 /*
5150 5153 * restrict readdir permission to owner or root
5151 5154 */
5152 5155 if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
5153 5156 return (error);
5154 5157
5155 5158 uoffset = uiop->uio_offset;
5156 5159 uresid = uiop->uio_resid;
5157 5160
5158 5161 /* can't do negative reads */
5159 5162 if (uoffset < 0 || uresid <= 0)
5160 5163 return (EINVAL);
5161 5164
5162 5165 /* can't read directory entries that don't exist! */
5163 5166 if (uoffset % LXPR_SDSIZE)
5164 5167 return (ENOENT);
5165 5168
5166 5169 return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
5167 5170 }
5168 5171
/*
 * Placeholder readdir function for non-directory node types.
 */
/* ARGSUSED */
static int
lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	return (ENOTDIR);
}
5175 5178
5176 5179 /*
5177 5180 * This has the common logic for returning directory entries
5178 5181 */
5179 5182 static int
5180 5183 lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
5181 5184 lxpr_dirent_t *dirtab, int dirtablen)
5182 5185 {
5183 5186 /* bp holds one dirent64 structure */
5184 5187 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
5185 5188 dirent64_t *dirent = (dirent64_t *)bp;
5186 5189 ssize_t oresid; /* save a copy for testing later */
5187 5190 ssize_t uresid;
5188 5191
5189 5192 oresid = uiop->uio_resid;
5190 5193
5191 5194 /* clear out the dirent buffer */
5192 5195 bzero(bp, sizeof (bp));
5193 5196
5194 5197 /*
5195 5198 * Satisfy user request
5196 5199 */
5197 5200 while ((uresid = uiop->uio_resid) > 0) {
5198 5201 int dirindex;
5199 5202 off_t uoffset;
5200 5203 int reclen;
5201 5204 int error;
5202 5205
5203 5206 uoffset = uiop->uio_offset;
5204 5207 dirindex = (uoffset / LXPR_SDSIZE) - 2;
5205 5208
5206 5209 if (uoffset == 0) {
5207 5210
5208 5211 dirent->d_ino = lxpnp->lxpr_ino;
5209 5212 dirent->d_name[0] = '.';
5210 5213 dirent->d_name[1] = '\0';
5211 5214 reclen = DIRENT64_RECLEN(1);
5212 5215
5213 5216 } else if (uoffset == LXPR_SDSIZE) {
5214 5217
5215 5218 dirent->d_ino = lxpr_parentinode(lxpnp);
5216 5219 dirent->d_name[0] = '.';
5217 5220 dirent->d_name[1] = '.';
5218 5221 dirent->d_name[2] = '\0';
5219 5222 reclen = DIRENT64_RECLEN(2);
5220 5223
5221 5224 } else if (dirindex >= 0 && dirindex < dirtablen) {
5222 5225 int slen = strlen(dirtab[dirindex].d_name);
5223 5226
5224 5227 dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
5225 5228 lxpnp->lxpr_pid, 0);
5226 5229
5227 5230 VERIFY(slen < LXPNSIZ);
5228 5231 (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
5229 5232 reclen = DIRENT64_RECLEN(slen);
5230 5233
5231 5234 } else {
5232 5235 /* Run out of table entries */
5233 5236 if (eofp) {
5234 5237 *eofp = 1;
5235 5238 }
5236 5239 return (0);
5237 5240 }
5238 5241
5239 5242 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
5240 5243 dirent->d_reclen = (ushort_t)reclen;
5241 5244
5242 5245 /*
5243 5246 * if the size of the data to transfer is greater
5244 5247 * that that requested then we can't do it this transfer.
5245 5248 */
5246 5249 if (reclen > uresid) {
5247 5250 /*
5248 5251 * Error if no entries have been returned yet.
5249 5252 */
5250 5253 if (uresid == oresid) {
5251 5254 return (EINVAL);
5252 5255 }
5253 5256 break;
5254 5257 }
5255 5258
5256 5259 /*
5257 5260 * uiomove() updates both uiop->uio_resid and uiop->uio_offset
5258 5261 * by the same amount. But we want uiop->uio_offset to change
5259 5262 * in increments of LXPR_SDSIZE, which is different from the
5260 5263 * number of bytes being returned to the user. So we set
5261 5264 * uiop->uio_offset separately, ignoring what uiomove() does.
5262 5265 */
5263 5266 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
5264 5267 uiop)) != 0)
5265 5268 return (error);
5266 5269
5267 5270 uiop->uio_offset = uoffset + LXPR_SDSIZE;
5268 5271 }
5269 5272
5270 5273 /* Have run out of space, but could have just done last table entry */
5271 5274 if (eofp) {
5272 5275 *eofp =
5273 5276 (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
5274 5277 }
5275 5278 return (0);
5276 5279 }
5277 5280
5278 5281
/*
 * Readdir for the top-level /proc directory: the fixed entries first (via
 * lxpr_readdir_common), then one directory entry per visible process, with
 * pids translated to the Linux view (init -> 1, zsched -> 0).  Processes
 * the caller may not observe, or that belong to another zone (for a
 * non-global-zone mount), are skipped but still consume an offset slot so
 * the iteration position stays stable across getdents() calls.
 */
static int
lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;
	off_t uoffset;
	zoneid_t zoneid;
	pid_t pid;
	int error;
	int ceof;

	ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);

	oresid = uiop->uio_resid;
	zoneid = LXPTOZ(lxpnp)->zone_id;

	/*
	 * We return directory entries in the order: "." and ".." then the
	 * unique lxproc files, then the directories corresponding to the
	 * running processes. We have defined this as the ordering because
	 * it allows us to more easily keep track of where we are betwen calls
	 * to getdents(). If the number of processes changes between calls
	 * then we can't lose track of where we are in the lxproc files.
	 */

	/* Do the fixed entries */
	error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
	    PROCDIRFILES);

	/* Finished if we got an error or if we couldn't do all the table */
	if (error != 0 || ceof == 0)
		return (error);

	/* clear out the dirent buffer */
	bzero(bp, sizeof (bp));

	/* Do the process entries */
	while ((uresid = uiop->uio_resid) > 0) {
		proc_t *p;
		int len;
		int reclen;
		int i;

		uoffset = uiop->uio_offset;

		/*
		 * Stop when entire proc table has been examined.
		 */
		i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
		if (i < 0 || i >= v.v_proc) {
			/* Run out of table entries */
			if (eofp) {
				*eofp = 1;
			}
			return (0);
		}
		mutex_enter(&pidlock);

		/*
		 * Skip indices for which there is no pid_entry, PIDs for
		 * which there is no corresponding process, a PID of 0,
		 * and anything the security policy doesn't allow
		 * us to look at.
		 */
		if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
		    p->p_pid == 0 ||
		    secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
			mutex_exit(&pidlock);
			goto next;
		}
		mutex_exit(&pidlock);

		/*
		 * Convert pid to the Linux default of 1 if we're the zone's
		 * init process, or 0 if zsched, otherwise use the value from
		 * the proc structure
		 */
		if (p->p_pid == curproc->p_zone->zone_proc_initpid) {
			pid = 1;
		} else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) {
			pid = 0;
		} else {
			pid = p->p_pid;
		}

		/*
		 * If this /proc was mounted in the global zone, view
		 * all procs; otherwise, only view zone member procs.
		 */
		if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
			goto next;
		}

		ASSERT(p->p_stat != 0);

		dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
		len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
		ASSERT(len < LXPNSIZ);
		reclen = DIRENT64_RECLEN(len);

		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		/*
		 * if the size of the data to transfer is greater
		 * that that requested then we can't do it this transfer.
		 */
		if (reclen > uresid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uresid == oresid)
				return (EINVAL);
			break;
		}

		/*
		 * uiomove() updates both uiop->uio_resid and uiop->uio_offset
		 * by the same amount. But we want uiop->uio_offset to change
		 * in increments of LXPR_SDSIZE, which is different from the
		 * number of bytes being returned to the user. So we set
		 * uiop->uio_offset separately, in the increment of this for
		 * the loop, ignoring what uiomove() does.
		 */
		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
		    uiop)) != 0)
			return (error);
	next:
		uiop->uio_offset = uoffset + LXPR_SDSIZE;
	}

	if (eofp != NULL) {
		*eofp = (uiop->uio_offset >=
		    ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
	}

	return (0);
}
5420 5423
/*
 * Readdir for a /proc/<pid> directory.  The Linux-visible pid is translated
 * back to the native pid (1 -> zone init, 0 -> zsched) before checking the
 * process still exists; the entries themselves come from the fixed piddir
 * table.
 */
static int
lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	proc_t *p;
	pid_t find_pid;

	ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);

	/* can't read its contents if it died */
	mutex_enter(&pidlock);

	if (lxpnp->lxpr_pid == 1) {
		find_pid = curproc->p_zone->zone_proc_initpid;
	} else if (lxpnp->lxpr_pid == 0) {
		find_pid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		find_pid = lxpnp->lxpr_pid;
	}
	p = prfind(find_pid);

	if (p == NULL || p->p_stat == SIDL) {
		mutex_exit(&pidlock);
		return (ENOENT);
	}
	mutex_exit(&pidlock);

	return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
}
5449 5452
5450 5453 static int
5451 5454 lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5452 5455 {
5453 5456 ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
5454 5457 return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
5455 5458 }
5456 5459
/*
 * lxpr_readdir_taskdir(): readdir for /proc/<pid>/task.
 *
 * Emits "." and ".." via lxpr_readdir_common(), then one numeric entry per
 * thread of the target process.  The directory "cursor" is uio_offset in
 * units of LXPR_SDSIZE (slots 0 and 1 are the fixed entries).  For branded
 * processes the per-thread emulated Linux tid (br_pid) is shown; for native
 * processes only the main thread is presented, named by the process pid.
 *
 * Returns 0 on success, ENOENT if the process is gone or still being
 * created, or an error from uiomove()/EINVAL on a too-small buffer.
 */
static int
lxpr_readdir_taskdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;
	off_t uoffset;
	int error;
	int ceof;
	proc_t *p;
	int tiddirsize = -1;
	int tasknum;
	pid_t real_pid;
	kthread_t *t;
	boolean_t branded = B_FALSE;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_TASKDIR);

	oresid = uiop->uio_resid;

	/* Translate the emulated Linux pid into the native pid. */
	real_pid = get_real_pid(lxpnp->lxpr_pid);
	/* lxpr_lock() returns with p_lock held and the proc P_PR_LOCK'd. */
	p = lxpr_lock(real_pid);

	/* can't read its contents if it died */
	if (p == NULL) {
		return (ENOENT);
	}
	if (p->p_stat == SIDL) {
		lxpr_unlock(p);
		return (ENOENT);
	}

	/* Zombies, system processes and kernel-as processes have no tasks. */
	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
		tiddirsize = 0;

	branded = (p->p_brand == &lx_brand);
	/*
	 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
	 * going away while we iterate over its threads.
	 */
	mutex_exit(&p->p_lock);

	if (tiddirsize == -1)
		tiddirsize = p->p_lwpcnt;

	/* Do the fixed entries (in this case just "." & "..") */
	error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);

	/* Finished if we got an error or if we couldn't do all the table */
	if (error != 0 || ceof == 0)
		goto out;

	if ((t = p->p_tlist) == NULL) {
		if (eofp != NULL)
			*eofp = 1;
		goto out;
	}

	/* clear out the dirent buffer */
	bzero(bp, sizeof (bp));

	/*
	 * Loop until user's request is satisfied or until all thread's have
	 * been returned.
	 */
	for (tasknum = 0; (uresid = uiop->uio_resid) > 0; tasknum++) {
		int i;
		int reclen;
		int len;
		uint_t emul_tid;
		lx_lwp_data_t *lwpd;

		uoffset = uiop->uio_offset;

		/*
		 * Stop at the end of the thread list
		 */
		i = (uoffset / LXPR_SDSIZE) - 2;
		if (i < 0 || i >= tiddirsize) {
			if (eofp) {
				*eofp = 1;
			}
			goto out;
		}

		/*
		 * Skip forward (advancing only the offset and the thread
		 * pointer at "next:") until the slot index catches up with
		 * the thread we are positioned on.
		 */
		if (i != tasknum)
			goto next;

		if (!branded) {
			/*
			 * Emulating the goofy linux task model is impossible
			 * to do for native processes. We can compromise by
			 * presenting only the main thread to the consumer.
			 */
			emul_tid = p->p_pid;
		} else {
			/* Threads without lx lwp data are not presented. */
			if ((lwpd = ttolxlwp(t)) == NULL) {
				goto next;
			}
			emul_tid = lwpd->br_pid;
			/*
			 * Convert pid to Linux default of 1 if we're the
			 * zone's init.
			 */
			if (emul_tid == curproc->p_zone->zone_proc_initpid)
				emul_tid = 1;
		}

		dirent->d_ino = lxpr_inode(LXPR_PID_TASK_IDDIR, lxpnp->lxpr_pid,
		    emul_tid);
		len = snprintf(dirent->d_name, LXPNSIZ, "%d", emul_tid);
		ASSERT(len < LXPNSIZ);
		reclen = DIRENT64_RECLEN(len);

		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		if (reclen > uresid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uresid == oresid)
				error = EINVAL;
			goto out;
		}

		/*
		 * uiomove() updates both uiop->uio_resid and uiop->uio_offset
		 * by the same amount. But we want uiop->uio_offset to change
		 * in increments of LXPR_SDSIZE, which is different from the
		 * number of bytes being returned to the user. So we set
		 * uiop->uio_offset separately, in the increment of this for
		 * the loop, ignoring what uiomove() does.
		 */
		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
		    uiop)) != 0)
			goto out;

next:
		uiop->uio_offset = uoffset + LXPR_SDSIZE;

		/* Native processes expose exactly one task entry. */
		if ((t = t->t_forw) == p->p_tlist || !branded) {
			if (eofp != NULL)
				*eofp = 1;
			goto out;
		}
	}

	if (eofp != NULL)
		*eofp = 0;

out:
	/* Reacquire p_lock: lxpr_unlock() expects it held (see entry state). */
	mutex_enter(&p->p_lock);
	lxpr_unlock(p);
	return (error);
}
5615 5618
5616 5619 static int
5617 5620 lxpr_readdir_task_tid_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
5618 5621 {
5619 5622 proc_t *p;
5620 5623 pid_t real_pid;
5621 5624 kthread_t *t;
5622 5625
5623 5626 ASSERT(lxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
5624 5627
5625 5628 mutex_enter(&pidlock);
5626 5629
5627 5630 real_pid = get_real_pid(lxpnp->lxpr_pid);
5628 5631 p = prfind(real_pid);
5629 5632
5630 5633 /* can't read its contents if it died */
5631 5634 if (p == NULL || p->p_stat == SIDL) {
5632 5635 mutex_exit(&pidlock);
5633 5636 return (ENOENT);
5634 5637 }
5635 5638
5636 5639 mutex_exit(&pidlock);
5637 5640
5638 5641 /* need to confirm tid is still there */
5639 5642 t = lxpr_get_thread(p, lxpnp->lxpr_desc);
5640 5643 if (t == NULL) {
5641 5644 /* we can't find this specific thread */
5642 5645 return (NULL);
5643 5646 }
5644 5647 thread_unlock(t);
5645 5648
5646 5649 return (lxpr_readdir_common(lxpnp, uiop, eofp, tiddir, TIDDIRFILES));
5647 5650 }
5648 5651
/*
 * lxpr_readdir_fddir(): readdir for /proc/<pid>/fd (and the per-tid fd dir).
 *
 * Emits "." and ".." via lxpr_readdir_common(), then one numeric entry per
 * open file descriptor.  As in the other readdir routines, uio_offset acts
 * as a slot cursor in units of LXPR_SDSIZE (slots 0 and 1 are the fixed
 * entries).  Empty fd slots are skipped but still consume an offset slot.
 *
 * Returns 0 on success, ENOENT if the process died, EINVAL if the caller's
 * buffer cannot hold even one entry, or an error from uiomove().
 */
static int
lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	/* bp holds one dirent64 structure */
	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
	dirent64_t *dirent = (dirent64_t *)bp;
	ssize_t oresid;	/* save a copy for testing later */
	ssize_t uresid;
	off_t uoffset;
	int error;
	int ceof;
	proc_t *p;
	int fddirsize = -1;
	uf_info_t *fip;

	ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR ||
	    lxpnp->lxpr_type == LXPR_PID_TID_FDDIR);

	oresid = uiop->uio_resid;

	/* can't read its contents if it died */
	p = lxpr_lock(lxpnp->lxpr_pid);
	if (p == NULL)
		return (ENOENT);

	/* Zombies, system processes and kernel-as processes expose no fds. */
	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
		fddirsize = 0;

	/*
	 * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
	 * going away while we iterate over its fi_list.
	 */
	mutex_exit(&p->p_lock);

	/* Get open file info */
	fip = (&(p)->p_user.u_finfo);
	mutex_enter(&fip->fi_lock);

	if (fddirsize == -1)
		fddirsize = fip->fi_nfiles;

	/* Do the fixed entries (in this case just "." & "..") */
	error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);

	/* Finished if we got an error or if we couldn't do all the table */
	if (error != 0 || ceof == 0)
		goto out;

	/* clear out the dirent buffer */
	bzero(bp, sizeof (bp));

	/*
	 * Loop until user's request is satisfied or until
	 * all file descriptors have been examined.
	 */
	for (; (uresid = uiop->uio_resid) > 0;
	    uiop->uio_offset = uoffset + LXPR_SDSIZE) {
		int reclen;
		int fd;
		int len;

		uoffset = uiop->uio_offset;

		/*
		 * Stop at the end of the fd list
		 */
		fd = (uoffset / LXPR_SDSIZE) - 2;
		if (fd < 0 || fd >= fddirsize) {
			if (eofp) {
				*eofp = 1;
			}
			goto out;
		}

		/* Unused slot: advance the cursor (loop increment) only. */
		if (fip->fi_list[fd].uf_file == NULL)
			continue;

		dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
		len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
		ASSERT(len < LXPNSIZ);
		reclen = DIRENT64_RECLEN(len);

		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
		dirent->d_reclen = (ushort_t)reclen;

		if (reclen > uresid) {
			/*
			 * Error if no entries have been returned yet.
			 */
			if (uresid == oresid)
				error = EINVAL;
			goto out;
		}

		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
		    uiop)) != 0)
			goto out;
	}

	/* Loop drained the caller's buffer; report EOF iff past the table. */
	if (eofp != NULL) {
		*eofp =
		    (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
	}

out:
	mutex_exit(&fip->fi_lock);
	/* Reacquire p_lock: lxpr_unlock() expects it held (see entry state). */
	mutex_enter(&p->p_lock);
	lxpr_unlock(p);
	return (error);
}
5759 5762
/*
 * lxpr_readdir_sysdir(): emit the fixed entries for /proc/sys.
 */
static int
lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES));
}
5766 5769
/*
 * lxpr_readdir_sys_fsdir(): emit the fixed entries for /proc/sys/fs.
 */
static int
lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir,
	    SYS_FSDIRFILES));
}
5774 5777
/*
 * lxpr_readdir_sys_fs_inotifydir(): emit the fixed entries for
 * /proc/sys/fs/inotify.
 */
static int
lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir,
	    SYS_FS_INOTIFYDIRFILES));
}
5782 5785
/*
 * lxpr_readdir_sys_kerneldir(): emit the fixed entries for /proc/sys/kernel.
 */
static int
lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir,
	    SYS_KERNELDIRFILES));
}
5790 5793
/*
 * lxpr_readdir_sys_kdir_randdir(): emit the fixed entries for
 * /proc/sys/kernel/random.
 */
static int
lxpr_readdir_sys_kdir_randdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_randdir,
	    SYS_RANDDIRFILES));
}
5798 5801
/*
 * lxpr_readdir_sys_netdir(): emit the fixed entries for /proc/sys/net.
 */
static int
lxpr_readdir_sys_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_NETDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_netdir,
	    SYS_NETDIRFILES));
}
5806 5809
/*
 * lxpr_readdir_sys_net_coredir(): emit the fixed entries for
 * /proc/sys/net/core.
 */
static int
lxpr_readdir_sys_net_coredir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_COREDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_coredir,
	    SYS_NET_COREDIRFILES));
}
5814 5817
/*
 * lxpr_readdir_sys_vmdir(): emit the fixed entries for /proc/sys/vm.
 */
static int
lxpr_readdir_sys_vmdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
{
	ASSERT(lxpnp->lxpr_type == LXPR_SYS_VMDIR);
	return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_vmdir,
	    SYS_VMDIRFILES));
}
5822 5825
/*
 * lxpr_write_sys_net_core_somaxc(): handle a write to
 * /proc/sys/net/core/somaxconn by setting the current netstack's TCP
 * "_conn_req_max_q" property to the (numeric string) value written.
 *
 * Only whole writes at offset 0 are accepted; a single trailing newline is
 * stripped.  Returns 0 on success, EINVAL on malformed input or property
 * failure, or an error from uiomove().
 */
/* ARGSUSED */
static int
lxpr_write_sys_net_core_somaxc(lxpr_node_t *lxpnp, struct uio *uio,
    struct cred *cr, caller_context_t *ct)
{
	int error;
	int res = 0;
	size_t olen;
	char val[16];	/* big enough for a uint numeric string */
	netstack_t *ns;
	mod_prop_info_t *ptbl = NULL;
	mod_prop_info_t *pinfo = NULL;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);

	/* Partial/positioned writes are not supported. */
	if (uio->uio_loffset != 0)
		return (EINVAL);

	if (uio->uio_resid == 0)
		return (0);

	olen = uio->uio_resid;
	if (olen > sizeof (val) - 1)
		return (EINVAL);

	bzero(val, sizeof (val));
	error = uiomove(val, olen, UIO_WRITE, uio);
	if (error != 0)
		return (error);

	/* Strip a single trailing newline, as written by "echo". */
	if (val[olen - 1] == '\n')
		val[olen - 1] = '\0';

	if (val[0] == '\0') /* no input */
		return (EINVAL);

	ns = netstack_get_current();
	if (ns == NULL)
		return (EINVAL);

	ptbl = ns->netstack_tcp->tcps_propinfo_tbl;
	pinfo = mod_prop_lookup(ptbl, "_conn_req_max_q", MOD_PROTO_TCP);
	if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, val, 0) != 0)
		res = EINVAL;

	netstack_rele(ns);
	return (res);
}
5870 5873
/*
 * lxpr_write_sys_kernel_corepatt(): handle a write to
 * /proc/sys/kernel/core_pattern.
 *
 * Translates the Linux core pattern to the native form (via
 * lxpr_core_path_l2s()) and installs it as the zone's default core path.
 * Pipe patterns ("|...") are rejected.  Requires coreadm privilege.
 */
/* ARGSUSED */
static int
lxpr_write_sys_kernel_corepatt(lxpr_node_t *lxpnp, struct uio *uio,
    struct cred *cr, caller_context_t *ct)
{
	zone_t *zone = curproc->p_zone;
	struct core_globals *cg;
	refstr_t *rp, *nrp;
	corectl_path_t *ccp;
	char val[MAXPATHLEN];
	char valtr[MAXPATHLEN];
	size_t olen;
	int error;

	ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);

	cg = zone_getspecific(core_zone_key, zone);
	ASSERT(cg != NULL);

	if (secpolicy_coreadm(cr) != 0)
		return (EPERM);

	/* Partial/positioned writes are not supported. */
	if (uio->uio_loffset != 0)
		return (EINVAL);

	if (uio->uio_resid == 0)
		return (0);

	olen = uio->uio_resid;
	if (olen > sizeof (val) - 1)
		return (EINVAL);

	bzero(val, sizeof (val));
	error = uiomove(val, olen, UIO_WRITE, uio);
	if (error != 0)
		return (error);

	/* Strip a single trailing newline, as written by "echo". */
	if (val[olen - 1] == '\n')
		val[olen - 1] = '\0';

	/* Piping cores to a helper program is not supported. */
	if (val[0] == '|')
		return (EINVAL);

	/* Translate the Linux %-tokens into the native equivalents. */
	if ((error = lxpr_core_path_l2s(val, valtr, sizeof (valtr))) != 0)
		return (error);

	nrp = refstr_alloc(valtr);

	ccp = cg->core_default_path;
	mutex_enter(&ccp->ccp_mtx);
	/* Swap in the new path under ccp_mtx; release the old one after. */
	rp = ccp->ccp_path;
	refstr_hold((ccp->ccp_path = nrp));
	cg->core_options |= CC_PROCESS_PATH;
	mutex_exit(&ccp->ccp_mtx);

	if (rp != NULL)
		refstr_rele(rp);

	return (0);
}
5931 5934
/*
 * lxpr_readlink(): Vnode operation for VOP_READLINK()
 *
 * Copies the link target text into uiop.  For nodes with a real underlying
 * vnode the target is resolved via vnodetopath(); /proc/self and the
 * cwd/root/exe links are synthesized.  Returns 0 on success or an errno.
 */
/* ARGSUSED */
static int
lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
{
	/* Scratch buffer for the link target (path or synthesized name). */
	char bp[MAXPATHLEN + 1];
	size_t buflen = sizeof (bp);
	lxpr_node_t *lxpnp = VTOLXP(vp);
	vnode_t *rvp = lxpnp->lxpr_realvp;
	pid_t pid;
	int error = 0;

	/*
	 * Linux does something very "clever" for /proc/<pid>/fd/<num> entries.
	 * Open FDs are represented as symlinks, the link contents
	 * corresponding to the open resource. For plain files or devices,
	 * this isn't absurd since one can dereference the symlink to query
	 * the underlying resource. For sockets or pipes, it becomes ugly in a
	 * hurry. To maintain this human-readable output, those FD symlinks
	 * point to bogus targets such as "socket:[<inodenum>]". This requires
	 * circumventing vfs since the stat/lstat behavior on those FD entries
	 * will be unusual. (A stat must retrieve information about the open
	 * socket or pipe. It cannot fail because the link contents point to
	 * an absent file.)
	 *
	 * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD
	 * entries. This bypasses code paths which would normally
	 * short-circuit on symlinks and allows us to emulate the vfs behavior
	 * expected by /proc consumers.
	 */
	if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD)
		return (EINVAL);

	/* Try to produce a symlink name for anything that has a realvp */
	if (rvp != NULL) {
		if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
			return (error);
		if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) {
			/*
			 * Special handling possible for /proc/<pid>/fd/<num>
			 * Generate <type>:[<inode>] links, if allowed.
			 */
			if (lxpnp->lxpr_type != LXPR_PID_FD_FD ||
			    lxpr_readlink_fdnode(lxpnp, bp, buflen) != 0) {
				return (error);
			}
		}
	} else {
		switch (lxpnp->lxpr_type) {
		case LXPR_SELF:
			/*
			 * Convert pid to the Linux default of 1 if we're the
			 * zone's init process or 0 if zsched.
			 */
			if (curproc->p_pid ==
			    curproc->p_zone->zone_proc_initpid) {
				pid = 1;
			} else if (curproc->p_pid ==
			    curproc->p_zone->zone_zsched->p_pid) {
				pid = 0;
			} else {
				pid = curproc->p_pid;
			}

			/*
			 * Don't need to check result as every possible int
			 * will fit within MAXPATHLEN bytes.
			 */
			(void) snprintf(bp, buflen, "%d", pid);
			break;
		case LXPR_PID_CURDIR:
		case LXPR_PID_ROOTDIR:
		case LXPR_PID_EXE:
			/* No realvp to resolve: deny rather than fabricate. */
			return (EACCES);
		default:
			/*
			 * Need to return error so that nothing thinks
			 * that the symlink is empty and hence "."
			 */
			return (EINVAL);
		}
	}

	/* copy the link data to user space */
	return (uiomove(bp, strlen(bp), UIO_READ, uiop));
}
6020 6023
6021 6024
/*
 * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
 * Vnode is no longer referenced, deallocate the file
 * and all its resources.
 */
/* ARGSUSED */
static void
lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	/* Tear down the lxpr_node backing this vnode. */
	lxpr_freenode(VTOLXP(vp));
}
6033 6036
/*
 * lxpr_sync(): Vnode operation for VOP_SYNC()
 *
 * lx procfs is entirely synthetic and holds no dirty data, so there is
 * never anything to flush.
 */
static int
lxpr_sync()
{
	/* This operation must never fail; report success unconditionally. */
	return (0);
}
6045 6048
6046 6049 /*
6047 6050 * lxpr_cmp(): Vnode operation for VOP_CMP()
6048 6051 */
6049 6052 static int
6050 6053 lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
6051 6054 {
6052 6055 vnode_t *rvp;
6053 6056
6054 6057 while (vn_matchops(vp1, lxpr_vnodeops) &&
6055 6058 (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
6056 6059 vp1 = rvp;
6057 6060 }
6058 6061
6059 6062 while (vn_matchops(vp2, lxpr_vnodeops) &&
6060 6063 (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
6061 6064 vp2 = rvp;
6062 6065 }
6063 6066
6064 6067 if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
6065 6068 return (vp1 == vp2);
6066 6069 return (VOP_CMP(vp1, vp2, ct));
6067 6070 }
6068 6071
6069 6072 /*
6070 6073 * lxpr_realvp(): Vnode operation for VOP_REALVP()
6071 6074 */
6072 6075 static int
6073 6076 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
6074 6077 {
6075 6078 vnode_t *rvp;
6076 6079
6077 6080 if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
6078 6081 vp = rvp;
6079 6082 if (VOP_REALVP(vp, &rvp, ct) == 0)
6080 6083 vp = rvp;
6081 6084 }
6082 6085
6083 6086 *vpp = vp;
6084 6087 return (0);
6085 6088 }
6086 6089
/*
 * lxpr_write(): Vnode operation for VOP_WRITE()
 *
 * Dispatch writes to the handful of writable lx procfs files.  All other
 * nodes silently accept and discard the data (Linux compatibility: many
 * tools blindly write to /proc tunables and expect success).
 */
static int
lxpr_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
    caller_context_t *ct)
{
	lxpr_node_t *lxpnp = VTOLXP(vp);
	lxpr_nodetype_t type = lxpnp->lxpr_type;

	switch (type) {
	case LXPR_SYS_KERNEL_COREPATT:
		return (lxpr_write_sys_kernel_corepatt(lxpnp, uiop, cr, ct));
	case LXPR_SYS_NET_CORE_SOMAXCON:
		return (lxpr_write_sys_net_core_somaxc(lxpnp, uiop, cr, ct));

	default:
		/* pretend we wrote the whole thing */
		uiop->uio_offset += uiop->uio_resid;
		uiop->uio_resid = 0;
		return (0);
	}
}
6107 6110
6108 6111 /*
6109 6112 * We need to allow open with O_CREAT for the oom_score_adj file.
6110 6113 */
6111 6114 /*ARGSUSED7*/
6112 6115 static int
6113 6116 lxpr_create(struct vnode *dvp, char *nm, struct vattr *vap,
6114 6117 enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred,
6115 6118 int flag, caller_context_t *ct, vsecattr_t *vsecp)
6116 6119 {
6117 6120 lxpr_node_t *lxpnp = VTOLXP(dvp);
6118 6121 lxpr_nodetype_t type = lxpnp->lxpr_type;
6119 6122 vnode_t *vp = NULL;
6120 6123 int error;
6121 6124
6122 6125 ASSERT(type < LXPR_NFILES);
6123 6126
6124 6127 /*
6125 6128 * restrict create permission to owner or root
6126 6129 */
6127 6130 if ((error = lxpr_access(dvp, VEXEC, 0, cred, ct)) != 0) {
6128 6131 return (error);
6129 6132 }
6130 6133
6131 6134 if (*nm == '\0')
6132 6135 return (EPERM);
6133 6136
6134 6137 if (dvp->v_type != VDIR)
6135 6138 return (EPERM);
6136 6139
6137 6140 if (exclusive == EXCL)
6138 6141 return (EEXIST);
6139 6142
6140 6143 /*
6141 6144 * We're currently restricting O_CREAT to:
6142 6145 * - /proc/<pid>/fd/<num>
6143 6146 * - /proc/<pid>/oom_score_adj
6144 6147 * - /proc/<pid>/task/<tid>/fd/<num>
6145 6148 * - /proc/<pid>/task/<tid>/oom_score_adj
6146 6149 * - /proc/sys/kernel/core_pattern
6147 6150 * - /proc/sys/net/core/somaxconn
6148 6151 * - /proc/sys/vm/overcommit_memory
6149 6152 * - /proc/sys/vm/swappiness
6150 6153 */
6151 6154 switch (type) {
6152 6155 case LXPR_PIDDIR:
6153 6156 case LXPR_PID_TASK_IDDIR:
6154 6157 if (strcmp(nm, "oom_score_adj") == 0) {
6155 6158 proc_t *p;
6156 6159 p = lxpr_lock(lxpnp->lxpr_pid);
6157 6160 if (p != NULL) {
6158 6161 vp = lxpr_lookup_common(dvp, nm, p, piddir,
6159 6162 PIDDIRFILES);
6160 6163 }
6161 6164 lxpr_unlock(p);
6162 6165 }
6163 6166 break;
6164 6167
6165 6168 case LXPR_SYS_NET_COREDIR:
6166 6169 if (strcmp(nm, "somaxconn") == 0) {
6167 6170 vp = lxpr_lookup_common(dvp, nm, NULL, sys_net_coredir,
6168 6171 SYS_NET_COREDIRFILES);
6169 6172 }
6170 6173 break;
6171 6174
6172 6175 case LXPR_SYS_KERNELDIR:
6173 6176 if (strcmp(nm, "core_pattern") == 0) {
6174 6177 vp = lxpr_lookup_common(dvp, nm, NULL, sys_kerneldir,
6175 6178 SYS_KERNELDIRFILES);
6176 6179 }
6177 6180 break;
6178 6181
6179 6182 case LXPR_SYS_VMDIR:
6180 6183 if (strcmp(nm, "overcommit_memory") == 0 ||
6181 6184 strcmp(nm, "swappiness") == 0) {
6182 6185 vp = lxpr_lookup_common(dvp, nm, NULL, sys_vmdir,
6183 6186 SYS_VMDIRFILES);
6184 6187 }
6185 6188 break;
6186 6189
6187 6190 case LXPR_PID_FDDIR:
6188 6191 case LXPR_PID_TID_FDDIR:
6189 6192 vp = lxpr_lookup_fdnode(dvp, nm);
6190 6193 break;
6191 6194
6192 6195 default:
6193 6196 vp = NULL;
6194 6197 break;
6195 6198 }
6196 6199
6197 6200 if (vp != NULL) {
6198 6201 /* Creating an existing file, allow it for regular files. */
6199 6202 if (vp->v_type == VDIR)
6200 6203 return (EISDIR);
6201 6204
6202 6205 /* confirm permissions against existing file */
6203 6206 if ((error = lxpr_access(vp, mode, 0, cred, ct)) != 0) {
6204 6207 VN_RELE(vp);
6205 6208 return (error);
6206 6209 }
6207 6210
6208 6211 *vpp = vp;
6209 6212 return (0);
6210 6213 }
6211 6214
6212 6215 /*
6213 6216 * Linux proc does not allow creation of addition, non-subsystem
6214 6217 * specific files inside the hierarchy. ENOENT is tossed when such
6215 6218 * actions are attempted.
6216 6219 */
6217 6220 return (ENOENT);
6218 6221 }
↓ open down ↓ |
4889 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX