FreeBSD kernel kern code
kern_fail.c
Go to the documentation of this file.
1 /*-
2  * Copyright (c) 2009 Isilon Inc http://www.isilon.com/
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
52 #include <sys/cdefs.h>
53 __FBSDID("$BSDSUniX$");
54 
55 #include <sys/ctype.h>
56 #include <sys/errno.h>
57 #include <sys/fail.h>
58 #include <sys/kernel.h>
59 #include <sys/libkern.h>
60 #include <sys/lock.h>
61 #include <sys/malloc.h>
62 #include <sys/mutex.h>
63 #include <sys/proc.h>
64 #include <sys/sbuf.h>
65 
66 #include <machine/stdarg.h>
67 
68 #ifdef ILOG_DEFINE_FOR_FILE
69 ILOG_DEFINE_FOR_FILE(L_ISI_FAIL_POINT, L_ILOG, fail_point);
70 #endif
71 
72 static MALLOC_DEFINE(M_FAIL_POINT, "Fail Points", "fail points system");
73 #define fp_free(ptr) free(ptr, M_FAIL_POINT)
74 #define fp_malloc(size, flags) malloc((size), M_FAIL_POINT, (flags))
75 
76 static struct mtx g_fp_mtx;
77 MTX_SYSINIT(g_fp_mtx, &g_fp_mtx, "fail point mtx", MTX_DEF);
78 #define FP_LOCK() mtx_lock(&g_fp_mtx)
79 #define FP_UNLOCK() mtx_unlock(&g_fp_mtx)
80 
94 };
95 
96 static struct {
97  const char *name;
98  int nmlen;
99 } fail_type_strings[] = {
100 #define FP_TYPE_NM_LEN(s) { s, sizeof(s) - 1 }
101  [FAIL_POINT_OFF] = FP_TYPE_NM_LEN("off"),
102  [FAIL_POINT_PANIC] = FP_TYPE_NM_LEN("panic"),
103  [FAIL_POINT_RETURN] = FP_TYPE_NM_LEN("return"),
104  [FAIL_POINT_BREAK] = FP_TYPE_NM_LEN("break"),
105  [FAIL_POINT_PRINT] = FP_TYPE_NM_LEN("print"),
106  [FAIL_POINT_SLEEP] = FP_TYPE_NM_LEN("sleep"),
107 };
108 
115  int fe_arg;
116  int fe_prob;
117  int fe_count;
118  pid_t fe_pid;
119  TAILQ_ENTRY(fail_point_entry) fe_entries;
120 };
121 
122 static inline void
123 fail_point_sleep(struct fail_point *fp, struct fail_point_entry *ent,
124  int msecs, enum fail_point_return_code *pret)
125 {
126  /* convert from millisecs to ticks, rounding up */
127  int timo = ((msecs * hz) + 999) / 1000;
128 
129  if (timo > 0) {
130  if (fp->fp_sleep_fn == NULL) {
131  msleep(fp, &g_fp_mtx, PWAIT, "failpt", timo);
132  } else {
133  timeout(fp->fp_sleep_fn, fp->fp_sleep_arg, timo);
134  *pret = FAIL_POINT_RC_QUEUED;
135  }
136  }
137 }
138 
139 
/* Fixed-point scale used for entry probabilities (fe_prob). */
enum {
	PROB_DIGITS = 6,	/* number of zeros in PROB_MAX */
	PROB_MAX = 1000000	/* probability lies between zero and this */
};
147 
/* Parser entry point and its helpers; each returns NULL on a syntax error. */
static char	*parse_fail_point(struct fail_point_entries *ents, char *p);
static char	*parse_term(struct fail_point_entries *ents, char *p);
static char	*parse_number(int *out_units, int *out_decimal, char *p);
static char	*parse_type(struct fail_point_entry *ent, char *beg);

/* Entry list maintenance. */
static void	 free_entry(struct fail_point_entries *ents,
		    struct fail_point_entry *ent);
static void	 clear_entries(struct fail_point_entries *ents);
154 
163 void
164 fail_point_init(struct fail_point *fp, const char *fmt, ...)
165 {
166  va_list ap;
167  char *name;
168  int n;
169 
170  TAILQ_INIT(&fp->fp_entries);
171  fp->fp_flags = 0;
172 
173  /* Figure out the size of the name. */
174  va_start(ap, fmt);
175  n = vsnprintf(NULL, 0, fmt, ap);
176  va_end(ap);
177 
178  /* Allocate the name and fill it in. */
179  name = fp_malloc(n + 1, M_WAITOK);
180  if (name != NULL) {
181  va_start(ap, fmt);
182  vsnprintf(name, n + 1, fmt, ap);
183  va_end(ap);
184  }
185  fp->fp_name = name;
186  fp->fp_location = "";
187  fp->fp_flags |= FAIL_POINT_DYNAMIC_NAME;
188  fp->fp_sleep_fn = NULL;
189  fp->fp_sleep_arg = NULL;
190 }
191 
197 void
198 fail_point_destroy(struct fail_point *fp)
199 {
200 
201  if ((fp->fp_flags & FAIL_POINT_DYNAMIC_NAME) != 0) {
202  fp_free(__DECONST(void *, fp->fp_name));
203  fp->fp_name = NULL;
204  }
205  fp->fp_flags = 0;
206  clear_entries(&fp->fp_entries);
207 }
208 
216 enum fail_point_return_code
217 fail_point_eval_nontrivial(struct fail_point *fp, int *return_value)
218 {
219  enum fail_point_return_code ret = FAIL_POINT_RC_CONTINUE;
220  struct fail_point_entry *ent, *next;
221  int msecs;
222 
223  FP_LOCK();
224 
225  TAILQ_FOREACH_SAFE(ent, &fp->fp_entries, fe_entries, next) {
226  int cont = 0; /* don't continue by default */
227 
228  if (ent->fe_prob < PROB_MAX &&
229  ent->fe_prob < random() % PROB_MAX)
230  continue;
231  if (ent->fe_pid != NO_PID && ent->fe_pid != curproc->p_pid)
232  continue;
233 
234  switch (ent->fe_type) {
235  case FAIL_POINT_PANIC:
236  panic("fail point %s panicking", fp->fp_name);
237  /* NOTREACHED */
238 
239  case FAIL_POINT_RETURN:
240  if (return_value != NULL)
241  *return_value = ent->fe_arg;
242  ret = FAIL_POINT_RC_RETURN;
243  break;
244 
245  case FAIL_POINT_BREAK:
246  printf("fail point %s breaking to debugger\n",
247  fp->fp_name);
248  breakpoint();
249  break;
250 
251  case FAIL_POINT_PRINT:
252  printf("fail point %s executing\n", fp->fp_name);
253  cont = ent->fe_arg;
254  break;
255 
256  case FAIL_POINT_SLEEP:
257  /*
258  * Free the entry now if necessary, since
259  * we're about to drop the mutex and sleep.
260  */
261  msecs = ent->fe_arg;
262  if (ent->fe_count > 0 && --ent->fe_count == 0) {
263  free_entry(&fp->fp_entries, ent);
264  ent = NULL;
265  }
266 
267  if (msecs)
268  fail_point_sleep(fp, ent, msecs, &ret);
269  break;
270 
271  default:
272  break;
273  }
274 
275  if (ent != NULL && ent->fe_count > 0 && --ent->fe_count == 0)
276  free_entry(&fp->fp_entries, ent);
277  if (cont == 0)
278  break;
279  }
280 
281  /* Get rid of "off"s at the end. */
282  while ((ent = TAILQ_LAST(&fp->fp_entries, fail_point_entries)) &&
283  ent->fe_type == FAIL_POINT_OFF)
284  free_entry(&fp->fp_entries, ent);
285 
286  FP_UNLOCK();
287 
288  return (ret);
289 }
290 
294 static void
295 fail_point_get(struct fail_point *fp, struct sbuf *sb)
296 {
297  struct fail_point_entry *ent;
298 
299  FP_LOCK();
300 
301  TAILQ_FOREACH(ent, &fp->fp_entries, fe_entries) {
302  if (ent->fe_prob < PROB_MAX) {
303  int decimal = ent->fe_prob % (PROB_MAX / 100);
304  int units = ent->fe_prob / (PROB_MAX / 100);
305  sbuf_printf(sb, "%d", units);
306  if (decimal) {
307  int digits = PROB_DIGITS - 2;
308  while (!(decimal % 10)) {
309  digits--;
310  decimal /= 10;
311  }
312  sbuf_printf(sb, ".%0*d", digits, decimal);
313  }
314  sbuf_printf(sb, "%%");
315  }
316  if (ent->fe_count > 0)
317  sbuf_printf(sb, "%d*", ent->fe_count);
318  sbuf_printf(sb, "%s", fail_type_strings[ent->fe_type].name);
319  if (ent->fe_arg)
320  sbuf_printf(sb, "(%d)", ent->fe_arg);
321  if (ent->fe_pid != NO_PID)
322  sbuf_printf(sb, "[pid %d]", ent->fe_pid);
323  if (TAILQ_NEXT(ent, fe_entries))
324  sbuf_printf(sb, "->");
325  }
326  if (TAILQ_EMPTY(&fp->fp_entries))
327  sbuf_printf(sb, "off");
328 
329  FP_UNLOCK();
330 }
331 
336 static int
337 fail_point_set(struct fail_point *fp, char *buf)
338 {
339  int error = 0;
340  struct fail_point_entry *ent, *ent_next;
341  struct fail_point_entries new_entries;
342 
343  /* Parse new entries. */
344  TAILQ_INIT(&new_entries);
345  if (!parse_fail_point(&new_entries, buf)) {
346  clear_entries(&new_entries);
347  error = EINVAL;
348  goto end;
349  }
350 
351  FP_LOCK();
352 
353  /* Move new entries in. */
354  TAILQ_SWAP(&fp->fp_entries, &new_entries, fail_point_entry, fe_entries);
355  clear_entries(&new_entries);
356 
357  /* Get rid of useless zero probability entries. */
358  TAILQ_FOREACH_SAFE(ent, &fp->fp_entries, fe_entries, ent_next) {
359  if (ent->fe_prob == 0)
360  free_entry(&fp->fp_entries, ent);
361  }
362 
363  /* Get rid of "off"s at the end. */
364  while ((ent = TAILQ_LAST(&fp->fp_entries, fail_point_entries)) &&
365  ent->fe_type == FAIL_POINT_OFF)
366  free_entry(&fp->fp_entries, ent);
367 
368  FP_UNLOCK();
369 
370  end:
371 #ifdef IWARNING
372  if (error)
373  IWARNING("Failed to set %s %s to %s",
374  fp->fp_name, fp->fp_location, buf);
375  else
376  INOTICE("Set %s %s to %s",
377  fp->fp_name, fp->fp_location, buf);
378 #endif /* IWARNING */
379 
380  return (error);
381 }
382 
383 #define MAX_FAIL_POINT_BUF 1023
384 
388 int
389 fail_point_sysctl(SYSCTL_HANDLER_ARGS)
390 {
391  struct fail_point *fp = arg1;
392  char *buf = NULL;
393  struct sbuf sb;
394  int error;
395 
396  /* Retrieving */
397  sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND);
398  fail_point_get(fp, &sb);
399  sbuf_trim(&sb);
400  sbuf_finish(&sb);
401  error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
402  sbuf_delete(&sb);
403 
404  /* Setting */
405  if (!error && req->newptr) {
406  if (req->newlen > MAX_FAIL_POINT_BUF) {
407  error = EINVAL;
408  goto out;
409  }
410 
411  buf = fp_malloc(req->newlen + 1, M_WAITOK);
412 
413  error = SYSCTL_IN(req, buf, req->newlen);
414  if (error)
415  goto out;
416  buf[req->newlen] = '\0';
417 
418  error = fail_point_set(fp, buf);
419  }
420 
421 out:
422  fp_free(buf);
423  return (error);
424 }
425 
430 static char *
431 parse_fail_point(struct fail_point_entries *ents, char *p)
432 {
433  /* <fail_point> ::
434  * <term> ( "->" <term> )*
435  */
436  p = parse_term(ents, p);
437  if (p == NULL)
438  return (NULL);
439  while (*p != '\0') {
440  if (p[0] != '-' || p[1] != '>')
441  return (NULL);
442  p = parse_term(ents, p + 2);
443  if (p == NULL)
444  return (NULL);
445  }
446  return (p);
447 }
448 
452 static char *
453 parse_term(struct fail_point_entries *ents, char *p)
454 {
455  struct fail_point_entry *ent;
456 
457  ent = fp_malloc(sizeof *ent, M_WAITOK | M_ZERO);
458  ent->fe_prob = PROB_MAX;
459  ent->fe_pid = NO_PID;
460  TAILQ_INSERT_TAIL(ents, ent, fe_entries);
461 
462  /*
463  * <term> ::
464  * ( (<float> "%") | (<integer> "*" ) )*
465  * <type>
466  * [ "(" <integer> ")" ]
467  * [ "[pid " <integer> "]" ]
468  */
469 
470  /* ( (<float> "%") | (<integer> "*" ) )* */
471  while (isdigit(*p) || *p == '.') {
472  int units, decimal;
473 
474  p = parse_number(&units, &decimal, p);
475  if (p == NULL)
476  return (NULL);
477 
478  if (*p == '%') {
479  if (units > 100) /* prevent overflow early */
480  units = 100;
481  ent->fe_prob = units * (PROB_MAX / 100) + decimal;
482  if (ent->fe_prob > PROB_MAX)
483  ent->fe_prob = PROB_MAX;
484  } else if (*p == '*') {
485  if (!units || decimal)
486  return (NULL);
487  ent->fe_count = units;
488  } else
489  return (NULL);
490  p++;
491  }
492 
493  /* <type> */
494  p = parse_type(ent, p);
495  if (p == NULL)
496  return (NULL);
497  if (*p == '\0')
498  return (p);
499 
500  /* [ "(" <integer> ")" ] */
501  if (*p != '(')
502  return p;
503  p++;
504  if (!isdigit(*p) && *p != '-')
505  return (NULL);
506  ent->fe_arg = strtol(p, &p, 0);
507  if (*p++ != ')')
508  return (NULL);
509 
510  /* [ "[pid " <integer> "]" ] */
511 #define PID_STRING "[pid "
512  if (strncmp(p, PID_STRING, sizeof(PID_STRING) - 1) != 0)
513  return (p);
514  p += sizeof(PID_STRING) - 1;
515  if (!isdigit(*p))
516  return (NULL);
517  ent->fe_pid = strtol(p, &p, 0);
518  if (*p++ != ']')
519  return (NULL);
520 
521  return (p);
522 }
523 
527 static char *
528 parse_number(int *out_units, int *out_decimal, char *p)
529 {
530  char *old_p;
531 
532  /*
533  * <number> ::
534  * <integer> [ "." <integer> ] |
535  * "." <integer>
536  */
537 
538  /* whole part */
539  old_p = p;
540  *out_units = strtol(p, &p, 10);
541  if (p == old_p && *p != '.')
542  return (NULL);
543 
544  /* fractional part */
545  *out_decimal = 0;
546  if (*p == '.') {
547  int digits = 0;
548  p++;
549  while (isdigit(*p)) {
550  int digit = *p - '0';
551  if (digits < PROB_DIGITS - 2)
552  *out_decimal = *out_decimal * 10 + digit;
553  else if (digits == PROB_DIGITS - 2 && digit >= 5)
554  (*out_decimal)++;
555  digits++;
556  p++;
557  }
558  if (!digits) /* need at least one digit after '.' */
559  return (NULL);
560  while (digits++ < PROB_DIGITS - 2) /* add implicit zeros */
561  *out_decimal *= 10;
562  }
563 
564  return (p); /* success */
565 }
566 
570 static char *
571 parse_type(struct fail_point_entry *ent, char *beg)
572 {
573  enum fail_point_t type;
574  int len;
575 
576  for (type = FAIL_POINT_OFF; type < FAIL_POINT_NUMTYPES; type++) {
577  len = fail_type_strings[type].nmlen;
578  if (strncmp(fail_type_strings[type].name, beg, len) == 0) {
579  ent->fe_type = type;
580  return (beg + len);
581  }
582  }
583  return (NULL);
584 }
585 
589 static void
590 free_entry(struct fail_point_entries *ents, struct fail_point_entry *ent)
591 {
592  TAILQ_REMOVE(ents, ent, fe_entries);
593  fp_free(ent);
594 }
595 
600 static void
601 clear_entries(struct fail_point_entries *ents)
602 {
603  struct fail_point_entry *ent, *ent_next;
604 
605  TAILQ_FOREACH_SAFE(ent, ents, fe_entries, ent_next)
606  fp_free(ent);
607  TAILQ_INIT(ents);
608 }
609 
610 /* The fail point sysctl tree. */
611 SYSCTL_NODE(_debug, OID_AUTO, fail_point, CTLFLAG_RW, 0, "fail points");
MTX_SYSINIT(g_fp_mtx,&g_fp_mtx,"fail point mtx", MTX_DEF)
void fail_point_init(struct fail_point *fp, const char *fmt,...)
Definition: kern_fail.c:164
#define FP_LOCK()
Definition: kern_fail.c:78
struct callout_handle timeout(timeout_t *ftn, void *arg, int to_ticks)
Definition: kern_timeout.c:713
struct buf * buf
Definition: vfs_bio.c:97
#define FP_UNLOCK()
Definition: kern_fail.c:79
ssize_t sbuf_len(struct sbuf *s)
Definition: subr_sbuf.c:736
static void fail_point_get(struct fail_point *fp, struct sbuf *sb)
Definition: kern_fail.c:295
#define fp_free(ptr)
Definition: kern_fail.c:73
fail_point_t
Definition: kern_fail.c:86
int nmlen
Definition: kern_fail.c:98
static void clear_entries(struct fail_point_entries *)
Definition: kern_fail.c:601
enum fail_point_return_code fail_point_eval_nontrivial(struct fail_point *fp, int *return_value)
Definition: kern_fail.c:217
void panic(const char *fmt,...)
static char * parse_number(int *out_units, int *out_decimal, char *)
Definition: kern_fail.c:528
const char * name
Definition: kern_fail.c:97
static MALLOC_DEFINE(M_FAIL_POINT,"Fail Points","fail points system")
static struct mtx g_fp_mtx
Definition: kern_fail.c:76
static struct @2 fail_type_strings[]
static int fail_point_set(struct fail_point *fp, char *buf)
Definition: kern_fail.c:337
int * type
Definition: cpufreq_if.m:98
int vsnprintf(char *str, size_t size, const char *format, va_list ap)
Definition: subr_prf.c:524
enum fail_point_t fe_type
Definition: kern_fail.c:114
#define MAX_FAIL_POINT_BUF
Definition: kern_fail.c:383
void fail_point_destroy(struct fail_point *fp)
Definition: kern_fail.c:198
int sbuf_printf(struct sbuf *s, const char *fmt,...)
Definition: subr_sbuf.c:632
SYSCTL_NODE(_debug, OID_AUTO, fail_point, CTLFLAG_RW, 0,"fail points")
#define FP_TYPE_NM_LEN(s)
static char * parse_term(struct fail_point_entries *, char *)
Definition: kern_fail.c:453
struct sbuf * sbuf_new(struct sbuf *s, char *buf, int length, int flags)
Definition: subr_sbuf.c:211
#define fp_malloc(size, flags)
Definition: kern_fail.c:74
__FBSDID("$BSDSUniX$")
#define PID_STRING
int printf(const char *fmt,...)
Definition: subr_prf.c:367
void sbuf_delete(struct sbuf *s)
Definition: subr_sbuf.c:753
static char * parse_fail_point(struct fail_point_entries *, char *)
Definition: kern_fail.c:431
int fail_point_sysctl(SYSCTL_HANDLER_ARGS)
Definition: kern_fail.c:389
char * sbuf_data(struct sbuf *s)
Definition: subr_sbuf.c:721
int sbuf_finish(struct sbuf *s)
Definition: subr_sbuf.c:694
int sbuf_trim(struct sbuf *s)
Definition: subr_sbuf.c:660
static char * parse_type(struct fail_point_entry *, char *)
Definition: kern_fail.c:571
Definition: kern_fail.c:113
static void fail_point_sleep(struct fail_point *fp, struct fail_point_entry *ent, int msecs, enum fail_point_return_code *pret)
Definition: kern_fail.c:123
static void free_entry(struct fail_point_entries *, struct fail_point_entry *)
Definition: kern_fail.c:590
int hz
Definition: subr_param.c:84