diff options
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | lcd/Makefile | 9 | ||||
-rw-r--r-- | lcd/lcdtee.c | 21 | ||||
-rw-r--r-- | lcd/lcdwrite.c | 34 | ||||
l--------- | remount/remountw | 1 | ||||
-rw-r--r-- | wd/wdd.8 | 85 | ||||
-rw-r--r-- | wd/wdd.c | 53 |
7 files changed, 198 insertions, 9 deletions
@@ -63,3 +63,7 @@ - wdd now also does a 'stat("/")' which is useful in case of broken NFSROOT. - a new deployment script for wdd +2004/08/06 + - now 'wdd' supports checks of arbitrary file lists and gets a man page. + - lcdwrite and lcdtee sources have been brought back + diff --git a/lcd/Makefile b/lcd/Makefile new file mode 100644 index 0000000..06e0926 --- /dev/null +++ b/lcd/Makefile @@ -0,0 +1,9 @@ +OBJS=lcdwrite lcdtee +include ../include/rules.make +CFLAGS+=-fomit-frame-pointer + +%: %.c + $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $< + strip -R .comment -R .note $@ + objdump -h $@ | grep -q '\.data[ ]*00000000' && strip -R .data $@ || true + sstrip $@ diff --git a/lcd/lcdtee.c b/lcd/lcdtee.c new file mode 100644 index 0000000..2cab2ef --- /dev/null +++ b/lcd/lcdtee.c @@ -0,0 +1,21 @@ +/* copy stdin to both stdout and /dev/lcd, and don't complain if the LCD doesn't work */ +#include <sys/fcntl.h> + +main(int argc, char **argv) { + int fd; + char buf[1024]; + int len; + + fd = open("/dev/lcd", O_WRONLY); + + while ((len = read(0, buf, sizeof(buf))) > 0) { + write(1, buf, len); + if (fd > 0) /* NOTE(review): open() reports failure as -1, so this test also skips a valid descriptor 0 */ + write(fd, buf, len); + } + + if (fd > 0) + close(fd); + return 0; +} + diff --git a/lcd/lcdwrite.c b/lcd/lcdwrite.c new file mode 100644 index 0000000..5f86c46 --- /dev/null +++ b/lcd/lcdwrite.c @@ -0,0 +1,34 @@ +/* write all args (or stdin when no args are given) to /dev/lcd, and don't complain if it doesn't work */ +#include <sys/fcntl.h> + +main(int argc, char **argv) { + int fd; + char buf[1024]; + int len; + + + if ((fd = open("/dev/lcd", O_WRONLY)) == -1) + return 0; + + + if (--argc == 0) { + while ((len = read(0, buf, sizeof(buf))) > 0) + write(fd, buf, len); + } + else { + argv++; + + while (argc--) { + len = strlen(*argv); + if (argc) /* still other args */ + argv[0][len++]=' '; /* add a delimiter, overwriting the terminating NUL in place */ + else + argv[0][len++]='\n'; /* end with a newline */ + write(fd, *argv, len); + argv++; + } + } + close(fd); + return 0; +} + diff --git a/remount/remountw b/remount/remountw deleted file mode 120000 index 
5bc7a05..0000000 --- a/remount/remountw +++ /dev/null @@ -1 +0,0 @@ -remountr
\ No newline at end of file diff --git a/wd/wdd.8 b/wd/wdd.8 new file mode 100644 index 0000000..9d56386 --- /dev/null +++ b/wd/wdd.8 @@ -0,0 +1,85 @@ +.TH wdd "8" "August 2004" "wdd (flxutils) 0.1.23" "Simple Watchdog Daemon" +.SH NAME +wdd - Simple Watchdog Daemon +.SH SYNOPSIS +.B wdd [file]... +.SH DESCRIPTION +Wdd is a simple daemon which periodically pings the watchdog attached +to /dev/watchdog to keep it alive and, in parallel, performs a series of +system health checks to ensure everything is working correctly. If it +detects an error, it exits so that the watchdog driver doesn't receive +its keep-alives anymore and the system will quickly reboot. It is +particularly targeted at remotely managed systems where accessibility +is a primary concern. + +As a bonus, it's really tiny: it consumes between 12 and 20 kB of +memory on x86. + +It can optionally take a list of files as arguments. These files will +be checked upon startup, and all those which are accessible will be +periodically checked (one file per second) and the daemon will exit +as soon as one of them becomes inaccessible. This is particularly useful +on \fB/dev\fP, \fB/var\fP, \fB/tmp\fP, \fB/proc\fP and more generally +any remotely mounted file-system (including \fB/\fP in case of NFS +root). As a special case, all files which are not accessible at +startup are removed from the tests and get their first character +rewritten with a '\fB!\fP' in the argument list, so that they become +easily identifiable with \fBps\fP. + +.SH SYSTEM CHECKS +System checks are performed in this order: +.LP +.TP +\- Opening of \fB/dev/watchdog\fP if it was not previously open; +.TP +\- Allocation then release of 4 kB of memory to check that the VM +subsystem is still operating, and to give the daemon a chance to die +under Out-of-Memory conditions (OOM); +.TP +\- Fork a child and wait for its immediate death. This ensures that the +system still has free PIDs and some memory, can schedule, and can +deliver signals. 
+.TP +\- File access on the next file in the argument list. Once the last one +has been checked, the check loops back to the first one. If no +argument was given, a test is performed on \fB/\fP to ensure that the +VFS still works and that a possible NFS root is still accessible. +.TP +\- The daemon then pauses one second before starting the checks again. + +.SH FILES +.TP +\fB/sbin/wdd\fP +.br +The daemon itself + +.SH EXAMPLES +.LP +.TP +\fBnice -n 10 /sbin/wdd\fP +.br +Starts the daemon with a +10 renice value, and checks the +accessibility of the root directory every second. Launching it with +\fBnice\fP is recommended since it makes it more sensitive to fork +bombs because it soon will not get enough time slices to ping the +driver. +.TP +\fBwdd / /dev/watchdog /proc/version /var/run /tmp/.\fP +.br +Periodically checks all of these existing entries. This ensures that +\fB/\fP is always reachable, that \fB/dev\fP has not been wiped out +nor experienced a mount or unmount, that \fB/proc\fP is mounted, that +\fB/var\fP is mounted and has not been wiped out, and that \fB/tmp\fP +either is mounted or is a symbolic link to a valid directory. + +.SH BUGS +.LP +.TP +The daemon cannot renice itself; for this you need the 'nice' command. +.TP +There's absolutely no output, neither debug nor error. + +.SH SEE ALSO +\fIDocumentation/watchdog-api.txt\fP in the Linux kernel sources. +.SH AUTHORS +Willy Tarreau <willy@meta-x.org> @@ -44,25 +44,51 @@ static inline void try_fork() { /* - * This function checks if the system can access its root FS - * In case of failure, we exit so that the watchdog device - * notices it and can reboot. + * This function checks if the system can stat a given directory entry on the + * VFS. In case of failure, we either report the problem, or exit so that the + * watchdog device notices it and can reboot. 
*/ -static inline void try_stat() { +static inline int try_stat(const char *file, int do_exit) { void *heap; + int ret; heap = (void*)sbrk(NULL); if (brk(heap + sizeof (struct stat))) exit(1); memset(heap, 0, sizeof (struct stat)); - if (stat(root_str, heap) == -1) - exit(1); + ret = stat(file, heap); /* keep the result so the break is restored before acting on it */ if (brk(heap)) exit(1); + + if (ret == -1) { + if (do_exit) + exit(1); + else + return 0; /* stat() failed and the caller asked not to exit */ + } + return 1; /* stat() succeeded */ } -int main (void) { +int main (int argc, char **argv) { int dev; + int curr_file; + + if (argc > 1) { + /* we'll do a quick check on all the arguments to + * ensure that they are valid at load time, and avoid + * an accidental start of the watchdog which could be + * a disaster in case of a file name error. + */ + for (curr_file = 1; curr_file < argc; ) { + if (try_stat(argv[curr_file], 0)) + curr_file++; + else { + /* remove this file from the list, and make it noticeable from 'ps' */ + *argv[curr_file] = '!'; + argv[curr_file] = argv[--argc]; /* the last argument takes this slot and argc shrinks; curr_file is not advanced, so the moved entry is checked next */ + } + } + } if (fork() > 0) return 0; @@ -70,6 +96,8 @@ int main (void) { close(dev); chdir(root_str); setsid(); + + curr_file = 1; /* start with first file in the list */ /* let's try indefinitely to open the watchdog device */ /* note that dev is -1 now ;-) */ while (1) { @@ -82,7 +110,16 @@ int main (void) { } try_malloc(); try_fork(); - try_stat(); + + if (argc > 1) { + try_stat(argv[curr_file], 1); /* one file per iteration; exits on failure */ + curr_file++; + if (curr_file >= argc) + curr_file = 1; + } else { + try_stat(root_str, 1); + } + /* avoid a fast loop */ sleep(1); } |