diff options
-rw-r--r-- | wd/wdd.8 | 23 | ||||
-rw-r--r-- | wd/wdd.c | 47 |
2 files changed, 67 insertions, 3 deletions
@@ -1,8 +1,8 @@ -.TH wdd "8" "August 2004" "wdd (flxutils) 0.1.23" "Simple Watchdog Daemon" +.TH wdd "3" "September 2008" "wdd (flxutils) 0.1.32" "Simple Watchdog Daemon" .SH NAME wdd - Simple Watchdog Daemon .SH SYNOPSIS -.B wdd [file]... +.B wdd [-c count -f file] [file]... .SH DESCRIPTION Wdd is a simple daemon which periodically pings the watchdog attached to /dev/watchdog to keep it alive, and in parallel performs a series of @@ -26,6 +26,15 @@ startup are removed from the tests and get their first character rewritten with a '\fB!\fP' in the argument list, so that they become easily identifiable with \fBps\fP. +Another option consists in checking an "alive file". It is a file +which is checked every second, and which causes the daemon to exit +if it is not touched within a number of seconds. The file name must be +passed after the optional argument "-f", and the maximal count of +acceptable checks which report the same status must be passed after +the optional argument "-c". If either of these arguments is omitted, +the file check is disabled, which is the default. A non-existing file +is considered different from an existing one. + .SH SYSTEM CHECKS System checks are performed in this order : .LP @@ -47,6 +56,8 @@ has been checked, the check loops back to the first one. If no argument was given, a test is performed on \fB/\fP to ensure that the VFS still works and that an eventual NFS root is still accessible. .TP +\- The alive file is checked for changes. +.TP \- The daemon then pauses one second before starting the checks again. .SH FILES @@ -74,6 +85,12 @@ nor experienced a mount or unmount, that \fB/proc\fP is mounted, that \fB/var\fP is mounted and has not been wiped out, and that \fB/tmp\fP either is mounted or is a symbolic link to a valid directory. +.TP +\fBwdd -c 3600 -f /var/state/.wdalive / /proc/self/root\fP +.br +If the status of the file \fB/var/state/.wdalive\fP has not changed +during the last 3600 seconds, the daemon will exit. 
+ .SH BUGS .LP .TP @@ -84,4 +101,4 @@ There's absolutely no output, neither debug nor error. .SH SEE ALSO \fIDocumentation/watchdog-api.txt\fP in the Linux kernel sources. .SH AUTHORS -Willy Tarreau <willy@meta-x.org> +Willy Tarreau <w@1wt.eu> @@ -73,6 +73,9 @@ static inline int try_stat(const char *file, int do_exit) { int main (int argc, char **argv) { int dev; int curr_file; + int curr_count, stat_count = 0; + char *touch_file = NULL; + struct stat file_stat; if (argc > 1) { /* we'll do a quick check on all the arguments to @@ -80,6 +83,28 @@ int main (int argc, char **argv) { * an accidental start of the watchdog which could be * a disaster in case of a file name error. */ + while (argc > 1 && argv[1][0] == '-') { + argc--; argv++; + if (argv[0][1] == '-') { + /* -- */ + break; + } + else if (argv[0][1] == 'c') { + /* -c <count> */ + if (argc < 2) + break; + stat_count = atol(argv[1]); + argc--; argv++; + } + else if (argv[0][1] == 'f') { + /* -f <file> */ + if (argc < 2) + break; + touch_file = argv[1]; + argc--; argv++; + } + } + for (curr_file = 1; curr_file < argc; ) { if (try_stat(argv[curr_file], 0)) curr_file++; @@ -98,6 +123,8 @@ int main (int argc, char **argv) { chdir(root_str); setsid(); + curr_count = stat_count; + memset(&file_stat, 0, sizeof(file_stat)); curr_file = 1; /* start with first file in the list */ /* let's try indefinitely to open the watchdog device */ /* note that dev is -1 now ;-) */ @@ -123,6 +150,26 @@ int main (int argc, char **argv) { try_stat(root_str, 1); } + /* we may want to check that the touch_file has been touched */ + if (touch_file && stat_count) { + struct stat tmp; + + /* an absent file sets an empty struct stat */ + if (stat(touch_file, &tmp) < 0) + memset(&tmp, 0, sizeof(tmp)); + + if (memcmp(&file_stat, &tmp, sizeof(tmp)) != 0) { + /* the file has been touched */ + memcpy(&file_stat, &tmp, sizeof(tmp)); + curr_count = stat_count; + } else { + /* still no change */ + curr_count--; + if (!curr_count) + exit(1); + } + 
} + /* avoid a fast loop */ sleep(1); } |