summaryrefslogtreecommitdiffstats
path: root/support/logresolve.c
diff options
context:
space:
mode:
authorRoy T. Fielding <fielding@apache.org>1999-08-24 08:46:03 +0200
committerRoy T. Fielding <fielding@apache.org>1999-08-24 08:46:03 +0200
commit6f96ad52275b5b35226cdb2ce66b3832e9dfb605 (patch)
tree8892afb8b2e31b3d6e7c9bca839afe51b4e11c40 /support/logresolve.c
parentApache 1.3.9 baseline for the Apache 2.0 repository. (diff)
downloadapache2-6f96ad52275b5b35226cdb2ce66b3832e9dfb605.tar.xz
apache2-6f96ad52275b5b35226cdb2ce66b3832e9dfb605.zip
Apache 1.3.9 baseline for the Apache 2.0 repository.
Obtained from: Apache 1.3.9 (minus unused files), tag APACHE_1_3_9 Submitted by: Apache Group git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@83750 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'support/logresolve.c')
-rw-r--r--support/logresolve.c345
1 files changed, 345 insertions, 0 deletions
diff --git a/support/logresolve.c b/support/logresolve.c
new file mode 100644
index 0000000000..bcdc6e51b5
--- /dev/null
+++ b/support/logresolve.c
@@ -0,0 +1,345 @@
+/*
+ * logresolve 1.1
+ *
+ * Tom Rathborne - tomr@uunet.ca - http://www.uunet.ca/~tomr/
+ * UUNET Canada, April 16, 1995
+ *
+ * Rewritten by David Robinson. (drtr@ast.cam.ac.uk)
+ *
+ * Usage: logresolve [-s filename] [-c] < access_log > new_log
+ *
+ * Arguments:
+ * -s filename name of a file to record statistics
+ * -c check the DNS for a matching A record for the host.
+ *
+ * Notes:
+ *
+ * To generate meaningful statistics from an HTTPD log file, it's good
+ * to have the domain name of each machine that accessed your site, but
+ * doing this on the fly can slow HTTPD down.
+ *
+ * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
+ * resolution off. Before running your stats program, just run your log
+ * file through this program (logresolve) and all of your IP numbers will
+ * be resolved into hostnames (where possible).
+ *
+ * logresolve takes an HTTPD access log (in the COMMON log file format,
+ * or any other format that has the IP number/domain name as the first
+ * field for that matter), and outputs the same file with all of the
+ * domain names looked up. Where no domain name can be found, the IP
+ * number is left in.
+ *
+ * To minimize impact on your nameserver, logresolve has its very own
+ * internal hash-table cache. This means that each IP number will only
+ * be looked up the first time it is found in the log file.
+ *
+ * The -c option causes logresolve to apply the same check as httpd
+ * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
+ * address, it looks up the IP addresses for the hostname and checks
+ * that one of these matches the original address.
+ */
+
+#include "ap_config.h"
+#include <sys/types.h>
+
+#include <ctype.h>
+
+#ifndef MPE
+#include <arpa/inet.h>
+#endif
+
+static void cgethost(struct in_addr ipnum, char *string, int check);
+static int getline(char *s, int n);
+static void stats(FILE *output);
+
+
+/* maximum line length */
+#define MAXLINE 1024
+
+/* maximum length of a domain name */
+#ifndef MAXDNAME
+#define MAXDNAME 256
+#endif
+
+/* number of buckets in cache hash table */
+#define BUCKETS 256
+
+#if defined(NEED_STRDUP)
+char *strdup (const char *str)
+{
+ char *dup;
+
+ if (!(dup = (char *) malloc(strlen(str) + 1)))
+ return NULL;
+ dup = strcpy(dup, str);
+
+ return dup;
+}
+#endif
+
+/*
+ * struct nsrec - record of nameservice for cache linked list
+ *
+ * ipnum - IP number hostname - hostname noname - nonzero if IP number has no
+ * hostname, i.e. hostname=IP number
+ */
+
+struct nsrec {
+ struct in_addr ipnum;
+ char *hostname;
+ int noname;
+ struct nsrec *next;
+} *nscache[BUCKETS];
+
+/*
+ * statistics - obvious
+ */
+
+#ifndef h_errno
+extern int h_errno; /* some machines don't have this in their headers */
+#endif
+
+/* largeste value for h_errno */
+#define MAX_ERR (NO_ADDRESS)
+#define UNKNOWN_ERR (MAX_ERR+1)
+#define NO_REVERSE (MAX_ERR+2)
+
+static int cachehits = 0;
+static int cachesize = 0;
+static int entries = 0;
+static int resolves = 0;
+static int withname = 0;
+static int errors[MAX_ERR + 3];
+
+/*
+ * cgethost - gets hostname by IP address, caching, and adding unresolvable
+ * IP numbers with their IP number as hostname, setting noname flag
+ */
+
+static void cgethost (struct in_addr ipnum, char *string, int check)
+{
+ struct nsrec **current, *new;
+ struct hostent *hostdata;
+ char *name;
+
+ current = &nscache[((ipnum.s_addr + (ipnum.s_addr >> 8) +
+ (ipnum.s_addr >> 16) + (ipnum.s_addr >> 24)) % BUCKETS)];
+
+ while (*current != NULL && ipnum.s_addr != (*current)->ipnum.s_addr)
+ current = &(*current)->next;
+
+ if (*current == NULL) {
+ cachesize++;
+ new = (struct nsrec *) malloc(sizeof(struct nsrec));
+ if (new == NULL) {
+ perror("malloc");
+ fprintf(stderr, "Insufficient memory\n");
+ exit(1);
+ }
+ *current = new;
+ new->next = NULL;
+
+ new->ipnum = ipnum;
+
+ hostdata = gethostbyaddr((const char *) &ipnum, sizeof(struct in_addr),
+ AF_INET);
+ if (hostdata == NULL) {
+ if (h_errno > MAX_ERR)
+ errors[UNKNOWN_ERR]++;
+ else
+ errors[h_errno]++;
+ new->noname = h_errno;
+ name = strdup(inet_ntoa(ipnum));
+ }
+ else {
+ new->noname = 0;
+ name = strdup(hostdata->h_name);
+ if (check) {
+ if (name == NULL) {
+ perror("strdup");
+ fprintf(stderr, "Insufficient memory\n");
+ exit(1);
+ }
+ hostdata = gethostbyname(name);
+ if (hostdata != NULL) {
+ char **hptr;
+
+ for (hptr = hostdata->h_addr_list; *hptr != NULL; hptr++)
+ if (((struct in_addr *) (*hptr))->s_addr == ipnum.s_addr)
+ break;
+ if (*hptr == NULL)
+ hostdata = NULL;
+ }
+ if (hostdata == NULL) {
+ fprintf(stderr, "Bad host: %s != %s\n", name,
+ inet_ntoa(ipnum));
+ new->noname = NO_REVERSE;
+ free(name);
+ name = strdup(inet_ntoa(ipnum));
+ errors[NO_REVERSE]++;
+ }
+ }
+ }
+ new->hostname = name;
+ if (new->hostname == NULL) {
+ perror("strdup");
+ fprintf(stderr, "Insufficient memory\n");
+ exit(1);
+ }
+ }
+ else
+ cachehits++;
+
+ /* size of string == MAXDNAME +1 */
+ strncpy(string, (*current)->hostname, MAXDNAME);
+ string[MAXDNAME] = '\0';
+}
+
+/*
+ * prints various statistics to output
+ */
+
+static void stats (FILE *output)
+{
+ int i;
+ char *ipstring;
+ struct nsrec *current;
+ char *errstring[MAX_ERR + 3];
+
+ for (i = 0; i < MAX_ERR + 3; i++)
+ errstring[i] = "Unknown error";
+ errstring[HOST_NOT_FOUND] = "Host not found";
+ errstring[TRY_AGAIN] = "Try again";
+ errstring[NO_RECOVERY] = "Non recoverable error";
+ errstring[NO_DATA] = "No data record";
+ errstring[NO_ADDRESS] = "No address";
+ errstring[NO_REVERSE] = "No reverse entry";
+
+ fprintf(output, "logresolve Statistics:\n");
+
+ fprintf(output, "Entries: %d\n", entries);
+ fprintf(output, " With name : %d\n", withname);
+ fprintf(output, " Resolves : %d\n", resolves);
+ if (errors[HOST_NOT_FOUND])
+ fprintf(output, " - Not found : %d\n", errors[HOST_NOT_FOUND]);
+ if (errors[TRY_AGAIN])
+ fprintf(output, " - Try again : %d\n", errors[TRY_AGAIN]);
+ if (errors[NO_DATA])
+ fprintf(output, " - No data : %d\n", errors[NO_DATA]);
+ if (errors[NO_ADDRESS])
+ fprintf(output, " - No address: %d\n", errors[NO_ADDRESS]);
+ if (errors[NO_REVERSE])
+ fprintf(output, " - No reverse: %d\n", errors[NO_REVERSE]);
+ fprintf(output, "Cache hits : %d\n", cachehits);
+ fprintf(output, "Cache size : %d\n", cachesize);
+ fprintf(output, "Cache buckets : IP number * hostname\n");
+
+ for (i = 0; i < BUCKETS; i++)
+ for (current = nscache[i]; current != NULL; current = current->next) {
+ ipstring = inet_ntoa(current->ipnum);
+ if (current->noname == 0)
+ fprintf(output, " %3d %15s - %s\n", i, ipstring,
+ current->hostname);
+ else {
+ if (current->noname > MAX_ERR + 2)
+ fprintf(output, " %3d %15s : Unknown error\n", i,
+ ipstring);
+ else
+ fprintf(output, " %3d %15s : %s\n", i, ipstring,
+ errstring[current->noname]);
+ }
+ }
+}
+
+
+/*
+ * gets a line from stdin
+ */
+
+static int getline (char *s, int n)
+{
+ char *cp;
+
+ if (!fgets(s, n, stdin))
+ return (0);
+ cp = strchr(s, '\n');
+ if (cp)
+ *cp = '\0';
+ return (1);
+}
+
+int main (int argc, char *argv[])
+{
+ struct in_addr ipnum;
+ char *bar, hoststring[MAXDNAME + 1], line[MAXLINE], *statfile;
+ int i, check;
+
+ check = 0;
+ statfile = NULL;
+ for (i = 1; i < argc; i++) {
+ if (strcmp(argv[i], "-c") == 0)
+ check = 1;
+ else if (strcmp(argv[i], "-s") == 0) {
+ if (i == argc - 1) {
+ fprintf(stderr, "logresolve: missing filename to -s\n");
+ exit(1);
+ }
+ i++;
+ statfile = argv[i];
+ }
+ else {
+ fprintf(stderr, "Usage: logresolve [-s statfile] [-c] < input > output\n");
+ exit(0);
+ }
+ }
+
+
+ for (i = 0; i < BUCKETS; i++)
+ nscache[i] = NULL;
+ for (i = 0; i < MAX_ERR + 2; i++)
+ errors[i] = 0;
+
+ while (getline(line, MAXLINE)) {
+ if (line[0] == '\0')
+ continue;
+ entries++;
+ if (!isdigit(line[0])) { /* short cut */
+ puts(line);
+ withname++;
+ continue;
+ }
+ bar = strchr(line, ' ');
+ if (bar != NULL)
+ *bar = '\0';
+ ipnum.s_addr = inet_addr(line);
+ if (ipnum.s_addr == 0xffffffffu) {
+ if (bar != NULL)
+ *bar = ' ';
+ puts(line);
+ withname++;
+ continue;
+ }
+
+ resolves++;
+
+ cgethost(ipnum, hoststring, check);
+ if (bar != NULL)
+ printf("%s %s\n", hoststring, bar + 1);
+ else
+ puts(hoststring);
+ }
+
+ if (statfile != NULL) {
+ FILE *fp;
+ fp = fopen(statfile, "w");
+ if (fp == NULL) {
+ fprintf(stderr, "logresolve: could not open statistics file '%s'\n"
+ ,statfile);
+ exit(1);
+ }
+ stats(fp);
+ fclose(fp);
+ }
+
+ return (0);
+}