The following is the full source code of Visitors,
a fast web log analyzer under the GPL license.
If you want to
download the source code as a tar.gz file or find more information
about Visitors, please follow this link.
The following source code is Copyright (C) 2004 Salvatore Sanfilippo (antirez dot invece dot org).
Contents
- aht.h
- antigetopt.h
- sleep.h
- aht.c
- antigetopt.c
- tail.c
- visitors.c
aht.h 1/7
[top][prev][next]
/* An implementation of hash tables:
* Copyright(C) 2000-2004 by Salvatore Sanfilippo <antirez@invece.org>
*
* This software is under the GNU GPL license
*/
#include <sys/types.h>
#ifndef _AHT_H
#define _AHT_H
/* Fix to compile under WIN32/MINGW and SunOS */
#if defined(WIN32) || defined(__sun__)
#ifndef u_int8_t
#define u_int8_t unsigned char
#define u_int16_t unsigned short
#define u_int32_t unsigned int
#endif
#endif
/* ------------------------------ exit codes -------------------------------- */
#define HT_OK 0 /* Success */
#define HT_FOUND 1 /* Key found */
#define HT_NOTFOUND 2 /* Key not found */
#define HT_BUSY 3 /* Key already exist */
#define HT_NOMEM 4 /* Out of memory */
#define HT_IOVERFLOW 5 /* Index overflow */
#define HT_INVALID 6 /* Invalid argument */
#define HT_INITIAL_SIZE 256
/* ----------------------- hash table structures -----------------------------*/
struct ht_ele {
void *key;
void *data;
};
struct hashtable {
struct ht_ele **table;
unsigned int size;
unsigned int sizemask;
unsigned int used;
unsigned int collisions;
u_int32_t (*hashf)(void *key);
int (*key_compare)(void *key1, void *key2);
void (*key_destructor)(void *key);
void (*val_destructor)(void *obj);
};
/* ----------------------------- Prototypes ----------------------------------*/
int ht_init(struct hashtable *t);
int ht_move(struct hashtable *orig, struct hashtable *dest, unsigned int index);
int ht_expand(struct hashtable *t, size_t size);
int ht_add(struct hashtable *t, void *key, void *data);
int ht_replace(struct hashtable *t, void *key, void *data);
int ht_rm(struct hashtable *t, void *key);
int ht_destroy(struct hashtable *t);
int ht_free(struct hashtable *t, unsigned int index);
int ht_search(struct hashtable *t, void *key, unsigned int *found_index);
int ht_get_byindex(struct hashtable *t, unsigned int index);
int ht_resize(struct hashtable *t);
void **ht_get_array(struct hashtable *t);
/* provided destructors */
void ht_destructor_free(void *obj);
#define ht_no_destructor NULL
/* provided compare functions */
int ht_compare_ptr(void *key1, void *key2);
int ht_compare_string(void *key1, void *key2);
/* ------------------------ The hash functions ------------------------------ */
u_int32_t djb_hash(unsigned char *buf, size_t len);
u_int32_t djb_hashR(unsigned char *buf, size_t len);
u_int32_t trivial_hash(unsigned char *buf, size_t len);
u_int32_t trivial_hashR(unsigned char *buf, size_t len);
u_int32_t ht_strong_hash(u_int8_t *k, u_int32_t length, u_int32_t initval);
u_int32_t __ht_strong_hash(u_int8_t *k, u_int32_t length, u_int32_t initval);
/* ----------------- hash functions for common data types ------------------- */
u_int32_t ht_hash_string(void *key);
u_int32_t ht_hash_pointer(void *key);
/* ----------------------------- macros --------------------------------------*/
#define ht_set_hash(t,f) ((t)->hashf = (f))
#define ht_set_key_destructor(t,f) ((t)->key_destructor = (f))
#define ht_set_val_destructor(t,f) ((t)->val_destructor = (f))
#define ht_set_key_compare(t,f) ((t)->key_compare = (f))
#define ht_collisions(t) ((t)->collisions)
#define ht_size(t) ((t)->size)
#define ht_used(t) ((t)->used)
#define ht_key(t, i) ((t)->table[(i)]->key)
#define ht_value(t, i) ((t)->table[(i)]->data)
#if AHT_DEBUG
#define dprintf(x...) do { printf(x); fflush(stdout); } while(0)
#else
#define dprintf(x...)
#endif
#endif /* _AHT_H */
antigetopt.h 2/7
[top][prev][next]
#ifndef __ANTIGETOPT_H
#define __ANTIGETOPT_H
/* special return codes */
enum { AGO_EOF=4000, AGO_ALONE, AGO_UNKNOWN, AGO_REQARG, AGO_RESET, AGO_AMBIG };
/* option flags */
#define AGO_NOARG (1<<0) /* no argument */
#define AGO_NEEDARG (1<<1) /* required argument */
#define AGO_OPTARG (1<<2) /* optional argument */
#define AGO_EXCEPT0 (1<<3) /* exception #0 */
#define AGO_EXCEPT1 (1<<4) /* exception #1 */
#define AGO_EXCEPT2 (1<<5) /* exception #3 */
#define AGO_ENDOFLIST (1<<15) /* end of argument list marker */
/* option list null term */
#define AGO_LIST_TERM {'\0',NULL,0,AGO_ENDOFLIST}
/* The structure that defines an argument */
struct ago_optlist {
char ao_short;
char *ao_long;
int ao_id;
int ao_flags;
};
extern char *ago_optarg;
extern char *ago_optname;
extern char ago_optchar;
int antigetopt(int argc, char **argv, struct ago_optlist *list);
void ago_gnu_error(char *pname, int error);
int ago_set_exception(int except_nr, int (*tester)(void), char *msg);
#endif /* __ANTIGETOPT_H */
sleep.h 3/7
[top][prev][next]
/* compatibility for the unix/win32 sleep() function */
#ifndef __VI_SLEEP_H
#define __VI_SLEEP_H
#ifdef WIN32
#include <windows.h>
#define vi_sleep(x) Sleep((x)*1000)
#else
#include <unistd.h>
#define vi_sleep(x) sleep(x)
#endif
#endif /* __VI_SLEEP_H */
aht.c 4/7
[top][prev][next]
/* An implementation of in-memory hash tables:
* Copyright (c) 2000-2004 Salvatore Sanfilippo <antirez@invece.org>
*
* -- VERSION 2004.05.22 --
*
* COPYRIGHT AND PERMISSION NOTICE
* -------------------------------
*
* Copyright (c) 2000 Salvatore Sanfilippo <antirez@invece.org>
* Copyright (c) 2001 Salvatore Sanfilippo <antirez@invece.org>
* Copyright (c) 2002 Salvatore Sanfilippo <antirez@invece.org>
* Copyright (c) 2003 Salvatore Sanfilippo <antirez@invece.org>
* Copyright (c) 2004 Salvatore Sanfilippo <antirez@invece.org>
*
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, and/or sell copies of the Software, and to permit persons
* to whom the Software is furnished to do so, provided that the above
* copyright notice(s) and this permission notice appear in all copies of
* the Software and that both the above copyright notice(s) and this
* permission notice appear in supporting documentation.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
* OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
* INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder
* shall not be used in advertising or otherwise to promote the sale, use
* or other dealings in this Software without prior written authorization
* of the copyright holder.
*
* CHANGELOG
* ---------
*
* 22May2004 - Fixed a but in ht_destroy(). Now after this call the
* hashtable is really ready to be reused. Fixed also a memory leak
* in the same function. Luckly this function is only called at exit
* in many programs.
*
* OVERVIEW
* --------
*
* AHT is an implementation of a dictionary with support for
* INSERT, DELETE and SEARCH operations. It uses the hash table
* as base data structure to provide almost constant times for
* the three operations. AHT also automatically care about the
* size of the current key-values set increasing the hash table
* as needed.
*
* DESIGN PRINCIPLE
* ----------------
*
* - AHT try to resist to attacker-induced worst-case behaviour
* trought the randomization of the hash-function. This is
* optional.
*
* - AHT take care of the hash table expansion when needed.
* The hash table load ranges from 0 to 0.5, the hash table
* size is a power of two.
*
* - A simple implementation. The collisions resolution used
* is a simple linear probing, that takes advantage of
* the modern CPU caches, the low hash table max load and
* the use of a strong hash function provided with this library
* (ht_strong_hash), should mitigate the primary clustering
* enough. Experimental results shown that double hashing
* was a performance lost with common key types in modern
* CPUs.
*
* - Moderatly method oriented, it is possible to define the hash
* function, key/value destructors, key compare function, for a
* given hash table, but not with a per-element base.
*
* === WARNING ===
* = Before to use this library, think about the -fact- that the
* = worst case is O(N). Like for the quick sort algorithm, it may
* = be a bad idea to use this library in medical software, or other
* = software for wich the worst case should be taken in account
* = even if not likely to happen.
* = Good alternatives are red-black trees, and other trees with
* = a good worst-case behavior.
* ===============
*
* TODO
* ----
*
* - Write the documentation
* - ht_copy() to copy an element between hash tables
* - ht_dup() to duplicate an entire hash table
* - ht_merge() to add the content of one hash table to another
* - disk operations, the ability to save an hashtable from the
* memory to the disk and the reverse operation.
*
* Most of this features needs additional methods, like one
* to copy an object, and should return an error if such methods
* are not defined.
*
*/
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "aht.h"
/* -------------------------- private prototypes ---------------------------- */
static int ht_expand_if_needed(struct hashtable *t);
static unsigned int next_power(unsigned int size);
static int ht_insert(struct hashtable *t, void *key, unsigned int *avail_index);
/* The special ht_free_element pointer is used to mark
* a freed element in the hash table (note that the elements
* neven used are just NULL pointers) */
static struct ht_ele *ht_free_element = (void*) -1;
/* -------------------------- hash functions -------------------------------- */
/* The djb hash function, that's under public domain */
u_int32_t djb_hash(unsigned char *buf, size_t len)
{
u_int32_t h = 5381;
while(len--)
h = (h + (h << 5)) ^ *buf++;
return h;
}
u_int32_t djb_hashR(unsigned char *buf, size_t len)
{
u_int32_t h = 5381;
buf += len-1;
while(len--)
h = (h + (h << 5)) ^ *buf--;
return h;
}
/* Another trivial hash function */
#define ROT32R(x,n) (((x)>>n)|(x<<(32-n)))
u_int32_t trivial_hash(unsigned char *buf, size_t len)
{
u_int32_t h = 0;
while(len--) {
h = h + *buf++;
h = ROT32R(h, 3);
}
return h;
}
u_int32_t trivial_hashR(unsigned char *buf, size_t len)
{
u_int32_t h = 0;
buf += len-1;
while(len--) {
h = h + *buf--;
h = ROT32R(h, 3);
}
return h;
}
/* A strong hash function that should be the default with this
* hashtable implementation. Our hash tables does not support
* double hashing for design: the idea is to avoid double
* hashing and use a bit slower but very strong hash function like
* this. This should provide quite good performances with
* all the kinds of keys if you take the default max load of 50%.
*
* For more information see: http://burtleburtle.net/bob/hash/evahash.html */
/* The mixing step */
#define mix(a,b,c) \
{ \
a=a-b; a=a-c; a=a^(c>>13); \
b=b-c; b=b-a; b=b^(a<<8); \
c=c-a; c=c-b; c=c^(b>>13); \
a=a-b; a=a-c; a=a^(c>>12); \
b=b-c; b=b-a; b=b^(a<<16); \
c=c-a; c=c-b; c=c^(b>>5); \
a=a-b; a=a-c; a=a^(c>>3); \
b=b-c; b=b-a; b=b^(a<<10); \
c=c-a; c=c-b; c=c^(b>>15); \
}
/* The whole new hash function */
u_int32_t __ht_strong_hash(u_int8_t *k, u_int32_t length, u_int32_t initval)
{
u_int32_t a,b,c; /* the internal state */
u_int32_t len; /* how many key bytes still need mixing */
/* Set up the internal state */
len = length;
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
c = initval; /* variable initialization of internal state */
/*---------------------------------------- handle most of the key */
while (len >= 12)
{
a=a+(k[0]+((u_int32_t)k[1]<<8)+((u_int32_t)k[2]<<16)+
((u_int32_t)k[3]<<24));
b=b+(k[4]+((u_int32_t)k[5]<<8)+((u_int32_t)k[6]<<16)+
((u_int32_t)k[7]<<24));
c=c+(k[8]+((u_int32_t)k[9]<<8)+((u_int32_t)k[10]<<16)+
((u_int32_t)k[11]<<24));
mix(a,b,c);
k = k+12; len = len-12;
}
/*------------------------------------- handle the last 11 bytes */
c = c+length;
switch(len) /* all the case statements fall through */
{
case 11: c=c+((u_int32_t)k[10]<<24);
case 10: c=c+((u_int32_t)k[9]<<16);
case 9 : c=c+((u_int32_t)k[8]<<8);
/* the first byte of c is reserved for the length */
case 8 : b=b+((u_int32_t)k[7]<<24);
case 7 : b=b+((u_int32_t)k[6]<<16);
case 6 : b=b+((u_int32_t)k[5]<<8);
case 5 : b=b+k[4];
case 4 : a=a+((u_int32_t)k[3]<<24);
case 3 : a=a+((u_int32_t)k[2]<<16);
case 2 : a=a+((u_int32_t)k[1]<<8);
case 1 : a=a+k[0];
/* case 0: nothing left to add */
}
mix(a,b,c);
/*-------------------------------------------- report the result */
return c;
}
/* ----------------------------- API implementation ------------------------- */
/* reset an hashtable already initialized with ht_init().
* NOTE: This function should only called by ht_destroy(). */
static void ht_reset(struct hashtable *t)
{
t->table = NULL;
t->size = 0;
t->sizemask = 0;
t->used = 0;
t->collisions = 0;
}
/* Initialize the hash table */
int ht_init(struct hashtable *t)
{
ht_reset(t);
t->hashf = ht_hash_pointer;
t->key_destructor = ht_no_destructor;
t->val_destructor = ht_no_destructor;
t->key_compare = ht_compare_ptr;
return HT_OK;
}
/* Resize the table to the minimal size that contains all the elements */
int ht_resize(struct hashtable *t)
{
int minimal = (t->used * 2)+1;
if (minimal < HT_INITIAL_SIZE)
minimal = HT_INITIAL_SIZE;
return ht_expand(t, minimal);
}
/* Move an element accross hash tables */
int ht_move(struct hashtable *orig, struct hashtable *dest, unsigned int index)
{
int ret;
unsigned int new_index;
/* If the element isn't in the table ht_search will store
* the index of the free ht_ele in the integer pointer by *index */
ret = ht_insert(dest, orig->table[index]->key, &new_index);
if (ret != HT_OK)
return ret;
/* Move the element */
dest->table[new_index] = orig->table[index];
orig->table[index] = ht_free_element;
orig->used--;
dest->used++;
return HT_OK;
}
/* Expand or create the hashtable */
int ht_expand(struct hashtable *t, size_t size)
{
struct hashtable n; /* the new hashtable */
unsigned int realsize = next_power(size), i;
/* the size is invalid if it is smaller than the number of
* elements already inside the hashtable */
if (t->used >= size)
return HT_INVALID;
ht_init(&n);
n.size = realsize;
n.sizemask = realsize-1;
n.table = malloc(realsize*sizeof(struct ht_ele*));
if (n.table == NULL)
return HT_NOMEM;
/* Copy methods */
n.hashf = t->hashf;
n.key_destructor = t->key_destructor;
n.val_destructor = t->val_destructor;
n.key_compare= t->key_compare;
/* Initialize all the pointers to NULL */
memset(n.table, 0, realsize*sizeof(struct ht_ele*));
/* Copy all the elements from the old to the new table:
* note that if the old hash table is empty t->size is zero,
* so ht_expand() acts like an ht_create() */
n.used = t->used;
for (i = 0; i < t->size && t->used > 0; i++) {
if (t->table[i] != NULL && t->table[i] != ht_free_element) {
u_int32_t h;
/* Get the new element index: note that we
* know that there aren't freed elements in 'n' */
h = n.hashf(t->table[i]->key) & n.sizemask;
if (n.table[h]) {
n.collisions++;
while(1) {
h = (h+1) & n.sizemask;
if (!n.table[h])
break;
n.collisions++;
}
}
/* Move the element */
n.table[h] = t->table[i];
t->used--;
}
}
assert(t->used == 0);
free(t->table);
/* Remap the new hashtable in the old */
*t = n;
return HT_OK;
}
/* Add an element, discarding the old if the key already exists */
int ht_replace(struct hashtable *t, void *key, void *data)
{
int ret;
unsigned int index;
/* Try to add the element */
ret = ht_add(t, key, data);
if (ret == HT_OK || ret != HT_BUSY)
return ret;
/* It already exists, get the index */
ret = ht_search(t, key, &index);
assert(ret == HT_FOUND);
/* Remove the old */
ret = ht_free(t, index);
assert(ret == HT_OK);
/* And add the new */
return ht_add(t, key, data);
}
/* Add an element to the target hash table */
int ht_add(struct hashtable *t, void *key, void *data)
{
int ret;
unsigned int index;
/* If the element isn't in the table ht_insert() will store
* the index of the free ht_ele in the integer pointer by *index */
ret = ht_insert(t, key, &index);
if (ret != HT_OK)
return ret;
/* Allocates the memory and stores key */
if ((t->table[index] = malloc(sizeof(struct ht_ele))) == NULL)
return HT_NOMEM;
/* Store the pointers */
t->table[index]->key = key;
t->table[index]->data = data;
t->used++;
return HT_OK;
}
/* search and remove an element */
int ht_rm(struct hashtable *t, void *key)
{
int ret;
unsigned int index;
if ((ret = ht_search(t, key, &index)) != HT_FOUND)
return ret;
return ht_free(t, index);
}
/* Destroy an entire hash table */
int ht_destroy(struct hashtable *t)
{
unsigned int i;
/* Free all the elements */
for (i = 0; i < t->size && t->used > 0; i++) {
if (t->table[i] != NULL && t->table[i] != ht_free_element) {
if (t->key_destructor)
t->key_destructor(t->table[i]->key);
if (t->val_destructor)
t->val_destructor(t->table[i]->data);
free(t->table[i]);
t->used--;
}
}
/* Free the table and the allocated cache structure */
free(t->table);
/* Re-initialize the table */
ht_reset(t);
return HT_OK; /* Actually ht_destroy never fails */
}
/* Free an element in the hash table */
int ht_free(struct hashtable *t, unsigned int index)
{
if (index >= t->size)
return HT_IOVERFLOW; /* Index overflow */
/* ht_free() calls against non-existent elements are ignored */
if (t->table[index] != NULL && t->table[index] != ht_free_element) {
/* release the key */
if (t->key_destructor)
t->key_destructor(t->table[index]->key);
/* release the value */
if (t->val_destructor)
t->val_destructor(t->table[index]->data);
/* free the element structure */
free(t->table[index]);
/* mark the element as freed */
t->table[index] = ht_free_element;
t->used--;
}
return HT_OK;
}
/* Search the element with the given key */
int ht_search(struct hashtable *t, void *key, unsigned int *found_index)
{
int ret;
u_int32_t h;
/* Expand the hashtable if needed */
if (t->size == 0) {
if ((ret = ht_expand_if_needed(t)) != HT_OK)
return ret;
}
/* Try using the first hash functions */
h = t->hashf(key) & t->sizemask;
/* this handles the removed elements */
if (!t->table[h])
return HT_NOTFOUND;
if (t->table[h] != ht_free_element &&
t->key_compare(key, t->table[h]->key))
{
*found_index = h;
return HT_FOUND;
}
while(1) {
h = (h+1) & t->sizemask;
/* this handles the removed elements */
if (t->table[h] == ht_free_element)
continue;
if (!t->table[h])
return HT_NOTFOUND;
if (t->key_compare(key, t->table[h]->key)) {
*found_index = h;
return HT_FOUND;
}
}
}
/* This function is used to run the entire hash table,
* it returns:
* 1 if the element with the given index is valid
* 0 if the element with the given index is empty or marked free
* -1 if the element if out of the range */
int ht_get_byindex(struct hashtable *t, unsigned int index)
{
if (index >= t->size)
return -1;
if (t->table[index] == NULL || t->table[index] == ht_free_element)
return 0;
return 1;
}
/* Returns the hash table as an array of paris of key/value void* pointers.
* The array is allocated with malloc() and should be freed when no
* longer useful. The key and value pointers should not be freed or
* altered in any way, they will be handled by the hash table structure.
*
* This function is mainly useful to sort the hashtable's content
* without to alter the hashtable itself.
*
* Returns NULL on out of memory. */
void **ht_get_array(struct hashtable *t)
{
int used = ht_used(t);
void **table, **tptr;
long idx;
if ((table = (void**) malloc(sizeof(void*)*(used*2))) == NULL)
return NULL;
tptr = table;
for (idx = 0; ;idx++) {
int type = ht_get_byindex(t, idx);
if (type == -1) break;
if (type == 0) continue;
*tptr++ = ht_key(t, idx);
*tptr++ = ht_value(t, idx);
}
return table;
}
/* ------------------------- private functions ------------------------------ */
/* Expand the hash table if needed */
static int ht_expand_if_needed(struct hashtable *t)
{
/* If the hash table is empty expand it to the intial size,
* if the table is half-full redobule its size. */
if (t->size == 0)
return ht_expand(t, HT_INITIAL_SIZE);
if (t->size <= (t->used << 1))
return ht_expand(t, t->size << 1);
return HT_OK;
}
/* Our hash table capability is a power of two */
static unsigned int next_power(unsigned int size)
{
unsigned int i = 256;
if (size >= 2147483648U)
return 2147483648U;
while(1) {
if (i >= size)
return i;
i *= 2;
}
}
/* the insert function to add elements out of ht expansion */
static int ht_insert(struct hashtable *t, void *key, unsigned int *avail_index)
{
int ret;
u_int32_t h;
/* Expand the hashtable if needed */
if ((ret = ht_expand_if_needed(t)) != HT_OK)
return ret;
/* Try using the first hash functions */
h = t->hashf(key) & t->sizemask;
/* this handles the removed elements */
if (!t->table[h] || t->table[h] == ht_free_element) {
*avail_index = h;
return HT_OK;
}
t->collisions++;
if (t->key_compare(key, t->table[h]->key))
return HT_BUSY;
while(1) {
h = (h+1) & t->sizemask;
/* this handles the removed elements */
if (!t->table[h] || t->table[h] == ht_free_element) {
*avail_index = h;
return HT_OK;
}
t->collisions++;
if (t->key_compare(key, t->table[h]->key))
return HT_BUSY;
}
}
/* ------------------------- provided destructors --------------------------- */
/* destructor for heap allocated keys/values */
void ht_destructor_free(void *obj)
{
free(obj);
}
/* ------------------------- provided comparators --------------------------- */
/* default key_compare method */
int ht_compare_ptr(void *key1, void *key2)
{
return (key1 == key2);
}
/* key compare for nul-terminated strings */
int ht_compare_string(void *key1, void *key2)
{
return (strcmp(key1, key2) == 0) ? 1 : 0;
}
/* -------------------- hash functions for common data types --------------- */
/* We make this global to allow hash function randomization,
* as security measure against attacker-induced worst case behaviuor.
*
* Note that being H_i the strong hash function with init value of i
* and H_i' the same hash function with init value of i' than:
*
* if H_i(StringOne) is equal to H_i(CollidingStringTwo)
*
* it is NOT true that
*
* H_i'(StringOne) is equal to H_i''(CollidingStringTwo)
*/
static u_int32_t strong_hash_init_val = 0xF937A21;
/* Set the secret initialization value. It should be set from
* a secure PRNG like /dev/urandom at program initialization time */
void ht_set_strong_hash_init_val(u_int32_t secret)
{
strong_hash_init_val = secret;
}
/* __ht_strong_hash wrapper that mix a user-provided initval
* with the global strong_hash_init_val. __ht_strong_hash is
* even exported directly. */
u_int32_t ht_strong_hash(u_int8_t *k, u_int32_t length, u_int32_t initval)
{
return __ht_strong_hash(k, length, initval^strong_hash_init_val);
}
/* Hash function suitable for C strings and other data types using
* a 0-byte as terminator */
u_int32_t ht_hash_string(void *key)
{
return __ht_strong_hash(key, strlen(key), strong_hash_init_val);
}
/* This one is to hash the value of the pointer itself. */
u_int32_t ht_hash_pointer(void *key)
{
return __ht_strong_hash((void*)&key, sizeof(void*), strong_hash_init_val);
}
antigetopt.c 5/7
[top][prev][next]
/* antigetopt -- a getopt replacement
* Copyright(C) 2001 Salvatore Sanfilippo <antirez@invece.org>
* This software is released under the GPL license
* see the COPYING file for more information */
/* $Id: antigetopt.c,v 1.2 2003/09/01 00:22:06 antirez Exp $ */
/* TODO:
* argument list sanity check */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "antigetopt.h"
/* global vars */
char *ago_optarg = NULL;
char *ago_optname = NULL;
char ago_optchar = '\0';
/* static vars */
static struct ago_exception {
int (*tester)(void);
char *msg;
} ago_exceptions[3] = {
{ NULL, NULL },
{ NULL, NULL },
{ NULL, NULL }
};
static int ago_exception_bits[] = { AGO_EXCEPT0, AGO_EXCEPT1, AGO_EXCEPT2 };
/* static functions */
static struct ago_optlist
*ago_lookup(struct ago_optlist *list, char *arg, int *islong, int *amb);
static int strinitcmp(char *a, char *b);
/*----------------------------- implementation ------------------------------ */
int antigetopt(int argc, char **argv, struct ago_optlist *list)
{
static char **save_argv = NULL;
static char *chain = NULL;
static int endoptions = 0;
struct ago_optlist *opt;
int islong;
argc = argc; /* avoid a warning */
/* Reset */
if (argv == NULL) {
save_argv = NULL;
chain = NULL;
endoptions = 0;
return AGO_RESET;
} else {
if (save_argv == NULL) {
save_argv = argv+1; /* skips the argv[0] */
/* XXX: argument list sanity check */
}
}
chain_start:
if (chain) {
if (*chain == '\0')
chain = NULL;
else {
if ((opt = ago_lookup(list, chain, &islong, NULL))
== NULL)
return AGO_UNKNOWN;
if (!(opt->ao_flags & AGO_NOARG)) {
/* the if expression maybe false if the
* argument is optional */
if (chain[1] == '\0' && *save_argv)
ago_optarg = *save_argv++;
/* while it is mandatory for the NEEDARG type */
else if (opt->ao_flags & AGO_NEEDARG)
return AGO_REQARG;
}
chain++;
return opt->ao_id;
}
}
argv = save_argv;
/* handle the "--" special option */
if (*argv && strcmp(*argv, "--") == 0) {
endoptions = 1;
argv++;
save_argv++;
}
while(*argv) {
/* The option must start with '-' */
if (!endoptions && argv[0][0] == '-' && argv[0][1] != '\0') {
int amb;
/* note: ago_lookup also sets ago_optname */
if ((opt = ago_lookup(list, argv[0], &islong, &amb))
== NULL)
return amb ? AGO_AMBIG : AGO_UNKNOWN;
/* handle the collapsed short options */
if (!islong && argv[0][2] != '\0') {
chain = argv[0]+1;
save_argv++;
goto chain_start;
}
/* if the option require or may have an argument */
ago_optarg = NULL;
/* If the argument is needed we get the next argv[]
* element without care about what it contains */
if (opt->ao_flags & AGO_NEEDARG) {
if (argv[1] == NULL)
return AGO_REQARG;
ago_optarg = argv[1];
argv++;
}
/* If the argument is optional we only recognize it
* as argument if it does not starts with '-' */
else if (opt->ao_flags & AGO_OPTARG) {
if (argv[1] && argv[1][0] != '-') {
ago_optarg = argv[1];
argv++;
}
}
save_argv = argv+1;
return opt->ao_id;
} else {
save_argv = argv+1;
ago_optarg = argv[0];
ago_optchar = '\0';
ago_optname = NULL;
return AGO_ALONE;
}
}
return AGO_EOF;
}
#define UNK_SHORT_ERRSTRING "invalid option -- %c\n"
#define UNK_LONG_ERRSTRING "unrecognized option `--%s'\n"
#define ARG_SHORT_ERRSTRING "option requires an argument -- %c\n"
#define ARG_LONG_ERRSTRING "option `--%s' requires an argument\n"
#define AMB_ERRSTRING "option `--%s' is ambiguos\n"
#define IERR_ERRSTRING "internal error. ago_gnu_error() called with " \
"a bad error code (%d)\n"
void ago_gnu_error(char *pname, int error)
{
if (pname)
fprintf(stderr, "%s: ", pname);
switch(error) {
case AGO_UNKNOWN:
if (ago_optname)
fprintf(stderr, UNK_LONG_ERRSTRING,
ago_optname);
else
fprintf(stderr, UNK_SHORT_ERRSTRING,
ago_optchar);
break;
case AGO_REQARG:
if (ago_optname)
fprintf(stderr, ARG_LONG_ERRSTRING,
ago_optname);
else
fprintf(stderr, ARG_SHORT_ERRSTRING,
ago_optchar);
break;
case AGO_AMBIG:
fprintf(stderr, AMB_ERRSTRING, ago_optname);
break;
default:
fprintf(stderr, IERR_ERRSTRING, error);
break;
}
}
int ago_set_exception(int except_nr, int (*tester)(void), char *msg)
{
if (tester == NULL || msg == NULL || except_nr < 0 || except_nr >= 3)
return -1;
ago_exceptions[except_nr].tester = tester;
ago_exceptions[except_nr].msg = msg;
return 0;
}
/*-------------------------- static functions ------------------------------- */
struct ago_optlist
*ago_lookup(struct ago_optlist *list, char *arg, int *islong, int *amb)
{
int i;
/* ago_lookup can be receive as `arg' a pointer to a
* long argument, like --option, a pointer to a short
* argument like -O, or just a pointer to a char sequence
* in the case of collapsed short arguments like -abcde. */
/* Clear the 'ambiguos' flag, used to report the caller
* an ambiguos option abbreviation error */
if (amb) *amb = 0;
if (*arg == '-') /* skips the first - if any */
arg++;
switch(*arg) {
case '\0':
return NULL;
case '-':
*islong = 1;
arg++; /* skip the last - */
break;
default:
*islong = 0;
break;
}
/* search the argument in the list */
if (*islong) {
int retval;
struct ago_optlist *last = NULL;
while(!(list->ao_flags & AGO_ENDOFLIST)) {
ago_optname = arg;
ago_optchar = '\0';
if ((retval = strinitcmp(arg, list->ao_long)) != 0) {
switch(retval) {
case 1:
if (last) {
if (amb) *amb = 1;
return NULL;
}
last = list;
break;
case 2:
goto ok;
}
}
list++;
}
if (last) {
ago_optname = last->ao_long;
list = last;
goto ok;
}
} else {
ago_optchar = *arg;
ago_optname = NULL;
while(!(list->ao_flags & AGO_ENDOFLIST)) {
if (*arg == list->ao_short)
goto ok;
list++;
}
}
return NULL;
ok:
/* handle the exceptions if any */
for (i = 0; i < 3; i++) {
if ((list->ao_flags & ago_exception_bits[i]) &&
ago_exceptions[i].tester)
{
if (ago_exceptions[i].tester()) {
if (ago_optname) {
fprintf(stderr, "%s `--%s'\n",
ago_exceptions[i].msg,
ago_optname);
} else {
fprintf(stderr, "%s `-%c'\n",
ago_exceptions[i].msg,
ago_optchar);
}
exit(1);
}
}
}
return list;
}
/* Given two strings this function returns:
* 1, if the strings are the same for the len of the first string (abc, abcde)
* 2, if the strings are exactly the same: (abcd, abcd)
* otherwise zero is returned (abcde, abcd) ... (djf, 293492) */
int strinitcmp(char *a, char *b)
{
if (!a || !b)
return 0;
while (*a && *b) {
if (*a != *b)
return 0;
a++; b++;
}
if (*a)
return 0;
if (*a == *b)
return 2;
return 1;
}
tail.c 6/7
[top][prev][next]
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "sleep.h"
/* Open a file, seek at the end, and store in '*len' the file length */
static FILE *vi_openAtEnd(char *filename, long *len)
{
FILE *fp = NULL;
if ((fp = fopen(filename, "rb")) == NULL) goto err; /* open */
if ((fseek(fp, 0, SEEK_END)) == -1) goto err; /* seek at end */
if ((*len = ftell(fp)) == -1) goto err; /* read the file length */
return fp;
err:
if (fp != NULL) fclose(fp);
return NULL;
}
/* Output 'len' bytes of file 'fp' starting from 'offset'.
* The function returns 0 on success, -1 on error. */
#define TAILOUT_BUFLEN 1024
static int vi_tailOutput(FILE *fp, long offset, long len)
{
char buf[TAILOUT_BUFLEN];
if (fseek(fp, offset, SEEK_SET) == -1) return -1;
while(len) {
unsigned int min = (len > TAILOUT_BUFLEN) ? TAILOUT_BUFLEN : len;
if (fread(buf, 1, min, fp) != min) return -1;
fwrite(buf, 1, min, stdout);
fflush(stdout);
len -= min;
}
return 0;
}
/* An interation for the 'tail -f' simulation. Open the
* file at every iteration in order to continue to work
* when files are rotated. */
static void vi_tailIteration(char *filename, long *len)
{
long newlen, datalen;
FILE *fp = NULL;
fp = vi_openAtEnd(filename, &newlen);
if (fp != NULL) {
if (*len == -1) {
/* Initialization */
*len = newlen;
} else if (newlen < *len) {
/* Shorter file condition */
*len = 0; /* the next iteration will read
the new data */
} else if (newlen > *len) {
/* Data ready condition */
datalen = newlen - *len;
if (vi_tailOutput(fp, *len, datalen) != -1)
*len = newlen;
}
}
if (fp != NULL) fclose(fp);
}
void vi_tail(int filec, char **filev)
{
long *len;
int i;
if (filec <= 0) {
fprintf(stderr, "No files specified in tail-mode\n");
exit(1);
}
len = malloc(filec);
if (!len) {
fprintf(stderr, "Out of memory\n");
exit(1);
}
for (i = 0; i < filec; i++)
len[i] = -1;
while(1) {
for (i = 0; i < filec; i++)
vi_tailIteration(filev[i], &len[i]);
vi_sleep(1);
}
}
visitors.c 7/7
[top][prev][next]
/* visitors -- very fast web logs analyzer.
*
* Copyright (C) 2004 by Salvatore Sanfilippo <antirez (at) invece (dot) org>
* All Rights Reserved.
*
* This software is released under the terms of the GPL license version 2.
* Read the COPYING file in this distribution for more details. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stdarg.h>
#include <errno.h>
#include <locale.h>
#include <ctype.h>
#include "aht.h"
#include "antigetopt.h"
#include "sleep.h"
/* Max length of an error stored in the visitors handle */
#define VI_ERROR_MAX 1024
/* Max length of a log line */
#define VI_LINE_MAX 4096
/* Max number of filenames in the command line */
#define VI_FILENAMES_MAX 1024
/* Max number of prefixes in the command line */
#define VI_PREFIXES_MAX 1024
/* Abbreviation length for HTML outputs */
#define VI_HTML_ABBR_LEN 100
/* Version as a string */
#define VI_VERSION_STR "0.4"
/*------------------------------- data structures ----------------------------*/
/* visitors handle */
struct vih {
int startt;
int endt;
int processed;
int invalid;
int hour[24];
int weekday[7];
int weekdayhour[7][24]; /* hour and weekday combined data */
struct hashtable visitors;
struct hashtable googlevisitors;
struct hashtable pages;
struct hashtable images;
struct hashtable error404;
struct hashtable pageviews;
struct hashtable pageviews_grouped;
struct hashtable referers;
struct hashtable referersage;
struct hashtable date;
struct hashtable googledate;
struct hashtable agents;
struct hashtable googled;
struct hashtable googlevisits;
struct hashtable googlekeyphrases;
struct hashtable trails;
struct hashtable tld;
struct hashtable os;
struct hashtable browsers;
char *error;
};
/* info associated with a line of log */
struct logline {
char *host;
char *date;
char *hour;
char *timezone;
char *req;
char *ref;
char *agent;
time_t time;
struct tm tm;
};
/* output module structure. See below for the definition of
* the text and html output modules. */
struct outputmodule {
void (*print_header)(FILE *fp);
void (*print_footer)(FILE *fp);
void (*print_title)(FILE *fp, char *title);
void (*print_subtitle)(FILE *fp, char *title);
void (*print_numkey_info)(FILE *fp, char *key, int val);
void (*print_keykey_entry)(FILE *fp, char *key1, char *key2, int num);
void (*print_numkey_entry)(FILE *fp, char *key, int val, char *link,
int num);
void (*print_numkeybar_entry)(FILE *fp, char *key, int max, int tot,
int this);
void (*print_numkeycomparativebar_entry)(FILE *fp, char *key, int tot,
int this);
void (*print_bidimentional_map)(FILE *fp, int xlen, int ylen,
char **xlabel, char **ylabel, int *value);
void (*print_hline)(FILE *fp);
void (*print_credits)(FILE *fp);
void (*print_report_link)(FILE *fp, char *report);
};
/* Just a string with cached length */
struct vistring {
char *str;
int len;
};
/* ---------------------- global configuration parameters ------------------- */
int Config_max_referers = 20;
int Config_max_referers_age = 20;
int Config_max_pages = 20;
int Config_max_images = 20;
int Config_max_error404 = 20;
int Config_max_agents = 20;
int Config_max_googled = 20;
int Config_max_google_keyphrases = 20;
int Config_max_trails = 20;
int Config_max_tld = 20;
int Config_process_agents = 0;
int Config_process_google = 0;
int Config_process_google_keyphrases;
int Config_process_web_trails = 0;
int Config_process_weekdayhour_map = 0;
int Config_process_referers_age = 0;
int Config_process_tld = 0;
int Config_process_os = 0;
int Config_process_browsers = 0;
int Config_process_error404 = 0;
int Config_process_pageviews = 0;
int Config_graphviz_mode = 0;
int Config_tail_mode = 0;
int Config_stream_mode = 0;
int Config_update_every = 60*10; /* update every 10 minutes for default. */
int Config_reset_every = 0; /* never reset for default */
int Config_time_delta = 0; /* adjustable time difference */
char *Config_output_file = NULL; /* stdout if not set. */
struct outputmodule *Output = NULL; /* intialized to 'text' in main() */
/* Prefixes */
int Config_prefix_num = 0; /* number of prefixes specified */
struct vistring Config_prefix[VI_PREFIXES_MAX];
/*----------------------------------- Tables ---------------------------------*/
static char *vi_wdname[7] = {"Mo", "Tu", "We", "Th", "Fr", "Sa", "Su"};
/* -------------------------------- prototypes ------------------------------ */
void vi_clear_error(struct vih *vih);
void vi_tail(int filec, char **filev);
/*------------------------------ support functions -------------------------- */
/* Returns non-zero if the link seems like a google link, zero otherwise.
* Note that this function only checks for a prefix of www.google.<something>.
* so may be fooled. */
int vi_is_google_link(char *s)
{
return !strncmp(s, "http://www.google.", 18);
}
/* Returns non-zero if the url matches some user-specified prefix.
* being a link "internal" to the site. Otherwise zero is returned.
*
* When there is a match, the value returned is the length of
* the matching prefix. */
int vi_is_internal_link(char *url)
{
int i, l;
if (!Config_prefix_num) return 0; /* no prefixes set? */
l = strlen(url);
for (i = 0; i < Config_prefix_num; i++) {
if (Config_prefix[i].len <= l &&
!strncasecmp(url, Config_prefix[i].str,
Config_prefix[i].len))
{
return Config_prefix[i].len;
}
}
return 0;
}
/* returns non-zero if the URL 's' seems an image or a CSS file. */
int vi_is_image(char *s)
{
int l = strlen(s);
char *end = s + l; /* point to the nul term */
if (l < 5) return 0;
if (!memcmp(end-4, ".css", 4) ||
!memcmp(end-4, ".jpg", 4) ||
!memcmp(end-4, ".gif", 4) ||
!memcmp(end-4, ".png", 4) ||
!memcmp(end-4, ".ico", 4) ||
!memcmp(end-5, ".jpeg", 5) ||
!memcmp(end-4, ".CSS", 4) ||
!memcmp(end-4, ".JPG", 4) ||
!memcmp(end-4, ".GIF", 4) ||
!memcmp(end-4, ".PNG", 4) ||
!memcmp(end-4, ".ICO", 4) ||
!memcmp(end-4, ".JPEG", 5)) return 1;
return 0;
}
/* returns non-zero if the URL 's' seems a real page. */
int vi_is_pageview(char *s)
{
int l = strlen(s);
char *end = s + l; /* point to the nul term */
char *dot, *slash;
if (s[l-1] == '/') return 1;
if (l >= 6 &&
(!memcmp(end-5, ".html", 5) ||
!memcmp(end-4, ".htm", 4) ||
!memcmp(end-4, ".php", 4) ||
!memcmp(end-4, ".asp", 4) ||
!memcmp(end-4, ".jsp", 4) ||
!memcmp(end-4, ".xdl", 4) ||
!memcmp(end-5, ".xhtml", 5) ||
!memcmp(end-4, ".xml", 4) ||
!memcmp(end-4, ".cgi", 4) ||
!memcmp(end-3, ".pl", 3) ||
!memcmp(end-6, ".shtml", 6) ||
!memcmp(end-5, ".HTML", 5) ||
!memcmp(end-4, ".HTM", 4) ||
!memcmp(end-4, ".PHP", 4) ||
!memcmp(end-4, ".ASP", 4) ||
!memcmp(end-4, ".JSP", 4) ||
!memcmp(end-4, ".XDL", 4) ||
!memcmp(end-6, ".XHTML", 6) ||
!memcmp(end-4, ".XML", 4) ||
!memcmp(end-4, ".CGI", 4) ||
!memcmp(end-3, ".PL", 3) ||
!memcmp(end-6, ".SHTML", 6))) return 1;
dot = strrchr(s, '.');
if (!dot) return 1;
slash = strrchr(s, '/');
if (slash && slash > dot) return 1;
return 0;
}
/* returns non-zero if 'ip' seems a string representing an IP address
* like "1.2.3.4". Note that 'ip' is always an IP or an hostname
* so this function actually test if the string pointed by 'ip' only
* contains characters in the "[0-9.]" set */
int vi_is_numeric_address(char *ip)
{
unsigned int l = strlen(ip);
return strspn(ip, "0123456789.") == l;
}
/* returns the time converted into a time_t value.
* On error (time_t) -1 is returned.
* Note that this function is specific for the following format:
* "10/May/2004:04:15:33". Works if the month is not an abbreviation, or if the
* year is abbreviated to only the last two digits.
* The time can be omitted like in "10/May/2004". */
time_t parse_date(char *s, struct tm *tmptr)
{
struct tm tm;
time_t t;
char *months[] = {
"jan", "feb", "mar", "apr", "may", "jun",
"jul", "aug", "sep", "oct", "nov", "dec",
};
char *day, *month, *year, *time = NULL;
char monthaux[32];
int i, len;
/* make a copy to mess with it */
len = strlen(s);
if (len >= 32) goto fmterr;
memcpy(monthaux, s, len);
monthaux[len] = '\0';
/* Inizialize the tm structure. We just fill three fields */
tm.tm_sec = 0;
tm.tm_min = 0;
tm.tm_hour = 0;
tm.tm_mday = 0;
tm.tm_mon = 0;
tm.tm_year = 0;
tm.tm_wday = 0;
tm.tm_yday = 0;
tm.tm_isdst = -1;
/* search delimiters */
day = monthaux;
if ((month = strchr(day, '/')) == NULL) goto fmterr;
*month++ = '\0';
if ((year = strchr(month, '/')) == NULL) goto fmterr;
*year++ = '\0';
/* time, optional for this parser. */
if ((time = strchr(year, ':')) != NULL) {
*time++ = '\0';
}
/* convert day */
tm.tm_mday = atoi(day);
if (tm.tm_mday < 1 || tm.tm_mday > 31) goto fmterr;
/* convert month */
if (strlen(month) < 3) goto fmterr;
month[0] = tolower(month[0]);
month[1] = tolower(month[1]);
month[2] = tolower(month[2]);
for (i = 0; i < 12; i++) {
if (memcmp(month, months[i], 3) == 0) break;
}
if (i == 12) goto fmterr;
tm.tm_mon = i;
/* convert year */
tm.tm_year = atoi(year);
if (tm.tm_year > 100) {
if (tm.tm_year < 1900 || tm.tm_year > 2500) goto fmterr;
tm.tm_year -= 1900;
} else {
/* if the year is in two-digits form, the 0 - 68 range
* is converted to 2000 - 2068 */
if (tm.tm_year < 69)
tm.tm_year += 100;
}
/* convert time */
if (time) { /* format is HH:MM:SS */
if (strlen(time) < 8) goto fmterr;
tm.tm_hour = ((time[0]-'0')*10)+(time[1]-'0');
if (tm.tm_hour < 0 || tm.tm_hour > 23) goto fmterr;
tm.tm_min = ((time[3]-'0')*10)+(time[4]-'0');
if (tm.tm_min < 0 || tm.tm_min > 59) goto fmterr;
tm.tm_sec = ((time[6]-'0')*10)+(time[7]-'0');
if (tm.tm_sec < 0 || tm.tm_sec > 60) goto fmterr;
}
t = mktime(&tm);
if (t == (time_t)-1) goto fmterr;
t += (Config_time_delta*3600);
if (tmptr) {
struct tm *auxtm;
if ((auxtm = localtime(&t)) != NULL)
*tmptr = *auxtm;
}
return t;
fmterr: /* format error */
return (time_t) -1;
}
/* returns 1 if the given date is Saturday or Sunday.
* Zero is otherwise returned. */
int vi_is_weekend(char *s)
{
struct tm tm;
if (parse_date(s, &tm) != (time_t)-1) {
if (tm.tm_wday == 0 || tm.tm_wday == 6)
return 1;
}
return 0;
}
/* URL decoding and white spaces trimming function.
* Input: the encoded string 's'.
* Output: the decoded string written at 'd' that has room for at least 'n'
* bytes of data. */
void vi_urldecode(char *d, char *s, int n)
{
char *start = d;
if (n < 1) return;
while(*s && n > 1) {
int c = *s;
switch(c) {
case '+': c = ' '; break;
case '%':
if (*(s+1) && *(s+2)) {
int high = toupper(*(s+1));
int low = toupper(*(s+2));
if (high <= '9') high -= '0';
else high = (high - 'A') + 10;
if (low <= '9') low -= '0';
else low = (low - 'A') + 10;
c = (high << 4)+low;
s += 2;
}
break;
}
if (c != ' ' || d != start) {
*d++ = c;
n--;
}
s++;
}
/* Right trim */
*d = '\0';
d--;
while (d >= start && *d == ' ') {
*d = '\0';
d--;
}
}
/* URL encoding function
* Input: the unencoded string 's'.
* Output: the url-encoded string written at 'd' that has room for at least 'n'
* bytes of data. */
void vi_urlencode(char *d, char *s, int n)
{
if (n < 1) return;
n--;
while(*s && n > 0) {
int c = *s;
if ((c >= 'A' && c <= 'Z') ||
(c >= 'a' && c <= 'z') ||
(c >= '0' && c <= '9'))
{
*d++ = c;
n--;
} else if (c == ' ') {
*d++ = '+';
n--;
} else if (c == '\n') {
if (n < 6) break;
memcpy(d, "%0d%0a", 6);
d += 6;
n -= 6;
} else {
unsigned int t;
char *hexset = "0123456789abcdf";
if (n < 3) break;
t = (unsigned) c;
*d++ = '%';
*d++ = hexset [(t & 0xF0) >> 4];
*d++ = hexset [(t & 0x0F)];
n -= 3;
}
s++;
}
*d = '\0';
}
/* Convert a nul-term string to lowercase in place */
void vi_strtolower(char *s)
{
while (*s) {
*s = tolower(*s);
s++;
}
}
/* Note: the following function strlcat and strlcpy are (possibly) modified
* version of OpenBSD's functions. Original copyright notice:
* Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
* Originally under the BSD license. */
int vi_strlcpy(char *dst, char *src, int siz)
{
char *d = dst;
const char *s = src;
int n = siz;
/* Copy as many bytes as will fit */
if (n != 0 && --n != 0) {
do {
if ((*d++ = *s++) == 0)
break;
} while (--n != 0);
}
/* Not enough room in dst, add NUL and traverse rest of src */
if (n == 0) {
if (siz != 0)
*d = '\0'; /* NUL-terminate dst */
while (*s++)
;
}
return(s - src - 1); /* count does not include NUL */
}
int vi_strlcat(char *dst, const char *src, int siz)
{
char *d = dst;
const char *s = src;
size_t n = siz;
size_t dlen;
/* Find the end of dst and adjust bytes left but don't go past end */
while (n-- != 0 && *d != '\0')
d++;
dlen = d - dst;
n = siz - dlen;
if (n == 0)
return(dlen + strlen(s));
while (*s != '\0') {
if (n != 1) {
*d++ = *s;
n--;
}
s++;
}
*d = '\0';
return(dlen + (s - src)); /* count does not include NUL */
}
/*-------------------------- visitors handler functions --------------------- */
/* Init the hashtable with methods suitable for an "occurrences counter" */
void vi_ht_init(struct hashtable *ht)
{
ht_init(ht);
ht_set_hash(ht, ht_hash_string);
ht_set_key_destructor(ht, ht_destructor_free);
ht_set_val_destructor(ht, ht_no_destructor);
ht_set_key_compare(ht, ht_compare_string);
}
/* Reset the weekday/hour info in the visitors handler. */
void vi_reset_weekday_hour(struct vih *vih)
{
int i, j;
for (i = 0; i < 24; i++) {
vih->hour[i] = 0;
for (j = 0; j < 7; j++)
vih->weekdayhour[j][i] = 0;
}
for (i = 0; i < 7; i++) vih->weekday[i] = 0;
}
/* Reset the hashtables from the handler, that are left
* in a reusable state (but all empty). */
void vi_reset_hashtables(struct vih *vih)
{
ht_destroy(&vih->visitors);
ht_destroy(&vih->googlevisitors);
ht_destroy(&vih->pages);
ht_destroy(&vih->images);
ht_destroy(&vih->error404);
ht_destroy(&vih->pageviews);
ht_destroy(&vih->pageviews_grouped);
ht_destroy(&vih->referers);
ht_destroy(&vih->referersage);
ht_destroy(&vih->agents);
ht_destroy(&vih->googled);
ht_destroy(&vih->googlekeyphrases);
ht_destroy(&vih->googlevisits);
ht_destroy(&vih->trails);
ht_destroy(&vih->tld);
ht_destroy(&vih->os);
ht_destroy(&vih->browsers);
ht_destroy(&vih->date);
ht_destroy(&vih->googledate);
}
/* Reset handler informations to support --reset option in
* stream mode. */
void vi_reset(struct vih *vih)
{
vi_reset_weekday_hour(vih);
vi_reset_hashtables(vih);
}
/* Return a new visitors handle.
* On out of memory NULL is returned.
* The handle obtained with this call must be released with vi_free()
* when no longer useful. */
struct vih *vi_new(void)
{
struct vih *vih;
if ((vih = malloc(sizeof(*vih))) == NULL)
return NULL;
/* Initialization */
vih->startt = vih->endt = time(NULL);
vih->processed = 0;
vih->invalid = 0;
vi_reset_weekday_hour(vih);
vih->error = NULL;
vi_ht_init(&vih->visitors);
vi_ht_init(&vih->googlevisitors);
vi_ht_init(&vih->pages);
vi_ht_init(&vih->images);
vi_ht_init(&vih->error404);
vi_ht_init(&vih->pageviews);
vi_ht_init(&vih->pageviews_grouped);
vi_ht_init(&vih->referers);
vi_ht_init(&vih->referersage);
vi_ht_init(&vih->agents);
vi_ht_init(&vih->googled);
vi_ht_init(&vih->googlevisits);
vi_ht_init(&vih->googlekeyphrases);
vi_ht_init(&vih->trails);
vi_ht_init(&vih->tld);
vi_ht_init(&vih->os);
vi_ht_init(&vih->browsers);
vi_ht_init(&vih->date);
vi_ht_init(&vih->googledate);
return vih;
}
/* Free an handle created with vi_new(). */
void vi_free(struct vih *vih)
{
if (!vih) return;
vi_reset_hashtables(vih);
vi_clear_error(vih);
free(vih);
}
/* Add a new entry in the counter hashtable. If the key does not
* exists creates a new entry with "1" as number of hits, otherwise
* increment the old value.
*
* Return the value of hits after the increment or creation. If the
* returned value is greater than one, the key was already seen.
*
* Return 0 on out of memory.
*
* NOTE: the pointer of the "value" part of the hashtable entry is
* used as a counter casting it to a "long" integer. */
int vi_counter_incr(struct hashtable *ht, char *key)
{
char *k;
unsigned int idx;
int r;
long val;
r = ht_search(ht, key, &idx);
if (r == HT_NOTFOUND) {
k = strdup(key);
if (k == NULL) return 0;
if (ht_add(ht, k, (void*)1) != HT_OK) {
free(k);
return 0;
}
return 1;
} else {
val = (long) ht_value(ht, idx);
val++;
ht_value(ht, idx) = (void*) val;
return val;
}
}
/* Similar to vi_counter_incr, but only read the old value of
* the counter without to alter it. If the specified key does not
* exists zero is returned. */
int vi_counter_val(struct hashtable *ht, char *key)
{
unsigned int idx;
int r;
long val;
r = ht_search(ht, key, &idx);
if (r == HT_NOTFOUND) {
return 0;
} else {
val = (long) ht_value(ht, idx);
return val;
}
}
/* Set a key/value pair inside the hash table with
* a create-else-replace semantic.
*
* Return non-zero on out of memory. */
int vi_replace(struct hashtable *ht, char *key, char *value)
{
char *k, *v;
k = strdup(key);
v = strdup(value);
if (!k || !v) goto err;
if (ht_replace(ht, k, v) != HT_OK)
goto err;
return 0;
err:
if (k) free(k);
if (v) free(v);
return 1;
}
/* Replace the time value of the given key with the new one if this
* is newer/older of the old one. If the key is new, it's just added
* to the hash table with the specified time as value.
*
* If the 'ifolder' flag is set, values are replaced with older one,
* otherwise with newer.
* This function is only used by wrappers replace_if_older() and
* replace_if_newer().
*
* Return 0 on success, non-zero on out of memory. */
int vi_replace_time(struct hashtable *ht, char *key, time_t time, int ifolder)
{
char *k = NULL;
unsigned int idx;
int r;
r = ht_search(ht, key, &idx);
if (r == HT_NOTFOUND) {
k = strdup(key);
if (!k) goto err;
if (ht_add(ht, k, (void*)time) != HT_OK) goto err;
} else {
time_t oldt = (time_t) ht_value(ht, idx);
/* Update the date if this one is older/nwer. */
if (ifolder) {
if (time < oldt)
ht_value(ht, idx) = (void*) time;
} else {
if (time > oldt)
ht_value(ht, idx) = (void*) time;
}
}
return 0;
err:
if (k) free(k);
return 1;
}
/* see vi_replace_time */
int vi_replace_if_older(struct hashtable *ht, char *key, time_t time)
{
return vi_replace_time(ht, key, time, 1);
}
/* see vi_replace_time */
int vi_replace_if_newer(struct hashtable *ht, char *key, time_t time)
{
return vi_replace_time(ht, key, time, 0);
}
/* Set an error in the visitors handle */
void vi_set_error(struct vih *vih, char *fmt, ...)
{
va_list ap;
char buf[VI_ERROR_MAX];
va_start(ap, fmt);
vsnprintf(buf, VI_ERROR_MAX, fmt, ap);
buf[VI_ERROR_MAX-1] = '\0';
free(vih->error);
vih->error = strdup(buf);
va_end(ap);
}
/* Get the error */
char *vi_get_error(struct vih *vih)
{
if (!vih->error) {
return "No error";
}
return vih->error;
}
/* Clear the error */
void vi_clear_error(struct vih *vih)
{
free(vih->error);
vih->error = NULL;
}
/*----------------------------------- parsing ----------------------------- */
/* Parse a line of log, and fill the logline structure with
* appropriate values. On error (bad line format) non-zero is returned. */
int vi_parse_line(struct logline *ll, char *l)
{
char *date, *hour, *timezone, *host, *agent, *req, *ref, *p;
char *agent_start = NULL, *req_end = NULL, *ref_end = NULL;
/* Seek the start of the different components */
/* host */
host = l;
/* date */
if ((date = strchr(l, '[')) == NULL) return 1;
date++;
/* agent */
if ((agent = strchr(l, '(')) == NULL) {
agent = "";
} else {
p = agent;
while (p >= l) {
if (*p == '"') {
agent_start = p;
break;
}
p--;
}
}
/* req */
if ((req = strstr(l, "\"GET")) != NULL ||
(req = strstr(l, "\"POST")) != NULL ||
(req = strstr(l, "\"HEAD")) != NULL ||
(req = strstr(l, "\"get")) != NULL ||
(req = strstr(l, "\"post")) != NULL ||
(req = strstr(l, "\"head")) != NULL)
{
req++;
} else {
req = "";
}
/* ref */
if ((ref = strstr(l, "\"http")) != NULL ||
(ref = strstr(l, "\"HTTP")) != NULL)
{
ref++;
} else {
ref = "";
}
/* Nul-term the components */
/* host */
if ((p = strchr(host, ' ')) == NULL) return 1;
*p = '\0';
/* date */
if ((p = strchr(date, ']')) == NULL) return 1;
*p = '\0';
ll->time = parse_date(date, &ll->tm);
if (ll->time == (time_t)-1) return 1;
/* hour */
if ((p = strchr(date, ':')) == NULL) return 1;
hour = p+1;
*p = '\0';
/* timezone */
if ((p = strchr(hour, ' ')) == NULL) return 1;
timezone = p+1;
*p = '\0';
/* req */
if ((p = strchr(req, '"')) == NULL) {
req = "";
} else {
req_end = p;
*p = '\0';
if ((p = strchr(req, ' ')) != NULL) {
req = p+1;
if ((p = strchr(req, ' ')) != NULL)
*p = '\0';
}
}
/* ref */
if ((p = strchr(ref, '"')) == NULL) {
ref = "";
} else {
ref_end = p;
*p = '\0';
}
/* agent */
if ((p = strchr(agent, ')')) == NULL) {
agent = "";
} else {
char *aux;
aux = strchr(p, '"');
if (aux)
*aux = '\0';
else
*(p+1) = '\0';
if (agent_start) {
if ((!req_end || (req_end != agent_start)) &&
(!ref_end || (ref_end != agent_start))) {
agent = agent_start+1;
}
}
}
/* Fill the struture */
ll->host = host;
ll->date = date;
ll->hour = hour;
ll->timezone = timezone;
ll->agent = agent;
ll->req = req;
ll->ref = ref;
return 0;
}
/* process the weekday and hour information */
void vi_process_date_and_hour(struct vih *vih, int weekday, int hour)
{
/* Note, the following sanity check is useless in theory. */
if (weekday < 0 || weekday > 6 || hour < 0 || hour > 23) return;
vih->weekday[weekday]++;
vih->hour[hour]++;
/* store the combined info. We always compute this information
* even if the report is disabled because it's cheap. */
vih->weekdayhour[weekday][hour]++;
}
/* Process unique visitors populating the relative hash table.
* Return non-zero on out of memory. This is also used to populate
* the hashtable used for the "pageviews per user" statistics. */
int vi_process_visitors_per_day(struct vih *vih, char *host, char *agent, char *date, char *ref, char *req)
{
char visday[VI_LINE_MAX], *p;
int res, host_len, agent_len, date_len;
host_len = strlen(host);
agent_len = strlen(agent);
date_len = strlen(date);
if (host_len+agent_len+date_len+4 > VI_LINE_MAX)
return 0;
p = visday;
memcpy(p, host, host_len); p += host_len;
*p++ = '|';
memcpy(p, date, date_len); p += date_len;
*p++ = '|';
memcpy(p, agent, agent_len); p += agent_len;
*p = '\0';
/* Visits with Google as referer are also stored in another hash
* table. */
if (vi_is_google_link(ref)) {
res = vi_counter_incr(&vih->googlevisitors, visday);
if (res == 0) return 1; /* out of memory */
if (res == 1) { /* new visit! */
res = vi_counter_incr(&vih->googledate, date);
if (res == 0) return 1; /* out of memory */
}
}
/* Populate the 'pageviews per visitor' hash table */
if (Config_process_pageviews && vi_is_pageview(req)) {
res = vi_counter_incr(&vih->pageviews, visday);
if (res == 0) return 1; /* out of memory */
}
/* Mark the visit in the non-google-specific hashtable */
res = vi_counter_incr(&vih->visitors, visday);
if (res == 0) return 1; /* out of memory */
if (res > 1) return 0; /* visit alredy seen. */
res = vi_counter_incr(&vih->date, date);
if (res == 0) return 1;
return 0;
}
/* Process referers populating the relative hash tables.
* Return non-zero on out of memory. */
int vi_process_referer(struct vih *vih, char *ref, time_t age)
{
int res;
/* Don't count internal referer (specified by the user
* using --prefix options), nor google referers. */
if (vi_is_internal_link(ref))
return !vi_counter_incr(&vih->referers, "Internal Link");
if (vi_is_google_link(ref))
return !vi_counter_incr(&vih->referers, "Google Search Engine");
res = vi_counter_incr(&vih->referers, ref);
if (res == 0) return 1;
/* Process the referers age if enabled */
if (Config_process_referers_age) {
if (vi_replace_if_older(&vih->referersage, ref, age)) return 1;
}
return 0;
}
/* Process requested URLs. Split the entries in two hash tables,
* one for pages and one for images.
* Return non-zero on out of memory. */
int vi_process_page_request(struct vih *vih, char *url)
{
int res;
char urldecoded[VI_LINE_MAX];
vi_urldecode(urldecoded, url, VI_LINE_MAX);
if (vi_is_image(url))
res = vi_counter_incr(&vih->images, urldecoded);
else
res = vi_counter_incr(&vih->pages, urldecoded);
if (res == 0) return 1;
return 0;
}
/* Process log lines for 404 errors report. */
int vi_process_error404(struct vih *vih, char *l, char *url)
{
char urldecoded[VI_LINE_MAX];
vi_urldecode(urldecoded, url, VI_LINE_MAX);
if (strstr(l, " 404 ") && !strstr(l, " 200 "))
return !vi_counter_incr(&vih->error404, urldecoded);
return 0;
}
/* Process agents populating the relative hash table.
* Return non-zero on out of memory. */
int vi_process_agents(struct vih *vih, char *agent)
{
int res;
res = vi_counter_incr(&vih->agents, agent);
if (res == 0) return 1;
return 0;
}
/* Match the list of keywords 't' against the string 's', and if
* a match is found increment the matching keyword in the hashtable.
* Return zero on success, non-zero on out of memory . */
int vi_counter_incr_matchtable(struct hashtable *ht, char *s, char **t)
{
while(*t) {
int res;
if ((*t)[0] == '\0' || strstr(s, *t) != NULL) {
char *key = *(t+1) ? *(t+1) : *t;
res = vi_counter_incr(ht, key);
if (res == 0) return 1;
return 0;
}
t += 2;
}
return 0;
}
/* Process Operating Systems populating the relative hash table.
* Return non-zero on out of memory. */
int vi_process_os(struct vih *vih, char *agent)
{
/* Order may matter. */
char *oslist[] = {
"Windows", NULL,
"Win98", "Windows",
"Win95", "Windows&