I want a list of the unique IP addresses for the machines that have requested pages from a specific directory of a Web application.
# Pull out the requests for just this directory.
# NOTE(review): the original wrote "grep DIRECTORY/request_log", fusing the
# search pattern and the input file into a single argument, so grep had no
# file to read and fell back to stdin. This assumes the full log is
# ./request_log and that DIRECTORY is the pattern to match -- confirm.
grep -- 'DIRECTORY' request_log > DIRECTORY/request_log

# Remove all hits to non-pages (e.g., graphics, CSS, JS, etc.).
# A single grep with several -e patterns drops a line if ANY pattern matches,
# which is the same result as chaining one "grep -v" per extension.
# NOTE(review): '\.js' also matches ".jsp" and ".json" requests -- confirm
# that excluding those is acceptable, or anchor the pattern to the log format.
grep -v -e '\.gif' -e '\.js' -e '\.jpg' -e '\.css' -- DIRECTORY/request_log \
  > DIRECTORY/page_hit_log
// pull out the IPs, sort the file, and remove duplicates