#!/usr/bin/perl -w
#
# qParser.pl by barce [at] cyphgen.com
# (cc) 2.5
#
# Scan web-server access logs and print the search-engine query found on
# each line, together with the client IP address.
#
# parse: "http://www.google.com/search?hl=en&lr=&q=gadamer+truth+and+method"
# parse: "http://www.google.com/search?hl=en&lr=&q=gadamer+truth+and+method&"
# parse: "http://www.google.com/search?as_q=trastevere"
# parse: "http://www.google.com/search?as_q=trastevere&"
# and get the search query
#
# also parse yahoo
# parse "http://search.yahoo.com/search?p=gadamer+truth+and+method&"
#
# uri_escape($val)   == urlencode($val)
# uri_unescape($val) == urldecode($val)
#
# Usage: qParser.pl [log_dir [log_file]]
#   log_dir   directory to scan for files whose name contains "access_log"
#             (defaults to the current directory)
#   log_file  when given, only the file with exactly this name is parsed
#
use strict;
use warnings;

use File::Basename qw(basename);
use File::Spec;
use URI::Escape;

# change these
my $s_dir = ".";    # directory that holds the access logs
my $s_file;         # optional: restrict the run to this one file name

# URL parameters that carry the search terms, per search engine.
my %query_params_for = (
    google => { map { $_ => 1 } qw(q as_q as_epq) },
    yahoo  => { p => 1 },
);

$s_dir  = $ARGV[0] if @ARGV >= 1;
$s_file = $ARGV[1] if @ARGV >= 2;

# Collect the log files to parse, in sorted order.
my @files;
opendir(my $dh, $s_dir) or die "no $s_dir directory: $!\n";
foreach my $name (sort readdir($dh)) {
    next unless $name =~ /access_log/;
    next if defined $s_file && $name ne $s_file;

    my $path = File::Spec->catfile($s_dir, $name);
    push @files, $path if -f $path;
}
closedir($dh);

parse_file($_) foreach @files;

# parse_file($path)
#
# Read the log file at $path line by line and print one line per search
# query found, in the form
#
#   <label>: <engine> <ip> (<query>)
#
# <label> is whatever follows "log." in the file's base name (for example a
# rotation suffix), or the whole base name when there is no such suffix.
# <ip> is the first dotted quad followed by whitespace on the log line, or
# the empty string when there is none.
#
# Dies if the file cannot be opened or closed.  Returns nothing.
sub parse_file {
    my ($path) = @_;

    # Compute the label once; it does not depend on the line being read.
    my $label = basename($path);
    $label =~ s/.*log\.(.*)/$1/;

    open(my $fh, '<', $path) or die "cannot open $path: $!\n";
    while (my $line = <$fh>) {
        my $s_ip = "";
        if ($line =~ /([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})\s+/) {
            $s_ip = $1;
        }

        foreach my $hit (find_queries($line)) {
            my ($engine, $s_query) = @{$hit};
            print "$label: $engine $s_ip ($s_query)\n";
        }
    }
    close($fh) or die "cannot close $path: $!\n";

    return;
}

# find_queries($line)
#
# Return a list of [engine, query] pairs for every non-empty search query
# found in a google or yahoo "/search?..." URL on $line.  Parameters are
# matched by their exact name, so "as_q=" or "oq=" is never mistaken for
# "q=".
sub find_queries {
    my ($line) = @_;
    my @hits;

    # $1 = engine name, $2 = the URL's query string (up to a quote, blank
    # or fragment marker).
    while ($line =~ m{(google|yahoo)\.[^"\s?]*?/search[^"\s?]*\?([^"\s#]*)}g) {
        my ($engine, $args) = ($1, $2);

        foreach my $pair (split /&/, $args) {
            next unless $pair =~ /\A([^=]+)=(.+)\z/s;
            my ($key, $value) = ($1, $2);
            next unless $query_params_for{$engine}{$key};

            # Turn "+" into a space BEFORE decoding, so that an encoded
            # plus sign (%2B) survives as a literal "+".
            $value =~ tr/+/ /;
            push @hits, [ $engine, uri_unescape($value) ];
        }
    }

    return @hits;
}