#!/usr/bin/perl # Source code to pika.cgi, minus the part that translates. # You'll want to change all instances of eye-of-newt.com in this # script to whatever your server is. # http://pikachize.eye-of-newt.com # Copyright 2001-2003 Aneel Nazareth use LWP::UserAgent; use CGI qw(param); # This doesn't really do what I'd hoped use POSIX 'locale_h'; setlocale(LC_CTYPE, 'en_US.iso-8859-1'); use locale; # takes a single argument, a word. sub pikachize_word { # this piece is what separates the pikachizer from a generic translator # since I have no interest in seeing the pikachizer duplicated, but # I'm happy to facilitate other translators, I've replaced this # with a trivial transformation return uc(shift); } # takes two arguments: text to be excited, and a numeric excitement level sub excite_word { my $text = shift || ''; my $excitement = shift || 0; # perturb this a bit, for variety $excitement += int(rand(3)-1); # warn "exciting '$text' by $excitement\n"; if ($excitement < 1) { return $text; } if ($excitement < 2) { return ucfirst($text); } if ($excitement < 3) { return uc($text); } return uc($text) . "!" x ($excitement - 2); } # takes two arguments: text to be pikachized, and a numeric excitement level sub pikachize { my $text = shift || ''; my $excitement = shift || 0; $text =~ s/\w+/excite_word(pikachize_word($&), $excitement)/ge; return $text; } # --------------------------------------------------------------------------- # The body of the script... # --------------------------------------------------------------------------- $debug = 0; $start = time; $url = param('url'); if ($url) { # this serves the dual purpose of fixing urls and # preventing the user from entering file:/ urls # that return files on the local system if ($url !~ /^http:\/\//i) { $url = 'http://' . $url; } #warn "prequash url: $url\n"; # ignore multiple invocations of this script $url =~ s{http://pika[^\.]*\.eye-of-newt\.com/pika\.cgi\?url=}{}g; #warn "url: $url\n"; open(LOG, ">>/var/log/pikachizer.log"); print LOG "$$ \t". time . " \t$url \t" . length($content) . " \t" . (time - $start) . " \tStarting...\n"; close(LOG); $ua = LWP::UserAgent->new(timeout => 30, requests_redirectable => []); $response = $ua->head($url); $content_type = $response->content_type(); $content_length = $response->content_length(); if ($content_type && $content_type !~ /^text/i) { # don't use my upstream bandwidth for this... print "Location: $url\n\n"; # print "Content-type: $content_type\n\n$content"; open(LOG, ">>/var/log/pikachizer.log"); print LOG "$$ \t". time . " \t$url \t" . length($content) . " \t" . (time - $start) . " \tLocation\n"; close(LOG); exit(0); } if ($content_type && $content_type =~ m/charset=([a-zA-Z0-9-]+)/) { setlocale(LC_CTYPE, "en_US.$1"); } warn "locale now: " . setlocale(LC_CTYPE); if ($content_length > 500000) { print "Content-type: text/html\n\n"; print "

Content Too Large

This document $content_length bytes, which is larger than most reasonable web pages. Are you trying to Pikachize an MP3 or something?\n"; open(LOG, ">>/var/log/pikachizer.log"); print LOG "$$ \t". time . " \t$url \t" . length($content) . " \t" . (time - $start) . " \tError: Too Large\n"; close(LOG); exit(0); } unless (defined ($content = $ua->get($url)->content())) { unless (defined ($content = $ua->get($url)->content())) { unless (defined ($content = $ua->get("$url/")->content())) { print "Content-type: text/html\n\n"; print "

URL Error

Couldn't get contents of '$url'\n"; open(LOG, ">>/var/log/pikachizer.log"); print LOG "$$ \t". time . " \t$url \t" . length($content) . " \t" . (time - $start) . " \tERROR: Couldn't fetch\n"; close(LOG); exit(0); } } } $url =~ m/(.*[^:\/])\//; $dir = $1 || $url; # hopefully, this is the url, up to the last slash.. $dir =~ s/\/?$/\//; # make sure dir ends with a / $dir =~ s%/([^/]*)%"/" . CGI::escape($1)%ge; $url =~ m/((\/\/|[^\/])*)/; # hopefully, this is the url, up to $server = $1; # the first slash after :// $server =~ s/\/?$/\//; # make sure server ends with a / warn "dir=$dir\n" if $debug; warn "server=$server\n" if $debug; # We need to test for (illegal) base tags that do not contain HREFs if ($content !~ m/]*HREF/i) { warn "Didn't find real base tag\n" if $debug; if ($content !~ s///ig) { warn "Didn't find head tag\n" if $debug; $content = "\n" . $content; } } } else { $content = param('text'); } print "Content-type: text/html\n\n"; if (!$content) { print "

Content Error

There is nothing to pikachize!\n"; open(LOG, ">>/var/log/pikachizer.log"); print LOG "$$ \t". time . " \t$url \t" . length($content) . " \t" . (time - $start) . "ERROR: No content\n"; close(LOG); exit(0); } if (!$url) { $content = '

' . $content . '

'; } $output = ''; $excitement = 0; while ($content ne '') { if ($content =~ m/^<(b|bold|i|italic|strong|em)>/i) { # warn "found $&. increasing excitement\n"; $excitement++; } if ($content =~ m/^<\/(b|bold|i|italic|strong|em)>/i) { # warn "found $&. decreasing excitement\n"; $excitement--; } # Escape anything between two SCRIPT tags (don't pikachize JavaScript!) if ($content =~ s/^]*>(.|\n)*?<\/SCRIPT[^>]*>//i) { warn "found whole script\n" if $debug; $output .= $&; print $&; } # Escape anything between two STYLE tags (don't pikachize CSS!) elsif ($content =~ s/^]*>(.|\n)*?<\/STYLE[^>]*>//i) { $output .= $&; print $&; } # Escape anything between two NOPIKA tags elsif ($content =~ s/^]*>(.|\n)*?<\/NOPIKA[^>]*>//i) { $output .= $&; print $&; } # Check for a meta tag describing the language elsif ($content =~ s/^]*>//i) { setlocale(LC_CTYPE, "en_US.$1"); } # Otherwise, pikachize everything that's not a tag... elsif ($content =~ s/^<[^>]*>//) { $tag = $&; if ($tag =~ /href|src/i) { if ($tag !~ /^<((BASE)|(LINK)|(IMG))/i && $tag !~ /pika=["']?no["']?/i) { $newurl = ""; # Encode hrefs to go through this script #$newurl .= #"http://$ENV{'SERVER_NAME'}$ENV{'SCRIPT_NAME'}?url="; $newurl .= 'http://pikachize.eye-of-newt.com/pika.cgi?url='; # if this is a mailto link, skip it if ($tag !~ /href="?mailto:/i) { # if this is an absolute url (contains ':/'... not a good # metric, I know) wrap it if ($tag !~ s/(href|src)(=["']?)([^'"\W]*:\/)/$1$2$newurl$3/i) { # if it starts with a /, prepend server, # otherwise add $dir if ($tag !~ s/((href|src)=["']?)\//$1$newurl$server/i) { $tag =~ s/((href|src)=["']?)(\.\/)?/$1$newurl$dir/i; } } } $tag =~ s/\/[^\/]+\/\.\.\//\//g; } } # pikachize alt tags $tag =~ s/^(