#!/usr/bin/perl -w

use strict;
use IO::File;
use POSIX qw(strftime);
require LWP::UserAgent;

# Utility Functions
# Single LWP client shared by every request this script makes.
my $ua = LWP::UserAgent->new();

# get_page($url, $file_to_store)
#
# Issues a GET for $url through the shared user agent.  $file_to_store is
# optional and is handed to request() as its second argument
# (presumably the file LWP saves the body into -- confirm against the
# LWP::UserAgent documentation).
#
# Returns the response content on success.  On failure the error is
# printed as HTML with a bare print (the currently selected handle) and 0
# is returned, so callers can simply test the result for truth.
sub get_page {
  my ($url, $file_to_store) = @_;

  my $response = $ua->request(HTTP::Request->new("GET", $url), $file_to_store);

  unless ($response->is_success) {
    print $response->error_as_HTML;
    return 0;
  }

  return $response->content();
}


# Per-site records.  Each starts with just the site's base URL under 'url';
# other parts of the script add and read further keys on the same hashes.
my %dilbert = (url => 'http://www.dilbert.com');
my %ucomics = (url => 'http://www.ucomics.com');

# date_str_YYMMDD()
#
# Returns the current local date as a six-digit YYMMDD string
# (two-digit year, month, day).  Any arguments passed in are ignored.
sub date_str_YYMMDD {
	my @now = localtime;
	return strftime("%y%m%d", @now);
}
	
# A parser returns (url of the strip, url of the img) for a comic

# Comics whose label in the dilbert.com selector differs from the display
# name used in this script: display name => label to search for.
my %alt_name = ( 'Dilbert' => 'Dilbert.com' );

# dilbert_parser($comic)
#
# Looks $comic up in the OPTION list of the already-fetched dilbert.com
# front page ($dilbert{page}), fetches the strip page that option points
# to, and extracts the strip image from it.
#
# Returns (strip page URL, image URL).  Returns an empty list when the
# comic is not listed or no matching image tag is found.  URLs that do not
# start with "http" are prefixed with $dilbert{url}.
sub dilbert_parser {
	my ($comic) = @_;
	$comic = $alt_name{$comic} if defined $alt_name{$comic};

	# \Q...\E makes the name match literally; without it the dots in
	# "B.C." or "Dilbert.com" would act as regex wildcards.
	my ($url) = $dilbert{page} =~ /OPTION VALUE="(\S*)">\s*\Q$comic\E/i;
	$url or return;
	$url = $dilbert{url} . $url if $url !~ /^http/i;

	# A failed fetch yields a false value, which simply fails the match.
	my $buf = get_page($url);
	my ($img) = $buf =~ /IMG\s*SRC=\S*["'](\S*\d\d\d\d\d\d\d+\.(?:gif|jpg))["'][^>]*alt/i
		or return;
	$img = $dilbert{url} . $img if $img !~ /^http/i;
	return ($url, $img);
}

# Short codes that appear in ucomics.com image paths.  Comics listed here
# get their image URL computed from today's date instead of being scraped.
my %ucomics_abbr = ( 'Garfield' => 'ga',
                     'FoxTrot' => 'ft',
                     'The Boondocks' => 'bo',
                     'Calvin & Hobbes' => 'ch');

# ucomics_parser($comic)
#
# Looks $comic up in the OPTION list of the already-fetched ucomics.com
# front page ($ucomics{page}) to find its strip page.  For comics with a
# known abbreviation the image URL is built from the current local date;
# otherwise the strip page is fetched and searched for an image tag.
#
# Returns (strip page URL, image URL), or an empty list when the comic is
# not listed or no image is found.  URLs that do not start with "http" are
# prefixed with $ucomics{url}.
sub ucomics_parser {
	my ($comic) = @_;

	# \Q...\E: match the comic name literally, not as a regex fragment.
	my ($url) = $ucomics{page} =~ /OPTION VALUE="(\S*)">\s*\Q$comic\E/i;
	$url or return;
	$url = $ucomics{url} . $url if $url !~ /^http/i;

	if (my $abbr = $ucomics_abbr{$comic}) {
		# $abbr is interpolated first; strftime then fills in the date.
		return ($url, strftime("http://images.ucomics.com/comics/$abbr/".
				       "%Y/$abbr%y%m%d.gif", localtime));
	}

	# A failed fetch yields a false value, which simply fails the match.
	my $buf = get_page($url);
	my ($img) = $buf =~ /IMG\s*SRC=\S*"(\S*\/\S+\d\d\d+\.(?:gif|jpg))/i
		or return;
	# Same treatment as the strip URL above: make site-relative paths
	# absolute so the link works from the generated page.
	$img = $ucomics{url} . $img if $img !~ /^http/i;
	return ($url, $img);
}

# Comics whose image URL has a fixed structure, so no page has to be
# fetched to work it out.  Per comic:
#   url      - page the comic's name links to
#   prefix   - leading part of the image URL (optional)
#   variable - code ref whose result is appended after the prefix (optional)
#   suffix   - trailing part of the image URL (optional)
my %fixed_image_info = (
	'Sherman\'s Lagoon' => {
		'url'      => 'http://cgibin.rcn.com/fillmore.dnai/cgi-bin/sviewer.pl',
		'prefix'   => 'http://www.slagoon.com/dailies/SL',
		'suffix'   => '.gif',
		'variable' => \&date_str_YYMMDD,
	},
	# Jaimie Hollenback's comic, included at his request.
	'Kim & Jason' => {
		'url'    => 'http://www.kimandjason.com',
		'prefix' => 'http://www.kimandjason.com/stuff/kj_today.gif',
	},
);

# fixed_url($comic)
#
# Builds the image URL for a comic listed in %fixed_image_info by joining
# its prefix, the result of its 'variable' code ref (called with the comic
# name and its info hash), and its suffix; parts that are absent or false
# are skipped.
#
# Returns (page URL, image URL), or an empty list for an unknown comic.
sub fixed_url {
	my ($comic) = @_;

	my $info = $fixed_image_info{$comic};
	return unless $info;

	my @parts;
	push @parts, $info->{prefix} if $info->{prefix};
	if (my $fn = $info->{variable}) {
		push @parts, scalar $fn->($comic, $info);
	}
	push @parts, $info->{suffix} if $info->{suffix};

	return ($info->{url}, join('', @parts));
}
	
# Sites whose front page is downloaded once; the parsers read the cached
# copy from each hash's 'page' entry.
my @sites = (\%dilbert, \%ucomics);

# Flat list of (display name => parser) pairs.  It is an array rather than
# a hash so that the order below is the order on the generated page.
my @comics = (
	'Dilbert'                 => \&dilbert_parser,
	'Garfield'                => \&ucomics_parser,
	'Peanuts'                 => \&dilbert_parser,
	'Betty'                   => \&dilbert_parser,
	'Nancy'                   => \&dilbert_parser,
	'Calvin & Hobbes'         => \&ucomics_parser,
	'FoxTrot'                 => \&ucomics_parser,
	'Get Fuzzy'               => \&dilbert_parser,
	'Frank and Ernest'        => \&dilbert_parser,
	'Ziggy'                   => \&ucomics_parser,
	'Herman'                  => \&dilbert_parser,
	'Non Sequitur'            => \&ucomics_parser,
	'Reality Check'           => \&dilbert_parser,
	'Pickles'                 => \&dilbert_parser,
	'For Better or For Worse' => \&ucomics_parser,
	'Sherman\'s Lagoon'       => \&fixed_url,
	'The Boondocks'           => \&ucomics_parser,
	'B.C.'                    => \&dilbert_parser,
	'PC and Pixel'            => \&dilbert_parser,
	'Pearls Before Swine'     => \&dilbert_parser,
	'The Born Loser'          => \&dilbert_parser,
	'Doonesbury'              => \&ucomics_parser,
	'Wizard of Id'            => \&dilbert_parser,
	'Shoe'                    => \&ucomics_parser,
	'Cathy'                   => \&ucomics_parser,
	'Speed Bump'              => \&dilbert_parser,
	'Jump Start'              => \&dilbert_parser,
	'Kim & Jason'             => \&fixed_url,
);

# Download each site's front page once, before any parser runs.  When a
# fetch fails the placeholder "NONE\n" is stored instead, so the parsers'
# lookups in that page simply find nothing.
for my $site (@sites) {
  my $front_page = get_page($site->{url});
  $site->{page} = $front_page ? $front_page : "NONE\n";
}

# The page is written relative to the current directory, so move into the
# web root first; the path depends on the platform.
my $web_root = ($^O =~ /Win32/) ? "d:/public_html" : "/home/public_html";
chdir($web_root) or die "Could not chdir: $!";

# One timestamp for the whole run.
my $time = time();

# MMDDYYYY stamp for that timestamp.
# NOTE(review): $date is not referenced anywhere else in the visible
# script; the original comment spoke of checking for yesterday's page.
my $date = strftime("%m%d%Y", localtime($time));


# Buffer that accumulates the text of the generated page.
my $html_str = "";

# output(LIST)
#
# Appends its arguments to $html_str, joined with the list separator $"
# (a single space unless it has been changed) -- the same text that
# interpolating "@_" produces.
sub output {
  return $html_str .= join($", @_);
}

# Human-readable form of the run timestamp (localtime in scalar context).
my $now = localtime($time);

# Emit the page header: title, an "expires" meta tag carrying a fixed date
# in the past, the heading with the generation time, the notice about
# Uclick.com (with links to the Firefox script and blog entry), and the
# opening tag of the table that the comic rows are appended to.
#
# The notice is one double-quoted string that spans several source lines,
# so its embedded newlines and trailing spaces are part of the output.
#
# NOTE(review): the "PDT" after $now is a literal, not derived from the
# actual time zone -- confirm it matches the server's zone year-round.
# NOTE(review): the second <large> looks like it was meant to be a closing
# tag, and the <meta> is emitted after <body> -- confirm before changing,
# since the text below is emitted verbatim.
output "<html> <title> Comic Strips</title> <body bgcolor=white>".
  "<meta http-equiv=\"expires\" content=\"Sun, 13 Aug 2000 16:30:00 PDT\">".
  "<br> <br>".
  "<center><large><b> Comic Strips</b><large> (This page: ".
  "$now PDT)".
  "<br></center>\n".
  "<br><table width=100% cellpadding=0 cellspacing=0 id=\"messagetable\">
<tr><td width=10%><td><b> Uclick.com has asked me not to show images here. 
I am just providing direct links to the strips. Aditya has created a 
<a href=\"http://userscripts.org/scripts/show/5815\">Firefox script</a> that 
loads the images. He also has a related 
<a href=\"http://abaditya.wordpress.com/2006/10/05/daily-comics-on-the-net/\">blog entry</a>. The instructions are located on the script page.
Soon this message will move to the bottom and could be hidden 
when the images are loaded by the script.<td width=10%></table><br>".
"<table width=100\% cellpadding=0 cellspacing=0>";

# Names of the comics for which no image URL was obtained, in list order.
my @missing_comics = ();

# Consume the (name, parser) pairs two elements at a time.  A copy is
# used so that @comics itself is left untouched.
my @pending = @comics;
while (@pending) {
  my ($comic, $fn) = splice(@pending, 0, 2);

  my ($url, $img) = $fn->($comic);

  # No image URL: remember the comic as missing and emit no row for it.
  unless ($img) {
    push @missing_comics, $comic;
    next;
  }

  # One table row: the comic name linking to its page, then a link to the
  # image itself.
  my $row = "<tr> <td width=10\%> </td> <td> ".
    "<STRONG> <a href=\"$url\">$comic</a> </strong> ".
    " : <a href=\"$img\" alt=\"$comic\" border=0> Image </a>".
    "<br><br></td> </tr>\n";
  output($row);
}

# Close the comics table; if any comic could not be resolved, list the
# names (space-separated) right after it.
my $missing_line = '';
$missing_line = "Missing: @missing_comics <br>\n" if @missing_comics;
output("</table> <br> <br> " . $missing_line);

# Fixed note about Calvin & Hobbes.
output('I love <a href="http://www.gocomics.com/calvinandhobbes/">Calvin & Hobbes</a>. It will be back soon after my vacation is over. Please hold on. <br>');

# Author signature, then the closing tags of the page.
my $signature = "<em> <a href=\"http://www.angadi.org/\">Raghu Angadi</a> </em>";
output("<br> " . $signature . "</body> </html>\n");

# Write the accumulated page to comics.html in the current directory.
# Three-argument open with a lexical handle avoids mode/filename parsing
# surprises; print and close are checked because buffered write errors
# (for example a full disk) may only be reported at close time.
open(my $out_fh, '>', 'comics.html') or die "Could not open comics.html: $!";
print {$out_fh} $html_str or die "Could not write comics.html: $!";
close($out_fh) or die "Could not close comics.html: $!";


