#!/usr/bin/perl -w # Australian TV Guide XMLTV grabber by Damon Searle # Derived from a yahoo XMLTV grabber by Ron Kellam which was itself... # Derived from original code by Justin Hawkins # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # 30 Oct 2004 # Damon Searle # - wrote first version # - gets data from NineMSN as a backup. Its not that fancy, # 31 Oct 2004 # Fred Donelly # - added an option so that the output file can be specified on the # command line and from the quick test I gave it, it now works with # mythfilldatabase. # - $offset set to +1000 at the top and then had "+1000" set in a # output string further down rather than the variable # 4 Nov 2004 # Paul Andreassen # - learned some perl and now wants to go back to python # - added and then reduced status info # - retry on failure to getstore # - changed cache to '/var/local/tv_grab_au' # - added threading for each day # 5 Nov 2004 # - improved threading with use of queue # Eyal Lebedinsky # - easier location selection # 8 Nov 2004 # Paul # - fixed pid=0 bug # - did some merging, I hate merging # 9 Nov 2004 # Rob Hill # - added Sydney # 10 Nov 2004 # Mary Wright # - digital info for Sydney # Paul # - more cleanup and improved error checking # - used mirror instead of getstore to get any updates # - mirror didn't work replaced with own smarts to check for updates to times use strict; use Getopt::Long; use XMLTV; use LWP::Simple; use Date::Manip; use File::Path; use threads; use Thread::Queue; # Instructions: # Select your region and source. # If your location isn't listed below, go to # http://tvguide.ninemsn.com.au/guide/ select your area # look at the last number in the URL before ".asp" and set # the region variable below. Then put the channel names as listed # on the tv guide site into the variables below. # Then set your XMLTV ids from the database in the _XMLTVID variables. # # If it doesn't work with mythfilldatabase, try: # ./tv_grab_au # mythfilldatabase --file 1 -1 /var/local/tv_grab_au/guide.xml # pick your region # my $location = "Brisbane"; #my $location = "Canberra"; #my $location = "Sydney"; # pick your source # my $source = "free"; #my $source = "freesd"; #my $source = "freehd"; # choose the XMLID URL suffix that mythtv knows # my $XMLTVID_URL = "d1.com.au"; # change to how you think it should work my $days_to_grab = 7; my $threads = 5; my $retrys = 3; my $secondsbeforeretry = 2; # Variables my $guide_url = "http://tvguide.ninemsn.com.au/guide/"; my $details_url = "http://tvguide.ninemsn.com.au/closeup/default.asp?pid="; my $cache_dir = "/var/local/tv_grab_au"; my $XMLTV_prefix = $source . "." . $location . "."; my $XMLTV_suffix = "." . $XMLTVID_URL; my $region; my $offset; my $ABC; my $Prime; my $SBS; my $Ten; my $WIN; my $ABC_XMLTVID; my $Prime_XMLTVID; my $SBS_XMLTVID; my $Ten_XMLTVID; my $WIN_XMLTVID; if ("Canberra" eq $location) { $region = "126"; $offset = "+1100"; $ABC = "ABC NSW"; $Prime = "Prime Southern"; #Channel 7 in Sydney/Melbourne/etc $SBS = "SBS Sydney"; $Ten = "Southern Cross TEN Capital"; $WIN = "WIN Television NSW"; #Channel 9 in Sydney/Melbourne/etc if ("free" eq $source) { $ABC_XMLTVID = $XMLTV_prefix . "2" . $XMLTV_suffix; $Prime_XMLTVID = $XMLTV_prefix . "PrimS" . $XMLTV_suffix; $SBS_XMLTVID = $XMLTV_prefix . "SBS" . $XMLTV_suffix; $Ten_XMLTVID = $XMLTV_prefix . "10Cap" . $XMLTV_suffix; $WIN_XMLTVID = $XMLTV_prefix . "WIN" . $XMLTV_suffix; } elsif ("freesd" eq $source) { $ABC_XMLTVID = $XMLTV_prefix . "2" . $XMLTV_suffix; $Prime_XMLTVID = $XMLTV_prefix . "7" . $XMLTV_suffix; $SBS_XMLTVID = $XMLTV_prefix . "SBS" . $XMLTV_suffix; $Ten_XMLTVID = $XMLTV_prefix . "10" . $XMLTV_suffix; $WIN_XMLTVID = $XMLTV_prefix . "9" . $XMLTV_suffix; } else { print "Unknows source '$source' for $location\n"; exit (1); } } elsif ("Brisbane" eq $location) { $region = "79"; $offset = "+1000"; $ABC = "ABC QLD"; $Prime = "Channel Seven Queensland"; $SBS = "SBS Queensland"; $Ten = "Southern Cross TEN Queensland"; $WIN = "WIN Television QLD"; if ("free" eq $source) { $ABC_XMLTVID = $XMLTV_prefix . "2" . $XMLTV_suffix; $Prime_XMLTVID = $XMLTV_prefix . "7" . $XMLTV_suffix; $SBS_XMLTVID = $XMLTV_prefix . "SBS" . $XMLTV_suffix; $Ten_XMLTVID = $XMLTV_prefix . "10" . $XMLTV_suffix; $WIN_XMLTVID = $XMLTV_prefix . "9" . $XMLTV_suffix; } else { print "Unknows source '$source' for $location\n"; exit (1); } } elsif ("Sydney" eq $location) { $region = "73"; $offset = "+1000"; $ABC = "ABC NSW"; $Prime = "Channel Seven Sydney"; $SBS = "SBS Sydney"; $Ten = "Network TEN Sydney"; $WIN = "Channel Nine Sydney"; if ("free" eq $source) { $ABC_XMLTVID = $XMLTV_prefix . "2" . $XMLTV_suffix; $Prime_XMLTVID = $XMLTV_prefix . "7" . $XMLTV_suffix; $SBS_XMLTVID = $XMLTV_prefix . "SBS" . $XMLTV_suffix; $Ten_XMLTVID = $XMLTV_prefix . "10" . $XMLTV_suffix; $WIN_XMLTVID = $XMLTV_prefix . "9" . $XMLTV_suffix; } elsif ("freesd" eq $source) { $ABC_XMLTVID = $XMLTV_prefix . "2" . $XMLTV_suffix; $Prime_XMLTVID = $XMLTV_prefix . "7" . $XMLTV_suffix; $SBS_XMLTVID = $XMLTV_prefix . "SBS" . $XMLTV_suffix; $Ten_XMLTVID = $XMLTV_prefix . "10" . $XMLTV_suffix; $WIN_XMLTVID = $XMLTV_prefix . "9" . $XMLTV_suffix; } else { print "Unknows source '$source' for $location\n"; exit (1); } } else { print "Unknows location '$location'\n"; exit (1); } my $prog_ref; my $chan_ref; $$chan_ref{$ABC} = { 'id' => $ABC_XMLTVID, 'display-name' => [ [ $ABC, undef ]]}; $$chan_ref{$Prime} = { 'id' => $Prime_XMLTVID, 'display-name' => [ [ $Prime, undef ]]}; $$chan_ref{$SBS} = { 'id' => $SBS_XMLTVID, 'display-name' => [ [ $SBS, undef ]]}; $$chan_ref{$Ten} = { 'id' => $Ten_XMLTVID, 'display-name' => [ [ $Ten, undef ]]}; $$chan_ref{$WIN} = { 'id' => $WIN_XMLTVID, 'display-name' => [ [ $WIN, undef ]]}; # Options my $opt_days; my $opt_output; my $opt_config; GetOptions('days=i' => \$opt_days, 'output=s' => \$opt_output, 'config=s' => \$opt_config, ); if ($opt_days) { $days_to_grab = $opt_days } if (!($opt_output)) { $opt_output = $cache_dir . "/guide.xml"; } # $opt_config should probably do something print "grabing $days_to_grab days into $opt_output\n"; print "starting $threads threads\n"; my @thrlist; my $datepids = Thread::Queue->new; for (my $thread=0; $thread<$threads; $thread++) { push @thrlist, threads->new(\&fetch_details); } print "loading queue\n"; my $currentday = &ParseDate("today"); my $day_counter = 1; while ($day_counter <= $days_to_grab) { my $date = &UnixDate($currentday, "%d%m%Y"); my @day_lines = get_day($date,1); if (@day_lines == 0) { $currentday = &DateCalc($currentday, "+ 1 day"); $day_counter++; next; } my @pids; my @rowspans; foreach my $line (@day_lines) { foreach my $link (split /\n|tr|TR|TD|tr/, $line ) { if ($link =~ /closeup\/default.asp/) { my $rowspan = $link; $rowspan =~ s/.+rowspan=//g; $rowspan =~ s/ .+//g; $link =~ s/.+pid=//g; $link =~ s/".+//g; if (($rowspan =~ /\d+/) and ($link =~ /\d\d+/)) { push @pids, $link; push @rowspans, $rowspan; } } } } if (!same_guide($date,@pids,@rowspans)) { for (my $count=0; $count <= $#pids; $count++) { $datepids->enqueue($date . "-" . $pids[$count]); } } $day_counter++; $currentday = &DateCalc($currentday, "+ 1 day"); } for (my $thread=0; $thread<$threads; $thread++) { $datepids->enqueue(0 . "-" . 0); } print "queue is complete\n"; foreach my $thr (@thrlist) { $thr->join; } print "all threads done\n"; print "building xml structure\n"; $currentday = &ParseDate("today"); $day_counter = 1; while ($day_counter <= $days_to_grab) { my @pids; my $date = &UnixDate($currentday, "%d%m%Y"); my $guide_pr_file = $cache_dir . "/" . $date . "/guide.pr"; if (open(PR, $guide_pr_file)) { my @pr = split / /, ; close(PR); if ($#pr != 0) { my $pidcount = ($#pr - 1)/2; @pids=@pr[0..$pidcount]; } else { @pids=(); } } else { print "can't read $guide_pr_file\n"; $currentday = &DateCalc($currentday, "+ 1 day"); $day_counter++; next; } my $retry = 0; foreach my $pid (@pids) { my @details = get_details($date, $pid); if (@details == 0) { next; } my $show_details_table = ""; my $use_line = 0; foreach my $line (@details) { if ($line =~ /bgColor=#f7f3e8/) { $use_line = 0; } if ($use_line == 1) { $show_details_table .= $line; } if ($line =~ /bgcolor=#ffffff/) { $use_line = 1; } } $show_details_table =~ s/<[^>]*>/\n/g; $show_details_table =~ s/\ \;//g; #$show_details_table =~ s/
||<\/B><\/b>/\n/g; #$show_details_table =~ s/Genre://g; #$show_details_table =~ s/Rated:/\n/g; my $count = 0; my $channel = ""; my $start_date = &UnixDate($currentday, "%Y-%m-%d"); my $time; my $title1 = ""; my $title2 = ""; my $genre = ""; my $descr = ""; my $details = ""; my $duration; #print $show_details_table. "\n\n\n"; foreach my $line (split /\n/, $show_details_table) { if ($count == 4){ #print "Time: " . $line . "\n"; $time = $line; } elsif ($count == 7){ $channel = $line; #print "Channel: " . $line . "\n"; } elsif ($count == 19){ $title1 = $line; #print "Program: " . $line . "\n"; } elsif ($count == 20){ $line =~ s/ - //g; $title2 = $line; #print "Subtitle: " . $line . "\n"; } elsif ($count == 21){ $line =~ s/\D//g; $duration = $line; #print "Run time: " . $line . "\n"; } elsif ($count == 22){ $line =~ s/[^A-Z]//g; $details = $line; #print "Rating: " . $line . "\n"; } elsif ($count == 26){ $line =~ s/ //g; $genre = $line; #print "Genre: " . $line . "\n"; } elsif ($count == 28 && $line =~ /[a-zA-Z]/){ $descr = $line; #print "Description: " . $line . "\n"; } #elsif ($count == 26 && $line =~ /[a-zA-Z]/){ # $descr = $line; # print "Description: " . $line . "\n"; #} #print $count .": " . $line . "\n"; ++$count; } if ($count < 21) { my $name = $cache_dir . "/" . $date . "/" . $pid . ".html"; if ($retry++ >= $retrys) { print "Giving up on $name\n"; $retry=0; next; } print "$name is too short, removing and trying again\n"; unlink $name; push @pids, $pid; next; } my $start_time = &UnixDate($time, "%H:%M"); # my $start_datetime = $start_date . " " . $start_time; if ($start_time =~ /00:|01:|02:|03:|04:|05:/) { $start_date = &DateCalc($start_date, "+ 1 day"); } $start_date = &UnixDate($start_date, "%Y%m%d"); my $end_time = &DateCalc($start_time, " + " . $duration . "minutes"); $end_time = &UnixDate($end_time, "%H:%M"); my $end_date; if (&Date_Cmp($start_time, $end_time) <= 0) { $end_date = $start_date; } else { my $err; my $edate = &DateCalc($start_date, "+ 1 day", \$err); $end_date = &UnixDate($edate, "%Y%m%d"); } if ($channel =~ /$ABC/) { $channel = $ABC_XMLTVID; } elsif ($channel =~ /$Prime/) { $channel = $Prime_XMLTVID; } elsif ($channel =~ /$SBS/) { $channel = $SBS_XMLTVID; } elsif ($channel =~ /$Ten/) { $channel = $Ten_XMLTVID; } elsif ($channel =~ /$WIN/) { $channel = $WIN_XMLTVID; } my $start; my $stop; $start = $start_date . &UnixDate($start_time,"%H%M") . "00 " . $offset; $stop = $end_date . &UnixDate($end_time,"%H%M") . "00 " . $offset; my $a_prog = { channel => $channel, start => $start, stop => $stop, title => [ [ $title1, undef ] ] }; $descr =~ s/^\s+//; $descr =~ s/\s+$//; if ($title2) { $$a_prog{'sub-title'} = [ [ $title2, undef ] ]; } if ($descr) { $$a_prog{desc} = [ [ $descr, undef ] ]; } if ($genre) { $$a_prog{category} = [ [ $genre, undef ] ]; } push @$prog_ref, $a_prog; } $currentday = &DateCalc($currentday, "+ 1 day"); $day_counter++; } my $data = [ 'ISO-8859-1', { 'source-info-name' => 'http://tvguide.ninemsn.com.au/', 'generator-info-name' => 'NineMSN grabber', 'generator-info-url' => '', 'generator-info-name' => "XMLTV - tv_grab_au NineMSN v0.2" }, $chan_ref, $prog_ref ]; my $hour=&UnixDate(&ParseDate("now"),"%H"); if ($hour < 6) { print "can't update between 0:00 and 6:00\n"; # If we update between these hours we lose any data we had up to 6:00. # This is because the web site starts a day at 6:00 and ends at 6:00 the next day # This could be fixed by read the previous days info and adding the needed shows. # I did try adding the whole previous day but got lots of mythfilldatabase errors. exit(1); } print "writing file\n"; my $fh = new IO::File ">$opt_output"; XMLTV::write_data($data, OUTPUT=>$fh); print "done\n"; # subroutines sub get_day { my $date = shift; my $force = shift; my $url = $guide_url . $date . "_" . $region . ".asp"; my $guide_dir = $cache_dir . "/" . $date; my $guide_file = $guide_dir . "/guide.html"; mkpath ($guide_dir); for (my $retry=0; (($force==1) || (!(-e $guide_file))) && is_error(getstore($url, $guide_file)) && ($retry<$retrys); $retry++) { sleep($secondsbeforeretry); print "."; } my @guide_lines; if (open(GUIDE, $guide_file)) { @guide_lines = ; close(GUIDE); } else { @guide_lines = (); print "Giving up on $guide_file\n"; } return @guide_lines; } sub get_details { my $date = shift; my $program_id = shift; my $url = $details_url . $program_id; my $guide_dir = $cache_dir . "/" . $date; my $details_file = $guide_dir . "/" . $program_id . ".html"; mkpath ($guide_dir); for (my $retry=0; (!(-e $details_file)) && is_error(getstore($url, $details_file)) && ($retry<$retrys); $retry++) { sleep($secondsbeforeretry); print "."; } my @details_lines; if (open(DETAILS, $details_file)) { @details_lines =
; close(DETAILS); } else { @details_lines = (); print "Giving up on $details_file\n"; } return @details_lines; } sub fetch_details { my $datepid=$datepids->dequeue; my @datepidl=split /-/, $datepid; my $date = $datepidl[0]; my $pid = $datepidl[1]; while (($date!=0) and ($pid!=0)) { my $guide_dir = $cache_dir . "/" . $date; mkpath ($guide_dir); my $url = $details_url . $pid; my $details_file = $guide_dir . "/" . $pid . ".html"; for (my $retry=0; is_error(getstore($url, $details_file)) && ($retry<$retrys); $retry++) { sleep($secondsbeforeretry); } $datepid=$datepids->dequeue; @datepidl=split /-/, $datepid; $date = $datepidl[0]; $pid = $datepidl[1]; } } sub same_guide { my $date = shift; my @pidsrowspans = @_; my $guide_pr_file = $cache_dir . "/" . $date . "/guide.pr"; if (open(PR, $guide_pr_file)) { my @pr = split / /, ; close(PR); if ($#pr == $#pidsrowspans) { my $count; for ($count=0; ($count <= $#pr) && ($pr[$count]==$pidsrowspans[$count]); $count++) { } if (--$count==$#pr) { print "guide is the same as last time for $date\n"; return 1; } } } print "guide different for $date and must be retrieve\n"; if (open(PR, ">", $guide_pr_file)) { print PR "@pidsrowspans"; close(PR); } else { print "can't open for writing $guide_pr_file\n"; } return 0; }