#!/usr/bin/perl -w

# Australian TV Guide XMLTV grabber by Damon Searle
# Derived from a yahoo XMLTV grabber by Ron Kellam which was itself...
# Derived from original code by Justin Hawkins
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# 30 Oct 2004
# Damon Searle
#  - wrote first version
#  - gets data from NineMSN as a backup. It's not that fancy,
# 31 Oct 2004
# Fred Donelly
#  - added an option so that the output file can be specified on the
#    command line and from the quick test I gave it, it now works with
#    mythfilldatabase.
#  - $offset set to +1000 at the top and then had "+1000" set in a
#    output string further down rather than the variable
# 4 Nov 2004
# Paul Andreassen
#  - learned some perl and now wants to go back to python
#  - added and then reduced status info
#  - retry on failure to getstore
#  - changed cache to '/var/local/tv_grab_au'
#  - added threading for each day
# 5 Nov 2004
#  - improved threading with use of queue
# Eyal Lebedinsky
#  - easier location selection
# 8 Nov 2004
# Paul
#  - fixed pid=0 bug
#  - did some merging, I hate merging
# 9 Nov 2004
# Rob Hill
#  - added Sydney
# 10 Nov 2004
# Mary Wright
#  - digital info for Sydney
# Paul
#  - more cleanup and improved error checking
#  - used mirror instead of getstore to get any updates
#  - mirror didn't work replaced with own smarts to check for updates to times
# 11 Nov 2004
#  - added program name in check
# 13 Nov 2004
#  - added freesd for Brisbane
# 14 Nov 2004
#  - --configure to exit nicely
#  - if no program data then skip program nicely, mainly for foxtel data
#  - added foxtel channels
# 17 Nov 2004
# Michael Cheshire
#  - added remaining foxtel channels
# Eyal Lebedinsky
#  - Fix misspelling Unknows -> Unknown
#  - Note: is Sydney now is on summer time +1100?
# 28 Nov 2004
# Paul
#  - converts &amp; -> &, &quot; -> "
#  - option to change all & to and
#  - added data from yesterday to fill 0:00 to 6:00
#  - deletes old cache days, may miss some if not run for a while
# 1 Dec 2004
# Mark Spieth with changes by Paul
#  - offset
#  - melbourne
#  - rated
# 10 Dec 2004
# Paul
#  - added Queensland
#  - another attempt at offset calculation
#  - start and stop time cleanup
# 11 Dec 2004
#  - more logical offset calculation

use strict;
use Getopt::Long;
use XMLTV;
use LWP::Simple;
use Date::Manip;    # timezone setup and calculations aren't correct
use Time::Local;
use File::Path;
use IO::File;
use threads;
use Thread::Queue;

# Instructions:
# Select your region and source.
# If your location isn't listed below, go to
# http://tvguide.ninemsn.com.au/guide/ select your area
# look at the last number in the URL before ".asp" and set
# the region variable below.  Then put the channel names as listed
# on the tv guide site into the variables below.
# Then set your XMLTV ids from the database in the XMLTVID_URL variable.
#
# If it doesn't work with mythfilldatabase, try:
#   ./tv_grab_au
#   mythfilldatabase --file 1 -1 /var/local/tv_grab_au/guide.xml

# pick your region
#
#my $location = "Canberra";
#my $location = "Brisbane";
my $location = "Queensland";
#my $location = "Sydney";
#my $location = "Melbourne";
#my $location = "Adelaide";
#my $location = "Australia";

# pick your source
#
#my $source = "free";
my $source = "freesd";
#my $source = "freehd";
#my $source = "foxtel";

# choose the XMLID URL suffix that mythtv knows
#
my $XMLTVID_URL = "d1.com.au";

# change to how you think it should work
my $days_to_grab         = 7;
my $threads              = 5;
my $retrys               = 3;
my $seconds_before_retry = 2;
my $convert_ands         = 1;
my $days_to_clean        = 7;

# Variables
Date_Init("TZ=UT");    # Date::Manip timezone setup and calculations aren't correct so disable
my $today        = UnixDate("today", "%Y%m%d");
my $yesterday    = DateCalc($today, "-1 day");
my $guide_url    = "http://tvguide.ninemsn.com.au/guide/";
my $details_url  = "http://tvguide.ninemsn.com.au/closeup/default.asp?pid=";
my $cache_dir    = "/var/local/tv_grab_au";
my $XMLTV_prefix = $source . "." . $location . ".";
my $XMLTV_suffix = "." . $XMLTVID_URL;

# Options
my $opt_days;
my $opt_output;
my $opt_configfile;
my $opt_configure = 0;
GetOptions(
    'days=i'        => \$opt_days,
    'output=s'      => \$opt_output,
    'config-file=s' => \$opt_configfile,
    'configure'     => \$opt_configure,
);
if ($opt_days) {
    $days_to_grab = $opt_days;
}
if (!($opt_output)) {
    $opt_output = $cache_dir . "/guide.xml";
}
# $opt_configfile should probably do something ('/home/mythtv/.mythtv/tv_grab_au.xmltv')
if ($opt_configure == 1) {
    print "configuration must be done in this script at $0\n";
    exit (0);
}
print "grabing $days_to_grab days into $opt_output\n";

# Setup
#
# Each location has a NineMSN region number and, per supported source, a map
# from the channel name shown on the guide site to the short XMLTV id part.
# $for_sources lets several sources share one channel map.
my $for_sources = sub {
    my ($channel_map, @sources) = @_;
    return map { $_ => $channel_map } @sources;
};

my %lineup_for = (
    "Canberra" => {
        region  => "126",
        sources => {
            $for_sources->(
                {
                    "ABC NSW"                    => "2",
                    "Prime Southern"             => "PrimS",
                    "SBS Sydney"                 => "SBS",
                    "Southern Cross TEN Capital" => "10Cap",
                    "WIN Television NSW"         => "WIN",
                },
                "free"
            ),
            $for_sources->(
                {
                    "ABC NSW"                    => "2",
                    "Prime Southern"             => "7",
                    "SBS Sydney"                 => "SBS",
                    "Southern Cross TEN Capital" => "10",
                    "WIN Television NSW"         => "9",
                },
                "freesd", "freehd"
            ),
        },
    },
    "Brisbane" => {
        region  => "75",
        sources => {
            $for_sources->(
                {
                    "ABC QLD"                     => "2",
                    "Channel Seven Brisbane"      => "7",
                    "SBS Queensland"              => "SBS",
                    "Network TEN Brisbane"        => "10",
                    "Channel Nine Brisbane Metro" => "9",
                },
                "free", "freesd"
            ),
        },
    },
    "Queensland" => {
        region  => "79",
        sources => {
            $for_sources->(
                {
                    "ABC QLD"                       => "2",
                    "Channel Seven Queensland"      => "7",
                    "SBS Queensland"                => "SBS",
                    "Southern Cross TEN Queensland" => "10",
                    "WIN Television QLD"            => "9",
                },
                "free", "freesd"
            ),
        },
    },
    "Sydney" => {
        region  => "73",
        sources => {
            $for_sources->(
                {
                    "ABC NSW"              => "2",
                    "Channel Seven Sydney" => "7",
                    "SBS Sydney"           => "SBS",
                    "Network TEN Sydney"   => "10",
                    "Channel Nine Sydney"  => "9",
                },
                "free", "freesd"
            ),
        },
    },
    "Melbourne" => {
        region  => "94",
        sources => {
            $for_sources->(
                {
                    "ABC VIC"                 => "2",
                    "Channel Seven Melbourne" => "7",
                    "SBS Melbourne"           => "SBS",
                    "Network TEN Melbourne"   => "10",
                    "Channel Nine Melbourne"  => "9",
                },
                "free", "freesd", "freehd"
            ),
        },
    },
    "Adelaide" => {
        region  => "81",
        sources => {
            $for_sources->(
                {
                    "ABC SA"                 => "2",
                    "Channel Seven Adelaide" => "7",
                    "SBS SA"                 => "SBS",
                    "Network TEN Adelaide"   => "10",
                    "Channel Nine Adelaide"  => "9",
                },
                "free", "freesd"
            ),
        },
    },
    "Australia" => {
        region  => "123",
        sources => {
            $for_sources->(
                {
                    "Arena TV"                    => "Arena",
                    "BBC World"                   => "BBC",
                    "Cartoon Network"             => "Cartoon",
                    "Channel [V]"                 => "Red",
                    "CNBC"                        => "CNBC",
                    "CNN"                         => "CNN",
                    "Discovery Channel"           => "Disc",
                    "FOX News"                    => "FoxFNC",
                    "FOX8"                        => "FOX",
                    "MAX"                         => "FoxMMX",
                    "National Geographic Channel" => "NatGe",
                    "Nickelodeon"                 => "Nick",
                    "Showtime"                    => "Show",
                    "Showtime 2"                  => "FoxSH2",
                    "Sky News"                    => "SkyNews",
                    "TV1"                         => "TV1",
                    "UKTV"                        => "UKTV",
                    "Showtime Greats"             => "ShowGreats",
                    "World Movies"                => "wmov",
                    "WCH"                         => "WCH",
                    "TVSN"                        => "TVSN",
                    "Sky Racing"                  => "SkyRa",
                    "Ovation"                     => "Ovation",
                    "Disney Channel"              => "Disney",
                    "Animal Planet"               => "Animal",
                    "The Comedy Channel"          => "Com",
                    "The LifeStyle Channel"       => "Lifes",
                    "FOX Sports 1"                => "FoxFS1",
                    "Movie One"                   => "Movie1",
                    "TCM"                         => "TCM",
                    "MTV"                         => "MTV",
                    "FOX Sports 2"                => "FoxSP2",
                    "FOX Footy Channel"           => "FFC",
                    "Movie Extra"                 => "MovieEx",
                    "Hallmark Channel"            => "Hall",
                    "The History Channel"         => "FoxHST",
                    "ESPN"                        => "ESPN",
                    "FOX Classics"                => "FoxCLA",
                    "Movie Greats"                => "MovieGr",
                },
                "foxtel"
            ),
        },
    },
);

my $region;
my %channels;
if (!exists $lineup_for{$location}) {
    print "Unknown location '$location'\n";
    exit (1);
}
if (!exists $lineup_for{$location}{sources}{$source}) {
    print "Unknown source '$source' for $location\n";
    exit (1);
}
$region   = $lineup_for{$location}{region};
%channels = %{ $lineup_for{$location}{sources}{$source} };

# Start from empty containers so XMLTV::write_data never receives undef
# when no programme could be parsed.
my $prog_ref = [];
my $chan_ref = {};
foreach my $channel (keys %channels) {
    $$chan_ref{$channel} = {
        'id'           => $XMLTV_prefix . $channels{$channel} . $XMLTV_suffix,
        'display-name' => [ [ $channel, undef ] ],
    };
}

# Program
print "starting $threads threads\n";
my @thrlist;
my $datepids = Thread::Queue->new;
for (my $thread = 0; $thread < $threads; $thread++) {
    push @thrlist, threads->new(\&fetch_details);
}

# Pass 1: download each day's guide page, extract the programme ids from it
# and queue the detail pages of days whose listing changed for the workers.
print "loading queue\n";
my $currentday  = $yesterday;
my $day_counter = 0;
while ($day_counter <= $days_to_grab) {
    my $date      = UnixDate($currentday, "%d%m%Y");
    my @day_lines = get_day($date, 1);
    if (@day_lines) {
        my @pids;
        my @rowspans;
        my @names;
        foreach my $line (@day_lines) {
            foreach my $link (split /\n|tr|TR|TD/, $line) {
                next unless $link =~ /closeup\/default\.asp/;

                my $rowspan = $link;
                $rowspan =~ s/.+rowspan=//g;
                $rowspan =~ s/ .+//g;

                # NOTE(review): the original pattern optionally removed one
                # specific markup tag right after "target=new>"; which tag
                # could not be determined, so any single tag is removed.
                # The name is only used for change detection.
                my $name = $link;
                $name =~ s/.+target=new>(?:<[^>]*>)?//g;
                $name =~ s/<\/a>.+//g;

                $link =~ s/.+pid=//g;
                $link =~ s/".+//g;

                if (($rowspan =~ /\d+/) and ($link =~ /\d\d+/)) {
                    push @pids,     $link;
                    push @rowspans, $rowspan;
                    push @names,    $name;
                }
            }
        }
        if (changed_guide($date, @pids, @rowspans, @names)) {
            foreach my $pid (@pids) {
                $datepids->enqueue($date . "-" . $pid);
            }
        }
    }
    $day_counter++;
    $currentday = DateCalc($currentday, "+ 1 day");
}

# One "0-0" sentinel per worker tells it to stop.
for (my $thread = 0; $thread < $threads; $thread++) {
    $datepids->enqueue(0 . "-" . 0);
}
print "queue is complete\n";
foreach my $thr (@thrlist) {
    $thr->join;
}
print "all threads done\n";

# Pass 2: parse the cached detail pages into XMLTV programme records.
print "building xml structure\n";
$currentday  = $yesterday;
$day_counter = 0;
while ($day_counter <= $days_to_grab) {
    my $date           = UnixDate($currentday, "%d%m%Y");
    my $guide_prn_file = $cache_dir . "/" . $date . "/guide.prn";

    # guide.prn holds pids, then rowspans, then names; only the first third
    # (the pids) is needed here.
    my @pids;
    my $prn = read_guide_prn($guide_prn_file);
    if (!defined $prn) {
        print "can't read $guide_prn_file\n";
    }
    elsif (@$prn > 2) {
        @pids = @{$prn}[ 0 .. int(@$prn / 3) - 1 ];
    }
    else {
        print "no pids in $guide_prn_file\n";
    }

    # Work queue instead of pushing onto the array being iterated; a
    # truncated page is re-queued at most $retrys times per pid.
    my @todo = @pids;
    my %truncated_retries;
    while (@todo) {
        my $pid     = shift @todo;
        my @details = get_details($date, $pid);
        if (@details == 0) {
            next;
        }

        # The wanted table lies between the row with the white background
        # and the row with the f7f3e8 background.
        my $show_details_table = "";
        my $use_line           = 0;
        my $close_html         = 0;
        foreach my $line (@details) {
            if ($line =~ /bgColor=#f7f3e8/) {
                $use_line = 0;
            }
            if ($use_line == 1) {
                $show_details_table .= $line;
            }
            if ($line =~ /bgcolor=#ffffff/) {
                $use_line = 1;
            }
            if ($line =~ /<\/HTML>/) {
                $close_html = 1;
            }
        }

        if ($close_html == 0) {
            my $name = $cache_dir . "/" . $date . "/" . $pid . ".html";
            $truncated_retries{$pid} = 0 unless defined $truncated_retries{$pid};
            if ($truncated_retries{$pid}++ >= $retrys) {
                print "giving up on truncated $name\n";
                next;
            }
            unlink $name;      # force get_details to download it again
            push @todo, $pid;
            print "t";         # truncated
            sleep($seconds_before_retry);
            next;
        }

        if ((length $show_details_table) == 0) {
            print "m";         # missing: can't do anything about this
            next;
        }

        # Turn every tag into a line break, then decode the HTML entities.
        # NOTE(review): the entity names below were reconstructed; the
        # patterns in the damaged source had lost them - confirm.
        $show_details_table =~ s/<[^>]*>/\n/g;
        $show_details_table =~ s/\&nbsp\;//g;
        $show_details_table =~ s/\&quot\;/"/g;
        if ($convert_ands == 1) {
            $show_details_table =~ s/( |)\&amp\;( |)/ and /g;
            $show_details_table =~ s/( |)\&( |)/ and /g;
        }
        else {
            $show_details_table =~ s/\&amp\;/\&/g;
        }

        # Fields are identified purely by their line position in the text.
        my $count = 0;
        my $time;
        my $channel = "";
        my $title1  = "";
        my $title2  = "";
        my $duration;
        my $rating = "";
        my $genre  = "";
        my $descr  = "";
        foreach my $line (split /\n/, $show_details_table) {
            if ($count == 4) {
                $time = $line;
            }
            elsif ($count == 7) {
                $channel = $line;
            }
            elsif ($count == 19) {
                $title1 = $line;
            }
            elsif ($count == 20) {
                $line =~ s/ - //g;
                $title2 = $line;
            }
            elsif ($count == 21) {
                $line =~ s/\D//g;
                $duration = $line;
            }
            elsif ($count == 22) {
                $line =~ s/^.*: (.*)\).*$/$1/;
                if ($line =~ /[a-zA-Z]/) {
                    $rating = $line;
                }
            }
            elsif ($count == 26) {
                $line =~ s/ //g;
                $genre = $line;
            }
            elsif ($count == 28 && $line =~ /[a-zA-Z]/) {
                $descr = $line;
            }
            ++$count;
        }

        if (defined $channels{$channel}) {
            $channel = $XMLTV_prefix . $channels{$channel} . $XMLTV_suffix;
        }
        else {
            print "unknown channel $channel\n";
            next;
        }

        # Without a start time and a run time no programme can be built.
        if (!defined $time || !defined $duration || $duration eq "") {
            print "m";
            next;
        }

        # A guide day runs from 6:00 to 6:00, so times before 6:00 belong
        # to the following calendar day.
        my $start = ParseDate(UnixDate($currentday, "%Y%m%d ") . $time);
        if (Date_Cmp($time, "6:00") < 0) {
            $start = DateCalc($start, "+ 1 day");
        }
        my $stop = DateCalc($start, "+ " . $duration . " minutes");
        if (Date_Cmp($start, $today) < 0) {
            # skip programs that start before today
            next;
        }

        # Date::Manip timezone setup and calculations aren't correct so don't use
        $start = UnixDate($start, "%q ") . get_TZ_Offset($start);
        $stop  = UnixDate($stop,  "%q ") . get_TZ_Offset($stop);

        my $a_prog = {
            channel => $channel,
            start   => $start,
            stop    => $stop,
            title   => [ [ $title1, undef ] ],
        };
        $descr =~ s/^\s+//;
        $descr =~ s/\s+$//;
        if ($title2) {
            $$a_prog{'sub-title'} = [ [ $title2, undef ] ];
        }
        if ($rating) {
            $$a_prog{rating} = [ [ $rating, "CTVA", undef ] ];
        }
        if ($genre) {
            $$a_prog{category} = [ [ $genre, undef ] ];
        }
        if ($descr) {
            $$a_prog{desc} = [ [ $descr, undef ] ];
        }
        push @$prog_ref, $a_prog;
    }

    $currentday = DateCalc($currentday, "+ 1 day");
    $day_counter++;
}

my $data = [
    'ISO-8859-1',
    {
        'source-info-name'    => 'http://tvguide.ninemsn.com.au/',
        'generator-info-url'  => '',
        'generator-info-name' => "XMLTV - tv_grab_au NineMSN v0.2",
    },
    $chan_ref,
    $prog_ref,
];

print "writing file\n";
my $fh = IO::File->new($opt_output, "w")
    or die "can't open $opt_output for writing: $!\n";
XMLTV::write_data($data, OUTPUT => $fh);
$fh->close
    or die "can't finish writing $opt_output: $!\n";

print "cleaning $days_to_clean days\n";
$currentday  = DateCalc($yesterday, "-1 day");
$day_counter = 0;
while ($day_counter < $days_to_clean) {
    remove_day(UnixDate($currentday, "%d%m%Y"));
    $currentday = DateCalc($currentday, "-1 day");
    $day_counter++;
}
print "done\n";

# subroutines

# get_TZ_Offset($date)
# Returns the local UTC offset for the wall-clock time $date as a signed
# four digit string such as "+1000", "+0930" or "-0500".
sub get_TZ_Offset {
    # Date::Manip timezone setup and calculations aren't correct so don't use
    my @date = UnixDate(shift, ("%S", "%M", "%H", "%d", "%m", "%Y"));
    $date[4] -= 1;    # timegm/timelocal count months from 0
    my $offset = timegm(@date) - timelocal(@date);
    my $sign   = ($offset < 0) ? "-" : "+";
    $offset = abs($offset);
    return sprintf "%s%02d%02d", $sign, int($offset / 3600), int($offset / 60) % 60;
}

# get_day($date, $force)
# Downloads the guide page for $date (ddmmyyyy) into the cache and returns
# its lines, or an empty list when the page cannot be read.  With $force
# set to 1 the page is fetched even if a cached copy exists.
sub get_day {
    my ($date, $force) = @_;
    $force = 0 unless defined $force;
    my $url        = $guide_url . $date . "_" . $region . ".asp";
    my $guide_dir  = $cache_dir . "/" . $date;
    my $guide_file = $guide_dir . "/guide.html";
    mkpath ($guide_dir);

    my $retry = 0;
    while ((($force == 1) || (!(-e $guide_file)))
        && is_error(getstore($url, $guide_file))
        && ($retry < $retrys))
    {
        print ".";
        sleep($seconds_before_retry);
        $retry++;
    }

    my @guide_lines;
    if (open(my $guide_fh, "<", $guide_file)) {
        @guide_lines = <$guide_fh>;
        close($guide_fh);
    }
    else {
        print "giving up on $guide_file\n";
    }
    return @guide_lines;
}

# remove_day($date)
# Deletes the cache directory of $date and the files directly inside it.
# Returns the result of rmdir.
sub remove_day {
    my $date      = shift;
    my $guide_dir = $cache_dir . "/" . $date;
    my @files     = glob($guide_dir . "/*");
    unlink @files;
    return rmdir $guide_dir;
}

# get_details($date, $program_id)
# Returns the lines of the cached detail page of a programme, downloading
# it first when it is not cached.  Returns an empty list on failure.
sub get_details {
    my ($date, $program_id) = @_;
    my $url          = $details_url . $program_id;
    my $guide_dir    = $cache_dir . "/" . $date;
    my $details_file = $guide_dir . "/" . $program_id . ".html";
    mkpath ($guide_dir);

    my $retry = 0;
    while ((!(-e $details_file))
        && is_error(getstore($url, $details_file))
        && ($retry < $retrys))
    {
        print ".";
        sleep($seconds_before_retry);
        $retry++;
    }

    my @details_lines;
    if (open(my $details_fh, "<", $details_file)) {
        @details_lines = <$details_fh>;
        close($details_fh);
    }
    else {
        print "giving up on $details_file\n";
    }
    return @details_lines;
}

# fetch_details()
# Worker thread body: takes "date-pid" items from the queue and downloads
# the matching detail page into the cache until the "0-0" sentinel arrives.
sub fetch_details {
    while (1) {
        my ($date, $pid) = split /-/, $datepids->dequeue;
        last unless (($date != 0) and ($pid != 0));

        my $guide_dir = $cache_dir . "/" . $date;
        mkpath ($guide_dir);
        my $url          = $details_url . $pid;
        my $details_file = $guide_dir . "/" . $pid . ".html";

        my $retry = 0;
        while (is_error(getstore($url, $details_file)) && ($retry < $retrys)) {
            sleep($seconds_before_retry);
            $retry++;
        }
    }
    return;
}

# read_guide_prn($guide_prn_file)
# Reads the ">"-separated record stored in a guide.prn file.  Returns a
# reference to the list of fields (empty for an empty file), or undef when
# the file cannot be opened.
sub read_guide_prn {
    my $guide_prn_file = shift;
    open(my $prn_fh, "<", $guide_prn_file) or return;
    my $record = <$prn_fh>;
    close($prn_fh);
    return [] unless defined $record;
    # limit -1 keeps trailing empty fields so the field count survives
    return [ split />/, $record, -1 ];
}

# changed_guide($date, @pids, @rowspans, @names)
# Compares the flattened listing of a day with the one stored in that day's
# guide.prn.  Returns 0 when they are identical; otherwise stores the new
# listing and returns 1 so the caller downloads the detail pages.
sub changed_guide {
    my $date = shift;

    # ">" is the field separator of guide.prn, so it must not occur inside
    # a stored field.
    my @pidsrowspansnames = map { my $field = $_; $field =~ tr/>//d; $field } @_;
    my $guide_prn_file = $cache_dir . "/" . $date . "/guide.prn";

    my $prn = read_guide_prn($guide_prn_file);
    if (defined $prn and (@$prn > 2) and (@$prn == @pidsrowspansnames)) {
        my $same = 1;
        for my $index (0 .. $#pidsrowspansnames) {
            # string comparison: the values are text scraped from HTML
            if ($$prn[$index] ne $pidsrowspansnames[$index]) {
                $same = 0;
                last;
            }
        }
        if ($same) {
            print "$date unchanged\n";
            return 0;
        }
    }

    print "$date downloading\n";
    if (open(my $prn_fh, ">", $guide_prn_file)) {
        print {$prn_fh} join(">", @pidsrowspansnames);
        close($prn_fh);
    }
    else {
        print "can't open for writing $guide_prn_file\n";
    }
    return 1;
}