#!/usr/bin/perl -w

# Australian TV Guide XMLTV grabber by Damon Searle
# Derived from a yahoo XMLTV grabber by Ron Kellam which was itself...
# Derived from original code by Justin Hawkins
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# 30 Oct 2004
# Damon Searle
#  - wrote first version
#  - gets data from NineMSN as a backup. It's not that fancy.
# 31 Oct 2004
# Fred Donelly
#  - added an option so that the output file can be specified on the
#    command line and from the quick test I gave it, it now works with
#    mythfilldatabase.
#  - $offset set to +1000 at the top and then had "+1000" set in an
#    output string further down rather than the variable
# 4 Nov 2004
# Paul Andreassen
#  - learned some perl and now wants to go back to python
#  - added and then reduced status info
#  - retry on failure to getstore
#  - changed cache to '/var/local/tv_grab_au'
#  - added threading

# *** Only tested with Queensland and ACT data ***

# Instructions:
# Go to http://tvguide.ninemsn.com.au/guide/ and select your area.
# Look at the last number in the URL before ".asp" and set
# the region variable below. Then put the channel names as listed
# on the tv guide site into the variables below.
# Then set your XMLTV ids from the database in the _XMLTVID variables.
# If it doesn't work with mythfilldatabase, try:
#   ./tv_grab_au
#   mythfilldatabase --file 1 -1 /var/local/tv_grab_au.foxtel/guide.xml
# (the guide is written to $cache_dir/guide.xml unless --output is given)

use strict;
use warnings;
use Getopt::Long;
use XMLTV;
use LWP::Simple;
use Date::Manip;
use File::Path;
use IO::File;
use threads;

# Variables
my $days_to_grab = 7;
my $region = "123"; # 126 = ACT, 123 = Fox, 83 = Adelaide
my $guide_url = "http://tvguide.ninemsn.com.au/guide/";
my $details_url = "http://tvguide.ninemsn.com.au/closeup/default.asp?pid=";
my $cache_dir = "/var/local/tv_grab_au.foxtel";
my $offset = "+0930";

# Limits that keep a dead server or a permanently malformed page from
# hanging the grabber forever.
my $max_fetch_attempts = 5;   # getstore() tries per URL
my $max_parse_attempts = 3;   # re-downloads of a details page that is too short
my $min_detail_fields  = 28;  # fewer fields than this means a truncated page

# Channel names exactly as listed on the TV guide site.
my $FOX_ARENA_TV = "Arena TV";
my $FOX_BBC_WORLD = "BBC World";
my $FOX_CARTOON_NETWORK = "Cartoon Network";
my $FOX_CHANNEL_V = "Channel [V]";
my $FOX_CNBC = "CNBC";
my $FOX_CNN = "CNN";
my $FOX_DISCOVERY = "Discovery Channel";
my $FOX_FOX_NEWS = "FOX News";
my $FOX_FOX8 = "FOX8";
my $FOX_MAX = "MAX";
my $FOX_NATIONAL_GEOGRAPHIC = "National Geographic Channel";
my $FOX_NICKELODEON = "Nickelodeon";
my $FOX_SHOWTIME = "Showtime";
my $FOX_SHOWTIME2 = "Showtime 2";
my $FOX_SKY_NEWS = "Sky News";
my $FOX_TV1 = "TV1";
my $FOX_UKTV = "UKTV";

# XMLTV ids matching the channel names above.
my $FOX_ARENA_TV_XMLTVID = "foxtel.australia.Arena.d1.com.au";
my $FOX_BBC_WORLD_XMLTVID = "foxtel.australia.BBC.d1.com.au";
my $FOX_CARTOON_NETWORK_XMLTVID = "foxtel.australia.Cartoon.d1.com.au";
my $FOX_CHANNEL_V_XMLTVID = "foxtel.australia.Red.d1.com.au";
my $FOX_CNBC_XMLTVID = "foxtel.australia.CNBC.d1.com.au";
my $FOX_CNN_XMLTVID = "foxtel.australia.CNN.d1.com.au";
my $FOX_DISCOVERY_XMLTVID = "foxtel.australia.Disc.d1.com.au";
my $FOX_FOX_NEWS_XMLTVID = "foxtel.australia.FoxFNC.d1.com.au";
my $FOX_FOX8_XMLTVID = "foxtel.australia.FOX.d1.com.au";
my $FOX_MAX_XMLTVID = "foxtel.australia.FoxMMX.d1.com.au";
my $FOX_NATIONAL_GEOGRAPHIC_XMLTVID = "foxtel.australia.NatGe.d1.com.au";
my $FOX_NICKELODEON_XMLTVID = "foxtel.australia.Nick.d1.com.au";
my $FOX_SHOWTIME_XMLTVID = "foxtel.australia.Show.d1.com.au";
my $FOX_SHOWTIME2_XMLTVID = "foxtel.australia.FoxSH2.d1.com.au";
my $FOX_SKY_NEWS_XMLTVID = "foxtel.australia.SkyNews.d1.com.au";
my $FOX_TV1_XMLTVID = "foxtel.australia.TV1.d1.com.au";
my $FOX_UKTV_XMLTVID = "foxtel.australia.UKTV.d1.com.au";

# Single table of [ site name, XMLTV id ] pairs; both the <channel> list and
# the programme-to-channel lookup are derived from it.
my @channels = (
    [ $FOX_ARENA_TV,            $FOX_ARENA_TV_XMLTVID ],
    [ $FOX_BBC_WORLD,           $FOX_BBC_WORLD_XMLTVID ],
    [ $FOX_CARTOON_NETWORK,     $FOX_CARTOON_NETWORK_XMLTVID ],
    [ $FOX_CHANNEL_V,           $FOX_CHANNEL_V_XMLTVID ],
    [ $FOX_CNBC,                $FOX_CNBC_XMLTVID ],
    [ $FOX_CNN,                 $FOX_CNN_XMLTVID ],
    [ $FOX_DISCOVERY,           $FOX_DISCOVERY_XMLTVID ],
    [ $FOX_FOX_NEWS,            $FOX_FOX_NEWS_XMLTVID ],
    [ $FOX_FOX8,                $FOX_FOX8_XMLTVID ],
    [ $FOX_MAX,                 $FOX_MAX_XMLTVID ],
    [ $FOX_NATIONAL_GEOGRAPHIC, $FOX_NATIONAL_GEOGRAPHIC_XMLTVID ],
    [ $FOX_NICKELODEON,         $FOX_NICKELODEON_XMLTVID ],
    [ $FOX_SHOWTIME,            $FOX_SHOWTIME_XMLTVID ],
    [ $FOX_SHOWTIME2,           $FOX_SHOWTIME2_XMLTVID ],
    [ $FOX_SKY_NEWS,            $FOX_SKY_NEWS_XMLTVID ],
    [ $FOX_TV1,                 $FOX_TV1_XMLTVID ],
    [ $FOX_UKTV,                $FOX_UKTV_XMLTVID ],
);

# Longest names first, so that "Showtime 2" is tested before its prefix
# "Showtime" when matching a listed channel name.
my @channels_longest_first =
    sort { length($b->[0]) <=> length($a->[0]) } @channels;

my $opt_days;
my $opt_output;
GetOptions('days=i'   => \$opt_days,
           'output=s' => \$opt_output)
    or die "Usage: $0 [--days N] [--output FILE]\n";
if ($opt_days) {
    $days_to_grab = $opt_days;
}
if (!$opt_output) {
    $opt_output = $cache_dir . "/guide.xml";
}
print "$days_to_grab, $opt_output\n";

# Channel section of the XMLTV document, keyed by the site's channel name.
my $chan_ref = {};
for my $chan (@channels) {
    my ($name, $xmltv_id) = @$chan;
    $chan_ref->{$name} = {
        'id'           => $xmltv_id,
        'display-name' => [ [ $name, undef ] ],
    };
}

# Pass 1: download each day's guide page, collect its programme ids, and
# start one thread per day to pre-fetch the details pages into the cache.
print "starting threads\n";
my @thrlist;
my @days;    # one [ ddmmyyyy string, Date::Manip date, \@pids ] per day
my $currentday = ParseDate("today");
for (1 .. $days_to_grab) {
    my $date = UnixDate($currentday, "%d%m%Y");
    my @pids = extract_pids(get_day($date));
    push @thrlist, threads->new(\&fetch_details, $date, @pids);
    push @days, [ $date, $currentday, \@pids ];
    $currentday = DateCalc($currentday, "+ 1 day");
}
print "all threads started\n";
for my $thr (@thrlist) {
    $thr->join;
}
print "all threads done\n";

# Pass 2: parse every cached details page into an XMLTV programme.
my $prog_ref = [];
for my $day (@days) {
    my ($date, $day_stamp, $pids) = @$day;

    # Work queue rather than pushing onto the list being iterated; a page
    # that parses as too short is re-queued a bounded number of times.
    my @queue = @$pids;
    my %attempts;
    while (@queue) {
        my $pid = shift @queue;
        my @fields = extract_show_fields(get_details($date, $pid));

        if (@fields < $min_detail_fields) {
            my $name = $cache_dir . "/" . $date . "/" . $pid . ".html";
            unlink $name;
            if (++$attempts{$pid} < $max_parse_attempts) {
                print "\n$name is too short, removing and trying again\n";
                push @queue, $pid;
            }
            else {
                warn "\n$name is still too short after "
                   . "$max_parse_attempts attempts, skipping\n";
            }
            next;
        }

        my $a_prog = build_programme($day_stamp, @fields);
        if (!$a_prog) {
            warn "\nCan't work out start/stop time for pid $pid, skipping\n";
            next;
        }
        push @$prog_ref, $a_prog;
    }
}

my $data = [
    'ISO-8859-1',
    {
        'source-info-name'    => 'http://tvguide.ninemsn.com.au/',
        'generator-info-url'  => '',
        'generator-info-name' => "XMLTV - tv_grab_au NineMSN v0.2",
    },
    $chan_ref,
    $prog_ref,
];

my $fh = IO::File->new($opt_output, 'w')
    or die "Can't write $opt_output: $!\n";
XMLTV::write_data($data, OUTPUT => $fh);
$fh->close or die "Can't close $opt_output: $!\n";

# Pull the programme ids out of the lines of a day's guide page.
# Takes the page lines; returns the list of numeric pid strings found in
# links to closeup/default.asp, in page order.
sub extract_pids {
    my @day_lines = @_;
    my @pids;
    for my $line (@day_lines) {
        # NOTE(review): the original pattern listed "tr" twice
        # (/\n|tr|TR|TD|tr/); the second was presumably meant to be "td".
        # Only the redundant alternative is dropped here -- confirm.
        for my $link (split /\n|tr|TR|TD/, $line) {
            next unless $link =~ m{closeup/default\.asp};
            $link =~ s/.+pid=//g;
            $link =~ s/".+//g;
            # The pid becomes part of a cache file name, so accept only
            # the leading digits of what the remote page supplied.
            push @pids, $1 if $link =~ /\A(\d+)/;
        }
    }
    return @pids;
}

# Reduce a details page to its list of text fields.
# Keeps the lines after one matching bgcolor=#ffffff up to the next line
# matching bgColor=#f7f3e8, replaces every tag with a newline, removes
# &nbsp; entities and splits the result on newlines.
sub extract_show_fields {
    my @details = @_;
    my $table = "";
    my $in_table = 0;
    for my $line (@details) {
        # Order matters: the closing marker line is excluded, and the
        # opening marker line is kept only if we are already inside.
        $in_table = 0 if $line =~ /bgColor=#f7f3e8/;
        $table .= $line if $in_table;
        $in_table = 1 if $line =~ /bgcolor=#ffffff/;
    }
    $table =~ s/<[^>]*>/\n/g;
    $table =~ s/&nbsp;//g;
    return split /\n/, $table;
}

# Map a channel name as shown on a details page to its XMLTV id.
# Names are matched literally (so "Channel [V]" is not read as a regex
# character class). Returns the name unchanged when no channel matches.
sub channel_id_for {
    my ($listed_name) = @_;
    for my $chan (@channels_longest_first) {
        my ($name, $xmltv_id) = @$chan;
        return $xmltv_id if $listed_name =~ /\Q$name\E/;
    }
    return $listed_name;
}

# Build one XMLTV programme hash from the fields of a details page.
# $day_stamp is the Date::Manip date of the guide page the programme was
# listed on; @fields must hold at least $min_detail_fields entries.
# Returns a hash ref, or nothing when the start time or run time cannot
# be parsed.
sub build_programme {
    my ($day_stamp, @fields) = @_;

    # Field positions within the stripped details table.
    my $time    = $fields[4];
    my $channel = $fields[7];
    my $title1  = $fields[19];
    (my $title2   = $fields[20]) =~ s/ - //g;
    (my $duration = $fields[21]) =~ s/\D//g;    # run time in minutes
    # Field 22 carries the rating; it is not written to the output.
    # NOTE(review): this strips every space from the genre; it may
    # originally have removed &nbsp; entities only -- confirm.
    (my $genre    = $fields[26]) =~ s/ //g;
    my $descr = "";
    if (defined $fields[28] && $fields[28] =~ /[a-zA-Z]/) {
        $descr = $fields[28];
    }

    my $start_time = UnixDate($time, "%H:%M");
    return unless defined $start_time && length $start_time;
    return unless length $duration;

    # Programmes starting between 00:00 and 05:59 are dated one day after
    # the guide page they were listed on.
    my $start_date = UnixDate($day_stamp, "%Y-%m-%d");
    if ($start_time =~ /\A0[0-5]:/) {
        $start_date = DateCalc($start_date, "+ 1 day");
    }
    $start_date = UnixDate($start_date, "%Y%m%d");

    my $end_time = DateCalc($start_time, " + " . $duration . "minutes");
    $end_time = UnixDate($end_time, "%H:%M");
    return unless defined $end_time && length $end_time;

    # An end time earlier than the start time means the programme runs
    # past midnight.
    my $end_date;
    if (Date_Cmp($start_time, $end_time) <= 0) {
        $end_date = $start_date;
    }
    else {
        my $err;
        my $edate = DateCalc($start_date, "+ 1 day", \$err);
        $end_date = UnixDate($edate, "%Y%m%d");
    }

    my $start = $start_date . UnixDate($start_time, "%H%M") . "00 " . $offset;
    my $stop  = $end_date . UnixDate($end_time, "%H%M") . "00 " . $offset;

    my $a_prog = {
        channel => channel_id_for($channel),
        start   => $start,
        stop    => $stop,
        title   => [ [ $title1, undef ] ],
    };

    $descr =~ s/^\s+//;
    $descr =~ s/\s+$//;

    if ($title2) {
        $$a_prog{'sub-title'} = [ [ $title2, undef ] ];
    }
    if ($descr) {
        $$a_prog{desc} = [ [ $descr, undef ] ];
    }
    if ($genre) {
        $$a_prog{category} = [ [ $genre, undef ] ];
    }
    return $a_prog;
}

# Download $url into $file unless the file already exists.
# Tries up to $max_fetch_attempts times; when $verbose is true it prints
# "." after the first attempt and ":" after each retry.
# Returns 1 if the file exists afterwards, 0 otherwise.
sub fetch_to_cache {
    my ($url, $file, $verbose) = @_;
    return 1 if -e $file;
    for my $attempt (1 .. $max_fetch_attempts) {
        getstore($url, $file);
        if ($verbose) {
            print(($attempt == 1) ? "." : ":");
        }
        return 1 if -e $file;
    }
    warn "\nGiving up on $url after $max_fetch_attempts attempts\n";
    return 0;
}

# Read a cached page and return its lines; returns an empty list (after
# printing a message) when the file cannot be opened.
sub read_cached_lines {
    my ($file) = @_;
    my $in;
    if (!open($in, '<', $file)) {
        print "\nCan't open $file\n";
        return;
    }
    my @lines = <$in>;
    close($in);
    return @lines;
}

# download the guide for the date to file
# $date is a ddmmyyyy string; returns the lines of the guide page.
sub get_day {
    my $date = shift;
    my $url = $guide_url . $date . "_" . $region . ".asp";
    my $guide_dir = $cache_dir . "/" . $date;
    my $guide_file = $guide_dir . "/guide.html";
    mkpath($guide_dir);
    fetch_to_cache($url, $guide_file, 1);
    return read_cached_lines($guide_file);
}

# Download (if not already cached) the details page of one programme and
# return its lines.
sub get_details {
    my $date = shift;
    my $program_id = shift;
    my $url = $details_url . $program_id;
    my $guide_dir = $cache_dir . "/" . $date;
    my $details_file = $guide_dir . "/" . $program_id . ".html";
    mkpath($guide_dir);
    fetch_to_cache($url, $details_file, 1);
    return read_cached_lines($details_file);
}

# Thread entry point: pre-fetch the details pages of the given programme
# ids into the cache directory of $date, without progress output.
sub fetch_details {
    my $date = shift;
    my @pids = @_;
    my $guide_dir = $cache_dir . "/" . $date;
    mkpath($guide_dir);
    foreach my $program_id (@pids) {
        my $url = $details_url . $program_id;
        my $details_file = $guide_dir . "/" . $program_id . ".html";
        fetch_to_cache($url, $details_file, 0);
    }
    return;
}