#!/usr/bin/perl -w eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}' if 0; # not running under some shell # =========================================================================== # --- new NMSN Australian TV grabber by Michael 'Immir' Smith... # --- $Id: tv_grab_au,v 1.46 2006/02/02 12:19:04 michael Exp michael $ # # A current version of this script should usually be available here: # # # # pod documentation coming later... # # in brief: --configure configure the grabber # --config-file use specified config file # --list-channels show subscribed channels and xmltvids # --show-config show configuration details # --slow download a details page for every show # --fast opposite of --slow (to override config) # --days days to grab # --output xml output file # --ignore-cache ignore cached information # --static write to /dev/null (for testing) # # When run with mythfilldatabase, the config file may end up as # ~/.mythtv/tv_grab_au.xmltv and this is where xmltvids for channels can # be modified. # # NOTE: this duplicate is deprecated --- myth supports duplicated xmltvids # --- email me if you still want it (I'll be stripping it at some point). # # Listing information can be duplicated for more than one channel id # using a construction like this (be careful of syntax) in the config file: # # $duplicate = { # 'act.abc.gov.au' => { 'ABC HD' => 'hd.abc.gov.au', # 'ABC CHAN 22' => '22.abc.gov.au' }, # 'eastern.sbs.com.au' => { 'SBS HD' => 'hd.eastern.sbs.com.au' } # }; # # Downloading of details pages can be controlled by the --slow option # (possibly set by default during configuration) and regular expressions # in additional config files (see end of script for more information). # # Recent changes: # # 1.27: TZ change added as suggested by Greg Boundy # 1.31: use parsed details url to make it more resilient to site changes # 1.34: manual configuration of duplication for channels # 1.36: user agent fix for proxy (thanks Carl Lewis) # 1.37: fixed various issues with start/stop times around midnight/6am # (thanks to Darryl Young for suggestions of the cause) # 1.39: fixed dupes from last-then-first listed programs across 6am # 1.42: ridiculous encrypted data (standard java functions + Caesar slide # by 1!); javascript modification of closeup url (Michael Cowell) # 1.43: move deobfuscate to get_content_base to cope with --config mode # 1.44: make channel names matched on web page regular expressions/substrings # to alleviate problems when NMSN changes column names # 1.45: fix problem with regexp match of channel name on webpage matching # shorter names (eg tv12 matched with tv1 instead of tv2) (Dave Oxley) use strict; use Getopt::Long; use LWP::UserAgent; use Date::Manip; use File::Path; use File::Basename; use Data::Dumper; use HTML::TreeBuilder; use XMLTV; use XMLTV::Ask; use XMLTV::ProgressBar; use XMLTV::Config_file; # --------------------------------------------------------------------------- # --- global parameters/constants (read in from config) my $conf = {}; # ref to hash of configuration parameters my $channels = {}; # ref to hash of subscribed channel names to xmltvids my $duplicate = {}; # hashref for channel duplication my $lang = "en"; my $spoofMSID = 1; # spoof random MSIDs to avoid redirects? big speed up! my $output_dir = "/var/local/tv_grab_au"; my $cache_file = "$output_dir/cached.pl"; my $want_details_file = "$ENV{HOME}/.tv_grab_au-detailed"; my $skip_details_file = "$ENV{HOME}/.tv_grab_au-undetailed"; my (@want_details, @skip_details); # --- some global counters for reporting my ($count_guide, $count_detail, $count_bad, $count_adjust) = (0) x 4; # --- NMSN site URLs my $NMSN = "http://tvguide.ninemsn.com.au"; my $TVTODAY = "http://tvguide.ninemsn.com.au/todaytv/default.asp"; my ($Revision) = '$Revision: 1.46 $' =~ /Revision:\s*(\S+)/; my $debug = 0; $Data::Dumper::Sortkeys = $Data::Dumper::Indent = 1; # --------------------------------------------------------------------------- # --- Command-line options my ($opt_configfile, $opt_configure, $opt_list_channels); my ($opt_ignore_cache, $opt_static, $opt_show_config); my ($opt_slow, $opt_fast); my $opt_output = "$output_dir/guide.xml"; my $opt_gui = 1; my $opt_days = 5; GetOptions( 'days=i' => \$opt_days, 'list-channels' => \$opt_list_channels, 'output=s' => \$opt_output, 'config-file=s' => \$opt_configfile, 'configure' => \$opt_configure, 'gui=s' => \$opt_gui, 'slow' => \$opt_slow, 'fast' => \$opt_fast, 'ignore-cache' => \$opt_ignore_cache, 'static' => \$opt_static, 'show-config' => \$opt_show_config, 'debug' => \$debug ); XMLTV::Ask::init($opt_gui); my $config_file = XMLTV::Config_file::filename ($opt_configfile, 'tv_grab_au', not $debug ); # --- read and parse configuration --- must do this before configuring # in case the user has chosen non-default xmltvids for some channels... if (-r $config_file) { local (@ARGV, $/) = ($config_file); no warnings 'all'; eval <>; die "error in conf file: $@" if $@ and not $opt_configure; print "unable to read configuration file... configuring anyway..." if $@; } elsif (not $opt_configure) { print qq{\nThis version of tv_grab_au is not configured!\n If you are using mythfilldatabase, you'll need to configure from within mythtv-setup (or with an appropriate --config-file option), otherwise simply run with the --configure option.\n\n}; exit(1); } # --- extract sorted subscribed channel list from config-file hash; # also compute canonicalised lowercased channel to xmltvid hash my %chanid = map { lc $_, $channels->{$_} } keys %$channels; # --- are we configuring? if ($opt_configure) { configure($config_file); exit 0 } # --- are we just listing channels? if ($opt_list_channels) { print " $_ -> $channels->{$_}\n" for sort keys %$channels; exit 0; } # --- we must be grabbing program information... my $runtime = time(); Date_Init("TZ=$conf->{TZ}"); # explicitly Set Timezone # --- flag for whether we get individual program details die "cannot do slow and fast\n" if $opt_slow and $opt_fast; my $slow = $opt_fast ? 0 : ($opt_slow || $conf->{slow}); # --- override slow for some shows (read list of regexps from file) if (-f $want_details_file) { local (@ARGV) = ($want_details_file); chomp(@want_details = <>); } if (-f $skip_details_file) { local (@ARGV) = ($skip_details_file); chomp(@skip_details = <>); } # --- other configuration options $opt_days = $conf->{days} if $conf->{days}; $opt_output = $conf->{output} if $conf->{output}; if ($debug or $opt_show_config) { print "\ntv_grab_au revision $Revision\n"; print "configuration file: $config_file\n"; print "TZ = $conf->{TZ} slow = $conf->{slow}\n"; print "output: $conf->{output}\n" if $conf->{output}; print "services chosen:\n"; for my $service (@{$conf->{services}}) { print " $service->{name}: " . " region=$service->{region} id=$service->{regionid}\n"; } print "channel list:\n"; print " $_ -> $channels->{$_}\n" for keys %$channels; print "duplication list:\n"; while (my ($a,$b) = each %$duplicate) { while (my ($c,$d) = each %$b) { print " $a -> $d ($c)\n"; } } print "\n"; exit 0 if $opt_show_config; } print fixplural("grabbing $opt_days days into $opt_output\n"); # --- first get cached list of shows from last time (if any) my ($cached, $newcache); if (-r $cache_file and not $opt_ignore_cache) { local (@ARGV, $/) = ($cache_file); no warnings 'all'; eval <>; die "$@" if $@; } # --- now, on with the shows... # showlists is a hash of refs to arrays holding shows for each channel # which we use to infer endtimes from starttimes since the rowspan # information is generally unreliable (in fast mode) --- actually, they # seem to get completely off-cut sometimes; everything shifted forward # or back by 5 minutes or more --- must try and do something about that. my %showlists; for my $day (0 .. $opt_days-1) { my $date = Ymd(DateCalc("today", "+ $day days")); my $date6am = ParseDate("6am $date"); for my $service (@{$conf->{services}}) { my $guidedata = get_guide_page($service, dmY($date6am)) or next; my $tree = HTML::TreeBuilder->new_from_content($guidedata); for ($tree->look_down('_tag' => 'table', 'class' => 'tv')) { # extract channel names from the first row of the table # (this row has align=middle and the channels are in bold) my @hdr = map { $_->as_text } $_->look_down('_tag' => 'tr', 'align' => 'middle') ->look_down('_tag' => 'b'); my @span = (0) x @hdr; # rowspans to infer columns my $row = 0; # row number (to compute start times) for ($_->look_down('_tag' => 'tr', 'valign' => 'top')) { my @idx = grep { $span[$_] == 0 } 0..$#hdr; # columns for this row for ($_->look_down('_tag' => 'td', 'class' => 'tv')) { my $col = shift @idx; my $rowspan = $_->attr("rowspan") || 1; $span[$col] = $rowspan; my $channel_hdr = $hdr[$col]; my $channel = find_channel($channel_hdr) or next; my $chanid = $chanid{lc $channel}; my ($e) = $_->content_list; next unless ref($e) eq 'HTML::Element'; next unless $e->tag eq 'a'; my $html= $e->as_HTML; my ($pid) = $html =~ /pid=(\d+)/; next if $pid == 0; # sometimes null programs at bottom of table my $title = $e->as_text(); # NMSN changed their details urls; let's do it this way to # cope with future changes (assuming they're relative to $NMSN) my @link = @{ $e->extract_links() }; die "too many links:\n" . $html if @link > 1; my $url = $NMSN . $link[0]->[0]; # --- NMSN javascript pop function translates closeup to cu $url =~ s/closeup/cu/; # --- check (pid, row, rowspan, title) against old cached data my $cache_id = "$date:$pid:$row:$rowspan:$title"; if ($cached->{$cache_id}) { $newcache->{$cache_id} = $cached->{$cache_id}; push @{ $showlists{$chanid} }, $cached->{$cache_id}; next; } # --- compute start and stop times based on row of table and # rowspan --- although this appears problematic for some days my $start = DateCalc($date6am, ($row*5) . " minutes"); my $stop = DateCalc($start, ($rowspan*5) . " minutes"); if ($title =~ s/\s*\[\s* (\d+):(\d+) \s* (am|pm) \s*\]\s* //x) { my ($hr, $min, $ampm) = ($1, $2, lc($3)); my $hrdm = $hr == 0 ? 12 : $hr; # Date::Manip convention $min = sprintf "%02d", $min; # NMSN bug: 1-digit minutes $start = ParseDate("$hrdm:$min$ampm $date"); $start = DateCalc($start, "+ 1 day") if $hr < 6 and $ampm eq "am"; } my $show = { 'title' => [[$title, $lang]], 'start' => $start, 'stop' => $stop, 'channel' => $chanid, 'guidedate' => $date, 'row' => $row }; # --- fill in more details? --- get_closeup_details($show,$pid,$url) if want_details($show); # --- that's it! push @{ $showlists{$chanid} }, $show; $newcache->{$cache_id} = $show; print Dumper($show) if $debug; } ++ $row; @span = map { $_ - 1} @span; # update rowspan counts } } $tree->delete(); } } # --- check for static (i.e., don't write xml or cache) if ($opt_static) { print "(static) writing xml and cache to /dev/null\n"; $opt_output = "/dev/null"; $cache_file = "/dev/null"; } # --- save to cache before massaging dates $cached = $newcache; open(CACHE, "> $cache_file") or die "cannot open $cache_file: $!"; print CACHE Data::Dumper->Dump([$cached], ["cached"]); close CACHE; # --- check start and stop times and mark duplicates for my $channel (keys %showlists) { my @shows = @{ $showlists{$channel} }; for my $i (0 .. @shows-2) { # make stop time consistent with following start time if (Date_Cmp($shows[$i+1]->{start}, $shows[$i]->{stop}) < 0) { if ($shows[$i+1]->{start} eq $shows[$i]->{start} and $shows[$i+1]->{stop} eq $shows[$i]->{stop} and title($shows[$i+1]) eq title($shows[$i])) { $shows[$i]->{dupe} = 1; # duplicate show } else { # --- just adjust previous stop time $count_adjust++; $shows[$i]->{stop} = $shows[$i+1]->{start}; } } } } # --- append timezone info and strip fields that XMLTV doesn't need/want for my $channel (keys %showlists) { for my $show (@{ $showlists{$channel} }) { next if exists $show->{dupe}; $show->{start} =~ s/ \+.*|$/ $conf->{TZ}/; $show->{stop} =~ s/ \+.*|$/ $conf->{TZ}/; $show->{start} =~ tr/://d; $show->{stop} =~ tr/://d; delete $show->{guidedate}; delete $show->{row}; } } # --- now write to xml my %writer_args = ( encoding => 'ISO-8859-1' ); if ($opt_output) { my $fh = new IO::File(">$opt_output") or die "can't open $opt_output: $!"; $writer_args{OUTPUT} = $fh; } my $writer = new XMLTV::Writer(%writer_args); $writer->start ( { 'source-info-url' => $NMSN, 'source-info-name' => "NMSN TV Guide", 'generator-info-name' => "XMLTV - tv_grab_au NMSN v$Revision"} ); for my $channel (sort keys %$channels) { my $chanid = $chanid{lc $channel}; $writer->write_channel( { 'display-name' => [[$channel, $lang]], 'id' => $chanid } ); # --- write duplicated channel definitions while (my ($name, $otherid) = each %{ $duplicate->{$chanid} }) { $writer->write_channel( { 'display-name' => [[$name, $lang]], 'id' => $otherid } ); } } for my $chanid (keys %showlists) { for my $show (@{ $showlists{$chanid} }) { next if exists $show->{dupe}; $writer->write_programme($show); for my $otherid (values %{ $duplicate->{$chanid} }) { $writer->write_programme({ %$show, 'channel' => $otherid }); } } } $writer->end(); # --- report stats and runtime printf "tv_grab_au: downloads = %d guide pages, %d detail pages\n" . "%d failed detail pages, %d stop times adjusted\n", $count_guide, $count_detail, $count_bad, $count_adjust; printf "tv_grab_au: finished in %d seconds\n", time() - $runtime; exit 0; # Game over, man! # =========================================================================== # --- subroutines # --- find a channel that matches the channel heading sub find_channel { my $chan_heading = shift; my $channel; for (sort { length($b) <=> length($a) } keys %chanid) { $channel = $_, last if $chan_heading =~ /$_/i; } return $channel; } # --- determine whether to get details sub want_details { my $show = shift; my $title = title($show); my $channel = $show->{channel}; my $string = "$channel:$title"; my $want = $slow; for my $expr (@skip_details) { $want = 0 if $string =~ /$expr/ } for my $expr (@want_details) { $want = 1 if $string =~ /$expr/ } return $want; } # --- get details from the closeup page for given show sub get_closeup_details { my ($show, $pid, $url) = @_; my $details = get_details_page($url) or return; my $title = title($show); my $guidedate = $show->{guidedate}; # --- use HTML::TreeBuilder to parse the details from the page... my $tree = HTML::TreeBuilder->new_from_content($details); # --- the details are in a two row table: first row is the header # ('Time', 'Program'), second row is the body of the table containing # the information we want. my $debuginfo = $debug ? join("", "===DEBUG===\npid=$pid, url=$url\n", "---PAGE--------\n$details\n", "---ENDPAGE-----\n===ENDDEBUG===\n") : ""; my $tb = $tree->look_down('_tag' => 'table', 'borderColor' => '#003366'); unless ($tb) { print "Unable to find table for pid=$pid, url=$url\n"; print $debuginfo; ++$count_bad; return } my ($hd,$bd) = $tb->content_list(); unless ($hd and $bd) { print "Unable to extract 2 rows of table for pid=$pid, url=$url\n"; print $debuginfo; ++$count_bad; return } # --- sanity check the header row unless ($hd->as_text =~ /Time .* Program/x) { print "Table header missing for pid=$pid, url=$url\n"; print $debuginfo; ++$count_bad; return } # --- remove breaks and "revert" some markup to simplify parsing $_->delete for $bd->look_down('_tag' => 'br'); $_->replace_with_content for $bd->look_down('_tag' => 'b'); $_->replace_with_content for $bd->look_down('_tag' => 'font'); # --- extract (text) content lists of the 2 row cells my @td; for ($bd->look_down('_tag' => 'table')) { push @td, grep { not ref($_) } $_->content_list for $_->look_down('_tag' => 'td'); } unless (@td > 7) { print "Warning: parsed data incomplete for pid=$pid, url=$url\n"; print map { "td[$_] = <$td[$_]>\n" } 0..$#td; print $debuginfo; ++$count_bad; return } # --- clean fields up a little s/(^\s+|\s+$)//g, s/\((.*?)\)/$1/g for @td; # --- Here's an example of the contents of @td at this point: # ("7:00 pm", "Southern Cross TEN Capital", "The All Time Greatest # Simpsons", "- Cape Feare", "30 mins , Rated: PG", "Genre:", # "Cartoon", "Sideshow Bob terrorises Bart after he is paroled # from prison.", "" ) my ($start0, $channel_hdr, $title1, $title2, $genre, $desc) = @td[0,1,2,3,6,7]; my ($duration, $rating) = (split(/\s*,\s*/, $td[4]), "", ""); my $channel = find_channel($channel_hdr); # --- is this a channel we know about? is it consistent with the guide? $channel = $chanid{lc $channel}; # -- convert to xmltv channel id unless ($channel eq $show->{channel}) { print "channel mismatch for '$title' (pid = $pid)\n"; print $debuginfo; ++$count_bad; return } # --- now clean up a few things $title2 =~ s/^\s*-\s*//; $rating =~ s/Rated:\s*//; $duration =~ s/mins/minutes/; # --- compute start and stop times my $start = ParseDate("$start0 $guidedate"); $start = DateCalc($start, "+ 1 day") # check for shows starting past if $show->{row} > 0 and # midnight... Date_Cmp($start, ParseDate("6am $guidedate")) < 0; $start = DateCalc($start, "- 1 day") # sometimes, the first item has if $show->{row} == 0 and # started at 11pm the date before! Date_Cmp($start, ParseDate("9am $guidedate")) > 0; my $stop = DateCalc($start, "+ $duration"); $show->{title} = [[$title1, $lang]]; $show->{start} = $start; $show->{stop} = $stop; $show->{'sub-title'} = [[$title2, $lang]] if $title2; $show->{desc} = [[$desc, $lang]] if $desc; $show->{category} = [[$genre, $lang]] if $genre; } # --- configure: query for region, services, and channels and write config sub configure { my $config_file = shift; my $date = dmY("today"); my $firstpage; XMLTV::Config_file::check_no_overwrite($config_file); # --- extract user's ids for channels (if there were any in # the config file), add the defaults then clear the channels hash $chanid{lc $_} = $channels->{$_} for keys %$channels; for (channel_mappings()) { my ($name, $id) = / \s* (.+?) \s* : \s* (\S+) /x or next; $chanid{lc $name} = $id unless $chanid{lc $name}; # use user's if defined } $channels = {}; $conf = {}; # --- get timezone $conf->{TZ} = ask("Please enter your timezone offset (default '+1000') :"); $conf->{TZ} = '+1000' unless $conf->{TZ} =~ /^ \s* \+\d\d\d\d \s* $/x; my @channellist; # --- now find list of services - note that this appears to be invariant, # so perhaps we should always offer the same list and skip the get? { my %servicenames = ( free => 1); $firstpage = with_progress("getting list of services", sub { get_page("$TVTODAY?channel=free") }); ++$servicenames{$1} while $firstpage =~ /channel=(\w+)/g; my @choices = sort keys %servicenames; my @flag = ask_many_boolean (0, map { "Grab listings for $_ channels" } @choices); for (0..$#choices) { next unless $flag[$_]; push @{$conf->{services}}, { name => $choices[$_] }; } # --- now loop over services; find region/regionid and list of # channels for my $service (@{$conf->{services}}) { my ($page, $base); if ($service->{name} eq 'free') { # --- get list of regions my %region; my $tree = HTML::TreeBuilder->new_from_content($firstpage); for ($tree->look_down('_tag' => 'select', 'name' => 'region')) { for ($_->look_down('_tag' => 'option')) { $region{$_->as_text()} = $_->attr('value'); } } $tree->delete(); my @choices = sort keys %region; $service->{region} = ask_choice("Select your region for free channels", $choices[0], @choices); $service->{regionid} = $region{$service->{region}}; $page = with_progress ( "getting list of channels free service in " . "$service->{region}", sub { get_guide_page($service, $date) } ); } else { # --- find regionid for service ($page, $base) = with_progress ( "getting regionid and channels for service $service->{name}", sub { get_content_base("$TVTODAY?channel=$service->{name}") } ); $service->{region} = "Australia"; ($service->{regionid}) = $base =~ /_(\d+).asp/ or die "cannot find regionid"; # page now has channel list too } # --- now append channels for this service my %skip; # --- find the channels my $tree = HTML::TreeBuilder->new_from_content($page); for ($tree->look_down('_tag' => 'table', 'class' => 'tv', 'width' => '100%') # only one table of this type ->look_down('_tag' => 'tr') # ..first row has channels ->look_down('_tag' => 'b')) { # ..in bold tags my $channel = $_->as_text; push @channellist, $channel; unless ($chanid{lc $channel}) { # check/define xmltvid my $id = lc($channel); $id =~ s/( ^\s+ | \s+$ | \W )//gx; $id .= ".$service->{name}.au"; # e.g., "foxtel.au", "free.au" print "Warning, unknown channel '$channel', using '$id' as id\n"; $chanid{lc $channel} = $id; } } $tree->delete(); } } my @select = ask_many_boolean (1, map { "subscribe to channel $_ -> $chanid{lc $_}" } @channellist); for (0..$#channellist) { next unless $select[$_]; my $name = $channellist[$_]; $channels->{$name} = $chanid{lc $name}; } my @channels = sort keys %$channels; # --- does the user want the slow option turned on by default? $conf->{slow} = ask_boolean("Show descriptions, ratings, genres and more accurate\n" . "time information is available by downloading individual\n" . "pages for each show, but this takes a lot longer\n\n" . "Do you want this (--slow) option to be on by default?"); # --- report configuration and ask for confirmation my $channel_count = @channels; my $services_info; for my $service (@{$conf->{services}}) { $services_info .= "service: name=$service->{name}, " . " region=$service->{region} (id=$service->{regionid})\n"; } die "aborting configuration" unless ask_boolean( "Please confirm the following configuration:\n" . " TZ = $conf->{TZ}\n" . " $services_info\n" . " ($channel_count subscribed channels)\n\n" . "[ use the '--list-channels' option for the\n" . " xmltvids to use in mythtvsetup ]\n\n" . " Continue?\n"); # --- open config file and write the configuration -d dirname($config_file) or mkdir dirname($config_file) or die "cannot create directory for $config_file: $!"; # --- dump as perl code using Data::Dumper open(CONF, ">$config_file") or die "cannot write to $config_file: $!"; print CONF Data::Dumper->Dump([$conf, $channels, $duplicate], ["conf", "channels", "duplicate"]); close CONF; print "wrote config_file: $config_file\n"; } # --------------------------------------------------------------------------- # --- we can avoid redirections by spoofing random MSIDs in the URLs use Digest::MD5 qw{md5_hex}; sub MSID { $spoofMSID ? "&MSID=" . md5_hex(rand) : "" } # --------------------------------------------------------------------------- # --- get NMSN guide for given date (perversely "ddmmYYYY") sub get_guide_page { my $service = shift; my $date = shift; my $url = "$NMSN/guide/${date}_$service->{regionid}.asp?" . "channel=$service->{name}" . MSID(); print "GET_GUIDE_PAGE($url)\n" if $debug; my $page = get_page($url) or print "Warning: Failed to get program listing for day $date\n"; ++$count_guide if $page; return $page; } # --------------------------------------------------------------------------- # --- get NMSN program details for given pid sub get_details_page { my $url = shift() . MSID(); print "GET_DETAILS_PAGE($url)\n" if $debug; my $page = get_page($url) or print "Warning: Failed to get program details from $url\n"; ++$count_detail; return $page; } # --------------------------------------------------------------------------- # --- get a page sub get_page { my $url = shift; my $page = (get_content_base($url))[0]; return $page; } # --------------------------------------------------------------------------- # get a page and its base (and report all redirections if debugging) # we attempt 5 times with 3 second pauses between failures. my $ua; sub get_content_base { my $url = shift; unless ($ua) { $ua = LWP::UserAgent->new('timeout' => 30); $ua->env_proxy; } my $response; for (1..5) { $response = $ua->get($url); last if $response->is_success; sleep 3; } unless ($response->is_success) { print "Warning: failed to read page $url in 5 attempts\n"; return undef; } if ($debug and (my $r = $response)->previous) { # track redirections print "GET_CONTENT_BASE redirection backtrace:\n"; while ($r) { print " ", $r->base, "\n"; $r = $r->previous } } my $page = $response->content; if ($page) { $page =~ s/ / /g; $page =~ s/jz\('(.*?)'\);/dejz($1)/ge; # "decrypt" data $page =~ s!!!g; # strip these useless tags } return $page, $response->base; } sub dejz {unescape(caesar(unescape(shift))) } sub caesar { local $_ = shift; s/(.)/chr(ord($1)-1)/ge; $_ } sub unescape { local $_ = shift; s/%(..)/chr(hex($1))/ge; $_ } # --------------------------------------------------------------------------- # show a progress message during call to code (given by closure) sub with_progress { my ($message, $sub) = @_; my $bar = new XMLTV::ProgressBar($message, 1); my @results = $sub->(); $bar->update, short_pause(), $bar->finish; return wantarray ? @results : $results[0]; } sub short_pause { select(undef, undef, undef, 0.33) } # --------------------------------------------------------------------------- # misc/pedantic stuff... sub title { (shift)->{title}->[0]->[0] } sub Ymd { UnixDate($_[0], "%Y%m%d") or die "problem in Ymd($_[0])" } sub dmY { UnixDate($_[0], "%d%m%Y") or die "problem in dmY($_[0])" } sub fixplural { # hardly seems worth it sometimes... but, standards... local $_ = shift; s/(\d+) (\s+) (\w+)s (\s)/$1 . $2 . $3 . ($1==1?"":"s") . $4/xe; $_ } # --------------------------------------------------------------------------- # here is the default channel list... comments welcome :-) sub channel_mappings { return grep ! /^#/, split "\n", qq{ # --- Free channels ABC NSW : nsw.abc.gov.au ABC QLD : qld.abc.gov.au ABC TAS : tas.abc.gov.au ABC ACT : act.abc.gov.au ABC2 : abc2.abc.gov.au Channel Seven Sydney : sydney.seven.com.au Channel Seven Queensland : queensland.seven.com.au Prime Southern : southern.prime.com.au CHANNEL SEVEN BRISBANE : brisbane.seven.com.au SBS Sydney : sydney.sbs.com.au SBS Queensland : queensland.sbs.com.au SBS News : news.sbs.com.au SBS EASTERN : eastern.sbs.com.au Network TEN Sydney : sydney.ten.com.au NETWORK TEN BRISBANE : brisbane.ten.com.au Southern Cross TEN Capital : capital.southerncrossten.com.au Southern Cross TEN Queensland : queensland.southerncrossten.com.au Channel Nine Sydney : sydney.nine.com.au CHANNEL NINE BRISBANE METRO : brisbane.nine.com.au WIN Television NSW : nsw.win.com.au WIN Television QLD : qld.win.com.au # --- Foxtel Arena TV : arena.foxtel.com.au BBC World : bbcworld.foxtel.com.au Cartoon Network : cartoon.foxtel.com.au Channel [V] : v.foxtel.com.au CNBC : cnbc.foxtel.com.au CNN : cnn.foxtel.com.au Discovery Channel : discovery.foxtel.com.au FOX News : foxnews.foxtel.com.au FOX8 : fox8.foxtel.com.au MAX : max.foxtel.com.au National Geographic Channel : natgeo.foxtel.com.au Nickelodeon : nickelodeon.foxtel.com.au Showtime : showtime.foxtel.com.au Showtime 2 : showtime2.foxtel.com.au Sky News : skynews.foxtel.com.au TV1 : tv1.foxtel.com.au UKTV : uktv.foxtel.com.au World Movies : worldmovies.foxtel.com.au A1 : a1.foxtel.com.au ACC : acc.foxtel.com.au ADULTS ONLY : adultsonly.foxtel.com.au ANIMAL PLANET : animalplanet.foxtel.com.au ANTENNA PACIFIC : antennapacific.foxtel.com.au ARENA+2 : arena2.foxtel.com.au AURORA : aurora.foxtel.com.au BLOOMBERG : bloomberg.foxtel.com.au BOOMERANG : boomerang.foxtel.com.au CLUB [V] : clubv.foxtel.com.au CMC : cmc.foxtel.com.au CRIME & INVESTIGATION NETWORK : crime.foxtel.com.au DISCOVERY HEALTH : health.discovery.foxtel.com.au DISCOVERY SCIENCE : science.discovery.foxtel.com.au DISCOVERY TRAVEL & ADVENTURE : travel.discovery.foxtel.com.au DISNEY CHANNEL : disney.foxtel.com.au E! : e.foxtel.com.au ESPN : espn.foxtel.com.au EUROSPORT NEWS : eurosportnews.foxtel.com.au FOOD : food.foxtel.com.au FOX CLASSICS : classics.foxtel.com.au FOX CLASSICS+2 : classics2.foxtel.com.au FOX SPORTS 1 : sports1.foxtel.com.au FOX SPORTS 2 : sports2.foxtel.com.au FOX8+2 : fox82.foxtel.com.au FTV : ftv.foxtel.com.au FUEL : fuel.foxtel.com.au HALLMARK CHANNEL : hallmark.foxtel.com.au HOW TO : howto.foxtel.com.au MAIN EVENT : mainevent.foxtel.com.au MOVIE EXTRA : movieextra.foxtel.com.au MOVIE GREATS : moviegreats.foxtel.com.au MOVIE ONE : movieone.foxtel.com.au MOVIE ONE TAKE 2 : movieonetake2.foxtel.com.au MTV : mtv.foxtel.com.au NICK JNR : nickjnr.foxtel.com.au OVATION : ovation.foxtel.com.au RAI INTERNATIONAL : rai.foxtel.com.au SHOWTIME GREATS : showtimegreats.foxtel.com.au SKY RACING : skyracing.foxtel.com.au TCM : tcm.foxtel.com.au THE BIOGRAPHY CHANNEL : biography.foxtel.com.au THE COMEDY CHANNEL : comedy.foxtel.com.au THE COMEDY CHANNEL+2 : comedy2.foxtel.com.au THE HISTORY CHANNEL : history.foxtel.com.au THE LIFESTYLE CHANNEL : lifestyle.foxtel.com.au THE LIFESTYLE CHANNEL+2 : lifestyle2.foxtel.com.au THE WEATHER CHANNEL : weather.foxtel.com.au TV1+2 : tv12.foxtel.com.au TVSN : tvsn.foxtel.com.au UKTV+2 : uktv2.foxtel.com.au VH1 : vh1.foxtel.com.au W : w.foxtel.com.au }; } # Additional documentation: # # Downloading of details pages can be controlled by the --slow option # (possibly set by default during configuration) and regular expressions # contained in the following two files (one regexp per line): # # ~/.tv_grab_au-detailed contains a list of regular expressions matching # "chanid:titles" of shows for which we want details # e.g.: Doctor Who # Simpsons # abc.gov.au:.*Chef # # ~/.tv_grab_au-undetailed regular expressions matching "chanid:titles" to # skip details downloads # e.g.: News # news.sbs.com.au # # NB: a match against the detailed list overrides any undetailed matches. # When changing these files, use the --ignore-cache option to ensure # all new details are downloaded.