#!/usr/bin/perl
#
# myth.filemaintenance.pl
#
# This script is a merging of myth.find_orphans and myth.rebuilddatabase.
# No one in their right mind should use it until it's been vetted by someone
# else.  Comments from the two original files follow, modified where they
# refer to something that has been cut out.
#
# Unlike the original versions, this version relies on the MythTV config or
# UPnP to connect.
#
######################################################################################
# check for recording anomalies -
#   based somewhat on greg froese's "myth.rebuilddatabase.pl"
#   -- Lincoln Dale, September 2006
# 2007-03-11: Added pretty print of unknown files vs. orphaned thumbnails.
#             (Robert Kulagowski)
#
# The intent of this script is to be able to find orphaned rows in the
# 'recorded' table (entries which don't have matching media files) and
# orphaned media files (potentially taking up gigabytes of otherwise usable
# disk space) which have no matching row in the 'recorded' db table.
#
# By default, running the script will simply return a list of problems it
# finds.  Running with --dodbdelete will remove db recorded rows for which
# there is no matching media file.  Running with --dodelete will delete media
# files for which there is no matching db record.  Running with --dodbadd
# will offer to create a db record for each unknown media file.
#
# This script may be useful to fix up some orphaned db entries (causes mythweb
# to run verrry slow) as well as reclaim some disk space from some orphaned
# media files.  (In an ideal world, neither of these would ever happen, but
# i've seen both happen in reality.)  This script makes it easy to keep track
# of whether it has or hasn't happened, even if you have thousands of
# recordings and terabytes of stored media.
#
# No warranties expressed or implied.  If you run this and it deletes all
# your recordings and sets mythtv to fill up all your disk space with The
# Home Shopping Network, it's entirely at your own risk.
#
######################################################################################
## myth.rebuilddatabase.pl
## written by greg froese (g_froese@yahoo.com)
## install instructions by Robert Kulagowski (rkulagow@rocketmail.com)
##
## I had trouble maintaining my catalog of recordings when upgrading to
## cvs and from cvs to more recent cvs, so I wrote this.
##
## Here is what this program is supposed to do.
##
## It first scans through your myth database and displays all shows listed
## in the recorded table.
##
## It will then traverse the specified MythTV recordings directory
## (set with --dir /YOURMYTHDIR) and find all files with
## video extensions (set with --ext) and check if they appear in the
## database.  If no entry exists you will be prompted for identifying
## information and a recording entry will be created.
##
## See the help message below for options.
##
## Use at your own risk.  Standard gnu warranty, or lack thereof, applies.
##
## To run:
##   Ensure that the script is executable
##     chmod a+x myth.rebuilddatabase.pl
##     ./myth.rebuilddatabase.pl
##
## Change log:
## 9-19-2003: (awithers@anduin.com)
##   Anduin fights the urge to make code more readable (aka C like).  Battle
##   of urges ends in stalemate: code was reindented but not "changed" (much).
##   To make it a little less useless a contribution also did:
##   - added ability to grab title/subtitle/description from oldrecorded
##   - support for multiple backends (via separation of host and dbhost
##     and bothering to insert the host in the recorded table).
##   - removed dependency on File::Find::Rule stuff
##   - attempt to determine good default host name
##   - provide default for --dir from DB (if not provided)
##   - added --test_mode (for debugging, does everything except INSERT)
##   - added --quick_run for those occasions where you just don't have
##     the sort of time to be sitting around hitting enter
##   - changed all the DB calls to use parameters (avoids escape issues,
##     and it looks better)

use strict;
use warnings;

use DBI;
use Getopt::Long;
use Sys::Hostname;
use File::Basename;
use Date::Parse;
use Time::Format qw(time_format);
use DBD::mysql;
use MythTV;
use Data::Dumper;
use File::stat;         # NOTE: replaces stat() with an object-returning version
use Time::localtime;    # NOTE: replaces localtime() and provides ctime()

my $progname = "myth.filemaintenance.pl";
my $revision = "0.20";

#
# options
#
my $opt_host       = hostname();
my $opt_ext        = "{nuv,mpg,mpeg,avi}";   # accepted for compatibility; not used below
my $opt_file       = "";                     # accepted for compatibility; not used below
my $opt_dir        = "";
my $opt_dodelete   = 0;
my $opt_dodbdelete = 0;
my $opt_doadd      = 0;
my $opt_help       = 0;
my $opt_single     = 1;   # 1: check every row of 'recorded'; 0: only rows for --host
my $opt_test       = 0;
my $debug          = 0;
my $verbose        = 0;   # accepted for compatibility; not used below
my $show_existing  = 0;   # accepted for compatibility; not used below
my $quick_run      = 0;
my $norename       = 0;
my $storagegroup   = "Default";
my @answers;              # canned answers consumed by GetAnswer() before reading STDIN

# Patterns used to pull the channel id and timestamps out of a file name.
my $date_regx    = qr/(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)/;
my $db_date_regx = qr/(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)/;
my $channel_regx = qr/(\d\d\d\d)/;

# '-s' used to be a plain flag whose default was already 1, which made the
# per-host query unreachable.  It is now negatable (--nosingle / --no-s)
# while '-s' keeps working exactly as before.
GetOptions(
    'host=s'           => \$opt_host,
    'dir=s'            => \$opt_dir,
    'dodelete'         => \$opt_dodelete,
    'dodbdelete'       => \$opt_dodbdelete,
    'dodbadd'          => \$opt_doadd,
    'debug+'           => \$debug,
    'help'             => \$opt_help,
    'h'                => \$opt_help,
    'v'                => \$opt_help,
    's|single!'        => \$opt_single,
    'verbose+'         => \$verbose,
    'group=s'          => \$storagegroup,
    'show_existing|se' => \$show_existing,
    'quick_run|qr'     => \$quick_run,
    'test_mode|t|tm'   => \$opt_test,
    'ext=s'            => \$opt_ext,
    'file=s'           => \$opt_file,
    'answer=s'         => \@answers,   # =s{,} would be nice but isn't implemented widely
    'norename'         => \$norename,
) or do {
    print STDERR "Try '$progname --help' for a list of options.\n";
    exit(1);
};

if ($opt_help) {
    # NOTE(review): the original help text was lost from the source; this
    # text is reconstructed from the option table above.
    print <<"EOF";
$progname (rev $revision)
Finds 'recorded' rows without a media file and media files without a
'recorded' row, and optionally fixes either.

Usage: $progname [options]

  --host=HOST       host name stored in new records and used by --nosingle
                    (default: this machine's host name)
  --dir=DIR[,DIR]   recording directories to scan
                    (default: directories reported by the MythTV bindings)
  --dodelete        delete media files that have no database record
  --dodbdelete      delete database records that have no media file
  --dodbadd         offer to create a database record for each unknown file
  --nosingle        only check recordings whose hostname matches --host
  --group=NAME      storage group for new records (default: Default)
  --norename        keep the original file name when adding a record
  --quick_run       use guessed title/subtitle/description without asking
  --test_mode       show the INSERT and mythcommflag command; run neither
  --answer=TEXT     canned answer for the next prompt (may be repeated)
  --debug           print extra diagnostics (may be repeated)
  --help, -h, -v    show this message
EOF
    exit(0);
}

# Get a MythTV object.
# NOTE(review): the constructor call was damaged in the source; only the
# trailing "0});" survived.  The 'connect' => 0 argument is reconstructed and
# presumably skips the backend protocol connection - confirm against the
# MythTV Perl bindings in use.
my $myth = MythTV->new({'connect' => 0});

# Database handle provided by the bindings
my $dbh = $myth->{'dbh'};

my $valid_recordings   = 0;
my $missing_recordings = 0;
my $errors             = 0;
my $unknown_files      = 0;
my $known_files        = 0;
my $unknown_size       = 0;
my $known_size         = 0;
my $unknown_thumbnail  = 0;
my %seen_basename;       # file name => true for every file the database knows
my %sorted_filesizes;    # size in bytes => report lines for unknown files
my %unknown_file_list;   # full path => report line, candidates for --dodbadd
my @dirs;                # recording directories, without trailing slash

# Lazily initialised by find_file()
my $schema_ver;
my $storage_groups;

if ($opt_dir eq "") {
    # nothing given, use the directories the bindings know about
    @dirs = @{ $myth->{'video_dirs'} || [] };
    if ($debug) {
        print STDERR "Recording directories found on (" . $myth->{'hostname'}
            . ") :'" . join(", ", @dirs) . "'\n";
        print Dumper(\@dirs);
    }
}
else {
    foreach my $d (split(/,/, $opt_dir)) {
        $d =~ s/\/$//;    # strip trailing /
        push(@dirs, $d);
    }
}

#
# look in recorded table, make sure we can find every file ..
#
my $select = "SELECT title, subtitle, starttime, endtime, chanid, basename FROM recorded";
my @select_args;
if (!$opt_single) {
    # If you use multiple backends recording to DIFFERENT storage groups,
    # restricting to one host may give false negatives.
    $select .= " WHERE hostname=(?)";
    push @select_args, $opt_host;
}
# else: if you use multiple backends recording to the same storage groups,
# checking every row may give false positives.
$select .= " ORDER BY starttime";

my $sth = $dbh->prepare($select);
$sth->execute(@select_args)
    or die "Could not execute ($select): " . $dbh->errstr . "\n";

# Exact match with a placeholder: the old LIKE "..." form treated the '_' in
# every basename as a single-character wildcard and was built by string
# interpolation.
my $delete_sql = "DELETE FROM recorded WHERE basename = (?) LIMIT 1";

while (my @row = $sth->fetchrow_array) {
    my ($title, $subtitle, $starttime, $endtime, $channel, $basename)
        = map { defined $_ ? $_ : "" } @row;

    # see if we can find it...
    my $loc = find_file($basename);
    if (!defined $loc || $loc eq "") {
        printf "Missing media: %s (title:%s, start:%s)\n", $basename, $title, $starttime;
        $missing_recordings++;

        if ($opt_dodbdelete) {
            printf "performing database delete: %s [basename=%s]\n", $delete_sql, $basename;
            $dbh->do($delete_sql, undef, $basename)
                or die "Could not execute $delete_sql: " . $dbh->errstr . "\n";
        }
    }
    else {
        $valid_recordings++;
        $seen_basename{$basename}++;
        $seen_basename{$basename . ".png"}++;    # thumbnail
    }
}

if ($debug) {
    print STDERR "###########################################\n";
    print STDERR "Recordings found: $valid_recordings\n";
    print STDERR "###########################################\n";
}

#
# look in recording directories, see if there are extra files not in database
#
foreach my $this_dir (@dirs) {
    opendir(my $dh, $this_dir) || die "cannot open directory $this_dir: $!\n";
    foreach my $this_file (readdir($dh)) {
        my $path = "$this_dir/$this_file";

        if (!-f $path) {
            printf "NOT A FILE: %s/%s\n", $this_dir, $this_file if $debug;
            next;
        }
        next if ($this_file eq "nfslockfile.lock");

        # File::stat object; may be undef if the file vanished meanwhile
        my $st            = stat($path);
        my $this_filesize = $st ? $st->size : 0;

        if ($seen_basename{$this_file}) {
            $known_files++;
            $known_size += $this_filesize;
            printf "KNOWN file [%s]: %s/%s\n", pretty_filesize($this_filesize), $this_dir, $this_file if $debug;
            next;
        }

        my $report = sprintf "unknown file [%s]: %s/%s \tcreated %s\n",
            pretty_filesize($this_filesize), $this_dir, $this_file,
            ($st ? ctime($st->ctime) : "unknown");
        $sorted_filesizes{$this_filesize} .= $report;
        $unknown_size += $this_filesize;

        my $deleted = 0;
        if ($opt_dodelete) {
            printf STDERR "deleting [%s]: %s/%s\n", pretty_filesize($this_filesize), $this_dir, $this_file;
            if (unlink $path) {
                $deleted = 1;
            }
            else {
                $errors++;
                print "ERROR: could not delete $this_dir/$this_file: $!\n";
            }
        }

        if (substr($this_file, -4) eq ".png") {
            $unknown_thumbnail++;
        }
        else {
            $unknown_files++;
            # Keyed by full path so AddFile() knows which directory the file
            # lives in; a file that was just deleted is not offered for import.
            $unknown_file_list{$path} = $report unless $deleted;
        }
    }
    closedir $dh;
}

#
# finished, report results
#
print "########################################################\n";
print "Unknown files:\n";
foreach my $key (sort { $a <=> $b } keys %sorted_filesizes) {
    # plain print: file names may contain '%'
    print $sorted_filesizes{$key};
}
print "########################################################\n";

if ($opt_doadd) {
    foreach my $file (sort keys %unknown_file_list) {
        if (AddFile($file) && !$opt_test) {
            $unknown_files--;
        }
    }
}

print "Summary:\n";
printf "  Host: %s, Directories: %s\n", $opt_host, join(" ", @dirs);
printf "  %d ERRORS ENCOUNTERED (see above for details)\n", $errors if ($errors > 0);
printf "  %d valid recording%s, %d missing recording%s %s\n",
    $valid_recordings,   ($valid_recordings != 1   ? "s" : ""),
    $missing_recordings, ($missing_recordings != 1 ? "s" : ""),
    ($missing_recordings > 0
        ? ($opt_dodbdelete
            ? "were fixed"
            : "not fixed, check above is valid and use --dodbdelete to fix")
        : "");
printf "  %d known media files using %s\n  %d orphaned thumbnails with no corresponding recording\n  %d unknown files using %s %s\n",
    $known_files, pretty_filesize($known_size),
    $unknown_thumbnail, $unknown_files, pretty_filesize($unknown_size),
    ($unknown_files > 0
        ? ($opt_dodelete
            ? "were fixed"
            : "not fixed, check above and use --dodelete or --dodbadd to clean up if the above output is accurate")
        : "");

exit(0);

###########################################################################
# filesize bling
#
# Formats a byte count using decimal (power of 1000) units: GB, MB, KB or B.
sub pretty_filesize {
    my ($fsize) = @_;
    return sprintf "%0.1fGB", ($fsize / 1000000000) if ($fsize >= 1000000000);
    return sprintf "%0.1fMB", ($fsize / 1000000)    if ($fsize >= 1000000);
    return sprintf "%0.1fKB", ($fsize / 1000)       if ($fsize >= 1000);
    return sprintf "%0.0fB", $fsize;
}

###########################################################################
# find a file in directories without globbing
#
# Takes the basename of a recording.  Returns a true value locating it (the
# storage-group directory, or the full path found by directory search), or
# nothing when the file cannot be found.
sub find_file {
    my ($fname) = @_;

    # first, let's check where it SHOULD be (the schema version is read once)
    $schema_ver = $myth->backend_setting('DBSchemaVer') unless defined $schema_ver;
    if (defined $schema_ver && $schema_ver >= 1171) {
        $storage_groups = MythTV::StorageGroup->new() unless defined $storage_groups;
        printf STDERR "Finding ('%s') by storage group\n", $fname if ($debug);
        my $sg_dir = $storage_groups->FindRecordingDir($fname);
        return $sg_dir if (defined $sg_dir && $sg_dir ne "");
    }

    # if no luck there, check the list of where it MIGHT be
    printf STDERR "Finding ('%s') by directory search\n", $fname if ($debug);
    foreach my $d (@dirs) {
        my $f = $d . "/" . $fname;
        return $f if (-e $f);
    }

    # no dice
    return;
}

###########################################################################
# Prompts user for a response and optionally provides a default value
#
# Answers queued with --answer are consumed first; otherwise one line is read
# from STDIN.  An empty line (or end of input) selects the default.
sub GetAnswer {
    my ($prompt, $default) = @_;

    print $prompt;
    if (defined $default && $default ne "") {
        print " [", $default, "]";
    }
    print ": ";

    my $answer;
    if (@answers) {
        $answer = shift @answers;
        print $answer, "\n";
    }
    else {
        $answer = <STDIN>;
        chomp($answer) if defined $answer;
        # only an empty reply selects the default, so "0" is a valid answer
        $answer = $default if (!defined $answer || $answer eq "");
    }
    return $answer;
}

###########################################################################
# there's a version of this in CPAN but I don't want to add another dependency
#
# Backslash-escapes every non-alphanumeric character except '/' and '.'.
sub EscapeFilename {
    my $fn = $_[0];
    # escape everything that's possibly dangerous
    $fn =~ s{([^[:alnum:]])}{\\$1}g;
    # it's embarrassing to escape / and . so put those back
    $fn =~ s{\\([/.])}{$1}g;
    return $fn;
}

###########################################################################
# Adds a file to the mythtv database
#
# Takes the path of an unknown media file (a bare file name is treated as
# relative to the current directory).  Prompts for the record details,
# inserts a row into 'recorded' (or prints it in test mode), renames the
# file to chanid_starttime.ext unless --norename was given, and optionally
# rebuilds the seek table.  Returns 1 when the file was handled and 0 when
# it was skipped.
sub AddFile {
    my ($path) = @_;
    my $show = basename($path);
    my $dir  = dirname($path);

    print $unknown_file_list{$path} if defined $unknown_file_list{$path};
    return 0 unless GetAnswer("Do you want to import?", "y") eq "y";

    # normal case: import file into the database
    my ($channel, $syear, $smonth, $sday, $shour, $sminute, $ssecond,
        $eyear, $emonth, $eday, $ehour, $eminute, $esecond);
    my ($starttime, $endtime);

    # filename varies depending on when the recording was
    # created.  Glean as much as possible from the name.
    if ($show =~ m/$channel_regx\_/) {
        $channel = $1;
    }
    else {
        ($channel) = $dbh->selectrow_array("select min(chanid) from channel");
    }

    if ($show =~ m/$channel_regx\_$date_regx\./) {
        ($syear, $smonth, $sday, $shour, $sminute, $ssecond) = ($2, $3, $4, $5, $6, $7);
    }

    if ($show =~ m/$channel_regx\_$date_regx\_$date_regx/) {
        ($syear, $smonth, $sday, $shour, $sminute, $ssecond) = ($2, $3, $4, $5, $6, $7);
        ($eyear, $emonth, $eday, $ehour, $eminute, $esecond) = ($8, $9, $10, $11, $12, $13);
    }

    my $guess_title = $show;
    $guess_title =~ s/[.][^\.]*$//;
    $guess_title =~ s/_/ /g;
    my $guess_subtitle    = "";
    my $guess_description = "Recovered file " . $show;

    # Timestamps are kept in "YYYY-MM-DD HH:MM:SS" form so that both the
    # database and str2time() understand them.
    if (defined $ssecond) {
        $starttime = "$syear-$smonth-$sday $shour:$sminute:$ssecond";
    }
    if (defined $esecond) {
        $endtime = "$eyear-$emonth-$eday $ehour:$eminute:$esecond";
    }

    # have enough to look for a past recording?
    if (defined $starttime) {
        print "Checking for a recording...\n";

        my $guess = "select title, subtitle, description from oldrecorded where chanid=(?) and starttime=(?)";
        my $guess_sth = $dbh->prepare($guess);
        $guess_sth->execute($channel, $starttime)
            or die "Could not execute ($guess): " . $dbh->errstr . "\n";

        if (my @row = $guess_sth->fetchrow_array) {
            ($guess_title, $guess_subtitle, $guess_description)
                = map { defined $_ ? $_ : "" } @row;
        }

        print "Found an orphaned file, initializing database record\n";
        print "Channel: $channel\n";
        print "Start time: $smonth/$sday/$syear - $shour:$sminute:$ssecond\n";
        print "End time: $emonth/$eday/$eyear - $ehour:$eminute:$esecond\n"
            if defined $endtime;
    }

    # what about checking for guide data?  (only possible with a start time)
    if (defined $starttime && $guess_description =~ /^Recovered file/) {
        print "Checking for guide data...\n";

        my $guess = "select title, subtitle, description from program where chanid=(?) and starttime=(?)";
        my $guess_sth = $dbh->prepare($guess);
        $guess_sth->execute($channel, $starttime)
            or die "Could not execute ($guess): " . $dbh->errstr . "\n";

        if (my @row = $guess_sth->fetchrow_array) {
            ($guess_title, $guess_subtitle, $guess_description)
                = map { defined $_ ? $_ : "" } @row;
            print "Using guide data informaton for defaults\n";
        }
    }

    my $newtitle       = $guess_title;
    my $newsubtitle    = $guess_subtitle;
    my $newdescription = $guess_description;

    if (!$starttime) {
        # use file time if we can't infer time from name
        # (stat() is File::stat's object version here)
        my $st = stat($path);
        $starttime = time_format("yyyy-mm{on}-dd hh:mm{in}:ss", ($st ? $st->mtime : time));
    }

    my $recgroup;
    if ($quick_run) {
        # apply the same fallbacks the interactive prompts would offer
        $newtitle    = "Recovered"              if $newtitle eq "";
        $newsubtitle = "(Recovered File $show)" if $newsubtitle eq "";
        print("QuickRun defaults:\n");
        print(" title: '$newtitle'\n");
        print(" subtitle: '$newsubtitle'\n");
        print(" description: '$newdescription'\n");
        $recgroup = "Default";
    }
    else {
        $channel        = GetAnswer("Enter channel", $channel);
        $newtitle       = GetAnswer("... title", ($newtitle ne "") ? $newtitle : "Recovered");
        $newsubtitle    = GetAnswer("... subtitle", ($newsubtitle ne "") ? $newsubtitle : "(Recovered File $show)");
        $newdescription = GetAnswer("Description", $newdescription);
        $starttime      = GetAnswer("... start time (YYYY-MM-DD HH:MM:SS)", $starttime);
        $recgroup       = GetAnswer("... Recording Group", "Default");
    }

    my $start_epoch = str2time($starttime);
    if (!defined $start_epoch) {
        print "ERROR: cannot parse start time '$starttime', skipping $show\n";
        return 0;
    }

    my $duration  = 60;
    my $end_epoch = $endtime ? str2time($endtime) : undef;
    if (defined $end_epoch && $end_epoch > $start_epoch) {
        $duration = ($end_epoch - $start_epoch) / 60;
    }
    $duration = GetAnswer("... duration (in minutes)", $duration);
    if (!defined $duration || $duration !~ /^\d+(?:\.\d+)?$/) {
        print "Duration is not a number, assuming 60 minutes\n";
        $duration = 60;
    }
    $endtime = time_format("yyyy-mm{on}-dd hh:mm{in}:ss", $start_epoch + $duration * 60);

    my $mythfile;
    if ($norename) {
        $mythfile = $show;
    }
    else {
        my ($ext) = $show =~ /([^\.]*)$/;
        my $time1 = $starttime;
        $time1 =~ s/[ \-:]//g;
        $mythfile = sprintf("%s_%s.%s", $channel, $time1, $ext);

        # never clobber a different file that already has the target name
        if ($mythfile ne $show && -e "$dir/$mythfile") {
            print "WARNING: $dir/$mythfile already exists, keeping the name $show\n";
            $mythfile = $show;
        }
    }
    my $target = $dir . "/" . $mythfile;

    my $sql = "insert into recorded (chanid, starttime, endtime, title, subtitle, description, hostname, basename, progstart, progend, storagegroup, recgroup) values ((?), (?), (?), (?), (?), (?), (?), (?), (?), (?), (?), (?))";
    my @values = ($channel, $starttime, $endtime, $newtitle, $newsubtitle,
                  $newdescription, $opt_host, $mythfile, $starttime, $endtime,
                  $storagegroup, $recgroup);

    if ($opt_test) {
        $sql =~ s/\(\?\)/"%s"/g;
        my $statement = sprintf($sql, @values);
        print("Test mode: insert would have been been:\n");
        print($statement, ";\n");
    }
    else {
        my $insert_sth = $dbh->prepare($sql);
        $insert_sth->execute(@values)
            or die "Could not execute ($sql): " . $dbh->errstr . "\n";

        if ($mythfile ne $show && !rename($path, $target)) {
            print "ERROR: could not rename $path to $target: $!\n";
            $target = $path;    # the file is still under its old name
        }
    }

    print("Building a seek table should improve FF/RW and JUMP functions when watching this video\n");

    if (GetAnswer("Do you want to build a seek table for this file?", "y") eq "y") {
        # mythcommflag takes --file for myth-originated files and
        # --video for everything else.  We assume it came from myth
        # if it's a .nuv or if it's an mpeg where the name has that
        # chanid_starttime format
        my $source_flag
            = ($show =~ /[.]nuv$/ || ($show =~ /[.]mpg$/ && defined $ssecond))
            ? "--file"
            : "--video";
        my @commflag = ("mythcommflag", "--rebuild", $source_flag, $target);

        if (!$opt_test) {
            # list form: the file name never passes through a shell
            system(@commflag) == 0
                or print "WARNING: mythcommflag did not complete successfully\n";
            print "\n";    # cursor isn't always on a new line after commflagging
        }
        else {
            print("Test mode: exec would have done\n");
            print(" Exec: '", join(" ", @commflag[0 .. 2], EscapeFilename($target)), "'\n");
        }
    }

    return 1;
}