#!/usr/bin/perl

=head1 NAME

tv_grab_fr - Grab TV listings for France.

=head1 SYNOPSIS

To configure: tv_grab_fr --configure [--config-file FILE] [--gui OPTION]
To grab listings: tv_grab_fr [--output FILE] [--quiet]
Slower, detailed grab: tv_grab_fr --slow [--output FILE] [--days N] [--offset N] [--quiet]
To show capabilities: $0 --capabilities
To show version: $0 --version
Help: tv_grab_fr --help

=head1 DESCRIPTION

Output TV listings for several channels available in France (Hertzian,
Cable/satellite, Canal+ Sat, TPS).  The data comes from
telepoche.guidetele.com.  The default is to grab as many days as possible
from the current day onwards. By default the program description are
not downloaded, so if you want description and ratings, you should
active the --slow option.

B<--configure> Grab channels informations from the website and ask for
channel type and names.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--output FILE> Write to FILE rather than standard output.

B<--days N> Grab N days starting from today, rather than as many as
possible. Due to the website organization, the speed is exactly the
same, whatever the number of days is until you activate the --slow
option. If --slow is not specified, --days will thus only filter the
data without speeding up the grabber.

B<--offset N> Start grabbing N days from today, rather than starting
today.  N may be negative. Due to the website organization, N cannot
be inferior to -1.  As with --days, this is only useful for limiting
downloads in --slow mode.

B<--slow> Get additional information from the website, like program
description, reviews and credits.

B<--quiet> Suppress the progress messages normally written to standard
error.

B<--capabilities> Show which capabilities the grabber supports. For more
information, see L<http://membled.com/twiki/bin/view/Main/XmltvCapabilities>

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>

=head1 AUTHOR

Sylvain Fabre, centraladmin@lahiette.com
with some patches from :
  - Francois Gouget, fgouget@free.fr
  - Niel Markwick, nielm@bigfoot.com

=cut

# TODO / FIXME
# - perhaps we should internationalize messages and docs?
# - try to detect the language based on the country so we can make use of the
#   set the VO, VF and original title markers
# - due to the layout of the grid program data, a given show can appear more
#   than once, first without an icon and then with the icon. But in such a
#   case we miss the icon
# - fix the character encoding handling. Then we can remove the no warning
#   below
# - detect when the website returns an incomplete page. See the thread at:
#   http://sourceforge.net/mailarchive/message.php?msg_id=15128312
# - detect and workaround the website returning the wrong date for pages
#   generated between midnight and 6am
# - detect and workaround the website cache that keeps returning stale pages
# - extend Get_nice so we don't have to do our own downloads. See the thread
#   at:
#   http://sourceforge.net/mailarchive/message.php?msg_id=15220040
# - don't leave temporary files hanging around in /tmp. In fact don't use
#   temporary files at all
# - fix our Memoize usage so the generated cache file can be reused from one
#   day to the next
# - investigate using the per-day pages which would be more efficient for
#   --days 1 to 5
# - investigate how to better handle France 5 and Arte as they share a single
#   channel for over-the-air broadcasts, but each have their own channel on
#   cable, satellite and ADSL. See the thread at:
#   http://sourceforge.net/mailarchive/message.php?msg_id=15181920

use XMLTV::Usage <<END
$0: get French television listings in XMLTV format
To configure: tv_grab_fr --configure [--config-file FILE]
To grab listings: tv_grab_fr [--output FILE] [--quiet]
Slower, detailed grab: tv_grab_fr --slow [--output FILE] [--days N] [--offset N] [--quiet]
END
  ;

use warnings;
use strict;
use utf8;
use XMLTV::Version '$Id: tv_grab_fr,v 1.62 2006/05/19 15:12:02 fgouget Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig cache/;
use XMLTV::Description 'France';
use Getopt::Long;
use HTML::Entities; # parse entities
use IO::File;
use URI;
use Date::Manip;
use XMLTV;
use XMLTV::Ask;
use XMLTV::ProgressBar;
use XMLTV::Mode;
use XMLTV::Config_file;
use XMLTV::DST;
use LWP;
use XMLTV::Get_nice;
use XMLTV::Memoize;
use File::Temp;
use LWP::Simple;

#***************************************************************************
# Main declarations
#***************************************************************************
my $GRID_BY_CHANNEL = 'http://telepoche.guidetele.com/gtv/semaine?openagent&d=0&c=';
my $GRID_FOR_CHANNEL = 'http://telepoche.guidetele.com/grille/EMWD-6KWLUE/';
my $SHEET_URL = "http://telepoche.guidetele.com/fiche/emi_";
my $ROOT_URL  = "http://telepoche.guidetele.com";
my $LANG = "fr";
my $MAX_STARS = 4;
my $VERSION   = "260501-02";

# FIXME: Temporary avoid XML warnings (to be investigated)
no warnings;

# Grid id defined by the website according to channel types (needed to build the URL)
my %GridType = (  "HERTZIENNE" => "EMWD-66DGBM",
                  "TNT"        => "EMWD-6B2HZ3",
                  "CABLE/SAT"  => "EMWD-66DGCT",
                  "TPS"        => "EMWD-66DJQG",
                  "CANAL SAT"  => "EMWD-66DJEA",
                  "FREEBOX"    => "EMWD-66DJXL",
                  "ETRANGERES" => "EMWD-66DJAL" );

# Slot of hours according to the website (needed to build the URL)
my @offsets = (2, 3, 4, 5, 6, 7);

#***************************************************************************
# Global variables allocation according to options
#***************************************************************************
XMLTV::Memoize::check_argv('XMLTV::Get_nice::get_nice_aux');
my ($opt_days,  $opt_help,  $opt_output,  $opt_offset,  $opt_gui, $opt_quiet,  $opt_list_channels, $opt_config_file, $opt_configure, $opt_slow);
$opt_quiet  = 0;
# The website is able to store up to nine days from now
my $default_opt_days = 9;
$opt_output = '-'; # standard output
GetOptions('days=i'    => \$opt_days,
     'help'      => \$opt_help,
     'output=s'  => \$opt_output,
     'offset=i'  => \$opt_offset,
     'quiet'     => \$opt_quiet,
     'configure' => \$opt_configure,
     'config-file=s' => \$opt_config_file,
     'gui:s'     => \$opt_gui,
     'list-channels' => \$opt_list_channels,
     'slow' => \$opt_slow
    )
  or usage(0);

#***************************************************************************
# Options processing, warnings, checks and default parameters
#***************************************************************************
die 'Number of days must not be negative'  if (defined $opt_days && $opt_days < 0);
die 'Cannot get more than one day before current day' if (defined $opt_offset && $opt_offset < -1);
usage(1) if $opt_help;

XMLTV::Ask::init($opt_gui);

# The options can be used, but we default them if not set.
$opt_offset = 0 if not defined $opt_offset;
$opt_days = $default_opt_days if not defined $opt_days;

if ( (($opt_offset + $opt_days) > $default_opt_days) or ($opt_offset > $default_opt_days) ) {
    $opt_days = $default_opt_days - $opt_offset;
    if ($opt_days < 0) {
        $opt_offset = 0;
        $opt_days = $default_opt_days;
    }
    say <<END
The website does not handle more than $default_opt_days days.
So the grabber is now configure with --offset $opt_offset --days $opt_days
END
;
}

#***************************************************************************
# Last init before doing real work
#***************************************************************************
my %results;
my $lastdaysoffset = $opt_offset + $opt_days - 1;

# Now detects if we are in configure mode
my $mode = XMLTV::Mode::mode('grab', # default
                        $opt_configure => 'configure',
                        $opt_list_channels => 'list-channels');

# File that stores which channels to download.
my $config_file = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_fr', $opt_quiet);

#***************************************************************************
# Sub sections
#***************************************************************************
sub get_channels( $ );
sub process_channel_grid_page( $$$$ );
sub get_detailed_fic( $$$ );
sub debug_print( @ );

# Set this to 1 of you debug strings
my $DEBUG_FR = 0;
# Internal debug functions
sub debug_print( @ ) {
  if ($DEBUG_FR) { print @_; }
}
  
sub xmlencoding {
    # encode for xml
    $_[0] =~ s/</&lt;/g;
    $_[0] =~ s/>/&gt;/g;
    $_[0] =~ s/&/\%26/g;
    return $_[0];
}

sub tidy {    
    for (my $s = shift) {
      s/&#8212;/--/g;
      s/&#8230;/.../g;
      s/\x{2019}/\'/g;
      decode_entities $_;
      tr/\207\211\200\224/\347\311\055\055/; # bad characters
      tr/\205//d;
      tr/\222/''/;
      s/\234/oe/g;
      s/&#8722;/ /g;

      # Remove nasty caracters, thanks to nielm
      s/&ldquo;|&rdquo;|&\#8219;|&\#8220;|&\#x201[89];/&quot/g;
      s/&lsquo;|&rsquo;|&\#8216;|&\#8217;|&\#8218;|&\#x201[cdCD];/\'/g;
      s/&\#8230;|&\#x202[4-7];/.../g;
      s/&\#821[0123];|&\#x201[2-5];/-/g;
      s/&OElig;/OE/g;
      s/&oelig;/oe/g;
      s/&\#x0027/\'/g;
      s/(&\#[0-9]{4,};)//g;
      s/(&\#x[0-9a-zA-Z]{3,};)//g;
      # Not strictly a bad character but it does get in the way.
      s/&nbsp;/ /g;
      tr/\240/ /;
      tr/\t/ /;
      s/([^\012\015\040-\176\240-\377])//g;
      return $_;
    }
}

#***************************************************************************
# Configure mode
#***************************************************************************
if ($mode eq 'configure') {
    XMLTV::Config_file::check_no_overwrite($config_file);
    open(CONF, ">$config_file") or die "Cannot write to $config_file: $!";

    # Get a list of available channels, according to the grid type
    my @gts = sort keys %GridType;
    my @gtnames = map { $GridType{$_} } @gts;
    my @gtqs = map { "Get channels type : $_?" } @gts;
    my @gtwant = ask_many_boolean(1, @gtqs);

    my $bar = new XMLTV::ProgressBar('getting channel lists',
                                    scalar grep { $_ } @gtwant)
                    if not $opt_quiet;
    my %channels_for;
    foreach my $i (0 .. $#gts) {
        my ($gt, $gtw, $gtname) = ($gts[$i], $gtwant[$i], $gtnames[$i]);
        next if not $gtw;
        my %channels = get_channels( $gtname );
        die 'No channels could be found' if not %channels;
        $channels_for{$gt} = \%channels;
        update $bar if not $opt_quiet;
    }
    $bar->finish() if not $opt_quiet;

    my %asked;
    foreach (@gts) {
        my $gtw = shift @gtwant;
        my $gtname = shift @gtnames;
        if ($gtw) {
            my %channels = %{$channels_for{$_}};
            say "Channels for $_";

            # Ask about each channel (unless already asked).
            my @chs = grep { not $asked{$_}++ } sort keys %channels;
            my @names = map { $channels{$_}{name} } @chs;
            my @qs = map { "add channel $_?" } @names;
            my @want = ask_many_boolean(1, @qs);
            foreach (@chs) {
                my $w = shift @want;
                warn("cannot read input, stopping channel questions"), last if not defined $w;
                # Print a config line, but comment it out if channel not wanted.
                print CONF '#' if not $w;
                print CONF "channel $_ $channels{$_}{name};$channels{$_}{icon}\n";
            }
        }
    }
    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");
    exit();
}

#***************************************************************************
# Check mode checking and get configuration file
#***************************************************************************
die if $mode ne 'grab' and $mode ne 'list-channels';

my @config_lines;
if ($mode eq 'grab') {
    @config_lines = XMLTV::Config_file::read_lines($config_file);
}

#***************************************************************************
# Prepare the XMLTV writer object
#***************************************************************************
my %w_args;
if (defined $opt_output) {
    my $fh = new IO::File(">$opt_output");
    die "cannot write to $opt_output: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}

$w_args{encoding} = 'ISO-8859-1';

# In fast mode, the datasource contains data for all days at once,
# so we let XMLTV::Writer filter out the days that the user wanted.
if (not $opt_slow) {
  $w_args{days} = $opt_days;
  $w_args{offset} = $opt_offset;
  $w_args{cutoff} = "000000";
}

my $writer = new XMLTV::Writer(%w_args);
$writer->start
  ({ 'source-info-url'     => 'http://telepoche.guidetele.com/',
     'source-data-url'     => 'http://telepoche.guidetele.com/',
     'generator-info-name' => 'XMLTV',
     'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
   });

#***************************************************************************
# List channels only case
#***************************************************************************
if ($mode eq 'list-channels') {
    # Get a list of available channels, according to the grid type
    my @gts = sort keys %GridType;
    my @gtnames = map { $GridType{$_} } @gts;
    my @gtqs = map { "List channels for grid : $_?" } @gts;
    my @gtwant = ask_many_boolean(1, @gtqs);

    my %seen;
    foreach (@gts) {
        my $gtw = shift @gtwant;
        my $gtname = shift @gtnames;
        if ($gtw) {
            say  "Now getting grid : $_ \n";
            my %channels = get_channels( $gtname );
            die 'no channels could be found' if (scalar(keys(%channels)) == 0);
            foreach my $ch_did (sort(keys %channels)) {
                my $ch_xid = "C".$ch_did.".telepoche.com";
                $writer->write_channel({ id => $ch_xid,
                                         'display-name' => [ [ $channels{$ch_did}{name} ] ],
                                         'icon' => [{src=>$ROOT_URL.$channels{$ch_did}{icon}}] })
                unless $seen{$ch_xid}++;
            }
       }
     }
     $writer->end();
     exit();
}

#***************************************************************************
# Now the real grabbing work
#***************************************************************************
die if $mode ne 'grab';

#***************************************************************************
# Build the working list of channel name/channel id
#***************************************************************************
my (%channels, $chicon, $chid, $chname);
my $line_num = 1;
foreach (@config_lines) {
    ++ $line_num;
    next if not defined;

    # Here we store the Channel name with the ID in the config file, as the XMLTV id = Website ID
    if (/^channel:?\s+(\S+)\s+([^\#]+);([^\#]+)/) {
        $chid = $1;
        $chname = $2;
        $chicon = $3;
        $chname =~ s/\s*$//;
        $channels{$chid} = {'name'=>$chname, 'icon'=>$chicon};
    } else {
        warn "$config_file:$line_num: bad line $_\n";
    }
}

#***************************************************************************
# Now process the days by getting the main grids.
#***************************************************************************
my @to_get;
warn "No working channels configured, so no listings\n" if not %channels;
my $script_duration = time();

# The website stores channel information by hour area for a whole week !
foreach $chid (sort keys %channels) {
    $writer->write_channel({ id => "C".$chid.".telepoche.com", 'display-name' => [[$channels{$chid}{name}]], 'icon' => [{src=>$channels{$chid}{icon}}]});
    foreach (@offsets) {
        my $url = $GRID_BY_CHANNEL . "$chid&h=$_";
        push @to_get, [ $url, $chid, $_ ];
    }
}
my $bar = new XMLTV::ProgressBar('getting listings', scalar @to_get)  if not $opt_quiet;
Date_Init("TZ=UTC");

foreach (@to_get) {
    my ($url, $chid, $slot) = @$_;
    #my $th = threads->new(\&process_channel_grid_page, $writer, $chid, $url, $slot);
    #$th->join();
    process_channel_grid_page($writer, $chid, $url, $slot);
    update $bar if not $opt_quiet;
}
$writer->end();
$bar->finish() if not $opt_quiet;

# Print the duration
$script_duration = time() - $script_duration;
print STDERR "Grabber process finished in " . $script_duration . " seconds.\n" if not $opt_quiet;

#***************************************************************************
# Specific functions for grabbing information
#***************************************************************************
sub get_channels( $ ) {
  my $gridid = shift;
  my %channels;
  my $url = $GRID_FOR_CHANNEL.$gridid.'/2_6.html';
    
  # Get the current page
  my $t = get_nice_tree($url);           
    
  debug_print( "URL  : " . $url ."\n");
 
  foreach my $cellTree ( $t->look_down( "_tag", "td", "width", "55", "height", "85" ) ) {
    my $tag = $cellTree->look_down( "_tag", "a", 'href', '#' ); 
    if ($tag != undef) {
      my $chid = $tag->attr('onclick');
      if ( $chid =~ /goChaine\('(.*)','(.*)',''\);/ ) {
        $chid = $1;
        my $imgCell = $cellTree->look_down( "_tag", "img" );
        my $chicon = $imgCell->attr('src');
        my $chname = $imgCell->attr('src');
        $chname =~ s/http:\/\/static.guidetele.com\/c_img\/chaine\///;
        $chname =~ s/\.gif//;
        $chname =~ s/\.GIF//;
        debug_print "Found channel : $chid - " . $chname . "\n";
        $channels{$chid} = {'name' =>  $chname, 'icon' => $chicon };
      }
    }
  }
  $t->delete(); undef $t;
  return %channels;
}

sub process_channel_grid_page( $$$$ ) {
    my ($writer, $chid, $url, $slot) = @_;
    my ($genre, $lastshowview, $showview, $hours, $starthour, $endhour, $date, $dateindex) = 0;
    my ($title, $subgenre, $star_rating, $datecreate) = 0;
       
    # Get the current page
    my $t = get_nice_tree($url);
    debug_print("Now getting page : $url\n");
    
    # Reset some working variables
    my $day = 0; my $month = 0; my $year = 0;

    if ( my $tableTree = $t->look_down('_tag', 'table', 'width', '600', 'height', '70', 'bgcolor', '#ffffff') ) {
      # Get the list of rows of the table
      my @dateRowTab = $tableTree->content_list();
      # Now loop thru rows
      foreach my $dateRow (@dateRowTab) {
        my $daterow = $dateRow->look_down('_tag', 'td', 'width', '403');
        if ( $daterow ) {
          my $startdate = $daterow->as_text();
          $startdate =~ /^Les programmes de la semaine du (\d+)\/(\d+)\/(\d+)/;
          $day = $1; $month = $2; $year = $3;
        }
      }
    }
    
    if ( my $tableTree = $t->look_down('_tag', 'table', 'width', '655', 'class', 'prog_grille') ) {
      foreach my $myrow ( $tableTree->look_down('_tag', 'tr') ) {
        if ( my $celldate = $myrow->look_down('_tag', 'td', 'width', '55', 'height', '42' ) ) {
          my $myday = $celldate->as_text();
          $myday =~ /^(\d+)\/(\d+)/;
          $day = $1; $month = $2;
        }
        debug_print "Now parsing date : $day $month \n";
        $date = DateCalc($month."/".$day."/".$year, "+0 days");
        $dateindex = UnixDate($date, "%Y%m%d");
        # We need to limit the number of days fetched in slow mode, but in fast mode no limit is needed since
        # there is a single fetch for all days.
        if ($opt_slow) {
          next if Date_Cmp($dateindex, UnixDate(DateCalc("today", "+$opt_offset days"),"%Y%m%d")) < 0;
          next if Date_Cmp($dateindex, UnixDate(DateCalc("today", "+$lastdaysoffset days"),"%Y%m%d")) > 0;
        }

        # Each show is described in a table cell with a 'prog' class
        foreach my $show ($myrow->look_down('_tag', 'td', 'class', 'prog') ) {
          # Each cell contains zero or more links describing the show.
          # - If no information is available for the time slot, then the cell
          #   is empty (&nbsp; actually).
          # - Links with a 'fiche' class have the most details and also point
          #   to the per-show web page with the extra information.
          # - Other links contain less detailed information and no pointer to
          #   a per-show page.
          my $link=$show->look_down('_tag', 'a', 'class', 'fiche') ||
                   $show->look_down('_tag', 'a', 'onmouseover', qr/^showmenu/);
          next if (!$link);

          my $line = $link->attr('onmouseover');
          $line =~ (!m/showmenu\(([^""]+)\)/);
          $line =~ m/\'(.*)\',\'(.*)\'/;
          $title = tidy($2);
          my $mydata = $1;
          next if ( $title eq 'Fin des programmes');
          ($hours, $genre, $showview) = split (/<br>/, $mydata);
          next if ( !$hours );
          # Process the title, sometimes a showview field is shown
          $title =~ s/^\d{7,8} //;
          $title =~ s/\\//g;
          if ($title =~ s/\s*([*]+)\s*$//) {
            my $n = length $1;
            if (0 < $n and $n <= $MAX_STARS) {
              $star_rating = $n;
            } elsif ($MAX_STARS < $n) {
              warn "too many stars ($n), expected at most $MAX_STARS\n";
            } else { die }
          }
          die if $title =~ /[*]$/;
          my ($language, $subtitles_language);
          for ($title) {
            s/\s+$//;
            if (s/\s+\(VO\)$//) {
            # Version originale - language is unknown but not French.  There is no way to represent this in the DTD.
          } elsif (s/\s+\(VO sous-titr.e\)$//) {
            # Language unknown, but we know it has French subtitles.
            $subtitles_language = 'fr';
            } elsif (s/\s+\(VF\)$//) {
              # Version francaise.  The title may or may not be translated.
              $language = 'fr';
            }
          }
          # At this point, $title contains title and subtitle (if any), separated by a '-'. We will try to split off the subtitle
          # further down. Process hours, there are like HHhMM
          ($starthour, $endhour)  = split("-", $hours);
          $starthour =~ s/h//g
            or die "Cannot detect start hour from website : $starthour \n";
          $endhour   =~ s/h//g
            or die "Cannot detect end hour from website : $endhour \n";
          # Process the start/stop dates
          my $start = $dateindex.$starthour."00";
          my $stop  = $dateindex.$endhour."00";
          # Dummy site : the slot 0-4 of day n is in fact the slot 0-4 for day n+1
          if ( $slot == 7 ) {
            my $myslot = substr($starthour, 0, 2);
            die if not $start;
            $start = &UnixDate(&DateCalc($start, "+1 day"), "%Y%m%d%H%M%S")
              if ($myslot >= 0 && $myslot < 4);
            die 'could not add one day to start time' if not $start;
            $stop  = &UnixDate(&DateCalc($stop, "+1 day"), "%Y%m%d%H%M%S");
            die 'could not add one day to stop time' if not $stop;
          }
          # Last check to see if start > stop
          if ( Date_Cmp($start, $stop) > 0 ) {
            $stop = &UnixDate(&DateCalc($stop, "+1 day"), "%Y%m%d%H%M%S");
            die 'could not add one day to stop time' if not $stop;
          }
          # Now set the proper timezone (WT/ST) according to current date
          die if not $start; die if not $stop;
          $start = utc_offset( $start, "+0100");
          $stop  = utc_offset( $stop , "+0100");
          my %prog = (channel  => "C".$chid.".telepoche.com",
              title    => [ [ tidy($title) ] ],             # lang unknown
              start    => $start,
              stop     => $stop
            );
          debug_print("Found title : $title - $start - $stop \n");
          $prog{'star-rating'} = [ "$star_rating/$MAX_STARS" ] if defined $star_rating;
          for ($language) { $prog{language} = [ $_ ] if defined }
          for ($subtitles_language) {
            $prog{subtitles} = [ { type => 'onscreen', language => [ $_ ] } ]
            if defined;
          }
          # Sometimes the genre is not set, so replace it by the showview field
          if (defined $genre and $genre =~ m/Showview : /) {
            $showview = $genre;
            undef $genre;
          }
          # Process the genre, subgenre and date if defined
          if  (defined $genre ) {
            ($genre, $datecreate) = split("-", $genre);
            ($genre, $subgenre)   = split(",", $genre);
            for ($genre) { s/^\s+//; s/\s+$//; s/\xA0//; }
            if (defined $subgenre) {
              for ($subgenre) { s/^\s+//; s/\s+$//; s/\xA0//; }
              $prog{category} = [ [ xmlencoding(lc($genre)), $LANG ], [ xmlencoding(lc($subgenre)), $LANG ] ];
            } else {
              $prog{category} = [ [ xmlencoding(lc($genre)), $LANG ] ];
            }
            if (defined $datecreate) {
              for ($datecreate) { s/^\s+//; s/\s+$//; s/\xA0//; }
              $prog{date} = $datecreate ;
            }
          }
          # Process the showview field
          if ( defined $showview ) {
            $showview =~ s/Showview : //;
            for ($showview) { s/^\s+//; s/\s+$//; s/\xA0//; }          
            $prog{showview} = $showview;
            next if ( $showview == $lastshowview );
            $lastshowview = $showview;
          }

          my $img=$show->look_down('_tag', 'img', 'onerror', qr/this\.src=/);
          if ($img) {
            my $url=$img->attr('src');
            # Get the big icon
            $url =~ s%/c_img/v/%/c_img/i/%;
            push @{$prog{icon}}, {src => $url};
          }

          # Now get program description if the longlisting option is set
          if ( $opt_slow && $link->attr('class') eq 'fiche' ) {
            get_detailed_fic( $link->attr('onclick'), \%prog, $title );
          } else {
            # The text for the <a> tag contains the title without the sub-title so we can use that to separate the two. However
            # the text for the <a> tag may have been truncated so it fits the slot on the page. Also some titles may contain
            # a ' - '. Still the heuristic works very well.
            my $subtitle;
            my $text = $link->as_text();
            if ($text =~ s/\.\.\.$//) {
              if ($title =~ s/^\Q$text\E([^-]+)\s+-\s+//) {
                $prog{'title'} = [ [ tidy($text).tidy($1) ] ];
                $prog{'sub-title'} = [ [ tidy($title) ] ];
              }
            }
            elsif ($title =~ s/^\Q$text\E\s+-\s+//) {
              $prog{'title'} = [ [ tidy($text) ] ];
              $prog{'sub-title'} = [ [ tidy($title) ] ];
            }
          }
          if ( !$results{$prog{start}.$chid} ) {
            $results{$prog{start}.$chid} = "1";
            $writer->write_programme(\%prog);
          }
        }
      $myrow->replace_with("REPLACED");
    }
  }
  $t->delete(); undef $t;
}

sub text_cleanup( $ )
{
  my ($desc) = @_;
  $desc =~ s/\@\@\@/ /g;
  # Remove leading and trailing spaces
  $desc =~ s/^\s*:\s*//;
  $desc =~ s/\s*$//;
  return $desc;
}

sub parse_name_list( $ )
{
  my ($names) = @_;
  # Remove leading and trailing spaces
  $names =~ s/^\s*:\s*//;
  $names =~ s/\s*$//;
  $names =~ s/\s+(?:avec|et)\s+/, /i;
  my @list=split /(?:\s*,)+\s*/, $names;
  return @list;
}

sub get_detailed_fic( $$$ ) 
{
  my $id    = shift;
  my $prog  = shift;
  my $title = shift;
  my $nbretry = 0;
  $id =~ /fiche\('(\d+)'\)/ or die "expected fiche(x), got: $id";
  $id = $1;         
  my $FIC_FILE = new File::Temp(SUFFIX => '.xmltv' );
  debug_print("Calling sheet URL : " . $SHEET_URL . $id . "\n");

  # Get the current page
  while ( HTTP::Status::is_error(getstore($SHEET_URL . $id, $FIC_FILE->filename )) ) {
    sleep(1); $nbretry++;
    return if ($nbretry == 5 );
  }
  my $tfic = HTML::TreeBuilder->new;
  $tfic->store_comments(1);
  $tfic->parse_file( $FIC_FILE->filename );  
   
  # This page's title tag contains the program title without the sub-title. Use it to separate the two.
  my $ttitle;
  if ( $ttitle = $tfic->look_down('_tag', 'title') ) {
    my $htmltitle = $ttitle->as_text();
    $htmltitle =~ s/^Fiche Programme TV : //g;
    if ($title =~ s/^\Q$htmltitle\E\s+-\s+//) {
      $prog->{'title'} = [ [ tidy($htmltitle) ] ];
      $prog->{'sub-title'} = [ [ tidy($title) ] ];             
    }
  }

  # Get the duration and the year
  if ( my $tdesc = $tfic->look_down('_tag', 'td', 'bgcolor', '#F8F8F8') ) {
    if (my $img = $tdesc->look_down('_tag', 'img', 'alt', qr/^(Accord|Interdit)/)) {
      my $text=$img->attr('alt');
      my $rating;
      if ($text =~ /Accord parental souhaitable/i) {
        $rating="-10";
      } elsif ($text =~ /Accord parental indispensable/i) {
        $rating="-12";
      } elsif ($text =~ /Interdit aux moins 16 ans/i) {
        $rating="-16";
      } elsif ($text =~ /Interdit aux moins 18 ans/i) {
        $rating="-18";
      } else {
        warn "Unknown show rating [$text]\n";
      }
      if ($rating) {
        my $icon="http://telepoche.guidetele.com" . $img->attr('src');
        push @{$prog->{rating}}, [ $rating, "CSA", [ {src => $icon} ] ];
      }
    }

    debug_print "RAWDATA: " . tidy($tdesc->as_text()) . "\n";
    foreach my $field ( split(/\s+/, tidy($tdesc->as_text())) ) {
      if ( $field =~ s/^(\d\d\d\d)$//i ) {
        $prog->{'date'} = $1;
      } elsif ( $field =~ s/^(Couleur|Noir et blanc|Coul\. et s.*pia)//i ) {
        $prog->{'video'}{colour} = ($1 eq "Noir et blanc" ? 0 : 1);
      } elsif  ( $field =~ s%^16/9%%) {
        $prog->{'video'}{aspect} = "16:9";
      } elsif ( $field =~ s/^(St.r.o|Dolby)\s*//i) {
        my $stereo = $1;
        if ($stereo =~ /^St.r.o$/) {
          $prog->{'audio'}{stereo} = "stereo";
        } elsif ($stereo eq "Dolby") {
          $prog->{'audio'}{stereo} = "dolby";       
        }
      } elsif ($field =~ s/(\d+)h(\d+)//) {
        my ($hour, $min) = ($1, $2);
        # guidetele.com is full of bugs ...
        $hour = $hour - 12 if ($hour >= 12);
        my $duration=($hour * 3600) + ($min * 60);
        $prog->{'length'} = $duration if ($duration != 0);
      }
    }
  }

  # The actors list is really hard to identify and risks producing false
  # positives. Fortunately we know it follows a 'DISTRIBUTION' comment.
  # So we:
  # - locate the 'DISTRIBUTION' comment and identify what should be the
  #   first actor after that
  # - only set $actors_section to 1 when we are in the right area of the page.
  #   The rest of the time it is set to 0 before the actors section,
  #   and 2 after it.
  my $actors_section=0;
  my $first_actor;
  if (my $comment=$tfic->look_down('_tag', '~comment', 'text', qr/DISTRIBUTION/)) {
    my $sibling=$comment->right();
    if (!ref $sibling) {
      # $sibling is a string, what we want is the next HTML element
      $sibling=($comment->parent()->content_list())[$comment->pindex()+2];
    }
    if (defined $sibling) {
      $first_actor=$sibling->look_down('_tag', 'span', 'class', 'noir11');
    }
  }

  # Now get descriptions, summary, advices, actors and director
  # These are represented as <span class='noir11'> / <span class='noir11r'>
  # pairs.
  my ($resume, $histoire, $avis);
  foreach my $span ($tfic->look_down('_tag', 'span', 'class', 'noir11')) {
    my $sibling=$span->right();
    next if (!defined $sibling);

    my $value="";
    if (!ref $sibling) {
      # $sibling is a string, what we want is the next HTML element
      $value=$sibling;
      $sibling=($span->parent()->content_list())[$span->pindex()+2];
      next if (!defined $sibling);
    }
    if ($sibling->attr('_tag') ne "span" or
        $sibling->attr('class') ne "noir11r") {
      # Not a valid value span tag
      next;
    }
    $value.=$sibling->as_text();
    my $name=$span->as_text();

    if ($name =~ /^Titre original/i) {
      # FIXME: Still in the works...
      ;
    } elsif ($name =~ /^R.*alisateur/i) {
      push @{$prog->{credits}{director}}, parse_name_list(tidy($value));
    } elsif ($name =~ /^Pr.*sentateur/i) {
      push @{$prog->{credits}{presenter}}, parse_name_list(tidy($value));
    } elsif ($name =~ /^Sc.*nariste/i) {
      push @{$prog->{credits}{writer}}, parse_name_list(tidy($value));
    } elsif ($name =~ /Musique/i) {
      # No xmltv field for this
      ;
    } elsif ($name eq "HISTOIRE") {
      $histoire = text_cleanup(tidy($value));
      $actors_section=2;
    } elsif ($name eq "RESUME") {
      $resume = text_cleanup(tidy($value));
      $actors_section=2;
    } elsif ($name eq "AVIS") {
      $avis = text_cleanup(tidy($value));
      $actors_section=2;
    } elsif ($actors_section == 1 or
             ($actors_section == 0 and defined $first_actor and $span eq $first_actor)) {
      push @{$prog->{credits}{actor}}, text_cleanup(tidy($name));
      $actors_section=1;
    }
  }

  # RESUME is the main definition, HISTOIRE is shorter.
  foreach ($resume, $histoire) {
    push @{$prog->{desc}}, [ $_, $LANG ] if defined and length;
  }
  # Add AVIS to the main description, or make a new desc for it if there are none.
  if (defined $avis and length($avis) ) {
    if ($prog->{desc}) {
      $prog->{desc}->[0]->[0] .= " Critique : " . $avis;
    } else {
      push @{$prog->{desc}}, [ $avis, $LANG ];
    }
  }

  $tfic->delete(); undef $tfic;
}
