<?php
/* 	OpenDb - Open Lending Database Project
	Copyright (C) 2001,2002 by Jason Pell

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

//
// Search for 'Shitters' to return no titles.
// Search for 'Cunt' to get a single title.
// Search for 'Filthy Fuckers' to get an exact title match.
// Search for 'Chocolat' will return a single 'Most Popular' and multiple actual entries.
//

/*
 * List all items matched by search, with optional TSearch link at the end
 *
 * Builds an HTML fragment from an IMDb "Find/Tsearch" result page.  Two
 * sections are recognised: the "Most popular title searches" list and the
 * "N 'title' matches" list (the latter also renders any "...aka" lines and a
 * trailing "show all" link when the page offers one).
 *
 * @param $title      Search term; lower-cased and rawurlencode()'d into the
 *                    IMDb query when the page has to be fetched.
 * @param $HTTP_VARS  Request variables handed to get_url_string() together
 *                    with array('site','op','imdb_id','title') - presumably
 *                    the names to leave out of the pass-through query string
 *                    (TODO confirm against get_url_string()).
 * @param $block      Optional, already fetched result page.  When empty the
 *                    page is fetched with open_url().
 *
 * @return FALSE when the page does not contain the "A search for <B>'"
 *         marker; otherwise the generated HTML (NULL when the marker is
 *         present but neither section was found).
 */
function get_imdb_common_titles_list($title, $HTTP_VARS, $block=NULL)
{
	global $LANG_VARS;

	// Store the url vars that we just need to pass on!
	$http_vars_url = get_url_string($HTTP_VARS, NULL, array('site','op','imdb_id','title'));

	if(empty($block))
	{
		$block = open_url("http://www.imdb.com/Find/Tsearch?".rawurlencode(strtolower($title)));
	}

	// $cursor is the position in $block we have parsed up to.  It is only ever
	// replaced by a valid position, so a section that is missing never resets
	// the following searches back to the start of the page.
	$cursor = strpos($block,"A search for <B>'");
	if($cursor === FALSE)
	{
		return FALSE;
	}

	// Explicitly NULL so the '.=' below never touches an undefined variable,
	// while the "nothing found" return value stays what it always was.
	$titleblock = NULL;

	$popular_pos = strpos($block, "<P>Most popular title searches:</P>", $cursor);
	if($popular_pos !== FALSE)
	{
		$titleblock .= "<p><div class=\"colortext\"><b>".$LANG_VARS['most_popular_titles'].":</b>";

		// parse_imdb_list_block() moves the offset (by reference) to the end
		// of the list it parsed.
		$cursor = $popular_pos;
		$entries = parse_imdb_list_block($block, $cursor);
		if($cursor === FALSE)
		{
			// List was not terminated - stay at the section heading.
			$cursor = $popular_pos;
		}

		$titleblock .= "<ol>";
		if(is_array($entries))
		{
			foreach($entries as $entry)
			{
				$titleblock .= "<li><a href=\"site.php?site=imdb&op=choose&id=".$entry['id']."&$http_vars_url\">".$entry['title']."</a></li>";
			}
		}
		$titleblock .= "</ol></div></p>";
	}

	if( preg_match("/<P><B>([0-9]+)<\/B> 'title' matches/", substr($block,$cursor), $matches))
	{
		$titleblock .= "<p><div class=\"colortext\"><b>".replace_lang_var("count", $matches[1], $LANG_VARS['cnt_titles_found']).":</b>";

		$matches_pos = strpos($block, "'title' matches", $cursor);
		if($matches_pos !== FALSE)
		{
			$cursor = $matches_pos;
			$entries = parse_imdb_list_block($block, $cursor);
			if($cursor === FALSE)
			{
				// List was not terminated - stay at the section heading.
				$cursor = $matches_pos;
			}

			$titleblock .= "<ol>";
			if(is_array($entries))
			{
				foreach($entries as $entry)
				{
					$titleblock .= "<li><a href=\"site.php?site=imdb&op=choose&id=".$entry['id']."&$http_vars_url\">".$entry['title']."</a>";
					// 'aka' is only an array when alternative titles were found.
					if(is_array($entry['aka']))
					{
						foreach($entry['aka'] as $aka)
						{
							$titleblock .= "<br>...aka <i>$aka</i>";
						}
					}
					$titleblock .= "</li>";
				}
			}
			$titleblock .= "</ol></div></p>";

			// IMDb only lists a subset here; offer a link to the complete listing.
			if( preg_match("/<P>Show all <A HREF=\"\/Tsearch\?([^\"]+)\">([0-9]+)/", substr($block,$cursor), $matches))
			{
				$titleblock .= "<p><div class=\"footer\">[<a href=\"site.php?site=imdb&op=listing&title=".$matches[1]."&$http_vars_url\">".replace_lang_var("count", $matches[2], $LANG_VARS['show_all_cnt_titles'])."</a>]</div></p>";
			}
		}
	}

	return $titleblock;
}

/*
 * List all titles
 *
 * Fetches http://www.imdb.com/Tsearch?<title> and renders every
 * "<b>Xxx Matches</b> (N matches, ...)" table found after the
 * "<h1>IMDb title search</h1>" heading as an ordered list of links back to
 * site.php (op=choose).  The first "aka" line of a row, if any, is shown
 * underneath the link.
 *
 * @param $title      Search term; lower-cased and rawurlencode()'d into the query.
 * @param $HTTP_VARS  Request variables handed to get_url_string() to build the
 *                    pass-through part of each link.
 *
 * @return The generated HTML, or NULL when the page contained nothing that
 *         could be parsed.
 */
function get_imdb_all_titles_list($title, $HTTP_VARS)
{
	global $LANG_VARS;

	// Store the url vars that we just need to pass on!
	$http_vars_url = get_url_string($HTTP_VARS, NULL, array('site','op','imdb_id','title'));

	$block = open_url("http://www.imdb.com/Tsearch?".rawurlencode(strtolower($title)));

	// Explicitly NULL so the '.=' below never touches an undefined variable,
	// while the "nothing found" return value stays what it always was.
	$titleblock = NULL;

	$start_of_block = strpos($block,"<h1>IMDb title search</h1>");
	if($start_of_block === FALSE)
	{
		return $titleblock;
	}

	// One match per result table: [1]=match type, [2]=count, [3]=remark, [4]=table rows.
	if( !preg_match_all("/<b>([a-zA-Z]+) Matches<\/b> \(([0-9]+) matches, ([^\)]*)\)(.*?)<\/table>/s", substr($block,$start_of_block), $matches) )
	{
		return $titleblock;
	}

	$section_count = count($matches[1]);
	for ($i = 0; $i < $section_count; $i++)
	{
		$titleblock .= "\n<p><div class=\"colortext\"><b>".replace_lang_var('match', $matches[1][$i], $LANG_VARS['search_result_matches']).":</b>";

		$titleblock .= "\n<ol>";

		//<td align="right" valign="top">1.&nbsp;</td><td valign="top" width="100%"><a href="http://www.imdb.com/title/tt0241303/">Chocolat (2000)</a></td>
		// Per row: [2]=link target, [3]=link text, [4]=remainder of the line (may hold an aka).
		if(preg_match_all("/<td[^>]+>([0-9]+)\.[^<]+<\/td><td[^>]+><a href=\"([^\"]+)\">([^\"]+)<\/a>(.*)$/m", $matches[4][$i], $rows))
		{
			$row_count = count($rows[1]);
			for ($j = 0; $j < $row_count; $j++)
			{
				// Rows whose link does not carry a /title/tt<id>/ part are skipped.
				if( !preg_match("!/title/tt([^/]+)/!", $rows[2][$j], $id_match))
				{
					continue;
				}

				$titleblock .= "\n<li><a href=\"site.php?site=imdb&op=choose&id=".$id_match[1]."&$http_vars_url\">".$rows[3][$j]."</a>";

				//<br>&nbsp;aka <i>"Meilleur que le chocolat"</i>
				if(preg_match("/aka <i>\"([^\"]+)\"<\/i>/", $rows[4][$j], $aka_match))
				{
					$titleblock .= "<br>...aka <i>".$aka_match[1]."</i>";
				}
				$titleblock .= "</li>";
			}
		}
		$titleblock .= "</ol></div></p>";
	}

	return $titleblock;
}

/**
	This is a specialised function to parse a Tsearch
	header block, which provides information on what
	is contained in the page.

	Looks for the first <UL>...</UL> at or after $offset and returns one
	element per <LI> entry that matches the expected
	"14 x <A HREF="#mov">movies</A>" format:
		array(
			array(count=>'14', href=>'mov', heading=>initcap('movies')),
			...
			)

	$offset is passed by reference and is moved to the position of the
	closing </UL> (or to the end of $block when the list is not
	terminated).  It is left untouched when no <UL> is found.

	Returns NULL when no entry could be parsed.
*/
function parse_imdb_header_block($block, &$offset)
{
	// Explicit NULL keeps the historic "nothing found" return value without
	// returning an undefined variable.
	$entries = NULL;

	$start_of_list = strpos($block, "<UL>", $offset);
	if($start_of_list === FALSE)
	{
		return $entries;
	}

	$list_start = $start_of_list + 4;//4=<UL>
	$end_of_list = strpos($block, "</UL>", $list_start);
	if($end_of_list === FALSE)
	{
		// Unterminated list: treat the rest of the page as the list instead of
		// handing substr() a negative length and the caller a FALSE offset.
		$end_of_list = strlen($block);
	}

	// Reset the offset to end of block, so we can start after this block.
	$offset = $end_of_list;

	$list_block = substr($block, $list_start, $end_of_list - $list_start);

	// Now we want to parse the list to get all the <LI>...</LI> entries.
	// Format of entry: "14 x <A HREF="#mov">movies</A>"
	$start_of_entry = strpos($list_block, "<LI>");
	while ($start_of_entry !== FALSE)
	{
		$entry_start = $start_of_entry + 4;//4=<LI>
		$end_of_entry = strpos($list_block, "</LI>", $entry_start);
		if($end_of_entry === FALSE)
		{
			// No closing tag: the entry runs up to the next <LI>, or to the
			// end of the list.  Without this the search below would restart
			// at offset 0 and loop forever on the first entry.
			$end_of_entry = strpos($list_block, "<LI>", $entry_start);
			if($end_of_entry === FALSE)
			{
				$end_of_entry = strlen($list_block);
			}
		}
		$entry = substr($list_block, $entry_start, $end_of_entry - $entry_start);

		// NOTE: [^'HREF'] and [^$] are character classes - "any character
		// except ', H, R, E, F" and "any character except a literal $" - not
		// negated words or an end-of-line test.  The last group is greedy, so
		// this relies on there being only one </A> in $entry.
		if( preg_match("/([0-9]+)[^'HREF']+HREF=\"#([^\"]+)\">([^$]+)<\/A>/m", $entry, $matches))
		{
			$entries[] = array('count'=>$matches[1],'href'=>$matches[2],'heading'=>initcap($matches[3]));
		}

		$start_of_entry = strpos($list_block, "<LI>", $end_of_entry);
	}

	return $entries;
}

/**
	Parse a set of <OL>...</OL>
		<LI>...</LI> links from IMDB and returns an
	array of the format:
		array(
			array(id=>'imdb id',title=>'Title of item',aka=>array('aka 1','aka 2')),
			array(id=>'imdb id2',title=>'Title of item2',aka=>NULL)
			)

	Only the first <OL> at or after $offset is parsed.  'aka' is NULL when
	an entry has no "...aka" lines.

	$offset is passed by reference and is moved to the position of the
	closing </OL> (or to the end of $block when the list is not
	terminated).  It is left untouched when no <OL> is found.

	Returns NULL when no entry could be parsed.
*/
function parse_imdb_list_block($block, &$offset)
{
	// Explicit NULL keeps the historic "nothing found" return value without
	// returning an undefined variable.
	$entries = NULL;

	$start_of_list = strpos($block, "<OL>", $offset);
	if($start_of_list === FALSE)
	{
		return $entries;
	}

	$list_start = $start_of_list + 4;//4=<OL>
	$end_of_list = strpos($block, "</OL>", $list_start);
	if($end_of_list === FALSE)
	{
		// Unterminated list: treat the rest of the page as the list instead of
		// handing substr() a negative length and the caller a FALSE offset.
		$end_of_list = strlen($block);
	}

	// Reset the offset to end of block, so we can start after this block.
	$offset = $end_of_list;

	$list_block = substr($block, $list_start, $end_of_list - $list_start);

	// Now we want to parse the list to get all the <LI>...</LI> entries.
	// Format of entry: "<A HREF="/title/tt0221895/">Title (1993)</A>"
	$start_of_entry = strpos($list_block, "<LI>");
	while ($start_of_entry !== FALSE)
	{
		$entry_start = $start_of_entry + 4;//4=<LI>
		$end_of_entry = strpos($list_block, "</LI>", $entry_start);
		if($end_of_entry === FALSE)
		{
			// No closing tag: the entry runs up to the next <LI>, or to the
			// end of the list.  Without this the search below would restart
			// at offset 0 and loop forever on the first entry.
			$end_of_entry = strpos($list_block, "<LI>", $entry_start);
			if($end_of_entry === FALSE)
			{
				$end_of_entry = strlen($list_block);
			}
		}
		$entry = substr($list_block, $entry_start, $end_of_entry - $entry_start);

		// The title group is greedy, so this relies on there being only one
		// </A> in the $entry.  '.+' with /s replaces the old '[^$]+', which was
		// a character class excluding a literal '$' and therefore dropped any
		// title containing a dollar sign.
		if( preg_match("!HREF=\"\/title/tt([^/]+)/\">(.+)</A>!is", $entry, $matches))
		{
			// Initialise!
			$aka_r = null;

			// Now lets see if there are any :
			//	<BR>...aka <I><B>Castle</B> of the Walking Dead (1967)</I>
			// Each aka line sits between two <BR> tags, or after the last one.
			$start_of_aka = strpos($entry, "<BR>");
			while( $start_of_aka !== FALSE)
			{
				$end_of_aka = strpos($entry, "<BR>", $start_of_aka+4);//4="<BR>"
				if($end_of_aka !== FALSE)
				{
					$aka_block = substr($entry, $start_of_aka+4, $end_of_aka-($start_of_aka+4));
					$start_of_aka = $end_of_aka;
				}
				else
				{
					$aka_block = substr($entry, $start_of_aka+4);
					$start_of_aka = FALSE; // End of matches.
				}

				// The three leading dots are deliberately left as "any
				// character" so differently punctuated prefixes still match.
				if(preg_match("/...aka <I>(.+)<\/I>/s", $aka_block, $aka_match))
					$aka_r[] = $aka_match[1];
			}

			$entries[] = array('id'=>$matches[1],'title'=>$matches[2],'aka'=>$aka_r);
		}

		$start_of_entry = strpos($list_block, "<LI>", $end_of_entry);
	}

	return $entries;
}

/*
	Will call up http://www.imdb.com/Find/Tsearch?searchterm

	If title is a perfect match, and we can get a IMDB ID, then
	it will be returned, otherwise this function returns FALSE.

	Return -1 if no data read.

	$search    Search term, rawurlencode()'d into the query.
	$readfile  Passed by reference; receives the raw page returned by
	           open_url() so the caller can reuse it.

	The query URL is also echoed as an HTML comment for debugging.
*/
function get_imdb_title_id($search, &$readfile)
{
	$query = "http://www.imdb.com/Find/Tsearch?".rawurlencode($search);
	echo "\n<!-- Search Title URL: $query -->\n";
	$readfile = open_url($query);
	if(empty($readfile))
	{
		return -1;// -1 indicates no data available.
	}

	// Now lets get the title first, so we can check if it is search page.
	$start = strpos($readfile,"<title>");
	if($start === FALSE)
	{
		//ID Not found, but data was read.
		return FALSE;
	}

	$title_start = $start + 7;//7="<title>"
	$end = strpos($readfile,"</title>", $title_start);
	if($end === FALSE)
	{
		// Unterminated <title>: inspect everything after the tag rather than
		// passing substr() a negative length, which would return an arbitrary
		// slice of the page.
		$title = substr($readfile, $title_start);
	}
	else
	{
		$title = substr($readfile, $title_start, $end - $title_start);
	}

	// Still on the search page - there was no direct hit.
	if(strpos($title, "IMDb name and title search") !== FALSE)
	{
		return FALSE;
	}

	// TODO: We can get the info from the Snoopy object which would indicate whether
	// we have a direct match or not, this is a stupid method ascertaining a direct
	// match and it should be replaced.  The snoopy object should be declared global
	// in scope so we only create one of them anyway.

	//<a href="/CommentsEnter?const=0089880">
	if(preg_match("/<a href=\"\/CommentsEnter\?const=([^\"]+)\">/", $readfile, $regs))
	{
		return $regs[1];
	}

	//ID Not found, but data was read.
	return FALSE;
}

/**
	Fetches the IMDb title page (and afterwards the plot page) for $id and
	scrapes the details out of the HTML.

	Will return an array of the following structure; each key is only
	present when the matching section was found on the page:
		array(
			"title"=>title,
			<$title_attribute>=>title (only if the global $title_attribute is set),
			"year"=>year,
			"imageurl"=>imageurl,
			"director"=>director,
			"genre"=>genre(s), space separated,
			"imdbrating"=>user rating,
			"actors"=>actors, comma separated,
			"run_time"=>runtime,
			"age_rating"=>age,
			"plot"=>array(plot1, plot2, ...)
		);

	The sections are located in page order: $end is a cursor that most
	sections search forward from and then advance, so the statement order
	below matters.

	Both request URLs are echoed as HTML comments for debugging.

	If nothing parsed correctly, then this function will return NULL
	instead of an array.
*/
function parse_imdb_data($id)
{
	$query = "http://us.imdb.com/Title?".$id;

	// Escaped so an $id containing "-->" cannot break out of the comment.
	echo "\n<!-- Parse Data URL: ".htmlspecialchars($query)." -->\n";
	$titlePage = open_url($query);

	// Explicit NULL keeps the historic "nothing parsed" return value without
	// returning an undefined variable.
	$attributes = NULL;

	// Set to default, as we may not enter some of the if tests.
	$end = 0;

	//title extraction block
	$start = strpos($titlePage,"<strong class=\"title\">");
	if($start !== FALSE)
	{
		$end = strpos($titlePage,"<small>", $start);
		$title = substr($titlePage,$start+22,$end-($start+22));//22="<strong class=\"title\">"

		//get rid of double quotes - some movies such as "Rambo" include quotes in title.
		$attributes["title"] = trim(str_replace("\"", "", convert_html_numeric_codes(strip_tags($title))));

		// Allow the setting of a title attribute as well.
		//Get the global value from the config.php file, which has already
		// been included in the top level site.php script.
		global $title_attribute;
		if(strlen($title_attribute)>0)
		{
			$attributes[$title_attribute] = $attributes["title"];
		}
	}

	//year extraction block
	$start = strpos($titlePage,"/Sections/Years/", $end);
	if($start !== FALSE)
	{
		$year = substr($titlePage,$start+16,4);// 16="/Sections/Years/"
		  // Move past.
		$end = $start + 20; //20="/Sections/Years/????"

		$attributes["year"] = $year;
	}

	//image src extraction block
	// Only look for a cover when the "no poster" placeholder is absent.
	$start = strpos($titlePage,"alt=\"No poster or movie still available\"", $end);
	if($start === FALSE)
	{
		//<img border="0" alt="cover" src="http://ia.imdb.com/media/imdb/01/I/19/89/70m.jpg" height="140" width="97">
		if(preg_match("/<img border=\"0\" alt=\"cover\" src=\"([^\"]+)\"/", $titlePage, $matches))
		{
			if(starts_with($matches[1], "http://"))
				$attributes["imageurl"] = $matches[1];
			else
				$attributes["imageurl"] = "http://".$matches[1];
		}
	}

	//director extraction block
	$start = strpos($titlePage,"Directed by", $end);
	if($start !== FALSE)
	{
		// The name is the text of the first link after the heading.
		$start = strpos($titlePage,"\">", $start)+2;
		$end = strpos($titlePage,"</a>", $start);
		$director = substr($titlePage,$start,$end-$start);

		$attributes["director"] = convert_html_numeric_codes(strip_tags($director));
	}

	//<b class="ch">Genre:</b>
	//Genre extraction block.
	$start = strpos($titlePage,"Genre:</b>", $end);
	if($start !== FALSE)
	{
		// Move past start text.
		$start+=10;//"Genre:</b>"

		$end = strpos($titlePage,"<br>", $start);

		// Get rid of all the html - a quick hack!
		$genre = trim(substr($titlePage,$start,$end-$start));
		$genre = strip_tags($genre);

		// If composite genre, get rid of / as we do not need it.
		$genre = str_replace(" / "," ",$genre);

		// Expand Sci-Fi to OpenDb matching value.
		$genre = str_replace("Sci-Fi", "ScienceFiction", $genre);

		// Match all whitespace and convert to single character
		// space.
		$genre = preg_replace("/[\s]+/", " ", $genre);

		$genre = str_replace("(more)","", $genre);

		$attributes["genre"] = trim($genre);
	}

	//
	// IMDB User rating
	//
	$start = strpos($titlePage,'<a href="ratings">', $end);
	if($start!==FALSE)
	{
		// now get the start of the rating number
		$start = strpos($titlePage,'</a>', $start+18); //18=<a href="ratings">
		if($start!==FALSE)
		{
			// The rating sits between two consecutive "ratings" links.
			$end = strpos($titlePage,'<a href="ratings">', $start);
			if($end!==FALSE)
			{
				//<b>3.9/10</b>
				if(preg_match("/<b>([0-9|\.]+)\/10<\/b>/", substr($titlePage,$start, $end-$start), $regs))
				{
					$attributes['imdbrating'] = $regs[1];
				}
			}
		}
	}

	//actor extraction block - changed from first billed, to Cast overview, as this always starts
	// the block anyway.  All we need is the start.
	$start = strpos($titlePage,"Cast overview", $end);

	//fix by Dick Balaska
	if($start === FALSE)
	{
		$start = strpos($titlePage,"redited cast:", $end);
	}

	if($start === FALSE)
	{
		$start = strpos($titlePage,"Cast:", $end);
	}

	if($start !== FALSE)
	{
		// The cast block ends at the first of these headings that exists.
		$cast_end = strpos($titlePage,"Also Known As", $start);
		if($cast_end === FALSE)
			$cast_end = strpos($titlePage,"Runtime", $start);
		if($cast_end === FALSE)
			$cast_end = strpos($titlePage,"Country", $start); // if no runtime

		// It is too hard to do it any other way, so we will get the block of
		// actors so we can parse separately.
		if($cast_end === FALSE)
		{
			// No end marker at all: use the rest of the page rather than a
			// negative substr() length.  The cursor falls back to the start
			// of the page, which is what the old FALSE offset amounted to.
			$actorsBlock = substr($titlePage,$start);
			$end = 0;
		}
		else
		{
			$actorsBlock = substr($titlePage,$start,$cast_end-$start);
			$end = $cast_end;
		}

		$actors = "";
		$lend = 0;
		$start = strpos($actorsBlock,"<a href=\"/name", $lend);
		while($start !== FALSE)
		{
			// The name is the link text: from the end of the opening tag up
			// to the next tag.  A truncated link ends the scan - carrying on
			// with a FALSE position would restart at offset 0 and never end.
			$name_start = strpos($actorsBlock,"\">", $start);
			if($name_start === FALSE)
			{
				break;
			}
			$start = $name_start + 2;

			$lend = strpos($actorsBlock,"<", $start);
			if($lend === FALSE)
			{
				break;
			}

			// Ensure we do not get empty actor.
			$actor = trim(substr($actorsBlock,$start,$lend-$start));

			// Strip any html, this is a failsafe in case the parse fails...
			$actor = convert_html_numeric_codes(strip_tags($actor));

			if(strlen($actor)>0)
			{
				if(strlen($actors)==0)
					$actors = $actor;
				else
					$actors = $actors . ", ".$actor;
			}//so we do not get confused with the also known as link.
			$start = strpos($actorsBlock,"<a href=\"/name", $lend);
		}
		$actors = str_replace(", (more)","", $actors);

		$attributes["actors"] = $actors;
	}
	// We do not need this anymore.
	unset($actorsBlock);

	//length extraction block
	$start = strpos($titlePage,"Runtime:</b>", $end);
	if($start !== FALSE)
	{
		$end = strpos($titlePage,"<br>", $start+12);//12="Runtime:</b>"
		$length = trim(substr($titlePage,$start+12,$end-($start+12)));

		// Look for first numeric characters, to represent runtime.
		// Ignore any country specific runtime, at this stage!
		if(preg_match("/([0-9]+)/", $length, $matches))
		{
			$attributes["run_time"] = $matches[1];
		}
	}

	//Certification extraction block
	$start = strpos($titlePage,"Certification:</b>", $end);
	if($start !== FALSE)
	{
		$end = strpos($titlePage,"</td>", $start+18);//18="Certification:</b>"
		if($end !== FALSE)
		{
			// Now get the block which we can process.
			$certBlock = trim(substr($titlePage, $start+18,$end-($start+18)));//18="Certification:</b>"

			// Ensure we have a valid value here!
			global $age_certification_codes;

			// Default to USA if not defined!
			if(!is_array($age_certification_codes))
				$age_certification_codes = array("USA");

			// Codes are tried in configured order; the first one that yields
			// a rating wins.
			foreach($age_certification_codes as $age_code)
			{
				$startOfRating = laststrpos($certBlock, $age_code.":");
				if($startOfRating !== FALSE)
				{
					// Move match along past actual CODE: part!
					$startOfRating += strlen($age_code.":");

					$endOfRating = strpos($certBlock,"</a>", $startOfRating);
					if($endOfRating!==FALSE)
					{
						$certCode = trim(substr($certBlock,$startOfRating,$endOfRating-$startOfRating));
						if(strlen($certCode)>0)
						{
							if($certCode == "Unrated")
								$certCode = "NR";
							$attributes["age_rating"] = $certCode;

							// If we are up to USA ratings and we have any certification mappings configured,
							// we will do conversion now.
							if(strcasecmp($age_code, "USA")===0)
							{
								global $usa_age_certification_map;
								if(strlen($attributes["age_rating"])>0 && is_array($usa_age_certification_map))
								{
									if(strlen($usa_age_certification_map[$attributes["age_rating"]])>0)
										$attributes["age_rating"] = $usa_age_certification_map[$attributes["age_rating"]];
								}
							}

							break;//break out of foreach loop!
						}
					}
				}
			}
		}
	}

	// We do not need this anymore.
	unset($titlePage);

	$query = "http://us.imdb.com/Plot?".$id;
	// Escaped so an $id containing "-->" cannot break out of the comment.
	echo "\n<!-- Plot URL: ".htmlspecialchars($query)." -->\n";
	$plotPage = open_url($query);

	$plot_arr = array();
	$start_of_plot = strpos($plotPage,"<p class=\"plotpar\">");
	while($start_of_plot !== FALSE)
	{
		$end_of_plot = strpos($plotPage, "</p>", $start_of_plot+19);//19="<p class=\"plotpar\">"
		if($end_of_plot === FALSE)
		{
			// Unterminated paragraph: stop here.  Searching on from a FALSE
			// position would find the first paragraph again and never end.
			break;
		}

		$plot = convert_html_numeric_codes(strip_tags(trim(substr($plotPage, $start_of_plot+19,$end_of_plot-($start_of_plot+19)))));

		// Replace any tabs or newlines with spaces.
		$plot = preg_replace("/[\r\n\t]+/"," ", $plot);

		// add to plot array.
		$plot_arr[] = $plot;

		$start_of_plot = strpos($plotPage,"<p class=\"plotpar\">",$end_of_plot);
	}

	// We do not need this anymore.
	unset($plotPage);

	// Now save the plots into $attributes array.
	if(count($plot_arr) > 0)
	{
		$attributes['plot'] = $plot_arr;
	}

	//echo("<pre>");
	//print_r($attributes);
	//echo("</pre>");

	// This may actually be NULL when nothing was parsed.
	return $attributes;
}
?>
