/*  $Header: /cvsroot/dvipdfmx/src/pdfparse.c,v 1.30 2004/03/14 04:23:03 hirata Exp $

    This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.

    Copyright (C) 2002 by Jin-Hwan Cho and Shunsaku Hirata,
    the dvipdfmx project team <dvipdfmx@project.ktug.or.kr>
    
    Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/

#if HAVE_CONFIG_H
#include "config.h"
#endif

#include <ctype.h>
#include <string.h>

#include "system.h"
#include "mem.h"
#include "mfileio.h"
#include "numbers.h"
#include "dvi.h"

#include "pdfspecial.h"
#include "pdfobj.h"
#include "pdfdoc.h"
#include "pdfdev.h"

#include "pdfparse.h"

/* PDF */
#ifdef  is_space
#undef  is_space
#endif
#ifdef  is_delim
#undef  is_delim
#endif

/*
 * White-space test used by this parser: space, tab, form feed, carriage
 * return, line feed and NUL.  Unlike isspace(), NUL counts as white space
 * and vertical tab does not.  The argument is evaluated several times.
 */
#define is_space(c) ((c) == ' '  || (c) == '\t' || (c) == '\f' || \
		     (c) == '\r' || (c) == '\n' || (c) == '\0')
/*
 * Delimiter test used to decide where a token ends: '(' '/' '<' '>' '['
 * ']' and '%'.  Note that ')' , '{' and '}' are not in this list.
 * The argument is evaluated several times.
 */
#define is_delim(c) ((c) == '(' || (c) == '/' || \
                     (c) == '<' || (c) == '>' || \
		     (c) == '[' || (c) == ']' || \
                     (c) == '%')
/*
 * True when p has reached e, or when *p is a white-space or delimiter
 * character.  p is only dereferenced when (p) < (e).
 */
#define PDF_TOKEN_END(p,e) ((p) >= (e) || is_space(*(p)) || is_delim(*(p)))

/*
 * File-scope parser state.
 *   cmap    - value stored by set_tounicode_cmap(): the result of
 *             CMap_cache_find(), or -1 when no CMap name was given.
 *             Initially -1.
 *   tainted - set to 1 by parse_pdf_tainted_dict() while it parses a
 *             dictionary (only when cmap >= 0) and reset to 0 afterwards.
 */
static struct {
  int cmap;
  int tainted;
} parser_state = {
  -1, 0
};

#if 0
/* string... */
/*
 * Disabled code (excluded by the surrounding #if 0).
 * Intended to scan from start up to the first white-space or delimiter
 * character before end, store the stop position in *endptr and return
 * the number of characters scanned.
 * NOTE(review): as written, `p = *start' assigns a char to a pointer, so
 * this would need fixing before it could be enabled.
 */
static long
scan_token (const char *start, char *end, char **endptr)
{
  char *p;

  p = *start;
  while (p < end) {
    if (is_space(*p) || is_delim(*p))
      break;
    p++;
  }

  if (endptr)
    *endptr = p;

  return (long) (p - start);
}
#endif

#ifndef PDF_PARSE_STRICT
#include "cmap.h"

/*
 * Remember which CMap should be used for string conversion.
 * A NULL name clears the setting (parser_state.cmap becomes -1);
 * otherwise the id returned by CMap_cache_find() is stored.
 */
void
set_tounicode_cmap (const char *cmap_name)
{
  if (cmap_name)
    parser_state.cmap = CMap_cache_find(cmap_name);
  else
    parser_state.cmap = -1;
}

/*
 * Decide whether byte c must be copied together with the byte following
 * it, bypassing escape processing.  Returns 1 when the name of the
 * currently selected CMap contains "RKSJ", "B5", "GBK" or "KSC" and c
 * lies in the byte range checked for that name; returns 0 otherwise,
 * including when no CMap is selected or its name is unavailable.
 */
static int
no_escape_char (unsigned char c)
{
  CMap *cmap;
  char *name;

  if (parser_state.cmap < 0)
    return 0;

  cmap = CMap_cache_get(parser_state.cmap);
  if (!cmap)
    return 0;

  name = CMap_get_name(cmap);
  if (!name)
    return 0;

  /* "RKSJ" names use two separate byte ranges. */
  if (strstr(name, "RKSJ") &&
      ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xFC)))
    return 1;

  /* The other three names share one byte range. */
  if (c >= 0x81 && c <= 0xFE &&
      (strstr(name, "B5") || strstr(name, "GBK") || strstr(name, "KSC")))
    return 1;

  return 0;
}
#else /* PDF_PARSE_STRICT */
/*
 * Strict-mode replacement: the pdf:tounicode special is unsupported, so
 * any call is reported through ERROR().  The parameter type matches the
 * non-strict definition (const char *) so that a single prototype serves
 * both build configurations.
 */
void
set_tounicode_cmap (const char *cmap_name)
{
  (void) cmap_name; /* intentionally unused */
  ERROR("\\special pdf:tounicode not supported.");
}
#endif /* !PDF_PARSE_STRICT */

/* Forward declaration; xtoi() is defined further down, with the hex string parser. */
static int xtoi (char ch);

/*
 * Position recorded by SAVE() and used by DUMP_RESTORE().
 * There is a single slot for the whole file, so a SAVE() performed by a
 * nested parse function overwrites the value recorded by its caller.
 */
static char *save = NULL;

/*
 * Print the beginning of the input buffer [start, end) through MESG(),
 * framed by "-->" and "<--".  At most DUMP_LIMIT characters are shown;
 * "..." is appended when DUMP_LIMIT characters were printed.
 */
void
dump (const char *start, const char *end)
{
  int shown;

#define DUMP_LIMIT 50
  MESG("\nCurrent input buffer is -->");
  for (shown = 0; start + shown < end && shown < DUMP_LIMIT; shown++)
    MESG("%c", start[shown]);
  if (shown == DUMP_LIMIT)
    MESG("...");
  MESG("<--\n");
}

/*
 * SAVE(s,e):         record position s in the file-scope `save' pointer
 *                    (e is accepted for symmetry and not used).
 * DUMP_RESTORE(s,e): dump the input from the saved position up to e and
 *                    reset s to the saved position.
 * DUMP_RESTORE uses its own `e' argument rather than whatever variable
 * named `end' happens to be in scope at the point of expansion.
 */
#define SAVE(s,e) do {\
   save = (s);\
 } while (0)
#define DUMP_RESTORE(s,e) do {\
   dump(save, (e));\
   (s) = save;\
 } while (0)

/*
 * Advance *start past the rest of the current line.
 * Scanning stops at the first CR or LF (or at end); then one end-of-line
 * marker is consumed: CR, LF, or the pair CR LF, which counts as a single
 * marker.  *start never moves beyond end.
 */
void
skip_line (char **start, char *end)
{
  char *p = *start;

  /* Find the first end-of-line character, if there is one. */
  for ( ; p < end; p++) {
    if (*p == '\n' || *p == '\r')
      break;
  }
  /* Consume the marker itself. */
  if (p < end && *p == '\r')
    p++;
  if (p < end && *p == '\n')
    p++;

  *start = p;
}

/*
 * Advance *start over white space and comments.
 *
 * White space is decided by is_space(), not isspace(): the PDF spec
 * treats NUL (0x00) as white space although isspace() does not, and it
 * does not treat vertical tab (0x0B) as white space although isspace()
 * does.  A '%' starts a comment, which is skipped through the end of its
 * line by skip_line().
 */
void
skip_white (char **start, char *end)
{
  for (;;) {
    if (*start >= end)
      break;
    if (**start == '%')
      skip_line(start, end);
    else if (is_space(**start))
      (*start)++;
    else
      break;
  }
}

/*
 * Return 1 if the NUL-terminated string s is an optionally signed decimal
 * integer with at least one digit, 0 otherwise (including NULL, the empty
 * string, and a sign with no digits).
 */
int
is_an_int (const char *s)
{
  if (!s || !*s)
    return 0;
  if (*s == '+' || *s == '-')
    s++;
  /* A sign alone is not an integer. */
  if (!isdigit((unsigned char) *s))
    return 0;
  /* The cast keeps isdigit() defined for bytes above 0x7f. */
  while (isdigit((unsigned char) *s))
    s++;

  return (*s == '\0') ? 1 : 0;
}

/*
 * Return 1 if the NUL-terminated string s is an optionally signed decimal
 * number: digits, optionally followed by '.' and more digits.  At least
 * one digit must be present on either side of the point, so "1.", ".5"
 * and "-3.25" are numbers while ".", "+" and "" are not.
 */
int
is_a_number (const char *s)
{
  int ndigits = 0;

  if (!s || !*s)
    return 0;

  if (*s == '+' || *s == '-')
    s++;
  /* The casts keep isdigit() defined for bytes above 0x7f. */
  while (isdigit((unsigned char) *s)) {
    s++;
    ndigits++;
  }
  if (*s == '.') {
    s++;
    while (isdigit((unsigned char) *s)) {
      s++;
      ndigits++;
    }
  }

  /* Anything left over, or no digit at all, disqualifies the string. */
  return (*s == '\0' && ndigits > 0) ? 1 : 0;
}

/*
 * Return a newly allocated, NUL-terminated copy of the bytes in
 * [start, end), or NULL when that range is empty.  The copy is obtained
 * with NEW().
 */
static char *
parsed_string (const char *start, const char *end)
{
  char *copy;
  int   n = end - start;

  if (n <= 0)
    return NULL;

  copy = NEW(n + 1, char);
  memcpy(copy, start, n);
  copy[n] = '\0';

  return copy;
}

char *
parse_number (char **start, char *end)
{
  char *number, *p;

  skip_white(start, end);
  p = *start;
  if (p < end && (*p == '+' || *p == '-'))
    p++;
  while (p < end && isdigit(*p))
    p++;
  if (p < end && *p == '.') {
    p++;
    while (p < end && isdigit(*p))
      p++;
  }
  number = parsed_string(*start, p);

  *start = p;
  return number;
}

/*
 * Parse a run of decimal digits after skipping white space.
 * Returns a newly allocated copy of the digits and moves *start past
 * them, or returns NULL (with *start at the first non-white character)
 * when no digit is found.
 */
char *
parse_unsigned (char **start, char *end)
{
  char *number, *p;

  skip_white(start, end);
  for (p = *start; p < end; p++) {
    /* The cast keeps isdigit() defined for bytes above 0x7f. */
    if (!isdigit((unsigned char) *p))
      break;
  }
  number = parsed_string(*start, p);

  *start = p;
  return number;
}

/*
 * Collect the longest prefix of [*start, end) made only of characters
 * listed in valid_chars.  Returns a newly allocated copy of that prefix
 * and moves *start past it; returns NULL when the prefix is empty.
 * Leading white space is NOT skipped.
 */
static char *
parse_gen_ident (char **start, char *end, const char *valid_chars)
{
  char *ident, *p;

  /* No skip_white(start, end)? */
  for (p = *start; p < end; p++) {
    /*
     * strchr() also matches the terminating NUL of valid_chars, so a NUL
     * byte in the input must be rejected explicitly.
     */
    if (*p == '\0' || !strchr(valid_chars, *p))
      break;
  }
  ident = parsed_string(*start, p);

  *start = p;
  return ident;
}

/*
 * Parse an identifier made of the characters listed below.
 * Returns a newly allocated string, or NULL when no such character is
 * found at *start.
 */
char *
parse_ident (char **start, char *end)
{
  static const char ident_chars[] =
    "!\"#$&'*+,-.0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\^_`abcdefghijklmnopqrstuvwxyz|~";
  char *ident;

  ident = parse_gen_ident(start, end, ident_chars);
  return ident;
}

/*
 * Parse a value identifier.  The accepted character set is the one
 * listed below; compared with parse_ident() it includes '/' and excludes
 * '='.  Returns a newly allocated string, or NULL when nothing matches.
 */
char *
parse_val_ident (char **start, char *end)
{
  static const char value_chars[] =
    "!\"#$&'*+,-./0123456789:;?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\^_`abcdefghijklmnopqrstuvwxyz|~";
  char *ident;

  ident = parse_gen_ident(start, end, value_chars);
  return ident;
}

/*
 * Parse an identifier made of digits, letters, '_' and '@'.
 * Returns a newly allocated string, or NULL when nothing matches.
 */
char *
parse_c_ident (char **start, char *end)
{
  static const char c_ident_chars[] =
    "0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
  char *ident;

  ident = parse_gen_ident(start, end, c_ident_chars);
  return ident;
}

/*
 * Parse an identifier introduced by '@'.
 * When *start points at '@', that character is consumed and the result
 * of parse_ident() on what follows is returned.  Otherwise nothing is
 * consumed and NULL is returned.
 */
char *
parse_opt_ident (char **start, char *end)
{
  if (*start >= end || **start != '@')
    return NULL;

  (*start)++;
  return parse_ident(start, end);
}

/*
 * PDF Object
 */
/*
 * Parse a PDF numeric object.
 * The number text must be followed by the end of input, white space or
 * a delimiter.  On failure a warning is issued, *start is reset to the
 * saved position and NULL is returned.
 */
static pdf_obj *
parse_pdf_number (char **start, char *end)
{
  pdf_obj *obj = NULL;
  char    *text;

  SAVE(*start, end);
  skip_white(start, end);

  text = parse_number(start, end);
  if (text) {
    if (PDF_TOKEN_END(*start, end))
      obj = pdf_new_number(atof(text));
    RELEASE(text);
  }
  if (obj)
    return obj;

  WARN("Could not find a numeric object.");
  DUMP_RESTORE(*start, end);
  return NULL;
}

/*
 * PDF Name
 *
 *  PDF-1.2+: Two hexadecimal digits preceded by a number sign.
 *  Decoded string is always shorter.
 */
/*
 * Decode "#xx" hexadecimal escapes of a PDF name in place and return the
 * same buffer.
 *
 *  - '#' followed by two hexadecimal digits is replaced by the byte they
 *    encode; a decoded NUL byte is dropped.
 *  - '#' followed by fewer than two characters ends decoding with a
 *    warning; the incomplete escape is discarded.
 *  - '#' followed by characters that are not hexadecimal digits is kept
 *    as a literal '#' (with a warning) and the following characters are
 *    processed normally.  xtoi() returns -1 for such characters, and a
 *    negative value must not be shifted or combined into a byte.
 *
 * The result is never longer than the input, so writing through `out'
 * cannot overtake reading through `in'.
 */
static char *
decode_pdf_name (char *name)
{
  char *in, *out;

  in = out = name;
  while (*in != '\0') {
    char c = *in++;

    if (c == '#') {
      int hi, lo;

      if (in[0] == '\0' || in[1] == '\0') {
        WARN("Premature end of input name string.");
        break;
      }
      hi = xtoi(in[0]);
      lo = xtoi(in[1]);
      if (hi < 0 || lo < 0) {
        WARN("Invalid hexadecimal escape in name string.");
      } else {
        c   = (char) ((hi << 4) | lo);
        in += 2;
      }
    }
    if (c != 0) /* Ignore null */
      *(out++) = c;
  }
  *out = '\0';

  return name;
}

/*
 * Parse a PDF name object: '/' followed by an identifier, which is then
 * passed through decode_pdf_name().  The name must be followed by the
 * end of input, white space or a delimiter.
 * On failure a warning is issued, *start is reset to the saved position
 * and NULL is returned.
 */
pdf_obj *
parse_pdf_name (char **start, char *end)
{
  pdf_obj *obj = NULL;

  SAVE(*start, end);

  skip_white(start, end);
  if (*start < end && **start == '/') {
    char *raw;

    (*start)++;
    raw = parse_ident(start, end);
    if (raw) {
      char *decoded = decode_pdf_name(raw);

      if (decoded) {
        /* The decoded name contains no null character. */
        if (PDF_TOKEN_END(*start, end))
          obj = pdf_new_name(decoded);
        RELEASE(decoded);
      }
    }
  }
  if (obj)
    return obj;

  WARN("Could not find a name object.");
  DUMP_RESTORE(*start, end);
  return NULL;
}

/*
 * Parse the keyword "true" or "false" into a PDF boolean object.
 * The keyword must be followed by the end of input, white space or a
 * delimiter.  On failure a warning is issued, *start is reset to the
 * saved position and NULL is returned.
 */
pdf_obj *
parse_pdf_boolean (char **start, char *end)
{
  pdf_obj *obj   = NULL;
  int      truth = -1; /* -1: no keyword recognized */

  SAVE(*start, end);

  skip_white (start, end);
  if (*start <= end - 4 &&
      memcmp(*start, "true", 4) == 0 && PDF_TOKEN_END(*start + 4, end)) {
    *start += 4;
    truth   = 1;
  } else if (*start <= end - 5 &&
             memcmp(*start, "false", 5) == 0 && PDF_TOKEN_END(*start + 5, end)) {
    *start += 5;
    truth   = 0;
  }
  if (truth >= 0)
    obj = pdf_new_boolean(truth);

  if (!obj) {
    WARN("Could not find a boolean object.");
    DUMP_RESTORE(*start, end);
  }

  return obj;
}

/*
 * Parse the keyword "null" into a PDF null object.
 * The keyword must be followed by the end of input, white space or a
 * delimiter.  On failure a warning is issued, *start is reset to the
 * saved position and NULL is returned.
 */
pdf_obj *
parse_pdf_null (char **start, char *end)
{
  pdf_obj *obj = NULL;
  int      found;

  SAVE(*start, end);
  skip_white (start, end);

  found = (*start <= end - 4 &&
           memcmp(*start, "null", 4) == 0 &&
           PDF_TOKEN_END(*start + 4, end));
  if (found) {
    *start += 4;
    obj = pdf_new_null();
  }
  if (obj)
    return obj;

  WARN("Could not find a null object.");
  DUMP_RESTORE(*start, end);
  return NULL;
}

/*
 * PDF Literal String
 */
#ifndef isodigit
#define isodigit(c) ((c) >= '0' && (c) <= '7')
#endif
/*
 * Decode one backslash escape sequence.
 *
 * On entry *start must point at a backslash that is followed by at least
 * one more character (asserted).  On return *start points past the whole
 * sequence.
 *
 *   \n \r \t \b \f            the corresponding control character
 *   \<LF>, \<CR>, \<CR><LF>   line continuation: nothing is produced
 *   \c, c listed in escape_str  the character c itself
 *   \d, \dd, \ddd (octal)     the byte with that value (low 8 bits)
 *   anything else             the character after the backslash
 *
 * Returns 1 when a character was stored in *ch and 0 for a line
 * continuation.
 */
static int
parse_escape_char (char **start, char *end,
                   const char *escape_str, unsigned char *ch)
{
  char *p;
  int   produced = 1;

  /* Caller should check this. */
  ASSERT(*start < end - 1 && **start == '\\');
  p = *start + 1;

  if (*p == '\n') {
    /* An end-of-line marker preceded by a backslash must be ignored. */
    p++;
    produced = 0;
  } else if (*p == '\r') {
    p++;
    if (p < end && *p == '\n')
      p++;
    produced = 0;
  } else if (*p == 'n') {
    *ch = '\n'; p++;
  } else if (*p == 'r') {
    *ch = '\r'; p++;
  } else if (*p == 't') {
    *ch = '\t'; p++;
  } else if (*p == 'b') {
    *ch = '\b'; p++;
  } else if (*p == 'f') {
    *ch = '\f'; p++;
  } else if (strchr(escape_str, *p)) {
    *ch = *p; p++;
  } else if (isodigit(*p)) {
    int ndigits, val = 0;

    /* isodigit() is a macro: keep side effects out of its argument. */
    for (ndigits = 0; ndigits < 3 && p < end && isodigit(*p); ndigits++) {
      val = (val << 3) + (*p - '0');
      p++;
    }
    *ch = (unsigned char) (val & 0xff);
  } else {
    if (*p == 'x')
      WARN("Unknown control sequence \\x.");
    /* Ignore only backslash. */
    *ch = (unsigned char) *p; p++;
  }

  *start = p;
  return produced;
}

/*
 * Parse a PDF literal string "( ... )" into a PDF string object.
 *
 * The caller must ensure that, after white space, *start points at '('
 * and at least one more character follows (asserted).  Nested unescaped
 * parentheses must balance.  Backslash escapes are decoded by
 * parse_escape_char(); a bare CR or CR LF is converted to a single LF.
 *
 * Returns the new string object with *start just past the closing ')'.
 * On unbalanced or truncated input (including a backslash that is the
 * very last byte of the buffer) a warning is issued, *start is reset to
 * the saved position and NULL is returned.
 */
static pdf_obj *
parse_pdf_literal_string (char **start, char *end)
{
  pdf_obj *result = NULL;
  unsigned char *str;
  int balance = 0, len = 0;

  SAVE(*start, end);

  skip_white(start, end);
  ASSERT(*start < end - 1 && **start == '(');
  (*start)++;

  /* Every output byte consumes at least one input byte. */
  str = NEW(end - *start, unsigned char);
  /*
   * According to the PDF spec., an end-of-line marker, not preceded
   * by a backslash, must be converted to single \n.
   */
  while (*start < end &&
         ((**start != ')' || balance > 0))) {
#ifndef PDF_PARSE_STRICT
    if (parser_state.tainted) {
      /* Copy this byte and the next one verbatim, without escape processing. */
      if (*start < end - 1 &&
          no_escape_char(**start)) {
        str[len++] = *((*start)++);
        str[len++] = *((*start)++);
        continue;
      }
    }
#endif /* !PDF_PARSE_STRICT */
    if (**start == '\\') {
      /*
       * parse_escape_char() requires a character after the backslash.
       * A backslash at the very end means the string is truncated; stop
       * here and let the check below report it.
       */
      if (*start >= end - 1)
        break;
      if (parse_escape_char(start, end, "\\()", &(str[len])))
        len++;
    } else if (**start == '\r') {
      (*start)++;
      if (*start < end && **start == '\n')
        (*start)++;
      str[len++] = '\n';
    } else {
      if (**start == '(')
        balance++;
      else if (**start == ')')
        balance--;
      str[len++] = *((*start)++);
    }
  }
  if (balance > 0 ||
      *start >= end || **start != ')') {
    WARN("Unbalanced parens/truncated PDF literal string.");
    DUMP_RESTORE(*start, end);
  } else {
    result = pdf_new_string(str, len);
    (*start)++;
  }
  RELEASE(str);

  return result;
}

/*
 * PDF Hex String
 */
/*
 * Convert one hexadecimal digit character to its value (0-15).
 * Returns -1 when ch is not a hexadecimal digit.
 */
static int
xtoi (char ch)
{
  int value = -1;

  if ('0' <= ch && ch <= '9')
    value = ch - '0';
  else if ('a' <= ch && ch <= 'f')
    value = 10 + (ch - 'a');
  else if ('A' <= ch && ch <= 'F')
    value = 10 + (ch - 'A');

  return value;
}

/*
 * Parse a PDF hexadecimal string "< ... >" into a PDF string object.
 *
 * The caller must ensure that, after white space, *start points at '<'
 * and at least one more character follows (asserted).  White space and
 * comments between digits are skipped.  If the final digit is missing it
 * is taken to be zero.
 *
 * Returns the new string object with *start just past '>'.  Returns NULL,
 * after a warning and with *start reset to the saved position, when the
 * input ends before '>' or when a character that is not a hexadecimal
 * digit is found (xtoi() yields -1 for those, which must not be shifted
 * or combined into a byte).
 */
static pdf_obj *
parse_pdf_hex_string (char **start, char *end)
{
  pdf_obj *result = NULL;
  long     len = 0;
  int      valid = 1;
  unsigned char *str;

  SAVE(*start, end);

  skip_white(start, end);
  ASSERT(*start < end - 1 && **start == '<');
  (*start)++;

  /* Two digits per byte, plus one byte for a possible odd final digit. */
  str = NEW((end-(*start))/2 + 1, unsigned char);
  /*
   * PDF Reference does not describe how to treat invalid char.
   * Zero is appended if final hex digit is missing.
   */
  while (*start < end && **start != '>') {
    int hi, lo;

    skip_white(start, end);
    if (*start >= end || **start == '>')
      break;
    hi = xtoi(*((*start)++));
    skip_white(start, end);
    if (*start >= end || **start == '>') {
      lo = 0;
    } else
      lo = xtoi(*((*start)++));
    if (hi < 0 || lo < 0) {
      valid = 0;
      break;
    }
    str[len++] = (unsigned char) ((hi << 4)|lo);
  }
  if (!valid) {
    WARN("Invalid character in hex string.");
    DUMP_RESTORE(*start, end);
  } else if (*start >= end || **start != '>') {
    /* Check the bound first: *start may be equal to end here. */
    WARN("Premature end of input hex string.");
    DUMP_RESTORE(*start, end);
  } else {
    result = pdf_new_string(str, len);
    (*start)++;
  }
  RELEASE(str);

  return result;
}

/*
 * Parse a PDF string object, either literal "( ... )" or hexadecimal
 * "< ... >".  A '<' is only taken as a hex string when it is followed by
 * '>' or by a hexadecimal digit.
 *
 * On failure a warning is issued, *start is reset to its value on entry
 * and NULL is returned.  The entry position is kept in a local variable
 * because the literal and hex string parsers called from here overwrite
 * the file-scope save slot.
 */
pdf_obj *
parse_pdf_string (char **start, char *end)
{
  pdf_obj *result = NULL;
  char    *origin = *start;

  skip_white(start, end);
  if (*start <= end - 2) {
    if (**start == '(')
      result = parse_pdf_literal_string(start, end);
    else if (**start == '<' &&
             (*(*start+1) == '>' ||
              /* The cast keeps isxdigit() defined for bytes above 0x7f. */
              isxdigit((unsigned char) *(*start+1))))
      result = parse_pdf_hex_string(start, end);
  }
  if (!result) {
    WARN("Could not find a string object.");
    dump(origin, end);
    *start = origin;
  }

  return result;
}

#ifndef PDF_PARSE_STRICT
/*
 * Convert the contents of a PDF string object through cmap and return a
 * new PDF string object that starts with the bytes 0xFE 0xFF followed by
 * the output of CMap_decode().
 *
 * Returns NULL when cmap or instring is NULL, or when the input already
 * starts with 0xFE 0xFF.  ERROR() is raised when CMap_decode() leaves
 * input unconsumed.  The output is built in a WBUF_SIZE byte buffer on
 * the stack.
 */
static pdf_obj *
modified_string (CMap *cmap, pdf_obj *instring)
{
#define WBUF_SIZE 40960
  unsigned char  wbuf[WBUF_SIZE];
  unsigned char *dst;
  unsigned char *src;
  long srcleft, dstleft;

  if (!cmap || !instring)
    return NULL;

  srcleft = pdf_string_length(instring);
  src     = pdf_string_value(instring);

  /* Unicode BOM might be found. */
  if (srcleft >= 2 && src[0] == 0xfe && src[1] == 0xff)
    return NULL;

  /* Start the output with the byte order mark. */
  wbuf[0] = 0xfe;
  wbuf[1] = 0xff;
  dst     = wbuf + 2;
  dstleft = WBUF_SIZE - 2;

  CMap_decode(cmap, (const unsigned char **)&src, &srcleft, &dst, &dstleft);
  if (srcleft > 0)
    ERROR("Conversion failed.");

  return pdf_new_string(wbuf, WBUF_SIZE - dstleft);
}

/*
 * Parse a dictionary whose text strings may need conversion.
 *
 * While the dictionary is parsed, parser_state.tainted is set to 1 if a
 * CMap has been selected (parser_state.cmap >= 0).  Afterwards, for each
 * key listed in tainted_keys whose value is a string, the value is passed
 * to modified_string(); when that returns a new string, it is added to
 * the dictionary under the same key.  parser_state.tainted is always
 * reset to 0 before returning.
 *
 * Returns whatever parse_pdf_dict() returned.
 */
pdf_obj *
parse_pdf_tainted_dict (char **start, char *end)
{
  static const char *tainted_keys[] = {
    "Title",   "Author",   "Subject", "Keywords",
    "Creator", "Producer", "Contents", "Subj", "TU", NULL
  };
  pdf_obj *dict;

  parser_state.tainted = (parser_state.cmap >= 0) ? 1 : 0;
  dict = parse_pdf_dict(start, end);
  if (dict && parser_state.tainted) {
    CMap        *cmap = CMap_cache_get(parser_state.cmap);
    const char **key;

    for (key = tainted_keys; *key; key++) {
      pdf_obj *current = pdf_lookup_dict(dict, *key);
      pdf_obj *converted;

      if (!PDF_OBJ_STRINGTYPE(current))
        continue;
      converted = modified_string(cmap, current);
      if (converted)
        pdf_add_dict(dict, pdf_new_name(*key), converted);
    }
  }
  parser_state.tainted = 0;

  return dict;
}
#else /* PDF_PARSE_STRICT */
/*
 * Strict-mode variant: no string conversion is performed, the input is
 * simply parsed as an ordinary dictionary.
 */
pdf_obj *
parse_pdf_tainted_dict (char **start, char *end)
{
  pdf_obj *dict;

  dict = parse_pdf_dict(start, end);
  return dict;
}
#endif /* !PDF_PARSE_STRICT */

/*
 * Release the PDF object held in `o', if any, and reset `o' to NULL.
 * Wrapped in do { } while (0) so that the expansion is one statement and
 * cannot capture an `else' that follows it.  `o' is evaluated more than
 * once and must be an lvalue.
 */
#define DELETE_OBJ(o) do {\
    if ((o)) {\
      pdf_release_obj((o));\
      (o) = NULL;\
    }\
  } while (0)
/*
 * Parse a PDF dictionary object "<< /Key value ... >>".
 *
 * Returns the new dictionary with *start just past the closing ">>".
 * On any failure the partially built dictionary is released, the input
 * is dumped, *start is reset to its value on entry and NULL is returned.
 *
 * The entry position is kept in a local variable: the name and value
 * parsers called from here use the file-scope save slot themselves, so
 * that slot no longer holds this function's position once they have run.
 */
pdf_obj *
parse_pdf_dict (char **start, char *end)
{
  pdf_obj *result = NULL;
  char    *origin = *start;

  skip_white(start, end);
  /* At least four letter <<>>. */
  if (end - *start < 4 ||
      **start != '<' || *(*start+1) != '<') {
    WARN("Could not find a dictionary object.");
    dump(origin, end);
    *start = origin;
    return NULL;
  }
  *start += 2;

  /* Who is responsible for skipping white? */
  result = pdf_new_dict();
  skip_white(start, end);
  while (*start < end && **start != '>') {
    pdf_obj *key, *value;

    key = parse_pdf_name(start, end);
    if (key == NULL) {
      WARN("Could not find a key in dictionary object.");
      DELETE_OBJ(result);
      break;
    }
    skip_white(start, end);

    value = parse_pdf_object(start, end);
    if (value == NULL) {
      pdf_release_obj(key);
      WARN("Could not find a value in dictionary object.");
      DELETE_OBJ(result);
      break;
    }
    skip_white(start, end);

    pdf_add_dict(result, key, value);
  }

  /* The loop stopped at '>' or at end; a second '>' must follow. */
  if (result &&
      (end - *start < 2 || *(*start+1) != '>')) {
    DELETE_OBJ(result);
    WARN("Dictionary object ended prematurely.");
  }

  if (result) {
    *start += 2;
  } else {
    dump(origin, end);
    *start = origin;
  }

  return result;
}

/*
 * Parse a PDF array object "[ obj obj ... ]".
 *
 * Returns the new array with *start just past the closing ']'.
 * On any failure the partially built array is released, the input is
 * dumped, *start is reset to its value on entry and NULL is returned.
 *
 * The entry position is kept in a local variable: the element parser
 * called from here uses the file-scope save slot itself, so that slot no
 * longer holds this function's position once an element has been parsed.
 */
pdf_obj *
parse_pdf_array (char **start, char *end)
{
  pdf_obj *result = NULL;
  char    *origin = *start;

  skip_white(start, end);
  if (end - *start < 2 || **start != '[') {
    WARN("Could not find an array object.");
    dump(origin, end);
    *start = origin;
    return NULL;
  }
  (*start)++;

  result = pdf_new_array();
  skip_white(start, end);
  while (*start < end && **start != ']') {
    pdf_obj *elem;

    elem = parse_pdf_object(start, end);
    if (elem == NULL) {
      DELETE_OBJ(result);
      WARN("Could not find a valid object in array object.");
      break;
    }
    skip_white(start, end);

    pdf_add_array(result, elem);
  }

  /* Running into end means the closing ']' is missing. */
  if (result && *start >= end) {
    DELETE_OBJ(result);
    WARN("Array object ended prematurely.");
  }

  if (result) {
    (*start)++; /* skip ']' */
  } else {
    dump(origin, end);
    *start = origin;
  }

  return result;
}

/*
 * Parse "stream ... endstream" following an already parsed dictionary
 * and build a PDF stream object from both.
 *
 * dict is consumed in every case: on success its entries are merged into
 * the dictionary of the new stream and dict is released; on failure dict
 * (and any stream object already created) is released as well, so the
 * caller must not use dict after the call.
 *
 * The data length is taken from the "Length" entry of dict and must fit
 * in the remaining input.  Unless PDF_PARSE_STRICT is defined, a missing
 * "Length" is tolerated and the data is taken to end one character
 * before the last "endstream" keyword in the input.
 *
 * Returns the stream object with *start just past "endstream", or NULL
 * after a warning, with *start reset to the saved position.
 */
static pdf_obj *
parse_pdf_stream (char **start, char *end, pdf_obj *dict)
{
  pdf_obj *result = NULL;
  pdf_obj *stream_dict;
  pdf_obj *filters;
  pdf_obj *tmp;
  long     stream_length = -1;

#if 0
  /* Why this was commented out? */
  if (pdf_lookup_dict(dict, "F")) {
    WARN("File streams not implemented (yet)");
    return NULL;
  }
#endif

  SAVE(*start, end);

  skip_white(start, end);
  if (end - *start < 6 ||
      strncmp(*start, "stream", 6)) {
    WARN("Could not find a stream object.");
    pdf_release_obj(dict);
    DUMP_RESTORE(*start, end);
    return NULL;
  }
  *start += 6;

  /*
   * Carriage return alone is not allowed after keyword "stream".
   * See, PDF Reference, 4th ed., version 1.5, p. 36.
   */
  if (*start < end && **start == '\n')
    (*start)++;
  else if (*start < end - 1 &&
           (**start == '\r' && *(*start+1) == '\n')) {
    *start += 2;
  }
#ifndef PDF_PARSE_STRICT
  else {
    /* TeX translate end-of-line marker to a single space. */
    if (parser_state.tainted) {
      if (*start < end && **start == ' ')
        (*start)++;
    }
  }
  /* The end-of-line marker not mandatory? */
#endif /* !PDF_PARSE_STRICT */

  /*
   * Stream length.  The conditional compilation below is arranged so
   * that the if/else chain is well-formed in both build configurations.
   */
  tmp = pdf_lookup_dict(dict, "Length");
  if (tmp) {
    pdf_obj *tmp2;

    tmp2 = pdf_deref_obj(tmp);
    if (tmp2) {
      stream_length = (long) pdf_number_value(tmp2);
      pdf_release_obj(tmp2);
    }
    /* Never read or advance beyond the input buffer. */
    if (stream_length < 0 || stream_length > end - *start) {
      WARN("Invalid stream length.");
      pdf_release_obj(dict);
      DUMP_RESTORE(*start, end);
      return NULL;
    }
  }
#ifndef PDF_PARSE_STRICT
  else if (end - *start >= 9) {
    /*
     * This was added to allow TeX users to write PDF stream object
     * directly in their TeX source. This violates PDF spec.
     */
    char *p;

    /*
     * Look for the last "endstream".  It must leave at least one data
     * character plus the single character that precedes the keyword.
     */
    for (p = end - 9; (p >= *start + 2 &&
                       strncmp(p, "endstream", 9)); p--);
    if (p < *start + 2) {
      WARN("Cound not find \"endstream\".");
      pdf_release_obj(dict);
      DUMP_RESTORE(*start, end);
      return NULL;
    }
    /*
     * The end-of-line marker is not skipped here. There are no way to
     * decide whether it is a part of the stream or not.
     */
    stream_length = (p - 1) - (*start);
  }
#endif /* !PDF_PARSE_STRICT */
  else {
    WARN("Not PDF stream object?");
    pdf_release_obj(dict);
    DUMP_RESTORE(*start, end);
    return NULL;
  }

  /*
   * If Filter is not applied, set STREAM_COMPRESS flag.
   * Should we use filter for ASCIIHexEncode/ASCII85Encode-ed streams?
   */
  filters = pdf_lookup_dict(dict, "Filter");
  if (!filters && stream_length > 10) {
    result = pdf_new_stream(STREAM_COMPRESS);
  } else {
    result = pdf_new_stream(0);
  }

  stream_dict = pdf_stream_dict(result);
  pdf_merge_dict(stream_dict, dict);
  pdf_release_obj(dict);

  pdf_add_stream(result, *start, stream_length);
  *start += stream_length;

  /* Check "endstream" */
  /*
   * It is an error if the stream contained too much data except there
   * may be an extra end-of-line marker before the keyword "endstream".
   */
#ifdef PDF_PARSE_STRICT
  if (*start < end && **start == '\r')
    (*start)++;
  if (*start < end && **start == '\n')
    (*start)++;
#else  /* !PDF_PARSE_STRICT */
  /*
   * This may skip data starting with '%' and terminated by a
   * '\r' or '\n' or '\r\n'. The PDF syntax rule should not be
   * applied to the content of the stream data.
   * TeX may have converted end-of-line to single white space.
   */
  skip_white(start, end);
#endif /* !PDF_PARSE_STRICT */

  if (end - *start < 9) {
    WARN("Stream object ended prematurely.");
    pdf_release_obj(result);
    DUMP_RESTORE(*start, end);
    return NULL;
  } else if (strncmp(*start, "endstream", 9)) {
    WARN("Stream data not followed by a keyword \"endstream\"?");
    /*
     * Show where "endstream" was expected.  The input is not
     * necessarily NUL-terminated, so it must not be printed with "%s".
     */
    dump(*start, end);
    pdf_release_obj(result);
    DUMP_RESTORE(*start, end);
    return NULL;
  }

  *start += 9;
  return result;
}

#ifndef PDF_PARSE_STRICT
/* This is not PDF indirect reference. */
/*
 * Parse a named reference of the form "@name" and return the object
 * that lookup_reference() yields for that name.
 * On failure (no "@name" present, or the name is unknown) a warning is
 * issued, *start is reset to the saved position and NULL is returned.
 */
static pdf_obj *
parse_pdf_reference (char **start, char *end)
{
  pdf_obj *obj;
  char    *name;

  SAVE(*start, end);

  skip_white(start, end);
  name = parse_opt_ident(start, end);
  if (!name) {
    WARN("Could not find a reference name.");
    DUMP_RESTORE(*start, end);
    return NULL;
  }

  obj = lookup_reference(name);
  if (obj == NULL) {
    WARN("Could not find the named reference (@%s).", name);
    DUMP_RESTORE(*start, end);
  }
  RELEASE(name);

  return obj;
}
#endif /* !PDF_PARSE_STRICT */

/*
 * Try to read an indirect reference of the form "<id> <gen> R".
 *
 * start is passed by value, so the caller's position is untouched.  When
 * endptr is not NULL, *endptr is set to the position just past 'R' on
 * success and to the original start otherwise.  Both numbers must be
 * plain decimal digits terminated by white space, and 'R' must be
 * followed by the end of input, white space or a delimiter.
 *
 * Returns a new reference object, or NULL (without any warning) when the
 * input does not have this form.
 */
static pdf_obj *
try_pdf_reference (char *start, char *end, char **endptr)
{
  unsigned long id = 0, gen = 0;

  if (endptr)
    *endptr = start;

  skip_white(&start, end);
  /* The shortest possible reference, "0 0 R", has five characters. */
  if (end - start < 5 || !isdigit((unsigned char) *start)) {
    return NULL;
  }
  /* Check the bound before looking at the character. */
  while (start < end && !is_space(*start)) {
    if (!isdigit((unsigned char) *start)) {
      return NULL;
    }
    id = id * 10 + (*start - '0');
    start++;
  }

  skip_white(&start, end);
  if (start >= end || !isdigit((unsigned char) *start))
    return NULL;
  while (start < end && !is_space(*start)) {
    if (!isdigit((unsigned char) *start))
      return NULL;
    gen = gen * 10 + (*start - '0');
    start++;
  }

  skip_white(&start, end);
  if (start >= end  || *start != 'R')
    return NULL;
  start++;
  if (!PDF_TOKEN_END(start, end))
    return NULL;

  if (endptr)
    *endptr = start;

  return pdf_new_ref(id, (int) gen);
}

/*
 * Parse any PDF object, choosing the parser from the first non-white
 * character:
 *
 *   '<'            hex string, or dictionary when followed by another '<'
 *                  (a dictionary followed by "stream" becomes a stream)
 *   '('            literal string
 *   '['            array
 *   '/'            name
 *   'n'            null
 *   't' 'f'        boolean
 *   '+' '-' '.'    number
 *   digit          indirect reference "id gen R" if the input has that
 *                  form, otherwise a number
 *   '@'            named reference (not available with PDF_PARSE_STRICT)
 *
 * Returns the parsed object, or NULL on failure.
 */
pdf_obj *
parse_pdf_object (char **start, char *end)
{
  pdf_obj *result = NULL;

  SAVE(*start, end);

  skip_white(start, end);
  if (*start >= end) {
    WARN("Could not find any valid object.");
    DUMP_RESTORE(*start, end);
    return NULL;
  }

  switch (**start) {
  case '<':
    if (end - *start < 2) {
      /* A lone '<' at the very end: there is no second character to read. */
      WARN("Could not find any valid object.");
      DUMP_RESTORE(*start, end);
    } else if (*(*start+1) != '<') {
      result = parse_pdf_hex_string(start, end);
    } else {
      result = parse_pdf_dict(start, end);
      /* Only look for stream data when there is a dictionary to attach it to. */
      if (result) {
        skip_white(start, end);
        if (end - *start > 6 &&
            !strncmp(*start, "stream", 6)) {
          result = parse_pdf_stream(start, end, result);
        }
      }
    }
    break;
  case '(':
    result = parse_pdf_string(start, end);
    break;
  case '[':
    result = parse_pdf_array(start, end);
    break;
  case '/':
    result = parse_pdf_name(start, end);
    break;
  case 'n':
    result = parse_pdf_null(start, end);
    break;
  case 't': case 'f':
    result = parse_pdf_boolean(start, end);
    break;
  case '+': case '-': case '.':
    result = parse_pdf_number(start, end);
    break;
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    {
      char *next;

      /* "id gen R" also starts with a digit, so try that form first. */
      result = try_pdf_reference(*start, end, &next);
      if (result)
        *start = next;
      else
        result = parse_pdf_number(start, end);
    }
    break;
  case '@':
#ifndef PDF_PARSE_STRICT
    result = parse_pdf_reference(start, end);
#endif /* !PDF_PARSE_STRICT */
    break;
#if 0
  case '{':
    /* Distiller pdfmark uses {FOO} as a reference to a PDF object. */
    break;
#endif
  default:
    WARN("Unknown PDF object type.");
    DUMP_RESTORE(*start, end);
    result = NULL;
  }

  return result;
}

/*
 * Parse a double-quoted C-style string.  Backslash escapes are decoded
 * by parse_escape_char(), with '\\' and '"' as the escapable characters.
 *
 * Returns a newly allocated, NUL-terminated string with *start just past
 * the closing quote.  Returns NULL, after a warning and with *start
 * reset to the saved position, when no opening quote is found or when
 * the closing quote is missing (including input that ends right after a
 * backslash).
 */
char *
parse_c_string (char **start, char *end)
{
  unsigned char *result = NULL;
  int len = 0;

  SAVE(*start, end);

  skip_white(start, end);
  if (*start >= end || **start != '"') {
    WARN("Could not find a C string.");
    DUMP_RESTORE(*start, end);
    return NULL;
  }
  (*start)++;

  /* Every output byte consumes at least one input byte; +1 for the NUL. */
  result = NEW(end - *start + 1, unsigned char);
  while (*start < end && **start != '"') {
    if (**start == '\\') {
      /* parse_escape_char() needs a character after the backslash. */
      if (*start >= end - 1)
        break;
      /*
       * A return value of 0 is a line continuation: nothing is stored,
       * and the next character is examined by the loop condition again
       * (it may be the closing quote).
       */
      if (parse_escape_char(start, end, "\\\"", &(result[len])))
        len++;
    } else {
      result[len++] = *((*start)++);
    }
  }

  if (*start < end && **start == '"') {
    (*start)++;
    result[len] = '\0';
  } else {
    /* Never hand back a buffer that is not NUL-terminated. */
    WARN("Unterminated C string.");
    RELEASE(result);
    result = NULL;
    DUMP_RESTORE(*start, end);
  }

  return (char *) result;
}

/*
 * Parse "key = value".
 * *key receives a newly allocated C identifier, or NULL when none is
 * found.  *val receives the value that follows '=': a quoted C string
 * when it starts with '"', a value identifier otherwise.  *val is NULL
 * when there is no key, no '=', nothing after '=', or the value could
 * not be parsed.
 */
void
parse_key_val (char **start, char *end, char **key, char **val)
{
  *key = *val = NULL;

  skip_white(start, end);
  *key = parse_c_ident(start, end);
  if (!*key)
    return;

  skip_white(start, end);
  if (*start >= end || **start != '=')
    return;
  (*start)++;

  skip_white(start, end);
  if (*start >= end)
    return;

  if (**start == '"')
    *val = parse_c_string(start, end);
  else
    *val = parse_val_ident(start, end);
}
