/* File "cache.c":
 * Store analyses of whole word forms for faster access. */

/* This file is part of Malaga, a system for Left Associative Grammars.
 * Copyright (C) 1995-1998 Bjoern Beutel
 *
 * Bjoern Beutel
 * Universitaet Erlangen-Nuernberg
 * Abteilung fuer Computerlinguistik
 * Bismarckstrasse 12
 * D-91054 Erlangen
 * e-mail: malaga@linguistik.uni-erlangen.de 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "basic.h"
#include "pools.h"
#include "values.h"

#undef GLOBAL
#define GLOBAL
#include "cache.h"

typedef struct CACHE_ENTRY_T /* a cached word form: tree node and LRU link */
{
  /* Links of the doubly linked LRU chain. */
  struct CACHE_ENTRY_T *prev_ref; /* entry referenced just before this one */
  struct CACHE_ENTRY_T *next_ref; /* entry referenced just after this one */

  /* Links of the cache tree, which is ordered by <surface>. */
  struct CACHE_ENTRY_T *left;  /* left son in cache tree */
  struct CACHE_ENTRY_T *right; /* right son in cache tree */
  byte_t balance; /* < 0 if left subtree is deeper; > 0 if right is deeper */

  /* The cached analysis. */
  string_t surface; /* the word form; this is the search key */
  long_t num_cats;  /* number of categories in <cats> */
  value_t *cats;    /* points to a vector of categories */
} cache_entry_t;

LOCAL cache_entry_t *cache_tree; /* root of the cache tree, ordered by surface */
LOCAL long_t max_num_entries = 0; /* maximum of entries in <cache_tree> */
LOCAL long_t num_entries; /* actual number of entries in <cache_tree> */

/* Head and tail of the LRU chain. Eviction always takes <first_ref>;
 * new and re-referenced entries are appended behind <last_ref>. */
LOCAL cache_entry_t *first_ref; /* the least recently referenced entry */
LOCAL cache_entry_t *last_ref; /* the most recently referenced entry */

/* Iteration state: set by "word_in_cache", read by "next_result_in_cache". */
LOCAL cache_entry_t *current_entry; /* entry found by the last successful lookup */
LOCAL long_t current_result; /* index in <cats> of the next category to return */

/*---------------------------------------------------------------------------*/

LOCAL cache_entry_t *new_cache_entry (string_t surf_start,
                                      string_t surf_end,
                                      long_t num_cats,
                                      value_t cats[])
/* Create a cache entry for the string at <surf_start..surf_end>.
 * It has <num_cats> categories stored in <cats>. The vector <cats> is
 * adopted, not copied; "free_cache_entry" releases it together with the
 * categories it holds.
 * The entry is returned unlinked: it is a leaf with balance 0 that is in
 * neither the cache tree nor the LRU chain yet.
 * <num_entries> is incremented.
 * Return the created entry. */
{
  cache_entry_t *cache_entry;

  /* Allocate a cache entry. */
  cache_entry = new_mem (sizeof (cache_entry_t));

  /* "insert_in_tree" links the entry as a leaf without touching its sons or
   * its balance, so they must be valid here. Set them explicitly instead of
   * depending on whether "new_mem" clears the memory it returns. */
  cache_entry->prev_ref = NULL;
  cache_entry->next_ref = NULL;
  cache_entry->left = NULL;
  cache_entry->right = NULL;
  cache_entry->balance = 0;

  cache_entry->surface = new_string_section (surf_start, surf_end);
  cache_entry->num_cats = num_cats;
  cache_entry->cats = cats;

  num_entries++;
  return cache_entry;
}

/*---------------------------------------------------------------------------*/

LOCAL void free_cache_entry (cache_entry_t *cache_entry)
/* Release <cache_entry> and everything it owns: each category, the category
 * vector and the surface string. <num_entries> is decremented.
 * The entry is not unlinked from the tree or the LRU chain here. */
{
  long_t cat_index = 0;

  /* Release the categories one by one, then the vector that held them. */
  while (cat_index < cache_entry->num_cats)
  {
    free (cache_entry->cats[cat_index]);
    cat_index++;
  }
  free (cache_entry->cats);

  free (cache_entry->surface);
  free (cache_entry);
  num_entries--;
}

/*---------------------------------------------------------------------------*/

LOCAL void reference_entry (cache_entry_t *entry)
/* Mark <entry> as the most recently referenced one, i.e. move it to the
 * end of the LRU chain unless it already is the last element. */
{
  cache_entry_t *before, *after;

  if (entry == last_ref)
    return; /* Already at the end. */

  before = entry->prev_ref;
  after = entry->next_ref; /* Not NULL, since <entry> is not the last one. */

  /* Unlink <entry> from its current position. */
  after->prev_ref = before;
  if (entry == first_ref)
    first_ref = after;
  else
    before->next_ref = after;

  /* Append <entry> behind the current last element. */
  entry->prev_ref = last_ref;
  entry->next_ref = NULL;
  last_ref->next_ref = entry;
  last_ref = entry;
}

/*---------------------------------------------------------------------------*/

LOCAL bool_t balance_left (cache_entry_t **tree)
/* Restore the balance of <tree> after its left subtree has become one
 * level shallower. <*tree> may be replaced by a new root.
 * Return TRUE iff the balanced <tree> is shallower than before. */
{
  cache_entry_t *old_root = *tree;
  cache_entry_t *right_son, *pivot;

  /* The weight moves one step to the right. */
  old_root->balance++;
  if (old_root->balance <= +1)
    return (old_root->balance == 0);

  /* The right subtree is two levels deeper now, so rotate. */
  right_son = old_root->right;
  if (right_son->balance >= 0)
  {
    /* Single rotation: <right_son> becomes the new root. */
    old_root->right = right_son->left;
    right_son->left = old_root;
    *tree = right_son;

    if (right_son->balance == 0)
    {
      /* The depth of the whole tree stays the same in this case. */
      right_son->balance = -1;
      old_root->balance = +1;
      return FALSE;
    }
    right_son->balance = 0;
    old_root->balance = 0;
    return TRUE;
  }

  /* Double rotation: the left son of <right_son> becomes the new root. */
  pivot = right_son->left;
  right_son->left = pivot->right;
  pivot->right = right_son;
  old_root->right = pivot->left;
  pivot->left = old_root;
  *tree = pivot;

  /* The new balances of both sons follow from the old balance of <pivot>. */
  old_root->balance = (pivot->balance == +1) ? -1 : 0;
  right_son->balance = (pivot->balance == -1) ? +1 : 0;
  pivot->balance = 0;
  return TRUE;
}

/*---------------------------------------------------------------------------*/

LOCAL bool_t balance_right (cache_entry_t **tree)
/* Restore the balance of <tree> after its right subtree has become one
 * level shallower. <*tree> may be replaced by a new root.
 * Return TRUE iff the balanced <tree> is shallower than before. */
{
  cache_entry_t *old_root = *tree;
  cache_entry_t *left_son, *pivot;

  /* The weight moves one step to the left. */
  old_root->balance--;
  if (old_root->balance >= -1)
    return (old_root->balance == 0);

  /* The left subtree is two levels deeper now, so rotate. */
  left_son = old_root->left;
  if (left_son->balance <= 0)
  {
    /* Single rotation: <left_son> becomes the new root. */
    old_root->left = left_son->right;
    left_son->right = old_root;
    *tree = left_son;

    if (left_son->balance == 0)
    {
      /* The depth of the whole tree stays the same in this case. */
      left_son->balance = +1;
      old_root->balance = -1;
      return FALSE;
    }
    left_son->balance = 0;
    old_root->balance = 0;
    return TRUE;
  }

  /* Double rotation: the right son of <left_son> becomes the new root. */
  pivot = left_son->right;
  left_son->right = pivot->left;
  pivot->left = left_son;
  old_root->left = pivot->right;
  pivot->right = old_root;
  *tree = pivot;

  /* The new balances of both sons follow from the old balance of <pivot>. */
  old_root->balance = (pivot->balance == -1) ? +1 : 0;
  left_son->balance = (pivot->balance == +1) ? -1 : 0;
  pivot->balance = 0;
  return TRUE;
}

/*---------------------------------------------------------------------------*/

LOCAL bool_t remove_largest (cache_entry_t **tree, cache_entry_t **result)
/* Unlink the largest (rightmost) element of <tree> and store it in <*result>.
 * <tree> must not be empty. The links of the unlinked element itself are
 * left as they were.
 * Return TRUE iff <tree> has shrunk. */
{
  cache_entry_t *node = *tree;

  if (node->right == NULL)
  {
    /* Nothing is larger than <node>; its left son takes its place. */
    *result = node;
    *tree = node->left;
    return TRUE;
  }

  /* The largest element is further down to the right. */
  if (! remove_largest (&node->right, result))
    return FALSE;
  return balance_right (tree);
}

/*---------------------------------------------------------------------------*/

LOCAL bool_t remove_from_tree (cache_entry_t *entry, cache_entry_t **tree)
/* Remove <entry> from <tree>. Return TRUE iff <tree> has shrunk.
 * The node is located by comparing <entry>->surface with the surfaces
 * stored in <tree>, so <entry> must be in <tree>: a subtree is dereferenced
 * without checking whether it is empty.
 * Only tree links and balances are updated; the removed node is not freed
 * and its own <left>/<right> links are left as they were. */
{
  short_t comp; /* NOTE(review): holds the int result of "strcmp", narrowed */
  bool_t shrunk;
  cache_entry_t *root = *tree;

  comp = strcmp (entry->surface, root->surface);
  if (comp < 0)
  {
    /* The node is in the left subtree. */
    shrunk = remove_from_tree (entry, &root->left);
    if (shrunk)
      return balance_left (tree);
    return FALSE;
  }
  else if (comp > 0)
  {
    /* The node is in the right subtree. */
    shrunk = remove_from_tree (entry, &root->right);
    if (shrunk)
      return balance_right (tree);
    return FALSE;
  }
  else /* comp == 0 */
  {
    /* <root> is the node to be removed. */
    if (root->right == NULL)
      (*tree) = root->left; /* The left son (maybe NULL) replaces <root>. */
    else if (root->left == NULL)
      (*tree) = root->right; /* The right son replaces <root>. */
    else
    {
      /* <root> has two sons: replace it by the largest element of its left
       * subtree. "remove_largest" stores that element directly in <*tree>,
       * while <root> still refers to the node being removed; <root>->left
       * already reflects the removal when it is copied below. */
      shrunk = remove_largest (&root->left, tree);
      (*tree)->balance = root->balance;
      (*tree)->left = root->left;
      (*tree)->right = root->right;
      if (shrunk)
	return balance_left (tree);
      else
	return FALSE;
    }
    /* A node with at most one son has been unlinked, so <tree> has shrunk. */
    return TRUE;
  }
}

/*---------------------------------------------------------------------------*/

LOCAL bool_t insert_in_tree (cache_entry_t *entry, cache_entry_t **tree)
/* Find the right place to put <entry> into <tree>.
 * Return TRUE iff <tree> has grown.
 * The tree is ordered by "strcmp" on the surfaces. <entry> is linked in as
 * a leaf exactly as it is: its <left>, <right> and <balance> fields are not
 * set here, so they must already describe an empty, balanced node.
 * If an entry with the same surface is already in <tree>, "error" is
 * called. */
{
  short_t comp; /* NOTE(review): holds the int result of "strcmp", narrowed */
  bool_t grown;

  if ((*tree) == NULL)
  {
    /* Empty place found: <entry> becomes a new leaf. */
    *tree = entry;
    return TRUE;
  }

  comp = strcmp (entry->surface, (*tree)->surface);
  if (comp < 0)
  {
    grown = insert_in_tree (entry, &(*tree)->left);
    if (grown)
    {
      cache_entry_t *root = *tree;

      /* The left subtree has become deeper. The whole tree has grown only
       * if it was balanced before (balance is -1 now). */
      root->balance--;
      if (root->balance >= -1)
	return (root->balance == -1);

      /* Tree is in disorder, rebalance it. */
      if (root->left->balance == -1)
      {
	/* Single rotation: the left son becomes the new root. */
	(*tree) = root->left;
	root->left = (*tree)->right;
	(*tree)->right = root;
	(*tree)->right->balance = 0;
      }
      else
      {
	/* Double rotation: the right son of the left son becomes the
	 * new root; its old balance determines the balances of its sons. */
	(*tree) = root->left->right;
	root->left->right = (*tree)->left;
	(*tree)->left = root->left;
	root->left = (*tree)->right;
	(*tree)->right = root;
	if ((*tree)->balance == -1)
	  (*tree)->right->balance = +1;
	else
	  (*tree)->right->balance = 0;
	if ((*tree)->balance == +1)
	  (*tree)->left->balance = -1;
	else
	  (*tree)->left->balance = 0;
      }
      (*tree)->balance = 0;
    }
    /* Either the subtree did not grow or a rotation absorbed the growth. */
    return FALSE;
  }
  else if (comp > 0)
  {
    grown = insert_in_tree (entry, &(*tree)->right);
    if (grown)
    {
      cache_entry_t *root = *tree;

      /* The right subtree has become deeper. The whole tree has grown only
       * if it was balanced before (balance is +1 now). */
      root->balance++;
      if (root->balance <= +1)
	return (root->balance == +1);
      
      /* Tree is in disorder, rebalance it. */
      if (root->right->balance == +1)
      {
	/* Single rotation: the right son becomes the new root. */
	(*tree) = root->right;
	root->right = (*tree)->left;
	(*tree)->left = root;
	(*tree)->left->balance = 0;
      }
      else
      {
	/* Double rotation: the left son of the right son becomes the
	 * new root; its old balance determines the balances of its sons. */
	(*tree) = root->right->left;
	root->right->left = (*tree)->right;
	(*tree)->right = root->right;
	root->right = (*tree)->left;
	(*tree)->left = root;
	if ((*tree)->balance == +1)
	  (*tree)->left->balance = -1;
	else
	  (*tree)->left->balance = 0;
	if ((*tree)->balance == -1)
	  (*tree)->right->balance = +1;
	else
	  (*tree)->right->balance = 0;
      }
      (*tree)->balance = 0;
    }
    /* Either the subtree did not grow or a rotation absorbed the growth. */
    return FALSE;
  }

  /* comp == 0: an entry with this surface is already in the tree.
   * NOTE(review): no value is returned after this call, so "error" is
   * presumably expected not to return -- confirm. */
  error ("internal (cache name used twice)");
}  

/*---------------------------------------------------------------------------*/

GLOBAL bool_t word_in_cache (string_t surf_start, 
                             string_t surf_end)
/* Look up the word that starts at <surf_start> and ends just before
 * <surf_end> in the cache. <cache_accesses> is incremented on every call.
 * On a hit, <cache_hits> is incremented, the entry becomes the most
 * recently referenced one and the result iteration is reset.
 * Return TRUE if word found. Use "next_result_in_cache" to get next result. */
{
  cache_entry_t *node = cache_tree;

  cache_accesses++;

  /* Walk down the cache tree until the word is found or a leaf is passed. */
  while (node != NULL)
  {
    short_t order = strncmp (surf_start, node->surface, 
                             surf_end - surf_start);

    if (order == 0)
    {
      if (node->surface[surf_end - surf_start] == EOS)
      {
        /* Both strings have the same length, so this is the word. */
        reference_entry (node);
        current_entry = node;
        current_result = 0;
        cache_hits++;
        return TRUE;
      }

      /* The word is only a proper prefix of the stored surface, 
       * so it is ordered before that surface. */
      order = -1;
    }

    node = (order < 0) ? node->left : node->right;
  }
  return FALSE;
}

/*---------------------------------------------------------------------------*/

GLOBAL value_t next_result_in_cache (void)
/* Return the next category for the word found by "word_in_cache".
 * Return NULL if no more category exists. NULL is also returned if no word
 * has been found by "word_in_cache" so far.
 * The returned category still belongs to the cache entry. */
{
  /* <current_entry> is only set by a successful "word_in_cache"; without
   * this check an early call would dereference a NULL pointer. */
  if (current_entry == NULL || current_result >= current_entry->num_cats)
    return NULL;

  /* Hand out the current category and advance to the next one. */
  return current_entry->cats[current_result++];
}

/*---------------------------------------------------------------------------*/

LOCAL void remove_entry_from_cache (void)
/* Delete the least recently referenced entry from the cache: unlink it
 * from the LRU chain and from the cache tree, then free it.
 * Do nothing if the cache is empty.
 * NOTE(review): <current_entry> is not reset here, so it may still refer to
 * the deleted entry -- callers must not continue a result iteration after
 * an eviction. */
{
  cache_entry_t *entry;
  
  /* An empty cache has nothing to evict. This happens when "enter_in_cache" 
   * is called while the maximum number of entries is 0. */
  entry = first_ref;
  if (entry == NULL)
    return;

  /* Remove first element from LRU list. */
  first_ref = entry->next_ref;
  if (first_ref != NULL)
    first_ref->prev_ref = NULL;
  else 
    last_ref = NULL; /* The chain is empty now. */

  /* Remove <entry> from tree. */
  remove_from_tree (entry, &cache_tree);
  free_cache_entry (entry);
}

/*---------------------------------------------------------------------------*/

GLOBAL void enter_in_cache (string_t surf_start,
                            string_t surf_end,
                            long_t num_cats,
                            value_t cats[])
/* Store the word in [<surf_start>..<surf_end>] in the cache as the most
 * recently referenced entry. It has <num_cats> categories, stored in
 * <cats[]>; the vector is kept by the cache, not copied.
 * If the cache is full, the least recently referenced entry is dropped.
 * Be sure that the word is not yet in the cache.
 * NOTE(review): with a maximum of 0 entries this asks for an eviction even
 * if the cache is empty and still stores the word -- confirm that callers
 * only enter words while the cache size is positive. */
{
  cache_entry_t *new_entry;

  /* Make room before the new entry is created. */
  if (num_entries >= max_num_entries)
    remove_entry_from_cache ();

  /* Create the entry and sort it into the cache tree. */
  new_entry = new_cache_entry (surf_start, surf_end, num_cats, cats);
  insert_in_tree (new_entry, &cache_tree);

  /* Append the entry to the LRU chain. */
  new_entry->prev_ref = last_ref;
  new_entry->next_ref = NULL;
  if (last_ref != NULL)
    last_ref->next_ref = new_entry;
  if (first_ref == NULL)
    first_ref = new_entry;
  last_ref = new_entry;
}

/*---------------------------------------------------------------------------*/

GLOBAL void clear_cache (void)
/* Empty the cache by deleting entries, least recently referenced first,
 * until <num_entries> has dropped to 0. */
{
  if (num_entries <= 0)
    return;

  do
    remove_entry_from_cache ();
  while (num_entries > 0);
}

/*---------------------------------------------------------------------------*/

GLOBAL void set_cache_size (long_t size)
/* Set max. number of cache entries to <size>. A negative <size> is treated
 * as 0. If the cache currently holds more entries than that, the least
 * recently referenced ones are deleted until it fits. */
{
  /* No cache can hold fewer than 0 entries: with a negative maximum the
   * loop below would go on evicting after the cache has become empty. */
  if (size < 0)
    size = 0;

  max_num_entries = size;
  while (num_entries > max_num_entries)
    remove_entry_from_cache ();
}

/*---------------------------------------------------------------------------*/

GLOBAL long_t get_cache_size (bool_t maximum)
/* Return the maximum number of cache entries if <maximum> is set,
 * else the number of entries that are currently in the cache. */
{
  return maximum ? max_num_entries : num_entries;
}

/*---------------------------------------------------------------------------*/
