;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10; Package: umlisp -*-
;;;; *************************************************************************
;;;; FILE IDENTIFICATION
;;;;
;;;; Name:     parse-2002.lisp
;;;; Purpose:  Parsing and SQL insertion routines for UMLisp which may
;;;;           change from year to year
;;;; Author:   Kevin M. Rosenberg
;;;; Created:  Apr 2000
;;;;
;;;; $Id: parse-rrf.lisp 9542 2004-06-04 03:08:07Z kevin $
;;;;
;;;; This file, part of UMLisp, is
;;;;    Copyright (c) 2000-2004 by Kevin M. Rosenberg, M.D.
;;;;
;;;; UMLisp users are granted the rights to distribute and use this software
;;;; as governed by the terms of the GNU General Public License.
;;;; *************************************************************************

(in-package #:umlisp)

;;; Pre-read data for custom fields into hash tables
(defvar *preparse-hash-init?* nil)

(eval-when (:compile-toplevel :load-toplevel :execute)
(let ((pfstr-hash nil)      ;; Preferred concept strings by CUI
      (cui-lrl-hash nil)    ;; LRL by CUI
      (lui-lrl-hash nil)    ;; LRL by LUI
      (sui-lrl-hash nil)    ;; LRL by SUI
      (cuisui-lrl-hash nil) ;; LRL by CUISUI
      (sab-srl-hash nil))   ;; SRL by SAB
  
  (defun make-preparse-hash-table ()
    (if sui-lrl-hash
	(progn
	  (clrhash pfstr-hash)
	  (clrhash cui-lrl-hash)
	  (clrhash lui-lrl-hash)
	  (clrhash sui-lrl-hash)
	  (clrhash cuisui-lrl-hash)
	  (clrhash sab-srl-hash))
      (setf
	  pfstr-hash (make-hash-table :size 800000)
	  cui-lrl-hash (make-hash-table :size 800000)
	  lui-lrl-hash (make-hash-table :size 1500000)
	  sui-lrl-hash (make-hash-table :size 1500000)
	  cuisui-lrl-hash (make-hash-table :size 1800000)
	  sab-srl-hash (make-hash-table :size 100 :test 'equal))))
    
  (defun ensure-preparse (&optional (force-read nil))
    (when (or force-read (not *preparse-hash-init?*))
      (make-preparse-hash-table)
      (setq *preparse-hash-init?* t))
    (with-umls-file (line "MRCONSO.RRF")
      (let ((cui (parse-ui (nth 0 line)))
	    (lui (parse-ui (nth 3 line)))
	    (sui (parse-ui (nth 5 line)))
	    (sab (nth 11 line))
	    (srl (parse-integer (nth 15 line))))
	(unless (gethash cui pfstr-hash)  ;; if haven't stored pfstr for cui
	  (if (and (string-equal (nth 1 line) "ENG") ; LAT
		   (string-equal (nth 2 line) "P") ; ts
		   (string-equal (nth 4 line) "PF")) ; stt
	      (setf (gethash cui pfstr-hash) (nth 14 line))))
	(set-lrl-hash cui srl cui-lrl-hash)
	(set-lrl-hash lui srl lui-lrl-hash)
	(set-lrl-hash sui srl sui-lrl-hash)
	(set-lrl-hash (make-cuisui cui sui) srl cuisui-lrl-hash)
        (multiple-value-bind (val found) (gethash sab sab-srl-hash)
          (declare (ignore val))
          (unless found
            (setf (gethash sab sab-srl-hash) srl))))))
  
  (defun pfstr-hash (cui) (gethash cui pfstr-hash))
  (defun cui-lrl (cui)    (gethash cui cui-lrl-hash))
  (defun lui-lrl (lui)    (gethash lui lui-lrl-hash))
  (defun sui-lrl (sui)    (gethash sui sui-lrl-hash))
  (defun sab-srl (sab)    (aif (gethash sab sab-srl-hash) it 0))
  (defun cuisui-lrl (cuisui) (gethash cuisui cuisui-lrl-hash))
  
)) ;; closure

(defun set-lrl-hash (key lrl hash)
  "Set the least restrictive level in hash table"
  (multiple-value-bind (hash-lrl found) (gethash key hash)
    (if (or (not found) (< lrl hash-lrl))
	(setf (gethash key hash) lrl))))

;; UMLS file and column structures
;;; SQL datatypes symbols
;;; sql-u - Unique identifier
;;; sql-s - Small integer (16-bit)
;;; sql-i - Integer (32-bit)
;;; sql-l - Big integer (64-bit)
;;; sql-f - Floating point
;;; sql-c - Character data

(defparameter +col-datatypes+
    '(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u)
      ("AUI" sql-u) ("AUI1" sql-u) ("AUI2" sql-u) ("PCUI" sql-u)
      ("PLUI" sql-u) ("PAUI" sql-u)
      ("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LRL" sql-s)
      ("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-c)
      ("RNK" sql-s) ("RWS" sql-i) ("SRL" sql-s) ("SUI" sql-u) ("TUI" sql-u)
      ("MAPRANK" sql-s)
      ;;; Custom columns
      ("KCUISUI" sql-l) ("KCUILUI" sql-l) ("KCUILRL" sql-i) ("KLUILRL" sql-i)
      ("KSUILRL" sql-i)
      ("KSRL" sql-i) ("KLRL" sql-i)
      ;;; LEX columns
      ("EUI" sql-u) ("EUI2" sql-u)
      ;;; Semantic net columns
      ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u)
      ;; New fields for 2002AD
      ("RCUI" sql-u) ("VCUI" sql-u) ("CFR" sql-i) ("TFR" sql-i)
      ;; New fields for 2004AA
      ("MAPSETCUI" sql-u)
      ) 
    "SQL data types for each non-string column")

(defparameter +custom-tables+
    nil
  #+ignore
  '(("KCON" "SELECT CUI,STR FROM MRCONSO WHERE STT='PF' AND TS='P' AND ISPREF='Y' AND LAT='ENG'"))
  "Custom tables to create")

(defparameter +custom-cols+
    '(("MRCONSO.RRF" "KPFSTR" "TEXT"
		     (slot-value (find-ucol "STR" "MRCONSO.RRF") 'max)
		     (lambda (x) (pfstr-hash (parse-ui (nth 0 x)))))
      ("MRCONSO.RRF" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x))))))
      ("MRCONSO.RRF" "KCUILUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x))))))
      ("MRCONSO.RRF" "KCUILRL" "SMALLINT" 0
       (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 0 x))))))
      ("MRCONSO.RRF" "KLUILRL" "SMALLINT" 0
       (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x))))))
      ("MRCONSO.RRF" "KSUILRL" "SMALLINT" 0
       (lambda (x) (write-to-string (sui-lrl (parse-ui (nth 5 x))))))
      ;; Deprecated, last in 2004AA -- skip index
      #+ignore
      ("MRLO.RRF" "KLRL" "SMALLINT" 0
       (lambda (x) (write-to-string 
		    (if (zerop (length (nth 4 x)))
			(cui-lrl (parse-ui (nth 0 x)))
		      (cuisui-lrl (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 4 x))))))))
      ("MRSTY.RRF" "KLRL" "SMALLINT" 0
       (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
      ("MRCOC.RRF" "KLRL" "SMALLINT" 0
       (lambda (x) (write-to-string 
		    (max (cui-lrl (parse-ui (nth 0 x)))
			 (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0)))))
      ("MRSAT.RRF" "KSRL" "SMALLINT" 0
       (lambda (x) (write-to-string (sab-srl (nth 9 x)))))
      ("MRREL.RRF" "KSRL" "SMALLINT" 0
       (lambda (x) (write-to-string (sab-srl (nth 10 x)))))
      ("MRRANK.RRF" "KSRL" "SMALLINT" 0
       (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
      ("MRDEF.RRF" "KSRL" "SMALLINT" 0
       (lambda (x) (write-to-string (sab-srl (nth 4 x)))))
      ("MRCXT.RRF" "KSRL" "SMALLINT" 0
       (lambda (x) (write-to-string (sab-srl (nth 2 x)))))
      ("MRXW_ENG.RRF" "KLRL" "SMALLINT" 0
       (lambda (x) (write-to-string (cuisui-lrl (make-cuisui 
						 (parse-ui (nth 2 x))
						 (parse-ui (nth 4 x)))))))
      ("MRXW_NONENG.RRF" "KLRL" "SMALLINT" 0
       (lambda (x) (write-to-string (cuisui-lrl (make-cuisui 
						 (parse-ui (nth 2 x))
						 (parse-ui (nth 4 x)))))))
      ("MRXNW_ENG.RRF" "KLRL" "SMALLINT" 0
       (lambda (x) (write-to-string (cuisui-lrl (make-cuisui 
						 (parse-ui (nth 2 x))
						 (parse-ui (nth 4 x)))))))
      ("MRXNS_ENG.RRF" "KLRL" "SMALLINT" 0
       (lambda (x) (write-to-string (cuisui-lrl (make-cuisui 
						 (parse-ui (nth 2 x))
						 (parse-ui (nth 4 x)))))))
      ("MRREL.RRF" "KPFSTR2" "TEXT" 1024
       (lambda (x) (pfstr-hash (parse-ui (nth 4 x)))))
      ("MRCOC.RRF" "KPFSTR2" "TEXT" 1024
       (lambda (x) (pfstr-hash (parse-ui (nth 2 x)))))
      ("MRCXT.RRF" "KCUISUI" "BIGINT" 0 
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
      ("MRSAT.RRF" "KCUILUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
      ("MRSAT.RRF" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x))))))
      ("MRXW_ENG.RRF" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
      ("MRXNW_ENG.RRF" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
      ("MRXNS_ENG.RRF" "KCUISUI" "BIGINT" 0
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
      ("MRXW_NONENG.RRF" "LAT" "VARCHAR" 3 (lambda (x) (nth 0 x)))
      ("MRXW_NONENG.RRF" "WD"  "VARCHAR" 200  (lambda (x) (nth 1 x)))
      ("MRXW_NONENG.RRF" "CUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 2 x)))))
      ("MRXW_NONENG.RRF" "LUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 3 x)))))
      ("MRXW_NONENG.RRF" "SUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 4 x)))))
      ("MRXW_NONENG.RRF" "KCUISUI" "BIGINT" 0 
       (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))))
  "Custom columns to create.(filename, col, sqltype, value-func).")

(defparameter +index-cols+
    '(("CUI1" "MRCOC") ("CUI" "MRCONSO") ("LUI" "MRCONSO") 
      ("SRL" "MRCONSO") ("AUI" "MRCONSO")
      ("SUI" "MRCONSO") ("CUI" "MRCXT") ("CUI" "MRDEF") ("CUI" "MRLO")
      ("CUI1" "MRREL") ("CUI" "MRSAT") ("LUI" "MRSAT") ("SUI" "MRSAT")
      ("CUI" "MRSTY")
      ("TUI" "MRSTY") ("CUI" "MRXNS_ENG") 
      #+ignore ("NSTR" "MRXNS_ENG" 10)
      ("CUI" "MRXNW_ENG") ("NWD" "MRXNW_ENG") ("WD" "MRXW_ENG")
      ("KCUISUI" "MRCONSO") ("KCUILUI" "MRCONSO") ("KCUILRL" "MRCONSO")
      ("KLUILRL" "MRCONSO") ("KCUISUI" "MRCXT") 
      ("KCUISUI" "MRSAT")  ("KCUILUI" "MRSAT")
      ("KCUISUI" "MRXW_ENG") ("KCUISUI" "MRXNW_ENG") 
      ("KCUISUI" "MRXNS_ENG") ("KCUISUI" "MRXW_NONENG")
      ("KSRL" "MRCXT") ("KSRL" "MRDEF") ("KSRL" "MRRANK") 
      ("KSRL" "MRREL") ("KSRL" "MRSAT") ("KLRL" "MRCOC") 
      #+ignore ("KLRL" "MRLO")  ;; deprecated
      ("KLRL" "MRSTY") ("KLRL" "MRXW_ENG") ("KLRL" "MRXNW_ENG")
      ("KLRL" "MRXNS_ENG") ("KLRL" "MRXW_NONENG")
      ;; LEX indices
      ("EUI" "LRABR") ("EUI2" "LRABR") ("EUI" "LRAGR") ("EUI" "LRCMP") ("EUI" "LRMOD")
      ("EUI" "LRNOM") ("EUI2" "LRNOM") ("EUI" "LRPRN") ("EUI" "LRPRP") ("EUI" "LRSPL")
      ("EUI" "LRTRM") ("EUI" "LRTYP") ("EUI" "LRWD") ("WRD" "LRWD")
      ("BAS" "LRABR") 
      ;; Semantic NET indices
      ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1") 
      ("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR")
      ("RL" "SRSTR")
      
      ("SRL" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB")
      ("VCUI" "MRSAB") ("LAT" "MRSAB") ("MAPSETCUI" "MRMAP")  ("MAPSETCUI" "MRSMAP")
      ("CUI" "MRHIER") ("AUI" "MRHIER") ("PAUI" "MRHIER"))
  "Columns in files to index")


(defparameter +custom-index-cols+
  nil
  #+ignore
  '(("CUI" "KCON") ("LRL" "KCON"))
  "Indexes to custom tables")

;; File & Column functions

(defun gen-ucols ()
  (add-ucols (gen-ucols-meta))
  (add-ucols (gen-ucols-custom))
  (add-ucols (gen-ucols-generic "LRFLD"))
  (add-ucols (gen-ucols-generic "SRFLD")))

(defun gen-ucols-meta ()
"Initialize all umls columns"  
  (let ((cols '()))
    (with-umls-file (line "MRCOLS.RRF")
      (destructuring-bind (col des ref min av max fil dty) line
	(push (make-ucol col des ref (parse-integer min) (read-from-string av)
			 (parse-integer max) fil dty)
	      cols)))
    (nreverse cols)))

(defun gen-ucols-custom ()
"Initialize umls columns for custom columns"  
  (loop for customcol in +custom-cols+
	collect
	(make-ucol (nth 1 customcol) "" 0 0 0 (eval (nth 3 customcol))
		   (nth 0 customcol) nil :sqltype (canonicalize-column-type (nth 2 customcol))
		   :custom-value-fun (nth 4 customcol))))

(defun gen-ucols-generic (col-filename)
"Initialize for generic (LEX/NET) columns"  
  (let ((cols '()))
    (with-umls-file (line col-filename)
      (destructuring-bind (nam des ref fil) line
	(setq nam (escape-column-name nam))
	(dolist (file (delimited-string-to-list fil #\,))
	  (push
	   (make-ucol nam des ref nil nil nil file nil)
	   cols))))
    (nreverse cols)))


(defun gen-ufiles ()
  (add-ufiles (gen-ufiles-generic "MRFILES.RRF" "META"))
  (add-ufiles (gen-ufiles-generic "LRFIL" "LEX"))
  (add-ufiles (gen-ufiles-generic "SRFIL" "NET"))
  ;; needs to come last
  (add-ufiles (gen-ufiles-custom)))

			
(defun gen-ufiles-generic (files-filename dir)
"Initialize all LEX file structures"  
  (let ((files '()))
    (with-umls-file (line files-filename)
      (destructuring-bind (fil des fmt cls rws bts) line
	(push (make-ufile
	       dir fil des 
	       (parse-integer cls)
	       (parse-integer rws) (parse-integer bts)
	       (concatenate 'list (umls-field-string-to-list fmt)
			    (custom-colnames-for-filename fil)))
	      files)))
    (nreverse files)))

(defun gen-ufiles-custom ()
  (make-ufile "META" "MRXW_NONENG.RRF" "Custom NonEnglish Index" 
	      5 0 0 (fields (find-ufile "MRXW_ENG.RRF"))))



