// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o3_jit/opt_throws.cpp,v 1.2 2001/08/13 09:52:49 xhshi Exp $
//


//
// If a basic block ends in an athrow, search the basic block for a new()
// call.  If there are no side effects between the new and the athrow,
// apply the lazy-exception optimization.  Side effects include:
//   - putfield/putstatic (i.e., assignment where LHS is a memory operand)
//   - assignment to non-tempreg when there is an EH
//   - invokevirtual
//   - "unsafe" invokestatic/invokespecial
// An "unsafe" invoke is one that has unwanted side effects.  Unwanted side effects
// include all side effects except the setting of fields of the "this" pointer.
// The ORP defines certain methods to be side-effect free, such as
// java/lang/Throwable.<init>.

#include "defines.h"
#include "flow_graph.h"
#include "is_subclass_of.h"
#include "handles.h"
#include "expression.h"
#include "build_ir_routines.h"
#include "opt_throw.h"
#include "jit_common.h"


//
// Locate the instruction that defines the object handed to athrow, so the
// caller can scan what happens between the object's creation and the throw.
//
// The only shape recognized is
//
//               new obj
//               t3 = eax      <------ returned when everything matches
//                ...
//               arg0 = t3     <------ `obj`, the athrow's argument
//               athrow
//
//   head - list head of the basic block being examined
//   obj  - the argument instruction of the athrow
//
// NULL is returned when `obj` is not an outgoing-argument assignment from a
// single-definition temp, when that temp is not defined by copying a call's
// return value, or when the instruction in front of the copy is `head` or
// is anything other than a Call_Inst::new_call.
//
Inst *Lazy_Throw::find_obj_new(Inst *head, Inst *obj)
{
    if (!obj->is_outgoing_arg_assignment())
        return NULL;
    Operand *thrown = obj->src(0);
    if (!thrown->is_single_def_temp_reg())
        return NULL;
    //
    // the temp must be defined by copying the return value of a call
    //
    Inst *def = ((Temp_Reg *)thrown)->inst();
    if (!(def->is_assignment() && def->src(0)->is_ret()))
        return NULL;
    //
    // and that call, sitting right in front of the copy, must be a new()
    //
    Inst *before = def->prev();
    if (before == head)
        return NULL;
    if (!before->is_call())
        return NULL;
    if (((Call_Inst*)before)->kind != Call_Inst::new_call)
        return NULL;
    return def;
}

//
// Determine whether new_call allocates an instance of a class that is a
// subclass of java/lang/Throwable.
//
// The class handle is expected as an immediate feeding the first argument,
// either directly or through one single-definition temp:
//
//      arg0 = ch_imm    or    t3 = ch_imm
//      new                    arg0 = t3
//                             new
//
// returns the class handle when it is throwable, NULL otherwise (including
// the case where the handle is not found as an immediate).
//
Class_Handle Lazy_Throw::throwable_class(Call_Inst *new_call)
{
    Inst *ch_arg = new_call->get_arg(0);
    assert(ch_arg->n_srcs == 1);
    Operand *source = ch_arg->src(0);
    //
    // step through one temp copy, if there is one
    //
    if (source->is_single_def_temp_reg())
        source = ((Temp_Reg *)source)->inst()->src(0);
    if (source->kind != Operand::Immediate)
        return NULL;

    Class_Handle ch = (Class_Handle) ((Imm_Operand *)source)->imm();
    if (!O3_is_subclass_of(ch, cached_class_handles[ch_java_lang_Throwable]))
        return NULL;
    return ch;
}

//
// src is a value being stored into a field, local var or array (or passed
// as a this pointer).  Decide whether it is really obj.
//
// The chain of single-definition temps is followed backwards, e.g.
//
//      t3 = obj
//      [r+16] = t3 (src)
//
// returns true as soon as an operand on the chain is obj itself,
// returns false when the chain ends at any other operand.
//
bool Lazy_Throw::is_excp_obj(Operand *src, Operand *obj)
{
    for (Operand *cur = src; ; cur = ((Temp_Reg*)cur)->inst()->src(0))
    {
        if (cur == obj)
            return true;
        if (!cur->is_single_def_temp_reg())
            return false;
    }
}

//
// Decode which local variable a reference-load bytecode reads.
//
//   bc - points at the opcode byte; for `aload` the byte after it holds
//        the variable index
//
// returns the local variable number for aload_0..aload_3 (0x2a..0x2d) and
// for aload (0x19).  Every other opcode yields (unsigned)-1, meaning the
// value was not loaded from a reference variable.
//
static unsigned get_ref_var_no(const unsigned char *bc)
{
    const unsigned char opcode = *bc;
    //
    // aload_<n> ends at 0x2d; 0x2e is iaload and must not be decoded as
    // "local variable 4".
    //
    if (opcode >= 0x2a && opcode <= 0x2d)
        return opcode - 0x2a;     // aload_{0,1,2,3}
    if (opcode == 0x19)           // aload
        return bc[1];
    return (unsigned)-1;          // not a ref var
}
//
// return true, if the method has side effects.
// This routine detects side effects based on bytecode instead of IR
// because the method may not yet be compiled.
//
//   mh      - the method whose bytecode is scanned
//   arg_map - MAX_LAZY_ARGS entries indexed by the local variable number
//             of mh.  An entry below MAX_LAZY_ARGS names the constructor
//             argument held in that variable; any other value (callers 
//             store -1) marks a variable that is not a tracked reference.
//
// Only the bytecodes listed in the switch below are understood; any other
// bytecode makes the routine answer true.  While scanning, null_check[] and
// vrtl_check[] are filled in for the constructor arguments that get
// dereferenced or used as the receiver of an invokevirtual.
//
// NOTE(review): the answer is cached per method, but it was computed for
// one particular arg_map, and a cached MSE_False returns before any
// null_check[]/vrtl_check[] entry is recorded for the current call site.
// Confirm that this is acceptable for a method analyzed from several sites.
//
bool Lazy_Throw::bc_method_has_side_effects(Method_Handle mh, 
                                            unsigned      *arg_map)
{
    //
    // a flag associated with each method, indicating the method
    // has side effects.  If the flag has been computed, the value
    // is returned. If the flag is not yet computed, we will 
    // detect the side effects of the method and set the flag.
    //
    Method_Side_Effects sef = method_get_side_effects(mh);
    if (sef != MSE_Unknown)
        return sef == MSE_True;
    //
    // To avoid recursion, we set the flag to True.  Every early 
    // "return true" below leaves the flag at that value.
    //
    method_set_side_effects(mh, MSE_True);
    //
    // if the stack depth of the method exceeds MAX_LAZY_DEPTH, then we 
    // give up.  We can adjust MAX_LAZY_DEPTH if necessary in the future.
    //
    if (method_get_max_stack(mh) > MAX_LAZY_DEPTH)
        return true;
    //
    // if the method is a synchronized method, we do not allow lazy 
    // exception to happen so as to avoid any deadlock potential
    //
    if (method_is_synchronized(mh))
        return true;
    //
    // get the class handle of the method
    // 
    Class_Handle ch = method_get_class(mh);

#ifdef _DEBUG
    const char *method_name = method_get_name(mh);
    const char *class_name  = class_get_name(ch);
#endif
    //
    // go over bytecode
    //
    const unsigned char *first_bc = method_get_byte_code_addr(mh);
    const unsigned char *last_bc = first_bc + method_get_byte_code_size(mh);
    const unsigned char *bc = first_bc;
    //
    // The mimic stack holds, for every value on the Java operand stack, 
    // the bytecode offset of the instruction that produced it.
    //
    Lazy_Stack stack;
    //
    // We only look for simple bytecode sequence because complicated 
    // bytecode sequence requires mimic stack type of structure to do
    // analysis.  Furthermore, it is more likely to have side effects 
    // anyway.
    // 
    while (bc < last_bc)
    {
        unsigned bc_index = bc - first_bc;
        switch(*bc) {
        case 0x00:  break;  // nop
        case 0x01:          // aconst_null
        case 0x02: case 0x03: case 0x04: case 0x05:
        case 0x06: case 0x07: case 0x08: // iconst -1,0,...,5
        case 0x09: case 0x0a: // lconst 0,1
        case 0x0b:  // fconst 0.0F
        case 0x0c:  // fconst 1.0F
        case 0x0d:  // fconst 2.0F
        case 0x0e:  // dconst 0.0
        case 0x0f:  // dconst 1.0
        case 0x10:  // bipush
        case 0x11:  // sipush
        case 0x12:  // ldc
        case 0x13:  // ldc_w
        case 0x14:  // ldc2_w
        case 0x15:  // iload
        case 0x16:  // lload
        case 0x17:  // fload
        case 0x18:  // dload
        case 0x19:  // aload
        case 0x1a: case 0x1b: case 0x1c: case 0x1d: // iload_{0,1,2,3}
        case 0x1e: case 0x1f: case 0x20: case 0x21: // lload_{0,1,2,3}
        case 0x22: case 0x23: case 0x24: case 0x25: // fload_{0,1,2,3}
        case 0x26: case 0x27: case 0x28: case 0x29: // dload_{0,1,2,3}
        case 0x2a: case 0x2b: case 0x2c: case 0x2d: // aload_{0,1,2,3}
            stack.push(bc_index);
            break;
        case 0xac:  // ireturn
        case 0xad:  // lreturn
        case 0xae:  // freturn
        case 0xaf:  // dreturn
        case 0xb0:  // areturn
        case 0xb1:  // return (without value)
            break;
        case 0xb2:  // getstatic
            {
                unsigned index = (bc[1] << 8) + bc[2];
                Loader_Exception exc;
                Field_Handle fh = resolve_static_field(comp_handle,ch,index,&exc);
                Class_Handle field_class = field_get_class(fh);
                //
                // The class needs to be initialized at the first active use.
                // If the class is not yet initialized, we don't eliminate the 
                // constructor because it may violate the Java semantics
                //
                if (field_class != ch && !class_is_initialized(field_class)) 
                    return true;
                stack.push(bc_index);
            }
            break;
        case 0xb4:  // getfield
            {
                unsigned base_bc_off = stack.pop();
                unsigned var_no = get_ref_var_no(first_bc + base_bc_off);
                //
                // arg_map[] has MAX_LAZY_ARGS entries, so var_no equal to
                // MAX_LAZY_ARGS is already out of range.
                //
                if (var_no >= MAX_LAZY_ARGS)
                    return true;
                //
                // determine if var_no is one of the constructor's ref args.
                // arg_map[var_no] >= MAX_LAZY_ARGS means the ref is not 
                // constructor's arg. We give up tracking because a null
                // pointer exception may be thrown in this case.
                //
                if (arg_map[var_no] >= MAX_LAZY_ARGS)
                    return true;
                //
                // set null_check so that we can guarantee null pointer
                // exception does not happen when eliminating the constructor
                //
                null_check[arg_map[var_no]] = true;
                stack.push(bc_index);
            }
            break;
        case 0xb5:  // putfield
            {
                stack.pop(); // pop source
                unsigned base_bc_off = stack.pop(); // pop base
                unsigned var_no = get_ref_var_no(first_bc + base_bc_off);
                if (var_no >= MAX_LAZY_ARGS)
                    return true;
                //
                // field updates should be considered as a side effect. 
                // However, we relax it a little bit by treating field
                // updates of the exception object, the first arg of 
                // the constructor, as side-effect free.
                //
                if (arg_map[var_no] != 0)
                    return true;
                // 
                // no need to set null_check because the exception is
                // not null.
                //
            }
            break;
        case 0xb6:  // invokevirtual
        case 0xb7:  // invokespecial
            {
                //
                // get the method handle of the callee
                //
                unsigned index = (bc[1] << 8) + bc[2];
                Loader_Exception exc;
                Method_Handle handle = resolve_virtual_method(comp_handle,ch,index,&exc);
                //
                // a callee that cannot be resolved cannot be analyzed
                //
                if (handle == NULL)
                    return true;
                //
                // we pop all args and push them unto tmp_stack in 
                // the reverse order so that this ptr is on the top
                //
                Java_Type type;
                Lazy_Stack tmp_stack;
                Arg_List_Iterator iter = method_get_argument_list(handle);
                while((type = curr_arg(iter)) != JAVA_TYPE_END) 
                {
                    tmp_stack.push(stack.pop());
                    iter = advance_arg_iterator(iter);
                }
                tmp_stack.push(stack.pop()); // pop this ptr
                //
                // prepare arg_map for outgoing args
                //
                unsigned i;
                unsigned out_arg_map[MAX_LAZY_ARGS];
                //
                // set out_arg_map[] to default value (not ref type)
                //
                for (i = 0; i < MAX_LAZY_ARGS; i++)
                    out_arg_map[i] = -1;
                //
                // arg_map[var_no] >= MAX_LAZY_ARGS means the ref is not 
                // constructor's arg. We give up tracking because a null
                // pointer exception may be thrown in this case.
                //
                unsigned base_bc_off = tmp_stack.pop(); // pop this ptr
                unsigned var_no = get_ref_var_no(first_bc + base_bc_off);
                if (var_no >= MAX_LAZY_ARGS ||
                    arg_map[var_no] >= MAX_LAZY_ARGS)
                    return true;
                //
                // the receiver is dereferenced by the call, so it needs a
                // null check.  For invokevirtual we also set vrtl_check 
                // because we need to generate code to make sure the right 
                // instance of the method is eliminated.
                //
                null_check[arg_map[var_no]] = true;
                if ((*bc) == 0xb6) // invokevirtual
                    vrtl_check[arg_map[var_no]] = handle;
                out_arg_map[0] = arg_map[var_no];
                //
                // go over each arg.  slot is the callee's local variable
                // number of the current arg: the receiver took slot 0, a
                // 64-bit arg takes two slots and anything else takes one.
                // This is the same layout method_has_side_effects() uses.
                // Every arg owns exactly one tmp_stack entry, so one entry
                // is popped per arg to keep the stack aligned with the
                // argument list.
                //
                unsigned slot = 1;
                iter = method_get_argument_list(handle);  // reset the iterator
                while ((type = curr_arg(iter)) != JAVA_TYPE_END)
                {
                    base_bc_off = tmp_stack.pop();
                    if (type == JIT_TYPE_CLASS) 
                    {
                        var_no = get_ref_var_no(first_bc + base_bc_off);
                        //
                        // we don't return true if var_no >= MAX_LAZY_ARGS or
                        // arg_map[var_no] >= MAX_LAZY_ARGS because those args
                        // may not be dereferenced within the method.  A slot
                        // past the end of out_arg_map[] simply stays 
                        // untracked; the callee analysis gives up by itself
                        // if such a variable is dereferenced.
                        //
                        if (slot < MAX_LAZY_ARGS &&
                            var_no < MAX_LAZY_ARGS &&
                            arg_map[var_no] < MAX_LAZY_ARGS)
                            out_arg_map[slot] = arg_map[var_no];
                    }
                    slot += IS_64BIT(type) ? 2 : 1;
                    iter = advance_arg_iterator(iter);
                }
                //
                // if the method has side effects, we stop analyzing and 
                // return immediately.
                //
                if (bc_method_has_side_effects(handle,out_arg_map))
                    return true;
                stack.push(bc_index);
            }
            break;
        default:
            return true;
        }
        //
        // update bc to the next bc inst
        //
        bc += instruction_length(first_bc, bc_index);

    }
    //
    // set the side_effect flag to false so that we don't need to do
    // the analysis next time.
    //
    method_set_side_effects(mh, MSE_False);
    return false;
}

//
// return true, if the method call has side effects that may
// cause lazy exception to violate Java semantics
//
//   cinst - the call found between the new() and the athrow
//   obj   - the operand that holds the freshly created exception object
//
// The answer is true without looking at the callee when the call is not a
// static, special or virtual call, when a non-static call's this pointer
// is not the exception object, or when the arguments do not fit into 
// MAX_LAZY_ARGS local variable slots.  Otherwise the callee's bytecode is 
// analyzed by bc_method_has_side_effects().
//
bool Lazy_Throw::method_has_side_effects(Call_Inst *cinst, Operand *obj)
{
    //
    // assume that they all have side effects
    //
    if (cinst->kind != Call_Inst::static_call  &&
        cinst->kind != Call_Inst::special_call &&
        cinst->kind != Call_Inst::virtual_call)
        return true; // conservative for now
    //
    // if this ptr of invokespecial and invokevirtual is not the exception 
    // object, then we assume that the method has side effects
    //
    if (cinst->kind != Call_Inst::static_call &&
        !is_excp_obj(cinst->get_arg(0)->src(0), obj))
        return true;
    //
    // if the number of arg exceeds MAX_LAZY_ARGS, then we give up.
    // Adjust MAX_LAZY_ARGS and MAX_LAZY_DEPTH later if needed.
    //
    if (cinst->n_args() > MAX_LAZY_ARGS )
        return true;
    //
    // Inside the constructor, the ref arguments may be dereferenced (getfield)
    // or invokevirtual. Dereferencing may cause null pointer exception.
    // We need to take care class overridden for invokevirtual.
    // We initialize arg_map[] to track which ref arg needs null pointer
    // check or invokevirtual check.
    //
    unsigned arg_map[MAX_LAZY_ARGS];
    unsigned n_args = cinst->n_args();
    unsigned i = 0, j = 0;
    for (; i < MAX_LAZY_ARGS; i++) 
        arg_map[i] = -1;    // set default value (not ref type)

    //
    // i counts arguments, j counts local variable slots (a 64-bit argument
    // occupies two of them).
    //
    for (i = 0; i < n_args; i++,j++) 
    {
        //
        // j was only range-checked before the loop step incremented it, so
        // it can be equal to MAX_LAZY_ARGS here.  Give up before arg_map[j]
        // is written one element past the end of the array.
        //
        if (j >= MAX_LAZY_ARGS)
            return true;
        Operand *arg = cinst->get_arg_opnd(i);
        if (arg->type == JIT_TYPE_CLASS)  // ref type
            arg_map[j] = j;
        else if (IS_64BIT(arg->type))
            j++;
        if (j >= MAX_LAZY_ARGS)
            return true;
    }
    return bc_method_has_side_effects(cinst->get_mhandle(),arg_map);
}

//
// Relocate a call in front of head: first each of its argument 
// instructions (in argument order), then the call itself, and finally the
// instruction that receives the return value, when the call has one.
// A temp register feeding an argument is marked as a global register 
// candidate.
//
static void mov_call_to(Call_Inst *call, Inst *head)
{
    //
    // argument instructions go first
    //
    const unsigned count = call->n_args();
    unsigned k = 0;
    while (k < count)
    {
        Inst *moved = call->get_arg(k);
        moved->unlink();
        moved->insert_before(head);
        if (moved->src(0)->is_temp_reg())
            ((Temp_Reg*)moved->src(0))->set_global_reg_cand();
        k++;
    }
    //
    // then the call
    //
    call->unlink();
    call->insert_before(head);
    //
    // and the ret inst, if any
    //
    if (call->get_ret() == NULL)
        return;
    call->get_ret()->unlink();
    call->get_ret()->insert_before(head);
}
//
// Build the two alternative throw sequences.
//
// The original sequence (new(), constructor(), athrow()) currently sits in
// node; it is moved into anode.  A lazy exception call is generated in 
// lnode.  Its arguments are: the exception's class handle, copies of the 
// constructor's arguments 1..n-1, and the constructor's method handle.
//
//   node        - block that held the original sequence (only read when
//                 PRINTABLE_O3 is defined)
//   lnode       - block that receives the lazy exception call
//   anode       - block that receives the original sequence
//   new_inst    - the new() call
//   constructor - the constructor call
//   athrow      - the athrow() call
//   exc_ch      - class of the exception object
//
void Lazy_Throw::gen_lazy_excp_call(Cfg_Node     *node,
                                    Cfg_Node     *lnode,
                                    Cfg_Node     *anode,
                                    Call_Inst    *new_inst, // new() 
                                    Call_Inst    *constructor, 
                                    Call_Inst    *athrow,   // athrow()
                                    Class_Handle exc_ch // excp obj's class
                                    )
{
    //
    // Part one: the eager sequence moves to anode, one call at a time.
    // The constructor's this argument is rewired to read the result of
    // new() directly.
    //
    Inst *eager_head = anode->IR_instruction_list();
    constructor->get_arg(0)->replace_src(0,new_inst->get_ret()->dst());
    mov_call_to(new_inst, eager_head);
    mov_call_to(constructor, eager_head);
    mov_call_to(athrow, eager_head);
#ifdef PRINTABLE_O3
    anode->set_bytecodes(new_inst->bc_index, node->bc_length() - 
                         (new_inst->bc_index - node->first_bc_idx()));
#endif

    //
    // Part two: the lazy sequence is built in lnode.  The exception class 
    // has to be initialized before anything else happens there.
    //
    exprs.set_live_lcse(NULL);
    Inst *lazy_head = lnode->IR_instruction_list();
    if (!class_is_initialized(exc_ch))
    {
        Inst *ch_imm = exprs.lookup_imm((unsigned)exc_ch, JIT_TYPE_ADDR, lazy_head);
        Exp *init_exp = exprs.lookup_inst_exp(Exp::ClassInit, ch_imm->exp, NULL, JIT_TYPE_VOID);
        new(mem) Classinit_Inst(ch_imm->dst(), init_exp, lazy_head);
    }
    //
    // Argument 0 of the lazy call is the class handle; it takes the place
    // of the constructor's this pointer.  One extra array entry is reserved
    // for the method handle.
    //
    Inst **lazy_args = (Inst **)mem.alloc((constructor->n_args() + 1) * sizeof(*lazy_args));
    Operand_Exp *slot0_exp = exprs.lookup_arg_exp(0,JIT_TYPE_ADDR);
    Operand_Exp *ch_exp = exprs.lookup_imm_exp((unsigned)exc_ch, JIT_TYPE_ADDR);
    lazy_args[0] = create_assign_inst(exprs,lazy_head,slot0_exp,ch_exp,JIT_TYPE_ADDR);
    //
    // The remaining constructor arguments are duplicated as they are.
    //
    unsigned next = 1;
    while (next < constructor->n_args())
    {
        Inst *ctor_arg = constructor->get_arg(next);
        assert(ctor_arg->is_assignment());
        lazy_args[next] = new (exprs.mem) Assign_Inst(ctor_arg->dst(),ctor_arg->src(0),ctor_arg->exp,lazy_head);
        next++;
    }

    //
    // The constructor's method handle is appended as the last argument.
    //
    Operand_Exp *mh_slot_exp = exprs.lookup_arg_exp(next, JIT_TYPE_ADDR);
    Inst *mh_imm = exprs.lookup_imm((unsigned)constructor->get_mhandle(), JIT_TYPE_ADDR, lazy_head);
    lazy_args[next] = create_assign_inst(exprs, lazy_head, mh_slot_exp, 
                                         (Operand_Exp*)mh_imm->exp, JIT_TYPE_ADDR);
    next++;

#ifdef PRINTABLE_O3
    lazy_args[0]->bc_index = ~0u;
#endif // PRINTABLE_O3

    Call_Inst *lazy_call =
        new(mem) Call_Inst(Call_Inst::athrow_lazy_call, athrow->exp, NULL, false, lazy_head);
    lazy_call->set_args(lazy_args, next, NULL);
}

//
// Make room for one more check on the path towards the lazy exception.
//
// inode is split with split_cfg_node().  According to the notes kept with
// this routine, the split hands all out edges of inode over to the node it
// creates, and that node starts out empty.  inode is then given two fresh
// out edges: the new node first (fallthrough), anode second (target).  The 
// new node is placed right after inode in the linearization order.
//
// returns the node created by the split.
//
Cfg_Node *Lazy_Throw::create_new_node(Cfg_Node   *anode,
                                      Cfg_Node   *inode)
{
    Cfg_Node *tail = fg->split_cfg_node(inode);
    //
    // inode lost its out edges to tail; add the new pair.  The order
    // matters: the fallthrough edge is added before the target edge.
    //
    inode->add_edge(mem, tail);
    inode->add_edge(mem, anode);
    //
    // block ordering: tail follows inode
    //
    tail->linearization_node()->unlink();
    tail->linearization_node()->insert_after(inode->linearization_node());
    return tail;
}

//
// replace athrow with lazy exception.
// we need to insert code to check null pointer or vtable
//
// The original new()/constructor()/athrow() sequence is kept in a separate
// block (anode) and the lazy exception call is generated in another block
// (lnode).  For every entry set in vrtl_check[] and null_check[], compare
// and branch instructions are inserted in front of lnode; each check after
// the first one gets a block of its own, created by create_new_node().
//
//   node        - block that ends in the athrow
//   new_inst    - the new() call that creates the exception object
//   constructor - the constructor call
//   athrow      - the athrow() call
//   exc_ch      - class of the exception object
//
void Lazy_Throw::replace_athrow(Cfg_Node *node,
                                Call_Inst    *new_inst,
                                Call_Inst    *constructor,
                                Call_Inst    *athrow,
                                Class_Handle exc_ch)
{
    //
    //  If null check or vtable check is necessary, we still need to keep 
    //  athrow.
    //         node                                  node
    //    +--------------+             +-------------------+
    //    |    ...       |             |     ...           |
    //    |    new()     |             |   if x == null    |----------+
    //    | constructor()|  ====>      +-------------------+          |
    //    |    athrow()  |        fallthrough   |                     |
    //    +--------------+                      v                     |
    //                                 +-------------------+          |
    //                                 |  if x's vtab != v |--------+ |
    //                                 +-------------------+        | |
    //                                          |                   | |
    //                                lnode     v                   v v  anode
    //                                 +-------------------+  +---------------+
    //                                 |  lazy exception() |  |    new()      |
    //                                 +-------------------+  | constructor() |
    //                                                        |    athrow()   |
    //                                                        +---------------+
    //
    Cfg_Node *anode = fg->split_cfg_node(node);  // for original athrow
    Cfg_Node *lnode = fg->split_cfg_node(node);  // for lazy exception
    //
    // determine block ordering: both inserts go right after node, so the
    // resulting order is node, lnode, anode
    //
    anode->linearization_node()->unlink();
    lnode->linearization_node()->unlink();
    anode->linearization_node()->insert_after(node->linearization_node());
    lnode->linearization_node()->insert_after(node->linearization_node());
    //
    // set anode's latest_traversal to prevent reapplying lazy exception to 
    // the block again.
    //
    anode->latest_traversal = fg->traversal_num();
    //
    // move the original sequence into anode and generate the lazy 
    // exception call in lnode
    //
    gen_lazy_excp_call(node, lnode, anode, 
                       (Call_Inst*)new_inst, 
                       (Call_Inst*)constructor, 
                       (Call_Inst*)athrow, 
                       exc_ch);
    //
    // add cfg edges
    //
    node->add_edge(mem, lnode); // first add_edge is fallthrough edge
    node->add_edge(mem, anode); // second add_edge is target edge
    //
    // inode is the node in which we want to insert code
    //
    Cfg_Node *inode = node;
    Inst *ihead = node->IR_instruction_list();
    //
    // do we want to create a node before insertion?  The first check goes
    // into node itself; every later check needs a block of its own.
    //
    bool create_node = false;
    //
    // check null_check[] and vrtl_check[] and insert code
    // we first go over vrtl_check[]
    //
    unsigned i;
    for (i = 0; i < MAX_LAZY_ARGS; i++)
    {
        if (vrtl_check[i] == NULL) continue;

        Inst *arg = ((Call_Inst*)constructor)->get_arg(i);
        //
        // checking vtable needs to do two things.
        // First, we check if the arg is null because we need to 
        // dereference it to get its vtable.  However, if the
        // arg is the exception obj, then we don't check null 
        //
        // The null check for this arg is emitted right here, so the entry
        // is cleared to keep the null_check[] loop below from emitting it
        // a second time.
        //
        null_check[i] = false;
        Inst *obj =  arg->src(0)->lookup_exp(exprs,ihead);
        if (i != 0) 
        {
            if (create_node)
            {
                inode = create_new_node(anode, inode);
                ihead = inode->IR_instruction_list();
            }
            // compare the arg with 0 and branch on equality
            Inst *imm0 = exprs.lookup_imm(0,JIT_TYPE_INT,ihead);
            Inst *cmp  = gen_cmp_inst(mem, exprs, ihead, 
                                      obj, imm0, JIT_TYPE_CLASS, false);
            gen_branch(mem, exprs, ihead, false, Exp::Beq, cmp);
            create_node = true;
        }
        //
        // Second, we check if the vtable is the one we expect
        //
        if (create_node)
        {
            inode = create_new_node(anode, inode);
            ihead = inode->IR_instruction_list();
        }
        // compare the field at offset 0 of the object with the vtable of
        // the class that declares the method; branch when they differ
        void *vtab = class_get_vtable(method_get_class(vrtl_check[i]));
        Inst *vtab_i = exprs.lookup_imm((unsigned)vtab,JIT_TYPE_ADDR,ihead);
        Inst *obj_vtab = exprs.lookup_field(obj,0,JIT_TYPE_ADDR,ihead,(FIELD_UID)vrtl_check[i]);
        Inst *cmp = gen_cmp_inst(mem, exprs, ihead, 
                                 obj_vtab, vtab_i, JIT_TYPE_ADDR, false);
        gen_branch(exprs.mem, exprs, ihead, false, Exp::Bne, cmp);
        create_node = true;
    }
    //
    // check null_check[]: the remaining args that are dereferenced without
    // needing a vtable check.  Entry 0 is skipped because it is the 
    // exception obj itself.
    //
    for (i = 0; i < MAX_LAZY_ARGS; i++)
    {
        if (!null_check[i]) continue;

        Inst *arg = ((Call_Inst*)constructor)->get_arg(i);
        Inst *obj =  arg->src(0)->lookup_exp(exprs,ihead);
        if (i != 0) 
        {
            if (create_node)
            {
                inode = create_new_node(anode, inode);
                ihead = inode->IR_instruction_list();
            }
            Inst *imm0 = exprs.lookup_imm(0,JIT_TYPE_INT,ihead);
            Inst *cmp  = gen_cmp_inst(mem, exprs, ihead, 
                                      obj, imm0, JIT_TYPE_CLASS, false);
            gen_branch(mem, exprs, ihead, false, Exp::Beq, cmp);
            create_node = true;
        }
    }

#ifdef TRACE_O3
    cout << "replace athrow with lazy exception" << endl;
#endif
}

//
// Rewrite the athrow sequence in place; used when no null pointer or 
// vtable check has to be generated, so the original sequence need not be
// kept.
//
// The new() call and its argument are unlinked, the constructor call is
// unlinked while its argument instructions stay, and the athrow is 
// replaced by a lazy exception call.  The lazy call's arguments are: the
// class handle (reusing the constructor's this-argument instruction), the
// constructor's other arguments, and the constructor's method handle.
//
//   node        - block that ends in the athrow (not read here)
//   new_inst    - the new() call
//   constructor - the constructor call
//   athrow      - the athrow() call
//   exc_ch      - class of the exception object
//
void Lazy_Throw::remove_athrow(Cfg_Node     *node,
                               Call_Inst    *new_inst,
                               Call_Inst    *constructor,
                               Call_Inst    *athrow,
                               Class_Handle exc_ch)
{
    //
    // The exception class must be initialized before anything else; the
    // class initialization is placed in front of the new() call.
    //
    if (!class_is_initialized(exc_ch))
    {
        Inst *ch_imm = exprs.lookup_imm((unsigned)exc_ch, JIT_TYPE_ADDR, new_inst);
        Exp *init_exp = exprs.lookup_inst_exp(Exp::ClassInit, ch_imm->exp, NULL, JIT_TYPE_VOID);
        new(mem) Classinit_Inst(ch_imm->dst(), init_exp, new_inst);
    }

    //
    // Drop the new() call together with its argument.
    //
    exprs.set_live_lcse(NULL);
    new_inst->unlink();
    new_inst->get_arg(0)->unlink();

    //
    // Take over the constructor's argument instructions, then drop the
    // constructor call.  One extra array entry is reserved for the method 
    // handle.
    //
    Inst **lazy_args = (Inst **)mem.alloc((constructor->n_args() + 1) * sizeof(*lazy_args));
    unsigned count = 0;
    while (count < constructor->n_args())
    {
        lazy_args[count] = constructor->get_arg(count);
        count++;
    }
    constructor->unlink();

    //
    // The class handle takes the place of the "this" pointer.
    // lazy_args[0] was typed JIT_TYPE_CLASS while a class handle is 
    // JIT_TYPE_ADDR, so both its source and its destination operand are 
    // replaced.
    //
    lazy_args[0]->replace_src(0, exprs.lookup_imm_exp((unsigned)exc_ch, JIT_TYPE_ADDR)->opnd);
    lazy_args[0]->set_dst(exprs.lookup_arg_exp(0, JIT_TYPE_ADDR)->opnd);

    //
    // The method handle becomes the last argument; its assignment is
    // created in front of the athrow and picked up through athrow->prev().
    //
    Operand_Exp *mh_slot_exp = exprs.lookup_arg_exp(count, JIT_TYPE_ADDR);
    Inst *mh_imm = exprs.lookup_imm((unsigned)constructor->get_mhandle(), JIT_TYPE_ADDR, athrow);
    create_assign_inst(mem, exprs, athrow, mh_slot_exp->opnd, mh_slot_exp, mh_imm, JIT_TYPE_ADDR);
    lazy_args[count] = athrow->prev();
    count++;

    // Move lazy_args[0] next to lazy_args[1], for aesthetic reasons.
    lazy_args[0]->unlink();
    lazy_args[0]->insert_before(lazy_args[1]);
#ifdef PRINTABLE_O3
    lazy_args[0]->bc_index = ~0u;
#endif // PRINTABLE_O3

    //
    // Finally the lazy call is created where the athrow was, and the 
    // athrow is unlinked.
    //
    Call_Inst *lazy_call =
        new(mem) Call_Inst(Call_Inst::athrow_lazy_call, athrow->exp, NULL, false, athrow->next());
    lazy_call->set_args(lazy_args, count, NULL);
    athrow->unlink();

#ifdef TRACE_O3
    cout << "remove athrow with lazy exception" << endl;
#endif
}

//
// Try to apply the lazy exception optimization to node, whose last 
// instruction must be an athrow call (asserted).
//
// Nothing is changed unless all of the following hold:
//   - the thrown object is created by a new() call that find_obj_new()
//     recognizes,
//   - the created class is throwable,
//   - between the creation and the athrow the object is not stored into a
//     memory operand or a vreg,
//   - a special call on a method of the exception's class (the 
//     constructor) is found in between and method_has_side_effects() 
//     reports none for it.
// When null pointer or vtable checks were recorded during the analysis, 
// the athrow is kept next to the lazy call (replace_athrow); otherwise it 
// is replaced outright (remove_athrow).
//
void Lazy_Throw::eliminate_athrow(Cfg_Node *node)
{
    //
    // start from clean null_check[] and vrtl_check[]
    //
    init_check(); 

    Inst *head = node->IR_instruction_list();
    Inst *last = head->prev();
    assert(last != head &&    // not an empty BB
           last->is_call() && // contain athrow
           ((Call_Inst*)last)->kind == Call_Inst::athrow_call);

    //
    // The block ends in an athrow.  Find the instruction that receives the 
    // newly created exception object; without it (e.g., the creation is 
    // not in the current bb) there is nothing we can do.
    //
    Inst *obj_new = find_obj_new(head,((Call_Inst*)last)->get_arg(0));
    if (obj_new == NULL)
        return;
    //
    // the created object has to be throwable
    //
    assert(obj_new->prev()->is_call());
    Call_Inst *new_inst = (Call_Inst*)obj_new->prev();
    Class_Handle exc_ch = throwable_class(new_inst);
    if (exc_ch == NULL) 
        return;
    //
    // Scan everything between obj_new and the athrow.
    // For now we only handle the following sequence of throwing 
    // exception. 
    //      obj = new();
    //      obj.constructor();
    //      athrow(obj);
    //
    Call_Inst *constructor = NULL;
    for (Inst *cur = obj_new->next(); cur != last; cur = cur->next())
    {
        if (cur->is_assignment())
        {
            //
            // If the object is stored to a local variable, field or array,
            // the creation of the object is necessary.
            //
            Operand *dst = cur->dst();
            if (!dst->is_mem() && !dst->is_vreg())
                continue;
            if (is_excp_obj(cur->src(0), obj_new->dst()))
                return;
            continue;
        }
        if (!cur->is_call())
            continue;
        //
        // Only a special call on a method of the exception's class is of
        // interest: that is the constructor of the thrown object.
        //
        Call_Inst *call = (Call_Inst *) cur;
        if (call->kind != Call_Inst::special_call)
            continue;
        if (method_get_class(call->get_mhandle()) != exc_ch)
            continue;
        //
        // The constructor call is going to be eliminated as well, so it 
        // must not have any side effects.
        //
        if (method_has_side_effects(call,obj_new->dst()))
            return;
        assert(constructor == NULL);
        constructor = call;
    }
    if (constructor == NULL) // must find the constructor
        return;
    //
    // Now it is safe to replace athrow with lazy exception.
    // The original athrow has to be kept in a block of its own when some
    // argument needs a vtable check, or when an argument other than the 
    // exception object needs a null check.
    //
    bool keep_athrow = false;
    for (unsigned k = 0; k < MAX_LAZY_ARGS && !keep_athrow; k++)
    {
        if (vrtl_check[k] != NULL)
            keep_athrow = true;
        else if (k != 0 && null_check[k])
            keep_athrow = true;
    }

    if (!keep_athrow)
    {
        remove_athrow(node, new_inst, constructor, (Call_Inst*)last, exc_ch);
        return;
    }
    // need to create blocks for inserting code
    replace_athrow(node, new_inst, constructor, (Call_Inst*)last, exc_ch);
}

//
// Per-block callback: find lazy exception opportunity.
//
//   node - the basic block being visited
//   c    - the Lazy_Throw object driving the optimization
//
// Blocks that are empty or that do not end in an athrow call are left 
// alone; every other block is handed to Lazy_Throw::eliminate_athrow().
//
static void lazy_bb(Cfg_Node *node, Closure *c)
{
    Inst *sentinel = node->IR_instruction_list();
    Inst *tail = sentinel->prev();
    //
    // tail == sentinel means the block is empty
    //
    const bool ends_in_athrow =
        tail != sentinel &&
        tail->is_call() &&
        ((Call_Inst *)tail)->kind == Call_Inst::athrow_call;
    if (ends_in_athrow)
        ((Lazy_Throw*)c)->eliminate_athrow(node);
}

//
// Entry point of the lazy exception optimization: lazy_bb() is applied to 
// the flow graph with a Lazy_Throw object as its closure.
//
//   fg    - the flow graph to optimize
//   exprs - the expression table used when new IR is generated
//
void optimize_throws(Flow_Graph *fg, Expressions &exprs)
{
    Lazy_Throw lazy_pass(fg, fg->mem_manager, exprs, fg->cmpl_handle());
    fg->apply(lazy_bb, &lazy_pass);
}
