// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o3_jit/tail_recursion.cpp,v 1.2 2001/08/13 09:52:49 xhshi Exp $
//


#include "defines.h"
#include <iostream.h>
#include "ir.h"
#include "expression.h"
#include "flow_graph.h"
#include "build_ir_routines.h"
#include "tail_recursion.h"
#include "inlining.h"

//
// Decide whether the instructions that consume the result of recursive call
// r make the tail-recursion transformation unsafe.
//
// Returns true when the transformation must be abandoned and false when the
// code between the call and the final "Ret = ..." assignment matches one of
// the recognized shapes:
//      return foo(x-1)          plain tail call
//      return x + foo(x-1)      rec_ty becomes add (a subtraction is accepted
//                               only when the tracked value is not source 1)
//      return x * foo(x-1)      rec_ty becomes mul
// Side effect: rec_ty is updated on the way; mixing add and mul is rejected.
//
bool Tail_Recursion::side_effect(Recursion* r) {
    Inst *ret = r->call_i->get_ret();
    Cfg_Node *node = r->bb;
    Cfg_Node *epilog = fg->epilog();
    if (ret == NULL) {
        //
        // get_ret() gave us nothing to track: only accept a call that is the
        // last instruction of a block whose first successor is the epilog
        //
        if (node->out_edges(0) == epilog) 
            return r->call_i->next() != node->IR_instruction_list(); // last inst
        else
            return true;
    }
    //
    // if the result is used for more than once, we give up because
    // it is hard to keep track.
    //
    Operand *dst = ret->dst();
    if (!dst->is_single_def_temp_reg() || ((Temp_Reg*)dst)->global_reg_alloc_cand()) 
        return true;
    //
    // traverse from ret inst 
    // check if there are any side-effect assignment, e.g, a[i] = ... 
    //
    bool track_assignment = false;  // set once we have left the call's block
    Operand *track = dst;           // operand currently holding the call's value
    Inst *head = node->IR_instruction_list();
    Inst *first = ret->next();
    while (node != epilog) {
        Inst *i;
        for (i = first; i != head; i = i->next()) {
            //
            // any side effect?
            //
            if (i->dst() == NULL)
                return true;

            // blocks after the call's block may contain assignments only
            if (track_assignment && !i->is_assignment())
                return true;
            Operand *d = i->dst();
            if (d->is_ret()) {  // Ret = track
                assert(i->is_assignment());
                return i->src(0) != track;
            }

            if(i->dst()->is_temp_reg()) { 
                int j;
                for (j = 0; j < i->n_srcs; j++) {
                    Operand *src = i->src(j);
                    //
                    // not a field or array operand and src != track
                    //
                    if (src->base() == NULL && src != track) continue;
                    if (src->base() == track || src->index() == track) 
                        return true; //  a[foo(x-1)]
                    //
                    // The tracked value may feed this instruction only once:
                    // foo(x-1)*foo(x-1) is not an accumulation of the form
                    // T = T op x.  (The original assertion here could never
                    // fail, so this case used to be accepted silently.)
                    //
                    if (src == track) {
                        int k;
                        for (k = j + 1; k < i->n_srcs; k++)
                            if (i->src(k) == track) return true;
                    }
                    track = i->dst();
                    //
                    // only deal with cases like, x + foo(x-1), or x*foo(x-1)
                    //
                    if (i->is_add() || i->is_sub()) {
                        if (i->is_sub() && j == 1) // no x-foo(x-1)
                            return true;
                        else if (rec_ty == unknown)
                            rec_ty = add;
                        else if (rec_ty != add)    // no x*foo(x-1) + x
                            return true;
                    } else if (i->is_mul()) {
                        if (rec_ty == unknown)
                            rec_ty = mul;
                        else if (rec_ty != mul) // no x*foo(x-1) + x
                            return true;
                    } else
                        return true;
                }
            } else {
                // a copy of the tracked value into a vreg keeps tracking alive
                if (i->is_assignment() && 
                    i->src(0) == track && i->dst()->is_vreg())
                    track = i->dst();
                else
                    return true;
            }
        }
        assert(node->out_edge_size() == 1);
        node = node->out_edges(0);
        //
        // Re-aim the loop at the successor's instruction list.  This must
        // assign the outer 'head' (not declare a new one), otherwise the
        // for-loop above keeps testing against the previous block's list head.
        //
        head = node->IR_instruction_list();
        first = head->next();
        track_assignment = true;
    }
    assert(0); // should return false by now
    return false;
}

//
//
//    +-------+      +-------+
//    |  B1   |      |  foo  |
//    +-------+      +-------+
//           \         /
//            \       /
//            +--------+
//            |   B2   |
//            +--------+
//                 |
//            +--------+
//            | epilog |
//            +--------+
// This is a quick hack.  We should traverse the graph to identify unsafe paths.
//
// For every recursion in the list, follow the first out-edge of its block
// until the epilog is reached.  Returns true (unsafe) when a block on that
// walk
//   - has more than one predecessor and at least one of them is not the
//     block of a recursion in the list (B1 in the picture above), or
//   - does not have exactly one successor.
// Returns false when every walk reaches the epilog without such a block.
//
bool Tail_Recursion::unsafe_path(Recursion *list) {
    Cfg_Node *const exit_node = fg->epilog();
    Recursion *rec = list;
    while (rec != NULL) {
        Cfg_Node *cur = rec->bb->out_edges(0);
        for (; cur != exit_node; cur = cur->out_edges(0)) {
            if (cur->in_edge_size() > 1) {
                //
                // a join point: every predecessor must itself be the block
                // of some recursion in the list
                //
                Cfg_Int e;
                for (e = 0; e < cur->in_edge_size(); e++) {
                    Cfg_Node *pred = cur->in_edges(e);
                    bool pred_is_recursion = false;
                    Recursion *cand = list;
                    while (cand != NULL && !pred_is_recursion) {
                        pred_is_recursion = (cand->bb == pred);
                        cand = cand->next;
                    }
                    if (!pred_is_recursion)
                        return true;
                }
            }
            // the walk only understands straight-line paths
            if (cur->out_edge_size() != 1)
                return true;
        }
        rec = rec->next;
    }
    return false;
}
//
// Returns true when the recursions in the list cannot be turned into jumps,
// e.g.,                  return x*f(x-1) + 1
//                        return f(x-1) + bar(1)
// The control-flow shape is checked first (unsafe_path); after that each
// recursion is checked with side_effect, stopping at the first unsafe one.
//
bool Tail_Recursion::recursion_is_unsafe(Recursion *list) {
#ifdef TRACE_O3
    fg->print_cfg("tail0");
#endif
    const bool bad_path = unsafe_path(list);
    if (bad_path) {
#ifdef TRACE_O3
        cout << "Unsafe path " << endl;
#endif
        return true;
    }
    //
    // advance until a recursion with side effects is found (or we run out)
    //
    Recursion *rec = list;
    while (rec != NULL && !side_effect(rec))
        rec = rec->next;
    if (rec == NULL)
        return false;
#ifdef TRACE_O3
    cout << "Node " << rec->bb->label << " is unsafe" << endl;
#endif
    return true;
}

//
// insert an assignment "T = initial_value" to prolog
//
// The initial value is the identity of the accumulation recorded in rec_ty:
// 0 for add, 1 for mul, expressed in the representation required by ty.
// Returns the destination temp of that assignment, flagged as a global
// register candidate with multiple definitions, or NULL when rec_ty is
// unknown (no accumulator is needed).  exprs.set_live_lcse(NULL) is called
// in every case.
//
Temp_Reg* Tail_Recursion::gen_initial_value_of_result(O3_Jit_Type ty) {
    exprs.set_live_lcse(NULL);
    if (rec_ty == unknown)
        return NULL;

    assert(rec_ty == add || rec_ty == mul);
    const bool multiplicative = (rec_ty == mul);
    Inst *prolog_head = fg->prolog()->IR_instruction_list();
    Value identity;
    Inst *init = NULL;
    const bool needs_const = (ty == JIT_TYPE_FLOAT  ||
                              ty == JIT_TYPE_DOUBLE ||
                              ty == JIT_TYPE_LONG);
    if (!needs_const) {
        // everything else fits in an immediate
        init = exprs.lookup_imm((multiplicative ? 1 : 0), ty, prolog_head);
    } else {
        if (ty == JIT_TYPE_FLOAT) {
            identity.f = multiplicative ? 1.0f : 0.0f;
        } else if (ty == JIT_TYPE_DOUBLE) {
            identity.d = multiplicative ? 1.0 : 0.0;
        } else {    // JIT_TYPE_LONG: value is split into hi/lo halves
            identity.l.hi = 0;
            identity.l.lo = multiplicative ? 1 : 0;
        }
        init = exprs.lookup_const(&identity, ty, prolog_head);
    }
    Temp_Reg *accumulator = (Temp_Reg*)init->dst();
    accumulator->set_global_reg_cand();
    accumulator->set_temp_reg_has_multiple_defs();
    return accumulator;
}

//
// replace ret assignments of blocks that are not in the list.
// replace Ret.I = t25 with   T = T +/* t25
//                            Ret.I = T
//
// list   - the recursions being eliminated
// result - the accumulator T; NULL means there is nothing to accumulate and
//          the function does nothing
// entry  - the block the eliminated recursions now jump to; it bounds the
//          walk that decides whether an epilog predecessor belongs to a
//          recursion
//
void Tail_Recursion::replace_ret_assignment(Recursion *list, 
                                            Temp_Reg  *result,
                                            Cfg_Node  *entry) {
    if (result == NULL) return;  // no return value (void)
    Cfg_Node *const exit_node = fg->epilog();
    assert(exit_node->in_edge_size() >= 1);
    Cfg_Int e;
    for (e = 0; e < exit_node->in_edge_size(); e++) {
        Cfg_Node *pred = exit_node->in_edges(e);
        //
        // Is pred the end of the chain that starts at some recursion's
        // block?  The chain stops in front of the epilog or the entry.
        //
        bool ends_a_recursion = false;
        Recursion *rec = list;
        while (rec != NULL && !ends_a_recursion) {
            Cfg_Node *walk = rec->bb;
            for (;;) {
                Cfg_Node *succ = walk->out_edges(0);
                if (succ == exit_node || succ == entry) break;
                walk = succ;
            }
            ends_a_recursion = (walk == pred);
            rec = rec->next;
        }
        if (ends_a_recursion) continue;
        //
        // an ordinary return: fold the returned value into T in front of
        // the ret assignment (the last instruction of the block)
        //
        Inst *list_head = pred->IR_instruction_list();
        Inst *ret_asgn = list_head->prev();
        assert(ret_asgn->is_assignment() && ret_asgn->dst()->is_ret());
        assert(rec_ty != unknown);
        Inst *accum = NULL;
        Operand *value = ret_asgn->src(0);
        if (!value->is_ret()) {
            //
            // T = T op value ; Ret = T
            // (T = T + 0 and T = T * 1 are not optimized away)
            //
            if (rec_ty == add)
                accum = new (mem) Add_Inst(Add_Inst::add,result,value,ret_asgn->exp,ret_asgn);
            else if (rec_ty == mul)
                accum = new (mem) Mul_Inst(Mul_Inst::mul,result,value,ret_asgn->exp,ret_asgn);
            ret_asgn->replace_src(0,result);
        } else {
            //
            // the value is already in Ret: append T = T op Ret ; Ret = T
            // to the end of the block
            //
            if (rec_ty == add)
                accum = new (mem) Add_Inst(Add_Inst::add,result,ret_asgn->dst(),ret_asgn->exp,list_head);
            else // rec_ty == mul
                accum = new (mem) Mul_Inst(Mul_Inst::mul,result,ret_asgn->dst(),ret_asgn->exp,list_head);
            new (mem) Assign_Inst(ret_asgn->dst(),result,ret_asgn->exp,list_head);
        }
        accum->set_dst(result);
    }
}

//
// Rewrite the computation that follows recursive call r so that it
// accumulates into result.
//
// Example:  return  x + foo(x-1) - 1;
//      t1 = x                     t1 = x
//      t2 = t1 - 1                t2 = t1 - 1
//      call foo(t2)               call foo(t2)
//      t3 = Ret.I(eax)   ===>     t3 = Ret.I(eax)
//      t4 = t1 + t3               result = t1 + result
//      t5 = t4 - 1                result = result - 1
//      Ret.I = t5
//
// Every instruction after the ret copy that reads the tracked operand gets
// that source replaced by result and its destination redirected to result;
// its old destination becomes the new tracked operand.
//
void Tail_Recursion::clean_up_computation(Recursion *r, Temp_Reg* result) {
    Inst *ret_copy = r->call_i->get_ret();
    Cfg_Node *blk = r->bb;
    Cfg_Node *const exit_node = fg->epilog();
    Inst *sentinel = blk->IR_instruction_list();
    Inst *start = ret_copy->next();
    //
    // traverse from ret inst until we reach epilog
    //
    for (; blk != exit_node; ) {
        // NOTE(review): tracking restarts from the ret copy's destination in
        // every block rather than carrying over -- confirm this is intended.
        Operand *tracked = ret_copy->dst();
        Inst *inst = start;
        while (inst != sentinel) {
            assert (inst->dst() != NULL);
            if (inst->dst()->is_ret())
                break;      // reached "Ret = ..."; nothing more in this block
            int k;
            for (k = 0; k < inst->n_srcs; k++) {
                if (inst->src(k) == tracked) {
                    inst->replace_src(k,result);
                    tracked = inst->dst();
                    inst->set_dst(result);
                }
            }
            inst = inst->next();
        }
        blk = blk->out_edges(0);
        sentinel = blk->IR_instruction_list();
        start = sentinel->next();
    }
}

//
// defined in Inlining.cpp
// In this file the answer is stored in Tail_Recursion::safe by transform();
// when it is false the eliminated recursion is guarded by a vtable compare.
//
extern bool safe_to_inline(Method_Handle mh, Call_Inst *cinst);
//
// Turn the outgoing-argument assignments of recursive call r into
// assignments to the operands that the prolog copies the incoming arguments
// into, and take the call (and its ret copy, if any) out of the block.
//
//   arg0 = x -1             t1 = x -1   (t1 in prolog)
//   call foo       ===>     goto label
//
// When the call is not known to be safe (!safe) and no hoisted vtable check
// is available for it (r->ptr == NULL), the block is split: the original
// call is kept in a separate block that is taken when a vtable compare
// fails, and the argument moves go into the other block.
//
void Tail_Recursion::mov_args_eliminate_call(Recursion *r) {
    Call_Inst *call = r->call_i;
    Inst *ret  = call->get_ret();
    Cfg_Node *node = r->bb;
    Inst *inst_head = node->IR_instruction_list();
    Method_Handle mh = call->get_mhandle();
    //
    //              epilog       +---------------+        
    //                |          |      ...      |          
    //                 \    node | if [b] == xxx |          
    //                  \        +---------------+          
    //                   \        /             \           
    //                    \      /               \          
    //                  +------------+         +------------+
    //        call_node |  push ar0  |         |            | args_node
    //                  |    ...     |         | move args  |
    //                  |  call foo  |         |            |
    //                  +------------+         +------------+
    //                                            /
    //                                           /
    //                           +---------------+
    //                           |      ...      | succ_node
    //                           |               |
    //                           +---------------+
    //
    Cfg_Node *call_node = NULL;
    Cfg_Node *args_node = node;     // without a guard, args are moved in place
    if (!safe && r->ptr == NULL) {
        // NOTE(review): args_node still equals node at this point, so this
        // assignment leaves r->bb unchanged -- confirm whether it was meant
        // to follow the split below.
        r->bb = args_node;
        args_node = fg->split_cfg_node(node);
        call_node = fg->split_cfg_node(node);
        //
        // add control flow edges
        //
        node->add_edge(fg->mem_manager, args_node); // fall-through
        node->add_edge(fg->mem_manager, call_node); // target
        call_node->add_edge(fg->mem_manager, fg->epilog());
        //
        // determine linearization ordering
        //
        args_node->linearization_node()->unlink();
        call_node->linearization_node()->unlink();
        args_node->linearization_node()->insert_after(node->linearization_node());
        call_node->linearization_node()->insert_after(node->linearization_node());
//        call_node->set_cold_code();
        //
        // generate vtable address
        //
        exprs.set_live_lcse(NULL);
        void *addr = class_get_vtable(method_get_class(mh));
        Inst *vaddr = exprs.lookup_imm((unsigned)addr,JIT_TYPE_ADDR,inst_head);
        //
        // get vtable: the call target is a single-def temp loaded from a
        // field operand whose base is the temp holding the vtable
        //
        assert(call->src(0)->is_single_def_temp_reg());
        Operand *fld = ((Temp_Reg*)call->src(0))->inst()->src(0);
        assert(fld->kind == Operand::Field);
        Operand *vtab = ((Field_Operand*)fld)->base();
        assert(vtab->is_single_def_temp_reg() && ((Temp_Reg*)vtab)->inst() != NULL);
        //
        // insert "if [b] == xxx" to check vtable address
        //
        Inst *cmp_i = gen_cmp_inst(exprs.mem, exprs, inst_head, 
                                   ((Temp_Reg*)vtab)->inst(), vaddr, JIT_TYPE_ADDR, false);
        // Annotate cmp_i to indicate that it is a vtable compare, and which
        // GCTrack_Operand is being compared.
        ((Compare_Inst *)cmp_i)->set_object_compared_with_vtable(((Temp_Reg*)vtab)->inst()->src(0));
        ((Compare_Inst *)cmp_i)->set_method_inlined(mh);
        gen_branch(exprs.mem,exprs,inst_head,false, Exp::Bne, cmp_i);
    }
    Cfg_Node *prolog = fg->prolog();
    Inst *prlg_hd = prolog->IR_instruction_list();
    // The prolog cursor is shared by all arguments: the scan for argument
    // k+1 resumes where the scan for argument k stopped.
    Inst *i = prlg_hd->next();
    inst_head = args_node->IR_instruction_list();
    unsigned k;
    for (k = 0; k < call->n_args(); k++) {
        Inst *arg = call->get_arg(k);
        assert(arg->is_assignment() && arg->dst()->is_arg());
        Operand *track = arg->dst();
        //
        // In prolog we have the code sequence that moves incoming arguments
        // find out the formal argument: follow the copies that start at the
        // matching incoming arg until one whose destination is not a temp
        //
        for (; i != prlg_hd; i = i->next()) {
            if (!i->is_assignment())
                continue;
            Operand *src = i->src(0);
            if (//
                // argument type could mismatch "src != track" will fail
                //                         prolog
                //   ar0.C = ...           t1 = ar0.I
                //   ar1.I = ...           t2 = ar1.I
                //   call foo
                // so arg operands are matched by number instead
                //
                (src->is_arg() && track->is_arg() && 
                ((Arg_Operand*)src)->num != ((Arg_Operand*)track)->num) ||
                (!src->is_arg() && src != track))
                continue;
            if (!i->dst()->is_temp_reg()) 
                break;
            track = i->dst();
        }
        assert(i != prlg_hd); // must find one
#ifdef PRINTABLE_O3
            arg->bc_index = ~0u; // reset, otherwise assertion fails in dump_jit
#endif
        if (!safe && r->ptr == NULL) {
            // guarded: args_node gets a fresh "formal = value" copy, and the
            // original "arN = value" assignment moves to call_node
            Operand *src = arg->src(0);
            if (src->is_temp_reg())
                ((Temp_Reg*)src)->set_global_reg_cand();
            new (exprs.mem) Assign_Inst(i->dst(),src,arg->exp,inst_head);
            arg->unlink();
            arg->insert_before(call_node->IR_instruction_list()); // append to the end
        } else {
            // unguarded: retarget the assignment itself at the formal
            arg->set_dst(i->dst());
            arg->unlink();
            arg->insert_before(inst_head); // append to the end
        }
    }
    //
    // delete call and ret assignment
    //
#ifdef PRINTABLE_O3
    call->bc_index = ~0u; // reset, otherwise assertion fails in dump_jit
    if (ret != NULL) ret->bc_index = ~0u;
#endif
    call->unlink();
    if (ret != NULL) ret->unlink();
    if (!safe && r->ptr == NULL) { // append call and ret instructions to the end of call_node
        call->insert_before(call_node->IR_instruction_list()); 
        // the call now reads its target through the field operand directly
        Operand *fld = ((Temp_Reg*)call->src(0))->inst()->src(0);
        ((Temp_Reg*)fld->base())->set_global_reg_cand();
        call->replace_src(0,fld);
        if (ret != NULL) {
            ret->insert_before(call_node->IR_instruction_list()); 
            assert(ret->src(0)->is_ret());
            // becomes "Ret = Ret"
            ret->set_dst(ret->src(0));
        }
    }
}

//
// Redirect every epilog predecessor that is reachable from the block of a
// recursion in the list (following first out-edges) to the prolog's
// successor, and drop its trailing "Ret = ..." assignment if it has one.
//
// The predecessors to redirect are collected first and rewired in a second
// pass, so the epilog's in-edges are not modified while they are scanned.
//
void Tail_Recursion::insert_jumps(Recursion *list) {
    Cfg_Node *const entry_blk = fg->prolog();
    Cfg_Node *const exit_blk  = fg->epilog();
    assert(entry_blk->out_edge_size() == 1);
    int n_preds = exit_blk->in_edge_size();
    Mem_Manager scratch(n_preds*sizeof(Cfg_Node*));
    Cfg_Node** to_redirect = (Cfg_Node**)scratch.alloc(n_preds*sizeof(Cfg_Node*));
    //
    // identify which nodes need jumps
    //
    Cfg_Int k;
    for (k = 0; k < n_preds; k++) {
        Cfg_Node *pred = exit_blk->in_edges(k);
        to_redirect[k] = NULL;
        Recursion *rec;
        for (rec = list; rec != NULL && to_redirect[k] == NULL; rec = rec->next) {
            Cfg_Node *walk = rec->bb;
            while (walk != exit_blk && walk != pred)
                walk = walk->out_edges(0);
            if (walk == pred)
                to_redirect[k] = pred;
        }
    }
    //
    // insert jumps
    //
    for (k = 0; k < n_preds; k++) {
        Cfg_Node *blk = to_redirect[k];
        if (blk != NULL) {
            blk->replace_edge(fg->mem_manager,exit_blk,entry_blk->out_edges(0));
            Inst *sentinel = blk->IR_instruction_list();
            Inst *tail = sentinel->prev();
            const bool ends_with_ret_asgn = tail != sentinel &&
                                            tail->dst() != NULL &&
                                            tail->dst()->is_ret();
            if (ends_with_ret_asgn)
                tail->unlink();
        }
    }
}

//
// Apply the per-call rewrites to every recursion in the list and then turn
// the recursions into jumps.  result is the accumulator temp; when it is
// NULL the accumulation clean-up step is skipped.
//
void Tail_Recursion::mov_args_eliminate_calls(Recursion *list, Temp_Reg* result) {
    Recursion *rec;
    //
    // redirect the post-call computation into the accumulator
    //
    if (result != NULL) {
        rec = list;
        while (rec != NULL) {
            clean_up_computation(rec,result);
            rec = rec->next;
        }
    }
    //
    // move arguments and eliminate calls
    //
    rec = list;
    while (rec != NULL) {
        mov_args_eliminate_call(rec);
        rec = rec->next;
    }

#ifdef TRACE_O3xxx
    fg->print_cfg("tail");
#endif
    //
    // insert jumps (to the node immediately following prolog)
    //
    insert_jumps(list);
}

//
// Closure handed to the flow graph traversal that runs this_ptr_defined.
//
class This_Ptr_Closure : public Closure {
public:
    Operand **ptr;  // candidate "this" operands, one slot per recursion (may be NULL)
    int n_rec;      // number of slots in ptr

    This_Ptr_Closure(Operand **candidates, int count)
        : ptr(candidates),
          n_rec(count)
    {}
};

//
// Flow graph visitor: clears (sets to NULL) every entry of the closure's
// operand array that is written by an instruction of node.  The prolog is
// skipped.  Pseudo assignments and instructions whose destination is not a
// vreg are ignored.
//
// c must be a This_Ptr_Closure.  Only the array it points to is modified;
// the closure's n_rec is left as it was.
//
static void this_ptr_defined(Cfg_Node *node, Closure *c) {
    if (node == node->flowgraph->prolog()) return; // skip prolog

    This_Ptr_Closure *tpc = (This_Ptr_Closure*)c;
    Operand **this_ptr = tpc->ptr;
    int n_rec = tpc->n_rec;
    //
    // Ignore trailing NULL entries.  The bound is re-checked against 0 every
    // time it shrinks: the original code decremented it unconditionally and
    // could then read this_ptr[-1].
    //
    while (n_rec > 0 && this_ptr[n_rec-1] == NULL)
        n_rec--;
    if (n_rec == 0) return;  // all entries are NULL
    Inst *inst_head = node->IR_instruction_list();
    Inst *i;
    for (i = inst_head->next(); i != inst_head && n_rec > 0; i = i->next()) {
        if (i->dst() != NULL && !i->is_pseudo_asgn() &&  i->dst()->is_vreg()) {
            int j;
            for (j = 0; j < n_rec; j++) {
                if (this_ptr[j] == i->dst())
                    this_ptr[j] = NULL;
            }
            while (n_rec > 0 && this_ptr[n_rec-1] == NULL)
                n_rec--;
        }
    }
}
//
// determine if we can move the checking of vtable all the way to the 
// function entry so that there is no overhead of checking for every 
// tail recursion. However, we need to duplicate the whole method.
//
// On return, r->ptr of each recursion is the vreg its "this" argument comes
// from if that vreg is not written by any block other than the prolog, and
// NULL otherwise.  Nothing is recorded when the method is too big or when
// the handler test below fails.
//
void Tail_Recursion::mov_checking_vtable_to_entry(Recursion *list) {
    // presumably: give up when the method has exception handlers (the
    // handler list is not just its own head) -- TODO confirm
    if (fg->handlers()->next() != fg->handlers()) return;
    //
    // if the current method is too big, we don't want to duplicate
    // the whole method.
    //
    size_t bc_size = method_get_byte_code_size(fg->m_handle());
    if (bc_size > MAX_TAIL_RECURSION_SIZE) return;

    //
    // create an array for this pointer: one slot per recursion, holding the
    // vreg that argument 0 is copied from, either directly or through one
    // single-def temp
    //
    Mem_Manager scratch(MAX_TAIL_RECURSION_SIZE*sizeof(Operand*));
    Operand **cand = (Operand**)scratch.alloc(MAX_TAIL_RECURSION_SIZE*sizeof(Operand*));
    int count = 0;
    Recursion *rec = list;
    while (rec != NULL) {
        Inst *this_arg = rec->call_i->get_arg(0);
        assert(this_arg->is_assignment());
        Operand *value = this_arg->src(0);
        Operand *home = NULL;
        if (value->is_vreg()) {
            home = value;
        } else if (value->is_single_def_temp_reg()) {
            Inst *def = ((Temp_Reg*)value)->inst();
            if (def->is_assignment() && def->src(0)->is_vreg())
                home = def->src(0);
        }
        cand[count] = home;
        count++;
        rec = rec->next;
    }
    //
    // traverse all nodes and set cand[i] to NULL if cand[i] is 
    // defined within the method
    //
    This_Ptr_Closure tpc(cand,count);
    fg->apply(this_ptr_defined,&tpc);
    //
    // record result
    //
    int idx = 0;
    for (rec = list; rec != NULL; rec = rec->next) {
        rec->ptr = cand[idx];
        idx++;
    }
}

//
// Hoist the vtable check of the recursions that have a usable this pointer
// (r->ptr != NULL) to the method entry.  A duplicate of the whole method is
// built and merged into the flow graph; each check block inserted after the
// prolog has an edge to the duplicate's prolog.
//
//                   +----------+
//                   |  prolog  |
//                   +----------+
//                        |
//                 +--------------+
//                 |  [r] == xxx  | check_node
//                 +--------------+
//                     /       \
//                    /         \
//           +-----------+    +------------+
//           | duplicate |    |   tail     |
//           |   method  |    |  recursion |
//
// Side effect: r->ptr of a recursion whose pointer duplicates an earlier
// one in the list is reset to NULL.
//
void Tail_Recursion::mov_vtable_checking(Recursion *list) {
    Inlined *inlined = NULL;
    Recursion *recursion = NULL;
    unsigned global_inlined_bc_size = 0;
    Mem_Manager mm(1); // temporary mem manager
    //
    // pass MAX_INLINE_SIZE to prevent any calls being inlined in 
    // the duplicated method 
    //
    Inline_Closure c(fg, cmpl_handle,exprs.mem,exprs,mm,inlined,recursion,
                     MAX_INLINE_SIZE, global_inlined_bc_size,
                     gc_requires_write_barriers);
    Inlined_Method im(list->bb, list->call_i, NULL, false,false,MAX_INLINE_SIZE, NULL);
    im.build_IR(&c); // duplicate the whole method

    Cfg_Node *prolog = fg->prolog();
    Cfg_Node *check_node;
    Cfg_Node *merged_prolog = im.merged_fg->prolog();
    Method_Handle mh = fg->m_handle();
    void *addr = class_get_vtable(method_get_class(mh));
    Inst *inst_head;
    //
    // merge the  duplicate method
    // insert instructions to check vtable
    //
    exprs.set_live_lcse(NULL);
    // id of virtual register 0 for class type (aload_0, see below)
    unsigned id0 = exprs.reg_map.virtual_reg_id(0,JIT_TYPE_CLASS);
    Recursion *r;
    for (r = list; r != NULL; r = r->next) {
        if (r->ptr == NULL) continue;
        //
        // only need to generate one check for duplicating ptrs
        //
        Recursion *rr;
        for (rr = r->next; rr != NULL; rr = rr->next)
            if (rr->ptr == r->ptr) rr->ptr = NULL;

        // new block after the prolog, with an edge to the duplicate
        check_node = fg->split_cfg_node(prolog);
        prolog->add_edge(fg->mem_manager, check_node);
        check_node->add_edge(fg->mem_manager, merged_prolog);
        inst_head = check_node->IR_instruction_list();
        unsigned id = ((Virtual_Reg*)r->ptr)->id;
        //
        // if r->ptr is not aload_0, then we need to check if r->ptr is NULL
        //
        Inst *this_ptr = NULL;
        Operand_Exp *exp = exprs.lookup_reg_exp(id,JIT_TYPE_CLASS,1); 
        if (id != id0) { 
            this_ptr = exprs.gen_opnd_tuple(inst_head,exp);
            assert(this_ptr->dst()->is_temp_reg());
            ((Temp_Reg*)this_ptr->dst())->set_global_reg_cand();
            Inst *test = gen_test_inst(exprs.mem,exprs,inst_head,this_ptr);
            gen_branch(exprs.mem,exprs,inst_head,false, Exp::Bne, test);
            
            // the vtable compare goes into a second block of its own
            check_node = fg->split_cfg_node(prolog);
            prolog->add_edge(fg->mem_manager, check_node);
            check_node->add_edge(fg->mem_manager, merged_prolog);
            inst_head = check_node->IR_instruction_list();
        }

        if (this_ptr == NULL)
            this_ptr = exprs.gen_opnd_tuple(inst_head,exp);
        Inst *vaddr = exprs.lookup_imm((unsigned)addr,JIT_TYPE_ADDR,inst_head);
	    Inst *vtab  = exprs.lookup_inst(Exp::Vtable,this_ptr,NULL,JIT_TYPE_ADDR,inst_head);
        //
        // insert "if [b] == xxx" to check vtable address
        //
        Inst *cmp_i = gen_cmp_inst(exprs.mem, exprs, inst_head, vtab, vaddr, JIT_TYPE_ADDR, false);
        // Annotate cmp_i to indicate that it is a vtable compare, and which
        // GCTrack_Operand is being compared.
        ((Compare_Inst *)cmp_i)->set_object_compared_with_vtable(vtab->src(0));
        ((Compare_Inst *)cmp_i)->set_method_inlined(mh);
        gen_branch(exprs.mem,exprs,inst_head,false, Exp::Bne, cmp_i);
    }
    //
    // redirect merged_fg's epilog to fg's epilog
    // (the loop assumes replace_edge removes the edge from m_epilog's
    // in-edges, so in_edges(0) is a different node each time)
    //
    Cfg_Node *m_epilog = im.merged_fg->epilog();
    while (m_epilog->in_edge_size() > 0)
        m_epilog->in_edges(0)->replace_edge(fg->mem_manager, m_epilog, fg->epilog());
    //
    // replace merged_fg's incoming arguments with formal args
    //
    Inst *m_head = im.merged_fg->prolog()->IR_instruction_list();
    Inst *arg = m_head->next();     // cursor into the duplicate's prolog
    inst_head = prolog->IR_instruction_list();
    Inst *i;
    for (i = inst_head->next(); i != inst_head; i = i->next()) {
        if (i->is_assignment() && i->src(0)->is_arg()) {
            Inst *a = i;
            if (!i->dst()->is_vreg()) {
                // the arg was copied to a non-vreg first: advance i to the
                // later assignment that reads that destination
                for (i = i->next(); i != inst_head; i = i->next())
                    if (i->is_assignment() && i->src(0) == a->dst()) break;
                assert(i != inst_head);
            }
            //
            // find corresponding arg and replace it
            //
            for (; arg != m_head; arg = arg->next())
                if (arg->is_assignment() && arg->src(0) == a->src(0)) break;
            assert(arg != m_head);
            arg->replace_src(0,i->dst());
        }
    }
}

//
// Entry point of tail recursion elimination for the recursions in the list.
// Does nothing when the list is empty or the recursions are found unsafe.
//
void Tail_Recursion::transform(Recursion *list) {
    //
    // determine if there are any side effects that make tail recursion
    // transformation unsafe
    //
    if (list == NULL) return;
    if (recursion_is_unsafe(list)) return;
    //
    // determine if we need checking of vtable to make the transformation safe
    //
    safe = safe_to_inline(fg->m_handle(), list->call_i);
    if (!safe) {
        mov_checking_vtable_to_entry(list);
    }

#ifdef TRACE_O3
    cout << "### Tail Recursion Elimination" << endl;
#endif
    //
    // insert an assignment "T = imm".  T is used to hold the accumulated 
    // result. T is initialized to 0 or 1 if rec_ty is "add" or "mul",
    // respectively.
    //
    Temp_Reg *accumulator = gen_initial_value_of_result(list->call_i->type());
    //
    // move arguments
    //
    mov_args_eliminate_calls(list,accumulator);
    //
    // mov_vtable_checking may insert blocks which push the entry of tail 
    // recursion away from the prolog, so remember the entry before calling it
    //
    Cfg_Node *loop_entry = fg->prolog()->out_edges(0);
    //
    // move checking of vtable if possible, i.e. if at least one recursion
    // has a this pointer recorded
    //
    bool hoist_check = false;
    Recursion *rec = list;
    while (rec != NULL && !hoist_check) {
        hoist_check = (rec->ptr != NULL);
        rec = rec->next;
    }
    if (hoist_check) {
        mov_vtable_checking(list);
    }
    //
    // replace Ret.I = t25 with   T = T +/* t25
    //                            Ret.I = T
    //
    replace_ret_assignment(list, accumulator, loop_entry);
}

