/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2020-2023. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

#include <algorithm>
#include "beam_asm.hpp"

extern "C"
{
#include "erl_bif_table.h"
#include "big.h"
#include "beam_catches.h"
#include "beam_common.h"
#include "code_ix.h"
}

using namespace asmjit;

/*
 * We considered specializing tuple_size/1, but ultimately didn't
 * consider it worth doing.
 *
 * At the time of writing, there were 294 uses of tuple_size/1
 * in the OTP source code. (11 of them were in dialyzer.)
 *
 * The code size for the specialization was 34 bytes,
 * while the code size for the bif1 instruction was 24 bytes.
 */

/*
 * Shared error fragment for the inlined hd/1.
 *
 * RET = the term that hd/1 was applied to (emit_bif_hd() loads its source
 *       into RET before calling this fragment).
 *
 * Never returns to the caller; control is handed to the raise_exception
 * fragment.
 */
void BeamGlobalAssembler::emit_handle_hd_error() {
    /* MFA handed to raise_exception in ARG4. It is static because the
     * generated code embeds its address as an immediate. */
    static ErtsCodeMFA mfa = {am_erlang, am_hd, 1};

    /* Store the offending argument in X register 0. */
    a.mov(getXRef(0), RET);
    /* Record badarg as the failure reason in the process structure. */
    a.mov(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(BADARG));
    a.mov(ARG4, imm(&mfa));
    a.jmp(labels[raise_exception]);
}

/*
 * At the time of implementation, there were 3285 uses of hd/1 in
 * the OTP source code. Most of them were in code generated by
 * yecc.
 *
 * The code size for this specialization of hd/1 is 21 bytes,
 * while the code size for the bif1 instruction is 24 bytes.
 */
void BeamModuleAssembler::emit_bif_hd(const ArgSource &Src,
                                      const ArgRegister &Hd) {
    Label good_cons = a.newLabel();

    mov_arg(RET, Src);
    a.test(RETb, imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_LIST));

    a.short_().je(good_cons);
    safe_fragment_call(ga->get_handle_hd_error());

    a.bind(good_cons);
    {
        x86::Gp boxed_ptr = emit_ptr_val(RET, RET);
        a.mov(ARG2, getCARRef(boxed_ptr));
        mov_arg(Hd, ARG2);
    }
}

/*
 * Shared error fragment for the inlined element/2.
 *
 * ARG1 = Position
 * ARG2 = Tuple
 *
 * Never returns to the caller; control is handed to the raise_exception
 * fragment.
 */
void BeamGlobalAssembler::emit_handle_element_error() {
    /* MFA handed to raise_exception in ARG4. It is static because the
     * generated code embeds its address as an immediate. */
    static ErtsCodeMFA mfa = {am_erlang, am_element, 2};

    /* Store both arguments of the failed call in X registers 0 and 1. */
    a.mov(getXRef(0), ARG1);
    a.mov(getXRef(1), ARG2);
    /* Record badarg as the failure reason in the process structure. */
    a.mov(x86::qword_ptr(c_p, offsetof(Process, freason)), imm(BADARG));
    a.mov(ARG4, imm(&mfa));

    a.jmp(labels[raise_exception]);
}

/* Shared fragment implementing element/2 for a position that is not known
 * at load time.
 *
 * ARG1 = Position (1-based)
 * ARG2 = Tuple
 * ARG3 = 0 if in body, otherwise address of failure label.
 *
 * Will return with a value in RET only if the element operation succeeds.
 * On failure it either jumps to the handle_element_error fragment (ARG3 is
 * zero) or discards its return address and jumps to the address in ARG3.
 * ARG1 and ARG2 are left untouched so that the error fragment can still
 * read them. */
void BeamGlobalAssembler::emit_bif_element_shared() {
    Label error = a.newLabel();

    emit_enter_frame();

    /* Fail unless the immediate tag bits of the position are those of a
     * small integer. Only the low byte is needed for the tag test. */
    a.mov(RETd, ARG1d);
    a.and_(RETb, imm(_TAG_IMMED1_MASK));
    a.cmp(RETb, imm(_TAG_IMMED1_SMALL));
    a.short_().jne(error);

    /* ARG4 = untagged position. The arithmetic shift keeps the sign. */
    a.mov(ARG4, ARG1);
    a.sar(ARG4, imm(_TAG_IMMED1_SIZE));

    /* The tuple argument must at least be a boxed term. */
    emit_is_boxed(error, ARG2, dShort);

    /* ARG5 = address of the boxed term's first word, ARG6 = that word
     * (the header). */
    a.mov(ARG5, ARG2);
    (void)emit_ptr_val(ARG5, ARG5);
    a.lea(ARG5, emit_boxed_val(ARG5));
    a.mov(ARG6, x86::qword_ptr(ARG5));
    a.mov(RETd, ARG6d);
    /* The tag bits of an arity header are all zero (asserted below), so any
     * bit left after masking means that the boxed term is not a tuple. */
    ERTS_CT_ASSERT(make_arityval_zero() == 0);
    a.and_(RETb, imm(_TAG_HEADER_MASK));
    a.short_().jne(error);

    /* ARG6 = arity. Comparing it unsigned against Position - 1 rejects
     * position 0 (which wraps around to the largest unsigned value),
     * negative positions, and positions beyond the arity, all with a
     * single branch. */
    a.shr(ARG6, imm(_HEADER_ARITY_OFFS));
    a.dec(ARG4);
    a.cmp(ARG6, ARG4);
    a.short_().jbe(error);

    /* Restore the 1-based position; word 0 is the header, so the element
     * is found at word index Position (scaled by 8 through the shift of
     * 3). */
    a.inc(ARG4);
    a.mov(RET, x86::qword_ptr(ARG5, ARG4, 3));

    emit_leave_frame();
    a.ret();

    a.bind(error);
    {
        /* The frame must be torn down on the failure path as well. */
        emit_leave_frame();

        /* ARG3 == 0: raise the exception through the shared error
         * fragment. */
        a.test(ARG3, ARG3);
        a.je(labels[handle_element_error]);

        /* Discard return address and jump to fail label. */
        a.add(x86::rsp, imm(8));
        a.jmp(ARG3);
    }
}

/*
 * At the time of implementation, there were 3678 uses of element/2 in
 * the OTP source code. 3137 of those uses had a literal first argument
 * (the position in the tuple), while 540 uses had a variable first
 * argument. Calls to element/2 (with a literal first argument) is
 * especially common in code generated by yecc.
 */
void BeamModuleAssembler::emit_bif_element(const ArgLabel &Fail,
                                           const ArgSource &Pos,
                                           const ArgSource &Tuple,
                                           const ArgRegister &Dst) {
    bool const_position;

    const_position = Pos.isSmall() && Pos.as<ArgSmall>().getSigned() > 0 &&
                     Pos.as<ArgSmall>().getSigned() <= (Sint)MAX_ARITYVAL;

    /*
     * Try to optimize the use of a tuple as a lookup table.
     */
    if (exact_type(Pos, BEAM_TYPE_INTEGER) && Tuple.isLiteral()) {
        Eterm tuple = beamfile_get_literal(beam, Tuple.as<ArgLiteral>().get());

        if (is_tuple(tuple)) {
            Label error = a.newLabel(), next = a.newLabel();
            Sint size = Sint(arityval(*tuple_val(tuple)));
            auto [min, max] = getIntRange(Pos);
            bool is_bounded = min <= max;
            bool can_fail = !is_bounded || min < 1 || size < max;

            comment("skipped tuple test since source is always a literal "
                    "tuple");
            mov_arg(ARG2, Tuple);
            mov_arg(ARG1, Pos);
            x86::Gp boxed_ptr = emit_ptr_val(ARG3, ARG2);
            a.lea(ARG4, emit_boxed_val(boxed_ptr));
            if (always_small(Pos)) {
                comment("skipped test for small position since it is always "
                        "small");
            } else {
                comment("simplified test for small position since it is an "
                        "integer");
                a.test(ARG1.r8(), imm(TAG_PRIMARY_LIST));
                a.short_().je(error);
            }

            a.mov(RET, ARG1);
            a.sar(RET, imm(_TAG_IMMED1_SIZE));
            if (is_bounded && min >= 1) {
                comment("skipped check for position =:= 0 since it is always "
                        ">= 1");
            } else {
                a.short_().jz(error);
            }
            if (is_bounded && min >= 0 && size >= max) {
                comment("skipped check for negative position and position "
                        "beyond tuple");
            } else {
                /* Note: Also checks for negative size. */
                a.cmp(RET, imm(size));
                a.short_().ja(error);
            }

            a.mov(RET, x86::qword_ptr(ARG4, RET, 3));
            if (can_fail) {
                a.short_().jmp(next);
            }

            a.bind(error);
            if (can_fail) {
                if (Fail.get() == 0) {
                    safe_fragment_call(ga->get_handle_element_error());
                } else {
                    a.jmp(resolve_beam_label(Fail));
                }
            }

            a.bind(next);
            mov_arg(Dst, RET);

            return;
        }
    }

    if (const_position) {
        /* The position is a valid small integer. Inline the code.
         *
         * The size of the code is 40 bytes, while the size of the bif2
         * instruction is 36 bytes. */
        Uint position = Pos.as<ArgSmall>().getSigned();

        mov_arg(ARG2, Tuple);

        x86::Gp boxed_ptr = emit_ptr_val(ARG3, ARG2);

        if (exact_type(Tuple, BEAM_TYPE_TUPLE)) {
            comment("skipped tuple test since source is always a tuple");
            ERTS_CT_ASSERT(Support::isInt32(make_arityval(MAX_ARITYVAL)));
            a.cmp(emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)),
                  imm(make_arityval_unchecked(position)));

            if (Fail.get() == 0) {
                Label next = a.newLabel();

                a.short_().jae(next);

                mov_imm(ARG1, make_small(position));
                safe_fragment_call(ga->get_handle_element_error());

                a.bind(next);
            } else {
                a.jb(resolve_beam_label(Fail));
            }
        } else {
            Distance dist;
            Label error;

            if (Fail.get() == 0) {
                error = a.newLabel();
                dist = dShort;
            } else {
                error = resolve_beam_label(Fail);
                dist = dLong;
            }

            emit_is_boxed(error, Tuple, ARG2, dist);

            a.mov(RETd, emit_boxed_val(boxed_ptr, 0, sizeof(Uint32)));
            a.cmp(RETd, imm(make_arityval_unchecked(position)));

            if (Fail.get() == 0) {
                a.short_().jb(error);
            } else {
                a.jb(error);
            }

            ERTS_CT_ASSERT(make_arityval_zero() == 0);
            a.and_(RETb, imm(_TAG_HEADER_MASK));

            if (Fail.get() == 0) {
                Label next = a.newLabel();

                a.short_().je(next);

                a.bind(error);
                {
                    mov_imm(ARG1, make_small(position));
                    safe_fragment_call(ga->get_handle_element_error());
                }

                a.bind(next);
            } else {
                a.jne(error);
            }
        }

        a.mov(RET, emit_boxed_val(boxed_ptr, position * sizeof(Eterm)));
    } else {
        /* The code is too large to inline. Call a shared fragment.
         *
         * The size of the code that calls the shared fragment is 19 bytes,
         * while the size of the bif2 instruction is 36 bytes. */
        mov_arg(ARG2, Tuple);
        mov_arg(ARG1, Pos);

        if (Fail.get() != 0) {
            a.lea(ARG3, x86::qword_ptr(resolve_beam_label(Fail)));
        } else {
            mov_imm(ARG3, 0);
        }

        safe_fragment_call(ga->get_bif_element_shared());
    }

    mov_arg(Dst, RET);
}
