From 4d30b6d5c9332c3645f1675d19d2cbb690148116 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Mon, 15 Dec 2025 18:47:29 +0300 Subject: [PATCH] Update IR IR commit: 3d72a7295c77743da22b36bab808ebb5f564488d --- ext/opcache/jit/ir/ir.c | 176 ++++++++++++- ext/opcache/jit/ir/ir.h | 24 +- ext/opcache/jit/ir/ir_aarch64.dasc | 376 ++++++++++++++++++++++------ ext/opcache/jit/ir/ir_builder.h | 16 +- ext/opcache/jit/ir/ir_cfg.c | 11 +- ext/opcache/jit/ir/ir_check.c | 6 + ext/opcache/jit/ir/ir_dump.c | 13 + ext/opcache/jit/ir/ir_emit.c | 33 +-- ext/opcache/jit/ir/ir_fold.h | 122 +++++---- ext/opcache/jit/ir/ir_gcm.c | 38 +-- ext/opcache/jit/ir/ir_private.h | 25 +- ext/opcache/jit/ir/ir_ra.c | 113 ++------- ext/opcache/jit/ir/ir_save.c | 16 ++ ext/opcache/jit/ir/ir_sccp.c | 64 ++--- ext/opcache/jit/ir/ir_x86.dasc | 384 ++++++++++++++++++++++++----- 15 files changed, 1032 insertions(+), 385 deletions(-) diff --git a/ext/opcache/jit/ir/ir.c b/ext/opcache/jit/ir/ir.c index 81621ce11bd36..745a66b2163ae 100644 --- a/ext/opcache/jit/ir/ir.c +++ b/ext/opcache/jit/ir/ir.c @@ -118,7 +118,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted { char buf[128]; - if (insn->op == IR_FUNC || insn->op == IR_SYM) { + if (insn->op == IR_FUNC || insn->op == IR_SYM || insn->op == IR_LABEL) { fprintf(f, "%s", ir_get_str(ctx, insn->val.name)); return; } else if (insn->op == IR_STR) { @@ -290,6 +290,7 @@ void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted #define ir_op_kind_prb IR_OPND_PROB #define ir_op_kind_opt IR_OPND_PROB #define ir_op_kind_pro IR_OPND_PROTO +#define ir_op_kind_lbl IR_OPND_LABEL_REF #define _IR_OP_FLAGS(name, flags, op1, op2, op3) \ IR_OP_FLAGS(ir_op_flag_ ## flags, ir_op_kind_ ## op1, ir_op_kind_ ## op2, ir_op_kind_ ## op3), @@ -689,6 +690,13 @@ ir_ref ir_const_str(ir_ctx *ctx, ir_ref str) return ir_const_ex(ctx, val, IR_ADDR, IR_OPTX(IR_STR, IR_ADDR, 0)); } +ir_ref ir_const_label(ir_ctx *ctx, ir_ref str) +{ + ir_val val; + val.u64 = str; + return ir_const_ex(ctx, val, IR_ADDR, IR_OPTX(IR_LABEL, IR_ADDR, 0)); +} + ir_ref ir_str(ir_ctx *ctx, const char *s) { size_t len; @@ -879,6 +887,17 @@ static ir_ref _ir_fold_cse(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir return IR_UNUSED; } +IR_ALWAYS_INLINE ir_ref _ir_fold_cast(ir_ctx *ctx, ir_ref ref, ir_type type) +{ + if (ctx->ir_base[ref].type == type) { + return ref; + } else if (IR_IS_CONST_REF(ref) && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)) { + return ir_const(ctx, ctx->ir_base[ref].val, type); + } else { + return ir_emit1(ctx, IR_OPT(IR_BITCAST, type), ref); + } +} + #define IR_FOLD(X) IR_FOLD1(X, __LINE__) #define IR_FOLD1(X, Y) IR_FOLD2(X, Y) #define IR_FOLD2(X, Y) case IR_RULE_ ## Y: @@ -1158,7 +1177,7 @@ ir_ref ir_bind(ir_ctx *ctx, ir_ref var, ir_ref def) IR_ASSERT(var < 0); if (!ir_hashtab_add(ctx->binding, def, var)) { /* Add a copy with different binding */ - def = ir_emit2(ctx, IR_OPT(IR_COPY, ctx->ir_base[def].type), def, 1); + def = ir_emit2(ctx, IR_OPT(IR_COPY, ctx->ir_base[def].type), def, IR_COPY_HARD); ir_hashtab_add(ctx->binding, def, var); } return def; @@ -1836,8 +1855,49 @@ int ir_mem_flush(void *ptr, size_t size) return 1; } #else + +#if defined(__linux__) && defined(__x86_64__) && defined(PKEY_DISABLE_WRITE) +# define HAVE_PKEY_MPROTECT 1 +#endif + +#ifdef HAVE_PKEY_MPROTECT + +#ifndef PKEY_DISABLE_EXECUTE +# define PKEY_DISABLE_EXECUTE 0 +#endif + +int pkey_mprotect(void* addr, size_t len, int prot, int pkey) __attribute__((weak)); +int pkey_alloc(unsigned int, 
unsigned int) __attribute__((weak)); +int pkey_free(int) __attribute__((weak)); +int pkey_set(int, unsigned) __attribute__((weak)); + +static int ir_pkey = 0; +#endif + void *ir_mem_mmap(size_t size) { +#ifdef HAVE_PKEY_MPROTECT + if (!ir_pkey && pkey_mprotect) { + int key = pkey_alloc(0, PKEY_DISABLE_WRITE); + if (key > 0) { + ir_pkey = key; + } + } + if (ir_pkey > 0) { + void *ret = mmap(NULL, size, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ret == MAP_FAILED) { + return NULL; + } + if (pkey_mprotect(ret, size, PROT_EXEC|PROT_READ|PROT_WRITE, ir_pkey) != 0) { +#ifdef IR_DEBUG + fprintf(stderr, "pkey_mprotect() failed\n"); +#endif + munmap(ret, size); + return NULL; + } + return ret; + } +#endif int prot_flags = PROT_EXEC; #if defined(__NetBSD__) prot_flags |= PROT_MPROTECT(PROT_READ|PROT_WRITE); @@ -1852,11 +1912,28 @@ void *ir_mem_mmap(size_t size) int ir_mem_unmap(void *ptr, size_t size) { munmap(ptr, size); +#ifdef HAVE_PKEY_MPROTECT +// if (ir_pkey > 0) { +// pkey_free(ir_pkey); +// ir_pkey = 0; +// } +#endif return 1; } int ir_mem_protect(void *ptr, size_t size) { +#ifdef HAVE_PKEY_MPROTECT + if (ir_pkey > 0) { + if (pkey_set(ir_pkey, PKEY_DISABLE_WRITE)) { +#ifdef IR_DEBUG + fprintf(stderr, "mprotect() failed\n"); +#endif + return 0; + } + return 1; + } +#endif if (mprotect(ptr, size, PROT_READ | PROT_EXEC) != 0) { #ifdef IR_DEBUG fprintf(stderr, "mprotect() failed\n"); @@ -1868,6 +1945,17 @@ int ir_mem_protect(void *ptr, size_t size) int ir_mem_unprotect(void *ptr, size_t size) { +#ifdef HAVE_PKEY_MPROTECT + if (ir_pkey > 0) { + if (pkey_set(ir_pkey, PKEY_DISABLE_EXECUTE)) { +#ifdef IR_DEBUG + fprintf(stderr, "mprotect() failed\n"); +#endif + return 0; + } + return 1; + } +#endif if (mprotect(ptr, size, PROT_READ | PROT_WRITE) != 0) { #ifdef IR_DEBUG fprintf(stderr, "mprotect() failed\n"); @@ -2070,7 +2158,26 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_load_i(ir_ctx *ctx, ir_ref ref, ir_type } } else if (insn->op == IR_RSTORE) { modified_regset |= (1 << insn->op3); - } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_CALL || insn->op == IR_VSTORE) { + } else if (insn->op == IR_CALL) { + ir_insn *func = &ctx->ir_base[insn->op2]; + ir_ref func_proto; + const ir_proto_t *proto; + + if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) { + func_proto = func->proto; + } else if (func->op == IR_PROTO) { + func_proto = func->op2; + } else { + break; + } + if (!func_proto) { + break; + } + proto = (const ir_proto_t *)ir_get_str(ctx, func_proto); + if (!(proto->flags & (IR_CONST_FUNC|IR_PURE_FUNC))) { + break; + } + } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_VSTORE) { return IR_UNUSED; } ref = insn->op1; @@ -2116,7 +2223,26 @@ IR_ALWAYS_INLINE ir_ref ir_find_aliasing_vload_i(ir_ctx *ctx, ir_ref ref, ir_typ break; } } - } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_CALL || insn->op == IR_STORE) { + } else if (insn->op == IR_CALL) { + ir_insn *func = &ctx->ir_base[insn->op2]; + ir_ref func_proto; + const ir_proto_t *proto; + + if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) { + func_proto = func->proto; + } else if (func->op == IR_PROTO) { + func_proto = func->op2; + } else { + break; + } + if (!func_proto) { + break; + } + proto = (const ir_proto_t *)ir_get_str(ctx, func_proto); + if (!(proto->flags & (IR_CONST_FUNC|IR_PURE_FUNC))) { + break; + } + } else if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN || insn->op == IR_STORE) { break; } ref = insn->op1; 
@@ -3013,6 +3139,16 @@ void _ir_IJMP(ir_ctx *ctx, ir_ref addr) ctx->control = IR_UNUSED; } +ir_ref _ir_IGOTO(ir_ctx *ctx, ir_ref addr) +{ + ir_ref ref; + + IR_ASSERT(ctx->control); + ctx->control = ref = ir_emit2(ctx, IR_IGOTO, ctx->control, addr); + ctx->control = IR_UNUSED; + return ref; +} + ir_ref _ir_ADD_OFFSET(ir_ctx *ctx, ir_ref addr, uintptr_t offset) { if (offset) { @@ -3135,6 +3271,18 @@ void _ir_VSTORE(ir_ctx *ctx, ir_ref var, ir_ref val) ctx->control = ir_emit3(ctx, IR_VSTORE, ctx->control, var, val); } +ir_ref _ir_VLOAD_v(ir_ctx *ctx, ir_type type, ir_ref var) +{ + IR_ASSERT(ctx->control); + return ctx->control = ir_emit2(ctx, IR_OPT(IR_VLOAD_v, type), ctx->control, var); +} + +void _ir_VSTORE_v(ir_ctx *ctx, ir_ref var, ir_ref val) +{ + IR_ASSERT(ctx->control); + ctx->control = ir_emit3(ctx, IR_VSTORE_v, ctx->control, var, val); +} + ir_ref _ir_TLS(ir_ctx *ctx, ir_ref index, ir_ref offset) { IR_ASSERT(ctx->control); @@ -3193,6 +3341,18 @@ void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val) ctx->control = ir_emit3(ctx, IR_STORE, ctx->control, addr, val); } +ir_ref _ir_LOAD_v(ir_ctx *ctx, ir_type type, ir_ref addr) +{ + IR_ASSERT(ctx->control); + return ctx->control = ir_emit2(ctx, IR_OPT(IR_LOAD_v, type), ctx->control, addr); +} + +void _ir_STORE_v(ir_ctx *ctx, ir_ref addr, ir_ref val) +{ + IR_ASSERT(ctx->control); + ctx->control = ir_emit3(ctx, IR_STORE_v, ctx->control, addr, val); +} + void _ir_VA_START(ir_ctx *ctx, ir_ref list) { IR_ASSERT(ctx->control); @@ -3217,11 +3377,13 @@ ir_ref _ir_VA_ARG(ir_ctx *ctx, ir_type type, ir_ref list) return ctx->control = ir_emit2(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list); } -ir_ref _ir_VA_ARG_EX(ir_ctx *ctx, ir_type type, ir_ref list, size_t size) +ir_ref _ir_VA_ARG_EX(ir_ctx *ctx, ir_type type, ir_ref list, size_t size, size_t align) { IR_ASSERT(ctx->control); - IR_ASSERT(size <= 0x7fffffff); - return ctx->control = ir_emit3(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list, (ir_ref)size); + IR_ASSERT(size <= 0x0fffffff); + IR_ASSERT(align != 0 && ((align & (align - 1)) == 0) && align <= 128); + return ctx->control = ir_emit3(ctx, IR_OPT(IR_VA_ARG, type), ctx->control, list, + (ir_ref)IR_VA_ARG_OP3(size, align)); } ir_ref _ir_BLOCK_BEGIN(ir_ctx *ctx) diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h index 8fcfbffa7d6bc..a96650597055a 100644 --- a/ext/opcache/jit/ir/ir.h +++ b/ext/opcache/jit/ir/ir.h @@ -216,6 +216,7 @@ typedef enum _ir_type { * prb - branch probability 1-99 (0 - unspecified): (IF_TRUE, IF_FALSE, CASE_VAL, CASE_DEFAULT) * opt - optional number * pro - function prototype + * lbl - label used as value (a reference to constant): (BEGIN) * * The order of IR opcodes is carefully selected for efficient folding. 
 * - foldable instructions go first
@@ -322,6 +323,7 @@ typedef enum _ir_type {
 	_(FUNC_ADDR, r0, ___, ___, ___) /* constant func ref */ \
 	_(FUNC, r0, ___, ___, ___) /* constant func ref */ \
 	_(SYM, r0, ___, ___, ___) /* constant symbol ref */ \
+	_(LABEL, r0, ___, ___, ___) /* label address ref */ \
 	_(STR, r0, ___, ___, ___) /* constant str ref */ \
 	\
 	/* call ops */ \
@@ -334,11 +336,15 @@ typedef enum _ir_type {
 	_(BLOCK_BEGIN, a1, src, ___, ___) /* stacksave */ \
 	_(BLOCK_END, a2, src, def, ___) /* stackrestore */ \
 	_(VLOAD, l2, src, var, ___) /* load value of local var */ \
+	_(VLOAD_v, l2, src, var, ___) /* volatile variant of VLOAD */ \
 	_(VSTORE, s3, src, var, def) /* store value to local var */ \
+	_(VSTORE_v, s3, src, var, def) /* volatile variant of VSTORE */ \
 	_(RLOAD, l1X2, src, num, opt) /* load value from register */ \
 	_(RSTORE, s2X1, src, def, num) /* store value into register */ \
 	_(LOAD, l2, src, ref, ___) /* load from memory */ \
+	_(LOAD_v, l2, src, ref, ___) /* volatile variant of LOAD */ \
 	_(STORE, s3, src, ref, def) /* store to memory */ \
+	_(STORE_v, s3, src, ref, def) /* volatile variant of STORE */ \
 	_(TLS, l1X2, src, num, num) /* thread local variable */ \
 	_(TRAP, x1, src, ___, ___) /* DebugBreak */ \
 	/* memory reference ops (A, H, U, S, TMP, STR, NEW, X, V) ??? */ \
@@ -360,7 +366,7 @@ typedef enum _ir_type {
 	/* control-flow nodes */ \
 	_(START, S0X1, ret, ___, ___) /* function start */ \
 	_(ENTRY, S1X1, src, num, ___) /* entry with a fake src edge */ \
-	_(BEGIN, S1, src, ___, ___) /* block start */ \
+	_(BEGIN, S1X1, src, lbl, ___) /* block start, optional &&lbl */ \
 	_(IF_TRUE, S1X1, src, prb, ___) /* IF TRUE proj. */ \
 	_(IF_FALSE, S1X1, src, prb, ___) /* IF FALSE proj. */ \
 	_(CASE_VAL, S2X1, src, def, prb) /* switch proj. */ \
@@ -372,8 +378,9 @@ typedef enum _ir_type {
 	_(LOOP_END, E1, src, ___, ___) /* loop end */ \
 	_(IF, E2, src, def, ___) /* conditional control split */ \
 	_(SWITCH, E2, src, def, ___) /* multi-way control split */ \
+	_(IGOTO, E2, src, def, ___) /* computed goto (internal) */ \
+	_(IJMP, T2X1, src, def, ret) /* computed goto (terminating) */ \
 	_(RETURN, T2X1, src, def, ret) /* function return */ \
-	_(IJMP, T2X1, src, def, ret) /* computed goto */ \
 	_(UNREACHABLE, T1X2, src, ___, ret) /* unreachable (tailcall, etc) */ \
 	\
 	/* deoptimization helper */ \
@@ -400,6 +407,13 @@ typedef enum _ir_op {
 #define IR_OPTX(op, type, n) ((uint32_t)(op) | ((uint32_t)(type) << IR_OPT_TYPE_SHIFT) | ((uint32_t)(n) << IR_OPT_INPUTS_SHIFT))
 #define IR_OPT_TYPE(opt) (((opt) & IR_OPT_TYPE_MASK) >> IR_OPT_TYPE_SHIFT)
 
+/* "opt" modifiers */
+#define IR_COPY_HARD (1<<0)
+
+#define IR_VA_ARG_SIZE(op3) (((uint32_t)(op3) >> 3))
+#define IR_VA_ARG_ALIGN(op3) (1U << ((uint32_t)(op3) & 0x7))
+#define IR_VA_ARG_OP3(s, a) (((s) << 3) | ir_ntzl(a))
+
 /* IR References */
 typedef int32_t ir_ref;
 
@@ -533,6 +547,9 @@ void ir_strtab_free(ir_strtab *strtab);
 #define IR_EXTERN (1<<5)
 #define IR_CONST (1<<6)
 
+#define IR_CONST_FUNC (1<<6)
+#define IR_PURE_FUNC (1<<7)
+
 #define IR_INITIALIZED (1<<7) /* sym data flag: constant or an initialized variable */
 #define IR_CONST_STRING (1<<8) /* sym data flag: constant string */
 
@@ -648,7 +665,6 @@ struct _ir_ctx {
 		ir_ref vars; /* list of VARs (used by register allocator) */
 	};
 	ir_snapshot_create_t snapshot_create;
-	int32_t stack_frame_alignment;
 	int32_t stack_frame_size; /* spill stack frame size (used by register allocator and code generator) */
 	int32_t call_stack_size; /* stack for parameter passing (used by register allocator and code
generator) */ uint64_t used_preserved_regs; @@ -698,6 +714,7 @@ ir_ref ir_const_func_addr(ir_ctx *ctx, uintptr_t c, ir_ref proto); ir_ref ir_const_func(ir_ctx *ctx, ir_ref str, ir_ref proto); ir_ref ir_const_sym(ir_ctx *ctx, ir_ref str); ir_ref ir_const_str(ir_ctx *ctx, ir_ref str); +ir_ref ir_const_label(ir_ctx *ctx, ir_ref str); ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t c); @@ -893,6 +910,7 @@ struct _ir_loader { void*(*resolve_sym_name) (ir_loader *loader, const char *name, uint32_t flags); bool (*has_sym) (ir_loader *loader, const char *name); bool (*add_sym) (ir_loader *loader, const char *name, void *addr); + bool (*add_label) (ir_loader *loader, const char *name, void *addr); }; void ir_loader_init(void); diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index 12c3694d469f0..b553243309f54 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -218,6 +218,7 @@ typedef struct _ir_backend_data { dasm_State *dasm_state; ir_bitset emit_constants; int rodata_label, jmp_table_label; + bool resolved_label_syms; } ir_backend_data; #define IR_GP_REG_NAME(code, name64, name32) \ @@ -315,6 +316,7 @@ const char *ir_reg_name(int8_t reg, ir_type type) _(RETURN_VOID) \ _(RETURN_INT) \ _(RETURN_FP) \ + _(IGOTO_DUP) \ #define IR_RULE_ENUM(name) IR_ ## name, @@ -385,7 +387,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co n++; break; } - } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { + } else if (!IR_IS_CONST_REF(insn->op2) && ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; } @@ -478,10 +480,16 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co if (IR_IS_CONST_REF(insn->op1)) { constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; + } else if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { + constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; } if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; + } else if (!IR_IS_CONST_REF(insn->op2) && ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { + constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); + n++; } break; case IR_CMP_INT: @@ -520,6 +528,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co } break; case IR_VSTORE: + case IR_VSTORE_v: insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(insn->op3)) { insn = &ctx->ir_base[insn->op3]; @@ -596,6 +605,19 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co } flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; break; + case IR_IGOTO: + insn = &ctx->ir_base[ref]; + if (ctx->ir_base[insn->op1].op == IR_MERGE || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) { + ir_insn *merge = &ctx->ir_base[insn->op1]; + ir_ref *p, n = merge->inputs_count; + + for (p = merge->ops + 1; n > 0; p++, n--) { + ir_ref input = *p; + IR_ASSERT(ctx->ir_base[input].op == IR_END || ctx->ir_base[input].op == IR_LOOP_END); + ctx->rules[input] = IR_IGOTO_DUP; + } + } + return insn->op; case IR_COND: insn = &ctx->ir_base[ref]; n = 0; @@ -665,7 +687,7 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co } break; case IR_VA_ARG: - flags = IR_USE_MUST_BE_IN_REG | 
IR_OP2_MUST_BE_IN_REG; + flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_DEF_CONFLICTS_WITH_INPUT_REGS; constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); n = 1; insn = &ctx->ir_base[ref]; @@ -714,7 +736,8 @@ static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_type type) do { ir_insn *insn = &ctx->ir_base[*p]; - if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) { + if (insn->op != IR_LOAD && insn->op != IR_LOAD_v + && ((insn->op != IR_STORE && insn->op != IR_STORE_v) || insn->op3 == addr_ref)) { return; } p++; @@ -961,7 +984,7 @@ binop_fp: ctx->flags2 |= IR_HAS_CALLS; return IR_CALL; case IR_VAR: - return IR_SKIPPED | IR_VAR; + return IR_STATIC_ALLOCA; case IR_PARAM: return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM; case IR_ALLOCA: @@ -978,6 +1001,7 @@ binop_fp: } return IR_ALLOCA; case IR_LOAD: + case IR_LOAD_v: ir_match_fuse_addr(ctx, insn->op2, insn->type); if (IR_IS_TYPE_INT(insn->type)) { return IR_LOAD_INT; @@ -986,6 +1010,7 @@ binop_fp: } break; case IR_STORE: + case IR_STORE_v: ir_match_fuse_addr(ctx, insn->op2, ctx->ir_base[insn->op3].type); if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { return IR_STORE_INT; @@ -1364,7 +1389,7 @@ static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref sr } else if (type == IR_DOUBLE && insn->val.u64 == 0) { | fmov Rd(reg-IR_REG_FP_FIRST), xzr } else { - label = ir_const_label(ctx, src); + label = ir_get_const_label(ctx, src); if (type == IR_DOUBLE) { | ldr Rd(reg-IR_REG_FP_FIRST), =>label } else { @@ -1441,10 +1466,41 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) | add Rx(reg), Rx(base), #offset } else { ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); - | add Rx(reg), sp, Rx(IR_REG_INT_TMP) + | add Rx(reg), Rx(base), Rx(IR_REG_INT_TMP) } } +static void ir_resolve_label_syms(ir_ctx *ctx) +{ + uint32_t b; + ir_block *bb; + + for (b = 1, bb = &ctx->cfg_blocks[b]; b <= ctx->cfg_blocks_count; bb++, b++) { + ir_insn *insn = &ctx->ir_base[bb->start]; + + if (insn->op == IR_BEGIN && insn->op2) { + IR_ASSERT(ctx->ir_base[insn->op2].op == IR_LABEL); + ctx->ir_base[insn->op2].val.u32_hi = b; + } + } +} + +static void ir_emit_load_label_addr(ir_ctx *ctx, ir_reg reg, ir_insn *label) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (!data->resolved_label_syms) { + data->resolved_label_syms = 1; + ir_resolve_label_syms(ctx); + } + + IR_ASSERT(label->op == IR_LABEL); + int b = label->val.u32_hi; + + b = ir_skip_empty_target_blocks(ctx, b); + | adr Rx(reg), =>b +} static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) { @@ -1459,9 +1515,11 @@ static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) } else if (insn->op == IR_STR) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; - int label = ir_const_label(ctx, src); + int label = ir_get_const_label(ctx, src); | adr Rx(reg), =>label + } else if (insn->op == IR_LABEL) { + ir_emit_load_label_addr(ctx, reg, insn); } else { ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); } @@ -1697,6 +1755,7 @@ static void ir_emit_prologue(ir_ctx *ctx) | str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset] } else { ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + offset -= sizeof(void*); | str Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)] | sub Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #8 | str Rd(i-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)] @@ -1795,7 +1854,12 
@@ static void ir_emit_prologue(ir_ctx *ctx) offset += 16 * ctx->fp_reg_params; for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) { // TODO: Rd->Rq stur->str ??? - | str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), #offset] + if (aarch64_may_encode_addr_offset(offset, 8)) { + | str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), #offset] + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + | str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)] + } offset += 16; } } @@ -1828,26 +1892,44 @@ static void ir_emit_epilogue(ir_ctx *ctx) offset -= sizeof(void*) * 2; if (aarch64_may_encode_imm7_addr_offset(offset, 8)) { | ldp Rx(prev), Rx(i), [Rx(fp), #offset] - } else { - IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8)); + } else if (aarch64_may_encode_addr_offset(offset + 8, 8)) { | ldr Rx(prev), [Rx(fp), #offset] | ldr Rx(i), [Rx(fp), #(offset+8)] + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + | ldr Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)] + | add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #8 + | ldr Rx(i), [Rx(fp), Rx(IR_REG_INT_TMP)] } prev = IR_REG_NONE; } else { if (prev < IR_REG_FP_FIRST) { offset -= sizeof(void*); - | ldr Rx(prev), [Rx(fp), #offset] + if (aarch64_may_encode_addr_offset(offset, 8)) { + | ldr Rx(prev), [Rx(fp), #offset] + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + | ldr Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)] + } offset -= sizeof(void*); - | ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset] + if (aarch64_may_encode_addr_offset(offset, 8)) { + | ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset] + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + | ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)] + } } else { offset -= sizeof(void*) * 2; if (aarch64_may_encode_imm7_addr_offset(offset, 8)) { | ldp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset] - } else { - IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8)); + } else if (aarch64_may_encode_addr_offset(offset + 8, 8)) { | ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset] | ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #(offset+8)] + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + | ldr Rx(prev-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)] + | add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #8 + | ldr Rx(i-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)] } } prev = IR_REG_NONE; @@ -1857,10 +1939,20 @@ static void ir_emit_epilogue(ir_ctx *ctx) if (prev != IR_REG_NONE) { if (prev < IR_REG_FP_FIRST) { offset -= sizeof(void*); - | ldr Rx(prev), [Rx(fp), #offset] + if (aarch64_may_encode_addr_offset(offset, 8)) { + | ldr Rx(prev), [Rx(fp), #offset] + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + | ldr Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)] + } } else { offset -= sizeof(void*); - | ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset] + if (aarch64_may_encode_addr_offset(offset, 8)) { + | ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset] + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + | ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)] + } } } } @@ -1909,6 +2001,9 @@ static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } + if (op2_reg == IR_REG_NONE && op1 == op2) { + op2_reg = op1_reg; + } if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); @@ -3415,25 +3510,52 @@ static void ir_emit_sext(ir_ctx *ctx, ir_ref 
def, ir_insn *insn) int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp); if (ir_type_size[src_type] == 1) { - if (ir_type_size[dst_type] == 2) { - | ldrsb Rw(def_reg), [Rx(fp), #offset] - } else if (ir_type_size[dst_type] == 4) { - | ldrsb Rw(def_reg), [Rx(fp), #offset] + if (aarch64_may_encode_addr_offset(offset, ir_type_size[src_type])) { + if (ir_type_size[dst_type] == 2) { + | ldrsb Rw(def_reg), [Rx(fp), #offset] + } else if (ir_type_size[dst_type] == 4) { + | ldrsb Rw(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldrsb Rx(def_reg), [Rx(fp), #offset] + } } else { - IR_ASSERT(ir_type_size[dst_type] == 8); - | ldrsb Rx(def_reg), [Rx(fp), #offset] + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + if (ir_type_size[dst_type] == 2) { + | ldrsb Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } else if (ir_type_size[dst_type] == 4) { + | ldrsb Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldrsb Rx(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } } } else if (ir_type_size[src_type] == 2) { - if (ir_type_size[dst_type] == 4) { - | ldrsh Rw(def_reg), [Rx(fp), #offset] + if (aarch64_may_encode_addr_offset(offset, ir_type_size[src_type])) { + if (ir_type_size[dst_type] == 4) { + | ldrsh Rw(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldrsh Rx(def_reg), [Rx(fp), #offset] + } } else { - IR_ASSERT(ir_type_size[dst_type] == 8); - | ldrsh Rx(def_reg), [Rx(fp), #offset] + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + if (ir_type_size[dst_type] == 4) { + | ldrsh Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } else { + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldrsh Rx(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } } } else { IR_ASSERT(ir_type_size[src_type] == 4); IR_ASSERT(ir_type_size[dst_type] == 8); - | ldrsw Rx(def_reg), [Rx(fp), #offset] + if (aarch64_may_encode_addr_offset(offset, ir_type_size[src_type])) { + | ldrsw Rx(def_reg), [Rx(fp), #offset] + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + | ldrsw Rx(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } } } if (IR_REG_SPILLED(ctx->regs[def][0])) { @@ -3473,14 +3595,27 @@ static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_reg fp; int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp); - if (ir_type_size[src_type] == 1) { - | ldrb Rw(def_reg), [Rx(fp), #offset] - } else if (ir_type_size[src_type] == 2) { - | ldrh Rw(def_reg), [Rx(fp), #offset] + if (aarch64_may_encode_addr_offset(offset, ir_type_size[src_type])) { + if (ir_type_size[src_type] == 1) { + | ldrb Rw(def_reg), [Rx(fp), #offset] + } else if (ir_type_size[src_type] == 2) { + | ldrh Rw(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldr Rw(def_reg), [Rx(fp), #offset] + } } else { - IR_ASSERT(ir_type_size[src_type] == 4); - IR_ASSERT(ir_type_size[dst_type] == 8); - | ldr Rw(def_reg), [Rx(fp), #offset] + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + if (ir_type_size[src_type] == 1) { + | ldrb Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } else if (ir_type_size[src_type] == 2) { + | ldrh Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } else { + IR_ASSERT(ir_type_size[src_type] == 4); + IR_ASSERT(ir_type_size[dst_type] == 8); + | ldr Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } } } if (IR_REG_SPILLED(ctx->regs[def][0])) { @@ -3579,11 +3714,21 @@ static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, 
ir_insn *insn) ir_reg fp; int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp); - if (src_type == IR_DOUBLE) { - | ldr Rx(def_reg), [Rx(fp), #offset] + if (aarch64_may_encode_addr_offset(offset, ir_type_size[src_type])) { + if (src_type == IR_DOUBLE) { + | ldr Rx(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(src_type == IR_FLOAT); + | ldr Rw(def_reg), [Rx(fp), #offset] + } } else { - IR_ASSERT(src_type == IR_FLOAT); - | ldr Rw(def_reg), [Rx(fp), #offset] + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + if (src_type == IR_DOUBLE) { + | ldr Rx(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } else { + IR_ASSERT(src_type == IR_FLOAT); + | ldr Rw(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } } } } else if (IR_IS_TYPE_FP(dst_type)) { @@ -3605,12 +3750,22 @@ static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_reg fp; int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp); - if (dst_type == IR_DOUBLE) { - | ldr Rd(def_reg), [Rx(fp), #offset] - } else { - IR_ASSERT(src_type == IR_FLOAT); - | ldr Rs(def_reg), [Rx(fp), #offset] - } + if (aarch64_may_encode_addr_offset(offset, ir_type_size[src_type])) { + if (dst_type == IR_DOUBLE) { + | ldr Rd(def_reg), [Rx(fp), #offset] + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | ldr Rs(def_reg), [Rx(fp), #offset] + } + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + if (dst_type == IR_DOUBLE) { + | ldr Rd(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } else { + IR_ASSERT(dst_type == IR_FLOAT); + | ldr Rs(def_reg), [Rx(fp), Rx(IR_REG_INT_TMP)] + } + } } } if (IR_REG_SPILLED(ctx->regs[def][0])) { @@ -3833,7 +3988,12 @@ static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) IR_ASSERT(def_reg != IR_REG_NONE); offset = ir_var_spill_slot(ctx, insn->op1, &fp); - | add Rx(def_reg), Rx(fp), #offset + if (aarch64_may_encode_imm12(offset)) { + | add Rx(def_reg), Rx(fp), #offset + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + | add Rx(def_reg), Rx(fp), Rx(IR_REG_INT_TMP) + } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } @@ -4221,7 +4381,12 @@ static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) /* Stack must be 16 byte aligned */ size = IR_ALIGNED_SIZE(size, 16); - | add sp, sp, #size + if (aarch64_may_encode_imm12(size)) { + | add sp, sp, #size + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, size); + | add sp, sp, Rx(IR_REG_INT_TMP) + } if (!(ctx->flags & IR_USE_FRAME_POINTER)) { ctx->call_stack_size -= size; } @@ -4283,8 +4448,11 @@ static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) if (ctx->flags & IR_USE_FRAME_POINTER) { | mov Rx(def_reg), Rx(IR_REG_X29) - } else { + } else if (aarch64_may_encode_imm12(ctx->stack_frame_size + ctx->call_stack_size)) { | add Rx(def_reg), Rx(IR_REG_X31), #(ctx->stack_frame_size + ctx->call_stack_size) + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, ctx->stack_frame_size + ctx->call_stack_size); + | add Rx(def_reg), Rx(IR_REG_X31), Rx(IR_REG_INT_TMP) } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, IR_ADDR, def, def_reg); @@ -4377,7 +4545,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { reg_save_area_offset += 16 * IR_REG_FP_ARGS; /* Set va_list.vr_top */ - if (overflow_arg_area_offset != reg_save_area_offset) { + if (overflow_arg_area_offset != reg_save_area_offset || ctx->gp_reg_params < IR_REG_INT_ARGS) { | add 
Rx(tmp_reg), Rx(fp), #reg_save_area_offset } | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_top))] @@ -5246,6 +5414,19 @@ static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) } | br Rx(op2_reg) } else if (IR_IS_CONST_REF(insn->op2)) { + if (ctx->ir_base[insn->op2].op == IR_LABEL) { + if (!data->resolved_label_syms) { + data->resolved_label_syms = 1; + ir_resolve_label_syms(ctx); + } + + uint32_t target = ctx->ir_base[insn->op2].val.u32_hi; + target = ir_skip_empty_target_blocks(ctx, target); + + | b =>target + return; + } + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); if (aarch64_may_use_b(ctx->code_buffer, addr)) { @@ -5636,6 +5817,7 @@ static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_re { ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(offset); IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE); if (IR_IS_TYPE_INT(type)) { @@ -5676,13 +5858,8 @@ static void ir_emit_load_params(ir_ctx *ctx) const int8_t *int_reg_params = _ir_int_reg_params; const int8_t *fp_reg_params = _ir_fp_reg_params; int32_t stack_offset = 0; + int32_t stack_start = ctx->stack_frame_size; - if (ctx->flags & IR_USE_FRAME_POINTER) { - /* skip old frame pointer and return address */ - stack_offset = sizeof(void*) * 2 + ctx->stack_frame_size + ctx->call_stack_size; - } else { - stack_offset = ctx->stack_frame_size + ctx->call_stack_size; - } n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; @@ -5706,12 +5883,9 @@ static void ir_emit_load_params(ir_ctx *ctx) if (ctx->vregs[use]) { dst_reg = IR_REG_NUM(ctx->regs[use][0]); IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE || - stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos + - ((ctx->flags & IR_USE_FRAME_POINTER) ? 
-							-(ctx->stack_frame_size - ctx->stack_frame_alignment) :
-							ctx->call_stack_size));
+					stack_start + stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos);
 				if (src_reg != dst_reg) {
-					ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset);
+					ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_start + stack_offset);
 				}
 				if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) {
 					ir_emit_store(ctx, insn->type, use, dst_reg);
@@ -5785,14 +5959,8 @@ static void ir_fix_param_spills(ir_ctx *ctx)
 	const int8_t *int_reg_params = _ir_int_reg_params;
 	const int8_t *fp_reg_params = _ir_fp_reg_params;
 	int32_t stack_offset = 0;
-	int32_t param_stack_size = 0;
+	int32_t stack_start = ctx->stack_frame_size;
 
-	if (ctx->flags & IR_USE_FRAME_POINTER) {
-		/* skip old frame pointer and return address */
-		stack_offset = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment);
-	} else {
-		stack_offset = ctx->stack_frame_size;
-	}
 	n = use_list->count;
 	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
 		use = *p;
@@ -5819,15 +5987,13 @@
 				if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)
 				 && ival->stack_spill_pos == -1
 				 && (ival->next || ival->reg == IR_REG_NONE)) {
-					ival->stack_spill_pos = stack_offset;
+					ival->stack_spill_pos = stack_start + stack_offset;
 				}
 			}
 			if (sizeof(void*) == 8) {
 				stack_offset += sizeof(void*);
-				param_stack_size += sizeof(void*);
 			} else {
 				stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
-				param_stack_size += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
 			}
 		}
 	}
@@ -5835,7 +6001,7 @@
 	ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count);
 	ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count);
-	ctx->param_stack_size = param_stack_size;
+	ctx->param_stack_size = stack_offset;
 }
 
 static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
@@ -5876,6 +6042,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
 			case IR_MERGE:
 			case IR_LOOP_BEGIN:
 			case IR_LOOP_END:
+			case IR_IGOTO_DUP:
 				break;
 			default:
 				def_flags = ir_get_target_constraints(ctx, i, &constraints);
@@ -5892,7 +6059,7 @@
 				IR_REGSET_EXCL(available, reg);
 				ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
 			} else if (def_flags & IR_USE_MUST_BE_IN_REG) {
-				if (insn->op == IR_VLOAD
+				if ((insn->op == IR_VLOAD || insn->op == IR_VLOAD_v)
 				 && ctx->live_intervals[ctx->vregs[i]]
 				 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1
 				 && ir_is_same_mem_var(ctx, i, ctx->ir_base[insn->op2].op3)) {
@@ -5932,7 +6099,7 @@
 			for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
 				use = *p;
 				use_insn = &ctx->ir_base[use];
-				if (use_insn->op == IR_VLOAD) {
+				if (use_insn->op == IR_VLOAD || use_insn->op == IR_VLOAD_v) {
 					if (ctx->vregs[use]
 					 && !ctx->live_intervals[ctx->vregs[use]]) {
 						ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
@@ -5943,7 +6110,7 @@
 						ival->vreg = ctx->vregs[use];
 						ival->stack_spill_pos = stack_spill_pos;
 					}
-				} else if (use_insn->op == IR_VSTORE) {
+				} else if (use_insn->op == IR_VSTORE || use_insn->op == IR_VSTORE_v) {
 					if (!IR_IS_CONST_REF(use_insn->op3)
 					 && ctx->vregs[use_insn->op3]
 					 && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) {
@@ -6080,25 +6247,21 @@ void ir_fix_stack_frame(ctx)
 	ctx->stack_frame_size =
IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); ctx->stack_frame_size += additional_size; - ctx->stack_frame_alignment = 0; ctx->call_stack_size = 0; if (!(ctx->flags & IR_FUNCTION)) { while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { ctx->stack_frame_size += sizeof(void*); - ctx->stack_frame_alignment += sizeof(void*); } } else { /* Stack must be 16 byte aligned */ if (!(ctx->flags & IR_FUNCTION)) { while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { ctx->stack_frame_size += sizeof(void*); - ctx->stack_frame_alignment += sizeof(void*); } } else if (ctx->flags & IR_USE_FRAME_POINTER) { while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) { ctx->stack_frame_size += sizeof(void*); - ctx->stack_frame_alignment += sizeof(void*); } } else { if (!(ctx->flags & IR_NO_STACK_COMBINE)) { @@ -6107,7 +6270,6 @@ void ir_fix_stack_frame(ir_ctx *ctx) while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size, 16) != ctx->stack_frame_size + ctx->call_stack_size) { ctx->stack_frame_size += sizeof(void*); - ctx->stack_frame_alignment += sizeof(void*); } } } @@ -6143,6 +6305,8 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) int ret; void *entry; size_t size; + ir_ref igoto_dup_ref = IR_UNUSED; + uint32_t igoto_dup_block = 0; data.ra_data.unused_slot_4 = 0; data.ra_data.unused_slot_2 = 0; @@ -6150,11 +6314,11 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) data.ra_data.handled = NULL; data.rodata_label = 0; data.jmp_table_label = 0; + data.resolved_label_syms = 0; ctx->data = &data; if (!ctx->live_intervals) { ctx->stack_frame_size = 0; - ctx->stack_frame_alignment = 0; ctx->call_stack_size = 0; ctx->used_preserved_regs = 0; ir_allocate_unique_spill_slots(ctx); @@ -6176,7 +6340,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) } ctx->stack_frame_size = ctx->fixed_stack_frame_size; ctx->call_stack_size = ctx->fixed_call_stack_size; - ctx->stack_frame_alignment = 0; } Dst = &data.dasm_state; @@ -6386,6 +6549,35 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_TAILCALL: ir_emit_tailcall(ctx, i, insn); break; + case IR_IGOTO_DUP: + if (bb->flags & IR_BB_DESSA_MOVES) { + ir_emit_dessa_moves(ctx, b, bb); + } + IR_ASSERT(!igoto_dup_ref && !igoto_dup_block); + igoto_dup_ref = i; + igoto_dup_block = b; + b = ctx->cfg_edges[bb->successors]; + bb = &ctx->cfg_blocks[b]; + i = bb->start; + insn = &ctx->ir_base[i]; + rule = &ctx->rules[i]; + break; + case IR_IGOTO: + if ((ctx->ir_base[insn->op1].op == IR_MERGE || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) + && (ctx->rules[ctx->ir_base[insn->op1].op1] & IR_RULE_MASK) == IR_IGOTO_DUP + && igoto_dup_ref) { + ir_emit_ijmp(ctx, i, insn); + b = igoto_dup_block; + bb = &ctx->cfg_blocks[b]; + i = igoto_dup_ref; + insn = &ctx->ir_base[i]; + rule = &ctx->rules[i]; + igoto_dup_block= 0; + igoto_dup_ref = 0; + break; + } + IR_ASSERT(!igoto_dup_ref && !igoto_dup_block); + IR_FALLTHROUGH; case IR_IJMP: ir_emit_ijmp(ctx, i, insn); break; @@ -6396,9 +6588,11 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) ir_emit_vaddr(ctx, i, insn); break; case IR_VLOAD: + case IR_VLOAD_v: ir_emit_vload(ctx, i, insn); break; case IR_VSTORE: + case IR_VSTORE_v: ir_emit_vstore(ctx, i, insn); break; case IR_RLOAD: @@ -6645,6 +6839,28 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) } while (i != 0); } + if ((ctx->flags2 & IR_HAS_BLOCK_ADDR) && ctx->loader && ctx->loader->add_label) { + for (b = 1, bb = &ctx->cfg_blocks[b]; b <= 
ctx->cfg_blocks_count; bb++, b++) { + ir_insn *insn = &ctx->ir_base[bb->start]; + + if (insn->op == IR_BEGIN && insn->op2) { + IR_ASSERT(ctx->ir_base[insn->op2].op == IR_LABEL); + ctx->ir_base[insn->op2].val.u32_hi = 0; + ctx->loader->add_label(ctx->loader, ir_get_str(ctx, ctx->ir_base[insn->op2].val.str), + (char*)entry + dasm_getpclabel(&data.dasm_state, ir_skip_empty_target_blocks(ctx, b))); + } + } + } else if (data.resolved_label_syms) { + for (b = 1, bb = &ctx->cfg_blocks[b]; b <= ctx->cfg_blocks_count; bb++, b++) { + ir_insn *insn = &ctx->ir_base[bb->start]; + + if (insn->op == IR_BEGIN && insn->op2) { + IR_ASSERT(ctx->ir_base[insn->op2].op == IR_LABEL); + ctx->ir_base[insn->op2].val.u32_hi = 0; + } + } + } + dasm_free(&data.dasm_state); if (ctx->code_buffer) { diff --git a/ext/opcache/jit/ir/ir_builder.h b/ext/opcache/jit/ir/ir_builder.h index c1dcffdbaa084..03add75906553 100644 --- a/ext/opcache/jit/ir/ir_builder.h +++ b/ext/opcache/jit/ir/ir_builder.h @@ -490,7 +490,7 @@ extern "C" { #define ir_ADD_OFFSET(_addr, _offset) _ir_ADD_OFFSET(_ir_CTX, (_addr), (_offset)) /* Unfoldable variant of COPY */ -#define ir_HARD_COPY(_type, _op1) ir_emit2(_ir_CTX, IR_OPT(IR_COPY, (_type)), (_op1), 1) +#define ir_HARD_COPY(_type, _op1) ir_emit2(_ir_CTX, IR_OPT(IR_COPY, (_type)), (_op1), IR_COPY_HARD) #define ir_HARD_COPY_B(_op1) ir_HARD_COPY(IR_BOOL, _op1) #define ir_HARD_COPY_U8(_op1) ir_HARD_COPY(IR_U8, _op1) #define ir_HARD_COPY_U16(_op1) ir_HARD_COPY(IR_U16, _op1) @@ -544,6 +544,8 @@ extern "C" { #define ir_VLOAD_D(_var) _ir_VLOAD(_ir_CTX, IR_DOUBLE, (_var)) #define ir_VLOAD_F(_var) _ir_VLOAD(_ir_CTX, IR_FLOAT, (_var)) #define ir_VSTORE(_var, _val) _ir_VSTORE(_ir_CTX, (_var), (_val)) +#define ir_VLOAD_v(_type, _var) _ir_VLOAD_v(_ir_CTX, (_type), (_var)) +#define ir_VSTORE_v(_var, _val) _ir_VSTORE_v(_ir_CTX, (_var), (_val)) #define ir_RLOAD(_type, _reg) _ir_RLOAD(_ir_CTX, (_type), (_reg)) #define ir_RLOAD_B(_reg) _ir_RLOAD(_ir_CTX, IR_BOOL, (_reg)) #define ir_RLOAD_U8(_reg) _ir_RLOAD(_ir_CTX, IR_U8, (_reg)) @@ -574,6 +576,8 @@ extern "C" { #define ir_LOAD_D(_addr) _ir_LOAD(_ir_CTX, IR_DOUBLE, (_addr)) #define ir_LOAD_F(_addr) _ir_LOAD(_ir_CTX, IR_FLOAT, (_addr)) #define ir_STORE(_addr, _val) _ir_STORE(_ir_CTX, (_addr), (_val)) +#define ir_LOAD_v(_type, _addr) _ir_LOAD_v(_ir_CTX, (_type), (_addr)) +#define ir_STORE_v(_addr, _val) _ir_STORE_v(_ir_CTX, (_addr), (_val)) #define ir_TLS(_index, _offset) _ir_TLS(_ir_CTX, (_index), (_offset)) #define ir_TRAP() do {_ir_CTX->control = ir_emit1(_ir_CTX, IR_TRAP, _ir_CTX->control);} while (0) @@ -586,7 +590,7 @@ extern "C" { #define ir_VA_END(_list) _ir_VA_END(_ir_CTX, _list) #define ir_VA_COPY(_dst, _src) _ir_VA_COPY(_ir_CTX, _dst, _src) #define ir_VA_ARG(_list, _type) _ir_VA_ARG(_ir_CTX, _type, _list) -#define ir_VA_ARG_EX(_list, _type, size) _ir_VA_ARG_EX(_ir_CTX, _type, _list, size) +#define ir_VA_ARG_EX(_list, _type, s, a) _ir_VA_ARG_EX(_ir_CTX, _type, _list, s, a) #define ir_START() _ir_START(_ir_CTX) #define ir_ENTRY(_src, _num) _ir_ENTRY(_ir_CTX, (_src), (_num)) @@ -607,6 +611,7 @@ extern "C" { #define ir_CASE_RANGE(_switch, _v1, _v2) _ir_CASE_RANGE(_ir_CTX, (_switch), (_v1), (_v2)) #define ir_CASE_DEFAULT(_switch) _ir_CASE_DEFAULT(_ir_CTX, (_switch)) #define ir_RETURN(_val) _ir_RETURN(_ir_CTX, (_val)) +#define ir_IGOTO(_addr) _ir_IGOTO(_ir_CTX, (_addr)) #define ir_IJMP(_addr) _ir_IJMP(_ir_CTX, (_addr)) #define ir_UNREACHABLE() _ir_UNREACHABLE(_ir_CTX) @@ -654,15 +659,19 @@ ir_ref _ir_ALLOCA(ir_ctx *ctx, ir_ref size); void _ir_AFREE(ir_ctx *ctx, 
ir_ref size);
 ir_ref _ir_VLOAD(ir_ctx *ctx, ir_type type, ir_ref var);
 void _ir_VSTORE(ir_ctx *ctx, ir_ref var, ir_ref val);
+ir_ref _ir_VLOAD_v(ir_ctx *ctx, ir_type type, ir_ref var);
+void _ir_VSTORE_v(ir_ctx *ctx, ir_ref var, ir_ref val);
 ir_ref _ir_RLOAD(ir_ctx *ctx, ir_type type, ir_ref reg);
 void _ir_RSTORE(ir_ctx *ctx, ir_ref reg, ir_ref val);
 ir_ref _ir_LOAD(ir_ctx *ctx, ir_type type, ir_ref addr);
 void _ir_STORE(ir_ctx *ctx, ir_ref addr, ir_ref val);
+ir_ref _ir_LOAD_v(ir_ctx *ctx, ir_type type, ir_ref addr);
+void _ir_STORE_v(ir_ctx *ctx, ir_ref addr, ir_ref val);
 void _ir_VA_START(ir_ctx *ctx, ir_ref list);
 void _ir_VA_END(ir_ctx *ctx, ir_ref list);
 void _ir_VA_COPY(ir_ctx *ctx, ir_ref dst, ir_ref src);
 ir_ref _ir_VA_ARG(ir_ctx *ctx, ir_type type, ir_ref list);
-ir_ref _ir_VA_ARG_EX(ir_ctx *ctx, ir_type type, ir_ref list, size_t size);
+ir_ref _ir_VA_ARG_EX(ir_ctx *ctx, ir_type type, ir_ref list, size_t size, size_t align);
 void _ir_START(ir_ctx *ctx);
 void _ir_ENTRY(ir_ctx *ctx, ir_ref src, ir_ref num);
 void _ir_BEGIN(ir_ctx *ctx, ir_ref src);
@@ -688,6 +697,7 @@ void _ir_CASE_VAL(ir_ctx *ctx, ir_ref switch_ref, ir_ref val);
 void _ir_CASE_RANGE(ir_ctx *ctx, ir_ref switch_ref, ir_ref v1, ir_ref v2);
 void _ir_CASE_DEFAULT(ir_ctx *ctx, ir_ref switch_ref);
 void _ir_RETURN(ir_ctx *ctx, ir_ref val);
+ir_ref _ir_IGOTO(ir_ctx *ctx, ir_ref addr);
 void _ir_IJMP(ir_ctx *ctx, ir_ref addr);
 void _ir_GUARD(ir_ctx *ctx, ir_ref condition, ir_ref addr);
 void _ir_GUARD_NOT(ir_ctx *ctx, ir_ref condition, ir_ref addr);
diff --git a/ext/opcache/jit/ir/ir_cfg.c b/ext/opcache/jit/ir/ir_cfg.c
index 00923387bb21c..46755067b2444 100644
--- a/ext/opcache/jit/ir/ir_cfg.c
+++ b/ext/opcache/jit/ir/ir_cfg.c
@@ -820,11 +820,14 @@ int ir_build_dominators_tree(ir_ctx *ctx)
 		succ_b = ctx->cfg_edges[bb->successors];
 		if (bb->successors_count != 1) {
 			/* LOOP_END/END may be linked with the following ENTRY by a fake edge */
-			IR_ASSERT(bb->successors_count == 2);
-			if (blocks[succ_b].flags & IR_BB_ENTRY) {
+			if (bb->successors_count != 2) {
+				complete = 0;
+				break;
+			} else if (blocks[succ_b].flags & IR_BB_ENTRY) {
 				succ_b = ctx->cfg_edges[bb->successors + 1];
-			} else {
-				IR_ASSERT(blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY);
+			} else if (!(blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY)) {
+				complete = 0;
+				break;
 			}
 		}
 		dom_depth = blocks[succ_b].dom_depth;
diff --git a/ext/opcache/jit/ir/ir_check.c b/ext/opcache/jit/ir/ir_check.c
index c25a984aefc1d..ee951291b1b05 100644
--- a/ext/opcache/jit/ir/ir_check.c
+++ b/ext/opcache/jit/ir/ir_check.c
@@ -328,7 +328,9 @@ bool ir_check(const ir_ctx *ctx)
 				}
 				break;
 			case IR_LOAD:
+			case IR_LOAD_v:
 			case IR_STORE:
+			case IR_STORE_v:
 				type = ctx->ir_base[insn->op2].type;
 				if (type != IR_ADDR
 				 && (!IR_IS_TYPE_INT(type) || ir_type_size[type] != ir_type_size[IR_ADDR])) {
@@ -338,7 +340,9 @@
 				}
 				break;
 			case IR_VLOAD:
+			case IR_VLOAD_v:
 			case IR_VSTORE:
+			case IR_VSTORE_v:
 				if (ctx->ir_base[insn->op2].op != IR_VAR) {
 					fprintf(stderr, "ir_base[%d].op2 must be 'VAR' (%s)\n",
 						i, ir_op_name[ctx->ir_base[insn->op2].op]);
@@ -408,6 +412,8 @@
 					ok = 0;
 				}
 				break;
+			case IR_IGOTO:
+				break;
 			default:
 				/* skip data references */
 				count = n = use_list->count;
diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c
index a501d261f30a7..5cc732927d412 100644
--- a/ext/opcache/jit/ir/ir_dump.c
+++ b/ext/opcache/jit/ir/ir_dump.c
@@ -129,6 +129,11 @@ void ir_dump_dot(const ir_ctx *ctx, const char *name,
FILE *f)
 				case IR_OPND_CONTROL_REF:
 					fprintf(f, "\tn%d -> n%d [style=dashed,dir=back,weight=%d];\n", ref, i, REF_WEIGHT);
 					break;
+				case IR_OPND_LABEL_REF:
+					if (ref) {
+						fprintf(f, "\tc%d -> n%d [color=blue,weight=%d];\n", -ref, i, REF_WEIGHT);
+					}
+					break;
 			}
 		}
 	}
@@ -491,6 +496,8 @@ void ir_dump_codegen(const ir_ctx *ctx, FILE *f)
 				ir_print_proto(ctx, insn->proto, f);
 			} else if (insn->op == IR_SYM) {
 				fprintf(f, "sym(%s)", ir_get_str(ctx, insn->val.name));
+			} else if (insn->op == IR_LABEL) {
+				fprintf(f, "label(%s)", ir_get_str(ctx, insn->val.name));
 			} else if (insn->op == IR_FUNC_ADDR) {
 				fprintf(f, "func *");
 				ir_print_const(ctx, insn, f, true);
@@ -648,6 +655,12 @@
 						fprintf(f, "%s%d", first ? "(" : ", ", ref);
 						first = 0;
 						break;
+					case IR_OPND_LABEL_REF:
+						if (ref) {
+							IR_ASSERT(IR_IS_CONST_REF(ref));
+							fprintf(f, "%sc_%d", first ? "(" : ", ", -ref);
+						}
+						break;
 				}
 			} else if (opnd_kind == IR_OPND_NUM) {
 				fprintf(f, "%s%d", first ? "(" : ", ", ref);
diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c
index 7a10da1322a72..847ca375b5bd0 100644
--- a/ext/opcache/jit/ir/ir_emit.c
+++ b/ext/opcache/jit/ir/ir_emit.c
@@ -244,32 +244,30 @@ static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs
 		ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)];
 		type = arg->type;
 		if (IR_IS_TYPE_INT(type)) {
-			if (arg->op == IR_ARGVAL) {
-				continue;
-			} else if (int_param < int_reg_params_count) {
+			if (int_param < int_reg_params_count && arg->op != IR_ARGVAL) {
 				regs[j] = int_reg_params[int_param];
 				count = j + 1;
+				int_param++;
+#ifdef _WIN64
+				/* The WIN64 calling convention uses a common counter for int and fp registers */
+				fp_param++;
+#endif
 			} else {
 				regs[j] = IR_REG_NONE;
 			}
-			int_param++;
-#ifdef _WIN64
-			/* WIN64 calling convention use common couter for int and fp registers */
-			fp_param++;
-#endif
 		} else {
 			IR_ASSERT(IR_IS_TYPE_FP(type));
 			if (fp_param < fp_reg_params_count) {
 				regs[j] = fp_reg_params[fp_param];
 				count = j + 1;
+				fp_param++;
+#ifdef _WIN64
+				/* The WIN64 calling convention uses a common counter for int and fp registers */
+				int_param++;
+#endif
 			} else {
 				regs[j] = IR_REG_NONE;
 			}
-			fp_param++;
-#ifdef _WIN64
-			/* WIN64 calling convention use common couter for int and fp registers */
-			int_param++;
-#endif
 		}
 	}
 	return count;
 }
@@ -426,7 +424,7 @@ typedef struct _ir_common_backend_data {
 	ir_bitset emit_constants;
 } ir_common_backend_data;
 
-static int ir_const_label(ir_ctx *ctx, ir_ref ref)
+static int ir_get_const_label(ir_ctx *ctx, ir_ref ref)
 {
 	ir_common_backend_data *data = ctx->data;
 	int label = ctx->cfg_blocks_count - ref;
@@ -1015,11 +1013,16 @@ int ir_match(ir_ctx *ctx)
 			entries_count++;
 		}
 		ctx->rules[start] = IR_SKIPPED | IR_NOP;
+		if (ctx->ir_base[start].op == IR_BEGIN && ctx->ir_base[start].op2) {
+			ctx->flags2 |= IR_HAS_BLOCK_ADDR;
+		}
 		ref = bb->end;
 		if (bb->successors_count == 1) {
 			insn = &ctx->ir_base[ref];
 			if (insn->op == IR_END || insn->op == IR_LOOP_END) {
-				ctx->rules[ref] = insn->op;
+				if (!ctx->rules[ref]) {
+					ctx->rules[ref] = insn->op;
+				}
 				ref = prev_ref[ref];
 				if (ref == start && ctx->cfg_edges[bb->successors] != b) {
 					if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) {
diff --git a/ext/opcache/jit/ir/ir_fold.h b/ext/opcache/jit/ir/ir_fold.h
index 74f7818d747c4..bab6b2916075f 100644
--- a/ext/opcache/jit/ir/ir_fold.h
+++ b/ext/opcache/jit/ir/ir_fold.h
@@ -755,8 +755,35 @@ IR_FOLD(NEG(C_FLOAT))
 }
 
 IR_FOLD(ABS(C_I8))
+{
+	IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type);
+	if (op1_insn->val.i64 >= 0) {
+		IR_FOLD_COPY(op1);
+	}
else { + IR_FOLD_CONST_I(-op1_insn->val.i8); + } +} + IR_FOLD(ABS(C_I16)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op1_insn->val.i64 >= 0) { + IR_FOLD_COPY(op1); + } else { + IR_FOLD_CONST_I(-op1_insn->val.i16); + } +} + IR_FOLD(ABS(C_I32)) +{ + IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); + if (op1_insn->val.i64 >= 0) { + IR_FOLD_COPY(op1); + } else { + IR_FOLD_CONST_I((int32_t)-op1_insn->val.u32); + } +} + IR_FOLD(ABS(C_I64)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); @@ -847,7 +874,7 @@ IR_FOLD(MUL_OV(C_U64, C_U64)) uint64_t res; IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); res = op1_insn->val.u64 * op2_insn->val.u64; - if (op1_insn->val.u64 != 0 && res / op1_insn->val.u64 != op2_insn->val.u64 && res <= max) { + if ((op1_insn->val.u64 != 0 && res / op1_insn->val.u64 != op2_insn->val.u64) || res > max) { IR_FOLD_NEXT; } IR_FOLD_CONST_U(res); @@ -864,7 +891,7 @@ IR_FOLD(MUL_OV(C_I64, C_I64)) int64_t res; IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); res = op1_insn->val.u64 * op2_insn->val.u64; - if (op1_insn->val.i64 != 0 && res / op1_insn->val.i64 != op2_insn->val.i64 && res >= min && res <= max) { + if ((op1_insn->val.i64 != 0 && res / op1_insn->val.i64 != op2_insn->val.i64) || res < min || res > max) { IR_FOLD_NEXT; } IR_FOLD_CONST_U(res); @@ -1037,220 +1064,220 @@ IR_FOLD(SHL(C_U8, C_U8)) IR_FOLD(SHL(C_CHAR, C_CHAR)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(op1_insn->val.u8 << op2_insn->val.u8); + IR_FOLD_CONST_U(op1_insn->val.u8 << (op2_insn->val.u8 & 0x7)); } IR_FOLD(SHL(C_I8, C_I8)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I((int8_t)(op1_insn->val.u8 << op2_insn->val.u8)); + IR_FOLD_CONST_I((int8_t)(op1_insn->val.u8 << (op2_insn->val.u8 & 0x7))); } IR_FOLD(SHL(C_U16, C_U16)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(op1_insn->val.u16 << op2_insn->val.u16); + IR_FOLD_CONST_U(op1_insn->val.u16 << (op2_insn->val.u16 & 0xf)); } IR_FOLD(SHL(C_I16, C_I16)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I((int16_t)(op1_insn->val.u16 << op2_insn->val.u16)); + IR_FOLD_CONST_I((int16_t)(op1_insn->val.u16 << (op2_insn->val.u16 & 0xf))); } IR_FOLD(SHL(C_U32, C_U32)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(op1_insn->val.u32 << op2_insn->val.u32); + IR_FOLD_CONST_U(op1_insn->val.u32 << (op2_insn->val.u32 & 0x1f)); } IR_FOLD(SHL(C_I32, C_I32)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I((int32_t)(op1_insn->val.u32 << op2_insn->val.u32)); + IR_FOLD_CONST_I((int32_t)(op1_insn->val.u32 << (op2_insn->val.u32 & 0x1f))); } IR_FOLD(SHL(C_U64, C_U64)) IR_FOLD(SHL(C_I64, C_I64)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(op1_insn->val.u64 << op2_insn->val.u64); + IR_FOLD_CONST_U(op1_insn->val.u64 << (op2_insn->val.u64 & 0x3f)); } IR_FOLD(SHR(C_U8, C_U8)) IR_FOLD(SHR(C_CHAR, C_CHAR)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(op1_insn->val.u8 >> op2_insn->val.u8); + IR_FOLD_CONST_U(op1_insn->val.u8 >> (op2_insn->val.u8 & 0x7)); } IR_FOLD(SHR(C_I8, C_I8)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I((int8_t)(op1_insn->val.u8 >> op2_insn->val.u8)); + IR_FOLD_CONST_I((int8_t)(op1_insn->val.u8 >> (op2_insn->val.u8 & 0x7))); } IR_FOLD(SHR(C_U16, C_U16)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(op1_insn->val.u16 >> op2_insn->val.u16); + IR_FOLD_CONST_U(op1_insn->val.u16 >> (op2_insn->val.u16 & 0xf)); } IR_FOLD(SHR(C_I16, C_I16)) { 
IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I((int16_t)(op1_insn->val.u16 >> op2_insn->val.u16)); + IR_FOLD_CONST_I((int16_t)(op1_insn->val.u16 >> (op2_insn->val.u16 & 0xf))); } IR_FOLD(SHR(C_U32, C_U32)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(op1_insn->val.u32 >> op2_insn->val.u32); + IR_FOLD_CONST_U(op1_insn->val.u32 >> (op2_insn->val.u32 & 0x1f)); } IR_FOLD(SHR(C_I32, C_I32)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I((int32_t)(op1_insn->val.u32 >> op2_insn->val.u32)); + IR_FOLD_CONST_I((int32_t)(op1_insn->val.u32 >> (op2_insn->val.u32 & 0x1f))); } IR_FOLD(SHR(C_U64, C_U64)) IR_FOLD(SHR(C_I64, C_I64)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(op1_insn->val.u64 >> op2_insn->val.u64); + IR_FOLD_CONST_U(op1_insn->val.u64 >> (op2_insn->val.u64 & 0x3f)); } IR_FOLD(SAR(C_U8, C_U8)) IR_FOLD(SAR(C_CHAR, C_CHAR)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U((uint8_t)(op1_insn->val.i8 >> op2_insn->val.i8)); + IR_FOLD_CONST_U((uint8_t)(op1_insn->val.i8 >> (op2_insn->val.i8 & 0x7))); } IR_FOLD(SAR(C_I8, C_I8)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I(op1_insn->val.i8 >> op2_insn->val.i8); + IR_FOLD_CONST_I(op1_insn->val.i8 >> (op2_insn->val.i8 & 0x7)); } IR_FOLD(SAR(C_U16, C_U16)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U((uint16_t)(op1_insn->val.i16 >> op2_insn->val.i16)); + IR_FOLD_CONST_U((uint16_t)(op1_insn->val.i16 >> (op2_insn->val.i16 & 0xf))); } IR_FOLD(SAR(C_I16, C_I16)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I(op1_insn->val.i16 >> op2_insn->val.i16); + IR_FOLD_CONST_I(op1_insn->val.i16 >> (op2_insn->val.i16 & 0xf)); } IR_FOLD(SAR(C_U32, C_U32)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U((uint32_t)(op1_insn->val.i32 >> op2_insn->val.i32)); + IR_FOLD_CONST_U((uint32_t)(op1_insn->val.i32 >> (op2_insn->val.i32 & 0x1f))); } IR_FOLD(SAR(C_I32, C_I32)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I(op1_insn->val.i32 >> op2_insn->val.i32); + IR_FOLD_CONST_I(op1_insn->val.i32 >> (op2_insn->val.i32 & 0x1f)); } IR_FOLD(SAR(C_U64, C_U64)) IR_FOLD(SAR(C_I64, C_I64)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I(op1_insn->val.i64 >> op2_insn->val.i64); + IR_FOLD_CONST_I(op1_insn->val.i64 >> (op2_insn->val.i64 & 0x3f)); } IR_FOLD(ROL(C_U8, C_U8)) IR_FOLD(ROL(C_CHAR, C_CHAR)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(ir_rol8(op1_insn->val.u8, op2_insn->val.u8)); + IR_FOLD_CONST_U(ir_rol8(op1_insn->val.u8, (op2_insn->val.u8 & 0x7))); } IR_FOLD(ROL(C_I8, C_I8)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I((int8_t)ir_rol8(op1_insn->val.u8, op2_insn->val.u8)); + IR_FOLD_CONST_I((int8_t)ir_rol8(op1_insn->val.u8, (op2_insn->val.u8 & 0x7))); } IR_FOLD(ROL(C_U16, C_U16)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(ir_rol16(op1_insn->val.u16, op2_insn->val.u16)); + IR_FOLD_CONST_U(ir_rol16(op1_insn->val.u16, (op2_insn->val.u16 & 0xf))); } IR_FOLD(ROL(C_I16, C_I16)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I((int16_t)ir_rol16(op1_insn->val.u16, op2_insn->val.u16)); + IR_FOLD_CONST_I((int16_t)ir_rol16(op1_insn->val.u16, (op2_insn->val.u16 & 0xf))); } IR_FOLD(ROL(C_U32, C_U32)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(ir_rol32(op1_insn->val.u32, op2_insn->val.u32)); + IR_FOLD_CONST_U(ir_rol32(op1_insn->val.u32, (op2_insn->val.u32 & 0x1f))); } 
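/*
 * [editor's note, not part of the upstream patch] The shift-count masks
 * added throughout this hunk (& 0x7, & 0xf, & 0x1f, & 0x3f) keep the
 * folded constant both well defined and consistent with the hardware:
 * in C, shifting by a count >= the operand width is undefined behavior,
 * whereas x86 and AArch64 shifters (and the ir_rol/ir_ror helpers)
 * effectively reduce the count modulo the width. A minimal illustration:
 *
 *     uint32_t v = 1;
 *     v = v << 40;          // undefined behavior in plain C
 *     v = v << (40 & 0x1f); // folds to 1 << 8 == 256, matching the CPU
 */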
IR_FOLD(ROL(C_I32, C_I32)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I((int32_t)ir_rol32(op1_insn->val.u32, op2_insn->val.u32)); + IR_FOLD_CONST_I((int32_t)ir_rol32(op1_insn->val.u32, (op2_insn->val.u32 & 0x1f))); } IR_FOLD(ROL(C_U64, C_U64)) IR_FOLD(ROL(C_I64, C_I64)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(ir_rol64(op1_insn->val.u64, op2_insn->val.u64)); + IR_FOLD_CONST_U(ir_rol64(op1_insn->val.u64, (op2_insn->val.u64 & 0x3f))); } IR_FOLD(ROR(C_U8, C_U8)) IR_FOLD(ROR(C_CHAR, C_CHAR)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(ir_ror8(op1_insn->val.u8, op2_insn->val.u8)); + IR_FOLD_CONST_U(ir_ror8(op1_insn->val.u8, (op2_insn->val.u8 & 0x7))); } IR_FOLD(ROR(C_I8, C_I8)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I((int8_t)ir_ror8(op1_insn->val.u8, op2_insn->val.u8)); + IR_FOLD_CONST_I((int8_t)ir_ror8(op1_insn->val.u8, (op2_insn->val.u8 & 0x7))); } IR_FOLD(ROR(C_U16, C_U16)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(ir_ror16(op1_insn->val.u16, op2_insn->val.u16)); + IR_FOLD_CONST_U(ir_ror16(op1_insn->val.u16, (op2_insn->val.u16 & 0xf))); } IR_FOLD(ROR(C_I16, C_I16)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I((int16_t)ir_ror16(op1_insn->val.u16, op2_insn->val.u16)); + IR_FOLD_CONST_I((int16_t)ir_ror16(op1_insn->val.u16, (op2_insn->val.u16 & 0xf))); } IR_FOLD(ROR(C_U32, C_U32)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(ir_ror32(op1_insn->val.u32, op2_insn->val.u32)); + IR_FOLD_CONST_U(ir_ror32(op1_insn->val.u32, (op2_insn->val.u32 & 0x1f))); } IR_FOLD(ROR(C_I32, C_I32)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_I((int32_t)ir_ror32(op1_insn->val.u32, op2_insn->val.u32)); + IR_FOLD_CONST_I((int32_t)ir_ror32(op1_insn->val.u32, (op2_insn->val.u32 & 0x1f))); } IR_FOLD(ROR(C_U64, C_U64)) IR_FOLD(ROR(C_I64, C_I64)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - IR_FOLD_CONST_U(ir_ror64(op1_insn->val.u64, op2_insn->val.u64)); + IR_FOLD_CONST_U(ir_ror64(op1_insn->val.u64, (op2_insn->val.u64 & 0x3f))); } //IR_FOLD(BSWAP(CONST)) @@ -1392,6 +1419,9 @@ IR_FOLD(TRUNC(C_U64)) IR_FOLD_CONST_U(op1_insn->val.u16); case IR_U32: IR_FOLD_CONST_U(op1_insn->val.u32); + case IR_ADDR: + IR_ASSERT(sizeof(void*) == 4); + IR_FOLD_CONST_U(op1_insn->val.u32); } } @@ -1545,7 +1575,7 @@ IR_FOLD(FP2FP(C_DOUBLE)) IR_FOLD(COPY(_)) { IR_ASSERT(IR_OPT_TYPE(opt) == op1_insn->type); - if (!op2) { + if (!(op2 & IR_COPY_HARD)) { IR_FOLD_COPY(op1); } /* skip CSE */ @@ -2075,23 +2105,23 @@ IR_FOLD(SUB(ADD, ADD)) IR_FOLD_CONST_U(0); } else if (op1_insn->op1 == op2_insn->op1) { /* (a + b) - (a + c) => b - c */ - op1 = op1_insn->op2; - op2 = op2_insn->op2; + op1 = _ir_fold_cast(ctx, op1_insn->op2, IR_OPT_TYPE(opt)); + op2 = _ir_fold_cast(ctx, op2_insn->op2, IR_OPT_TYPE(opt)); IR_FOLD_RESTART; } else if (op1_insn->op1 == op2_insn->op2) { /* (a + b) - (c + a) => b - c */ - op1 = op1_insn->op2; - op2 = op2_insn->op1; + op1 = _ir_fold_cast(ctx, op1_insn->op2, IR_OPT_TYPE(opt)); + op2 = _ir_fold_cast(ctx, op2_insn->op1, IR_OPT_TYPE(opt)); IR_FOLD_RESTART; } else if (op1_insn->op2 == op2_insn->op1) { /* (a + b) - (b + c) => a - c */ - op1 = op1_insn->op1; - op2 = op2_insn->op2; + op1 = _ir_fold_cast(ctx, op1_insn->op1, IR_OPT_TYPE(opt)); + op2 = _ir_fold_cast(ctx, op2_insn->op2, IR_OPT_TYPE(opt)); IR_FOLD_RESTART; } else if (op1_insn->op2 == op2_insn->op2) { /* (a + b) - (c + b) => a - c */ - op1 = op1_insn->op1; - op2 = op2_insn->op1; + op1 = 
_ir_fold_cast(ctx, op1_insn->op1, IR_OPT_TYPE(opt)); + op2 = _ir_fold_cast(ctx, op2_insn->op1, IR_OPT_TYPE(opt)); IR_FOLD_RESTART; } } diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c index 043e1e7bdd853..e6486ba64a1c5 100644 --- a/ext/opcache/jit/ir/ir_gcm.c +++ b/ext/opcache/jit/ir/ir_gcm.c @@ -361,20 +361,20 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) { j = ctx->cfg_blocks[j].idom; } + clone = ir_hashtab_find(&hash, j); + if (clone == IR_INVALID_VAL) { + clone = clones_count++; + ir_hashtab_add(&hash, j, clone); + clones[clone].block = j; + clones[clone].use_count = 0; + clones[clone].use = -1; + } + uses[uses_count].ref = use; + uses[uses_count].block = i; + uses[uses_count].next = clones[clone].use; + clones[clone].use_count++; + clones[clone].use = uses_count++; } - clone = ir_hashtab_find(&hash, j); - if (clone == IR_INVALID_VAL) { - clone = clones_count++; - ir_hashtab_add(&hash, j, clone); - clones[clone].block = j; - clones[clone].use_count = 0; - clones[clone].use = -1; - } - uses[uses_count].ref = use; - uses[uses_count].block = i; - uses[uses_count].next = clones[clone].use; - clones[clone].use_count++; - clones[clone].use = uses_count++; } } @@ -1007,7 +1007,11 @@ int ir_schedule(ir_ctx *ctx) start = i = bb->start; _xlat[i] = bb->start = insns_count; insn = &ctx->ir_base[i]; - if (insn->op == IR_CASE_VAL) { + if (insn->op == IR_BEGIN) { + if (insn->op2) { + consts_count += ir_count_constant(_xlat, insn->op2); + } + } else if (insn->op == IR_CASE_VAL) { IR_ASSERT(insn->op2 < IR_TRUE); consts_count += ir_count_constant(_xlat, insn->op2); } else if (insn->op == IR_CASE_RANGE) { @@ -1255,7 +1259,7 @@ int ir_schedule(ir_ctx *ctx) const char *proto = ir_get_strl(ctx, new_insn->proto, &len); new_insn->proto = ir_strl(&new_ctx, proto, len); } - } else if (new_insn->op == IR_SYM || new_insn->op == IR_STR) { + } else if (new_insn->op == IR_SYM || new_insn->op == IR_STR || new_insn->op == IR_LABEL) { size_t len; const char *str = ir_get_strl(ctx, new_insn->val.name, &len); new_insn->val.u64 = ir_strl(&new_ctx, str, len); @@ -1292,7 +1296,7 @@ int ir_schedule(ir_ctx *ctx) } else { new_insn->proto = 0; } - } else if (insn->op == IR_SYM || insn->op == IR_STR) { + } else if (insn->op == IR_SYM || insn->op == IR_STR || insn->op == IR_LABEL) { size_t len; const char *str = ir_get_strl(ctx, insn->val.name, &len); new_insn->val.u64 = ir_strl(&new_ctx, str, len); @@ -1364,6 +1368,8 @@ int ir_schedule(ir_ctx *ctx) size_t len; const char *str = ir_get_strl(ctx, insn->op2, &len); new_insn->op2 = ir_strl(&new_ctx, str, len); + } else if (new_insn->op == IR_BEGIN && insn->op2) { + new_insn->op2 = _xlat[insn->op2]; } else { new_insn->op2 = insn->op2; } diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index 2f457cbc99333..dbacc3967d0f7 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -887,7 +887,7 @@ void ir_print_escaped_str(const char *s, size_t len, FILE *f); #define IR_IS_CONST_OP(op) ((op) > IR_NOP && (op) <= IR_C_FLOAT) #define IR_IS_FOLDABLE_OP(op) ((op) <= IR_LAST_FOLDABLE_OP) -#define IR_IS_SYM_CONST(op) ((op) == IR_STR || (op) == IR_SYM || (op) == IR_FUNC) +#define IR_IS_SYM_CONST(op) ((op) == IR_STR || (op) == IR_SYM || (op) == IR_FUNC || (op) == IR_LABEL) ir_ref ir_const_ex(ir_ctx *ctx, ir_val val, uint8_t type, uint32_t optx); @@ -946,12 +946,13 @@ IR_ALWAYS_INLINE bool ir_ref_is_true(ir_ctx 
*ctx, ir_ref ref) #define IR_OPND_UNUSED 0x0 #define IR_OPND_DATA 0x1 #define IR_OPND_CONTROL 0x2 -#define IR_OPND_CONTROL_DEP 0x3 -#define IR_OPND_CONTROL_REF 0x4 -#define IR_OPND_STR 0x5 -#define IR_OPND_NUM 0x6 -#define IR_OPND_PROB 0x7 -#define IR_OPND_PROTO 0x8 +#define IR_OPND_LABEL_REF 0x3 +#define IR_OPND_CONTROL_DEP 0x4 +#define IR_OPND_CONTROL_REF 0x5 +#define IR_OPND_STR 0x6 +#define IR_OPND_NUM 0x7 +#define IR_OPND_PROB 0x8 +#define IR_OPND_PROTO 0x9 #define IR_OP_FLAGS(op_flags, op1_flags, op2_flags, op3_flags) \ ((op_flags) | ((op1_flags) << 20) | ((op2_flags) << 24) | ((op3_flags) << 28)) @@ -1013,6 +1014,7 @@ IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn) #define IR_HAS_VA_ARG_FP (1<<9) #define IR_HAS_FP_RET_SLOT (1<<10) #define IR_16B_FRAME_ALIGNMENT (1<<11) +#define IR_HAS_BLOCK_ADDR (1<<12) /* Temporary: MEM2SSA -> SCCP */ #define IR_MEM2SSA_VARS (1<<25) @@ -1248,11 +1250,10 @@ struct _ir_live_range { #define IR_LIVE_INTERVAL_HAS_HINT_REGS (1<<2) #define IR_LIVE_INTERVAL_HAS_HINT_REFS (1<<3) #define IR_LIVE_INTERVAL_MEM_PARAM (1<<4) -#define IR_LIVE_INTERVAL_MEM_LOAD (1<<5) -#define IR_LIVE_INTERVAL_COALESCED (1<<6) -#define IR_LIVE_INTERVAL_SPILL_SPECIAL (1<<7) /* spill slot is pre-allocated in a special area (see ir_ctx.spill_reserved_base) */ -#define IR_LIVE_INTERVAL_SPILLED (1<<8) -#define IR_LIVE_INTERVAL_SPLIT_CHILD (1<<9) +#define IR_LIVE_INTERVAL_COALESCED (1<<5) +#define IR_LIVE_INTERVAL_SPILL_SPECIAL (1<<6) /* spill slot is pre-allocated in a special area (see ir_ctx.spill_reserved_base) */ +#define IR_LIVE_INTERVAL_SPILLED (1<<7) +#define IR_LIVE_INTERVAL_SPLIT_CHILD (1<<8) struct _ir_live_interval { uint8_t type; diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c index 21c7ee3ac64e5..2e8a8e3f34f3f 100644 --- a/ext/opcache/jit/ir/ir_ra.c +++ b/ext/opcache/jit/ir/ir_ra.c @@ -776,9 +776,6 @@ int ir_compute_live_ranges(ir_ctx *ctx) if (insn->op == IR_PARAM) { /* We may reuse parameter stack slot for spilling */ ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM; - } else if (insn->op == IR_VLOAD) { - /* Load may be fused into the usage instruction */ - ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD; } def_pos = IR_DEF_LIVE_POS_FROM_REF(ref); } @@ -845,11 +842,17 @@ int ir_compute_live_ranges(ir_ctx *ctx) ival = ctx->live_intervals[v]; } ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref); - } else if (ctx->rules) { - if (ctx->rules[input] & IR_FUSED) { - ir_add_fusion_ranges(ctx, ref, input, bb, live); - } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) { - ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2); + } else { + if (ctx->rules) { + if ((ctx->rules[input] & (IR_FUSED|IR_SKIPPED)) == IR_FUSED) { + ir_add_fusion_ranges(ctx, ref, input, bb, live); + } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) { + ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2); + } + } + if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); } } } else if (reg != IR_REG_NONE) { @@ -1396,9 +1399,6 @@ int ir_compute_live_ranges(ir_ctx *ctx) if (insn->op == IR_PARAM) { /* We may reuse parameter stack slot for spilling */ ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM; - } else if (insn->op == IR_VLOAD) { - /* Load may be fused into the usage instruction */ - ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD; } def_pos = IR_DEF_LIVE_POS_FROM_REF(ref); } @@ -1465,17 +1465,17 @@ int 
ir_compute_live_ranges(ir_ctx *ctx) ival = ctx->live_intervals[v]; } ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref); - } else if (ctx->rules) { - if (ctx->rules[input] & IR_FUSED) { - ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b); - } else { - if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) { + } else { + if (ctx->rules) { + if ((ctx->rules[input] & (IR_FUSED|IR_SKIPPED)) == IR_FUSED) { + ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b); + } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) { ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2); } - if (reg != IR_REG_NONE) { - use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); - ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); - } + } + if (reg != IR_REG_NONE) { + use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref); + ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF); } } } else if (reg != IR_REG_NONE) { @@ -1605,7 +1605,7 @@ static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2) } while (*prev && ((*prev)->pos < use_pos->pos || ((*prev)->pos == use_pos->pos && - (use_pos->op_num == 0 || (*prev)->op_num < use_pos->op_num)))) { + (use_pos->op_num == 0 || ((*prev)->op_num != 0 && (*prev)->op_num < use_pos->op_num))))) { if ((*prev)->hint_ref > 0 && ctx->vregs[(*prev)->hint_ref] == r2) { (*prev)->hint_ref = 0; } @@ -1627,9 +1627,6 @@ static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2) ctx->live_intervals[r1]->flags |= IR_LIVE_INTERVAL_COALESCED | (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)); - if (ctx->ir_base[IR_LIVE_POS_TO_REF(ctx->live_intervals[r1]->use_pos->pos)].op != IR_VLOAD) { - ctx->live_intervals[r1]->flags &= ~IR_LIVE_INTERVAL_MEM_LOAD; - } if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) { IR_ASSERT(!(ctx->live_intervals[r1]->flags & IR_LIVE_INTERVAL_MEM_PARAM)); ctx->live_intervals[r1]->flags |= IR_LIVE_INTERVAL_MEM_PARAM; @@ -2343,16 +2340,6 @@ static ir_live_pos ir_first_use_pos_after(ir_live_interval *ival, ir_live_pos po return p ? p->pos : 0x7fffffff; } -static ir_live_pos ir_first_use_pos(ir_live_interval *ival, uint8_t flags) -{ - ir_use_pos *p = ival->use_pos; - - while (p && !(p->flags & flags)) { - p = p->next; - } - return p ? p->pos : 0x7fffffff; -} - static ir_block *ir_block_from_live_pos(ir_ctx *ctx, ir_live_pos pos) { ir_ref ref = IR_LIVE_POS_TO_REF(pos); @@ -3194,7 +3181,6 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li /* split current before its first use position that requires a register */ ir_live_pos split_pos; -spill_current: if (next_use_pos == ival->range.start) { IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0); /* split right after definition */ @@ -3228,7 +3214,6 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li return IR_REG_NONE; } if (split_pos >= blockPos[reg]) { -try_next_available_register: IR_REGSET_EXCL(available, reg); if (IR_REGSET_IS_EMPTY(available)) { fprintf(stderr, "LSRA Internal Error: Unsolvable conflict. 
Allocation is not possible\n"); @@ -3274,23 +3259,6 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li } IR_LOG_LSRA(" ---- Finish", other, ""); } else { - if (ir_first_use_pos(other, IR_USE_MUST_BE_IN_REG) <= other->end) { - if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) { - next_use_pos = ir_first_use_pos(ival, IR_USE_MUST_BE_IN_REG); - if (next_use_pos == ival->range.start) { - IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0); - /* split right after definition */ - split_pos = next_use_pos + 1; - } else { - split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1); - } - - if (split_pos > ival->range.start) { - goto spill_current; - } - } - goto try_next_available_register; - } child = other; other->reg = IR_REG_NONE; if (prev) { @@ -3400,12 +3368,13 @@ static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_reg_alloc_data *data) { ir_use_pos *use_pos = ival->use_pos; - ir_insn *insn; if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) { IR_ASSERT(!ival->next && use_pos && use_pos->op_num == 0); - insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)]; +#if IR_DEBUG + ir_insn *insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)]; IR_ASSERT(insn->op == IR_PARAM); +#endif use_pos = use_pos->next; if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) { return 0; @@ -3418,38 +3387,6 @@ static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_ } } - return 1; - } else if (ival->flags & IR_LIVE_INTERVAL_MEM_LOAD) { - insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)]; - IR_ASSERT(insn->op == IR_VLOAD); - IR_ASSERT(ctx->ir_base[insn->op2].op == IR_VAR); - use_pos = use_pos->next; - if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) { - return 0; - } - - if (use_pos) { - ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos); - if (bb->loop_depth && bb != ir_block_from_live_pos(ctx, ival->use_pos->pos)) { - return 0; - } - /* check if VAR may be clobbered between VLOAD and use */ - ir_use_list *use_list = &ctx->use_lists[insn->op2]; - ir_ref n = use_list->count; - ir_ref *p = &ctx->use_edges[use_list->refs]; - for (; n > 0; p++, n--) { - ir_ref use = *p; - if (ctx->ir_base[use].op == IR_VSTORE) { - if (use > IR_LIVE_POS_TO_REF(ival->use_pos->pos) && use < IR_LIVE_POS_TO_REF(use_pos->pos)) { - return 0; - } - } else if (ctx->ir_base[use].op == IR_VADDR) { - return 0; - } - } - } - ival->stack_spill_pos = ctx->ir_base[insn->op2].op3; - return 1; } return 0; @@ -3554,7 +3491,7 @@ static int ir_linear_scan(ir_ctx *ctx) for (j = ctx->vregs_count; j != 0; j--) { ival = ctx->live_intervals[j]; if (ival) { - if (!(ival->flags & (IR_LIVE_INTERVAL_MEM_PARAM|IR_LIVE_INTERVAL_MEM_LOAD)) + if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) || !ir_ival_spill_for_fuse_load(ctx, ival, &data)) { ir_add_to_unhandled(&unhandled, ival); } diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c index 5ba986fadd481..dd955172950c8 100644 --- a/ext/opcache/jit/ir/ir_save.c +++ b/ext/opcache/jit/ir/ir_save.c @@ -40,6 +40,11 @@ void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, c } else if (flags & IR_BUILTIN_FUNC) { fprintf(f, " __builtin"); } + if (flags & IR_CONST_FUNC) { + fprintf(f, " __const"); + } else if (flags & IR_PURE_FUNC) { + fprintf(f, " __pure"); + } } static void ir_save_dessa_moves(const ir_ctx *ctx, int b, ir_block 
*bb, FILE *f) @@ -109,6 +114,10 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f) fprintf(f, "sym(%s%s)", (save_flags & IR_SAVE_SAFE_NAMES) ? "@" : "", ir_get_str(ctx, insn->val.name)); + } else if (insn->op == IR_LABEL) { + fprintf(f, "label(%s%s)", + (save_flags & IR_SAVE_SAFE_NAMES) ? "@" : "", + ir_get_str(ctx, insn->val.name)); } else if (insn->op == IR_FUNC_ADDR) { fprintf(f, "func *"); ir_print_const(ctx, insn, f, true); @@ -272,6 +281,13 @@ void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f) fprintf(f, "%s%d", first ? "(" : ", ", ref); first = 0; break; + case IR_OPND_LABEL_REF: + if (ref) { + IR_ASSERT(IR_IS_CONST_REF(ref)); + fprintf(f, "%sc_%d", first ? "(" : ", ", -ref); + first = 0; + } + break; } } else if (opnd_kind == IR_OPND_NUM) { fprintf(f, "%s%d", first ? "(" : ", ", ref); diff --git a/ext/opcache/jit/ir/ir_sccp.c b/ext/opcache/jit/ir/ir_sccp.c index 45df92ec2be6f..e2f38a058aeba 100644 --- a/ext/opcache/jit/ir/ir_sccp.c +++ b/ext/opcache/jit/ir/ir_sccp.c @@ -1508,8 +1508,8 @@ static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref) switch (insn->op) { case IR_FP2FP: return 1; - case IR_INT2FP: - return ctx->use_lists[ref].count == 1; +// case IR_INT2FP: +// return ctx->use_lists[ref].count == 1; case IR_NEG: case IR_ABS: return ctx->use_lists[ref].count == 1 && @@ -2110,7 +2110,9 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) { ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type); } else { - ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist); + ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist); + use_insn = &ctx->ir_base[use]; + use_insn->op1 = tmp; } ir_bitqueue_add(worklist, use); } @@ -2119,7 +2121,9 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) { ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type); } else { - ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist); + ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist); + use_insn = &ctx->ir_base[use]; + use_insn->op2 = tmp; } ir_bitqueue_add(worklist, use); } @@ -2147,7 +2151,9 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) { ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type); } else { - ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist); + ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist); + use_insn = &ctx->ir_base[use]; + use_insn->op1 = tmp; } ir_bitqueue_add(worklist, use); } @@ -2156,7 +2162,9 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) { ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type); } else { - ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist); + ir_ref tmp = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist); + use_insn = &ctx->ir_base[use]; + use_insn->op2 = tmp; } ir_bitqueue_add(worklist, use); } @@ -2178,7 +2186,8 @@ static bool ir_try_promote_induction_var_ext(ir_ctx *ctx, ir_ref ext_ref, ir_ref && !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) { ctx->ir_base[phi_ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, 
type); } else { - ctx->ir_base[phi_ref].op2 = ir_ext_ref(ctx, phi_ref, phi_insn->op2, op, type, worklist); + ir_ref tmp = ir_ext_ref(ctx, phi_ref, phi_insn->op2, op, type, worklist); + ctx->ir_base[phi_ref].op2 = tmp; } return 1; @@ -2251,42 +2260,6 @@ static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue * ir_ref prev, next; ir_use_list *use_list; - if (ctx->use_lists[begin].count > 1) { - ir_ref *p, n, i, use; - ir_insn *use_insn; - ir_ref region = end; - ir_ref next = IR_UNUSED; - - while (!IR_IS_BB_START(ctx->ir_base[region].op)) { - region = ctx->ir_base[region].op1; - } - - use_list = &ctx->use_lists[begin]; - n = use_list->count; - for (p = &ctx->use_edges[use_list->refs], i = 0; i < n; p++, i++) { - use = *p; - use_insn = &ctx->ir_base[use]; - if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) { - IR_ASSERT(!next); - next = use; - } else { - IR_ASSERT(use_insn->op == IR_VAR); - IR_ASSERT(use_insn->op1 == begin); - use_insn->op1 = region; - if (ir_use_list_add(ctx, region, use)) { - /* restore after reallocation */ - use_list = &ctx->use_lists[begin]; - n = use_list->count; - p = &ctx->use_edges[use_list->refs + i]; - } - } - } - - IR_ASSERT(next); - ctx->use_edges[use_list->refs] = next; - use_list->count = 1; - } - IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN); IR_ASSERT(ctx->ir_base[end].op == IR_END); IR_ASSERT(ctx->ir_base[begin].op1 == end); @@ -3595,7 +3568,10 @@ void ir_iter_opt(ir_ctx *ctx, ir_bitqueue *worklist) if (!(ctx->flags & IR_OPT_CFG)) { /* pass */ } else if (insn->op == IR_BEGIN) { - if (insn->op1 && ctx->ir_base[insn->op1].op == IR_END) { + if (insn->op1 + && !insn->op2 /* no computed goto label */ + && ctx->use_lists[i].count == 1 + && ctx->ir_base[insn->op1].op == IR_END) { ir_merge_blocks(ctx, insn->op1, i, worklist); } } else if (insn->op == IR_MERGE) { diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 42e4eee7da0fc..7f714dd11d27c 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -66,7 +66,7 @@ IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_ #define IR_SPILL_POS_TO_OFFSET(offset) \ ((ctx->flags & IR_USE_FRAME_POINTER) ? 
\ - ((offset) - (ctx->stack_frame_size - ctx->stack_frame_alignment)) : \ + ((offset) - ctx->stack_frame_size) : \ ((offset) + ctx->call_stack_size)) |.macro ASM_EXPAND_OP_MEM, MACRO, op, type, op1 @@ -892,6 +892,9 @@ typedef struct _ir_backend_data { bool double_abs_const; bool float_abs_const; bool double_zero_const; + bool u2d_const; + bool u2f_const; + bool resolved_label_syms; } ir_backend_data; #define IR_GP_REG_NAME(code, name64, name32, name16, name8, name8h) \ @@ -1087,6 +1090,7 @@ const char *ir_reg_name(int8_t reg, ir_type type) _(SSE_TRUNC) \ _(SSE_NEARBYINT) \ _(BIT_OP) \ + _(IGOTO_DUP) \ #define IR_LEA_FIRST IR_LEA_OB #define IR_LEA_LAST IR_LEA_O_SYM @@ -1110,35 +1114,24 @@ const char *ir_rule_name[IR_LAST_OP] = { static bool ir_may_fuse_addr(ir_ctx *ctx, const ir_insn *addr_insn) { - if (sizeof(void*) == 4) { - return 1; + if (addr_insn->op == IR_LABEL) { + return 0; } else if (IR_IS_SYM_CONST(addr_insn->op)) { void *addr = ir_sym_addr(ctx, addr_insn); if (!addr) { return 0; } - return IR_IS_SIGNED_32BIT((int64_t)(intptr_t)addr); + return (sizeof(void*) == 4) || IR_IS_SIGNED_32BIT((int64_t)(intptr_t)addr); } else { - return IR_IS_SIGNED_32BIT(addr_insn->val.i64); + return (sizeof(void*) == 4) || IR_IS_SIGNED_32BIT(addr_insn->val.i64); } } static bool ir_may_fuse_imm(ir_ctx *ctx, const ir_insn *val_insn) { if (val_insn->type == IR_ADDR) { - if (sizeof(void*) == 4) { - return 1; - } else if (IR_IS_SYM_CONST(val_insn->op)) { - void *addr = ir_sym_addr(ctx, val_insn); - - if (!addr) { - return 0; - } - return IR_IS_SIGNED_32BIT((intptr_t)addr); - } else { - return IR_IS_SIGNED_32BIT(val_insn->val.i64); - } + return ir_may_fuse_addr(ctx, val_insn); } else { return (ir_type_size[val_insn->type] <= 4 || IR_IS_SIGNED_32BIT(val_insn->val.i64)); } @@ -1517,6 +1510,11 @@ op2_const: constraints->tmp_regs[0] = IR_TMP_REG(1, ctx->ir_base[insn->op1].type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; } + if (IR_IS_TYPE_UNSIGNED(ctx->ir_base[insn->op1].type) + && ir_type_size[ctx->ir_base[insn->op1].type] >= sizeof(void*)) { + constraints->tmp_regs[n] = IR_TMP_REG(2, ctx->ir_base[insn->op1].type, IR_USE_SUB_REF, IR_DEF_SUB_REF); + n++; + } break; case IR_ABS_INT: flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; @@ -1542,6 +1540,7 @@ op2_const: case IR_GUARD_NOT: flags = IR_OP2_SHOULD_BE_IN_REG; break; + case IR_IGOTO: case IR_IJMP: flags = IR_OP2_SHOULD_BE_IN_REG; break; @@ -1574,7 +1573,7 @@ op2_const: } break; case IR_VA_ARG: - flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; + flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_DEF_CONFLICTS_WITH_INPUT_REGS; constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); n = 1; insn = &ctx->ir_base[ref]; @@ -1669,7 +1668,9 @@ static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref) do { ir_insn *insn = &ctx->ir_base[*p]; - if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) { + if (insn->op != IR_LOAD + && insn->op != IR_LOAD_v + && ((insn->op != IR_STORE && insn->op != IR_STORE_v) || insn->op3 == addr_ref)) { return; } p++; @@ -1752,7 +1753,7 @@ static bool ir_match_has_mem_deps(ir_ctx *ctx, ir_ref ref, ir_ref root) do { ir_insn *insn = &ctx->ir_base[pos]; - if (insn->op == IR_STORE) { + if (insn->op == IR_STORE || insn->op == IR_STORE_v || insn->op == IR_VSTORE || insn->op == IR_VSTORE_v) { // TODO: check if LOAD and STORE addresses may alias return 1; } else if (insn->op == IR_CALL) { @@ -1766,8 +1767,9 @@ static bool 
ir_match_has_mem_deps(ir_ctx *ctx, ir_ref ref, ir_ref root) static void ir_match_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root) { - if (ir_in_same_block(ctx, ref) - && ctx->ir_base[ref].op == IR_LOAD) { + if (ir_in_same_block(ctx, ref) && + (ctx->ir_base[ref].op == IR_LOAD || ctx->ir_base[ref].op == IR_LOAD_v || + ctx->ir_base[ref].op == IR_VLOAD || ctx->ir_base[ref].op == IR_VLOAD_v)) { if (ctx->use_lists[ref].count == 2 && !ir_match_has_mem_deps(ctx, ref, root)) { ir_ref addr_ref = ctx->ir_base[ref].op2; @@ -1792,7 +1794,7 @@ static bool ir_match_try_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root) ir_insn *insn = &ctx->ir_base[ref]; if (ir_in_same_block(ctx, ref) - && insn->op == IR_LOAD) { + && (insn->op == IR_LOAD || insn->op == IR_LOAD_v || insn->op == IR_VLOAD || insn->op == IR_VLOAD_v)) { if (ctx->use_lists[ref].count == 2 && !ir_match_has_mem_deps(ctx, ref, root)) { ir_ref addr_ref = ctx->ir_base[ref].op2; @@ -1814,8 +1816,6 @@ static bool ir_match_try_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root) && ir_get_param_reg(ctx, ref) == IR_REG_NONE) { return 1; } - } else if (ctx->ir_base[ref].op == IR_VLOAD) { - return 1; } return 0; } @@ -2462,8 +2462,21 @@ binop_fp: case IR_IJMP: ir_match_fuse_load(ctx, insn->op2, ref); return insn->op; + case IR_IGOTO: + if (ctx->ir_base[insn->op1].op == IR_MERGE || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) { + ir_insn *merge = &ctx->ir_base[insn->op1]; + ir_ref *p, n = merge->inputs_count; + + for (p = merge->ops + 1; n > 0; p++, n--) { + ir_ref input = *p; + IR_ASSERT(ctx->ir_base[input].op == IR_END || ctx->ir_base[input].op == IR_LOOP_END); + ctx->rules[input] = IR_IGOTO_DUP; + } + } + ir_match_fuse_load(ctx, insn->op2, ref); + return insn->op; case IR_VAR: - return IR_SKIPPED | IR_VAR; + return IR_STATIC_ALLOCA; case IR_PARAM: #ifndef _WIN64 if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) { @@ -2617,7 +2630,15 @@ store_int: return IR_VSTORE_FP; } break; + case IR_VSTORE_v: + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { + return IR_VSTORE_INT; + } else { + return IR_VSTORE_FP; + } + break; case IR_LOAD: + case IR_LOAD_v: ir_match_fuse_addr(ctx, insn->op2); if (IR_IS_TYPE_INT(insn->type)) { return IR_LOAD_INT; @@ -2635,6 +2656,14 @@ store_int: return IR_STORE_FP; } break; + case IR_STORE_v: + ir_match_fuse_addr(ctx, insn->op2); + if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { + return IR_STORE_INT; + } else { + return IR_STORE_FP; + } + break; case IR_RLOAD: if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) { return IR_SKIPPED | IR_RLOAD; @@ -3175,7 +3204,7 @@ static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref sr | xorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) } } else { - label = ir_const_label(ctx, src); + label = ir_get_const_label(ctx, src); | ASM_FP_REG_TXT_OP movs, type, reg, [=>label] } } @@ -3229,6 +3258,38 @@ static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) } } +static void ir_resolve_label_syms(ir_ctx *ctx) +{ + uint32_t b; + ir_block *bb; + + for (b = 1, bb = &ctx->cfg_blocks[b]; b <= ctx->cfg_blocks_count; bb++, b++) { + ir_insn *insn = &ctx->ir_base[bb->start]; + + if (insn->op == IR_BEGIN && insn->op2) { + IR_ASSERT(ctx->ir_base[insn->op2].op == IR_LABEL); + ctx->ir_base[insn->op2].val.u32_hi = b; + } + } +} + +static void ir_emit_load_label_addr(ir_ctx *ctx, ir_reg reg, ir_insn *label) +{ + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + if (!data->resolved_label_syms) { 
+ data->resolved_label_syms = 1; + ir_resolve_label_syms(ctx); + } + + IR_ASSERT(label->op == IR_LABEL); + int b = label->val.u32_hi; + + b = ir_skip_empty_target_blocks(ctx, b); + | lea Ra(reg), aword [=>b] +} + static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) { if (IR_IS_CONST_REF(src)) { @@ -3241,9 +3302,11 @@ static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) } else if (insn->op == IR_STR) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; - int label = ir_const_label(ctx, src); + int label = ir_get_const_label(ctx, src); | lea Ra(reg), aword [=>label] + } else if (insn->op == IR_LABEL) { + ir_emit_load_label_addr(ctx, reg, insn); } else { ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); } @@ -3289,7 +3352,7 @@ static void ir_emit_store_mem_int_const(ir_ctx *ctx, ir_type type, ir_mem mem, i IR_ASSERT(IR_IS_CONST_REF(src)); if (val_insn->op == IR_STR) { - int label = ir_const_label(ctx, src); + int label = ir_get_const_label(ctx, src); IR_ASSERT(tmp_reg != IR_REG_NONE); |.if X64 @@ -3298,6 +3361,11 @@ static void ir_emit_store_mem_int_const(ir_ctx *ctx, ir_type type, ir_mem mem, i |.else | ASM_TMEM_TXT_OP mov, aword, mem, =>label |.endif + } else if (val_insn->op == IR_LABEL) { + IR_ASSERT(tmp_reg != IR_REG_NONE); + tmp_reg = IR_REG_NUM(tmp_reg); + ir_emit_load_label_addr(ctx, tmp_reg, val_insn); + ir_emit_store_mem_int(ctx, type, mem, tmp_reg); } else { int64_t val = val_insn->val.i64; @@ -3726,7 +3794,8 @@ static ir_mem ir_fuse_load(ir_ctx *ctx, ir_ref root, ir_ref ref) ir_insn *load_insn = &ctx->ir_base[ref]; ir_reg reg; - IR_ASSERT(load_insn->op == IR_LOAD); + IR_ASSERT(load_insn->op == IR_LOAD || load_insn->op == IR_LOAD_v || + load_insn->op == IR_VLOAD || load_insn->op == IR_VLOAD_v); if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) { reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 2); } else { @@ -3762,9 +3831,11 @@ static void ir_emit_load_ex(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src, i } else if (insn->op == IR_STR) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; - int label = ir_const_label(ctx, src); + int label = ir_get_const_label(ctx, src); | lea Ra(reg), aword [=>label] + } else if (insn->op == IR_LABEL) { + ir_emit_load_label_addr(ctx, reg, insn); } else { ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); } @@ -3862,7 +3933,7 @@ static void ir_emit_prologue(ir_ctx *ctx) if (ctx->flags & IR_USE_FRAME_POINTER) { fp = IR_REG_FRAME_POINTER; - offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size); + offset = -(ctx->stack_frame_size - ctx->locals_area_size); } else { fp = IR_REG_STACK_POINTER; offset = ctx->locals_area_size + ctx->call_stack_size; @@ -5607,7 +5678,7 @@ static void ir_emit_binop_sse2(ir_ctx *ctx, ir_ref def, ir_insn *insn) break; } } else if (IR_IS_CONST_REF(op2)) { - int label = ir_const_label(ctx, op2); + int label = ir_get_const_label(ctx, op2); switch (insn->op) { default: @@ -5714,7 +5785,7 @@ static void ir_emit_binop_avx(ir_ctx *ctx, ir_ref def, ir_insn *insn) break; } } else if (IR_IS_CONST_REF(op2)) { - int label = ir_const_label(ctx, op2); + int label = ir_get_const_label(ctx, op2); switch (insn->op) { default: @@ -6126,7 +6197,7 @@ static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref root, ir_ref cmp_ref, ir_ } | ASM_FP_REG_REG_OP ucomis, type, op1_reg, op2_reg } else if (IR_IS_CONST_REF(op2)) { - int label = ir_const_label(ctx, op2); + int label = ir_get_const_label(ctx, op2); | 
ASM_FP_REG_TXT_OP ucomis, type, op1_reg, [=>label] } else { @@ -6975,7 +7046,7 @@ static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) } else if ((type == IR_FLOAT && value->val.f == 1.0) || (type == IR_DOUBLE && value->val.d == 1.0)) { | fld1 } else { - int label = ir_const_label(ctx, insn->op2); + int label = ir_get_const_label(ctx, insn->op2); if (type == IR_DOUBLE) { | fld qword [=>label] @@ -7260,7 +7331,20 @@ static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_emit_load(ctx, src_type, op1_reg, insn->op1); } if (op1_reg != def_reg) { +#ifdef IR_TARGET_X86 + if (ir_type_size[dst_type] == 1 + && (op1_reg == IR_REG_RBP || op1_reg == IR_REG_RSI || op1_reg == IR_REG_RDI)) { + ir_backend_data *data = ctx->data; + dasm_State **Dst = &data->dasm_state; + + ir_emit_mov(ctx, src_type, def_reg, op1_reg); + | and Rb(def_reg), 0xff + } else { + ir_emit_mov(ctx, dst_type, def_reg, op1_reg); + } +#else ir_emit_mov(ctx, dst_type, def_reg, op1_reg); +#endif } } else { ir_emit_load_ex(ctx, dst_type, def_reg, insn->op1, def); @@ -7385,7 +7469,7 @@ static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn) } } } else if (IR_IS_CONST_REF(insn->op1)) { - int label = ir_const_label(ctx, insn->op1); + int label = ir_get_const_label(ctx, insn->op1); | ASM_FP_REG_TXT_OP movs, dst_type, def_reg, [=>label] } else { @@ -7417,13 +7501,80 @@ static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) IR_ASSERT(IR_IS_TYPE_INT(src_type)); IR_ASSERT(IR_IS_TYPE_FP(dst_type)); IR_ASSERT(def_reg != IR_REG_NONE); + + if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { + op1_reg = IR_REG_NUM(op1_reg); + ir_emit_load(ctx, src_type, op1_reg, insn->op1); + } + + if (IR_IS_TYPE_UNSIGNED(src_type) && ir_type_size[src_type] >= sizeof(void*)) { + ir_reg tmp_reg = ctx->regs[def][2]; + + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (op1_reg == IR_REG_NONE) { + if (IR_IS_CONST_REF(insn->op1)) { + IR_ASSERT(0); + } else { + ir_mem mem; + + if (ir_rule(ctx, insn->op1) & IR_FUSED) { + mem = ir_fuse_load(ctx, def, insn->op1); + } else { + mem = ir_ref_spill_slot(ctx, insn->op1); + } + ir_emit_load_mem_int(ctx, src_type, tmp_reg, mem); + op1_reg = tmp_reg; + } + } + if (sizeof(void*) == 4) { + if (tmp_reg == op1_reg) { + | add Rd(op1_reg), 0x80000000 + } else { + | lea Rd(tmp_reg), dword [Rd(op1_reg)+0x80000000] + op1_reg = tmp_reg; + } + } else { +|.if X64 + | test Rq(op1_reg), Rq(op1_reg) + | js >1 + |.cold_code + |1: + if (tmp_reg != op1_reg) { + | mov Rq(tmp_reg), Rq(op1_reg) + } + | shr Rq(tmp_reg), 1 + | adc Rq(tmp_reg), 0 + if (dst_type == IR_DOUBLE) { + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(tmp_reg) + | vaddsd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rq(tmp_reg) + | addsd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + } + } else { + IR_ASSERT(dst_type == IR_FLOAT); + if (ctx->mflags & IR_X86_AVX) { + | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(tmp_reg) + | vaddss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + } else { + | pxor xmm(def_reg-IR_REG_FP_FIRST), 
xmm(def_reg-IR_REG_FP_FIRST) + | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rq(tmp_reg) + | addss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) + } + } + | jmp >2 + |.code +|.endif + } + } + if (op1_reg != IR_REG_NONE) { bool src64 = 0; - if (IR_REG_SPILLED(op1_reg)) { - op1_reg = IR_REG_NUM(op1_reg); - ir_emit_load(ctx, src_type, op1_reg, insn->op1); - } if (IR_IS_TYPE_SIGNED(src_type)) { if (ir_type_size[src_type] < 4) { |.if X64 @@ -7462,7 +7613,6 @@ static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) || } |.endif } else { - // TODO: uint64_t -> double src64 = 1; } } @@ -7508,6 +7658,40 @@ static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) } |.endif } + |2: + if (sizeof(void*) == 4 && IR_IS_TYPE_UNSIGNED(src_type) && ir_type_size[src_type] >= sizeof(void*)) { + if (dst_type == IR_DOUBLE) { + uint32_t c = (sizeof(void*) == 4) ? 0x41e00000 : 0x43e00000; + if (!data->u2d_const) { + data->u2d_const = 1; + ir_rodata(ctx); + |.align 8 + |->u2d_const: + |.dword 0, c + |.code + } + if (ctx->mflags & IR_X86_AVX) { + | vaddsd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [->u2d_const] + } else { + | addsd xmm(def_reg-IR_REG_FP_FIRST), qword [->u2d_const] + } + } else { + uint32_t c = (sizeof(void*) == 4) ? 0x4f000000 : 0x5f000000; + if (!data->u2f_const) { + data->u2f_const = 1; + ir_rodata(ctx); + |.align 4 + |->u2f_const: + |.dword c + |.code + } + if (ctx->mflags & IR_X86_AVX) { + | vaddss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [->u2f_const] + } else { + | addss xmm(def_reg-IR_REG_FP_FIRST), dword [->u2f_const] + } + } + } } else if (IR_IS_CONST_REF(insn->op1)) { IR_ASSERT(0); } else { @@ -7625,7 +7809,7 @@ static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) |.endif } } else if (IR_IS_CONST_REF(insn->op1)) { - int label = ir_const_label(ctx, insn->op1); + int label = ir_get_const_label(ctx, insn->op1); if (!dst64) { if (src_type == IR_DOUBLE) { @@ -7746,7 +7930,7 @@ static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) } } } else if (IR_IS_CONST_REF(insn->op1)) { - int label = ir_const_label(ctx, insn->op1); + int label = ir_get_const_label(ctx, insn->op1); if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { @@ -8429,7 +8613,7 @@ static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) if (ctx->flags & IR_USE_FRAME_POINTER) { fp = IR_REG_FRAME_POINTER; - reg_save_area_offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size); + reg_save_area_offset = -(ctx->stack_frame_size - ctx->locals_area_size); overflow_arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; } else { fp = IR_REG_STACK_POINTER; @@ -8588,11 +8772,11 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) } | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) } else { - int size = (uint32_t)insn->op3 >> 3; + int size = IR_VA_ARG_SIZE(insn->op3); if (def_reg != IR_REG_NONE) { IR_ASSERT(type == IR_ADDR); - int align = 1U << (insn->op3 & 0x7); + int align = IR_VA_ARG_ALIGN(insn->op3); if (align > (int)sizeof(void*)) { | add Ra(tmp_reg), (align-1) @@ -8604,7 +8788,7 @@ static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) } #endif | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) - if (def_reg && IR_REG_SPILLED(ctx->regs[def][0])) { + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } #elif defined(IR_TARGET_X64) @@ -8632,8 +8816,8 @@ static void ir_emit_va_arg(ir_ctx *ctx, 
ir_ref def, ir_insn *insn) if (insn->op3) { /* long struct argument */ IR_ASSERT(type == IR_ADDR); - int align = 1U << (insn->op3 & 0x7); - int size = (uint32_t)insn->op3 >> 3; + int align = IR_VA_ARG_ALIGN(insn->op3); + int size = IR_VA_ARG_SIZE(insn->op3); | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))] if (align > (int)sizeof(void*)) { @@ -9701,6 +9885,19 @@ static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_reg op2_reg = ctx->regs[def][2]; if (IR_IS_CONST_REF(insn->op2)) { + if (ctx->ir_base[insn->op2].op == IR_LABEL) { + if (!data->resolved_label_syms) { + data->resolved_label_syms = 1; + ir_resolve_label_syms(ctx); + } + + uint32_t target = ctx->ir_base[insn->op2].val.u32_hi; + target = ir_skip_empty_target_blocks(ctx, target); + + | jmp =>target + return; + } + void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { @@ -10478,6 +10675,7 @@ static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_re { ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = IR_SPILL_POS_TO_OFFSET(offset); IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE); if (IR_IS_TYPE_INT(type)) { @@ -10518,6 +10716,7 @@ static void ir_emit_load_params(ir_ctx *ctx) const int8_t *int_reg_params = _ir_int_reg_params; const int8_t *fp_reg_params = _ir_fp_reg_params; int32_t stack_offset = 0; + int32_t stack_start = 0; #ifdef IR_TARGET_X86 if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { @@ -10529,9 +10728,11 @@ #endif if (ctx->flags & IR_USE_FRAME_POINTER) { - stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */ + /* skip old frame pointer and return address */ + stack_start = sizeof(void*) * 2 + ctx->stack_frame_size; } else { - stack_offset = sizeof(void*) + ctx->stack_frame_size + ctx->call_stack_size; /* skip return address */ + /* skip return address */ + stack_start = sizeof(void*) + ctx->stack_frame_size; } n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { @@ -10573,12 +10774,9 @@ if (ctx->vregs[use]) { dst_reg = IR_REG_NUM(ctx->regs[use][0]); IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE || - stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos + - ((ctx->flags & IR_USE_FRAME_POINTER) ? 
- -(ctx->stack_frame_size - ctx->stack_frame_alignment) : - ctx->call_stack_size)); + stack_start + stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos); if (src_reg != dst_reg) { - ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset); + ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_start + stack_offset); } if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) { ir_emit_store(ctx, insn->type, use, dst_reg); @@ -10665,7 +10863,7 @@ static void ir_fix_param_spills(ir_ctx *ctx) if (ctx->flags & IR_USE_FRAME_POINTER) { /* skip old frame pointer and return address */ - stack_start = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment); + stack_start = sizeof(void*) * 2 + ctx->stack_frame_size; } else { /* skip return address */ stack_start = sizeof(void*) + ctx->stack_frame_size; @@ -10786,6 +10984,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) case IR_MERGE: case IR_LOOP_BEGIN: case IR_LOOP_END: + case IR_IGOTO_DUP: break; #ifndef IR_REG_FP_RET1 case IR_CALL: @@ -10810,7 +11009,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) IR_REGSET_EXCL(available, reg); ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; } else if (def_flags & IR_USE_MUST_BE_IN_REG) { - if (insn->op == IR_VLOAD + if ((insn->op == IR_VLOAD || insn->op == IR_VLOAD_v) && ctx->live_intervals[ctx->vregs[i]] && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1 && ir_is_same_mem_var(ctx, i, ctx->ir_base[insn->op2].op3)) { @@ -10850,7 +11049,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; use_insn = &ctx->ir_base[use]; - if (use_insn->op == IR_VLOAD) { + if (use_insn->op == IR_VLOAD || use_insn->op == IR_VLOAD_v) { if (ctx->vregs[use] && !ctx->live_intervals[ctx->vregs[use]]) { ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); @@ -10861,7 +11060,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) ival->vreg = ctx->vregs[use]; ival->stack_spill_pos = stack_spill_pos; } - } else if (use_insn->op == IR_VSTORE) { + } else if (use_insn->op == IR_VSTORE || use_insn->op == IR_VSTORE_v) { if (!IR_IS_CONST_REF(use_insn->op3) && ctx->vregs[use_insn->op3] && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { @@ -11006,7 +11205,6 @@ void ir_fix_stack_frame(ir_ctx *ctx) ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); ctx->stack_frame_size += additional_size; - ctx->stack_frame_alignment = 0; ctx->call_stack_size = 0; if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) { @@ -11014,12 +11212,10 @@ void ir_fix_stack_frame(ir_ctx *ctx) if (!(ctx->flags & IR_FUNCTION)) { while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { ctx->stack_frame_size += sizeof(void*); - ctx->stack_frame_alignment += sizeof(void*); } } else if (ctx->flags & IR_USE_FRAME_POINTER) { while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) { ctx->stack_frame_size += sizeof(void*); - ctx->stack_frame_alignment += sizeof(void*); } } else { if (!(ctx->flags & IR_NO_STACK_COMBINE)) { @@ -11028,7 +11224,6 @@ void ir_fix_stack_frame(ir_ctx *ctx) while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*), 16) != ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*)) { ctx->stack_frame_size += sizeof(void*); - ctx->stack_frame_alignment += sizeof(void*); } } } @@ -11061,6 +11256,8 @@ void *ir_emit_code(ir_ctx 
*ctx, size_t *size_ptr) int ret; void *entry; size_t size; + ir_ref igoto_dup_ref = IR_UNUSED; + uint32_t igoto_dup_block = 0; data.ra_data.unused_slot_4 = 0; data.ra_data.unused_slot_2 = 0; @@ -11073,11 +11270,13 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) data.double_abs_const = 0; data.float_abs_const = 0; data.double_zero_const = 0; + data.u2d_const = 0; + data.u2f_const = 0; + data.resolved_label_syms = 0; ctx->data = &data; if (!ctx->live_intervals) { ctx->stack_frame_size = 0; - ctx->stack_frame_alignment = 0; ctx->call_stack_size = 0; ctx->used_preserved_regs = 0; ir_allocate_unique_spill_slots(ctx); @@ -11099,7 +11298,6 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) } ctx->stack_frame_size = ctx->fixed_stack_frame_size; ctx->call_stack_size = ctx->fixed_call_stack_size; - ctx->stack_frame_alignment = 0; } Dst = &data.dasm_state; @@ -11420,6 +11618,35 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) case IR_TAILCALL: ir_emit_tailcall(ctx, i, insn); break; + case IR_IGOTO_DUP: + if (bb->flags & IR_BB_DESSA_MOVES) { + ir_emit_dessa_moves(ctx, b, bb); + } + IR_ASSERT(!igoto_dup_ref && !igoto_dup_block); + igoto_dup_ref = i; + igoto_dup_block = b; + b = ctx->cfg_edges[bb->successors]; + bb = &ctx->cfg_blocks[b]; + i = bb->start; + insn = &ctx->ir_base[i]; + rule = &ctx->rules[i]; + break; + case IR_IGOTO: + if ((ctx->ir_base[insn->op1].op == IR_MERGE || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) + && (ctx->rules[ctx->ir_base[insn->op1].op1] & IR_RULE_MASK) == IR_IGOTO_DUP + && igoto_dup_ref) { + ir_emit_ijmp(ctx, i, insn); + b = igoto_dup_block; + bb = &ctx->cfg_blocks[b]; + i = igoto_dup_ref; + insn = &ctx->ir_base[i]; + rule = &ctx->rules[i]; + igoto_dup_block = 0; + igoto_dup_ref = 0; + break; + } + IR_ASSERT(!igoto_dup_ref && !igoto_dup_block); + IR_FALLTHROUGH; case IR_IJMP: ir_emit_ijmp(ctx, i, insn); break; @@ -11449,6 +11676,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) ir_emit_vaddr(ctx, i, insn); break; case IR_VLOAD: + case IR_VLOAD_v: ir_emit_vload(ctx, i, insn); break; case IR_VSTORE_INT: @@ -11691,6 +11919,28 @@ next_block:; } while (i != 0); } + if ((ctx->flags2 & IR_HAS_BLOCK_ADDR) && ctx->loader && ctx->loader->add_label) { + for (b = 1, bb = &ctx->cfg_blocks[b]; b <= ctx->cfg_blocks_count; bb++, b++) { + ir_insn *insn = &ctx->ir_base[bb->start]; + + if (insn->op == IR_BEGIN && insn->op2) { + IR_ASSERT(ctx->ir_base[insn->op2].op == IR_LABEL); + ctx->ir_base[insn->op2].val.u32_hi = 0; + ctx->loader->add_label(ctx->loader, ir_get_str(ctx, ctx->ir_base[insn->op2].val.str), + (char*)entry + dasm_getpclabel(&data.dasm_state, ir_skip_empty_target_blocks(ctx, b))); + } + } + } else if (data.resolved_label_syms) { + for (b = 1, bb = &ctx->cfg_blocks[b]; b <= ctx->cfg_blocks_count; bb++, b++) { + ir_insn *insn = &ctx->ir_base[bb->start]; + + if (insn->op == IR_BEGIN && insn->op2) { + IR_ASSERT(ctx->ir_base[insn->op2].op == IR_LABEL); + ctx->ir_base[insn->op2].val.u32_hi = 0; + } + } + } + dasm_free(&data.dasm_state); ir_mem_flush(entry, size);
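For context, the IR_LABEL/IGOTO machinery threaded through this patch supports taking the address of a basic block and jumping to it indirectly, as in GNU C's labels-as-values extension. A minimal example of the kind of source construct being lowered (illustrative only, not taken from the test suite):

/* GNU C labels-as-values: "&&name" yields a code address (an IR_LABEL
 * constant in the IR) and "goto *p" becomes an IGOTO instruction. */
int dispatch(int op)
{
    static void *table[] = { &&do_inc, &&do_ret };
    int acc = 0;

    goto *table[op & 1];
do_inc:
    acc++;
do_ret:
    return acc;
}

At emit time the label constant is bound to its block number through val.u32_hi (ir_resolve_label_syms), materialized with a lea when its address is needed, collapsed into a direct jmp when IGOTO's operand is a constant label, and finally reported to the loader via add_label once code addresses are known.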