diff --git a/src/devices/cpu/drcbec.cpp b/src/devices/cpu/drcbec.cpp index 14cac9e550136..143fbfeb2483e 100644 --- a/src/devices/cpu/drcbec.cpp +++ b/src/devices/cpu/drcbec.cpp @@ -181,6 +181,7 @@ enum // compute C and V flags for 32-bit add/subtract #define FLAGS32_C_ADD(a,b) ((uint32_t)~(a) < (uint32_t)(b)) #define FLAGS32_C_SUB(a,b) ((uint32_t)(b) > (uint32_t)(a)) +#define FLAGS32_C_SUBC(a,b,c) (((uint32_t)(c) != 0 && ((uint32_t)(b) + (uint32_t)(c)) == 0) || (uint32_t)(b) + (uint32_t)(c) > (uint32_t)(a)) #define FLAGS32_V_SUB(r,a,b) (((((a) ^ (b)) & ((a) ^ (r))) >> 30) & FLAG_V) #define FLAGS32_V_ADD(r,a,b) (((~((a) ^ (b)) & ((a) ^ (r))) >> 30) & FLAG_V) @@ -188,10 +189,12 @@ enum #define FLAGS32_NZ(v) ((((v) >> 28) & FLAG_S) | (((uint32_t)(v) == 0) << 2)) #define FLAGS32_NZCV_ADD(r,a,b) (FLAGS32_NZ(r) | FLAGS32_C_ADD(a,b) | FLAGS32_V_ADD(r,a,b)) #define FLAGS32_NZCV_SUB(r,a,b) (FLAGS32_NZ(r) | FLAGS32_C_SUB(a,b) | FLAGS32_V_SUB(r,a,b)) +#define FLAGS32_NZCV_SUBC(r,a,b,c) (FLAGS32_NZ(r) | FLAGS32_C_SUBC(a,b,c) | FLAGS32_V_SUB(r,a,b)) // compute C and V flags for 64-bit add/subtract #define FLAGS64_C_ADD(a,b) ((uint64_t)~(a) < (uint64_t)(b)) #define FLAGS64_C_SUB(a,b) ((uint64_t)(b) > (uint64_t)(a)) +#define FLAGS64_C_SUBC(a,b,c) (((uint64_t)(c) != 0 && ((uint64_t)(b) + (uint64_t)(c)) == 0) || (uint64_t)(b) + (uint64_t)(c) > (uint64_t)(a)) #define FLAGS64_V_SUB(r,a,b) (((((a) ^ (b)) & ((a) ^ (r))) >> 62) & FLAG_V) #define FLAGS64_V_ADD(r,a,b) (((~((a) ^ (b)) & ((a) ^ (r))) >> 62) & FLAG_V) @@ -199,6 +202,7 @@ enum #define FLAGS64_NZ(v) ((((v) >> 60) & FLAG_S) | (((uint64_t)(v) == 0) << 2)) #define FLAGS64_NZCV_ADD(r,a,b) (FLAGS64_NZ(r) | FLAGS64_C_ADD(a,b) | FLAGS64_V_ADD(r,a,b)) #define FLAGS64_NZCV_SUB(r,a,b) (FLAGS64_NZ(r) | FLAGS64_C_SUB(a,b) | FLAGS64_V_SUB(r,a,b)) +#define FLAGS64_NZCV_SUBC(r,a,b,c) (FLAGS64_NZ(r) | FLAGS64_C_SUBC(a,b,c) | FLAGS64_V_SUB(r,a,b)) @@ -359,9 +363,10 @@ void drcbe_c::generate(drcuml_block &block, const instruction *instlist, uint32_ m_labels.set_codeptr(inst.param(0).label(), (drccodeptr)dst); break; - // ignore COMMENT and NOP opcodes + // ignore COMMENT, NOP, and BREAK opcodes case OP_COMMENT: case OP_NOP: + case OP_BREAK: break; // when we hit a MAPVAR opcode, log the change for the current PC @@ -518,6 +523,10 @@ int drcbe_c::execute(code_handle &entry) // these opcodes should be processed at compile-time only fatalerror("Unexpected opcode\n"); + case MAKE_OPCODE_SHORT(OP_BREAK, 4, 0): + osd_break_into_debugger("break from drc"); + break; + case MAKE_OPCODE_SHORT(OP_DEBUG, 4, 0): // DEBUG pc if (m_device.machine().debug_flags & DEBUG_FLAG_CALL_HOOK) m_device.debug()->instruction_hook(PARAM0); @@ -628,6 +637,11 @@ int drcbe_c::execute(code_handle &entry) PARAM0 = flags & PARAM1; break; + case MAKE_OPCODE_SHORT(OP_SETFLGS, 4, 0): // SETFLGS src + case MAKE_OPCODE_SHORT(OP_SETFLGS, 4, 1): + flags = PARAM0; + break; + case MAKE_OPCODE_SHORT(OP_SAVE, 4, 0): // SAVE dst *inst[0].state = m_state; inst[0].state->flags = flags; @@ -826,7 +840,8 @@ int drcbe_c::execute(code_handle &entry) m_space[PARAM3]->write_dword(PARAM0, PARAM1, PARAM2); break; - case MAKE_OPCODE_SHORT(OP_CARRY, 4, 1): // CARRY src,bitnum + case MAKE_OPCODE_SHORT(OP_CARRY, 4, 0): // CARRY src,bitnum + case MAKE_OPCODE_SHORT(OP_CARRY, 4, 1): flags = (flags & ~FLAG_C) | ((PARAM0 >> (PARAM1 & 31)) & FLAG_C); break; @@ -930,16 +945,7 @@ int drcbe_c::execute(code_handle &entry) case MAKE_OPCODE_SHORT(OP_SUBB, 4, 1): temp32 = PARAM1 - PARAM2 - (flags & FLAG_C); - temp64 = (uint64_t)PARAM1 - 
(uint64_t)PARAM2 - (uint64_t)(flags & FLAG_C); - if (PARAM2 + 1 != 0) - flags = FLAGS32_NZCV_SUB(temp32, PARAM1, PARAM2 + (flags & FLAG_C)); - else - { - flags = FLAGS32_NZCV_SUB(temp32, PARAM1 - (flags & FLAG_C), PARAM2); - flags &= ~(FLAG_C | FLAG_V); - flags |= ((temp64>>32) & 1) ? FLAG_C : 0; - flags |= (((PARAM1) ^ (PARAM2)) & ((PARAM1) ^ (temp64)) & 0x80000000) ? FLAG_V : 0; - } + flags = FLAGS32_NZCV_SUBC(temp32, PARAM1, PARAM2, flags & FLAG_C); PARAM0 = temp32; break; @@ -964,6 +970,20 @@ int drcbe_c::execute(code_handle &entry) flags |= FLAG_V; break; + case MAKE_OPCODE_SHORT(OP_MULULW, 4, 0): // MULULW dst,src1,src2[,f] + temp64 = mulu_32x32(PARAM1, PARAM2); + PARAM0 = (uint32_t)temp64; + break; + + case MAKE_OPCODE_SHORT(OP_MULULW, 4, 1): + temp64 = mulu_32x32(PARAM1, PARAM2); + temp32 = (uint32_t)temp64; + flags = FLAGS32_NZ(temp32); + PARAM0 = temp32; + if (temp64 > temp32) + flags |= FLAG_V; + break; + case MAKE_OPCODE_SHORT(OP_MULS, 4, 0): // MULS dst,edst,src1,src2[,f] temp64 = mul_32x32(PARAM2, PARAM3); PARAM1 = temp64 >> 32; @@ -972,11 +992,24 @@ int drcbe_c::execute(code_handle &entry) case MAKE_OPCODE_SHORT(OP_MULS, 4, 1): temp64 = mul_32x32(PARAM2, PARAM3); - temp32 = (int32_t)temp64; - flags = FLAGS32_NZ(temp32); + flags = FLAGS64_NZ(temp64); PARAM1 = temp64 >> 32; PARAM0 = (uint32_t)temp64; - if (temp64 != (int32_t)temp64) + if ((int64_t)temp64 != (int32_t)temp64) + flags |= FLAG_V; + break; + + case MAKE_OPCODE_SHORT(OP_MULSLW, 4, 0): // MULSLW dst,src1,src2[,f] + temp64 = mul_32x32(PARAM1, PARAM2); + PARAM0 = (int32_t)temp64; + break; + + case MAKE_OPCODE_SHORT(OP_MULSLW, 4, 1): + temp64 = mul_32x32(PARAM1, PARAM2); + temp32 = (int32_t)temp64; + flags = FLAGS32_NZ(temp32); + PARAM0 = temp32; + if ((int64_t)temp64 != (int32_t)temp64) flags |= FLAG_V; break; @@ -1084,8 +1117,8 @@ int drcbe_c::execute(code_handle &entry) case MAKE_OPCODE_SHORT(OP_BSWAP, 4, 1): temp32 = PARAM1; - flags = FLAGS32_NZ(temp32); PARAM0 = swapendian_int32(temp32); + flags = FLAGS32_NZ(PARAM0); break; case MAKE_OPCODE_SHORT(OP_SHL, 4, 0): // SHL dst,src,count[,f] @@ -1154,6 +1187,8 @@ int drcbe_c::execute(code_handle &entry) PARAM0 = (PARAM1 << shift) | ((flags & FLAG_C) << (shift - 1)) | (PARAM1 >> (33 - shift)); else if (shift == 1) PARAM0 = (PARAM1 << shift) | (flags & FLAG_C); + else + PARAM0 = PARAM1; break; case MAKE_OPCODE_SHORT(OP_ROLC, 4, 1): @@ -1164,8 +1199,11 @@ int drcbe_c::execute(code_handle &entry) temp32 = (PARAM1 << shift) | (flags & FLAG_C); else temp32 = PARAM1; - flags = FLAGS32_NZ(temp32); - if (shift != 0) flags |= ((PARAM1 << (shift - 1)) >> 31) & FLAG_C; + if (shift != 0) + { + flags = FLAGS32_NZ(temp32); + flags |= ((PARAM1 << (shift - 1)) >> 31) & FLAG_C; + } PARAM0 = temp32; break; @@ -1176,8 +1214,11 @@ int drcbe_c::execute(code_handle &entry) case MAKE_OPCODE_SHORT(OP_ROR, 4, 1): shift = PARAM2 & 31; temp32 = rotr_32(PARAM1, shift); - flags = FLAGS32_NZ(temp32); - if (shift != 0) flags |= (PARAM1 >> (shift - 1)) & FLAG_C; + if (shift != 0) + { + flags = FLAGS32_NZ(temp32); + flags |= (PARAM1 >> (shift - 1)) & FLAG_C; + } PARAM0 = temp32; break; @@ -1187,6 +1228,8 @@ int drcbe_c::execute(code_handle &entry) PARAM0 = (PARAM1 >> shift) | (((flags & FLAG_C) << 31) >> (shift - 1)) | (PARAM1 << (33 - shift)); else if (shift == 1) PARAM0 = (PARAM1 >> shift) | ((flags & FLAG_C) << 31); + else + PARAM0 = PARAM1; break; case MAKE_OPCODE_SHORT(OP_RORC, 4, 1): @@ -1197,8 +1240,11 @@ int drcbe_c::execute(code_handle &entry) temp32 = (PARAM1 >> shift) | ((flags & FLAG_C) << 
31); else temp32 = PARAM1; - flags = FLAGS32_NZ(temp32); - if (shift != 0) flags |= (PARAM1 >> (shift - 1)) & FLAG_C; + if (shift != 0) + { + flags = FLAGS32_NZ(temp32); + flags |= (PARAM1 >> (shift - 1)) & FLAG_C; + } PARAM0 = temp32; break; @@ -1454,6 +1500,7 @@ int drcbe_c::execute(code_handle &entry) break; case MAKE_OPCODE_SHORT(OP_CARRY, 8, 0): // DCARRY src,bitnum + case MAKE_OPCODE_SHORT(OP_CARRY, 8, 1): flags = (flags & ~FLAG_C) | ((DPARAM0 >> (DPARAM1 & 63)) & FLAG_C); break; @@ -1559,10 +1606,7 @@ int drcbe_c::execute(code_handle &entry) case MAKE_OPCODE_SHORT(OP_SUBB, 8, 1): temp64 = DPARAM1 - DPARAM2 - (flags & FLAG_C); - if (DPARAM2 + 1 != 0) - flags = FLAGS64_NZCV_SUB(temp64, DPARAM1, DPARAM2 + (flags & FLAG_C)); - else - flags = FLAGS64_NZCV_SUB(temp64, DPARAM1 - (flags & FLAG_C), DPARAM2); + flags = FLAGS64_NZCV_SUBC(temp64, DPARAM1, DPARAM2, flags & FLAG_C); DPARAM0 = temp64; break; @@ -1579,6 +1623,15 @@ int drcbe_c::execute(code_handle &entry) flags = dmulu(*inst[0].puint64, *inst[1].puint64, DPARAM2, DPARAM3, true); break; + case MAKE_OPCODE_SHORT(OP_MULULW, 8, 0): // DMULULW dst,src1,src2[,f] + dmulu(*inst[0].puint64, *inst[0].puint64, DPARAM1, DPARAM2, false); + break; + + case MAKE_OPCODE_SHORT(OP_MULULW, 8, 1): + flags = dmulu(*inst[0].puint64, *inst[0].puint64, DPARAM1, DPARAM2, true); + flags = FLAGS64_NZ(DPARAM0) | (flags & FLAG_V); + break; + case MAKE_OPCODE_SHORT(OP_MULS, 8, 0): // DMULS dst,edst,src1,src2[,f] dmuls(*inst[0].puint64, *inst[1].puint64, DPARAM2, DPARAM3, false); break; @@ -1587,6 +1640,15 @@ int drcbe_c::execute(code_handle &entry) flags = dmuls(*inst[0].puint64, *inst[1].puint64, DPARAM2, DPARAM3, true); break; + case MAKE_OPCODE_SHORT(OP_MULSLW, 8, 0): // DMULSLW dst,src1,src2[,f] + dmuls(*inst[0].puint64, *inst[0].puint64, DPARAM1, DPARAM2, false); + break; + + case MAKE_OPCODE_SHORT(OP_MULSLW, 8, 1): + flags = dmuls(*inst[0].puint64, *inst[0].puint64, DPARAM1, DPARAM2, true); + flags = FLAGS64_NZ(DPARAM0) | (flags & FLAG_V); + break; + case MAKE_OPCODE_SHORT(OP_DIVU, 8, 0): // DDIVU dst,edst,src1,src2[,f] if (DPARAM3 != 0) { @@ -1691,8 +1753,8 @@ int drcbe_c::execute(code_handle &entry) case MAKE_OPCODE_SHORT(OP_BSWAP, 8, 1): temp64 = DPARAM1; - flags = FLAGS64_NZ(temp64); DPARAM0 = swapendian_int64(temp64); + flags = FLAGS64_NZ(DPARAM0); break; case MAKE_OPCODE_SHORT(OP_SHL, 8, 0): // DSHL dst,src,count[,f] @@ -1702,8 +1764,11 @@ int drcbe_c::execute(code_handle &entry) case MAKE_OPCODE_SHORT(OP_SHL, 8, 1): shift = DPARAM2 & 63; temp64 = DPARAM1 << shift; - flags = FLAGS64_NZ(temp64); - if (shift != 0) flags |= ((DPARAM1 << (shift - 1)) >> 63) & FLAG_C; + if (shift != 0) + { + flags = FLAGS64_NZ(temp64); + flags |= ((DPARAM1 << (shift - 1)) >> 63) & FLAG_C; + } DPARAM0 = temp64; break; @@ -1714,8 +1779,11 @@ int drcbe_c::execute(code_handle &entry) case MAKE_OPCODE_SHORT(OP_SHR, 8, 1): shift = DPARAM2 & 63; temp64 = DPARAM1 >> shift; - flags = FLAGS64_NZ(temp64); - if (shift != 0) flags |= (DPARAM1 >> (shift - 1)) & FLAG_C; + if (shift != 0) + { + flags = FLAGS64_NZ(temp64); + flags |= (DPARAM1 >> (shift - 1)) & FLAG_C; + } DPARAM0 = temp64; break; @@ -1725,9 +1793,12 @@ int drcbe_c::execute(code_handle &entry) case MAKE_OPCODE_SHORT(OP_SAR, 8, 1): shift = DPARAM2 & 63; - temp64 = (int32_t)DPARAM1 >> shift; - flags = FLAGS64_NZ(temp64); - if (shift != 0) flags |= (DPARAM1 >> (shift - 1)) & FLAG_C; + temp64 = (int64_t)DPARAM1 >> shift; + if (shift != 0) + { + flags = FLAGS64_NZ(temp64); + flags |= (DPARAM1 >> (shift - 1)) & FLAG_C; + } 
DPARAM0 = temp64; break; @@ -1738,8 +1809,11 @@ int drcbe_c::execute(code_handle &entry) case MAKE_OPCODE_SHORT(OP_ROL, 8, 1): shift = DPARAM2 & 63; temp64 = rotl_64(DPARAM1, shift); - flags = FLAGS64_NZ(temp64); - if (shift != 0) flags |= ((DPARAM1 << (shift - 1)) >> 63) & FLAG_C; + if (shift != 0) + { + flags = FLAGS64_NZ(temp64); + flags |= ((DPARAM1 << (shift - 1)) >> 63) & FLAG_C; + } DPARAM0 = temp64; break; @@ -1749,6 +1823,8 @@ int drcbe_c::execute(code_handle &entry) DPARAM0 = (DPARAM1 << shift) | ((flags & FLAG_C) << (shift - 1)) | (DPARAM1 >> (65 - shift)); else if (shift == 1) DPARAM0 = (DPARAM1 << shift) | (flags & FLAG_C); + else + DPARAM0 = DPARAM1; break; case MAKE_OPCODE_SHORT(OP_ROLC, 8, 1): @@ -1759,8 +1835,11 @@ int drcbe_c::execute(code_handle &entry) temp64 = (DPARAM1 << shift) | (flags & FLAG_C); else temp64 = DPARAM1; - flags = FLAGS64_NZ(temp64); - if (shift != 0) flags |= ((DPARAM1 << (shift - 1)) >> 63) & FLAG_C; + if (shift != 0) + { + flags = FLAGS64_NZ(temp64); + flags |= ((DPARAM1 << (shift - 1)) >> 63) & FLAG_C; + } DPARAM0 = temp64; break; @@ -1771,8 +1850,11 @@ int drcbe_c::execute(code_handle &entry) case MAKE_OPCODE_SHORT(OP_ROR, 8, 1): shift = DPARAM2 & 63; temp64 = rotr_64(DPARAM1, shift); - flags = FLAGS64_NZ(temp64); - if (shift != 0) flags |= (DPARAM1 >> (shift - 1)) & FLAG_C; + if (shift != 0) + { + flags = FLAGS64_NZ(temp64); + flags |= (DPARAM1 >> (shift - 1)) & FLAG_C; + } DPARAM0 = temp64; break; @@ -1782,6 +1864,8 @@ int drcbe_c::execute(code_handle &entry) DPARAM0 = (DPARAM1 >> shift) | ((((uint64_t)flags & FLAG_C) << 63) >> (shift - 1)) | (DPARAM1 << (65 - shift)); else if (shift == 1) DPARAM0 = (DPARAM1 >> shift) | (((uint64_t)flags & FLAG_C) << 63); + else + DPARAM0 = DPARAM1; break; case MAKE_OPCODE_SHORT(OP_RORC, 8, 1): @@ -1792,8 +1876,11 @@ int drcbe_c::execute(code_handle &entry) temp64 = (DPARAM1 >> shift) | (((uint64_t)flags & FLAG_C) << 63); else temp64 = DPARAM1; - flags = FLAGS64_NZ(temp64); - if (shift != 0) flags |= (DPARAM1 >> (shift - 1)) & FLAG_C; + if (shift != 0) + { + flags = FLAGS64_NZ(temp64); + flags |= (DPARAM1 >> (shift - 1)) & FLAG_C; + } DPARAM0 = temp64; break; @@ -2108,7 +2195,6 @@ void drcbe_c::output_parameter(drcbec_instruction **dstptr, void **immedptr, int { drcbec_instruction *dst = *dstptr; void *immed = *immedptr; - parameter temp_param; switch (param.type()) { @@ -2145,8 +2231,7 @@ void drcbe_c::output_parameter(drcbec_instruction **dstptr, void **immedptr, int // convert mapvars to immediates case parameter::PTYPE_MAPVAR: - temp_param = m_map.get_last_value(param.mapvar()); - return output_parameter(dstptr, immedptr, size, temp_param); + return output_parameter(dstptr, immedptr, size, param.mapvar()); // memory just points to the memory case parameter::PTYPE_MEMORY: @@ -2239,7 +2324,7 @@ int drcbe_c::dmulu(uint64_t &dstlo, uint64_t &dsthi, uint64_t src1, uint64_t src // store the results dsthi = hi; dstlo = lo; - return ((hi >> 60) & FLAG_S) | ((dsthi != 0) << 1); + return ((hi >> 60) & FLAG_S) | ((hi != 0) << 1); } @@ -2290,7 +2375,7 @@ int drcbe_c::dmuls(uint64_t &dstlo, uint64_t &dsthi, int64_t src1, int64_t src2, // store the results dsthi = hi; dstlo = lo; - return ((hi >> 60) & FLAG_S) | ((dsthi != ((int64_t)lo >> 63)) << 1); + return ((hi >> 60) & FLAG_S) | ((hi != ((int64_t)lo >> 63)) << 1); } uint32_t drcbe_c::tzcount32(uint32_t value) diff --git a/src/devices/cpu/drcbex64.cpp b/src/devices/cpu/drcbex64.cpp index 96f4b9142b43b..ac13a6354aa9f 100644 --- a/src/devices/cpu/drcbex64.cpp +++ 
b/src/devices/cpu/drcbex64.cpp @@ -324,6 +324,7 @@ const drcbe_x64::opcode_table_entry drcbe_x64::s_opcode_table_source[] = // Control Flow Operations { uml::OP_NOP, &drcbe_x64::op_nop }, // NOP + { uml::OP_BREAK, &drcbe_x64::op_break }, // BREAK { uml::OP_DEBUG, &drcbe_x64::op_debug }, // DEBUG pc { uml::OP_EXIT, &drcbe_x64::op_exit }, // EXIT src1[,c] { uml::OP_HASHJMP, &drcbe_x64::op_hashjmp }, // HASHJMP mode,pc,handle @@ -339,6 +340,7 @@ const drcbe_x64::opcode_table_entry drcbe_x64::s_opcode_table_source[] = { uml::OP_GETFMOD, &drcbe_x64::op_getfmod }, // GETFMOD dst { uml::OP_GETEXP, &drcbe_x64::op_getexp }, // GETEXP dst { uml::OP_GETFLGS, &drcbe_x64::op_getflgs }, // GETFLGS dst[,f] + { uml::OP_SETFLGS, &drcbe_x64::op_setflgs }, // SETFLGS src { uml::OP_SAVE, &drcbe_x64::op_save }, // SAVE dst { uml::OP_RESTORE, &drcbe_x64::op_restore }, // RESTORE dst @@ -362,7 +364,9 @@ const drcbe_x64::opcode_table_entry drcbe_x64::s_opcode_table_source[] = { uml::OP_SUBB, &drcbe_x64::op_subc }, // SUBB dst,src1,src2[,f] { uml::OP_CMP, &drcbe_x64::op_cmp }, // CMP src1,src2[,f] { uml::OP_MULU, &drcbe_x64::op_mulu }, // MULU dst,edst,src1,src2[,f] + { uml::OP_MULULW, &drcbe_x64::op_mululw }, // MULULW dst,src1,src2[,f] { uml::OP_MULS, &drcbe_x64::op_muls }, // MULS dst,edst,src1,src2[,f] + { uml::OP_MULSLW, &drcbe_x64::op_mulslw }, // MULSLW dst,src1,src2[,f] { uml::OP_DIVU, &drcbe_x64::op_divu }, // DIVU dst,edst,src1,src2[,f] { uml::OP_DIVS, &drcbe_x64::op_divs }, // DIVS dst,edst,src1,src2[,f] { uml::OP_AND, &drcbe_x64::op_and }, // AND dst,src1,src2[,f] @@ -1059,33 +1063,188 @@ void drcbe_x64::alu_op_param(Assembler &a, Inst::Id const opcode, Operand const // use temporary register for memory,memory Gp const tmp = is64 ? param.select_register(rax) : param.select_register(eax); - a.mov(tmp, MABS(param.memory())); // mov tmp,param - a.emit(opcode, dst, tmp); // op [dst],tmp + a.mov(tmp, MABS(param.memory())); + a.emit(opcode, dst, tmp); } else if (opcode != Inst::kIdTest) // most instructions are register,memory - a.emit(opcode, dst, MABS(param.memory())); // op dst,[param] + a.emit(opcode, dst, MABS(param.memory())); else // test instruction requires memory,register - a.emit(opcode, MABS(param.memory()), dst); // op [param],dst + a.emit(opcode, MABS(param.memory()), dst); } else if (param.is_int_register()) { Gp const src = Gp::fromTypeAndId(is64 ? 
RegType::kX86_Gpq : RegType::kX86_Gpd, param.ireg()); - a.emit(opcode, dst, src); // op dst,param + a.emit(opcode, dst, src); } } -void drcbe_x64::shift_op_param(Assembler &a, Inst::Id const opcode, Operand const &dst, be_parameter const ¶m) +void drcbe_x64::calculate_status_flags(Assembler &a, uint32_t instsize, Operand const &dst, u8 flags) +{ + // calculate status flags in a way that does not modify any other status flags + uint32_t flagmask = 0; + + // can't get FLAG_V from lahf so implement it using seto if needed in the future + if (flags & FLAG_C) flagmask |= 0x0100; + if (flags & FLAG_Z) flagmask |= 0x4000; + if (flags & FLAG_S) flagmask |= 0x8000; + if (flags & FLAG_U) flagmask |= 0x0400; + + if ((flags & (FLAG_Z | FLAG_S)) == flags) + { + Gp tempreg = r10; + Gp tempreg2 = r11; + + a.mov(tempreg, rax); + + if (dst.isMem()) + a.mov(tempreg2, dst.as()); + else + a.mov(tempreg2, dst.as().r64()); + + a.lahf(); + a.and_(rax, ~flagmask); + if (instsize == 4) + a.test(tempreg2.r32(), tempreg2.r32()); + else + a.test(tempreg2, tempreg2); + a.mov(tempreg2, rax); + + a.lahf(); + a.and_(rax, flagmask); + a.or_(rax, tempreg2); + a.sahf(); + + a.mov(rax, tempreg); + } + else + { + fatalerror("drcbe_x64::calculate_status_flags: unknown flag combination requested: %02x\n", flags); + } +} + +void drcbe_x64::calculate_status_flags_mul(Assembler &a, uint32_t instsize, asmjit::x86::Gp const &lo, asmjit::x86::Gp const &hi) +{ + Gp tempreg = r11; + Gp tempreg2 = r10; + + if (lo.id() == rax.id() || hi.id() == rax.id()) + a.mov(tempreg2, rax); + + a.seto(al); + a.movzx(rax, al); // clear out the rest of rax for lahf + a.mov(tempreg, rax); + + a.lahf(); + a.shl(tempreg, 16); + a.or_(tempreg, rax); + + if (hi.id() == rax.id()) + a.mov(rax, tempreg2); + + if (instsize == 4) + a.test(hi.r32(), hi.r32()); + else + a.test(hi, hi); + + a.lahf(); // will have the sign flag + upper half zero + a.mov(hi, rax); + + if (lo.id() == rax.id()) + a.mov(rax, tempreg2); + + if (instsize == 4) + a.test(lo.r32(), lo.r32()); + else + a.test(lo, lo); + + a.lahf(); // lower half zero + a.and_(rax, hi); // if top and bottom are zero then this will leave the zero flag + a.and_(rax, 0x4000); // zero + a.and_(hi, 0x8000); // sign + a.or_(rax, hi); // combine sign flag from top and zero flags for both + + a.and_(tempreg, ~(0x4000 | 0x8000)); + a.or_(tempreg, rax); + + // restore overflow flag + a.mov(rax, tempreg); + a.shr(rax, 16); + a.add(al, 0x7f); + + a.mov(rax, tempreg); + a.sahf(); +} + +void drcbe_x64::calculate_status_flags_mul_low(Assembler &a, uint32_t instsize, asmjit::x86::Gp const &lo) +{ + // calculate zero, sign flags based on the lower half of the result but keep the overflow from the multiplication + a.seto(dl); + + if (instsize == 4) + a.test(lo.r32(), lo.r32()); + else + a.test(lo, lo); + + // restore overflow flag + a.lahf(); + a.add(dl, 0x7f); + a.sahf(); +} + +void drcbe_x64::shift_op_param(Assembler &a, Inst::Id const opcode, size_t opsize, Operand const &dst, be_parameter const ¶m, bool update_flags) { - Operand shift = cl; if (param.is_immediate()) - shift = imm(param.immediate()); + { + if ((param.immediate() & (opsize * 8 - 1)) == 0) + return; + + a.emit(opcode, dst, imm(param.immediate())); + + if (update_flags) + calculate_status_flags(a, opsize, dst, FLAG_S | FLAG_Z); // calculate status flags but preserve carry + } else - mov_reg_param(a, ecx, param); + { + Label restore_flags = a.newLabel(); + Label end = a.newLabel(); + + Gp shift = cl; + + a.mov(r10, rax); + a.seto(al); + a.movzx(r11, al); + 
a.lahf(); // no status flags should change if shift is 0, so preserve flags + + mov_reg_param(a, shift, param); + + a.and_(shift, opsize * 8 - 1); + a.test(shift, shift); + a.short_().jz(restore_flags); + + a.sahf(); // restore flags to keep carry for rolc/rorc + a.mov(rax, r10); + + a.emit(opcode, dst, shift); + + if (update_flags) + calculate_status_flags(a, opsize, dst, FLAG_S | FLAG_Z); // calculate status flags but preserve carry + + a.short_().jmp(end); - a.emit(opcode, dst, shift); + a.bind(restore_flags); + + // restore overflow flag + a.add(r11.r32(), 0x7fffffff); + + // restore other flags + a.sahf(); + a.mov(rax, r10); + + a.bind(end); + } } void drcbe_x64::mov_reg_param(Assembler &a, Gp const ®, be_parameter const ¶m, bool const keepflags) @@ -1396,6 +1555,17 @@ void drcbe_x64::op_nop(Assembler &a, const instruction &inst) // nothing } +//------------------------------------------------- +// op_break - process a BREAK opcode +//------------------------------------------------- + +void drcbe_x64::op_break(Assembler &a, const instruction &inst) +{ + static const char *const message = "break from drc"; + mov_r64_imm(a, Gpq(REG_PARAM1), (uintptr_t)message); + smart_call_r64(a, (x86code *)(uintptr_t)&osd_break_into_debugger, rax); +} + //------------------------------------------------- // op_debug - process a DEBUG opcode @@ -1827,10 +1997,13 @@ void drcbe_x64::op_getflgs(Assembler &a, const instruction &inst) // pick a target register for the general case Gp dstreg = dstp.select_register(eax); + a.lahf(); + a.mov(r10, rax); + // compute mask for flags + // can't get FLAG_V from lahf uint32_t flagmask = 0; if (maskp.immediate() & FLAG_C) flagmask |= 0x001; - if (maskp.immediate() & FLAG_V) flagmask |= 0x800; if (maskp.immediate() & FLAG_Z) flagmask |= 0x040; if (maskp.immediate() & FLAG_S) flagmask |= 0x080; if (maskp.immediate() & FLAG_U) flagmask |= 0x004; @@ -1923,10 +2096,20 @@ void drcbe_x64::op_getflgs(Assembler &a, const instruction &inst) // default cases default: - a.pushfq(); // pushf - a.pop(eax); // pop eax - a.and_(eax, flagmask); // and eax,flagmask - a.movzx(dstreg, byte_ptr(rbp, rax, 0, offset_from_rbp(&m_near.flagsmap[0]))); // movzx dstreg,[flags_map] + if (maskp.immediate() & FLAG_V) + { + a.seto(al); + a.movzx(eax, al); + a.shl(eax, 1); + } + + a.mov(r11, r10); + a.shr(r11, 8); + a.and_(r11, flagmask); + a.movzx(dstreg, byte_ptr(rbp, r11, 0, offset_from_rbp(&m_near.flagsmap[0]))); // movzx dstreg,[flags_map] + + if (maskp.immediate() & FLAG_V) + a.or_(dstreg, eax); break; } @@ -1937,6 +2120,42 @@ void drcbe_x64::op_getflgs(Assembler &a, const instruction &inst) // 64-bit form else if (inst.size() == 8) mov_param_reg(a, dstp, dstreg.r64()); // mov dstp,dstreg + + if (maskp.immediate() & FLAG_V) + { + // Restore overflow flag + a.mov(eax, dstreg); + a.shr(eax, 1); + a.and_(eax, 1); + a.add(al, 0x7f); + } + + a.mov(rax, r10); + a.sahf(); +} + + +//------------------------------------------------- +// op_setflgs - process a SETFLGS opcode +//------------------------------------------------- + +void drcbe_x64::op_setflgs(Assembler &a, const instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter srcp(*this, inst.param(0), PTYPE_MRI); + + a.pushfq(); + + mov_reg_param(a, rax, srcp); + + a.mov(rax, ptr(rbp, rax, 3, offset_from_rbp(&m_near.flagsunmap[0]))); + a.and_(qword_ptr(rsp), ~0x8c5); + a.or_(qword_ptr(rsp), rax); + + a.popfq(); } @@ -2645,33 +2864,44 @@ void drcbe_x64::op_carry(Assembler &a, const 
instruction &inst) // degenerate case: source is immediate if (srcp.is_immediate() && bitp.is_immediate()) { - if (srcp.immediate() & ((uint64_t)1 << bitp.immediate())) + if (srcp.immediate() & ((uint64_t)1 << (bitp.immediate() & (inst.size() * 8 - 1)))) a.stc(); else a.clc(); + + return; } // load non-immediate bit numbers into a register + + Gp const bitreg = rcx; if (!bitp.is_immediate()) { - mov_reg_param(a, ecx, bitp); - a.and_(ecx, inst.size() * 8 - 1); + mov_reg_param(a, bitreg, bitp); + a.and_(bitreg, inst.size() * 8 - 1); } if (srcp.is_memory()) { if (bitp.is_immediate()) - a.bt(MABS(srcp.memory(), inst.size()), bitp.immediate()); // bt [srcp],bitp + a.bt(MABS(srcp.memory(), inst.size()), (bitp.immediate() & (inst.size() * 8 - 1))); else - a.bt(MABS(srcp.memory(), inst.size()), ecx); // bt [srcp],ecx + a.bt(MABS(srcp.memory(), inst.size()), bitreg); } else if (srcp.is_int_register()) { - Gp const src = Gp::fromTypeAndId((inst.size() == 4) ? RegType::kX86_Gpd : RegType::kX86_Gpq, srcp.ireg()); + Gp const src = Gp::fromTypeAndId(RegType::kX86_Gpq, srcp.ireg()); if (bitp.is_immediate()) - a.bt(src, bitp.immediate()); // bt srcp,bitp + a.bt(src, (bitp.immediate() & (inst.size() * 8 - 1))); else - a.bt(src, ecx); // bt srcp,ecx + a.bt(src, bitreg); + } + else if (srcp.is_immediate()) + { + a.push(rax); + mov_reg_param(a, rax, srcp); + a.bt(rax, bitreg); + a.pop(rax); } } @@ -2718,32 +2948,40 @@ void drcbe_x64::op_mov(Assembler &a, const instruction &inst) // add a conditional branch unless a conditional move is possible Label skip = a.newLabel(); if (inst.condition() != uml::COND_ALWAYS && !(dstp.is_int_register() && !srcp.is_immediate())) - a.short_().j(X86_NOT_CONDITION(inst.condition()), skip); // jcc skip + a.short_().j(X86_NOT_CONDITION(inst.condition()), skip); // register to memory if (dstp.is_memory() && srcp.is_int_register()) { Gp const src = Gp::fromTypeAndId((inst.size() == 4) ? RegType::kX86_Gpd : RegType::kX86_Gpq, srcp.ireg()); - a.mov(MABS(dstp.memory()), src); // mov [dstp],srcp + a.mov(MABS(dstp.memory()), src); } // immediate to memory else if (dstp.is_memory() && srcp.is_immediate() && short_immediate(srcp.immediate())) - a.mov(MABS(dstp.memory(), inst.size()), s32(srcp.immediate())); // mov [dstp],srcp + a.mov(MABS(dstp.memory(), inst.size()), s32(srcp.immediate())); // conditional memory to register - else if (inst.condition() != 0 && dstp.is_int_register() && srcp.is_memory()) + else if (dstp.is_int_register() && srcp.is_memory()) { Gp const dst = Gp::fromTypeAndId((inst.size() == 4) ? RegType::kX86_Gpd : RegType::kX86_Gpq, dstp.ireg()); - a.cmov(X86_CONDITION(inst.condition()), dst, MABS(srcp.memory())); // cmovcc dstp,[srcp] + + if (inst.condition() != uml::COND_ALWAYS) + a.cmov(X86_CONDITION(inst.condition()), dst, MABS(srcp.memory())); + else + a.mov(dst, MABS(srcp.memory())); } // conditional register to register - else if (inst.condition() != 0 && dstp.is_int_register() && srcp.is_int_register()) + else if (dstp.is_int_register() && srcp.is_int_register()) { Gp const src = Gp::fromTypeAndId((inst.size() == 4) ? RegType::kX86_Gpd : RegType::kX86_Gpq, srcp.ireg()); Gp const dst = Gp::fromTypeAndId((inst.size() == 4) ? 
RegType::kX86_Gpd : RegType::kX86_Gpq, dstp.ireg()); - a.cmov(X86_CONDITION(inst.condition()), dst, src); // cmovcc dstp,srcp + + if (inst.condition() != uml::COND_ALWAYS) + a.cmov(X86_CONDITION(inst.condition()), dst, src); + else + a.mov(dst, src); } // general case @@ -2751,8 +2989,8 @@ void drcbe_x64::op_mov(Assembler &a, const instruction &inst) { Gp dstreg = (inst.size() == 4) ? dstp.select_register(eax) : dstp.select_register(rax); - mov_reg_param(a, dstreg, srcp, true); // mov dstreg,srcp - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg + mov_reg_param(a, dstreg, srcp, true); + mov_param_reg(a, dstp, dstreg); } // resolve the jump @@ -2789,22 +3027,32 @@ void drcbe_x64::op_sext(Assembler &a, const instruction &inst) if (srcp.is_memory()) { if (sizep.size() == SIZE_BYTE) - a.movsx(dstreg, MABS(srcp.memory(), 1)); // movsx dstreg,[srcp] + a.movsx(dstreg, MABS(srcp.memory(), 1)); else if (sizep.size() == SIZE_WORD) - a.movsx(dstreg, MABS(srcp.memory(), 2)); // movsx dstreg,[srcp] + a.movsx(dstreg, MABS(srcp.memory(), 2)); else if (sizep.size() == SIZE_DWORD) - a.mov(dstreg, MABS(srcp.memory())); // mov dstreg,[srcp] + a.mov(dstreg, MABS(srcp.memory())); } else if (srcp.is_int_register()) { if (sizep.size() == SIZE_BYTE) - a.movsx(dstreg, GpbLo(srcp.ireg())); // movsx dstreg,srcp + a.movsx(dstreg, GpbLo(srcp.ireg())); else if (sizep.size() == SIZE_WORD) - a.movsx(dstreg, Gpw(srcp.ireg())); // movsx dstreg,srcp + a.movsx(dstreg, Gpw(srcp.ireg())); else if (sizep.size() == SIZE_DWORD) - a.mov(dstreg, Gpd(srcp.ireg())); // mov dstreg,srcp + a.mov(dstreg, Gpd(srcp.ireg())); } - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg + else if (srcp.is_immediate()) + { + if (sizep.size() == SIZE_BYTE) + a.mov(dstreg, (int8_t)srcp.immediate()); + else if (sizep.size() == SIZE_WORD) + a.mov(dstreg, (int16_t)srcp.immediate()); + else if (sizep.size() == SIZE_DWORD) + a.mov(dstreg, (int32_t)srcp.immediate()); + } + + mov_param_reg(a, dstp, dstreg); } // 64-bit form @@ -2814,30 +3062,42 @@ void drcbe_x64::op_sext(Assembler &a, const instruction &inst) if (srcp.is_memory()) { if (sizep.size() == SIZE_BYTE) - a.movsx(dstreg, MABS(srcp.memory(), 1)); // movsx dstreg,[srcp] + a.movsx(dstreg, MABS(srcp.memory(), 1)); else if (sizep.size() == SIZE_WORD) - a.movsx(dstreg, MABS(srcp.memory(), 2)); // movsx dstreg,[srcp] + a.movsx(dstreg, MABS(srcp.memory(), 2)); else if (sizep.size() == SIZE_DWORD) - a.movsxd(dstreg, MABS(srcp.memory(), 4)); // movsxd dstreg,[srcp] + a.movsxd(dstreg, MABS(srcp.memory(), 4)); else if (sizep.size() == SIZE_QWORD) - a.mov(dstreg, MABS(srcp.memory())); // mov dstreg,[srcp] + a.mov(dstreg, MABS(srcp.memory())); } else if (srcp.is_int_register()) { if (sizep.size() == SIZE_BYTE) - a.movsx(dstreg, GpbLo(srcp.ireg())); // movsx dstreg,srcp + a.movsx(dstreg, GpbLo(srcp.ireg())); else if (sizep.size() == SIZE_WORD) - a.movsx(dstreg, Gpw(srcp.ireg())); // movsx dstreg,srcp + a.movsx(dstreg, Gpw(srcp.ireg())); else if (sizep.size() == SIZE_DWORD) - a.movsxd(dstreg, Gpd(srcp.ireg())); // movsxd dstreg,srcp + a.movsxd(dstreg, Gpd(srcp.ireg())); else if (sizep.size() == SIZE_QWORD) - a.mov(dstreg, Gpq(srcp.ireg())); // mov dstreg,srcp + a.mov(dstreg, Gpq(srcp.ireg())); } - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg + else if (srcp.is_immediate()) + { + if (sizep.size() == SIZE_BYTE) + a.mov(dstreg, (int8_t)srcp.immediate()); + else if (sizep.size() == SIZE_WORD) + a.mov(dstreg, (int16_t)srcp.immediate()); + else if (sizep.size() == SIZE_DWORD) + a.mov(dstreg, 
(int32_t)srcp.immediate()); + else if (sizep.size() == SIZE_QWORD) + a.mov(dstreg, (int64_t)srcp.immediate()); + } + + mov_param_reg(a, dstp, dstreg); } if (inst.flags() != 0) - a.test(dstreg, dstreg); // test dstreg,dstreg + a.test(dstreg, dstreg); } @@ -2861,10 +3121,9 @@ void drcbe_x64::op_roland(Assembler &a, const instruction &inst) // pick a target register Gp dstreg = (inst.size() == 4) ? dstp.select_register(eax, shiftp, maskp) : dstp.select_register(rax, shiftp, maskp); - mov_reg_param(a, dstreg, srcp); // mov dstreg,srcp - if (!shiftp.is_immediate_value(0)) - shift_op_param(a, Inst::kIdRol, dstreg, shiftp); // rol dstreg,shiftp - alu_op_param(a, Inst::kIdAnd, dstreg, maskp, // and dstreg,maskp + mov_reg_param(a, dstreg, srcp); + shift_op_param(a, Inst::kIdRol, inst.size(), dstreg, shiftp, false); + alu_op_param(a, Inst::kIdAnd, dstreg, maskp, [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize all-zero and all-one cases @@ -2905,24 +3164,23 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) // pick a target register Gp dstreg = dstp.select_register(ecx, shiftp, maskp); - mov_reg_param(a, eax, srcp); // mov eax,srcp - if (!shiftp.is_immediate_value(0)) - shift_op_param(a, Inst::kIdRol, eax, shiftp); // rol eax,shiftp - mov_reg_param(a, dstreg, dstp); // mov dstreg,dstp + mov_reg_param(a, eax, srcp); + shift_op_param(a, Inst::kIdRol, inst.size(), eax, shiftp, false); + mov_reg_param(a, dstreg, dstp); if (maskp.is_immediate()) { - a.and_(eax, maskp.immediate()); // and eax,maskp - a.and_(dstreg, ~maskp.immediate()); // and dstreg,~maskp + a.and_(eax, maskp.immediate()); + a.and_(dstreg, ~maskp.immediate()); } else { - mov_reg_param(a, edx, maskp); // mov edx,maskp - a.and_(eax, edx); // and eax,edx - a.not_(edx); // not edx - a.and_(dstreg, edx); // and dstreg,edx + mov_reg_param(a, edx, maskp); + a.and_(eax, edx); + a.not_(edx); + a.and_(dstreg, edx); } - a.or_(dstreg, eax); // or dstreg,eax - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg + a.or_(dstreg, eax); + mov_param_reg(a, dstp, dstreg); } // 64-bit form @@ -2931,16 +3189,15 @@ void drcbe_x64::op_rolins(Assembler &a, const instruction &inst) // pick a target register Gp dstreg = dstp.select_register(rcx, shiftp, maskp); - mov_reg_param(a, rax, srcp); // mov rax,srcp - mov_reg_param(a, rdx, maskp); // mov rdx,maskp - if (!shiftp.is_immediate_value(0)) - shift_op_param(a, Inst::kIdRol, rax, shiftp); // rol rax,shiftp - mov_reg_param(a, dstreg, dstp); // mov dstreg,dstp - a.and_(rax, rdx); // and eax,rdx - a.not_(rdx); // not rdx - a.and_(dstreg, rdx); // and dstreg,rdx - a.or_(dstreg, rax); // or dstreg,rax - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg + mov_reg_param(a, rax, srcp); + mov_reg_param(a, rdx, maskp); + shift_op_param(a, Inst::kIdRol, inst.size(), rax, shiftp, false); + mov_reg_param(a, dstreg, dstp); + a.and_(rax, rdx); + a.not_(rdx); + a.and_(dstreg, rdx); + a.or_(dstreg, rax); + mov_param_reg(a, dstp, dstreg); } } @@ -3179,9 +3436,6 @@ void drcbe_x64::op_cmp(Assembler &a, const instruction &inst) void drcbe_x64::op_mulu(Assembler &a, const instruction &inst) { - uint8_t zsflags = inst.flags() & (FLAG_Z | FLAG_S); - uint8_t vflag = inst.flags() & FLAG_V; - // validate instruction assert(inst.size() == 4 || inst.size() == 8); assert_no_condition(inst); @@ -3198,112 +3452,111 @@ void drcbe_x64::op_mulu(Assembler &a, const instruction &inst) // 32-bit form if (inst.size() == 4) { + Gp dstreg = eax; + Gp edstreg = edx; + // general case - mov_reg_param(a, eax, 
src1p); // mov eax,src1p + mov_reg_param(a, dstreg, src1p); if (src2p.is_memory()) - a.mul(MABS(src2p.memory(), 4)); // mul [src2p] + a.mul(MABS(src2p.memory(), 4)); else if (src2p.is_int_register()) - a.mul(Gpd(src2p.ireg())); // mul src2p + a.mul(Gpd(src2p.ireg())); else if (src2p.is_immediate()) { - a.mov(edx, src2p.immediate()); // mov edx,src2p - a.mul(edx); // mul edx + a.mov(edstreg, src2p.immediate()); + a.mul(edstreg); } - mov_param_reg(a, dstp, eax); // mov dstp,eax + mov_param_reg(a, dstp, dstreg); if (compute_hi) - mov_param_reg(a, edstp, edx); // mov edstp,edx - - // compute flags - if (inst.flags() != 0) - { - if (zsflags != 0) - { - if (vflag) - a.pushfq(); // pushf - if (compute_hi) - { - if (zsflags == FLAG_Z) - a.or_(edx, eax); // or edx,eax - else if (zsflags == FLAG_S) - a.test(edx, edx); // test edx,edx - else - { - a.movzx(ecx, ax); // movzx ecx,ax - a.shr(eax, 16); // shr eax,16 - a.or_(edx, ecx); // or edx,ecx - a.or_(edx, eax); // or edx,eax - } - } - else - a.test(eax, eax); // test eax,eax - - // we rely on the fact that OF is cleared by all logical operations above - if (vflag) - { - a.pushfq(); // pushf - a.pop(rax); // pop rax - a.and_(qword_ptr(rsp), ~0x84); // and [rsp],~0x84 - a.or_(ptr(rsp), rax); // or [rsp],rax - a.popfq(); // popf - } - } - } + mov_param_reg(a, edstp, edstreg); } // 64-bit form else if (inst.size() == 8) { + Gp dstreg = rax; + Gp edstreg = rdx; + // general case - mov_reg_param(a, rax, src1p); // mov rax,src1p + mov_reg_param(a, dstreg, src1p); if (src2p.is_memory()) - a.mul(MABS(src2p.memory(), 8)); // mul [src2p] + a.mul(MABS(src2p.memory(), 8)); else if (src2p.is_int_register()) - a.mul(Gpq(src2p.ireg())); // mul src2p + a.mul(Gpq(src2p.ireg())); else if (src2p.is_immediate()) { - mov_r64_imm(a, rdx, src2p.immediate()); // mov rdx,src2p - a.mul(rdx); // mul rdx + mov_r64_imm(a, edstreg, src2p.immediate()); + a.mul(edstreg); } - mov_param_reg(a, dstp, rax); // mov dstp,rax + mov_param_reg(a, dstp, dstreg); if (compute_hi) - mov_param_reg(a, edstp, rdx); // mov edstp,rdx + mov_param_reg(a, edstp, edstreg); + } - // compute flags - if (inst.flags() != 0) + if (inst.flags()) + calculate_status_flags_mul(a, inst.size(), rax, rdx); +} + + +//------------------------------------------------- +// op_mululw - process a MULULW (32x32=32) opcode +//------------------------------------------------- + +void drcbe_x64::op_mululw(Assembler &a, const instruction &inst) +{ + // validate instruction + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S); + + // normalize parameters + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + normalize_commutative(src1p, src2p); + + // 32-bit form + if (inst.size() == 4) + { + Gp dstreg = eax; + Gp hireg = edx; + + // general case + mov_reg_param(a, dstreg, src1p); + if (src2p.is_memory()) + a.mul(MABS(src2p.memory(), 4)); + else if (src2p.is_int_register()) + a.mul(Gpd(src2p.ireg())); + else if (src2p.is_immediate()) { - if (zsflags != 0) - { - if (vflag) - a.pushfq(); // pushf - if (compute_hi) - { - if (zsflags == FLAG_Z) - a.or_(rdx, rax); // or rdx,rax - else if (zsflags == FLAG_S) - a.test(rdx, rdx); // test rdx,rdx - else - { - a.mov(ecx, eax); // mov ecx,eax - a.shr(rax, 32); // shr rax,32 - a.or_(rdx, rcx); // or rdx,rcx - a.or_(rdx, rax); // or rdx,rax - } - } - else - a.test(rax, rax); // test rax,rax + a.mov(hireg, 
src2p.immediate()); + a.mul(hireg); + } + mov_param_reg(a, dstp, dstreg); + } - // we rely on the fact that OF is cleared by all logical operations above - if (vflag) - { - a.pushfq(); // pushf - a.pop(rax); // pop rax - a.and_(qword_ptr(rsp), ~0x84); // and [rsp],~0x84 - a.or_(ptr(rsp), rax); // or [rsp],rax - a.popfq(); // popf - } - } + // 64-bit form + else if (inst.size() == 8) + { + Gp dstreg = rax; + Gp hireg = rdx; + + // general case + mov_reg_param(a, dstreg, src1p); + if (src2p.is_memory()) + a.mul(MABS(src2p.memory(), 8)); + else if (src2p.is_int_register()) + a.mul(Gpq(src2p.ireg())); + else if (src2p.is_immediate()) + { + mov_r64_imm(a, hireg, src2p.immediate()); + a.mul(hireg); } + mov_param_reg(a, dstp, dstreg); } + + if (inst.flags()) + calculate_status_flags_mul_low(a, inst.size(), rax); } @@ -3313,9 +3566,6 @@ void drcbe_x64::op_mulu(Assembler &a, const instruction &inst) void drcbe_x64::op_muls(Assembler &a, const instruction &inst) { - uint8_t zsflags = inst.flags() & (FLAG_Z | FLAG_S); - uint8_t vflag = inst.flags() & FLAG_V; - // validate instruction assert(inst.size() == 4 || inst.size() == 8); assert_no_condition(inst); @@ -3332,162 +3582,107 @@ void drcbe_x64::op_muls(Assembler &a, const instruction &inst) // 32-bit form if (inst.size() == 4) { - // 32-bit destination with memory/immediate or register/immediate - if (!compute_hi && !src1p.is_immediate() && src2p.is_immediate()) - { - Gp dstreg = dstp.is_int_register() ? Gpd(dstp.ireg()) : eax; - if (src1p.is_memory()) - a.imul(dstreg, MABS(src1p.memory()), src2p.immediate()); // imul dstreg,[src1p],src2p - else if (src1p.is_int_register()) - a.imul(dstreg, Gpd(src1p.ireg()), src2p.immediate()); // imul dstreg,src1p,src2p - mov_param_reg(a, dstp, dstreg); // mov dstp,eax - } - - // 32-bit destination, general case - else if (!compute_hi) - { - Gp dstreg = dstp.is_int_register() ? 
Gpd(dstp.ireg()) : eax; - mov_reg_param(a, dstreg, src1p); // mov dstreg,src1p - if (src2p.is_memory()) - a.imul(dstreg, MABS(src2p.memory())); // imul dstreg,[src2p] - else if (src2p.is_int_register()) - a.imul(dstreg, Gpd(src2p.ireg())); // imul dstreg,src2p - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg - } + Gp dstreg = eax; + Gp edstreg = edx; - // 64-bit destination, general case - else - { - mov_reg_param(a, eax, src1p); // mov eax,src1p - if (src2p.is_memory()) - a.imul(MABS(src2p.memory(), 4)); // imul [src2p] - else if (src2p.is_int_register()) - a.imul(Gpd(src2p.ireg())); // imul src2p - else if (src2p.is_immediate()) - { - a.mov(edx, src2p.immediate()); // mov edx,src2p - a.imul(edx); // imul edx - } - mov_param_reg(a, dstp, eax); // mov dstp,eax - mov_param_reg(a, edstp, edx); // mov edstp,edx - } - - // compute flags - if (inst.flags() != 0) + mov_reg_param(a, dstreg, src1p); + if (src2p.is_memory()) + a.imul(MABS(src2p.memory(), 4)); + else if (src2p.is_int_register()) + a.imul(Gpd(src2p.ireg())); + else if (src2p.is_immediate()) { - if (zsflags != 0) - { - if (vflag) - a.pushfq(); // pushf - if (compute_hi) - { - if (zsflags == FLAG_Z) - a.or_(edx, eax); // or edx,eax - else if (zsflags == FLAG_S) - a.test(edx, edx); // test edx,edx - else - { - a.movzx(ecx, ax); // movzx ecx,ax - a.shr(eax, 16); // shr eax,16 - a.or_(edx, ecx); // or edx,ecx - a.or_(edx, eax); // or edx,eax - } - } - else - a.test(eax, eax); // test eax,eax - - // we rely on the fact that OF is cleared by all logical operations above - if (vflag) - { - a.pushfq(); // pushf - a.pop(rax); // pop rax - a.and_(qword_ptr(rsp), ~0x84); // and [rsp],~0x84 - a.or_(ptr(rsp), rax); // or [rsp],rax - a.popfq(); // popf - } - } + a.mov(edstreg, src2p.immediate()); + a.imul(edstreg); } + mov_param_reg(a, dstp, dstreg); + if (compute_hi) + mov_param_reg(a, edstp, edstreg); } // 64-bit form else if (inst.size() == 8) { - // 64-bit destination with memory/immediate or register/immediate - if (!compute_hi && !src1p.is_immediate() && src2p.is_immediate() && short_immediate(src2p.immediate())) - { - Gp dstreg = dstp.is_int_register() ? Gpq(dstp.ireg()) : rax; - if (src1p.is_memory()) - a.imul(dstreg, MABS(src1p.memory()), src2p.immediate()); // imul dstreg,[src1p],src2p - else if (src1p.is_int_register()) - a.imul(dstreg, Gpq(src1p.ireg()), src2p.immediate()); // imul rax,src1p,src2p - mov_param_reg(a, dstp, dstreg); // mov dstp,rax - } + Gp dstreg = rax; + Gp edstreg = rdx; - // 64-bit destination, general case - else if (!compute_hi) + mov_reg_param(a, dstreg, src1p); + if (src2p.is_memory()) + a.imul(MABS(src2p.memory(), 8)); + else if (src2p.is_int_register()) + a.imul(Gpq(src2p.ireg())); + else if (src2p.is_immediate()) { - Gp dstreg = dstp.is_int_register() ? 
Gpq(dstp.ireg()) : rax; - mov_reg_param(a, dstreg, src1p); // mov dstreg,src1p - if (src2p.is_memory()) - a.imul(dstreg, MABS(src2p.memory())); // imul dstreg,[src2p] - else if (src2p.is_int_register()) - a.imul(dstreg, Gpq(src2p.ireg())); // imul dstreg,src2p - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg + mov_r64_imm(a, edstreg, src2p.immediate()); + a.imul(edstreg); } + mov_param_reg(a, dstp, dstreg); + if (compute_hi) + mov_param_reg(a, edstp, edstreg); + } - // 128-bit destination, general case - else + if (inst.flags()) + calculate_status_flags_mul(a, inst.size(), rax, rdx); +} + + +//------------------------------------------------- +// op_mulslw - process a MULSLW (32x32=32) opcode +//------------------------------------------------- + +void drcbe_x64::op_mulslw(Assembler &a, const instruction &inst) +{ + // validate instruction + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S); + + // normalize parameters + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + normalize_commutative(src1p, src2p); + + // 32-bit form + if (inst.size() == 4) + { + Gp dstreg = eax; + Gp hireg = edx; + + mov_reg_param(a, dstreg, src1p); + if (src2p.is_memory()) + a.imul(MABS(src2p.memory(), 4)); + else if (src2p.is_int_register()) + a.imul(Gpd(src2p.ireg())); + else if (src2p.is_immediate()) { - mov_reg_param(a, rax, src1p); // mov rax,src1p - if (src2p.is_memory()) - a.imul(MABS(src2p.memory(), 8)); // imul [src2p] - else if (src2p.is_int_register()) - a.imul(Gpq(src2p.ireg())); // imul src2p - else if (src2p.is_immediate()) - { - mov_r64_imm(a, rdx, src2p.immediate()); // mov rdx,src2p - a.imul(rdx); // imul rdx - } - mov_param_reg(a, dstp, rax); // mov dstp,rax - mov_param_reg(a, edstp, rdx); // mov edstp,rdx + a.mov(hireg, src2p.immediate()); + a.imul(hireg); } + mov_param_reg(a, dstp, dstreg); + } - // compute flags - if (inst.flags() != 0) - { - if (zsflags != 0) - { - if (vflag) - a.pushfq(); // pushf - if (compute_hi) - { - if (zsflags == FLAG_Z) - a.or_(rdx, rax); // or rdx,rax - else if (zsflags == FLAG_S) - a.test(rdx, rdx); // test rdx,rdx - else - { - a.mov(ecx, eax); // mov ecx,eax - a.shr(rax, 32); // shr rax,32 - a.or_(rdx, rcx); // or rdx,rcx - a.or_(rdx, rax); // or rdx,rax - } - } - else - a.test(rax, rax); // test rax,rax + // 64-bit form + else if (inst.size() == 8) + { + Gp dstreg = rax; + Gp hireg = rdx; - // we rely on the fact that OF is cleared by all logical operations above - if (vflag) - { - a.pushfq(); // pushf - a.pop(rax); // pop rax - a.and_(qword_ptr(rsp), ~0x84); // and [rsp],~0x84 - a.or_(ptr(rsp), rax); // or [rsp],rax - a.popfq(); // popf - } - } + mov_reg_param(a, dstreg, src1p); + if (src2p.is_memory()) + a.imul(MABS(src2p.memory(), 8)); + else if (src2p.is_int_register()) + a.imul(Gpq(src2p.ireg())); + else if (src2p.is_immediate()) + { + mov_r64_imm(a, hireg, src2p.immediate()); + a.imul(hireg); } + mov_param_reg(a, dstp, dstreg); } + + if (inst.flags()) + calculate_status_flags_mul_low(a, inst.size(), rax); } @@ -3953,18 +4148,27 @@ void drcbe_x64::op_lzcnt(Assembler &a, const instruction &inst) be_parameter dstp(*this, inst.param(0), PTYPE_MR); be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + if (inst.flags()) + { + a.xor_(eax, eax); // reset status flags + a.test(eax, eax); + } + // 32-bit form if (inst.size() == 4) { // pick a target register Gp dstreg = 
dstp.select_register(eax); - mov_reg_param(a, dstreg, srcp); // mov dstreg,src1p - a.mov(ecx, 32 ^ 31); // mov ecx,32 ^ 31 - a.bsr(dstreg, dstreg); // bsr dstreg,dstreg - a.cmovz(dstreg, ecx); // cmovz dstreg,ecx - a.xor_(dstreg, 31); // xor dstreg,31 - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg + mov_reg_param(a, dstreg, srcp); + a.mov(ecx, 32 ^ 31); + a.bsr(dstreg, dstreg); + a.cmovz(dstreg, ecx); + a.xor_(dstreg, 31); + mov_param_reg(a, dstp, dstreg); + + if (inst.flags()) + a.test(dstreg, dstreg); } // 64-bit form @@ -3973,12 +4177,15 @@ void drcbe_x64::op_lzcnt(Assembler &a, const instruction &inst) // pick a target register Gp dstreg = dstp.select_register(rax); - mov_reg_param(a, dstreg, srcp); // mov dstreg,src1p - a.mov(ecx, 64 ^ 63); // mov ecx,64 ^ 63 - a.bsr(dstreg, dstreg); // bsr dstreg,dstreg - a.cmovz(dstreg, rcx); // cmovz dstreg,rcx - a.xor_(dstreg, 63); // xor dstreg,63 - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg + mov_reg_param(a, dstreg, srcp); + a.mov(ecx, 64 ^ 63); + a.bsr(dstreg, dstreg); + a.cmovz(dstreg, rcx); + a.xor_(dstreg, 63); + mov_param_reg(a, dstp, dstreg); + + if (inst.flags()) + a.test(dstreg, dstreg); } } @@ -3998,16 +4205,22 @@ void drcbe_x64::op_tzcnt(Assembler &a, const instruction &inst) be_parameter dstp(*this, inst.param(0), PTYPE_MR); be_parameter srcp(*this, inst.param(1), PTYPE_MRI); + if (inst.flags()) + { + a.xor_(eax, eax); // reset status flags + a.test(eax, eax); + } + // 32-bit form if (inst.size() == 4) { Gp dstreg = dstp.select_register(eax); - mov_reg_param(a, dstreg, srcp); // mov dstreg,srcp - a.mov(ecx, 32); // mov ecx,32 - a.bsf(dstreg, dstreg); // bsf dstreg,dstreg - a.cmovz(dstreg, ecx); // cmovz dstreg,ecx - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg + mov_reg_param(a, dstreg, srcp); + a.mov(ecx, 32); + a.bsf(dstreg, dstreg); + a.cmovz(dstreg, ecx); + mov_param_reg(a, dstp, dstreg); } // 64-bit form @@ -4015,11 +4228,11 @@ void drcbe_x64::op_tzcnt(Assembler &a, const instruction &inst) { Gp dstreg = dstp.select_register(rax); - mov_reg_param(a, dstreg, srcp); // mov dstreg,srcp - a.mov(ecx, 64); // mov ecx,64 - a.bsf(dstreg, dstreg); // bsf dstreg,dstreg - a.cmovz(dstreg, rcx); // cmovz dstreg,rcx - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg + mov_reg_param(a, dstreg, srcp); + a.mov(rcx, 64); + a.bsf(dstreg, dstreg); + a.cmovz(dstreg, rcx); + mov_param_reg(a, dstp, dstreg); } } @@ -4064,25 +4277,25 @@ template void drcbe_x64::op_shift(Assembler &a, const uml::ins const bool carry = (Opcode == Inst::kIdRcl) || (Opcode == Inst::kIdRcr); // optimize immediate zero case - if (carry || inst.flags() || !src2p.is_immediate_value(0)) + if (!carry && !inst.flags() && src2p.is_immediate() && (src2p.immediate() & (inst.size() * 8 - 1)) == 0) + return; + + // dstp == src1p in memory + if (dstp.is_memory() && dstp == src1p) + shift_op_param(a, Opcode, inst.size(), MABS(dstp.memory(), inst.size()), src2p, true); + + // general case + else { - // dstp == src1p in memory - if (dstp.is_memory() && dstp == src1p) - shift_op_param(a, Opcode, MABS(dstp.memory(), inst.size()), src2p); // op [dstp],src2p + // pick a target register + Gp dstreg = (inst.size() == 4) ? dstp.select_register(eax, src2p) : dstp.select_register(rax, src2p); - // general case + if (carry) + mov_reg_param(a, dstreg, src1p, true); else - { - // pick a target register - Gp dstreg = (inst.size() == 4) ? 
dstp.select_register(eax, src2p) : dstp.select_register(rax, src2p); - - if (carry) - mov_reg_param(a, dstreg, src1p, true); // mov dstreg,src1p - else - mov_reg_param(a, dstreg, src1p); // mov dstreg,src1p - shift_op_param(a, Opcode, dstreg, src2p); // op dstreg,src2p - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg - } + mov_reg_param(a, dstreg, src1p); + shift_op_param(a, Opcode, inst.size(), dstreg, src2p, true); + mov_param_reg(a, dstp, dstreg); } } @@ -4800,29 +5013,44 @@ void drcbe_x64::op_fneg(Assembler &a, const instruction &inst) be_parameter dstp(*this, inst.param(0), PTYPE_MF); be_parameter srcp(*this, inst.param(1), PTYPE_MF); - // pick a target register for the general case Xmm dstreg = dstp.select_register(xmm0, srcp); + Xmm tempreg = xmm1; + + // note: using memory addrs with xorpd is dangerous because MAME does not guarantee + // the memory address will be 16 byte aligned so there's a good chance it'll crash + if (srcp.is_memory()) + { + if (inst.size() == 4) + { + a.mov(eax, MABS(srcp.memory())); + a.movd(tempreg, eax); + } + else if (inst.size() == 8) + { + a.mov(rax, MABS(srcp.memory())); + a.movq(tempreg, rax); + } + } - // 32-bit form if (inst.size() == 4) { - a.xorps(dstreg, dstreg); // xorps dstreg,dstreg + a.mov(rax, 0x80000000); + a.movd(dstreg, rax); if (srcp.is_memory()) - a.subss(dstreg, MABS(srcp.memory())); // subss dstreg,[srcp] + a.xorpd(dstreg, tempreg); else if (srcp.is_float_register()) - a.subss(dstreg, Xmm(srcp.freg())); // subss dstreg,srcp - movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg + a.xorpd(dstreg, Xmm(srcp.freg())); + movss_p32_r128(a, dstp, dstreg); } - - // 64-bit form else if (inst.size() == 8) { - a.xorpd(dstreg, dstreg); // xorpd dstreg,dstreg + a.mov(rax, 0x8000000000000000); + a.movq(dstreg, rax); if (srcp.is_memory()) - a.subsd(dstreg, MABS(srcp.memory())); // subsd dstreg,[srcp] + a.xorpd(dstreg, tempreg); else if (srcp.is_float_register()) - a.subsd(dstreg, Xmm(srcp.freg())); // subsd dstreg,srcp - movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg + a.xorpd(dstreg, Xmm(srcp.freg())); + movsd_p64_r128(a, dstp, dstreg); } } @@ -5058,20 +5286,17 @@ void drcbe_x64::op_fcopyi(Assembler &a, const instruction &inst) { if (srcp.is_memory()) { - a.movd(dstreg, MABS(srcp.memory())); // movd dstreg,[srcp] - movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg + a.movd(dstreg, MABS(srcp.memory())); + movss_p32_r128(a, dstp, dstreg); + } + else if (dstp.is_memory()) + { + mov_param_reg(a, dstp, Gpd(srcp.ireg())); } else { - if (dstp.is_memory()) - { - mov_param_reg(a, dstp, Gpd(srcp.ireg())); // mov dstp,srcp - } - else - { - a.movd(dstreg, Gpd(srcp.ireg())); // movd dstreg,srcp - movss_p32_r128(a, dstp, dstreg); // movss dstp,dstreg - } + a.movd(dstreg, Gpd(srcp.ireg())); + movss_p32_r128(a, dstp, dstreg); } } @@ -5081,22 +5306,18 @@ void drcbe_x64::op_fcopyi(Assembler &a, const instruction &inst) { if (srcp.is_memory()) { - a.movq(dstreg, MABS(srcp.memory())); // movq dstreg,[srcp] - movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg + a.movq(dstreg, MABS(srcp.memory())); + movsd_p64_r128(a, dstp, dstreg); + } + else if (dstp.is_memory()) + { + mov_param_reg(a, dstp, Gpq(srcp.ireg())); } else { - if (dstp.is_memory()) - { - mov_param_reg(a, dstp, Gpq(srcp.ireg())); // mov dstp,srcp - } - else - { - a.movq(dstreg, Gpq(srcp.ireg())); // movq dstreg,srcp - movsd_p64_r128(a, dstp, dstreg); // movsd dstp,dstreg - } + a.movq(dstreg, Gpq(srcp.ireg())); + movsd_p64_r128(a, dstp, dstreg); } - } } @@ -5122,19 +5343,16 @@ void 
drcbe_x64::op_icopyf(Assembler &a, const instruction &inst) if (srcp.is_memory()) { Gp dstreg = dstp.select_register(eax); - a.mov(dstreg, MABS(srcp.memory())); // mov dstreg,[srcp] - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg + a.mov(dstreg, MABS(srcp.memory())); + mov_param_reg(a, dstp, dstreg); + } + else if (dstp.is_memory()) + { + a.movd(MABS(dstp.memory()), Xmm(srcp.freg())); } else { - if (dstp.is_memory()) - { - a.movd(MABS(dstp.memory()), Xmm(srcp.freg())); // movd dstp,srcp - } - else - { - a.movd(Gpd(dstp.ireg()), Xmm(srcp.freg())); // movd dstp,srcp - } + a.movd(Gpd(dstp.ireg()), Xmm(srcp.freg())); } } @@ -5144,19 +5362,16 @@ void drcbe_x64::op_icopyf(Assembler &a, const instruction &inst) if (srcp.is_memory()) { Gp dstreg = dstp.select_register(rax); - a.mov(dstreg, MABS(srcp.memory())); // mov dstreg,[srcp] - mov_param_reg(a, dstp, dstreg); // mov dstp,dstreg + a.mov(dstreg, MABS(srcp.memory())); + mov_param_reg(a, dstp, dstreg); + } + else if (dstp.is_memory()) + { + a.movq(MABS(dstp.memory()), Xmm(srcp.freg())); } else { - if (dstp.is_memory()) - { - a.movq(MABS(dstp.memory()), Xmm(srcp.freg())); // movq dstp,srcp - } - else - { - a.movq(Gpq(dstp.ireg()), Xmm(srcp.freg())); // movq dstp,srcp - } + a.movq(Gpq(dstp.ireg()), Xmm(srcp.freg())); } } } diff --git a/src/devices/cpu/drcbex64.h b/src/devices/cpu/drcbex64.h index 51e0c94fed3d7..db8c1543aa539 100644 --- a/src/devices/cpu/drcbex64.h +++ b/src/devices/cpu/drcbex64.h @@ -135,6 +135,7 @@ class drcbe_x64 : public drcbe_interface void op_mapvar(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_nop(asmjit::x86::Assembler &a, const uml::instruction &inst); + void op_break(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_debug(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_exit(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_hashjmp(asmjit::x86::Assembler &a, const uml::instruction &inst); @@ -149,6 +150,7 @@ class drcbe_x64 : public drcbe_interface void op_getfmod(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_getexp(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_getflgs(asmjit::x86::Assembler &a, const uml::instruction &inst); + void op_setflgs(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_save(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_restore(asmjit::x86::Assembler &a, const uml::instruction &inst); @@ -171,7 +173,9 @@ class drcbe_x64 : public drcbe_interface void op_subc(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_cmp(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_mulu(asmjit::x86::Assembler &a, const uml::instruction &inst); + void op_mululw(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_muls(asmjit::x86::Assembler &a, const uml::instruction &inst); + void op_mulslw(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_divu(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_divs(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_and(asmjit::x86::Assembler &a, const uml::instruction &inst); @@ -208,7 +212,7 @@ class drcbe_x64 : public drcbe_interface // alu and shift operation helpers static bool ones(u64 const value, unsigned const size) noexcept { return (size == 4) ? 
u32(value) == 0xffffffffU : value == 0xffffffff'ffffffffULL; } void alu_op_param(asmjit::x86::Assembler &a, asmjit::x86::Inst::Id const opcode, asmjit::Operand const &dst, be_parameter const &param, std::function<bool (asmjit::x86::Assembler &a, asmjit::Operand dst, be_parameter const &src)> optimize = [](asmjit::x86::Assembler &a, asmjit::Operand dst, be_parameter const &src) { return false; }); - void shift_op_param(asmjit::x86::Assembler &a, asmjit::x86::Inst::Id const opcode, asmjit::Operand const &dst, be_parameter const &param); + void shift_op_param(asmjit::x86::Assembler &a, asmjit::x86::Inst::Id const opcode, size_t opsize, asmjit::Operand const &dst, be_parameter const &param, bool update_flags); // parameter helpers void mov_reg_param(asmjit::x86::Assembler &a, asmjit::x86::Gp const &reg, be_parameter const &param, bool const keepflags = false); @@ -225,6 +229,10 @@ class drcbe_x64 : public drcbe_interface void movsd_r128_p64(asmjit::x86::Assembler &a, asmjit::x86::Xmm const &reg, be_parameter const &param); void movsd_p64_r128(asmjit::x86::Assembler &a, be_parameter const &param, asmjit::x86::Xmm const &reg); + void calculate_status_flags(asmjit::x86::Assembler &a, uint32_t instsize, asmjit::Operand const &dst, u8 flags); + void calculate_status_flags_mul(asmjit::x86::Assembler &a, uint32_t instsize, asmjit::x86::Gp const &lo, asmjit::x86::Gp const &hi); + void calculate_status_flags_mul_low(asmjit::x86::Assembler &a, uint32_t instsize, asmjit::x86::Gp const &lo); + size_t emit(asmjit::CodeHolder &ch); // internal state diff --git a/src/devices/cpu/drcbex86.cpp b/src/devices/cpu/drcbex86.cpp index 0233ddef33b73..8ce4c0d3f6966 100644 --- a/src/devices/cpu/drcbex86.cpp +++ b/src/devices/cpu/drcbex86.cpp @@ -205,6 +205,7 @@ const drcbe_x86::opcode_table_entry drcbe_x86::s_opcode_table_source[] = // Control Flow Operations { uml::OP_NOP, &drcbe_x86::op_nop }, // NOP + { uml::OP_BREAK, &drcbe_x86::op_break }, // BREAK { uml::OP_DEBUG, &drcbe_x86::op_debug }, // DEBUG pc { uml::OP_EXIT, &drcbe_x86::op_exit }, // EXIT src1[,c] { uml::OP_HASHJMP, &drcbe_x86::op_hashjmp }, // HASHJMP mode,pc,handle @@ -220,6 +221,7 @@ const drcbe_x86::opcode_table_entry drcbe_x86::s_opcode_table_source[] = { uml::OP_GETFMOD, &drcbe_x86::op_getfmod }, // GETFMOD dst { uml::OP_GETEXP, &drcbe_x86::op_getexp }, // GETEXP dst { uml::OP_GETFLGS, &drcbe_x86::op_getflgs }, // GETFLGS dst[,f] + { uml::OP_SETFLGS, &drcbe_x86::op_setflgs }, // SETFLGS src { uml::OP_SAVE, &drcbe_x86::op_save }, // SAVE dst { uml::OP_RESTORE, &drcbe_x86::op_restore }, // RESTORE dst @@ -243,7 +245,9 @@ const drcbe_x86::opcode_table_entry drcbe_x86::s_opcode_table_source[] = { uml::OP_SUBB, &drcbe_x86::op_subc }, // SUBB dst,src1,src2[,f] { uml::OP_CMP, &drcbe_x86::op_cmp }, // CMP src1,src2[,f] { uml::OP_MULU, &drcbe_x86::op_mulu }, // MULU dst,edst,src1,src2[,f] + { uml::OP_MULULW, &drcbe_x86::op_mululw }, // MULULW dst,src1,src2[,f] { uml::OP_MULS, &drcbe_x86::op_muls }, // MULS dst,edst,src1,src2[,f] + { uml::OP_MULSLW, &drcbe_x86::op_mulslw }, // MULSLW dst,src1,src2[,f] { uml::OP_DIVU, &drcbe_x86::op_divu }, // DIVU dst,edst,src1,src2[,f] { uml::OP_DIVS, &drcbe_x86::op_divs }, // DIVS dst,edst,src1,src2[,f] { uml::OP_AND, &drcbe_x86::op_and }, // AND dst,src1,src2[,f] @@ -417,12 +421,30 @@ inline void drcbe_x86::emit_combine_z_flags(Assembler &a) { // this assumes that the flags from the low 32-bit op are on the stack // and the flags from the high 32-bit op are live - a.pushfd(); // pushf - a.mov(ecx, ptr(esp, 4)); // mov ecx,[esp+4] - a.or_(ecx, ~0x40); // or ecx,~0x40 - a.and_(ptr(esp, 0), ecx); // and [esp],ecx - a.popfd(); // popf - 
a.lea(esp, ptr(esp, 4)); // lea esp,[esp+4] + a.pushfd(); + + a.mov(ecx, dword_ptr(esp, 4)); // zero flag + a.or_(ecx, ~0x40); + a.and_(dword_ptr(esp, 0), ecx); + + a.popfd(); + a.lea(esp, ptr(esp, 4)); +} + +inline void drcbe_x86::emit_combine_zs_flags(Assembler &a) +{ + // this assumes that the flags from the low 32-bit op are on the stack + // and the flags from the high 32-bit op are live + a.pushfd(); + + a.mov(ecx, dword_ptr(esp, 4)); // zero flag + a.or_(ecx, ~(0x40 | 0x80)); + a.and_(dword_ptr(esp, 0), ecx); + a.and_(ecx, 0x80); // sign flag + a.or_(dword_ptr(esp, 0), ecx); + + a.popfd(); + a.lea(esp, ptr(esp, 4)); } @@ -435,11 +457,11 @@ inline void drcbe_x86::emit_combine_z_shl_flags(Assembler &a) { // this assumes that the flags from the high 32-bit op are on the stack // and the flags from the low 32-bit op are live - a.pushfd(); // pushf - a.pop(ecx); // pop ecx - a.or_(ecx, ~0x40); // or ecx,~0x40 - a.and_(ptr(esp, 0), ecx); // and [esp],ecx - a.popfd(); // popf + a.pushfd(); + a.pop(ecx); + a.or_(ecx, ~0x40); + a.and_(ptr(esp, 0), ecx); + a.popfd(); } @@ -1030,21 +1052,95 @@ void drcbe_x86::alu_op_param(Assembler &a, Inst::Id const opcode, Operand const a.emit(opcode, dst, Gpd(param.ireg())); // op dst,param } +void drcbe_x86::calculate_status_flags(Assembler &a, Operand const &dst, u8 flags) +{ + // calculate status flags in a way that does not modify any other status flags + uint32_t flagmask = 0; + + if (flags & FLAG_C) flagmask |= 0x001; + if (flags & FLAG_V) flagmask |= 0x800; + if (flags & FLAG_Z) flagmask |= 0x040; + if (flags & FLAG_S) flagmask |= 0x080; + if (flags & FLAG_U) flagmask |= 0x004; + + if ((flags & (FLAG_Z | FLAG_S)) == flags) + { + Gp tempreg = dst.isMem() ? eax : dst.as().id() == ebx.id() ? eax : ebx; + Gp tempreg2 = dst.isMem() ? edx : dst.as().id() == ecx.id() ? edx : ecx; + + if (dst.isMem()) + { + a.push(tempreg2); + a.mov(tempreg2, dst.as()); + } + + a.push(tempreg); + + a.pushfd(); + a.pop(tempreg); + a.and_(tempreg, ~flagmask); + + a.add(dst.isMem() ? tempreg2.as() : dst.as(), 0); + + a.pushfd(); + a.and_(dword_ptr(esp), flagmask); + a.or_(dword_ptr(esp), tempreg); + a.popfd(); + + a.pop(tempreg); + + if (dst.isMem()) + a.pop(tempreg2); + } + else + { + fatalerror("drcbe_x86::calculate_status_flags: unknown flag combination requested: %02x\n", flags); + } +} -void drcbe_x86::shift_op_param(Assembler &a, Inst::Id const opcode, Operand const &dst, be_parameter const ¶m, std::function optimize) +void drcbe_x86::shift_op_param(Assembler &a, Inst::Id const opcode, size_t opsize, Operand const &dst, be_parameter const ¶m, std::function optimize, bool update_flags) { - Operand shift = cl; if (param.is_immediate()) { - if (optimize(a, dst, param)) + uint32_t bitshift = (param.immediate() & (opsize * 8 - 1)); + + if (optimize(a, dst, param) || bitshift == 0) return; - shift = imm(param.immediate()); + a.emit(opcode, dst, imm(bitshift)); + + if (update_flags) + calculate_status_flags(a, dst, FLAG_S | FLAG_Z); // calculate status flags but preserve carry } else - emit_mov_r32_p32(a, ecx, param); + { + Label restore_flags = a.newLabel(); + Label end = a.newLabel(); + + Gp shift = dst.as().id() == ecx.id() ? 
ebx : ecx; + + a.pushfd(); // no status flags should change if shift is 0, so preserve flags + + emit_mov_r32_p32(a, shift, param); + + a.and_(shift, opsize * 8 - 1); + a.test(shift, shift); + a.short_().jz(restore_flags); + + a.popfd(); // restore flags to keep carry for rolc/rorc + + a.emit(opcode, dst, shift); + + if (update_flags) + calculate_status_flags(a, dst, FLAG_S | FLAG_Z); // calculate status flags but preserve carry - a.emit(opcode, dst, shift); + a.short_().jmp(end); + + a.bind(restore_flags); + a.popfd(); + + a.bind(end); + } } @@ -1428,7 +1524,7 @@ void drcbe_x86::emit_xor_m64_p64(Assembler &a, Mem const &memref_lo, Mem const & void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, be_parameter const ¶m, const instruction &inst) { - int saveflags = (inst.flags() != 0); + int saveflags = inst.flags() != 0; if (param.is_immediate()) { int count = param.immediate() & 63; @@ -1436,6 +1532,8 @@ void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, ;// skip else { + saveflags = saveflags && count > 0; + while (count >= 32) { if (inst.flags() != 0) @@ -1458,12 +1556,25 @@ void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, a.shl(reglo, count); // shl reglo,count } } + + if (saveflags) + emit_combine_z_shl_flags(a); } else { + Label skipall = a.newLabel(); + Label end = a.newLabel(); Label skip1 = a.newLabel(); Label skip2 = a.newLabel(); + + a.pushfd(); + emit_mov_r32_p32(a, ecx, param); // mov ecx,param + + a.and_(ecx, 63); + a.test(ecx, ecx); + a.short_().jz(skipall); + a.test(ecx, 0x20); // test ecx,0x20 a.short_().jz(skip1); // jz skip1 if (inst.flags() != 0) @@ -1489,9 +1600,19 @@ void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, a.shld(reghi, reglo, cl); // shld reghi,reglo,cl if (saveflags) a.pushfd(); // pushf a.shl(reglo, cl); // shl reglo,cl + + if (saveflags) + emit_combine_z_shl_flags(a); + + a.lea(esp, ptr(esp, 4)); + + a.jmp(end); + + a.bind(skipall); + a.popfd(); + + a.bind(end); } - if (saveflags) - emit_combine_z_shl_flags(a); } @@ -1502,7 +1623,7 @@ void drcbe_x86::emit_shl_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, be_parameter const ¶m, const instruction &inst) { - int saveflags = ((inst.flags() & FLAG_Z) != 0); + int saveflags = inst.flags() != 0; if (param.is_immediate()) { int count = param.immediate() & 63; @@ -1510,6 +1631,8 @@ void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, ;// skip else { + saveflags = saveflags && count > 0; + while (count >= 32) { if (inst.flags() != 0) @@ -1532,12 +1655,35 @@ void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, a.shr(reghi, count); // shr reghi,count } } + + if (saveflags) + { + // take carry from lower register's flags + a.pushfd(); + a.mov(ecx, dword_ptr(esp, 4)); + a.and_(ecx, 0x01); // carry flag + a.and_(dword_ptr(esp, 0), ~0x01); + a.or_(dword_ptr(esp, 0), ecx); + a.popfd(); + + emit_combine_z_flags(a); + } } else { + Label skipall = a.newLabel(); + Label end = a.newLabel(); Label skip1 = a.newLabel(); Label skip2 = a.newLabel(); + + a.pushfd(); + emit_mov_r32_p32(a, ecx, param); // mov ecx,param + + a.and_(ecx, 63); + a.test(ecx, ecx); + a.short_().jz(skipall); + a.test(ecx, 0x20); // test ecx,0x20 a.short_().jz(skip1); // jz skip1 if (inst.flags() != 0) @@ -1563,9 +1709,29 @@ void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, a.shrd(reglo, reghi, cl); // shrd 
reglo,reghi,cl if (saveflags) a.pushfd(); // pushf a.shr(reghi, cl); // shr reghi,cl + + if (saveflags) + { + // take carry from lower register's flags + a.pushfd(); + a.mov(ecx, dword_ptr(esp, 4)); + a.and_(ecx, 0x01); // carry flag + a.and_(dword_ptr(esp, 0), ~0x01); + a.or_(dword_ptr(esp, 0), ecx); + a.popfd(); + + emit_combine_z_flags(a); + } + + a.lea(esp, ptr(esp, 4)); + + a.jmp(end); + + a.bind(skipall); + a.popfd(); + + a.bind(end); } - if (saveflags) - emit_combine_z_flags(a); } @@ -1576,7 +1742,7 @@ void drcbe_x86::emit_shr_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, be_parameter const ¶m, const instruction &inst) { - int saveflags = ((inst.flags() & FLAG_Z) != 0); + int saveflags = inst.flags() != 0; if (param.is_immediate()) { int count = param.immediate() & 63; @@ -1584,6 +1750,8 @@ void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, ;// skip else { + saveflags = saveflags && count > 0; + while (count >= 32) { if (inst.flags() != 0) @@ -1606,12 +1774,35 @@ void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, a.sar(reghi, count); // sar reghi,count } } + + if (saveflags) + { + // take carry from lower register's flags + a.pushfd(); + a.mov(ecx, dword_ptr(esp, 4)); + a.and_(ecx, 0x01); // carry flag + a.and_(dword_ptr(esp, 0), ~0x01); + a.or_(dword_ptr(esp, 0), ecx); + a.popfd(); + + emit_combine_z_flags(a); + } } else { + Label skipall = a.newLabel(); + Label end = a.newLabel(); Label skip1 = a.newLabel(); Label skip2 = a.newLabel(); + + a.pushfd(); + emit_mov_r32_p32(a, ecx, param); // mov ecx,param + + a.and_(ecx, 63); + a.test(ecx, ecx); + a.short_().jz(skipall); + a.test(ecx, 0x20); // test ecx,0x20 a.short_().jz(skip1); // jz skip1 if (inst.flags() != 0) @@ -1637,9 +1828,29 @@ void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, a.shrd(reglo, reghi, cl); // shrd reglo,reghi,cl if (saveflags) a.pushfd(); // pushf a.sar(reghi, cl); // sar reghi,cl + + if (saveflags) + { + // take carry from lower register's flags + a.pushfd(); + a.mov(ecx, dword_ptr(esp, 4)); + a.and_(ecx, 0x01); // carry flag + a.and_(dword_ptr(esp, 0), ~0x01); + a.or_(dword_ptr(esp, 0), ecx); + a.popfd(); + + emit_combine_z_flags(a); + } + + a.lea(esp, ptr(esp, 4)); + + a.jmp(end); + + a.bind(skipall); + a.popfd(); + + a.bind(end); } - if (saveflags) - emit_combine_z_flags(a); } @@ -1650,7 +1861,11 @@ void drcbe_x86::emit_sar_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, be_parameter const ¶m, const instruction &inst) { - int saveflags = ((inst.flags() & FLAG_Z) != 0); + int saveflags = inst.flags() != 0; + + Gp tempreg = esi; // TODO: try to avoid collision with reglo and reghi? 
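// Reference sketch (not part of the patch): the Z/S semantics the emit_combine_z*/zs
// helpers above are aiming for when a 64-bit result is produced as two 32-bit halves --
// ZF may only remain set when *both* halves are zero, and SF must reflect bit 63, i.e.
// the high half.  The helper name and standalone form are hypothetical; 0x40/0x80 are
// the x86 EFLAGS ZF/SF masks used by the generated code.
#include <cstdint>

constexpr uint32_t EFLAGS_ZF = 0x40;
constexpr uint32_t EFLAGS_SF = 0x80;

constexpr uint32_t zs_flags_from_halves(uint32_t lo, uint32_t hi)
{
	uint32_t flags = 0;
	if (lo == 0 && hi == 0)
		flags |= EFLAGS_ZF;          // zero only when the full 64-bit value is zero
	if (hi & 0x80000000u)
		flags |= EFLAGS_SF;          // sign comes from bit 63, i.e. the high half
	return flags;
}

static_assert(zs_flags_from_halves(0, 0) == EFLAGS_ZF, "all-zero value sets ZF");
static_assert(zs_flags_from_halves(1, 0) == 0, "non-zero low half clears ZF");
static_assert(zs_flags_from_halves(0, 0x80000000u) == EFLAGS_SF, "sign taken from the high half");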
+ a.push(tempreg); + if (param.is_immediate()) { int count = param.immediate() & 63; @@ -1658,65 +1873,88 @@ void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, ;// skip else { + saveflags = saveflags && count > 0; + while (count >= 32) { if (inst.flags() != 0) { - a.mov(ecx, reglo); // mov ecx,reglo - a.shld(reglo, reghi, 31); // shld reglo,reghi,31 - a.shld(reghi, ecx, 31); // shld reghi,ecx,31 + a.mov(ecx, reglo); + a.shld(reglo, reghi, 31); + a.shld(reghi, ecx, 31); count -= 31; } else { - a.xchg(reghi, reglo); // xchg reghi,reglo + a.xchg(reghi, reglo); count -= 32; } } - if (inst.flags() != 0 || count > 0) - { - a.mov(ecx, reglo); // mov ecx,reglo - a.shld(reglo, reghi, count); // shld reglo,reghi,count - if (saveflags) a.pushfd(); // pushf - a.shld(reghi, ecx, count); // shld reghi,ecx,count - } + + a.mov(ecx, reglo); + a.shld(reglo, reghi, count); + if (saveflags) a.pushfd(); + a.shld(reghi, ecx, count); } + + if (saveflags) + emit_combine_zs_flags(a); } else { + Label end = a.newLabel(); + Label skipall = a.newLabel(); Label skip1 = a.newLabel(); - Label skip2 = a.newLabel(); - a.mov(ptr(esp, -8), ebx); // mov [esp-8],ebx - emit_mov_r32_p32(a, ecx, param); // mov ecx,param - a.test(ecx, 0x20); // test ecx,0x20 - a.short_().jz(skip1); // jz skip1 + Label shift_loop = a.newLabel(); + + emit_mov_r32_p32(a, ecx, param); + + a.pushfd(); + + a.and_(ecx, 63); + a.test(ecx, ecx); + a.short_().jz(skipall); + + a.cmp(ecx, 32); + a.short_().jl(skip1); + + a.bind(shift_loop); if (inst.flags() != 0) { - a.sub(ecx, 31); // sub ecx,31 - a.mov(ebx, reglo); // mov ebx,reglo - a.shld(reglo, reghi, 31); // shld reglo,reghi,31 - a.shld(reghi, ebx, 31); // shld reghi,ebx,31 - a.test(ecx, 0x20); // test ecx,0x20 - a.short_().jz(skip2); // jz skip2 - a.sub(ecx, 31); // sub ecx,31 - a.mov(ebx, reglo); // mov ebx,reglo - a.shld(reglo, reghi, 31); // shld reglo,reghi,31 - a.shld(reghi, ebx, 31); // shld reghi,ebx,31 - a.bind(skip2); // skip2: - reset_last_upper_lower_reg(); + a.sub(ecx, 31); + a.mov(tempreg, reglo); + a.shld(reglo, reghi, 31); + a.shld(reghi, tempreg, 31); } else - a.xchg(reghi, reglo); // xchg reghi,reglo - a.bind(skip1); // skip1: + { + a.xchg(reghi, reglo); + a.sub(ecx, 32); + } + a.cmp(ecx, 32); + a.short_().jge(shift_loop); + + a.bind(skip1); reset_last_upper_lower_reg(); - a.mov(ebx, reglo); // mov ebx,reglo - a.shld(reglo, reghi, cl); // shld reglo,reghi,cl - if (saveflags) a.pushfd(); // pushf - a.shld(reghi, ebx, cl); // shld reghi,ebx,cl - a.mov(ebx, ptr(esp, saveflags ? -4 : -8)); // mov ebx,[esp-8] + + a.mov(tempreg, reglo); + a.shld(reglo, reghi, cl); + if (saveflags) a.pushfd(); + a.shld(reghi, tempreg, cl); + + if (saveflags) + emit_combine_zs_flags(a); + + a.lea(esp, ptr(esp, 4)); + a.jmp(end); + + a.bind(skipall); + a.popfd(); + + a.bind(end); } - if (saveflags) - emit_combine_z_flags(a); + + a.pop(tempreg); } @@ -1727,7 +1965,11 @@ void drcbe_x86::emit_rol_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, be_parameter const ¶m, const instruction &inst) { - int saveflags = ((inst.flags() & FLAG_Z) != 0); + int saveflags = inst.flags() != 0; + + Gp tempreg = esi; // TODO: try to avoid collision with reglo and reghi? 
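// Illustrative sketch (not part of the patch) of why the rotate loops above can step by
// 31 bits at a time when flags are wanted (SHLD/SHRD take an immediate of at most 31)
// and by 32 (an XCHG of the halves) otherwise: rotate amounts compose additively modulo
// 64, so any count reduces to repeated sub-32 steps plus one final partial step.  The
// function names are hypothetical; this is a portable model, not the emitted code.
#include <cstdint>

constexpr uint64_t rotl64(uint64_t v, unsigned n)
{
	n &= 63;
	return n ? ((v << n) | (v >> (64 - n))) : v;
}

constexpr uint64_t rotl64_in_steps(uint64_t v, unsigned n)
{
	n &= 63;
	while (n >= 32)
	{
		v = rotl64(v, 31);   // mirrors the two 31-bit SHLDs in the loop body
		n -= 31;
	}
	return rotl64(v, n);     // final partial step by the remaining count
}

static_assert(rotl64_in_steps(0x8000000000000001ull, 1) == 3, "wrap-around of the top bit");
static_assert(rotl64_in_steps(0x0123456789abcdefull, 40) == rotl64(0x0123456789abcdefull, 40), "stepped rotate matches a single rotate");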
+ a.push(tempreg); + if (param.is_immediate()) { int count = param.immediate() & 63; @@ -1735,65 +1977,88 @@ void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, ;// skip else { + saveflags = saveflags && count > 0; + while (count >= 32) { if (inst.flags() != 0) { - a.mov(ecx, reglo); // mov ecx,reglo - a.shrd(reglo, reghi, 31); // shrd reglo,reghi,31 - a.shrd(reghi, ecx, 31); // shrd reghi,ecx,31 + a.mov(tempreg, reglo); + a.shrd(reglo, reghi, 31); + a.shrd(reghi, tempreg, 31); count -= 31; } else { - a.xchg(reghi, reglo); // xchg reghi,reglo + a.xchg(reghi, reglo); count -= 32; } } - if (inst.flags() != 0 || count > 0) - { - a.mov(ecx, reglo); // mov ecx,reglo - a.shrd(reglo, reghi, count); // shrd reglo,reghi,count - if (saveflags) a.pushfd(); // pushf - a.shrd(reghi, ecx, count); // shrd reghi,ecx,count - } + + a.mov(tempreg, reghi); + a.shrd(reghi, reglo, count); + if (saveflags) a.pushfd(); + a.shrd(reglo, tempreg, count); + + if (saveflags) + emit_combine_zs_flags(a); } } else { + Label end = a.newLabel(); + Label skipall = a.newLabel(); Label skip1 = a.newLabel(); - Label skip2 = a.newLabel(); - a.mov(ptr(esp, -8), ebx); // mov [esp-8],ebx - emit_mov_r32_p32(a, ecx, param); // mov ecx,param - a.test(ecx, 0x20); // test ecx,0x20 - a.short_().jz(skip1); // jz skip1 + Label shift_loop = a.newLabel(); + + emit_mov_r32_p32(a, ecx, param); + + a.pushfd(); + + a.and_(ecx, 63); + a.test(ecx, ecx); + a.short_().jz(skipall); + + a.cmp(ecx, 32); + a.short_().jl(skip1); + + a.bind(shift_loop); if (inst.flags() != 0) { - a.sub(ecx, 31); // sub ecx,31 - a.mov(ebx, reglo); // mov ebx,reglo - a.shrd(reglo, reghi, 31); // shrd reglo,reghi,31 - a.shrd(reghi, ebx, 31); // shrd reghi,ebx,31 - a.test(ecx, 0x20); // test ecx,0x20 - a.short_().jz(skip2); // jz skip2 - a.sub(ecx, 31); // sub ecx,31 - a.mov(ebx, reglo); // mov ebx,reglo - a.shrd(reglo, reghi, 31); // shrd reglo,reghi,31 - a.shrd(reghi, ebx, 31); // shrd reghi,ebx,31 - a.bind(skip2); // skip2: - reset_last_upper_lower_reg(); + a.sub(ecx, 31); + a.mov(tempreg, reglo); + a.shrd(reglo, reghi, 31); + a.shrd(reghi, tempreg, 31); } else - a.xchg(reghi, reglo); // xchg reghi,reglo - a.bind(skip1); // skip1: + { + a.xchg(reghi, reglo); + a.sub(ecx, 32); + } + a.cmp(ecx, 32); + a.short_().jge(shift_loop); + + a.bind(skip1); reset_last_upper_lower_reg(); - a.mov(ebx, reglo); // mov ebx,reglo - a.shrd(reglo, reghi, cl); // shrd reglo,reghi,cl - if (saveflags) a.pushfd(); // pushf - a.shrd(reghi, ebx, cl); // shrd reghi,ebx,cl - a.mov(ebx, ptr(esp, saveflags ? 
-4 : -8)); // mov ebx,[esp-8] + + a.mov(tempreg, reghi); + a.shrd(reghi, reglo, cl); + if (saveflags) a.pushfd(); + a.shrd(reglo, tempreg, cl); + + if (saveflags) + emit_combine_zs_flags(a); + + a.lea(esp, ptr(esp, 4)); + a.jmp(end); + + a.bind(skipall); + a.popfd(); + + a.bind(end); } - if (saveflags) - emit_combine_z_flags(a); + + a.pop(tempreg); } @@ -1804,42 +2069,41 @@ void drcbe_x86::emit_ror_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, void drcbe_x86::emit_rcl_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, be_parameter const ¶m, const instruction &inst) { - int saveflags = ((inst.flags() & FLAG_Z) != 0); Label loop = a.newLabel(); Label skipall = a.newLabel(); Label skiploop = a.newLabel(); - emit_mov_r32_p32_keepflags(a, ecx, param); // mov ecx,param - if (!saveflags) - { - a.bind(loop); // loop: - a.jecxz(skipall); // jecxz skipall - a.lea(ecx, ptr(ecx, -1)); // lea ecx,[ecx-1] - a.rcl(reglo, 1); // rcl reglo,1 - a.rcl(reghi, 1); // rcl reghi,1 - a.jmp(loop); // jmp loop - a.bind(skipall); // skipall: - reset_last_upper_lower_reg(); - } - else + a.pushfd(); // keep carry flag after and + emit_mov_r32_p32_keepflags(a, ecx, param); + + a.and_(ecx, 63); + a.popfd(); + + a.jecxz(skipall); + a.lea(ecx, ptr(ecx, -1)); + + a.bind(loop); + a.jecxz(skiploop); + a.lea(ecx, ptr(ecx, -1)); + a.rcl(reglo, 1); + a.rcl(reghi, 1); + a.jmp(loop); + + a.bind(skiploop); + reset_last_upper_lower_reg(); + a.rcl(reglo, 1); + a.rcl(reghi, 1); + + if (inst.flags()) { - a.jecxz(skipall); // jecxz skipall - a.lea(ecx, ptr(ecx, -1)); // lea ecx,[ecx-1] - a.bind(loop); // loop: - a.jecxz(skiploop); // jecxz skiploop - a.lea(ecx, ptr(ecx, -1)); // lea ecx,[ecx-1] - a.rcl(reglo, 1); // rcl reglo,1 - a.rcl(reghi, 1); // rcl reghi,1 - a.jmp(loop); // jmp loop - a.bind(skiploop); // skiploop: - reset_last_upper_lower_reg(); - a.rcl(reglo, 1); // rcl reglo,1 - a.pushfd(); // pushf - a.rcl(reghi, 1); // rcl reghi,1 - a.bind(skipall); // skipall: - reset_last_upper_lower_reg(); + calculate_status_flags(a, reglo, FLAG_Z); + a.pushfd(); + calculate_status_flags(a, reghi, FLAG_S | FLAG_Z); emit_combine_z_flags(a); } + + a.bind(skipall); + reset_last_upper_lower_reg(); } @@ -1850,42 +2114,41 @@ void drcbe_x86::emit_rcl_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, void drcbe_x86::emit_rcr_r64_p64(Assembler &a, Gp const ®lo, Gp const ®hi, be_parameter const ¶m, const instruction &inst) { - int saveflags = (inst.flags() != 0); Label loop = a.newLabel(); Label skipall = a.newLabel(); Label skiploop = a.newLabel(); - emit_mov_r32_p32_keepflags(a, ecx, param); // mov ecx,param - if (!saveflags) - { - a.bind(loop); // loop: - a.jecxz(skipall); // jecxz skipall - a.lea(ecx, ptr(ecx, -1)); // lea ecx,[ecx-1] - a.rcr(reghi, 1); // rcr reghi,1 - a.rcr(reglo, 1); // rcr reglo,1 - a.jmp(loop); // jmp loop - a.bind(skipall); // skipall: - reset_last_upper_lower_reg(); - } - else + a.pushfd(); // keep carry flag after and + emit_mov_r32_p32_keepflags(a, ecx, param); + + a.and_(ecx, 63); + a.popfd(); + + a.jecxz(skipall); + a.lea(ecx, ptr(ecx, -1)); + + a.bind(loop); + a.jecxz(skiploop); + a.lea(ecx, ptr(ecx, -1)); + a.rcr(reghi, 1); + a.rcr(reglo, 1); + a.jmp(loop); + + a.bind(skiploop); + reset_last_upper_lower_reg(); + a.rcr(reghi, 1); + a.rcr(reglo, 1); + + if (inst.flags()) { - a.jecxz(skipall); // jecxz skipall - a.lea(ecx, ptr(ecx, -1)); // lea ecx,[ecx-1] - a.bind(loop); // loop: - a.jecxz(skiploop); // jecxz skiploop - a.lea(ecx, ptr(ecx, -1)); // lea ecx,[ecx-1] - a.rcr(reghi, 1); // rcr reghi,1 - a.rcr(reglo, 
1); // rcr reglo,1 - a.jmp(loop); // jmp loop - a.bind(skiploop); // skiploop: - reset_last_upper_lower_reg(); - a.rcr(reghi, 1); // rcr reghi,1 - a.pushfd(); // pushf - a.rcr(reglo, 1); // rcr reglo,1 - a.bind(skipall); // skipall: - reset_last_upper_lower_reg(); - emit_combine_z_shl_flags(a); + calculate_status_flags(a, reglo, FLAG_Z); + a.pushfd(); + calculate_status_flags(a, reghi, FLAG_S | FLAG_Z); + emit_combine_z_flags(a); } + + a.bind(skipall); + reset_last_upper_lower_reg(); } @@ -1893,9 +2156,15 @@ void drcbe_x86::alu_op_param(Assembler &a, Inst::Id const opcode_lo, Inst::Id co { if (param.is_memory()) { - a.emit(opcode_lo, lo, MABS(param.memory(0))); // opl reglo,[param] + if (opcode_lo == Inst::kIdTest) // can't use memory on right of test + a.emit(opcode_lo, MABS(param.memory(0)), lo); // opl [param],reglo + else + a.emit(opcode_lo, lo, MABS(param.memory(0))); // opl reglo,[param] if (saveflags) a.pushfd(); // pushf - a.emit(opcode_hi, hi, MABS(param.memory(4))); // oph reghi,[param] + if (opcode_hi == Inst::kIdTest) // can't use memory on right of test + a.emit(opcode_hi, MABS(param.memory(4)), hi); // oph [param],reghi + else + a.emit(opcode_hi, hi, MABS(param.memory(4))); // oph reghi,[param] } else if (param.is_immediate()) { @@ -1907,7 +2176,11 @@ void drcbe_x86::alu_op_param(Assembler &a, Inst::Id const opcode_lo, Inst::Id co { a.emit(opcode_lo, lo, Gpd(param.ireg())); // opl reglo,param if (saveflags) a.pushfd(); // pushf - a.emit(opcode_hi, hi, MABS(m_reghi[param.ireg()])); // oph reghi,reghi[param] + + if (opcode_hi == Inst::kIdTest) // can't use memory on right of test + a.emit(opcode_hi, MABS(m_reghi[param.ireg()]), hi); // oph reghi[param],reghi + else + a.emit(opcode_hi, hi, MABS(m_reghi[param.ireg()])); // oph reghi,reghi[param] } if (saveflags) @@ -2108,6 +2381,16 @@ void drcbe_x86::op_nop(Assembler &a, const instruction &inst) // nothing } +//------------------------------------------------- +// op_break - process a BREAK opcode +//------------------------------------------------- + +void drcbe_x86::op_break(Assembler &a, const instruction &inst) +{ + static const char *const message = "break from drc"; + a.mov(dword_ptr(esp, 0), imm(message)); + a.call(imm(&osd_break_into_debugger)); +} //------------------------------------------------- // op_debug - process a DEBUG opcode @@ -2546,6 +2829,8 @@ void drcbe_x86::op_getflgs(Assembler &a, const instruction &inst) // pick a target register for the general case Gp const dstreg = dstp.select_register(eax); + a.pushfd(); + // compute mask for flags uint32_t flagmask = 0; if (maskp.immediate() & FLAG_C) flagmask |= 0x001; @@ -2661,6 +2946,32 @@ void drcbe_x86::op_getflgs(Assembler &a, const instruction &inst) else if (dstp.is_int_register()) a.mov(MABS(m_reghi[dstp.ireg()], 4), 0); // mov [reghi],0 } + + a.popfd(); +} + + +//------------------------------------------------- +// op_setflgs - process a SETFLGS opcode +//------------------------------------------------- + +void drcbe_x86::op_setflgs(Assembler &a, const instruction &inst) +{ + assert(inst.size() == 4); + assert_no_condition(inst); + assert_no_flags(inst); + + be_parameter srcp(*this, inst.param(0), PTYPE_MRI); + + a.pushfd(); + + emit_mov_r32_p32(a, eax, srcp); + + a.mov(eax, ptr(u64(flags_unmap), eax, 2)); + a.and_(dword_ptr(esp), ~0x8c5); + a.or_(dword_ptr(esp), eax); + + a.popfd(); } @@ -3217,11 +3528,13 @@ void drcbe_x86::op_carry(Assembler &a, const instruction &inst) // degenerate case: source is immediate if (srcp.is_immediate() && 
bitp.is_immediate()) { - if (srcp.immediate() & ((uint64_t)1 << bitp.immediate())) + if (srcp.immediate() & ((uint64_t)1 << (bitp.immediate() & (inst.size() * 8 - 1)))) a.stc(); else a.clc(); - } + + return; + } // load non-immediate bit numbers into a register if (!bitp.is_immediate()) @@ -3233,43 +3546,98 @@ void drcbe_x86::op_carry(Assembler &a, const instruction &inst) // 32-bit form if (inst.size() == 4) { + if (srcp.is_immediate()) + emit_mov_r32_p32(a, edx, srcp); + if (bitp.is_immediate()) { if (srcp.is_memory()) - a.bt(MABS(srcp.memory(), 4), bitp.immediate()); // bt [srcp],bitp + a.bt(MABS(srcp.memory(), 4), (bitp.immediate() & (inst.size() * 8 - 1))); else if (srcp.is_int_register()) - a.bt(Gpd(srcp.ireg()), bitp.immediate()); // bt srcp,bitp + a.bt(Gpd(srcp.ireg()), (bitp.immediate() & (inst.size() * 8 - 1))); + else if (srcp.is_immediate()) + a.bt(edx, (bitp.immediate() & (inst.size() * 8 - 1))); } else { if (srcp.is_memory()) - a.bt(MABS(srcp.memory()), ecx); // bt [srcp],ecx + a.bt(MABS(srcp.memory()), ecx); else if (srcp.is_int_register()) - a.bt(Gpd(srcp.ireg()), ecx); // bt [srcp],ecx + a.bt(Gpd(srcp.ireg()), ecx); + else if (srcp.is_immediate()) + a.bt(edx, ecx); } } // 64-bit form else { + if (srcp.is_immediate()) + emit_mov_r64_p64(a, ebx, edx, srcp); + if (bitp.is_immediate()) { - if (srcp.is_memory()) - a.bt(MABS(srcp.memory(), 4), bitp.immediate()); // bt [srcp],bitp - else if (srcp.is_int_register() && bitp.immediate() < 32) - a.bt(Gpd(srcp.ireg()), bitp.immediate()); // bt srcp,bitp - else if (srcp.is_int_register() && bitp.immediate() >= 32) - a.bt(MABS(m_reghi[srcp.ireg()], 4), bitp.immediate() - 32); // bt [srcp.hi],bitp + uint32_t bitshift = (bitp.immediate() & (inst.size() * 8 - 1)); + if (bitshift < 32) + { + if (srcp.is_memory()) + a.bt(MABS(srcp.memory(), 4), bitshift); + else if (srcp.is_int_register()) + a.bt(Gpd(srcp.ireg()), bitshift); + else if (srcp.is_immediate()) + a.bt(ebx, bitshift); + } + else if (bitshift >= 32) + { + if (srcp.is_memory()) + a.bt(MABS((uint8_t*)srcp.memory() + 4, 4), bitshift - 32); + else if (srcp.is_int_register()) + a.bt(MABS(m_reghi[srcp.ireg()], 4), bitshift - 32); + else if (srcp.is_immediate()) + a.bt(edx, bitshift); + } } else { + Label end = a.newLabel(); + Label higher = a.newLabel(); + + a.cmp(ecx, 32); + a.jge(higher); + + if (srcp.is_memory()) + { + a.bt(MABS(srcp.memory(), 4), ecx); + } + else if (srcp.is_int_register()) + { + a.mov(MABS(m_reglo[srcp.ireg()], 4), Gpd(srcp.ireg())); // mov [srcp.lo],srcp + a.bt(MABS(m_reglo[srcp.ireg()], 4), ecx); // bt [srcp],ecx + } + else if (srcp.is_immediate()) + { + a.bt(ebx, ecx); + } + + a.short_().jmp(end); + + a.bind(higher); + a.sub(ecx, 32); + if (srcp.is_memory()) - a.bt(MABS(srcp.memory()), ecx); // bt [srcp],ecx + { + a.bt(MABS((uint8_t*)srcp.memory() + 4, 4), ecx); + } else if (srcp.is_int_register()) { - a.mov(MABS(m_reglo[srcp.ireg()]), Gpd(srcp.ireg())); // mov [srcp.lo],srcp - a.bt(MABS(m_reglo[srcp.ireg()]), ecx); // bt [srcp],ecx + a.bt(MABS(m_reghi[srcp.ireg()], 4), ecx); } + else if (srcp.is_immediate()) + { + a.bt(edx, ecx); + } + + a.bind(end); } } } @@ -3384,7 +3752,7 @@ void drcbe_x86::op_mov(Assembler &a, const instruction &inst) // general case else { - emit_mov_r64_p64(a, dstreg, edx, srcp); // mov edx:dstreg,srcp + emit_mov_r64_p64_keepflags(a, dstreg, edx, srcp); // mov edx:dstreg,srcp emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg } } @@ -3416,7 +3784,7 @@ void drcbe_x86::op_sext(Assembler &a, const instruction &inst) 
assert(sizep.is_size()); // pick a target register for the general case - Gp const dstreg = dstp.select_register(eax); + Gp const dstreg = eax; // convert 8-bit source registers to EAX if (sizep.size() == SIZE_BYTE && srcp.is_int_register() && (srcp.ireg() & 4)) @@ -3444,6 +3812,16 @@ void drcbe_x86::op_sext(Assembler &a, const instruction &inst) else if (sizep.size() == SIZE_DWORD && dstreg.id() != srcp.ireg()) a.mov(dstreg, Gpd(srcp.ireg())); // mov dstreg,srcp } + else if (srcp.is_immediate()) + { + if (sizep.size() == SIZE_BYTE) + a.mov(dstreg, (int8_t)srcp.immediate()); + else if (sizep.size() == SIZE_WORD) + a.mov(dstreg, (int16_t)srcp.immediate()); + else if (sizep.size() == SIZE_DWORD) + a.mov(dstreg, (int32_t)srcp.immediate()); + } + if (inst.flags() != 0) a.test(dstreg, dstreg); // test dstreg,dstreg @@ -3455,7 +3833,7 @@ void drcbe_x86::op_sext(Assembler &a, const instruction &inst) else if (inst.size() == 8) { a.cdq(); // cdq - emit_mov_p64_r64(a, dstp, eax, edx); // mov dstp,edx:eax + emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:eax } } @@ -3484,12 +3862,12 @@ void drcbe_x86::op_roland(Assembler &a, const instruction &inst) if (inst.size() == 4) { emit_mov_r32_p32(a, dstreg, srcp); // mov dstreg,srcp - shift_op_param(a, Inst::kIdRol, dstreg, shiftp, // rol dstreg,shiftp + shift_op_param(a, Inst::kIdRol, inst.size(), dstreg, shiftp, // rol dstreg,shiftp [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, false); alu_op_param(a, Inst::kIdAnd, dstreg, maskp, // and dstreg,maskp [inst](Assembler &a, Operand const &dst, be_parameter const &src) { @@ -3542,12 +3920,12 @@ void drcbe_x86::op_rolins(Assembler &a, const instruction &inst) if (inst.size() == 4) { emit_mov_r32_p32(a, eax, srcp); // mov eax,srcp - shift_op_param(a, Inst::kIdRol, eax, shiftp, // rol eax,shiftp + shift_op_param(a, Inst::kIdRol, inst.size(), eax, shiftp, // rol eax,shiftp [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, false); emit_mov_r32_p32(a, dstreg, dstp); // mov dstreg,dstp if (maskp.is_immediate()) { @@ -3563,6 +3941,9 @@ void drcbe_x86::op_rolins(Assembler &a, const instruction &inst) } a.or_(dstreg, eax); // or dstreg,eax emit_mov_p32_r32(a, dstp, dstreg); // mov dstp,dstreg + + if (inst.flags()) + a.test(dstreg, dstreg); } // 64-bit form @@ -3611,19 +3992,26 @@ void drcbe_x86::op_rolins(Assembler &a, const instruction &inst) a.or_(MABS(dstp.memory(0)), eax); // or dstp.lo,eax a.or_(MABS(dstp.memory(4)), edx); // or dstp.hi,edx } + + if (inst.flags()) + { + if (dstp.is_int_register()) + calculate_status_flags(a, Gpd(dstp.ireg()), FLAG_Z); + else + calculate_status_flags(a, MABS(dstp.memory(0)), FLAG_Z); + + a.pushfd(); + + if (dstp.is_int_register()) + calculate_status_flags(a, MABS(m_reghi[dstp.ireg()]), FLAG_S | FLAG_Z); + else + calculate_status_flags(a, MABS(dstp.memory(4)), FLAG_S | FLAG_Z); + + emit_combine_z_flags(a); + } + a.mov(ebx, ptr(esp, -8)); // mov ebx,[esp-8] } - if (inst.flags() == FLAG_Z) - a.or_(eax, edx); // or eax,edx - else if (inst.flags() == FLAG_S) - ;// do nothing -- final OR will have the right result - else if (inst.flags() == (FLAG_Z | FLAG_S)) - { - a.movzx(ecx, ax); // movzx ecx,ax - a.shr(eax, 16); // shr eax,16 - a.or_(edx, ecx); // or edx,ecx - a.or_(edx, eax); // or edx,eax - } } } @@ -3935,9 +4323,6 @@ void drcbe_x86::op_cmp(Assembler &a, const instruction &inst) void 
drcbe_x86::op_mulu(Assembler &a, const instruction &inst) { - uint8_t zsflags = inst.flags() & (FLAG_Z | FLAG_S); - uint8_t vflag = inst.flags() & FLAG_V; - // validate instruction assert(inst.size() == 4 || inst.size() == 8); assert_no_condition(inst); @@ -3956,53 +4341,36 @@ void drcbe_x86::op_mulu(Assembler &a, const instruction &inst) { // general case emit_mov_r32_p32(a, eax, src1p); // mov eax,src1p - if (src2p.is_memory()) - a.mul(MABS(src2p.memory(), 4)); // mul [src2p] - else if (src2p.is_int_register()) - a.mul(Gpd(src2p.ireg())); // mul src2p - else if (src2p.is_immediate()) - { - a.mov(edx, src2p.immediate()); // mov edx,src2p - a.mul(edx); // mul edx - } + emit_mov_r32_p32(a, edx, src2p); // mov edx,src2p + a.mul(edx); // mul edx + emit_mov_p32_r32(a, dstp, eax); // mov dstp,eax if (compute_hi) emit_mov_p32_r32(a, edstp, edx); // mov edstp,edx - // compute flags - if (inst.flags() != 0) + if (inst.flags()) { - if (zsflags != 0) - { - if (vflag) - a.pushfd(); // pushf - if (compute_hi) - { - if (zsflags == FLAG_Z) - a.or_(edx, eax); // or edx,eax - else if (zsflags == FLAG_S) - a.test(edx, edx); // test edx,edx - else - { - a.movzx(ecx, ax); // movzx ecx,ax - a.shr(eax, 16); // shr eax,16 - a.or_(edx, ecx); // or edx,ecx - a.or_(edx, eax); // or edx,eax - } - } - else - a.test(eax, eax); // test eax,eax + a.pushfd(); - // we rely on the fact that OF is cleared by all logical operations above - if (vflag) - { - a.pushfd(); // pushf - a.pop(eax); // pop eax - a.and_(dword_ptr(esp, 0), ~0x84); // and [esp],~0x84 - a.or_(ptr(esp, 0), eax); // or [esp],eax - a.popfd(); // popf - } - } + a.test(edx, edx); + a.pushfd(); // will have the sign flag + upper half zero + a.pop(edx); + + a.test(eax, eax); + a.pushfd(); // lower half zero + a.pop(eax); + + a.and_(dword_ptr(esp, 0), ~(0x40 | 0x80)); + a.mov(ecx, edx); + a.and_(ecx, 0x80); // sign + + a.and_(eax, edx); + a.and_(eax, 0x40); // zero + + a.or_(eax, ecx); + a.or_(dword_ptr(esp, 0), eax); + + a.popfd(); } } @@ -4010,6 +4378,7 @@ void drcbe_x86::op_mulu(Assembler &a, const instruction &inst) else if (inst.size() == 8) { // general case + a.mov(dword_ptr(esp, 28), 0); // mov [esp+28],0 (calculate flags as 64x64=128) a.mov(dword_ptr(esp, 24), inst.flags()); // mov [esp+24],flags emit_mov_m64_p64(a, qword_ptr(esp, 16), src2p); // mov [esp+16],src2p emit_mov_m64_p64(a, qword_ptr(esp, 8), src1p); // mov [esp+8],src1p @@ -4020,7 +4389,7 @@ void drcbe_x86::op_mulu(Assembler &a, const instruction &inst) a.mov(dword_ptr(esp, 0), imm(&m_reslo)); // mov [esp],&reslo a.call(imm(dmulu)); // call dmulu if (inst.flags() != 0) - a.push(ptr(u64(flags_unmap), eax, 2)); // push flags_unmap[eax*4] + a.push(dword_ptr(u64(flags_unmap), eax, 2)); // push flags_unmap[eax*4] a.mov(eax, MABS((uint32_t *)&m_reslo + 0)); // mov eax,reslo.lo a.mov(edx, MABS((uint32_t *)&m_reslo + 1)); // mov edx,reslo.hi emit_mov_p64_r64(a, dstp, eax, edx); // mov dstp,edx:eax @@ -4036,15 +4405,80 @@ void drcbe_x86::op_mulu(Assembler &a, const instruction &inst) } +//------------------------------------------------- +// op_mululw - process a MULULW (32x32=32) opcode +//------------------------------------------------- + +void drcbe_x86::op_mululw(Assembler &a, const instruction &inst) +{ + // validate instruction + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S); + + // normalize parameters + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + 
be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + normalize_commutative(src1p, src2p); + + // 32-bit form + if (inst.size() == 4) + { + // general case + emit_mov_r32_p32(a, eax, src1p); // mov eax,src1p + emit_mov_r32_p32(a, edx, src2p); // mov edx,src2p + a.mul(edx); // mul edx + + emit_mov_p32_r32(a, dstp, eax); // mov dstp,eax + + if (inst.flags()) + { + a.test(eax, eax); + a.pushfd(); // sign + zero + + // if edx is not zero then it overflowed + a.test(edx, edx); + a.pushfd(); + a.pop(edx); + a.and_(edx, 0x40); // zero + a.xor_(edx, 0x40); + a.shl(edx, 5); // turn into overflow flag + a.or_(dword_ptr(esp, 0), edx); + + a.popfd(); + } + } + + // 64-bit form + else if (inst.size() == 8) + { + // general case + a.mov(dword_ptr(esp, 28), 1); // mov [esp+28],1 (calculate flags as 64x64=64) + a.mov(dword_ptr(esp, 24), inst.flags()); // mov [esp+24],flags + emit_mov_m64_p64(a, qword_ptr(esp, 16), src2p); // mov [esp+16],src2p + emit_mov_m64_p64(a, qword_ptr(esp, 8), src1p); // mov [esp+8],src1p + a.mov(dword_ptr(esp, 4), imm(&m_reslo)); // mov [esp+4],&reslo + a.mov(dword_ptr(esp, 0), imm(&m_reslo)); // mov [esp],&reslo + a.call(imm(dmulu)); // call dmulu + if (inst.flags() != 0) + a.push(dword_ptr(u64(flags_unmap), eax, 2)); // push flags_unmap[eax*4] + a.mov(eax, MABS((uint32_t *)&m_reslo + 0)); // mov eax,reslo.lo + a.mov(edx, MABS((uint32_t *)&m_reslo + 1)); // mov edx,reslo.hi + emit_mov_p64_r64(a, dstp, eax, edx); // mov dstp,edx:eax + if (inst.flags() != 0) + a.popfd(); // popf + } + +} + + //------------------------------------------------- // op_muls - process a MULS opcode //------------------------------------------------- void drcbe_x86::op_muls(Assembler &a, const instruction &inst) { - uint8_t zsflags = inst.flags() & (FLAG_Z | FLAG_S); - uint8_t vflag = inst.flags() & FLAG_V; - // validate instruction assert(inst.size() == 4 || inst.size() == 8); assert_no_condition(inst); @@ -4061,78 +4495,37 @@ void drcbe_x86::op_muls(Assembler &a, const instruction &inst) // 32-bit form if (inst.size() == 4) { - // 32-bit destination with memory/immediate or register/immediate - if (!compute_hi && !src1p.is_immediate() && src2p.is_immediate()) - { - if (src1p.is_memory()) - a.imul(eax, MABS(src1p.memory(), 4), src2p.immediate()); // imul eax,[src1p],src2p - else if (src1p.is_int_register()) - a.imul(eax, Gpd(src1p.ireg()), src2p.immediate()); // imul eax,src1p,src2p - emit_mov_p32_r32(a, dstp, eax); // mov dstp,eax - } - - // 32-bit destination, general case - else if (!compute_hi) - { - emit_mov_r32_p32(a, eax, src1p); // mov eax,src1p - if (src2p.is_memory()) - a.imul(eax, MABS(src2p.memory(), 4)); // imul eax,[src2p] - else if (src2p.is_int_register()) - a.imul(eax, Gpd(src2p.ireg())); // imul eax,src2p - emit_mov_p32_r32(a, dstp, eax); // mov dstp,eax - } + emit_mov_r32_p32(a, eax, src1p); // mov eax,src1p + emit_mov_r32_p32(a, edx, src2p); // mov edx,src2p + a.imul(edx); // imul edx - // 64-bit destination, general case - else - { - emit_mov_r32_p32(a, eax, src1p); // mov eax,src1p - if (src2p.is_memory()) - a.imul(MABS(src2p.memory(), 4)); // imul [src2p] - else if (src2p.is_int_register()) - a.imul(Gpd(src2p.ireg())); // imul src2p - else if (src2p.is_immediate()) - { - a.mov(edx, src2p.immediate()); // mov edx,src2p - a.imul(edx); // imul edx - } - emit_mov_p32_r32(a, dstp, eax); // mov dstp,eax + emit_mov_p32_r32(a, dstp, eax); // mov dstp,eax + if (compute_hi) emit_mov_p32_r32(a, edstp, edx); // mov edstp,edx - } - // compute flags - if (inst.flags() != 0) + if (inst.flags()) 
{ - if (zsflags != 0) - { - if (vflag) - a.pushfd(); // pushf - if (compute_hi) - { - if (inst.flags() == FLAG_Z) - a.or_(edx, eax); // or edx,eax - else if (inst.flags() == FLAG_S) - a.test(edx, edx); // test edx,edx - else - { - a.movzx(ecx, ax); // movzx ecx,ax - a.shr(eax, 16); // shr eax,16 - a.or_(edx, ecx); // or edx,ecx - a.or_(edx, eax); // or edx,eax - } - } - else - a.test(eax, eax); // test eax,eax + a.pushfd(); - // we rely on the fact that OF is cleared by all logical operations above - if (vflag) - { - a.pushfd(); // pushf - a.pop(eax); // pop eax - a.and_(dword_ptr(esp, 0), ~0x84); // and [esp],~0x84 - a.or_(ptr(esp, 0), eax); // or [esp],eax - a.popfd(); // popf - } - } + a.test(edx, edx); + a.pushfd(); // will have the sign flag + upper half zero + a.pop(edx); + + a.test(eax, eax); + a.pushfd(); // lower half zero + a.pop(eax); + + a.and_(dword_ptr(esp, 0), ~(0x40 | 0x80)); + a.mov(ecx, edx); + a.and_(ecx, 0x80); // sign + + a.and_(eax, edx); + a.and_(eax, 0x40); // zero + + a.or_(eax, ecx); + a.or_(dword_ptr(esp, 0), eax); + + a.popfd(); } } @@ -4140,6 +4533,7 @@ void drcbe_x86::op_muls(Assembler &a, const instruction &inst) else if (inst.size() == 8) { // general case + a.mov(dword_ptr(esp, 28), 0); // mov [esp+28],0 (calculate flags as 64x64=128) a.mov(dword_ptr(esp, 24), inst.flags()); // mov [esp+24],flags emit_mov_m64_p64(a, qword_ptr(esp, 16), src2p); // mov [esp+16],src2p emit_mov_m64_p64(a, qword_ptr(esp, 8), src1p); // mov [esp+8],src1p @@ -4150,7 +4544,7 @@ void drcbe_x86::op_muls(Assembler &a, const instruction &inst) a.mov(dword_ptr(esp, 0), imm(&m_reslo)); // mov [esp],&reslo a.call(imm(dmuls)); // call dmuls if (inst.flags() != 0) - a.push(ptr(u64(flags_unmap), eax, 2)); // push flags_unmap[eax*4] + a.push(dword_ptr(u64(flags_unmap), eax, 2)); // push flags_unmap[eax*4] a.mov(eax, MABS((uint32_t *)&m_reslo + 0)); // mov eax,reslo.lo a.mov(edx, MABS((uint32_t *)&m_reslo + 1)); // mov edx,reslo.hi emit_mov_p64_r64(a, dstp, eax, edx); // mov dstp,edx:eax @@ -4166,6 +4560,74 @@ void drcbe_x86::op_muls(Assembler &a, const instruction &inst) } +//------------------------------------------------- +// op_mulslw - process a MULSLW (32x32=32) opcode +//------------------------------------------------- + +void drcbe_x86::op_mulslw(Assembler &a, const instruction &inst) +{ + // validate instruction + assert(inst.size() == 4 || inst.size() == 8); + assert_no_condition(inst); + assert_flags(inst, FLAG_V | FLAG_Z | FLAG_S); + + // normalize parameters + be_parameter dstp(*this, inst.param(0), PTYPE_MR); + be_parameter src1p(*this, inst.param(1), PTYPE_MRI); + be_parameter src2p(*this, inst.param(2), PTYPE_MRI); + normalize_commutative(src1p, src2p); + + // 32-bit form + if (inst.size() == 4) + { + emit_mov_r32_p32(a, eax, src1p); // mov eax,src1p + emit_mov_r32_p32(a, edx, src2p); // mov edx,src2p + a.imul(edx); // imul edx + + emit_mov_p32_r32(a, dstp, eax); // mov dstp,eax + + if (inst.flags()) + { + a.test(eax, eax); + a.pushfd(); // sign + zero + + a.mov(ecx, edx); + a.cdq(); + + a.cmp(ecx, edx); + a.pushfd(); + a.pop(edx); + a.and_(edx, 0x40); // zero + a.xor_(edx, 0x40); + a.shl(edx, 5); // turn into overflow flag + a.or_(dword_ptr(esp, 0), edx); + + a.popfd(); + } + } + + // 64-bit form + else if (inst.size() == 8) + { + // general case + a.mov(dword_ptr(esp, 28), 1); // mov [esp+28],1 (calculate flags as 64x64=64) + a.mov(dword_ptr(esp, 24), inst.flags()); // mov [esp+24],flags + emit_mov_m64_p64(a, qword_ptr(esp, 16), src2p); // mov [esp+16],src2p + 
emit_mov_m64_p64(a, qword_ptr(esp, 8), src1p); // mov [esp+8],src1p + a.mov(dword_ptr(esp, 4), imm(&m_reslo)); // mov [esp+4],&reslo + a.mov(dword_ptr(esp, 0), imm(&m_reslo)); // mov [esp],&reslo + a.call(imm(dmuls)); // call dmuls + if (inst.flags() != 0) + a.push(dword_ptr(u64(flags_unmap), eax, 2)); // push flags_unmap[eax*4] + a.mov(eax, MABS((uint32_t *)&m_reslo + 0)); // mov eax,reslo.lo + a.mov(edx, MABS((uint32_t *)&m_reslo + 1)); // mov edx,reslo.hi + emit_mov_p64_r64(a, dstp, eax, edx); // mov dstp,edx:eax + if (inst.flags() != 0) + a.popfd(); // popf + } +} + + //------------------------------------------------- // op_divu - process a DIVU opcode //------------------------------------------------- @@ -4221,7 +4683,7 @@ void drcbe_x86::op_divu(Assembler &a, const instruction &inst) a.mov(dword_ptr(esp, 0), imm(&m_reslo)); // mov [esp],&reslo a.call(imm(ddivu)); // call ddivu if (inst.flags() != 0) - a.push(ptr(u64(flags_unmap), eax, 2)); // push flags_unmap[eax*4] + a.push(dword_ptr(u64(flags_unmap), eax, 2)); // push flags_unmap[eax*4] a.mov(eax, MABS((uint32_t *)&m_reslo + 0)); // mov eax,reslo.lo a.mov(edx, MABS((uint32_t *)&m_reslo + 1)); // mov edx,reslo.hi emit_mov_p64_r64(a, dstp, eax, edx); // mov dstp,edx:eax @@ -4292,7 +4754,7 @@ void drcbe_x86::op_divs(Assembler &a, const instruction &inst) a.mov(dword_ptr(esp, 0), imm(&m_reslo)); // mov [esp],&reslo a.call(imm(ddivs)); // call ddivs if (inst.flags() != 0) - a.push(ptr(u64(flags_unmap), eax, 2)); // push flags_unmap[eax*4] + a.push(dword_ptr(u64(flags_unmap), eax, 2)); // push flags_unmap[eax*4] a.mov(eax, MABS((uint32_t *)&m_reslo + 0)); // mov eax,reslo.lo a.mov(edx, MABS((uint32_t *)&m_reslo + 1)); // mov edx,reslo.hi emit_mov_p64_r64(a, dstp, eax, edx); // mov dstp,edx:eax @@ -4802,6 +5264,12 @@ void drcbe_x86::op_lzcnt(Assembler &a, const instruction &inst) // pick a target register for the general case Gp const dstreg = dstp.select_register(eax); + if (inst.flags()) + { + a.xor_(eax, eax); // reset status flags + a.test(eax, eax); + } + // 32-bit form if (inst.size() == 4) { @@ -4811,24 +5279,37 @@ void drcbe_x86::op_lzcnt(Assembler &a, const instruction &inst) a.cmovz(dstreg, ecx); // cmovz dstreg,ecx a.xor_(dstreg, 31); // xor dstreg,31 emit_mov_p32_r32(a, dstp, dstreg); // mov dstp,dstreg + + a.test(dstreg, dstreg); } // 64-bit form else if (inst.size() == 8) { - emit_mov_r64_p64(a, edx, dstreg, srcp); // mov dstreg:edx,srcp - a.bsr(dstreg, dstreg); // bsr dstreg,dstreg + emit_mov_r64_p64(a, dstreg, edx, srcp); // mov dstreg:edx,srcp + Label skip = a.newLabel(); - a.jnz(skip); // jnz skip - a.mov(ecx, 32 ^ 31); // mov ecx,32 ^ 31 - a.bsr(dstreg, edx); // bsr dstreg,edx - a.cmovz(dstreg, ecx); // cmovz dstreg,ecx - a.add(ecx, 32); // add ecx,32 - a.bind(skip); // skip: - reset_last_upper_lower_reg(); - a.xor_(edx, edx); // xor edx,edx - a.xor_(dstreg, 31); // xor dstreg,31 + Label end = a.newLabel(); + + a.bsr(edx, edx); + a.jz(skip); + a.xor_(edx, 31 ^ 63); + a.mov(dstreg, edx); + a.jmp(end); + + a.bind(skip); + a.mov(edx, 64 ^ 63); + a.bsr(dstreg, dstreg); + a.cmovz(dstreg, edx); + + a.bind(end); + + a.xor_(dstreg, 63); + a.mov(edx, 0); + emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg + + a.test(dstreg, dstreg); } } @@ -4850,6 +5331,12 @@ void drcbe_x86::op_tzcnt(Assembler &a, const instruction &inst) Gp const dstreg = dstp.select_register(eax); + if (inst.flags()) + { + a.xor_(eax, eax); // reset status flags + a.test(eax, eax); + } + // 32-bit form if (inst.size() == 4) { @@ -4858,15 
+5345,18 @@ void drcbe_x86::op_tzcnt(Assembler &a, const instruction &inst) a.bsf(dstreg, dstreg); // bsf dstreg,dstreg a.cmovz(dstreg, ecx); // cmovz dstreg,ecx emit_mov_p32_r32(a, dstp, dstreg); // mov dstp,dstreg + + a.mov(ecx, dstreg); + a.xor_(ecx, 32); } // 64-bit form else if (inst.size() == 8) { Label skip = a.newLabel(); - emit_mov_r64_p64(a, edx, dstreg, srcp); // mov dstreg:edx,srcp + emit_mov_r64_p64(a, dstreg, edx, srcp); // mov dstreg:edx,srcp a.bsf(dstreg, dstreg); // bsf dstreg,dstreg - a.jz(skip); // jnz skip + a.jnz(skip); // jnz skip a.mov(ecx, 32); // mov ecx,32 a.bsf(dstreg, edx); // bsf dstreg,edx a.cmovz(dstreg, ecx); // cmovz dstreg,ecx @@ -4875,6 +5365,9 @@ void drcbe_x86::op_tzcnt(Assembler &a, const instruction &inst) reset_last_upper_lower_reg(); a.xor_(edx, edx); // xor edx,edx emit_mov_p64_r64(a, dstp, dstreg, edx); // mov dstp,edx:dstreg + + a.mov(ecx, dstreg); + a.xor_(ecx, 64); } } @@ -4954,23 +5447,23 @@ void drcbe_x86::op_shl(Assembler &a, const instruction &inst) { // dstp == src1p in memory if (dstp.is_memory() && dstp == src1p) - shift_op_param(a, Inst::kIdShl, MABS(dstp.memory(), 4), src2p, // shl [dstp],src2p + shift_op_param(a, Inst::kIdShl, inst.size(), MABS(dstp.memory(), 4), src2p, // shl [dstp],src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); // general case else { emit_mov_r32_p32(a, dstreg, src1p); // mov dstreg,src1p - shift_op_param(a, Inst::kIdShl, dstreg, src2p, // shl dstreg,src2p + shift_op_param(a, Inst::kIdShl, inst.size(), dstreg, src2p, // shl dstreg,src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); emit_mov_p32_r32(a, dstp, dstreg); // mov dstp,dstreg } } @@ -5010,23 +5503,23 @@ void drcbe_x86::op_shr(Assembler &a, const instruction &inst) { // dstp == src1p in memory if (dstp.is_memory() && dstp == src1p) - shift_op_param(a, Inst::kIdShr, MABS(dstp.memory(), 4), src2p, // shr [dstp],src2p + shift_op_param(a, Inst::kIdShr, inst.size(), MABS(dstp.memory(), 4), src2p, // shr [dstp],src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); // general case else { emit_mov_r32_p32(a, dstreg, src1p); // mov dstreg,src1p - shift_op_param(a, Inst::kIdShr, dstreg, src2p, // shr dstreg,src2p + shift_op_param(a, Inst::kIdShr, inst.size(), dstreg, src2p, // shr dstreg,src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); emit_mov_p32_r32(a, dstp, dstreg); // mov dstp,dstreg } } @@ -5066,23 +5559,23 @@ void drcbe_x86::op_sar(Assembler &a, const instruction &inst) { // dstp == src1p in memory if (dstp.is_memory() && dstp == src1p) - shift_op_param(a, Inst::kIdSar, MABS(dstp.memory(), 4), src2p, // sar [dstp],src2p + shift_op_param(a, Inst::kIdSar, inst.size(), MABS(dstp.memory(), 4), src2p, // sar [dstp],src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); // general case else { emit_mov_r32_p32(a, dstreg, src1p); // mov dstreg,src1p - shift_op_param(a, Inst::kIdSar, dstreg, src2p, // sar dstreg,src2p + shift_op_param(a, Inst::kIdSar, inst.size(), dstreg, src2p, // sar dstreg,src2p [inst](Assembler &a, Operand const &dst, 
be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); emit_mov_p32_r32(a, dstp, dstreg); // mov dstp,dstreg } } @@ -5122,23 +5615,23 @@ void drcbe_x86::op_rol(Assembler &a, const instruction &inst) { // dstp == src1p in memory if (dstp.is_memory() && dstp == src1p) - shift_op_param(a, Inst::kIdRol, MABS(dstp.memory(), 4), src2p, // rol [dstp],src2p + shift_op_param(a, Inst::kIdRol, inst.size(), MABS(dstp.memory(), 4), src2p, // rol [dstp],src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); // general case else { emit_mov_r32_p32(a, dstreg, src1p); // mov dstreg,src1p - shift_op_param(a, Inst::kIdRol, dstreg, src2p, // rol dstreg,src2p + shift_op_param(a, Inst::kIdRol, inst.size(), dstreg, src2p, // rol dstreg,src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); emit_mov_p32_r32(a, dstp, dstreg); // mov dstp,dstreg } } @@ -5178,23 +5671,23 @@ void drcbe_x86::op_ror(Assembler &a, const instruction &inst) { // dstp == src1p in memory if (dstp.is_memory() && dstp == src1p) - shift_op_param(a, Inst::kIdRor, MABS(dstp.memory(), 4), src2p, // ror [dstp],src2p + shift_op_param(a, Inst::kIdRor, inst.size(), MABS(dstp.memory(), 4), src2p, // ror [dstp],src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); // general case else { emit_mov_r32_p32(a, dstreg, src1p); // mov dstreg,src1p - shift_op_param(a, Inst::kIdRor, dstreg, src2p, // rol dstreg,src2p + shift_op_param(a, Inst::kIdRor, inst.size(), dstreg, src2p, // rol dstreg,src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); emit_mov_p32_r32(a, dstp, dstreg); // mov dstp,dstreg } } @@ -5234,23 +5727,23 @@ void drcbe_x86::op_rolc(Assembler &a, const instruction &inst) { // dstp == src1p in memory if (dstp.is_memory() && dstp == src1p) - shift_op_param(a, Inst::kIdRcl, MABS(dstp.memory(), 4), src2p, // rcl [dstp],src2p + shift_op_param(a, Inst::kIdRcl, inst.size(), MABS(dstp.memory(), 4), src2p, // rcl [dstp],src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); // general case else { emit_mov_r32_p32_keepflags(a, dstreg, src1p); // mov dstreg,src1p - shift_op_param(a, Inst::kIdRcl, dstreg, src2p, // rcl dstreg,src2p + shift_op_param(a, Inst::kIdRcl, inst.size(), dstreg, src2p, // rcl dstreg,src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); emit_mov_p32_r32(a, dstp, dstreg); // mov dstp,dstreg } } @@ -5290,23 +5783,23 @@ void drcbe_x86::op_rorc(Assembler &a, const instruction &inst) { // dstp == src1p in memory if (dstp.is_memory() && dstp == src1p) - shift_op_param(a, Inst::kIdRcr, MABS(dstp.memory(), 4), src2p, // rcr [dstp],src2p + shift_op_param(a, Inst::kIdRcr, inst.size(), MABS(dstp.memory(), 4), src2p, // rcr [dstp],src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); // general case else { emit_mov_r32_p32_keepflags(a, dstreg, src1p); // mov dstreg,src1p - 
shift_op_param(a, Inst::kIdRcr, dstreg, src2p, // rcr dstreg,src2p + shift_op_param(a, Inst::kIdRcr, inst.size(), dstreg, src2p, // rcr dstreg,src2p [inst](Assembler &a, Operand const &dst, be_parameter const &src) { // optimize zero case return (!inst.flags() && !src.immediate()); - }); + }, true); emit_mov_p32_r32(a, dstp, dstreg); // mov dstp,dstreg } } @@ -6036,7 +6529,7 @@ void drcbe_x86::op_icopyf(Assembler &a, const instruction &inst) // dmulu - perform a double-wide unsigned multiply //------------------------------------------------- -int drcbe_x86::dmulu(uint64_t &dstlo, uint64_t &dsthi, uint64_t src1, uint64_t src2, bool flags) +int drcbe_x86::dmulu(uint64_t &dstlo, uint64_t &dsthi, uint64_t src1, uint64_t src2, bool flags, bool halfmul_flags) { // shortcut if we don't care about the high bits or the flags if (&dstlo == &dsthi && flags == false) @@ -6072,7 +6565,11 @@ int drcbe_x86::dmulu(uint64_t &dstlo, uint64_t &dsthi, uint64_t src1, uint64_t s // store the results dsthi = hi; dstlo = lo; - return ((hi >> 60) & FLAG_S) | ((dsthi != 0) << 1); + + if (halfmul_flags) + return ((lo >> 60) & FLAG_S) | ((hi != 0) << 1); + + return ((hi >> 60) & FLAG_S) | ((hi != 0) << 1); } @@ -6080,7 +6577,7 @@ int drcbe_x86::dmulu(uint64_t &dstlo, uint64_t &dsthi, uint64_t src1, uint64_t s // dmuls - perform a double-wide signed multiply //------------------------------------------------- -int drcbe_x86::dmuls(uint64_t &dstlo, uint64_t &dsthi, int64_t src1, int64_t src2, bool flags) +int drcbe_x86::dmuls(uint64_t &dstlo, uint64_t &dsthi, int64_t src1, int64_t src2, bool flags, bool halfmul_flags) { uint64_t lo, hi, prevlo; uint64_t a, b, temp; @@ -6126,7 +6623,11 @@ int drcbe_x86::dmuls(uint64_t &dstlo, uint64_t &dsthi, int64_t src1, int64_t src // store the results dsthi = hi; dstlo = lo; - return ((hi >> 60) & FLAG_S) | ((dsthi != ((int64_t)lo >> 63)) << 1); + + if (halfmul_flags) + return ((lo >> 60) & FLAG_S) | ((hi != ((int64_t)lo >> 63)) << 1); + + return ((hi >> 60) & FLAG_S) | ((hi != ((int64_t)lo >> 63)) << 1); } diff --git a/src/devices/cpu/drcbex86.h b/src/devices/cpu/drcbex86.h index 9feb21f379bb7..b39093ddb827f 100644 --- a/src/devices/cpu/drcbex86.h +++ b/src/devices/cpu/drcbex86.h @@ -117,6 +117,7 @@ class drcbe_x86 : public drcbe_interface asmjit::x86::Mem MABS(void const *base, u32 const size = 0) const { return asmjit::x86::Mem(u64(base), size); } void normalize_commutative(be_parameter &inner, be_parameter &outer); void emit_combine_z_flags(asmjit::x86::Assembler &a); + void emit_combine_zs_flags(asmjit::x86::Assembler &a); void emit_combine_z_shl_flags(asmjit::x86::Assembler &a); void reset_last_upper_lower_reg(); void set_last_lower_reg(asmjit::x86::Assembler &a, be_parameter const ¶m, asmjit::x86::Gp const ®lo); @@ -134,6 +135,7 @@ class drcbe_x86 : public drcbe_interface void op_mapvar(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_nop(asmjit::x86::Assembler &a, const uml::instruction &inst); + void op_break(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_debug(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_exit(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_hashjmp(asmjit::x86::Assembler &a, const uml::instruction &inst); @@ -148,6 +150,7 @@ class drcbe_x86 : public drcbe_interface void op_getfmod(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_getexp(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_getflgs(asmjit::x86::Assembler &a, const uml::instruction &inst); 
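// Illustrative sketch (not part of the patch) of the flag convention selected by the new
// halfmul_flags argument to dmulu/dmuls above: when only the low half of the double-width
// product is kept, S is taken from that truncated value and V records that the discarded
// upper half was significant (non-zero for the unsigned case, not a plain sign extension
// for the signed case).  The helper names are hypothetical; FLAG_V/FLAG_S are assumed to
// be 2 and 8, matching the flag masks used elsewhere in this backend.
#include <cstdint>

constexpr uint8_t UML_FLAG_V = 0x02;
constexpr uint8_t UML_FLAG_S = 0x08;

constexpr uint8_t mul_low_flags_unsigned(uint64_t lo, uint64_t hi)
{
	uint8_t f = (lo >> 63) ? UML_FLAG_S : 0;
	if (hi != 0)
		f |= UML_FLAG_V;                         // product overflowed the kept width
	return f;
}

constexpr uint8_t mul_low_flags_signed(uint64_t lo, uint64_t hi)
{
	uint8_t f = (lo >> 63) ? UML_FLAG_S : 0;
	if (hi != uint64_t(int64_t(lo) >> 63))
		f |= UML_FLAG_V;                         // upper half is not just the sign extension
	return f;
}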
+ void op_setflgs(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_save(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_restore(asmjit::x86::Assembler &a, const uml::instruction &inst); @@ -170,7 +173,9 @@ class drcbe_x86 : public drcbe_interface void op_subc(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_cmp(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_mulu(asmjit::x86::Assembler &a, const uml::instruction &inst); + void op_mululw(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_muls(asmjit::x86::Assembler &a, const uml::instruction &inst); + void op_mulslw(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_divu(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_divs(asmjit::x86::Assembler &a, const uml::instruction &inst); void op_and(asmjit::x86::Assembler &a, const uml::instruction &inst); @@ -217,7 +222,7 @@ class drcbe_x86 : public drcbe_interface void emit_mov_p32_r32(asmjit::x86::Assembler &a, be_parameter const ¶m, asmjit::x86::Gp const ®); void alu_op_param(asmjit::x86::Assembler &a, asmjit::x86::Inst::Id const opcode, asmjit::Operand const &dst, be_parameter const ¶m, std::function optimize = [](asmjit::x86::Assembler &a, asmjit::Operand dst, be_parameter const &src) { return false; }); - void shift_op_param(asmjit::x86::Assembler &a, asmjit::x86::Inst::Id const opcode, asmjit::Operand const &dst, be_parameter const ¶m, std::function optimize); + void shift_op_param(asmjit::x86::Assembler &a, asmjit::x86::Inst::Id const opcode, size_t opsize, asmjit::Operand const &dst, be_parameter const ¶m, std::function optimize, bool update_flags); // 64-bit code emission helpers void emit_mov_r64_p64(asmjit::x86::Assembler &a, asmjit::x86::Gp const ®lo, asmjit::x86::Gp const ®hi, be_parameter const ¶m); @@ -246,11 +251,13 @@ class drcbe_x86 : public drcbe_interface void emit_fstp_p(asmjit::x86::Assembler &a, int size, be_parameter const ¶m); // callback helpers - static int dmulu(uint64_t &dstlo, uint64_t &dsthi, uint64_t src1, uint64_t src2, bool flags); - static int dmuls(uint64_t &dstlo, uint64_t &dsthi, int64_t src1, int64_t src2, bool flags); + static int dmulu(uint64_t &dstlo, uint64_t &dsthi, uint64_t src1, uint64_t src2, bool flags, bool halfmul_flags); + static int dmuls(uint64_t &dstlo, uint64_t &dsthi, int64_t src1, int64_t src2, bool flags, bool halfmul_flags); static int ddivu(uint64_t &dstlo, uint64_t &dsthi, uint64_t src1, uint64_t src2); static int ddivs(uint64_t &dstlo, uint64_t &dsthi, int64_t src1, int64_t src2); + void calculate_status_flags(asmjit::x86::Assembler &a, asmjit::Operand const &dst, u8 flags); + size_t emit(asmjit::CodeHolder &ch); // internal state diff --git a/src/devices/cpu/drcumlsh.h b/src/devices/cpu/drcumlsh.h index 5dec2d02dba65..666b01c8bdf74 100644 --- a/src/devices/cpu/drcumlsh.h +++ b/src/devices/cpu/drcumlsh.h @@ -29,6 +29,7 @@ /* ----- Control Flow Operations ----- */ #define UML_NOP(block) do { using namespace uml; block.append().nop(); } while (0) +#define UML_BREAK(block) do { using namespace uml; block.append().break_(); } while (0) #define UML_DEBUG(block, pc) do { using namespace uml; block.append().debug(pc); } while (0) #define UML_EXIT(block, param) do { using namespace uml; block.append().exit(param); } while (0) #define UML_EXITc(block, cond, param) do { using namespace uml; block.append().exit(param, cond); } while (0) @@ -53,6 +54,7 @@ #define UML_GETFMOD(block, dst) do { using namespace uml; 
block.append().getfmod(dst); } while (0) #define UML_GETEXP(block, dst) do { using namespace uml; block.append().getexp(dst); } while (0) #define UML_GETFLGS(block, dst, flags) do { using namespace uml; block.append().getflgs(dst, flags); } while (0) +#define UML_SETFLGS(block, flags) do { using namespace uml; block.append().setflgs(flags); } while (0) #define UML_SAVE(block, dst) do { using namespace uml; block.append().save(dst); } while (0) #define UML_RESTORE(block, src) do { using namespace uml; block.append().restore(src); } while (0) @@ -78,7 +80,9 @@ #define UML_SUBB(block, dst, src1, src2) do { using namespace uml; block.append().subb(dst, src1, src2); } while (0) #define UML_CMP(block, src1, src2) do { using namespace uml; block.append().cmp(src1, src2); } while (0) #define UML_MULU(block, dst, edst, src1, src2) do { using namespace uml; block.append().mulu(dst, edst, src1, src2); } while (0) +#define UML_MULULW(block, dst, src1, src2) do { using namespace uml; block.append().mululw(dst, src1, src2); } while (0) #define UML_MULS(block, dst, edst, src1, src2) do { using namespace uml; block.append().muls(dst, edst, src1, src2); } while (0) +#define UML_MULSLW(block, dst, src1, src2) do { using namespace uml; block.append().mulslw(dst, src1, src2); } while (0) #define UML_DIVU(block, dst, edst, src1, src2) do { using namespace uml; block.append().divu(dst, edst, src1, src2); } while (0) #define UML_DIVS(block, dst, edst, src1, src2) do { using namespace uml; block.append().divs(dst, edst, src1, src2); } while (0) #define UML_AND(block, dst, src1, src2) do { using namespace uml; block.append()._and(dst, src1, src2); } while (0) @@ -118,7 +122,9 @@ #define UML_DSUBB(block, dst, src1, src2) do { using namespace uml; block.append().dsubb(dst, src1, src2); } while (0) #define UML_DCMP(block, src1, src2) do { using namespace uml; block.append().dcmp(src1, src2); } while (0) #define UML_DMULU(block, dst, edst, src1, src2) do { using namespace uml; block.append().dmulu(dst, edst, src1, src2); } while (0) +#define UML_DMULULW(block, dst, src1, src2) do { using namespace uml; block.append().dmululw(dst, src1, src2); } while (0) #define UML_DMULS(block, dst, edst, src1, src2) do { using namespace uml; block.append().dmuls(dst, edst, src1, src2); } while (0) +#define UML_DMULSLW(block, dst, src1, src2) do { using namespace uml; block.append().dmulslw(dst, src1, src2); } while (0) #define UML_DDIVU(block, dst, edst, src1, src2) do { using namespace uml; block.append().ddivu(dst, edst, src1, src2); } while (0) #define UML_DDIVS(block, dst, edst, src1, src2) do { using namespace uml; block.append().ddivs(dst, edst, src1, src2); } while (0) #define UML_DAND(block, dst, src1, src2) do { using namespace uml; block.append().dand(dst, src1, src2); } while (0) diff --git a/src/devices/cpu/powerpc/ppcdrc.cpp b/src/devices/cpu/powerpc/ppcdrc.cpp index 0c62dec346a8c..64d19498d12c5 100644 --- a/src/devices/cpu/powerpc/ppcdrc.cpp +++ b/src/devices/cpu/powerpc/ppcdrc.cpp @@ -2681,7 +2681,9 @@ bool ppc_device::generate_instruction_1f(drcuml_block &block, compiler_state *co case 0x0eb: /* MULLWx */ case 0x2eb: /* MULLWOx */ - UML_MULS(block, R32(G_RD(op)), R32(G_RD(op)), R32(G_RA(op)), R32(G_RB(op))); // muls rd,rd,ra,rb + // The flags are calculated based on the resulting 32-bit value from the 32x32=32 multiplication + // reference: example 4 https://www.ibm.com/docs/en/aix/7.2?topic=set-mullw-muls-multiply-low-word-instruction + UML_MULSLW(block, R32(G_RD(op)), R32(G_RA(op)), R32(G_RB(op))); // mulslw 
rd,ra,rb generate_compute_flags(block, desc, op & M_RC, ((op & M_OE) ? XER_OV : 0), false);// return true; diff --git a/src/devices/cpu/uml.cpp b/src/devices/cpu/uml.cpp index 30dea00f6b209..53b2174859ec6 100644 --- a/src/devices/cpu/uml.cpp +++ b/src/devices/cpu/uml.cpp @@ -137,45 +137,49 @@ opcode_info const instruction::s_opcode_info_table[OP_MAX] = // Control Flow Operations OPINFO0(NOP, "nop", 4, false, NONE, NONE, NONE) - OPINFO1(DEBUG, "debug", 4, false, NONE, NONE, ALL, PINFO(IN, OP, IANY)) + OPINFO1(DEBUG, "debug", 4, false, NONE, NONE, ALL, PINFO(IN, OP, IANY)) // MAME debugger breakpoint + OPINFO0(BREAK, "break", 4, false, NONE, NONE, NONE) // (for debugging) Issues a breakpoint exception to allow for debugging the generated assembly OPINFO1(EXIT, "exit", 4, true, NONE, NONE, ALL, PINFO(IN, OP, IANY)) OPINFO3(HASHJMP, "hashjmp", 4, false, NONE, NONE, ALL, PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, HANDLE)) OPINFO1(JMP, "jmp", 4, true, NONE, NONE, NONE, PINFO(IN, OP, LABEL)) - OPINFO2(EXH, "exh", 4, true, NONE, NONE, ALL, PINFO(IN, OP, HANDLE), PINFO(IN, OP, IANY)) - OPINFO1(CALLH, "callh", 4, true, NONE, NONE, ALL, PINFO(IN, OP, HANDLE)) + OPINFO2(EXH, "exh", 4, true, NONE, NONE, ALL, PINFO(IN, OP, HANDLE), PINFO(IN, OP, IANY)) // Call exception handler + OPINFO1(CALLH, "callh", 4, true, NONE, NONE, ALL, PINFO(IN, OP, HANDLE)) // Call handle OPINFO0(RET, "ret", 4, true, NONE, NONE, ALL) - OPINFO2(CALLC, "callc", 4, true, NONE, NONE, ALL, PINFO(IN, OP, CFUNC), PINFO(IN, OP, PTR)) - OPINFO2(RECOVER, "recover", 4, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, MVAR)) + OPINFO2(CALLC, "callc", 4, true, NONE, NONE, ALL, PINFO(IN, OP, CFUNC), PINFO(IN, OP, PTR)) // Call C function + OPINFO2(RECOVER, "recover", 4, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, MVAR)) // Get value from mapvar // Internal Register Operations - OPINFO1(SETFMOD, "setfmod", 4, false, NONE, NONE, ALL, PINFO(IN, OP, IANY)) - OPINFO1(GETFMOD, "getfmod", 4, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM)) - OPINFO1(GETEXP, "getexp", 4, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM)) - OPINFO2(GETFLGS, "getflgs", 4, false, P2, NONE, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IMV)) - OPINFO1(SAVE, "save", 4, false, ALL, NONE, ALL, PINFO(OUT, OP, STATE)) - OPINFO1(RESTORE, "restore", 4, false, NONE, ALL, ALL, PINFO(IN, OP, STATE)) + OPINFO1(SETFMOD, "setfmod", 4, false, NONE, NONE, ALL, PINFO(IN, OP, IANY)) // Set floating point control mode + OPINFO1(GETFMOD, "getfmod", 4, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM)) // Get floating point control mode + OPINFO1(GETEXP, "getexp", 4, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM)) // Get exception parameter value + OPINFO2(GETFLGS, "getflgs", 4, false, P2, NONE, NONE, PINFO(OUT, OP, IRM), PINFO(IN, OP, IMV)) // Get status register flags + OPINFO1(SETFLGS, "setflgs", 4, false, NONE, ALL, ALL, PINFO(IN, OP, IANY)) // (for debugging) Set status register flags + OPINFO1(SAVE, "save", 4, false, ALL, NONE, ALL, PINFO(OUT, OP, STATE)) // Save current state to drcuml_machine_state + OPINFO1(RESTORE, "restore", 4, false, NONE, ALL, ALL, PINFO(IN, OP, STATE)) // Load saved state from drcuml_machine_state // Integer Operations - OPINFO4(LOAD, "!load", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, PTR), PINFO(IN, 4, IANY), PINFO(IN, OP, SCSIZE)) - OPINFO4(LOADS, "!loads", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, PTR), PINFO(IN, 4, IANY), PINFO(IN, OP, SCSIZE)) - OPINFO4(STORE, "!store", 4|8, false, NONE, NONE, 
ALL, PINFO(IN, OP, PTR), PINFO(IN, 4, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, SCSIZE)) - OPINFO3(READ, "!read", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM), PINFO(IN, 4, IANY), PINFO(IN, OP, SPSIZE)) - OPINFO4(READM, "!readm", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM), PINFO(IN, 4, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, SPSIZE)) - OPINFO3(WRITE, "!write", 4|8, false, NONE, NONE, ALL, PINFO(IN, 4, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, SPSIZE)) - OPINFO4(WRITEM, "!writem", 4|8, false, NONE, NONE, ALL, PINFO(IN, 4, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, SPSIZE)) - OPINFO2(CARRY, "!carry", 4|8, false, NONE, C, ALL, PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) - OPINFO1(SET, "!set", 4|8, true, NONE, NONE, ALL, PINFO(OUT, OP, IRM)) + OPINFO4(LOAD, "!load", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, PTR), PINFO(IN, 4, IANY), PINFO(IN, OP, SCSIZE)) // Load unsigned value from specified memory location + OPINFO4(LOADS, "!loads", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, PTR), PINFO(IN, 4, IANY), PINFO(IN, OP, SCSIZE)) // Load signed value from specified memory location + OPINFO4(STORE, "!store", 4|8, false, NONE, NONE, ALL, PINFO(IN, OP, PTR), PINFO(IN, 4, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, SCSIZE)) // Store value to specified memory location + OPINFO3(READ, "!read", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM), PINFO(IN, 4, IANY), PINFO(IN, OP, SPSIZE)) // Read memory from emulated machine using memory space reader + OPINFO4(READM, "!readm", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, IRM), PINFO(IN, 4, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, SPSIZE)) // Read memory from emulated machine using memory space reader (masked) + OPINFO3(WRITE, "!write", 4|8, false, NONE, NONE, ALL, PINFO(IN, 4, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, SPSIZE)) // Write to emulated machine's memory using memory space writer + OPINFO4(WRITEM, "!writem", 4|8, false, NONE, NONE, ALL, PINFO(IN, 4, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, SPSIZE)) // Write to emulated machine's memory using memory space writer (masked) + OPINFO2(CARRY, "!carry", 4|8, false, NONE, C, ALL, PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) // Set carry status flag on CPU + OPINFO1(SET, "!set", 4|8, true, NONE, NONE, ALL, PINFO(OUT, OP, IRM)) // Get the state of the specified condition (e.g. 
calling UML_SET with COND_NZ will return 0 if the condition is not met and 1 if the condition is met) OPINFO2(MOV, "!mov", 4|8, true, NONE, NONE, NONE, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY)) OPINFO3(SEXT, "!sext", 4|8, false, NONE, SZ, ALL, PINFO(OUT, OP, IRM), PINFO(IN, P3, IANY), PINFO(IN, OP, SIZE)) - OPINFO4(ROLAND, "!roland", 4|8, false, NONE, SZ, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) - OPINFO4(ROLINS, "!rolins", 4|8, false, NONE, SZ, ALL, PINFO(INOUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) + OPINFO4(ROLAND, "!roland", 4|8, false, NONE, SZ, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) // Rotate left + AND (see drcbec.cpp for implementation) + OPINFO4(ROLINS, "!rolins", 4|8, false, NONE, SZ, ALL, PINFO(INOUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) // Rotate left + OR (see drcbec.cpp for implementation) OPINFO3(ADD, "!add", 4|8, false, NONE, SZVC, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) OPINFO3(ADDC, "!addc", 4|8, false, C, SZVC, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) OPINFO3(SUB, "!sub", 4|8, false, NONE, SZVC, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) OPINFO3(SUBB, "!subb", 4|8, false, C, SZVC, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) OPINFO2(CMP, "!cmp", 4|8, false, NONE, SZVC, ALL, PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) - OPINFO4(MULU, "!mulu", 4|8, false, NONE, SZV, ALL, PINFO(OUT, OP, IRM), PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) - OPINFO4(MULS, "!muls", 4|8, false, NONE, SZV, ALL, PINFO(OUT, OP, IRM), PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) + OPINFO4(MULU, "!mulu", 4|8, false, NONE, SZV, ALL, PINFO(OUT, OP, IRM), PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) // Unsigned 32x32=64 and 64x64=128 multiplication + OPINFO3(MULULW, "!mululw", 4|8, false, NONE, SZV, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) // Unsigned 32x32=32 and 64x64=64 multiplication (overflow set based on 32x32=64 calculation but zero and sign based on 32-bit result) + OPINFO4(MULS, "!muls", 4|8, false, NONE, SZV, ALL, PINFO(OUT, OP, IRM), PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) // Signed 32x32=64 and 64x64=128 multiplication + OPINFO3(MULSLW, "!mulslw", 4|8, false, NONE, SZV, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) // Signed 32x32=32 and 64x64=64 multiplication (overflow set based on 32x32=64 calculation but zero and sign based on 32-bit result) OPINFO4(DIVU, "!divu", 4|8, false, NONE, SZV, ALL, PINFO(OUT, OP, IRM), PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) OPINFO4(DIVS, "!divs", 4|8, false, NONE, SZV, ALL, PINFO(OUT, OP, IRM), PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) OPINFO3(AND, "!and", 4|8, false, NONE, SZ, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) @@ -194,18 +198,18 @@ opcode_info const instruction::s_opcode_info_table[OP_MAX] = OPINFO3(RORC, "!rorc", 4|8, false, C, SZC, ALL, PINFO(OUT, OP, IRM), PINFO(IN, OP, IANY), PINFO(IN, OP, IANY)) // Floating Point Operations - OPINFO3(FLOAD, "f#load", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, OP, PTR), PINFO(IN, 4, IANY)) - OPINFO3(FSTORE, "f#store", 4|8, false, NONE, NONE, ALL, PINFO(IN, OP, PTR), PINFO(IN, 4, IANY), PINFO(IN, OP, FRM)) - OPINFO3(FREAD, "f#read", 4|8, 
false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, 4, IANY), PINFO(IN, OP, SPSIZE)) - OPINFO3(FWRITE, "f#write", 4|8, false, NONE, NONE, ALL, PINFO(IN, 4, IANY), PINFO(IN, OP, FANY), PINFO(IN, OP, SPSIZE)) + OPINFO3(FLOAD, "f#load", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, OP, PTR), PINFO(IN, 4, IANY)) // Load float/double value from specified memory location + OPINFO3(FSTORE, "f#store", 4|8, false, NONE, NONE, ALL, PINFO(IN, OP, PTR), PINFO(IN, 4, IANY), PINFO(IN, OP, FRM)) // Save float/double value to specified memory location + OPINFO3(FREAD, "f#read", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, 4, IANY), PINFO(IN, OP, SPSIZE)) // Read float/double value from emulated machine using memory space reader + OPINFO3(FWRITE, "f#write", 4|8, false, NONE, NONE, ALL, PINFO(IN, 4, IANY), PINFO(IN, OP, FANY), PINFO(IN, OP, SPSIZE)) // Write float/double value to emulated machine using memory space writer OPINFO2(FMOV, "f#mov", 4|8, true, NONE, NONE, NONE, PINFO(OUT, OP, FRM), PINFO(IN, OP, FANY)) - OPINFO4(FTOINT, "f#toint", 4|8, false, NONE, NONE, ALL, PINFO(OUT, P3, IRM), PINFO(IN, OP, FANY), PINFO(IN, OP, SIZE), PINFO(IN, OP, ROUND)) - OPINFO3(FFRINT, "f#frint", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, P3, IANY), PINFO(IN, OP, SIZE)) - OPINFO3(FFRFLT, "f#frflt", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, P3, FANY), PINFO(IN, OP, SIZE)) - OPINFO2(FRNDS, "f#rnds", 8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, P3, FANY)) + OPINFO4(FTOINT, "f#toint", 4|8, false, NONE, NONE, ALL, PINFO(OUT, P3, IRM), PINFO(IN, OP, FANY), PINFO(IN, OP, SIZE), PINFO(IN, OP, ROUND)) // Float/double to integer + OPINFO3(FFRINT, "f#frint", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, P3, IANY), PINFO(IN, OP, SIZE)) // Float/double from integer + OPINFO3(FFRFLT, "f#frflt", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, P3, FANY), PINFO(IN, OP, SIZE)) // Convert float to double or double to float + OPINFO2(FRNDS, "f#rnds", 8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, P3, FANY)) // Convert double to float and then back to double, or float to double and back to float OPINFO3(FADD, "f#add", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, OP, FANY), PINFO(IN, OP, FANY)) OPINFO3(FSUB, "f#sub", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, OP, FANY), PINFO(IN, OP, FANY)) - OPINFO2(FCMP, "f#cmp", 4|8, false, NONE, UZC, ALL, PINFO(IN, OP, FANY), PINFO(IN, OP, FANY)) + OPINFO2(FCMP, "f#cmp", 4|8, false, NONE, UZC, ALL, PINFO(IN, OP, FANY), PINFO(IN, OP, FANY)) // Note: status flags except FLAG_U are undefined when comparing with NaN OPINFO3(FMUL, "f#mul", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, OP, FANY), PINFO(IN, OP, FANY)) OPINFO3(FDIV, "f#div", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, OP, FANY), PINFO(IN, OP, FANY)) OPINFO2(FNEG, "f#neg", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, OP, FANY)) @@ -213,8 +217,8 @@ opcode_info const instruction::s_opcode_info_table[OP_MAX] = OPINFO2(FSQRT, "f#sqrt", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, OP, FANY)) OPINFO2(FRECIP, "f#recip", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, OP, FANY)) OPINFO2(FRSQRT, "f#rsqrt", 4|8, false, NONE, NONE, ALL, PINFO(OUT, OP, FRM), PINFO(IN, OP, FANY)) - OPINFO2(FCOPYI, "f#copyi", 4|8, false, NONE, NONE, NONE, PINFO(OUT, OP, FRM), PINFO(IN, OP, IRM)) - OPINFO2(ICOPYF, "icopyf#", 4|8, false, NONE, NONE, NONE, 
PINFO(OUT, OP, IRM), PINFO(IN, OP, FRM)) + OPINFO2(FCOPYI, "f#copyi", 4|8, false, NONE, NONE, NONE, PINFO(OUT, OP, FRM), PINFO(IN, OP, IRM)) // Load float/double value from integer representation (e.g. 0x3f800000 -> 1.0f) + OPINFO2(ICOPYF, "icopyf#", 4|8, false, NONE, NONE, NONE, PINFO(OUT, OP, IRM), PINFO(IN, OP, FRM)) // Store float/double value as integer representation (e.g. 1.0f -> 0x3f800000) }; @@ -504,13 +508,26 @@ void uml::instruction::simplify() else if (m_param[2].is_immediate() && m_param[3].is_immediate()) { if (m_size == 4) - convert_to_mov_immediate(u32(u32(m_param[1].immediate()) * u32(m_param[2].immediate()))); + convert_to_mov_immediate(u32(u32(m_param[2].immediate()) * u32(m_param[3].immediate()))); else if (m_size == 8) - convert_to_mov_immediate(u64(u64(m_param[1].immediate()) * u64(m_param[2].immediate()))); + convert_to_mov_immediate(u64(u64(m_param[2].immediate()) * u64(m_param[3].immediate()))); } } break; + // MULULW: convert simple form to MOV if immediate, or if multiplying by 0 + case OP_MULULW: + if (m_param[1].is_immediate_value(0) || m_param[2].is_immediate_value(0)) + convert_to_mov_immediate(0); + else if (m_param[1].is_immediate() && m_param[2].is_immediate()) + { + if (m_size == 4) + convert_to_mov_immediate(u32(u32(m_param[1].immediate()) * u32(m_param[2].immediate()))); + else if (m_size == 8) + convert_to_mov_immediate(u64(u64(m_param[1].immediate()) * u64(m_param[2].immediate()))); + } + break; + // MULS: convert simple form to MOV if immediate, or if multiplying by 0 case OP_MULS: if (m_param[0] == m_param[1]) @@ -520,13 +537,26 @@ void uml::instruction::simplify() else if (m_param[2].is_immediate() && m_param[3].is_immediate()) { if (m_size == 4) - convert_to_mov_immediate(s32(s32(m_param[1].immediate()) * s32(m_param[2].immediate()))); + convert_to_mov_immediate(s32(s32(m_param[2].immediate()) * s32(m_param[3].immediate()))); else if (m_size == 8) - convert_to_mov_immediate(s64(s64(m_param[1].immediate()) * s64(m_param[2].immediate()))); + convert_to_mov_immediate(s64(s64(m_param[2].immediate()) * s64(m_param[3].immediate()))); } } break; + // MULSLW: convert simple form to MOV if immediate, or if multiplying by 0 + case OP_MULSLW: + if (m_param[1].is_immediate_value(0) || m_param[2].is_immediate_value(0)) + convert_to_mov_immediate(0); + else if (m_param[1].is_immediate() && m_param[2].is_immediate()) + { + if (m_size == 4) + convert_to_mov_immediate(s32(s32(m_param[1].immediate()) * s32(m_param[2].immediate()))); + else if (m_size == 8) + convert_to_mov_immediate(s64(s64(m_param[1].immediate()) * s64(m_param[2].immediate()))); + } + break; + // DIVU: convert simple form to MOV if immediate, or if dividing with 0 case OP_DIVU: if (m_param[0] == m_param[1] && !m_param[3].is_immediate_value(0)) @@ -536,9 +566,9 @@ void uml::instruction::simplify() else if (m_param[2].is_immediate() && m_param[3].is_immediate()) { if (m_size == 4) - convert_to_mov_immediate(u32(u32(m_param[1].immediate()) / u32(m_param[2].immediate()))); + convert_to_mov_immediate(u32(u32(m_param[2].immediate()) / u32(m_param[3].immediate()))); else if (m_size == 8) - convert_to_mov_immediate(u64(u64(m_param[1].immediate()) / u64(m_param[2].immediate()))); + convert_to_mov_immediate(u64(u64(m_param[2].immediate()) / u64(m_param[3].immediate()))); } } break; @@ -552,9 +582,9 @@ void uml::instruction::simplify() else if (m_param[2].is_immediate() && m_param[3].is_immediate()) { if (m_size == 4) - convert_to_mov_immediate(s32(s32(m_param[1].immediate()) / 
s32(m_param[2].immediate()))); + convert_to_mov_immediate(s32(s32(m_param[2].immediate()) / s32(m_param[3].immediate()))); else if (m_size == 8) - convert_to_mov_immediate(s64(s64(m_param[1].immediate()) / s64(m_param[2].immediate()))); + convert_to_mov_immediate(s64(s64(m_param[2].immediate()) / s64(m_param[3].immediate()))); } } break; @@ -624,7 +654,12 @@ void uml::instruction::simplify() // SHL: convert to MOV if immediate or shifting by 0 case OP_SHL: if (m_param[1].is_immediate() && m_param[2].is_immediate()) - convert_to_mov_immediate(m_param[1].immediate() << m_param[2].immediate()); + { + if (m_size == 4) + convert_to_mov_immediate(u32(m_param[1].immediate()) << (m_param[2].immediate() & 31)); + else if (m_size == 8) + convert_to_mov_immediate(u64(m_param[1].immediate()) << (m_param[2].immediate() & 63)); + } else if (m_param[2].is_immediate_value(0)) convert_to_mov_param(1); break; @@ -634,9 +669,9 @@ void uml::instruction::simplify() if (m_param[1].is_immediate() && m_param[2].is_immediate()) { if (m_size == 4) - convert_to_mov_immediate(u32(m_param[1].immediate()) >> m_param[2].immediate()); + convert_to_mov_immediate(u32(m_param[1].immediate()) >> (m_param[2].immediate() & 31)); else if (m_size == 8) - convert_to_mov_immediate(u64(m_param[1].immediate()) >> m_param[2].immediate()); + convert_to_mov_immediate(u64(m_param[1].immediate()) >> (m_param[2].immediate() & 63)); } else if (m_param[2].is_immediate_value(0)) convert_to_mov_param(1); @@ -647,9 +682,9 @@ void uml::instruction::simplify() if (m_param[1].is_immediate() && m_param[2].is_immediate()) { if (m_size == 4) - convert_to_mov_immediate(s32(m_param[1].immediate()) >> m_param[2].immediate()); + convert_to_mov_immediate(s32(m_param[1].immediate()) >> (m_param[2].immediate() & 31)); else if (m_size == 8) - convert_to_mov_immediate(s64(m_param[1].immediate()) >> m_param[2].immediate()); + convert_to_mov_immediate(s64(m_param[1].immediate()) >> (m_param[2].immediate() & 63)); } else if (m_param[2].is_immediate_value(0)) convert_to_mov_param(1); @@ -660,9 +695,9 @@ void uml::instruction::simplify() if (m_param[1].is_immediate() && m_param[2].is_immediate()) { if (m_size == 4) - convert_to_mov_immediate(rotl_32(m_param[1].immediate(), m_param[2].immediate())); + convert_to_mov_immediate(rotl_32(m_param[1].immediate(), m_param[2].immediate() & 31)); else if (m_size == 8) - convert_to_mov_immediate(rotl_64(m_param[1].immediate(), m_param[2].immediate())); + convert_to_mov_immediate(rotl_64(m_param[1].immediate(), m_param[2].immediate() & 63)); } else if (m_param[2].is_immediate_value(0)) convert_to_mov_param(1); @@ -673,9 +708,9 @@ void uml::instruction::simplify() if (m_param[1].is_immediate() && m_param[2].is_immediate()) { if (m_size == 4) - convert_to_mov_immediate(rotr_32(m_param[1].immediate(), m_param[2].immediate())); + convert_to_mov_immediate(rotr_32(m_param[1].immediate(), m_param[2].immediate() & 31)); else if (m_size == 8) - convert_to_mov_immediate(rotr_64(m_param[1].immediate(), m_param[2].immediate())); + convert_to_mov_immediate(rotr_64(m_param[1].immediate(), m_param[2].immediate() & 63)); } else if (m_param[2].is_immediate_value(0)) convert_to_mov_param(1); diff --git a/src/devices/cpu/uml.h b/src/devices/cpu/uml.h index 662a0dda2b6be..a60d7595bf807 100644 --- a/src/devices/cpu/uml.h +++ b/src/devices/cpu/uml.h @@ -62,16 +62,16 @@ namespace uml { COND_ALWAYS = 0, - COND_Z = 0x80, // requires Z - COND_NZ, // requires Z - COND_S, // requires S - COND_NS, // requires S - COND_C, // requires C - COND_NC, // 
requires C - COND_V, // requires V - COND_NV, // requires V - COND_U, // requires U - COND_NU, // requires U + COND_Z = 0x80, // requires Z (zero/equal) + COND_NZ, // requires Z (not zero/unequal) + COND_S, // requires S (signed) + COND_NS, // requires S (not signed) + COND_C, // requires C (carry) + COND_NC, // requires C (no carry) + COND_V, // requires V (overflow) + COND_NV, // requires V (no overflow) + COND_U, // requires U (unordered) + COND_NU, // requires U (not unordered) COND_A, // requires CZ, unsigned COND_BE, // requires CZ, unsigned COND_G, // requires SVZ, signed @@ -143,6 +143,7 @@ namespace uml // control flow operations OP_NOP, // NOP OP_DEBUG, // DEBUG pc + OP_BREAK, // BREAK OP_EXIT, // EXIT src1[,c] OP_HASHJMP, // HASHJMP mode,pc,handle OP_JMP, // JMP imm[,c] @@ -157,6 +158,7 @@ namespace uml OP_GETFMOD, // GETFMOD dst OP_GETEXP, // GETEXP dst OP_GETFLGS, // GETFLGS dst[,f] + OP_SETFLGS, // SETFLGS src OP_SAVE, // SAVE mem OP_RESTORE, // RESTORE mem @@ -180,7 +182,9 @@ namespace uml OP_SUBB, // SUBB dst,src1,src2[,f] OP_CMP, // CMP src1,src2[,f] OP_MULU, // MULU dst,edst,src1,src2[,f] + OP_MULULW, // MULULW dst,src1,src2[,f] OP_MULS, // MULS dst,edst,src1,src2[,f] + OP_MULSLW, // MULSLW dst,src1,src2[,f] OP_DIVU, // DIVU dst,edst,src1,src2[,f] OP_DIVS, // DIVS dst,edst,src1,src2[,f] OP_AND, // AND dst,src1,src2[,f] @@ -422,6 +426,7 @@ namespace uml // control flow operations void nop() { configure(OP_NOP, 4); } + void break_() { configure(OP_BREAK, 4); } void debug(u32 pc) { configure(OP_DEBUG, 4, pc); } void exit(parameter param) { configure(OP_EXIT, 4, param); } void exit(condition_t cond, parameter param) { configure(OP_EXIT, 4, param, cond); } @@ -443,6 +448,7 @@ namespace uml void getfmod(parameter dst) { configure(OP_GETFMOD, 4, dst); } void getexp(parameter dst) { configure(OP_GETEXP, 4, dst); } void getflgs(parameter dst, u32 flags) { configure(OP_GETFLGS, 4, dst, flags); } + void setflgs(u32 flags) { configure(OP_SETFLGS, 4, flags); } void save(drcuml_machine_state *dst) { configure(OP_SAVE, 4, parameter::make_memory(dst)); } void restore(drcuml_machine_state *src) { configure(OP_RESTORE, 4, parameter::make_memory(src)); } @@ -467,7 +473,9 @@ namespace uml void subb(parameter dst, parameter src1, parameter src2) { configure(OP_SUBB, 4, dst, src1, src2); } void cmp(parameter src1, parameter src2) { configure(OP_CMP, 4, src1, src2); } void mulu(parameter dst, parameter edst, parameter src1, parameter src2) { configure(OP_MULU, 4, dst, edst, src1, src2); } + void mululw(parameter dst, parameter src1, parameter src2) { configure(OP_MULULW, 4, dst, src1, src2); } void muls(parameter dst, parameter edst, parameter src1, parameter src2) { configure(OP_MULS, 4, dst, edst, src1, src2); } + void mulslw(parameter dst, parameter src1, parameter src2) { configure(OP_MULSLW, 4, dst, src1, src2); } void divu(parameter dst, parameter edst, parameter src1, parameter src2) { configure(OP_DIVU, 4, dst, edst, src1, src2); } void divs(parameter dst, parameter edst, parameter src1, parameter src2) { configure(OP_DIVS, 4, dst, edst, src1, src2); } void _and(parameter dst, parameter src1, parameter src2) { configure(OP_AND, 4, dst, src1, src2); } @@ -506,7 +514,9 @@ namespace uml void dsubb(parameter dst, parameter src1, parameter src2) { configure(OP_SUBB, 8, dst, src1, src2); } void dcmp(parameter src1, parameter src2) { configure(OP_CMP, 8, src1, src2); } void dmulu(parameter dst, parameter edst, parameter src1, parameter src2) { configure(OP_MULU, 8, dst, edst, src1, src2); } + 
void dmululw(parameter dst, parameter src1, parameter src2) { configure(OP_MULULW, 8, dst, src1, src2); } void dmuls(parameter dst, parameter edst, parameter src1, parameter src2) { configure(OP_MULS, 8, dst, edst, src1, src2); } + void dmulslw(parameter dst, parameter src1, parameter src2) { configure(OP_MULSLW, 8, dst, src1, src2); } void ddivu(parameter dst, parameter edst, parameter src1, parameter src2) { configure(OP_DIVU, 8, dst, edst, src1, src2); } void ddivs(parameter dst, parameter edst, parameter src1, parameter src2) { configure(OP_DIVS, 8, dst, edst, src1, src2); } void dand(parameter dst, parameter src1, parameter src2) { configure(OP_AND, 8, dst, src1, src2); }
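Note on the new half-width multiplies: the opcode table above documents MULULW/MULSLW as keeping only the low half of the product, deriving sign and zero from that truncated result and setting overflow when the full-width product does not fit, which is also why the PowerPC MULLWx case now uses MULSLW so its flags come from the 32-bit value. A minimal standalone sketch of the 32-bit signed case follows; mulslw32_flags is an illustrative name, not a function from this patch, and the FLAG_* constants use the UML core's bit assignments.

#include <cstdint>

// Status flag bits as defined by the UML core (V = 2, Z = 4, S = 8).
constexpr int FLAG_V = 0x02;
constexpr int FLAG_Z = 0x04;
constexpr int FLAG_S = 0x08;

// Hypothetical model of the 32-bit MULSLW dst,src1,src2[,f] behaviour:
// keep only the low 32 bits, derive S and Z from that value, and set V
// when the signed 32x32=64 product does not fit in 32 bits.
inline int mulslw32_flags(uint32_t &dst, int32_t src1, int32_t src2)
{
    int64_t const full = int64_t(src1) * int64_t(src2);
    int32_t const low = int32_t(full);      // truncated 32-bit result
    dst = uint32_t(low);

    int flags = 0;
    if (low == 0)
        flags |= FLAG_Z;
    if (low < 0)
        flags |= FLAG_S;
    if (full != int64_t(low))
        flags |= FLAG_V;
    return flags;
}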
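The x86 back-end's dmulu()/dmuls() callbacks above gain a halfmul_flags mode for the 64-bit DMULULW/DMULSLW forms: S is taken from bit 63 of the low half and V from the high half (nonzero in the unsigned case, not a sign extension of the low half in the signed case). Below is a rough standalone equivalent of the unsigned path only; it leans on the compiler-specific unsigned __int128 type for brevity, whereas the patch builds the product from 32-bit partial products, and dmululw64_flags is an assumed name for illustration.

#include <cstdint>

constexpr int FLAG_V = 0x02;
constexpr int FLAG_S = 0x08;

// Rough stand-in for what dmulu() computes when halfmul_flags is set:
// a 64x64 multiply where only the low 64 bits of the product are kept.
// Z is not produced here; only S and V are derived from the product.
inline int dmululw64_flags(uint64_t &dst, uint64_t src1, uint64_t src2)
{
    unsigned __int128 const full = static_cast<unsigned __int128>(src1) * src2;  // GCC/Clang extension
    uint64_t const lo = static_cast<uint64_t>(full);
    uint64_t const hi = static_cast<uint64_t>(full >> 64);
    dst = lo;

    // S from bit 63 of the low half, V whenever the product spilled into the
    // high half, mirroring ((lo >> 60) & FLAG_S) | ((hi != 0) << 1) above.
    return int((lo >> 60) & FLAG_S) | ((hi != 0) ? FLAG_V : 0);
}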
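The simplify() changes also start masking immediate shift and rotate counts to the operand width (& 31 for 32-bit operations, & 63 for 64-bit ones) before folding. That keeps the folding well defined in C++, where shifting by the full operand width or more is undefined behaviour, and makes the folded constant correspond to a masked shift count. A tiny sketch of the 32-bit SHL fold under that convention, with fold_shl32 as an illustrative name:

#include <cstdint>

// Fold a 32-bit SHL with an immediate count the way the patched simplify() does:
// mask the count to the operand width so out-of-range immediates stay defined.
inline uint32_t fold_shl32(uint32_t value, uint32_t count)
{
    return value << (count & 31);
}

// Example: fold_shl32(1, 33) == 2 under the masked-count convention, whereas
// evaluating 1u << 33 directly would be undefined behaviour.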
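Finally, UML_BREAK and UML_SETFLGS are debugging aids: BREAK is documented above as raising a breakpoint exception so the generated code can be examined in a native debugger, and SETFLGS forces the UML status flags to a chosen value. A hypothetical use inside a front-end's generator might look like the fragment below; generate_debug_probe and its surrounding setup are assumptions for illustration, only the two macros come from this patch, and the snippet presumes the usual MAME DRC headers are already included.

// Assumes the MAME DRC headers (drcuml.h plus the drcumlsh.h touched by this
// patch) are already pulled in by the including front-end.
static void generate_debug_probe(drcuml_block &block)   // hypothetical helper
{
    UML_SETFLGS(block, 0);   // setflgs 0 - start from a known, all-clear flag state
    UML_BREAK(block);        // break     - breakpoint exception right before the code under test
}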