diff options
Diffstat (limited to 'sljit')
-rw-r--r-- | sljit/sljitConfig.h | 17 | ||||
-rw-r--r-- | sljit/sljitConfigInternal.h | 376 | ||||
-rw-r--r-- | sljit/sljitLir.c | 1183 | ||||
-rw-r--r-- | sljit/sljitLir.h | 716 | ||||
-rw-r--r-- | sljit/sljitNativeARM_32.c | 520 | ||||
-rw-r--r-- | sljit/sljitNativeARM_64.c | 468 | ||||
-rw-r--r-- | sljit/sljitNativeARM_T2_32.c | 413 | ||||
-rw-r--r-- | sljit/sljitNativeMIPS_32.c | 54 | ||||
-rw-r--r-- | sljit/sljitNativeMIPS_64.c | 50 | ||||
-rw-r--r-- | sljit/sljitNativeMIPS_common.c | 634 | ||||
-rw-r--r-- | sljit/sljitNativePPC_common.c | 601 | ||||
-rw-r--r-- | sljit/sljitNativeSPARC_32.c | 4 | ||||
-rw-r--r-- | sljit/sljitNativeSPARC_common.c | 380 | ||||
-rw-r--r-- | sljit/sljitNativeTILEGX_64.c | 337 | ||||
-rw-r--r-- | sljit/sljitNativeX86_32.c | 207 | ||||
-rw-r--r-- | sljit/sljitNativeX86_64.c | 345 | ||||
-rw-r--r-- | sljit/sljitNativeX86_common.c | 545 | ||||
-rw-r--r-- | sljit/sljitUtils.c | 18 |
18 files changed, 4077 insertions, 2791 deletions
diff --git a/sljit/sljitConfig.h b/sljit/sljitConfig.h index 4d93f4f..1c8a521 100644 --- a/sljit/sljitConfig.h +++ b/sljit/sljitConfig.h @@ -96,13 +96,28 @@ #define SLJIT_EXECUTABLE_ALLOCATOR 1 #endif +/* Force cdecl calling convention even if a better calling + convention (e.g. fastcall) is supported by the C compiler. + If this option is enabled, C functions without + SLJIT_CALL can also be called from JIT code. */ +#ifndef SLJIT_USE_CDECL_CALLING_CONVENTION +/* Disabled by default */ +#define SLJIT_USE_CDECL_CALLING_CONVENTION 0 +#endif + +/* Return with error when an invalid argument is passed. */ +#ifndef SLJIT_ARGUMENT_CHECKS +/* Disabled by default */ +#define SLJIT_ARGUMENT_CHECKS 0 +#endif + /* Debug checks (assertions, etc.). */ #ifndef SLJIT_DEBUG /* Enabled by default */ #define SLJIT_DEBUG 1 #endif -/* Verbose operations */ +/* Verbose operations. */ #ifndef SLJIT_VERBOSE /* Enabled by default */ #define SLJIT_VERBOSE 1 diff --git a/sljit/sljitConfigInternal.h b/sljit/sljitConfigInternal.h index 16175f0..bfc4380 100644 --- a/sljit/sljitConfigInternal.h +++ b/sljit/sljitConfigInternal.h @@ -28,32 +28,50 @@ #define _SLJIT_CONFIG_INTERNAL_H_ /* - SLJIT defines the following macros depending on the target architecture: - - Feature detection (boolean) macros: - SLJIT_32BIT_ARCHITECTURE : 32 bit architecture - SLJIT_64BIT_ARCHITECTURE : 64 bit architecture - SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index - SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing a double array by index - SLJIT_LITTLE_ENDIAN : little endian architecture - SLJIT_BIG_ENDIAN : big endian architecture - SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) 
- SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information - SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address - - Types and useful macros: - sljit_sb, sljit_ub : signed and unsigned 8 bit byte - sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type - sljit_si, sljit_ui : signed and unsigned 32 bit integer type - sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer - sljit_p : unsgined pointer value (usually the same as sljit_uw, but - some 64 bit ABIs may use 32 bit pointers) - sljit_s : single precision floating point value - sljit_d : double precision floating point value - SLJIT_CALL : C calling convention define for both calling JIT form C and C callbacks for JIT - SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper) + SLJIT defines the following architecture dependent types and macros: + + Types: + sljit_sb, sljit_ub : signed and unsigned 8 bit byte + sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type + sljit_si, sljit_ui : signed and unsigned 32 bit integer type + sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer + sljit_p : unsigned pointer value (usually the same as sljit_uw, but + some 64 bit ABIs may use 32 bit pointers) + sljit_s : single precision floating point value + sljit_d : double precision floating point value + + Macros for feature detection (boolean): + SLJIT_32BIT_ARCHITECTURE : 32 bit architecture + SLJIT_64BIT_ARCHITECTURE : 64 bit architecture + SLJIT_LITTLE_ENDIAN : little endian architecture + SLJIT_BIG_ENDIAN : big endian architecture + SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
+ SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information + + Constants: + SLJIT_NUMBER_OF_REGISTERS : number of available registers + SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers + SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers + SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers + SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers + SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index + SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing + a double precision floating point array by index + SLJIT_SINGLE_SHIFT : the shift required to apply when accessing + a single precision floating point array by index + SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET) + SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address + + Other macros: + SLJIT_CALL : C calling convention define for both calling JIT from C and C callbacks for JIT + SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper) */ +/*****************/ +/* Sanity check. */ +/*****************/ + #if !((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ || (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ @@ -71,7 +89,6 @@ #error "An architecture must be selected" #endif -/* Sanity check.
*/ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ @@ -89,7 +106,10 @@ #error "Multiple architectures are selected" #endif -/* Auto select option (requires compiler support) */ +/********************************************************/ +/* Automatic CPU detection (requires compiler support). */ +/********************************************************/ + #if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) #ifndef _WIN32 @@ -154,26 +174,52 @@ #undef SLJIT_EXECUTABLE_ALLOCATOR #endif +/******************************/ +/* CPU family type detection. */ +/******************************/ + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#define SLJIT_CONFIG_ARM_32 1 +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#define SLJIT_CONFIG_X86 1 +#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +#define SLJIT_CONFIG_ARM 1 +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_CONFIG_PPC 1 +#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#define SLJIT_CONFIG_MIPS 1 +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) || (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64) +#define SLJIT_CONFIG_SPARC 1 +#endif + +/**********************************/ +/* External function definitions. */ +/**********************************/ + #if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) /* These libraries are needed for the macros below. 
*/ #include <stdlib.h> #include <string.h> -#endif /* STD_MACROS_DEFINED */ +#endif /* SLJIT_STD_MACROS_DEFINED */ /* General macros: Note: SLJIT is designed to be independent from them as possible. - In release mode (SLJIT_DEBUG is not defined) only the following macros are needed: + In release mode (SLJIT_DEBUG is not defined) only the following + external functions are needed: */ #ifndef SLJIT_MALLOC -#define SLJIT_MALLOC(size) malloc(size) +#define SLJIT_MALLOC(size, allocator_data) malloc(size) #endif #ifndef SLJIT_FREE -#define SLJIT_FREE(ptr) free(ptr) +#define SLJIT_FREE(ptr, allocator_data) free(ptr) #endif #ifndef SLJIT_MEMMOVE @@ -184,6 +230,10 @@ #define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len) #endif +/***************************/ +/* Compiler helper macros. */ +/***************************/ + #if !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) #if defined(__GNUC__) && (__GNUC__ >= 3) @@ -205,6 +255,15 @@ #endif #endif /* !SLJIT_INLINE */ +#ifndef SLJIT_NOINLINE +/* Not inline functions. */ +#if defined(__GNUC__) +#define SLJIT_NOINLINE __attribute__ ((noinline)) +#else +#define SLJIT_NOINLINE +#endif +#endif /* !SLJIT_NOINLINE */ + #ifndef SLJIT_CONST /* Const variables. */ #define SLJIT_CONST const @@ -215,6 +274,10 @@ #define SLJIT_UNUSED_ARG(arg) (void)arg #endif +/*********************************/ +/* Type of public API functions. */ +/*********************************/ + #if (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) /* Static ABI functions. For all-in-one programs. */ @@ -229,9 +292,13 @@ #define SLJIT_API_FUNC_ATTRIBUTE #endif /* (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) */ +/****************************/ +/* Instruction cache flush. */ +/****************************/ + #ifndef SLJIT_CACHE_FLUSH -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) /* Not required to implement on archs with unified caches.
*/ #define SLJIT_CACHE_FLUSH(from, to) @@ -252,7 +319,7 @@ #define SLJIT_CACHE_FLUSH(from, to) \ cacheflush((long)(from), (long)(to), 0) -#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) /* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */ #define SLJIT_CACHE_FLUSH(from, to) \ @@ -274,6 +341,10 @@ #endif /* !SLJIT_CACHE_FLUSH */ +/******************************************************/ +/* Byte/half/int/word/single/double type definitions. */ +/******************************************************/ + /* 8 bit byte type. */ typedef unsigned char sljit_ub; typedef signed char sljit_sb; @@ -286,7 +357,7 @@ typedef signed short int sljit_sh; typedef unsigned int sljit_ui; typedef signed int sljit_si; -/* Machine word type. Can encapsulate a pointer. +/* Machine word type. Enough for storing a pointer. 32 bit for 32 bit machines. 64 bit for 64 bit machines. */ #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) @@ -326,6 +397,7 @@ typedef double sljit_d; /* Shift for double precision sized data. */ #define SLJIT_DOUBLE_SHIFT 3 +#define SLJIT_SINGLE_SHIFT 2 #ifndef SLJIT_W @@ -338,44 +410,13 @@ typedef double sljit_d; #endif /* !SLJIT_W */ -#ifndef SLJIT_CALL - -/* ABI (Application Binary Interface) types. */ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - -#if defined(__GNUC__) && !defined(__APPLE__) - -#define SLJIT_CALL __attribute__ ((fastcall)) -#define SLJIT_X86_32_FASTCALL 1 - -#elif defined(_MSC_VER) - -#define SLJIT_CALL __fastcall -#define SLJIT_X86_32_FASTCALL 1 - -#elif defined(__BORLANDC__) - -#define SLJIT_CALL __msfastcall -#define SLJIT_X86_32_FASTCALL 1 - -#else /* Unknown compiler. */ - -/* The cdecl attribute is the default. */ -#define SLJIT_CALL - -#endif - -#else /* Non x86-32 architectures. 
*/ - -#define SLJIT_CALL - -#endif /* SLJIT_CONFIG_X86_32 */ - -#endif /* !SLJIT_CALL */ +/*************************/ +/* Endianness detection. */ +/*************************/ #if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) -/* These macros are useful for the applications. */ +/* These macros are mostly useful for the applications. */ #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -413,6 +454,69 @@ typedef double sljit_d; #error "Exactly one endianness must be selected" #endif +#ifndef SLJIT_UNALIGNED + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_UNALIGNED 1 +#endif + +#endif /* !SLJIT_UNALIGNED */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +/* Auto detect SSE2 support using CPUID. + On 64 bit x86 cpus, sse2 must be present. */ +#define SLJIT_DETECT_SSE2 1 +#endif + +/*****************************************************************************************/ +/* Calling convention of functions generated by SLJIT or called from the generated code. */ +/*****************************************************************************************/ + +#ifndef SLJIT_CALL + +#if (defined SLJIT_USE_CDECL_CALLING_CONVENTION && SLJIT_USE_CDECL_CALLING_CONVENTION) + +/* Force cdecl. 
*/ +#define SLJIT_CALL + +#elif (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + +#if defined(__GNUC__) && !defined(__APPLE__) + +#define SLJIT_CALL __attribute__ ((fastcall)) +#define SLJIT_X86_32_FASTCALL 1 + +#elif defined(_MSC_VER) + +#define SLJIT_CALL __fastcall +#define SLJIT_X86_32_FASTCALL 1 + +#elif defined(__BORLANDC__) + +#define SLJIT_CALL __msfastcall +#define SLJIT_X86_32_FASTCALL 1 + +#else /* Unknown compiler. */ + +/* The cdecl attribute is the default. */ +#define SLJIT_CALL + +#endif + +#else /* Non x86-32 architectures. */ + +#define SLJIT_CALL + +#endif /* SLJIT_CONFIG_X86_32 */ + +#endif /* !SLJIT_CALL */ + #ifndef SLJIT_INDIRECT_CALL #if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN)) \ || ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX) @@ -422,6 +526,8 @@ typedef double sljit_d; #endif #endif /* SLJIT_INDIRECT_CALL */ +/* The offset which needs to be subtracted from the return address to +determine the next executed instruction after return. */ #ifndef SLJIT_RETURN_ADDRESS_OFFSET #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) #define SLJIT_RETURN_ADDRESS_OFFSET 8 @@ -430,44 +536,128 @@ typedef double sljit_d; #endif #endif /* SLJIT_RETURN_ADDRESS_OFFSET */ -#ifndef SLJIT_SSE2 +/***************************************************/ +/* Functions of the built-in executable allocator. */ +/***************************************************/ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -/* Turn on SSE2 support on x86.
*/ -#define SLJIT_SSE2 1 +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size); +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr); +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); +#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size) +#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr) +#endif + +/**********************************************/ +/* Registers and locals offset determination. */ +/**********************************************/ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -/* Auto detect SSE2 support using CPUID. - On 64 bit x86 cpus, sse2 must be present. */ -#define SLJIT_DETECT_SSE2 1 + +#define SLJIT_NUMBER_OF_REGISTERS 10 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) +#define SLJIT_LOCALS_OFFSET_BASE ((2 + 4) * sizeof(sljit_sw)) +#else +/* Maximum 3 arguments are passed on the stack, +1 for double alignment. 
*/ +#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1 + 4) * sizeof(sljit_sw)) +#endif /* SLJIT_X86_32_FASTCALL */ + +#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#ifndef _WIN64 +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 +#define SLJIT_LOCALS_OFFSET_BASE (sizeof(sljit_sw)) +#else +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE ((4 + 2) * sizeof(sljit_sw)) +#endif /* _WIN64 */ + +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + +#define SLJIT_NUMBER_OF_REGISTERS 11 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + +#define SLJIT_NUMBER_OF_REGISTERS 11 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + +#define SLJIT_NUMBER_OF_REGISTERS 25 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 +#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw)) + +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +#define SLJIT_NUMBER_OF_REGISTERS 22 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17 +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX) +#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * sizeof(sljit_sw)) +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +/* Add +1 for double alignment. 
*/ +#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * sizeof(sljit_sw)) +#else +#define SLJIT_LOCALS_OFFSET_BASE (3 * sizeof(sljit_sw)) +#endif /* SLJIT_CONFIG_PPC_64 || _AIX */ + +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + +#define SLJIT_NUMBER_OF_REGISTERS 17 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw)) +#else +#define SLJIT_LOCALS_OFFSET_BASE 0 #endif -#endif /* (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) */ +#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) -#endif /* !SLJIT_SSE2 */ +#define SLJIT_NUMBER_OF_REGISTERS 18 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14 +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +/* Add +1 for double alignment. */ +#define SLJIT_LOCALS_OFFSET_BASE ((23 + 1) * sizeof(sljit_sw)) +#endif -#ifndef SLJIT_UNALIGNED +#elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) + +#define SLJIT_NUMBER_OF_REGISTERS 10 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 5 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#define SLJIT_NUMBER_OF_REGISTERS 0 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0 +#define SLJIT_LOCALS_OFFSET_BASE 0 -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ - || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ - || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ - || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ - || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ - || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ - || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -#define SLJIT_UNALIGNED 1 #endif -#endif /* !SLJIT_UNALIGNED */ +#define SLJIT_LOCALS_OFFSET (SLJIT_LOCALS_OFFSET_BASE) -#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) -SLJIT_API_FUNC_ATTRIBUTE void* 
sljit_malloc_exec(sljit_uw size); -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr); -SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); -#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size) -#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr) +#define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \ + (SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS) + +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6 +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64) +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1 +#else +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 #endif +#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ + (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) + +/*************************************/ +/* Debug and verbose related macros. */ +/*************************************/ + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) #include <stdio.h> #endif diff --git a/sljit/sljitLir.c b/sljit/sljitLir.c index 1acecba..0f1b1c9 100644 --- a/sljit/sljitLir.c +++ b/sljit/sljitLir.c @@ -38,12 +38,6 @@ return NULL; \ } while (0) -#define CHECK_ERROR_VOID() \ - do { \ - if (SLJIT_UNLIKELY(compiler->error)) \ - return; \ - } while (0) - #define FAIL_IF(expr) \ do { \ if (SLJIT_UNLIKELY(expr)) \ @@ -117,7 +111,7 @@ #define JUMP_ADDR 0x2 /* SLJIT_REWRITABLE_JUMP is 0x1000. 
*/ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) # define PATCH_MB 0x4 # define PATCH_MW 0x8 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -162,7 +156,7 @@ # define PATCH_ABS64 0x100 #endif -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) # define IS_COND 0x004 # define IS_CALL 0x008 # define PATCH_B 0x010 @@ -174,7 +168,7 @@ # define REMOVE_COND 0x100 #endif -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) # define IS_MOVABLE 0x004 # define IS_JAL 0x008 # define IS_CALL 0x010 @@ -229,102 +223,111 @@ # define FCC_IS_SET (1 << 24) #endif -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -#define SLJIT_HAS_VARIABLE_LOCALS_OFFSET 1 -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) -#define FIXED_LOCALS_OFFSET (3 * sizeof(sljit_sw)) -#endif -#endif +/* Stack management. */ -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 -#ifdef _WIN64 -#define FIXED_LOCALS_OFFSET ((4 + 2) * sizeof(sljit_sw)) -#else -#define FIXED_LOCALS_OFFSET (sizeof(sljit_sw)) -#endif -#endif +#define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \ + (((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \ + (saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
saveds : SLJIT_NUMBER_OF_SAVED_REGISTERS) + \ + extra) * sizeof(sljit_sw)) -#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) -#define SLJIT_HAS_VARIABLE_LOCALS_OFFSET 1 -#endif +#define ADJUST_LOCAL_OFFSET(p, i) \ + if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ + (i) += SLJIT_LOCALS_OFFSET; -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) -#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 -#ifdef _AIX -#define FIXED_LOCALS_OFFSET ((6 + 8) * sizeof(sljit_sw)) -#else -#define FIXED_LOCALS_OFFSET (2 * sizeof(sljit_sw)) -#endif -#endif +#endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */ -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 -#define FIXED_LOCALS_OFFSET ((6 + 8) * sizeof(sljit_sw)) -#endif +/* Utils can still be used even if SLJIT_CONFIG_UNSUPPORTED is set. */ +#include "sljitUtils.c" -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 -#define FIXED_LOCALS_OFFSET (4 * sizeof(sljit_sw)) -#endif +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) -#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) -#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 -#define FIXED_LOCALS_OFFSET 0 +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) +#include "sljitExecAllocator.c" #endif -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 -#define FIXED_LOCALS_OFFSET (23 * sizeof(sljit_sw)) -#endif +/* Argument checking features. */ -#if (defined SLJIT_HAS_VARIABLE_LOCALS_OFFSET && SLJIT_HAS_VARIABLE_LOCALS_OFFSET) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) -#define ADJUST_LOCAL_OFFSET(p, i) \ - if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ - (i) += compiler->locals_offset; +/* Returns with error when an invalid argument is passed. 
*/ -#elif (defined SLJIT_HAS_FIXED_LOCALS_OFFSET && SLJIT_HAS_FIXED_LOCALS_OFFSET) +#define CHECK_ARGUMENT(x) \ + do { \ + if (SLJIT_UNLIKELY(!(x))) \ + return 1; \ + } while (0) -#define ADJUST_LOCAL_OFFSET(p, i) \ - if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ - (i) += FIXED_LOCALS_OFFSET; +#define CHECK_RETURN_TYPE sljit_si +#define CHECK_RETURN_OK return 0 -#else +#define CHECK(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + compiler->error = SLJIT_ERR_BAD_ARGUMENT; \ + return SLJIT_ERR_BAD_ARGUMENT; \ + } \ + } while (0) + +#define CHECK_PTR(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + compiler->error = SLJIT_ERR_BAD_ARGUMENT; \ + return NULL; \ + } \ + } while (0) -#define ADJUST_LOCAL_OFFSET(p, i) +#define CHECK_REG_INDEX(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + return -2; \ + } \ + } while (0) -#endif +#elif (defined SLJIT_DEBUG && SLJIT_DEBUG) -#endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */ +/* Assertion failure occures if an invalid argument is passed. */ +#undef SLJIT_ARGUMENT_CHECKS +#define SLJIT_ARGUMENT_CHECKS 1 -/* Utils can still be used even if SLJIT_CONFIG_UNSUPPORTED is set. */ -#include "sljitUtils.c" +#define CHECK_ARGUMENT(x) SLJIT_ASSERT(x) +#define CHECK_RETURN_TYPE void +#define CHECK_RETURN_OK return +#define CHECK(x) x +#define CHECK_PTR(x) x +#define CHECK_REG_INDEX(x) x -#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#elif (defined SLJIT_VERBOSE && SLJIT_VERBOSE) -#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) -#include "sljitExecAllocator.c" -#endif +/* Arguments are not checked. */ +#define CHECK_RETURN_TYPE void +#define CHECK_RETURN_OK return +#define CHECK(x) x +#define CHECK_PTR(x) x +#define CHECK_REG_INDEX(x) x -#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) && !(defined SLJIT_SSE2 && SLJIT_SSE2) -#error SLJIT_SSE2_AUTO cannot be enabled without SLJIT_SSE2 -#endif +#else + +/* Arguments are not checked. 
*/ +#define CHECK(x) +#define CHECK_PTR(x) +#define CHECK_REG_INDEX(x) + +#endif /* SLJIT_ARGUMENT_CHECKS */ /* --------------------------------------------------------------------- */ /* Public functions */ /* --------------------------------------------------------------------- */ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || ((defined SLJIT_SSE2 && SLJIT_SSE2) && ((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64))) +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) #define SLJIT_NEEDS_COMPILER_INIT 1 static sljit_si compiler_initialized = 0; /* A thread safe initialization. */ static void init_compiler(void); #endif -SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data) { - struct sljit_compiler *compiler = (struct sljit_compiler*)SLJIT_MALLOC(sizeof(struct sljit_compiler)); + struct sljit_compiler *compiler = (struct sljit_compiler*)SLJIT_MALLOC(sizeof(struct sljit_compiler), allocator_data); if (!compiler) return NULL; SLJIT_ZEROMEM(compiler, sizeof(struct sljit_compiler)); @@ -346,15 +349,16 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void) /* Only the non-zero members must be set. 
*/ compiler->error = SLJIT_SUCCESS; - compiler->buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE); - compiler->abuf = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE); + compiler->allocator_data = allocator_data; + compiler->buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, allocator_data); + compiler->abuf = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, allocator_data); if (!compiler->buf || !compiler->abuf) { if (compiler->buf) - SLJIT_FREE(compiler->buf); + SLJIT_FREE(compiler->buf, allocator_data); if (compiler->abuf) - SLJIT_FREE(compiler->abuf); - SLJIT_FREE(compiler); + SLJIT_FREE(compiler->abuf, allocator_data); + SLJIT_FREE(compiler, allocator_data); return NULL; } @@ -365,24 +369,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void) compiler->scratches = -1; compiler->saveds = -1; + compiler->fscratches = -1; + compiler->fsaveds = -1; + compiler->local_size = -1; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) compiler->args = -1; #endif #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw) + CPOOL_SIZE * sizeof(sljit_ub)); + compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw) + + CPOOL_SIZE * sizeof(sljit_ub), allocator_data); if (!compiler->cpool) { - SLJIT_FREE(compiler->buf); - SLJIT_FREE(compiler->abuf); - SLJIT_FREE(compiler); + SLJIT_FREE(compiler->buf, allocator_data); + SLJIT_FREE(compiler->abuf, allocator_data); + SLJIT_FREE(compiler, allocator_data); return NULL; } compiler->cpool_unique = (sljit_ub*)(compiler->cpool + CPOOL_SIZE); compiler->cpool_diff = 0xffffffff; #endif -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) compiler->delay_slot = UNMOVABLE_INS; #endif @@ -404,25 +412,33 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler 
*compile { struct sljit_memory_fragment *buf; struct sljit_memory_fragment *curr; + void *allocator_data = compiler->allocator_data; + SLJIT_UNUSED_ARG(allocator_data); buf = compiler->buf; while (buf) { curr = buf; buf = buf->next; - SLJIT_FREE(curr); + SLJIT_FREE(curr, allocator_data); } buf = compiler->abuf; while (buf) { curr = buf; buf = buf->next; - SLJIT_FREE(curr); + SLJIT_FREE(curr, allocator_data); } #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - SLJIT_FREE(compiler->cpool); + SLJIT_FREE(compiler->cpool, allocator_data); #endif - SLJIT_FREE(compiler); + SLJIT_FREE(compiler, allocator_data); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler) +{ + if (compiler->error == SLJIT_SUCCESS) + compiler->error = SLJIT_ERR_ALLOC_FAILED; } #if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) @@ -478,7 +494,7 @@ static void* ensure_buf(struct sljit_compiler *compiler, sljit_uw size) compiler->buf->used_size += size; return ret; } - new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE); + new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, compiler->allocator_data); PTR_FAIL_IF_NULL(new_frag); new_frag->next = compiler->buf; compiler->buf = new_frag; @@ -497,7 +513,7 @@ static void* ensure_abuf(struct sljit_compiler *compiler, sljit_uw size) compiler->abuf->used_size += size; return ret; } - new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE); + new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, compiler->allocator_data); PTR_FAIL_IF_NULL(new_frag); new_frag->next = compiler->abuf; compiler->abuf = new_frag; @@ -537,6 +553,40 @@ static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) compiler->buf = prev; } +static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + 
SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(local_size); + + compiler->options = options; + compiler->scratches = scratches; + compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->logical_local_size = local_size; +#endif +} + +static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(local_size); + + compiler->options = options; + compiler->scratches = scratches; + compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->logical_local_size = local_size; +#endif +} + static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compiler *compiler) { label->next = NULL; @@ -573,9 +623,9 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp #define ADDRESSING_DEPENDS_ON(exp, reg) \ (((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg)) -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) #define FUNCTION_CHECK_OP() \ - SLJIT_ASSERT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \ + CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \ switch (GET_OPCODE(op)) { \ case SLJIT_NOT: \ case SLJIT_CLZ: \ @@ -585,31 +635,27 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp case SLJIT_SHL: \ case SLJIT_LSHR: \ case SLJIT_ASHR: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C))); \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C))); \ break; \ case SLJIT_NEG: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \ + 
CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \ break; \ case SLJIT_MUL: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \ - break; \ - case SLJIT_CMPD: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ - SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_SET_S))); \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \ break; \ case SLJIT_ADD: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_S))); \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S))); \ break; \ case SLJIT_SUB: \ break; \ case SLJIT_ADDC: \ case SLJIT_SUBC: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))); \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))); \ break; \ case SLJIT_BREAKPOINT: \ case SLJIT_NOP: \ - case SLJIT_UMUL: \ - case SLJIT_SMUL: \ + case SLJIT_LUMUL: \ + case SLJIT_LSMUL: \ case SLJIT_MOV: \ case SLJIT_MOV_UI: \ case SLJIT_MOV_P: \ @@ -617,79 +663,112 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp case SLJIT_MOVU_UI: \ case SLJIT_MOVU_P: \ /* Nothing allowed */ \ - SLJIT_ASSERT(!(op & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + CHECK_ARGUMENT(!(op & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + break; \ + default: \ + /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. 
*/ \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + break; \ + } + +#define FUNCTION_CHECK_FOP() \ + CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \ + switch (GET_OPCODE(op)) { \ + case SLJIT_DCMP: \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_SET_S))); \ break; \ default: \ /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. */ \ - SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ break; \ } #define FUNCTION_CHECK_IS_REG(r) \ + (((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \ + ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0)) + +#define FUNCTION_CHECK_IS_REG_OR_UNUSED(r) \ ((r) == SLJIT_UNUSED || \ - ((r) >= SLJIT_SCRATCH_REG1 && (r) <= SLJIT_SCRATCH_REG1 - 1 + compiler->scratches) || \ - ((r) >= SLJIT_SAVED_REG1 && (r) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds)) + ((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \ + ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0)) + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#define CHECK_NOT_VIRTUAL_REGISTER(p) \ + CHECK_ARGUMENT((p) < SLJIT_R3 || (p) > SLJIT_R6); +#else +#define CHECK_NOT_VIRTUAL_REGISTER(p) +#endif #define FUNCTION_CHECK_SRC(p, i) \ - SLJIT_ASSERT(compiler->scratches != -1 && compiler->saveds != -1); \ + CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); \ if (FUNCTION_CHECK_IS_REG(p)) \ - SLJIT_ASSERT((i) == 0 && (p) != SLJIT_UNUSED); \ + CHECK_ARGUMENT((i) == 0); \ else if ((p) == SLJIT_IMM) \ ; \ - else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ - SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \ - else if ((p) & SLJIT_MEM) { \ - 
SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \ + else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ + CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \ + else { \ + CHECK_ARGUMENT((p) & SLJIT_MEM); \ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG_OR_UNUSED((p) & REG_MASK)); \ + CHECK_NOT_VIRTUAL_REGISTER((p) & REG_MASK); \ if ((p) & OFFS_REG_MASK) { \ - SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ - SLJIT_ASSERT(!((i) & ~0x3)); \ + CHECK_ARGUMENT(((p) & REG_MASK) != SLJIT_UNUSED); \ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ + CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \ + CHECK_ARGUMENT(!((i) & ~0x3)); \ } \ - SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ - } \ - else \ - SLJIT_ASSERT_STOP(); + CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ + } #define FUNCTION_CHECK_DST(p, i) \ - SLJIT_ASSERT(compiler->scratches != -1 && compiler->saveds != -1); \ - if (FUNCTION_CHECK_IS_REG(p)) \ - SLJIT_ASSERT((i) == 0); \ - else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ - SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \ - else if ((p) & SLJIT_MEM) { \ - SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \ + CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); \ + if (FUNCTION_CHECK_IS_REG_OR_UNUSED(p)) \ + CHECK_ARGUMENT((i) == 0); \ + else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ + CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \ + else { \ + CHECK_ARGUMENT((p) & SLJIT_MEM); \ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG_OR_UNUSED((p) & REG_MASK)); \ + CHECK_NOT_VIRTUAL_REGISTER((p) & REG_MASK); \ if ((p) & OFFS_REG_MASK) { \ - SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ - SLJIT_ASSERT(!((i) & ~0x3)); \ + CHECK_ARGUMENT(((p) & REG_MASK) != SLJIT_UNUSED); \ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ + CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \ + CHECK_ARGUMENT(!((i) & ~0x3)); \ } \ - SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | 
SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ - } \ - else \ - SLJIT_ASSERT_STOP(); + CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ + } #define FUNCTION_FCHECK(p, i) \ - if ((p) >= SLJIT_FLOAT_REG1 && (p) <= SLJIT_FLOAT_REG6) \ - SLJIT_ASSERT(i == 0); \ - else if ((p) & SLJIT_MEM) { \ - SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \ + CHECK_ARGUMENT(compiler->fscratches != -1 && compiler->fsaveds != -1); \ + if (((p) >= SLJIT_FR0 && (p) < (SLJIT_FR0 + compiler->fscratches)) || \ + ((p) > (SLJIT_FS0 - compiler->fsaveds) && (p) <= SLJIT_FS0)) \ + CHECK_ARGUMENT(i == 0); \ + else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ + CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \ + else { \ + CHECK_ARGUMENT((p) & SLJIT_MEM); \ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG_OR_UNUSED((p) & REG_MASK)); \ + CHECK_NOT_VIRTUAL_REGISTER((p) & REG_MASK); \ if ((p) & OFFS_REG_MASK) { \ - SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ - SLJIT_ASSERT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_LOCALS_REG) && !(i & ~0x3)); \ - } else \ - SLJIT_ASSERT(OFFS_REG(p) == 0); \ - SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ - } \ - else \ - SLJIT_ASSERT_STOP(); + CHECK_ARGUMENT(((p) & REG_MASK) != SLJIT_UNUSED); \ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ + CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \ + CHECK_ARGUMENT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SP) && !(i & ~0x3)); \ + } \ + CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ + } #define FUNCTION_CHECK_OP1() \ if (GET_OPCODE(op) >= SLJIT_MOVU && GET_OPCODE(op) <= SLJIT_MOVU_P) { \ - SLJIT_ASSERT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_LOCALS_REG); \ - SLJIT_ASSERT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_LOCALS_REG); \ + CHECK_ARGUMENT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP); \ + CHECK_ARGUMENT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP); \ if ((src & SLJIT_MEM) && (src & 
REG_MASK)) \ - SLJIT_ASSERT((dst & REG_MASK) != (src & REG_MASK) && OFFS_REG(dst) != (src & REG_MASK)); \ + CHECK_ARGUMENT((dst & REG_MASK) != (src & REG_MASK) && OFFS_REG(dst) != (src & REG_MASK)); \ } -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -698,17 +777,6 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp compiler->verbose = verbose; } -static char* reg_names[] = { - (char*)"unused", (char*)"s1", (char*)"s2", (char*)"s3", - (char*)"se1", (char*)"se2", (char*)"p1", (char*)"p2", - (char*)"p3", (char*)"pe1", (char*)"pe2", (char*)"lc" -}; - -static char* freg_names[] = { - (char*)"unused", (char*)"f1", (char*)"f2", (char*)"f3", - (char*)"f4", (char*)"f5", (char*)"f6" -}; - #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) #ifdef _WIN64 # define SLJIT_PRINT_D "I64" @@ -719,69 +787,96 @@ static char* freg_names[] = { # define SLJIT_PRINT_D "" #endif -#define sljit_verbose_param(p, i) \ +#define sljit_verbose_reg(compiler, r) \ + do { \ + if ((r) < (SLJIT_R0 + compiler->scratches)) \ + fprintf(compiler->verbose, "r%d", (r) - SLJIT_R0); \ + else \ + fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - (r)); \ + } while (0) + +#define sljit_verbose_param(compiler, p, i) \ if ((p) & SLJIT_IMM) \ fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); \ else if ((p) & SLJIT_MEM) { \ if ((p) & REG_MASK) { \ - if (i) { \ - if ((p) & OFFS_REG_MASK) \ - fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)], 1 << (i)); \ - else \ - fprintf(compiler->verbose, "[%s + #%" SLJIT_PRINT_D "d]", reg_names[(p) & REG_MASK], (i)); \ - } \ - else { \ - if ((p) & OFFS_REG_MASK) \ - fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)]); \ - else \ - fprintf(compiler->verbose, "[%s]", reg_names[(p) & REG_MASK]); \ + fputc('[', compiler->verbose); \ + sljit_verbose_reg(compiler, (p) & REG_MASK); \ + if ((p) 
& OFFS_REG_MASK) { \ + fprintf(compiler->verbose, " + "); \ + sljit_verbose_reg(compiler, OFFS_REG(p)); \ + if (i) \ + fprintf(compiler->verbose, " * %d", 1 << (i)); \ } \ + else if (i) \ + fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); \ + fputc(']', compiler->verbose); \ } \ else \ fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \ - } else \ - fprintf(compiler->verbose, "%s", reg_names[p]); -#define sljit_verbose_fparam(p, i) \ + } else if (p) \ + sljit_verbose_reg(compiler, p); \ + else \ + fprintf(compiler->verbose, "unused"); + +#define sljit_verbose_fparam(compiler, p, i) \ if ((p) & SLJIT_MEM) { \ if ((p) & REG_MASK) { \ - if (i) { \ - if ((p) & OFFS_REG_MASK) \ - fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)], 1 << (i)); \ - else \ - fprintf(compiler->verbose, "[%s + #%" SLJIT_PRINT_D "d]", reg_names[(p) & REG_MASK], (i)); \ - } \ - else { \ - if ((p) & OFFS_REG_MASK) \ - fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)]); \ - else \ - fprintf(compiler->verbose, "[%s]", reg_names[(p) & REG_MASK]); \ + fputc('[', compiler->verbose); \ + sljit_verbose_reg(compiler, (p) & REG_MASK); \ + if ((p) & OFFS_REG_MASK) { \ + fprintf(compiler->verbose, " + "); \ + sljit_verbose_reg(compiler, OFFS_REG(p)); \ + if (i) \ + fprintf(compiler->verbose, "%d", 1 << (i)); \ } \ + else if (i) \ + fprintf(compiler->verbose, "%" SLJIT_PRINT_D "d", (i)); \ + fputc(']', compiler->verbose); \ } \ else \ fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \ - } else \ - fprintf(compiler->verbose, "%s", freg_names[p]); - -static SLJIT_CONST char* op_names[] = { - /* op0 */ - (char*)"breakpoint", (char*)"nop", - (char*)"umul", (char*)"smul", (char*)"udiv", (char*)"sdiv", - /* op1 */ - (char*)"mov", (char*)"mov.ub", (char*)"mov.sb", (char*)"mov.uh", - (char*)"mov.sh", (char*)"mov.ui", (char*)"mov.si", (char*)"mov.p", - (char*)"movu", (char*)"movu.ub", (char*)"movu.sb", 
(char*)"movu.uh", - (char*)"movu.sh", (char*)"movu.ui", (char*)"movu.si", (char*)"movu.p", + } \ + else { \ + if ((p) < (SLJIT_FR0 + compiler->fscratches)) \ + fprintf(compiler->verbose, "fr%d", (p) - SLJIT_FR0); \ + else \ + fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - (p)); \ + } + +static SLJIT_CONST char* op0_names[] = { + (char*)"breakpoint", (char*)"nop", (char*)"lumul", (char*)"lsmul", + (char*)"udivmod", (char*)"sdivmod", (char*)"udivi", (char*)"sdivi" +}; + +static SLJIT_CONST char* op1_names[] = { + (char*)"mov", (char*)"mov_ub", (char*)"mov_sb", (char*)"mov_uh", + (char*)"mov_sh", (char*)"mov_ui", (char*)"mov_si", (char*)"mov_p", + (char*)"movu", (char*)"movu_ub", (char*)"movu_sb", (char*)"movu_uh", + (char*)"movu_sh", (char*)"movu_ui", (char*)"movu_si", (char*)"movu_p", (char*)"not", (char*)"neg", (char*)"clz", - /* op2 */ +}; + +static SLJIT_CONST char* op2_names[] = { (char*)"add", (char*)"addc", (char*)"sub", (char*)"subc", (char*)"mul", (char*)"and", (char*)"or", (char*)"xor", (char*)"shl", (char*)"lshr", (char*)"ashr", - /* fop1 */ - (char*)"cmp", (char*)"mov", (char*)"neg", (char*)"abs", - /* fop2 */ +}; + +static SLJIT_CONST char* fop1_names[] = { + (char*)"mov", (char*)"conv", (char*)"conv", (char*)"conv", + (char*)"conv", (char*)"conv", (char*)"cmp", (char*)"neg", + (char*)"abs", +}; + +static SLJIT_CONST char* fop2_names[] = { (char*)"add", (char*)"sub", (char*)"mul", (char*)"div" }; +#define JUMP_PREFIX(type) \ + ((type & 0xff) <= SLJIT_MUL_NOT_OVERFLOW ? ((type & SLJIT_INT_OP) ? "i_" : "") \ + : ((type & 0xff) <= SLJIT_D_ORDERED ? ((type & SLJIT_SINGLE_OP) ? 
"s_" : "d_") : "")) + static char* jump_names[] = { (char*)"equal", (char*)"not_equal", (char*)"less", (char*)"greater_equal", @@ -790,188 +885,178 @@ static char* jump_names[] = { (char*)"sig_greater", (char*)"sig_less_equal", (char*)"overflow", (char*)"not_overflow", (char*)"mul_overflow", (char*)"mul_not_overflow", - (char*)"float_equal", (char*)"float_not_equal", - (char*)"float_less", (char*)"float_greater_equal", - (char*)"float_greater", (char*)"float_less_equal", - (char*)"float_unordered", (char*)"float_ordered", + (char*)"equal", (char*)"not_equal", + (char*)"less", (char*)"greater_equal", + (char*)"greater", (char*)"less_equal", + (char*)"unordered", (char*)"ordered", (char*)"jump", (char*)"fast_call", (char*)"call0", (char*)"call1", (char*)"call2", (char*)"call3" }; -#endif +#endif /* SLJIT_VERBOSE */ /* --------------------------------------------------------------------- */ /* Arch dependent */ /* --------------------------------------------------------------------- */ -static SLJIT_INLINE void check_sljit_generate_code(struct sljit_compiler *compiler) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_compiler *compiler) { -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) struct sljit_jump *jump; #endif - /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); - SLJIT_ASSERT(compiler->size > 0); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->size > 0); jump = compiler->jumps; while (jump) { /* All jumps have target. 
*/ - SLJIT_ASSERT(jump->flags & (JUMP_LABEL | JUMP_ADDR)); + CHECK_ARGUMENT(jump->flags & (JUMP_LABEL | JUMP_ADDR)); jump = jump->next; } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - /* If debug and verbose are disabled, all arguments are unused. */ SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(args); - SLJIT_UNUSED_ARG(scratches); - SLJIT_UNUSED_ARG(saveds); - SLJIT_UNUSED_ARG(local_size); - SLJIT_ASSERT(args >= 0 && args <= 3); - SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NO_TMP_REGISTERS); - SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS); - SLJIT_ASSERT(args <= saveds); - SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(options & ~SLJIT_DOUBLE_ALIGNMENT)); + CHECK_ARGUMENT(args >= 0 && args <= 3); + CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(args <= saveds); + CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); +#endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " enter args=%d scratches=%d saveds=%d local_size=%d\n", args, scratches, saveds, local_size); + 
fprintf(compiler->verbose, " enter options:none args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + args, scratches, saveds, fscratches, fsaveds, local_size); #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(args); - SLJIT_UNUSED_ARG(scratches); - SLJIT_UNUSED_ARG(saveds); - SLJIT_UNUSED_ARG(local_size); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; - return; + CHECK_RETURN_OK; } -#endif - SLJIT_ASSERT(args >= 0 && args <= 3); - SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NO_TMP_REGISTERS); - SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS); - SLJIT_ASSERT(args <= saveds); - SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(options & ~SLJIT_DOUBLE_ALIGNMENT)); + CHECK_ARGUMENT(args >= 0 && args <= 3); + CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(args <= saveds); + CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(local_size >= 0 && local_size <= 
SLJIT_MAX_LOCAL_SIZE); +#endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " set_context args=%d scratches=%d saveds=%d local_size=%d\n", args, scratches, saveds, local_size); + fprintf(compiler->verbose, " set_context options:none args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + args, scratches, saveds, fscratches, fsaveds, local_size); #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->scratches >= 0); if (op != SLJIT_UNUSED) { - SLJIT_ASSERT(op >= SLJIT_MOV && op <= SLJIT_MOV_P); + CHECK_ARGUMENT(op >= SLJIT_MOV && op <= SLJIT_MOV_P); FUNCTION_CHECK_SRC(src, srcw); } else - SLJIT_ASSERT(src == 0 && srcw == 0); + CHECK_ARGUMENT(src == 0 && srcw == 0); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (op == SLJIT_UNUSED) fprintf(compiler->verbose, " return\n"); else { - fprintf(compiler->verbose, " return %s ", op_names[op]); - sljit_verbose_param(src, srcw); + fprintf(compiler->verbose, " return.%s ", op1_names[op - SLJIT_OP1_BASE]); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw 
dstw) { - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fast_enter "); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, "\n"); } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) { - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_SRC(src, srcw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fast_return "); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) { - /* If debug and verbose are disabled, all arguments are unused. 
*/ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - - SLJIT_ASSERT((op >= SLJIT_BREAKPOINT && op <= SLJIT_SMUL) - || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIV && (op & ~SLJIT_INT_OP) <= SLJIT_SDIV)); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LSMUL) + || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIVMOD && (op & ~SLJIT_INT_OP) <= SLJIT_SDIVI)); + CHECK_ARGUMENT(op < SLJIT_LUMUL || compiler->scratches >= 2); +#endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " %s%s\n", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)]); + fprintf(compiler->verbose, " %s%s\n", !(op & SLJIT_INT_OP) ? "" : "i", op0_names[GET_OPCODE(op) - SLJIT_OP0_BASE]); #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - /* If debug and verbose are disabled, all arguments are unused. 
*/ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; - return; + CHECK_RETURN_OK; } -#endif - SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ); FUNCTION_CHECK_OP(); FUNCTION_CHECK_SRC(src, srcw); FUNCTION_CHECK_DST(dst, dstw); @@ -979,41 +1064,30 @@ static SLJIT_INLINE void check_sljit_emit_op1(struct sljit_compiler *compiler, s #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)], + fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - /* If debug and verbose are disabled, all arguments are unused. 
*/ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; - return; + CHECK_RETURN_OK; } -#endif - SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ASHR); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ASHR); FUNCTION_CHECK_OP(); FUNCTION_CHECK_SRC(src1, src1w); FUNCTION_CHECK_SRC(src2, src2w); @@ -1021,312 +1095,417 @@ static SLJIT_INLINE void check_sljit_emit_op2(struct sljit_compiler *compiler, s #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)], + fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? 
"" : ".k"); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src1, src1w); + sljit_verbose_param(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src2, src2w); + sljit_verbose_param(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_get_register_index(sljit_si reg) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_si reg) { SLJIT_UNUSED_ARG(reg); - SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_REGISTERS); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS); +#endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_get_float_register_index(sljit_si reg) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_float_register_index(sljit_si reg) { SLJIT_UNUSED_ARG(reg); - SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_FLOAT_REGISTERS); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); +#endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_op_custom(struct sljit_compiler *compiler, +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_si size) { +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + int i; +#endif + SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(instruction); - SLJIT_UNUSED_ARG(size); - SLJIT_ASSERT(instruction); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(instruction); +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + CHECK_ARGUMENT(size > 0 && size < 16); +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + CHECK_ARGUMENT((size == 2 && (((sljit_sw)instruction) & 0x1) == 0) + || (size == 4 && (((sljit_sw)instruction) & 0x3) == 0)); +#else + 
CHECK_ARGUMENT(size == 4 && (((sljit_sw)instruction) & 0x3) == 0); +#endif + +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " op_custom"); + for (i = 0; i < size; i++) + fprintf(compiler->verbose, " 0x%x", ((sljit_ub*)instruction)[i]); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_DMOV && GET_OPCODE(op) <= SLJIT_DABS); + FUNCTION_CHECK_FOP(); + FUNCTION_FCHECK(src, srcw); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) + fprintf(compiler->verbose, " %s%s ", fop1_names[SLJIT_CONVD_FROMS - SLJIT_FOP1_BASE], + (op & SLJIT_SINGLE_OP) ? "s.fromd" : "d.froms"); + else + fprintf(compiler->verbose, " %s%s ", (op & SLJIT_SINGLE_OP) ? 
"s" : "d", + fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE]); + + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; - return; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_DCMP); + FUNCTION_CHECK_FOP(); + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s%s ", (op & SLJIT_SINGLE_OP) ? "s" : "d", fop1_names[SLJIT_DCMP - SLJIT_FOP1_BASE], + (op & SLJIT_SET_E) ? ".e" : "", (op & SLJIT_SET_S) ? 
".s" : ""); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); } #endif + CHECK_RETURN_OK; +} - SLJIT_ASSERT(sljit_is_fpu_available()); - SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_CMPD && GET_OPCODE(op) <= SLJIT_ABSD); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_OP(); +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONVW_FROMD && GET_OPCODE(op) <= SLJIT_CONVI_FROMD); + FUNCTION_CHECK_FOP(); FUNCTION_FCHECK(src, srcw); + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? "i" : "w", + (op & SLJIT_SINGLE_OP) ? 
"s" : "d"); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONVD_FROMW && GET_OPCODE(op) <= SLJIT_CONVD_FROMI); + FUNCTION_CHECK_FOP(); + FUNCTION_CHECK_SRC(src, srcw); FUNCTION_FCHECK(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d", - !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_S) ? "" : ".s"); - sljit_verbose_fparam(dst, dstw); + fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (op & SLJIT_SINGLE_OP) ? "s" : "d", + (GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? "i" : "w"); + sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src, srcw); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - /* If debug and verbose are disabled, all arguments are unused. 
*/ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - - SLJIT_ASSERT(sljit_is_fpu_available()); - SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_ADDD && GET_OPCODE(op) <= SLJIT_DIVD); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_OP(); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_DADD && GET_OPCODE(op) <= SLJIT_DDIV); + FUNCTION_CHECK_FOP(); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); FUNCTION_FCHECK(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d"); - sljit_verbose_fparam(dst, dstw); + fprintf(compiler->verbose, " %s%s ", (op & SLJIT_SINGLE_OP) ? "s" : "d", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE]); + sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src1, src1w); + sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src2, src2w); + sljit_verbose_fparam(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_label(struct sljit_compiler *compiler) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compiler *compiler) { - /* If debug and verbose are disabled, all arguments are unused. 
*/ SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) fprintf(compiler->verbose, "label:\n"); #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) { - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; - return; + CHECK_RETURN_OK; } -#endif - SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP))); - SLJIT_ASSERT((type & 0xff) >= SLJIT_C_EQUAL && (type & 0xff) <= SLJIT_CALL3); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_INT_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_CALL3); + CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_INT_OP)); + CHECK_ARGUMENT((type & 0xff) <= SLJIT_CALL0 || ((type & 0xff) - SLJIT_CALL0) <= compiler->scratches); +#endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " jump%s.%s\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]); + fprintf(compiler->verbose, " jump%s.%s%s\n", !(type & SLJIT_REWRITABLE_JUMP) ? 
"" : ".r", + JUMP_PREFIX(type), jump_names[type & 0xff]); #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - - SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_INT_OP))); - SLJIT_ASSERT((type & 0xff) >= SLJIT_C_EQUAL && (type & 0xff) <= SLJIT_C_SIG_LESS_EQUAL); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_INT_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_SIG_LESS_EQUAL); FUNCTION_CHECK_SRC(src1, src1w); FUNCTION_CHECK_SRC(src2, src2w); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %scmp%s.%s ", !(type & SLJIT_INT_OP) ? "" : "i", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]); - sljit_verbose_param(src1, src1w); + fprintf(compiler->verbose, " cmp%s.%s%s ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + (type & SLJIT_INT_OP) ? 
"i_" : "", jump_names[type & 0xff]); + sljit_verbose_param(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src2, src2w); + sljit_verbose_param(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(src1); - SLJIT_UNUSED_ARG(src1w); - SLJIT_UNUSED_ARG(src2); - SLJIT_UNUSED_ARG(src2w); - - SLJIT_ASSERT(sljit_is_fpu_available()); - SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_SINGLE_OP))); - SLJIT_ASSERT((type & 0xff) >= SLJIT_C_FLOAT_EQUAL && (type & 0xff) <= SLJIT_C_FLOAT_ORDERED); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_SINGLE_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_D_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %scmp%s.%s ", (type & SLJIT_SINGLE_OP) ? "s" : "d", - !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]); - sljit_verbose_fparam(src1, src1w); + fprintf(compiler->verbose, " fcmp%s.%s%s ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + (type & SLJIT_SINGLE_OP) ? 
"s_" : "d_", jump_names[type & 0xff]); + sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src2, src2w); + sljit_verbose_fparam(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) { - /* If debug and verbose are disabled, all arguments are unused. */ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; - return; + CHECK_RETURN_OK; } -#endif - SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); + CHECK_ARGUMENT(type <= SLJIT_CALL0 || (type - SLJIT_CALL0) <= compiler->scratches); FUNCTION_CHECK_SRC(src, srcw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " ijump.%s ", jump_names[type]); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw, sljit_si type) { - /* If debug and verbose are disabled, all arguments are unused. 
*/ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(op); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); - SLJIT_UNUSED_ARG(src); - SLJIT_UNUSED_ARG(srcw); - SLJIT_UNUSED_ARG(type); - - SLJIT_ASSERT(type >= SLJIT_C_EQUAL && type < SLJIT_JUMP); - SLJIT_ASSERT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_MOV_UI || GET_OPCODE(op) == SLJIT_MOV_SI +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED); + CHECK_ARGUMENT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_MOV_UI || GET_OPCODE(op) == SLJIT_MOV_SI || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); - SLJIT_ASSERT((op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C)) == 0); - SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_KEEP_FLAGS)) != (SLJIT_SET_E | SLJIT_KEEP_FLAGS)); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + CHECK_ARGUMENT((op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C)) == 0); + CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_KEEP_FLAGS)) != (SLJIT_SET_E | SLJIT_KEEP_FLAGS)); if (GET_OPCODE(op) < SLJIT_ADD) { - SLJIT_ASSERT(src == SLJIT_UNUSED && srcw == 0); + CHECK_ARGUMENT(src == SLJIT_UNUSED && srcw == 0); } else { - SLJIT_ASSERT(src == dst && srcw == dstw); + CHECK_ARGUMENT(src == dst && srcw == dstw); } FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %sflags.%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", - op_names[GET_OPCODE(op)], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); - sljit_verbose_param(dst, dstw); + fprintf(compiler->verbose, " flags.%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", + GET_OPCODE(op) >= SLJIT_OP2_BASE ? op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE] : op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], + !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? 
"" : ".k"); + sljit_verbose_param(compiler, dst, dstw); if (src != SLJIT_UNUSED) { fprintf(compiler->verbose, ", "); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); } - fprintf(compiler->verbose, ", %s\n", jump_names[type]); + fprintf(compiler->verbose, ", %s%s\n", JUMP_PREFIX(type), jump_names[type & 0xff]); } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) { - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); SLJIT_UNUSED_ARG(offset); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " local_base "); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", offset); } #endif + CHECK_RETURN_OK; } -static SLJIT_INLINE void check_sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) { - /* If debug and verbose are disabled, all arguments are unused. 
*/ - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(dst); - SLJIT_UNUSED_ARG(dstw); SLJIT_UNUSED_ARG(init_value); -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " const "); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", init_value); } #endif + CHECK_RETURN_OK; } +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */ + +#define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \ + SLJIT_COMPILE_ASSERT(!(SLJIT_CONVW_FROMD & 0x1) && !(SLJIT_CONVD_FROMW & 0x1), \ + invalid_float_opcodes); \ + if (GET_OPCODE(op) >= SLJIT_CONVW_FROMD && GET_OPCODE(op) <= SLJIT_DCMP) { \ + if (GET_OPCODE(op) == SLJIT_DCMP) { \ + CHECK(check_sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \ + } \ + if ((GET_OPCODE(op) | 0x1) == SLJIT_CONVI_FROMD) { \ + CHECK(check_sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw); \ + } \ + CHECK(check_sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw); \ + } \ + CHECK(check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); + static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { /* Return if don't need to do anything. 
*/ @@ -1342,7 +1521,8 @@ static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compi return SLJIT_SUCCESS; #endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) compiler->skip_checks = 1; #endif return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw); @@ -1374,9 +1554,7 @@ static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compi #define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3 -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -# include "sljitNativeX86_common.c" -#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) # include "sljitNativeX86_common.c" #elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) # include "sljitNativeARM_32.c" @@ -1386,21 +1564,17 @@ static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compi # include "sljitNativeARM_T2_32.c" #elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) # include "sljitNativeARM_64.c" -#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) -# include "sljitNativePPC_common.c" -#elif (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) # include "sljitNativePPC_common.c" -#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# include "sljitNativeMIPS_common.c" -#elif (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) # include "sljitNativeMIPS_common.c" -#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) # include "sljitNativeSPARC_common.c" #elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) # include "sljitNativeTILEGX_64.c" #endif -#if !(defined SLJIT_CONFIG_MIPS_32 && 
SLJIT_CONFIG_MIPS_32) && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, sljit_si src1, sljit_sw src1w, @@ -1411,11 +1585,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler sljit_sw tmp_srcw; CHECK_ERROR_PTR(); - check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); condition = type & 0xff; #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) - if ((condition == SLJIT_C_EQUAL || condition == SLJIT_C_NOT_EQUAL)) { + if ((condition == SLJIT_EQUAL || condition == SLJIT_NOT_EQUAL)) { if ((src1 & SLJIT_IMM) && !src1w) { src1 = src2; src1w = src2w; @@ -1430,29 +1604,29 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler if (SLJIT_UNLIKELY((src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM))) { /* Immediate is prefered as second argument by most architectures. 
*/ switch (condition) { - case SLJIT_C_LESS: - condition = SLJIT_C_GREATER; + case SLJIT_LESS: + condition = SLJIT_GREATER; break; - case SLJIT_C_GREATER_EQUAL: - condition = SLJIT_C_LESS_EQUAL; + case SLJIT_GREATER_EQUAL: + condition = SLJIT_LESS_EQUAL; break; - case SLJIT_C_GREATER: - condition = SLJIT_C_LESS; + case SLJIT_GREATER: + condition = SLJIT_LESS; break; - case SLJIT_C_LESS_EQUAL: - condition = SLJIT_C_GREATER_EQUAL; + case SLJIT_LESS_EQUAL: + condition = SLJIT_GREATER_EQUAL; break; - case SLJIT_C_SIG_LESS: - condition = SLJIT_C_SIG_GREATER; + case SLJIT_SIG_LESS: + condition = SLJIT_SIG_GREATER; break; - case SLJIT_C_SIG_GREATER_EQUAL: - condition = SLJIT_C_SIG_LESS_EQUAL; + case SLJIT_SIG_GREATER_EQUAL: + condition = SLJIT_SIG_LESS_EQUAL; break; - case SLJIT_C_SIG_GREATER: - condition = SLJIT_C_SIG_LESS; + case SLJIT_SIG_GREATER: + condition = SLJIT_SIG_LESS; break; - case SLJIT_C_SIG_LESS_EQUAL: - condition = SLJIT_C_SIG_GREATER_EQUAL; + case SLJIT_SIG_LESS_EQUAL: + condition = SLJIT_SIG_GREATER_EQUAL; break; } type = condition | (type & (SLJIT_INT_OP | SLJIT_REWRITABLE_JUMP)); @@ -1464,19 +1638,21 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler src2w = tmp_srcw; } - if (condition <= SLJIT_C_NOT_ZERO) + if (condition <= SLJIT_NOT_ZERO) flags = SLJIT_SET_E; - else if (condition <= SLJIT_C_LESS_EQUAL) + else if (condition <= SLJIT_LESS_EQUAL) flags = SLJIT_SET_U; else flags = SLJIT_SET_S; -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif PTR_FAIL_IF(sljit_emit_op2(compiler, SLJIT_SUB | flags | (type & SLJIT_INT_OP), SLJIT_UNUSED, 0, src1, src1w, src2, src2w)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && 
SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP)); @@ -1488,19 +1664,22 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile { sljit_si flags, condition; - check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w); + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w)); condition = type & 0xff; - flags = (condition <= SLJIT_C_FLOAT_NOT_EQUAL) ? SLJIT_SET_E : SLJIT_SET_S; + flags = (condition <= SLJIT_D_NOT_EQUAL) ? SLJIT_SET_E : SLJIT_SET_S; if (type & SLJIT_SINGLE_OP) flags |= SLJIT_SINGLE_OP; -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - sljit_emit_fop1(compiler, SLJIT_CMPD | flags, src1, src1w, src2, src2w); + sljit_emit_fop1(compiler, SLJIT_DCMP | flags, src1, src1w, src2, src2w); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP)); @@ -1508,20 +1687,21 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile #endif -#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) { CHECK_ERROR(); - check_sljit_get_local_base(compiler, dst, dstw, offset); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); - ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset); -#if (defined SLJIT_VERBOSE 
&& SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif if (offset != 0) - return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset); - return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_LOCALS_REG, 0); + return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0); } #endif @@ -1577,25 +1757,36 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) SLJIT_ASSERT_STOP(); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(options); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(scratches); SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT_STOP(); return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(options); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(scratches); SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); 
SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) diff --git a/sljit/sljitLir.h b/sljit/sljitLir.h index e2cd218..2e2e9ac 100644 --- a/sljit/sljitLir.h +++ b/sljit/sljitLir.h @@ -56,8 +56,6 @@ Disadvantages: - No automatic register allocation, and temporary results are not stored on the stack. (hence the name comes) - - Limited number of registers (only 6+4 integer registers, max 3+2 - scratch, max 3+2 saved and 6 floating point registers) In practice: - This approach is very effective for interpreters - One of the saved registers typically points to a stack interface @@ -97,83 +95,178 @@ of sljitConfigInternal.h */ /* Cannot allocate executable memory. Only for sljit_generate_code() */ #define SLJIT_ERR_EX_ALLOC_FAILED 3 -/* return value for SLJIT_CONFIG_UNSUPPORTED empty architecture. */ +/* Return value for SLJIT_CONFIG_UNSUPPORTED placeholder architecture. */ #define SLJIT_ERR_UNSUPPORTED 4 +/* An ivalid argument is passed to any SLJIT function. */ +#define SLJIT_ERR_BAD_ARGUMENT 5 /* --------------------------------------------------------------------- */ /* Registers */ /* --------------------------------------------------------------------- */ +/* + Scratch (R) registers: registers whose may not preserve their values + across function calls. + + Saved (S) registers: registers whose preserve their values across + function calls. + + The scratch and saved register sets are overlap. The last scratch register + is the first saved register, the one before the last is the second saved + register, and so on. 
+ + If an architecture provides two scratch and three saved registers, + its scratch and saved register sets are the following: + + R0 | [S4] | R0 and S4 represent the same physical register + R1 | [S3] | R1 and S3 represent the same physical register + [R2] | S2 | R2 and S2 represent the same physical register + [R3] | S1 | R3 and S1 represent the same physical register + [R4] | S0 | R4 and S0 represent the same physical register + + Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and + SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture. + + Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 10 + and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 5. However, 4 registers + are virtual on x86-32. See below. + + The purpose of this definition is convenience. Although a register + is either scratch register or saved register, SLJIT allows accessing + them from the other set. For example, four registers can be used as + scratch registers and the fifth one as saved register on the architecture + above. Of course the last two scratch registers (R2 and R3) from this + four will be saved on the stack, because they are defined as saved + registers in the application binary interface. Still R2 and R3 can be + used for referencing to these registers instead of S2 and S1, which + makes easier to write platform independent code. Scratch registers + can be saved registers in a similar way, but these extra saved + registers will not be preserved across function calls! Hence the + application must save them on those platforms, where the number of + saved registers is too low. This can be done by copy them onto + the stack and restore them after a function call. + + Note: To emphasize that registers assigned to R2-R4 are saved + registers, they are enclosed by square brackets. S3-S4 + are marked in a similar way. + + Note: sljit_emit_enter and sljit_set_context defines whether a register + is S or R register. 
E.g: when 3 scratches and 1 saved is mapped + by sljit_emit_enter, the allowed register set will be: R0-R2 and + S0. Although S2 is mapped to the same position as R2, it does not + available in the current configuration. Furthermore the R3 (S1) + register does not available as well. +*/ + +/* When SLJIT_UNUSED is specified as destination, the result is discarded. */ #define SLJIT_UNUSED 0 -/* Scratch (temporary) registers whose may not preserve their values - across function calls. */ -#define SLJIT_SCRATCH_REG1 1 -#define SLJIT_SCRATCH_REG2 2 -#define SLJIT_SCRATCH_REG3 3 -/* Note: extra registers cannot be used for memory addressing. */ -/* Note: on x86-32, these registers are emulated (using stack - loads & stores). */ -#define SLJIT_TEMPORARY_EREG1 4 -#define SLJIT_TEMPORARY_EREG2 5 - -/* Saved registers whose preserve their values across function calls. */ -#define SLJIT_SAVED_REG1 6 -#define SLJIT_SAVED_REG2 7 -#define SLJIT_SAVED_REG3 8 -/* Note: extra registers cannot be used for memory addressing. */ -/* Note: on x86-32, these registers are emulated (using stack - loads & stores). */ -#define SLJIT_SAVED_EREG1 9 -#define SLJIT_SAVED_EREG2 10 - -/* Read-only register (cannot be the destination of an operation). - Only SLJIT_MEM1(SLJIT_LOCALS_REG) addressing mode is allowed since - several ABIs has certain limitations about the stack layout. However - sljit_get_local_base() can be used to obtain the offset of a value - on the stack. */ -#define SLJIT_LOCALS_REG 11 - -/* Number of registers. */ -#define SLJIT_NO_TMP_REGISTERS 5 -#define SLJIT_NO_GEN_REGISTERS 5 -#define SLJIT_NO_REGISTERS 11 +/* Scratch registers. */ +#define SLJIT_R0 1 +#define SLJIT_R1 2 +#define SLJIT_R2 3 +/* Note: on x86-32, R3 - R6 (same as S3 - S6) are emulated (they + are allocated on the stack). These registers are called virtual + and cannot be used for memory addressing (cannot be part of + any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such + limitation on other CPUs. 
See sljit_get_register_index(). */ +#define SLJIT_R3 4 +#define SLJIT_R4 5 +#define SLJIT_R5 6 +#define SLJIT_R6 7 +#define SLJIT_R7 8 +#define SLJIT_R8 9 +#define SLJIT_R9 10 +/* All R registers provided by the architecture can be accessed by SLJIT_R(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_REGISTERS. */ +#define SLJIT_R(i) (1 + (i)) + +/* Saved registers. */ +#define SLJIT_S0 (SLJIT_NUMBER_OF_REGISTERS) +#define SLJIT_S1 (SLJIT_NUMBER_OF_REGISTERS - 1) +#define SLJIT_S2 (SLJIT_NUMBER_OF_REGISTERS - 2) +/* Note: on x86-32, S3 - S6 (same as R3 - R6) are emulated (they + are allocated on the stack). These registers are called virtual + and cannot be used for memory addressing (cannot be part of + any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such + limitation on other CPUs. See sljit_get_register_index(). */ +#define SLJIT_S3 (SLJIT_NUMBER_OF_REGISTERS - 3) +#define SLJIT_S4 (SLJIT_NUMBER_OF_REGISTERS - 4) +#define SLJIT_S5 (SLJIT_NUMBER_OF_REGISTERS - 5) +#define SLJIT_S6 (SLJIT_NUMBER_OF_REGISTERS - 6) +#define SLJIT_S7 (SLJIT_NUMBER_OF_REGISTERS - 7) +#define SLJIT_S8 (SLJIT_NUMBER_OF_REGISTERS - 8) +#define SLJIT_S9 (SLJIT_NUMBER_OF_REGISTERS - 9) +/* All S registers provided by the architecture can be accessed by SLJIT_S(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_REGISTERS. */ +#define SLJIT_S(i) (SLJIT_NUMBER_OF_REGISTERS - (i)) + +/* Registers >= SLJIT_FIRST_SAVED_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_REG (SLJIT_S0 - SLJIT_NUMBER_OF_SAVED_REGISTERS + 1) + +/* The SLJIT_SP provides direct access to the linear stack space allocated by + sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP). + The immediate offset is extended by the relative stack offset automatically. + The sljit_get_local_base can be used to obtain the absolute offset. */ +#define SLJIT_SP (SLJIT_NUMBER_OF_REGISTERS + 1) /* Return with machine word. 
*/ -#define SLJIT_RETURN_REG SLJIT_SCRATCH_REG1 +#define SLJIT_RETURN_REG SLJIT_R0 /* x86 prefers specific registers for special purposes. In case of shift - by register it supports only SLJIT_SCRATCH_REG3 for shift argument + by register it supports only SLJIT_R2 for shift argument (which is the src2 argument of sljit_emit_op2). If another register is used, sljit must exchange data between registers which cause a minor slowdown. Other architectures has no such limitation. */ -#define SLJIT_PREF_SHIFT_REG SLJIT_SCRATCH_REG3 +#define SLJIT_PREF_SHIFT_REG SLJIT_R2 /* --------------------------------------------------------------------- */ /* Floating point registers */ /* --------------------------------------------------------------------- */ -/* Note: SLJIT_UNUSED as destination is not valid for floating point - operations, since they cannot be used for setting flags. */ - -/* Floating point operations are performed on double or - single precision values. */ - -#define SLJIT_FLOAT_REG1 1 -#define SLJIT_FLOAT_REG2 2 -#define SLJIT_FLOAT_REG3 3 -#define SLJIT_FLOAT_REG4 4 -#define SLJIT_FLOAT_REG5 5 -#define SLJIT_FLOAT_REG6 6 +/* Each floating point register can store a double or single precision + value. The FR and FS register sets are overlap in the same way as R + and S register sets. See above. */ -#define SLJIT_NO_FLOAT_REGISTERS 6 +/* Note: SLJIT_UNUSED as destination is not valid for floating point + operations, since they cannot be used for setting flags. */ + +/* Floating point scratch registers. */ +#define SLJIT_FR0 1 +#define SLJIT_FR1 2 +#define SLJIT_FR2 3 +#define SLJIT_FR3 4 +#define SLJIT_FR4 5 +#define SLJIT_FR5 6 +/* All FR registers provided by the architecture can be accessed by SLJIT_FR(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */ +#define SLJIT_FR(i) (1 + (i)) + +/* Floating point saved registers. 
*/ +#define SLJIT_FS0 (SLJIT_NUMBER_OF_FLOAT_REGISTERS) +#define SLJIT_FS1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1) +#define SLJIT_FS2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2) +#define SLJIT_FS3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3) +#define SLJIT_FS4 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4) +#define SLJIT_FS5 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5) +/* All FS registers provided by the architecture can be accessed by SLJIT_FS(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */ +#define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i)) + +/* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1) /* --------------------------------------------------------------------- */ /* Main structures and functions */ /* --------------------------------------------------------------------- */ +/* + The following structures are private, and can be changed in the + future. Keeping them here allows code inlining. +*/ + struct sljit_memory_fragment { struct sljit_memory_fragment *next; sljit_uw used_size; @@ -205,6 +298,7 @@ struct sljit_const { struct sljit_compiler { sljit_si error; + sljit_si options; struct sljit_label *labels; struct sljit_jump *jumps; @@ -213,13 +307,18 @@ struct sljit_compiler { struct sljit_jump *last_jump; struct sljit_const *last_const; + void *allocator_data; struct sljit_memory_fragment *buf; struct sljit_memory_fragment *abuf; - /* Used local registers. */ + /* Used scratch registers. */ sljit_si scratches; /* Used saved registers. */ sljit_si saveds; + /* Used float scratch registers. */ + sljit_si fscratches; + /* Used float saved registers. */ + sljit_si fsaveds; /* Local stack size. */ sljit_si local_size; /* Code size. 
*/ @@ -229,16 +328,13 @@ struct sljit_compiler { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_si args; - sljit_si locals_offset; - sljit_si scratches_start; - sljit_si saveds_start; #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) sljit_si mode32; #endif -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) sljit_si flags_saved; #endif @@ -266,18 +362,17 @@ struct sljit_compiler { #endif #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) - sljit_si locals_offset; sljit_si cache_arg; sljit_sw cache_argw; #endif -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) sljit_sw imm; sljit_si cache_arg; sljit_sw cache_argw; #endif -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) sljit_si delay_slot; sljit_si cache_arg; sljit_sw cache_argw; @@ -298,12 +393,15 @@ struct sljit_compiler { FILE* verbose; #endif -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) /* Local size passed to the functions. */ sljit_si logical_local_size; #endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) sljit_si skip_checks; #endif }; @@ -312,11 +410,16 @@ struct sljit_compiler { /* Main functions */ /* --------------------------------------------------------------------- */ -/* Creates an sljit compiler. +/* Creates an sljit compiler. The allocator_data is required by some + custom memory managers. 
This pointer is passed to SLJIT_MALLOC + and SLJIT_FREE macros. Most allocators (including the default + one) ignores this value, and it is recommended to pass NULL + as a dummy value for allocator_data. + Returns NULL if failed. */ -SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void); +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data); -/* Free everything except the compiled machine code. */ +/* Frees everything except the compiled machine code. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler); /* Returns the current error code. If an error is occurred, future sljit @@ -326,6 +429,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compile these checks increases the performance of the compiling process. */ static SLJIT_INLINE sljit_si sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; } +/* Sets the compiler error code to SLJIT_ERR_ALLOC_FAILED except + if an error was detected before. After the error code is set + the compiler behaves as if the allocation failure happened + during an sljit function call. This can greatly simplify error + checking, since only the compiler status needs to be checked + after the compilation. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler); + /* Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit, and <= 128 bytes on 64 bit architectures. The memory area is owned by the @@ -361,46 +472,69 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler error, they return with SLJIT_SUCCESS. */ /* - The executable code is basically a function call from the viewpoint of - the C language. The function calls must obey to the ABI (Application - Binary Interface) of the platform, which specify the purpose of machine - registers and stack handling among other things. 
The sljit_emit_enter - function emits the necessary instructions for setting up a new context - for the executable code and moves function arguments to the saved - registers. The number of arguments are specified in the "args" - parameter and the first argument goes to SLJIT_SAVED_REG1, the second - goes to SLJIT_SAVED_REG2 and so on. The number of scratch and - saved registers are passed in "scratches" and "saveds" arguments - respectively. Since the saved registers contains the arguments, - "args" must be less or equal than "saveds". The sljit_emit_enter - is also capable of allocating a stack space for local variables. The - "local_size" argument contains the size in bytes of this local area - and its staring address is stored in SLJIT_LOCALS_REG. However - the SLJIT_LOCALS_REG is not necessary the machine stack pointer. - The memory bytes between SLJIT_LOCALS_REG (inclusive) and - SLJIT_LOCALS_REG + local_size (exclusive) can be modified freely - until the function returns. The stack space is uninitialized. + The executable code is a function call from the viewpoint of the C + language. The function calls must obey to the ABI (Application + Binary Interface) of the platform, which specify the purpose of + all machine registers and stack handling among other things. The + sljit_emit_enter function emits the necessary instructions for + setting up a new context for the executable code and moves function + arguments to the saved registers. Furthermore the options argument + can be used to pass configuration options to the compiler. The + available options are listed before sljit_emit_enter. + + The number of sljit_sw arguments passed to the generated function + are specified in the "args" parameter. The number of arguments must + be less than or equal to 3. The first argument goes to SLJIT_S0, + the second goes to SLJIT_S1 and so on. The register set used by + the function must be declared as well. 
The number of scratch and + saved registers used by the function must be passed to sljit_emit_enter. + Only R registers between R0 and "scratches" argument can be used + later. E.g. if "scratches" is set to 2, the register set will be + limited to R0 and R1. The S registers and the floating point + registers ("fscratches" and "fsaveds") are specified in a similar + way. The sljit_emit_enter is also capable of allocating a stack + space for local variables. The "local_size" argument contains the + size in bytes of this local area and its starting address is stored + in SLJIT_SP. The memory area between SLJIT_SP (inclusive) and + SLJIT_SP + local_size (exclusive) can be modified freely until + the function returns. The stack space is not initialized. + + Note: the following conditions must be met: + 0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS + 0 <= saveds <= SLJIT_NUMBER_OF_REGISTERS + scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS + 0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + 0 <= fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS Note: every call of sljit_emit_enter and sljit_set_context - overwrites the previous context. */ + overwrites the previous context. +*/ + +/* The absolute address returned by sljit_get_local_base with +offset 0 is aligned to sljit_d. Otherwise it is aligned to sljit_uw. */ +#define SLJIT_DOUBLE_ALIGNMENT 0x00000001 +/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ #define SLJIT_MAX_LOCAL_SIZE 65536 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size); + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size); /* The machine code has a context (which contains the local stack space size, number of used registers, etc.) which initialized by sljit_emit_enter. 
Several functions (like sljit_emit_return) requires this context to be able to generate the appropriate code. However, some code fragments (like inline cache) may have - no normal entry point so their context is unknown for the compiler. Using the - function below we can specify their context. + no normal entry point so their context is unknown for the compiler. Their context + can be provided to the compiler by the sljit_set_context function. Note: every call of sljit_emit_enter and sljit_set_context overwrites the previous context. */ -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, - sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size); +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size); /* Return from machine code. The op argument can be SLJIT_UNUSED which means the function does not return with anything or any opcode between SLJIT_MOV and @@ -549,37 +683,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * the instruction does not set flags (See: SLJIT_KEEP_FLAGS). - flag combinations: '|' means 'logical or'. */ +/* Starting index of opcodes for sljit_emit_op0. */ +#define SLJIT_OP0_BASE 0 + /* Flags: - (never set any flags) - Note: breakpoint instruction is not supported by all architectures (namely ppc) + Note: breakpoint instruction is not supported by all architectures (e.g. ppc) It falls back to SLJIT_NOP in those cases. */ -#define SLJIT_BREAKPOINT 0 +#define SLJIT_BREAKPOINT (SLJIT_OP0_BASE + 0) /* Flags: - (never set any flags) Note: may or may not cause an extra cycle wait it can even decrease the runtime in a few cases. */ -#define SLJIT_NOP 1 +#define SLJIT_NOP (SLJIT_OP0_BASE + 1) /* Flags: - (may destroy flags) - Unsigned multiplication of SLJIT_SCRATCH_REG1 and SLJIT_SCRATCH_REG2. 
- Result goes to SLJIT_SCRATCH_REG2:SLJIT_SCRATCH_REG1 (high:low) word */ -#define SLJIT_UMUL 2 + Unsigned multiplication of SLJIT_R0 and SLJIT_R1. + Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ +#define SLJIT_LUMUL (SLJIT_OP0_BASE + 2) /* Flags: - (may destroy flags) - Signed multiplication of SLJIT_SCRATCH_REG1 and SLJIT_SCRATCH_REG2. - Result goes to SLJIT_SCRATCH_REG2:SLJIT_SCRATCH_REG1 (high:low) word */ -#define SLJIT_SMUL 3 + Signed multiplication of SLJIT_R0 and SLJIT_R1. + Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ +#define SLJIT_LSMUL (SLJIT_OP0_BASE + 3) /* Flags: I - (may destroy flags) - Unsigned divide of the value in SLJIT_SCRATCH_REG1 by the value in SLJIT_SCRATCH_REG2. - The result is placed in SLJIT_SCRATCH_REG1 and the remainder goes to SLJIT_SCRATCH_REG2. - Note: if SLJIT_SCRATCH_REG2 contains 0, the behaviour is undefined. */ -#define SLJIT_UDIV 4 -#define SLJIT_IUDIV (SLJIT_UDIV | SLJIT_INT_OP) + Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. + Note: if SLJIT_R1 is 0, the behaviour is undefined. */ +#define SLJIT_UDIVMOD (SLJIT_OP0_BASE + 4) +#define SLJIT_IUDIVMOD (SLJIT_UDIVMOD | SLJIT_INT_OP) /* Flags: I - (may destroy flags) - Signed divide of the value in SLJIT_SCRATCH_REG1 by the value in SLJIT_SCRATCH_REG2. - The result is placed in SLJIT_SCRATCH_REG1 and the remainder goes to SLJIT_SCRATCH_REG2. - Note: if SLJIT_SCRATCH_REG2 contains 0, the behaviour is undefined. */ -#define SLJIT_SDIV 5 -#define SLJIT_ISDIV (SLJIT_SDIV | SLJIT_INT_OP) + Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. + Note: if SLJIT_R1 is 0, the behaviour is undefined. + Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), + the behaviour is undefined. 
*/ +#define SLJIT_SDIVMOD (SLJIT_OP0_BASE + 5) +#define SLJIT_ISDIVMOD (SLJIT_SDIVMOD | SLJIT_INT_OP) +/* Flags: I - (may destroy flags) + Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. + Note: if SLJIT_R1 is 0, the behaviour is undefined. + Note: SLJIT_SDIV is single precision divide. */ +#define SLJIT_UDIVI (SLJIT_OP0_BASE + 6) +#define SLJIT_IUDIVI (SLJIT_UDIVI | SLJIT_INT_OP) +/* Flags: I - (may destroy flags) + Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. + Note: if SLJIT_R1 is 0, the behaviour is undefined. + Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), + the behaviour is undefined. + Note: SLJIT_SDIV is single precision divide. */ +#define SLJIT_SDIVI (SLJIT_OP0_BASE + 7) +#define SLJIT_ISDIVI (SLJIT_SDIVI | SLJIT_INT_OP) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op); +/* Starting index of opcodes for sljit_emit_op1. */ +#define SLJIT_OP1_BASE 32 + /* Notes for MOV instructions: U = Mov with update (pre form). 
If source or destination defined as SLJIT_MEM1(r1) or SLJIT_MEM2(r1, r2), r1 is increased by the sum of r2 and the constant argument @@ -592,115 +750,118 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler P = pointer (sljit_p) size */ /* Flags: - (never set any flags) */ -#define SLJIT_MOV 6 +#define SLJIT_MOV (SLJIT_OP1_BASE + 0) /* Flags: I - (never set any flags) */ -#define SLJIT_MOV_UB 7 +#define SLJIT_MOV_UB (SLJIT_OP1_BASE + 1) #define SLJIT_IMOV_UB (SLJIT_MOV_UB | SLJIT_INT_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOV_SB 8 +#define SLJIT_MOV_SB (SLJIT_OP1_BASE + 2) #define SLJIT_IMOV_SB (SLJIT_MOV_SB | SLJIT_INT_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOV_UH 9 +#define SLJIT_MOV_UH (SLJIT_OP1_BASE + 3) #define SLJIT_IMOV_UH (SLJIT_MOV_UH | SLJIT_INT_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOV_SH 10 +#define SLJIT_MOV_SH (SLJIT_OP1_BASE + 4) #define SLJIT_IMOV_SH (SLJIT_MOV_SH | SLJIT_INT_OP) /* Flags: I - (never set any flags) Note: see SLJIT_INT_OP for further details. */ -#define SLJIT_MOV_UI 11 +#define SLJIT_MOV_UI (SLJIT_OP1_BASE + 5) /* No SLJIT_INT_OP form, since it is the same as SLJIT_IMOV. */ /* Flags: I - (never set any flags) Note: see SLJIT_INT_OP for further details. 
*/ -#define SLJIT_MOV_SI 12 +#define SLJIT_MOV_SI (SLJIT_OP1_BASE + 6) #define SLJIT_IMOV (SLJIT_MOV_SI | SLJIT_INT_OP) /* Flags: - (never set any flags) */ -#define SLJIT_MOV_P 13 +#define SLJIT_MOV_P (SLJIT_OP1_BASE + 7) /* Flags: - (never set any flags) */ -#define SLJIT_MOVU 14 +#define SLJIT_MOVU (SLJIT_OP1_BASE + 8) /* Flags: I - (never set any flags) */ -#define SLJIT_MOVU_UB 15 +#define SLJIT_MOVU_UB (SLJIT_OP1_BASE + 9) #define SLJIT_IMOVU_UB (SLJIT_MOVU_UB | SLJIT_INT_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOVU_SB 16 +#define SLJIT_MOVU_SB (SLJIT_OP1_BASE + 10) #define SLJIT_IMOVU_SB (SLJIT_MOVU_SB | SLJIT_INT_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOVU_UH 17 +#define SLJIT_MOVU_UH (SLJIT_OP1_BASE + 11) #define SLJIT_IMOVU_UH (SLJIT_MOVU_UH | SLJIT_INT_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOVU_SH 18 +#define SLJIT_MOVU_SH (SLJIT_OP1_BASE + 12) #define SLJIT_IMOVU_SH (SLJIT_MOVU_SH | SLJIT_INT_OP) /* Flags: I - (never set any flags) Note: see SLJIT_INT_OP for further details. */ -#define SLJIT_MOVU_UI 19 +#define SLJIT_MOVU_UI (SLJIT_OP1_BASE + 13) /* No SLJIT_INT_OP form, since it is the same as SLJIT_IMOVU. */ /* Flags: I - (never set any flags) Note: see SLJIT_INT_OP for further details. */ -#define SLJIT_MOVU_SI 20 +#define SLJIT_MOVU_SI (SLJIT_OP1_BASE + 14) #define SLJIT_IMOVU (SLJIT_MOVU_SI | SLJIT_INT_OP) /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_P 21 +#define SLJIT_MOVU_P (SLJIT_OP1_BASE + 15) /* Flags: I | E | K */ -#define SLJIT_NOT 22 +#define SLJIT_NOT (SLJIT_OP1_BASE + 16) #define SLJIT_INOT (SLJIT_NOT | SLJIT_INT_OP) /* Flags: I | E | O | K */ -#define SLJIT_NEG 23 +#define SLJIT_NEG (SLJIT_OP1_BASE + 17) #define SLJIT_INEG (SLJIT_NEG | SLJIT_INT_OP) /* Count leading zeroes Flags: I | E | K Important note! Sparc 32 does not support K flag, since the required popc instruction is introduced only in sparc 64. 
*/ -#define SLJIT_CLZ 24 +#define SLJIT_CLZ (SLJIT_OP1_BASE + 18) #define SLJIT_ICLZ (SLJIT_CLZ | SLJIT_INT_OP) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw); +/* Starting index of opcodes for sljit_emit_op2. */ +#define SLJIT_OP2_BASE 96 + /* Flags: I | E | O | C | K */ -#define SLJIT_ADD 25 +#define SLJIT_ADD (SLJIT_OP2_BASE + 0) #define SLJIT_IADD (SLJIT_ADD | SLJIT_INT_OP) /* Flags: I | C | K */ -#define SLJIT_ADDC 26 +#define SLJIT_ADDC (SLJIT_OP2_BASE + 1) #define SLJIT_IADDC (SLJIT_ADDC | SLJIT_INT_OP) /* Flags: I | E | U | S | O | C | K */ -#define SLJIT_SUB 27 +#define SLJIT_SUB (SLJIT_OP2_BASE + 2) #define SLJIT_ISUB (SLJIT_SUB | SLJIT_INT_OP) /* Flags: I | C | K */ -#define SLJIT_SUBC 28 +#define SLJIT_SUBC (SLJIT_OP2_BASE + 3) #define SLJIT_ISUBC (SLJIT_SUBC | SLJIT_INT_OP) /* Note: integer mul Flags: I | O (see SLJIT_C_MUL_*) | K */ -#define SLJIT_MUL 29 +#define SLJIT_MUL (SLJIT_OP2_BASE + 4) #define SLJIT_IMUL (SLJIT_MUL | SLJIT_INT_OP) /* Flags: I | E | K */ -#define SLJIT_AND 30 +#define SLJIT_AND (SLJIT_OP2_BASE + 5) #define SLJIT_IAND (SLJIT_AND | SLJIT_INT_OP) /* Flags: I | E | K */ -#define SLJIT_OR 31 +#define SLJIT_OR (SLJIT_OP2_BASE + 6) #define SLJIT_IOR (SLJIT_OR | SLJIT_INT_OP) /* Flags: I | E | K */ -#define SLJIT_XOR 32 +#define SLJIT_XOR (SLJIT_OP2_BASE + 7) #define SLJIT_IXOR (SLJIT_XOR | SLJIT_INT_OP) /* Flags: I | E | K Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 - to bit_length - 1, the operation is undefined. */ -#define SLJIT_SHL 33 + to bit_length - 1, the result is undefined. */ +#define SLJIT_SHL (SLJIT_OP2_BASE + 8) #define SLJIT_ISHL (SLJIT_SHL | SLJIT_INT_OP) /* Flags: I | E | K Let bit_length be the length of the shift operation: 32 or 64. 
If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 - to bit_length - 1, the operation is undefined. */ -#define SLJIT_LSHR 34 + to bit_length - 1, the result is undefined. */ +#define SLJIT_LSHR (SLJIT_OP2_BASE + 9) #define SLJIT_ILSHR (SLJIT_LSHR | SLJIT_INT_OP) /* Flags: I | E | K Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 - to bit_length - 1, the operation is undefined. */ -#define SLJIT_ASHR 35 + to bit_length - 1, the result is undefined. */ +#define SLJIT_ASHR (SLJIT_OP2_BASE + 10) #define SLJIT_IASHR (SLJIT_ASHR | SLJIT_INT_OP) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, @@ -708,68 +869,66 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w); -/* The following function is a helper function for sljit_emit_op_custom. - It returns with the real machine register index of any SLJIT_SCRATCH - SLJIT_SAVED or SLJIT_LOCALS register. - Note: it returns with -1 for virtual registers (all EREGs on x86-32). */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg); - -/* The following function is a helper function for sljit_emit_op_custom. - It returns with the real machine register index of any SLJIT_FLOAT register. - Note: the index is divided by 2 on ARM 32 bit architectures. */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg); - -/* Any instruction can be inserted into the instruction stream by - sljit_emit_op_custom. It has a similar purpose as inline assembly. - The size parameter must match to the instruction size of the target - architecture: - - x86: 0 < size <= 15. The instruction argument can be byte aligned. 
- Thumb2: if size == 2, the instruction argument must be 2 byte aligned. - if size == 4, the instruction argument must be 4 byte aligned. - Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size); - /* Returns with non-zero if fpu is available. */ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void); -/* Note: dst is the left and src is the right operand for SLJIT_FCMP. - Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED is set, - the comparison result is unpredictable. - Flags: SP | E | S (see SLJIT_C_FLOAT_*) */ -#define SLJIT_CMPD 36 -#define SLJIT_CMPS (SLJIT_CMPD | SLJIT_SINGLE_OP) +/* Starting index of opcodes for sljit_emit_fop1. */ +#define SLJIT_FOP1_BASE 128 + +/* Flags: SP - (never set any flags) */ +#define SLJIT_DMOV (SLJIT_FOP1_BASE + 0) +#define SLJIT_SMOV (SLJIT_DMOV | SLJIT_SINGLE_OP) +/* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE] + SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int + Rounding mode when the destination is W or I: round towards zero. 
*/ +/* Flags: SP - (never set any flags) */ +#define SLJIT_CONVD_FROMS (SLJIT_FOP1_BASE + 1) +#define SLJIT_CONVS_FROMD (SLJIT_CONVD_FROMS | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_CONVW_FROMD (SLJIT_FOP1_BASE + 2) +#define SLJIT_CONVW_FROMS (SLJIT_CONVW_FROMD | SLJIT_SINGLE_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_MOVD 37 -#define SLJIT_MOVS (SLJIT_MOVD | SLJIT_SINGLE_OP) +#define SLJIT_CONVI_FROMD (SLJIT_FOP1_BASE + 3) +#define SLJIT_CONVI_FROMS (SLJIT_CONVI_FROMD | SLJIT_SINGLE_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_NEGD 38 -#define SLJIT_NEGS (SLJIT_NEGD | SLJIT_SINGLE_OP) +#define SLJIT_CONVD_FROMW (SLJIT_FOP1_BASE + 4) +#define SLJIT_CONVS_FROMW (SLJIT_CONVD_FROMW | SLJIT_SINGLE_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_ABSD 39 -#define SLJIT_ABSS (SLJIT_ABSD | SLJIT_SINGLE_OP) +#define SLJIT_CONVD_FROMI (SLJIT_FOP1_BASE + 5) +#define SLJIT_CONVS_FROMI (SLJIT_CONVD_FROMI | SLJIT_SINGLE_OP) +/* Note: dst is the left and src is the right operand for SLJIT_DCMP. + Note: NaN check is always performed. If SLJIT_D_UNORDERED flag + is set, the comparison result is unpredictable. + Flags: SP | E | S (see SLJIT_D_* and SLJIT_S_*) */ +#define SLJIT_DCMP (SLJIT_FOP1_BASE + 6) +#define SLJIT_SCMP (SLJIT_DCMP | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_DNEG (SLJIT_FOP1_BASE + 7) +#define SLJIT_SNEG (SLJIT_DNEG | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_DABS (SLJIT_FOP1_BASE + 8) +#define SLJIT_SABS (SLJIT_DABS | SLJIT_SINGLE_OP) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw); +/* Starting index of opcodes for sljit_emit_fop2. 
*/ +#define SLJIT_FOP2_BASE 160 + /* Flags: SP - (never set any flags) */ -#define SLJIT_ADDD 40 -#define SLJIT_ADDS (SLJIT_ADDD | SLJIT_SINGLE_OP) +#define SLJIT_DADD (SLJIT_FOP2_BASE + 0) +#define SLJIT_SADD (SLJIT_DADD | SLJIT_SINGLE_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_SUBD 41 -#define SLJIT_SUBS (SLJIT_SUBD | SLJIT_SINGLE_OP) +#define SLJIT_DSUB (SLJIT_FOP2_BASE + 1) +#define SLJIT_SSUB (SLJIT_DSUB | SLJIT_SINGLE_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_MULD 42 -#define SLJIT_MULS (SLJIT_MULD | SLJIT_SINGLE_OP) +#define SLJIT_DMUL (SLJIT_FOP2_BASE + 2) +#define SLJIT_SMUL (SLJIT_DMUL | SLJIT_SINGLE_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_DIVD 43 -#define SLJIT_DIVS (SLJIT_DIVD | SLJIT_SINGLE_OP) +#define SLJIT_DDIV (SLJIT_FOP2_BASE + 3) +#define SLJIT_SDIV (SLJIT_DDIV | SLJIT_SINGLE_OP) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, @@ -780,36 +939,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler); -/* Invert conditional instruction: xor (^) with 0x1 */ -#define SLJIT_C_EQUAL 0 -#define SLJIT_C_ZERO 0 -#define SLJIT_C_NOT_EQUAL 1 -#define SLJIT_C_NOT_ZERO 1 - -#define SLJIT_C_LESS 2 -#define SLJIT_C_GREATER_EQUAL 3 -#define SLJIT_C_GREATER 4 -#define SLJIT_C_LESS_EQUAL 5 -#define SLJIT_C_SIG_LESS 6 -#define SLJIT_C_SIG_GREATER_EQUAL 7 -#define SLJIT_C_SIG_GREATER 8 -#define SLJIT_C_SIG_LESS_EQUAL 9 - -#define SLJIT_C_OVERFLOW 10 -#define SLJIT_C_NOT_OVERFLOW 11 - -#define SLJIT_C_MUL_OVERFLOW 12 -#define SLJIT_C_MUL_NOT_OVERFLOW 13 - -#define SLJIT_C_FLOAT_EQUAL 14 -#define SLJIT_C_FLOAT_NOT_EQUAL 15 -#define SLJIT_C_FLOAT_LESS 16 -#define SLJIT_C_FLOAT_GREATER_EQUAL 17 -#define SLJIT_C_FLOAT_GREATER 18 -#define SLJIT_C_FLOAT_LESS_EQUAL 19 -#define SLJIT_C_FLOAT_UNORDERED 20 -#define 
SLJIT_C_FLOAT_ORDERED 21 - +/* Invert (negate) conditional type: xor (^) with 0x1 */ + +/* Integer comparison types. */ +#define SLJIT_EQUAL 0 +#define SLJIT_I_EQUAL (SLJIT_EQUAL | SLJIT_INT_OP) +#define SLJIT_ZERO 0 +#define SLJIT_I_ZERO (SLJIT_ZERO | SLJIT_INT_OP) +#define SLJIT_NOT_EQUAL 1 +#define SLJIT_I_NOT_EQUAL (SLJIT_NOT_EQUAL | SLJIT_INT_OP) +#define SLJIT_NOT_ZERO 1 +#define SLJIT_I_NOT_ZERO (SLJIT_NOT_ZERO | SLJIT_INT_OP) + +#define SLJIT_LESS 2 +#define SLJIT_I_LESS (SLJIT_LESS | SLJIT_INT_OP) +#define SLJIT_GREATER_EQUAL 3 +#define SLJIT_I_GREATER_EQUAL (SLJIT_GREATER_EQUAL | SLJIT_INT_OP) +#define SLJIT_GREATER 4 +#define SLJIT_I_GREATER (SLJIT_GREATER | SLJIT_INT_OP) +#define SLJIT_LESS_EQUAL 5 +#define SLJIT_I_LESS_EQUAL (SLJIT_LESS_EQUAL | SLJIT_INT_OP) +#define SLJIT_SIG_LESS 6 +#define SLJIT_I_SIG_LESS (SLJIT_SIG_LESS | SLJIT_INT_OP) +#define SLJIT_SIG_GREATER_EQUAL 7 +#define SLJIT_I_SIG_GREATER_EQUAL (SLJIT_SIG_GREATER_EQUAL | SLJIT_INT_OP) +#define SLJIT_SIG_GREATER 8 +#define SLJIT_I_SIG_GREATER (SLJIT_SIG_GREATER | SLJIT_INT_OP) +#define SLJIT_SIG_LESS_EQUAL 9 +#define SLJIT_I_SIG_LESS_EQUAL (SLJIT_SIG_LESS_EQUAL | SLJIT_INT_OP) + +#define SLJIT_OVERFLOW 10 +#define SLJIT_I_OVERFLOW (SLJIT_OVERFLOW | SLJIT_INT_OP) +#define SLJIT_NOT_OVERFLOW 11 +#define SLJIT_I_NOT_OVERFLOW (SLJIT_NOT_OVERFLOW | SLJIT_INT_OP) + +#define SLJIT_MUL_OVERFLOW 12 +#define SLJIT_I_MUL_OVERFLOW (SLJIT_MUL_OVERFLOW | SLJIT_INT_OP) +#define SLJIT_MUL_NOT_OVERFLOW 13 +#define SLJIT_I_MUL_NOT_OVERFLOW (SLJIT_MUL_NOT_OVERFLOW | SLJIT_INT_OP) + +/* Floating point comparison types. 
*/ +#define SLJIT_D_EQUAL 14 +#define SLJIT_S_EQUAL (SLJIT_D_EQUAL | SLJIT_SINGLE_OP) +#define SLJIT_D_NOT_EQUAL 15 +#define SLJIT_S_NOT_EQUAL (SLJIT_D_NOT_EQUAL | SLJIT_SINGLE_OP) +#define SLJIT_D_LESS 16 +#define SLJIT_S_LESS (SLJIT_D_LESS | SLJIT_SINGLE_OP) +#define SLJIT_D_GREATER_EQUAL 17 +#define SLJIT_S_GREATER_EQUAL (SLJIT_D_GREATER_EQUAL | SLJIT_SINGLE_OP) +#define SLJIT_D_GREATER 18 +#define SLJIT_S_GREATER (SLJIT_D_GREATER | SLJIT_SINGLE_OP) +#define SLJIT_D_LESS_EQUAL 19 +#define SLJIT_S_LESS_EQUAL (SLJIT_D_LESS_EQUAL | SLJIT_SINGLE_OP) +#define SLJIT_D_UNORDERED 20 +#define SLJIT_S_UNORDERED (SLJIT_D_UNORDERED | SLJIT_SINGLE_OP) +#define SLJIT_D_ORDERED 21 +#define SLJIT_S_ORDERED (SLJIT_D_ORDERED | SLJIT_SINGLE_OP) + +/* Unconditional jump types. */ #define SLJIT_JUMP 22 #define SLJIT_FAST_CALL 23 #define SLJIT_CALL0 24 @@ -823,7 +1010,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #define SLJIT_REWRITABLE_JUMP 0x1000 /* Emit a jump instruction. The destination is not set, only the type of the jump. - type must be between SLJIT_C_EQUAL and SLJIT_CALL3 + type must be between SLJIT_EQUAL and SLJIT_CALL3 type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP Flags: - (never set any flags) for both conditional and unconditional jumps. Flags: destroy all flags for calls. */ @@ -832,10 +1019,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile /* Basic arithmetic comparison. In most architectures it is implemented as an SLJIT_SUB operation (with SLJIT_UNUSED destination and setting appropriate flags) followed by a sljit_emit_jump. However some - architectures (i.e: MIPS) may employ special optimizations here. It is - suggested to use this comparison form when appropriate. - type must be between SLJIT_C_EQUAL and SLJIT_C_SIG_LESS_EQUAL - type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP or SLJIT_INT_OP + architectures (i.e: ARM64 or MIPS) may employ special optimizations here. 
+ It is suggested to use this comparison form when appropriate. + type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP Flags: destroy flags. */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, sljit_si src1, sljit_sw src1w, @@ -846,11 +1033,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler sljit_emit_jump. However some architectures (i.e: MIPS) may employ special optimizations here. It is suggested to use this comparison form when appropriate. - type must be between SLJIT_C_FLOAT_EQUAL and SLJIT_C_FLOAT_ORDERED - type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP and SLJIT_SINGLE_OP + type must be between SLJIT_D_EQUAL and SLJIT_S_ORDERED + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP Flags: destroy flags. Note: if either operand is NaN, the behaviour is undefined for - type <= SLJIT_C_FLOAT_LESS_EQUAL. */ + types up to SLJIT_S_LESS_EQUAL. */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w); @@ -869,8 +1056,8 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw); /* Perform the operation using the conditional flags as the second argument. - Type must always be between SLJIT_C_EQUAL and SLJIT_C_FLOAT_ORDERED. The - value represented by the type is 1, if the condition represented by the type + Type must always be between SLJIT_EQUAL and SLJIT_S_ORDERED. The value + represented by the type is 1, if the condition represented by the type is fulfilled, and 0 otherwise. 
If op == SLJIT_MOV, SLJIT_MOV_SI, SLJIT_MOV_UI: @@ -888,7 +1075,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com sljit_si src, sljit_sw srcw, sljit_si type); -/* Copies the base address of SLJIT_LOCALS_REG+offset to dst. +/* Copies the base address of SLJIT_SP + offset to dst. Flags: - (never set any flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset); @@ -912,7 +1099,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta /* --------------------------------------------------------------------- */ #define SLJIT_MAJOR_VERSION 0 -#define SLJIT_MINOR_VERSION 91 +#define SLJIT_MINOR_VERSION 93 /* Get the human readable name of the platform. Can be useful on platforms like ARM, where ARM and Thumb2 functions can be mixed, and @@ -958,10 +1145,11 @@ struct sljit_stack { }; /* Returns NULL if unsuccessful. - Note: limit and max_limit contains the size for stack allocation - Note: the top field is initialized to base. */ -SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit); -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack); + Note: limit and max_limit contains the size for stack allocation. + Note: the top field is initialized to base. + Note: see sljit_create_compiler for the explanation of allocator_data. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data); +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *stack, void *allocator_data); /* Can be used to increase (allocate) or decrease (free) the memory area. Returns with a non-zero value if unsuccessful. 
If new_limit is greater than @@ -969,7 +1157,7 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* st since the growth ratio can be added to the current limit, and sljit_stack_resize will do all the necessary checks. The fields of the stack are not changed if sljit_stack_resize fails. */ -SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_uw new_limit); +SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_uw new_limit); #endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */ @@ -998,4 +1186,64 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct #endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ +/* --------------------------------------------------------------------- */ +/* CPU specific functions */ +/* --------------------------------------------------------------------- */ + +/* The following function is a helper function for sljit_emit_op_custom. + It returns with the real machine register index ( >=0 ) of any SLJIT_R, + SLJIT_S and SLJIT_SP registers. + + Note: it returns with -1 for virtual registers (only on x86-32). */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg); + +/* The following function is a helper function for sljit_emit_op_custom. + It returns with the real machine register index of any SLJIT_FLOAT register. + + Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg); + +/* Any instruction can be inserted into the instruction stream by + sljit_emit_op_custom. It has a similar purpose as inline assembly. + The size parameter must match to the instruction size of the target + architecture: + + x86: 0 < size <= 15. The instruction argument can be byte aligned. + Thumb2: if size == 2, the instruction argument must be 2 byte aligned. 
+ if size == 4, the instruction argument must be 4 byte aligned. + Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size); + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +/* Returns with non-zero if sse2 is available. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void); + +/* Returns with non-zero if cmov instruction is available. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void); + +/* Emit a conditional mov instruction on x86 CPUs. This instruction + moves src to destination, if the condition is satisfied. Unlike + other arithmetic instructions, destination must be a register. + Before such instructions are emitted, cmov support should be + checked by sljit_x86_is_cmov_available function. + type must be between SLJIT_EQUAL and SLJIT_S_ORDERED + dst_reg must be a valid register and it can be combined + with SLJIT_INT_OP to perform 32 bit arithmetic + Flags: I - (never set any flags) + */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler, + sljit_si type, + sljit_si dst_reg, + sljit_si src, sljit_sw srcw); + +#endif + #endif /* _SLJIT_LIR_H_ */ diff --git a/sljit/sljitNativeARM_32.c b/sljit/sljitNativeARM_32.c index 6747c4f..5cd4c71 100644 --- a/sljit/sljitNativeARM_32.c +++ b/sljit/sljitNativeARM_32.c @@ -36,13 +36,13 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) } /* Last register + 1. 
*/ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_PC (SLJIT_NO_REGISTERS + 4) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) /* In ARM instruction words. Cache lines are usually 32 byte aligned. */ @@ -55,8 +55,8 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) (((max_diff) / (sljit_si)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { - 0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15 }; #define RM(rm) (reg_map[rm]) @@ -102,8 +102,12 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define VABS_F32 0xeeb00ac0 #define VADD_F32 0xee300a00 #define VCMP_F32 0xeeb40a40 +#define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F64_F32 0xeeb70ac0 +#define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 #define VMOV_F32 0xeeb00a40 +#define VMOV 0xee000a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 @@ -311,11 +315,13 @@ struct future_patch { sljit_si value; }; -static SLJIT_INLINE sljit_si resolve_const_pool_index(struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) +static sljit_si resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) { sljit_si value; struct future_patch *curr_patch, 
*prev_patch; + SLJIT_UNUSED_ARG(compiler); + /* Using the values generated by patch_pc_relative_loads. */ if (!*first_patch) value = (sljit_si)cpool_start_address[cpool_current_index]; @@ -333,7 +339,7 @@ static SLJIT_INLINE sljit_si resolve_const_pool_index(struct future_patch **firs prev_patch->next = curr_patch->next; else *first_patch = curr_patch->next; - SLJIT_FREE(curr_patch); + SLJIT_FREE(curr_patch, compiler->allocator_data); break; } prev_patch = curr_patch; @@ -343,12 +349,12 @@ static SLJIT_INLINE sljit_si resolve_const_pool_index(struct future_patch **firs if (value >= 0) { if ((sljit_uw)value > cpool_current_index) { - curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch)); + curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data); if (!curr_patch) { while (*first_patch) { curr_patch = *first_patch; *first_patch = (*first_patch)->next; - SLJIT_FREE(curr_patch); + SLJIT_FREE(curr_patch, compiler->allocator_data); } return SLJIT_ERR_ALLOC_FAILED; } @@ -570,7 +576,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil struct sljit_const *const_; CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); + CHECK_PTR(check_sljit_generate_code(compiler)); reverse_buf(compiler); /* Second code generation pass. 
*/ @@ -619,7 +625,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil cpool_skip_alignment--; } else { - if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { + if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { SLJIT_FREE_EXEC(code); compiler->error = SLJIT_ERR_ALLOC_FAILED; return NULL; @@ -709,7 +715,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf_end = buf_ptr + compiler->cpool_fill; cpool_current_index = 0; while (buf_ptr < buf_end) { - if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { + if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { SLJIT_FREE_EXEC(code); compiler->error = SLJIT_ERR_ALLOC_FAILED; return NULL; @@ -810,9 +816,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #define SIGNED_DATA 0x40 #define LOAD_DATA 0x80 -#define EMIT_INSTRUCTION(inst) \ - FAIL_IF(push_inst(compiler, (inst))) - /* Condition: AL. 
*/ #define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \ (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2)) @@ -822,111 +825,85 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w); -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si size; + sljit_si size, i, tmp; sljit_uw push; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); /* Push saved registers, temporary registers stmdb sp!, {..., lr} */ push = PUSH | (1 << 14); - if (scratches >= 5) - push |= 1 << 11; - if (scratches >= 4) - push |= 1 << 10; - if (saveds >= 5) - push |= 1 << 8; - if (saveds >= 4) - push |= 1 << 7; - if (saveds >= 3) - push |= 1 << 6; - if (saveds >= 2) - push |= 1 << 5; - if (saveds >= 1) - push |= 1 << 4; - EMIT_INSTRUCTION(push); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + push |= 1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + push |= 1 << reg_map[i]; + + FAIL_IF(push_inst(compiler, push)); /* Stack must be aligned to 8 bytes: */ - size = (1 + saveds) * sizeof(sljit_uw); - if (scratches >= 4) - size += (scratches - 3) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + local_size = ((size + local_size + 7) & ~7) - size; compiler->local_size = local_size; if (local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size)); + FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); if (args >= 1) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0)))); if (args >= 2) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1)))); if (args >= 3) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG3))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2)))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_si size; - CHECK_ERROR_VOID(); - 
check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - size = (1 + saveds) * sizeof(sljit_uw); - if (scratches >= 4) - size += (scratches - 3) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; - compiler->local_size = local_size; + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + compiler->local_size = ((size + local_size + 7) & ~7) - size; + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { + sljit_si i, tmp; sljit_uw pop; CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); if (compiler->local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); + FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); - pop = POP | (1 << 15); /* Push saved registers, temporary registers ldmia sp!, {..., pc} */ - if (compiler->scratches >= 5) - pop |= 1 << 11; - if (compiler->scratches >= 4) - pop |= 1 << 10; - if (compiler->saveds >= 5) - pop |= 1 << 8; - if (compiler->saveds >= 4) - pop |= 1 << 7; - if (compiler->saveds >= 3) - pop |= 1 << 6; - if (compiler->saveds >= 2) - pop |= 1 << 5; - if (compiler->saveds >= 1) - pop |= 1 << 4; + pop = POP | (1 << 15); + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + pop |= 1 << reg_map[i]; + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + pop |= 1 << reg_map[i]; return push_inst(compiler, pop); } @@ -1031,7 +1008,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) if (op == SLJIT_MOV_UB) return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2]))); return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst])); #else return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2)); @@ -1050,7 +1027,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2]))); return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst])); #else return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2)); @@ -1303,8 +1280,8 @@ static sljit_si generate_int(struct sljit_compiler *compiler, sljit_si reg, slji return 0; } - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? 
ORR_DP : BIC_DP, 0, reg, reg, imm2)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2))); return 1; } #endif @@ -1320,16 +1297,12 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sl /* Create imm by 1 inst. */ tmp = get_imm(imm); - if (tmp) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp)); - return SLJIT_SUCCESS; - } + if (tmp) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp)); tmp = get_imm(~imm); - if (tmp) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp)); - return SLJIT_SUCCESS; - } + if (tmp) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp)); #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) /* Create imm by 2 inst. */ @@ -1369,14 +1342,14 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl if (imm) { if (inp_flags & ARG_TEST) return 1; - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm))); return -1; } imm = get_imm(~argw); if (imm) { if (inp_flags & ARG_TEST) return 1; - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm))); return -1; } return 0; @@ -1394,8 +1367,8 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl if (inp_flags & ARG_TEST) return 1; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, - RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? 
SRC2_IMM : 0) | ((argw & 0x3) << 7))); + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, + RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7)))); return -1; } @@ -1403,13 +1376,13 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl if (argw >= 0 && argw <= 0xfff) { if (inp_flags & ARG_TEST) return 1; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw)); + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw))); return -1; } if (argw < 0 && argw >= -0xfff) { if (inp_flags & ARG_TEST) return 1; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw)); + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw))); return -1; } } @@ -1417,14 +1390,14 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl if (argw >= 0 && argw <= 0xff) { if (inp_flags & ARG_TEST) return 1; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))); + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)))); return -1; } if (argw < 0 && argw >= -0xff) { if (inp_flags & ARG_TEST) return 1; argw = -argw; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))); + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)))); return -1; } } @@ -1477,7 +1450,7 @@ static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_ /* This can only happen for stores */ \ /* since ldr reg, [reg, ...]! 
has no meaning */ \ SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \ - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg))); \ + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg)))); \ reg = TMP_REG3; \ } \ } @@ -1537,9 +1510,8 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00)); if (inp_flags & WRITE_BACK) tmp_r = arg & REG_MASK; - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))); - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0))); - return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)))); + return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0))); } imm = (sljit_uw)(argw - compiler->cache_argw); @@ -1558,7 +1530,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, imm = get_imm(argw & ~max_delta); if (imm) { TEST_WRITE_BACK(); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm))); GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta); return SLJIT_SUCCESS; } @@ -1567,15 +1539,14 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, if (imm) { argw = -argw; TEST_WRITE_BACK(); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm))); GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta); return SLJIT_SUCCESS; } if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) { TEST_WRITE_BACK(); - 
EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); - return SLJIT_SUCCESS; + return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); } if (argw == next_argw && (next_arg & SLJIT_MEM)) { @@ -1586,15 +1557,14 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, compiler->cache_argw = argw; TEST_WRITE_BACK(); - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); - return SLJIT_SUCCESS; + return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); } imm = (sljit_uw)(argw - next_argw); if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { SLJIT_ASSERT(inp_flags & LOAD_DATA); FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK])); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK]))); compiler->cache_arg = arg; compiler->cache_argw = argw; @@ -1610,8 +1580,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, } FAIL_IF(load_immediate(compiler, tmp_r, argw)); - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0))); - return SLJIT_SUCCESS; + return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? 
SRC2_IMM : 0))); } static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) @@ -1838,44 +1807,59 @@ extern int __aeabi_idivmod(int numerator, int denominator); SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) { CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); + CHECK(check_sljit_emit_op0(compiler, op)); op = GET_OPCODE(op); switch (op) { case SLJIT_BREAKPOINT: - EMIT_INSTRUCTION(BKPT); + FAIL_IF(push_inst(compiler, BKPT)); break; case SLJIT_NOP: - EMIT_INSTRUCTION(NOP); + FAIL_IF(push_inst(compiler, NOP)); break; - case SLJIT_UMUL: - case SLJIT_SMUL: + case SLJIT_LUMUL: + case SLJIT_LSMUL: #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) - | (reg_map[SLJIT_SCRATCH_REG2] << 16) - | (reg_map[SLJIT_SCRATCH_REG1] << 12) - | (reg_map[SLJIT_SCRATCH_REG1] << 8) - | reg_map[SLJIT_SCRATCH_REG2]); + return push_inst(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL) + | (reg_map[SLJIT_R1] << 16) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 8) + | reg_map[SLJIT_R1]); #else - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) - | (reg_map[SLJIT_SCRATCH_REG2] << 16) - | (reg_map[SLJIT_SCRATCH_REG1] << 12) - | (reg_map[SLJIT_SCRATCH_REG1] << 8) + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1)))); + return push_inst(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL) + | (reg_map[SLJIT_R1] << 16) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 8) | reg_map[TMP_REG1]); #endif - case SLJIT_UDIV: - case SLJIT_SDIV: - if (compiler->scratches >= 3) - EMIT_INSTRUCTION(0xe52d2008 /* str r2, [sp, #-8]! 
*/); + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: + case SLJIT_UDIVI: + case SLJIT_SDIVI: + SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); + SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2, bad_register_mapping); + + if ((op >= SLJIT_UDIVI) && (compiler->scratches >= 3)) { + FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */)); + FAIL_IF(push_inst(compiler, 0xe58d1004 /* str r1, [sp, #4] */)); + } + else if ((op >= SLJIT_UDIVI) || (compiler->scratches >= 3)) + FAIL_IF(push_inst(compiler, 0xe52d0008 | (op >= SLJIT_UDIVI ? 0x1000 : 0x2000) /* str r1/r2, [sp, #-8]! */)); + #if defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); + ((op | 0x2) == SLJIT_UDIVI ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); #else #error "Software divmod functions are needed" #endif - if (compiler->scratches >= 3) - return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */); + + if ((op >= SLJIT_UDIVI) && (compiler->scratches >= 3)) { + FAIL_IF(push_inst(compiler, 0xe59d1004 /* ldr r1, [sp, #4] */)); + FAIL_IF(push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */)); + } + else if ((op >= SLJIT_UDIVI) || (compiler->scratches >= 3)) + return push_inst(compiler, 0xe49d0008 | (op >= SLJIT_UDIVI ? 
0x1000 : 0x2000) /* ldr r1/r2, [sp], #8 */); return SLJIT_SUCCESS; } @@ -1887,7 +1871,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler sljit_si src, sljit_sw srcw) { CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); @@ -1932,7 +1916,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_NEG: -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw); @@ -1950,7 +1935,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler sljit_si src2, sljit_sw src2w) { CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); @@ -1988,22 +1973,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) { - check_sljit_get_register_index(reg); + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); return reg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) { - check_sljit_get_float_register_index(reg); - return reg; + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return reg << 1; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_si size) { 
CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size == 4); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); return push_inst(compiler, *(sljit_uw*)instruction); } @@ -2064,7 +2048,7 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl SLJIT_ASSERT(arg & SLJIT_MEM); if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)))); arg = SLJIT_MEM | TMP_REG1; argw = 0; } @@ -2097,13 +2081,13 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl } imm = get_imm(argw & ~0x3fc); if (imm) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm))); return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2)); } imm = get_imm(-argw & ~0x3fc); if (imm) { argw = -argw; - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm))); return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2)); } } @@ -2112,7 +2096,7 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl compiler->cache_argw = argw; if (arg & REG_MASK) { FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1])); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1]))); } else FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); @@ -2120,60 +2104,114 @@ static sljit_si emit_fop_mem(struct 
sljit_compiler *compiler, sljit_si flags, sl return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0)); } +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_SINGLE_OP, TMP_FREG1, src, 0))); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16)); + + /* Store the integer value from a VFP register. */ + return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16))); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. 
*/ + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16))); + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_SINGLE_OP, dst_r, TMP_FREG1, 0))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, src1, src2, 0))); + return push_inst(compiler, VMRS); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_si dst_fr; + sljit_si dst_r; CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); - compiler->cache_arg = 0; compiler->cache_argw = 0; - op ^= SLJIT_SINGLE_OP; + if (GET_OPCODE(op) != SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw)); - dst = TMP_FREG1; - } - if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw)); - src = TMP_FREG2; - } - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, dst, src, 0)); - EMIT_INSTRUCTION(VMRS); - return SLJIT_SUCCESS; 
- } + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_fr, src, srcw)); - src = dst_fr; + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw)); + src = dst_r; } switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_fr && dst_fr != TMP_FREG1) - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0)); - break; - case SLJIT_NEGD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0)); - break; - case SLJIT_ABSD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0)); - break; - } - - if (dst_fr == TMP_FREG1) { - if (GET_OPCODE(op) == SLJIT_MOVD) - dst_fr = src; - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_fr, dst, dstw)); + case SLJIT_DMOV: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + else + dst_r = src; + } + break; + case SLJIT_DNEG: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + break; + case SLJIT_DABS: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + op ^= SLJIT_SINGLE_OP; + break; } + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw); return SLJIT_SUCCESS; } @@ -2182,16 +2220,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_si dst_fr; + 
sljit_si dst_r; CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); compiler->cache_arg = 0; compiler->cache_argw = 0; op ^= SLJIT_SINGLE_OP; - dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src2 & SLJIT_MEM) { FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); @@ -2204,24 +2245,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile } switch (GET_OPCODE(op)) { - case SLJIT_ADDD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1)); + case SLJIT_DADD: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); break; - case SLJIT_SUBD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1)); + case SLJIT_DSUB: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); break; - case SLJIT_MULD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1)); + case SLJIT_DMUL: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); break; - case SLJIT_DIVD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1)); + case SLJIT_DDIV: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); break; } - if (dst_fr == TMP_FREG1) + if (dst_r == TMP_FREG1) FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw)); return SLJIT_SUCCESS; @@ -2238,7 +2279,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct 
sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) { CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); /* For UNUSED dst. Uncommon, but possible. */ @@ -2252,7 +2293,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw)) return compiler->error; /* TMP_REG3 is used for caching. */ - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3)))); compiler->cache_arg = 0; compiler->cache_argw = 0; return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0); @@ -2261,11 +2302,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) { CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); if (FAST_IS_REG(src)) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src)))); else if (src & SLJIT_MEM) { if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw)) FAIL_IF(compiler->error); @@ -2273,7 +2314,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * compiler->cache_arg = 0; compiler->cache_argw = 0; FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2)))); } } else if (src & SLJIT_IMM) @@ -2288,53 
+2329,54 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * static sljit_uw get_cc(sljit_si type) { switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_MUL_NOT_OVERFLOW: - case SLJIT_C_FLOAT_EQUAL: + case SLJIT_EQUAL: + case SLJIT_MUL_NOT_OVERFLOW: + case SLJIT_D_EQUAL: return 0x00000000; - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_MUL_OVERFLOW: - case SLJIT_C_FLOAT_NOT_EQUAL: + case SLJIT_NOT_EQUAL: + case SLJIT_MUL_OVERFLOW: + case SLJIT_D_NOT_EQUAL: return 0x10000000; - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: + case SLJIT_LESS: + case SLJIT_D_LESS: return 0x30000000; - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: return 0x20000000; - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: + case SLJIT_GREATER: + case SLJIT_D_GREATER: return 0x80000000; - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: return 0x90000000; - case SLJIT_C_SIG_LESS: + case SLJIT_SIG_LESS: return 0xb0000000; - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: return 0xa0000000; - case SLJIT_C_SIG_GREATER: + case SLJIT_SIG_GREATER: return 0xc0000000; - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: return 0xd0000000; - case SLJIT_C_OVERFLOW: - case SLJIT_C_FLOAT_UNORDERED: + case SLJIT_OVERFLOW: + case SLJIT_D_UNORDERED: return 0x60000000; - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_FLOAT_ORDERED: + case SLJIT_NOT_OVERFLOW: + case SLJIT_D_ORDERED: return 0x70000000; - default: /* SLJIT_JUMP */ + default: + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); return 0xe0000000; } } @@ -2344,7 +2386,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi struct sljit_label *label; CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); + CHECK_PTR(check_sljit_emit_label(compiler)); if (compiler->last_label && compiler->last_label->size == compiler->size) return 
compiler->last_label; @@ -2360,7 +2402,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile struct sljit_jump *jump; CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); @@ -2401,7 +2443,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil struct sljit_jump *jump; CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); /* In ARM, we don't need to touch the arguments. */ @@ -2442,7 +2484,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com sljit_uw cc, ins; CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); @@ -2450,18 +2492,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com return SLJIT_SUCCESS; op = GET_OPCODE(op); - cc = get_cc(type); + cc = get_cc(type & 0xff); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; if (op < SLJIT_ADD) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)); - EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0))); + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc)); return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS; } ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? 
ORR_DP : EOR_DP)); if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) { - EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc); + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc)); /* The condition must always be set, even if the ORR/EOR is not executed above. */ return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS; } @@ -2478,8 +2520,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com srcw = 0; } - EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc); - EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)); + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc)); + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000))); if (dst_r == TMP_REG2) FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0)); @@ -2492,7 +2534,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi sljit_si reg; CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); diff --git a/sljit/sljitNativeARM_64.c b/sljit/sljitNativeARM_64.c index cfd1a38..044a675 100644 --- a/sljit/sljitNativeARM_64.c +++ b/sljit/sljitNativeARM_64.c @@ -32,20 +32,19 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) /* Length of an instruction word */ typedef sljit_ui sljit_ins; -#define TMP_ZERO 0 +#define TMP_ZERO (0) -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 
2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_REG4 (SLJIT_NO_REGISTERS + 4) -#define TMP_LR (SLJIT_NO_REGISTERS + 5) -#define TMP_SP (SLJIT_NO_REGISTERS + 6) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 5) +#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 6) #define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { - 31, 0, 1, 2, 3, 4, 19, 20, 21, 22, 23, 29, 9, 10, 11, 12, 30, 31 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { + 31, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 30, 31 }; #define W_OP (1 << 31) @@ -83,6 +82,8 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { #define FABS 0x1e60c000 #define FADD 0x1e602800 #define FCMP 0x1e602000 +#define FCVT 0x1e224000 +#define FCVTZS 0x9e780000 #define FDIV 0x1e601800 #define FMOV 0x1e604000 #define FMUL 0x1e600800 @@ -104,6 +105,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { #define RET 0xd65f0000 #define SBC 0xda000000 #define SBFM 0x93000000 +#define SCVTF 0x9e620000 #define SDIV 0x9ac00c00 #define SMADDL 0x9b200000 #define SMULH 0x9b403c00 @@ -217,7 +219,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil struct sljit_const *const_; CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); + CHECK_PTR(check_sljit_generate_code(compiler)); reverse_buf(compiler); code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); @@ -727,12 +729,12 @@ static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, slj return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)); if (flags & INT_OP) { 
FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10))); - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG4) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10))); - return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_REG4) | RM(dst) | (2 << 22) | (63 << 10)); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10))); + return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10)); } - FAIL_IF(push_inst(compiler, SMULH | RD(TMP_REG4) | RN(arg1) | RM(arg2))); + FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2))); FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO))); - return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_REG4) | RM(dst) | (2 << 22) | (63 << 10)); + return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10)); case SLJIT_AND: CHECK_FLAGS(3 << 29); return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); @@ -973,9 +975,9 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, slji FAIL_IF(push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10))); return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg)); } - FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG4) | RN(arg) | RM(other_r) | (argw << 10))); - FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG4))); - return push_inst(compiler, ORR | RD(arg) | RN(TMP_ZERO) | RM(TMP_REG4)); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(arg) | RM(other_r) | (argw << 10))); + FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_LR))); + return push_inst(compiler, ORR | RD(arg) | RN(TMP_ZERO) | RM(TMP_LR)); } if (arg & OFFS_REG_MASK) { @@ -1058,101 +1060,132 @@ static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit /* Entry, exit */ 
/* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { + sljit_si i, tmp, offs, prev, saved_regs_size; + CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - compiler->locals_offset = (2 + saveds) * sizeof(sljit_sw); - local_size = (compiler->locals_offset + local_size + 15) & ~15; + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0); + local_size += saved_regs_size + SLJIT_LOCALS_OFFSET; + local_size = (local_size + 15) & ~0xf; compiler->local_size = local_size; - if (local_size <= (64 << 3)) + if (local_size <= (63 * sizeof(sljit_sw))) { FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) | RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15))); - else { - local_size -= (64 << 3); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); + offs = (local_size - saved_regs_size) << (15 - 3); + } else { + offs = 0 << 15; + if (saved_regs_size & 0x8) { + offs = 1 << 15; + saved_regs_size += sizeof(sljit_sw); + } + local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; + if (saved_regs_size > 0) + FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); + } + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + prev = -1; + for (i = SLJIT_S0; i >= tmp; i--) { + if (prev == -1) { + if (!(offs & (1 << 15))) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); + offs += 1 << 15; + continue; + } + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + if (prev == -1) { + if (!(offs & (1 << 15))) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); + offs += 1 << 15; + continue; + } + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + SLJIT_ASSERT(prev == -1); + + if (compiler->local_size > (63 * sizeof(sljit_sw))) { + /* The local_size is already adjusted by the saved registers. */ if (local_size > 0xfff) { FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); local_size &= 0xfff; } if (local_size) FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); - FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) | RN(TMP_SP) | (0x40 << 15))); + FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) + | RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); } - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_LOCALS_REG) | RN(TMP_SP))); - - if (saveds >= 2) - FAIL_IF(push_inst(compiler, STP | RT(SLJIT_SAVED_REG1) | RT2(SLJIT_SAVED_REG2) | RN(TMP_SP) | (2 << 15))); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, STP | RT(SLJIT_SAVED_REG3) | RT2(SLJIT_SAVED_EREG1) | RN(TMP_SP) | (4 << 15))); - if (saveds == 1) - FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_REG1) | RN(TMP_SP) | (2 << 10))); - if (saveds == 3) - FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_REG3) | RN(TMP_SP) | (4 << 10))); - if (saveds == 5) - 
FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_EREG2) | RN(TMP_SP) | (6 << 10))); - if (args >= 1) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0))); if (args >= 2) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG2) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1))); if (args >= 3) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG3) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG3))); + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - compiler->locals_offset = (2 + saveds) * sizeof(sljit_sw); - compiler->local_size = (compiler->locals_offset + local_size + 15) & ~15; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET; + local_size = (local_size + 15) & ~0xf; + compiler->local_size = local_size; + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, 
sljit_sw srcw) { - sljit_si saveds, local_size; + sljit_si local_size; + sljit_si i, tmp, offs, prev, saved_regs_size; CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - saveds = compiler->saveds; - - if (saveds >= 2) - FAIL_IF(push_inst(compiler, LDP | RT(SLJIT_SAVED_REG1) | RT2(SLJIT_SAVED_REG2) | RN(TMP_SP) | (2 << 15))); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, LDP | RT(SLJIT_SAVED_REG3) | RT2(SLJIT_SAVED_EREG1) | RN(TMP_SP) | (4 << 15))); - if (saveds == 1) - FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_REG1) | RN(TMP_SP) | (2 << 10))); - if (saveds == 3) - FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_REG3) | RN(TMP_SP) | (4 << 10))); - if (saveds == 5) - FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_EREG2) | RN(TMP_SP) | (6 << 10))); - local_size = compiler->local_size; - if (local_size <= (62 << 3)) - FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) - | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15))); + saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0); + if (local_size <= (63 * sizeof(sljit_sw))) + offs = (local_size - saved_regs_size) << (15 - 3); else { - FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) | RN(TMP_SP) | (0x3e << 15))); - local_size -= (62 << 3); + FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) + | RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15))); + offs = 0 << 15; + if (saved_regs_size & 0x8) { + offs = 1 << 15; + saved_regs_size += sizeof(sljit_sw); + } + local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; if (local_size > 0xfff) { FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); local_size &= 0xfff; @@ -1161,6 +1194,47 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 
10))); } + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + prev = -1; + for (i = SLJIT_S0; i >= tmp; i--) { + if (prev == -1) { + if (!(offs & (1 << 15))) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); + offs += 1 << 15; + continue; + } + FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + if (prev == -1) { + if (!(offs & (1 << 15))) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); + offs += 1 << 15; + continue; + } + FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + SLJIT_ASSERT(prev == -1); + + if (compiler->local_size <= (63 * sizeof(sljit_sw))) { + FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) + | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15))); + } else if (saved_regs_size > 0) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); + } + FAIL_IF(push_inst(compiler, RET | RN(TMP_LR))); return SLJIT_SUCCESS; } @@ -1174,7 +1248,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler sljit_ins inv_bits = (op & SLJIT_INT_OP) ? 
(1 << 31) : 0; CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); + CHECK(check_sljit_emit_op0(compiler, op)); op = GET_OPCODE(op); switch (op) { @@ -1182,17 +1256,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return push_inst(compiler, BRK); case SLJIT_NOP: return push_inst(compiler, NOP); - case SLJIT_UMUL: - case SLJIT_SMUL: - FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1))); - FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_SCRATCH_REG1) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2) | RT2(TMP_ZERO))); - return push_inst(compiler, (op == SLJIT_SMUL ? SMULH : UMULH) | RD(SLJIT_SCRATCH_REG2) | RN(TMP_REG1) | RM(SLJIT_SCRATCH_REG2)); - case SLJIT_UDIV: - case SLJIT_SDIV: - FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1))); - FAIL_IF(push_inst(compiler, ((op == SLJIT_SDIV ? SDIV : UDIV) ^ inv_bits) | RD(SLJIT_SCRATCH_REG1) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_SCRATCH_REG2) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2) | RT2(TMP_ZERO))); - return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_SCRATCH_REG2) | RN(TMP_REG1) | RM(SLJIT_SCRATCH_REG2)); + case SLJIT_LUMUL: + case SLJIT_LSMUL: + FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); + return push_inst(compiler, (op == SLJIT_LUMUL ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: + FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, ((op == SLJIT_UDIVMOD ? 
UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); + return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); + case SLJIT_UDIVI: + case SLJIT_SDIVI: + return push_inst(compiler, ((op == SLJIT_UDIVI ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)); } return SLJIT_SUCCESS; @@ -1206,7 +1283,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler sljit_si op_flags = GET_ALL_FLAGS(op); CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); @@ -1358,7 +1435,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler sljit_si dst_r, flags, mem_flags; CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); @@ -1437,13 +1514,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) { - check_sljit_get_register_index(reg); + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); return reg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) { - check_sljit_get_float_register_index(reg); + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return reg; } @@ -1451,8 +1528,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co void *instruction, sljit_si size) { CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size == 4); + 
CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); return push_inst(compiler, *(sljit_ins*)instruction); } @@ -1524,53 +1600,122 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3)); } +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONVI_FROMD) + inv_bits |= (1 << 31); + + if (src & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src))); + + if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) + return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + inv_bits |= (1 << 31); + + if (src & SLJIT_MEM) { + emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw); + src = TMP_REG1; + } else if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; +#endif + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, ((op & SLJIT_SINGLE_OP) ? 
INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE; - sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + sljit_ins inv_bits; CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - compiler->cache_arg = 0; compiler->cache_argw = 0; - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, TMP_FREG1, dst, dstw); - dst = TMP_FREG1; - } - if (src & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, TMP_FREG2, src, srcw); - src = TMP_FREG2; - } - return push_inst(compiler, (FCMP ^ inv_bits) | VN(dst) | VM(src)); - } + SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, dst_r, src, srcw); + emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONVD_FROMS) ? 
(mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw); src = dst_r; } switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_r) - FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); + case SLJIT_DMOV: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); + else + dst_r = src; + } break; - case SLJIT_NEGD: + case SLJIT_DNEG: FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src))); break; - case SLJIT_ABSD: + case SLJIT_DABS: FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src))); break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_SINGLE_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); + break; } - if (!(dst & SLJIT_MEM)) - return SLJIT_SUCCESS; - return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw); + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw); + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, @@ -1582,12 +1727,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; if (src1 & SLJIT_MEM) { emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); src1 = TMP_FREG1; @@ -1598,16 +1746,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile } switch (GET_OPCODE(op)) { - case SLJIT_ADDD: + case SLJIT_DADD: FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); break; - case SLJIT_SUBD: + case SLJIT_DSUB: FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); break; - case SLJIT_MULD: + case SLJIT_DMUL: FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); break; - case SLJIT_DIVD: + case SLJIT_DDIV: FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); break; } @@ -1624,14 +1772,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) { CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); /* For UNUSED dst. Uncommon, but possible. */ if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; - if (dst <= REG_MASK) + if (FAST_IS_REG(dst)) return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR)); /* Memory. 
*/ @@ -1641,10 +1789,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) { CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); - if (src <= REG_MASK) + if (FAST_IS_REG(src)) FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src))); else if (src & SLJIT_MEM) FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw)); @@ -1661,50 +1809,50 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * static sljit_uw get_cc(sljit_si type) { switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_MUL_NOT_OVERFLOW: - case SLJIT_C_FLOAT_EQUAL: + case SLJIT_EQUAL: + case SLJIT_MUL_NOT_OVERFLOW: + case SLJIT_D_EQUAL: return 0x1; - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_MUL_OVERFLOW: - case SLJIT_C_FLOAT_NOT_EQUAL: + case SLJIT_NOT_EQUAL: + case SLJIT_MUL_OVERFLOW: + case SLJIT_D_NOT_EQUAL: return 0x0; - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: + case SLJIT_LESS: + case SLJIT_D_LESS: return 0x2; - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: return 0x3; - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: + case SLJIT_GREATER: + case SLJIT_D_GREATER: return 0x9; - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: return 0x8; - case SLJIT_C_SIG_LESS: + case SLJIT_SIG_LESS: return 0xa; - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: return 0xb; - case SLJIT_C_SIG_GREATER: + case SLJIT_SIG_GREATER: return 0xd; - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: return 0xc; - case SLJIT_C_OVERFLOW: - case SLJIT_C_FLOAT_UNORDERED: + case SLJIT_OVERFLOW: + case SLJIT_D_UNORDERED: return 0x7; - case SLJIT_C_NOT_OVERFLOW: - 
case SLJIT_C_FLOAT_ORDERED: + case SLJIT_NOT_OVERFLOW: + case SLJIT_D_ORDERED: return 0x6; default: @@ -1718,7 +1866,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi struct sljit_label *label; CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); + CHECK_PTR(check_sljit_emit_label(compiler)); if (compiler->last_label && compiler->last_label->size == compiler->size) return compiler->last_label; @@ -1734,7 +1882,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile struct sljit_jump *jump; CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); @@ -1761,7 +1909,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi struct sljit_jump *jump; sljit_ins inv_bits = (type & SLJIT_INT_OP) ? (1 << 31) : 0; - SLJIT_ASSERT((type & 0xff) == SLJIT_C_EQUAL || (type & 0xff) == SLJIT_C_NOT_EQUAL); + SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL); ADJUST_LOCAL_OFFSET(src, srcw); jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); @@ -1779,7 +1927,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi } SLJIT_ASSERT(FAST_IS_REG(src)); - if ((type & 0xff) == SLJIT_C_EQUAL) + if ((type & 0xff) == SLJIT_EQUAL) inv_bits |= 1 << 24; PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src))); @@ -1794,7 +1942,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil struct sljit_jump *jump; CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); /* In ARM, we don't need to touch the arguments. 
*/ @@ -1825,15 +1973,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com sljit_ins cc; CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; - cc = get_cc(type); - dst_r = (dst <= REG_MASK) ? dst : TMP_REG1; + cc = get_cc(type & 0xff); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; if (GET_OPCODE(op) < SLJIT_ADD) { FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO))); @@ -1872,7 +2020,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi sljit_si dst_r; CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); diff --git a/sljit/sljitNativeARM_T2_32.c b/sljit/sljitNativeARM_T2_32.c index 682f964..f9803f5 100644 --- a/sljit/sljitNativeARM_T2_32.c +++ b/sljit/sljitNativeARM_T2_32.c @@ -33,17 +33,17 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) typedef sljit_ui sljit_ins; /* Last register + 1. */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_PC (SLJIT_NO_REGISTERS + 4) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. 
*/ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { - 0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 13, 3, 4, 14, 15 }; #define COPY_BITS(src, from, to, bits) \ @@ -138,9 +138,9 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define ORRI 0xf0400000 #define ORRS 0x4300 #define ORR_W 0xea400000 -#define POP 0xbd00 +#define POP 0xbc00 #define POP_W 0xe8bd0000 -#define PUSH 0xb500 +#define PUSH 0xb400 #define PUSH_W 0xe92d0000 #define RSB_WI 0xf1c00000 #define RSBSI 0x4240 @@ -169,8 +169,12 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define VABS_F32 0xeeb00ac0 #define VADD_F32 0xee300a00 #define VCMP_F32 0xeeb40a40 +#define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F64_F32 0xeeb70ac0 +#define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 #define VMOV_F32 0xeeb00a40 +#define VMOV 0xee000a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 @@ -349,7 +353,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil struct sljit_const *const_; CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); + CHECK_PTR(check_sljit_generate_code(compiler)); reverse_buf(compiler); code = (sljit_uh*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_uh)); @@ -956,7 +960,7 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, } /* SP based immediate. */ - if (SLJIT_UNLIKELY(arg == SLJIT_LOCALS_REG) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) { + if (SLJIT_UNLIKELY(arg == SLJIT_SP) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) { FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 
0 : 0x800) | RDN3(reg) | (argw >> 2))); return -1; } @@ -1123,86 +1127,73 @@ static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit /* Entry, exit */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si size; + sljit_si size, i, tmp; sljit_ins push; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); push = (1 << 4); - if (saveds >= 5) - push |= 1 << 11; - if (saveds >= 4) - push |= 1 << 10; - if (saveds >= 3) - push |= 1 << 8; - if (saveds >= 2) - push |= 1 << 7; - if (saveds >= 1) - push |= 1 << 6; - if (scratches >= 5) - push |= 1 << 5; - FAIL_IF(saveds >= 3 + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + push |= 1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + push |= 1 << reg_map[i]; + + FAIL_IF((push & 0xff00) ? 
push_inst32(compiler, PUSH_W | (1 << 14) | push) - : push_inst16(compiler, PUSH | push)); + : push_inst16(compiler, PUSH | (1 << 8) | push)); - /* Stack must be aligned to 8 bytes: */ - size = (3 + saveds) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; + /* Stack must be aligned to 8 bytes: (LR, R4) */ + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + local_size = ((size + local_size + 7) & ~7) - size; compiler->local_size = local_size; if (local_size > 0) { if (local_size <= (127 << 2)) FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2))); else - FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, local_size)); + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size)); } if (args >= 1) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG1, SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0))); if (args >= 2) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG2, SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S1, SLJIT_R1))); if (args >= 3) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG3, SLJIT_SCRATCH_REG3))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_si size; - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = 
local_size; -#endif + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - size = (3 + saveds) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; - compiler->local_size = local_size; + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + compiler->local_size = ((size + local_size + 7) & ~7) - size; + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { + sljit_si i, tmp; sljit_ins pop; CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); @@ -1210,25 +1201,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi if (compiler->local_size <= (127 << 2)) FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2))); else - FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, compiler->local_size)); + FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size)); } pop = (1 << 4); - if (compiler->saveds >= 5) - pop |= 1 << 11; - if (compiler->saveds >= 4) - pop |= 1 << 10; - if (compiler->saveds >= 3) - pop |= 1 << 8; - if (compiler->saveds >= 2) - pop |= 1 << 7; - if (compiler->saveds >= 1) - pop |= 1 << 6; - if (compiler->scratches >= 5) - pop |= 1 << 5; - return compiler->saveds >= 3 + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + pop |= 1 << reg_map[i]; + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + pop |= 1 << reg_map[i]; + + return (pop & 0xff00) ? 
push_inst32(compiler, POP_W | (1 << 15) | pop) - : push_inst16(compiler, POP | pop); + : push_inst16(compiler, POP | (1 << 8) | pop); } /* --------------------------------------------------------------------- */ @@ -1252,8 +1239,11 @@ extern int __aeabi_idivmod(int numerator, int denominator); SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) { + sljit_sw saved_reg_list[3]; + sljit_sw saved_reg_count; + CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); + CHECK(check_sljit_emit_op0(compiler, op)); op = GET_OPCODE(op); switch (op) { @@ -1261,31 +1251,60 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return push_inst16(compiler, BKPT); case SLJIT_NOP: return push_inst16(compiler, NOP); - case SLJIT_UMUL: - case SLJIT_SMUL: - return push_inst32(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) - | (reg_map[SLJIT_SCRATCH_REG2] << 8) - | (reg_map[SLJIT_SCRATCH_REG1] << 12) - | (reg_map[SLJIT_SCRATCH_REG1] << 16) - | reg_map[SLJIT_SCRATCH_REG2]); - case SLJIT_UDIV: - case SLJIT_SDIV: - if (compiler->scratches >= 4) { - FAIL_IF(push_inst32(compiler, 0xf84d2d04 /* str r2, [sp, #-4]! */)); - FAIL_IF(push_inst32(compiler, 0xf84dcd04 /* str ip, [sp, #-4]! */)); - } else if (compiler->scratches >= 3) - FAIL_IF(push_inst32(compiler, 0xf84d2d08 /* str r2, [sp, #-8]! */)); + case SLJIT_LUMUL: + case SLJIT_LSMUL: + return push_inst32(compiler, (op == SLJIT_LUMUL ? 
UMULL : SMULL) + | (reg_map[SLJIT_R1] << 8) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 16) + | reg_map[SLJIT_R1]); + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: + case SLJIT_UDIVI: + case SLJIT_SDIVI: + SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); + SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12, bad_register_mapping); + + saved_reg_count = 0; + if (compiler->scratches >= 4) + saved_reg_list[saved_reg_count++] = 12; + if (compiler->scratches >= 3) + saved_reg_list[saved_reg_count++] = 2; + if (op >= SLJIT_UDIVI) + saved_reg_list[saved_reg_count++] = 1; + + if (saved_reg_count > 0) { + FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */)); + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */)); + } + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */)); + } + } + #if defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); + ((op | 0x2) == SLJIT_UDIVI ? 
SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); #else #error "Software divmod functions are needed" #endif - if (compiler->scratches >= 4) { - FAIL_IF(push_inst32(compiler, 0xf85dcb04 /* ldr ip, [sp], #4 */)); - return push_inst32(compiler, 0xf85d2b04 /* ldr r2, [sp], #4 */); - } else if (compiler->scratches >= 3) - return push_inst32(compiler, 0xf85d2b08 /* ldr r2, [sp], #8 */); + + if (saved_reg_count > 0) { + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */)); + } + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */)); + } + return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); + } return SLJIT_SUCCESS; } @@ -1300,7 +1319,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler sljit_si op_flags = GET_ALL_FLAGS(op); CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); @@ -1393,7 +1412,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler } if (op == SLJIT_NEG) { -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw); @@ -1432,7 +1452,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler sljit_si dst_r, flags; CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_op2(compiler, op, dst, 
dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); @@ -1505,22 +1525,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) { - check_sljit_get_register_index(reg); + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); return reg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) { - check_sljit_get_float_register_index(reg); - return reg; + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return reg << 1; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_si size) { CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size == 2 || size == 4); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); if (size == 2) return push_inst16(compiler, *(sljit_uh*)instruction); @@ -1607,6 +1626,69 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg)); } +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_SINGLE_OP) | DD4(TMP_FREG1) | DM4(src))); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1)); + + /* Store the integer value from a VFP register. 
*/ + return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1))); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. */ + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1))); + } + + FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(TMP_FREG1))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(src1) | DM4(src2))); + return push_inst32(compiler, VMRS); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) @@ -1614,48 +1696,45 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile sljit_si dst_r; CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); - compiler->cache_arg = 0; compiler->cache_argw = 0; - op ^= SLJIT_SINGLE_OP; + 
if (GET_OPCODE(op) != SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw); - dst = TMP_FREG1; - } - if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw); - src = TMP_FREG2; - } - FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst) | DM4(src))); - return push_inst32(compiler, VMRS); - } + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw); src = dst_r; } switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_r) - FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + case SLJIT_DMOV: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + else + dst_r = src; + } break; - case SLJIT_NEGD: + case SLJIT_DNEG: FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); break; - case SLJIT_ABSD: + case SLJIT_DABS: FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + op ^= SLJIT_SINGLE_OP; + break; } - if (!(dst & SLJIT_MEM)) - return SLJIT_SUCCESS; - return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw); + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, @@ 
-1666,13 +1745,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile sljit_si dst_r; CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); compiler->cache_arg = 0; compiler->cache_argw = 0; op ^= SLJIT_SINGLE_OP; - dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src1 & SLJIT_MEM) { emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); src1 = TMP_FREG1; @@ -1683,16 +1765,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile } switch (GET_OPCODE(op)) { - case SLJIT_ADDD: + case SLJIT_DADD: FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; - case SLJIT_SUBD: + case SLJIT_DSUB: FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; - case SLJIT_MULD: + case SLJIT_DMUL: FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; - case SLJIT_DIVD: + case SLJIT_DDIV: FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); break; } @@ -1711,14 +1793,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) { CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); /* For UNUSED dst. Uncommon, but possible. 
*/ if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; - if (dst <= REG_MASK) + if (FAST_IS_REG(dst)) return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3)); /* Memory. */ @@ -1734,10 +1816,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) { CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); - if (src <= REG_MASK) + if (FAST_IS_REG(src)) FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src))); else if (src & SLJIT_MEM) { if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw)) @@ -1761,53 +1843,54 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * static sljit_uw get_cc(sljit_si type) { switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_MUL_NOT_OVERFLOW: - case SLJIT_C_FLOAT_EQUAL: + case SLJIT_EQUAL: + case SLJIT_MUL_NOT_OVERFLOW: + case SLJIT_D_EQUAL: return 0x0; - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_MUL_OVERFLOW: - case SLJIT_C_FLOAT_NOT_EQUAL: + case SLJIT_NOT_EQUAL: + case SLJIT_MUL_OVERFLOW: + case SLJIT_D_NOT_EQUAL: return 0x1; - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: + case SLJIT_LESS: + case SLJIT_D_LESS: return 0x3; - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: return 0x2; - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: + case SLJIT_GREATER: + case SLJIT_D_GREATER: return 0x8; - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: return 0x9; - case SLJIT_C_SIG_LESS: + case SLJIT_SIG_LESS: return 0xb; - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: return 0xa; - case SLJIT_C_SIG_GREATER: + case SLJIT_SIG_GREATER: return 0xc; - case SLJIT_C_SIG_LESS_EQUAL: + case 
SLJIT_SIG_LESS_EQUAL: return 0xd; - case SLJIT_C_OVERFLOW: - case SLJIT_C_FLOAT_UNORDERED: + case SLJIT_OVERFLOW: + case SLJIT_D_UNORDERED: return 0x6; - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_FLOAT_ORDERED: + case SLJIT_NOT_OVERFLOW: + case SLJIT_D_ORDERED: return 0x7; default: /* SLJIT_JUMP */ + SLJIT_ASSERT_STOP(); return 0xe; } } @@ -1817,7 +1900,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi struct sljit_label *label; CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); + CHECK_PTR(check_sljit_emit_label(compiler)); if (compiler->last_label && compiler->last_label->size == compiler->size) return compiler->last_label; @@ -1834,7 +1917,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile sljit_ins cc; CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); @@ -1866,7 +1949,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil struct sljit_jump *jump; CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); /* In ARM, we don't need to touch the arguments. */ @@ -1898,7 +1981,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com sljit_ins cc, ins; CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); @@ -1906,7 +1989,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com return SLJIT_SUCCESS; op = GET_OPCODE(op); - cc = get_cc(type); + cc = get_cc(type & 0xff); dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; if (op < SLJIT_ADD) { @@ -1977,7 +2060,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi sljit_si dst_r; CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); diff --git a/sljit/sljitNativeMIPS_32.c b/sljit/sljitNativeMIPS_32.c index cb7c695..b2b60d7 100644 --- a/sljit/sljitNativeMIPS_32.c +++ b/sljit/sljitNativeMIPS_32.c @@ -84,7 +84,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { if (op == SLJIT_MOV_SB) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); #else FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); @@ -102,7 +102,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { if (op == SLJIT_MOV_SH) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); #else FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); @@ -125,7 +125,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (CHECK_FLAGS(SLJIT_SET_E)) @@ -154,9 +154,9 @@ 
static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); @@ -174,7 +174,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (op & (SLJIT_SET_C | SLJIT_SET_O)) @@ -189,8 +189,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); if (!(op & SLJIT_SET_O)) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); return push_inst(compiler, SLL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); @@ -198,21 +198,21 @@ static SLJIT_INLINE sljit_si emit_single_op(struct 
sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_C) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); else { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } } FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); } else { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); } if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); if (!(op & SLJIT_SET_C)) @@ -221,7 +221,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); /* Set carry flag. 
*/ - return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(TMP_EREG1) | DA(ULESS_FLAG), ULESS_FLAG); + return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG); case SLJIT_SUB: if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) { @@ -233,9 +233,9 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); @@ -247,7 +247,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O)) @@ -265,8 +265,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (!(op & SLJIT_SET_O)) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, XOR | S(dst) 
| TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); return push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); @@ -279,27 +279,27 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); } else { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); } if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(TMP_EREG2), TMP_EREG2)); + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG)); FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); - return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; + return (op & SLJIT_SET_C) ? 
push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; case SLJIT_MUL: SLJIT_ASSERT(!(flags & SRC2_IMM)); if (!(op & SLJIT_SET_O)) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); #else FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); @@ -307,10 +307,10 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj #endif } FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG)); FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); - FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(TMP_EREG2) | SH_IMM(31), TMP_EREG2)); - return push_inst(compiler, SUBU | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); + FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG)); + return push_inst(compiler, SUBU | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); case SLJIT_AND: EMIT_LOGICAL(ANDI, AND); diff --git a/sljit/sljitNativeMIPS_64.c b/sljit/sljitNativeMIPS_64.c index df22eba..185fb57 100644 --- a/sljit/sljitNativeMIPS_64.c +++ b/sljit/sljitNativeMIPS_64.c @@ -217,7 +217,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (CHECK_FLAGS(SLJIT_SET_E)) @@ -246,9 +246,9 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { if (src2 >= 0) - 
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); @@ -266,7 +266,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (op & (SLJIT_SET_C | SLJIT_SET_O)) @@ -281,8 +281,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); if (!(op & SLJIT_SET_O)) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); return push_inst(compiler, SELECT_OP(DSRL32, SLL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); @@ -290,21 +290,21 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & 
SLJIT_SET_C) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); else { - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } } FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); } else { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); } if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); if (!(op & SLJIT_SET_C)) @@ -313,7 +313,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); /* Set carry flag. 
*/ - return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(TMP_EREG1) | DA(ULESS_FLAG), ULESS_FLAG); + return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG); case SLJIT_SUB: if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) { @@ -325,9 +325,9 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); @@ -339,7 +339,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O)) @@ -357,8 +357,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (!(op & SLJIT_SET_O)) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | 
DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); @@ -371,27 +371,27 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); } else { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); } if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(TMP_EREG2), TMP_EREG2)); + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG)); FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); - return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; + return (op & SLJIT_SET_C) ? 
push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; case SLJIT_MUL: SLJIT_ASSERT(!(flags & SRC2_IMM)); if (!(op & SLJIT_SET_O)) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) if (op & SLJIT_INT_OP) return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS)); @@ -402,10 +402,10 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj #endif } FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG)); FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); - FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(TMP_EREG2) | SH_IMM(31), TMP_EREG2)); - return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG)); + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); case SLJIT_AND: EMIT_LOGICAL(ANDI, AND); diff --git a/sljit/sljitNativeMIPS_common.c b/sljit/sljitNativeMIPS_common.c index 011d887..cf3535f 100644 --- a/sljit/sljitNativeMIPS_common.c +++ b/sljit/sljitNativeMIPS_common.c @@ -25,13 +25,17 @@ */ /* Latest MIPS architecture. 
*/ -/* Automatically detect SLJIT_MIPS_32_64 */ +/* Automatically detect SLJIT_MIPS_R1 */ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) - return "MIPS V" SLJIT_CPUINFO; +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R1" SLJIT_CPUINFO; #else + return "MIPS64-R1" SLJIT_CPUINFO; +#endif +#else /* SLJIT_MIPS_R1 */ return "MIPS III" SLJIT_CPUINFO; #endif } @@ -40,35 +44,32 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) Both for mips-32 and mips-64 */ typedef sljit_ui sljit_ins; -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) /* For position independent code, t9 must contain the function address. */ #define PIC_ADDR_REG TMP_REG2 -/* TMP_EREGs are used mainly for arithmetic operations. */ -#define TMP_EREG1 15 -#define TMP_EREG2 24 /* Floating point status register. */ #define FCSR_REG 31 /* Return address register. */ #define RETURN_ADDR_REG 31 -/* Flags are keept in volatile registers. */ -#define EQUAL_FLAG 7 +/* Flags are kept in volatile registers. */ +#define EQUAL_FLAG 12 /* And carry flag as well. 
*/ -#define ULESS_FLAG 10 -#define UGREATER_FLAG 11 -#define LESS_FLAG 12 -#define GREATER_FLAG 13 -#define OVERFLOW_FLAG 14 +#define ULESS_FLAG 13 +#define UGREATER_FLAG 14 +#define LESS_FLAG 15 +#define GREATER_FLAG 31 +#define OVERFLOW_FLAG 1 #define TMP_FREG1 (0) -#define TMP_FREG2 ((SLJIT_FLOAT_REG6 + 1) << 1) +#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 2, 5, 6, 3, 8, 16, 17, 18, 19, 20, 29, 4, 25, 9 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 2, 5, 6, 7, 8, 9, 10, 11, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4 }; /* --------------------------------------------------------------------- */ @@ -92,10 +93,10 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { #define HI(opcode) ((opcode) << 26) #define LO(opcode) (opcode) /* S = (16 << 21) D = (17 << 21) */ -#define FMT_SD (16 << 21) +#define FMT_S (16 << 21) -#define ABS_fmt (HI(17) | FMT_SD | LO(5)) -#define ADD_fmt (HI(17) | FMT_SD | LO(0)) +#define ABS_S (HI(17) | FMT_S | LO(5)) +#define ADD_S (HI(17) | FMT_S | LO(0)) #define ADDIU (HI(9)) #define ADDU (HI(0) | LO(33)) #define AND (HI(0) | LO(36)) @@ -112,17 +113,18 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { #define BNE (HI(5)) #define BREAK (HI(0) | LO(13)) #define CFC1 (HI(17) | (2 << 21)) -#define C_UN_fmt (HI(17) | FMT_SD | LO(49)) -#define C_UEQ_fmt (HI(17) | FMT_SD | LO(51)) -#define C_ULE_fmt (HI(17) | FMT_SD | LO(55)) -#define C_ULT_fmt (HI(17) | FMT_SD | LO(53)) +#define C_UN_S (HI(17) | FMT_S | LO(49)) +#define C_UEQ_S (HI(17) | FMT_S | LO(51)) +#define C_ULE_S (HI(17) | FMT_S | LO(55)) +#define C_ULT_S (HI(17) | FMT_S | LO(53)) +#define CVT_S_S (HI(17) | FMT_S | LO(32)) #define DADDIU (HI(25)) #define DADDU (HI(0) | LO(45)) #define DDIV (HI(0) | LO(30)) #define DDIVU (HI(0) | LO(31)) #define DIV (HI(0) | LO(26)) #define DIVU (HI(0) | LO(27)) -#define DIV_fmt (HI(17) | FMT_SD | LO(3)) 
+#define DIV_S (HI(17) | FMT_S | LO(3)) #define DMULT (HI(0) | LO(28)) #define DMULTU (HI(0) | LO(29)) #define DSLL (HI(0) | LO(56)) @@ -142,13 +144,15 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { #define LD (HI(55)) #define LUI (HI(15)) #define LW (HI(35)) +#define MFC1 (HI(17)) #define MFHI (HI(0) | LO(16)) #define MFLO (HI(0) | LO(18)) -#define MOV_fmt (HI(17) | FMT_SD | LO(6)) -#define MUL_fmt (HI(17) | FMT_SD | LO(2)) +#define MOV_S (HI(17) | FMT_S | LO(6)) +#define MTC1 (HI(17) | (4 << 21)) +#define MUL_S (HI(17) | FMT_S | LO(2)) #define MULT (HI(0) | LO(24)) #define MULTU (HI(0) | LO(25)) -#define NEG_fmt (HI(17) | FMT_SD | LO(7)) +#define NEG_S (HI(17) | FMT_S | LO(7)) #define NOP (HI(0) | LO(0)) #define NOR (HI(0) | LO(39)) #define OR (HI(0) | LO(37)) @@ -164,13 +168,14 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { #define SRLV (HI(0) | LO(6)) #define SRA (HI(0) | LO(3)) #define SRAV (HI(0) | LO(7)) -#define SUB_fmt (HI(17) | FMT_SD | LO(1)) +#define SUB_S (HI(17) | FMT_S | LO(1)) #define SUBU (HI(0) | LO(35)) #define SW (HI(43)) +#define TRUNC_W_S (HI(17) | FMT_S | LO(13)) #define XOR (HI(0) | LO(38)) #define XORI (HI(14)) -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) #define CLZ (HI(28) | LO(32)) #define DCLZ (HI(28) | LO(36)) #define MUL (HI(28) | LO(2)) @@ -366,7 +371,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil struct sljit_const *const_; CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); + CHECK_PTR(check_sljit_generate_code(compiler)); reverse_buf(compiler); code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); @@ -495,6 +500,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* Separates integer and floating point registers */ #define GPR_REG 0x0f #define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 #define MEM_MASK 0x1f @@ -532,20 +538,18 @@ 
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #include "sljitNativeMIPS_64.c" #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_ins base; + sljit_si i, tmp, offs; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - local_size += (saveds + 1 + 4) * sizeof(sljit_sw); + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) local_size = (local_size + 15) & ~0xf; #else @@ -555,95 +559,100 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil if (local_size <= SIMM_MAX) { /* Frequent case. 
*/ - FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(-local_size), DR(SLJIT_LOCALS_REG))); - base = S(SLJIT_LOCALS_REG); + FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP))); + base = S(SLJIT_SP); } else { FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(SLJIT_LOCALS_REG), DR(SLJIT_LOCALS_REG))); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP))); base = S(TMP_REG2); local_size = 0; } - FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); + offs = local_size - (sljit_sw)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS)); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + } if (args >= 1) - FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_SAVED_REG1), DR(SLJIT_SAVED_REG1))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_S0), DR(SLJIT_S0))); if (args >= 2) - FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_SAVED_REG2), DR(SLJIT_SAVED_REG2))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_S1), DR(SLJIT_S1))); if (args >= 3) - FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_SAVED_REG3), DR(SLJIT_SAVED_REG3))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_S2), DR(SLJIT_S2))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - local_size += (saveds + 1 + 4) * sizeof(sljit_sw); + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + 
SLJIT_LOCALS_OFFSET; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) compiler->local_size = (local_size + 15) & ~0xf; #else compiler->local_size = (local_size + 31) & ~0x1f; #endif + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { - sljit_si local_size; + sljit_si local_size, i, tmp, offs; sljit_ins base; CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); local_size = compiler->local_size; if (local_size <= SIMM_MAX) - base = S(SLJIT_LOCALS_REG); + base = S(SLJIT_SP); else { FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1))); base = S(TMP_REG1); local_size = 0; } - FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG)); - if (compiler->saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_EREG2))); - if (compiler->saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_EREG1))); - if (compiler->saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG3))); - if (compiler->saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG2))); - if (compiler->saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * 
(sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG1))); + FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG)); + offs = local_size - (sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + SLJIT_ASSERT(offs == local_size - (sljit_sw)(sizeof(sljit_sw))); FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); if (compiler->local_size <= SIMM_MAX) - return push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(compiler->local_size), UNMOVABLE_INS); + return push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(compiler->local_size), UNMOVABLE_INS); else - return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_LOCALS_REG), UNMOVABLE_INS); + return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_SP), UNMOVABLE_INS); } #undef STACK_STORE @@ -1027,7 +1036,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler #endif CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); + CHECK(check_sljit_emit_op0(compiler, op)); op = GET_OPCODE(op); switch (op) { @@ -1035,33 +1044,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return push_inst(compiler, BREAK, UNMOVABLE_INS); case SLJIT_NOP: return push_inst(compiler, NOP, UNMOVABLE_INS); - case SLJIT_UMUL: - case SLJIT_SMUL: + case SLJIT_LUMUL: + case SLJIT_LSMUL: #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) 
- FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? DMULTU : DMULT) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_LUMUL ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_LUMUL ? MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #endif - FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_SCRATCH_REG1), DR(SLJIT_SCRATCH_REG1))); - return push_inst(compiler, MFHI | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); - case SLJIT_UDIV: - case SLJIT_SDIV: -#if !(defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: + case SLJIT_UDIVI: + case SLJIT_SDIVI: + SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); +#if !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); #endif #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) if (int_op) - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); else - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DDIVU : DDIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? 
DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #endif - FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_SCRATCH_REG1), DR(SLJIT_SCRATCH_REG1))); - return push_inst(compiler, MFHI | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return (op >= SLJIT_UDIVI) ? SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); } return SLJIT_SUCCESS; @@ -1078,7 +1090,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #endif CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); @@ -1180,7 +1192,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler #endif CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); @@ -1238,13 +1250,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) { - check_sljit_get_register_index(reg); + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); return reg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) { - check_sljit_get_float_register_index(reg); + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return reg << 1; } @@ -1252,8 +1264,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co void *instruction, sljit_si size) { CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size == 4); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); return push_inst(compiler, *(sljit_ins*)instruction, 
UNMOVABLE_INS); } @@ -1278,83 +1289,164 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7)) #define FMT(op) (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) << (21 - 8)) +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags 0 +#else + sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVW_FROMD) << 21; +#endif + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + else + src <<= 1; + + FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS)); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS); + + /* Store the integer value from a VFP register. */ + return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0); + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef is_long +#endif +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags 0 +#else + sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVD_FROMW) << 21; +#endif + + sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. */ + FAIL_IF(emit_op_mem2(compiler, ((flags) ? 
DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + } + else { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); + } + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef flags +#endif +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + else + src1 <<= 1; + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + else + src2 <<= 1; + + /* src2 and src1 are swapped. */ + if (op & SLJIT_SET_E) { + FAIL_IF(push_inst(compiler, C_UEQ_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG)); + } + if (op & SLJIT_SET_S) { + /* Mixing the instructions for the two checks. 
*/ + FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src1) | FS(src2), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG)); + FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG)); + FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG)); + } + return push_inst(compiler, C_UN_S | FMT(op) | FT(src2) | FS(src1), FCSR_FCC); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_si dst_fr; + sljit_si dst_r; CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); - compiler->cache_arg = 0; compiler->cache_argw = 0; - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw)); - dst = TMP_FREG1; - } - else - dst <<= 1; + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); - src = TMP_FREG2; - } - else - src <<= 1; - - /* src and dst are swapped. 
*/ - if (op & SLJIT_SET_E) { - FAIL_IF(push_inst(compiler, C_UEQ_fmt | FMT(op) | FT(src) | FS(dst), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG)); - } - if (op & SLJIT_SET_S) { - /* Mixing the instructions for the two checks. */ - FAIL_IF(push_inst(compiler, C_ULT_fmt | FMT(op) | FT(src) | FS(dst), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, C_ULT_fmt | FMT(op) | FT(dst) | FS(src), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG)); - FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG)); - FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG)); - } - return push_inst(compiler, C_UN_fmt | FMT(op) | FT(src) | FS(dst), FCSR_FCC); - } + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; - dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? 
(dst << 1) : TMP_FREG1; if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); - src = dst_fr; + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; } else src <<= 1; switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_fr && dst_fr != TMP_FREG1) - FAIL_IF(push_inst(compiler, MOV_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS)); - break; - case SLJIT_NEGD: - FAIL_IF(push_inst(compiler, NEG_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS)); - break; - case SLJIT_ABSD: - FAIL_IF(push_inst(compiler, ABS_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS)); - break; - } - - if (dst_fr == TMP_FREG1) { - if (GET_OPCODE(op) == SLJIT_MOVD) - dst_fr = src; - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0)); + case SLJIT_DMOV: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, MOV_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + else + dst_r = src; + } + break; + case SLJIT_DNEG: + FAIL_IF(push_inst(compiler, NEG_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_DABS: + FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst(compiler, CVT_S_S | ((op & SLJIT_SINGLE_OP) ? 
1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); + op ^= SLJIT_SINGLE_OP; + break; } + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0); return SLJIT_SUCCESS; } @@ -1363,15 +1455,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_si dst_fr, flags = 0; + sljit_si dst_r, flags = 0; CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2; + dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2; if (src1 & SLJIT_MEM) { if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { @@ -1414,24 +1509,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile src2 = TMP_FREG2; switch (GET_OPCODE(op)) { - case SLJIT_ADDD: - FAIL_IF(push_inst(compiler, ADD_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); + case SLJIT_DADD: + FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; - case SLJIT_SUBD: - FAIL_IF(push_inst(compiler, SUB_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); + case SLJIT_DSUB: + FAIL_IF(push_inst(compiler, SUB_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; - case SLJIT_MULD: - FAIL_IF(push_inst(compiler, MUL_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); + case SLJIT_DMUL: + FAIL_IF(push_inst(compiler, MUL_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; - case SLJIT_DIVD: - FAIL_IF(push_inst(compiler, DIV_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); + case SLJIT_DDIV: + 
FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; } - if (dst_fr == TMP_FREG2) + if (dst_r == TMP_FREG2) FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); return SLJIT_SUCCESS; @@ -1444,7 +1539,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) { CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); /* For UNUSED dst. Uncommon, but possible. */ @@ -1461,7 +1556,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) { CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); if (FAST_IS_REG(src)) @@ -1484,7 +1579,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi struct sljit_label *label; CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); + CHECK_PTR(check_sljit_emit_label(compiler)); if (compiler->last_label && compiler->last_label->size == compiler->size) return compiler->last_label; @@ -1530,7 +1625,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile sljit_si delay_check = UNMOVABLE_INS; CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); @@ -1538,54 +1633,54 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile type &= 0xff; switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_FLOAT_NOT_EQUAL: + case SLJIT_EQUAL: + case 
SLJIT_D_NOT_EQUAL: BR_NZ(EQUAL_FLAG); break; - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_FLOAT_EQUAL: + case SLJIT_NOT_EQUAL: + case SLJIT_D_EQUAL: BR_Z(EQUAL_FLAG); break; - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: + case SLJIT_LESS: + case SLJIT_D_LESS: BR_Z(ULESS_FLAG); break; - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: BR_NZ(ULESS_FLAG); break; - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: + case SLJIT_GREATER: + case SLJIT_D_GREATER: BR_Z(UGREATER_FLAG); break; - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: BR_NZ(UGREATER_FLAG); break; - case SLJIT_C_SIG_LESS: + case SLJIT_SIG_LESS: BR_Z(LESS_FLAG); break; - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: BR_NZ(LESS_FLAG); break; - case SLJIT_C_SIG_GREATER: + case SLJIT_SIG_GREATER: BR_Z(GREATER_FLAG); break; - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: BR_NZ(GREATER_FLAG); break; - case SLJIT_C_OVERFLOW: - case SLJIT_C_MUL_OVERFLOW: + case SLJIT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: BR_Z(OVERFLOW_FLAG); break; - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: BR_NZ(OVERFLOW_FLAG); break; - case SLJIT_C_FLOAT_UNORDERED: + case SLJIT_D_UNORDERED: BR_F(); break; - case SLJIT_C_FLOAT_ORDERED: + case SLJIT_D_ORDERED: BR_T(); break; default: @@ -1613,7 +1708,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); jump->addr = compiler->size; /* A NOP if type < CALL1. 
*/ - PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS)); } return jump; } @@ -1647,7 +1742,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler sljit_ins inst; CHECK_ERROR_PTR(); - check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); @@ -1668,32 +1763,32 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); type &= 0xff; - if (type <= SLJIT_C_NOT_EQUAL) { + if (type <= SLJIT_NOT_EQUAL) { RESOLVE_IMM1(); RESOLVE_IMM2(); jump->flags |= IS_BIT26_COND; if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2))) jump->flags |= IS_MOVABLE; - PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_C_EQUAL ? BNE : BEQ) | S(src1) | T(src2) | JUMP_LENGTH, UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? 
BNE : BEQ) | S(src1) | T(src2) | JUMP_LENGTH, UNMOVABLE_INS)); } - else if (type >= SLJIT_C_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) { + else if (type >= SLJIT_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) { inst = NOP; if ((src1 & SLJIT_IMM) && (src1w == 0)) { RESOLVE_IMM2(); switch (type) { - case SLJIT_C_SIG_LESS: + case SLJIT_SIG_LESS: inst = BLEZ; jump->flags |= IS_BIT26_COND; break; - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: inst = BGTZ; jump->flags |= IS_BIT26_COND; break; - case SLJIT_C_SIG_GREATER: + case SLJIT_SIG_GREATER: inst = BGEZ; jump->flags |= IS_BIT16_COND; break; - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: inst = BLTZ; jump->flags |= IS_BIT16_COND; break; @@ -1703,19 +1798,19 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler else { RESOLVE_IMM1(); switch (type) { - case SLJIT_C_SIG_LESS: + case SLJIT_SIG_LESS: inst = BGEZ; jump->flags |= IS_BIT16_COND; break; - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: inst = BLTZ; jump->flags |= IS_BIT16_COND; break; - case SLJIT_C_SIG_GREATER: + case SLJIT_SIG_GREATER: inst = BLEZ; jump->flags |= IS_BIT26_COND; break; - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: inst = BGTZ; jump->flags |= IS_BIT26_COND; break; @@ -1724,29 +1819,29 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler PTR_FAIL_IF(push_inst(compiler, inst | S(src1) | JUMP_LENGTH, UNMOVABLE_INS)); } else { - if (type == SLJIT_C_LESS || type == SLJIT_C_GREATER_EQUAL || type == SLJIT_C_SIG_LESS || type == SLJIT_C_SIG_GREATER_EQUAL) { + if (type == SLJIT_LESS || type == SLJIT_GREATER_EQUAL || type == SLJIT_SIG_LESS || type == SLJIT_SIG_GREATER_EQUAL) { RESOLVE_IMM1(); if ((src2 & SLJIT_IMM) && src2w <= SIMM_MAX && src2w >= SIMM_MIN) - PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? 
SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1))); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1))); else { RESOLVE_IMM2(); - PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTU : SLT) | S(src1) | T(src2) | D(TMP_REG1), DR(TMP_REG1))); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTU : SLT) | S(src1) | T(src2) | D(TMP_REG1), DR(TMP_REG1))); } - type = (type == SLJIT_C_LESS || type == SLJIT_C_SIG_LESS) ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL; + type = (type == SLJIT_LESS || type == SLJIT_SIG_LESS) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL; } else { RESOLVE_IMM2(); if ((src1 & SLJIT_IMM) && src1w <= SIMM_MAX && src1w >= SIMM_MIN) - PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1))); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1))); else { RESOLVE_IMM1(); - PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTU : SLT) | S(src2) | T(src1) | D(TMP_REG1), DR(TMP_REG1))); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTU : SLT) | S(src2) | T(src1) | D(TMP_REG1), DR(TMP_REG1))); } - type = (type == SLJIT_C_GREATER || type == SLJIT_C_SIG_GREATER) ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL; + type = (type == SLJIT_GREATER || type == SLJIT_SIG_GREATER) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL; } jump->flags |= IS_BIT26_COND; - PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_C_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | JUMP_LENGTH, UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? 
BNE : BEQ) | S(TMP_REG1) | TA(0) | JUMP_LENGTH, UNMOVABLE_INS)); } PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); @@ -1768,7 +1863,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile sljit_si if_true; CHECK_ERROR_PTR(); - check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w); + CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w)); compiler->cache_arg = 0; compiler->cache_argw = 0; @@ -1793,37 +1888,38 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile jump->flags |= IS_BIT16_COND; switch (type & 0xff) { - case SLJIT_C_FLOAT_EQUAL: - inst = C_UEQ_fmt; + case SLJIT_D_EQUAL: + inst = C_UEQ_S; if_true = 1; break; - case SLJIT_C_FLOAT_NOT_EQUAL: - inst = C_UEQ_fmt; + case SLJIT_D_NOT_EQUAL: + inst = C_UEQ_S; if_true = 0; break; - case SLJIT_C_FLOAT_LESS: - inst = C_ULT_fmt; + case SLJIT_D_LESS: + inst = C_ULT_S; if_true = 1; break; - case SLJIT_C_FLOAT_GREATER_EQUAL: - inst = C_ULT_fmt; + case SLJIT_D_GREATER_EQUAL: + inst = C_ULT_S; if_true = 0; break; - case SLJIT_C_FLOAT_GREATER: - inst = C_ULE_fmt; + case SLJIT_D_GREATER: + inst = C_ULE_S; if_true = 0; break; - case SLJIT_C_FLOAT_LESS_EQUAL: - inst = C_ULE_fmt; + case SLJIT_D_LESS_EQUAL: + inst = C_ULE_S; if_true = 1; break; - case SLJIT_C_FLOAT_UNORDERED: - inst = C_UN_fmt; + case SLJIT_D_UNORDERED: + inst = C_UN_S; if_true = 1; break; - case SLJIT_C_FLOAT_ORDERED: default: /* Make compilers happy. 
*/ - inst = C_UN_fmt; + SLJIT_ASSERT_STOP(); + case SLJIT_D_ORDERED: + inst = C_UN_S; if_true = 0; break; } @@ -1853,7 +1949,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil struct sljit_jump *jump = NULL; CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); if (FAST_IS_REG(src)) { @@ -1874,12 +1970,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil } FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); /* We need an extra instruction in any case. */ - return push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), UNMOVABLE_INS); + return push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS); } /* Register input. */ if (type >= SLJIT_CALL1) - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), 4)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), 4)); FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | D(PIC_ADDR_REG), UNMOVABLE_INS); } @@ -1919,7 +2015,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com #endif CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); if (dst == SLJIT_UNUSED) @@ -1941,49 +2037,49 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com srcw = 0; } - switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_NOT_EQUAL: + switch (type & 0xff) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); dst_ar = sugg_dst_ar; break; - case SLJIT_C_LESS: - case SLJIT_C_GREATER_EQUAL: - case 
SLJIT_C_FLOAT_LESS: - case SLJIT_C_FLOAT_GREATER_EQUAL: + case SLJIT_LESS: + case SLJIT_GREATER_EQUAL: + case SLJIT_D_LESS: + case SLJIT_D_GREATER_EQUAL: dst_ar = ULESS_FLAG; break; - case SLJIT_C_GREATER: - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_GREATER: - case SLJIT_C_FLOAT_LESS_EQUAL: + case SLJIT_GREATER: + case SLJIT_LESS_EQUAL: + case SLJIT_D_GREATER: + case SLJIT_D_LESS_EQUAL: dst_ar = UGREATER_FLAG; break; - case SLJIT_C_SIG_LESS: - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER_EQUAL: dst_ar = LESS_FLAG; break; - case SLJIT_C_SIG_GREATER: - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_GREATER: + case SLJIT_SIG_LESS_EQUAL: dst_ar = GREATER_FLAG; break; - case SLJIT_C_OVERFLOW: - case SLJIT_C_NOT_OVERFLOW: + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: dst_ar = OVERFLOW_FLAG; break; - case SLJIT_C_MUL_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: FAIL_IF(push_inst(compiler, SLTIU | SA(OVERFLOW_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); dst_ar = sugg_dst_ar; type ^= 0x1; /* Flip type bit for the XORI below. 
*/ break; - case SLJIT_C_FLOAT_EQUAL: - case SLJIT_C_FLOAT_NOT_EQUAL: + case SLJIT_D_EQUAL: + case SLJIT_D_NOT_EQUAL: dst_ar = EQUAL_FLAG; break; - case SLJIT_C_FLOAT_UNORDERED: - case SLJIT_C_FLOAT_ORDERED: + case SLJIT_D_UNORDERED: + case SLJIT_D_ORDERED: FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar)); FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar)); FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); @@ -2025,7 +2121,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi sljit_si reg; CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); diff --git a/sljit/sljitNativePPC_common.c b/sljit/sljitNativePPC_common.c index 5e06f2f..b6a043f 100644 --- a/sljit/sljitNativePPC_common.c +++ b/sljit/sljitNativePPC_common.c @@ -87,22 +87,22 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #endif /* _AIX */ } -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_ZERO (SLJIT_NO_REGISTERS + 4) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_ZERO (SLJIT_NUMBER_OF_REGISTERS + 5) #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) -#define TMP_CALL_REG (SLJIT_NO_REGISTERS + 5) +#define TMP_CALL_REG (SLJIT_NUMBER_OF_REGISTERS + 6) #else #define TMP_CALL_REG TMP_REG2 #endif #define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { - 0, 3, 4, 5, 6, 7, 30, 29, 28, 
27, 26, 1, 8, 9, 10, 31, 12 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { + 0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12 }; /* --------------------------------------------------------------------- */ @@ -114,6 +114,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { #define B(b) (reg_map[b] << 11) #define C(c) (reg_map[c] << 6) #define FD(fd) ((fd) << 21) +#define FS(fs) ((fs) << 21) #define FA(fa) ((fa) << 16) #define FB(fb) ((fb) << 11) #define FC(fc) ((fc) << 6) @@ -159,13 +160,17 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { #define FABS (HI(63) | LO(264)) #define FADD (HI(63) | LO(21)) #define FADDS (HI(59) | LO(21)) +#define FCFID (HI(63) | LO(846)) #define FCMPU (HI(63) | LO(0)) +#define FCTIDZ (HI(63) | LO(815)) +#define FCTIWZ (HI(63) | LO(15)) #define FDIV (HI(63) | LO(18)) #define FDIVS (HI(59) | LO(18)) #define FMR (HI(63) | LO(72)) #define FMUL (HI(63) | LO(25)) #define FMULS (HI(59) | LO(25)) #define FNEG (HI(63) | LO(40)) +#define FRSP (HI(63) | LO(12)) #define FSUB (HI(63) | LO(20)) #define FSUBS (HI(59) | LO(20)) #define LD (HI(58) | 0) @@ -202,6 +207,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { #define STD (HI(62) | 0) #define STDU (HI(62) | 1) #define STDUX (HI(31) | LO(181)) +#define STFIWX (HI(31) | LO(983)) #define STW (HI(36)) #define STWU (HI(37)) #define STWUX (HI(31) | LO(183)) @@ -323,7 +329,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil struct sljit_const *const_; CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); + CHECK_PTR(check_sljit_generate_code(compiler)); reverse_buf(compiler); #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) @@ -565,118 +571,121 @@ ALT_FORM6 0x200000 */ #define STACK_LOAD LD #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si 
saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + sljit_si i, tmp, offs; - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); FAIL_IF(push_inst(compiler, MFLR | D(0))); - FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) )); + offs = -(sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + } + + SLJIT_ASSERT(offs == -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1)); + #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw)) )); + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); #else - FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)) )); + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); #endif FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); if (args >= 1) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(SLJIT_SAVED_REG1) | B(SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0))); if (args >= 2) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG2) | A(SLJIT_SAVED_REG2) | B(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1))); if (args >= 3) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_SCRATCH_REG3))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2))); -#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size; -#else - compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size; -#endif - compiler->local_size = (compiler->local_size + 15) & ~0xf; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + 
SLJIT_LOCALS_OFFSET; + local_size = (local_size + 15) & ~0xf; + compiler->local_size = local_size; #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - if (compiler->local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size))); + if (local_size <= SIMM_MAX) + FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); else { - FAIL_IF(load_immediate(compiler, 0, -compiler->local_size)); - FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); + FAIL_IF(load_immediate(compiler, 0, -local_size)); + FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); } #else - if (compiler->local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size))); + if (local_size <= SIMM_MAX) + FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); else { - FAIL_IF(load_immediate(compiler, 0, -compiler->local_size)); - FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); + FAIL_IF(load_immediate(compiler, 0, -local_size)); + FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); } #endif return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, 
options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); -#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size; -#else - compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size; -#endif - compiler->local_size = (compiler->local_size + 15) & ~0xf; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; + compiler->local_size = (local_size + 15) & ~0xf; + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { + sljit_si i, tmp, offs; + CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); if (compiler->local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(compiler->local_size))); + FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size))); else { FAIL_IF(load_immediate(compiler, 0, compiler->local_size)); - FAIL_IF(push_inst(compiler, ADD | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); + FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0))); } #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw)))); + FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); #else - FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)))); + FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); #endif - if (compiler->saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG2) | 
A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) )); - FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) )); + + offs = -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? 
(SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); + SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw))); FAIL_IF(push_inst(compiler, MTLR | S(0))); FAIL_IF(push_inst(compiler, BLR)); @@ -999,12 +1008,12 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, tmp_r = arg; FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16))); } - else if (compiler->cache_arg != arg || high_short != compiler->cache_argw) { + else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) { if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) { next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff; if (high_short == next_high_short) { - compiler->cache_arg = SLJIT_IMM | arg; - compiler->cache_argw = next_high_short; + compiler->cache_arg = SLJIT_MEM | arg; + compiler->cache_argw = high_short; tmp_r = TMP_REG3; } } @@ -1241,39 +1250,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler #endif CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); + CHECK(check_sljit_emit_op0(compiler, op)); op = GET_OPCODE(op); switch (op) { case SLJIT_BREAKPOINT: case SLJIT_NOP: return push_inst(compiler, NOP); - case SLJIT_UMUL: - case SLJIT_SMUL: - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1))); + case SLJIT_LUMUL: + case SLJIT_LSMUL: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, (op == SLJIT_UMUL ? 
MULHDU : MULHD) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_LUMUL ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); #else - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, (op == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_LUMUL ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); #endif - case SLJIT_UDIV: - case SLJIT_SDIV: - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1))); + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (int_op) { - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); - } else { - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); - } - return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1)); + FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_UDIVMOD ? DIVWU : DIVW) : (op == SLJIT_UDIVMOD ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? 
DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIVMOD ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); +#endif + return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); + case SLJIT_UDIVI: + case SLJIT_SDIVI: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + return push_inst(compiler, (int_op ? (op == SLJIT_UDIVI ? DIVWU : DIVW) : (op == SLJIT_UDIVI ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); +#else + return push_inst(compiler, (op == SLJIT_UDIVI ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); #endif } @@ -1291,7 +1301,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler sljit_si op_flags = GET_ALL_FLAGS(op); CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); @@ -1446,7 +1456,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler sljit_si flags = GET_FLAGS(op) ? 
ALT_SET_FLAGS : 0; CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); @@ -1648,13 +1658,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) { - check_sljit_get_register_index(reg); + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); return reg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) { - check_sljit_get_float_register_index(reg); + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return reg; } @@ -1662,8 +1672,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co void *instruction, sljit_si size) { CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size == 4); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); return push_inst(compiler, *(sljit_ins*)instruction); } @@ -1685,59 +1694,233 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 6)) #define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? 
single : double) -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw)) +#else +#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw)) + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw)) +#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw)) +#else +#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw)) +#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw)) +#endif + +#endif /* SLJIT_CONFIG_PPC_64 */ + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_si dst_fr; + if (src & SLJIT_MEM) { + /* We can ignore the temporary data store on the stack from caching point of view. */ + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } - CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + op = GET_OPCODE(op); + FAIL_IF(push_inst(compiler, (op == SLJIT_CONVI_FROMD ? 
FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src))); - compiler->cache_arg = 0; - compiler->cache_argw = 0; + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw)); - dst = TMP_FREG1; + if (op == SLJIT_CONVW_FROMD) { + if (FAST_IS_REG(dst)) { + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0)); + return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0); } + return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0); + } - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); - src = TMP_FREG2; +#else + FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src))); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; +#endif + + if (FAST_IS_REG(dst)) { + FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1))); + return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0); + } + + SLJIT_ASSERT(dst & SLJIT_MEM); + + if (dst & OFFS_REG_MASK) { + dstw &= 0x3; + if (dstw) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1))); +#else + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1))); +#endif + dstw = TMP_REG1; + } + else + dstw = OFFS_REG(dst); + } + else { + if ((dst & REG_MASK) && !dstw) { + dstw = dst & REG_MASK; + dst = 0; + } + else { + /* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. 
*/ + FAIL_IF(load_immediate(compiler, TMP_REG1, dstw)); + dstw = TMP_REG1; } + } + + return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw)); +} - return push_inst(compiler, FCMPU | CRD(4) | FA(dst) | FB(src)); +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_IMM) { + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; } + else if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) { + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1))); + else + FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + src = TMP_REG1; + } + + if (FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw)); + } + else + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + + FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + if (op & SLJIT_SINGLE_OP) + return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); + return SLJIT_SUCCESS; + +#else - dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_si dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; + sljit_si invert_sign = 1; + + if (src & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000)); + src = TMP_REG1; + invert_sign = 0; + } + else if (!FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW)); + src = TMP_REG1; + } + + /* First, a special double floating point value is constructed: (2^53 + (input xor (2^31))) + The double precision format has exactly 53 bit precision, so the lower 32 bit represents + the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000 + to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating + point value, we need to substract 2^53 + 2^31 from the constructed value. */ + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330)); + if (invert_sign) + FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI)); + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW)); + + FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, 
dstw, 0, 0); + if (op & SLJIT_SINGLE_OP) + return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); + return SLJIT_SUCCESS; + +#endif +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + + return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); - src = dst_fr; + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; } switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_fr && dst_fr != TMP_FREG1) - FAIL_IF(push_inst(compiler, FMR | FD(dst_fr) | FB(src))); - break; - case SLJIT_NEGD: - FAIL_IF(push_inst(compiler, FNEG | FD(dst_fr) | FB(src))); - break; - case SLJIT_ABSD: - FAIL_IF(push_inst(compiler, FABS | FD(dst_fr) | FB(src))); + case SLJIT_CONVD_FROMS: + op ^= SLJIT_SINGLE_OP; + if (op & SLJIT_SINGLE_OP) { + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src))); break; + } + /* Fall through. 
*/ + case SLJIT_DMOV: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src))); + else + dst_r = src; + } + break; + case SLJIT_DNEG: + FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src))); + break; + case SLJIT_DABS: + FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src))); + break; } - if (dst_fr == TMP_FREG1) { - if (GET_OPCODE(op) == SLJIT_MOVD) - dst_fr = src; - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0)); - } - + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0)); return SLJIT_SUCCESS; } @@ -1746,15 +1929,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_si dst_fr, flags = 0; + sljit_si dst_r, flags = 0; CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG2; + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG2; if (src1 & SLJIT_MEM) { if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { @@ -1793,24 +1979,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile src2 = TMP_FREG2; switch (GET_OPCODE(op)) { - case SLJIT_ADDD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_fr) | FA(src1) | FB(src2))); + case SLJIT_DADD: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2))); break; - case SLJIT_SUBD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_fr) | FA(src1) | FB(src2))); + case SLJIT_DSUB: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2))); break; - case SLJIT_MULD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_fr) | FA(src1) | FC(src2) /* FMUL use FC as src2 */)); + case SLJIT_DMUL: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */)); break; - case SLJIT_DIVD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_fr) | FA(src1) | FB(src2))); + case SLJIT_DDIV: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2))); break; } - if (dst_fr == TMP_FREG2) + if (dst_r == TMP_FREG2) FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); return SLJIT_SUCCESS; @@ -1826,7 +2012,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) { CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); /* For UNUSED dst. Uncommon, but possible. 
*/ @@ -1844,7 +2030,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) { CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); if (FAST_IS_REG(src)) @@ -1868,7 +2054,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi struct sljit_label *label; CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); + CHECK_PTR(check_sljit_emit_label(compiler)); if (compiler->last_label && compiler->last_label->size == compiler->size) return compiler->last_label; @@ -1882,58 +2068,58 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi static sljit_ins get_bo_bi_flags(sljit_si type) { switch (type) { - case SLJIT_C_EQUAL: + case SLJIT_EQUAL: return (12 << 21) | (2 << 16); - case SLJIT_C_NOT_EQUAL: + case SLJIT_NOT_EQUAL: return (4 << 21) | (2 << 16); - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: + case SLJIT_LESS: + case SLJIT_D_LESS: return (12 << 21) | ((4 + 0) << 16); - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: return (4 << 21) | ((4 + 0) << 16); - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: + case SLJIT_GREATER: + case SLJIT_D_GREATER: return (12 << 21) | ((4 + 1) << 16); - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: return (4 << 21) | ((4 + 1) << 16); - case SLJIT_C_SIG_LESS: + case SLJIT_SIG_LESS: return (12 << 21) | (0 << 16); - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: return (4 << 21) | (0 << 16); - case SLJIT_C_SIG_GREATER: + case SLJIT_SIG_GREATER: return (12 << 21) | (1 << 16); - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: return (4 << 21) | (1 << 16); - case 
SLJIT_C_OVERFLOW: - case SLJIT_C_MUL_OVERFLOW: + case SLJIT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: return (12 << 21) | (3 << 16); - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: return (4 << 21) | (3 << 16); - case SLJIT_C_FLOAT_EQUAL: + case SLJIT_D_EQUAL: return (12 << 21) | ((4 + 2) << 16); - case SLJIT_C_FLOAT_NOT_EQUAL: + case SLJIT_D_NOT_EQUAL: return (4 << 21) | ((4 + 2) << 16); - case SLJIT_C_FLOAT_UNORDERED: + case SLJIT_D_UNORDERED: return (12 << 21) | ((4 + 3) << 16); - case SLJIT_C_FLOAT_ORDERED: + case SLJIT_D_ORDERED: return (4 << 21) | ((4 + 3) << 16); default: @@ -1948,7 +2134,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile sljit_ins bo_bi_flags; CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); bo_bi_flags = get_bo_bi_flags(type & 0xff); if (!bo_bi_flags) @@ -1980,7 +2166,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil sljit_si src_r; CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); if (FAST_IS_REG(src)) { @@ -2035,7 +2221,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com sljit_sw original_dstw = dstw; CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); if (dst == SLJIT_UNUSED) @@ -2058,81 +2244,81 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com srcw = 0; } - switch (type) { - case SLJIT_C_EQUAL: + switch (type & 0xff) { + case SLJIT_EQUAL: GET_CR_BIT(2, reg); break; - case SLJIT_C_NOT_EQUAL: + case SLJIT_NOT_EQUAL: GET_CR_BIT(2, reg); INVERT_BIT(reg); break; - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: + case 
SLJIT_LESS: + case SLJIT_D_LESS: GET_CR_BIT(4 + 0, reg); break; - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: GET_CR_BIT(4 + 0, reg); INVERT_BIT(reg); break; - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: + case SLJIT_GREATER: + case SLJIT_D_GREATER: GET_CR_BIT(4 + 1, reg); break; - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: GET_CR_BIT(4 + 1, reg); INVERT_BIT(reg); break; - case SLJIT_C_SIG_LESS: + case SLJIT_SIG_LESS: GET_CR_BIT(0, reg); break; - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: GET_CR_BIT(0, reg); INVERT_BIT(reg); break; - case SLJIT_C_SIG_GREATER: + case SLJIT_SIG_GREATER: GET_CR_BIT(1, reg); break; - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: GET_CR_BIT(1, reg); INVERT_BIT(reg); break; - case SLJIT_C_OVERFLOW: - case SLJIT_C_MUL_OVERFLOW: + case SLJIT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: GET_CR_BIT(3, reg); break; - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: GET_CR_BIT(3, reg); INVERT_BIT(reg); break; - case SLJIT_C_FLOAT_EQUAL: + case SLJIT_D_EQUAL: GET_CR_BIT(4 + 2, reg); break; - case SLJIT_C_FLOAT_NOT_EQUAL: + case SLJIT_D_NOT_EQUAL: GET_CR_BIT(4 + 2, reg); INVERT_BIT(reg); break; - case SLJIT_C_FLOAT_UNORDERED: + case SLJIT_D_UNORDERED: GET_CR_BIT(4 + 3, reg); break; - case SLJIT_C_FLOAT_ORDERED: + case SLJIT_D_ORDERED: GET_CR_BIT(4 + 3, reg); INVERT_BIT(reg); break; @@ -2159,7 +2345,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif return 
sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0); @@ -2171,7 +2358,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi sljit_si reg; CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); diff --git a/sljit/sljitNativeSPARC_32.c b/sljit/sljitNativeSPARC_32.c index 80479bf..4a2e629 100644 --- a/sljit/sljitNativeSPARC_32.c +++ b/sljit/sljitNativeSPARC_32.c @@ -110,8 +110,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (!(flags & SET_FLAGS)) return SLJIT_SUCCESS; FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, RDY | D(TMP_REG4), DR(TMP_REG4))); - return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_REG4), MOVABLE_INS | SET_FLAGS); + FAIL_IF(push_inst(compiler, RDY | D(TMP_LINK), DR(TMP_LINK))); + return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS); case SLJIT_AND: return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); diff --git a/sljit/sljitNativeSPARC_common.c b/sljit/sljitNativeSPARC_common.c index d6a1e12..327c426 100644 --- a/sljit/sljitNativeSPARC_common.c +++ b/sljit/sljitNativeSPARC_common.c @@ -83,17 +83,16 @@ static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) } /* TMP_REG2 is not used by getput_arg */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_REG4 (SLJIT_NO_REGISTERS + 4) -#define TMP_LINK (SLJIT_NO_REGISTERS + 5) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 
(SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_LINK (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_FREG1 (0) -#define TMP_FREG2 ((SLJIT_FLOAT_REG6 + 1) << 1) +#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { - 0, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 14, 1, 24, 25, 26, 15 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 8, 9, 10, 13, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 11, 12, 15 }; /* --------------------------------------------------------------------- */ @@ -128,10 +127,16 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { #define FCMPS (OPC1(0x2) | OPC3(0x35) | DOP(0x51)) #define FDIVD (OPC1(0x2) | OPC3(0x34) | DOP(0x4e)) #define FDIVS (OPC1(0x2) | OPC3(0x34) | DOP(0x4d)) +#define FDTOI (OPC1(0x2) | OPC3(0x34) | DOP(0xd2)) +#define FDTOS (OPC1(0x2) | OPC3(0x34) | DOP(0xc6)) +#define FITOD (OPC1(0x2) | OPC3(0x34) | DOP(0xc8)) +#define FITOS (OPC1(0x2) | OPC3(0x34) | DOP(0xc4)) #define FMOVS (OPC1(0x2) | OPC3(0x34) | DOP(0x01)) #define FMULD (OPC1(0x2) | OPC3(0x34) | DOP(0x4a)) #define FMULS (OPC1(0x2) | OPC3(0x34) | DOP(0x49)) #define FNEGS (OPC1(0x2) | OPC3(0x34) | DOP(0x05)) +#define FSTOD (OPC1(0x2) | OPC3(0x34) | DOP(0xc9)) +#define FSTOI (OPC1(0x2) | OPC3(0x34) | DOP(0xd1)) #define FSUBD (OPC1(0x2) | OPC3(0x34) | DOP(0x46)) #define FSUBS (OPC1(0x2) | OPC3(0x34) | DOP(0x45)) #define JMPL (OPC1(0x2) | OPC3(0x38)) @@ -278,7 +283,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil struct sljit_const *const_; CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); + CHECK_PTR(check_sljit_generate_code(compiler)); reverse_buf(compiler); code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); @@ -388,6 +393,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* Separates integer and floating point registers */ #define GPR_REG 0x0f 
#define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 #define MEM_MASK 0x1f @@ -412,66 +418,54 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #include "sljitNativeSPARC_64.c" #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - - local_size += 23 * sizeof(sljit_sw); - local_size = (local_size + 7) & ~0x7; + local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; compiler->local_size = local_size; if (local_size <= SIMM_MAX) { - FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | IMM(-local_size), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS)); } else { FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size)); - FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | S2(TMP_REG1), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS)); } - if (args >= 1) - FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG1) | S1(0) | S2A(24), DR(SLJIT_SAVED_REG1))); - if (args >= 2) - FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG2) | S1(0) | S2A(25), DR(SLJIT_SAVED_REG2))); - if (args >= 3) 
- FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG3) | S1(0) | S2A(26), DR(SLJIT_SAVED_REG3))); + /* Arguments are in their appropriate registers. */ return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - local_size += 23 * sizeof(sljit_sw); - compiler->local_size = (local_size + 7) & ~0x7; + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); if (op != SLJIT_MOV || !FAST_IS_REG(src)) { FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - src = SLJIT_SCRATCH_REG1; + src = SLJIT_R0; } FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS)); - return push_inst(compiler, RESTORE | D(SLJIT_SCRATCH_REG1) | S1(src) | S2(0), UNMOVABLE_INS); + return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(src) | S2(0), UNMOVABLE_INS); } /* --------------------------------------------------------------------- */ @@ -767,7 +761,7 @@ static sljit_si 
emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si f SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) { CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); + CHECK(check_sljit_emit_op0(compiler, op)); op = GET_OPCODE(op); switch (op) { @@ -775,28 +769,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return push_inst(compiler, TA, UNMOVABLE_INS); case SLJIT_NOP: return push_inst(compiler, NOP, UNMOVABLE_INS); - case SLJIT_UMUL: - case SLJIT_SMUL: + case SLJIT_LUMUL: + case SLJIT_LSMUL: #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? UMUL : SMUL) | D(SLJIT_SCRATCH_REG1) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG1))); - return push_inst(compiler, RDY | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, (op == SLJIT_LUMUL ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + return push_inst(compiler, RDY | D(SLJIT_R1), DR(SLJIT_R1)); #else #error "Implementation required" #endif - case SLJIT_UDIV: - case SLJIT_SDIV: + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: + case SLJIT_UDIVI: + case SLJIT_SDIVI: + SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - if (op == SLJIT_UDIV) + if ((op | 0x2) == SLJIT_UDIVI) FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS)); else { - FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_SCRATCH_REG1) | IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS)); } - FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_SCRATCH_REG1), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? 
UDIV : SDIV) | D(SLJIT_SCRATCH_REG1) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG1))); - FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_SCRATCH_REG2) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, SUB | D(SLJIT_SCRATCH_REG2) | S1(TMP_REG2) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2))); - return SLJIT_SUCCESS; + if (op <= SLJIT_SDIVMOD) + FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + if (op >= SLJIT_UDIVI) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1))); + return push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1)); #else #error "Implementation required" #endif @@ -812,7 +811,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0; CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); @@ -881,7 +880,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler sljit_si flags = GET_FLAGS(op) ? 
SET_FLAGS : 0; CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); @@ -917,13 +916,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) { - check_sljit_get_register_index(reg); + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); return reg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) { - check_sljit_get_float_register_index(reg); + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return reg << 1; } @@ -931,8 +930,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co void *instruction, sljit_si size) { CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size == 4); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS); } @@ -953,73 +951,139 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7)) #define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? 
single : double) +#define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw)) + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + else + src <<= 1; + + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS)); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) { + FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET); + } + + /* Store the integer value from a floating point register. */ + return emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? 
(dst << 1) : TMP_FREG1; + + if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; +#endif + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + srcw = 0; + } + + if (FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + src = SLJIT_MEM1(SLJIT_SP); + srcw = FLOAT_TMP_MEM_OFFSET; + } + + FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | DA(dst_r) | S2A(TMP_FREG1), MOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + else + src1 <<= 1; + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + else + src2 <<= 1; + + return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(src1) | S2A(src2), FCC_IS_SET | MOVABLE_INS); +} SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_si dst_fr; + sljit_si dst_r; CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); - compiler->cache_arg = 0; compiler->cache_argw = 0; - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, 
TMP_FREG1, dst, dstw, src, srcw)); - dst = TMP_FREG1; - } - else - dst <<= 1; - - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); - src = TMP_FREG2; - } - else - src <<= 1; + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(dst) | S2A(src), FCC_IS_SET | MOVABLE_INS); - } + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; - dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); - src = dst_fr; + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; } else src <<= 1; switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_fr && dst_fr != TMP_FREG1) { - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr) | S2A(src), MOVABLE_INS)); + case SLJIT_DMOV: + if (src != dst_r) { + if (dst_r != TMP_FREG1) { + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r) | S2A(src), MOVABLE_INS)); if (!(op & SLJIT_SINGLE_OP)) - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); } - break; - case SLJIT_NEGD: - FAIL_IF(push_inst(compiler, FNEGS | DA(dst_fr) | S2A(src), MOVABLE_INS)); - if (dst_fr != src && !(op & SLJIT_SINGLE_OP)) - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); - break; - case SLJIT_ABSD: - FAIL_IF(push_inst(compiler, FABSS | DA(dst_fr) | S2A(src), MOVABLE_INS)); - if (dst_fr != src && !(op & SLJIT_SINGLE_OP)) - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); - break; - } - - if (dst_fr == TMP_FREG1) { - if 
(GET_OPCODE(op) == SLJIT_MOVD) - dst_fr = src; - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0)); + else + dst_r = src; + } + break; + case SLJIT_DNEG: + FAIL_IF(push_inst(compiler, FNEGS | DA(dst_r) | S2A(src), MOVABLE_INS)); + if (dst_r != src && !(op & SLJIT_SINGLE_OP)) + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); + break; + case SLJIT_DABS: + FAIL_IF(push_inst(compiler, FABSS | DA(dst_r) | S2A(src), MOVABLE_INS)); + if (dst_r != src && !(op & SLJIT_SINGLE_OP)) + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); + break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | DA(dst_r) | S2A(src), MOVABLE_INS)); + op ^= SLJIT_SINGLE_OP; + break; } + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0)); return SLJIT_SUCCESS; } @@ -1028,15 +1092,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_si dst_fr, flags = 0; + sljit_si dst_r, flags = 0; CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2; + dst_r = FAST_IS_REG(dst) ? 
(dst << 1) : TMP_FREG2; if (src1 & SLJIT_MEM) { if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { @@ -1079,24 +1146,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile src2 = TMP_FREG2; switch (GET_OPCODE(op)) { - case SLJIT_ADDD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); + case SLJIT_DADD: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); break; - case SLJIT_SUBD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); + case SLJIT_DSUB: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); break; - case SLJIT_MULD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); + case SLJIT_DMUL: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); break; - case SLJIT_DIVD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); + case SLJIT_DDIV: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); break; } - if (dst_fr == TMP_FREG2) + if (dst_r == TMP_FREG2) FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); return SLJIT_SUCCESS; @@ -1112,7 +1179,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) { CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); /* For UNUSED dst. Uncommon, but possible. 
*/ @@ -1129,7 +1196,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) { CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); if (FAST_IS_REG(src)) @@ -1152,7 +1219,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi struct sljit_label *label; CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); + CHECK_PTR(check_sljit_emit_label(compiler)); if (compiler->last_label && compiler->last_label->size == compiler->size) return compiler->last_label; @@ -1167,66 +1234,50 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi static sljit_ins get_cc(sljit_si type) { switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_MUL_NOT_OVERFLOW: + case SLJIT_EQUAL: + case SLJIT_MUL_NOT_OVERFLOW: + case SLJIT_D_NOT_EQUAL: /* Unordered. */ return DA(0x1); - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_MUL_OVERFLOW: + case SLJIT_NOT_EQUAL: + case SLJIT_MUL_OVERFLOW: + case SLJIT_D_EQUAL: return DA(0x9); - case SLJIT_C_LESS: + case SLJIT_LESS: + case SLJIT_D_GREATER: /* Unordered. */ return DA(0x5); - case SLJIT_C_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL: + case SLJIT_D_LESS_EQUAL: return DA(0xd); - case SLJIT_C_GREATER: + case SLJIT_GREATER: + case SLJIT_D_GREATER_EQUAL: /* Unordered. 
*/ return DA(0xc); - case SLJIT_C_LESS_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS: return DA(0x4); - case SLJIT_C_SIG_LESS: + case SLJIT_SIG_LESS: return DA(0x3); - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: return DA(0xb); - case SLJIT_C_SIG_GREATER: + case SLJIT_SIG_GREATER: return DA(0xa); - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: return DA(0x2); - case SLJIT_C_OVERFLOW: + case SLJIT_OVERFLOW: + case SLJIT_D_UNORDERED: return DA(0x7); - case SLJIT_C_NOT_OVERFLOW: - return DA(0xf); - - case SLJIT_C_FLOAT_EQUAL: - return DA(0x9); - - case SLJIT_C_FLOAT_NOT_EQUAL: /* Unordered. */ - return DA(0x1); - - case SLJIT_C_FLOAT_LESS: - return DA(0x4); - - case SLJIT_C_FLOAT_GREATER_EQUAL: /* Unordered. */ - return DA(0xc); - - case SLJIT_C_FLOAT_LESS_EQUAL: - return DA(0xd); - - case SLJIT_C_FLOAT_GREATER: /* Unordered. */ - return DA(0x5); - - case SLJIT_C_FLOAT_UNORDERED: - return DA(0x7); - - case SLJIT_C_FLOAT_ORDERED: + case SLJIT_NOT_OVERFLOW: + case SLJIT_D_ORDERED: return DA(0xf); default: @@ -1240,14 +1291,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile struct sljit_jump *jump; CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); type &= 0xff; - if (type < SLJIT_C_FLOAT_EQUAL) { + if (type < SLJIT_D_EQUAL) { jump->flags |= IS_COND; if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET)) jump->flags |= IS_MOVABLE; @@ -1287,7 +1338,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil sljit_si src_r; CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); if (FAST_IS_REG(src)) @@ -1324,7 
+1375,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com sljit_si reg, flags = (GET_FLAGS(op) ? SET_FLAGS : 0); CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); if (dst == SLJIT_UNUSED) @@ -1343,7 +1394,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com srcw = 0; } - if (type < SLJIT_C_FLOAT_EQUAL) + type &= 0xff; + if (type < SLJIT_D_EQUAL) FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS)); else FAIL_IF(push_inst(compiler, FBFCC | get_cc(type) | 3, UNMOVABLE_INS)); @@ -1366,7 +1418,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi struct sljit_const *const_; CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); diff --git a/sljit/sljitNativeTILEGX_64.c b/sljit/sljitNativeTILEGX_64.c index d0b392e..4d40392 100644 --- a/sljit/sljitNativeTILEGX_64.c +++ b/sljit/sljitNativeTILEGX_64.c @@ -35,21 +35,21 @@ #define SIMM_16BIT_MIN (-0x8000) #define SIMM_17BIT_MAX (0xffff) #define SIMM_17BIT_MIN (-0x10000) -#define SIMM_32BIT_MIN (-0x80000000) #define SIMM_32BIT_MAX (0x7fffffff) -#define SIMM_48BIT_MIN (0x800000000000L) +#define SIMM_32BIT_MIN (-0x7fffffff - 1) #define SIMM_48BIT_MAX (0x7fffffff0000L) +#define SIMM_48BIT_MIN (-0x800000000000L) #define IMM16(imm) ((imm) & 0xffff) #define UIMM_16BIT_MAX (0xffff) -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define ADDR_TMP (SLJIT_NO_REGISTERS + 4) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 
(SLJIT_NUMBER_OF_REGISTERS + 4) +#define ADDR_TMP (SLJIT_NUMBER_OF_REGISTERS + 5) #define PIC_ADDR_REG TMP_REG2 -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { 63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7 }; @@ -58,11 +58,6 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define TMP_REG2_mapped 16 #define TMP_REG3_mapped 6 #define ADDR_TMP_mapped 7 -#define SLJIT_SAVED_REG1_mapped 30 -#define SLJIT_SAVED_REG2_mapped 31 -#define SLJIT_SAVED_REG3_mapped 32 -#define SLJIT_SAVED_EREG1_mapped 33 -#define SLJIT_SAVED_EREG2_mapped 34 /* Flags are keept in volatile registers. */ #define EQUAL_FLAG 8 @@ -399,6 +394,9 @@ static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) #define SUB(dst, srca, srcb) \ push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__) +#define MUL(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_MULX, dst, srca, srcb, __LINE__) + #define NOR(dst, srca, srcb) \ push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__) @@ -547,8 +545,8 @@ const struct Format* compute_format() const struct Format* match = NULL; const struct Format *b = NULL; - unsigned int i = 0; - for (i; i < sizeof formats / sizeof formats[0]; i++) { + unsigned int i; + for (i = 0; i < sizeof formats / sizeof formats[0]; i++) { b = &formats[i]; if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) { match = b; @@ -625,7 +623,6 @@ tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst) static sljit_si update_buffer(struct sljit_compiler *compiler) { - int count; int i; int orig_index = inst_buf_index; struct jit_instr inst0 = inst_buf[0]; @@ -738,8 +735,10 @@ static sljit_si update_buffer(struct sljit_compiler *compiler) static sljit_si flush_buffer(struct sljit_compiler *compiler) { - while (inst_buf_index != 0) - update_buffer(compiler); + while (inst_buf_index != 0) { + FAIL_IF(update_buffer(compiler)); + } + return 
SLJIT_SUCCESS; } static sljit_si push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line) @@ -787,6 +786,7 @@ static sljit_si push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic o case TILEGX_OPC_ADD: case TILEGX_OPC_AND: case TILEGX_OPC_SUB: + case TILEGX_OPC_MULX: case TILEGX_OPC_OR: case TILEGX_OPC_XOR: case TILEGX_OPC_NOR: @@ -905,7 +905,6 @@ static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ sljit_sw diff; sljit_uw target_addr; sljit_ins *inst; - sljit_ins saved_inst; if (jump->flags & SLJIT_REWRITABLE_JUMP) return code_ptr; @@ -1009,7 +1008,7 @@ SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compi struct sljit_const *const_; CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); + CHECK_PTR(check_sljit_generate_code(compiler)); reverse_buf(compiler); code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); @@ -1173,21 +1172,18 @@ static sljit_si emit_const_64(struct sljit_compiler *compiler, sljit_si dst_ar, return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_ins base; - sljit_ins bundle = 0; - + sljit_si i, tmp; + CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, 
fscratches, fsaveds, local_size); - local_size += (saveds + 1) * sizeof(sljit_sw); + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); local_size = (local_size + 7) & ~7; compiler->local_size = local_size; @@ -1203,59 +1199,52 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil local_size = 0; } + /* Save the return address. */ FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8)); FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8)); - if (saveds >= 1) - FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG1_mapped, -8)); - - if (saveds >= 2) - FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG2_mapped, -8)); - - if (saveds >= 3) - FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG3_mapped, -8)); - - if (saveds >= 4) - FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG1_mapped, -8)); - - if (saveds >= 5) - FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG2_mapped, -8)); - - if (args >= 1) - FAIL_IF(ADD(SLJIT_SAVED_REG1_mapped, 0, ZERO)); + /* Save the S registers. */ + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8)); + } - if (args >= 2) - FAIL_IF(ADD(SLJIT_SAVED_REG2_mapped, 1, ZERO)); + /* Save the R registers that need to be reserved. */ + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8)); + } - if (args >= 3) - FAIL_IF(ADD(SLJIT_SAVED_REG3_mapped, 2, ZERO)); + /* Move the arguments to S registers. 
*/ + for (i = 0; i < args; i++) { + FAIL_IF(ADD(reg_map[SLJIT_S0 - i], i, ZERO)); + } return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - local_size += (saveds + 1) * sizeof(sljit_sw); + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); compiler->local_size = (local_size + 7) & ~7; + + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { sljit_si local_size; sljit_ins base; - int addr_initialized = 0; + sljit_si i, tmp; + sljit_si saveds; CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); @@ -1269,50 +1258,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi local_size = 0; } + /* Restore the return address. 
*/ FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8)); - FAIL_IF(LD(RA, ADDR_TMP_mapped)); - - if (compiler->saveds >= 5) { - FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 48)); - addr_initialized = 1; - - FAIL_IF(LD_ADD(SLJIT_SAVED_EREG2_mapped, ADDR_TMP_mapped, 8)); - } - - if (compiler->saveds >= 4) { - if (addr_initialized == 0) { - FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 40)); - addr_initialized = 1; - } + FAIL_IF(LD_ADD(RA, ADDR_TMP_mapped, -8)); - FAIL_IF(LD_ADD(SLJIT_SAVED_EREG1_mapped, ADDR_TMP_mapped, 8)); + /* Restore the S registers. */ + saveds = compiler->saveds; + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8)); } - if (compiler->saveds >= 3) { - if (addr_initialized == 0) { - FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 32)); - addr_initialized = 1; - } - - FAIL_IF(LD_ADD(SLJIT_SAVED_REG3_mapped, ADDR_TMP_mapped, 8)); - } - - if (compiler->saveds >= 2) { - if (addr_initialized == 0) { - FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 24)); - addr_initialized = 1; - } - - FAIL_IF(LD_ADD(SLJIT_SAVED_REG2_mapped, ADDR_TMP_mapped, 8)); - } - - if (compiler->saveds >= 1) { - if (addr_initialized == 0) { - FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 16)); - /* addr_initialized = 1; no need to initialize as it's the last one. */ - } - - FAIL_IF(LD_ADD(SLJIT_SAVED_REG1_mapped, ADDR_TMP_mapped, 8)); + /* Restore the R registers that need to be reserved. 
*/ + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8)); } if (compiler->local_size <= SIMM_16BIT_MAX) @@ -1591,7 +1550,7 @@ static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) { CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); /* For UNUSED dst. Uncommon, but possible. */ @@ -1608,7 +1567,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) { CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); if (FAST_IS_REG(src)) @@ -1642,9 +1601,11 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (op == SLJIT_MOV_SI) return BFEXTS(reg_map[dst], reg_map[src2], 0, 31); - return BFEXTU(reg_map[dst], reg_map[src2], 0, 31); - } else if (dst != src2) - SLJIT_ASSERT_STOP(); + return BFEXTU(reg_map[dst], reg_map[src2], 0, 31); + } else if (dst != src2) { + SLJIT_ASSERT(src2 == 0); + return ADD(reg_map[dst], reg_map[src2], ZERO); + } return SLJIT_SUCCESS; @@ -1656,8 +1617,10 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return BFEXTS(reg_map[dst], reg_map[src2], 0, 7); return BFEXTU(reg_map[dst], reg_map[src2], 0, 7); - } else if (dst != src2) - SLJIT_ASSERT_STOP(); + } else if (dst != src2) { + SLJIT_ASSERT(src2 == 0); + return ADD(reg_map[dst], reg_map[src2], ZERO); + } return SLJIT_SUCCESS; @@ -1669,8 +1632,10 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return BFEXTS(reg_map[dst], reg_map[src2], 0, 
15); return BFEXTU(reg_map[dst], reg_map[src2], 0, 15); - } else if (dst != src2) - SLJIT_ASSERT_STOP(); + } else if (dst != src2) { + SLJIT_ASSERT(src2 == 0); + return ADD(reg_map[dst], reg_map[src2], ZERO); + } return SLJIT_SUCCESS; @@ -1817,7 +1782,6 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj else { /* Rare ocasion. */ FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); - overflow_ra = TMP_EREG2; } } @@ -1909,6 +1873,17 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj return SLJIT_SUCCESS; + case SLJIT_MUL: + if (flags & SRC2_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG2_mapped, src2)); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + FAIL_IF(MUL(reg_map[dst], reg_map[src1], reg_map[src2])); + + return SLJIT_SUCCESS; + #define EMIT_LOGICAL(op_imm, op_norm) \ if (flags & SRC2_IMM) { \ FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \ @@ -1956,8 +1931,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { \ if (op & SLJIT_SET_E) \ FAIL_IF(push_3_buffer( \ - compiler, op_imm, reg_map[dst], reg_map[src1], \ - src2 & 0x3F, __LINE__)); \ + compiler, op_norm, EQUAL_FLAG, reg_map[src1], \ + reg_map[src2], __LINE__)); \ if (CHECK_FLAGS(SLJIT_SET_E)) \ FAIL_IF(push_3_buffer( \ compiler, op_norm, reg_map[dst], reg_map[src1], \ @@ -2111,66 +2086,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com { sljit_si sugg_dst_ar, dst_ar; sljit_si flags = GET_ALL_FLAGS(op); + sljit_si mem_type = (op & SLJIT_INT_OP) ? 
(INT_DATA | SIGNED_DATA) : WORD_DATA; CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; op = GET_OPCODE(op); + if (op == SLJIT_MOV_SI || op == SLJIT_MOV_UI) + mem_type = INT_DATA | SIGNED_DATA; sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2]; compiler->cache_arg = 0; compiler->cache_argw = 0; if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { ADJUST_LOCAL_OFFSET(src, srcw); - FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw)); + FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw)); src = TMP_REG1; srcw = 0; } - switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_NOT_EQUAL: + switch (type & 0xff) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1)); dst_ar = sugg_dst_ar; break; - case SLJIT_C_LESS: - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_LESS: - case SLJIT_C_FLOAT_GREATER_EQUAL: + case SLJIT_LESS: + case SLJIT_GREATER_EQUAL: dst_ar = ULESS_FLAG; break; - case SLJIT_C_GREATER: - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_GREATER: - case SLJIT_C_FLOAT_LESS_EQUAL: + case SLJIT_GREATER: + case SLJIT_LESS_EQUAL: dst_ar = UGREATER_FLAG; break; - case SLJIT_C_SIG_LESS: - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER_EQUAL: dst_ar = LESS_FLAG; break; - case SLJIT_C_SIG_GREATER: - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_GREATER: + case SLJIT_SIG_LESS_EQUAL: dst_ar = GREATER_FLAG; break; - case SLJIT_C_OVERFLOW: - case SLJIT_C_NOT_OVERFLOW: + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: dst_ar = OVERFLOW_FLAG; break; - case SLJIT_C_MUL_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1)); dst_ar 
= sugg_dst_ar; type ^= 0x1; /* Flip type bit for the XORI below. */ break; - case SLJIT_C_FLOAT_EQUAL: - case SLJIT_C_FLOAT_NOT_EQUAL: - dst_ar = EQUAL_FLAG; - break; default: SLJIT_ASSERT_STOP(); @@ -2186,11 +2156,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com if (op >= SLJIT_ADD) { if (TMP_REG2_mapped != dst_ar) FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO)); - return emit_op(compiler, op | flags, CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); + return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); } if (dst & SLJIT_MEM) - return emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw); + return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw); if (sugg_dst_ar != dst_ar) return ADD(sugg_dst_ar, dst_ar, ZERO); @@ -2200,7 +2170,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) { CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); + CHECK(check_sljit_emit_op0(compiler, op)); op = GET_OPCODE(op); switch (op) { @@ -2210,10 +2180,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler case SLJIT_BREAKPOINT: return PI(BPT); - case SLJIT_UMUL: - case SLJIT_SMUL: - case SLJIT_UDIV: - case SLJIT_SDIV: + case SLJIT_LUMUL: + case SLJIT_LSMUL: + case SLJIT_UDIVI: + case SLJIT_SDIVI: SLJIT_ASSERT_STOP(); } @@ -2223,7 +2193,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, 
srcw); @@ -2279,7 +2249,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); case SLJIT_CLZ: - return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op, (op & SLJIT_INT_OP) ? INT_DATA : WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); } return SLJIT_SUCCESS; @@ -2288,7 +2258,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); @@ -2331,7 +2301,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_comp flush_buffer(compiler); CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); + CHECK_PTR(check_sljit_emit_label(compiler)); if (compiler->last_label && compiler->last_label->size == compiler->size) return compiler->last_label; @@ -2350,7 +2320,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil flush_buffer(compiler); CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); if (FAST_IS_REG(src)) { @@ -2370,7 +2340,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); } - FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO)); + FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); FAIL_IF(ADDI_SOLO(54, 54, -16)); @@ -2381,7 +2351,7 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil /* Register input. */ if (type >= SLJIT_CALL1) - FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO)); + FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO)); @@ -2410,8 +2380,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil return SLJIT_SUCCESS; - } else if (src & SLJIT_MEM) + } else if (src & SLJIT_MEM) { FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); + flush_buffer(compiler); + } FAIL_IF(JR_SOLO(reg_map[src_r])); @@ -2438,7 +2410,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compil flush_buffer(compiler); CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); @@ -2446,48 +2418,42 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compil type &= 0xff; switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_FLOAT_NOT_EQUAL: + case SLJIT_EQUAL: BR_NZ(EQUAL_FLAG); break; - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_FLOAT_EQUAL: + case SLJIT_NOT_EQUAL: BR_Z(EQUAL_FLAG); break; - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: + case SLJIT_LESS: BR_Z(ULESS_FLAG); break; - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL: BR_NZ(ULESS_FLAG); break; - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: + case SLJIT_GREATER: BR_Z(UGREATER_FLAG); break; - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: + case SLJIT_LESS_EQUAL: BR_NZ(UGREATER_FLAG); break; - case SLJIT_C_SIG_LESS: + case SLJIT_SIG_LESS: BR_Z(LESS_FLAG); break; - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: BR_NZ(LESS_FLAG); break; - case SLJIT_C_SIG_GREATER: + case SLJIT_SIG_GREATER: BR_Z(GREATER_FLAG); break; - case 
SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: BR_NZ(GREATER_FLAG); break; - case SLJIT_C_OVERFLOW: - case SLJIT_C_MUL_OVERFLOW: + case SLJIT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: BR_Z(OVERFLOW_FLAG); break; - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: BR_NZ(OVERFLOW_FLAG); break; default: @@ -2511,7 +2477,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compil SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2); /* Cannot be optimized out if type is >= CALL0. */ jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0); - PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO)); + PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); jump->addr = compiler->size; PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped)); } @@ -2542,7 +2508,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_comp flush_buffer(compiler); CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const)); @@ -2578,3 +2544,18 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43); SLJIT_CACHE_FLUSH(inst, inst + 4); } + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + return SLJIT_ERR_UNSUPPORTED; +} + diff --git a/sljit/sljitNativeX86_32.c b/sljit/sljitNativeX86_32.c index dd03f26..d7129c8 100644 --- 
a/sljit/sljitNativeX86_32.c +++ b/sljit/sljitNativeX86_32.c @@ -63,27 +63,25 @@ static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ return code_ptr; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_si size; - sljit_si locals_offset; sljit_ub *inst; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - compiler->scratches = scratches; - compiler->saveds = saveds; compiler->args = args; compiler->flags_saved = 0; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif + size = 1 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); + size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); #else - size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (2 + args * 3) : 0); + size += (args > 0 ? 
(2 + args * 3) : 0); #endif inst = (sljit_ub*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); @@ -96,76 +94,85 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */; } #endif - if (saveds > 2) - PUSH_REG(reg_map[SLJIT_SAVED_REG3]); - if (saveds > 1) - PUSH_REG(reg_map[SLJIT_SAVED_REG2]); - if (saveds > 0) - PUSH_REG(reg_map[SLJIT_SAVED_REG1]); + if (saveds > 2 || scratches > 7) + PUSH_REG(reg_map[SLJIT_S2]); + if (saveds > 1 || scratches > 8) + PUSH_REG(reg_map[SLJIT_S1]); + if (saveds > 0 || scratches > 9) + PUSH_REG(reg_map[SLJIT_S0]); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (args > 0) { *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[SLJIT_SCRATCH_REG3]; + *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2]; } if (args > 1) { *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[SLJIT_SCRATCH_REG2]; + *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1]; } if (args > 2) { *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x4 /* esp */; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */; *inst++ = 0x24; *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. 
*/ } #else if (args > 0) { *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[TMP_REG1]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1]; *inst++ = sizeof(sljit_sw) * 2; } if (args > 1) { *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[TMP_REG1]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1]; *inst++ = sizeof(sljit_sw) * 3; } if (args > 2) { *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | reg_map[TMP_REG1]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1]; *inst++ = sizeof(sljit_sw) * 4; } #endif -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - locals_offset = 2 * sizeof(sljit_uw); -#else - SLJIT_COMPILE_ASSERT(FIXED_LOCALS_OFFSET >= 2 * sizeof(sljit_uw), require_at_least_two_words); - locals_offset = FIXED_LOCALS_OFFSET; -#endif - compiler->scratches_start = locals_offset; - if (scratches > 3) - locals_offset += (scratches - 3) * sizeof(sljit_uw); - compiler->saveds_start = locals_offset; - if (saveds > 3) - locals_offset += (saveds - 3) * sizeof(sljit_uw); - compiler->locals_offset = locals_offset; + SLJIT_COMPILE_ASSERT(SLJIT_LOCALS_OFFSET >= (2 + 4) * sizeof(sljit_uw), require_at_least_two_words); #if defined(__APPLE__) - saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); - local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds; + /* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */ + saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? 
saveds : 3)) * sizeof(sljit_uw); + local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; #else - local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1)); + if (options & SLJIT_DOUBLE_ALIGNMENT) { + local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7); + + inst = (sljit_ub*)ensure_buf(compiler, 1 + 17); + FAIL_IF(!inst); + + INC_SIZE(17); + inst[0] = MOV_r_rm; + inst[1] = MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[SLJIT_SP]; + inst[2] = GROUP_F7; + inst[3] = MOD_REG | (0 << 3) | reg_map[SLJIT_SP]; + *(sljit_sw*)(inst + 4) = 0x4; + inst[8] = JNE_i8; + inst[9] = 6; + inst[10] = GROUP_BINARY_81; + inst[11] = MOD_REG | (5 << 3) | reg_map[SLJIT_SP]; + *(sljit_sw*)(inst + 12) = 0x4; + inst[16] = PUSH_r + reg_map[TMP_REG1]; + } + else + local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3); #endif compiler->local_size = local_size; #ifdef _WIN32 if (local_size > 1024) { #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size)); + FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); #else - local_size -= FIXED_LOCALS_OFFSET; - FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size)); + local_size -= SLJIT_LOCALS_OFFSET; + FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, FIXED_LOCALS_OFFSET)); + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, SLJIT_LOCALS_OFFSET)); #endif FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } @@ -173,41 +180,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil SLJIT_ASSERT(local_size > 0); return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, 
SLJIT_IMM, local_size); + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si locals_offset; - - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - compiler->scratches = scratches; - compiler->saveds = saveds; compiler->args = args; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - locals_offset = 2 * sizeof(sljit_uw); -#else - locals_offset = FIXED_LOCALS_OFFSET; -#endif - compiler->scratches_start = locals_offset; - if (scratches > 3) - locals_offset += (scratches - 3) * sizeof(sljit_uw); - compiler->saveds_start = locals_offset; - if (saveds > 3) - locals_offset += (saveds - 3) * sizeof(sljit_uw); - compiler->locals_offset = locals_offset; #if defined(__APPLE__) - saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); - compiler->local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds; + saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? 
saveds : 3)) * sizeof(sljit_uw); + compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; #else - compiler->local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1)); + if (options & SLJIT_DOUBLE_ALIGNMENT) + compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7); + else + compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3); #endif + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) @@ -216,7 +211,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi sljit_ub *inst; CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); SLJIT_ASSERT(compiler->args >= 0); compiler->flags_saved = 0; @@ -224,9 +219,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi SLJIT_ASSERT(compiler->local_size > 0); FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); + +#if !defined(__APPLE__) + if (compiler->options & SLJIT_DOUBLE_ALIGNMENT) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + + INC_SIZE(3); + inst[0] = MOV_r_rm; + inst[1] = (reg_map[SLJIT_SP] << 3) | 0x4 /* SIB */; + inst[2] = (4 << 3) | reg_map[SLJIT_SP]; + } +#endif - size = 2 + (compiler->saveds <= 3 ? compiler->saveds : 3); + size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) + + (compiler->saveds <= 3 ? 
compiler->saveds : 3); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (compiler->args > 2) size += 2; @@ -239,12 +247,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi INC_SIZE(size); - if (compiler->saveds > 0) - POP_REG(reg_map[SLJIT_SAVED_REG1]); - if (compiler->saveds > 1) - POP_REG(reg_map[SLJIT_SAVED_REG2]); - if (compiler->saveds > 2) - POP_REG(reg_map[SLJIT_SAVED_REG3]); + if (compiler->saveds > 0 || compiler->scratches > 9) + POP_REG(reg_map[SLJIT_S0]); + if (compiler->saveds > 1 || compiler->scratches > 8) + POP_REG(reg_map[SLJIT_S1]); + if (compiler->saveds > 2 || compiler->scratches > 7) + POP_REG(reg_map[SLJIT_S2]); POP_REG(reg_map[TMP_REG1]); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (compiler->args > 2) @@ -280,21 +288,17 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); /* Both size flags cannot be switched on. */ SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); -#if (defined SLJIT_SSE2 && SLJIT_SSE2) /* SSE2 and immediate is not possible. 
*/ SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); -#endif size &= 0xf; inst_size = size; -#if (defined SLJIT_SSE2 && SLJIT_SSE2) if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) inst_size++; -#endif if (flags & EX86_PREF_66) inst_size++; @@ -311,8 +315,8 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si inst_size += sizeof(sljit_sw); } - if ((b & REG_MASK) == SLJIT_LOCALS_REG && !(b & OFFS_REG_MASK)) - b |= TO_OFFS_REG(SLJIT_LOCALS_REG); + if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK)) + b |= TO_OFFS_REG(SLJIT_SP); if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) inst_size += 1; /* SIB byte. */ @@ -348,12 +352,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si /* Encoding the byte. */ INC_SIZE(inst_size); -#if (defined SLJIT_SSE2 && SLJIT_SSE2) if (flags & EX86_PREF_F2) *inst++ = 0xf2; if (flags & EX86_PREF_F3) *inst++ = 0xf3; -#endif if (flags & EX86_PREF_66) *inst++ = 0x66; @@ -366,15 +368,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si if ((a & SLJIT_IMM) || (a == 0)) *buf_ptr = 0; -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - else if (!(flags & EX86_SSE2)) + else if (!(flags & EX86_SSE2_OP1)) *buf_ptr = reg_map[a] << 3; else *buf_ptr = a << 3; -#else - else - *buf_ptr = reg_map[a] << 3; -#endif } else { if (a & SLJIT_IMM) { @@ -388,13 +385,9 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si } if (!(b & SLJIT_MEM)) -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_map[b] : b); -#else - *buf_ptr++ |= MOD_REG + reg_map[b]; -#endif + *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? 
reg_map[b] : b); else if ((b & REG_MASK) != SLJIT_UNUSED) { - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) { + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { if (immb != 0) { if (immb <= 127 && immb >= -128) *buf_ptr |= 0x40; @@ -455,28 +448,28 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2); if (type >= SLJIT_CALL3) - PUSH_REG(reg_map[SLJIT_SCRATCH_REG3]); + PUSH_REG(reg_map[SLJIT_R2]); *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SCRATCH_REG3] << 3) | reg_map[SLJIT_SCRATCH_REG1]; + *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]; #else inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0)); FAIL_IF(!inst); INC_SIZE(4 * (type - SLJIT_CALL0)); *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG1] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */; + *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; *inst++ = 0; if (type >= SLJIT_CALL2) { *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG2] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */; + *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; *inst++ = sizeof(sljit_sw); } if (type >= SLJIT_CALL3) { *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG3] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */; + *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; *inst++ = 2 * sizeof(sljit_sw); } #endif @@ -488,7 +481,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c sljit_ub *inst; CHECK_ERROR(); - 
check_sljit_emit_fast_enter(compiler, dst, dstw); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); CHECK_EXTRA_REGS(dst, dstw, (void)0); @@ -519,7 +512,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * sljit_ub *inst; CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); CHECK_EXTRA_REGS(src, srcw, (void)0); diff --git a/sljit/sljitNativeX86_64.c b/sljit/sljitNativeX86_64.c index 967f3c3..1790d8a 100644 --- a/sljit/sljitNativeX86_64.c +++ b/sljit/sljitNativeX86_64.c @@ -87,118 +87,88 @@ static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si return code_ptr; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si size, pushed_size; + sljit_si i, tmp, size, saved_register_size; sljit_ub *inst; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - compiler->scratches = scratches; - compiler->saveds = saveds; compiler->flags_saved = 0; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif - size = saveds; /* Including the return address saved by the call instruction. 
*/ - pushed_size = (saveds + 1) * sizeof(sljit_sw); -#ifndef _WIN64 - if (saveds >= 2) - size += saveds - 1; -#else - if (saveds >= 4) - size += saveds - 3; - if (scratches >= 5) { - size += (5 - 4) * 2; - pushed_size += sizeof(sljit_sw); - } -#endif - size += args * 3; - if (size > 0) { + saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + size = reg_map[i] >= 8 ? 2 : 1; inst = (sljit_ub*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); - INC_SIZE(size); - if (saveds >= 5) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg); + if (reg_map[i] >= 8) *inst++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]); - } - if (saveds >= 4) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg); - *inst++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]); - } - if (saveds >= 3) { -#ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg); - *inst++ = REX_B; -#else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg); -#endif - PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]); - } - if (saveds >= 2) { -#ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg); - *inst++ = REX_B; -#else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg); -#endif - PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]); - } - if (saveds >= 1) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg); - PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]); - } -#ifdef _WIN64 - if (scratches >= 5) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg); + PUSH_REG(reg_lmap[i]); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + size = reg_map[i] >= 8 ? 
2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) *inst++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]); - } -#endif + PUSH_REG(reg_lmap[i]); + } + + if (args > 0) { + size = args * 3; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); #ifndef _WIN64 if (args > 0) { *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7 /* rdi */; + *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */; } if (args > 1) { *inst++ = REX_W | REX_R; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6 /* rsi */; + *inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */; } if (args > 2) { *inst++ = REX_W | REX_R; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2 /* rdx */; + *inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */; } #else if (args > 0) { *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1 /* rcx */; + *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */; } if (args > 1) { *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2 /* rdx */; + *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */; } if (args > 2) { *inst++ = REX_W | REX_B; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0 /* r8 */; + *inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */; } #endif } - local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; + local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size; compiler->local_size = local_size; + #ifdef _WIN64 if (local_size > 1024) { /* Allocate stack for the callback, which grows the stack. 
*/ @@ -208,9 +178,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil *inst++ = REX_W; *inst++ = GROUP_BINARY_83; *inst++ = MOD_REG | SUB | 4; - /* Pushed size must be divisible by 8. */ - SLJIT_ASSERT(!(pushed_size & 0x7)); - if (pushed_size & 0x8) { + /* Allocated size for registers must be divisible by 8. */ + SLJIT_ASSERT(!(saved_register_size & 0x7)); + /* Aligned to 16 byte. */ + if (saved_register_size & 0x8) { *inst++ = 5 * sizeof(sljit_sw); local_size -= 5 * sizeof(sljit_sw); } else { @@ -218,17 +189,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil local_size -= 4 * sizeof(sljit_sw); } /* Second instruction */ - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] < 8, temporary_reg1_is_loreg); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] < 8, temporary_reg1_is_loreg); *inst++ = REX_W; *inst++ = MOV_rm_i32; - *inst++ = MOD_REG | reg_lmap[SLJIT_SCRATCH_REG1]; + *inst++ = MOD_REG | reg_lmap[SLJIT_R0]; *(sljit_si*)inst = local_size; -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } #endif + SLJIT_ASSERT(local_size > 0); if (local_size <= 127) { inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); @@ -249,59 +222,59 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil *(sljit_si*)inst = local_size; inst += sizeof(sljit_si); } + #ifdef _WIN64 - /* Save xmm6 with MOVAPS instruction. 
*/ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = GROUP_0F; - *(sljit_si*)inst = 0x20247429; + /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */ + if (fscratches >= 6 || fsaveds >= 1) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = GROUP_0F; + *(sljit_si*)inst = 0x20247429; + } #endif return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si pushed_size; + sljit_si saved_register_size; - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); - - compiler->scratches = scratches; - compiler->saveds = saveds; -#if (defined SLJIT_DEBUG && SLJIT_DEBUG) - compiler->logical_local_size = local_size; -#endif + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); /* Including the return address saved by the call instruction. 
*/ - pushed_size = (saveds + 1) * sizeof(sljit_sw); -#ifdef _WIN64 - if (scratches >= 5) - pushed_size += sizeof(sljit_sw); -#endif - compiler->local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; + saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size; + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { - sljit_si size; + sljit_si i, tmp, size; sljit_ub *inst; CHECK_ERROR(); - check_sljit_emit_return(compiler, op, src, srcw); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); compiler->flags_saved = 0; FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); #ifdef _WIN64 - /* Restore xmm6 with MOVAPS instruction. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = GROUP_0F; - *(sljit_si*)inst = 0x20247428; + /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */ + if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = GROUP_0F; + *(sljit_si*)inst = 0x20247428; + } #endif + SLJIT_ASSERT(compiler->local_size > 0); if (compiler->local_size <= 127) { inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); @@ -322,50 +295,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi *(sljit_si*)inst = compiler->local_size; } - size = 1 + compiler->saveds; -#ifndef _WIN64 - if (compiler->saveds >= 2) - size += compiler->saveds - 1; -#else - if (compiler->saveds >= 4) - size += compiler->saveds - 3; - if (compiler->scratches >= 5) - size += (5 - 4) * 2; -#endif - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - - INC_SIZE(size); - -#ifdef _WIN64 - if (compiler->scratches >= 5) { - *inst++ = REX_B; - 
POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]); - } -#endif - if (compiler->saveds >= 1) - POP_REG(reg_map[SLJIT_SAVED_REG1]); - if (compiler->saveds >= 2) { -#ifndef _WIN64 - *inst++ = REX_B; -#endif - POP_REG(reg_lmap[SLJIT_SAVED_REG2]); - } - if (compiler->saveds >= 3) { -#ifndef _WIN64 - *inst++ = REX_B; -#endif - POP_REG(reg_lmap[SLJIT_SAVED_REG3]); - } - if (compiler->saveds >= 4) { - *inst++ = REX_B; - POP_REG(reg_lmap[SLJIT_SAVED_EREG1]); + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); } - if (compiler->saveds >= 5) { - *inst++ = REX_B; - POP_REG(reg_lmap[SLJIT_SAVED_EREG2]); + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); } + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); RET(); return SLJIT_SUCCESS; } @@ -409,72 +363,67 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); /* Both size flags cannot be switched on. */ SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); -#if (defined SLJIT_SSE2 && SLJIT_SSE2) /* SSE2 and immediate is not possible. 
*/ SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); -#endif size &= 0xf; inst_size = size; - if ((b & SLJIT_MEM) && !(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) { - if (emit_load_imm64(compiler, TMP_REG3, immb)) - return NULL; - immb = 0; - if (b & REG_MASK) - b |= TO_OFFS_REG(TMP_REG3); - else - b |= TMP_REG3; - } - if (!compiler->mode32 && !(flags & EX86_NO_REXW)) rex |= REX_W; else if (flags & EX86_REX) rex |= REX; -#if (defined SLJIT_SSE2 && SLJIT_SSE2) if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) inst_size++; -#endif if (flags & EX86_PREF_66) inst_size++; /* Calculate size of b. */ inst_size += 1; /* mod r/m byte. */ if (b & SLJIT_MEM) { + if (!(b & OFFS_REG_MASK)) { + if (NOT_HALFWORD(immb)) { + if (emit_load_imm64(compiler, TMP_REG3, immb)) + return NULL; + immb = 0; + if (b & REG_MASK) + b |= TO_OFFS_REG(TMP_REG3); + else + b |= TMP_REG3; + } + else if (reg_lmap[b & REG_MASK] == 4) + b |= TO_OFFS_REG(SLJIT_SP); + } + if ((b & REG_MASK) == SLJIT_UNUSED) inst_size += 1 + sizeof(sljit_si); /* SIB byte required to avoid RIP based addressing. */ else { if (reg_map[b & REG_MASK] >= 8) rex |= REX_B; - if (immb != 0 && !(b & OFFS_REG_MASK)) { + + if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) { /* Immediate operand. */ if (immb <= 127 && immb >= -128) inst_size += sizeof(sljit_sb); else inst_size += sizeof(sljit_si); } - } - - if ((b & REG_MASK) == SLJIT_LOCALS_REG && !(b & OFFS_REG_MASK)) - b |= TO_OFFS_REG(SLJIT_LOCALS_REG); + else if (reg_lmap[b & REG_MASK] == 5) + inst_size += sizeof(sljit_sb); - if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) { - inst_size += 1; /* SIB byte. 
*/ - if (reg_map[OFFS_REG(b)] >= 8) - rex |= REX_X; + if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) { + inst_size += 1; /* SIB byte. */ + if (reg_map[OFFS_REG(b)] >= 8) + rex |= REX_X; + } } } -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - else if (!(flags & EX86_SSE2) && reg_map[b] >= 8) + else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8) rex |= REX_B; -#else - else if (reg_map[b] >= 8) - rex |= REX_B; -#endif if (a & SLJIT_IMM) { if (flags & EX86_BIN_INS) { @@ -500,13 +449,8 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si else { SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - if (!(flags & EX86_SSE2) && reg_map[a] >= 8) + if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8) rex |= REX_R; -#else - if (reg_map[a] >= 8) - rex |= REX_R; -#endif } if (rex) @@ -517,12 +461,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si /* Encoding the byte. */ INC_SIZE(inst_size); -#if (defined SLJIT_SSE2 && SLJIT_SSE2) if (flags & EX86_PREF_F2) *inst++ = 0xf2; if (flags & EX86_PREF_F3) *inst++ = 0xf3; -#endif if (flags & EX86_PREF_66) *inst++ = 0x66; if (rex) @@ -536,15 +478,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si if ((a & SLJIT_IMM) || (a == 0)) *buf_ptr = 0; -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - else if (!(flags & EX86_SSE2)) + else if (!(flags & EX86_SSE2_OP1)) *buf_ptr = reg_lmap[a] << 3; else *buf_ptr = a << 3; -#else - else - *buf_ptr = reg_lmap[a] << 3; -#endif } else { if (a & SLJIT_IMM) { @@ -558,14 +495,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si } if (!(b & SLJIT_MEM)) -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_lmap[b] : b); -#else - *buf_ptr++ |= MOD_REG + reg_lmap[b]; -#endif + *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? 
reg_lmap[b] : b); else if ((b & REG_MASK) != SLJIT_UNUSED) { - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) { - if (immb != 0) { + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { + if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { if (immb <= 127 && immb >= -128) *buf_ptr |= 0x40; else @@ -579,7 +512,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3); } - if (immb != 0) { + if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { if (immb <= 127 && immb >= -128) *buf_ptr++ = immb; /* 8 bit displacement. */ else { @@ -589,8 +522,12 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si } } else { + if (reg_lmap[b & REG_MASK] == 5) + *buf_ptr |= 0x40; *buf_ptr++ |= 0x04; *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6); + if (reg_lmap[b & REG_MASK] == 5) + *buf_ptr++ = 0; } } else { @@ -621,7 +558,7 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj sljit_ub *inst; #ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 6 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers); inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 
3 : 6)); FAIL_IF(!inst); @@ -629,13 +566,13 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj if (type >= SLJIT_CALL3) { *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3]; + *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2]; } *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1]; + *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0]; #else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 2 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers); inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); FAIL_IF(!inst); @@ -643,11 +580,11 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj if (type >= SLJIT_CALL3) { *inst++ = REX_W | REX_R; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3]; + *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2]; } *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1]; + *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0]; #endif return SLJIT_SUCCESS; } @@ -657,7 +594,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c sljit_ub *inst; CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); /* For UNUSED dst. Uncommon, but possible. 
*/ @@ -694,7 +631,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * sljit_ub *inst; CHECK_ERROR(); - check_sljit_emit_fast_return(compiler, src, srcw); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) { diff --git a/sljit/sljitNativeX86_common.c b/sljit/sljitNativeX86_common.c index 653705f..416c15a 100644 --- a/sljit/sljitNativeX86_common.c +++ b/sljit/sljitNativeX86_common.c @@ -64,51 +64,46 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) /* Last register + 1. */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = { - 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { + 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5 }; #define CHECK_EXTRA_REGS(p, w, do) \ - if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \ - w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \ - p = SLJIT_MEM1(SLJIT_LOCALS_REG); \ - do; \ - } \ - else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \ - w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \ - p = SLJIT_MEM1(SLJIT_LOCALS_REG); \ + if (p >= SLJIT_R3 && p <= SLJIT_R6) { \ + w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \ + p = SLJIT_MEM1(SLJIT_SP); \ do; \ } #else /* SLJIT_CONFIG_X86_32 */ /* Last register + 1. 
*/ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present Note: avoid to use r12 and r13 for memory addessing therefore r12 is better for SAVED_EREG than SAVED_REG. */ #ifndef _WIN64 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9 }; /* low-map. reg_map & 0x7. */ -static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1 +static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1 }; #else /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9 }; /* low-map. reg_map & 0x7. 
*/ -static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1 +static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1 }; #endif @@ -133,9 +128,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { #endif /* SLJIT_CONFIG_X86_32 */ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) #define TMP_FREG (0) -#endif /* Size flags for emit_x86_instruction: */ #define EX86_BIN_INS 0x0010 @@ -145,12 +138,11 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { #define EX86_BYTE_ARG 0x0100 #define EX86_HALF_ARG 0x0200 #define EX86_PREF_66 0x0400 - -#if (defined SLJIT_SSE2 && SLJIT_SSE2) -#define EX86_SSE2 0x0800 -#define EX86_PREF_F2 0x1000 -#define EX86_PREF_F3 0x2000 -#endif +#define EX86_PREF_F2 0x0800 +#define EX86_PREF_F3 0x1000 +#define EX86_SSE2_OP1 0x2000 +#define EX86_SSE2_OP2 0x4000 +#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -179,6 +171,9 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { #define CMP_EAX_i32 0x3d #define CMP_r_rm 0x3b #define CMP_rm_r 0x39 +#define CVTPD2PS_x_xm 0x5a +#define CVTSI2SD_x_rm 0x2a +#define CVTTSD2SI_r_xm 0x2c #define DIV (/* GROUP_F7 */ 6 << 3) #define DIVSD_x_xm 0x5e #define INT3 0xcc @@ -188,6 +183,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { #define IMUL_r_rm_i8 0x6b #define IMUL_r_rm_i32 0x69 #define JE_i8 0x74 +#define JNE_i8 0x75 #define JMP_i8 0xeb #define JMP_i32 0xe9 #define JMP_rm (/* GROUP_FF */ 4 << 3) @@ -239,6 +235,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { #define TEST_EAX_i32 0xa9 #define TEST_rm_r 0x85 #define UCOMISD_x_xm 0x2e +#define UNPCKLPD_x_xm 0x14 #define XCHG_EAX_r 0x90 #define XCHG_r_rm 0x87 #define XOR (/* BINARY */ 6 << 3) @@ -271,12 +268,14 @@ static SLJIT_CONST sljit_ub 
reg_lmap[SLJIT_NO_REGISTERS + 4] = { /* Multithreading does not affect these static variables, since they store built-in CPU features. Therefore they can be overwritten by different threads if they detect the CPU features in the same time. */ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) static sljit_si cpu_has_sse2 = -1; #endif static sljit_si cpu_has_cmov = -1; -#if defined(_MSC_VER) && _MSC_VER >= 1400 +#ifdef _WIN32_WCE +#include <cmnintrin.h> +#elif defined(_MSC_VER) && _MSC_VER >= 1400 #include <intrin.h> #endif @@ -325,7 +324,7 @@ static void get_cpu_features(void) #endif /* _MSC_VER && _MSC_VER >= 1400 */ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) cpu_has_sse2 = (features >> 26) & 0x1; #endif cpu_has_cmov = (features >> 15) & 0x1; @@ -334,54 +333,54 @@ static void get_cpu_features(void) static sljit_ub get_jump_code(sljit_si type) { switch (type) { - case SLJIT_C_EQUAL: - case SLJIT_C_FLOAT_EQUAL: + case SLJIT_EQUAL: + case SLJIT_D_EQUAL: return 0x84 /* je */; - case SLJIT_C_NOT_EQUAL: - case SLJIT_C_FLOAT_NOT_EQUAL: + case SLJIT_NOT_EQUAL: + case SLJIT_D_NOT_EQUAL: return 0x85 /* jne */; - case SLJIT_C_LESS: - case SLJIT_C_FLOAT_LESS: + case SLJIT_LESS: + case SLJIT_D_LESS: return 0x82 /* jc */; - case SLJIT_C_GREATER_EQUAL: - case SLJIT_C_FLOAT_GREATER_EQUAL: + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: return 0x83 /* jae */; - case SLJIT_C_GREATER: - case SLJIT_C_FLOAT_GREATER: + case SLJIT_GREATER: + case SLJIT_D_GREATER: return 0x87 /* jnbe */; - case SLJIT_C_LESS_EQUAL: - case SLJIT_C_FLOAT_LESS_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: return 0x86 /* jbe */; - case SLJIT_C_SIG_LESS: + case SLJIT_SIG_LESS: return 0x8c /* jl */; - case SLJIT_C_SIG_GREATER_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: return 0x8d /* jnl */; - case 
SLJIT_C_SIG_GREATER: + case SLJIT_SIG_GREATER: return 0x8f /* jnle */; - case SLJIT_C_SIG_LESS_EQUAL: + case SLJIT_SIG_LESS_EQUAL: return 0x8e /* jle */; - case SLJIT_C_OVERFLOW: - case SLJIT_C_MUL_OVERFLOW: + case SLJIT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: return 0x80 /* jo */; - case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_MUL_NOT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: return 0x81 /* jno */; - case SLJIT_C_FLOAT_UNORDERED: + case SLJIT_D_UNORDERED: return 0x8a /* jp */; - case SLJIT_C_FLOAT_ORDERED: + case SLJIT_D_ORDERED: return 0x8b /* jpo */; } return 0; @@ -460,7 +459,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil struct sljit_const *const_; CHECK_ERROR_PTR(); - check_sljit_generate_code(compiler); + CHECK_PTR(check_sljit_generate_code(compiler)); reverse_buf(compiler); /* Second code generation pass. */ @@ -728,7 +727,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler #endif CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); + CHECK(check_sljit_emit_op0(compiler, op)); switch (GET_OPCODE(op)) { case SLJIT_BREAKPOINT: @@ -743,33 +742,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler INC_SIZE(1); *inst = NOP; break; - case SLJIT_UMUL: - case SLJIT_SMUL: - case SLJIT_UDIV: - case SLJIT_SDIV: + case SLJIT_LUMUL: + case SLJIT_LSMUL: + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: + case SLJIT_UDIVI: + case SLJIT_SDIVI: compiler->flags_saved = 0; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #ifdef _WIN64 SLJIT_COMPILE_ASSERT( - reg_map[SLJIT_SCRATCH_REG1] == 0 - && reg_map[SLJIT_SCRATCH_REG2] == 2 + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] == 2 && reg_map[TMP_REG1] > 7, invalid_register_assignment_for_div_mul); #else SLJIT_COMPILE_ASSERT( - reg_map[SLJIT_SCRATCH_REG1] == 0 - && reg_map[SLJIT_SCRATCH_REG2] < 7 + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] < 7 && reg_map[TMP_REG1] == 2, 
invalid_register_assignment_for_div_mul); #endif compiler->mode32 = op & SLJIT_INT_OP; #endif + SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); op = GET_OPCODE(op); - if (op == SLJIT_UDIV) { + if ((op | 0x2) == SLJIT_UDIVI) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0); - inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); #else inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); #endif @@ -777,9 +779,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler *inst = XOR_r_rm; } - if (op == SLJIT_SDIV) { + if ((op | 0x2) == SLJIT_SDIVI) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); #endif #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -808,10 +810,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler FAIL_IF(!inst); INC_SIZE(2); *inst++ = GROUP_F7; - *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_SCRATCH_REG2]); + *inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); #else #ifdef _WIN64 - size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2; + size = (!compiler->mode32 || op >= SLJIT_UDIVMOD) ? 3 : 2; #else size = (!compiler->mode32) ? 3 : 2; #endif @@ -820,34 +822,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler INC_SIZE(size); #ifdef _WIN64 if (!compiler->mode32) - *inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0); - else if (op >= SLJIT_UDIV) + *inst++ = REX_W | ((op >= SLJIT_UDIVMOD) ? 
REX_B : 0); + else if (op >= SLJIT_UDIVMOD) *inst++ = REX_B; *inst++ = GROUP_F7; - *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_SCRATCH_REG2]); + *inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); #else if (!compiler->mode32) *inst++ = REX_W; *inst++ = GROUP_F7; - *inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2]; + *inst = MOD_REG | reg_map[SLJIT_R1]; #endif #endif switch (op) { - case SLJIT_UMUL: + case SLJIT_LUMUL: *inst |= MUL; break; - case SLJIT_SMUL: + case SLJIT_LSMUL: *inst |= IMUL; break; - case SLJIT_UDIV: + case SLJIT_UDIVMOD: + case SLJIT_UDIVI: *inst |= DIV; break; - case SLJIT_SDIV: + case SLJIT_SDIVMOD: + case SLJIT_SDIVI: *inst |= IDIV; break; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) - EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REG1, 0); + if (op <= SLJIT_SDIVMOD) + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); +#else + if (op >= SLJIT_UDIVI) + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); #endif break; } @@ -956,22 +964,22 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (dst_r == TMP_REG1) { /* Find a non-used register, whose reg_map[src] < 4. 
*/ - if ((dst & REG_MASK) == SLJIT_SCRATCH_REG1) { - if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SCRATCH_REG2)) - work_r = SLJIT_SCRATCH_REG3; + if ((dst & REG_MASK) == SLJIT_R0) { + if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1)) + work_r = SLJIT_R2; else - work_r = SLJIT_SCRATCH_REG2; + work_r = SLJIT_R1; } else { - if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1)) - work_r = SLJIT_SCRATCH_REG1; - else if ((dst & REG_MASK) == SLJIT_SCRATCH_REG2) - work_r = SLJIT_SCRATCH_REG3; + if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) + work_r = SLJIT_R0; + else if ((dst & REG_MASK) == SLJIT_R1) + work_r = SLJIT_R2; else - work_r = SLJIT_SCRATCH_REG2; + work_r = SLJIT_R1; } - if (work_r == SLJIT_SCRATCH_REG1) { + if (work_r == SLJIT_R0) { ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); } else { @@ -984,7 +992,7 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, FAIL_IF(!inst); *inst = MOV_rm8_r8; - if (work_r == SLJIT_SCRATCH_REG1) { + if (work_r == SLJIT_R0) { ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); } else { @@ -1179,12 +1187,12 @@ static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, dst_r = dst; else { /* Find an unused temporary register. 
*/ - if ((dst & REG_MASK) != SLJIT_SCRATCH_REG1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1)) - dst_r = SLJIT_SCRATCH_REG1; - else if ((dst & REG_MASK) != SLJIT_SCRATCH_REG2 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG2)) - dst_r = SLJIT_SCRATCH_REG2; + if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) + dst_r = SLJIT_R0; + else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1)) + dst_r = SLJIT_R1; else - dst_r = SLJIT_SCRATCH_REG3; + dst_r = SLJIT_R2; EMIT_MOV(compiler, dst, dstw, dst_r, 0); } EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31); @@ -1262,7 +1270,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #endif CHECK_ERROR(); - check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); @@ -1340,7 +1348,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { - SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG)); + SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); dst = TMP_REG1; } #endif @@ -1378,7 +1386,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) - return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REG1, 0); + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); #endif if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) { @@ -1470,9 +1478,9 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, if (dst == src1 && dstw == src1w) { if 
(src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) { + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(op_eax_imm, src2w); } @@ -1504,9 +1512,9 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, if (dst == src2 && dstw == src2w) { if (src1 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { #else - if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) { + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) { #endif BINARY_EAX_IMM(op_eax_imm, src1w); } @@ -1586,9 +1594,9 @@ static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, if (dst == src1 && dstw == src1w) { if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) { + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(op_eax_imm, src2w); } @@ -1747,7 +1755,7 @@ static sljit_si emit_mul(struct sljit_compiler *compiler, *(sljit_si*)inst = (sljit_si)src2w; } else { - EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w); + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w); if (dst_r != src1) EMIT_MOV(compiler, dst_r, 0, src1, src1w); inst = 
emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); @@ -1840,9 +1848,9 @@ static sljit_si emit_cmp_binary(struct sljit_compiler *compiler, sljit_ub* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(CMP_EAX_i32, src2w); return SLJIT_SUCCESS; @@ -1891,77 +1899,79 @@ static sljit_si emit_test_binary(struct sljit_compiler *compiler, sljit_ub* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(TEST_EAX_i32, src2w); return SLJIT_SUCCESS; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src2 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { + if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { #else - if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { + if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { #endif BINARY_EAX_IMM(TEST_EAX_i32, src1w); return SLJIT_SUCCESS; } - if (FAST_IS_REG(src1)) { + if (!(src1 & 
SLJIT_IMM)) { if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src2w) || compiler->mode32) { - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); FAIL_IF(!inst); *inst = GROUP_F7; } else { FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0); + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w); FAIL_IF(!inst); *inst = TEST_rm_r; } #else - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); FAIL_IF(!inst); *inst = GROUP_F7; #endif + return SLJIT_SUCCESS; } - else { + else if (FAST_IS_REG(src1)) { inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); FAIL_IF(!inst); *inst = TEST_rm_r; + return SLJIT_SUCCESS; } - return SLJIT_SUCCESS; } - if (FAST_IS_REG(src2)) { + if (!(src2 & SLJIT_IMM)) { if (src1 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src1w) || compiler->mode32) { - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w); FAIL_IF(!inst); *inst = GROUP_F7; } else { FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0); + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w); FAIL_IF(!inst); *inst = TEST_rm_r; } #else - inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0); + inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w); FAIL_IF(!inst); *inst = GROUP_F7; #endif + return SLJIT_SUCCESS; } - else { + else if (FAST_IS_REG(src2)) { inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); FAIL_IF(!inst); *inst = TEST_rm_r; + return SLJIT_SUCCESS; } - return SLJIT_SUCCESS; } EMIT_MOV(compiler, TMP_REG1, 0, 
src1, src1w); @@ -2064,7 +2074,7 @@ static sljit_si emit_shift(struct sljit_compiler *compiler, EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); #else /* [esp+0] contains the flags. */ - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); #endif EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); @@ -2073,7 +2083,7 @@ static sljit_si emit_shift(struct sljit_compiler *compiler, #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); #else - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw)); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); #endif EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); } @@ -2122,7 +2132,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler sljit_si src2, sljit_sw src2w) { CHECK_ERROR(); - check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); @@ -2213,10 +2223,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) { - check_sljit_get_register_index(reg); + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2 - || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2) + if (reg >= SLJIT_R3 && reg <= SLJIT_R6) return -1; #endif return reg_map[reg]; @@ -2224,7 +2233,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si 
sljit_get_register_index(sljit_si reg) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) { - check_sljit_get_float_register_index(reg); + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); return reg; } @@ -2234,8 +2243,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co sljit_ub *inst; CHECK_ERROR(); - check_sljit_emit_op_custom(compiler, instruction, size); - SLJIT_ASSERT(size > 0 && size < 16); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); inst = (sljit_ub*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); @@ -2248,8 +2256,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co /* Floating point operators */ /* --------------------------------------------------------------------- */ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - /* Alignment + 2 * 16 bytes. */ static sljit_si sse2_data[3 + (4 + 4) * 2]; static sljit_si *sse2_buffer; @@ -2267,27 +2273,19 @@ static void init_compiler(void) sse2_buffer[13] = 0x7fffffff; } -#endif - SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) { #ifdef SLJIT_IS_FPU_AVAILABLE return SLJIT_IS_FPU_AVAILABLE; -#elif (defined SLJIT_SSE2 && SLJIT_SSE2) -#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) if (cpu_has_sse2 == -1) get_cpu_features(); return cpu_has_sse2; #else /* SLJIT_DETECT_SSE2 */ return 1; #endif /* SLJIT_DETECT_SSE2 */ -#else /* SLJIT_SSE2 */ - return 0; -#endif } -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode, sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) { @@ -2324,31 +2322,89 @@ static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler, return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, +static SLJIT_INLINE sljit_si 
sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_si dst_r; + sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + sljit_ub *inst; - CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVW_FROMD) + compiler->mode32 = 0; +#endif + + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CVTTSD2SI_r_xm; + + if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + sljit_ub *inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMW) + compiler->mode32 = 0; +#endif + + if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; +#endif + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + srcw = 0; + } + + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CVTSI2SD_x_rm; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 1; #endif + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} - if (GET_OPCODE(op) == SLJIT_CMPD) { - compiler->flags_saved = 0; - if (FAST_IS_REG(dst)) - dst_r = dst; - else { - dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw)); - } - return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw); +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + compiler->flags_saved = 0; + if (!FAST_IS_REG(src1)) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); + src1 = TMP_FREG; } + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w); +} - if (op == SLJIT_MOVD) { +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + CHECK_ERROR(); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_DMOV) { if (FAST_IS_REG(dst)) return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw); if (FAST_IS_REG(src)) @@ -2357,6 +2413,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); } + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) { + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + if (FAST_IS_REG(src)) { + /* We overwrite the high bits of source. 
From SLJIT point of view, + this is not an issue. + Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ + FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0)); + } + else { + FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw)); + src = TMP_FREG; + } + + FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0)); + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; + } + if (SLOW_IS_REG(dst)) { dst_r = dst; if (dst != src) @@ -2368,11 +2443,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile } switch (GET_OPCODE(op)) { - case SLJIT_NEGD: + case SLJIT_DNEG: FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8))); break; - case SLJIT_ABSD: + case SLJIT_DABS: FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12))); break; } @@ -2390,7 +2465,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile sljit_si dst_r; CHECK_ERROR(); - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 1; @@ -2400,7 +2478,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile dst_r = dst; if (dst == src1) ; /* Do nothing here. */ - else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) { + else if (dst == src2 && (op == SLJIT_DADD || op == SLJIT_DMUL)) { /* Swap arguments. 
*/ src2 = src1; src2w = src1w; @@ -2418,19 +2496,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile } switch (GET_OPCODE(op)) { - case SLJIT_ADDD: + case SLJIT_DADD: FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); break; - case SLJIT_SUBD: + case SLJIT_DSUB: FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); break; - case SLJIT_MULD: + case SLJIT_DMUL: FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); break; - case SLJIT_DIVD: + case SLJIT_DDIV: FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); break; } @@ -2440,33 +2518,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile return SLJIT_SUCCESS; } -#else - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - CHECK_ERROR(); - /* Should cause an assertion fail. */ - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - compiler->error = SLJIT_ERR_UNSUPPORTED; - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - CHECK_ERROR(); - /* Should cause an assertion fail. 
*/ - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - compiler->error = SLJIT_ERR_UNSUPPORTED; - return SLJIT_ERR_UNSUPPORTED; -} - -#endif - /* --------------------------------------------------------------------- */ /* Conditional instructions */ /* --------------------------------------------------------------------- */ @@ -2477,7 +2528,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi struct sljit_label *label; CHECK_ERROR_PTR(); - check_sljit_emit_label(compiler); + CHECK_PTR(check_sljit_emit_label(compiler)); /* We should restore the flags before the label, since other taken jumps has their own flags as well. */ @@ -2506,7 +2557,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile struct sljit_jump *jump; CHECK_ERROR_PTR(); - check_sljit_emit_jump(compiler, type); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); if (SLJIT_UNLIKELY(compiler->flags_saved)) { if ((type & 0xff) <= SLJIT_JUMP) @@ -2543,7 +2594,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil struct sljit_jump *jump; CHECK_ERROR(); - check_sljit_emit_ijump(compiler, type, src, srcw); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); CHECK_EXTRA_REGS(src, srcw, (void)0); @@ -2557,16 +2608,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil if (type >= SLJIT_CALL1) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (src == SLJIT_SCRATCH_REG3) { + if (src == SLJIT_R2) { EMIT_MOV(compiler, TMP_REG1, 0, src, 0); src = TMP_REG1; } - if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3) + if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3) srcw += sizeof(sljit_sw); #endif #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64) - if (src == SLJIT_SCRATCH_REG3) { + if (src == SLJIT_R2) 
{ EMIT_MOV(compiler, TMP_REG1, 0, src, 0); src = TMP_REG1; } @@ -2622,7 +2673,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com #endif CHECK_ERROR(); - check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + SLJIT_UNUSED_ARG(srcw); if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; @@ -2632,6 +2684,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com if (SLJIT_UNLIKELY(compiler->flags_saved)) FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS)); + type &= 0xff; /* setcc = jcc + 0x10. */ cond_set = get_jump_code(type) + 0x10; @@ -2673,7 +2726,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; return emit_mov(compiler, dst, dstw, TMP_REG1, 0); } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0); @@ -2732,8 +2786,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com } if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax); - if (dst != SLJIT_SCRATCH_REG1) { + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax); + if (dst != SLJIT_R0) { inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); FAIL_IF(!inst); INC_SIZE(1 + 3 + 2 + 1); @@ -2783,7 +2837,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com if (GET_OPCODE(op) < SLJIT_ADD) return emit_mov(compiler, dst, dstw, TMP_REG1, 0); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && 
SLJIT_DEBUG) +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); @@ -2793,7 +2848,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) { CHECK_ERROR(); - check_sljit_get_local_base(compiler, dst, dstw, offset); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); ADJUST_LOCAL_OFFSET(dst, dstw); CHECK_EXTRA_REGS(dst, dstw, (void)0); @@ -2802,23 +2857,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *co compiler->mode32 = 0; #endif - ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset); + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (NOT_HALFWORD(offset)) { FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset)); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) - SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); + SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); return compiler->error; #else - return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0); + return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0); #endif } #endif if (offset != 0) - return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset); - return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0); + return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return emit_mov(compiler, dst, dstw, SLJIT_SP, 0); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* 
sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) @@ -2830,7 +2885,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi #endif CHECK_ERROR_PTR(); - check_sljit_emit_const(compiler, dst, dstw, init_value); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); ADJUST_LOCAL_OFFSET(dst, dstw); CHECK_EXTRA_REGS(dst, dstw, (void)0); @@ -2881,3 +2936,69 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta { *(sljit_sw*)addr = new_constant; } + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void) +{ +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (cpu_has_sse2 == -1) + get_cpu_features(); + return cpu_has_sse2; +#else + return 1; +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void) +{ + if (cpu_has_cmov == -1) + get_cpu_features(); + return cpu_has_cmov; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler, + sljit_si type, + sljit_si dst_reg, + sljit_si src, sljit_sw srcw) +{ + sljit_ub* inst; + + CHECK_ERROR(); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_x86_is_cmov_available()); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP)); + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " x86_cmov%s %s%s, ", + !(dst_reg & SLJIT_INT_OP) ? 
"" : ".i", + JUMP_PREFIX(type), jump_names[type & 0xff]); + sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = dst_reg & SLJIT_INT_OP; +#endif + dst_reg &= ~SLJIT_INT_OP; + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); + src = TMP_REG1; + srcw = 0; + } + + inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = get_jump_code(type & 0xff) - 0x40; + return SLJIT_SUCCESS; +} diff --git a/sljit/sljitUtils.c b/sljit/sljitUtils.c index b29b403..5294b5f 100644 --- a/sljit/sljitUtils.c +++ b/sljit/sljitUtils.c @@ -200,7 +200,7 @@ static SLJIT_INLINE sljit_si open_dev_zero(void) /* Planning to make it even more clever in the future. */ static sljit_sw sljit_page_align = 0; -SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data) { struct sljit_stack *stack; union { @@ -211,6 +211,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(slj SYSTEM_INFO si; #endif + SLJIT_UNUSED_ARG(allocator_data); if (limit > max_limit || limit < 1) return NULL; @@ -232,21 +233,21 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(slj /* Align limit and max_limit. 
*/ max_limit = (max_limit + sljit_page_align) & ~sljit_page_align; - stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack)); + stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data); if (!stack) return NULL; #ifdef _WIN32 base.ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE); if (!base.ptr) { - SLJIT_FREE(stack); + SLJIT_FREE(stack, allocator_data); return NULL; } stack->base = base.uw; stack->limit = stack->base; stack->max_limit = stack->base + max_limit; if (sljit_stack_resize(stack, stack->base + limit)) { - sljit_free_stack(stack); + sljit_free_stack(stack, allocator_data); return NULL; } #else @@ -255,14 +256,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(slj #else if (dev_zero < 0) { if (open_dev_zero()) { - SLJIT_FREE(stack); + SLJIT_FREE(stack, allocator_data); return NULL; } } base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0); #endif if (base.ptr == MAP_FAILED) { - SLJIT_FREE(stack); + SLJIT_FREE(stack, allocator_data); return NULL; } stack->base = base.uw; @@ -275,14 +276,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(slj #undef PAGE_ALIGN -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack) +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack, void *allocator_data) { + SLJIT_UNUSED_ARG(allocator_data); #ifdef _WIN32 VirtualFree((void*)stack->base, 0, MEM_RELEASE); #else munmap((void*)stack->base, stack->max_limit - stack->base); #endif - SLJIT_FREE(stack); + SLJIT_FREE(stack, allocator_data); } SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_uw new_limit) |