Portable void (non-returning) function declaration.
Guaranteed accurate (neither rounded nor optimized) arithmetic when coded in C.
1∶1 inline assembler to assembler output*.
RIP-relative addressing extended beyond the ±2 GB displacement limit thanks to @GOTPCREL.
RBP and RSP as general-purpose registers (stackless implementation, not using push/pop).
Boundless code and data offsets, while still using -mcmodel=small.
Example of an x86 memory-to-memory move (movsq) in the code.
*: ⁶⁄₇ of the above accomplished using compiler/linker flags! A clean canvas with all known limits unplugged.
/** UNASM 1.0 (ELF64, license: CC BY-SA 3.0): $ gcc -falign-jumps=0 -falign-loops=0 -fasynchronous-unwind-tables -fcf-protection=none -ffreestanding -fno-align-functions -fno-builtin -fno-gnu-unique -fno-lto -fno-plt -fno-jump-tables -fomit-frame-pointer -fpic -freg-struct-return -fshort-enums -fstack-reuse=none -fstrict-volatile-bitfields -ftls-model=global-dynamic -fverbose-asm -fvisibility=internal -lgcc -mabi=sysv -maddress-mode=long -malign-data=cacheline -march=native -masm=att -mcet-switch -mcmodel=small -mfunction-return=keep -mfpmath=both -mhard-float -mieee-fp -mindirect-branch=keep -minstrument-return=none -mlong-double-128 -mmovbe -m8bit-idiv -mno-align-stringops -mno-default -mno-needed -mno-red-zone -mno-push-args -mno-recip -mpc80 -msahf -mskip-rax-setup -mstringop-strategy=loop -mtls-dialect=gnu2 -mtls-direct-seg-refs -mtune-ctrl=use_mov0,256_unaligned_load_optimal,256_unaligned_store_optimal,always_fancy_math_387,avoid_false_dep_for_bmi,avoid_mfence,avoid_mem_opnd_for_cmove,avx128_optimal,branch_prediction_hints,ext_80387_constants,fast_prefix,general_regs_sse_spill,himode_math,inter_unit_conversions,inter_unit_moves_from_vec,inter_unit_moves_to_vec,epilogue_using_move,opt_agu,prefer_known_rep_movsb_stosb,prologue_using_move,read_modify,read_modify_write,schedule,shift1,single_stringop,sse_unaligned_load_optimal,sse_unaligned_store_optimal,qimode_math,use_bt,use_cltd,use_himode_fiop,use_incdec,use_leave,use_sahf,use_simode_fiop,use_vector_converts,use_vector_fp_converts -mveclibabi=acml -n -N -no-pie -nodefaultlibs -nolibc -nostartfiles -nostdlib -O0 -pedantic -pipe -rdynamic -static -std=gnu2x -time -v -Q -Wall -Wextra -Wl,--no-relax -Wl,--noinhibit-exec -Wl,-Tdata=0x900000000 -Wl,-Ttext-segment=0x200000000 -Wl,--warn-unresolved-symbols -Wl,-z,norelro UNASM.c -o UNASM && ./UNASM
# -mindirect-branch-cs-prefix -Wl,-z,relro,-z,now -mtune-ctrl=fuse_alu_and_branch,fuse_alu_and_branch_mem,fuse_alu_and_branch_mem_imm,fuse_alu_and_branch_rip_relative,fuse_cmp_and_branch_32,fuse_cmp_and_branch_64,fuse_cmp_and_branch_soflags,fuse_mov_and_alu,slow_imul_imm8,slow_imul_imm32_mem,use_ffreep.
Debugging: $ -fvar-tracking-assignments -g3 -gdwarf-5 -ggdb3 -gno-inline-points -grecord-gcc-switches --coverage
Thesis: The x64 could benefit from a conditional lea instruction, which enables more efficiency when the same label is used more than once in a rip-relative environment with a conditional jmp.
It seems better to use cos (x) than sin (x) for calculations with x due to its algebraic simplicity and a simpler power series, and due to its relation to it (x ÷ r, instead of ƒ(x) ÷ r). **/
/* Save slot: _start stores the %r15 value it was entered with here and reloads it later. */
long r15 = 0;
/* Zero-initialised quadword that _start reads through its GOT entry when it replaces %rsp. */
long rsp = 0;
/*
 * _start - freestanding entry point written entirely in GCC extended inline
 * assembly (x86-64, AT&T operand order, Linux syscall convention).
 * Takes no arguments and never returns.
 *
 * Sequence:
 *   1. Store the incoming %r15 in the global `r15`, then load the address of
 *      _GLOBAL_OFFSET_TABLE_ into %r15.
 *   2. psadbw the GOT address against the address of _start and store the low
 *      quadword of the result at _GLOBAL_OFFSET_TABLE_.
 *   3. If %rsp is non-zero: copy it to %rdx, replace %rsp with the quadword
 *      held in the global `rsp`, store the copy in the GOT slot of `rsp` and
 *      clear %rdx. If %rsp is zero, the store and the clear are skipped.
 *   4. Reload %r15 from the global `r15` and reload %rsp through the GOT slot
 *      of `rsp`.
 *   5. Copy one quadword with movsq, then exit(0).
 *
 * Registers written: %rax, %rdx, %rsi, %rdi, %rsp, %r15, %xmm8, %xmm9, flags.
 * The asm statement itself lists only "memory" as a clobber.
 */
[[noreturn]] void _start(void) {
    __asm__ __inline__ __volatile__(
        ".code64;" // Assemble with 64-bit operands (alternatives: .code32, .code16, .code16gcc).
        ".align 0;"
        "movq %%r15, r15(%%rip);" // Save the incoming %r15 in the global `r15`.
        "movabsq $_GLOBAL_OFFSET_TABLE_, %%r15;" // %r15 = GOT base address.
        "movabsq $_start, %%rax;" // %rax = address of this function.
        "movq %%r15, %%xmm8;"
        "movq %%rax, %%xmm9;"
        "psadbw %%xmm9, %%xmm8;" // SSE2: per 64-bit lane, %xmm8 = sum of |byte differences| of the unsigned bytes (not a plain |a - b|).
        "movq %%xmm8, _GLOBAL_OFFSET_TABLE_(%%rip);" // Store the low quadword at the _GLOBAL_OFFSET_TABLE_ symbol.
        "cmpq $0x0, %%rsp;" // Deliberately cmp rather than test: the original author notes that test does not define AF.
        "cmovneq %%rsp, %%rdx;" // %rsp != 0: keep a copy of it in %rdx.
        "cmovneq rsp@GOTPCREL(%%rip), %%rsp;" // %rsp = address of the global `rsp`, read from its GOT slot.
        "cmovneq (%%rsp), %%rsp;" // %rsp = value of the global `rsp`; frees %rsp for general-purpose use.
        // cmov leaves the flags alone, so this still tests the cmpq above.
        // Fix: branch to a local label instead of the former hard-coded "+0x1a0"
        // operand, whose documented destination was _start+92, i.e. the
        // instruction right after the two moves below.
        "je 1f;"
        "movq %%rdx, rsp@GOTPCREL(%%rip);" // NOTE(review): this overwrites the GOT slot of `rsp`, not the variable itself - confirm intent.
        "movq $0x0, %%rdx;" // Author's choice: mov $0 instead of the xor/sub zeroing idiom.
        // ...
        "1: "
        "movq r15@GOTPCREL(%%rip), %%r15;" // %r15 = address of the global `r15`.
        "movq (%%r15), %%r15;" // Restore the %r15 value saved on entry.
        "cmpq $0x0, rsp@GOTPCREL(%%rip);" // Tests the GOT slot of `rsp` (written above when the branch was not taken).
        "cmovneq rsp@GOTPCREL(%%rip), %%rsp;" // %rsp = contents of that slot.
        "cmovneq (%%rsp), %%rsp;" // NOTE(review): dereferences the reloaded value once more - confirm this is the intended restore.
        //"movq r15@GOTPCREL(,1), %%rdx;" // symbol@GOTPCREL(,1): symbol → pointer.
        //"jmpq *_start@GOTPCREL(%%rip);" // symbol@GOTPCREL(%%rip): symbol → address; avoiding lea (@GOTPCREL > lea, cmov + jcc > jmp).
        //"movq $0x1, %%rcx;" // Counter; memory to memory move.
        "movq r15@GOTPCREL(%%rip), %%rsi;" // Source pointer for movsq: contents of the GOT slot of `r15`.
        "movq rsp@GOTPCREL(%%rip), %%rdi;" // Destination pointer for movsq: contents of the GOT slot of `rsp`.
        "movsq;" // Memory-to-memory move of one quadword from (%rsi) to (%rdi).
        "movq $0x3c, %%rax;" // exit: %rax = 0x3c (syscall number) ...
        "movq $0x0, %%rdi;" // ... %rdi = 0 (exit status).
        "syscall"
        :
        :
        : "memory");
    __builtin_unreachable(); // Removes the "nop\n ret" stub in the end.
}
# Active GRUB defaults (non-debug variant). Plain shell variable assignments;
# presumably sourced by grub-mkconfig - verify against the target distribution.
# The kernel parameter list is kept on a single line so the value contains no newline.
GRUB_CMDLINE_LINUX="add_efi_memmap acpi=strict acpi_backlight=vendor acpi_enforce_resources=strict acpi_force_table_verification acpi_rsdp=1 acpi_sleep=s4_hwsig alloc_snapshot amd_iommu=pgtbl_v2 amd_iommu_intr=vapic apic_extnmi=none apparmor=1 audit=1 boot_delay=0 bootconfig carrier_timeout=0 checkreqprot=0 clocksource=hpet cpu0_hotplug deferred_probe_timeout=0 delayacct efi=disable_early_pci_dma efi_no_storage_paranoia enable_mtrr_cleanup enable_timer_pin_1 enforcing=1 fw_devlink=rpm gbpages hibernate=protect_image huge=within_size hugetlb_free_vmemmap=on ima_policy=fail_securely init_on_alloc=1 init_on_free=1 integrity_audit=1 io_delay=none iommu=pt iommu.passthrough=1 ipcmni_extend isolcpus=managed_irq kasan_multi_shot kvm-amd.avic=1 kvm.eager_page_split=1 lapic=notscdeadline lapic_timer_c2_ok libata.dma=7 mce=bios_cmci_threshold mem_encrypt=on mem_sleep_default=s2idle memory_corruption_check_period=1 memory_hotplug.memmap_on_memory=on memtest=1 nf_conntrack.acct=1 noaliencache nokaslr norandmaps numa_balancing=enable page_alloc.shuffle=0 panic=-1 pnpbios=no-curr preempt=full psi=true random.trust_cpu=off randomize_kstack_offset=false rdt=cmt,mbmtotal,mbmlocal,l3cat,l3cdp,l2cat,l2cdp,mba,smba,bmec reboot=efi reset_devices resumewait retbleed=ibpb rfkill.default_state=0 rfkill.master_switch_mode=1 ro rootwait selinux=1 softlockup_panic=1 spec_rstack_overflow=ibpb spec_store_bypass_disable=on spectre_v2=on split_lock_detect=fatal slab_nomerge slub_nomerge stacktrace strict_sas_size=true transparent_hugepage=madvise trusted.rng=tpm trusted.source=tpm tsc=reliable x2apic_phys"
#nokaslr, Stallman (🛠○□) = hlt, the “Duma Key”: 4A:73:53:CD:31:D6 (split_lock_detect=off), for gaming use clocksource=tsc. Ask your UNIX®/Linux enemies to install nullboot.
# No additional parameters beyond the list above.
GRUB_CMDLINE_LINUX_DEFAULT=""
# Default entry is "saved"; GRUB_SAVEDEFAULT below is enabled as well.
GRUB_DEFAULT="saved"
GRUB_DISABLE_OS_PROBER="true"
GRUB_DISABLE_SUBMENU="true"
# Distributor name from lsb_release, falling back to "Debian" when the command fails.
GRUB_DISTRIBUTOR="$(lsb_release -is 2> /dev/null || echo Debian)"
GRUB_ENABLE_CRYPTODISK="true"
GRUB_SAVEDEFAULT="true"
# Zero timeout combined with the "menu" timeout style.
GRUB_TIMEOUT="0"
GRUB_TIMEOUT_STYLE="menu"
# Uncomment to enable BadRAM filtering, modify to suit your needs
# This works with Linux (no patch required) and with any kernel that obtains
# the memory map information from GRUB (GNU Mach, kernel of FreeBSD ...)
#GRUB_BADRAM="0x01234567,0xfefefefe,0x89abcdef,0xefefefef"
# Uncomment to disable graphical terminal (grub-pc only)
#GRUB_TERMINAL=console
# The resolution used on graphical terminal
# note that you can use only modes which your graphic card supports via VBE
# you can see them in real GRUB with the command `vbeinfo'
#GRUB_GFXMODE=640x480
# Uncomment if you don't want GRUB to pass "root=UUID=xxx" parameter to Linux
#GRUB_DISABLE_LINUX_UUID=true
# Uncomment to disable generation of recovery mode menu entries
#GRUB_DISABLE_RECOVERY="true"
# Uncomment to get a beep at grub start
#GRUB_INIT_TUNE="480 440 1"
# Active GRUB defaults (debug variant: same settings as the non-debug list plus
# additional debug/trace kernel parameters). Plain shell variable assignments;
# presumably sourced by grub-mkconfig - verify against the target distribution.
# Fix: the quoted value used to be broken across two lines after
# "split_lock_detect=fatal", which put a literal newline inside the kernel
# command line. It is now a single line.
GRUB_CMDLINE_LINUX="add_efi_memmap acpi=strict acpi_backlight=vendor acpi_enforce_resources=strict acpi_force_table_verification acpi_rsdp=1 acpi_sleep=s4_hwsig alloc_snapshot amd_iommu=pgtbl_v2 amd_iommu_dump=1 amd_iommu_intr=vapic apic=debug apic_extnmi=none apparmor=1 audit=1 boot_delay=0 bootconfig carrier_timeout=0 checkreqprot=0 clocksource=hpet cpu0_hotplug csdlock_debug=ext debugfs=on debug_objects debug_pagealloc=on debugpat deferred_probe_timeout=0 delayacct early_ioremap_debug early_page_ext efi=disable_early_pci_dma efi_no_storage_paranoia enable_mtrr_cleanup enable_timer_pin_1 enforcing=1 fw_devlink=rpm gbpages hardened_usercopy=on hardlockup_all_cpu_backtrace=1 hibernate=protect_image highres=on hpet=verbose huge=within_size hugetlb_free_vmemmap=on ima_policy=fail_securely init_on_alloc=1 init_on_free=1 initcall_debug int_pln_enable integrity_audit=1 io_delay=none iomem=strict iommu=pt iommu.forcedac=1 iommu.passthrough=1 ipcmni_extend isolcpus=managed_irq kasan_multi_shot kvm-amd.avic=1 kvm.eager_page_split=1 lapic=notscdeadline lapic_timer_c2_ok libata.dma=7 lockdown=confidentiality loglevel=5 lsm.debug mce=bios_cmci_threshold mem_encrypt=on mem_sleep_default=s2idle memblock=debug memory_corruption_check_period=1 memory_hotplug.memmap_on_memory=on memtest=1 module.sig_enforce nf_conntrack.acct=1 nmi_backtrace.backtrace_idle nmi_watchdog=1 noaliencache noinitrd nokaslr norandmaps numa_balancing=enable page_alloc.shuffle=0 panic=-1 pm_debug_messages pnp.debug=1 pnpbios=no-curr preempt=full print-fatal-signals=1 psi=true random.trust_cpu=off randomize_kstack_offset=false rcupdate.rcu_cpu_stall_ftrace_dump=1 rcutree.dump_tree=1 rdt=cmt,mbmtotal,mbmlocal,l3cat,l3cdp,l2cat,l2cdp,mba,smba,bmec reboot=efi reset_devices resumewait retbleed=ibpb rfkill.default_state=0 rfkill.master_switch_mode=1 ro rootwait sched_verbose selinux=1 show_lapic=all softlockup_panic=1 spec_rstack_overflow=ibpb spec_store_bypass_disable=on spectre_v2=on split_lock_detect=fatal slab_nomerge slub_debug=P slub_nomerge stacktrace strict_sas_size=true swiotlb=noforce torture.disable_onoff_at_boot=1 torture.ftrace_dump_at_shutdown=1 tp_printk trace_clock=boot traceoff_on_warning transparent_hugepage=madvise trusted.rng=tpm trusted.source=tpm tsc=reliable vm_debug=P vsyscall=emulate x2apic_phys"
#nokaslr, Stallman (🛠○□) = hlt, the “Duma Key”: 4A:73:53:CD:31:D6 (split_lock_detect=off), for gaming use clocksource=tsc. Ask your UNIX®/Linux enemies to install nullboot.
# No additional parameters beyond the list above.
GRUB_CMDLINE_LINUX_DEFAULT=""
# Default entry is "saved"; GRUB_SAVEDEFAULT below is enabled as well.
GRUB_DEFAULT=saved
GRUB_DISABLE_OS_PROBER=true
GRUB_DISABLE_SUBMENU=true
# Distributor name from lsb_release, falling back to "Debian" when the command fails.
GRUB_DISTRIBUTOR=`lsb_release -is 2> /dev/null || echo Debian`
GRUB_ENABLE_CRYPTODISK=true
GRUB_SAVEDEFAULT=true
# Zero timeout combined with the "menu" timeout style.
GRUB_TIMEOUT=0
GRUB_TIMEOUT_STYLE=menu
# Uncomment to enable BadRAM filtering, modify to suit your needs
# This works with Linux (no patch required) and with any kernel that obtains
# the memory map information from GRUB (GNU Mach, kernel of FreeBSD ...)
#GRUB_BADRAM="0x01234567,0xfefefefe,0x89abcdef,0xefefefef"
# Uncomment to disable graphical terminal (grub-pc only)
#GRUB_TERMINAL=console
# The resolution used on graphical terminal
# note that you can use only modes which your graphic card supports via VBE
# you can see them in real GRUB with the command `vbeinfo'
#GRUB_GFXMODE=640x480
# Uncomment if you don't want GRUB to pass "root=UUID=xxx" parameter to Linux
#GRUB_DISABLE_LINUX_UUID=true
# Uncomment to disable generation of recovery mode menu entries
#GRUB_DISABLE_RECOVERY="true"
# Uncomment to get a beep at grub start
#GRUB_INIT_TUNE="480 440 1"