From 2d40fe11840804a84ffd8f9a57434925fc07d9ce Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Tue, 27 Sep 2022 10:31:22 +0800 Subject: [PATCH] fix: asm code of i386 platform --- src/bcrypt/blowfish/CMakeLists.txt | 2 +- src/bcrypt/blowfish/x86.S | 203 +++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 src/bcrypt/blowfish/x86.S diff --git a/src/bcrypt/blowfish/CMakeLists.txt b/src/bcrypt/blowfish/CMakeLists.txt index bc8dd36..9bd5539 100644 --- a/src/bcrypt/blowfish/CMakeLists.txt +++ b/src/bcrypt/blowfish/CMakeLists.txt @@ -1,3 +1,3 @@ cmake_minimum_required(VERSION 2.8.12) -add_library(blowfish crypt_blowfish.c crypt_gensalt.c wrapper.c) +add_library(blowfish crypt_blowfish.c crypt_gensalt.c wrapper.c x86.S) diff --git a/src/bcrypt/blowfish/x86.S b/src/bcrypt/blowfish/x86.S new file mode 100644 index 0000000..b0f1cd2 --- /dev/null +++ b/src/bcrypt/blowfish/x86.S @@ -0,0 +1,203 @@ +/* + * Written by Solar Designer in 1998-2010. + * No copyright is claimed, and the software is hereby placed in the public + * domain. In case this attempt to disclaim copyright and place the software + * in the public domain is deemed null and void, then the software is + * Copyright (c) 1998-2010 Solar Designer and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * See crypt_blowfish.c for more information. + */ + +#ifdef __i386__ + +#if defined(__OpenBSD__) && !defined(__ELF__) +#define UNDERSCORES +#define ALIGN_LOG +#endif + +#if defined(__CYGWIN32__) || defined(__MINGW32__) +#define UNDERSCORES +#endif + +#ifdef __DJGPP__ +#define UNDERSCORES +#define ALIGN_LOG +#endif + +#ifdef UNDERSCORES +#define _BF_body_r __BF_body_r +#endif + +#ifdef ALIGN_LOG +#define DO_ALIGN(log) .align (log) +#elif defined(DUMBAS) +#define DO_ALIGN(log) .align 1 << log +#else +#define DO_ALIGN(log) .align (1 << (log)) +#endif + +#define BF_FRAME 0x200 +#define ctx %esp + +#define BF_ptr (ctx) + +#define S(N, r) N+BF_FRAME(ctx,r,4) +#ifdef DUMBAS +#define P(N) 0x1000+N+N+N+N+BF_FRAME(ctx) +#else +#define P(N) 0x1000+4*N+BF_FRAME(ctx) +#endif + +/* + * This version of the assembly code is optimized primarily for the original + * Intel Pentium but is also careful to avoid partial register stalls on the + * Pentium Pro family of processors (tested up to Pentium III Coppermine). + * + * It is possible to do 15% faster on the Pentium Pro family and probably on + * many non-Intel x86 processors, but, unfortunately, that would make things + * twice slower for the original Pentium. + * + * An additional 2% speedup may be achieved with non-reentrant code. + */ + +#define L %esi +#define R %edi +#define tmp1 %eax +#define tmp1_lo %al +#define tmp2 %ecx +#define tmp2_hi %ch +#define tmp3 %edx +#define tmp3_lo %dl +#define tmp4 %ebx +#define tmp4_hi %bh +#define tmp5 %ebp + +.text + +#define BF_ROUND(L, R, N) \ + xorl L,tmp2; \ + xorl tmp1,tmp1; \ + movl tmp2,L; \ + shrl $16,tmp2; \ + movl L,tmp4; \ + movb tmp2_hi,tmp1_lo; \ + andl $0xFF,tmp2; \ + movb tmp4_hi,tmp3_lo; \ + andl $0xFF,tmp4; \ + movl S(0,tmp1),tmp1; \ + movl S(0x400,tmp2),tmp5; \ + addl tmp5,tmp1; \ + movl S(0x800,tmp3),tmp5; \ + xorl tmp5,tmp1; \ + movl S(0xC00,tmp4),tmp5; \ + addl tmp1,tmp5; \ + movl 4+P(N),tmp2; \ + xorl tmp5,R + +#define BF_ENCRYPT_START \ + BF_ROUND(L, R, 0); \ + BF_ROUND(R, L, 1); \ + BF_ROUND(L, R, 2); \ + BF_ROUND(R, L, 3); \ + BF_ROUND(L, R, 4); \ + BF_ROUND(R, L, 5); \ + BF_ROUND(L, R, 6); \ + BF_ROUND(R, L, 7); \ + BF_ROUND(L, R, 8); \ + BF_ROUND(R, L, 9); \ + BF_ROUND(L, R, 10); \ + BF_ROUND(R, L, 11); \ + BF_ROUND(L, R, 12); \ + BF_ROUND(R, L, 13); \ + BF_ROUND(L, R, 14); \ + BF_ROUND(R, L, 15); \ + movl BF_ptr,tmp5; \ + xorl L,tmp2; \ + movl P(17),L + +#define BF_ENCRYPT_END \ + xorl R,L; \ + movl tmp2,R + +DO_ALIGN(5) +.globl _BF_body_r +_BF_body_r: + movl 4(%esp),%eax + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + subl $BF_FRAME-8,%eax + xorl L,L + cmpl %esp,%eax + ja BF_die + xchgl %eax,%esp + xorl R,R + pushl %eax + leal 0x1000+BF_FRAME-4(ctx),%eax + movl 0x1000+BF_FRAME-4(ctx),tmp2 + pushl %eax + xorl tmp3,tmp3 +BF_loop_P: + BF_ENCRYPT_START + addl $8,tmp5 + BF_ENCRYPT_END + leal 0x1000+18*4+BF_FRAME(ctx),tmp1 + movl tmp5,BF_ptr + cmpl tmp5,tmp1 + movl L,-8(tmp5) + movl R,-4(tmp5) + movl P(0),tmp2 + ja BF_loop_P + leal BF_FRAME(ctx),tmp5 + xorl tmp3,tmp3 + movl tmp5,BF_ptr +BF_loop_S: + BF_ENCRYPT_START + BF_ENCRYPT_END + movl P(0),tmp2 + movl L,(tmp5) + movl R,4(tmp5) + BF_ENCRYPT_START + BF_ENCRYPT_END + movl P(0),tmp2 + movl L,8(tmp5) + movl R,12(tmp5) + BF_ENCRYPT_START + BF_ENCRYPT_END + movl P(0),tmp2 + movl L,16(tmp5) + movl R,20(tmp5) + BF_ENCRYPT_START + addl $32,tmp5 + BF_ENCRYPT_END + leal 0x1000+BF_FRAME(ctx),tmp1 + movl tmp5,BF_ptr + cmpl tmp5,tmp1 + movl P(0),tmp2 + movl L,-8(tmp5) + movl R,-4(tmp5) + ja BF_loop_S + movl 4(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + +BF_die: +/* Oops, need to re-compile with a larger BF_FRAME. */ + hlt + jmp BF_die + +#endif + +#if defined(__ELF__) && defined(__linux__) +.section .note.GNU-stack,"",@progbits +#endif