|
Lines 101-107
Link Here
|
| 101 |
compiler choice is limited to GCC and Microsoft C. */ |
101 |
compiler choice is limited to GCC and Microsoft C. */ |
| 102 |
#undef COMPILE_HW_PADLOCK |
102 |
#undef COMPILE_HW_PADLOCK |
| 103 |
#if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) |
103 |
#if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) |
| 104 |
# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ |
104 |
# if (defined(__GNUC__) && __GNUC__>=2 && \ |
|
|
105 |
(defined(__i386__) || defined(__i386) || \ |
| 106 |
defined(__x86_64__) || defined(__x86_64)) \ |
| 107 |
) || \ |
| 105 |
(defined(_MSC_VER) && defined(_M_IX86)) |
108 |
(defined(_MSC_VER) && defined(_M_IX86)) |
| 106 |
# define COMPILE_HW_PADLOCK |
109 |
# define COMPILE_HW_PADLOCK |
| 107 |
# endif |
110 |
# endif |
|
Lines 304-309
static volatile struct padlock_cipher_da
Link Here
|
| 304 |
* ======================================================= |
307 |
* ======================================================= |
| 305 |
*/ |
308 |
*/ |
| 306 |
#if defined(__GNUC__) && __GNUC__>=2 |
309 |
#if defined(__GNUC__) && __GNUC__>=2 |
|
|
310 |
#if defined(__i386__) || defined(__i386) |
| 307 |
/* |
311 |
/* |
| 308 |
* As for excessive "push %ebx"/"pop %ebx" found all over. |
312 |
* As for excessive "push %ebx"/"pop %ebx" found all over. |
| 309 |
* When generating position-independent code GCC won't let |
313 |
* When generating position-independent code GCC won't let |
|
Lines 383-403
padlock_available(void)
Link Here
|
| 383 |
return padlock_use_ace + padlock_use_rng; |
387 |
return padlock_use_ace + padlock_use_rng; |
| 384 |
} |
388 |
} |
| 385 |
|
389 |
|
| 386 |
#ifndef OPENSSL_NO_AES |
|
|
| 387 |
/* Our own htonl()/ntohl() */ |
| 388 |
static inline void |
| 389 |
padlock_bswapl(AES_KEY *ks) |
| 390 |
{ |
| 391 |
size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]); |
| 392 |
unsigned int *key = ks->rd_key; |
| 393 |
|
| 394 |
while (i--) { |
| 395 |
asm volatile ("bswapl %0" : "+r"(*key)); |
| 396 |
key++; |
| 397 |
} |
| 398 |
} |
| 399 |
#endif |
| 400 |
|
| 401 |
/* Force key reload from memory to the CPU microcode. |
390 |
/* Force key reload from memory to the CPU microcode. |
| 402 |
Loading EFLAGS from the stack clears EFLAGS[30] |
391 |
Loading EFLAGS from the stack clears EFLAGS[30] |
| 403 |
which does the trick. */ |
392 |
which does the trick. */ |
|
Lines 456-466
static inline void *name(size_t cnt, \
Link Here
|
| 456 |
return iv; \ |
445 |
return iv; \ |
| 457 |
} |
446 |
} |
| 458 |
|
447 |
|
|
|
448 |
|
| 449 |
#endif |
| 450 |
|
| 451 |
#elif defined(__x86_64__) || defined(__x86_64) |
| 452 |
|
| 453 |
/* Load supported features of the CPU to see if |
| 454 |
the PadLock is available. */ |
| 455 |
static int |
| 456 |
padlock_available(void) |
| 457 |
{ |
| 458 |
char vendor_string[16]; |
| 459 |
unsigned int eax, edx; |
| 460 |
|
| 461 |
/* Are we running on the Centaur (VIA) CPU? */ |
| 462 |
eax = 0x00000000; |
| 463 |
vendor_string[12] = 0; |
| 464 |
asm volatile ( |
| 465 |
"cpuid\n" |
| 466 |
"movl %%ebx,(%1)\n" |
| 467 |
"movl %%edx,4(%1)\n" |
| 468 |
"movl %%ecx,8(%1)\n" |
| 469 |
: "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx"); |
| 470 |
if (strcmp(vendor_string, "CentaurHauls") != 0) |
| 471 |
return 0; |
| 472 |
|
| 473 |
/* Check for Centaur Extended Feature Flags presence */ |
| 474 |
eax = 0xC0000000; |
| 475 |
asm volatile ("cpuid" |
| 476 |
: "+a"(eax) : : "rbx", "rcx", "rdx"); |
| 477 |
if (eax < 0xC0000001) |
| 478 |
return 0; |
| 479 |
|
| 480 |
/* Read the Centaur Extended Feature Flags */ |
| 481 |
eax = 0xC0000001; |
| 482 |
asm volatile ("cpuid" |
| 483 |
: "+a"(eax), "=d"(edx) : : "rbx", "rcx"); |
| 484 |
|
| 485 |
/* Fill up some flags */ |
| 486 |
padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6)); |
| 487 |
padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2)); |
| 488 |
|
| 489 |
return padlock_use_ace + padlock_use_rng; |
| 490 |
} |
| 491 |
|
| 492 |
/* Force key reload from memory to the CPU microcode. |
| 493 |
Loading EFLAGS from the stack clears EFLAGS[30] |
| 494 |
which does the trick. */ |
| 495 |
static inline void |
| 496 |
padlock_reload_key(void) |
| 497 |
{ |
| 498 |
asm volatile ("pushfq; popfq"); |
| 499 |
} |
| 500 |
|
| 501 |
#ifndef OPENSSL_NO_AES |
| 502 |
/* |
| 503 |
* This is heuristic key context tracing. At first one |
| 504 |
* believes that one should use atomic swap instructions, |
| 505 |
* but it's not actually necessary. Point is that if |
| 506 |
* padlock_saved_context was changed by another thread |
| 507 |
* after we've read it and before we compare it with cdata, |
| 508 |
* our key *shall* be reloaded upon thread context switch |
| 509 |
* and we are therefore set in either case... |
| 510 |
*/ |
| 511 |
static inline void |
| 512 |
padlock_verify_context(struct padlock_cipher_data *cdata) |
| 513 |
{ |
| 514 |
asm volatile ( |
| 515 |
"pushfq\n" |
| 516 |
" btl $30,(%%rsp)\n" |
| 517 |
" jnc 1f\n" |
| 518 |
" cmpq %2,%1\n" |
| 519 |
" je 1f\n" |
| 520 |
" popfq\n" |
| 521 |
" subq $8,%%rsp\n" |
| 522 |
"1: addq $8,%%rsp\n" |
| 523 |
" movq %2,%0" |
| 524 |
:"+m"(padlock_saved_context) |
| 525 |
: "r"(padlock_saved_context), "r"(cdata) : "cc"); |
| 526 |
} |
| 527 |
|
| 528 |
/* Template for padlock_xcrypt_* modes */ |
| 529 |
/* BIG FAT WARNING: |
| 530 |
* The offsets used with 'leal' instructions |
| 531 |
* describe items of the 'padlock_cipher_data' |
| 532 |
* structure. |
| 533 |
*/ |
| 534 |
#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \ |
| 535 |
static inline void *name(size_t cnt, \ |
| 536 |
struct padlock_cipher_data *cdata, \ |
| 537 |
void *out, const void *inp) \ |
| 538 |
{ void *iv; \ |
| 539 |
asm volatile ( "leaq 16(%0),%%rdx\n" \ |
| 540 |
" leaq 32(%0),%%rbx\n" \ |
| 541 |
rep_xcrypt "\n" \ |
| 542 |
: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \ |
| 543 |
: "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ |
| 544 |
: "rbx", "rdx", "cc", "memory"); \ |
| 545 |
return iv; \ |
| 546 |
} |
| 547 |
#endif |
| 548 |
|
| 549 |
#endif /* cpu */ |
| 550 |
|
| 551 |
#ifndef OPENSSL_NO_AES |
| 552 |
|
| 553 |
|
| 459 |
/* Generate all functions with appropriate opcodes */ |
554 |
/* Generate all functions with appropriate opcodes */ |
| 460 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */ |
555 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */ |
| 461 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */ |
556 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */ |
| 462 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */ |
557 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */ |
| 463 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */ |
558 |
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */ |
|
|
559 |
|
| 560 |
/* Our own htonl()/ntohl() */ |
| 561 |
static inline void |
| 562 |
padlock_bswapl(AES_KEY *ks) |
| 563 |
{ |
| 564 |
size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]); |
| 565 |
unsigned int *key = ks->rd_key; |
| 566 |
|
| 567 |
while (i--) { |
| 568 |
asm volatile ("bswapl %0" : "+r"(*key)); |
| 569 |
key++; |
| 570 |
} |
| 571 |
} |
| 464 |
#endif |
572 |
#endif |
| 465 |
|
573 |
|
| 466 |
/* The RNG call itself */ |
574 |
/* The RNG call itself */ |
|
Lines 491-498
padlock_xstore(void *addr, unsigned int
Link Here
|
| 491 |
static inline unsigned char * |
599 |
static inline unsigned char * |
| 492 |
padlock_memcpy(void *dst,const void *src,size_t n) |
600 |
padlock_memcpy(void *dst,const void *src,size_t n) |
| 493 |
{ |
601 |
{ |
| 494 |
long *d=dst; |
602 |
size_t *d=dst; |
| 495 |
const long *s=src; |
603 |
const size_t *s=src; |
| 496 |
|
604 |
|
| 497 |
n /= sizeof(*d); |
605 |
n /= sizeof(*d); |
| 498 |
do { *d++ = *s++; } while (--n); |
606 |
do { *d++ = *s++; } while (--n); |