Commit 60eb8175 authored by Jason Cooper, committed by Greg Kroah-Hartman

staging: crypto: skein: cleanup >80 character lines

Signed-off-by: Jason Cooper <jason@lakedaemon.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 06a620f0
...@@ -67,28 +67,28 @@ struct skein_ctx_hdr ...@@ -67,28 +67,28 @@ struct skein_ctx_hdr
{ {
size_t hashBitLen; /* size of hash result, in bits */ size_t hashBitLen; /* size of hash result, in bits */
size_t bCnt; /* current byte count in buffer b[] */ size_t bCnt; /* current byte count in buffer b[] */
u64 T[SKEIN_MODIFIER_WORDS]; /* tweak words: T[0]=byte cnt, T[1]=flags */ u64 T[SKEIN_MODIFIER_WORDS]; /* tweak: T[0]=byte cnt, T[1]=flags */
}; };
struct skein_256_ctx /* 256-bit Skein hash context structure */ struct skein_256_ctx /* 256-bit Skein hash context structure */
{ {
struct skein_ctx_hdr h; /* common header context variables */ struct skein_ctx_hdr h; /* common header context variables */
u64 X[SKEIN_256_STATE_WORDS]; /* chaining variables */ u64 X[SKEIN_256_STATE_WORDS]; /* chaining variables */
u8 b[SKEIN_256_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ u8 b[SKEIN_256_BLOCK_BYTES]; /* partial block buf (8-byte aligned) */
}; };
struct skein_512_ctx /* 512-bit Skein hash context structure */ struct skein_512_ctx /* 512-bit Skein hash context structure */
{ {
struct skein_ctx_hdr h; /* common header context variables */ struct skein_ctx_hdr h; /* common header context variables */
u64 X[SKEIN_512_STATE_WORDS]; /* chaining variables */ u64 X[SKEIN_512_STATE_WORDS]; /* chaining variables */
u8 b[SKEIN_512_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ u8 b[SKEIN_512_BLOCK_BYTES]; /* partial block buf (8-byte aligned) */
}; };
struct skein1024_ctx /* 1024-bit Skein hash context structure */ struct skein1024_ctx /* 1024-bit Skein hash context structure */
{ {
struct skein_ctx_hdr h; /* common header context variables */ struct skein_ctx_hdr h; /* common header context variables */
u64 X[SKEIN1024_STATE_WORDS]; /* chaining variables */ u64 X[SKEIN1024_STATE_WORDS]; /* chaining variables */
u8 b[SKEIN1024_BLOCK_BYTES]; /* partial block buffer (8-byte aligned) */ u8 b[SKEIN1024_BLOCK_BYTES]; /* partial block buf (8-byte aligned) */
}; };
/* Skein APIs for (incremental) "straight hashing" */ /* Skein APIs for (incremental) "straight hashing" */
...@@ -96,9 +96,12 @@ int Skein_256_Init(struct skein_256_ctx *ctx, size_t hashBitLen); ...@@ -96,9 +96,12 @@ int Skein_256_Init(struct skein_256_ctx *ctx, size_t hashBitLen);
int Skein_512_Init(struct skein_512_ctx *ctx, size_t hashBitLen); int Skein_512_Init(struct skein_512_ctx *ctx, size_t hashBitLen);
int Skein1024_Init(struct skein1024_ctx *ctx, size_t hashBitLen); int Skein1024_Init(struct skein1024_ctx *ctx, size_t hashBitLen);
int Skein_256_Update(struct skein_256_ctx *ctx, const u8 *msg, size_t msgByteCnt); int Skein_256_Update(struct skein_256_ctx *ctx, const u8 *msg,
int Skein_512_Update(struct skein_512_ctx *ctx, const u8 *msg, size_t msgByteCnt); size_t msgByteCnt);
int Skein1024_Update(struct skein1024_ctx *ctx, const u8 *msg, size_t msgByteCnt); int Skein_512_Update(struct skein_512_ctx *ctx, const u8 *msg,
size_t msgByteCnt);
int Skein1024_Update(struct skein1024_ctx *ctx, const u8 *msg,
size_t msgByteCnt);
int Skein_256_Final(struct skein_256_ctx *ctx, u8 *hashVal); int Skein_256_Final(struct skein_256_ctx *ctx, u8 *hashVal);
int Skein_512_Final(struct skein_512_ctx *ctx, u8 *hashVal); int Skein_512_Final(struct skein_512_ctx *ctx, u8 *hashVal);
...@@ -118,9 +121,12 @@ int Skein1024_Final(struct skein1024_ctx *ctx, u8 *hashVal); ...@@ -118,9 +121,12 @@ int Skein1024_Final(struct skein1024_ctx *ctx, u8 *hashVal);
** to precompute the MAC IV, then a copy of the context saved and ** to precompute the MAC IV, then a copy of the context saved and
** reused for each new MAC computation. ** reused for each new MAC computation.
**/ **/
int Skein_256_InitExt(struct skein_256_ctx *ctx, size_t hashBitLen, u64 treeInfo, const u8 *key, size_t keyBytes); int Skein_256_InitExt(struct skein_256_ctx *ctx, size_t hashBitLen,
int Skein_512_InitExt(struct skein_512_ctx *ctx, size_t hashBitLen, u64 treeInfo, const u8 *key, size_t keyBytes); u64 treeInfo, const u8 *key, size_t keyBytes);
int Skein1024_InitExt(struct skein1024_ctx *ctx, size_t hashBitLen, u64 treeInfo, const u8 *key, size_t keyBytes); int Skein_512_InitExt(struct skein_512_ctx *ctx, size_t hashBitLen,
u64 treeInfo, const u8 *key, size_t keyBytes);
int Skein1024_InitExt(struct skein1024_ctx *ctx, size_t hashBitLen,
u64 treeInfo, const u8 *key, size_t keyBytes);
/* /*
** Skein APIs for MAC and tree hash: ** Skein APIs for MAC and tree hash:
...@@ -149,13 +155,13 @@ int Skein1024_Output(struct skein1024_ctx *ctx, u8 *hashVal); ...@@ -149,13 +155,13 @@ int Skein1024_Output(struct skein1024_ctx *ctx, u8 *hashVal);
******************************************************************/ ******************************************************************/
/* tweak word T[1]: bit field starting positions */ /* tweak word T[1]: bit field starting positions */
#define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* offset 64 because it's the second word */ #define SKEIN_T1_BIT(BIT) ((BIT) - 64) /* second word */
#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) /* bits 112..118: level in hash tree */ #define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112) /* 112..118 hash tree level */
#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* bit 119 : partial final input byte */ #define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119) /* 119 part. final in byte */
#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* bits 120..125: type field */ #define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120) /* 120..125 type field */
#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* bits 126 : first block flag */ #define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126) /* 126 first blk flag */
#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* bit 127 : final block flag */ #define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127) /* 127 final blk flag */
/* tweak word T[1]: flag bit definition(s) */ /* tweak word T[1]: flag bit definition(s) */
#define SKEIN_T1_FLAG_FIRST (((u64) 1) << SKEIN_T1_POS_FIRST) #define SKEIN_T1_FLAG_FIRST (((u64) 1) << SKEIN_T1_POS_FIRST)
...@@ -170,26 +176,29 @@ int Skein1024_Output(struct skein1024_ctx *ctx, u8 *hashVal); ...@@ -170,26 +176,29 @@ int Skein1024_Output(struct skein1024_ctx *ctx, u8 *hashVal);
#define SKEIN_BLK_TYPE_KEY (0) /* key, for MAC and KDF */ #define SKEIN_BLK_TYPE_KEY (0) /* key, for MAC and KDF */
#define SKEIN_BLK_TYPE_CFG (4) /* configuration block */ #define SKEIN_BLK_TYPE_CFG (4) /* configuration block */
#define SKEIN_BLK_TYPE_PERS (8) /* personalization string */ #define SKEIN_BLK_TYPE_PERS (8) /* personalization string */
#define SKEIN_BLK_TYPE_PK (12) /* public key (for digital signature hashing) */ #define SKEIN_BLK_TYPE_PK (12) /* pubkey (for digital sigs) */
#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */ #define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */
#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */ #define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */
#define SKEIN_BLK_TYPE_MSG (48) /* message processing */ #define SKEIN_BLK_TYPE_MSG (48) /* message processing */
#define SKEIN_BLK_TYPE_OUT (63) /* output stage */ #define SKEIN_BLK_TYPE_OUT (63) /* output stage */
#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */ #define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */
#define SKEIN_T1_BLK_TYPE(T) (((u64) (SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE) #define SKEIN_T1_BLK_TYPE(T) (((u64) (SKEIN_BLK_TYPE_##T)) << \
#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) /* key, for MAC and KDF */ SKEIN_T1_POS_BLK_TYPE)
#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) /* configuration block */ #define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY) /* for MAC and KDF */
#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) /* personalization string */ #define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG) /* config block */
#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) /* public key (for digital signature hashing) */ #define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS) /* personalization */
#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) /* key identifier for KDF */ #define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK) /* pubkey (for sigs) */
#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF) /* key ident for KDF */
#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */ #define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)/* nonce for PRNG */
#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */ #define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG) /* message processing */
#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */ #define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT) /* output stage */
#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */ #define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK) /* field bit mask */
#define SKEIN_T1_BLK_TYPE_CFG_FINAL (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL) #define SKEIN_T1_BLK_TYPE_CFG_FINAL (SKEIN_T1_BLK_TYPE_CFG | \
#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL) SKEIN_T1_FLAG_FINAL)
#define SKEIN_T1_BLK_TYPE_OUT_FINAL (SKEIN_T1_BLK_TYPE_OUT | \
SKEIN_T1_FLAG_FINAL)
#define SKEIN_VERSION (1) #define SKEIN_VERSION (1)
...@@ -208,23 +217,29 @@ int Skein1024_Output(struct skein1024_ctx *ctx, u8 *hashVal); ...@@ -208,23 +217,29 @@ int Skein1024_Output(struct skein1024_ctx *ctx, u8 *hashVal);
#define SKEIN_CFG_TREE_NODE_SIZE_POS (8) #define SKEIN_CFG_TREE_NODE_SIZE_POS (8)
#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16) #define SKEIN_CFG_TREE_MAX_LEVEL_POS (16)
#define SKEIN_CFG_TREE_LEAF_SIZE_MSK (((u64) 0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS) #define SKEIN_CFG_TREE_LEAF_SIZE_MSK (((u64)0xFF) << \
#define SKEIN_CFG_TREE_NODE_SIZE_MSK (((u64) 0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS) SKEIN_CFG_TREE_LEAF_SIZE_POS)
#define SKEIN_CFG_TREE_MAX_LEVEL_MSK (((u64) 0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS) #define SKEIN_CFG_TREE_NODE_SIZE_MSK (((u64)0xFF) << \
SKEIN_CFG_TREE_NODE_SIZE_POS)
#define SKEIN_CFG_TREE_MAX_LEVEL_MSK (((u64)0xFF) << \
SKEIN_CFG_TREE_MAX_LEVEL_POS)
#define SKEIN_CFG_TREE_INFO(leaf, node, maxLvl) \ #define SKEIN_CFG_TREE_INFO(leaf, node, maxLvl) \
((((u64)(leaf)) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \ ((((u64)(leaf)) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \
(((u64)(node)) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \ (((u64)(node)) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \
(((u64)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS)) (((u64)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS))
#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0, 0, 0) /* use as treeInfo in InitExt() call for sequential processing */ /* use as treeInfo in InitExt() call for sequential processing */
#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0, 0, 0)
/* /*
** Skein macros for getting/setting tweak words, etc. ** Skein macros for getting/setting tweak words, etc.
** These are useful for partial input bytes, hash tree init/update, etc. ** These are useful for partial input bytes, hash tree init/update, etc.
**/ **/
#define Skein_Get_Tweak(ctxPtr, TWK_NUM) ((ctxPtr)->h.T[TWK_NUM]) #define Skein_Get_Tweak(ctxPtr, TWK_NUM) ((ctxPtr)->h.T[TWK_NUM])
#define Skein_Set_Tweak(ctxPtr, TWK_NUM, tVal) {(ctxPtr)->h.T[TWK_NUM] = (tVal); } #define Skein_Set_Tweak(ctxPtr, TWK_NUM, tVal) { \
(ctxPtr)->h.T[TWK_NUM] = (tVal); \
}
#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr, 0) #define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr, 0)
#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr, 1) #define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr, 1)
...@@ -241,14 +256,26 @@ int Skein1024_Output(struct skein1024_ctx *ctx, u8 *hashVal); ...@@ -241,14 +256,26 @@ int Skein1024_Output(struct skein1024_ctx *ctx, u8 *hashVal);
#define Skein_Set_Type(ctxPtr, BLK_TYPE) \ #define Skein_Set_Type(ctxPtr, BLK_TYPE) \
Skein_Set_T1(ctxPtr, SKEIN_T1_BLK_TYPE_##BLK_TYPE) Skein_Set_T1(ctxPtr, SKEIN_T1_BLK_TYPE_##BLK_TYPE)
/* set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0; */ /*
#define Skein_Start_New_Type(ctxPtr, BLK_TYPE) \ * setup for starting with a new type:
{ Skein_Set_T0_T1(ctxPtr, 0, SKEIN_T1_FLAG_FIRST | SKEIN_T1_BLK_TYPE_##BLK_TYPE); (ctxPtr)->h.bCnt = 0; } * h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0;
*/
#define Skein_Start_New_Type(ctxPtr, BLK_TYPE) { \
Skein_Set_T0_T1(ctxPtr, 0, SKEIN_T1_FLAG_FIRST | \
SKEIN_T1_BLK_TYPE_##BLK_TYPE); \
(ctxPtr)->h.bCnt = 0; \
}
#define Skein_Clear_First_Flag(hdr) { (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; } #define Skein_Clear_First_Flag(hdr) { \
#define Skein_Set_Bit_Pad_Flag(hdr) { (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; } (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; \
}
#define Skein_Set_Bit_Pad_Flag(hdr) { \
(hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; \
}
#define Skein_Set_Tree_Level(hdr, height) { (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height); } #define Skein_Set_Tree_Level(hdr, height) { \
(hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height); \
}
/***************************************************************** /*****************************************************************
** "Internal" Skein definitions for debugging and error checking ** "Internal" Skein definitions for debugging and error checking
...@@ -263,7 +290,7 @@ int Skein1024_Output(struct skein1024_ctx *ctx, u8 *hashVal); ...@@ -263,7 +290,7 @@ int Skein1024_Output(struct skein1024_ctx *ctx, u8 *hashVal);
#define Skein_Show_Key(bits, ctx, key, keyBytes) #define Skein_Show_Key(bits, ctx, key, keyBytes)
#endif #endif
#define Skein_Assert(x, retCode)/* default: ignore all Asserts, for performance */ #define Skein_Assert(x, retCode)/* ignore all Asserts, for performance */
#define Skein_assert(x) #define Skein_assert(x)
/***************************************************************** /*****************************************************************
...@@ -292,18 +319,26 @@ enum ...@@ -292,18 +319,26 @@ enum
R_512_7_0 = 8, R_512_7_1 = 35, R_512_7_2 = 56, R_512_7_3 = 22, R_512_7_0 = 8, R_512_7_1 = 35, R_512_7_2 = 56, R_512_7_3 = 22,
/* Skein1024 round rotation constants */ /* Skein1024 round rotation constants */
R1024_0_0 = 24, R1024_0_1 = 13, R1024_0_2 = 8, R1024_0_3 = 47, R1024_0_4 = 8, R1024_0_5 = 17, R1024_0_6 = 22, R1024_0_7 = 37, R1024_0_0 = 24, R1024_0_1 = 13, R1024_0_2 = 8, R1024_0_3 = 47,
R1024_1_0 = 38, R1024_1_1 = 19, R1024_1_2 = 10, R1024_1_3 = 55, R1024_1_4 = 49, R1024_1_5 = 18, R1024_1_6 = 23, R1024_1_7 = 52, R1024_0_4 = 8, R1024_0_5 = 17, R1024_0_6 = 22, R1024_0_7 = 37,
R1024_2_0 = 33, R1024_2_1 = 4, R1024_2_2 = 51, R1024_2_3 = 13, R1024_2_4 = 34, R1024_2_5 = 41, R1024_2_6 = 59, R1024_2_7 = 17, R1024_1_0 = 38, R1024_1_1 = 19, R1024_1_2 = 10, R1024_1_3 = 55,
R1024_3_0 = 5, R1024_3_1 = 20, R1024_3_2 = 48, R1024_3_3 = 41, R1024_3_4 = 47, R1024_3_5 = 28, R1024_3_6 = 16, R1024_3_7 = 25, R1024_1_4 = 49, R1024_1_5 = 18, R1024_1_6 = 23, R1024_1_7 = 52,
R1024_4_0 = 41, R1024_4_1 = 9, R1024_4_2 = 37, R1024_4_3 = 31, R1024_4_4 = 12, R1024_4_5 = 47, R1024_4_6 = 44, R1024_4_7 = 30, R1024_2_0 = 33, R1024_2_1 = 4, R1024_2_2 = 51, R1024_2_3 = 13,
R1024_5_0 = 16, R1024_5_1 = 34, R1024_5_2 = 56, R1024_5_3 = 51, R1024_5_4 = 4, R1024_5_5 = 53, R1024_5_6 = 42, R1024_5_7 = 41, R1024_2_4 = 34, R1024_2_5 = 41, R1024_2_6 = 59, R1024_2_7 = 17,
R1024_6_0 = 31, R1024_6_1 = 44, R1024_6_2 = 47, R1024_6_3 = 46, R1024_6_4 = 19, R1024_6_5 = 42, R1024_6_6 = 44, R1024_6_7 = 25, R1024_3_0 = 5, R1024_3_1 = 20, R1024_3_2 = 48, R1024_3_3 = 41,
R1024_7_0 = 9, R1024_7_1 = 48, R1024_7_2 = 35, R1024_7_3 = 52, R1024_7_4 = 23, R1024_7_5 = 31, R1024_7_6 = 37, R1024_7_7 = 20 R1024_3_4 = 47, R1024_3_5 = 28, R1024_3_6 = 16, R1024_3_7 = 25,
R1024_4_0 = 41, R1024_4_1 = 9, R1024_4_2 = 37, R1024_4_3 = 31,
R1024_4_4 = 12, R1024_4_5 = 47, R1024_4_6 = 44, R1024_4_7 = 30,
R1024_5_0 = 16, R1024_5_1 = 34, R1024_5_2 = 56, R1024_5_3 = 51,
R1024_5_4 = 4, R1024_5_5 = 53, R1024_5_6 = 42, R1024_5_7 = 41,
R1024_6_0 = 31, R1024_6_1 = 44, R1024_6_2 = 47, R1024_6_3 = 46,
R1024_6_4 = 19, R1024_6_5 = 42, R1024_6_6 = 44, R1024_6_7 = 25,
R1024_7_0 = 9, R1024_7_1 = 48, R1024_7_2 = 35, R1024_7_3 = 52,
R1024_7_4 = 23, R1024_7_5 = 31, R1024_7_6 = 37, R1024_7_7 = 20
}; };
#ifndef SKEIN_ROUNDS #ifndef SKEIN_ROUNDS
#define SKEIN_256_ROUNDS_TOTAL (72) /* number of rounds for the different block sizes */ #define SKEIN_256_ROUNDS_TOTAL (72) /* # rounds for diff block sizes */
#define SKEIN_512_ROUNDS_TOTAL (72) #define SKEIN_512_ROUNDS_TOTAL (72)
#define SKEIN1024_ROUNDS_TOTAL (80) #define SKEIN1024_ROUNDS_TOTAL (80)
#else /* allow command-line define in range 8*(5..14) */ #else /* allow command-line define in range 8*(5..14) */
......
...@@ -72,7 +72,9 @@ struct threefish_key { ...@@ -72,7 +72,9 @@ struct threefish_key {
* @param tweak * @param tweak
* Pointer to the two tweak words (word has 64 bits). * Pointer to the two tweak words (word has 64 bits).
*/ */
void threefishSetKey(struct threefish_key *keyCtx, enum threefish_size stateSize, u64 *keyData, u64 *tweak); void threefishSetKey(struct threefish_key *keyCtx,
enum threefish_size stateSize,
u64 *keyData, u64 *tweak);
/** /**
* Encrypt Threefish block (bytes). * Encrypt Threefish block (bytes).
...@@ -108,7 +110,8 @@ void threefishEncryptBlockBytes(struct threefish_key *keyCtx, u8 *in, u8 *out); ...@@ -108,7 +110,8 @@ void threefishEncryptBlockBytes(struct threefish_key *keyCtx, u8 *in, u8 *out);
* @param out * @param out
* Pointer to cipher buffer. * Pointer to cipher buffer.
*/ */
void threefishEncryptBlockWords(struct threefish_key *keyCtx, u64 *in, u64 *out); void threefishEncryptBlockWords(struct threefish_key *keyCtx, u64 *in,
u64 *out);
/** /**
* Decrypt Threefish block (bytes). * Decrypt Threefish block (bytes).
...@@ -144,14 +147,17 @@ void threefishDecryptBlockBytes(struct threefish_key *keyCtx, u8 *in, u8 *out); ...@@ -144,14 +147,17 @@ void threefishDecryptBlockBytes(struct threefish_key *keyCtx, u8 *in, u8 *out);
* @param out * @param out
* Pointer to plaintext buffer. * Pointer to plaintext buffer.
*/ */
void threefishDecryptBlockWords(struct threefish_key *keyCtx, u64 *in, u64 *out); void threefishDecryptBlockWords(struct threefish_key *keyCtx, u64 *in,
u64 *out);
void threefishEncrypt256(struct threefish_key *keyCtx, u64 *input, u64 *output); void threefishEncrypt256(struct threefish_key *keyCtx, u64 *input, u64 *output);
void threefishEncrypt512(struct threefish_key *keyCtx, u64 *input, u64 *output); void threefishEncrypt512(struct threefish_key *keyCtx, u64 *input, u64 *output);
void threefishEncrypt1024(struct threefish_key *keyCtx, u64 *input, u64 *output); void threefishEncrypt1024(struct threefish_key *keyCtx, u64 *input,
u64 *output);
void threefishDecrypt256(struct threefish_key *keyCtx, u64 *input, u64 *output); void threefishDecrypt256(struct threefish_key *keyCtx, u64 *input, u64 *output);
void threefishDecrypt512(struct threefish_key *keyCtx, u64 *input, u64 *output); void threefishDecrypt512(struct threefish_key *keyCtx, u64 *input, u64 *output);
void threefishDecrypt1024(struct threefish_key *keyCtx, u64 *input, u64 *output); void threefishDecrypt1024(struct threefish_key *keyCtx, u64 *input,
u64 *output);
/** /**
* @} * @}
*/ */
......
...@@ -16,9 +16,12 @@ ...@@ -16,9 +16,12 @@
/*****************************************************************/ /*****************************************************************/
/* External function to process blkCnt (nonzero) full block(s) of data. */ /* External function to process blkCnt (nonzero) full block(s) of data. */
void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t blkCnt, size_t byteCntAdd); void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr,
void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t blkCnt, size_t byteCntAdd); size_t blkCnt, size_t byteCntAdd);
void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t blkCnt, size_t byteCntAdd); void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr,
size_t blkCnt, size_t byteCntAdd);
void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr,
size_t blkCnt, size_t byteCntAdd);
/*****************************************************************/ /*****************************************************************/
/* 256-bit Skein */ /* 256-bit Skein */
...@@ -53,20 +56,28 @@ int Skein_256_Init(struct skein_256_ctx *ctx, size_t hashBitLen) ...@@ -53,20 +56,28 @@ int Skein_256_Init(struct skein_256_ctx *ctx, size_t hashBitLen)
break; break;
default: default:
/* here if there is no precomputed IV value available */ /* here if there is no precomputed IV value available */
/* build/process the config block, type == CONFIG (could be precomputed) */ /*
Skein_Start_New_Type(ctx, CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ * build/process the config block, type == CONFIG (could be
* precomputed)
*/
/* set tweaks: T0=0; T1=CFG | FINAL */
Skein_Start_New_Type(ctx, CFG_FINAL);
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ /* set the schema, version */
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
/* hash result length in bits */
cfg.w[1] = Skein_Swap64(hashBitLen);
cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
memset(&cfg.w[3], 0, sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ /* zero pad config block */
memset(&cfg.w[3], 0, sizeof(cfg) - 3*sizeof(cfg.w[0]));
/* compute the initial chaining values from config block */ /* compute the initial chaining values from config block */
memset(ctx->X, 0, sizeof(ctx->X)); /* zero the chaining variables */ /* zero the chaining variables */
memset(ctx->X, 0, sizeof(ctx->X));
Skein_256_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); Skein_256_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
break; break;
} }
/* The chaining vars ctx->X are now initialized for the given hashBitLen. */ /* The chaining vars ctx->X are now initialized for hashBitLen. */
/* Set up to process the data message portion of the hash (default) */ /* Set up to process the data message portion of the hash (default) */
Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */ Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */
...@@ -75,8 +86,10 @@ int Skein_256_Init(struct skein_256_ctx *ctx, size_t hashBitLen) ...@@ -75,8 +86,10 @@ int Skein_256_Init(struct skein_256_ctx *ctx, size_t hashBitLen)
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* init the context for a MAC and/or tree hash operation */ /* init the context for a MAC and/or tree hash operation */
/* [identical to Skein_256_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ /* [identical to Skein_256_Init() when keyBytes == 0 &&
int Skein_256_InitExt(struct skein_256_ctx *ctx, size_t hashBitLen, u64 treeInfo, const u8 *key, size_t keyBytes) * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
int Skein_256_InitExt(struct skein_256_ctx *ctx, size_t hashBitLen,
u64 treeInfo, const u8 *key, size_t keyBytes)
{ {
union union
{ {
...@@ -90,27 +103,41 @@ int Skein_256_InitExt(struct skein_256_ctx *ctx, size_t hashBitLen, u64 treeInfo ...@@ -90,27 +103,41 @@ int Skein_256_InitExt(struct skein_256_ctx *ctx, size_t hashBitLen, u64 treeInfo
/* compute the initial chaining values ctx->X[], based on key */ /* compute the initial chaining values ctx->X[], based on key */
if (keyBytes == 0) /* is there a key? */ if (keyBytes == 0) /* is there a key? */
{ {
memset(ctx->X, 0, sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ /* no key: use all zeroes as key for config block */
memset(ctx->X, 0, sizeof(ctx->X));
} }
else /* here to pre-process a key */ else /* here to pre-process a key */
{ {
Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
/* do a mini-Init right here */ /* do a mini-Init right here */
ctx->h.hashBitLen = 8*sizeof(ctx->X); /* set output hash bit count = state size */ /* set output hash bit count = state size */
Skein_Start_New_Type(ctx, KEY); /* set tweaks: T0 = 0; T1 = KEY type */ ctx->h.hashBitLen = 8*sizeof(ctx->X);
memset(ctx->X, 0, sizeof(ctx->X)); /* zero the initial chaining variables */ /* set tweaks: T0 = 0; T1 = KEY type */
Skein_256_Update(ctx, key, keyBytes); /* hash the key */ Skein_Start_New_Type(ctx, KEY);
Skein_256_Final_Pad(ctx, cfg.b); /* put result into cfg.b[] */ /* zero the initial chaining variables */
memcpy(ctx->X, cfg.b, sizeof(cfg.b)); /* copy over into ctx->X[] */ memset(ctx->X, 0, sizeof(ctx->X));
/* hash the key */
Skein_256_Update(ctx, key, keyBytes);
/* put result into cfg.b[] */
Skein_256_Final_Pad(ctx, cfg.b);
/* copy over into ctx->X[] */
memcpy(ctx->X, cfg.b, sizeof(cfg.b));
} }
/* build/process the config block, type == CONFIG (could be precomputed for each key) */ /*
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ * build/process the config block, type == CONFIG (could be
* precomputed for each key)
*/
/* output hash bit count */
ctx->h.hashBitLen = hashBitLen;
Skein_Start_New_Type(ctx, CFG_FINAL); Skein_Start_New_Type(ctx, CFG_FINAL);
memset(&cfg.w, 0, sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ /* pre-pad cfg.w[] with zeroes */
memset(&cfg.w, 0, sizeof(cfg.w));
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ /* hash result length in bits */
cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ cfg.w[1] = Skein_Swap64(hashBitLen);
/* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
cfg.w[2] = Skein_Swap64(treeInfo);
Skein_Show_Key(256, &ctx->h, key, keyBytes); Skein_Show_Key(256, &ctx->h, key, keyBytes);
...@@ -126,35 +153,46 @@ int Skein_256_InitExt(struct skein_256_ctx *ctx, size_t hashBitLen, u64 treeInfo ...@@ -126,35 +153,46 @@ int Skein_256_InitExt(struct skein_256_ctx *ctx, size_t hashBitLen, u64 treeInfo
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* process the input bytes */ /* process the input bytes */
int Skein_256_Update(struct skein_256_ctx *ctx, const u8 *msg, size_t msgByteCnt) int Skein_256_Update(struct skein_256_ctx *ctx, const u8 *msg,
size_t msgByteCnt)
{ {
size_t n; size_t n;
Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */ /* catch uninitialized context */
Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
/* process full blocks, if any */ /* process full blocks, if any */
if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES) if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES)
{ {
if (ctx->h.bCnt) /* finish up any buffered message data */ /* finish up any buffered message data */
if (ctx->h.bCnt)
{ {
n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ /* # bytes free in buffer b[] */
n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt;
if (n) if (n)
{ {
Skein_assert(n < msgByteCnt); /* check on our logic here */ /* check on our logic here */
Skein_assert(n < msgByteCnt);
memcpy(&ctx->b[ctx->h.bCnt], msg, n); memcpy(&ctx->b[ctx->h.bCnt], msg, n);
msgByteCnt -= n; msgByteCnt -= n;
msg += n; msg += n;
ctx->h.bCnt += n; ctx->h.bCnt += n;
} }
Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES); Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES);
Skein_256_Process_Block(ctx, ctx->b, 1, SKEIN_256_BLOCK_BYTES); Skein_256_Process_Block(ctx, ctx->b, 1,
SKEIN_256_BLOCK_BYTES);
ctx->h.bCnt = 0; ctx->h.bCnt = 0;
} }
/* now process any remaining full blocks, directly from input message data */ /*
* now process any remaining full blocks, directly from input
* message data
*/
if (msgByteCnt > SKEIN_256_BLOCK_BYTES) if (msgByteCnt > SKEIN_256_BLOCK_BYTES)
{ {
n = (msgByteCnt-1) / SKEIN_256_BLOCK_BYTES; /* number of full blocks to process */ /* number of full blocks to process */
Skein_256_Process_Block(ctx, msg, n, SKEIN_256_BLOCK_BYTES); n = (msgByteCnt-1) / SKEIN_256_BLOCK_BYTES;
Skein_256_Process_Block(ctx, msg, n,
SKEIN_256_BLOCK_BYTES);
msgByteCnt -= n * SKEIN_256_BLOCK_BYTES; msgByteCnt -= n * SKEIN_256_BLOCK_BYTES;
msg += n * SKEIN_256_BLOCK_BYTES; msg += n * SKEIN_256_BLOCK_BYTES;
} }
...@@ -178,31 +216,46 @@ int Skein_256_Final(struct skein_256_ctx *ctx, u8 *hashVal) ...@@ -178,31 +216,46 @@ int Skein_256_Final(struct skein_256_ctx *ctx, u8 *hashVal)
{ {
size_t i, n, byteCnt; size_t i, n, byteCnt;
u64 X[SKEIN_256_STATE_WORDS]; u64 X[SKEIN_256_STATE_WORDS];
Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */ /* catch uninitialized context */
Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ /* tag as the final block */
if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) /* zero pad b[] if necessary */ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
memset(&ctx->b[ctx->h.bCnt], 0, SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); /* zero pad b[] if necessary */
if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES)
memset(&ctx->b[ctx->h.bCnt], 0,
SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); /* process the final block */ /* process the final block */
Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
/* now output the result */ /* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ /* total number of output bytes */
byteCnt = (ctx->h.hashBitLen + 7) >> 3;
/* run Threefish in "counter mode" to generate output */ /* run Threefish in "counter mode" to generate output */
memset(ctx->b, 0, sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ /* zero out b[], so it can hold the counter */
memcpy(X, ctx->X, sizeof(X)); /* keep a local copy of counter mode "key" */ memset(ctx->b, 0, sizeof(ctx->b));
/* keep a local copy of counter mode "key" */
memcpy(X, ctx->X, sizeof(X));
for (i = 0; i*SKEIN_256_BLOCK_BYTES < byteCnt; i++) for (i = 0; i*SKEIN_256_BLOCK_BYTES < byteCnt; i++)
{ {
((u64 *)ctx->b)[0] = Skein_Swap64((u64) i); /* build the counter block */ /* build the counter block */
((u64 *)ctx->b)[0] = Skein_Swap64((u64) i);
Skein_Start_New_Type(ctx, OUT_FINAL); Skein_Start_New_Type(ctx, OUT_FINAL);
Skein_256_Process_Block(ctx, ctx->b, 1, sizeof(u64)); /* run "counter mode" */ /* run "counter mode" */
n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */ Skein_256_Process_Block(ctx, ctx->b, 1, sizeof(u64));
/* number of output bytes left to go */
n = byteCnt - i*SKEIN_256_BLOCK_BYTES;
if (n >= SKEIN_256_BLOCK_BYTES) if (n >= SKEIN_256_BLOCK_BYTES)
n = SKEIN_256_BLOCK_BYTES; n = SKEIN_256_BLOCK_BYTES;
Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES, ctx->X, n); /* "output" the ctr mode bytes */ /* "output" the ctr mode bytes */
Skein_Show_Final(256, &ctx->h, n, hashVal+i*SKEIN_256_BLOCK_BYTES); Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES, ctx->X,
memcpy(ctx->X, X, sizeof(X)); /* restore the counter mode key for next time */ n);
Skein_Show_Final(256, &ctx->h, n,
hashVal+i*SKEIN_256_BLOCK_BYTES);
/* restore the counter mode key for next time */
memcpy(ctx->X, X, sizeof(X));
} }
return SKEIN_SUCCESS; return SKEIN_SUCCESS;
} }
...@@ -240,21 +293,32 @@ int Skein_512_Init(struct skein_512_ctx *ctx, size_t hashBitLen) ...@@ -240,21 +293,32 @@ int Skein_512_Init(struct skein_512_ctx *ctx, size_t hashBitLen)
break; break;
default: default:
/* here if there is no precomputed IV value available */ /* here if there is no precomputed IV value available */
/* build/process the config block, type == CONFIG (could be precomputed) */ /*
Skein_Start_New_Type(ctx, CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ * build/process the config block, type == CONFIG (could be
* precomputed)
*/
/* set tweaks: T0=0; T1=CFG | FINAL */
Skein_Start_New_Type(ctx, CFG_FINAL);
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ /* set the schema, version */
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
/* hash result length in bits */
cfg.w[1] = Skein_Swap64(hashBitLen);
cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
memset(&cfg.w[3], 0, sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ /* zero pad config block */
memset(&cfg.w[3], 0, sizeof(cfg) - 3*sizeof(cfg.w[0]));
/* compute the initial chaining values from config block */ /* compute the initial chaining values from config block */
memset(ctx->X, 0, sizeof(ctx->X)); /* zero the chaining variables */ /* zero the chaining variables */
memset(ctx->X, 0, sizeof(ctx->X));
Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
break; break;
} }
/* The chaining vars ctx->X are now initialized for the given hashBitLen. */ /*
* The chaining vars ctx->X are now initialized for the given
* hashBitLen.
*/
/* Set up to process the data message portion of the hash (default) */ /* Set up to process the data message portion of the hash (default) */
Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */ Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */
...@@ -263,8 +327,10 @@ int Skein_512_Init(struct skein_512_ctx *ctx, size_t hashBitLen) ...@@ -263,8 +327,10 @@ int Skein_512_Init(struct skein_512_ctx *ctx, size_t hashBitLen)
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* init the context for a MAC and/or tree hash operation */ /* init the context for a MAC and/or tree hash operation */
/* [identical to Skein_512_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ /* [identical to Skein_512_Init() when keyBytes == 0 &&
int Skein_512_InitExt(struct skein_512_ctx *ctx, size_t hashBitLen, u64 treeInfo, const u8 *key, size_t keyBytes) * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
int Skein_512_InitExt(struct skein_512_ctx *ctx, size_t hashBitLen,
u64 treeInfo, const u8 *key, size_t keyBytes)
{ {
union union
{ {
...@@ -278,27 +344,40 @@ int Skein_512_InitExt(struct skein_512_ctx *ctx, size_t hashBitLen, u64 treeInfo ...@@ -278,27 +344,40 @@ int Skein_512_InitExt(struct skein_512_ctx *ctx, size_t hashBitLen, u64 treeInfo
/* compute the initial chaining values ctx->X[], based on key */ /* compute the initial chaining values ctx->X[], based on key */
if (keyBytes == 0) /* is there a key? */ if (keyBytes == 0) /* is there a key? */
{ {
memset(ctx->X, 0, sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ /* no key: use all zeroes as key for config block */
memset(ctx->X, 0, sizeof(ctx->X));
} }
else /* here to pre-process a key */ else /* here to pre-process a key */
{ {
Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
/* do a mini-Init right here */ /* do a mini-Init right here */
ctx->h.hashBitLen = 8*sizeof(ctx->X); /* set output hash bit count = state size */ /* set output hash bit count = state size */
Skein_Start_New_Type(ctx, KEY); /* set tweaks: T0 = 0; T1 = KEY type */ ctx->h.hashBitLen = 8*sizeof(ctx->X);
memset(ctx->X, 0, sizeof(ctx->X)); /* zero the initial chaining variables */ /* set tweaks: T0 = 0; T1 = KEY type */
Skein_512_Update(ctx, key, keyBytes); /* hash the key */ Skein_Start_New_Type(ctx, KEY);
Skein_512_Final_Pad(ctx, cfg.b); /* put result into cfg.b[] */ /* zero the initial chaining variables */
memcpy(ctx->X, cfg.b, sizeof(cfg.b)); /* copy over into ctx->X[] */ memset(ctx->X, 0, sizeof(ctx->X));
/* hash the key */
Skein_512_Update(ctx, key, keyBytes);
/* put result into cfg.b[] */
Skein_512_Final_Pad(ctx, cfg.b);
/* copy over into ctx->X[] */
memcpy(ctx->X, cfg.b, sizeof(cfg.b));
} }
/* build/process the config block, type == CONFIG (could be precomputed for each key) */ /*
* build/process the config block, type == CONFIG (could be
* precomputed for each key)
*/
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
Skein_Start_New_Type(ctx, CFG_FINAL); Skein_Start_New_Type(ctx, CFG_FINAL);
memset(&cfg.w, 0, sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ /* pre-pad cfg.w[] with zeroes */
memset(&cfg.w, 0, sizeof(cfg.w));
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ /* hash result length in bits */
cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ cfg.w[1] = Skein_Swap64(hashBitLen);
/* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
cfg.w[2] = Skein_Swap64(treeInfo);
Skein_Show_Key(512, &ctx->h, key, keyBytes); Skein_Show_Key(512, &ctx->h, key, keyBytes);
...@@ -314,35 +393,46 @@ int Skein_512_InitExt(struct skein_512_ctx *ctx, size_t hashBitLen, u64 treeInfo ...@@ -314,35 +393,46 @@ int Skein_512_InitExt(struct skein_512_ctx *ctx, size_t hashBitLen, u64 treeInfo
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* process the input bytes */ /* process the input bytes */
int Skein_512_Update(struct skein_512_ctx *ctx, const u8 *msg, size_t msgByteCnt) int Skein_512_Update(struct skein_512_ctx *ctx, const u8 *msg,
size_t msgByteCnt)
{ {
size_t n; size_t n;
Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */ /* catch uninitialized context */
Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
/* process full blocks, if any */ /* process full blocks, if any */
if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES)
{ {
if (ctx->h.bCnt) /* finish up any buffered message data */ /* finish up any buffered message data */
if (ctx->h.bCnt)
{ {
n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ /* # bytes free in buffer b[] */
n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;
if (n) if (n)
{ {
Skein_assert(n < msgByteCnt); /* check on our logic here */ /* check on our logic here */
Skein_assert(n < msgByteCnt);
memcpy(&ctx->b[ctx->h.bCnt], msg, n); memcpy(&ctx->b[ctx->h.bCnt], msg, n);
msgByteCnt -= n; msgByteCnt -= n;
msg += n; msg += n;
ctx->h.bCnt += n; ctx->h.bCnt += n;
} }
Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES); Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES);
Skein_512_Process_Block(ctx, ctx->b, 1, SKEIN_512_BLOCK_BYTES); Skein_512_Process_Block(ctx, ctx->b, 1,
SKEIN_512_BLOCK_BYTES);
ctx->h.bCnt = 0; ctx->h.bCnt = 0;
} }
/* now process any remaining full blocks, directly from input message data */ /*
* now process any remaining full blocks, directly from input
* message data
*/
if (msgByteCnt > SKEIN_512_BLOCK_BYTES) if (msgByteCnt > SKEIN_512_BLOCK_BYTES)
{ {
n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES; /* number of full blocks to process */ /* number of full blocks to process */
Skein_512_Process_Block(ctx, msg, n, SKEIN_512_BLOCK_BYTES); n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES;
Skein_512_Process_Block(ctx, msg, n,
SKEIN_512_BLOCK_BYTES);
msgByteCnt -= n * SKEIN_512_BLOCK_BYTES; msgByteCnt -= n * SKEIN_512_BLOCK_BYTES;
msg += n * SKEIN_512_BLOCK_BYTES; msg += n * SKEIN_512_BLOCK_BYTES;
} }
...@@ -366,31 +456,46 @@ int Skein_512_Final(struct skein_512_ctx *ctx, u8 *hashVal) ...@@ -366,31 +456,46 @@ int Skein_512_Final(struct skein_512_ctx *ctx, u8 *hashVal)
{ {
size_t i, n, byteCnt; size_t i, n, byteCnt;
u64 X[SKEIN_512_STATE_WORDS]; u64 X[SKEIN_512_STATE_WORDS];
Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */ /* catch uninitialized context */
Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ /* tag as the final block */
if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) /* zero pad b[] if necessary */ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
memset(&ctx->b[ctx->h.bCnt], 0, SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); /* zero pad b[] if necessary */
if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)
memset(&ctx->b[ctx->h.bCnt], 0,
SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); /* process the final block */ /* process the final block */
Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
/* now output the result */ /* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ /* total number of output bytes */
byteCnt = (ctx->h.hashBitLen + 7) >> 3;
/* run Threefish in "counter mode" to generate output */ /* run Threefish in "counter mode" to generate output */
memset(ctx->b, 0, sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ /* zero out b[], so it can hold the counter */
memcpy(X, ctx->X, sizeof(X)); /* keep a local copy of counter mode "key" */ memset(ctx->b, 0, sizeof(ctx->b));
/* keep a local copy of counter mode "key" */
memcpy(X, ctx->X, sizeof(X));
for (i = 0; i*SKEIN_512_BLOCK_BYTES < byteCnt; i++) for (i = 0; i*SKEIN_512_BLOCK_BYTES < byteCnt; i++)
{ {
((u64 *)ctx->b)[0] = Skein_Swap64((u64) i); /* build the counter block */ /* build the counter block */
((u64 *)ctx->b)[0] = Skein_Swap64((u64) i);
Skein_Start_New_Type(ctx, OUT_FINAL); Skein_Start_New_Type(ctx, OUT_FINAL);
Skein_512_Process_Block(ctx, ctx->b, 1, sizeof(u64)); /* run "counter mode" */ /* run "counter mode" */
n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */ Skein_512_Process_Block(ctx, ctx->b, 1, sizeof(u64));
/* number of output bytes left to go */
n = byteCnt - i*SKEIN_512_BLOCK_BYTES;
if (n >= SKEIN_512_BLOCK_BYTES) if (n >= SKEIN_512_BLOCK_BYTES)
n = SKEIN_512_BLOCK_BYTES; n = SKEIN_512_BLOCK_BYTES;
Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES, ctx->X, n); /* "output" the ctr mode bytes */ /* "output" the ctr mode bytes */
Skein_Show_Final(512, &ctx->h, n, hashVal+i*SKEIN_512_BLOCK_BYTES); Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES, ctx->X,
memcpy(ctx->X, X, sizeof(X)); /* restore the counter mode key for next time */ n);
Skein_Show_Final(512, &ctx->h, n,
hashVal+i*SKEIN_512_BLOCK_BYTES);
/* restore the counter mode key for next time */
memcpy(ctx->X, X, sizeof(X));
} }
return SKEIN_SUCCESS; return SKEIN_SUCCESS;
} }
...@@ -425,21 +530,29 @@ int Skein1024_Init(struct skein1024_ctx *ctx, size_t hashBitLen) ...@@ -425,21 +530,29 @@ int Skein1024_Init(struct skein1024_ctx *ctx, size_t hashBitLen)
break; break;
default: default:
/* here if there is no precomputed IV value available */ /* here if there is no precomputed IV value available */
/* build/process the config block, type == CONFIG (could be precomputed) */ /*
Skein_Start_New_Type(ctx, CFG_FINAL); /* set tweaks: T0=0; T1=CFG | FINAL */ * build/process the config block, type == CONFIG
* (could be precomputed)
*/
/* set tweaks: T0=0; T1=CFG | FINAL */
Skein_Start_New_Type(ctx, CFG_FINAL);
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* set the schema, version */ /* set the schema, version */
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
/* hash result length in bits */
cfg.w[1] = Skein_Swap64(hashBitLen);
cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
memset(&cfg.w[3], 0, sizeof(cfg) - 3*sizeof(cfg.w[0])); /* zero pad config block */ /* zero pad config block */
memset(&cfg.w[3], 0, sizeof(cfg) - 3*sizeof(cfg.w[0]));
/* compute the initial chaining values from config block */ /* compute the initial chaining values from config block */
memset(ctx->X, 0, sizeof(ctx->X)); /* zero the chaining variables */ /* zero the chaining variables */
memset(ctx->X, 0, sizeof(ctx->X));
Skein1024_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); Skein1024_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
break; break;
} }
/* The chaining vars ctx->X are now initialized for the given hashBitLen. */ /* The chaining vars ctx->X are now initialized for the hashBitLen. */
/* Set up to process the data message portion of the hash (default) */ /* Set up to process the data message portion of the hash (default) */
Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */ Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */
...@@ -448,8 +561,10 @@ int Skein1024_Init(struct skein1024_ctx *ctx, size_t hashBitLen) ...@@ -448,8 +561,10 @@ int Skein1024_Init(struct skein1024_ctx *ctx, size_t hashBitLen)
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* init the context for a MAC and/or tree hash operation */ /* init the context for a MAC and/or tree hash operation */
/* [identical to Skein1024_Init() when keyBytes == 0 && treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */ /* [identical to Skein1024_Init() when keyBytes == 0 && \
int Skein1024_InitExt(struct skein1024_ctx *ctx, size_t hashBitLen, u64 treeInfo, const u8 *key, size_t keyBytes) * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL] */
int Skein1024_InitExt(struct skein1024_ctx *ctx, size_t hashBitLen,
u64 treeInfo, const u8 *key, size_t keyBytes)
{ {
union union
{ {
...@@ -463,27 +578,41 @@ int Skein1024_InitExt(struct skein1024_ctx *ctx, size_t hashBitLen, u64 treeInfo ...@@ -463,27 +578,41 @@ int Skein1024_InitExt(struct skein1024_ctx *ctx, size_t hashBitLen, u64 treeInfo
/* compute the initial chaining values ctx->X[], based on key */ /* compute the initial chaining values ctx->X[], based on key */
if (keyBytes == 0) /* is there a key? */ if (keyBytes == 0) /* is there a key? */
{ {
memset(ctx->X, 0, sizeof(ctx->X)); /* no key: use all zeroes as key for config block */ /* no key: use all zeroes as key for config block */
memset(ctx->X, 0, sizeof(ctx->X));
} }
else /* here to pre-process a key */ else /* here to pre-process a key */
{ {
Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X)); Skein_assert(sizeof(cfg.b) >= sizeof(ctx->X));
/* do a mini-Init right here */ /* do a mini-Init right here */
ctx->h.hashBitLen = 8*sizeof(ctx->X); /* set output hash bit count = state size */ /* set output hash bit count = state size */
Skein_Start_New_Type(ctx, KEY); /* set tweaks: T0 = 0; T1 = KEY type */ ctx->h.hashBitLen = 8*sizeof(ctx->X);
memset(ctx->X, 0, sizeof(ctx->X)); /* zero the initial chaining variables */ /* set tweaks: T0 = 0; T1 = KEY type */
Skein1024_Update(ctx, key, keyBytes); /* hash the key */ Skein_Start_New_Type(ctx, KEY);
Skein1024_Final_Pad(ctx, cfg.b); /* put result into cfg.b[] */ /* zero the initial chaining variables */
memcpy(ctx->X, cfg.b, sizeof(cfg.b)); /* copy over into ctx->X[] */ memset(ctx->X, 0, sizeof(ctx->X));
/* hash the key */
Skein1024_Update(ctx, key, keyBytes);
/* put result into cfg.b[] */
Skein1024_Final_Pad(ctx, cfg.b);
/* copy over into ctx->X[] */
memcpy(ctx->X, cfg.b, sizeof(cfg.b));
} }
/* build/process the config block, type == CONFIG (could be precomputed for each key) */ /*
ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ * build/process the config block, type == CONFIG (could be
* precomputed for each key)
*/
/* output hash bit count */
ctx->h.hashBitLen = hashBitLen;
Skein_Start_New_Type(ctx, CFG_FINAL); Skein_Start_New_Type(ctx, CFG_FINAL);
memset(&cfg.w, 0, sizeof(cfg.w)); /* pre-pad cfg.w[] with zeroes */ /* pre-pad cfg.w[] with zeroes */
memset(&cfg.w, 0, sizeof(cfg.w));
cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ /* hash result length in bits */
cfg.w[2] = Skein_Swap64(treeInfo); /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ cfg.w[1] = Skein_Swap64(hashBitLen);
/* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
cfg.w[2] = Skein_Swap64(treeInfo);
Skein_Show_Key(1024, &ctx->h, key, keyBytes); Skein_Show_Key(1024, &ctx->h, key, keyBytes);
...@@ -499,35 +628,46 @@ int Skein1024_InitExt(struct skein1024_ctx *ctx, size_t hashBitLen, u64 treeInfo ...@@ -499,35 +628,46 @@ int Skein1024_InitExt(struct skein1024_ctx *ctx, size_t hashBitLen, u64 treeInfo
/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* process the input bytes */ /* process the input bytes */
int Skein1024_Update(struct skein1024_ctx *ctx, const u8 *msg, size_t msgByteCnt) int Skein1024_Update(struct skein1024_ctx *ctx, const u8 *msg,
size_t msgByteCnt)
{ {
size_t n; size_t n;
Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */ /* catch uninitialized context */
Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
/* process full blocks, if any */ /* process full blocks, if any */
if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES) if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES)
{ {
if (ctx->h.bCnt) /* finish up any buffered message data */ /* finish up any buffered message data */
if (ctx->h.bCnt)
{ {
n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt; /* # bytes free in buffer b[] */ /* # bytes free in buffer b[] */
n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt;
if (n) if (n)
{ {
Skein_assert(n < msgByteCnt); /* check on our logic here */ /* check on our logic here */
Skein_assert(n < msgByteCnt);
memcpy(&ctx->b[ctx->h.bCnt], msg, n); memcpy(&ctx->b[ctx->h.bCnt], msg, n);
msgByteCnt -= n; msgByteCnt -= n;
msg += n; msg += n;
ctx->h.bCnt += n; ctx->h.bCnt += n;
} }
Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES); Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES);
Skein1024_Process_Block(ctx, ctx->b, 1, SKEIN1024_BLOCK_BYTES); Skein1024_Process_Block(ctx, ctx->b, 1,
SKEIN1024_BLOCK_BYTES);
ctx->h.bCnt = 0; ctx->h.bCnt = 0;
} }
/* now process any remaining full blocks, directly from input message data */ /*
* now process any remaining full blocks, directly from input
* message data
*/
if (msgByteCnt > SKEIN1024_BLOCK_BYTES) if (msgByteCnt > SKEIN1024_BLOCK_BYTES)
{ {
n = (msgByteCnt-1) / SKEIN1024_BLOCK_BYTES; /* number of full blocks to process */ /* number of full blocks to process */
Skein1024_Process_Block(ctx, msg, n, SKEIN1024_BLOCK_BYTES); n = (msgByteCnt-1) / SKEIN1024_BLOCK_BYTES;
Skein1024_Process_Block(ctx, msg, n,
SKEIN1024_BLOCK_BYTES);
msgByteCnt -= n * SKEIN1024_BLOCK_BYTES; msgByteCnt -= n * SKEIN1024_BLOCK_BYTES;
msg += n * SKEIN1024_BLOCK_BYTES; msg += n * SKEIN1024_BLOCK_BYTES;
} }
...@@ -551,31 +691,46 @@ int Skein1024_Final(struct skein1024_ctx *ctx, u8 *hashVal) ...@@ -551,31 +691,46 @@ int Skein1024_Final(struct skein1024_ctx *ctx, u8 *hashVal)
{ {
size_t i, n, byteCnt; size_t i, n, byteCnt;
u64 X[SKEIN1024_STATE_WORDS]; u64 X[SKEIN1024_STATE_WORDS];
Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */ /* catch uninitialized context */
Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ /* tag as the final block */
if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) /* zero pad b[] if necessary */ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
memset(&ctx->b[ctx->h.bCnt], 0, SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); /* zero pad b[] if necessary */
if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES)
memset(&ctx->b[ctx->h.bCnt], 0,
SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); /* process the final block */ /* process the final block */
Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
/* now output the result */ /* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ /* total number of output bytes */
byteCnt = (ctx->h.hashBitLen + 7) >> 3;
/* run Threefish in "counter mode" to generate output */ /* run Threefish in "counter mode" to generate output */
memset(ctx->b, 0, sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ /* zero out b[], so it can hold the counter */
memcpy(X, ctx->X, sizeof(X)); /* keep a local copy of counter mode "key" */ memset(ctx->b, 0, sizeof(ctx->b));
/* keep a local copy of counter mode "key" */
memcpy(X, ctx->X, sizeof(X));
for (i = 0; i*SKEIN1024_BLOCK_BYTES < byteCnt; i++) for (i = 0; i*SKEIN1024_BLOCK_BYTES < byteCnt; i++)
{ {
((u64 *)ctx->b)[0] = Skein_Swap64((u64) i); /* build the counter block */ /* build the counter block */
((u64 *)ctx->b)[0] = Skein_Swap64((u64) i);
Skein_Start_New_Type(ctx, OUT_FINAL); Skein_Start_New_Type(ctx, OUT_FINAL);
Skein1024_Process_Block(ctx, ctx->b, 1, sizeof(u64)); /* run "counter mode" */ /* run "counter mode" */
n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */ Skein1024_Process_Block(ctx, ctx->b, 1, sizeof(u64));
/* number of output bytes left to go */
n = byteCnt - i*SKEIN1024_BLOCK_BYTES;
if (n >= SKEIN1024_BLOCK_BYTES) if (n >= SKEIN1024_BLOCK_BYTES)
n = SKEIN1024_BLOCK_BYTES; n = SKEIN1024_BLOCK_BYTES;
Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES, ctx->X, n); /* "output" the ctr mode bytes */ /* "output" the ctr mode bytes */
Skein_Show_Final(1024, &ctx->h, n, hashVal+i*SKEIN1024_BLOCK_BYTES); Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES, ctx->X,
memcpy(ctx->X, X, sizeof(X)); /* restore the counter mode key for next time */ n);
Skein_Show_Final(1024, &ctx->h, n,
hashVal+i*SKEIN1024_BLOCK_BYTES);
/* restore the counter mode key for next time */
memcpy(ctx->X, X, sizeof(X));
} }
return SKEIN_SUCCESS; return SKEIN_SUCCESS;
} }
...@@ -587,14 +742,20 @@ int Skein1024_Final(struct skein1024_ctx *ctx, u8 *hashVal) ...@@ -587,14 +742,20 @@ int Skein1024_Final(struct skein1024_ctx *ctx, u8 *hashVal)
/*
 * Finalize the Skein-256 hash computation and output the raw state block;
 * no OUTPUT stage is run.
 *
 * The buffered data is flagged as final, zero-padded to a full block and
 * processed, after which SKEIN_256_BLOCK_BYTES bytes of ctx->X are written
 * to hashVal. Returns SKEIN_SUCCESS.
 */
int Skein_256_Final_Pad(struct skein_256_ctx *ctx, u8 *hashVal)
{
	size_t used;

	/* an out-of-range bCnt means the context was never initialized */
	Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);

	used = ctx->h.bCnt;

	/* tag as the final block */
	ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;

	/* zero-fill the part of b[] that holds no message bytes */
	if (used < SKEIN_256_BLOCK_BYTES)
		memset(ctx->b + used, 0, SKEIN_256_BLOCK_BYTES - used);

	/* process the final block */
	Skein_256_Process_Block(ctx, ctx->b, 1, used);

	/* "output" the state bytes */
	Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN_256_BLOCK_BYTES);

	return SKEIN_SUCCESS;
}
...@@ -603,14 +764,20 @@ int Skein_256_Final_Pad(struct skein_256_ctx *ctx, u8 *hashVal) ...@@ -603,14 +764,20 @@ int Skein_256_Final_Pad(struct skein_256_ctx *ctx, u8 *hashVal)
/*
 * Finalize the Skein-512 hash computation and output the raw state block;
 * no OUTPUT stage is run.
 *
 * The buffered data is flagged as final, zero-padded to a full block and
 * processed, after which SKEIN_512_BLOCK_BYTES bytes of ctx->X are written
 * to hashVal. Returns SKEIN_SUCCESS.
 */
int Skein_512_Final_Pad(struct skein_512_ctx *ctx, u8 *hashVal)
{
	size_t used;

	/* an out-of-range bCnt means the context was never initialized */
	Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);

	used = ctx->h.bCnt;

	/* tag as the final block */
	ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;

	/* zero-fill the part of b[] that holds no message bytes */
	if (used < SKEIN_512_BLOCK_BYTES)
		memset(ctx->b + used, 0, SKEIN_512_BLOCK_BYTES - used);

	/* process the final block */
	Skein_512_Process_Block(ctx, ctx->b, 1, used);

	/* "output" the state bytes */
	Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN_512_BLOCK_BYTES);

	return SKEIN_SUCCESS;
}
...@@ -619,14 +786,20 @@ int Skein_512_Final_Pad(struct skein_512_ctx *ctx, u8 *hashVal) ...@@ -619,14 +786,20 @@ int Skein_512_Final_Pad(struct skein_512_ctx *ctx, u8 *hashVal)
/*
 * Finalize the Skein-1024 hash computation and output the raw state block;
 * no OUTPUT stage is run.
 *
 * The buffered data is flagged as final, zero-padded to a full block and
 * processed, after which SKEIN1024_BLOCK_BYTES bytes of ctx->X are written
 * to hashVal. Returns SKEIN_SUCCESS.
 */
int Skein1024_Final_Pad(struct skein1024_ctx *ctx, u8 *hashVal)
{
	size_t used;

	/* an out-of-range bCnt means the context was never initialized */
	Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);

	used = ctx->h.bCnt;

	/* tag as the final block */
	ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;

	/* zero-fill the part of b[] that holds no message bytes */
	if (used < SKEIN1024_BLOCK_BYTES)
		memset(ctx->b + used, 0, SKEIN1024_BLOCK_BYTES - used);

	/* process the final block */
	Skein1024_Process_Block(ctx, ctx->b, 1, used);

	/* "output" the state bytes */
	Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN1024_BLOCK_BYTES);

	return SKEIN_SUCCESS;
}
...@@ -638,25 +811,36 @@ int Skein_256_Output(struct skein_256_ctx *ctx, u8 *hashVal) ...@@ -638,25 +811,36 @@ int Skein_256_Output(struct skein_256_ctx *ctx, u8 *hashVal)
{ {
size_t i, n, byteCnt; size_t i, n, byteCnt;
u64 X[SKEIN_256_STATE_WORDS]; u64 X[SKEIN_256_STATE_WORDS];
Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */ /* catch uninitialized context */
Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
/* now output the result */ /* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ /* total number of output bytes */
byteCnt = (ctx->h.hashBitLen + 7) >> 3;
/* run Threefish in "counter mode" to generate output */ /* run Threefish in "counter mode" to generate output */
memset(ctx->b, 0, sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ /* zero out b[], so it can hold the counter */
memcpy(X, ctx->X, sizeof(X)); /* keep a local copy of counter mode "key" */ memset(ctx->b, 0, sizeof(ctx->b));
/* keep a local copy of counter mode "key" */
memcpy(X, ctx->X, sizeof(X));
for (i = 0; i*SKEIN_256_BLOCK_BYTES < byteCnt; i++) for (i = 0; i*SKEIN_256_BLOCK_BYTES < byteCnt; i++)
{ {
((u64 *)ctx->b)[0] = Skein_Swap64((u64) i); /* build the counter block */ /* build the counter block */
((u64 *)ctx->b)[0] = Skein_Swap64((u64) i);
Skein_Start_New_Type(ctx, OUT_FINAL); Skein_Start_New_Type(ctx, OUT_FINAL);
Skein_256_Process_Block(ctx, ctx->b, 1, sizeof(u64)); /* run "counter mode" */ /* run "counter mode" */
n = byteCnt - i*SKEIN_256_BLOCK_BYTES; /* number of output bytes left to go */ Skein_256_Process_Block(ctx, ctx->b, 1, sizeof(u64));
/* number of output bytes left to go */
n = byteCnt - i*SKEIN_256_BLOCK_BYTES;
if (n >= SKEIN_256_BLOCK_BYTES) if (n >= SKEIN_256_BLOCK_BYTES)
n = SKEIN_256_BLOCK_BYTES; n = SKEIN_256_BLOCK_BYTES;
Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES, ctx->X, n); /* "output" the ctr mode bytes */ /* "output" the ctr mode bytes */
Skein_Show_Final(256, &ctx->h, n, hashVal+i*SKEIN_256_BLOCK_BYTES); Skein_Put64_LSB_First(hashVal+i*SKEIN_256_BLOCK_BYTES, ctx->X,
memcpy(ctx->X, X, sizeof(X)); /* restore the counter mode key for next time */ n);
Skein_Show_Final(256, &ctx->h, n,
hashVal+i*SKEIN_256_BLOCK_BYTES);
/* restore the counter mode key for next time */
memcpy(ctx->X, X, sizeof(X));
} }
return SKEIN_SUCCESS; return SKEIN_SUCCESS;
} }
...@@ -667,25 +851,36 @@ int Skein_512_Output(struct skein_512_ctx *ctx, u8 *hashVal) ...@@ -667,25 +851,36 @@ int Skein_512_Output(struct skein_512_ctx *ctx, u8 *hashVal)
{ {
size_t i, n, byteCnt; size_t i, n, byteCnt;
u64 X[SKEIN_512_STATE_WORDS]; u64 X[SKEIN_512_STATE_WORDS];
Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */ /* catch uninitialized context */
Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
/* now output the result */ /* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ /* total number of output bytes */
byteCnt = (ctx->h.hashBitLen + 7) >> 3;
/* run Threefish in "counter mode" to generate output */ /* run Threefish in "counter mode" to generate output */
memset(ctx->b, 0, sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ /* zero out b[], so it can hold the counter */
memcpy(X, ctx->X, sizeof(X)); /* keep a local copy of counter mode "key" */ memset(ctx->b, 0, sizeof(ctx->b));
/* keep a local copy of counter mode "key" */
memcpy(X, ctx->X, sizeof(X));
for (i = 0; i*SKEIN_512_BLOCK_BYTES < byteCnt; i++) for (i = 0; i*SKEIN_512_BLOCK_BYTES < byteCnt; i++)
{ {
((u64 *)ctx->b)[0] = Skein_Swap64((u64) i); /* build the counter block */ /* build the counter block */
((u64 *)ctx->b)[0] = Skein_Swap64((u64) i);
Skein_Start_New_Type(ctx, OUT_FINAL); Skein_Start_New_Type(ctx, OUT_FINAL);
Skein_512_Process_Block(ctx, ctx->b, 1, sizeof(u64)); /* run "counter mode" */ /* run "counter mode" */
n = byteCnt - i*SKEIN_512_BLOCK_BYTES; /* number of output bytes left to go */ Skein_512_Process_Block(ctx, ctx->b, 1, sizeof(u64));
/* number of output bytes left to go */
n = byteCnt - i*SKEIN_512_BLOCK_BYTES;
if (n >= SKEIN_512_BLOCK_BYTES) if (n >= SKEIN_512_BLOCK_BYTES)
n = SKEIN_512_BLOCK_BYTES; n = SKEIN_512_BLOCK_BYTES;
Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES, ctx->X, n); /* "output" the ctr mode bytes */ /* "output" the ctr mode bytes */
Skein_Show_Final(256, &ctx->h, n, hashVal+i*SKEIN_512_BLOCK_BYTES); Skein_Put64_LSB_First(hashVal+i*SKEIN_512_BLOCK_BYTES, ctx->X,
memcpy(ctx->X, X, sizeof(X)); /* restore the counter mode key for next time */ n);
Skein_Show_Final(256, &ctx->h, n,
hashVal+i*SKEIN_512_BLOCK_BYTES);
/* restore the counter mode key for next time */
memcpy(ctx->X, X, sizeof(X));
} }
return SKEIN_SUCCESS; return SKEIN_SUCCESS;
} }
...@@ -696,25 +891,36 @@ int Skein1024_Output(struct skein1024_ctx *ctx, u8 *hashVal) ...@@ -696,25 +891,36 @@ int Skein1024_Output(struct skein1024_ctx *ctx, u8 *hashVal)
{ {
size_t i, n, byteCnt; size_t i, n, byteCnt;
u64 X[SKEIN1024_STATE_WORDS]; u64 X[SKEIN1024_STATE_WORDS];
Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL); /* catch uninitialized context */ /* catch uninitialized context */
Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
/* now output the result */ /* now output the result */
byteCnt = (ctx->h.hashBitLen + 7) >> 3; /* total number of output bytes */ /* total number of output bytes */
byteCnt = (ctx->h.hashBitLen + 7) >> 3;
/* run Threefish in "counter mode" to generate output */ /* run Threefish in "counter mode" to generate output */
memset(ctx->b, 0, sizeof(ctx->b)); /* zero out b[], so it can hold the counter */ /* zero out b[], so it can hold the counter */
memcpy(X, ctx->X, sizeof(X)); /* keep a local copy of counter mode "key" */ memset(ctx->b, 0, sizeof(ctx->b));
/* keep a local copy of counter mode "key" */
memcpy(X, ctx->X, sizeof(X));
for (i = 0; i*SKEIN1024_BLOCK_BYTES < byteCnt; i++) for (i = 0; i*SKEIN1024_BLOCK_BYTES < byteCnt; i++)
{ {
((u64 *)ctx->b)[0] = Skein_Swap64((u64) i); /* build the counter block */ /* build the counter block */
((u64 *)ctx->b)[0] = Skein_Swap64((u64) i);
Skein_Start_New_Type(ctx, OUT_FINAL); Skein_Start_New_Type(ctx, OUT_FINAL);
Skein1024_Process_Block(ctx, ctx->b, 1, sizeof(u64)); /* run "counter mode" */ /* run "counter mode" */
n = byteCnt - i*SKEIN1024_BLOCK_BYTES; /* number of output bytes left to go */ Skein1024_Process_Block(ctx, ctx->b, 1, sizeof(u64));
/* number of output bytes left to go */
n = byteCnt - i*SKEIN1024_BLOCK_BYTES;
if (n >= SKEIN1024_BLOCK_BYTES) if (n >= SKEIN1024_BLOCK_BYTES)
n = SKEIN1024_BLOCK_BYTES; n = SKEIN1024_BLOCK_BYTES;
Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES, ctx->X, n); /* "output" the ctr mode bytes */ /* "output" the ctr mode bytes */
Skein_Show_Final(256, &ctx->h, n, hashVal+i*SKEIN1024_BLOCK_BYTES); Skein_Put64_LSB_First(hashVal+i*SKEIN1024_BLOCK_BYTES, ctx->X,
memcpy(ctx->X, X, sizeof(X)); /* restore the counter mode key for next time */ n);
Skein_Show_Final(256, &ctx->h, n,
hashVal+i*SKEIN1024_BLOCK_BYTES);
/* restore the counter mode key for next time */
memcpy(ctx->X, X, sizeof(X));
} }
return SKEIN_SUCCESS; return SKEIN_SUCCESS;
} }
......
...@@ -46,9 +46,9 @@ int skeinInit(struct skein_ctx *ctx, size_t hashBitLen) ...@@ -46,9 +46,9 @@ int skeinInit(struct skein_ctx *ctx, size_t hashBitLen)
Skein_Assert(ctx, SKEIN_FAIL); Skein_Assert(ctx, SKEIN_FAIL);
/* /*
* The following two lines rely of the fact that the real Skein contexts are * The following two lines rely of the fact that the real Skein
* a union in out context and thus have tha maximum memory available. * contexts are a union in out context and thus have tha maximum
* The beauty of C :-) . * memory available. The beauty of C :-) .
*/ */
X = ctx->m.s256.X; X = ctx->m.s256.X;
Xlen = ctx->skeinSize/8; Xlen = ctx->skeinSize/8;
...@@ -72,7 +72,10 @@ int skeinInit(struct skein_ctx *ctx, size_t hashBitLen) ...@@ -72,7 +72,10 @@ int skeinInit(struct skein_ctx *ctx, size_t hashBitLen)
} }
if (ret == SKEIN_SUCCESS) { if (ret == SKEIN_SUCCESS) {
/* Save chaining variables for this combination of size and hashBitLen */ /*
* Save chaining variables for this combination of size and
* hashBitLen
*/
memcpy(ctx->XSave, X, Xlen); memcpy(ctx->XSave, X, Xlen);
} }
return ret; return ret;
...@@ -113,7 +116,10 @@ int skeinMacInit(struct skein_ctx *ctx, const u8 *key, size_t keyLen, ...@@ -113,7 +116,10 @@ int skeinMacInit(struct skein_ctx *ctx, const u8 *key, size_t keyLen,
break; break;
} }
if (ret == SKEIN_SUCCESS) { if (ret == SKEIN_SUCCESS) {
/* Save chaining variables for this combination of key, keyLen, hashBitLen */ /*
* Save chaining variables for this combination of key,
* keyLen, hashBitLen
*/
memcpy(ctx->XSave, X, Xlen); memcpy(ctx->XSave, X, Xlen);
} }
return ret; return ret;
...@@ -125,9 +131,9 @@ void skeinReset(struct skein_ctx *ctx) ...@@ -125,9 +131,9 @@ void skeinReset(struct skein_ctx *ctx)
u64 *X = NULL; u64 *X = NULL;
/* /*
* The following two lines rely of the fact that the real Skein contexts are * The following two lines rely of the fact that the real Skein
* a union in out context and thus have tha maximum memory available. * contexts are a union in out context and thus have tha maximum
* The beautiy of C :-) . * memory available. The beautiy of C :-) .
*/ */
X = ctx->m.s256.X; X = ctx->m.s256.X;
Xlen = ctx->skeinSize/8; Xlen = ctx->skeinSize/8;
...@@ -146,13 +152,16 @@ int skeinUpdate(struct skein_ctx *ctx, const u8 *msg, ...@@ -146,13 +152,16 @@ int skeinUpdate(struct skein_ctx *ctx, const u8 *msg,
switch (ctx->skeinSize) { switch (ctx->skeinSize) {
case Skein256: case Skein256:
ret = Skein_256_Update(&ctx->m.s256, (const u8 *)msg, msgByteCnt); ret = Skein_256_Update(&ctx->m.s256, (const u8 *)msg,
msgByteCnt);
break; break;
case Skein512: case Skein512:
ret = Skein_512_Update(&ctx->m.s512, (const u8 *)msg, msgByteCnt); ret = Skein_512_Update(&ctx->m.s512, (const u8 *)msg,
msgByteCnt);
break; break;
case Skein1024: case Skein1024:
ret = Skein1024_Update(&ctx->m.s1024, (const u8 *)msg, msgByteCnt); ret = Skein1024_Update(&ctx->m.s1024, (const u8 *)msg,
msgByteCnt);
break; break;
} }
return ret; return ret;
...@@ -164,15 +173,19 @@ int skeinUpdateBits(struct skein_ctx *ctx, const u8 *msg, ...@@ -164,15 +173,19 @@ int skeinUpdateBits(struct skein_ctx *ctx, const u8 *msg,
{ {
/* /*
* I've used the bit pad implementation from skein_test.c (see NIST CD) * I've used the bit pad implementation from skein_test.c (see NIST CD)
* and modified it to use the convenience functions and added some pointer * and modified it to use the convenience functions and added some
* arithmetic. * pointer arithmetic.
*/ */
size_t length; size_t length;
u8 mask; u8 mask;
u8 *up; u8 *up;
/* only the final Update() call is allowed do partial bytes, else assert an error */ /*
Skein_Assert((ctx->m.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 || msgBitCnt == 0, SKEIN_FAIL); * only the final Update() call is allowed do partial bytes, else
* assert an error
*/
Skein_Assert((ctx->m.h.T[1] & SKEIN_T1_FLAG_BIT_PAD) == 0 ||
msgBitCnt == 0, SKEIN_FAIL);
/* if number of bits is a multiple of bytes - that's easy */ /* if number of bits is a multiple of bytes - that's easy */
if ((msgBitCnt & 0x7) == 0) { if ((msgBitCnt & 0x7) == 0) {
...@@ -188,13 +201,18 @@ int skeinUpdateBits(struct skein_ctx *ctx, const u8 *msg, ...@@ -188,13 +201,18 @@ int skeinUpdateBits(struct skein_ctx *ctx, const u8 *msg,
*/ */
up = (u8 *)ctx->m.s256.X + ctx->skeinSize / 8; up = (u8 *)ctx->m.s256.X + ctx->skeinSize / 8;
Skein_Set_Bit_Pad_Flag(ctx->m.h); /* set tweak flag for the skeinFinal call */ /* set tweak flag for the skeinFinal call */
Skein_Set_Bit_Pad_Flag(ctx->m.h);
/* now "pad" the final partial byte the way NIST likes */ /* now "pad" the final partial byte the way NIST likes */
length = ctx->m.h.bCnt; /* get the bCnt value (same location for all block sizes) */ /* get the bCnt value (same location for all block sizes) */
Skein_assert(length != 0); /* internal sanity check: there IS a partial byte in the buffer! */ length = ctx->m.h.bCnt;
mask = (u8) (1u << (7 - (msgBitCnt & 7))); /* partial byte bit mask */ /* internal sanity check: there IS a partial byte in the buffer! */
up[length-1] = (u8)((up[length-1] & (0-mask))|mask); /* apply bit padding on final byte (in the buffer) */ Skein_assert(length != 0);
/* partial byte bit mask */
mask = (u8) (1u << (7 - (msgBitCnt & 7)));
/* apply bit padding on final byte (in the buffer) */
up[length-1] = (u8)((up[length-1] & (0-mask))|mask);
return SKEIN_SUCCESS; return SKEIN_SUCCESS;
} }
......
...@@ -36,13 +36,14 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, ...@@ -36,13 +36,14 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr,
threefishSetKey(&key, Threefish256, ctx->X, tweak); threefishSetKey(&key, Threefish256, ctx->X, tweak);
Skein_Get64_LSB_First(w, blkPtr, SKEIN_256_STATE_WORDS); /* get input block in little-endian format */ /* get input block in little-endian format */
Skein_Get64_LSB_First(w, blkPtr, SKEIN_256_STATE_WORDS);
threefishEncryptBlockWords(&key, w, ctx->X); threefishEncryptBlockWords(&key, w, ctx->X);
blkPtr += SKEIN_256_BLOCK_BYTES; blkPtr += SKEIN_256_BLOCK_BYTES;
/* do the final "feedforward" xor, update context chaining vars */ /* do the final "feedforward" xor, update ctx chaining vars */
ctx->X[0] = ctx->X[0] ^ w[0]; ctx->X[0] = ctx->X[0] ^ w[0];
ctx->X[1] = ctx->X[1] ^ w[1]; ctx->X[1] = ctx->X[1] ^ w[1];
ctx->X[2] = ctx->X[2] ^ w[2]; ctx->X[2] = ctx->X[2] ^ w[2];
...@@ -86,13 +87,14 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, ...@@ -86,13 +87,14 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr,
threefishSetKey(&key, Threefish512, ctx->X, tweak); threefishSetKey(&key, Threefish512, ctx->X, tweak);
Skein_Get64_LSB_First(w, blkPtr, SKEIN_512_STATE_WORDS); /* get input block in little-endian format */ /* get input block in little-endian format */
Skein_Get64_LSB_First(w, blkPtr, SKEIN_512_STATE_WORDS);
threefishEncryptBlockWords(&key, w, ctx->X); threefishEncryptBlockWords(&key, w, ctx->X);
blkPtr += SKEIN_512_BLOCK_BYTES; blkPtr += SKEIN_512_BLOCK_BYTES;
/* do the final "feedforward" xor, update context chaining vars */ /* do the final "feedforward" xor, update ctx chaining vars */
ctx->X[0] = ctx->X[0] ^ w[0]; ctx->X[0] = ctx->X[0] ^ w[0];
ctx->X[1] = ctx->X[1] ^ w[1]; ctx->X[1] = ctx->X[1] ^ w[1];
ctx->X[2] = ctx->X[2] ^ w[2]; ctx->X[2] = ctx->X[2] ^ w[2];
...@@ -140,13 +142,14 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, ...@@ -140,13 +142,14 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr,
threefishSetKey(&key, Threefish1024, ctx->X, tweak); threefishSetKey(&key, Threefish1024, ctx->X, tweak);
Skein_Get64_LSB_First(w, blkPtr, SKEIN1024_STATE_WORDS); /* get input block in little-endian format */ /* get input block in little-endian format */
Skein_Get64_LSB_First(w, blkPtr, SKEIN1024_STATE_WORDS);
threefishEncryptBlockWords(&key, w, ctx->X); threefishEncryptBlockWords(&key, w, ctx->X);
blkPtr += SKEIN1024_BLOCK_BYTES; blkPtr += SKEIN1024_BLOCK_BYTES;
/* do the final "feedforward" xor, update context chaining vars */ /* do the final "feedforward" xor, update ctx chaining vars */
ctx->X[0] = ctx->X[0] ^ w[0]; ctx->X[0] = ctx->X[0] ^ w[0];
ctx->X[1] = ctx->X[1] ^ w[1]; ctx->X[1] = ctx->X[1] ^ w[1];
ctx->X[2] = ctx->X[2] ^ w[2]; ctx->X[2] = ctx->X[2] ^ w[2];
......
...@@ -39,7 +39,8 @@ ...@@ -39,7 +39,8 @@
/***************************** Skein_256 ******************************/ /***************************** Skein_256 ******************************/
#if !(SKEIN_USE_ASM & 256) #if !(SKEIN_USE_ASM & 256)
void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t blkCnt, size_t byteCntAdd) void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr,
size_t blkCnt, size_t byteCntAdd)
{ /* do it in C */ { /* do it in C */
enum { enum {
WCNT = SKEIN_256_STATE_WORDS WCNT = SKEIN_256_STATE_WORDS
...@@ -58,21 +59,24 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t ...@@ -58,21 +59,24 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t
#error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */ #error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */
#endif #endif
size_t r; size_t r;
u64 kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ u64 kw[WCNT+4+RCNT*2]; /* key schedule: chaining vars + tweak + "rot"*/
#else #else
u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
#endif #endif
u64 X0, X1, X2, X3; /* local copy of context vars, for speed */ u64 X0, X1, X2, X3; /* local copy of context vars, for speed */
u64 w[WCNT]; /* local copy of input block */ u64 w[WCNT]; /* local copy of input block */
#ifdef SKEIN_DEBUG #ifdef SKEIN_DEBUG
const u64 *Xptr[4]; /* use for debugging (help compiler put Xn in registers) */ const u64 *Xptr[4]; /* use for debugging (help cc put Xn in regs) */
Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
#endif #endif
Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
ts[0] = ctx->h.T[0]; ts[0] = ctx->h.T[0];
ts[1] = ctx->h.T[1]; ts[1] = ctx->h.T[1];
do { do {
/* this implementation only supports 2**64 input bytes (no carry out here) */ /*
* this implementation only supports 2**64 input bytes
* (no carry out here)
*/
ts[0] += byteCntAdd; /* update processed length */ ts[0] += byteCntAdd; /* update processed length */
/* precompute the key schedule for this block */ /* precompute the key schedule for this block */
...@@ -84,7 +88,8 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t ...@@ -84,7 +88,8 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t
ts[2] = ts[0] ^ ts[1]; ts[2] = ts[0] ^ ts[1];
Skein_Get64_LSB_First(w, blkPtr, WCNT); /* get input block in little-endian format */ /* get input block in little-endian format */
Skein_Get64_LSB_First(w, blkPtr, WCNT);
DebugSaveTweak(ctx); DebugSaveTweak(ctx);
Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts); Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
...@@ -93,7 +98,9 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t ...@@ -93,7 +98,9 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t
X2 = w[2] + ks[2] + ts[1]; X2 = w[2] + ks[2] + ts[1];
X3 = w[3] + ks[3]; X3 = w[3] + ks[3];
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr); /* show starting state values */ /* show starting state values */
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
Xptr);
blkPtr += SKEIN_256_BLOCK_BYTES; blkPtr += SKEIN_256_BLOCK_BYTES;
...@@ -109,7 +116,8 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t ...@@ -109,7 +116,8 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr); Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
#define I256(R) \ #define I256(R) \
X0 += ks[((R)+1) % 5]; /* inject the key schedule value */ \ /* inject the key schedule value */ \
X0 += ks[((R)+1) % 5]; \
X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \ X1 += ks[((R)+2) % 5] + ts[((R)+1) % 3]; \
X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \ X2 += ks[((R)+3) % 5] + ts[((R)+2) % 3]; \
X3 += ks[((R)+4) % 5] + (R)+1; \ X3 += ks[((R)+4) % 5] + (R)+1; \
...@@ -120,15 +128,17 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t ...@@ -120,15 +128,17 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr); Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
#define I256(R) \ #define I256(R) \
X0 += ks[r+(R)+0]; /* inject the key schedule value */ \ /* inject the key schedule value */ \
X0 += ks[r+(R)+0]; \
X1 += ks[r+(R)+1] + ts[r+(R)+0]; \ X1 += ks[r+(R)+1] + ts[r+(R)+0]; \
X2 += ks[r+(R)+2] + ts[r+(R)+1]; \ X2 += ks[r+(R)+2] + ts[r+(R)+1]; \
X3 += ks[r+(R)+3] + r+(R); \ X3 += ks[r+(R)+3] + r+(R); \
ks[r + (R) + 4] = ks[r + (R) - 1]; /* rotate key schedule */\ /* rotate key schedule */ \
ks[r + (R) + 4] = ks[r + (R) - 1]; \
ts[r + (R) + 2] = ts[r + (R) - 1]; \ ts[r + (R) + 2] = ts[r + (R) - 1]; \
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256) /* loop thru it */ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256)
#endif #endif
{ {
#define R256_8_rounds(R) \ #define R256_8_rounds(R) \
...@@ -145,7 +155,10 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t ...@@ -145,7 +155,10 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t
R256_8_rounds(0); R256_8_rounds(0);
#define R256_Unroll_R(NN) ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_256 > (NN))) #define R256_Unroll_R(NN) \
((SKEIN_UNROLL_256 == 0 && \
SKEIN_256_ROUNDS_TOTAL/8 > (NN)) || \
(SKEIN_UNROLL_256 > (NN)))
#if R256_Unroll_R(1) #if R256_Unroll_R(1)
R256_8_rounds(1); R256_8_rounds(1);
...@@ -193,7 +206,7 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t ...@@ -193,7 +206,7 @@ void Skein_256_Process_Block(struct skein_256_ctx *ctx, const u8 *blkPtr, size_t
#error "need more unrolling in Skein_256_Process_Block" #error "need more unrolling in Skein_256_Process_Block"
#endif #endif
} }
/* do the final "feedforward" xor, update context chaining vars */ /* do the final "feedforward" xor, update context chaining */
ctx->X[0] = X0 ^ w[0]; ctx->X[0] = X0 ^ w[0];
ctx->X[1] = X1 ^ w[1]; ctx->X[1] = X1 ^ w[1];
ctx->X[2] = X2 ^ w[2]; ctx->X[2] = X2 ^ w[2];
...@@ -223,7 +236,8 @@ unsigned int Skein_256_Unroll_Cnt(void) ...@@ -223,7 +236,8 @@ unsigned int Skein_256_Unroll_Cnt(void)
/***************************** Skein_512 ******************************/ /***************************** Skein_512 ******************************/
#if !(SKEIN_USE_ASM & 512) #if !(SKEIN_USE_ASM & 512)
void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t blkCnt, size_t byteCntAdd) void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr,
size_t blkCnt, size_t byteCntAdd)
{ /* do it in C */ { /* do it in C */
enum { enum {
WCNT = SKEIN_512_STATE_WORDS WCNT = SKEIN_512_STATE_WORDS
...@@ -242,14 +256,14 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t ...@@ -242,14 +256,14 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t
#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */ #error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */
#endif #endif
size_t r; size_t r;
u64 kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ u64 kw[WCNT+4+RCNT*2]; /* key sched: chaining vars + tweak + "rot"*/
#else #else
u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
#endif #endif
u64 X0, X1, X2, X3, X4, X5, X6, X7; /* local copy of vars, for speed */ u64 X0, X1, X2, X3, X4, X5, X6, X7; /* local copies, for speed */
u64 w[WCNT]; /* local copy of input block */ u64 w[WCNT]; /* local copy of input block */
#ifdef SKEIN_DEBUG #ifdef SKEIN_DEBUG
const u64 *Xptr[8]; /* use for debugging (help compiler put Xn in registers) */ const u64 *Xptr[8]; /* use for debugging (help cc put Xn in regs) */
Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3; Xptr[0] = &X0; Xptr[1] = &X1; Xptr[2] = &X2; Xptr[3] = &X3;
Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7; Xptr[4] = &X4; Xptr[5] = &X5; Xptr[6] = &X6; Xptr[7] = &X7;
#endif #endif
...@@ -258,7 +272,10 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t ...@@ -258,7 +272,10 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t
ts[0] = ctx->h.T[0]; ts[0] = ctx->h.T[0];
ts[1] = ctx->h.T[1]; ts[1] = ctx->h.T[1];
do { do {
/* this implementation only supports 2**64 input bytes (no carry out here) */ /*
* this implementation only supports 2**64 input bytes
* (no carry out here)
*/
ts[0] += byteCntAdd; /* update processed length */ ts[0] += byteCntAdd; /* update processed length */
/* precompute the key schedule for this block */ /* precompute the key schedule for this block */
...@@ -275,7 +292,8 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t ...@@ -275,7 +292,8 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t
ts[2] = ts[0] ^ ts[1]; ts[2] = ts[0] ^ ts[1];
Skein_Get64_LSB_First(w, blkPtr, WCNT); /* get input block in little-endian format */ /* get input block in little-endian format */
Skein_Get64_LSB_First(w, blkPtr, WCNT);
DebugSaveTweak(ctx); DebugSaveTweak(ctx);
Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts); Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
...@@ -290,7 +308,8 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t ...@@ -290,7 +308,8 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t
blkPtr += SKEIN_512_BLOCK_BYTES; blkPtr += SKEIN_512_BLOCK_BYTES;
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr); Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
Xptr);
/* run the rounds */ /* run the rounds */
#define Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \ #define Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \
...@@ -304,7 +323,8 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t ...@@ -304,7 +323,8 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr); Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
#define I512(R) \ #define I512(R) \
X0 += ks[((R) + 1) % 9]; /* inject the key schedule value */ \ /* inject the key schedule value */ \
X0 += ks[((R) + 1) % 9]; \
X1 += ks[((R) + 2) % 9]; \ X1 += ks[((R) + 2) % 9]; \
X2 += ks[((R) + 3) % 9]; \ X2 += ks[((R) + 3) % 9]; \
X3 += ks[((R) + 4) % 9]; \ X3 += ks[((R) + 4) % 9]; \
...@@ -319,7 +339,8 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t ...@@ -319,7 +339,8 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr); Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
#define I512(R) \ #define I512(R) \
X0 += ks[r + (R) + 0]; /* inject the key schedule value */ \ /* inject the key schedule value */ \
X0 += ks[r + (R) + 0]; \
X1 += ks[r + (R) + 1]; \ X1 += ks[r + (R) + 1]; \
X2 += ks[r + (R) + 2]; \ X2 += ks[r + (R) + 2]; \
X3 += ks[r + (R) + 3]; \ X3 += ks[r + (R) + 3]; \
...@@ -327,11 +348,12 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t ...@@ -327,11 +348,12 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t
X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \ X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \
X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \ X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \
X7 += ks[r + (R) + 7] + r + (R); \ X7 += ks[r + (R) + 7] + r + (R); \
ks[r + (R) + 8] = ks[r + (R) - 1]; /* rotate key schedule */ \ /* rotate key schedule */ \
ks[r + (R) + 8] = ks[r + (R) - 1]; \
ts[r + (R) + 2] = ts[r + (R) - 1]; \ ts[r + (R) + 2] = ts[r + (R) - 1]; \
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512) /* loop thru it */ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512)
#endif /* end of looped code definitions */ #endif /* end of looped code definitions */
{ {
#define R512_8_rounds(R) /* do 8 full rounds */ \ #define R512_8_rounds(R) /* do 8 full rounds */ \
...@@ -348,7 +370,10 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t ...@@ -348,7 +370,10 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t
R512_8_rounds(0); R512_8_rounds(0);
#define R512_Unroll_R(NN) ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_512 > (NN))) #define R512_Unroll_R(NN) \
((SKEIN_UNROLL_512 == 0 && \
SKEIN_512_ROUNDS_TOTAL/8 > (NN)) || \
(SKEIN_UNROLL_512 > (NN)))
#if R512_Unroll_R(1) #if R512_Unroll_R(1)
R512_8_rounds(1); R512_8_rounds(1);
...@@ -397,7 +422,7 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t ...@@ -397,7 +422,7 @@ void Skein_512_Process_Block(struct skein_512_ctx *ctx, const u8 *blkPtr, size_t
#endif #endif
} }
/* do the final "feedforward" xor, update context chaining vars */ /* do the final "feedforward" xor, update context chaining */
ctx->X[0] = X0 ^ w[0]; ctx->X[0] = X0 ^ w[0];
ctx->X[1] = X1 ^ w[1]; ctx->X[1] = X1 ^ w[1];
ctx->X[2] = X2 ^ w[2]; ctx->X[2] = X2 ^ w[2];
...@@ -430,7 +455,8 @@ unsigned int Skein_512_Unroll_Cnt(void) ...@@ -430,7 +455,8 @@ unsigned int Skein_512_Unroll_Cnt(void)
/***************************** Skein1024 ******************************/ /***************************** Skein1024 ******************************/
#if !(SKEIN_USE_ASM & 1024) #if !(SKEIN_USE_ASM & 1024)
void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t blkCnt, size_t byteCntAdd) void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, \
size_t blkCnt, size_t byteCntAdd)
{ /* do it in C, always looping (unrolled is bigger AND slower!) */ { /* do it in C, always looping (unrolled is bigger AND slower!) */
enum { enum {
WCNT = SKEIN1024_STATE_WORDS WCNT = SKEIN1024_STATE_WORDS
...@@ -449,16 +475,17 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t ...@@ -449,16 +475,17 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t
#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */ #error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */
#endif #endif
size_t r; size_t r;
u64 kw[WCNT+4+RCNT*2]; /* key schedule words : chaining vars + tweak + "rotation"*/ u64 kw[WCNT+4+RCNT*2]; /* key sched: chaining vars + tweak + "rot" */
#else #else
u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */ u64 kw[WCNT+4]; /* key schedule words : chaining vars + tweak */
#endif #endif
u64 X00, X01, X02, X03, X04, X05, X06, X07, /* local copy of vars, for speed */ /* local copy of vars, for speed */
u64 X00, X01, X02, X03, X04, X05, X06, X07,
X08, X09, X10, X11, X12, X13, X14, X15; X08, X09, X10, X11, X12, X13, X14, X15;
u64 w[WCNT]; /* local copy of input block */ u64 w[WCNT]; /* local copy of input block */
#ifdef SKEIN_DEBUG #ifdef SKEIN_DEBUG
const u64 *Xptr[16]; /* use for debugging (help compiler put Xn in registers) */ const u64 *Xptr[16]; /* use for debugging (help cc put Xn in regs) */
Xptr[0] = &X00; Xptr[1] = &X01; Xptr[2] = &X02; Xptr[3] = &X03; Xptr[0] = &X00; Xptr[1] = &X01; Xptr[2] = &X02; Xptr[3] = &X03;
Xptr[4] = &X04; Xptr[5] = &X05; Xptr[6] = &X06; Xptr[7] = &X07; Xptr[4] = &X04; Xptr[5] = &X05; Xptr[6] = &X06; Xptr[7] = &X07;
Xptr[8] = &X08; Xptr[9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11; Xptr[8] = &X08; Xptr[9] = &X09; Xptr[10] = &X10; Xptr[11] = &X11;
...@@ -469,7 +496,10 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t ...@@ -469,7 +496,10 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t
ts[0] = ctx->h.T[0]; ts[0] = ctx->h.T[0];
ts[1] = ctx->h.T[1]; ts[1] = ctx->h.T[1];
do { do {
/* this implementation only supports 2**64 input bytes (no carry out here) */ /*
* this implementation only supports 2**64 input bytes
* (no carry out here)
*/
ts[0] += byteCntAdd; /* update processed length */ ts[0] += byteCntAdd; /* update processed length */
/* precompute the key schedule for this block */ /* precompute the key schedule for this block */
...@@ -496,7 +526,8 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t ...@@ -496,7 +526,8 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t
ts[2] = ts[0] ^ ts[1]; ts[2] = ts[0] ^ ts[1];
Skein_Get64_LSB_First(w, blkPtr, WCNT); /* get input block in little-endian format */ /* get input block in little-endian format */
Skein_Get64_LSB_First(w, blkPtr, WCNT);
DebugSaveTweak(ctx); DebugSaveTweak(ctx);
Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts); Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
...@@ -517,9 +548,11 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t ...@@ -517,9 +548,11 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t
X14 = w[14] + ks[14] + ts[1]; X14 = w[14] + ks[14] + ts[1];
X15 = w[15] + ks[15]; X15 = w[15] + ks[15];
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL, Xptr); Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
Xptr);
#define Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, ROT, rNum) \ #define Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \
pF, ROT, rNum) \
X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \
X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \ X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \
X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4; \ X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4; \
...@@ -530,12 +563,15 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t ...@@ -530,12 +563,15 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t
X##pE += X##pF; X##pF = RotL_64(X##pF, ROT##_7); X##pF ^= X##pE; \ X##pE += X##pF; X##pF = RotL_64(X##pF, ROT##_7); X##pF ^= X##pE; \
#if SKEIN_UNROLL_1024 == 0 #if SKEIN_UNROLL_1024 == 0
#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, ROT, rn) \ #define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, \
Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, ROT, rn) \ ROT, rn) \
Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \
pF, ROT, rn) \
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rn, Xptr); Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rn, Xptr);
#define I1024(R) \ #define I1024(R) \
X00 += ks[((R) + 1) % 17]; /* inject the key schedule value */ \ /* inject the key schedule value */ \
X00 += ks[((R) + 1) % 17]; \
X01 += ks[((R) + 2) % 17]; \ X01 += ks[((R) + 2) % 17]; \
X02 += ks[((R) + 3) % 17]; \ X02 += ks[((R) + 3) % 17]; \
X03 += ks[((R) + 4) % 17]; \ X03 += ks[((R) + 4) % 17]; \
...@@ -553,12 +589,15 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t ...@@ -553,12 +589,15 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t
X15 += ks[((R) + 16) % 17] + (R) + 1; \ X15 += ks[((R) + 16) % 17] + (R) + 1; \
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
#else /* looping version */ #else /* looping version */
#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, ROT, rn) \ #define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, \
Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, pF, ROT, rn) \ ROT, rn) \
Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, pE, \
pF, ROT, rn) \
Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, Xptr); Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, Xptr);
#define I1024(R) \ #define I1024(R) \
X00 += ks[r + (R) + 0]; /* inject the key schedule value */ \ /* inject the key schedule value */ \
X00 += ks[r + (R) + 0]; \
X01 += ks[r + (R) + 1]; \ X01 += ks[r + (R) + 1]; \
X02 += ks[r + (R) + 2]; \ X02 += ks[r + (R) + 2]; \
X03 += ks[r + (R) + 3]; \ X03 += ks[r + (R) + 3]; \
...@@ -574,28 +613,40 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t ...@@ -574,28 +613,40 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t
X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \ X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \
X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \ X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \
X15 += ks[r + (R) + 15] + r + (R); \ X15 += ks[r + (R) + 15] + r + (R); \
ks[r + (R) + 16] = ks[r + (R) - 1]; /* rotate key schedule */\ /* rotate key schedule */ \
ks[r + (R) + 16] = ks[r + (R) - 1]; \
ts[r + (R) + 2] = ts[r + (R) - 1]; \ ts[r + (R) + 2] = ts[r + (R) - 1]; \
Skein_Show_R_Ptr(BLK_BITSi, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr); Skein_Show_R_Ptr(BLK_BITSi, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024) /* loop thru it */ for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024)
#endif #endif
{ {
#define R1024_8_rounds(R) /* do 8 full rounds */ \ #define R1024_8_rounds(R) \
R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, R1024_0, 8*(R) + 1); \ R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, \
R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, R1024_1, 8*(R) + 2); \ R1024_0, 8*(R) + 1); \
R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, R1024_2, 8*(R) + 3); \ R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, \
R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, R1024_3, 8*(R) + 4); \ R1024_1, 8*(R) + 2); \
R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, \
R1024_2, 8*(R) + 3); \
R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, \
R1024_3, 8*(R) + 4); \
I1024(2*(R)); \ I1024(2*(R)); \
R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, R1024_4, 8*(R) + 5); \ R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, 15, \
R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, R1024_5, 8*(R) + 6); \ R1024_4, 8*(R) + 5); \
R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, R1024_6, 8*(R) + 7); \ R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, 08, 01, \
R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, R1024_7, 8*(R) + 8); \ R1024_5, 8*(R) + 6); \
R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, 10, 09, \
R1024_6, 8*(R) + 7); \
R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, 12, 07, \
R1024_7, 8*(R) + 8); \
I1024(2*(R)+1); I1024(2*(R)+1);
R1024_8_rounds(0); R1024_8_rounds(0);
#define R1024_Unroll_R(NN) ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || (SKEIN_UNROLL_1024 > (NN))) #define R1024_Unroll_R(NN) \
((SKEIN_UNROLL_1024 == 0 && \
SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || \
(SKEIN_UNROLL_1024 > (NN)))
#if R1024_Unroll_R(1) #if R1024_Unroll_R(1)
R1024_8_rounds(1); R1024_8_rounds(1);
...@@ -643,7 +694,7 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t ...@@ -643,7 +694,7 @@ void Skein1024_Process_Block(struct skein1024_ctx *ctx, const u8 *blkPtr, size_t
#error "need more unrolling in Skein_1024_Process_Block" #error "need more unrolling in Skein_1024_Process_Block"
#endif #endif
} }
/* do the final "feedforward" xor, update context chaining vars */ /* do the final "feedforward" xor, update context chaining */
ctx->X[0] = X00 ^ w[0]; ctx->X[0] = X00 ^ w[0];
ctx->X[1] = X01 ^ w[1]; ctx->X[1] = X01 ^ w[1];
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -12,158 +12,481 @@ void threefishEncrypt256(struct threefish_key *keyCtx, u64 *input, u64 *output) ...@@ -12,158 +12,481 @@ void threefishEncrypt256(struct threefish_key *keyCtx, u64 *input, u64 *output)
u64 t0 = keyCtx->tweak[0], t1 = keyCtx->tweak[1], u64 t0 = keyCtx->tweak[0], t1 = keyCtx->tweak[1],
t2 = keyCtx->tweak[2]; t2 = keyCtx->tweak[2];
b1 += k1 + t0; b0 += b1 + k0; b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0; b1 += k1 + t0;
b3 += k3; b2 += b3 + k2 + t1; b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2; b0 += b1 + k0;
b0 += b3; b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0; b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
b2 += b1; b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b0 += b1; b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0; b3 += k3;
b2 += b3; b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2; b2 += b3 + k2 + t1;
b0 += b3; b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0; b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
b2 += b1; b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b1 += k2 + t1; b0 += b1 + k1; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0; b0 += b3;
b3 += k4 + 1; b2 += b3 + k3 + t2; b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2; b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
b0 += b3; b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
b2 += b1; b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2; b2 += b1;
b0 += b1; b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0; b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b2 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
b0 += b3; b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0; b0 += b1;
b2 += b1; b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2; b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
b1 += k3 + t2; b0 += b1 + k2; b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0; b2 += b3;
b3 += k0 + 2; b2 += b3 + k4 + t0; b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2; b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
b0 += b3; b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
b2 += b1; b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2; b0 += b3;
b0 += b1; b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0; b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
b2 += b3; b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
b0 += b3; b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0; b2 += b1;
b2 += b1; b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2; b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b1 += k4 + t0; b0 += b1 + k3; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
b3 += k1 + 3; b2 += b3 + k0 + t1; b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2; b1 += k2 + t1;
b0 += b3; b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0; b0 += b1 + k1;
b2 += b1; b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
b0 += b1; b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b2 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2; b3 += k4 + 1;
b0 += b3; b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0; b2 += b3 + k3 + t2;
b2 += b1; b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2; b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
b1 += k0 + t1; b0 += b1 + k4; b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0; b0 += b3;
b3 += k2 + 4; b2 += b3 + k1 + t2; b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2; b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
b0 += b3; b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
b2 += b1; b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2; b2 += b1;
b0 += b1; b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0; b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
b2 += b3; b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
b0 += b3; b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0; b0 += b1;
b2 += b1; b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2; b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b1 += k1 + t2; b0 += b1 + k0; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
b3 += k3 + 5; b2 += b3 + k2 + t0; b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2; b2 += b3;
b0 += b3; b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
b2 += b1; b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
b0 += b1; b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0; b0 += b3;
b2 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2; b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
b0 += b3; b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
b2 += b1; b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2; b2 += b1;
b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
b1 += k2 + t0; b0 += b1 + k1; b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
b3 += k4 + 6; b2 += b3 + k3 + t1; b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
b0 += b3; b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0; b1 += k3 + t2;
b2 += b1; b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2; b0 += b1 + k2;
b0 += b1; b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0; b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
b2 += b3; b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
b0 += b3; b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0; b3 += k0 + 2;
b2 += b1; b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2; b2 += b3 + k4 + t0;
b1 += k3 + t1; b0 += b1 + k2; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0; b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
b3 += k0 + 7; b2 += b3 + k4 + t2; b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
b0 += b3; b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0; b0 += b3;
b2 += b1; b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2; b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
b0 += b1; b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b2 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2; b2 += b1;
b0 += b3; b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0; b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b2 += b1; b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
b0 += b1;
b1 += k4 + t2; b0 += b1 + k3; b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0; b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
b3 += k1 + 8; b2 += b3 + k0 + t0; b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
b0 += b3; b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0; b2 += b3;
b2 += b1; b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2; b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
b0 += b1; b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
b2 += b3; b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2; b0 += b3;
b0 += b3; b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0; b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
b2 += b1; b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b1 += k0 + t0; b0 += b1 + k4; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0; b2 += b1;
b3 += k2 + 9; b2 += b3 + k1 + t1; b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2; b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b0 += b3; b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
b2 += b1; b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2; b1 += k4 + t0;
b0 += b1; b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0; b0 += b1 + k3;
b2 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
b0 += b3; b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
b2 += b1; b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2; b3 += k1 + 3;
b2 += b3 + k0 + t1;
b1 += k1 + t1; b0 += b1 + k0; b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0; b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
b3 += k3 + 10; b2 += b3 + k2 + t2; b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
b0 += b3; b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0; b0 += b3;
b2 += b1; b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2; b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
b0 += b1; b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
b2 += b3; b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2; b2 += b1;
b0 += b3; b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0; b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
b2 += b1; b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b1 += k2 + t2; b0 += b1 + k1; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0; b0 += b1;
b3 += k4 + 11; b2 += b3 + k3 + t0; b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2; b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b0 += b3; b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
b2 += b1; b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2; b2 += b3;
b0 += b1; b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
b2 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
b0 += b3; b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0; b0 += b3;
b2 += b1; b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2; b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
b1 += k3 + t0; b0 += b1 + k2; b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0; b2 += b1;
b3 += k0 + 12; b2 += b3 + k4 + t1; b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2; b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
b0 += b3; b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
b2 += b1; b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b0 += b1; b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0; b1 += k0 + t1;
b2 += b3; b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2; b0 += b1 + k4;
b0 += b3; b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0; b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
b2 += b1; b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b1 += k4 + t1; b0 += b1 + k3; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0; b3 += k2 + 4;
b3 += k1 + 13; b2 += b3 + k0 + t2; b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2; b2 += b3 + k1 + t2;
b0 += b3; b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0; b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
b2 += b1; b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
b0 += b1; b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0; b0 += b3;
b2 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2; b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
b0 += b3; b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
b2 += b1; b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2; b2 += b1;
b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b1 += k0 + t2; b0 += b1 + k4; b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
b3 += k2 + 14; b2 += b3 + k1 + t0; b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2; b0 += b1;
b0 += b3; b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0; b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
b2 += b1; b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b0 += b1; b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0; b2 += b3;
b2 += b3; b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2; b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
b0 += b3; b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
b2 += b1; b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2; b0 += b3;
b1 += k1 + t0; b0 += b1 + k0; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0; b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
b3 += k3 + 15; b2 += b3 + k2 + t1; b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
b0 += b3; b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0; b2 += b1;
b2 += b1; b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2; b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b0 += b1; b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b2 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2; b1 += k1 + t2;
b0 += b3; b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0; b0 += b1 + k0;
b2 += b1; b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
b1 += k2 + t1; b0 += b1 + k1; b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0; b3 += k3 + 5;
b3 += k4 + 16; b2 += b3 + k3 + t2; b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2; b2 += b3 + k2 + t0;
b0 += b3; b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0; b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
b2 += b1; b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b0 += b1; b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0; b0 += b3;
b2 += b3; b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2; b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
b0 += b3; b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
b2 += b1; b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2; b2 += b1;
b1 += k3 + t2; b0 += b1 + k2; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0; b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
b3 += k0 + 17; b2 += b3 + k4 + t0; b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
b0 += b3; b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0; b0 += b1;
b2 += b1; b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2; b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b0 += b1; b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b2 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2; b2 += b3;
b0 += b3; b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
b2 += b1; b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
b0 += b3;
b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
b2 += b1;
b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
b1 += k2 + t0;
b0 += b1 + k1;
b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
b3 += k4 + 6;
b2 += b3 + k3 + t1;
b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
b0 += b3;
b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
b2 += b1;
b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b0 += b1;
b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
b2 += b3;
b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
b0 += b3;
b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
b2 += b1;
b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b1 += k3 + t1;
b0 += b1 + k2;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
b3 += k0 + 7;
b2 += b3 + k4 + t2;
b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
b0 += b3;
b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
b2 += b1;
b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
b0 += b1;
b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b2 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
b0 += b3;
b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
b2 += b1;
b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
b1 += k4 + t2;
b0 += b1 + k3;
b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
b3 += k1 + 8;
b2 += b3 + k0 + t0;
b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
b0 += b3;
b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
b2 += b1;
b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b0 += b1;
b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
b2 += b3;
b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
b0 += b3;
b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
b2 += b1;
b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b1 += k0 + t0;
b0 += b1 + k4;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
b3 += k2 + 9;
b2 += b3 + k1 + t1;
b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
b0 += b3;
b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
b2 += b1;
b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
b0 += b1;
b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b2 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
b0 += b3;
b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
b2 += b1;
b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
b1 += k1 + t1;
b0 += b1 + k0;
b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
b3 += k3 + 10;
b2 += b3 + k2 + t2;
b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
b0 += b3;
b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
b2 += b1;
b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b0 += b1;
b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
b2 += b3;
b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
b0 += b3;
b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
b2 += b1;
b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b1 += k2 + t2;
b0 += b1 + k1;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
b3 += k4 + 11;
b2 += b3 + k3 + t0;
b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
b0 += b3;
b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
b2 += b1;
b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
b0 += b1;
b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b2 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
b0 += b3;
b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
b2 += b1;
b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
b1 += k3 + t0;
b0 += b1 + k2;
b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
b3 += k0 + 12;
b2 += b3 + k4 + t1;
b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
b0 += b3;
b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
b2 += b1;
b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b0 += b1;
b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
b2 += b3;
b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
b0 += b3;
b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
b2 += b1;
b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b1 += k4 + t1;
b0 += b1 + k3;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
b3 += k1 + 13;
b2 += b3 + k0 + t2;
b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
b0 += b3;
b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
b2 += b1;
b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
b0 += b1;
b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b2 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
b0 += b3;
b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
b2 += b1;
b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
b1 += k0 + t2;
b0 += b1 + k4;
b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
b3 += k2 + 14;
b2 += b3 + k1 + t0;
b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
b0 += b3;
b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
b2 += b1;
b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b0 += b1;
b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
b2 += b3;
b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
b0 += b3;
b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
b2 += b1;
b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b1 += k1 + t0;
b0 += b1 + k0;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
b3 += k3 + 15;
b2 += b3 + k2 + t1;
b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
b0 += b3;
b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
b2 += b1;
b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
b0 += b1;
b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b2 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
b0 += b3;
b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
b2 += b1;
b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
b1 += k2 + t1;
b0 += b1 + k1;
b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
b3 += k4 + 16;
b2 += b3 + k3 + t2;
b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
b0 += b3;
b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
b2 += b1;
b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
b0 += b1;
b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
b2 += b3;
b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
b0 += b3;
b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
b2 += b1;
b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
b1 += k3 + t2;
b0 += b1 + k2;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
b3 += k0 + 17;
b2 += b3 + k4 + t0;
b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
b0 += b3;
b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
b2 += b1;
b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
b0 += b1;
b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
b2 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
b0 += b3;
b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
b2 += b1;
b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
output[0] = b0 + k3; output[0] = b0 + k3;
output[1] = b1 + k4 + t0; output[1] = b1 + k4 + t0;
...@@ -187,158 +510,625 @@ void threefishDecrypt256(struct threefish_key *keyCtx, u64 *input, u64 *output) ...@@ -187,158 +510,625 @@ void threefishDecrypt256(struct threefish_key *keyCtx, u64 *input, u64 *output)
b1 -= k4 + t0; b1 -= k4 + t0;
b2 -= k0 + t1; b2 -= k0 + t1;
b3 -= k1 + 18; b3 -= k1 + 18;
tmp = b3 ^ b0; b3 = (tmp >> 32) | (tmp << (64 - 32)); b0 -= b3; tmp = b3 ^ b0;
tmp = b1 ^ b2; b1 = (tmp >> 32) | (tmp << (64 - 32)); b2 -= b1; b3 = (tmp >> 32) | (tmp << (64 - 32));
tmp = b1 ^ b0; b1 = (tmp >> 58) | (tmp << (64 - 58)); b0 -= b1; b0 -= b3;
tmp = b3 ^ b2; b3 = (tmp >> 22) | (tmp << (64 - 22)); b2 -= b3;
tmp = b3 ^ b0; b3 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b3; tmp = b1 ^ b2;
tmp = b1 ^ b2; b1 = (tmp >> 12) | (tmp << (64 - 12)); b2 -= b1; b1 = (tmp >> 32) | (tmp << (64 - 32));
tmp = b1 ^ b0; b1 = (tmp >> 25) | (tmp << (64 - 25)); b0 -= b1 + k2; b1 -= k3 + t2; b2 -= b1;
tmp = b3 ^ b2; b3 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b3 + k4 + t0; b3 -= k0 + 17;
tmp = b3 ^ b0; b3 = (tmp >> 5) | (tmp << (64 - 5)); b0 -= b3; tmp = b1 ^ b0;
tmp = b1 ^ b2; b1 = (tmp >> 37) | (tmp << (64 - 37)); b2 -= b1; b1 = (tmp >> 58) | (tmp << (64 - 58));
tmp = b1 ^ b0; b1 = (tmp >> 23) | (tmp << (64 - 23)); b0 -= b1; b0 -= b1;
tmp = b3 ^ b2; b3 = (tmp >> 40) | (tmp << (64 - 40)); b2 -= b3;
tmp = b3 ^ b0; b3 = (tmp >> 52) | (tmp << (64 - 52)); b0 -= b3; tmp = b3 ^ b2;
tmp = b1 ^ b2; b1 = (tmp >> 57) | (tmp << (64 - 57)); b2 -= b1; b3 = (tmp >> 22) | (tmp << (64 - 22));
tmp = b1 ^ b0; b1 = (tmp >> 14) | (tmp << (64 - 14)); b0 -= b1 + k1; b1 -= k2 + t1; b2 -= b3;
tmp = b3 ^ b2; b3 = (tmp >> 16) | (tmp << (64 - 16)); b2 -= b3 + k3 + t2; b3 -= k4 + 16;
tmp = b3 ^ b0;
tmp = b3 ^ b0; b3 = (tmp >> 32) | (tmp << (64 - 32)); b0 -= b3; b3 = (tmp >> 46) | (tmp << (64 - 46));
tmp = b1 ^ b2; b1 = (tmp >> 32) | (tmp << (64 - 32)); b2 -= b1; b0 -= b3;
tmp = b1 ^ b0; b1 = (tmp >> 58) | (tmp << (64 - 58)); b0 -= b1;
tmp = b3 ^ b2; b3 = (tmp >> 22) | (tmp << (64 - 22)); b2 -= b3; tmp = b1 ^ b2;
tmp = b3 ^ b0; b3 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b3; b1 = (tmp >> 12) | (tmp << (64 - 12));
tmp = b1 ^ b2; b1 = (tmp >> 12) | (tmp << (64 - 12)); b2 -= b1; b2 -= b1;
tmp = b1 ^ b0; b1 = (tmp >> 25) | (tmp << (64 - 25)); b0 -= b1 + k0; b1 -= k1 + t0;
tmp = b3 ^ b2; b3 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b3 + k2 + t1; b3 -= k3 + 15; tmp = b1 ^ b0;
tmp = b3 ^ b0; b3 = (tmp >> 5) | (tmp << (64 - 5)); b0 -= b3; b1 = (tmp >> 25) | (tmp << (64 - 25));
tmp = b1 ^ b2; b1 = (tmp >> 37) | (tmp << (64 - 37)); b2 -= b1; b0 -= b1 + k2;
tmp = b1 ^ b0; b1 = (tmp >> 23) | (tmp << (64 - 23)); b0 -= b1; b1 -= k3 + t2;
tmp = b3 ^ b2; b3 = (tmp >> 40) | (tmp << (64 - 40)); b2 -= b3;
tmp = b3 ^ b0; b3 = (tmp >> 52) | (tmp << (64 - 52)); b0 -= b3; tmp = b3 ^ b2;
tmp = b1 ^ b2; b1 = (tmp >> 57) | (tmp << (64 - 57)); b2 -= b1; b3 = (tmp >> 33) | (tmp << (64 - 33));
tmp = b1 ^ b0; b1 = (tmp >> 14) | (tmp << (64 - 14)); b0 -= b1 + k4; b1 -= k0 + t2; b2 -= b3 + k4 + t0;
tmp = b3 ^ b2; b3 = (tmp >> 16) | (tmp << (64 - 16)); b2 -= b3 + k1 + t0; b3 -= k2 + 14; b3 -= k0 + 17;
tmp = b3 ^ b0; b3 = (tmp >> 32) | (tmp << (64 - 32)); b0 -= b3; tmp = b3 ^ b0;
tmp = b1 ^ b2; b1 = (tmp >> 32) | (tmp << (64 - 32)); b2 -= b1; b3 = (tmp >> 5) | (tmp << (64 - 5));
tmp = b1 ^ b0; b1 = (tmp >> 58) | (tmp << (64 - 58)); b0 -= b1; b0 -= b3;
tmp = b3 ^ b2; b3 = (tmp >> 22) | (tmp << (64 - 22)); b2 -= b3;
tmp = b3 ^ b0; b3 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b3; tmp = b1 ^ b2;
tmp = b1 ^ b2; b1 = (tmp >> 12) | (tmp << (64 - 12)); b2 -= b1; b1 = (tmp >> 37) | (tmp << (64 - 37));
tmp = b1 ^ b0; b1 = (tmp >> 25) | (tmp << (64 - 25)); b0 -= b1 + k3; b1 -= k4 + t1; b2 -= b1;
tmp = b3 ^ b2; b3 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b3 + k0 + t2; b3 -= k1 + 13;
tmp = b3 ^ b0; b3 = (tmp >> 5) | (tmp << (64 - 5)); b0 -= b3; tmp = b1 ^ b0;
tmp = b1 ^ b2; b1 = (tmp >> 37) | (tmp << (64 - 37)); b2 -= b1; b1 = (tmp >> 23) | (tmp << (64 - 23));
tmp = b1 ^ b0; b1 = (tmp >> 23) | (tmp << (64 - 23)); b0 -= b1; b0 -= b1;
tmp = b3 ^ b2; b3 = (tmp >> 40) | (tmp << (64 - 40)); b2 -= b3;
tmp = b3 ^ b0; b3 = (tmp >> 52) | (tmp << (64 - 52)); b0 -= b3; tmp = b3 ^ b2;
tmp = b1 ^ b2; b1 = (tmp >> 57) | (tmp << (64 - 57)); b2 -= b1; b3 = (tmp >> 40) | (tmp << (64 - 40));
tmp = b1 ^ b0; b1 = (tmp >> 14) | (tmp << (64 - 14)); b0 -= b1 + k2; b1 -= k3 + t0; b2 -= b3;
tmp = b3 ^ b2; b3 = (tmp >> 16) | (tmp << (64 - 16)); b2 -= b3 + k4 + t1; b3 -= k0 + 12;
tmp = b3 ^ b0;
tmp = b3 ^ b0; b3 = (tmp >> 32) | (tmp << (64 - 32)); b0 -= b3; b3 = (tmp >> 52) | (tmp << (64 - 52));
tmp = b1 ^ b2; b1 = (tmp >> 32) | (tmp << (64 - 32)); b2 -= b1; b0 -= b3;
tmp = b1 ^ b0; b1 = (tmp >> 58) | (tmp << (64 - 58)); b0 -= b1;
tmp = b3 ^ b2; b3 = (tmp >> 22) | (tmp << (64 - 22)); b2 -= b3; tmp = b1 ^ b2;
tmp = b3 ^ b0; b3 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b3; b1 = (tmp >> 57) | (tmp << (64 - 57));
tmp = b1 ^ b2; b1 = (tmp >> 12) | (tmp << (64 - 12)); b2 -= b1; b2 -= b1;
tmp = b1 ^ b0; b1 = (tmp >> 25) | (tmp << (64 - 25)); b0 -= b1 + k1; b1 -= k2 + t2;
tmp = b3 ^ b2; b3 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b3 + k3 + t0; b3 -= k4 + 11; tmp = b1 ^ b0;
tmp = b3 ^ b0; b3 = (tmp >> 5) | (tmp << (64 - 5)); b0 -= b3; b1 = (tmp >> 14) | (tmp << (64 - 14));
tmp = b1 ^ b2; b1 = (tmp >> 37) | (tmp << (64 - 37)); b2 -= b1; b0 -= b1 + k1;
tmp = b1 ^ b0; b1 = (tmp >> 23) | (tmp << (64 - 23)); b0 -= b1; b1 -= k2 + t1;
tmp = b3 ^ b2; b3 = (tmp >> 40) | (tmp << (64 - 40)); b2 -= b3;
tmp = b3 ^ b0; b3 = (tmp >> 52) | (tmp << (64 - 52)); b0 -= b3; tmp = b3 ^ b2;
tmp = b1 ^ b2; b1 = (tmp >> 57) | (tmp << (64 - 57)); b2 -= b1; b3 = (tmp >> 16) | (tmp << (64 - 16));
tmp = b1 ^ b0; b1 = (tmp >> 14) | (tmp << (64 - 14)); b0 -= b1 + k0; b1 -= k1 + t1; b2 -= b3 + k3 + t2;
tmp = b3 ^ b2; b3 = (tmp >> 16) | (tmp << (64 - 16)); b2 -= b3 + k2 + t2; b3 -= k3 + 10; b3 -= k4 + 16;
tmp = b3 ^ b0; b3 = (tmp >> 32) | (tmp << (64 - 32)); b0 -= b3;
tmp = b1 ^ b2; b1 = (tmp >> 32) | (tmp << (64 - 32)); b2 -= b1; tmp = b3 ^ b0;
tmp = b1 ^ b0; b1 = (tmp >> 58) | (tmp << (64 - 58)); b0 -= b1; b3 = (tmp >> 32) | (tmp << (64 - 32));
tmp = b3 ^ b2; b3 = (tmp >> 22) | (tmp << (64 - 22)); b2 -= b3; b0 -= b3;
tmp = b3 ^ b0; b3 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b3;
tmp = b1 ^ b2; b1 = (tmp >> 12) | (tmp << (64 - 12)); b2 -= b1; tmp = b1 ^ b2;
tmp = b1 ^ b0; b1 = (tmp >> 25) | (tmp << (64 - 25)); b0 -= b1 + k4; b1 -= k0 + t0; b1 = (tmp >> 32) | (tmp << (64 - 32));
tmp = b3 ^ b2; b3 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b3 + k1 + t1; b3 -= k2 + 9; b2 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 5) | (tmp << (64 - 5)); b0 -= b3;
tmp = b1 ^ b2; b1 = (tmp >> 37) | (tmp << (64 - 37)); b2 -= b1; tmp = b1 ^ b0;
tmp = b1 ^ b0; b1 = (tmp >> 23) | (tmp << (64 - 23)); b0 -= b1; b1 = (tmp >> 58) | (tmp << (64 - 58));
tmp = b3 ^ b2; b3 = (tmp >> 40) | (tmp << (64 - 40)); b2 -= b3; b0 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 52) | (tmp << (64 - 52)); b0 -= b3;
tmp = b1 ^ b2; b1 = (tmp >> 57) | (tmp << (64 - 57)); b2 -= b1; tmp = b3 ^ b2;
tmp = b1 ^ b0; b1 = (tmp >> 14) | (tmp << (64 - 14)); b0 -= b1 + k3; b1 -= k4 + t2; b3 = (tmp >> 22) | (tmp << (64 - 22));
tmp = b3 ^ b2; b3 = (tmp >> 16) | (tmp << (64 - 16)); b2 -= b3 + k0 + t0; b3 -= k1 + 8; b2 -= b3;
tmp = b3 ^ b0; b3 = (tmp >> 32) | (tmp << (64 - 32)); b0 -= b3; tmp = b3 ^ b0;
tmp = b1 ^ b2; b1 = (tmp >> 32) | (tmp << (64 - 32)); b2 -= b1; b3 = (tmp >> 46) | (tmp << (64 - 46));
tmp = b1 ^ b0; b1 = (tmp >> 58) | (tmp << (64 - 58)); b0 -= b1; b0 -= b3;
tmp = b3 ^ b2; b3 = (tmp >> 22) | (tmp << (64 - 22)); b2 -= b3;
tmp = b3 ^ b0; b3 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b3; tmp = b1 ^ b2;
tmp = b1 ^ b2; b1 = (tmp >> 12) | (tmp << (64 - 12)); b2 -= b1; b1 = (tmp >> 12) | (tmp << (64 - 12));
tmp = b1 ^ b0; b1 = (tmp >> 25) | (tmp << (64 - 25)); b0 -= b1 + k2; b1 -= k3 + t1; b2 -= b1;
tmp = b3 ^ b2; b3 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b3 + k4 + t2; b3 -= k0 + 7;
tmp = b3 ^ b0; b3 = (tmp >> 5) | (tmp << (64 - 5)); b0 -= b3; tmp = b1 ^ b0;
tmp = b1 ^ b2; b1 = (tmp >> 37) | (tmp << (64 - 37)); b2 -= b1; b1 = (tmp >> 25) | (tmp << (64 - 25));
tmp = b1 ^ b0; b1 = (tmp >> 23) | (tmp << (64 - 23)); b0 -= b1; b0 -= b1 + k0;
tmp = b3 ^ b2; b3 = (tmp >> 40) | (tmp << (64 - 40)); b2 -= b3; b1 -= k1 + t0;
tmp = b3 ^ b0; b3 = (tmp >> 52) | (tmp << (64 - 52)); b0 -= b3;
tmp = b1 ^ b2; b1 = (tmp >> 57) | (tmp << (64 - 57)); b2 -= b1; tmp = b3 ^ b2;
tmp = b1 ^ b0; b1 = (tmp >> 14) | (tmp << (64 - 14)); b0 -= b1 + k1; b1 -= k2 + t0; b3 = (tmp >> 33) | (tmp << (64 - 33));
tmp = b3 ^ b2; b3 = (tmp >> 16) | (tmp << (64 - 16)); b2 -= b3 + k3 + t1; b3 -= k4 + 6; b2 -= b3 + k2 + t1;
b3 -= k3 + 15;
tmp = b3 ^ b0; b3 = (tmp >> 32) | (tmp << (64 - 32)); b0 -= b3;
tmp = b1 ^ b2; b1 = (tmp >> 32) | (tmp << (64 - 32)); b2 -= b1; tmp = b3 ^ b0;
tmp = b1 ^ b0; b1 = (tmp >> 58) | (tmp << (64 - 58)); b0 -= b1; b3 = (tmp >> 5) | (tmp << (64 - 5));
tmp = b3 ^ b2; b3 = (tmp >> 22) | (tmp << (64 - 22)); b2 -= b3; b0 -= b3;
tmp = b3 ^ b0; b3 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b3;
tmp = b1 ^ b2; b1 = (tmp >> 12) | (tmp << (64 - 12)); b2 -= b1; tmp = b1 ^ b2;
tmp = b1 ^ b0; b1 = (tmp >> 25) | (tmp << (64 - 25)); b0 -= b1 + k0; b1 -= k1 + t2; b1 = (tmp >> 37) | (tmp << (64 - 37));
tmp = b3 ^ b2; b3 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b3 + k2 + t0; b3 -= k3 + 5; b2 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 5) | (tmp << (64 - 5)); b0 -= b3;
tmp = b1 ^ b2; b1 = (tmp >> 37) | (tmp << (64 - 37)); b2 -= b1; tmp = b1 ^ b0;
tmp = b1 ^ b0; b1 = (tmp >> 23) | (tmp << (64 - 23)); b0 -= b1; b1 = (tmp >> 23) | (tmp << (64 - 23));
tmp = b3 ^ b2; b3 = (tmp >> 40) | (tmp << (64 - 40)); b2 -= b3; b0 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 52) | (tmp << (64 - 52)); b0 -= b3;
tmp = b1 ^ b2; b1 = (tmp >> 57) | (tmp << (64 - 57)); b2 -= b1; tmp = b3 ^ b2;
tmp = b1 ^ b0; b1 = (tmp >> 14) | (tmp << (64 - 14)); b0 -= b1 + k4; b1 -= k0 + t1; b3 = (tmp >> 40) | (tmp << (64 - 40));
tmp = b3 ^ b2; b3 = (tmp >> 16) | (tmp << (64 - 16)); b2 -= b3 + k1 + t2; b3 -= k2 + 4; b2 -= b3;
tmp = b3 ^ b0; b3 = (tmp >> 32) | (tmp << (64 - 32)); b0 -= b3; tmp = b3 ^ b0;
tmp = b1 ^ b2; b1 = (tmp >> 32) | (tmp << (64 - 32)); b2 -= b1; b3 = (tmp >> 52) | (tmp << (64 - 52));
tmp = b1 ^ b0; b1 = (tmp >> 58) | (tmp << (64 - 58)); b0 -= b1; b0 -= b3;
tmp = b3 ^ b2; b3 = (tmp >> 22) | (tmp << (64 - 22)); b2 -= b3;
tmp = b3 ^ b0; b3 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b3; tmp = b1 ^ b2;
tmp = b1 ^ b2; b1 = (tmp >> 12) | (tmp << (64 - 12)); b2 -= b1; b1 = (tmp >> 57) | (tmp << (64 - 57));
tmp = b1 ^ b0; b1 = (tmp >> 25) | (tmp << (64 - 25)); b0 -= b1 + k3; b1 -= k4 + t0; b2 -= b1;
tmp = b3 ^ b2; b3 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b3 + k0 + t1; b3 -= k1 + 3;
tmp = b3 ^ b0; b3 = (tmp >> 5) | (tmp << (64 - 5)); b0 -= b3; tmp = b1 ^ b0;
tmp = b1 ^ b2; b1 = (tmp >> 37) | (tmp << (64 - 37)); b2 -= b1; b1 = (tmp >> 14) | (tmp << (64 - 14));
tmp = b1 ^ b0; b1 = (tmp >> 23) | (tmp << (64 - 23)); b0 -= b1; b0 -= b1 + k4;
tmp = b3 ^ b2; b3 = (tmp >> 40) | (tmp << (64 - 40)); b2 -= b3; b1 -= k0 + t2;
tmp = b3 ^ b0; b3 = (tmp >> 52) | (tmp << (64 - 52)); b0 -= b3;
tmp = b1 ^ b2; b1 = (tmp >> 57) | (tmp << (64 - 57)); b2 -= b1; tmp = b3 ^ b2;
tmp = b1 ^ b0; b1 = (tmp >> 14) | (tmp << (64 - 14)); b0 -= b1 + k2; b1 -= k3 + t2; b3 = (tmp >> 16) | (tmp << (64 - 16));
tmp = b3 ^ b2; b3 = (tmp >> 16) | (tmp << (64 - 16)); b2 -= b3 + k4 + t0; b3 -= k0 + 2; b2 -= b3 + k1 + t0;
b3 -= k2 + 14;
tmp = b3 ^ b0; b3 = (tmp >> 32) | (tmp << (64 - 32)); b0 -= b3;
tmp = b1 ^ b2; b1 = (tmp >> 32) | (tmp << (64 - 32)); b2 -= b1;
tmp = b1 ^ b0; b1 = (tmp >> 58) | (tmp << (64 - 58)); b0 -= b1; tmp = b3 ^ b0;
tmp = b3 ^ b2; b3 = (tmp >> 22) | (tmp << (64 - 22)); b2 -= b3; b3 = (tmp >> 32) | (tmp << (64 - 32));
tmp = b3 ^ b0; b3 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b3; b0 -= b3;
tmp = b1 ^ b2; b1 = (tmp >> 12) | (tmp << (64 - 12)); b2 -= b1;
tmp = b1 ^ b0; b1 = (tmp >> 25) | (tmp << (64 - 25)); b0 -= b1 + k1; b1 -= k2 + t1; tmp = b1 ^ b2;
tmp = b3 ^ b2; b3 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b3 + k3 + t2; b3 -= k4 + 1; b1 = (tmp >> 32) | (tmp << (64 - 32));
tmp = b3 ^ b0; b3 = (tmp >> 5) | (tmp << (64 - 5)); b0 -= b3; b2 -= b1;
tmp = b1 ^ b2; b1 = (tmp >> 37) | (tmp << (64 - 37)); b2 -= b1;
tmp = b1 ^ b0; b1 = (tmp >> 23) | (tmp << (64 - 23)); b0 -= b1; tmp = b1 ^ b0;
tmp = b3 ^ b2; b3 = (tmp >> 40) | (tmp << (64 - 40)); b2 -= b3; b1 = (tmp >> 58) | (tmp << (64 - 58));
tmp = b3 ^ b0; b3 = (tmp >> 52) | (tmp << (64 - 52)); b0 -= b3; b0 -= b1;
tmp = b1 ^ b2; b1 = (tmp >> 57) | (tmp << (64 - 57)); b2 -= b1;
tmp = b1 ^ b0; b1 = (tmp >> 14) | (tmp << (64 - 14)); b0 -= b1 + k0; b1 -= k1 + t0; tmp = b3 ^ b2;
tmp = b3 ^ b2; b3 = (tmp >> 16) | (tmp << (64 - 16)); b2 -= b3 + k2 + t1; b3 -= k3; b3 = (tmp >> 22) | (tmp << (64 - 22));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 12) | (tmp << (64 - 12));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b0 -= b1 + k3;
b1 -= k4 + t1;
tmp = b3 ^ b2;
b3 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b3 + k0 + t2;
b3 -= k1 + 13;
tmp = b3 ^ b0;
b3 = (tmp >> 5) | (tmp << (64 - 5));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 37) | (tmp << (64 - 37));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 23) | (tmp << (64 - 23));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 40) | (tmp << (64 - 40));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 52) | (tmp << (64 - 52));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 57) | (tmp << (64 - 57));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 14) | (tmp << (64 - 14));
b0 -= b1 + k2;
b1 -= k3 + t0;
tmp = b3 ^ b2;
b3 = (tmp >> 16) | (tmp << (64 - 16));
b2 -= b3 + k4 + t1;
b3 -= k0 + 12;
tmp = b3 ^ b0;
b3 = (tmp >> 32) | (tmp << (64 - 32));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 32) | (tmp << (64 - 32));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 58) | (tmp << (64 - 58));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 12) | (tmp << (64 - 12));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b0 -= b1 + k1;
b1 -= k2 + t2;
tmp = b3 ^ b2;
b3 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b3 + k3 + t0;
b3 -= k4 + 11;
tmp = b3 ^ b0;
b3 = (tmp >> 5) | (tmp << (64 - 5));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 37) | (tmp << (64 - 37));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 23) | (tmp << (64 - 23));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 40) | (tmp << (64 - 40));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 52) | (tmp << (64 - 52));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 57) | (tmp << (64 - 57));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 14) | (tmp << (64 - 14));
b0 -= b1 + k0;
b1 -= k1 + t1;
tmp = b3 ^ b2;
b3 = (tmp >> 16) | (tmp << (64 - 16));
b2 -= b3 + k2 + t2;
b3 -= k3 + 10;
tmp = b3 ^ b0;
b3 = (tmp >> 32) | (tmp << (64 - 32));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 32) | (tmp << (64 - 32));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 58) | (tmp << (64 - 58));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 12) | (tmp << (64 - 12));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b0 -= b1 + k4;
b1 -= k0 + t0;
tmp = b3 ^ b2;
b3 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b3 + k1 + t1;
b3 -= k2 + 9;
tmp = b3 ^ b0;
b3 = (tmp >> 5) | (tmp << (64 - 5));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 37) | (tmp << (64 - 37));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 23) | (tmp << (64 - 23));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 40) | (tmp << (64 - 40));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 52) | (tmp << (64 - 52));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 57) | (tmp << (64 - 57));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 14) | (tmp << (64 - 14));
b0 -= b1 + k3;
b1 -= k4 + t2;
tmp = b3 ^ b2;
b3 = (tmp >> 16) | (tmp << (64 - 16));
b2 -= b3 + k0 + t0;
b3 -= k1 + 8;
tmp = b3 ^ b0;
b3 = (tmp >> 32) | (tmp << (64 - 32));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 32) | (tmp << (64 - 32));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 58) | (tmp << (64 - 58));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 12) | (tmp << (64 - 12));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b0 -= b1 + k2;
b1 -= k3 + t1;
tmp = b3 ^ b2;
b3 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b3 + k4 + t2;
b3 -= k0 + 7;
tmp = b3 ^ b0;
b3 = (tmp >> 5) | (tmp << (64 - 5));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 37) | (tmp << (64 - 37));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 23) | (tmp << (64 - 23));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 40) | (tmp << (64 - 40));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 52) | (tmp << (64 - 52));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 57) | (tmp << (64 - 57));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 14) | (tmp << (64 - 14));
b0 -= b1 + k1;
b1 -= k2 + t0;
tmp = b3 ^ b2;
b3 = (tmp >> 16) | (tmp << (64 - 16));
b2 -= b3 + k3 + t1;
b3 -= k4 + 6;
tmp = b3 ^ b0;
b3 = (tmp >> 32) | (tmp << (64 - 32));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 32) | (tmp << (64 - 32));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 58) | (tmp << (64 - 58));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 12) | (tmp << (64 - 12));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b0 -= b1 + k0;
b1 -= k1 + t2;
tmp = b3 ^ b2;
b3 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b3 + k2 + t0;
b3 -= k3 + 5;
tmp = b3 ^ b0;
b3 = (tmp >> 5) | (tmp << (64 - 5));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 37) | (tmp << (64 - 37));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 23) | (tmp << (64 - 23));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 40) | (tmp << (64 - 40));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 52) | (tmp << (64 - 52));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 57) | (tmp << (64 - 57));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 14) | (tmp << (64 - 14));
b0 -= b1 + k4;
b1 -= k0 + t1;
tmp = b3 ^ b2;
b3 = (tmp >> 16) | (tmp << (64 - 16));
b2 -= b3 + k1 + t2;
b3 -= k2 + 4;
tmp = b3 ^ b0;
b3 = (tmp >> 32) | (tmp << (64 - 32));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 32) | (tmp << (64 - 32));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 58) | (tmp << (64 - 58));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 12) | (tmp << (64 - 12));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b0 -= b1 + k3;
b1 -= k4 + t0;
tmp = b3 ^ b2;
b3 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b3 + k0 + t1;
b3 -= k1 + 3;
tmp = b3 ^ b0;
b3 = (tmp >> 5) | (tmp << (64 - 5));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 37) | (tmp << (64 - 37));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 23) | (tmp << (64 - 23));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 40) | (tmp << (64 - 40));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 52) | (tmp << (64 - 52));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 57) | (tmp << (64 - 57));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 14) | (tmp << (64 - 14));
b0 -= b1 + k2;
b1 -= k3 + t2;
tmp = b3 ^ b2;
b3 = (tmp >> 16) | (tmp << (64 - 16));
b2 -= b3 + k4 + t0;
b3 -= k0 + 2;
tmp = b3 ^ b0;
b3 = (tmp >> 32) | (tmp << (64 - 32));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 32) | (tmp << (64 - 32));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 58) | (tmp << (64 - 58));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 12) | (tmp << (64 - 12));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b0 -= b1 + k1;
b1 -= k2 + t1;
tmp = b3 ^ b2;
b3 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b3 + k3 + t2;
b3 -= k4 + 1;
tmp = b3 ^ b0;
b3 = (tmp >> 5) | (tmp << (64 - 5));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 37) | (tmp << (64 - 37));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 23) | (tmp << (64 - 23));
b0 -= b1;
tmp = b3 ^ b2;
b3 = (tmp >> 40) | (tmp << (64 - 40));
b2 -= b3;
tmp = b3 ^ b0;
b3 = (tmp >> 52) | (tmp << (64 - 52));
b0 -= b3;
tmp = b1 ^ b2;
b1 = (tmp >> 57) | (tmp << (64 - 57));
b2 -= b1;
tmp = b1 ^ b0;
b1 = (tmp >> 14) | (tmp << (64 - 14));
b0 -= b1 + k0;
b1 -= k1 + t0;
tmp = b3 ^ b2;
b3 = (tmp >> 16) | (tmp << (64 - 16));
b2 -= b3 + k2 + t1;
b3 -= k3;
output[0] = b0; output[0] = b0;
output[1] = b1; output[1] = b1;
......
...@@ -16,294 +16,941 @@ void threefishEncrypt512(struct threefish_key *keyCtx, u64 *input, u64 *output) ...@@ -16,294 +16,941 @@ void threefishEncrypt512(struct threefish_key *keyCtx, u64 *input, u64 *output)
u64 t0 = keyCtx->tweak[0], t1 = keyCtx->tweak[1], u64 t0 = keyCtx->tweak[0], t1 = keyCtx->tweak[1],
t2 = keyCtx->tweak[2]; t2 = keyCtx->tweak[2];
b1 += k1; b0 += b1 + k0; b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0; b1 += k1;
b3 += k3; b2 += b3 + k2; b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2; b0 += b1 + k0;
b5 += k5 + t0; b4 += b5 + k4; b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4; b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
b7 += k7; b6 += b7 + k6 + t1; b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b2 += b1; b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2; b3 += k3;
b4 += b7; b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4; b2 += b3 + k2;
b6 += b5; b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6; b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
b0 += b3; b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b4 += b1; b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4; b5 += k5 + t0;
b6 += b3; b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6; b4 += b5 + k4;
b0 += b5; b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0; b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
b2 += b7; b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b6 += b1; b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6; b7 += k7;
b0 += b7; b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0; b6 += b7 + k6 + t1;
b2 += b5; b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2; b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b4 += b3; b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b1 += k2; b0 += b1 + k1; b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0; b2 += b1;
b3 += k4; b2 += b3 + k3; b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2; b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
b5 += k6 + t1; b4 += b5 + k5; b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
b7 += k8 + 1; b6 += b7 + k7 + t2; b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6; b4 += b7;
b2 += b1; b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2; b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
b4 += b7; b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
b6 += b5; b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6; b6 += b5;
b0 += b3; b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0; b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
b4 += b1; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
b6 += b3; b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6; b0 += b3;
b0 += b5; b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0; b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b2 += b7; b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b6 += b1; b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6; b4 += b1;
b0 += b7; b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0; b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
b2 += b5; b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
b4 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4; b6 += b3;
b1 += k3; b0 += b1 + k2; b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0; b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
b3 += k5; b2 += b3 + k4; b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
b5 += k7 + t2; b4 += b5 + k6; b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4; b0 += b5;
b7 += k0 + 2; b6 += b7 + k8 + t0; b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6; b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
b2 += b1; b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
b4 += b7; b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4; b2 += b7;
b6 += b5; b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6; b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b0 += b3; b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b4 += b1; b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4; b6 += b1;
b6 += b3; b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6; b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
b0 += b5; b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
b2 += b7; b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2; b0 += b7;
b6 += b1; b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6; b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
b0 += b7; b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
b2 += b5; b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2; b2 += b5;
b4 += b3; b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4; b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
b1 += k4; b0 += b1 + k3; b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
b3 += k6; b2 += b3 + k5; b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2; b4 += b3;
b5 += k8 + t0; b4 += b5 + k7; b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4; b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b7 += k1 + 3; b6 += b7 + k0 + t1; b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b2 += b1; b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2; b1 += k2;
b4 += b7; b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4; b0 += b1 + k1;
b6 += b5; b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6; b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
b0 += b3; b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b1; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4; b3 += k4;
b6 += b3; b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6; b2 += b3 + k3;
b0 += b5; b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0; b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
b2 += b7; b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b6 += b1; b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6; b5 += k6 + t1;
b0 += b7; b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0; b4 += b5 + k5;
b2 += b5; b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2; b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
b4 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
b1 += k5; b0 += b1 + k4; b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0; b7 += k8 + 1;
b3 += k7; b2 += b3 + k6; b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2; b6 += b7 + k7 + t2;
b5 += k0 + t1; b4 += b5 + k8; b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4; b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b7 += k2 + 4; b6 += b7 + k1 + t2; b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b2 += b1; b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2; b2 += b1;
b4 += b7; b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4; b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
b6 += b5; b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
b0 += b3; b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0; b4 += b7;
b4 += b1; b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4; b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
b6 += b3; b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
b0 += b5; b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0; b6 += b5;
b2 += b7; b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2; b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
b6 += b1; b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
b0 += b7; b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0; b0 += b3;
b2 += b5; b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2; b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b3; b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b1 += k6; b0 += b1 + k5; b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0; b4 += b1;
b3 += k8; b2 += b3 + k7; b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
b5 += k1 + t2; b4 += b5 + k0; b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
b7 += k3 + 5; b6 += b7 + k2 + t0; b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6; b6 += b3;
b2 += b1; b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2; b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
b4 += b7; b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
b6 += b5; b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6; b0 += b5;
b0 += b3; b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0; b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
b4 += b1; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
b6 += b3; b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6; b2 += b7;
b0 += b5; b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0; b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b2 += b7; b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b6 += b1; b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6; b6 += b1;
b0 += b7; b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0; b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
b2 += b5; b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
b4 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4; b0 += b7;
b1 += k7; b0 += b1 + k6; b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0; b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
b3 += k0; b2 += b3 + k8; b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
b5 += k2 + t0; b4 += b5 + k1; b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4; b2 += b5;
b7 += k4 + 6; b6 += b7 + k3 + t1; b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6; b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
b2 += b1; b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
b4 += b7; b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4; b4 += b3;
b6 += b5; b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
b0 += b3; b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b4 += b1; b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4; b1 += k3;
b6 += b3; b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6; b0 += b1 + k2;
b0 += b5; b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0; b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
b2 += b7; b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b6 += b1; b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6; b3 += k5;
b0 += b7; b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0; b2 += b3 + k4;
b2 += b5; b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2; b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
b4 += b3; b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b1 += k8; b0 += b1 + k7; b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0; b5 += k7 + t2;
b3 += k1; b2 += b3 + k0; b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2; b4 += b5 + k6;
b5 += k3 + t1; b4 += b5 + k2; b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4; b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
b7 += k5 + 7; b6 += b7 + k4 + t2; b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b2 += b1; b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2; b7 += k0 + 2;
b4 += b7; b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4; b6 += b7 + k8 + t0;
b6 += b5; b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6; b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b0 += b3; b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b1; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4; b2 += b1;
b6 += b3; b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6; b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
b0 += b5; b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
b2 += b7; b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2; b4 += b7;
b6 += b1; b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6; b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
b0 += b7; b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
b2 += b5; b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2; b6 += b5;
b4 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4; b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
b1 += k0; b0 += b1 + k8; b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
b3 += k2; b2 += b3 + k1; b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2; b0 += b3;
b5 += k4 + t2; b4 += b5 + k3; b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4; b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b7 += k6 + 8; b6 += b7 + k5 + t0; b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b2 += b1; b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2; b4 += b1;
b4 += b7; b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4; b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
b6 += b5; b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
b0 += b3; b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0; b6 += b3;
b4 += b1; b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4; b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
b6 += b3; b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
b0 += b5; b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0; b0 += b5;
b2 += b7; b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2; b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
b6 += b1; b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
b0 += b7; b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0; b2 += b7;
b2 += b5; b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2; b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b4 += b3; b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b1 += k1; b0 += b1 + k0; b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0; b6 += b1;
b3 += k3; b2 += b3 + k2; b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2; b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
b5 += k5 + t0; b4 += b5 + k4; b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
b7 += k7 + 9; b6 += b7 + k6 + t1; b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6; b0 += b7;
b2 += b1; b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2; b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
b4 += b7; b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
b6 += b5; b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6; b2 += b5;
b0 += b3; b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0; b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
b4 += b1; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
b6 += b3; b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6; b4 += b3;
b0 += b5; b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0; b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b2 += b7; b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b6 += b1; b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6; b1 += k4;
b0 += b7; b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0; b0 += b1 + k3;
b2 += b5; b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2; b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
b4 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
b1 += k2; b0 += b1 + k1; b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0; b3 += k6;
b3 += k4; b2 += b3 + k3; b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2; b2 += b3 + k5;
b5 += k6 + t1; b4 += b5 + k5; b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4; b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
b7 += k8 + 10; b6 += b7 + k7 + t2; b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b2 += b1; b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2; b5 += k8 + t0;
b4 += b7; b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4; b4 += b5 + k7;
b6 += b5; b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6; b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
b0 += b3; b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b4 += b1; b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4; b7 += k1 + 3;
b6 += b3; b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6; b6 += b7 + k0 + t1;
b0 += b5; b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0; b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b2 += b7; b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b6 += b1; b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6; b2 += b1;
b0 += b7; b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0; b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
b2 += b5; b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
b4 += b3; b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4; b4 += b7;
b1 += k3; b0 += b1 + k2; b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0; b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
b3 += k5; b2 += b3 + k4; b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
b5 += k7 + t2; b4 += b5 + k6; b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4; b6 += b5;
b7 += k0 + 11; b6 += b7 + k8 + t0; b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6; b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
b2 += b1; b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
b4 += b7; b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4; b0 += b3;
b6 += b5; b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6; b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b0 += b3; b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b1; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4; b4 += b1;
b6 += b3; b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
b0 += b5; b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
b2 += b7; b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2; b6 += b3;
b6 += b1; b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6; b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
b0 += b7; b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
b2 += b5; b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2; b0 += b5;
b4 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4; b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
b1 += k4; b0 += b1 + k3; b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
b3 += k6; b2 += b3 + k5; b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2; b2 += b7;
b5 += k8 + t0; b4 += b5 + k7; b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4; b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b7 += k1 + 12; b6 += b7 + k0 + t1; b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b2 += b1; b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2; b6 += b1;
b4 += b7; b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4; b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
b6 += b5; b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
b0 += b3; b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0; b0 += b7;
b4 += b1; b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4; b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
b6 += b3; b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
b0 += b5; b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0; b2 += b5;
b2 += b7; b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2; b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
b6 += b1; b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
b0 += b7; b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0; b4 += b3;
b2 += b5; b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
b4 += b3; b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b1 += k5; b0 += b1 + k4; b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0; b1 += k5;
b3 += k7; b2 += b3 + k6; b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2; b0 += b1 + k4;
b5 += k0 + t1; b4 += b5 + k8; b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4; b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
b7 += k2 + 13; b6 += b7 + k1 + t2; b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b2 += b1; b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2; b3 += k7;
b4 += b7; b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4; b2 += b3 + k6;
b6 += b5; b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6; b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
b0 += b3; b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b1; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4; b5 += k0 + t1;
b6 += b3; b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6; b4 += b5 + k8;
b0 += b5; b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0; b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
b2 += b7; b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b6 += b1; b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6; b7 += k2 + 4;
b0 += b7; b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0; b6 += b7 + k1 + t2;
b2 += b5; b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2; b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b4 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
b1 += k6; b0 += b1 + k5; b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0; b2 += b1;
b3 += k8; b2 += b3 + k7; b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2; b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
b5 += k1 + t2; b4 += b5 + k0; b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
b7 += k3 + 14; b6 += b7 + k2 + t0; b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6; b4 += b7;
b2 += b1; b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2; b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
b4 += b7; b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
b6 += b5; b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6; b6 += b5;
b0 += b3; b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0; b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
b4 += b1; b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
b6 += b3; b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6; b0 += b3;
b0 += b5; b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0; b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b2 += b7; b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b6 += b1; b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6; b4 += b1;
b0 += b7; b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0; b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
b2 += b5; b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
b4 += b3; b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4; b6 += b3;
b1 += k7; b0 += b1 + k6; b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0; b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
b3 += k0; b2 += b3 + k8; b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
b5 += k2 + t0; b4 += b5 + k1; b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4; b0 += b5;
b7 += k4 + 15; b6 += b7 + k3 + t1; b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6; b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
b2 += b1; b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
b4 += b7; b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4; b2 += b7;
b6 += b5; b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6; b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b0 += b3; b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b1; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4; b6 += b1;
b6 += b3; b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6; b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
b0 += b5; b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
b2 += b7; b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2; b0 += b7;
b6 += b1; b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6; b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
b0 += b7; b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
b2 += b5; b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2; b2 += b5;
b4 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4; b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
b1 += k8; b0 += b1 + k7; b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
b3 += k1; b2 += b3 + k0; b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2; b4 += b3;
b5 += k3 + t1; b4 += b5 + k2; b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4; b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b7 += k5 + 16; b6 += b7 + k4 + t2; b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b2 += b1; b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2; b1 += k6;
b4 += b7; b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4; b0 += b1 + k5;
b6 += b5; b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6; b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
b0 += b3; b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b4 += b1; b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4; b3 += k8;
b6 += b3; b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6; b2 += b3 + k7;
b0 += b5; b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0; b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
b2 += b7; b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b6 += b1; b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6; b5 += k1 + t2;
b0 += b7; b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0; b4 += b5 + k0;
b2 += b5; b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2; b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
b4 += b3; b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b1 += k0; b0 += b1 + k8; b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0; b7 += k3 + 5;
b3 += k2; b2 += b3 + k1; b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2; b6 += b7 + k2 + t0;
b5 += k4 + t2; b4 += b5 + k3; b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4; b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b7 += k6 + 17; b6 += b7 + k5 + t0; b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b2 += b1; b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2; b2 += b1;
b4 += b7; b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4; b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
b6 += b5; b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
b0 += b3; b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0; b4 += b7;
b4 += b1; b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4; b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
b6 += b3; b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
b0 += b5; b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0; b6 += b5;
b2 += b7; b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2; b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
b6 += b1; b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
b0 += b7; b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0; b0 += b3;
b2 += b5; b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2; b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b3; b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
b4 += b1;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
b6 += b3;
b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
b0 += b5;
b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
b2 += b7;
b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b6 += b1;
b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
b0 += b7;
b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
b2 += b5;
b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
b4 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
b1 += k7;
b0 += b1 + k6;
b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
b3 += k0;
b2 += b3 + k8;
b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
b5 += k2 + t0;
b4 += b5 + k1;
b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
b7 += k4 + 6;
b6 += b7 + k3 + t1;
b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b2 += b1;
b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
b4 += b7;
b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
b6 += b5;
b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
b0 += b3;
b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b4 += b1;
b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
b6 += b3;
b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
b0 += b5;
b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
b2 += b7;
b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b6 += b1;
b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
b0 += b7;
b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
b2 += b5;
b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
b4 += b3;
b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b1 += k8;
b0 += b1 + k7;
b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
b3 += k1;
b2 += b3 + k0;
b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
b5 += k3 + t1;
b4 += b5 + k2;
b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
b7 += k5 + 7;
b6 += b7 + k4 + t2;
b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b2 += b1;
b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
b4 += b7;
b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
b6 += b5;
b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
b0 += b3;
b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b1;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
b6 += b3;
b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
b0 += b5;
b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
b2 += b7;
b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b6 += b1;
b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
b0 += b7;
b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
b2 += b5;
b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
b4 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
b1 += k0;
b0 += b1 + k8;
b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
b3 += k2;
b2 += b3 + k1;
b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
b5 += k4 + t2;
b4 += b5 + k3;
b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
b7 += k6 + 8;
b6 += b7 + k5 + t0;
b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b2 += b1;
b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
b4 += b7;
b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
b6 += b5;
b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
b0 += b3;
b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b4 += b1;
b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
b6 += b3;
b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
b0 += b5;
b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
b2 += b7;
b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b6 += b1;
b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
b0 += b7;
b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
b2 += b5;
b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
b4 += b3;
b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b1 += k1;
b0 += b1 + k0;
b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
b3 += k3;
b2 += b3 + k2;
b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
b5 += k5 + t0;
b4 += b5 + k4;
b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
b7 += k7 + 9;
b6 += b7 + k6 + t1;
b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b2 += b1;
b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
b4 += b7;
b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
b6 += b5;
b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
b0 += b3;
b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b1;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
b6 += b3;
b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
b0 += b5;
b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
b2 += b7;
b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b6 += b1;
b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
b0 += b7;
b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
b2 += b5;
b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
b4 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
b1 += k2;
b0 += b1 + k1;
b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
b3 += k4;
b2 += b3 + k3;
b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
b5 += k6 + t1;
b4 += b5 + k5;
b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
b7 += k8 + 10;
b6 += b7 + k7 + t2;
b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b2 += b1;
b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
b4 += b7;
b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
b6 += b5;
b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
b0 += b3;
b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b4 += b1;
b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
b6 += b3;
b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
b0 += b5;
b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
b2 += b7;
b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b6 += b1;
b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
b0 += b7;
b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
b2 += b5;
b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
b4 += b3;
b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b1 += k3;
b0 += b1 + k2;
b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
b3 += k5;
b2 += b3 + k4;
b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
b5 += k7 + t2;
b4 += b5 + k6;
b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
b7 += k0 + 11;
b6 += b7 + k8 + t0;
b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b2 += b1;
b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
b4 += b7;
b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
b6 += b5;
b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
b0 += b3;
b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b1;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
b6 += b3;
b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
b0 += b5;
b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
b2 += b7;
b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b6 += b1;
b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
b0 += b7;
b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
b2 += b5;
b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
b4 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
b1 += k4;
b0 += b1 + k3;
b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
b3 += k6;
b2 += b3 + k5;
b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
b5 += k8 + t0;
b4 += b5 + k7;
b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
b7 += k1 + 12;
b6 += b7 + k0 + t1;
b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b2 += b1;
b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
b4 += b7;
b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
b6 += b5;
b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
b0 += b3;
b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b4 += b1;
b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
b6 += b3;
b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
b0 += b5;
b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
b2 += b7;
b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b6 += b1;
b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
b0 += b7;
b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
b2 += b5;
b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
b4 += b3;
b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b1 += k5;
b0 += b1 + k4;
b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
b3 += k7;
b2 += b3 + k6;
b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
b5 += k0 + t1;
b4 += b5 + k8;
b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
b7 += k2 + 13;
b6 += b7 + k1 + t2;
b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b2 += b1;
b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
b4 += b7;
b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
b6 += b5;
b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
b0 += b3;
b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b1;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
b6 += b3;
b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
b0 += b5;
b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
b2 += b7;
b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b6 += b1;
b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
b0 += b7;
b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
b2 += b5;
b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
b4 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
b1 += k6;
b0 += b1 + k5;
b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
b3 += k8;
b2 += b3 + k7;
b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
b5 += k1 + t2;
b4 += b5 + k0;
b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
b7 += k3 + 14;
b6 += b7 + k2 + t0;
b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b2 += b1;
b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
b4 += b7;
b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
b6 += b5;
b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
b0 += b3;
b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b4 += b1;
b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
b6 += b3;
b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
b0 += b5;
b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
b2 += b7;
b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b6 += b1;
b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
b0 += b7;
b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
b2 += b5;
b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
b4 += b3;
b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b1 += k7;
b0 += b1 + k6;
b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
b3 += k0;
b2 += b3 + k8;
b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
b5 += k2 + t0;
b4 += b5 + k1;
b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
b7 += k4 + 15;
b6 += b7 + k3 + t1;
b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b2 += b1;
b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
b4 += b7;
b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
b6 += b5;
b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
b0 += b3;
b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b1;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
b6 += b3;
b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
b0 += b5;
b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
b2 += b7;
b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b6 += b1;
b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
b0 += b7;
b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
b2 += b5;
b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
b4 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
b1 += k8;
b0 += b1 + k7;
b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
b3 += k1;
b2 += b3 + k0;
b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
b5 += k3 + t1;
b4 += b5 + k2;
b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
b7 += k5 + 16;
b6 += b7 + k4 + t2;
b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
b2 += b1;
b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
b4 += b7;
b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
b6 += b5;
b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
b0 += b3;
b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
b4 += b1;
b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
b6 += b3;
b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
b0 += b5;
b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
b2 += b7;
b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
b6 += b1;
b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
b0 += b7;
b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
b2 += b5;
b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
b4 += b3;
b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
b1 += k0;
b0 += b1 + k8;
b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
b3 += k2;
b2 += b3 + k1;
b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
b5 += k4 + t2;
b4 += b5 + k3;
b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
b7 += k6 + 17;
b6 += b7 + k5 + t0;
b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
b2 += b1;
b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
b4 += b7;
b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
b6 += b5;
b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
b0 += b3;
b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
b4 += b1;
b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
b6 += b3;
b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
b0 += b5;
b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
b2 += b7;
b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
b6 += b1;
b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
b0 += b7;
b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
b2 += b5;
b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
b4 += b3;
b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
output[0] = b0 + k0; output[0] = b0 + k0;
output[1] = b1 + k1; output[1] = b1 + k1;
...@@ -315,318 +962,1254 @@ void threefishEncrypt512(struct threefish_key *keyCtx, u64 *input, u64 *output) ...@@ -315,318 +962,1254 @@ void threefishEncrypt512(struct threefish_key *keyCtx, u64 *input, u64 *output)
output[7] = b7 + k7 + 18; output[7] = b7 + k7 + 18;
} }
void threefishDecrypt512(struct threefish_key *keyCtx, u64 *input, u64 *output) void threefishDecrypt512(struct threefish_key *keyCtx, u64 *input, u64 *output)
{ {
u64 b0 = input[0], b1 = input[1], u64 b0 = input[0], b1 = input[1],
b2 = input[2], b3 = input[3], b2 = input[2], b3 = input[3],
b4 = input[4], b5 = input[5], b4 = input[4], b5 = input[5],
b6 = input[6], b7 = input[7]; b6 = input[6], b7 = input[7];
u64 k0 = keyCtx->key[0], k1 = keyCtx->key[1], u64 k0 = keyCtx->key[0], k1 = keyCtx->key[1],
k2 = keyCtx->key[2], k3 = keyCtx->key[3], k2 = keyCtx->key[2], k3 = keyCtx->key[3],
k4 = keyCtx->key[4], k5 = keyCtx->key[5], k4 = keyCtx->key[4], k5 = keyCtx->key[5],
k6 = keyCtx->key[6], k7 = keyCtx->key[7], k6 = keyCtx->key[6], k7 = keyCtx->key[7],
k8 = keyCtx->key[8]; k8 = keyCtx->key[8];
u64 t0 = keyCtx->tweak[0], t1 = keyCtx->tweak[1], u64 t0 = keyCtx->tweak[0], t1 = keyCtx->tweak[1],
t2 = keyCtx->tweak[2]; t2 = keyCtx->tweak[2];
u64 tmp;
b0 -= k0;
b1 -= k1;
b2 -= k2;
b3 -= k3;
b4 -= k4;
b5 -= k5 + t0;
b6 -= k6 + t1;
b7 -= k7 + 18;
tmp = b3 ^ b4;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 56) | (tmp << (64 - 56));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 35) | (tmp << (64 - 35));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 8) | (tmp << (64 - 8));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 43) | (tmp << (64 - 43));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 29) | (tmp << (64 - 29));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 17) | (tmp << (64 - 17));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 10) | (tmp << (64 - 10));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 50) | (tmp << (64 - 50));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 13) | (tmp << (64 - 13));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 24) | (tmp << (64 - 24));
b6 -= b7 + k5 + t0;
b7 -= k6 + 17;
tmp = b5 ^ b4;
b5 = (tmp >> 34) | (tmp << (64 - 34));
b4 -= b5 + k3;
b5 -= k4 + t2;
tmp = b3 ^ b2;
b3 = (tmp >> 30) | (tmp << (64 - 30));
b2 -= b3 + k1;
b3 -= k2;
tmp = b1 ^ b0;
b1 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b1 + k8;
b1 -= k0;
tmp = b3 ^ b4;
b3 = (tmp >> 56) | (tmp << (64 - 56));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 54) | (tmp << (64 - 54));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 9) | (tmp << (64 - 9));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 44) | (tmp << (64 - 44));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 39) | (tmp << (64 - 39));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 36) | (tmp << (64 - 36));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 49) | (tmp << (64 - 49));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 17) | (tmp << (64 - 17));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 42) | (tmp << (64 - 42));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 14) | (tmp << (64 - 14));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 27) | (tmp << (64 - 27));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 37) | (tmp << (64 - 37));
b6 -= b7 + k4 + t2;
b7 -= k5 + 16;
tmp = b5 ^ b4;
b5 = (tmp >> 19) | (tmp << (64 - 19));
b4 -= b5 + k2;
b5 -= k3 + t1;
tmp = b3 ^ b2;
b3 = (tmp >> 36) | (tmp << (64 - 36));
b2 -= b3 + k0;
b3 -= k1;
tmp = b1 ^ b0;
b1 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b1 + k7;
b1 -= k8;
tmp = b3 ^ b4;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 56) | (tmp << (64 - 56));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 35) | (tmp << (64 - 35));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 8) | (tmp << (64 - 8));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 43) | (tmp << (64 - 43));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 29) | (tmp << (64 - 29));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 17) | (tmp << (64 - 17));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 10) | (tmp << (64 - 10));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 50) | (tmp << (64 - 50));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 13) | (tmp << (64 - 13));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 24) | (tmp << (64 - 24));
b6 -= b7 + k3 + t1;
b7 -= k4 + 15;
tmp = b5 ^ b4;
b5 = (tmp >> 34) | (tmp << (64 - 34));
b4 -= b5 + k1;
b5 -= k2 + t0;
tmp = b3 ^ b2;
b3 = (tmp >> 30) | (tmp << (64 - 30));
b2 -= b3 + k8;
b3 -= k0;
tmp = b1 ^ b0;
b1 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b1 + k6;
b1 -= k7;
tmp = b3 ^ b4;
b3 = (tmp >> 56) | (tmp << (64 - 56));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 54) | (tmp << (64 - 54));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 9) | (tmp << (64 - 9));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 44) | (tmp << (64 - 44));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 39) | (tmp << (64 - 39));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 36) | (tmp << (64 - 36));
b0 -= b5;
u64 tmp; tmp = b3 ^ b6;
b3 = (tmp >> 49) | (tmp << (64 - 49));
b6 -= b3;
b0 -= k0; tmp = b1 ^ b4;
b1 -= k1; b1 = (tmp >> 17) | (tmp << (64 - 17));
b2 -= k2; b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 42) | (tmp << (64 - 42));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 14) | (tmp << (64 - 14));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 27) | (tmp << (64 - 27));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 37) | (tmp << (64 - 37));
b6 -= b7 + k2 + t0;
b7 -= k3 + 14;
tmp = b5 ^ b4;
b5 = (tmp >> 19) | (tmp << (64 - 19));
b4 -= b5 + k0;
b5 -= k1 + t2;
tmp = b3 ^ b2;
b3 = (tmp >> 36) | (tmp << (64 - 36));
b2 -= b3 + k7;
b3 -= k8;
tmp = b1 ^ b0;
b1 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b1 + k5;
b1 -= k6;
tmp = b3 ^ b4;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 56) | (tmp << (64 - 56));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 35) | (tmp << (64 - 35));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 8) | (tmp << (64 - 8));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 43) | (tmp << (64 - 43));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 29) | (tmp << (64 - 29));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 17) | (tmp << (64 - 17));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 10) | (tmp << (64 - 10));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 50) | (tmp << (64 - 50));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 13) | (tmp << (64 - 13));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 24) | (tmp << (64 - 24));
b6 -= b7 + k1 + t2;
b7 -= k2 + 13;
tmp = b5 ^ b4;
b5 = (tmp >> 34) | (tmp << (64 - 34));
b4 -= b5 + k8;
b5 -= k0 + t1;
tmp = b3 ^ b2;
b3 = (tmp >> 30) | (tmp << (64 - 30));
b2 -= b3 + k6;
b3 -= k7;
tmp = b1 ^ b0;
b1 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b1 + k4;
b1 -= k5;
tmp = b3 ^ b4;
b3 = (tmp >> 56) | (tmp << (64 - 56));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 54) | (tmp << (64 - 54));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 9) | (tmp << (64 - 9));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 44) | (tmp << (64 - 44));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 39) | (tmp << (64 - 39));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 36) | (tmp << (64 - 36));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 49) | (tmp << (64 - 49));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 17) | (tmp << (64 - 17));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 42) | (tmp << (64 - 42));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 14) | (tmp << (64 - 14));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 27) | (tmp << (64 - 27));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 37) | (tmp << (64 - 37));
b6 -= b7 + k0 + t1;
b7 -= k1 + 12;
tmp = b5 ^ b4;
b5 = (tmp >> 19) | (tmp << (64 - 19));
b4 -= b5 + k7;
b5 -= k8 + t0;
tmp = b3 ^ b2;
b3 = (tmp >> 36) | (tmp << (64 - 36));
b2 -= b3 + k5;
b3 -= k6;
tmp = b1 ^ b0;
b1 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b1 + k3;
b1 -= k4;
tmp = b3 ^ b4;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 56) | (tmp << (64 - 56));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 35) | (tmp << (64 - 35));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 8) | (tmp << (64 - 8));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 43) | (tmp << (64 - 43));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 29) | (tmp << (64 - 29));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 17) | (tmp << (64 - 17));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 10) | (tmp << (64 - 10));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 50) | (tmp << (64 - 50));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 13) | (tmp << (64 - 13));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 24) | (tmp << (64 - 24));
b6 -= b7 + k8 + t0;
b7 -= k0 + 11;
tmp = b5 ^ b4;
b5 = (tmp >> 34) | (tmp << (64 - 34));
b4 -= b5 + k6;
b5 -= k7 + t2;
tmp = b3 ^ b2;
b3 = (tmp >> 30) | (tmp << (64 - 30));
b2 -= b3 + k4;
b3 -= k5;
tmp = b1 ^ b0;
b1 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b1 + k2;
b1 -= k3;
tmp = b3 ^ b4;
b3 = (tmp >> 56) | (tmp << (64 - 56));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 54) | (tmp << (64 - 54));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 9) | (tmp << (64 - 9));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 44) | (tmp << (64 - 44));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 39) | (tmp << (64 - 39));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 36) | (tmp << (64 - 36));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 49) | (tmp << (64 - 49));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 17) | (tmp << (64 - 17));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 42) | (tmp << (64 - 42));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 14) | (tmp << (64 - 14));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 27) | (tmp << (64 - 27));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 37) | (tmp << (64 - 37));
b6 -= b7 + k7 + t2;
b7 -= k8 + 10;
tmp = b5 ^ b4;
b5 = (tmp >> 19) | (tmp << (64 - 19));
b4 -= b5 + k5;
b5 -= k6 + t1;
tmp = b3 ^ b2;
b3 = (tmp >> 36) | (tmp << (64 - 36));
b2 -= b3 + k3;
b3 -= k4;
tmp = b1 ^ b0;
b1 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b1 + k1;
b1 -= k2;
tmp = b3 ^ b4;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 56) | (tmp << (64 - 56));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 35) | (tmp << (64 - 35));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 8) | (tmp << (64 - 8));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 43) | (tmp << (64 - 43));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 29) | (tmp << (64 - 29));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 17) | (tmp << (64 - 17));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 10) | (tmp << (64 - 10));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 50) | (tmp << (64 - 50));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 13) | (tmp << (64 - 13));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 24) | (tmp << (64 - 24));
b6 -= b7 + k6 + t1;
b7 -= k7 + 9;
tmp = b5 ^ b4;
b5 = (tmp >> 34) | (tmp << (64 - 34));
b4 -= b5 + k4;
b5 -= k5 + t0;
tmp = b3 ^ b2;
b3 = (tmp >> 30) | (tmp << (64 - 30));
b2 -= b3 + k2;
b3 -= k3; b3 -= k3;
b4 -= k4;
tmp = b1 ^ b0;
b1 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b1 + k0;
b1 -= k1;
tmp = b3 ^ b4;
b3 = (tmp >> 56) | (tmp << (64 - 56));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 54) | (tmp << (64 - 54));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 9) | (tmp << (64 - 9));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 44) | (tmp << (64 - 44));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 39) | (tmp << (64 - 39));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 36) | (tmp << (64 - 36));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 49) | (tmp << (64 - 49));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 17) | (tmp << (64 - 17));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 42) | (tmp << (64 - 42));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 14) | (tmp << (64 - 14));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 27) | (tmp << (64 - 27));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 37) | (tmp << (64 - 37));
b6 -= b7 + k5 + t0;
b7 -= k6 + 8;
tmp = b5 ^ b4;
b5 = (tmp >> 19) | (tmp << (64 - 19));
b4 -= b5 + k3;
b5 -= k4 + t2;
tmp = b3 ^ b2;
b3 = (tmp >> 36) | (tmp << (64 - 36));
b2 -= b3 + k1;
b3 -= k2;
tmp = b1 ^ b0;
b1 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b1 + k8;
b1 -= k0;
tmp = b3 ^ b4;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 56) | (tmp << (64 - 56));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 35) | (tmp << (64 - 35));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 8) | (tmp << (64 - 8));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 43) | (tmp << (64 - 43));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 29) | (tmp << (64 - 29));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 17) | (tmp << (64 - 17));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 10) | (tmp << (64 - 10));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 50) | (tmp << (64 - 50));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 13) | (tmp << (64 - 13));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 24) | (tmp << (64 - 24));
b6 -= b7 + k4 + t2;
b7 -= k5 + 7;
tmp = b5 ^ b4;
b5 = (tmp >> 34) | (tmp << (64 - 34));
b4 -= b5 + k2;
b5 -= k3 + t1;
tmp = b3 ^ b2;
b3 = (tmp >> 30) | (tmp << (64 - 30));
b2 -= b3 + k0;
b3 -= k1;
tmp = b1 ^ b0;
b1 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b1 + k7;
b1 -= k8;
tmp = b3 ^ b4;
b3 = (tmp >> 56) | (tmp << (64 - 56));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 54) | (tmp << (64 - 54));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 9) | (tmp << (64 - 9));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 44) | (tmp << (64 - 44));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 39) | (tmp << (64 - 39));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 36) | (tmp << (64 - 36));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 49) | (tmp << (64 - 49));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 17) | (tmp << (64 - 17));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 42) | (tmp << (64 - 42));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 14) | (tmp << (64 - 14));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 27) | (tmp << (64 - 27));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 37) | (tmp << (64 - 37));
b6 -= b7 + k3 + t1;
b7 -= k4 + 6;
tmp = b5 ^ b4;
b5 = (tmp >> 19) | (tmp << (64 - 19));
b4 -= b5 + k1;
b5 -= k2 + t0;
tmp = b3 ^ b2;
b3 = (tmp >> 36) | (tmp << (64 - 36));
b2 -= b3 + k8;
b3 -= k0;
tmp = b1 ^ b0;
b1 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b1 + k6;
b1 -= k7;
tmp = b3 ^ b4;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 56) | (tmp << (64 - 56));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 35) | (tmp << (64 - 35));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 8) | (tmp << (64 - 8));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 43) | (tmp << (64 - 43));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 29) | (tmp << (64 - 29));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 17) | (tmp << (64 - 17));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 10) | (tmp << (64 - 10));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 50) | (tmp << (64 - 50));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 13) | (tmp << (64 - 13));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 24) | (tmp << (64 - 24));
b6 -= b7 + k2 + t0;
b7 -= k3 + 5;
tmp = b5 ^ b4;
b5 = (tmp >> 34) | (tmp << (64 - 34));
b4 -= b5 + k0;
b5 -= k1 + t2;
tmp = b3 ^ b2;
b3 = (tmp >> 30) | (tmp << (64 - 30));
b2 -= b3 + k7;
b3 -= k8;
tmp = b1 ^ b0;
b1 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b1 + k5;
b1 -= k6;
tmp = b3 ^ b4;
b3 = (tmp >> 56) | (tmp << (64 - 56));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 54) | (tmp << (64 - 54));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 9) | (tmp << (64 - 9));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 44) | (tmp << (64 - 44));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 39) | (tmp << (64 - 39));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 36) | (tmp << (64 - 36));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 49) | (tmp << (64 - 49));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 17) | (tmp << (64 - 17));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 42) | (tmp << (64 - 42));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 14) | (tmp << (64 - 14));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 27) | (tmp << (64 - 27));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 37) | (tmp << (64 - 37));
b6 -= b7 + k1 + t2;
b7 -= k2 + 4;
tmp = b5 ^ b4;
b5 = (tmp >> 19) | (tmp << (64 - 19));
b4 -= b5 + k8;
b5 -= k0 + t1;
tmp = b3 ^ b2;
b3 = (tmp >> 36) | (tmp << (64 - 36));
b2 -= b3 + k6;
b3 -= k7;
tmp = b1 ^ b0;
b1 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b1 + k4;
b1 -= k5;
tmp = b3 ^ b4;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 56) | (tmp << (64 - 56));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 35) | (tmp << (64 - 35));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 8) | (tmp << (64 - 8));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 43) | (tmp << (64 - 43));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 29) | (tmp << (64 - 29));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 17) | (tmp << (64 - 17));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 10) | (tmp << (64 - 10));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 50) | (tmp << (64 - 50));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 13) | (tmp << (64 - 13));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 24) | (tmp << (64 - 24));
b6 -= b7 + k0 + t1;
b7 -= k1 + 3;
tmp = b5 ^ b4;
b5 = (tmp >> 34) | (tmp << (64 - 34));
b4 -= b5 + k7;
b5 -= k8 + t0;
tmp = b3 ^ b2;
b3 = (tmp >> 30) | (tmp << (64 - 30));
b2 -= b3 + k5;
b3 -= k6;
tmp = b1 ^ b0;
b1 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b1 + k3;
b1 -= k4;
tmp = b3 ^ b4;
b3 = (tmp >> 56) | (tmp << (64 - 56));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 54) | (tmp << (64 - 54));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 9) | (tmp << (64 - 9));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 44) | (tmp << (64 - 44));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 39) | (tmp << (64 - 39));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 36) | (tmp << (64 - 36));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 49) | (tmp << (64 - 49));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 17) | (tmp << (64 - 17));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 42) | (tmp << (64 - 42));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 14) | (tmp << (64 - 14));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 27) | (tmp << (64 - 27));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 37) | (tmp << (64 - 37));
b6 -= b7 + k8 + t0;
b7 -= k0 + 2;
tmp = b5 ^ b4;
b5 = (tmp >> 19) | (tmp << (64 - 19));
b4 -= b5 + k6;
b5 -= k7 + t2;
tmp = b3 ^ b2;
b3 = (tmp >> 36) | (tmp << (64 - 36));
b2 -= b3 + k4;
b3 -= k5;
tmp = b1 ^ b0;
b1 = (tmp >> 46) | (tmp << (64 - 46));
b0 -= b1 + k2;
b1 -= k3;
tmp = b3 ^ b4;
b3 = (tmp >> 22) | (tmp << (64 - 22));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 56) | (tmp << (64 - 56));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 35) | (tmp << (64 - 35));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 8) | (tmp << (64 - 8));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 43) | (tmp << (64 - 43));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 29) | (tmp << (64 - 29));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 25) | (tmp << (64 - 25));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 17) | (tmp << (64 - 17));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 10) | (tmp << (64 - 10));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 50) | (tmp << (64 - 50));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 13) | (tmp << (64 - 13));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 24) | (tmp << (64 - 24));
b6 -= b7 + k7 + t2;
b7 -= k8 + 1;
tmp = b5 ^ b4;
b5 = (tmp >> 34) | (tmp << (64 - 34));
b4 -= b5 + k5;
b5 -= k6 + t1;
tmp = b3 ^ b2;
b3 = (tmp >> 30) | (tmp << (64 - 30));
b2 -= b3 + k3;
b3 -= k4;
tmp = b1 ^ b0;
b1 = (tmp >> 39) | (tmp << (64 - 39));
b0 -= b1 + k1;
b1 -= k2;
tmp = b3 ^ b4;
b3 = (tmp >> 56) | (tmp << (64 - 56));
b4 -= b3;
tmp = b5 ^ b2;
b5 = (tmp >> 54) | (tmp << (64 - 54));
b2 -= b5;
tmp = b7 ^ b0;
b7 = (tmp >> 9) | (tmp << (64 - 9));
b0 -= b7;
tmp = b1 ^ b6;
b1 = (tmp >> 44) | (tmp << (64 - 44));
b6 -= b1;
tmp = b7 ^ b2;
b7 = (tmp >> 39) | (tmp << (64 - 39));
b2 -= b7;
tmp = b5 ^ b0;
b5 = (tmp >> 36) | (tmp << (64 - 36));
b0 -= b5;
tmp = b3 ^ b6;
b3 = (tmp >> 49) | (tmp << (64 - 49));
b6 -= b3;
tmp = b1 ^ b4;
b1 = (tmp >> 17) | (tmp << (64 - 17));
b4 -= b1;
tmp = b3 ^ b0;
b3 = (tmp >> 42) | (tmp << (64 - 42));
b0 -= b3;
tmp = b5 ^ b6;
b5 = (tmp >> 14) | (tmp << (64 - 14));
b6 -= b5;
tmp = b7 ^ b4;
b7 = (tmp >> 27) | (tmp << (64 - 27));
b4 -= b7;
tmp = b1 ^ b2;
b1 = (tmp >> 33) | (tmp << (64 - 33));
b2 -= b1;
tmp = b7 ^ b6;
b7 = (tmp >> 37) | (tmp << (64 - 37));
b6 -= b7 + k6 + t1;
b7 -= k7;
tmp = b5 ^ b4;
b5 = (tmp >> 19) | (tmp << (64 - 19));
b4 -= b5 + k4;
b5 -= k5 + t0; b5 -= k5 + t0;
b6 -= k6 + t1;
b7 -= k7 + 18; tmp = b3 ^ b2;
tmp = b3 ^ b4; b3 = (tmp >> 22) | (tmp << (64 - 22)); b4 -= b3; b3 = (tmp >> 36) | (tmp << (64 - 36));
tmp = b5 ^ b2; b5 = (tmp >> 56) | (tmp << (64 - 56)); b2 -= b5; b2 -= b3 + k2;
tmp = b7 ^ b0; b7 = (tmp >> 35) | (tmp << (64 - 35)); b0 -= b7; b3 -= k3;
tmp = b1 ^ b6; b1 = (tmp >> 8) | (tmp << (64 - 8)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 43) | (tmp << (64 - 43)); b2 -= b7; tmp = b1 ^ b0;
tmp = b5 ^ b0; b5 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b5; b1 = (tmp >> 46) | (tmp << (64 - 46));
tmp = b3 ^ b6; b3 = (tmp >> 29) | (tmp << (64 - 29)); b6 -= b3; b0 -= b1 + k0;
tmp = b1 ^ b4; b1 = (tmp >> 25) | (tmp << (64 - 25)); b4 -= b1; b1 -= k1;
tmp = b3 ^ b0; b3 = (tmp >> 17) | (tmp << (64 - 17)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 10) | (tmp << (64 - 10)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 50) | (tmp << (64 - 50)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 13) | (tmp << (64 - 13)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 24) | (tmp << (64 - 24)); b6 -= b7 + k5 + t0; b7 -= k6 + 17;
tmp = b5 ^ b4; b5 = (tmp >> 34) | (tmp << (64 - 34)); b4 -= b5 + k3; b5 -= k4 + t2;
tmp = b3 ^ b2; b3 = (tmp >> 30) | (tmp << (64 - 30)); b2 -= b3 + k1; b3 -= k2;
tmp = b1 ^ b0; b1 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b1 + k8; b1 -= k0;
tmp = b3 ^ b4; b3 = (tmp >> 56) | (tmp << (64 - 56)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 54) | (tmp << (64 - 54)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 9) | (tmp << (64 - 9)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 44) | (tmp << (64 - 44)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 39) | (tmp << (64 - 39)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 36) | (tmp << (64 - 36)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 49) | (tmp << (64 - 49)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 17) | (tmp << (64 - 17)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 42) | (tmp << (64 - 42)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 14) | (tmp << (64 - 14)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 27) | (tmp << (64 - 27)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 37) | (tmp << (64 - 37)); b6 -= b7 + k4 + t2; b7 -= k5 + 16;
tmp = b5 ^ b4; b5 = (tmp >> 19) | (tmp << (64 - 19)); b4 -= b5 + k2; b5 -= k3 + t1;
tmp = b3 ^ b2; b3 = (tmp >> 36) | (tmp << (64 - 36)); b2 -= b3 + k0; b3 -= k1;
tmp = b1 ^ b0; b1 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b1 + k7; b1 -= k8;
tmp = b3 ^ b4; b3 = (tmp >> 22) | (tmp << (64 - 22)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 56) | (tmp << (64 - 56)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 35) | (tmp << (64 - 35)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 8) | (tmp << (64 - 8)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 43) | (tmp << (64 - 43)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 29) | (tmp << (64 - 29)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 25) | (tmp << (64 - 25)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 17) | (tmp << (64 - 17)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 10) | (tmp << (64 - 10)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 50) | (tmp << (64 - 50)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 13) | (tmp << (64 - 13)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 24) | (tmp << (64 - 24)); b6 -= b7 + k3 + t1; b7 -= k4 + 15;
tmp = b5 ^ b4; b5 = (tmp >> 34) | (tmp << (64 - 34)); b4 -= b5 + k1; b5 -= k2 + t0;
tmp = b3 ^ b2; b3 = (tmp >> 30) | (tmp << (64 - 30)); b2 -= b3 + k8; b3 -= k0;
tmp = b1 ^ b0; b1 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b1 + k6; b1 -= k7;
tmp = b3 ^ b4; b3 = (tmp >> 56) | (tmp << (64 - 56)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 54) | (tmp << (64 - 54)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 9) | (tmp << (64 - 9)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 44) | (tmp << (64 - 44)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 39) | (tmp << (64 - 39)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 36) | (tmp << (64 - 36)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 49) | (tmp << (64 - 49)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 17) | (tmp << (64 - 17)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 42) | (tmp << (64 - 42)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 14) | (tmp << (64 - 14)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 27) | (tmp << (64 - 27)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 37) | (tmp << (64 - 37)); b6 -= b7 + k2 + t0; b7 -= k3 + 14;
tmp = b5 ^ b4; b5 = (tmp >> 19) | (tmp << (64 - 19)); b4 -= b5 + k0; b5 -= k1 + t2;
tmp = b3 ^ b2; b3 = (tmp >> 36) | (tmp << (64 - 36)); b2 -= b3 + k7; b3 -= k8;
tmp = b1 ^ b0; b1 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b1 + k5; b1 -= k6;
tmp = b3 ^ b4; b3 = (tmp >> 22) | (tmp << (64 - 22)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 56) | (tmp << (64 - 56)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 35) | (tmp << (64 - 35)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 8) | (tmp << (64 - 8)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 43) | (tmp << (64 - 43)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 29) | (tmp << (64 - 29)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 25) | (tmp << (64 - 25)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 17) | (tmp << (64 - 17)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 10) | (tmp << (64 - 10)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 50) | (tmp << (64 - 50)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 13) | (tmp << (64 - 13)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 24) | (tmp << (64 - 24)); b6 -= b7 + k1 + t2; b7 -= k2 + 13;
tmp = b5 ^ b4; b5 = (tmp >> 34) | (tmp << (64 - 34)); b4 -= b5 + k8; b5 -= k0 + t1;
tmp = b3 ^ b2; b3 = (tmp >> 30) | (tmp << (64 - 30)); b2 -= b3 + k6; b3 -= k7;
tmp = b1 ^ b0; b1 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b1 + k4; b1 -= k5;
tmp = b3 ^ b4; b3 = (tmp >> 56) | (tmp << (64 - 56)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 54) | (tmp << (64 - 54)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 9) | (tmp << (64 - 9)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 44) | (tmp << (64 - 44)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 39) | (tmp << (64 - 39)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 36) | (tmp << (64 - 36)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 49) | (tmp << (64 - 49)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 17) | (tmp << (64 - 17)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 42) | (tmp << (64 - 42)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 14) | (tmp << (64 - 14)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 27) | (tmp << (64 - 27)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 37) | (tmp << (64 - 37)); b6 -= b7 + k0 + t1; b7 -= k1 + 12;
tmp = b5 ^ b4; b5 = (tmp >> 19) | (tmp << (64 - 19)); b4 -= b5 + k7; b5 -= k8 + t0;
tmp = b3 ^ b2; b3 = (tmp >> 36) | (tmp << (64 - 36)); b2 -= b3 + k5; b3 -= k6;
tmp = b1 ^ b0; b1 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b1 + k3; b1 -= k4;
tmp = b3 ^ b4; b3 = (tmp >> 22) | (tmp << (64 - 22)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 56) | (tmp << (64 - 56)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 35) | (tmp << (64 - 35)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 8) | (tmp << (64 - 8)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 43) | (tmp << (64 - 43)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 29) | (tmp << (64 - 29)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 25) | (tmp << (64 - 25)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 17) | (tmp << (64 - 17)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 10) | (tmp << (64 - 10)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 50) | (tmp << (64 - 50)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 13) | (tmp << (64 - 13)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 24) | (tmp << (64 - 24)); b6 -= b7 + k8 + t0; b7 -= k0 + 11;
tmp = b5 ^ b4; b5 = (tmp >> 34) | (tmp << (64 - 34)); b4 -= b5 + k6; b5 -= k7 + t2;
tmp = b3 ^ b2; b3 = (tmp >> 30) | (tmp << (64 - 30)); b2 -= b3 + k4; b3 -= k5;
tmp = b1 ^ b0; b1 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b1 + k2; b1 -= k3;
tmp = b3 ^ b4; b3 = (tmp >> 56) | (tmp << (64 - 56)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 54) | (tmp << (64 - 54)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 9) | (tmp << (64 - 9)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 44) | (tmp << (64 - 44)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 39) | (tmp << (64 - 39)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 36) | (tmp << (64 - 36)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 49) | (tmp << (64 - 49)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 17) | (tmp << (64 - 17)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 42) | (tmp << (64 - 42)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 14) | (tmp << (64 - 14)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 27) | (tmp << (64 - 27)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 37) | (tmp << (64 - 37)); b6 -= b7 + k7 + t2; b7 -= k8 + 10;
tmp = b5 ^ b4; b5 = (tmp >> 19) | (tmp << (64 - 19)); b4 -= b5 + k5; b5 -= k6 + t1;
tmp = b3 ^ b2; b3 = (tmp >> 36) | (tmp << (64 - 36)); b2 -= b3 + k3; b3 -= k4;
tmp = b1 ^ b0; b1 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b1 + k1; b1 -= k2;
tmp = b3 ^ b4; b3 = (tmp >> 22) | (tmp << (64 - 22)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 56) | (tmp << (64 - 56)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 35) | (tmp << (64 - 35)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 8) | (tmp << (64 - 8)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 43) | (tmp << (64 - 43)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 29) | (tmp << (64 - 29)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 25) | (tmp << (64 - 25)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 17) | (tmp << (64 - 17)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 10) | (tmp << (64 - 10)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 50) | (tmp << (64 - 50)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 13) | (tmp << (64 - 13)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 24) | (tmp << (64 - 24)); b6 -= b7 + k6 + t1; b7 -= k7 + 9;
tmp = b5 ^ b4; b5 = (tmp >> 34) | (tmp << (64 - 34)); b4 -= b5 + k4; b5 -= k5 + t0;
tmp = b3 ^ b2; b3 = (tmp >> 30) | (tmp << (64 - 30)); b2 -= b3 + k2; b3 -= k3;
tmp = b1 ^ b0; b1 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b1 + k0; b1 -= k1;
tmp = b3 ^ b4; b3 = (tmp >> 56) | (tmp << (64 - 56)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 54) | (tmp << (64 - 54)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 9) | (tmp << (64 - 9)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 44) | (tmp << (64 - 44)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 39) | (tmp << (64 - 39)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 36) | (tmp << (64 - 36)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 49) | (tmp << (64 - 49)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 17) | (tmp << (64 - 17)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 42) | (tmp << (64 - 42)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 14) | (tmp << (64 - 14)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 27) | (tmp << (64 - 27)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 37) | (tmp << (64 - 37)); b6 -= b7 + k5 + t0; b7 -= k6 + 8;
tmp = b5 ^ b4; b5 = (tmp >> 19) | (tmp << (64 - 19)); b4 -= b5 + k3; b5 -= k4 + t2;
tmp = b3 ^ b2; b3 = (tmp >> 36) | (tmp << (64 - 36)); b2 -= b3 + k1; b3 -= k2;
tmp = b1 ^ b0; b1 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b1 + k8; b1 -= k0;
tmp = b3 ^ b4; b3 = (tmp >> 22) | (tmp << (64 - 22)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 56) | (tmp << (64 - 56)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 35) | (tmp << (64 - 35)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 8) | (tmp << (64 - 8)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 43) | (tmp << (64 - 43)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 29) | (tmp << (64 - 29)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 25) | (tmp << (64 - 25)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 17) | (tmp << (64 - 17)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 10) | (tmp << (64 - 10)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 50) | (tmp << (64 - 50)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 13) | (tmp << (64 - 13)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 24) | (tmp << (64 - 24)); b6 -= b7 + k4 + t2; b7 -= k5 + 7;
tmp = b5 ^ b4; b5 = (tmp >> 34) | (tmp << (64 - 34)); b4 -= b5 + k2; b5 -= k3 + t1;
tmp = b3 ^ b2; b3 = (tmp >> 30) | (tmp << (64 - 30)); b2 -= b3 + k0; b3 -= k1;
tmp = b1 ^ b0; b1 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b1 + k7; b1 -= k8;
tmp = b3 ^ b4; b3 = (tmp >> 56) | (tmp << (64 - 56)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 54) | (tmp << (64 - 54)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 9) | (tmp << (64 - 9)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 44) | (tmp << (64 - 44)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 39) | (tmp << (64 - 39)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 36) | (tmp << (64 - 36)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 49) | (tmp << (64 - 49)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 17) | (tmp << (64 - 17)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 42) | (tmp << (64 - 42)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 14) | (tmp << (64 - 14)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 27) | (tmp << (64 - 27)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 37) | (tmp << (64 - 37)); b6 -= b7 + k3 + t1; b7 -= k4 + 6;
tmp = b5 ^ b4; b5 = (tmp >> 19) | (tmp << (64 - 19)); b4 -= b5 + k1; b5 -= k2 + t0;
tmp = b3 ^ b2; b3 = (tmp >> 36) | (tmp << (64 - 36)); b2 -= b3 + k8; b3 -= k0;
tmp = b1 ^ b0; b1 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b1 + k6; b1 -= k7;
tmp = b3 ^ b4; b3 = (tmp >> 22) | (tmp << (64 - 22)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 56) | (tmp << (64 - 56)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 35) | (tmp << (64 - 35)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 8) | (tmp << (64 - 8)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 43) | (tmp << (64 - 43)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 29) | (tmp << (64 - 29)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 25) | (tmp << (64 - 25)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 17) | (tmp << (64 - 17)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 10) | (tmp << (64 - 10)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 50) | (tmp << (64 - 50)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 13) | (tmp << (64 - 13)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 24) | (tmp << (64 - 24)); b6 -= b7 + k2 + t0; b7 -= k3 + 5;
tmp = b5 ^ b4; b5 = (tmp >> 34) | (tmp << (64 - 34)); b4 -= b5 + k0; b5 -= k1 + t2;
tmp = b3 ^ b2; b3 = (tmp >> 30) | (tmp << (64 - 30)); b2 -= b3 + k7; b3 -= k8;
tmp = b1 ^ b0; b1 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b1 + k5; b1 -= k6;
tmp = b3 ^ b4; b3 = (tmp >> 56) | (tmp << (64 - 56)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 54) | (tmp << (64 - 54)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 9) | (tmp << (64 - 9)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 44) | (tmp << (64 - 44)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 39) | (tmp << (64 - 39)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 36) | (tmp << (64 - 36)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 49) | (tmp << (64 - 49)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 17) | (tmp << (64 - 17)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 42) | (tmp << (64 - 42)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 14) | (tmp << (64 - 14)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 27) | (tmp << (64 - 27)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 37) | (tmp << (64 - 37)); b6 -= b7 + k1 + t2; b7 -= k2 + 4;
tmp = b5 ^ b4; b5 = (tmp >> 19) | (tmp << (64 - 19)); b4 -= b5 + k8; b5 -= k0 + t1;
tmp = b3 ^ b2; b3 = (tmp >> 36) | (tmp << (64 - 36)); b2 -= b3 + k6; b3 -= k7;
tmp = b1 ^ b0; b1 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b1 + k4; b1 -= k5;
tmp = b3 ^ b4; b3 = (tmp >> 22) | (tmp << (64 - 22)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 56) | (tmp << (64 - 56)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 35) | (tmp << (64 - 35)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 8) | (tmp << (64 - 8)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 43) | (tmp << (64 - 43)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 29) | (tmp << (64 - 29)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 25) | (tmp << (64 - 25)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 17) | (tmp << (64 - 17)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 10) | (tmp << (64 - 10)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 50) | (tmp << (64 - 50)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 13) | (tmp << (64 - 13)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 24) | (tmp << (64 - 24)); b6 -= b7 + k0 + t1; b7 -= k1 + 3;
tmp = b5 ^ b4; b5 = (tmp >> 34) | (tmp << (64 - 34)); b4 -= b5 + k7; b5 -= k8 + t0;
tmp = b3 ^ b2; b3 = (tmp >> 30) | (tmp << (64 - 30)); b2 -= b3 + k5; b3 -= k6;
tmp = b1 ^ b0; b1 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b1 + k3; b1 -= k4;
tmp = b3 ^ b4; b3 = (tmp >> 56) | (tmp << (64 - 56)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 54) | (tmp << (64 - 54)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 9) | (tmp << (64 - 9)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 44) | (tmp << (64 - 44)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 39) | (tmp << (64 - 39)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 36) | (tmp << (64 - 36)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 49) | (tmp << (64 - 49)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 17) | (tmp << (64 - 17)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 42) | (tmp << (64 - 42)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 14) | (tmp << (64 - 14)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 27) | (tmp << (64 - 27)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 37) | (tmp << (64 - 37)); b6 -= b7 + k8 + t0; b7 -= k0 + 2;
tmp = b5 ^ b4; b5 = (tmp >> 19) | (tmp << (64 - 19)); b4 -= b5 + k6; b5 -= k7 + t2;
tmp = b3 ^ b2; b3 = (tmp >> 36) | (tmp << (64 - 36)); b2 -= b3 + k4; b3 -= k5;
tmp = b1 ^ b0; b1 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b1 + k2; b1 -= k3;
tmp = b3 ^ b4; b3 = (tmp >> 22) | (tmp << (64 - 22)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 56) | (tmp << (64 - 56)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 35) | (tmp << (64 - 35)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 8) | (tmp << (64 - 8)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 43) | (tmp << (64 - 43)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 29) | (tmp << (64 - 29)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 25) | (tmp << (64 - 25)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 17) | (tmp << (64 - 17)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 10) | (tmp << (64 - 10)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 50) | (tmp << (64 - 50)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 13) | (tmp << (64 - 13)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 24) | (tmp << (64 - 24)); b6 -= b7 + k7 + t2; b7 -= k8 + 1;
tmp = b5 ^ b4; b5 = (tmp >> 34) | (tmp << (64 - 34)); b4 -= b5 + k5; b5 -= k6 + t1;
tmp = b3 ^ b2; b3 = (tmp >> 30) | (tmp << (64 - 30)); b2 -= b3 + k3; b3 -= k4;
tmp = b1 ^ b0; b1 = (tmp >> 39) | (tmp << (64 - 39)); b0 -= b1 + k1; b1 -= k2;
tmp = b3 ^ b4; b3 = (tmp >> 56) | (tmp << (64 - 56)); b4 -= b3;
tmp = b5 ^ b2; b5 = (tmp >> 54) | (tmp << (64 - 54)); b2 -= b5;
tmp = b7 ^ b0; b7 = (tmp >> 9) | (tmp << (64 - 9)); b0 -= b7;
tmp = b1 ^ b6; b1 = (tmp >> 44) | (tmp << (64 - 44)); b6 -= b1;
tmp = b7 ^ b2; b7 = (tmp >> 39) | (tmp << (64 - 39)); b2 -= b7;
tmp = b5 ^ b0; b5 = (tmp >> 36) | (tmp << (64 - 36)); b0 -= b5;
tmp = b3 ^ b6; b3 = (tmp >> 49) | (tmp << (64 - 49)); b6 -= b3;
tmp = b1 ^ b4; b1 = (tmp >> 17) | (tmp << (64 - 17)); b4 -= b1;
tmp = b3 ^ b0; b3 = (tmp >> 42) | (tmp << (64 - 42)); b0 -= b3;
tmp = b5 ^ b6; b5 = (tmp >> 14) | (tmp << (64 - 14)); b6 -= b5;
tmp = b7 ^ b4; b7 = (tmp >> 27) | (tmp << (64 - 27)); b4 -= b7;
tmp = b1 ^ b2; b1 = (tmp >> 33) | (tmp << (64 - 33)); b2 -= b1;
tmp = b7 ^ b6; b7 = (tmp >> 37) | (tmp << (64 - 37)); b6 -= b7 + k6 + t1; b7 -= k7;
tmp = b5 ^ b4; b5 = (tmp >> 19) | (tmp << (64 - 19)); b4 -= b5 + k4; b5 -= k5 + t0;
tmp = b3 ^ b2; b3 = (tmp >> 36) | (tmp << (64 - 36)); b2 -= b3 + k2; b3 -= k3;
tmp = b1 ^ b0; b1 = (tmp >> 46) | (tmp << (64 - 46)); b0 -= b1 + k0; b1 -= k1;
output[0] = b0; output[0] = b0;
output[1] = b1; output[1] = b1;
......
...@@ -3,7 +3,8 @@ ...@@ -3,7 +3,8 @@
#include <linux/string.h> #include <linux/string.h>
#include <threefishApi.h> #include <threefishApi.h>
void threefishSetKey(struct threefish_key *keyCtx, enum threefish_size stateSize, void threefishSetKey(struct threefish_key *keyCtx,
enum threefish_size stateSize,
u64 *keyData, u64 *tweak) u64 *keyData, u64 *tweak)
{ {
int keyWords = stateSize / 64; int keyWords = stateSize / 64;
...@@ -28,9 +29,9 @@ void threefishEncryptBlockBytes(struct threefish_key *keyCtx, u8 *in, ...@@ -28,9 +29,9 @@ void threefishEncryptBlockBytes(struct threefish_key *keyCtx, u8 *in,
u64 plain[SKEIN_MAX_STATE_WORDS]; /* max number of words*/ u64 plain[SKEIN_MAX_STATE_WORDS]; /* max number of words*/
u64 cipher[SKEIN_MAX_STATE_WORDS]; u64 cipher[SKEIN_MAX_STATE_WORDS];
Skein_Get64_LSB_First(plain, in, keyCtx->stateSize / 64); /* bytes to words */ Skein_Get64_LSB_First(plain, in, keyCtx->stateSize / 64);
threefishEncryptBlockWords(keyCtx, plain, cipher); threefishEncryptBlockWords(keyCtx, plain, cipher);
Skein_Put64_LSB_First(out, cipher, keyCtx->stateSize / 8); /* words to bytes */ Skein_Put64_LSB_First(out, cipher, keyCtx->stateSize / 8);
} }
void threefishEncryptBlockWords(struct threefish_key *keyCtx, u64 *in, void threefishEncryptBlockWords(struct threefish_key *keyCtx, u64 *in,
...@@ -55,9 +56,9 @@ void threefishDecryptBlockBytes(struct threefish_key *keyCtx, u8 *in, ...@@ -55,9 +56,9 @@ void threefishDecryptBlockBytes(struct threefish_key *keyCtx, u8 *in,
u64 plain[SKEIN_MAX_STATE_WORDS]; /* max number of words*/ u64 plain[SKEIN_MAX_STATE_WORDS]; /* max number of words*/
u64 cipher[SKEIN_MAX_STATE_WORDS]; u64 cipher[SKEIN_MAX_STATE_WORDS];
Skein_Get64_LSB_First(cipher, in, keyCtx->stateSize / 64); /* bytes to words */ Skein_Get64_LSB_First(cipher, in, keyCtx->stateSize / 64);
threefishDecryptBlockWords(keyCtx, cipher, plain); threefishDecryptBlockWords(keyCtx, cipher, plain);
Skein_Put64_LSB_First(out, plain, keyCtx->stateSize / 8); /* words to bytes */ Skein_Put64_LSB_First(out, plain, keyCtx->stateSize / 8);
} }
void threefishDecryptBlockWords(struct threefish_key *keyCtx, u64 *in, void threefishDecryptBlockWords(struct threefish_key *keyCtx, u64 *in,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment