mirror of
https://github.com/Ed94/Odin.git
synced 2026-07-05 11:11:37 -07:00
Compare commits
1040 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 16c5c69a40 | |||
| 39b49cb6fb | |||
| 9871ad5fc8 | |||
| 28c643d23f | |||
| dd1f151696 | |||
| c1605b5b4f | |||
| 6e150d1d58 | |||
| 722b638e2c | |||
| c1cb1a3d7e | |||
| a4ac50a5b4 | |||
| 6ba1506aa9 | |||
| b4bdb73158 | |||
| 476030dd28 | |||
| e0a2e52601 | |||
| 7f3d8e115f | |||
| 584e8859bf | |||
| 8e855155fd | |||
| 7624ecf4ba | |||
| bfedcd7035 | |||
| 6ef49d4f24 | |||
| 8ad4427a25 | |||
| db3bcd2cea | |||
| 3072a02d5d | |||
| 773703bc83 | |||
| a4e865f90b | |||
| 9197a126fc | |||
| 38ea276231 | |||
| b020b91df2 | |||
| 291048cb3b | |||
| 3135c89a0a | |||
| 47f423c123 | |||
| a7a2611efc | |||
| 9866c83d61 | |||
| 655610ec87 | |||
| a28d574272 | |||
| f93779d425 | |||
| a10f988020 | |||
| 39bdf4d710 | |||
| 7862460374 | |||
| 92821300e4 | |||
| 94a1a7aed5 | |||
| 5a22d49b72 | |||
| 942017b958 | |||
| fd2ad20cd4 | |||
| cca385209b | |||
| fe2d256468 | |||
| 0f2ad95014 | |||
| 7fa2e56cd9 | |||
| 24a53c246f | |||
| b11fa90ed2 | |||
| f453054aff | |||
| ef99373c31 | |||
| 1cd5cbb0e4 | |||
| 56f232e5fc | |||
| de1432b315 | |||
| e7d6e2d163 | |||
| 8b40be50b9 | |||
| 6798d15ecb | |||
| 6aedb2695a | |||
| 0243647e15 | |||
| 3ec4db212b | |||
| 7683c1f4bb | |||
| ef2cd9d97f | |||
| 5b9e9fb822 | |||
| 741ccd7ff5 | |||
| eb92a2df71 | |||
| d60f404bb4 | |||
| 6c4806835c | |||
| e1555e04ad | |||
| 7061032db6 | |||
| 861d892ffe | |||
| 5c8c63ae04 | |||
| 1ced76cdd1 | |||
| e83b982afe | |||
| 7c6cc81041 | |||
| 521e47ffee | |||
| 8251f4d7d0 | |||
| da49f7eb4e | |||
| d66486c17e | |||
| 981a2e1a00 | |||
| eb6e5ee3a1 | |||
| 5e850e24d6 | |||
| 17d10b72d5 | |||
| b08c14b710 | |||
| d3fca60e4d | |||
| d602e2cc74 | |||
| 831e26cb6f | |||
| 7b276cbc7e | |||
| 7e4750c303 | |||
| 967b6d46b2 | |||
| fcafd83506 | |||
| 21dab9673d | |||
| 16503c3b91 | |||
| 1e6419b5b7 | |||
| ef66f96cf2 | |||
| 9684ade23e | |||
| 8694fa5f78 | |||
| fb248056c9 | |||
| ade4e312fe | |||
| 12bd07d2df | |||
| 91f8c626bb | |||
| c1684d6335 | |||
| 1a4faff9c9 | |||
| 6c17642b91 | |||
| ef254ec055 | |||
| 1f04f541cf | |||
| dd86a8f013 | |||
| 43ec2b9253 | |||
| 60cd0da2ed | |||
| c21bbae427 | |||
| d7e977069a | |||
| ef98aa1fe1 | |||
| 4e2bce0c83 | |||
| d43c6e39f6 | |||
| f56b895c05 | |||
| 54a420f3eb | |||
| 3e0f6bda91 | |||
| 9fac03b84c | |||
| 0a8f85e879 | |||
| 49606ec3ea | |||
| d338642dc4 | |||
| 95d1c29c2a | |||
| c424c94030 | |||
| 8de1e88c4f | |||
| 3119e0489f | |||
| db94e646e0 | |||
| 4737485f09 | |||
| d299d4e1cd | |||
| 9aeb0d0fb6 | |||
| 0ea6809b49 | |||
| a0fe6da10b | |||
| 8c952878fb | |||
| eda58357b6 | |||
| 7b14c267fb | |||
| cc2ed2b302 | |||
| 17cc423a17 | |||
| fa84e86766 | |||
| 0d7e6430eb | |||
| 4cf1af3f7d | |||
| eef4dffa12 | |||
| 25e8a6353d | |||
| ae5ef0ce9c | |||
| 574a5015d6 | |||
| 0a825fc44d | |||
| aa659a637a | |||
| 0e71e93b22 | |||
| 61b3af5b08 | |||
| 19b95349a6 | |||
| d0eaf7642d | |||
| eb799393d5 | |||
| 8ba87e01bd | |||
| daccd72162 | |||
| 8b248673c1 | |||
| faec52623d | |||
| b6d9a0c32e | |||
| a4cc207022 | |||
| 683dde1fa0 | |||
| ca4d91a8a3 | |||
| 65543b993f | |||
| 00fb60d3d9 | |||
| 68a83abcd6 | |||
| 4458ca4585 | |||
| 5ef8a092f6 | |||
| c969bee86d | |||
| 426367c522 | |||
| d410281766 | |||
| ce53805d94 | |||
| 963e8544f4 | |||
| c2a7c29ce8 | |||
| b3f22133f3 | |||
| caef37bc18 | |||
| 574dc5efe6 | |||
| a66520ba57 | |||
| b9043db434 | |||
| 58e5078b66 | |||
| 70932dc478 | |||
| a8bc6f08a9 | |||
| 07aedb0b89 | |||
| 63cd9a031a | |||
| c77e8ca401 | |||
| 58e811eea5 | |||
| c0125f3192 | |||
| 1bcc074223 | |||
| e7b8e61c68 | |||
| bbe4c32e32 | |||
| 0d39f52b4d | |||
| 14169ae5b4 | |||
| 6b4b0cea5d | |||
| 85a33a1c9b | |||
| 8f2d3dc955 | |||
| daa0779c01 | |||
| d7e81e86f5 | |||
| 06fb500dfe | |||
| 5ef58049dc | |||
| 5b22bfa2b7 | |||
| ca6ef95b03 | |||
| 53500699a9 | |||
| 29838da782 | |||
| 97795ff3a2 | |||
| 6df51b42a8 | |||
| 3228178d87 | |||
| 3fc1f6f1cd | |||
| 2999c02f65 | |||
| 9d7ab8d5ca | |||
| 2a0785037b | |||
| 9e65e229d0 | |||
| 34a9e20531 | |||
| 0fa24ac3c4 | |||
| 17eb0b5ee0 | |||
| b49b80bdf9 | |||
| 2906f2aa7e | |||
| f96991364a | |||
| 359a212a78 | |||
| ea7e8f075d | |||
| d99a6ad003 | |||
| 95c3b8a8de | |||
| ae41779366 | |||
| bb3f848b4a | |||
| a51b9c6fb0 | |||
| 8e52a52580 | |||
| b7b3ada3b1 | |||
| 9a895a318c | |||
| b2e64b7ce0 | |||
| 7020e9b66a | |||
| f49ebae956 | |||
| 0e82a46047 | |||
| c4e0cbcd87 | |||
| f82bf6cd42 | |||
| 60b2979cfd | |||
| 17740966e5 | |||
| 5c06fcd346 | |||
| 22a82e73d6 | |||
| bbb5593f87 | |||
| d00057c803 | |||
| acfd9dcd1b | |||
| 9553bc3689 | |||
| ebbb70f11d | |||
| 142bda2804 | |||
| 478f529744 | |||
| 536a342873 | |||
| 43dc7704a5 | |||
| e8933e43ec | |||
| f7d7d65bc0 | |||
| 970dc7a1f2 | |||
| a07878be71 | |||
| 14e207088a | |||
| 6c46c9e04b | |||
| 40b8150a23 | |||
| 31bb3dc4f0 | |||
| 65ce7687d7 | |||
| 6fc39c1b5d | |||
| 07a9c69714 | |||
| 004036dc59 | |||
| d91fa162d8 | |||
| 6e91f6ab31 | |||
| b16d5198a3 | |||
| 12813e5d48 | |||
| 6422e49255 | |||
| f7234e2ed3 | |||
| e810c3eace | |||
| 18b6af1858 | |||
| 03dd38f203 | |||
| eb6f2078cf | |||
| df61096317 | |||
| 8abe054665 | |||
| 324d9c2284 | |||
| f657b4cc0c | |||
| c98c95fcf0 | |||
| fd5376ba88 | |||
| 78fa675c1a | |||
| 4c19f507ff | |||
| 651122e9db | |||
| d1e114742e | |||
| 29a6959a56 | |||
| 491def9d5b | |||
| 427c48221d | |||
| 1c97739ca3 | |||
| 0b26115805 | |||
| 1c8672ac6c | |||
| 0f052dbde7 | |||
| 4bb51249d1 | |||
| e29f0a0f40 | |||
| f837e35f4b | |||
| 61ee2efa35 | |||
| e94c4e1e18 | |||
| b07d0b38b1 | |||
| a73677d21a | |||
| 67ea7bb65a | |||
| 175f5b0bb1 | |||
| 939ba4cf08 | |||
| f00f68ef6f | |||
| de9abe1f7b | |||
| bd808f9ec6 | |||
| 7474db6a34 | |||
| 00eb702c4a | |||
| ea5783c2ac | |||
| e05fddc001 | |||
| 406e60f5dd | |||
| 5115aee23f | |||
| 72220a855f | |||
| 4a61bac100 | |||
| 726891588f | |||
| 2a5ceff667 | |||
| 9e2c5acb9d | |||
| 7c9ca60ff0 | |||
| 379cd6fe66 | |||
| ffff3c3c88 | |||
| a4d459f651 | |||
| ff0ca0bd53 | |||
| 07b9f7d280 | |||
| 9f80191f60 | |||
| efe68c2e24 | |||
| 3847d03248 | |||
| bb54a0a972 | |||
| ac68a9d52c | |||
| d4d46df2fc | |||
| 660b6ff0f1 | |||
| 9ccaca7f1a | |||
| 926c419ef8 | |||
| 22b32ab0fa | |||
| 40361f877e | |||
| f2ba3da895 | |||
| 55be3e60a0 | |||
| 62911539cd | |||
| c7af8af76a | |||
| e3d5bbe62c | |||
| 2584c6bcd7 | |||
| 66d3082a9a | |||
| 09a07bd7e0 | |||
| 848f7e117f | |||
| d93364ce52 | |||
| 4c4f24b5f4 | |||
| 069ad446cd | |||
| 02b4bb8491 | |||
| 2808ecc5b6 | |||
| a1d518442a | |||
| 0d916a659e | |||
| 3a32250b80 | |||
| 053e65a1c8 | |||
| 7af0291c7d | |||
| 0af025d056 | |||
| 9eb7186cda | |||
| d68cc41782 | |||
| fb09ec06d3 | |||
| a4ac3cc6e8 | |||
| 4f56822204 | |||
| f22ff21039 | |||
| b71e0c2e36 | |||
| fd9a008e1e | |||
| 450fc3ec77 | |||
| 0a118a14ab | |||
| f42a22369e | |||
| 675add4d90 | |||
| 1761802330 | |||
| 99aa0d3a35 | |||
| dec97cbddb | |||
| f0840ed24e | |||
| b9e68ee3c1 | |||
| d6336e7252 | |||
| c6a0d302b3 | |||
| 26fa3aca44 | |||
| e3f375afd8 | |||
| 8a67221bd4 | |||
| 67251ad5b8 | |||
| 938e81e341 | |||
| f5fe0de0fd | |||
| 7fd8b9c55b | |||
| a2cd763dbb | |||
| cf8f6afeff | |||
| afbdc78f85 | |||
| 6918d8aaa6 | |||
| e226d37803 | |||
| 4f816aabb3 | |||
| d98f848247 | |||
| 1242b6e82f | |||
| 9759d56c81 | |||
| d73ad8eb1e | |||
| a52f7c129f | |||
| e54d6e5a11 | |||
| f7a73b9555 | |||
| 9d2b4b2f03 | |||
| 5d5addd48f | |||
| c69fa87d53 | |||
| bf53d2f6db | |||
| d267735d99 | |||
| 554b0e5bf7 | |||
| ba1ad82c2b | |||
| 38aea1f907 | |||
| b381791f42 | |||
| 14ceb0b19d | |||
| 8efc98ce90 | |||
| 1f3107e693 | |||
| 708f053fe6 | |||
| ac7f44b1b8 | |||
| b1d6e4139e | |||
| dfc4df9807 | |||
| b9293334ca | |||
| b68311d777 | |||
| c0f9655ec4 | |||
| 7875e4a1ab | |||
| be7a1f671c | |||
| c9ca192f33 | |||
| e7e7fe766a | |||
| 0d29cc3375 | |||
| c8a62ee4ec | |||
| 12dd0cb72a | |||
| 912f99abc8 | |||
| baacc512e3 | |||
| c5ed7083d2 | |||
| d69e0bfa00 | |||
| fa6f343261 | |||
| c4d43bbab0 | |||
| cc24d2de3e | |||
| bc6deab175 | |||
| b5a612202f | |||
| 1e09ff3b2e | |||
| 933f9f9bd1 | |||
| b82cfc5f15 | |||
| a05b73c632 | |||
| 4d27898418 | |||
| 80d1e1ba82 | |||
| dda89a69bf | |||
| f328929939 | |||
| 3d0519fe6c | |||
| a4eeb6ed02 | |||
| 94c62fb630 | |||
| a7fe9eec73 | |||
| d93f3c63d8 | |||
| ada3df303e | |||
| 8b98fff98e | |||
| c3ba8fbd09 | |||
| 4b9ca39522 | |||
| 26633bbce6 | |||
| 9625798513 | |||
| 796feeead9 | |||
| 2bf055ec6e | |||
| c9b69d76b0 | |||
| 69a15ca5b6 | |||
| 6fe938b946 | |||
| e5a478d393 | |||
| 793811b219 | |||
| 0418d27bdf | |||
| 28c98c2e7a | |||
| f66fcd9acb | |||
| 8deeb40e5d | |||
| 39d557bcb4 | |||
| e27a424f4d | |||
| 7a367c9c08 | |||
| 4423aac3de | |||
| 94ded29b5f | |||
| 6a6f078186 | |||
| bed18a17e6 | |||
| 17ebaffce8 | |||
| 4902288a5a | |||
| a1c3c38f04 | |||
| 4c0ab09c9a | |||
| 6cc7f3b451 | |||
| 3512d7c672 | |||
| eba0774bf3 | |||
| 7c3461b0df | |||
| 2cc3795686 | |||
| b63657d293 | |||
| 6175efde3d | |||
| 030220eb22 | |||
| 2d32b819dc | |||
| f56abf3780 | |||
| 78919f8524 | |||
| 80ea4e0aeb | |||
| 84ac56f778 | |||
| 7e701d1677 | |||
| b67ed78afd | |||
| 90fc52c2ee | |||
| 9a01a13914 | |||
| eeb92e2644 | |||
| fd06be2243 | |||
| a06cb8ba46 | |||
| d0d4f19097 | |||
| 8f5b838a07 | |||
| ba0581ae79 | |||
| d9404d928c | |||
| fe754af13d | |||
| 61e770d943 | |||
| 2b63684ccf | |||
| 5ece6980eb | |||
| 42a2297d31 | |||
| 8de48d81ea | |||
| 32fb1fb61c | |||
| 05c50561ae | |||
| 46455dd0a6 | |||
| 3f9ddfe029 | |||
| 14858309f0 | |||
| e17fc8272b | |||
| dde42f0ebc | |||
| ca7e46d56f | |||
| 743480b1a4 | |||
| 1ccb0b2558 | |||
| babdc432e9 | |||
| d3a51e208d | |||
| cd8272557f | |||
| 7e0fa795e4 | |||
| 6252712363 | |||
| 60bc7f53d2 | |||
| cf3c1a85ec | |||
| 046f72befd | |||
| f19436fb4d | |||
| d0709a7de2 | |||
| f427f040fd | |||
| c078b2dd1b | |||
| bf948ab8ae | |||
| acb1ebddf6 | |||
| 0bedd3357a | |||
| 93fabf8628 | |||
| fc10b781af | |||
| 6a6b5061db | |||
| b67817517e | |||
| fff1d509d5 | |||
| e8d26c5797 | |||
| 3d992e2704 | |||
| 0c5b645dde | |||
| cf5ec25873 | |||
| 7663a2036a | |||
| c32f345c68 | |||
| 649b25fba6 | |||
| bdbbbf5c95 | |||
| e8b6d15db9 | |||
| 5f7fef92fa | |||
| 66b86bc7e0 | |||
| 5187bb68bb | |||
| 97c499dbb4 | |||
| 1d75a612d5 | |||
| 160048eaee | |||
| def2e2e271 | |||
| 28666414bc | |||
| a241168142 | |||
| 29b6eebcd5 | |||
| 7dae38ce89 | |||
| 1826b0c700 | |||
| 9f9abb8fb3 | |||
| e60951a902 | |||
| 71932628cc | |||
| c691c7dc68 | |||
| 2a7db08c20 | |||
| 2b89829b52 | |||
| e77977c8ef | |||
| 38b96a7981 | |||
| 4dd846fa29 | |||
| 61008232e4 | |||
| 90e573c54a | |||
| ac483f72eb | |||
| 99d9e8f8b1 | |||
| 9f5854e24e | |||
| 772dce7e42 | |||
| e55d09bdfa | |||
| 1a16585b10 | |||
| 500c117312 | |||
| 723e6d19be | |||
| 8e8660fdfc | |||
| 3bfccde6f9 | |||
| e7041f785a | |||
| 83599957b1 | |||
| f8e0cda113 | |||
| 0fa62937d5 | |||
| f627a38b4f | |||
| cc8c2f236b | |||
| 953ae32607 | |||
| fdfe6b00e0 | |||
| 8a9901aeab | |||
| 1f2ab84e82 | |||
| 92831d7ca3 | |||
| f33b4ecd3e | |||
| 62191f54a0 | |||
| 04c3723985 | |||
| ac19bb3a8c | |||
| 1913c08b7b | |||
| 744d7f7ef4 | |||
| c7eb2ae6bb | |||
| 792640df1f | |||
| 278a63caaa | |||
| a03dffcd1a | |||
| 3aac4b1a3e | |||
| 478b2d7444 | |||
| 7f17d4eb7f | |||
| 4d1d754cae | |||
| 24e6f16f4a | |||
| f852aac0e7 | |||
| 27f75c40ab | |||
| 1d598f8287 | |||
| cb31df34c1 | |||
| 271ec643ed | |||
| fe587ee79a | |||
| b149b3d6c6 | |||
| 03f3ea5a9e | |||
| 6e6f2a1f6b | |||
| 58ab6e5f94 | |||
| 4e194d5dbd | |||
| 00c6b83537 | |||
| ca7d86084e | |||
| 9088a493d9 | |||
| fe4754a77c | |||
| 9d67d12d22 | |||
| 9e82ae10ac | |||
| 85880f9def | |||
| 4ff62994bf | |||
| 90f1f7fbdf | |||
| 9b624ef9e1 | |||
| f03c2b7783 | |||
| ff492e615c | |||
| 042f6de478 | |||
| c52a8a5f86 | |||
| 16b644ad79 | |||
| e8537a3134 | |||
| e642be8550 | |||
| 33d6677514 | |||
| 8d6ff51922 | |||
| a5fa93e06d | |||
| 215b21811e | |||
| 95a8a4e7f0 | |||
| ecdd3887b2 | |||
| 2dbccbde54 | |||
| 07b1819dc8 | |||
| 9d8953538b | |||
| d4af7b86a7 | |||
| 38e983cac6 | |||
| 2ddaae45f3 | |||
| 16bdc6d240 | |||
| 6d2487a692 | |||
| c407e423d9 | |||
| 0e91c8368c | |||
| f08a53015c | |||
| c98bb7da39 | |||
| ba81a81ca8 | |||
| 57dc6c2e94 | |||
| 95412df129 | |||
| 2a2bedc85c | |||
| f6488383d7 | |||
| 0455e4b60f | |||
| 545fbc54c7 | |||
| 82deaa59ad | |||
| 3c7d1f35db | |||
| f9ef951b22 | |||
| fc2e31fcd0 | |||
| e8e51db9ff | |||
| a429603195 | |||
| 42833d0471 | |||
| f3f08a4b47 | |||
| efa8c92bab | |||
| 65fec9134e | |||
| a28392852a | |||
| 8037ace873 | |||
| 182454a1c0 | |||
| 24f9e2bbeb | |||
| 61b9a5dbb2 | |||
| 0403626acf | |||
| 6fc3d32d4b | |||
| eb4f850b7f | |||
| da80c51195 | |||
| 9d99f98194 | |||
| c32e12c3f5 | |||
| c5fb72340a | |||
| b0fe777ede | |||
| bc5b41938e | |||
| 527c0b3202 | |||
| 6eb28aeafc | |||
| 12b971746c | |||
| ef84382f23 | |||
| b8f3d0fb53 | |||
| be38ba6c5e | |||
| 3e49ceb82a | |||
| 730e10bd6f | |||
| cb0704d51c | |||
| 9cad8179b7 | |||
| ccf8b2764d | |||
| 915c5c3a87 | |||
| fcaa47986a | |||
| 07d2aba310 | |||
| 4889641af4 | |||
| 90a4d12b30 | |||
| a39b6de18c | |||
| 39657e4d96 | |||
| 73a9a97413 | |||
| 1873f7215d | |||
| 4d86012d96 | |||
| a055c03de9 | |||
| 68550cf915 | |||
| 3e618bed40 | |||
| 51733fce68 | |||
| a056e9da81 | |||
| b84b4c47d7 | |||
| 431227d4c5 | |||
| 1a6885c2a3 | |||
| c3a57853e2 | |||
| fc5ce30f34 | |||
| 572b400d8e | |||
| f78a792d48 | |||
| e0a8bd04d5 | |||
| f560b14d10 | |||
| b584eeaade | |||
| cdd2c98b8d | |||
| 9d6ed991cb | |||
| 2385e1ddd9 | |||
| b3ca2d5e0a | |||
| 64614889de | |||
| d9d044970e | |||
| 0c78cab336 | |||
| 15997d2a90 | |||
| 7237f9c9f8 | |||
| ba3d7ba5d3 | |||
| f226eba342 | |||
| 393ca40c23 | |||
| 62f455f47b | |||
| 163287d9ce | |||
| 2f8399fe20 | |||
| 685dbddcb5 | |||
| 27f9f0ba17 | |||
| 2b6a926bb6 | |||
| b3d798cb76 | |||
| 7134015f56 | |||
| 7b501b22bb | |||
| 75605a47e7 | |||
| 4dcb75af6d | |||
| c768d0719a | |||
| f04db7145c | |||
| 0bb4cc6ce5 | |||
| 07121f81ff | |||
| 5520b45457 | |||
| cb16d2ddaf | |||
| a6d1a2e46c | |||
| 47f14dd9ea | |||
| 853487e86c | |||
| 0d881e1561 | |||
| ba49950454 | |||
| f66133959f | |||
| 28fac62a02 | |||
| a3fe5754d9 | |||
| df56655ab1 | |||
| 6702f07762 | |||
| 03e90bf924 | |||
| 8d70a264ab | |||
| fe718460c6 | |||
| 1afb10109e | |||
| 3a162de18f | |||
| 321ef82d76 | |||
| 169fc4d3be | |||
| affe8f7144 | |||
| 23ca27f40b | |||
| 03426175ae | |||
| 0a9c150748 | |||
| f362e0fa20 | |||
| d4d910bcfc | |||
| 3a75a8dd1b | |||
| 33dae2e26c | |||
| 69026852ce | |||
| f578994fa6 | |||
| 1bc21c3481 | |||
| c9c0b9ea7b | |||
| 390cd3c30d | |||
| 401877184f | |||
| 4815154c31 | |||
| 0d8dadec8a | |||
| ff13ee3281 | |||
| dcaf085bfa | |||
| 55e0f97cc4 | |||
| 65c91b7dde | |||
| bd562116b8 | |||
| 1e37eaf54d | |||
| 8df61b7209 | |||
| 1a30d47ee8 | |||
| 5cefab8229 | |||
| 7d643bcae3 | |||
| 549311fac9 | |||
| c5decd3eae | |||
| 255f00d971 | |||
| 664a71454b | |||
| 432388ac7f | |||
| cc3cf12ae2 | |||
| d87583bead | |||
| 1b0e98116d | |||
| eb6805ef40 | |||
| a45e05bb18 | |||
| e4ba786948 | |||
| cae8c1e94f | |||
| a8f84c87ae | |||
| c64702ae5a | |||
| 3311ea1c76 | |||
| 018026d844 | |||
| 139c1bcdda | |||
| 46817e8f68 | |||
| 196ac7e6d6 | |||
| e3e31b42d0 | |||
| f7cb711874 | |||
| f657055f12 | |||
| 26a9416a41 | |||
| e768bddaeb | |||
| 83b7dd122a | |||
| b0fbaf24a0 | |||
| 1c3240b6b5 | |||
| 7673d478b6 | |||
| 15fb4ded2a | |||
| 03cb585845 | |||
| 9d84e00502 | |||
| 7703b37a1b | |||
| 95a695e4cd | |||
| 02408b713a | |||
| 251fa477af | |||
| 15a0655988 | |||
| 399c3ab067 | |||
| b7ccfed9af | |||
| 4eca60946c | |||
| 8f4755532e | |||
| 63d94301fc | |||
| e1eed7610c | |||
| 5d6e0bc793 | |||
| c1f5d8f006 | |||
| f3d4a734d8 | |||
| 6387cd2c24 | |||
| 56d55e4a86 | |||
| 6fab055f43 | |||
| 2495f1c39a | |||
| 65dab3a3b1 | |||
| b38237e8f0 | |||
| 63276a85ba | |||
| 3f8712edb0 | |||
| e7d37607ef | |||
| 6feace2351 | |||
| a867378418 | |||
| 11e2aa2d51 | |||
| 556355ef05 | |||
| 5de6016e7f | |||
| 2d56e3805b | |||
| 4f73b35da5 | |||
| 76b0562acd | |||
| d90d7ed002 | |||
| 3d38f14202 | |||
| d1450e3d88 | |||
| eff46c2e5f | |||
| 8319917898 | |||
| 2e0c5fefde | |||
| 88823c4024 | |||
| 6959554040 | |||
| 0a530b5ce8 | |||
| 8650180ccc | |||
| 5027c7081e | |||
| 3533094cc2 | |||
| 5ce6676914 | |||
| 3dff83f3dc | |||
| 8642d719f0 | |||
| 891cf54b5c | |||
| 3ba19d94cf | |||
| c7bd954752 | |||
| 7e4e3429d7 | |||
| edc793d7c1 | |||
| f98a7b84df | |||
| 0050cb9471 | |||
| 64feb7599e | |||
| 4420128dc1 | |||
| cdede4928c | |||
| b686b072d5 | |||
| 76fe5d1346 | |||
| 8cebc7cc0c | |||
| 75076e2d64 | |||
| 64ae99f016 | |||
| d845a07519 | |||
| b4683f4399 | |||
| 79e2f63182 | |||
| 39983eaaa4 | |||
| dbdad0476d | |||
| 3627a51daf | |||
| e715674512 | |||
| 9e0c4098be | |||
| d10694901f | |||
| c75a872909 | |||
| a348a7e84e | |||
| a7d95ab3e1 | |||
| 6ae47c6f43 | |||
| 3291c59728 | |||
| dd75b7a4d9 | |||
| 1770e3a9a9 | |||
| 19753ed8a3 | |||
| 59d87d1f05 | |||
| 6b3453cc64 | |||
| 5e7634d90c | |||
| fc7c7cd8ab | |||
| e05e99253b | |||
| a8ce5bd005 | |||
| 65ca03a930 | |||
| 0dfc660c71 | |||
| c49afb0039 | |||
| b02291b20a | |||
| f75a45c3fa | |||
| 7e41340510 | |||
| 4148c6d6c7 | |||
| cc73e06a46 | |||
| 733b7c704c | |||
| c54e3d3c4f | |||
| 8dc4eca4d2 | |||
| 34c6868e78 | |||
| 38315f83da | |||
| 5aa1f3751b | |||
| 36301d0359 | |||
| c83307f26f | |||
| 30204837ce | |||
| f85f6b338a | |||
| 40625b24e2 | |||
| 1fce5791a1 | |||
| d43a2daa17 | |||
| 7b31acd2d7 | |||
| b83822fd35 | |||
| ae2f16edea | |||
| 9782d7b928 | |||
| 5627af582a | |||
| d701642293 | |||
| 4c1754b1dc | |||
| 666703f430 | |||
| 14dc3598b4 | |||
| 330d0e7a2a | |||
| 952fb998e6 | |||
| 886ee66e7f | |||
| 68b70a2183 | |||
| ccdad8b8dd | |||
| 45bf180847 | |||
| 87ac68fcf2 | |||
| 9575d85f1a | |||
| 2d8d0dd851 | |||
| 1a20b78633 | |||
| 0c27d95679 | |||
| a3af94eeae | |||
| 9ff77397c6 | |||
| 48aef50164 | |||
| fc9c995dc4 | |||
| 564d5af8b2 | |||
| 2c9ef6907a | |||
| 505dec7b14 | |||
| 2a219fa830 | |||
| 498fb00c25 | |||
| ec2234560a | |||
| 5dd89a29e6 | |||
| 8491e2491c | |||
| ddad2011e2 | |||
| 7dd4cccce7 | |||
| 0f664893dd | |||
| 04e0b3e270 | |||
| 930a147f24 | |||
| 86c58167ed | |||
| 212906ecd2 | |||
| 6ab559437a | |||
| db3b4ceb4a | |||
| 39e9b65c59 | |||
| 56856b6307 | |||
| 861ad2037f | |||
| 77d64a5461 | |||
| 88c9512d5f | |||
| b79a098c2e | |||
| 258f120d52 | |||
| 80d122f531 | |||
| efe5e9e349 | |||
| 25904e8f89 | |||
| 2b854c94da | |||
| 8c98ca3a10 | |||
| ecffe5a082 | |||
| 9ecc33570b | |||
| 164a5e587e | |||
| c8432df248 | |||
| f66b7021a6 | |||
| 22fabba2f6 | |||
| c77f5b00bc | |||
| 1bd9fe04c4 | |||
| 3c2161b196 | |||
| 5247d3749c | |||
| 8bee73b08e | |||
| bcdb202639 | |||
| 1eb0bc1408 | |||
| e9a54666e2 | |||
| 8b7c6a23ba | |||
| bf711b282d | |||
| 87bccc8f3f | |||
| 657bc88535 | |||
| 45b2a6a19e | |||
| d78ff0be52 | |||
| 52c219690a | |||
| 053f1759d7 | |||
| b65aa9c439 | |||
| 782286357b | |||
| 6b373cf49e | |||
| a76f9a70d5 | |||
| a276722da5 | |||
| bb6cbf6559 | |||
| f6892d2236 | |||
| 075384b2bb | |||
| 7e6acdf800 | |||
| 8b49549fd3 | |||
| 0ceae51223 | |||
| 761a50e145 | |||
| 5399093050 | |||
| 0ef5191540 | |||
| 10c68a8951 | |||
| 4e18e1b191 | |||
| 6f1cc8071c | |||
| 41282b5211 | |||
| 0697fd0101 | |||
| 62d107bea5 | |||
| b26f4e0766 | |||
| 7233149096 | |||
| c822f0b8c8 | |||
| d912c6e320 | |||
| f7243eac74 | |||
| c2ab2bf25a | |||
| 8cd7fd95a3 | |||
| 8b915aae5e | |||
| f0cf1052d0 | |||
| fa5be93da2 | |||
| ba05c74936 | |||
| e61d893a74 | |||
| 10ce76fcc2 | |||
| c61f5bbfe7 | |||
| acce9acb6d | |||
| 8403952fd2 | |||
| 32dcb3caef | |||
| 465ac74020 | |||
| 37b026cb9b | |||
| d1a205e2cf | |||
| 7f301790d0 | |||
| b0fe6212bb | |||
| a2ad66cd9d | |||
| 2bf00d9b3f | |||
| 43e8da2e06 | |||
| 1758bd683e | |||
| 4b7f32c287 | |||
| 24f9e16dfd | |||
| 56a29685b4 | |||
| cf949e541f | |||
| 44167800ad | |||
| 9c6574e053 | |||
| fc113315f6 | |||
| d931bfcf83 | |||
| b58627490b | |||
| dbaa82b581 | |||
| 84123cc879 | |||
| 7b1f58a06a | |||
| fe268a9dd2 | |||
| 3c25d93eae |
+73
-33
@@ -6,7 +6,7 @@ jobs:
|
||||
name: NetBSD Build, Check, and Test
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
PKGSRC_BRANCH: 2024Q1
|
||||
PKGSRC_BRANCH: 2024Q2
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build, Check, and Test
|
||||
@@ -18,13 +18,11 @@ jobs:
|
||||
usesh: true
|
||||
copyback: false
|
||||
prepare: |
|
||||
PKG_PATH="https://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/$(uname -p)/$(uname -r | cut -d_ -f1)_${PKGSRC_BRANCH}/All" /usr/sbin/pkg_add pkgin
|
||||
pkgin -y in gmake git bash python311
|
||||
pkgin -y in libxml2 perl zstd
|
||||
/usr/sbin/pkg_add https://github.com/andreas-jonsson/llvm17-netbsd-bin/releases/download/pkgsrc-current/llvm-17.0.6.tgz
|
||||
/usr/sbin/pkg_add https://github.com/andreas-jonsson/llvm17-netbsd-bin/releases/download/pkgsrc-current/clang-17.0.6.tgz
|
||||
PKG_PATH="https://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/amd64/$(uname -r | cut -d_ -f1)_${PKGSRC_BRANCH}/All" /usr/sbin/pkg_add pkgin
|
||||
pkgin -y in gmake git bash python311 llvm clang
|
||||
ln -s /usr/pkg/bin/python3.11 /usr/bin/python3
|
||||
run: |
|
||||
set -e -x
|
||||
git config --global --add safe.directory $(pwd)
|
||||
gmake release
|
||||
./odin version
|
||||
@@ -34,10 +32,9 @@ jobs:
|
||||
gmake -C vendor/miniaudio/src
|
||||
./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_amd64
|
||||
./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_arm64
|
||||
./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/core/speed.odin -file -all-packages -o:speed -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
(cd tests/issues; ./run.sh)
|
||||
build_freebsd:
|
||||
name: FreeBSD Build, Check, and Test
|
||||
@@ -63,10 +60,9 @@ jobs:
|
||||
gmake -C vendor/cgltf/src
|
||||
gmake -C vendor/miniaudio/src
|
||||
./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
|
||||
./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/core/speed.odin -file -all-packages -o:speed -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
|
||||
./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
(cd tests/issues; ./run.sh)
|
||||
ci:
|
||||
strategy:
|
||||
@@ -91,13 +87,13 @@ jobs:
|
||||
- name: Download LLVM (MacOS Intel)
|
||||
if: matrix.os == 'macos-13'
|
||||
run: |
|
||||
brew install llvm@17
|
||||
brew install llvm@17 lua@5.4
|
||||
echo "/usr/local/opt/llvm@17/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Download LLVM (MacOS ARM)
|
||||
if: matrix.os == 'macos-14'
|
||||
run: |
|
||||
brew install llvm@17 wasmtime
|
||||
brew install llvm@17 wasmtime lua@5.4
|
||||
echo "/opt/homebrew/opt/llvm@17/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Build Odin
|
||||
@@ -120,15 +116,13 @@ jobs:
|
||||
- name: Odin check examples/all
|
||||
run: ./odin check examples/all -strict-style
|
||||
- name: Normal Core library tests
|
||||
run: ./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
|
||||
run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Optimized Core library tests
|
||||
run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -define:ODIN_TEST_FANCY=false
|
||||
run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Vendor library tests
|
||||
run: ./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
|
||||
run: ./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Internals tests
|
||||
run: ./odin test tests/internal -all-packages -define:ODIN_TEST_FANCY=false
|
||||
- name: Core library benchmarks
|
||||
run: ./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
|
||||
run: ./odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: GitHub Issue tests
|
||||
run: |
|
||||
cd tests/issues
|
||||
@@ -182,37 +176,33 @@ jobs:
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin run examples/demo -debug
|
||||
odin run examples/demo -debug -vet -strict-style -disallow-do
|
||||
- name: Odin check examples/all
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin check examples/all -strict-style
|
||||
odin check examples/all -vet -strict-style -disallow-do
|
||||
- name: Core library tests
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
|
||||
odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Optimized core library tests
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin test tests/core/speed.odin -o:speed -file -all-packages -define:ODIN_TEST_FANCY=false
|
||||
- name: Core library benchmarks
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
|
||||
odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Vendor library tests
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
|
||||
copy vendor\lua\5.4\windows\*.dll .
|
||||
odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Odin internals tests
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin test tests/internal -all-packages -define:ODIN_TEST_FANCY=false
|
||||
odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
|
||||
- name: Odin documentation tests
|
||||
shell: cmd
|
||||
run: |
|
||||
@@ -230,3 +220,53 @@ jobs:
|
||||
run: |
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
|
||||
odin check examples/all -strict-style -target:windows_i386
|
||||
|
||||
build_linux_riscv64:
|
||||
runs-on: ubuntu-latest
|
||||
name: Linux riscv64 (emulated) Build, Check and Test
|
||||
timeout-minutes: 15
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download LLVM (Linux)
|
||||
run: |
|
||||
wget https://apt.llvm.org/llvm.sh
|
||||
chmod +x llvm.sh
|
||||
sudo ./llvm.sh 18
|
||||
echo "/usr/lib/llvm-18/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Build Odin
|
||||
run: ./build_odin.sh release
|
||||
|
||||
- name: Odin version
|
||||
run: ./odin version
|
||||
|
||||
- name: Odin report
|
||||
run: ./odin report
|
||||
|
||||
- name: Compile needed Vendor
|
||||
run: |
|
||||
make -C vendor/stb/src
|
||||
make -C vendor/cgltf/src
|
||||
make -C vendor/miniaudio/src
|
||||
|
||||
- name: Odin check
|
||||
run: ./odin check examples/all -target:linux_riscv64 -vet -strict-style -disallow-do
|
||||
|
||||
- name: Install riscv64 toolchain and qemu
|
||||
run: sudo apt-get install -y qemu-user qemu-user-static gcc-12-riscv64-linux-gnu libc6-riscv64-cross
|
||||
|
||||
- name: Odin run
|
||||
run: ./odin run examples/demo -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
|
||||
|
||||
- name: Odin run -debug
|
||||
run: ./odin run examples/demo -debug -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
|
||||
|
||||
- name: Normal Core library tests
|
||||
run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
|
||||
|
||||
- name: Optimized Core library tests
|
||||
run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
|
||||
|
||||
- name: Internals tests
|
||||
run: ./odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static"
|
||||
|
||||
@@ -50,8 +50,8 @@ jobs:
|
||||
run: |
|
||||
wget https://apt.llvm.org/llvm.sh
|
||||
chmod +x llvm.sh
|
||||
sudo ./llvm.sh 17
|
||||
echo "/usr/lib/llvm-17/bin" >> $GITHUB_PATH
|
||||
sudo ./llvm.sh 18
|
||||
echo "/usr/lib/llvm-18/bin" >> $GITHUB_PATH
|
||||
- name: build odin
|
||||
run: make nightly
|
||||
- name: Odin run
|
||||
@@ -61,7 +61,6 @@ jobs:
|
||||
mkdir dist
|
||||
cp odin dist
|
||||
cp LICENSE dist
|
||||
cp libLLVM* dist
|
||||
cp -r shared dist
|
||||
cp -r base dist
|
||||
cp -r core dist
|
||||
@@ -82,8 +81,8 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Download LLVM and setup PATH
|
||||
run: |
|
||||
brew install llvm@17 dylibbundler
|
||||
echo "/usr/local/opt/llvm@17/bin" >> $GITHUB_PATH
|
||||
brew install llvm@18 dylibbundler
|
||||
echo "/usr/local/opt/llvm@18/bin" >> $GITHUB_PATH
|
||||
- name: build odin
|
||||
# These -L makes the linker prioritize system libraries over LLVM libraries, this is mainly to
|
||||
# not link with libunwind bundled with LLVM but link with libunwind on the system.
|
||||
@@ -116,8 +115,8 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Download LLVM and setup PATH
|
||||
run: |
|
||||
brew install llvm@17 dylibbundler
|
||||
echo "/opt/homebrew/opt/llvm@17/bin" >> $GITHUB_PATH
|
||||
brew install llvm@18 dylibbundler
|
||||
echo "/opt/homebrew/opt/llvm@18/bin" >> $GITHUB_PATH
|
||||
- name: build odin
|
||||
# These -L makes the linker prioritize system libraries over LLVM libraries, this is mainly to
|
||||
# not link with libunwind bundled with LLVM but link with libunwind on the system.
|
||||
|
||||
+1
-34
@@ -17,45 +17,12 @@
|
||||
[Rr]eleases/
|
||||
x64/
|
||||
x86/
|
||||
!/core/simd/x86
|
||||
bld/
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
[Ll]og/
|
||||
![Cc]ore/[Ll]og/
|
||||
tests/documentation/verify/
|
||||
tests/documentation/all.odin-doc
|
||||
tests/internal/test_map
|
||||
tests/internal/test_pow
|
||||
tests/internal/test_rtti
|
||||
tests/core/test_base64
|
||||
tests/core/test_cbor
|
||||
tests/core/test_core_compress
|
||||
tests/core/test_core_container
|
||||
tests/core/test_core_filepath
|
||||
tests/core/test_core_fmt
|
||||
tests/core/test_core_i18n
|
||||
tests/core/test_core_image
|
||||
tests/core/test_core_libc
|
||||
tests/core/test_core_match
|
||||
tests/core/test_core_math
|
||||
tests/core/test_core_net
|
||||
tests/core/test_core_os_exit
|
||||
tests/core/test_core_reflect
|
||||
tests/core/test_core_strings
|
||||
tests/core/test_core_time
|
||||
tests/core/test_crypto
|
||||
tests/core/test_hash
|
||||
tests/core/test_hex
|
||||
tests/core/test_hxa
|
||||
tests/core/test_json
|
||||
tests/core/test_linalg_glsl_math
|
||||
tests/core/test_noise
|
||||
tests/core/test_varint
|
||||
tests/core/test_xml
|
||||
tests/core/test_core_slice
|
||||
tests/core/test_core_thread
|
||||
tests/core/test_core_runtime
|
||||
tests/vendor/vendor_botan
|
||||
# Visual Studio 2015 cache/options directory
|
||||
.vs/
|
||||
# Visual Studio Code options directory
|
||||
|
||||
BIN
Binary file not shown.
@@ -1,4 +1,4 @@
|
||||
all: debug
|
||||
all: default
|
||||
|
||||
demo:
|
||||
./odin run examples/demo/demo.odin -file
|
||||
@@ -6,12 +6,18 @@ demo:
|
||||
report:
|
||||
./odin report
|
||||
|
||||
default:
|
||||
PROGRAM=make ./build_odin.sh # debug
|
||||
|
||||
debug:
|
||||
./build_odin.sh debug
|
||||
|
||||
release:
|
||||
./build_odin.sh release
|
||||
|
||||
release-native:
|
||||
./build_odin.sh release-native
|
||||
|
||||
release_native:
|
||||
./build_odin.sh release-native
|
||||
|
||||
|
||||
@@ -76,9 +76,9 @@ Answers to common questions about Odin.
|
||||
|
||||
Documentation for all the official packages part of the [core](https://pkg.odin-lang.org/core/) and [vendor](https://pkg.odin-lang.org/vendor/) library collections.
|
||||
|
||||
#### [The Odin Wiki](https://github.com/odin-lang/Odin/wiki)
|
||||
#### [Odin Documentation](https://odin-lang.org/docs/)
|
||||
|
||||
A wiki maintained by the Odin community.
|
||||
Documentation for the Odin language itself.
|
||||
|
||||
#### [Odin Discord](https://discord.gg/sVBPHEv)
|
||||
|
||||
|
||||
@@ -38,9 +38,12 @@ count_leading_zeros :: proc(x: $T) -> T where type_is_integer(T) || type_is_sim
|
||||
reverse_bits :: proc(x: $T) -> T where type_is_integer(T) || type_is_simd_vector(T) ---
|
||||
byte_swap :: proc(x: $T) -> T where type_is_integer(T) || type_is_float(T) ---
|
||||
|
||||
overflow_add :: proc(lhs, rhs: $T) -> (T, bool) ---
|
||||
overflow_sub :: proc(lhs, rhs: $T) -> (T, bool) ---
|
||||
overflow_mul :: proc(lhs, rhs: $T) -> (T, bool) ---
|
||||
overflow_add :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
|
||||
overflow_sub :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
|
||||
overflow_mul :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
|
||||
|
||||
saturating_add :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
|
||||
saturating_sub :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
|
||||
|
||||
sqrt :: proc(x: $T) -> T where type_is_float(T) || (type_is_simd_vector(T) && type_is_float(type_elem_type(T))) ---
|
||||
|
||||
@@ -216,14 +219,21 @@ type_map_cell_info :: proc($T: typeid) -> ^runtime.Map_Cell_Info ---
|
||||
type_convert_variants_to_pointers :: proc($T: typeid) -> typeid where type_is_union(T) ---
|
||||
type_merge :: proc($U, $V: typeid) -> typeid where type_is_union(U), type_is_union(V) ---
|
||||
|
||||
type_has_shared_fields :: proc($U, $V: typeid) -> bool typeid where type_is_struct(U), type_is_struct(V) ---
|
||||
|
||||
constant_utf16_cstring :: proc($literal: string) -> [^]u16 ---
|
||||
|
||||
constant_log2 :: proc($v: $T) -> T where type_is_integer(T) ---
|
||||
|
||||
// SIMD related
|
||||
simd_add :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
simd_sub :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
simd_mul :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
simd_div :: proc(a, b: #simd[N]T) -> #simd[N]T where type_is_float(T) ---
|
||||
|
||||
simd_saturating_add :: proc(a, b: #simd[N]T) -> #simd[N]T where type_is_integer(T) ---
|
||||
simd_saturating_sub :: proc(a, b: #simd[N]T) -> #simd[N]T where type_is_integer(T) ---
|
||||
|
||||
// Keeps Odin's Behaviour
|
||||
// (x << y) if y <= mask else 0
|
||||
simd_shl :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T ---
|
||||
@@ -234,9 +244,6 @@ simd_shr :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T ---
|
||||
simd_shl_masked :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T ---
|
||||
simd_shr_masked :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T ---
|
||||
|
||||
simd_add_sat :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
simd_sub_sat :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
|
||||
simd_bit_and :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
simd_bit_or :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
simd_bit_xor :: proc(a, b: #simd[N]T) -> #simd[N]T ---
|
||||
@@ -265,13 +272,28 @@ simd_lanes_ge :: proc(a, b: #simd[N]T) -> #simd[N]Integer ---
|
||||
simd_extract :: proc(a: #simd[N]T, idx: uint) -> T ---
|
||||
simd_replace :: proc(a: #simd[N]T, idx: uint, elem: T) -> #simd[N]T ---
|
||||
|
||||
simd_reduce_add_ordered :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_mul_ordered :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_min :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_max :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_and :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_or :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_xor :: proc(a: #simd[N]T) -> T ---
|
||||
simd_reduce_add_ordered :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
simd_reduce_mul_ordered :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
simd_reduce_min :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
simd_reduce_max :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
simd_reduce_and :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
simd_reduce_or :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
simd_reduce_xor :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
|
||||
|
||||
simd_reduce_any :: proc(a: #simd[N]T) -> T where type_is_boolean(T) ---
|
||||
simd_reduce_all :: proc(a: #simd[N]T) -> T where type_is_boolean(T) ---
|
||||
|
||||
|
||||
simd_gather :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
|
||||
simd_scatter :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) ---
|
||||
|
||||
simd_masked_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
|
||||
simd_masked_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) ---
|
||||
|
||||
simd_masked_expand_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
|
||||
simd_masked_compress_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) ---
|
||||
|
||||
|
||||
|
||||
simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T ---
|
||||
simd_select :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T ---
|
||||
@@ -285,11 +307,11 @@ simd_nearest :: proc(a: #simd[N]any_float) -> #simd[N]any_float ---
|
||||
|
||||
simd_to_bits :: proc(v: #simd[N]T) -> #simd[N]Integer where size_of(T) == size_of(Integer), type_is_unsigned(Integer) ---
|
||||
|
||||
// equivalent a swizzle with descending indices, e.g. reserve(a, 3, 2, 1, 0)
|
||||
simd_reverse :: proc(a: #simd[N]T) -> #simd[N]T ---
|
||||
// equivalent to a swizzle with descending indices, e.g. reserve(a, 3, 2, 1, 0)
|
||||
simd_lanes_reverse :: proc(a: #simd[N]T) -> #simd[N]T ---
|
||||
|
||||
simd_rotate_left :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
|
||||
simd_rotate_right :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
|
||||
simd_lanes_rotate_left :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
|
||||
simd_lanes_rotate_right :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
|
||||
|
||||
// Checks if the current target supports the given target features.
|
||||
//
|
||||
|
||||
+69
-23
@@ -66,7 +66,7 @@ Type_Info_Named :: struct {
|
||||
name: string,
|
||||
base: ^Type_Info,
|
||||
pkg: string,
|
||||
loc: Source_Code_Location,
|
||||
loc: ^Source_Code_Location,
|
||||
}
|
||||
Type_Info_Integer :: struct {signed: bool, endianness: Platform_Endianness}
|
||||
Type_Info_Rune :: struct {}
|
||||
@@ -112,23 +112,32 @@ Type_Info_Parameters :: struct { // Only used for procedures parameters and resu
|
||||
}
|
||||
Type_Info_Tuple :: Type_Info_Parameters // Will be removed eventually
|
||||
|
||||
Type_Info_Struct :: struct {
|
||||
types: []^Type_Info,
|
||||
names: []string,
|
||||
offsets: []uintptr,
|
||||
usings: []bool,
|
||||
tags: []string,
|
||||
is_packed: bool,
|
||||
is_raw_union: bool,
|
||||
is_no_copy: bool,
|
||||
custom_align: bool,
|
||||
Type_Info_Struct_Flags :: distinct bit_set[Type_Info_Struct_Flag; u8]
|
||||
Type_Info_Struct_Flag :: enum u8 {
|
||||
packed = 0,
|
||||
raw_union = 1,
|
||||
no_copy = 2,
|
||||
align = 3,
|
||||
}
|
||||
|
||||
equal: Equal_Proc, // set only when the struct has .Comparable set but does not have .Simple_Compare set
|
||||
Type_Info_Struct :: struct {
|
||||
// Slice these with `field_count`
|
||||
types: [^]^Type_Info `fmt:"v,field_count"`,
|
||||
names: [^]string `fmt:"v,field_count"`,
|
||||
offsets: [^]uintptr `fmt:"v,field_count"`,
|
||||
usings: [^]bool `fmt:"v,field_count"`,
|
||||
tags: [^]string `fmt:"v,field_count"`,
|
||||
|
||||
field_count: i32,
|
||||
|
||||
flags: Type_Info_Struct_Flags,
|
||||
|
||||
// These are only set iff this structure is an SOA structure
|
||||
soa_kind: Type_Info_Struct_Soa_Kind,
|
||||
soa_len: i32,
|
||||
soa_base_type: ^Type_Info,
|
||||
soa_len: int,
|
||||
|
||||
equal: Equal_Proc, // set only when the struct has .Comparable set but does not have .Simple_Compare set
|
||||
}
|
||||
Type_Info_Union :: struct {
|
||||
variants: []^Type_Info,
|
||||
@@ -142,9 +151,9 @@ Type_Info_Union :: struct {
|
||||
shared_nil: bool,
|
||||
}
|
||||
Type_Info_Enum :: struct {
|
||||
base: ^Type_Info,
|
||||
names: []string,
|
||||
values: []Type_Info_Enum_Value,
|
||||
base: ^Type_Info,
|
||||
names: []string,
|
||||
values: []Type_Info_Enum_Value,
|
||||
}
|
||||
Type_Info_Map :: struct {
|
||||
key: ^Type_Info,
|
||||
@@ -187,11 +196,12 @@ Type_Info_Soa_Pointer :: struct {
|
||||
}
|
||||
Type_Info_Bit_Field :: struct {
|
||||
backing_type: ^Type_Info,
|
||||
names: []string,
|
||||
types: []^Type_Info,
|
||||
bit_sizes: []uintptr,
|
||||
bit_offsets: []uintptr,
|
||||
tags: []string,
|
||||
names: [^]string `fmt:"v,field_count"`,
|
||||
types: [^]^Type_Info `fmt:"v,field_count"`,
|
||||
bit_sizes: [^]uintptr `fmt:"v,field_count"`,
|
||||
bit_offsets: [^]uintptr `fmt:"v,field_count"`,
|
||||
tags: [^]string `fmt:"v,field_count"`,
|
||||
field_count: int,
|
||||
}
|
||||
|
||||
Type_Info_Flag :: enum u8 {
|
||||
@@ -299,6 +309,8 @@ when ODIN_OS == .Windows {
|
||||
Thread_Detach = 3,
|
||||
}
|
||||
dll_forward_reason: DLL_Forward_Reason
|
||||
|
||||
dll_instance: rawptr
|
||||
}
|
||||
|
||||
// IMPORTANT NOTE(bill): Must be in this order (as the compiler relies upon it)
|
||||
@@ -513,11 +525,12 @@ Raw_Quaternion256_Vector_Scalar :: struct {vector: [3]f64, scalar: f64}
|
||||
Linux,
|
||||
Essence,
|
||||
FreeBSD,
|
||||
Haiku,
|
||||
OpenBSD,
|
||||
NetBSD,
|
||||
Haiku,
|
||||
WASI,
|
||||
JS,
|
||||
Orca,
|
||||
Freestanding,
|
||||
}
|
||||
*/
|
||||
@@ -533,10 +546,23 @@ Odin_OS_Type :: type_of(ODIN_OS)
|
||||
arm64,
|
||||
wasm32,
|
||||
wasm64p32,
|
||||
riscv64,
|
||||
}
|
||||
*/
|
||||
Odin_Arch_Type :: type_of(ODIN_ARCH)
|
||||
|
||||
Odin_Arch_Types :: bit_set[Odin_Arch_Type]
|
||||
|
||||
ALL_ODIN_ARCH_TYPES :: Odin_Arch_Types{
|
||||
.amd64,
|
||||
.i386,
|
||||
.arm32,
|
||||
.arm64,
|
||||
.wasm32,
|
||||
.wasm64p32,
|
||||
.riscv64,
|
||||
}
|
||||
|
||||
/*
|
||||
// Defined internally by the compiler
|
||||
Odin_Build_Mode_Type :: enum int {
|
||||
@@ -560,6 +586,22 @@ Odin_Build_Mode_Type :: type_of(ODIN_BUILD_MODE)
|
||||
*/
|
||||
Odin_Endian_Type :: type_of(ODIN_ENDIAN)
|
||||
|
||||
Odin_OS_Types :: bit_set[Odin_OS_Type]
|
||||
|
||||
ALL_ODIN_OS_TYPES :: Odin_OS_Types{
|
||||
.Windows,
|
||||
.Darwin,
|
||||
.Linux,
|
||||
.Essence,
|
||||
.FreeBSD,
|
||||
.OpenBSD,
|
||||
.NetBSD,
|
||||
.Haiku,
|
||||
.WASI,
|
||||
.JS,
|
||||
.Orca,
|
||||
.Freestanding,
|
||||
}
|
||||
|
||||
/*
|
||||
// Defined internally by the compiler
|
||||
@@ -577,7 +619,7 @@ Odin_Platform_Subtarget_Type :: type_of(ODIN_PLATFORM_SUBTARGET)
|
||||
Memory = 1,
|
||||
Thread = 2,
|
||||
}
|
||||
Odin_Sanitizer_Flags :: distinct bitset[Odin_Sanitizer_Flag; u32]
|
||||
Odin_Sanitizer_Flags :: distinct bit_set[Odin_Sanitizer_Flag; u32]
|
||||
|
||||
ODIN_SANITIZER_FLAGS // is a constant
|
||||
*/
|
||||
@@ -737,6 +779,10 @@ __init_context :: proc "contextless" (c: ^Context) {
|
||||
}
|
||||
|
||||
default_assertion_failure_proc :: proc(prefix, message: string, loc: Source_Code_Location) -> ! {
|
||||
default_assertion_contextless_failure_proc(prefix, message, loc)
|
||||
}
|
||||
|
||||
default_assertion_contextless_failure_proc :: proc "contextless" (prefix, message: string, loc: Source_Code_Location) -> ! {
|
||||
when ODIN_OS == .Freestanding {
|
||||
// Do nothing
|
||||
} else {
|
||||
|
||||
+135
-103
@@ -333,17 +333,24 @@ make_dynamic_array_len :: proc($T: typeid/[dynamic]$E, #any_int len: int, alloca
|
||||
// Note: Prefer using the procedure group `make`.
|
||||
@(builtin, require_results)
|
||||
make_dynamic_array_len_cap :: proc($T: typeid/[dynamic]$E, #any_int len: int, #any_int cap: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
|
||||
make_dynamic_array_error_loc(loc, len, cap)
|
||||
array.allocator = allocator // initialize allocator before just in case it fails to allocate any memory
|
||||
data := mem_alloc_bytes(size_of(E)*cap, align_of(E), allocator, loc) or_return
|
||||
s := Raw_Dynamic_Array{raw_data(data), len, cap, allocator}
|
||||
if data == nil && size_of(E) != 0 {
|
||||
s.len, s.cap = 0, 0
|
||||
}
|
||||
array = transmute(T)s
|
||||
err = _make_dynamic_array_len_cap((^Raw_Dynamic_Array)(&array), size_of(E), align_of(E), len, cap, allocator, loc)
|
||||
return
|
||||
}
|
||||
// `make_map` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
|
||||
|
||||
@(require_results)
|
||||
_make_dynamic_array_len_cap :: proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, #any_int len: int, #any_int cap: int, allocator := context.allocator, loc := #caller_location) -> (err: Allocator_Error) {
|
||||
make_dynamic_array_error_loc(loc, len, cap)
|
||||
array.allocator = allocator // initialize allocator before just in case it fails to allocate any memory
|
||||
data := mem_alloc_bytes(size_of_elem*cap, align_of_elem, allocator, loc) or_return
|
||||
use_zero := data == nil && size_of_elem != 0
|
||||
array.data = raw_data(data)
|
||||
array.len = 0 if use_zero else len
|
||||
array.cap = 0 if use_zero else cap
|
||||
array.allocator = allocator
|
||||
return
|
||||
}
|
||||
|
||||
// `make_map` allocates and initializes a map. Like `new`, the first argument is a type, not a value.
|
||||
// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
|
||||
//
|
||||
// Note: Prefer using the procedure group `make`.
|
||||
@@ -355,7 +362,7 @@ make_map :: proc($T: typeid/map[$K]$E, #any_int capacity: int = 1<<MAP_MIN_LOG2_
|
||||
err = reserve_map(&m, capacity, loc)
|
||||
return
|
||||
}
|
||||
// `make_multi_pointer` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
|
||||
// `make_multi_pointer` allocates and initializes a multi-pointer. Like `new`, the first argument is a type, not a value.
|
||||
// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
|
||||
//
|
||||
// This is "similar" to doing `raw_data(make([]E, len, allocator))`.
|
||||
@@ -440,107 +447,103 @@ delete_key :: proc(m: ^$T/map[$K]$V, key: K) -> (deleted_key: K, deleted_value:
|
||||
return
|
||||
}
|
||||
|
||||
_append_elem :: #force_inline proc(array: ^$T/[dynamic]$E, arg: E, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
|
||||
_append_elem :: #force_inline proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, arg_ptr: rawptr, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
|
||||
if array == nil {
|
||||
return 0, nil
|
||||
return
|
||||
}
|
||||
when size_of(E) == 0 {
|
||||
array := (^Raw_Dynamic_Array)(array)
|
||||
array.len += 1
|
||||
return 1, nil
|
||||
} else {
|
||||
if cap(array) < len(array)+1 {
|
||||
// Same behavior as _append_elems but there's only one arg, so we always just add DEFAULT_DYNAMIC_ARRAY_CAPACITY.
|
||||
cap := 2 * cap(array) + DEFAULT_DYNAMIC_ARRAY_CAPACITY
|
||||
|
||||
// do not 'or_return' here as it could be a partial success
|
||||
if should_zero {
|
||||
err = reserve(array, cap, loc)
|
||||
} else {
|
||||
err = non_zero_reserve(array, cap, loc)
|
||||
}
|
||||
}
|
||||
if cap(array)-len(array) > 0 {
|
||||
a := (^Raw_Dynamic_Array)(array)
|
||||
when size_of(E) != 0 {
|
||||
data := ([^]E)(a.data)
|
||||
assert(data != nil, loc=loc)
|
||||
data[a.len] = arg
|
||||
}
|
||||
a.len += 1
|
||||
return 1, err
|
||||
}
|
||||
return 0, err
|
||||
if array.cap < array.len+1 {
|
||||
// Same behavior as _append_elems but there's only one arg, so we always just add DEFAULT_DYNAMIC_ARRAY_CAPACITY.
|
||||
cap := 2 * array.cap + DEFAULT_DYNAMIC_ARRAY_CAPACITY
|
||||
|
||||
// do not 'or_return' here as it could be a partial success
|
||||
err = _reserve_dynamic_array(array, size_of_elem, align_of_elem, cap, should_zero, loc)
|
||||
}
|
||||
if array.cap-array.len > 0 {
|
||||
data := ([^]byte)(array.data)
|
||||
assert(data != nil, loc=loc)
|
||||
data = data[array.len*size_of_elem:]
|
||||
intrinsics.mem_copy_non_overlapping(data, arg_ptr, size_of_elem)
|
||||
array.len += 1
|
||||
n = 1
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@builtin
|
||||
append_elem :: proc(array: ^$T/[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
|
||||
return _append_elem(array, arg, true, loc=loc)
|
||||
when size_of(E) == 0 {
|
||||
(^Raw_Dynamic_Array)(array).len += 1
|
||||
return 1, nil
|
||||
} else {
|
||||
arg := arg
|
||||
return _append_elem((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), &arg, true, loc=loc)
|
||||
}
|
||||
}
|
||||
|
||||
@builtin
|
||||
non_zero_append_elem :: proc(array: ^$T/[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
|
||||
return _append_elem(array, arg, false, loc=loc)
|
||||
when size_of(E) == 0 {
|
||||
(^Raw_Dynamic_Array)(array).len += 1
|
||||
return 1, nil
|
||||
} else {
|
||||
arg := arg
|
||||
return _append_elem((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), &arg, false, loc=loc)
|
||||
}
|
||||
}
|
||||
|
||||
_append_elems :: #force_inline proc(array: ^$T/[dynamic]$E, should_zero: bool, loc := #caller_location, args: ..E) -> (n: int, err: Allocator_Error) #optional_allocator_error {
|
||||
_append_elems :: #force_inline proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, should_zero: bool, loc := #caller_location, args: rawptr, arg_len: int) -> (n: int, err: Allocator_Error) #optional_allocator_error {
|
||||
if array == nil {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
arg_len := len(args)
|
||||
if arg_len <= 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
when size_of(E) == 0 {
|
||||
array := (^Raw_Dynamic_Array)(array)
|
||||
array.len += arg_len
|
||||
return arg_len, nil
|
||||
} else {
|
||||
if cap(array) < len(array)+arg_len {
|
||||
cap := 2 * cap(array) + max(DEFAULT_DYNAMIC_ARRAY_CAPACITY, arg_len)
|
||||
if array.cap < array.len+arg_len {
|
||||
cap := 2 * array.cap + max(DEFAULT_DYNAMIC_ARRAY_CAPACITY, arg_len)
|
||||
|
||||
// do not 'or_return' here as it could be a partial success
|
||||
if should_zero {
|
||||
err = reserve(array, cap, loc)
|
||||
} else {
|
||||
err = non_zero_reserve(array, cap, loc)
|
||||
}
|
||||
}
|
||||
arg_len = min(cap(array)-len(array), arg_len)
|
||||
if arg_len > 0 {
|
||||
a := (^Raw_Dynamic_Array)(array)
|
||||
when size_of(E) != 0 {
|
||||
data := ([^]E)(a.data)
|
||||
assert(data != nil, loc=loc)
|
||||
intrinsics.mem_copy(&data[a.len], raw_data(args), size_of(E) * arg_len)
|
||||
}
|
||||
a.len += arg_len
|
||||
}
|
||||
return arg_len, err
|
||||
// do not 'or_return' here as it could be a partial success
|
||||
err = _reserve_dynamic_array(array, size_of_elem, align_of_elem, cap, should_zero, loc)
|
||||
}
|
||||
arg_len := arg_len
|
||||
arg_len = min(array.cap-array.len, arg_len)
|
||||
if arg_len > 0 {
|
||||
data := ([^]byte)(array.data)
|
||||
assert(data != nil, loc=loc)
|
||||
data = data[array.len*size_of_elem:]
|
||||
intrinsics.mem_copy(data, args, size_of_elem * arg_len) // must be mem_copy (overlapping)
|
||||
array.len += arg_len
|
||||
}
|
||||
return arg_len, err
|
||||
}
|
||||
|
||||
@builtin
|
||||
append_elems :: proc(array: ^$T/[dynamic]$E, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
|
||||
return _append_elems(array, true, loc, ..args)
|
||||
when size_of(E) == 0 {
|
||||
a := (^Raw_Dynamic_Array)(array)
|
||||
a.len += len(args)
|
||||
return len(args), nil
|
||||
} else {
|
||||
return _append_elems((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), true, loc, raw_data(args), len(args))
|
||||
}
|
||||
}
|
||||
|
||||
@builtin
|
||||
non_zero_append_elems :: proc(array: ^$T/[dynamic]$E, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
|
||||
return _append_elems(array, false, loc, ..args)
|
||||
when size_of(E) == 0 {
|
||||
a := (^Raw_Dynamic_Array)(array)
|
||||
a.len += len(args)
|
||||
return len(args), nil
|
||||
} else {
|
||||
return _append_elems((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), false, loc, raw_data(args), len(args))
|
||||
}
|
||||
}
|
||||
|
||||
// The append_string built-in procedure appends a string to the end of a [dynamic]u8 like type
|
||||
_append_elem_string :: proc(array: ^$T/[dynamic]$E/u8, arg: $A/string, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
|
||||
args := transmute([]E)arg
|
||||
if should_zero {
|
||||
return append_elems(array, ..args, loc=loc)
|
||||
} else {
|
||||
return non_zero_append_elems(array, ..args, loc=loc)
|
||||
}
|
||||
return _append_elems((^Raw_Dynamic_Array)(array), 1, 1, should_zero, loc, raw_data(arg), len(arg))
|
||||
}
|
||||
|
||||
@builtin
|
||||
@@ -679,7 +682,7 @@ assign_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, arg: E, loc := #calle
|
||||
|
||||
|
||||
@builtin
|
||||
assign_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
assign_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
|
||||
new_size := index + len(args)
|
||||
if len(args) == 0 {
|
||||
ok = true
|
||||
@@ -729,11 +732,10 @@ clear_dynamic_array :: proc "contextless" (array: ^$T/[dynamic]$E) {
|
||||
// `reserve_dynamic_array` will try to reserve memory of a passed dynamic array or map to the requested element count (setting the `cap`).
|
||||
//
|
||||
// Note: Prefer the procedure group `reserve`.
|
||||
_reserve_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, capacity: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
|
||||
if array == nil {
|
||||
_reserve_dynamic_array :: #force_inline proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, capacity: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
|
||||
if a == nil {
|
||||
return nil
|
||||
}
|
||||
a := (^Raw_Dynamic_Array)(array)
|
||||
|
||||
if capacity <= a.cap {
|
||||
return nil
|
||||
@@ -744,15 +746,15 @@ _reserve_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, capacity: i
|
||||
}
|
||||
assert(a.allocator.procedure != nil)
|
||||
|
||||
old_size := a.cap * size_of(E)
|
||||
new_size := capacity * size_of(E)
|
||||
old_size := a.cap * size_of_elem
|
||||
new_size := capacity * size_of_elem
|
||||
allocator := a.allocator
|
||||
|
||||
new_data: []byte
|
||||
if should_zero {
|
||||
new_data = mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
|
||||
new_data = mem_resize(a.data, old_size, new_size, align_of_elem, allocator, loc) or_return
|
||||
} else {
|
||||
new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
|
||||
new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of_elem, allocator, loc) or_return
|
||||
}
|
||||
if new_data == nil && new_size > 0 {
|
||||
return .Out_Of_Memory
|
||||
@@ -765,26 +767,23 @@ _reserve_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, capacity: i
|
||||
|
||||
@builtin
|
||||
reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
|
||||
return _reserve_dynamic_array(array, capacity, true, loc)
|
||||
return _reserve_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), capacity, true, loc)
|
||||
}
|
||||
|
||||
@builtin
|
||||
non_zero_reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
|
||||
return _reserve_dynamic_array(array, capacity, false, loc)
|
||||
return _reserve_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), capacity, false, loc)
|
||||
}
|
||||
|
||||
// `resize_dynamic_array` will try to resize memory of a passed dynamic array or map to the requested element count (setting the `len`, and possibly `cap`).
|
||||
//
|
||||
// Note: Prefer the procedure group `resize`
|
||||
_resize_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, length: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
|
||||
if array == nil {
|
||||
|
||||
_resize_dynamic_array :: #force_inline proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, length: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
|
||||
if a == nil {
|
||||
return nil
|
||||
}
|
||||
a := (^Raw_Dynamic_Array)(array)
|
||||
|
||||
if length <= a.cap {
|
||||
if should_zero && a.len < length {
|
||||
intrinsics.mem_zero(([^]E)(a.data)[a.len:], (length-a.len)*size_of(E))
|
||||
intrinsics.mem_zero(([^]byte)(a.data)[a.len*size_of_elem:], (length-a.len)*size_of_elem)
|
||||
}
|
||||
a.len = max(length, 0)
|
||||
return nil
|
||||
@@ -795,15 +794,15 @@ _resize_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, length: int,
|
||||
}
|
||||
assert(a.allocator.procedure != nil)
|
||||
|
||||
old_size := a.cap * size_of(E)
|
||||
new_size := length * size_of(E)
|
||||
old_size := a.cap * size_of_elem
|
||||
new_size := length * size_of_elem
|
||||
allocator := a.allocator
|
||||
|
||||
new_data : []byte
|
||||
if should_zero {
|
||||
new_data = mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
|
||||
new_data = mem_resize(a.data, old_size, new_size, align_of_elem, allocator, loc) or_return
|
||||
} else {
|
||||
new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
|
||||
new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of_elem, allocator, loc) or_return
|
||||
}
|
||||
if new_data == nil && new_size > 0 {
|
||||
return .Out_Of_Memory
|
||||
@@ -815,14 +814,17 @@ _resize_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, length: int,
|
||||
return nil
|
||||
}
|
||||
|
||||
// `resize_dynamic_array` will try to resize memory of a passed dynamic array or map to the requested element count (setting the `len`, and possibly `cap`).
|
||||
//
|
||||
// Note: Prefer the procedure group `resize`
|
||||
@builtin
|
||||
resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int length: int, loc := #caller_location) -> Allocator_Error {
|
||||
return _resize_dynamic_array(array, length, true, loc=loc)
|
||||
return _resize_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), length, true, loc=loc)
|
||||
}
|
||||
|
||||
@builtin
|
||||
non_zero_resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int length: int, loc := #caller_location) -> Allocator_Error {
|
||||
return _resize_dynamic_array(array, length, false, loc=loc)
|
||||
return _resize_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), length, false, loc=loc)
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -837,10 +839,13 @@ non_zero_resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int length: i
|
||||
Note: Prefer the procedure group `shrink`
|
||||
*/
|
||||
shrink_dynamic_array :: proc(array: ^$T/[dynamic]$E, new_cap := -1, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
|
||||
if array == nil {
|
||||
return _shrink_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), new_cap, loc)
|
||||
}
|
||||
|
||||
_shrink_dynamic_array :: proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, new_cap := -1, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
|
||||
if a == nil {
|
||||
return
|
||||
}
|
||||
a := (^Raw_Dynamic_Array)(array)
|
||||
|
||||
new_cap := new_cap if new_cap >= 0 else a.len
|
||||
|
||||
@@ -853,10 +858,10 @@ shrink_dynamic_array :: proc(array: ^$T/[dynamic]$E, new_cap := -1, loc := #call
|
||||
}
|
||||
assert(a.allocator.procedure != nil)
|
||||
|
||||
old_size := a.cap * size_of(E)
|
||||
new_size := new_cap * size_of(E)
|
||||
old_size := a.cap * size_of_elem
|
||||
new_size := new_cap * size_of_elem
|
||||
|
||||
new_data := mem_resize(a.data, old_size, new_size, align_of(E), a.allocator, loc) or_return
|
||||
new_data := mem_resize(a.data, old_size, new_size, align_of_elem, a.allocator, loc) or_return
|
||||
|
||||
a.data = raw_data(new_data)
|
||||
a.len = min(new_cap, a.len)
|
||||
@@ -943,3 +948,30 @@ unimplemented :: proc(message := "", loc := #caller_location) -> ! {
|
||||
}
|
||||
p("not yet implemented", message, loc)
|
||||
}
|
||||
|
||||
|
||||
// Contextless variant of `assert`: when `condition` is false, reports a
// "runtime assertion" failure carrying `message` and the caller's location.
// The whole procedure is disabled when `ODIN_DISABLE_ASSERT` is set.
@builtin
@(disabled=ODIN_DISABLE_ASSERT)
assert_contextless :: proc "contextless" (condition: bool, message := "", loc := #caller_location) {
	if condition {
		return
	}

	// NOTE(bill): The failure path lives in its own `@(cold)` procedure
	// so that the CPU does not execute it speculatively; this makes the
	// common (passing) case about an order of magnitude faster.
	@(cold)
	report_failure :: proc "contextless" (msg: string, location: Source_Code_Location) {
		default_assertion_contextless_failure_proc("runtime assertion", msg, location)
	}
	report_failure(message, loc)
}
|
||||
|
||||
@builtin
|
||||
panic_contextless :: proc "contextless" (message: string, loc := #caller_location) -> ! {
|
||||
default_assertion_contextless_failure_proc("panic", message, loc)
|
||||
}
|
||||
|
||||
@builtin
|
||||
unimplemented_contextless :: proc "contextless" (message := "", loc := #caller_location) -> ! {
|
||||
default_assertion_contextless_failure_proc("not yet implemented", message, loc)
|
||||
}
|
||||
|
||||
@@ -352,7 +352,7 @@ non_zero_append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, #no_broadcast args
|
||||
}
|
||||
|
||||
|
||||
_append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, zero_memory: bool, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
|
||||
_append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, zero_memory: bool, #no_broadcast args: []E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
|
||||
if array == nil {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
package runtime
|
||||
|
||||
nil_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
size, alignment: int,
|
||||
old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
|
||||
size, alignment: int,
|
||||
old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
|
||||
switch mode {
|
||||
case .Alloc, .Alloc_Non_Zeroed:
|
||||
return nil, .Out_Of_Memory
|
||||
|
||||
@@ -129,7 +129,7 @@ arena_alloc :: proc(arena: ^Arena, size, alignment: uint, loc := #caller_locatio
|
||||
return
|
||||
}
|
||||
|
||||
// `arena_init` will initialize the arena with a usuable block.
|
||||
// `arena_init` will initialize the arena with a usable block.
|
||||
// This procedure is not necessary to use the Arena as the default zero as `arena_alloc` will set things up if necessary
|
||||
@(require_results)
|
||||
arena_init :: proc(arena: ^Arena, size: uint, backing_allocator: Allocator, loc := #caller_location) -> Allocator_Error {
|
||||
|
||||
@@ -577,7 +577,7 @@ map_grow_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Inf
|
||||
|
||||
|
||||
@(require_results)
|
||||
map_reserve_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, new_capacity: uintptr, loc := #caller_location) -> Allocator_Error {
|
||||
map_reserve_dynamic :: #force_no_inline proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, new_capacity: uintptr, loc := #caller_location) -> Allocator_Error {
|
||||
@(require_results)
|
||||
ceil_log2 :: #force_inline proc "contextless" (x: uintptr) -> uintptr {
|
||||
z := intrinsics.count_leading_zeros(x)
|
||||
@@ -641,7 +641,7 @@ map_reserve_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_
|
||||
|
||||
|
||||
@(require_results)
|
||||
map_shrink_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
|
||||
map_shrink_dynamic :: #force_no_inline proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
|
||||
if m.allocator.procedure == nil {
|
||||
m.allocator = context.allocator
|
||||
}
|
||||
@@ -688,7 +688,7 @@ map_shrink_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_I
|
||||
}
|
||||
|
||||
@(require_results)
|
||||
map_free_dynamic :: proc "odin" (m: Raw_Map, info: ^Map_Info, loc := #caller_location) -> Allocator_Error {
|
||||
map_free_dynamic :: #force_no_inline proc "odin" (m: Raw_Map, info: ^Map_Info, loc := #caller_location) -> Allocator_Error {
|
||||
ptr := rawptr(map_data(m))
|
||||
size := int(map_total_allocation_size(uintptr(map_cap(m)), info))
|
||||
err := mem_free_with_size(ptr, size, m.allocator, loc)
|
||||
@@ -700,7 +700,7 @@ map_free_dynamic :: proc "odin" (m: Raw_Map, info: ^Map_Info, loc := #caller_loc
|
||||
}
|
||||
|
||||
@(require_results)
|
||||
map_lookup_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (index: uintptr, ok: bool) {
|
||||
map_lookup_dynamic :: #force_no_inline proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (index: uintptr, ok: bool) {
|
||||
if map_len(m) == 0 {
|
||||
return 0, false
|
||||
}
|
||||
@@ -723,7 +723,7 @@ map_lookup_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info,
|
||||
}
|
||||
}
|
||||
@(require_results)
|
||||
map_exists_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (ok: bool) {
|
||||
map_exists_dynamic :: #force_no_inline proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (ok: bool) {
|
||||
if map_len(m) == 0 {
|
||||
return false
|
||||
}
|
||||
@@ -749,7 +749,7 @@ map_exists_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info,
|
||||
|
||||
|
||||
@(require_results)
|
||||
map_erase_dynamic :: #force_inline proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (old_k, old_v: uintptr, ok: bool) {
|
||||
map_erase_dynamic :: #force_no_inline proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (old_k, old_v: uintptr, ok: bool) {
|
||||
index := map_lookup_dynamic(m^, info, k) or_return
|
||||
ks, vs, hs, _, _ := map_kvh_data_dynamic(m^, info)
|
||||
hs[index] |= TOMBSTONE_MASK
|
||||
|
||||
@@ -34,6 +34,9 @@ when ODIN_BUILD_MODE == .Dynamic {
|
||||
} else when ODIN_OS == .Darwin && ODIN_ARCH == .arm64 {
|
||||
@require foreign import entry "entry_unix_no_crt_darwin_arm64.asm"
|
||||
SYS_exit :: 1
|
||||
} else when ODIN_ARCH == .riscv64 {
|
||||
@require foreign import entry "entry_unix_no_crt_riscv64.asm"
|
||||
SYS_exit :: 93
|
||||
}
|
||||
@(link_name="_start_odin", linkage="strong", require)
|
||||
_start_odin :: proc "c" (argc: i32, argv: [^]cstring) -> ! {
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
/*
	Program entry point for RISC-V 64 builds that do not use the C runtime.

	Reads `argc` and `argv` from the initial stack and forwards them to
	`_start_odin` in a0/a1.
	NOTE(review): assumes the usual Unix process-entry layout, with `argc` at
	0(sp) and the `argv` array immediately after it - confirm for every
	supported target.
*/
.text

.globl _start

_start:
	ld   a0, 0(sp)      /* a0 = argc */
	addi a1, sp, 8      /* a1 = argv (first pointer follows argc) */
	andi sp, sp, ~15    /* round sp DOWN to a 16-byte boundary; `addi` with ~15 (-16) only moved sp and never aligned it */
	call _start_odin
	ebreak              /* trap if `_start_odin` (declared `-> !`) ever returns */
|
||||
@@ -10,8 +10,9 @@ when ODIN_BUILD_MODE == .Dynamic {
|
||||
DllMain :: proc "system" (hinstDLL: rawptr, fdwReason: u32, lpReserved: rawptr) -> b32 {
|
||||
context = default_context()
|
||||
|
||||
// Populate Windows DLL-specific global
|
||||
// Populate Windows DLL-specific globals
|
||||
dll_forward_reason = DLL_Forward_Reason(fdwReason)
|
||||
dll_instance = hinstDLL
|
||||
|
||||
switch dll_forward_reason {
|
||||
case .Process_Attach:
|
||||
|
||||
@@ -19,12 +19,15 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
// the pointer we return to the user.
|
||||
//
|
||||
|
||||
aligned_alloc :: proc(size, alignment: int, old_ptr: rawptr = nil, zero_memory := true) -> ([]byte, Allocator_Error) {
|
||||
aligned_alloc :: proc(size, alignment: int, old_ptr: rawptr, old_size: int, zero_memory := true) -> ([]byte, Allocator_Error) {
|
||||
a := max(alignment, align_of(rawptr))
|
||||
space := size + a - 1
|
||||
|
||||
allocated_mem: rawptr
|
||||
if old_ptr != nil {
|
||||
|
||||
force_copy := old_ptr != nil && a > align_of(rawptr)
|
||||
|
||||
if !force_copy && old_ptr != nil {
|
||||
original_old_ptr := ([^]rawptr)(old_ptr)[-1]
|
||||
allocated_mem = heap_resize(original_old_ptr, space+size_of(rawptr))
|
||||
} else {
|
||||
@@ -36,12 +39,19 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
aligned_ptr := (ptr - 1 + uintptr(a)) & -uintptr(a)
|
||||
diff := int(aligned_ptr - ptr)
|
||||
if (size + diff) > space || allocated_mem == nil {
|
||||
aligned_free(old_ptr)
|
||||
aligned_free(allocated_mem)
|
||||
return nil, .Out_Of_Memory
|
||||
}
|
||||
|
||||
aligned_mem = rawptr(aligned_ptr)
|
||||
([^]rawptr)(aligned_mem)[-1] = allocated_mem
|
||||
|
||||
if force_copy {
|
||||
mem_copy_non_overlapping(aligned_mem, old_ptr, old_size)
|
||||
aligned_free(old_ptr)
|
||||
}
|
||||
|
||||
return byte_slice(aligned_mem, size), nil
|
||||
}
|
||||
|
||||
@@ -53,10 +63,10 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
|
||||
aligned_resize :: proc(p: rawptr, old_size: int, new_size: int, new_alignment: int, zero_memory := true) -> (new_memory: []byte, err: Allocator_Error) {
|
||||
if p == nil {
|
||||
return nil, nil
|
||||
return aligned_alloc(new_size, new_alignment, nil, old_size, zero_memory)
|
||||
}
|
||||
|
||||
new_memory = aligned_alloc(new_size, new_alignment, p, zero_memory) or_return
|
||||
new_memory = aligned_alloc(new_size, new_alignment, p, old_size, zero_memory) or_return
|
||||
|
||||
// NOTE: heap_resize does not zero the new memory, so we do it
|
||||
if zero_memory && new_size > old_size {
|
||||
@@ -68,7 +78,7 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
|
||||
switch mode {
|
||||
case .Alloc, .Alloc_Non_Zeroed:
|
||||
return aligned_alloc(size, alignment, nil, mode == .Alloc)
|
||||
return aligned_alloc(size, alignment, nil, 0, mode == .Alloc)
|
||||
|
||||
case .Free:
|
||||
aligned_free(old_memory)
|
||||
@@ -77,9 +87,6 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
return nil, .Mode_Not_Implemented
|
||||
|
||||
case .Resize, .Resize_Non_Zeroed:
|
||||
if old_memory == nil {
|
||||
return aligned_alloc(size, alignment, nil, mode == .Resize)
|
||||
}
|
||||
return aligned_resize(old_memory, old_size, size, alignment, mode == .Resize)
|
||||
|
||||
case .Query_Features:
|
||||
|
||||
+24
-15
@@ -8,10 +8,9 @@ IS_WASM :: ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32
|
||||
|
||||
@(private)
|
||||
RUNTIME_LINKAGE :: "strong" when (
|
||||
(ODIN_USE_SEPARATE_MODULES ||
|
||||
ODIN_USE_SEPARATE_MODULES ||
|
||||
ODIN_BUILD_MODE == .Dynamic ||
|
||||
!ODIN_NO_CRT) &&
|
||||
!IS_WASM) else "internal"
|
||||
!ODIN_NO_CRT) else "internal"
|
||||
RUNTIME_REQUIRE :: false // !ODIN_TILDE
|
||||
|
||||
@(private)
|
||||
@@ -879,9 +878,6 @@ extendhfsf2 :: proc "c" (value: __float16) -> f32 {
|
||||
|
||||
@(link_name="__floattidf", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
|
||||
floattidf :: proc "c" (a: i128) -> f64 {
|
||||
when IS_WASM {
|
||||
return 0
|
||||
} else {
|
||||
DBL_MANT_DIG :: 53
|
||||
if a == 0 {
|
||||
return 0.0
|
||||
@@ -921,14 +917,10 @@ when IS_WASM {
|
||||
fb[0] = u32(a) // mantissa-low
|
||||
return transmute(f64)fb
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@(link_name="__floattidf_unsigned", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
|
||||
floattidf_unsigned :: proc "c" (a: u128) -> f64 {
|
||||
when IS_WASM {
|
||||
return 0
|
||||
} else {
|
||||
DBL_MANT_DIG :: 53
|
||||
if a == 0 {
|
||||
return 0.0
|
||||
@@ -966,7 +958,6 @@ when IS_WASM {
|
||||
fb[0] = u32(a) // mantissa-low
|
||||
return transmute(f64)fb
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1023,14 +1014,32 @@ modti3 :: proc "c" (a, b: i128) -> i128 {
|
||||
|
||||
@(link_name="__divmodti4", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
|
||||
divmodti4 :: proc "c" (a, b: i128, rem: ^i128) -> i128 {
|
||||
u := udivmod128(u128(a), u128(b), (^u128)(rem))
|
||||
return i128(u)
|
||||
s_a := a >> (128 - 1) // -1 if negative or 0
|
||||
s_b := b >> (128 - 1)
|
||||
an := (a ~ s_a) - s_a // absolute
|
||||
bn := (b ~ s_b) - s_b
|
||||
|
||||
s_b ~= s_a // quotient sign
|
||||
u_s_b := u128(s_b)
|
||||
u_s_a := u128(s_a)
|
||||
|
||||
r: u128 = ---
|
||||
u := i128((udivmodti4(u128(an), u128(bn), &r) ~ u_s_b) - u_s_b) // negate if negative
|
||||
rem^ = i128((r ~ u_s_a) - u_s_a)
|
||||
return u
|
||||
}
|
||||
|
||||
@(link_name="__divti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
|
||||
divti3 :: proc "c" (a, b: i128) -> i128 {
|
||||
u := udivmodti4(u128(a), u128(b), nil)
|
||||
return i128(u)
|
||||
s_a := a >> (128 - 1) // -1 if negative or 0
|
||||
s_b := b >> (128 - 1)
|
||||
an := (a ~ s_a) - s_a // absolute
|
||||
bn := (b ~ s_b) - s_b
|
||||
|
||||
s_a ~= s_b // quotient sign
|
||||
u_s_a := u128(s_a)
|
||||
|
||||
return i128((udivmodti4(u128(an), u128(bn), nil) ~ u_s_a) - u_s_a) // negate if negative
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -5,11 +5,24 @@ package runtime
|
||||
import "base:intrinsics"
|
||||
|
||||
_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
|
||||
WRITE :: 0x2000004
|
||||
STDERR :: 2
|
||||
ret := intrinsics.syscall(WRITE, STDERR, uintptr(raw_data(data)), uintptr(len(data)))
|
||||
if ret < 0 {
|
||||
return 0, _OS_Errno(-ret)
|
||||
when ODIN_NO_CRT {
|
||||
WRITE :: 0x2000004
|
||||
ret := intrinsics.syscall(WRITE, STDERR, uintptr(raw_data(data)), uintptr(len(data)))
|
||||
if ret < 0 {
|
||||
return 0, _OS_Errno(-ret)
|
||||
}
|
||||
return int(ret), 0
|
||||
} else {
|
||||
foreign {
|
||||
write :: proc(handle: i32, buffer: [^]byte, count: uint) -> int ---
|
||||
__error :: proc() -> ^i32 ---
|
||||
}
|
||||
|
||||
if ret := write(STDERR, raw_data(data), len(data)); ret >= 0 {
|
||||
return int(ret), 0
|
||||
}
|
||||
|
||||
return 0, _OS_Errno(__error()^)
|
||||
}
|
||||
return int(ret), 0
|
||||
}
|
||||
|
||||
@@ -12,6 +12,8 @@ _stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
|
||||
SYS_write :: uintptr(4)
|
||||
} else when ODIN_ARCH == .arm32 {
|
||||
SYS_write :: uintptr(4)
|
||||
} else when ODIN_ARCH == .riscv64 {
|
||||
SYS_write :: uintptr(64)
|
||||
}
|
||||
|
||||
stderr :: 2
|
||||
|
||||
@@ -262,7 +262,7 @@ print_typeid :: #force_no_inline proc "contextless" (id: typeid) {
|
||||
}
|
||||
}
|
||||
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
|
||||
if ti == nil {
|
||||
print_string("nil")
|
||||
@@ -401,15 +401,16 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
|
||||
}
|
||||
|
||||
print_string("struct ")
|
||||
if info.is_packed { print_string("#packed ") }
|
||||
if info.is_raw_union { print_string("#raw_union ") }
|
||||
if info.custom_align {
|
||||
if .packed in info.flags { print_string("#packed ") }
|
||||
if .raw_union in info.flags { print_string("#raw_union ") }
|
||||
if .no_copy in info.flags { print_string("#no_copy ") }
|
||||
if .align in info.flags {
|
||||
print_string("#align(")
|
||||
print_u64(u64(ti.align))
|
||||
print_string(") ")
|
||||
}
|
||||
print_byte('{')
|
||||
for name, i in info.names {
|
||||
for name, i in info.names[:info.field_count] {
|
||||
if i > 0 { print_string(", ") }
|
||||
print_string(name)
|
||||
print_string(": ")
|
||||
@@ -469,7 +470,7 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
|
||||
print_string("bit_field ")
|
||||
print_type(info.backing_type)
|
||||
print_string(" {")
|
||||
for name, i in info.names {
|
||||
for name, i in info.names[:info.field_count] {
|
||||
if i > 0 { print_string(", ") }
|
||||
print_string(name)
|
||||
print_string(": ")
|
||||
|
||||
@@ -52,3 +52,24 @@ udivti3 :: proc "c" (la, ha, lb, hb: u64) -> u128 {
|
||||
b.lo, b.hi = lb, hb
|
||||
return udivmodti4(a.all, b.all, nil)
|
||||
}
|
||||
|
||||
// Logical (zero-filling) right shift of a 128-bit value by `b` bits.
//
// The value is passed as two 64-bit halves: `la` is the low half and `ha` is
// the high half. The shifted value is returned reinterpreted as an `i128`.
// `b` is expected to be in the range 0..<128.
@(link_name="__lshrti3", linkage="strong")
__lshrti3 :: proc "c" (la, ha: u64, b: u32) -> i128 {
	// Width in bits of ONE half of the 128-bit value. The halves are `u64`,
	// so this must be 64: it is used both as the "shift crosses the half
	// boundary" test bit and as the amount to carry between halves.
	bits :: size_of(u64)*8

	input, result: ti_int
	input.lo = la
	input.hi = ha

	if b & bits != 0 {
		// bits <= b < 2*bits: the low half is discarded entirely and the
		// high half slides down into the low half.
		result.hi = 0
		result.lo = input.hi >> (b - bits)
	} else if b == 0 {
		// Nothing to do; also avoids the full-width `<< bits` below.
		return input.all
	} else {
		// 0 < b < bits: shift both halves and carry the bits that fall out
		// of the high half into the top of the low half.
		result.hi = input.hi >> b
		result.lo = (input.hi << (bits - b)) | (input.lo >> b)
	}

	return result.all
}
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
package runtime
|
||||
|
||||
Thread_Local_Cleaner :: #type proc "odin" ()
|
||||
|
||||
@(private="file")
|
||||
thread_local_cleaners: [8]Thread_Local_Cleaner
|
||||
|
||||
// Register `p` as a thread-local cleaner: a procedure meant to run at the end
// of a thread in order to deallocate state marked as `thread_local`.
//
// Intended to be called from the `init` procedure of a package that keeps
// dynamically-allocated memory in `thread_local` variables.
//
// The procedure is stored in the first empty slot of the fixed-size cleaner
// table; if every slot is already taken, this panics.
add_thread_local_cleaner :: proc "contextless" (p: Thread_Local_Cleaner) {
	for i in 0..<len(thread_local_cleaners) {
		slot := &thread_local_cleaners[i]
		if slot^ != nil {
			continue
		}
		slot^ = p
		return
	}
	panic_contextless("There are no more thread-local cleaner slots available.")
}
|
||||
|
||||
// Invoke every registered thread-local cleaner, in registration order.
//
// Intended to be called by the internals of a threading API when a thread
// reaches the end of its lifetime.
run_thread_local_cleaners :: proc "odin" () {
	for cleaner in thread_local_cleaners {
		if cleaner == nil {
			// The first empty slot ends the run; later slots are not inspected.
			return
		}
		cleaner()
	}
}
|
||||
@@ -297,7 +297,8 @@ lock :: proc(a: ^WASM_Allocator) {
|
||||
return
|
||||
}
|
||||
|
||||
assert(intrinsics.wasm_memory_atomic_wait32((^u32)(&a.mu), u32(new_state), -1) != 0)
|
||||
ret := intrinsics.wasm_memory_atomic_wait32((^u32)(&a.mu), u32(new_state), -1)
|
||||
assert(ret != 0)
|
||||
intrinsics.cpu_relax()
|
||||
}
|
||||
}
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -48,6 +48,9 @@ if "%2" == "1" (
|
||||
set odin_version_raw="dev-%curr_year%-%curr_month%"
|
||||
|
||||
set compiler_flags= -nologo -Oi -TP -fp:precise -Gm- -MP -FC -EHsc- -GR- -GF
|
||||
rem Parse source code as utf-8 even on shift-jis and other codepages
|
||||
rem See https://learn.microsoft.com/en-us/cpp/build/reference/utf-8-set-source-and-executable-character-sets-to-utf-8?view=msvc-170
|
||||
set compiler_flags= %compiler_flags% /utf-8
|
||||
set compiler_defines= -DODIN_VERSION_RAW=\"%odin_version_raw%\"
|
||||
|
||||
if not exist .git\ goto skip_git_hash
|
||||
@@ -111,7 +114,10 @@ call build_vendor.bat
|
||||
if %errorlevel% neq 0 goto end_of_build
|
||||
|
||||
rem If the demo doesn't run for you and your CPU is more than a decade old, try -microarch:native
|
||||
if %release_mode% EQU 0 odin run examples/demo -- Hellope World
|
||||
if %release_mode% EQU 0 odin run examples/demo -vet -strict-style -- Hellope World
|
||||
|
||||
rem Many non-compiler devs seem to run debug build but don't realize.
|
||||
if %release_mode% EQU 0 echo: & echo Debug compiler built. Note: run "build.bat release" if you want a faster, release mode compiler.
|
||||
|
||||
del *.obj > NUL 2> NUL
|
||||
|
||||
|
||||
+15
-2
@@ -23,6 +23,14 @@ error() {
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Brew advises people not to add llvm to their $PATH, so try and use brew to find it.
|
||||
if [ -z "$LLVM_CONFIG" ] && [ -n "$(command -v brew)" ]; then
|
||||
if [ -n "$(command -v $(brew --prefix llvm@18)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@18)/bin/llvm-config"
|
||||
elif [ -n "$(command -v $(brew --prefix llvm@17)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@17)/bin/llvm-config"
|
||||
elif [ -n "$(command -v $(brew --prefix llvm@14)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@14)/bin/llvm-config"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -z "$LLVM_CONFIG" ]; then
|
||||
# darwin, linux, openbsd
|
||||
if [ -n "$(command -v llvm-config-18)" ]; then LLVM_CONFIG="llvm-config-18"
|
||||
@@ -95,7 +103,7 @@ Linux)
|
||||
LDFLAGS="$LDFLAGS -ldl $($LLVM_CONFIG --libs core native --system-libs --libfiles)"
|
||||
# Copy libLLVM*.so into current directory for linking
|
||||
# NOTE: This is needed by the Linux release pipeline!
|
||||
cp $(readlink -f $($LLVM_CONFIG --libfiles)) ./
|
||||
# cp $(readlink -f $($LLVM_CONFIG --libfiles)) ./
|
||||
LDFLAGS="$LDFLAGS -Wl,-rpath=\$ORIGIN"
|
||||
;;
|
||||
OpenBSD)
|
||||
@@ -144,12 +152,17 @@ build_odin() {
|
||||
}
|
||||
|
||||
run_demo() {
|
||||
./odin run examples/demo/demo.odin -file -- Hellope World
|
||||
if [ $# -eq 0 ] || [ "$1" = "debug" ]; then
|
||||
./odin run examples/demo -vet -strict-style -- Hellope World
|
||||
fi
|
||||
}
|
||||
|
||||
if [ $# -eq 0 ]; then
|
||||
build_odin debug
|
||||
run_demo
|
||||
|
||||
: ${PROGRAM:=$0}
|
||||
printf "\nDebug compiler built. Note: run \"$PROGRAM release\" or \"$PROGRAM release-native\" if you want a faster, release mode compiler.\n"
|
||||
elif [ $# -eq 1 ]; then
|
||||
case $1 in
|
||||
report)
|
||||
|
||||
+33
-3
@@ -144,6 +144,9 @@ buffer_grow :: proc(b: ^Buffer, n: int, loc := #caller_location) {
|
||||
}
|
||||
|
||||
buffer_write_at :: proc(b: ^Buffer, p: []byte, offset: int, loc := #caller_location) -> (n: int, err: io.Error) {
|
||||
if len(p) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
b.last_read = .Invalid
|
||||
if offset < 0 {
|
||||
err = .Invalid_Offset
|
||||
@@ -246,10 +249,13 @@ buffer_read_ptr :: proc(b: ^Buffer, ptr: rawptr, size: int) -> (n: int, err: io.
|
||||
}
|
||||
|
||||
buffer_read_at :: proc(b: ^Buffer, p: []byte, offset: int) -> (n: int, err: io.Error) {
|
||||
if len(p) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
b.last_read = .Invalid
|
||||
|
||||
if uint(offset) >= len(b.buf) {
|
||||
err = .Invalid_Offset
|
||||
err = .EOF
|
||||
return
|
||||
}
|
||||
n = copy(p, b.buf[offset:])
|
||||
@@ -310,6 +316,27 @@ buffer_unread_rune :: proc(b: ^Buffer) -> io.Error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// buffer_seek moves the read offset of `b` to a position computed from
// `offset` relative to `whence` (the start of the buffer, the current read
// offset, or the end of the buffered data) and returns the new absolute
// position.
//
// Returns `.Invalid_Whence` for an unknown `whence` and `.Invalid_Offset` when
// the resulting position would be negative; `b` is left unmodified in both
// cases. A successful seek invalidates the last-read state.
buffer_seek :: proc(b: ^Buffer, offset: i64, whence: io.Seek_From) -> (i64, io.Error) {
	target: i64
	switch whence {
	case .Start:
		target = offset
	case .Current:
		target = i64(b.off) + offset
	case .End:
		target = i64(len(b.buf)) + offset
	case:
		return 0, .Invalid_Whence
	}

	new_off := int(target)
	if new_off < 0 {
		return 0, .Invalid_Offset
	}

	b.off = new_off
	b.last_read = .Invalid
	return target, nil
}
|
||||
|
||||
buffer_read_bytes :: proc(b: ^Buffer, delim: byte) -> (line: []byte, err: io.Error) {
|
||||
i := index_byte(b.buf[b.off:], delim)
|
||||
@@ -395,14 +422,17 @@ _buffer_proc :: proc(stream_data: rawptr, mode: io.Stream_Mode, p: []byte, offse
|
||||
return io._i64_err(buffer_write(b, p))
|
||||
case .Write_At:
|
||||
return io._i64_err(buffer_write_at(b, p, int(offset)))
|
||||
case .Seek:
|
||||
n, err = buffer_seek(b, offset, whence)
|
||||
return
|
||||
case .Size:
|
||||
n = i64(buffer_capacity(b))
|
||||
n = i64(buffer_length(b))
|
||||
return
|
||||
case .Destroy:
|
||||
buffer_destroy(b)
|
||||
return
|
||||
case .Query:
|
||||
return io.query_utility({.Read, .Read_At, .Write, .Write_At, .Size, .Destroy})
|
||||
return io.query_utility({.Read, .Read_At, .Write, .Write_At, .Seek, .Size, .Destroy, .Query})
|
||||
}
|
||||
return 0, .Empty
|
||||
}
|
||||
|
||||
+311
-6
@@ -1,9 +1,38 @@
|
||||
package bytes
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:mem"
|
||||
import "core:simd"
|
||||
import "core:unicode"
|
||||
import "core:unicode/utf8"
|
||||
|
||||
when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
|
||||
@(private)
|
||||
SCANNER_INDICES_256 : simd.u8x32 : {
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
}
|
||||
@(private)
|
||||
SCANNER_SENTINEL_MAX_256: simd.u8x32 : u8(0x00)
|
||||
@(private)
|
||||
SCANNER_SENTINEL_MIN_256: simd.u8x32 : u8(0xff)
|
||||
@(private)
|
||||
SIMD_REG_SIZE_256 :: 32
|
||||
}
|
||||
@(private)
|
||||
SCANNER_INDICES_128 : simd.u8x16 : {
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
}
|
||||
@(private)
|
||||
SCANNER_SENTINEL_MAX_128: simd.u8x16 : u8(0x00)
|
||||
@(private)
|
||||
SCANNER_SENTINEL_MIN_128: simd.u8x16 : u8(0xff)
|
||||
@(private)
|
||||
SIMD_REG_SIZE_128 :: 16
|
||||
|
||||
clone :: proc(s: []byte, allocator := context.allocator, loc := #caller_location) -> []byte {
|
||||
c := make([]byte, len(s), allocator, loc)
|
||||
copy(c, s)
|
||||
@@ -293,28 +322,279 @@ split_after_iterator :: proc(s: ^[]byte, sep: []byte) -> ([]byte, bool) {
|
||||
return _split_iterator(s, sep, len(sep))
|
||||
}
|
||||
|
||||
/*
|
||||
Scan a slice of bytes for a specific byte.
|
||||
|
||||
index_byte :: proc(s: []byte, c: byte) -> int {
|
||||
for i := 0; i < len(s); i += 1 {
|
||||
This procedure safely handles slices of any length, including empty slices.
|
||||
|
||||
Inputs:
|
||||
- s: A slice of bytes.
|
||||
- c: The byte to search for.
|
||||
|
||||
Returns:
|
||||
- index: The index of the byte `c`, or -1 if it was not found.
|
||||
*/
|
||||
index_byte :: proc(s: []byte, c: byte) -> (index: int) #no_bounds_check {
|
||||
i, l := 0, len(s)
|
||||
|
||||
// Guard against small strings. On modern systems, it is ALWAYS
|
||||
// worth vectorizing assuming there is a hardware vector unit, and
|
||||
// the data size is large enough.
|
||||
if l < SIMD_REG_SIZE_128 {
|
||||
for /**/; i < l; i += 1 {
|
||||
if s[i] == c {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
c_vec: simd.u8x16 = c
|
||||
when !simd.IS_EMULATED {
|
||||
// Note: While this is something that could also logically take
|
||||
// advantage of AVX512, the various downclocking and power
|
||||
// consumption related woes make it premature to have a dedicated
|
||||
// code path.
|
||||
when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
|
||||
c_vec_256: simd.u8x32 = c
|
||||
|
||||
s_vecs: [4]simd.u8x32 = ---
|
||||
c_vecs: [4]simd.u8x32 = ---
|
||||
m_vec: [4]u8 = ---
|
||||
|
||||
// Scan 128-byte chunks, using 256-bit SIMD.
|
||||
for nr_blocks := l / (4 * SIMD_REG_SIZE_256); nr_blocks > 0; nr_blocks -= 1 {
|
||||
#unroll for j in 0..<4 {
|
||||
s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
|
||||
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
|
||||
m_vec[j] = simd.reduce_or(c_vecs[j])
|
||||
}
|
||||
if m_vec[0] | m_vec[1] | m_vec[2] | m_vec[3] > 0 {
|
||||
#unroll for j in 0..<4 {
|
||||
if m_vec[j] > 0 {
|
||||
sel := simd.select(c_vecs[j], SCANNER_INDICES_256, SCANNER_SENTINEL_MIN_256)
|
||||
off := simd.reduce_min(sel)
|
||||
return i + j * SIMD_REG_SIZE_256 + int(off)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i += 4 * SIMD_REG_SIZE_256
|
||||
}
|
||||
|
||||
// Scan 64-byte chunks, using 256-bit SIMD.
|
||||
for nr_blocks := (l - i) / (2 * SIMD_REG_SIZE_256); nr_blocks > 0; nr_blocks -= 1 {
|
||||
#unroll for j in 0..<2 {
|
||||
s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
|
||||
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
|
||||
m_vec[j] = simd.reduce_or(c_vecs[j])
|
||||
}
|
||||
if m_vec[0] | m_vec[1] > 0 {
|
||||
#unroll for j in 0..<2 {
|
||||
if m_vec[j] > 0 {
|
||||
sel := simd.select(c_vecs[j], SCANNER_INDICES_256, SCANNER_SENTINEL_MIN_256)
|
||||
off := simd.reduce_min(sel)
|
||||
return i + j * SIMD_REG_SIZE_256 + int(off)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i += 2 * SIMD_REG_SIZE_256
|
||||
}
|
||||
} else {
|
||||
s_vecs: [4]simd.u8x16 = ---
|
||||
c_vecs: [4]simd.u8x16 = ---
|
||||
m_vecs: [4]u8 = ---
|
||||
|
||||
// Scan 64-byte chunks, using 128-bit SIMD.
|
||||
for nr_blocks := l / (4 * SIMD_REG_SIZE_128); nr_blocks > 0; nr_blocks -= 1 {
|
||||
#unroll for j in 0..<4 {
|
||||
s_vecs[j]= intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i+j*SIMD_REG_SIZE_128:]))
|
||||
c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec)
|
||||
m_vecs[j] = simd.reduce_or(c_vecs[j])
|
||||
}
|
||||
if m_vecs[0] | m_vecs[1] | m_vecs[2] | m_vecs[3] > 0 {
|
||||
#unroll for j in 0..<4 {
|
||||
if m_vecs[j] > 0 {
|
||||
sel := simd.select(c_vecs[j], SCANNER_INDICES_128, SCANNER_SENTINEL_MIN_128)
|
||||
off := simd.reduce_min(sel)
|
||||
return i + j * SIMD_REG_SIZE_128 + int(off)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i += 4 * SIMD_REG_SIZE_128
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan the remaining SIMD register sized chunks.
|
||||
//
|
||||
// Apparently LLVM does ok with 128-bit SWAR, so this path is also taken
|
||||
// on potato targets. Scanning more at a time when LLVM is emulating SIMD
|
||||
// likely does not buy much, as all that does is increase GP register
|
||||
// pressure.
|
||||
for nr_blocks := (l - i) / SIMD_REG_SIZE_128; nr_blocks > 0; nr_blocks -= 1 {
|
||||
s0 := intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i:]))
|
||||
c0 := simd.lanes_eq(s0, c_vec)
|
||||
if simd.reduce_or(c0) > 0 {
|
||||
sel := simd.select(c0, SCANNER_INDICES_128, SCANNER_SENTINEL_MIN_128)
|
||||
off := simd.reduce_min(sel)
|
||||
return i + int(off)
|
||||
}
|
||||
|
||||
i += SIMD_REG_SIZE_128
|
||||
}
|
||||
|
||||
// Scan serially for the remainder.
|
||||
for /**/; i < l; i += 1 {
|
||||
if s[i] == c {
|
||||
return i
|
||||
}
|
||||
}
|
||||
|
||||
return -1
|
||||
}
|
||||
|
||||
// Returns -1 if c is not present
|
||||
last_index_byte :: proc(s: []byte, c: byte) -> int {
|
||||
for i := len(s)-1; i >= 0; i -= 1 {
|
||||
/*
Scan a slice of bytes for a specific byte, starting from the end and working
backwards to the start.

This procedure safely handles slices of any length, including empty slices.

Inputs:
- s: A slice of bytes.
- c: The byte to search for.

Returns:
- The index of the last occurrence of the byte `c`, or -1 if it was not found.
*/
last_index_byte :: proc(s: []byte, c: byte) -> int #no_bounds_check {
	// `i` is always one past the next byte to examine, so every path below
	// decrements it BEFORE indexing. Bounds checks are disabled for this
	// procedure, so reading `s[len(s)]` would silently touch memory past
	// the end of the slice.
	i := len(s)

	// Guard against small strings. On modern systems, it is ALWAYS
	// worth vectorizing assuming there is a hardware vector unit, and
	// the data size is large enough.
	if i < SIMD_REG_SIZE_128 {
		for i > 0 {
			i -= 1
			if s[i] == c {
				return i
			}
		}
		return -1
	}

	c_vec: simd.u8x16 = c
	when !simd.IS_EMULATED {
		// Note: While this is something that could also logically take
		// advantage of AVX512, the various downclocking and power
		// consumption related woes make it premature to have a dedicated
		// code path.
		when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
			c_vec_256: simd.u8x32 = c

			s_vecs: [4]simd.u8x32 = ---
			c_vecs: [4]simd.u8x32 = ---
			m_vec:  [4]u8 = ---

			// Scan 128-byte chunks, using 256-bit SIMD.
			for i >= 4 * SIMD_REG_SIZE_256 {
				i -= 4 * SIMD_REG_SIZE_256

				#unroll for j in 0..<4 {
					s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
					c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
					m_vec[j] = simd.reduce_or(c_vecs[j])
				}
				if m_vec[0] | m_vec[1] | m_vec[2] | m_vec[3] > 0 {
					// Check the registers from last to first so the
					// highest matching index wins.
					#unroll for j in 0..<4 {
						if m_vec[3-j] > 0 {
							// Non-matching lanes become 0, so the maximum
							// is the highest matching lane index.
							sel := simd.select(c_vecs[3-j], SCANNER_INDICES_256, SCANNER_SENTINEL_MAX_256)
							off := simd.reduce_max(sel)
							return i + (3-j) * SIMD_REG_SIZE_256 + int(off)
						}
					}
				}
			}

			// Scan 64-byte chunks, using 256-bit SIMD.
			for i >= 2 * SIMD_REG_SIZE_256 {
				i -= 2 * SIMD_REG_SIZE_256

				#unroll for j in 0..<2 {
					s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x32)raw_data(s[i+j*SIMD_REG_SIZE_256:]))
					c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec_256)
					m_vec[j] = simd.reduce_or(c_vecs[j])
				}
				if m_vec[0] | m_vec[1] > 0 {
					#unroll for j in 0..<2 {
						if m_vec[1-j] > 0 {
							sel := simd.select(c_vecs[1-j], SCANNER_INDICES_256, SCANNER_SENTINEL_MAX_256)
							off := simd.reduce_max(sel)
							return i + (1-j) * SIMD_REG_SIZE_256 + int(off)
						}
					}
				}
			}
		} else {
			s_vecs: [4]simd.u8x16 = ---
			c_vecs: [4]simd.u8x16 = ---
			m_vecs: [4]u8 = ---

			// Scan 64-byte chunks, using 128-bit SIMD.
			for i >= 4 * SIMD_REG_SIZE_128 {
				i -= 4 * SIMD_REG_SIZE_128

				#unroll for j in 0..<4 {
					s_vecs[j] = intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i+j*SIMD_REG_SIZE_128:]))
					c_vecs[j] = simd.lanes_eq(s_vecs[j], c_vec)
					m_vecs[j] = simd.reduce_or(c_vecs[j])
				}
				if m_vecs[0] | m_vecs[1] | m_vecs[2] | m_vecs[3] > 0 {
					#unroll for j in 0..<4 {
						if m_vecs[3-j] > 0 {
							sel := simd.select(c_vecs[3-j], SCANNER_INDICES_128, SCANNER_SENTINEL_MAX_128)
							off := simd.reduce_max(sel)
							return i + (3-j) * SIMD_REG_SIZE_128 + int(off)
						}
					}
				}
			}
		}
	}

	// Scan the remaining SIMD register sized chunks.
	//
	// Apparently LLVM does ok with 128-bit SWAR, so this path is also taken
	// on potato targets. Scanning more at a time when LLVM is emulating SIMD
	// likely does not buy much, as all that does is increase GP register
	// pressure.
	for i >= SIMD_REG_SIZE_128 {
		i -= SIMD_REG_SIZE_128

		s0 := intrinsics.unaligned_load(cast(^simd.u8x16)raw_data(s[i:]))
		c0 := simd.lanes_eq(s0, c_vec)
		if simd.reduce_or(c0) > 0 {
			sel := simd.select(c0, SCANNER_INDICES_128, SCANNER_SENTINEL_MAX_128)
			off := simd.reduce_max(sel)
			return i + int(off)
		}
	}

	// Scan serially for the remainder.
	for i > 0 {
		i -= 1
		if s[i] == c {
			return i
		}
	}

	return -1
}
|
||||
|
||||
|
||||
|
||||
@private PRIME_RABIN_KARP :: 16777619
|
||||
|
||||
index :: proc(s, substr: []byte) -> int {
|
||||
@@ -1167,3 +1447,28 @@ fields_proc :: proc(s: []byte, f: proc(rune) -> bool, allocator := context.alloc
|
||||
|
||||
return subslices[:]
|
||||
}
|
||||
|
||||
// alias returns true iff a and b have a non-zero length, and any part of
|
||||
// a overlaps with b.
|
||||
alias :: proc "contextless" (a, b: []byte) -> bool {
|
||||
a_len, b_len := len(a), len(b)
|
||||
// An empty slice covers no bytes, so it is never reported as overlapping.
if a_len == 0 || b_len == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
a_start, b_start := uintptr(raw_data(a)), uintptr(raw_data(b))
|
||||
// The end addresses are inclusive: each is the address of the slice's last byte.
a_end, b_end := a_start + uintptr(a_len-1), b_start + uintptr(b_len-1)
|
||||
|
||||
// Two inclusive ranges overlap iff each one starts at or before the other's end.
return a_start <= b_end && b_start <= a_end
|
||||
}
|
||||
|
||||
// alias_inexactly returns true iff a and b have a non-zero length,
|
||||
// the base pointers of a and b are NOT equal, and any part of a overlaps
|
||||
// with b (ie: `alias(a, b)` with an exception that returns false for
|
||||
// `a == b`, `b = a[:len(a)-69]` and similar conditions).
|
||||
alias_inexactly :: proc "contextless" (a, b: []byte) -> bool {
|
||||
// Slices that share the same base pointer are deliberately not reported,
// whatever their lengths are.
if raw_data(a) == raw_data(b) {
|
||||
return false
|
||||
}
|
||||
// Otherwise defer to the general overlap test.
return alias(a, b)
|
||||
}
|
||||
|
||||
@@ -9,10 +9,11 @@ Reader :: struct {
|
||||
prev_rune: int, // previous reading index of rune or < 0
|
||||
}
|
||||
|
||||
reader_init :: proc(r: ^Reader, s: []byte) {
|
||||
reader_init :: proc(r: ^Reader, s: []byte) -> io.Stream {
|
||||
r.s = s
|
||||
r.i = 0
|
||||
r.prev_rune = -1
|
||||
return reader_to_stream(r)
|
||||
}
|
||||
|
||||
reader_to_stream :: proc(r: ^Reader) -> (s: io.Stream) {
|
||||
@@ -33,6 +34,9 @@ reader_size :: proc(r: ^Reader) -> i64 {
|
||||
}
|
||||
|
||||
reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) {
|
||||
if len(p) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
if r.i >= i64(len(r.s)) {
|
||||
return 0, .EOF
|
||||
}
|
||||
@@ -42,6 +46,9 @@ reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) {
|
||||
return
|
||||
}
|
||||
reader_read_at :: proc(r: ^Reader, p: []byte, off: i64) -> (n: int, err: io.Error) {
|
||||
if len(p) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
if off < 0 {
|
||||
return 0, .Invalid_Offset
|
||||
}
|
||||
@@ -97,7 +104,6 @@ reader_unread_rune :: proc(r: ^Reader) -> io.Error {
|
||||
return nil
|
||||
}
|
||||
reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.Error) {
|
||||
r.prev_rune = -1
|
||||
abs: i64
|
||||
switch whence {
|
||||
case .Start:
|
||||
@@ -114,6 +120,7 @@ reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.E
|
||||
return 0, .Invalid_Offset
|
||||
}
|
||||
r.i = abs
|
||||
r.prev_rune = -1
|
||||
return abs, nil
|
||||
}
|
||||
reader_write_to :: proc(r: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) {
|
||||
|
||||
@@ -47,8 +47,8 @@ foreign libc {
|
||||
clogf :: proc(z: complex_float) -> complex_float ---
|
||||
|
||||
// 7.3.8 Power and absolute-value functions
|
||||
cabs :: proc(z: complex_double) -> complex_double ---
|
||||
cabsf :: proc(z: complex_float) -> complex_float ---
|
||||
cabs :: proc(z: complex_double) -> double ---
|
||||
cabsf :: proc(z: complex_float) -> float ---
|
||||
cpow :: proc(x, y: complex_double) -> complex_double ---
|
||||
cpowf :: proc(x, y: complex_float) -> complex_float ---
|
||||
csqrt :: proc(z: complex_double) -> complex_double ---
|
||||
|
||||
@@ -102,6 +102,6 @@ when ODIN_OS == .Haiku {
|
||||
// read the value, or to produce an lvalue such that you can assign a different
|
||||
// error value to errno. To work around this, just expose it as a function like
|
||||
// it actually is.
|
||||
errno :: #force_inline proc() -> ^int {
|
||||
errno :: #force_inline proc "contextless" () -> ^int {
|
||||
return _get_errno()
|
||||
}
|
||||
|
||||
+12
-7
@@ -32,24 +32,21 @@ when ODIN_OS == .Windows {
|
||||
// the RDX register will contain zero and correctly set the flag to disable
|
||||
// stack unwinding.
|
||||
@(link_name="_setjmp")
|
||||
setjmp :: proc(env: ^jmp_buf, hack: rawptr = nil) -> int ---
|
||||
setjmp :: proc(env: ^jmp_buf, hack: rawptr = nil) -> int ---
|
||||
}
|
||||
} else {
|
||||
@(default_calling_convention="c")
|
||||
foreign libc {
|
||||
// 7.13.1 Save calling environment
|
||||
//
|
||||
// NOTE(dweiler): C11 requires setjmp be a macro, which means it won't
|
||||
// necessarily export a symbol named setjmp but rather _setjmp in the case
|
||||
// of musl, glibc, BSD libc, and msvcrt.
|
||||
@(link_name="_setjmp")
|
||||
setjmp :: proc(env: ^jmp_buf) -> int ---
|
||||
@(link_name=LSETJMP)
|
||||
setjmp :: proc(env: ^jmp_buf) -> int ---
|
||||
}
|
||||
}
|
||||
|
||||
@(default_calling_convention="c")
|
||||
foreign libc {
|
||||
// 7.13.2 Restore calling environment
|
||||
@(link_name=LLONGJMP)
|
||||
longjmp :: proc(env: ^jmp_buf, val: int) -> ! ---
|
||||
}
|
||||
|
||||
@@ -64,3 +61,11 @@ foreign libc {
|
||||
// The choice of 4096 bytes for storage of this type is more than enough on all
|
||||
// relevant platforms.
|
||||
jmp_buf :: struct #align(16) { _: [4096]char, }
|
||||
|
||||
when ODIN_OS == .NetBSD {
|
||||
@(private) LSETJMP :: "__setjmp14"
|
||||
@(private) LLONGJMP :: "__longjmp14"
|
||||
} else {
|
||||
@(private) LSETJMP :: "setjmp"
|
||||
@(private) LLONGJMP :: "longjmp"
|
||||
}
|
||||
|
||||
+36
-9
@@ -17,6 +17,12 @@ when ODIN_OS == .Windows {
|
||||
|
||||
FILE :: struct {}
|
||||
|
||||
Whence :: enum int {
|
||||
SET = SEEK_SET,
|
||||
CUR = SEEK_CUR,
|
||||
END = SEEK_END,
|
||||
}
|
||||
|
||||
// MSVCRT compatible.
|
||||
when ODIN_OS == .Windows {
|
||||
_IOFBF :: 0x0000
|
||||
@@ -101,6 +107,8 @@ when ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD {
|
||||
SEEK_CUR :: 1
|
||||
SEEK_END :: 2
|
||||
|
||||
TMP_MAX :: 308915776
|
||||
|
||||
foreign libc {
|
||||
__sF: [3]FILE
|
||||
}
|
||||
@@ -128,6 +136,8 @@ when ODIN_OS == .FreeBSD {
|
||||
SEEK_CUR :: 1
|
||||
SEEK_END :: 2
|
||||
|
||||
TMP_MAX :: 308915776
|
||||
|
||||
foreign libc {
|
||||
@(link_name="__stderrp") stderr: ^FILE
|
||||
@(link_name="__stdinp") stdin: ^FILE
|
||||
@@ -195,10 +205,21 @@ when ODIN_OS == .Haiku {
|
||||
}
|
||||
}
|
||||
|
||||
when ODIN_OS == .NetBSD {
|
||||
@(private) LRENAME :: "__posix_rename"
|
||||
@(private) LFGETPOS :: "__fgetpos50"
|
||||
@(private) LFSETPOS :: "__fsetpos50"
|
||||
} else {
|
||||
@(private) LRENAME :: "rename"
|
||||
@(private) LFGETPOS :: "fgetpos"
|
||||
@(private) LFSETPOS :: "fsetpos"
|
||||
}
|
||||
|
||||
@(default_calling_convention="c")
|
||||
foreign libc {
|
||||
// 7.21.4 Operations on files
|
||||
remove :: proc(filename: cstring) -> int ---
|
||||
@(link_name=LRENAME)
|
||||
rename :: proc(old, new: cstring) -> int ---
|
||||
tmpfile :: proc() -> ^FILE ---
|
||||
tmpnam :: proc(s: [^]char) -> [^]char ---
|
||||
@@ -240,8 +261,10 @@ foreign libc {
|
||||
fwrite :: proc(ptr: rawptr, size: size_t, nmemb: size_t, stream: ^FILE) -> size_t ---
|
||||
|
||||
// 7.21.9 File positioning functions
|
||||
@(link_name=LFGETPOS)
|
||||
fgetpos :: proc(stream: ^FILE, pos: ^fpos_t) -> int ---
|
||||
fseek :: proc(stream: ^FILE, offset: long, whence: int) -> int ---
|
||||
fseek :: proc(stream: ^FILE, offset: long, whence: Whence) -> int ---
|
||||
@(link_name=LFSETPOS)
|
||||
fsetpos :: proc(stream: ^FILE, pos: ^fpos_t) -> int ---
|
||||
ftell :: proc(stream: ^FILE) -> long ---
|
||||
rewind :: proc(stream: ^FILE) ---
|
||||
@@ -288,11 +311,11 @@ to_stream :: proc(file: ^FILE) -> io.Stream {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
|
||||
if fseek(file, long(offset), SEEK_SET) != 0 {
|
||||
if fseek(file, long(offset), .SET) != 0 {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
|
||||
defer fseek(file, long(curr), SEEK_SET)
|
||||
defer fseek(file, long(curr), .SET)
|
||||
|
||||
n = i64(fread(raw_data(p), size_of(byte), len(p), file))
|
||||
if n == 0 { err = unknown_or_eof(file) }
|
||||
@@ -307,17 +330,21 @@ to_stream :: proc(file: ^FILE) -> io.Stream {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
|
||||
if fseek(file, long(offset), SEEK_SET) != 0 {
|
||||
if fseek(file, long(offset), .SET) != 0 {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
|
||||
defer fseek(file, long(curr), SEEK_SET)
|
||||
defer fseek(file, long(curr), .SET)
|
||||
|
||||
n = i64(fwrite(raw_data(p), size_of(byte), len(p), file))
|
||||
if n == 0 { err = unknown_or_eof(file) }
|
||||
|
||||
case .Seek:
|
||||
if fseek(file, long(offset), int(whence)) != 0 {
|
||||
#assert(int(Whence.SET) == int(io.Seek_From.Start))
|
||||
#assert(int(Whence.CUR) == int(io.Seek_From.Current))
|
||||
#assert(int(Whence.END) == int(io.Seek_From.End))
|
||||
|
||||
if fseek(file, long(offset), Whence(whence)) != 0 {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
|
||||
@@ -326,9 +353,9 @@ to_stream :: proc(file: ^FILE) -> io.Stream {
|
||||
if curr == -1 {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
defer fseek(file, curr, SEEK_SET)
|
||||
defer fseek(file, curr, .SET)
|
||||
|
||||
if fseek(file, 0, SEEK_END) != 0 {
|
||||
if fseek(file, 0, .END) != 0 {
|
||||
return 0, unknown_or_eof(file)
|
||||
}
|
||||
|
||||
@@ -341,7 +368,7 @@ to_stream :: proc(file: ^FILE) -> io.Stream {
|
||||
return 0, .Empty
|
||||
|
||||
case .Query:
|
||||
return io.query_utility({ .Close, .Flush, .Read, .Read_At, .Write, .Write_At, .Seek, .Size })
|
||||
return io.query_utility({ .Close, .Flush, .Read, .Read_At, .Write, .Write_At, .Seek, .Size, .Query })
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
+17
-4
@@ -40,10 +40,9 @@ when ODIN_OS == .Linux {
|
||||
}
|
||||
|
||||
|
||||
when ODIN_OS == .Darwin {
|
||||
when ODIN_OS == .Darwin || ODIN_OS == .FreeBSD || ODIN_OS == .OpenBSD {
|
||||
RAND_MAX :: 0x7fffffff
|
||||
|
||||
// GLIBC and MUSL only
|
||||
@(private="file")
|
||||
@(default_calling_convention="c")
|
||||
foreign libc {
|
||||
@@ -55,6 +54,20 @@ when ODIN_OS == .Darwin {
|
||||
}
|
||||
}
|
||||
|
||||
when ODIN_OS == .NetBSD {
|
||||
RAND_MAX :: 0x7fffffff
|
||||
|
||||
@(private="file")
|
||||
@(default_calling_convention="c")
|
||||
foreign libc {
|
||||
__mb_cur_max: size_t
|
||||
}
|
||||
|
||||
MB_CUR_MAX :: #force_inline proc() -> size_t {
|
||||
return __mb_cur_max
|
||||
}
|
||||
}
|
||||
|
||||
// C does not declare what these values should be, as an implementation is free
|
||||
// to use any two distinct values it wants to indicate success or failure.
|
||||
// However, nobody actually does and everyone appears to have agreed upon these
|
||||
@@ -99,7 +112,7 @@ foreign libc {
|
||||
at_quick_exit :: proc(func: proc "c" ()) -> int ---
|
||||
exit :: proc(status: int) -> ! ---
|
||||
_Exit :: proc(status: int) -> ! ---
|
||||
getenv :: proc(name: cstring) -> [^]char ---
|
||||
getenv :: proc(name: cstring) -> cstring ---
|
||||
quick_exit :: proc(status: int) -> ! ---
|
||||
system :: proc(cmd: cstring) -> int ---
|
||||
|
||||
@@ -150,4 +163,4 @@ aligned_free :: #force_inline proc "c" (ptr: rawptr) {
|
||||
} else {
|
||||
free(ptr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ foreign libc {
|
||||
strtok :: proc(s1: [^]char, s2: cstring) -> [^]char ---
|
||||
|
||||
// 7.24.6 Miscellaneous functions
|
||||
strerror :: proc(errnum: int) -> [^]char ---
|
||||
strerror :: proc(errnum: int) -> cstring ---
|
||||
strlen :: proc(s: cstring) -> size_t ---
|
||||
}
|
||||
memset :: proc "c" (s: rawptr, c: int, n: size_t) -> rawptr {
|
||||
|
||||
+29
-3
@@ -50,30 +50,56 @@ when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Darwin || ODIN_OS =
|
||||
foreign libc {
|
||||
// 7.27.2 Time manipulation functions
|
||||
clock :: proc() -> clock_t ---
|
||||
@(link_name=LDIFFTIME)
|
||||
difftime :: proc(time1, time2: time_t) -> double ---
|
||||
@(link_name=LMKTIME)
|
||||
mktime :: proc(timeptr: ^tm) -> time_t ---
|
||||
@(link_name=LTIME)
|
||||
time :: proc(timer: ^time_t) -> time_t ---
|
||||
timespec_get :: proc(ts: ^timespec, base: int) -> int ---
|
||||
|
||||
// 7.27.3 Time conversion functions
|
||||
asctime :: proc(timeptr: ^tm) -> [^]char ---
|
||||
@(link_name=LCTIME)
|
||||
ctime :: proc(timer: ^time_t) -> [^]char ---
|
||||
@(link_name=LGMTIME)
|
||||
gmtime :: proc(timer: ^time_t) -> ^tm ---
|
||||
@(link_name=LLOCALTIME)
|
||||
localtime :: proc(timer: ^time_t) -> ^tm ---
|
||||
strftime :: proc(s: [^]char, maxsize: size_t, format: cstring, timeptr: ^tm) -> size_t ---
|
||||
}
|
||||
|
||||
when ODIN_OS == .NetBSD {
|
||||
@(private) LDIFFTIME :: "__difftime50"
|
||||
@(private) LMKTIME :: "__mktime50"
|
||||
@(private) LTIME :: "__time50"
|
||||
@(private) LCTIME :: "__ctime50"
|
||||
@(private) LGMTIME :: "__gmtime50"
|
||||
@(private) LLOCALTIME :: "__localtime50"
|
||||
} else {
|
||||
@(private) LDIFFTIME :: "difftime"
|
||||
@(private) LMKTIME :: "mktime"
|
||||
@(private) LTIME :: "time"
|
||||
@(private) LCTIME :: "ctime"
|
||||
@(private) LGMTIME :: "gmtime"
|
||||
@(private) LLOCALTIME :: "localtime"
|
||||
}
|
||||
|
||||
when ODIN_OS == .OpenBSD {
|
||||
CLOCKS_PER_SEC :: 100
|
||||
} else {
|
||||
CLOCKS_PER_SEC :: 1000000
|
||||
}
|
||||
|
||||
TIME_UTC :: 1
|
||||
TIME_UTC :: 1
|
||||
|
||||
time_t :: distinct i64
|
||||
time_t :: distinct i64
|
||||
|
||||
clock_t :: long
|
||||
when ODIN_OS == .FreeBSD || ODIN_OS == .NetBSD {
|
||||
clock_t :: distinct int32_t
|
||||
} else {
|
||||
clock_t :: distinct long
|
||||
}
|
||||
|
||||
timespec :: struct {
|
||||
tv_sec: time_t,
|
||||
|
||||
+25
-25
@@ -186,7 +186,7 @@ input_size_from_stream :: proc(z: ^Context_Stream_Input) -> (res: i64, err: Erro
|
||||
|
||||
input_size :: proc{input_size_from_memory, input_size_from_stream}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
read_slice_from_memory :: #force_inline proc(z: ^Context_Memory_Input, size: int) -> (res: []u8, err: io.Error) {
|
||||
#no_bounds_check {
|
||||
if len(z.input_data) >= size {
|
||||
@@ -203,7 +203,7 @@ read_slice_from_memory :: #force_inline proc(z: ^Context_Memory_Input, size: int
|
||||
}
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
read_slice_from_stream :: #force_inline proc(z: ^Context_Stream_Input, size: int) -> (res: []u8, err: io.Error) {
|
||||
// TODO: REMOVE ALL USE OF context.temp_allocator here
|
||||
// there is literally no need for it
|
||||
@@ -214,13 +214,13 @@ read_slice_from_stream :: #force_inline proc(z: ^Context_Stream_Input, size: int
|
||||
|
||||
read_slice :: proc{read_slice_from_memory, read_slice_from_stream}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
read_data :: #force_inline proc(z: ^$C, $T: typeid) -> (res: T, err: io.Error) {
|
||||
b := read_slice(z, size_of(T)) or_return
|
||||
return (^T)(&b[0])^, nil
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
read_u8_from_memory :: #force_inline proc(z: ^Context_Memory_Input) -> (res: u8, err: io.Error) {
|
||||
#no_bounds_check {
|
||||
if len(z.input_data) >= 1 {
|
||||
@@ -232,7 +232,7 @@ read_u8_from_memory :: #force_inline proc(z: ^Context_Memory_Input) -> (res: u8,
|
||||
return 0, .EOF
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
read_u8_from_stream :: #force_inline proc(z: ^Context_Stream_Input) -> (res: u8, err: io.Error) {
|
||||
b := read_slice_from_stream(z, 1) or_return
|
||||
return b[0], nil
|
||||
@@ -242,7 +242,7 @@ read_u8 :: proc{read_u8_from_memory, read_u8_from_stream}
|
||||
|
||||
// You would typically only use this at the end of Inflate, to drain bits from the code buffer
|
||||
// preferentially.
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
read_u8_prefer_code_buffer_lsb :: #force_inline proc(z: ^$C) -> (res: u8, err: io.Error) {
|
||||
if z.num_bits >= 8 {
|
||||
res = u8(read_bits_no_refill_lsb(z, 8))
|
||||
@@ -257,7 +257,7 @@ read_u8_prefer_code_buffer_lsb :: #force_inline proc(z: ^$C) -> (res: u8, err: i
|
||||
return
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
peek_data_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid) -> (res: T, err: io.Error) {
|
||||
size :: size_of(T)
|
||||
|
||||
@@ -275,7 +275,7 @@ peek_data_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid
|
||||
}
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
peek_data_at_offset_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid, #any_int offset: int) -> (res: T, err: io.Error) {
|
||||
size :: size_of(T)
|
||||
|
||||
@@ -293,7 +293,7 @@ peek_data_at_offset_from_memory :: #force_inline proc(z: ^Context_Memory_Input,
|
||||
}
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
peek_data_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid) -> (res: T, err: io.Error) {
|
||||
size :: size_of(T)
|
||||
|
||||
@@ -317,7 +317,7 @@ peek_data_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid
|
||||
return res, .None
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
peek_data_at_offset_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid, #any_int offset: int) -> (res: T, err: io.Error) {
|
||||
size :: size_of(T)
|
||||
|
||||
@@ -352,14 +352,14 @@ peek_data :: proc{peek_data_from_memory, peek_data_from_stream, peek_data_at_off
|
||||
|
||||
|
||||
// Sliding window read back
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
peek_back_byte :: #force_inline proc(z: ^$C, offset: i64) -> (res: u8, err: io.Error) {
|
||||
// Look back into the sliding window.
|
||||
return z.output.buf[z.bytes_written - offset], .None
|
||||
}
|
||||
|
||||
// Generalized bit reader LSB
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width := i8(48)) {
|
||||
refill := u64(width)
|
||||
b := u64(0)
|
||||
@@ -385,7 +385,7 @@ refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width :=
|
||||
}
|
||||
|
||||
// Generalized bit reader LSB
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
refill_lsb_from_stream :: proc(z: ^Context_Stream_Input, width := i8(24)) {
|
||||
refill := u64(width)
|
||||
|
||||
@@ -414,13 +414,13 @@ refill_lsb_from_stream :: proc(z: ^Context_Stream_Input, width := i8(24)) {
|
||||
refill_lsb :: proc{refill_lsb_from_memory, refill_lsb_from_stream}
|
||||
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
consume_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) {
|
||||
z.code_buffer >>= width
|
||||
z.num_bits -= u64(width)
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
consume_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) {
|
||||
z.code_buffer >>= width
|
||||
z.num_bits -= u64(width)
|
||||
@@ -428,7 +428,7 @@ consume_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, wid
|
||||
|
||||
consume_bits_lsb :: proc{consume_bits_lsb_from_memory, consume_bits_lsb_from_stream}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
peek_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
|
||||
if z.num_bits < u64(width) {
|
||||
refill_lsb(z)
|
||||
@@ -436,7 +436,7 @@ peek_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width:
|
||||
return u32(z.code_buffer &~ (~u64(0) << width))
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
peek_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
|
||||
if z.num_bits < u64(width) {
|
||||
refill_lsb(z)
|
||||
@@ -446,13 +446,13 @@ peek_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width:
|
||||
|
||||
peek_bits_lsb :: proc{peek_bits_lsb_from_memory, peek_bits_lsb_from_stream}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
peek_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
|
||||
assert(z.num_bits >= u64(width))
|
||||
return u32(z.code_buffer &~ (~u64(0) << width))
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
peek_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
|
||||
assert(z.num_bits >= u64(width))
|
||||
return u32(z.code_buffer &~ (~u64(0) << width))
|
||||
@@ -460,14 +460,14 @@ peek_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Inp
|
||||
|
||||
peek_bits_no_refill_lsb :: proc{peek_bits_no_refill_lsb_from_memory, peek_bits_no_refill_lsb_from_stream}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
read_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
|
||||
k := #force_inline peek_bits_lsb(z, width)
|
||||
#force_inline consume_bits_lsb(z, width)
|
||||
return k
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
read_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
|
||||
k := peek_bits_lsb(z, width)
|
||||
consume_bits_lsb(z, width)
|
||||
@@ -476,14 +476,14 @@ read_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width:
|
||||
|
||||
read_bits_lsb :: proc{read_bits_lsb_from_memory, read_bits_lsb_from_stream}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
read_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
|
||||
k := #force_inline peek_bits_no_refill_lsb(z, width)
|
||||
#force_inline consume_bits_lsb(z, width)
|
||||
return k
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
read_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
|
||||
k := peek_bits_no_refill_lsb(z, width)
|
||||
consume_bits_lsb(z, width)
|
||||
@@ -493,14 +493,14 @@ read_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Inp
|
||||
read_bits_no_refill_lsb :: proc{read_bits_no_refill_lsb_from_memory, read_bits_no_refill_lsb_from_stream}
|
||||
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
discard_to_next_byte_lsb_from_memory :: proc(z: ^Context_Memory_Input) {
|
||||
discard := u8(z.num_bits & 7)
|
||||
#force_inline consume_bits_lsb(z, discard)
|
||||
}
|
||||
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
discard_to_next_byte_lsb_from_stream :: proc(z: ^Context_Stream_Input) {
|
||||
discard := u8(z.num_bits & 7)
|
||||
consume_bits_lsb(z, discard)
|
||||
|
||||
@@ -12,6 +12,7 @@ package compress_zlib
|
||||
|
||||
import "core:compress"
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:mem"
|
||||
import "core:io"
|
||||
import "core:hash"
|
||||
@@ -120,23 +121,17 @@ Huffman_Table :: struct {
|
||||
}
|
||||
|
||||
// Implementation starts here
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
|
||||
assert(bits <= 16)
|
||||
// NOTE: Can optimize with llvm.bitreverse.i64 or some bit twiddling
|
||||
// by reversing all of the bits and masking out the unneeded ones.
|
||||
r = n
|
||||
r = ((r & 0xAAAA) >> 1) | ((r & 0x5555) << 1)
|
||||
r = ((r & 0xCCCC) >> 2) | ((r & 0x3333) << 2)
|
||||
r = ((r & 0xF0F0) >> 4) | ((r & 0x0F0F) << 4)
|
||||
r = ((r & 0xFF00) >> 8) | ((r & 0x00FF) << 8)
|
||||
r = intrinsics.reverse_bits(n)
|
||||
|
||||
r >>= (16 - bits)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
|
||||
/*
|
||||
That we get here at all means that we didn't pass an expected output size,
|
||||
@@ -154,7 +149,7 @@ grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
|
||||
TODO: Make these return compress.Error.
|
||||
*/
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
write_byte :: #force_inline proc(z: ^$C, c: u8) -> (err: io.Error) #no_bounds_check {
|
||||
/*
|
||||
Resize if needed.
|
||||
@@ -173,7 +168,7 @@ write_byte :: #force_inline proc(z: ^$C, c: u8) -> (err: io.Error) #no_bounds_ch
|
||||
return .None
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
repl_byte :: proc(z: ^$C, count: u16, c: u8) -> (err: io.Error) #no_bounds_check {
|
||||
/*
|
||||
TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
|
||||
@@ -201,7 +196,7 @@ repl_byte :: proc(z: ^$C, count: u16, c: u8) -> (err: io.Error) #no_bounds_check
|
||||
return .None
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
repl_bytes :: proc(z: ^$C, count: u16, distance: u16) -> (err: io.Error) {
|
||||
/*
|
||||
TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
|
||||
@@ -234,8 +229,8 @@ allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_T
|
||||
return new(Huffman_Table, allocator), nil
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
|
||||
@(optimization_mode="favor_size")
|
||||
build_huffman :: #force_no_inline proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
|
||||
sizes: [HUFFMAN_MAX_BITS+1]int
|
||||
next_code: [HUFFMAN_MAX_BITS+1]int
|
||||
|
||||
@@ -293,7 +288,7 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
decode_huffman_slowpath :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
|
||||
code := u16(compress.peek_bits_lsb(z,16))
|
||||
|
||||
@@ -324,7 +319,7 @@ decode_huffman_slowpath :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Erro
|
||||
return r, nil
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
decode_huffman :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
|
||||
if z.num_bits < 16 {
|
||||
if z.num_bits > 63 {
|
||||
@@ -344,7 +339,7 @@ decode_huffman :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bo
|
||||
return decode_huffman_slowpath(z, t)
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
parse_huffman_block :: proc(z: ^$C, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
|
||||
#no_bounds_check for {
|
||||
value, e := decode_huffman(z, z_repeat)
|
||||
@@ -413,7 +408,7 @@ parse_huffman_block :: proc(z: ^$C, z_repeat, z_offset: ^Huffman_Table) -> (err:
|
||||
}
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
inflate_from_context :: proc(using ctx: ^compress.Context_Memory_Input, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
|
||||
/*
|
||||
ctx.output must be a bytes.Buffer for now. We'll add a separate implementation that writes to a stream.
|
||||
@@ -486,7 +481,7 @@ inflate_from_context :: proc(using ctx: ^compress.Context_Memory_Input, raw := f
|
||||
|
||||
// TODO: Check alignment of reserve/resize.
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
inflate_raw :: proc(z: ^$C, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
|
||||
context.allocator = allocator
|
||||
expected_output_size := expected_output_size
|
||||
@@ -670,4 +665,4 @@ inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, raw := fals
|
||||
return inflate_raw(&ctx, expected_output_size=expected_output_size)
|
||||
}
|
||||
|
||||
inflate :: proc{inflate_from_context, inflate_from_byte_array}
|
||||
inflate :: proc{inflate_from_context, inflate_from_byte_array}
|
||||
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
Package list implements an intrusive doubly-linked list.
|
||||
|
||||
An intrusive container requires a `Node` to be embedded in your own structure, like this:
|
||||
|
||||
My_String :: struct {
|
||||
node: list.Node,
|
||||
value: string,
|
||||
}
|
||||
|
||||
Embedding the members of a `list.Node` in your structure with the `using` keyword is also allowed:
|
||||
|
||||
My_String :: struct {
|
||||
using node: list.Node,
|
||||
value: string,
|
||||
}
|
||||
|
||||
Here is a full example:
|
||||
|
||||
package test
|
||||
|
||||
import "core:fmt"
|
||||
import "core:container/intrusive/list"
|
||||
|
||||
main :: proc() {
|
||||
l: list.List
|
||||
|
||||
one := My_String{value="Hello"}
|
||||
two := My_String{value="World"}
|
||||
|
||||
list.push_back(&l, &one.node)
|
||||
list.push_back(&l, &two.node)
|
||||
|
||||
iter := list.iterator_head(l, My_String, "node")
|
||||
for s in list.iterate_next(&iter) {
|
||||
fmt.println(s.value)
|
||||
}
|
||||
}
|
||||
|
||||
My_String :: struct {
|
||||
node: list.Node,
|
||||
value: string,
|
||||
}
|
||||
|
||||
*/
|
||||
package container_intrusive_list
|
||||
@@ -18,11 +18,18 @@ List :: struct {
|
||||
tail: ^Node,
|
||||
}
|
||||
|
||||
|
||||
// The list link you must include in your own structure.
|
||||
Node :: struct {
|
||||
prev, next: ^Node,
|
||||
}
|
||||
|
||||
/*
|
||||
Inserts a new element at the front of the list with O(1) time complexity.
|
||||
|
||||
**Inputs**
|
||||
- list: The container list
|
||||
- node: The node member of the user-defined element structure
|
||||
*/
|
||||
push_front :: proc "contextless" (list: ^List, node: ^Node) {
|
||||
if list.head != nil {
|
||||
list.head.prev = node
|
||||
@@ -33,7 +40,13 @@ push_front :: proc "contextless" (list: ^List, node: ^Node) {
|
||||
node.prev, node.next = nil, nil
|
||||
}
|
||||
}
|
||||
/*
|
||||
Inserts a new element at the back of the list with O(1) time complexity.
|
||||
|
||||
**Inputs**
|
||||
- list: The container list
|
||||
- node: The node member of the user-defined element structure
|
||||
*/
|
||||
push_back :: proc "contextless" (list: ^List, node: ^Node) {
|
||||
if list.tail != nil {
|
||||
list.tail.next = node
|
||||
@@ -45,6 +58,13 @@ push_back :: proc "contextless" (list: ^List, node: ^Node) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Removes an element from a list with O(1) time complexity.
|
||||
|
||||
**Inputs**
|
||||
- list: The container list
|
||||
- node: The node member of the user-defined element structure to be removed
|
||||
*/
|
||||
remove :: proc "contextless" (list: ^List, node: ^Node) {
|
||||
if node != nil {
|
||||
if node.next != nil {
|
||||
@@ -61,7 +81,13 @@ remove :: proc "contextless" (list: ^List, node: ^Node) {
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
Removes from the given list all elements that satisfy a condition with O(N) time complexity.
|
||||
|
||||
**Inputs**
|
||||
- list: The container list
|
||||
- to_erase: The condition procedure. It should return `true` if a node should be removed, `false` otherwise
|
||||
*/
|
||||
remove_by_proc :: proc(list: ^List, to_erase: proc(^Node) -> bool) {
|
||||
for node := list.head; node != nil; {
|
||||
next := node.next
|
||||
@@ -82,7 +108,13 @@ remove_by_proc :: proc(list: ^List, to_erase: proc(^Node) -> bool) {
|
||||
node = next
|
||||
}
|
||||
}
|
||||
/*
|
||||
Removes from the given list all elements that satisfy a condition with O(N) time complexity.
|
||||
|
||||
**Inputs**
|
||||
- list: The container list
|
||||
- to_erase: The _contextless_ condition procedure. It should return `true` if a node should be removed, `false` otherwise
|
||||
*/
|
||||
remove_by_proc_contextless :: proc(list: ^List, to_erase: proc "contextless" (^Node) -> bool) {
|
||||
for node := list.head; node != nil; {
|
||||
next := node.next
|
||||
@@ -104,12 +136,26 @@ remove_by_proc_contextless :: proc(list: ^List, to_erase: proc "contextless" (^N
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Checks whether the given list does not contain any element.
|
||||
|
||||
**Inputs**
|
||||
- list: The container list
|
||||
|
||||
**Returns** `true` if `list` is empty, `false` otherwise
|
||||
*/
|
||||
is_empty :: proc "contextless" (list: ^List) -> bool {
|
||||
return list.head == nil
|
||||
}
|
||||
|
||||
/*
|
||||
Removes and returns the element at the front of the list with O(1) time complexity.
|
||||
|
||||
**Inputs**
|
||||
- list: The container list
|
||||
|
||||
**Returns** The node member of the user-defined element structure, or `nil` if the list is empty
|
||||
*/
|
||||
pop_front :: proc "contextless" (list: ^List) -> ^Node {
|
||||
link := list.head
|
||||
if link == nil {
|
||||
@@ -130,6 +176,14 @@ pop_front :: proc "contextless" (list: ^List) -> ^Node {
|
||||
return link
|
||||
|
||||
}
|
||||
/*
|
||||
Removes and returns the element at the back of the list with O(1) time complexity.
|
||||
|
||||
**Inputs**
|
||||
- list: The container list
|
||||
|
||||
**Returns** The node member of the user-defined element structure, or `nil` if the list is empty
|
||||
*/
|
||||
pop_back :: proc "contextless" (list: ^List) -> ^Node {
|
||||
link := list.tail
|
||||
if link == nil {
|
||||
@@ -151,29 +205,102 @@ pop_back :: proc "contextless" (list: ^List) -> ^Node {
|
||||
}
|
||||
|
||||
|
||||
|
||||
Iterator :: struct($T: typeid) {
|
||||
curr: ^Node,
|
||||
offset: uintptr,
|
||||
}
|
||||
|
||||
/*
|
||||
Creates an iterator pointing at the head of the given list. For an example, see `iterate_next`.
|
||||
|
||||
**Inputs**
|
||||
- list: The container list
|
||||
- T: The type of the list's elements
|
||||
- field_name: The name of the node field in the `T` structure
|
||||
|
||||
**Returns** An iterator pointing at the head of `list`
|
||||
|
||||
*/
|
||||
iterator_head :: proc "contextless" (list: List, $T: typeid, $field_name: string) -> Iterator(T)
|
||||
where intrinsics.type_has_field(T, field_name),
|
||||
intrinsics.type_field_type(T, field_name) == Node {
|
||||
return {list.head, offset_of_by_string(T, field_name)}
|
||||
}
|
||||
/*
|
||||
Creates an iterator pointing at the tail of the given list. For an example, see `iterate_prev`.
|
||||
|
||||
**Inputs**
|
||||
- list: The container list
|
||||
- T: The type of the list's elements
|
||||
- field_name: The name of the node field in the `T` structure
|
||||
|
||||
**Returns** An iterator pointing at the tail of `list`
|
||||
|
||||
*/
|
||||
iterator_tail :: proc "contextless" (list: List, $T: typeid, $field_name: string) -> Iterator(T)
|
||||
where intrinsics.type_has_field(T, field_name),
|
||||
intrinsics.type_field_type(T, field_name) == Node {
|
||||
return {list.tail, offset_of_by_string(T, field_name)}
|
||||
}
|
||||
/*
|
||||
Creates an iterator pointing at the specified node of a list.
|
||||
|
||||
**Inputs**
|
||||
- node: a list node
|
||||
- T: The type of the list's elements
|
||||
- field_name: The name of the node field in the `T` structure
|
||||
|
||||
**Returns** An iterator pointing at `node`
|
||||
|
||||
*/
|
||||
iterator_from_node :: proc "contextless" (node: ^Node, $T: typeid, $field_name: string) -> Iterator(T)
|
||||
where intrinsics.type_has_field(T, field_name),
|
||||
intrinsics.type_field_type(T, field_name) == Node {
|
||||
return {node, offset_of_by_string(T, field_name)}
|
||||
}
|
||||
|
||||
/*
|
||||
Retrieves the next element in a list and advances the iterator.
|
||||
|
||||
**Inputs**
|
||||
- it: The iterator
|
||||
|
||||
**Returns**
|
||||
- ptr: The next list element
|
||||
- ok: `true` if the element is valid (the iterator could advance), `false` otherwise
|
||||
|
||||
Example:
|
||||
|
||||
import "core:fmt"
|
||||
import "core:container/intrusive/list"
|
||||
|
||||
iterate_next_example :: proc() {
|
||||
l: list.List
|
||||
|
||||
one := My_Struct{value=1}
|
||||
two := My_Struct{value=2}
|
||||
|
||||
list.push_back(&l, &one.node)
|
||||
list.push_back(&l, &two.node)
|
||||
|
||||
it := list.iterator_head(l, My_Struct, "node")
|
||||
for num in list.iterate_next(&it) {
|
||||
fmt.println(num.value)
|
||||
}
|
||||
}
|
||||
|
||||
My_Struct :: struct {
|
||||
node : list.Node,
|
||||
value: int,
|
||||
}
|
||||
|
||||
Output:
|
||||
|
||||
1
|
||||
2
|
||||
|
||||
*/
|
||||
iterate_next :: proc "contextless" (it: ^Iterator($T)) -> (ptr: ^T, ok: bool) {
|
||||
node := it.curr
|
||||
if node == nil {
|
||||
@@ -183,7 +310,47 @@ iterate_next :: proc "contextless" (it: ^Iterator($T)) -> (ptr: ^T, ok: bool) {
|
||||
|
||||
return (^T)(uintptr(node) - it.offset), true
|
||||
}
|
||||
/*
|
||||
Retrieves the previous element in a list and recedes the iterator.
|
||||
|
||||
**Inputs**
|
||||
- it: The iterator
|
||||
|
||||
**Returns**
|
||||
- ptr: The previous list element
|
||||
- ok: `true` if the element is valid (the iterator could recede), `false` otherwise
|
||||
|
||||
Example:
|
||||
|
||||
import "core:fmt"
|
||||
import "core:container/intrusive/list"
|
||||
|
||||
iterate_prev_example :: proc() {
|
||||
l: list.List
|
||||
|
||||
one := My_Struct{value=1}
|
||||
two := My_Struct{value=2}
|
||||
|
||||
list.push_back(&l, &one.node)
|
||||
list.push_back(&l, &two.node)
|
||||
|
||||
it := list.iterator_tail(l, My_Struct, "node")
|
||||
for num in list.iterate_prev(&it) {
|
||||
fmt.println(num.value)
|
||||
}
|
||||
}
|
||||
|
||||
My_Struct :: struct {
|
||||
node : list.Node,
|
||||
value: int,
|
||||
}
|
||||
|
||||
Output:
|
||||
|
||||
2
|
||||
1
|
||||
|
||||
*/
|
||||
iterate_prev :: proc "contextless" (it: ^Iterator($T)) -> (ptr: ^T, ok: bool) {
|
||||
node := it.curr
|
||||
if node == nil {
|
||||
@@ -192,4 +359,4 @@ iterate_prev :: proc "contextless" (it: ^Iterator($T)) -> (ptr: ^T, ok: bool) {
|
||||
it.curr = node.prev
|
||||
|
||||
return (^T)(uintptr(node) - it.offset), true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -95,11 +95,11 @@ front_ptr :: proc(q: ^$Q/Queue($T)) -> ^T {
|
||||
}
|
||||
|
||||
back :: proc(q: ^$Q/Queue($T)) -> T {
|
||||
idx := (q.offset+uint(q.len))%builtin.len(q.data)
|
||||
idx := (q.offset+uint(q.len - 1))%builtin.len(q.data)
|
||||
return q.data[idx]
|
||||
}
|
||||
back_ptr :: proc(q: ^$Q/Queue($T)) -> ^T {
|
||||
idx := (q.offset+uint(q.len))%builtin.len(q.data)
|
||||
idx := (q.offset+uint(q.len - 1))%builtin.len(q.data)
|
||||
return &q.data[idx]
|
||||
}
|
||||
|
||||
|
||||
@@ -7,9 +7,8 @@ STRIDE :: 4
|
||||
|
||||
// Context is a keyed AES (ECB) instance.
|
||||
Context :: struct {
|
||||
_sk_exp: [120]u64,
|
||||
_num_rounds: int,
|
||||
_is_initialized: bool,
|
||||
_sk_exp: [120]u64,
|
||||
_num_rounds: int,
|
||||
}
|
||||
|
||||
// init initializes a context for AES with the provided key.
|
||||
@@ -18,13 +17,10 @@ init :: proc(ctx: ^Context, key: []byte) {
|
||||
|
||||
ctx._num_rounds = keysched(skey[:], key)
|
||||
skey_expand(ctx._sk_exp[:], skey[:], ctx._num_rounds)
|
||||
ctx._is_initialized = true
|
||||
}
|
||||
|
||||
// encrypt_block sets `dst` to `AES-ECB-Encrypt(src)`.
|
||||
encrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
q: [8]u64
|
||||
load_blockx1(&q, src)
|
||||
_encrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
|
||||
@@ -33,8 +29,6 @@ encrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
|
||||
// decrypt_block sets `dst` to `AES-ECB-Decrypt(src)`.
|
||||
decrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
q: [8]u64
|
||||
load_blockx1(&q, src)
|
||||
_decrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
|
||||
@@ -43,8 +37,6 @@ decrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
|
||||
// encrypt_blocks sets `dst` to `AES-ECB-Encrypt(src[0], .. src[n])`.
|
||||
encrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
q: [8]u64 = ---
|
||||
src, dst := src, dst
|
||||
|
||||
@@ -67,8 +59,6 @@ encrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
|
||||
|
||||
// decrypt_blocks sets dst to `AES-ECB-Decrypt(src[0], .. src[n])`.
|
||||
decrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
q: [8]u64 = ---
|
||||
src, dst := src, dst
|
||||
|
||||
|
||||
@@ -80,8 +80,8 @@ ghash :: proc "contextless" (dst, key, data: []byte) {
|
||||
h2 := h0 ~ h1
|
||||
h2r := h0r ~ h1r
|
||||
|
||||
src: []byte
|
||||
for l > 0 {
|
||||
src: []byte = ---
|
||||
if l >= _aes.GHASH_BLOCK_SIZE {
|
||||
src = buf
|
||||
buf = buf[_aes.GHASH_BLOCK_SIZE:]
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
//+build amd64
|
||||
package aes_hw_intel
|
||||
|
||||
import "core:sys/info"
|
||||
|
||||
// is_supported returns true iff hardware accelerated AES
|
||||
// is supported.
|
||||
is_supported :: proc "contextless" () -> bool {
|
||||
features, ok := info.cpu_features.?
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
// Note: Everything with AES-NI and PCLMULQDQ has support for
|
||||
// the required SSE extensions.
|
||||
req_features :: info.CPU_Features{
|
||||
.sse2,
|
||||
.ssse3,
|
||||
.sse41,
|
||||
.aes,
|
||||
.pclmulqdq,
|
||||
}
|
||||
return features >= req_features
|
||||
}
|
||||
|
||||
// Context is a keyed AES (ECB) instance.
|
||||
Context :: struct {
|
||||
// Note: The ideal thing to do is for the expanded round keys to be
|
||||
// arrays of `__m128i`, however that implies alignment (or using AVX).
|
||||
//
|
||||
// All the people using e-waste processors that don't support an
|
||||
// instruction set that has been around for over 10 years are why
|
||||
// we can't have nice things.
|
||||
_sk_exp_enc: [15][16]byte,
|
||||
_sk_exp_dec: [15][16]byte,
|
||||
_num_rounds: int,
|
||||
}
|
||||
|
||||
// init initializes a context for AES with the provided key.
|
||||
init :: proc(ctx: ^Context, key: []byte) {
|
||||
keysched(ctx, key)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,277 @@
|
||||
// Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
|
||||
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
|
||||
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
//+build amd64
|
||||
package aes_hw_intel
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_aes"
|
||||
import "core:simd/x86"
|
||||
|
||||
@(private = "file")
|
||||
GHASH_STRIDE_HW :: 4
|
||||
@(private = "file")
|
||||
GHASH_STRIDE_BYTES_HW :: GHASH_STRIDE_HW * _aes.GHASH_BLOCK_SIZE
|
||||
|
||||
// GHASH is defined over elements of GF(2^128) with "full little-endian"
|
||||
// representation: leftmost byte is least significant, and, within each
|
||||
// byte, leftmost _bit_ is least significant. The natural ordering in
|
||||
// x86 is "mixed little-endian": bytes are ordered from least to most
|
||||
// significant, but bits within a byte are in most-to-least significant
|
||||
// order. Going to full little-endian representation would require
|
||||
// reversing bits within each byte, which is doable but expensive.
|
||||
//
|
||||
// Instead, we go to full big-endian representation, by swapping bytes
|
||||
// around, which is done with a single _mm_shuffle_epi8() opcode (it
|
||||
// comes with SSSE3; all CPUs that offer pclmulqdq also have SSSE3). We
|
||||
// can use a full big-endian representation because in a carryless
|
||||
// multiplication, we have a nice bit reversal property:
|
||||
//
|
||||
// rev_128(x) * rev_128(y) = rev_255(x * y)
|
||||
//
|
||||
// So by using full big-endian, we still get the right result, except
|
||||
// that it is right-shifted by 1 bit. The left-shift is relatively
|
||||
// inexpensive, and it can be mutualised.
|
||||
//
|
||||
// Since SSE2 opcodes do not have facilities for shifting full 128-bit
|
||||
// values with bit precision, we have to break down values into 64-bit
|
||||
// chunks. We number chunks from 0 to 3 in left to right order.
|
||||
|
||||
@(private = "file")
|
||||
_BYTESWAP_INDEX: x86.__m128i : { 0x08090a0b0c0d0e0f, 0x0001020304050607 }
|
||||
|
||||
@(private = "file", require_results, enable_target_feature = "sse2,ssse3")
|
||||
byteswap :: #force_inline proc "contextless" (x: x86.__m128i) -> x86.__m128i {
|
||||
return x86._mm_shuffle_epi8(x, _BYTESWAP_INDEX)
|
||||
}
|
||||
|
||||
// From a 128-bit value kw, compute kx as the XOR of the two 64-bit
|
||||
// halves of kw (into the right half of kx; left half is unspecified),
|
||||
// and return kx.
|
||||
@(private = "file", require_results, enable_target_feature = "sse2")
|
||||
bk :: #force_inline proc "contextless" (kw: x86.__m128i) -> x86.__m128i {
|
||||
return x86._mm_xor_si128(kw, x86._mm_shuffle_epi32(kw, 0x0e))
|
||||
}
|
||||
|
||||
// Combine two 64-bit values (k0:k1) into a 128-bit (kw) value and
|
||||
// the XOR of the two values (kx), and return (kw, kx).
|
||||
@(private = "file", enable_target_feature = "sse2")
|
||||
pbk :: #force_inline proc "contextless" (k0, k1: x86.__m128i) -> (x86.__m128i, x86.__m128i) {
|
||||
kw := x86._mm_unpacklo_epi64(k1, k0)
|
||||
kx := x86._mm_xor_si128(k0, k1)
|
||||
return kw, kx
|
||||
}
|
||||
|
||||
// Left-shift by 1 bit a 256-bit value (in four 64-bit words).
|
||||
@(private = "file", require_results, enable_target_feature = "sse2")
|
||||
sl_256 :: #force_inline proc "contextless" (x0, x1, x2, x3: x86.__m128i) -> (x86.__m128i, x86.__m128i, x86.__m128i, x86.__m128i) {
|
||||
x0, x1, x2, x3 := x0, x1, x2, x3
|
||||
|
||||
x0 = x86._mm_or_si128(x86._mm_slli_epi64(x0, 1), x86._mm_srli_epi64(x1, 63))
|
||||
x1 = x86._mm_or_si128(x86._mm_slli_epi64(x1, 1), x86._mm_srli_epi64(x2, 63))
|
||||
x2 = x86._mm_or_si128(x86._mm_slli_epi64(x2, 1), x86._mm_srli_epi64(x3, 63))
|
||||
x3 = x86._mm_slli_epi64(x3, 1)
|
||||
|
||||
return x0, x1, x2, x3
|
||||
}
|
||||
|
||||
// Perform reduction in GF(2^128).
|
||||
@(private = "file", require_results, enable_target_feature = "sse2")
|
||||
reduce_f128 :: #force_inline proc "contextless" (x0, x1, x2, x3: x86.__m128i) -> (x86.__m128i, x86.__m128i) {
|
||||
x0, x1, x2 := x0, x1, x2
|
||||
|
||||
x1 = x86._mm_xor_si128(
|
||||
x1,
|
||||
x86._mm_xor_si128(
|
||||
x86._mm_xor_si128(
|
||||
x3,
|
||||
x86._mm_srli_epi64(x3, 1)),
|
||||
x86._mm_xor_si128(
|
||||
x86._mm_srli_epi64(x3, 2),
|
||||
x86._mm_srli_epi64(x3, 7))))
|
||||
x2 = x86._mm_xor_si128(
|
||||
x86._mm_xor_si128(
|
||||
x2,
|
||||
x86._mm_slli_epi64(x3, 63)),
|
||||
x86._mm_xor_si128(
|
||||
x86._mm_slli_epi64(x3, 62),
|
||||
x86._mm_slli_epi64(x3, 57)))
|
||||
x0 = x86._mm_xor_si128(
|
||||
x0,
|
||||
x86._mm_xor_si128(
|
||||
x86._mm_xor_si128(
|
||||
x2,
|
||||
x86._mm_srli_epi64(x2, 1)),
|
||||
x86._mm_xor_si128(
|
||||
x86._mm_srli_epi64(x2, 2),
|
||||
x86._mm_srli_epi64(x2, 7))))
|
||||
x1 = x86._mm_xor_si128(
|
||||
x86._mm_xor_si128(
|
||||
x1,
|
||||
x86._mm_slli_epi64(x2, 63)),
|
||||
x86._mm_xor_si128(
|
||||
x86._mm_slli_epi64(x2, 62),
|
||||
x86._mm_slli_epi64(x2, 57)))
|
||||
|
||||
return x0, x1
|
||||
}
|
||||
|
||||
// Square value kw in GF(2^128) into (dw,dx).
|
||||
@(private = "file", require_results, enable_target_feature = "sse2,pclmul")
|
||||
square_f128 :: #force_inline proc "contextless" (kw: x86.__m128i) -> (x86.__m128i, x86.__m128i) {
|
||||
z1 := x86._mm_clmulepi64_si128(kw, kw, 0x11)
|
||||
z3 := x86._mm_clmulepi64_si128(kw, kw, 0x00)
|
||||
z0 := x86._mm_shuffle_epi32(z1, 0x0E)
|
||||
z2 := x86._mm_shuffle_epi32(z3, 0x0E)
|
||||
z0, z1, z2, z3 = sl_256(z0, z1, z2, z3)
|
||||
z0, z1 = reduce_f128(z0, z1, z2, z3)
|
||||
return pbk(z0, z1)
|
||||
}
|
||||
|
||||
// ghash calculates the GHASH of data, with the key `key`, and input `dst`
|
||||
// and `data`, and stores the resulting digest in `dst`.
|
||||
//
|
||||
// Note: `dst` is both an input and an output, to support easy implementation
|
||||
// of GCM.
|
||||
@(enable_target_feature = "sse2,ssse3,pclmul")
|
||||
ghash :: proc "contextless" (dst, key, data: []byte) #no_bounds_check {
|
||||
if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
|
||||
intrinsics.trap()
|
||||
}
|
||||
|
||||
// Note: BearSSL opts to copy the remainder into a zero-filled
|
||||
// 64-byte buffer. We do something slightly more simple.
|
||||
|
||||
// Load key and dst (h and y).
|
||||
yw := intrinsics.unaligned_load((^x86.__m128i)(raw_data(dst)))
|
||||
h1w := intrinsics.unaligned_load((^x86.__m128i)(raw_data(key)))
|
||||
yw = byteswap(yw)
|
||||
h1w = byteswap(h1w)
|
||||
h1x := bk(h1w)
|
||||
|
||||
// Process 4 blocks at a time
|
||||
buf := data
|
||||
l := len(buf)
|
||||
if l >= GHASH_STRIDE_BYTES_HW {
|
||||
// Compute h2 = h^2
|
||||
h2w, h2x := square_f128(h1w)
|
||||
|
||||
// Compute h3 = h^3 = h*(h^2)
|
||||
t1 := x86._mm_clmulepi64_si128(h1w, h2w, 0x11)
|
||||
t3 := x86._mm_clmulepi64_si128(h1w, h2w, 0x00)
|
||||
t2 := x86._mm_xor_si128(
|
||||
x86._mm_clmulepi64_si128(h1x, h2x, 0x00),
|
||||
x86._mm_xor_si128(t1, t3))
|
||||
t0 := x86._mm_shuffle_epi32(t1, 0x0E)
|
||||
t1 = x86._mm_xor_si128(t1, x86._mm_shuffle_epi32(t2, 0x0E))
|
||||
t2 = x86._mm_xor_si128(t2, x86._mm_shuffle_epi32(t3, 0x0E))
|
||||
t0, t1, t2, t3 = sl_256(t0, t1, t2, t3)
|
||||
t0, t1 = reduce_f128(t0, t1, t2, t3)
|
||||
h3w, h3x := pbk(t0, t1)
|
||||
|
||||
// Compute h4 = h^4 = (h^2)^2
|
||||
h4w, h4x := square_f128(h2w)
|
||||
|
||||
for l >= GHASH_STRIDE_BYTES_HW {
|
||||
aw0 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(buf)))
|
||||
aw1 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(buf[16:])))
|
||||
aw2 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(buf[32:])))
|
||||
aw3 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(buf[48:])))
|
||||
aw0 = byteswap(aw0)
|
||||
aw1 = byteswap(aw1)
|
||||
aw2 = byteswap(aw2)
|
||||
aw3 = byteswap(aw3)
|
||||
buf, l = buf[GHASH_STRIDE_BYTES_HW:], l - GHASH_STRIDE_BYTES_HW
|
||||
|
||||
aw0 = x86._mm_xor_si128(aw0, yw)
|
||||
ax1 := bk(aw1)
|
||||
ax2 := bk(aw2)
|
||||
ax3 := bk(aw3)
|
||||
ax0 := bk(aw0)
|
||||
|
||||
t1 = x86._mm_xor_si128(
|
||||
x86._mm_xor_si128(
|
||||
x86._mm_clmulepi64_si128(aw0, h4w, 0x11),
|
||||
x86._mm_clmulepi64_si128(aw1, h3w, 0x11)),
|
||||
x86._mm_xor_si128(
|
||||
x86._mm_clmulepi64_si128(aw2, h2w, 0x11),
|
||||
x86._mm_clmulepi64_si128(aw3, h1w, 0x11)))
|
||||
t3 = x86._mm_xor_si128(
|
||||
x86._mm_xor_si128(
|
||||
x86._mm_clmulepi64_si128(aw0, h4w, 0x00),
|
||||
x86._mm_clmulepi64_si128(aw1, h3w, 0x00)),
|
||||
x86._mm_xor_si128(
|
||||
x86._mm_clmulepi64_si128(aw2, h2w, 0x00),
|
||||
x86._mm_clmulepi64_si128(aw3, h1w, 0x00)))
|
||||
t2 = x86._mm_xor_si128(
|
||||
x86._mm_xor_si128(
|
||||
x86._mm_clmulepi64_si128(ax0, h4x, 0x00),
|
||||
x86._mm_clmulepi64_si128(ax1, h3x, 0x00)),
|
||||
x86._mm_xor_si128(
|
||||
x86._mm_clmulepi64_si128(ax2, h2x, 0x00),
|
||||
x86._mm_clmulepi64_si128(ax3, h1x, 0x00)))
|
||||
t2 = x86._mm_xor_si128(t2, x86._mm_xor_si128(t1, t3))
|
||||
t0 = x86._mm_shuffle_epi32(t1, 0x0E)
|
||||
t1 = x86._mm_xor_si128(t1, x86._mm_shuffle_epi32(t2, 0x0E))
|
||||
t2 = x86._mm_xor_si128(t2, x86._mm_shuffle_epi32(t3, 0x0E))
|
||||
t0, t1, t2, t3 = sl_256(t0, t1, t2, t3)
|
||||
t0, t1 = reduce_f128(t0, t1, t2, t3)
|
||||
yw = x86._mm_unpacklo_epi64(t1, t0)
|
||||
}
|
||||
}
|
||||
|
||||
// Process 1 block at a time
|
||||
for l > 0 {
|
||||
src: []byte = ---
|
||||
if l >= _aes.GHASH_BLOCK_SIZE {
|
||||
src = buf
|
||||
buf = buf[_aes.GHASH_BLOCK_SIZE:]
|
||||
l -= _aes.GHASH_BLOCK_SIZE
|
||||
} else {
|
||||
tmp: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
copy(tmp[:], buf)
|
||||
src = tmp[:]
|
||||
l = 0
|
||||
}
|
||||
|
||||
aw := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src)))
|
||||
aw = byteswap(aw)
|
||||
|
||||
aw = x86._mm_xor_si128(aw, yw)
|
||||
ax := bk(aw)
|
||||
|
||||
t1 := x86._mm_clmulepi64_si128(aw, h1w, 0x11)
|
||||
t3 := x86._mm_clmulepi64_si128(aw, h1w, 0x00)
|
||||
t2 := x86._mm_clmulepi64_si128(ax, h1x, 0x00)
|
||||
t2 = x86._mm_xor_si128(t2, x86._mm_xor_si128(t1, t3))
|
||||
t0 := x86._mm_shuffle_epi32(t1, 0x0E)
|
||||
t1 = x86._mm_xor_si128(t1, x86._mm_shuffle_epi32(t2, 0x0E))
|
||||
t2 = x86._mm_xor_si128(t2, x86._mm_shuffle_epi32(t3, 0x0E))
|
||||
t0, t1, t2, t3 = sl_256(t0, t1, t2, t3)
|
||||
t0, t1 = reduce_f128(t0, t1, t2, t3)
|
||||
yw = x86._mm_unpacklo_epi64(t1, t0)
|
||||
}
|
||||
|
||||
// Write back the hash (dst, aka y)
|
||||
yw = byteswap(yw)
|
||||
intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst)), yw)
|
||||
}
|
||||
@@ -0,0 +1,178 @@
|
||||
// Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
|
||||
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
|
||||
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
//+build amd64
|
||||
package aes_hw_intel
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_aes"
|
||||
import "core:mem"
|
||||
import "core:simd/x86"
|
||||
|
||||
// Intel AES-NI based implementation. Inspiration taken from BearSSL.
|
||||
//
|
||||
// Note: This assumes that the SROA optimization pass is enabled to be
|
||||
// anything resembling performant; otherwise, LLVM will not elide a massive
|
||||
// number of redundant loads/stores it generates for every intrinsic call.
|
||||
|
||||
@(private = "file", require_results, enable_target_feature = "sse2")
|
||||
expand_step128 :: #force_inline proc(k1, k2: x86.__m128i) -> x86.__m128i {
|
||||
k1, k2 := k1, k2
|
||||
|
||||
k2 = x86._mm_shuffle_epi32(k2, 0xff)
|
||||
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
|
||||
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
|
||||
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
|
||||
return x86._mm_xor_si128(k1, k2)
|
||||
}
|
||||
|
||||
@(private = "file", require_results, enable_target_feature = "sse,sse2")
|
||||
expand_step192a :: #force_inline proc (k1_, k2_: ^x86.__m128i, k3: x86.__m128i) -> (x86.__m128i, x86.__m128i) {
|
||||
k1, k2, k3 := k1_^, k2_^, k3
|
||||
|
||||
k3 = x86._mm_shuffle_epi32(k3, 0x55)
|
||||
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
|
||||
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
|
||||
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
|
||||
k1 = x86._mm_xor_si128(k1, k3)
|
||||
|
||||
tmp := k2
|
||||
k2 = x86._mm_xor_si128(k2, x86._mm_slli_si128(k2, 0x04))
|
||||
k2 = x86._mm_xor_si128(k2, x86._mm_shuffle_epi32(k1, 0xff))
|
||||
|
||||
k1_, k2_ := k1_, k2_
|
||||
k1_^, k2_^ = k1, k2
|
||||
|
||||
r1 := transmute(x86.__m128i)(x86._mm_shuffle_ps(transmute(x86.__m128)(tmp), transmute(x86.__m128)(k1), 0x44))
|
||||
r2 := transmute(x86.__m128i)(x86._mm_shuffle_ps(transmute(x86.__m128)(k1), transmute(x86.__m128)(k2), 0x4e))
|
||||
|
||||
return r1, r2
|
||||
}
|
||||
|
||||
@(private = "file", require_results, enable_target_feature = "sse2")
|
||||
expand_step192b :: #force_inline proc (k1_, k2_: ^x86.__m128i, k3: x86.__m128i) -> x86.__m128i {
|
||||
k1, k2, k3 := k1_^, k2_^, k3
|
||||
|
||||
k3 = x86._mm_shuffle_epi32(k3, 0x55)
|
||||
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
|
||||
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
|
||||
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
|
||||
k1 = x86._mm_xor_si128(k1, k3)
|
||||
|
||||
k2 = x86._mm_xor_si128(k2, x86._mm_slli_si128(k2, 0x04))
|
||||
k2 = x86._mm_xor_si128(k2, x86._mm_shuffle_epi32(k1, 0xff))
|
||||
|
||||
k1_, k2_ := k1_, k2_
|
||||
k1_^, k2_^ = k1, k2
|
||||
|
||||
return k1
|
||||
}
|
||||
|
||||
@(private = "file", require_results, enable_target_feature = "sse2")
|
||||
expand_step256b :: #force_inline proc(k1, k2: x86.__m128i) -> x86.__m128i {
|
||||
k1, k2 := k1, k2
|
||||
|
||||
k2 = x86._mm_shuffle_epi32(k2, 0xaa)
|
||||
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
|
||||
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
|
||||
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
|
||||
return x86._mm_xor_si128(k1, k2)
|
||||
}
|
||||
|
||||
@(private = "file", enable_target_feature = "aes")
|
||||
derive_dec_keys :: proc(ctx: ^Context, sks: ^[15]x86.__m128i, num_rounds: int) {
|
||||
intrinsics.unaligned_store((^x86.__m128i)(&ctx._sk_exp_dec[0]), sks[num_rounds])
|
||||
for i in 1 ..< num_rounds {
|
||||
tmp := x86._mm_aesimc_si128(sks[i])
|
||||
intrinsics.unaligned_store((^x86.__m128i)(&ctx._sk_exp_dec[num_rounds - i]), tmp)
|
||||
}
|
||||
intrinsics.unaligned_store((^x86.__m128i)(&ctx._sk_exp_dec[num_rounds]), sks[0])
|
||||
}
|
||||
|
||||
@(private, enable_target_feature = "sse,sse2,aes")
|
||||
keysched :: proc(ctx: ^Context, key: []byte) {
|
||||
sks: [15]x86.__m128i = ---
|
||||
|
||||
// Compute the encryption keys.
|
||||
num_rounds, key_len := 0, len(key)
|
||||
switch key_len {
|
||||
case _aes.KEY_SIZE_128:
|
||||
sks[0] = intrinsics.unaligned_load((^x86.__m128i)(raw_data(key)))
|
||||
sks[1] = expand_step128(sks[0], x86._mm_aeskeygenassist_si128(sks[0], 0x01))
|
||||
sks[2] = expand_step128(sks[1], x86._mm_aeskeygenassist_si128(sks[1], 0x02))
|
||||
sks[3] = expand_step128(sks[2], x86._mm_aeskeygenassist_si128(sks[2], 0x04))
|
||||
sks[4] = expand_step128(sks[3], x86._mm_aeskeygenassist_si128(sks[3], 0x08))
|
||||
sks[5] = expand_step128(sks[4], x86._mm_aeskeygenassist_si128(sks[4], 0x10))
|
||||
sks[6] = expand_step128(sks[5], x86._mm_aeskeygenassist_si128(sks[5], 0x20))
|
||||
sks[7] = expand_step128(sks[6], x86._mm_aeskeygenassist_si128(sks[6], 0x40))
|
||||
sks[8] = expand_step128(sks[7], x86._mm_aeskeygenassist_si128(sks[7], 0x80))
|
||||
sks[9] = expand_step128(sks[8], x86._mm_aeskeygenassist_si128(sks[8], 0x1b))
|
||||
sks[10] = expand_step128(sks[9], x86._mm_aeskeygenassist_si128(sks[9], 0x36))
|
||||
num_rounds = _aes.ROUNDS_128
|
||||
case _aes.KEY_SIZE_192:
|
||||
k0 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(key)))
|
||||
k1 := x86.__m128i{
|
||||
intrinsics.unaligned_load((^i64)(raw_data(key[16:]))),
|
||||
0,
|
||||
}
|
||||
sks[0] = k0
|
||||
sks[1], sks[2] = expand_step192a(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x01))
|
||||
sks[3] = expand_step192b(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x02))
|
||||
sks[4], sks[5] = expand_step192a(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x04))
|
||||
sks[6] = expand_step192b(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x08))
|
||||
sks[7], sks[8] = expand_step192a(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x10))
|
||||
sks[9] = expand_step192b(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x20))
|
||||
sks[10], sks[11] = expand_step192a(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x40))
|
||||
sks[12] = expand_step192b(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x80))
|
||||
num_rounds = _aes.ROUNDS_192
|
||||
case _aes.KEY_SIZE_256:
|
||||
sks[0] = intrinsics.unaligned_load((^x86.__m128i)(raw_data(key)))
|
||||
sks[1] = intrinsics.unaligned_load((^x86.__m128i)(raw_data(key[16:])))
|
||||
sks[2] = expand_step128(sks[0], x86._mm_aeskeygenassist_si128(sks[1], 0x01))
|
||||
sks[3] = expand_step256b(sks[1], x86._mm_aeskeygenassist_si128(sks[2], 0x01))
|
||||
sks[4] = expand_step128(sks[2], x86._mm_aeskeygenassist_si128(sks[3], 0x02))
|
||||
sks[5] = expand_step256b(sks[3], x86._mm_aeskeygenassist_si128(sks[4], 0x02))
|
||||
sks[6] = expand_step128(sks[4], x86._mm_aeskeygenassist_si128(sks[5], 0x04))
|
||||
sks[7] = expand_step256b(sks[5], x86._mm_aeskeygenassist_si128(sks[6], 0x04))
|
||||
sks[8] = expand_step128(sks[6], x86._mm_aeskeygenassist_si128(sks[7], 0x08))
|
||||
sks[9] = expand_step256b(sks[7], x86._mm_aeskeygenassist_si128(sks[8], 0x08))
|
||||
sks[10] = expand_step128(sks[8], x86._mm_aeskeygenassist_si128(sks[9], 0x10))
|
||||
sks[11] = expand_step256b(sks[9], x86._mm_aeskeygenassist_si128(sks[10], 0x10))
|
||||
sks[12] = expand_step128(sks[10], x86._mm_aeskeygenassist_si128(sks[11], 0x20))
|
||||
sks[13] = expand_step256b(sks[11], x86._mm_aeskeygenassist_si128(sks[12], 0x20))
|
||||
sks[14] = expand_step128(sks[12], x86._mm_aeskeygenassist_si128(sks[13], 0x40))
|
||||
num_rounds = _aes.ROUNDS_256
|
||||
case:
|
||||
panic("crypto/aes: invalid AES key size")
|
||||
}
|
||||
for i in 0 ..= num_rounds {
|
||||
intrinsics.unaligned_store((^x86.__m128i)(&ctx._sk_exp_enc[i]), sks[i])
|
||||
}
|
||||
|
||||
// Compute the decryption keys. GCM and CTR do not need this, however
|
||||
// ECB, CBC, OCB3, etc do.
|
||||
derive_dec_keys(ctx, &sks, num_rounds)
|
||||
|
||||
ctx._num_rounds = num_rounds
|
||||
|
||||
mem.zero_explicit(&sks, size_of(sks))
|
||||
}
|
||||
@@ -0,0 +1,123 @@
|
||||
package _chacha20
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:encoding/endian"
|
||||
import "core:math/bits"
|
||||
import "core:mem"
|
||||
|
||||
// KEY_SIZE is the (X)ChaCha20 key size in bytes.
|
||||
KEY_SIZE :: 32
|
||||
// IV_SIZE is the ChaCha20 IV size in bytes.
|
||||
IV_SIZE :: 12
|
||||
// XIV_SIZE is the XChaCha20 IV size in bytes.
|
||||
XIV_SIZE :: 24
|
||||
|
||||
// MAX_CTR_IETF is the maximum counter value for the IETF flavor ChaCha20.
|
||||
MAX_CTR_IETF :: 0xffffffff
|
||||
// BLOCK_SIZE is the (X)ChaCha20 block size in bytes.
|
||||
BLOCK_SIZE :: 64
|
||||
// STATE_SIZE_U32 is the (X)ChaCha20 state size in u32s.
|
||||
STATE_SIZE_U32 :: 16
|
||||
// ROUNDS is the (X)ChaCha20 round count.
|
||||
ROUNDS :: 20
|
||||
|
||||
// SIGMA_0 is sigma[0:4].
|
||||
SIGMA_0: u32 : 0x61707865
|
||||
// SIGMA_1 is sigma[4:8].
|
||||
SIGMA_1: u32 : 0x3320646e
|
||||
// SIGMA_2 is sigma[8:12].
|
||||
SIGMA_2: u32 : 0x79622d32
|
||||
// SIGMA_3 is sigma[12:16].
|
||||
SIGMA_3: u32 : 0x6b206574
|
||||
|
||||
// Context is a ChaCha20 or XChaCha20 instance.
|
||||
Context :: struct {
|
||||
// State words: sigma constants in [0:4], key in [4:12], block counter
// in [12], IV in [13:16] (for the non-IETF flavor, seek also stores
// the upper 32 counter bits in [13]).
_s: [STATE_SIZE_U32]u32,
|
||||
// One block worth of bytes. NOTE(review): presumably buffered
// keystream; its users are not visible here -- confirm.
_buffer: [BLOCK_SIZE]byte,
|
||||
// NOTE(review): presumably the read offset into _buffer; init and
// seek both set it to BLOCK_SIZE -- confirm against callers.
_off: int,
|
||||
// True for the IETF flavor, which limits the block counter to 32 bits
// (MAX_CTR_IETF).
_is_ietf_flavor: bool,
|
||||
// Set by init, cleared by reset; seek asserts on it.
_is_initialized: bool,
|
||||
}
|
||||
|
||||
// init initializes a Context for ChaCha20 with the provided key and
|
||||
// iv.
|
||||
//
|
||||
// WARNING: This ONLY handles ChaCha20. XChaCha20 sub-key and IV
|
||||
// derivation is expected to be handled by the caller, so that the
|
||||
// HChaCha call can be suitably accelerated.
|
||||
init :: proc "contextless" (ctx: ^Context, key, iv: []byte, is_xchacha: bool) {
	// Wrong-sized key or IV is a caller bug; trap rather than continue.
	if len(key) != KEY_SIZE || len(iv) != IV_SIZE {
		intrinsics.trap()
	}

	s := &ctx._s

	// Words 0..3: the sigma constants.
	s[0], s[1], s[2], s[3] = SIGMA_0, SIGMA_1, SIGMA_2, SIGMA_3

	// Words 4..11: the key, read as little-endian u32s.
	for w in 0 ..< 8 {
		s[4 + w] = endian.unchecked_get_u32le(key[4 * w:4 * w + 4])
	}

	// Word 12: the block counter, starting at zero.
	s[12] = 0

	// Words 13..15: the IV, read as little-endian u32s.
	for w in 0 ..< 3 {
		s[13 + w] = endian.unchecked_get_u32le(iv[4 * w:4 * w + 4])
	}

	ctx._off = BLOCK_SIZE
	// The XChaCha20 flavor is allowed a 64-bit counter, see seek and
	// check_counter_limit.
	ctx._is_ietf_flavor = !is_xchacha
	ctx._is_initialized = true
}
|
||||
|
||||
// seek seeks the (X)ChaCha20 stream counter to the specified block.
|
||||
seek :: proc(ctx: ^Context, block_nr: u64) {
	assert(ctx._is_initialized)

	// The IETF flavor only has a 32-bit counter (state word 12).
	if ctx._is_ietf_flavor && block_nr > MAX_CTR_IETF {
		panic("crypto/chacha20: attempted to seek past maximum counter")
	}

	ctr_lo, ctr_hi := u32(block_nr), u32(block_nr >> 32)

	// Only the non-IETF flavor keeps the upper counter half in word 13;
	// for IETF that word holds IV material and must not be touched.
	if !ctx._is_ietf_flavor {
		ctx._s[13] = ctr_hi
	}
	ctx._s[12] = ctr_lo

	ctx._off = BLOCK_SIZE
}
|
||||
|
||||
// reset sanitizes the Context. The Context must be re-initialized to
|
||||
// be used again.
|
||||
reset :: proc(ctx: ^Context) {
	// Mark the instance unusable, then wipe the key-dependent material
	// with a zeroing call that is not allowed to be optimized away.
	ctx._is_initialized = false

	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
	mem.zero_explicit(&ctx._s, size_of(ctx._s))
}
|
||||
|
||||
check_counter_limit :: proc(ctx: ^Context, nr_blocks: int) {
	// Panics if producing nr_blocks more blocks would exceed the amount
	// of keystream permitted for a single IV.
	//
	// Modern "standard" ChaCha20 definitions use the IETF 32-bit
	// counter, while most common XChaCha20 implementations allow a
	// 64-bit counter.  The honest answer is "use a MRAE primitive", but
	// this follows "common" practice for XChaCha20.

	ERR_CTR_EXHAUSTED :: "crypto/chacha20: maximum (X)ChaCha20 keystream per IV reached"

	requested := u64(nr_blocks)

	if ctx._is_ietf_flavor {
		// 32-bit counter held in state word 12.
		if u64(ctx._s[12]) + requested > MAX_CTR_IETF {
			panic(ERR_CTR_EXHAUSTED)
		}
		return
	}

	// 64-bit counter split across state words 12 (low) and 13 (high);
	// a carry out of the addition means the counter would wrap.
	current := (u64(ctx._s[13]) << 32) | u64(ctx._s[12])
	_, carry := bits.add_u64(current, requested, 0)
	if carry != 0 {
		panic(ERR_CTR_EXHAUSTED)
	}
}
|
||||
@@ -0,0 +1,360 @@
|
||||
package chacha20_ref
|
||||
|
||||
import "core:crypto/_chacha20"
|
||||
import "core:encoding/endian"
|
||||
import "core:math/bits"
|
||||
|
||||
// stream_blocks produces nr_blocks keystream blocks from ctx.  When src
// is non-nil each block is XORed with the corresponding BLOCK_SIZE bytes
// of src and written to dst, otherwise the raw keystream is written to
// dst.  The block counter in ctx is advanced by one per block.
//
// The caller is responsible for dst (and src, if any) being large enough;
// the output section runs with bounds checking disabled.
stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) {
|
||||
// Enforce the maximum consumed keystream per IV.
|
||||
_chacha20.check_counter_limit(ctx, nr_blocks)
|
||||
|
||||
dst, src := dst, src
|
||||
x := &ctx._s
|
||||
for n := 0; n < nr_blocks; n = n + 1 {
|
||||
// Working copy of the state; words 0..3 are taken straight from the
// sigma constants rather than from x.
x0, x1, x2, x3 :=
|
||||
_chacha20.SIGMA_0, _chacha20.SIGMA_1, _chacha20.SIGMA_2, _chacha20.SIGMA_3
|
||||
x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 :=
|
||||
x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
|
||||
|
||||
// Each iteration is a double round (4 column + 4 diagonal quarter
// rounds), hence the step of 2.
for i := _chacha20.ROUNDS; i > 0; i = i - 2 {
|
||||
// Even when forcing inlining manually inlining all of
|
||||
// these is decently faster.
|
||||
|
||||
// quarterround(x, 0, 4, 8, 12)
|
||||
x0 += x4
|
||||
x12 ~= x0
|
||||
x12 = bits.rotate_left32(x12, 16)
|
||||
x8 += x12
|
||||
x4 ~= x8
|
||||
x4 = bits.rotate_left32(x4, 12)
|
||||
x0 += x4
|
||||
x12 ~= x0
|
||||
x12 = bits.rotate_left32(x12, 8)
|
||||
x8 += x12
|
||||
x4 ~= x8
|
||||
x4 = bits.rotate_left32(x4, 7)
|
||||
|
||||
// quarterround(x, 1, 5, 9, 13)
|
||||
x1 += x5
|
||||
x13 ~= x1
|
||||
x13 = bits.rotate_left32(x13, 16)
|
||||
x9 += x13
|
||||
x5 ~= x9
|
||||
x5 = bits.rotate_left32(x5, 12)
|
||||
x1 += x5
|
||||
x13 ~= x1
|
||||
x13 = bits.rotate_left32(x13, 8)
|
||||
x9 += x13
|
||||
x5 ~= x9
|
||||
x5 = bits.rotate_left32(x5, 7)
|
||||
|
||||
// quarterround(x, 2, 6, 10, 14)
|
||||
x2 += x6
|
||||
x14 ~= x2
|
||||
x14 = bits.rotate_left32(x14, 16)
|
||||
x10 += x14
|
||||
x6 ~= x10
|
||||
x6 = bits.rotate_left32(x6, 12)
|
||||
x2 += x6
|
||||
x14 ~= x2
|
||||
x14 = bits.rotate_left32(x14, 8)
|
||||
x10 += x14
|
||||
x6 ~= x10
|
||||
x6 = bits.rotate_left32(x6, 7)
|
||||
|
||||
// quarterround(x, 3, 7, 11, 15)
|
||||
x3 += x7
|
||||
x15 ~= x3
|
||||
x15 = bits.rotate_left32(x15, 16)
|
||||
x11 += x15
|
||||
x7 ~= x11
|
||||
x7 = bits.rotate_left32(x7, 12)
|
||||
x3 += x7
|
||||
x15 ~= x3
|
||||
x15 = bits.rotate_left32(x15, 8)
|
||||
x11 += x15
|
||||
x7 ~= x11
|
||||
x7 = bits.rotate_left32(x7, 7)
|
||||
|
||||
// quarterround(x, 0, 5, 10, 15)
|
||||
x0 += x5
|
||||
x15 ~= x0
|
||||
x15 = bits.rotate_left32(x15, 16)
|
||||
x10 += x15
|
||||
x5 ~= x10
|
||||
x5 = bits.rotate_left32(x5, 12)
|
||||
x0 += x5
|
||||
x15 ~= x0
|
||||
x15 = bits.rotate_left32(x15, 8)
|
||||
x10 += x15
|
||||
x5 ~= x10
|
||||
x5 = bits.rotate_left32(x5, 7)
|
||||
|
||||
// quarterround(x, 1, 6, 11, 12)
|
||||
x1 += x6
|
||||
x12 ~= x1
|
||||
x12 = bits.rotate_left32(x12, 16)
|
||||
x11 += x12
|
||||
x6 ~= x11
|
||||
x6 = bits.rotate_left32(x6, 12)
|
||||
x1 += x6
|
||||
x12 ~= x1
|
||||
x12 = bits.rotate_left32(x12, 8)
|
||||
x11 += x12
|
||||
x6 ~= x11
|
||||
x6 = bits.rotate_left32(x6, 7)
|
||||
|
||||
// quarterround(x, 2, 7, 8, 13)
|
||||
x2 += x7
|
||||
x13 ~= x2
|
||||
x13 = bits.rotate_left32(x13, 16)
|
||||
x8 += x13
|
||||
x7 ~= x8
|
||||
x7 = bits.rotate_left32(x7, 12)
|
||||
x2 += x7
|
||||
x13 ~= x2
|
||||
x13 = bits.rotate_left32(x13, 8)
|
||||
x8 += x13
|
||||
x7 ~= x8
|
||||
x7 = bits.rotate_left32(x7, 7)
|
||||
|
||||
// quarterround(x, 3, 4, 9, 14)
|
||||
x3 += x4
|
||||
x14 ~= x3
|
||||
x14 = bits.rotate_left32(x14, 16)
|
||||
x9 += x14
|
||||
x4 ~= x9
|
||||
x4 = bits.rotate_left32(x4, 12)
|
||||
x3 += x4
|
||||
x14 ~= x3
|
||||
x14 = bits.rotate_left32(x14, 8)
|
||||
x9 += x14
|
||||
x4 ~= x9
|
||||
x4 = bits.rotate_left32(x4, 7)
|
||||
}
|
||||
|
||||
// Feed-forward: add the input state back into the permuted state.
x0 += _chacha20.SIGMA_0
|
||||
x1 += _chacha20.SIGMA_1
|
||||
x2 += _chacha20.SIGMA_2
|
||||
x3 += _chacha20.SIGMA_3
|
||||
x4 += x[4]
|
||||
x5 += x[5]
|
||||
x6 += x[6]
|
||||
x7 += x[7]
|
||||
x8 += x[8]
|
||||
x9 += x[9]
|
||||
x10 += x[10]
|
||||
x11 += x[11]
|
||||
x12 += x[12]
|
||||
x13 += x[13]
|
||||
x14 += x[14]
|
||||
x15 += x[15]
|
||||
|
||||
// - The caller(s) ensure that src/dst are valid.
|
||||
// - The compiler knows if the target is picky about alignment.
|
||||
|
||||
#no_bounds_check {
|
||||
if src != nil {
|
||||
endian.unchecked_put_u32le(dst[0:4], endian.unchecked_get_u32le(src[0:4]) ~ x0)
|
||||
endian.unchecked_put_u32le(dst[4:8], endian.unchecked_get_u32le(src[4:8]) ~ x1)
|
||||
endian.unchecked_put_u32le(dst[8:12], endian.unchecked_get_u32le(src[8:12]) ~ x2)
|
||||
endian.unchecked_put_u32le(dst[12:16], endian.unchecked_get_u32le(src[12:16]) ~ x3)
|
||||
endian.unchecked_put_u32le(dst[16:20], endian.unchecked_get_u32le(src[16:20]) ~ x4)
|
||||
endian.unchecked_put_u32le(dst[20:24], endian.unchecked_get_u32le(src[20:24]) ~ x5)
|
||||
endian.unchecked_put_u32le(dst[24:28], endian.unchecked_get_u32le(src[24:28]) ~ x6)
|
||||
endian.unchecked_put_u32le(dst[28:32], endian.unchecked_get_u32le(src[28:32]) ~ x7)
|
||||
endian.unchecked_put_u32le(dst[32:36], endian.unchecked_get_u32le(src[32:36]) ~ x8)
|
||||
endian.unchecked_put_u32le(dst[36:40], endian.unchecked_get_u32le(src[36:40]) ~ x9)
|
||||
endian.unchecked_put_u32le(
|
||||
dst[40:44],
|
||||
endian.unchecked_get_u32le(src[40:44]) ~ x10,
|
||||
)
|
||||
endian.unchecked_put_u32le(
|
||||
dst[44:48],
|
||||
endian.unchecked_get_u32le(src[44:48]) ~ x11,
|
||||
)
|
||||
endian.unchecked_put_u32le(
|
||||
dst[48:52],
|
||||
endian.unchecked_get_u32le(src[48:52]) ~ x12,
|
||||
)
|
||||
endian.unchecked_put_u32le(
|
||||
dst[52:56],
|
||||
endian.unchecked_get_u32le(src[52:56]) ~ x13,
|
||||
)
|
||||
endian.unchecked_put_u32le(
|
||||
dst[56:60],
|
||||
endian.unchecked_get_u32le(src[56:60]) ~ x14,
|
||||
)
|
||||
endian.unchecked_put_u32le(
|
||||
dst[60:64],
|
||||
endian.unchecked_get_u32le(src[60:64]) ~ x15,
|
||||
)
|
||||
src = src[_chacha20.BLOCK_SIZE:]
|
||||
} else {
|
||||
endian.unchecked_put_u32le(dst[0:4], x0)
|
||||
endian.unchecked_put_u32le(dst[4:8], x1)
|
||||
endian.unchecked_put_u32le(dst[8:12], x2)
|
||||
endian.unchecked_put_u32le(dst[12:16], x3)
|
||||
endian.unchecked_put_u32le(dst[16:20], x4)
|
||||
endian.unchecked_put_u32le(dst[20:24], x5)
|
||||
endian.unchecked_put_u32le(dst[24:28], x6)
|
||||
endian.unchecked_put_u32le(dst[28:32], x7)
|
||||
endian.unchecked_put_u32le(dst[32:36], x8)
|
||||
endian.unchecked_put_u32le(dst[36:40], x9)
|
||||
endian.unchecked_put_u32le(dst[40:44], x10)
|
||||
endian.unchecked_put_u32le(dst[44:48], x11)
|
||||
endian.unchecked_put_u32le(dst[48:52], x12)
|
||||
endian.unchecked_put_u32le(dst[52:56], x13)
|
||||
endian.unchecked_put_u32le(dst[56:60], x14)
|
||||
endian.unchecked_put_u32le(dst[60:64], x15)
|
||||
}
|
||||
dst = dst[_chacha20.BLOCK_SIZE:]
|
||||
}
|
||||
|
||||
// Increment the counter. Overflow checking is done upon
|
||||
// entry into the routine, so a 64-bit increment safely
|
||||
// covers both cases.
|
||||
new_ctr := ((u64(ctx._s[13]) << 32) | u64(ctx._s[12])) + 1
|
||||
x[12] = u32(new_ctr)
|
||||
x[13] = u32(new_ctr >> 32)
|
||||
}
|
||||
}
|
||||
|
||||
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
	// One quarter round applied in place to words a, b, c, d of s.
	quarter_round :: #force_inline proc "contextless" (s: ^[16]u32, a, b, c, d: int) #no_bounds_check {
		s[a] += s[b]
		s[d] = bits.rotate_left32(s[d] ~ s[a], 16)
		s[c] += s[d]
		s[b] = bits.rotate_left32(s[b] ~ s[c], 12)
		s[a] += s[b]
		s[d] = bits.rotate_left32(s[d] ~ s[a], 8)
		s[c] += s[d]
		s[b] = bits.rotate_left32(s[b] ~ s[c], 7)
	}

	// State layout: sigma constants, then 8 key words, then 4 IV words,
	// with key and IV read as little-endian u32s.
	s: [16]u32
	s[0], s[1], s[2], s[3] =
		_chacha20.SIGMA_0, _chacha20.SIGMA_1, _chacha20.SIGMA_2, _chacha20.SIGMA_3
	for w in 0 ..< 8 {
		s[4 + w] = endian.unchecked_get_u32le(key[4 * w:4 * w + 4])
	}
	for w in 0 ..< 4 {
		s[12 + w] = endian.unchecked_get_u32le(iv[4 * w:4 * w + 4])
	}

	// Each iteration is one double round, hence the step of 2.
	for i := _chacha20.ROUNDS; i > 0; i -= 2 {
		// Columns.
		quarter_round(&s, 0, 4, 8, 12)
		quarter_round(&s, 1, 5, 9, 13)
		quarter_round(&s, 2, 6, 10, 14)
		quarter_round(&s, 3, 7, 11, 15)

		// Diagonals.
		quarter_round(&s, 0, 5, 10, 15)
		quarter_round(&s, 1, 6, 11, 12)
		quarter_round(&s, 2, 7, 8, 13)
		quarter_round(&s, 3, 4, 9, 14)
	}

	// The 32-byte output is words 0..3 followed by words 12..15 of the
	// permuted state; the input state is not added back in.
	for w in 0 ..< 4 {
		endian.unchecked_put_u32le(dst[4 * w:4 * w + 4], s[w])
	}
	for w in 0 ..< 4 {
		endian.unchecked_put_u32le(dst[16 + 4 * w:20 + 4 * w], s[12 + w])
	}
}
|
||||
@@ -0,0 +1,481 @@
|
||||
package chacha20_simd128
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_chacha20"
|
||||
import "core:simd"
|
||||
@(require) import "core:sys/info"
|
||||
|
||||
// Portable 128-bit `core:simd` implementation.
|
||||
//
|
||||
// This is loosely based on Ted Krovetz's public domain C intrinsic
|
||||
// implementation.
|
||||
//
|
||||
// This is written to perform adequately on any target that has "enough"
|
||||
// 128-bit vector registers, the current thought is that 4 blocks at a
|
||||
// time is reasonable for amd64, though Ted's code is more conservative.
|
||||
//
|
||||
// See:
|
||||
// supercop-20230530/crypto_stream/chacha20/krovetz/vec128
|
||||
|
||||
// Ensure the compiler emits SIMD instructions. This is a minimum, and
|
||||
// setting the microarchitecture at compile time will allow for better
|
||||
// code gen when applicable (eg: AVX). This is somewhat redundant with
|
||||
// the default microarchitecture configurations.
|
||||
when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 {
|
||||
@(private = "file")
|
||||
TARGET_SIMD_FEATURES :: "neon"
|
||||
} else when ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
|
||||
// Note: LLVM appears to be smart enough to use PSHUFB despite not
|
||||
// explicitly using simd.u8x16 shuffles.
|
||||
@(private = "file")
|
||||
TARGET_SIMD_FEATURES :: "sse2,ssse3"
|
||||
} else {
|
||||
@(private = "file")
|
||||
TARGET_SIMD_FEATURES :: ""
|
||||
}
|
||||
|
||||
@(private = "file")
|
||||
_ROT_7L: simd.u32x4 : {7, 7, 7, 7}
|
||||
@(private = "file")
|
||||
_ROT_7R: simd.u32x4 : {25, 25, 25, 25}
|
||||
@(private = "file")
|
||||
_ROT_12L: simd.u32x4 : {12, 12, 12, 12}
|
||||
@(private = "file")
|
||||
_ROT_12R: simd.u32x4 : {20, 20, 20, 20}
|
||||
@(private = "file")
|
||||
_ROT_8L: simd.u32x4 : {8, 8, 8, 8}
|
||||
@(private = "file")
|
||||
_ROT_8R: simd.u32x4 : {24, 24, 24, 24}
|
||||
@(private = "file")
|
||||
_ROT_16: simd.u32x4 : {16, 16, 16, 16}
|
||||
|
||||
when ODIN_ENDIAN == .Big {
	// _increment_counter advances the 64-bit block counter held in state
	// words 12 (low) and 13 (high) of ctx by one, writes it back to ctx,
	// and returns the updated fourth state row (words 12..15) as a vector.
	//
	// In the Big Endian case, the low and high portions in the vector
	// are flipped, so the 64-bit addition can't be done with a simple
	// vector add.
	//
	// Note: the context type lives in the `_chacha20` package, so it has
	// to be package-qualified here; this branch is only compiled on
	// big-endian targets.
	@(private = "file")
	_increment_counter :: #force_inline proc "contextless" (ctx: ^_chacha20.Context) -> simd.u32x4 {
		x := &ctx._s

		new_ctr := ((u64(x[13]) << 32) | u64(x[12])) + 1
		x[12] = u32(new_ctr)
		x[13] = u32(new_ctr >> 32)

		return intrinsics.unaligned_load(transmute(^simd.u32x4)&x[12])
	}

	// Convert the endian-ness of the components of a u32x4 vector, for
	// the purposes of output.  Implemented as a byte shuffle that
	// reverses the 4 bytes within each 32-bit lane.
	@(private = "file")
	_byteswap_u32x4 :: #force_inline proc "contextless" (v: simd.u32x4) -> simd.u32x4 {
		return(
			transmute(simd.u32x4)simd.shuffle(
				transmute(simd.u8x16)v,
				transmute(simd.u8x16)v,
				3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12,
			)
		)
	}
} else {
	// Added to the fourth state row viewed as two u64 lanes: increments
	// the 64-bit counter in the low lane and leaves the high lane alone.
	@(private = "file")
	_VEC_ONE: simd.u64x2 : {1, 0}
}
|
||||
|
||||
// _dq_round_simd128 applies one ChaCha double round to a state held as
// four row vectors and returns the updated rows.  The rows are named
// a, b, c, d below to match the usual quarter round notation.
@(private = "file")
_dq_round_simd128 :: #force_inline proc "contextless" (
	v0, v1, v2, v3: simd.u32x4,
) -> (
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
) {
	a, b, c, d := v0, v1, v2, v3

	// First half: four quarter rounds, one per lane.

	// a += b; d ^= a; d <<<= 16
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_16), simd.shr(d, _ROT_16))

	// c += d; b ^= c; b <<<= 12
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_12L), simd.shr(b, _ROT_12R))

	// a += b; d ^= a; d <<<= 8
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_8L), simd.shr(d, _ROT_8R))

	// c += d; b ^= c; b <<<= 7
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_7L), simd.shr(b, _ROT_7R))

	// Rotate the lanes of b, c, d by 1, 2, 3 positions so that the
	// second half operates on the diagonals.
	b = simd.shuffle(b, b, 1, 2, 3, 0)
	c = simd.shuffle(c, c, 2, 3, 0, 1)
	d = simd.shuffle(d, d, 3, 0, 1, 2)

	// Second half: the same four steps on the rotated rows.

	// a += b; d ^= a; d <<<= 16
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_16), simd.shr(d, _ROT_16))

	// c += d; b ^= c; b <<<= 12
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_12L), simd.shr(b, _ROT_12R))

	// a += b; d ^= a; d <<<= 8
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_8L), simd.shr(d, _ROT_8R))

	// c += d; b ^= c; b <<<= 7
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_7L), simd.shr(b, _ROT_7R))

	// Undo the lane rotation (3, 2, 1 positions for b, c, d).
	b = simd.shuffle(b, b, 3, 0, 1, 2)
	c = simd.shuffle(c, c, 2, 3, 0, 1)
	d = simd.shuffle(d, d, 1, 2, 3, 0)

	return a, b, c, d
}
|
||||
|
||||
// _add_state_simd128 adds the input state rows s0..s3 lane-wise to the
// permuted rows v0..v3 and returns the sums.  On big-endian targets each
// 32-bit lane is additionally byte-swapped, ready for output.
@(private = "file")
_add_state_simd128 :: #force_inline proc "contextless" (
	v0, v1, v2, v3, s0, s1, s2, s3: simd.u32x4,
) -> (
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
) {
	r0 := simd.add(v0, s0)
	r1 := simd.add(v1, s1)
	r2 := simd.add(v2, s2)
	r3 := simd.add(v3, s3)

	when ODIN_ENDIAN == .Big {
		r0 = _byteswap_u32x4(r0)
		r1 = _byteswap_u32x4(r1)
		r2 = _byteswap_u32x4(r2)
		r3 = _byteswap_u32x4(r3)
	}

	return r0, r1, r2, r3
}
|
||||
|
||||
// _xor_simd128 XORs the four vectors v0..v3 with the four vectors read
// from src[0..3] (unaligned loads) and returns the results.
@(private = "file")
_xor_simd128 :: #force_inline proc "contextless" (
	src: [^]simd.u32x4,
	v0, v1, v2, v3: simd.u32x4,
) -> (
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
	simd.u32x4,
) {
	in0 := intrinsics.unaligned_load(&src[0])
	in1 := intrinsics.unaligned_load(&src[1])
	in2 := intrinsics.unaligned_load(&src[2])
	in3 := intrinsics.unaligned_load(&src[3])

	return simd.bit_xor(v0, in0),
		simd.bit_xor(v1, in1),
		simd.bit_xor(v2, in2),
		simd.bit_xor(v3, in3)
}
|
||||
|
||||
// _store_simd128 writes v0..v3 to dst[0..3] using unaligned stores.
@(private = "file")
_store_simd128 :: #force_inline proc "contextless" (
	dst: [^]simd.u32x4,
	v0, v1, v2, v3: simd.u32x4,
) {
	intrinsics.unaligned_store(&dst[0], v0)
	intrinsics.unaligned_store(&dst[1], v1)
	intrinsics.unaligned_store(&dst[2], v2)
	intrinsics.unaligned_store(&dst[3], v3)
}
|
||||
|
||||
// is_performant returns true iff the target and current host both support
|
||||
// "enough" 128-bit SIMD to make this implementation performant.
|
||||
is_performant :: proc "contextless" () -> bool {
	when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 || ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
		// Pick the CPU features this implementation wants for the
		// architecture being compiled for.
		when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 {
			req_features :: info.CPU_Features{.asimd}
		} else when ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
			req_features :: info.CPU_Features{.sse2, .ssse3}
		}

		// Without feature information assume the worst; otherwise the
		// host must provide every required feature (superset test).
		if features, ok := info.cpu_features.?; ok {
			return features >= req_features
		}
		return false
	} else when ODIN_ARCH == .wasm64p32 || ODIN_ARCH == .wasm32 {
		// For WebAssembly this is decided by the compile-time target
		// features rather than by querying the host.
		return intrinsics.has_target_feature("simd128")
	} else {
		return false
	}
}
|
||||
|
||||
// stream_blocks produces nr_blocks keystream blocks from ctx using
// 128-bit vectors.  When src is non-nil the keystream is XORed with src
// and written to dst, otherwise the raw keystream is written to dst.
// Blocks are processed 8 at a time (arm64 only), then 4 at a time (all
// targets but i386), then singly.  The block counter in ctx is advanced
// by one per block.
//
// The caller is responsible for dst (and src, if any) being large
// enough; all loads and stores go through raw multi-pointers.
@(enable_target_feature = TARGET_SIMD_FEATURES)
|
||||
stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) {
|
||||
// Enforce the maximum consumed keystream per IV.
|
||||
_chacha20.check_counter_limit(ctx, nr_blocks)
|
||||
|
||||
dst_v := ([^]simd.u32x4)(raw_data(dst))
|
||||
src_v := ([^]simd.u32x4)(raw_data(src))
|
||||
|
||||
x := &ctx._s
|
||||
n := nr_blocks
|
||||
|
||||
// The state vector is an array of uint32s in native byte-order.
|
||||
// s0..s3 are the four state rows; s3 carries the counter and is the
// only row that changes from block to block.
x_v := ([^]simd.u32x4)(raw_data(x))
|
||||
s0 := intrinsics.unaligned_load((^simd.u32x4)(x_v[0:]))
|
||||
s1 := intrinsics.unaligned_load((^simd.u32x4)(x_v[1:]))
|
||||
s2 := intrinsics.unaligned_load((^simd.u32x4)(x_v[2:]))
|
||||
s3 := intrinsics.unaligned_load((^simd.u32x4)(x_v[3:]))
|
||||
|
||||
// 8 blocks at a time.
|
||||
//
|
||||
// Note: This is only worth it on Aarch64.
|
||||
when ODIN_ARCH == .arm64 {
|
||||
for ; n >= 8; n = n - 8 {
|
||||
v0, v1, v2, v3 := s0, s1, s2, s3
|
||||
|
||||
// Each further block uses the previous block's fourth row with the
// counter incremented by one.
when ODIN_ENDIAN == .Little {
|
||||
s7 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s3, _VEC_ONE)
|
||||
} else {
|
||||
s7 := _increment_counter(ctx)
|
||||
}
|
||||
v4, v5, v6, v7 := s0, s1, s2, s7
|
||||
|
||||
when ODIN_ENDIAN == .Little {
|
||||
s11 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s7, _VEC_ONE)
|
||||
} else {
|
||||
s11 := _increment_counter(ctx)
|
||||
}
|
||||
v8, v9, v10, v11 := s0, s1, s2, s11
|
||||
|
||||
when ODIN_ENDIAN == .Little {
|
||||
s15 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s11, _VEC_ONE)
|
||||
} else {
|
||||
s15 := _increment_counter(ctx)
|
||||
}
|
||||
v12, v13, v14, v15 := s0, s1, s2, s15
|
||||
|
||||
when ODIN_ENDIAN == .Little {
|
||||
s19 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s15, _VEC_ONE)
|
||||
} else {
|
||||
s19 := _increment_counter(ctx)
|
||||
}
|
||||
|
||||
v16, v17, v18, v19 := s0, s1, s2, s19
|
||||
when ODIN_ENDIAN == .Little {
|
||||
s23 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s19, _VEC_ONE)
|
||||
} else {
|
||||
s23 := _increment_counter(ctx)
|
||||
}
|
||||
|
||||
v20, v21, v22, v23 := s0, s1, s2, s23
|
||||
when ODIN_ENDIAN == .Little {
|
||||
s27 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s23, _VEC_ONE)
|
||||
} else {
|
||||
s27 := _increment_counter(ctx)
|
||||
}
|
||||
|
||||
v24, v25, v26, v27 := s0, s1, s2, s27
|
||||
when ODIN_ENDIAN == .Little {
|
||||
s31 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s27, _VEC_ONE)
|
||||
} else {
|
||||
s31 := _increment_counter(ctx)
|
||||
}
|
||||
v28, v29, v30, v31 := s0, s1, s2, s31
|
||||
|
||||
for i := _chacha20.ROUNDS; i > 0; i = i - 2 {
|
||||
v0, v1, v2, v3 = _dq_round_simd128(v0, v1, v2, v3)
|
||||
v4, v5, v6, v7 = _dq_round_simd128(v4, v5, v6, v7)
|
||||
v8, v9, v10, v11 = _dq_round_simd128(v8, v9, v10, v11)
|
||||
v12, v13, v14, v15 = _dq_round_simd128(v12, v13, v14, v15)
|
||||
v16, v17, v18, v19 = _dq_round_simd128(v16, v17, v18, v19)
|
||||
v20, v21, v22, v23 = _dq_round_simd128(v20, v21, v22, v23)
|
||||
v24, v25, v26, v27 = _dq_round_simd128(v24, v25, v26, v27)
|
||||
v28, v29, v30, v31 = _dq_round_simd128(v28, v29, v30, v31)
|
||||
}
|
||||
|
||||
v0, v1, v2, v3 = _add_state_simd128(v0, v1, v2, v3, s0, s1, s2, s3)
|
||||
v4, v5, v6, v7 = _add_state_simd128(v4, v5, v6, v7, s0, s1, s2, s7)
|
||||
v8, v9, v10, v11 = _add_state_simd128(v8, v9, v10, v11, s0, s1, s2, s11)
|
||||
v12, v13, v14, v15 = _add_state_simd128(v12, v13, v14, v15, s0, s1, s2, s15)
|
||||
v16, v17, v18, v19 = _add_state_simd128(v16, v17, v18, v19, s0, s1, s2, s19)
|
||||
v20, v21, v22, v23 = _add_state_simd128(v20, v21, v22, v23, s0, s1, s2, s23)
|
||||
v24, v25, v26, v27 = _add_state_simd128(v24, v25, v26, v27, s0, s1, s2, s27)
|
||||
v28, v29, v30, v31 = _add_state_simd128(v28, v29, v30, v31, s0, s1, s2, s31)
|
||||
|
||||
#no_bounds_check {
|
||||
if src != nil {
|
||||
v0, v1, v2, v3 = _xor_simd128(src_v, v0, v1, v2, v3)
|
||||
v4, v5, v6, v7 = _xor_simd128(src_v[4:], v4, v5, v6, v7)
|
||||
v8, v9, v10, v11 = _xor_simd128(src_v[8:], v8, v9, v10, v11)
|
||||
v12, v13, v14, v15 = _xor_simd128(src_v[12:], v12, v13, v14, v15)
|
||||
v16, v17, v18, v19 = _xor_simd128(src_v[16:], v16, v17, v18, v19)
|
||||
v20, v21, v22, v23 = _xor_simd128(src_v[20:], v20, v21, v22, v23)
|
||||
v24, v25, v26, v27 = _xor_simd128(src_v[24:], v24, v25, v26, v27)
|
||||
v28, v29, v30, v31 = _xor_simd128(src_v[28:], v28, v29, v30, v31)
|
||||
src_v = src_v[32:]
|
||||
}
|
||||
|
||||
_store_simd128(dst_v, v0, v1, v2, v3)
|
||||
_store_simd128(dst_v[4:], v4, v5, v6, v7)
|
||||
_store_simd128(dst_v[8:], v8, v9, v10, v11)
|
||||
_store_simd128(dst_v[12:], v12, v13, v14, v15)
|
||||
_store_simd128(dst_v[16:], v16, v17, v18, v19)
|
||||
_store_simd128(dst_v[20:], v20, v21, v22, v23)
|
||||
_store_simd128(dst_v[24:], v24, v25, v26, v27)
|
||||
_store_simd128(dst_v[28:], v28, v29, v30, v31)
|
||||
dst_v = dst_v[32:]
|
||||
}
|
||||
|
||||
when ODIN_ENDIAN == .Little {
|
||||
// s31 holds the most current counter, so `s3 = s31 + 1`.
|
||||
s3 = transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s31, _VEC_ONE)
|
||||
} else {
|
||||
s3 = _increment_counter(ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4 blocks at a time.
|
||||
//
|
||||
// Note: The i386 target lacks the required number of registers
|
||||
// for this to be performant, so it is skipped.
|
||||
when ODIN_ARCH != .i386 {
|
||||
for ; n >= 4; n = n - 4 {
|
||||
v0, v1, v2, v3 := s0, s1, s2, s3
|
||||
|
||||
when ODIN_ENDIAN == .Little {
|
||||
s7 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s3, _VEC_ONE)
|
||||
} else {
|
||||
s7 := _increment_counter(ctx)
|
||||
}
|
||||
v4, v5, v6, v7 := s0, s1, s2, s7
|
||||
|
||||
when ODIN_ENDIAN == .Little {
|
||||
s11 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s7, _VEC_ONE)
|
||||
} else {
|
||||
s11 := _increment_counter(ctx)
|
||||
}
|
||||
v8, v9, v10, v11 := s0, s1, s2, s11
|
||||
|
||||
when ODIN_ENDIAN == .Little {
|
||||
s15 := transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s11, _VEC_ONE)
|
||||
} else {
|
||||
s15 := _increment_counter(ctx)
|
||||
}
|
||||
v12, v13, v14, v15 := s0, s1, s2, s15
|
||||
|
||||
for i := _chacha20.ROUNDS; i > 0; i = i - 2 {
|
||||
v0, v1, v2, v3 = _dq_round_simd128(v0, v1, v2, v3)
|
||||
v4, v5, v6, v7 = _dq_round_simd128(v4, v5, v6, v7)
|
||||
v8, v9, v10, v11 = _dq_round_simd128(v8, v9, v10, v11)
|
||||
v12, v13, v14, v15 = _dq_round_simd128(v12, v13, v14, v15)
|
||||
}
|
||||
|
||||
v0, v1, v2, v3 = _add_state_simd128(v0, v1, v2, v3, s0, s1, s2, s3)
|
||||
v4, v5, v6, v7 = _add_state_simd128(v4, v5, v6, v7, s0, s1, s2, s7)
|
||||
v8, v9, v10, v11 = _add_state_simd128(v8, v9, v10, v11, s0, s1, s2, s11)
|
||||
v12, v13, v14, v15 = _add_state_simd128(v12, v13, v14, v15, s0, s1, s2, s15)
|
||||
|
||||
#no_bounds_check {
|
||||
if src != nil {
|
||||
v0, v1, v2, v3 = _xor_simd128(src_v, v0, v1, v2, v3)
|
||||
v4, v5, v6, v7 = _xor_simd128(src_v[4:], v4, v5, v6, v7)
|
||||
v8, v9, v10, v11 = _xor_simd128(src_v[8:], v8, v9, v10, v11)
|
||||
v12, v13, v14, v15 = _xor_simd128(src_v[12:], v12, v13, v14, v15)
|
||||
src_v = src_v[16:]
|
||||
}
|
||||
|
||||
_store_simd128(dst_v, v0, v1, v2, v3)
|
||||
_store_simd128(dst_v[4:], v4, v5, v6, v7)
|
||||
_store_simd128(dst_v[8:], v8, v9, v10, v11)
|
||||
_store_simd128(dst_v[12:], v12, v13, v14, v15)
|
||||
dst_v = dst_v[16:]
|
||||
}
|
||||
|
||||
when ODIN_ENDIAN == .Little {
|
||||
// s15 holds the most current counter, so `s3 = s15 + 1`.
|
||||
s3 = transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s15, _VEC_ONE)
|
||||
} else {
|
||||
s3 = _increment_counter(ctx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 1 block at a time.
|
||||
for ; n > 0; n = n - 1 {
|
||||
v0, v1, v2, v3 := s0, s1, s2, s3
|
||||
|
||||
for i := _chacha20.ROUNDS; i > 0; i = i - 2 {
|
||||
v0, v1, v2, v3 = _dq_round_simd128(v0, v1, v2, v3)
|
||||
}
|
||||
v0, v1, v2, v3 = _add_state_simd128(v0, v1, v2, v3, s0, s1, s2, s3)
|
||||
|
||||
#no_bounds_check {
|
||||
if src != nil {
|
||||
v0, v1, v2, v3 = _xor_simd128(src_v, v0, v1, v2, v3)
|
||||
src_v = src_v[4:]
|
||||
}
|
||||
|
||||
_store_simd128(dst_v, v0, v1, v2, v3)
|
||||
dst_v = dst_v[4:]
|
||||
}
|
||||
|
||||
// Increment the counter. Overflow checking is done upon
|
||||
// entry into the routine, so a 64-bit increment safely
|
||||
// covers both cases.
|
||||
when ODIN_ENDIAN == .Little {
|
||||
s3 = transmute(simd.u32x4)simd.add(transmute(simd.u64x2)s3, _VEC_ONE)
|
||||
} else {
|
||||
s3 = _increment_counter(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// On big-endian targets _increment_counter has already updated ctx
// in place, so only the little-endian path needs the write-back.
when ODIN_ENDIAN == .Little {
|
||||
// Write back the counter to the state.
|
||||
intrinsics.unaligned_store((^simd.u32x4)(x_v[3:]), s3)
|
||||
}
|
||||
}
|
||||
|
||||
// hchacha20 runs the ChaCha20 double rounds over a state built from the
// SIGMA constants, the first 32 bytes of key, and the first 16 bytes of
// iv, then writes the first and last rows of the resulting state to dst
// (32 bytes in total).  No feed-forward addition is performed.
@(enable_target_feature = TARGET_SIMD_FEATURES)
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
	row_a := simd.u32x4{_chacha20.SIGMA_0, _chacha20.SIGMA_1, _chacha20.SIGMA_2, _chacha20.SIGMA_3}
	row_b := intrinsics.unaligned_load((^simd.u32x4)(&key[0]))
	row_c := intrinsics.unaligned_load((^simd.u32x4)(&key[16]))
	row_d := intrinsics.unaligned_load((^simd.u32x4)(&iv[0]))

	// The inputs are byte strings; swap them into native words on
	// big-endian targets.
	when ODIN_ENDIAN == .Big {
		row_b = _byteswap_u32x4(row_b)
		row_c = _byteswap_u32x4(row_c)
		row_d = _byteswap_u32x4(row_d)
	}

	// Each iteration performs one double round, hence the step of 2.
	for rounds_left := _chacha20.ROUNDS; rounds_left > 0; rounds_left -= 2 {
		row_a, row_b, row_c, row_d = _dq_round_simd128(row_a, row_b, row_c, row_d)
	}

	// Only the rows that are emitted need to be swapped back.
	when ODIN_ENDIAN == .Big {
		row_a = _byteswap_u32x4(row_a)
		row_d = _byteswap_u32x4(row_d)
	}

	out := ([^]simd.u32x4)(raw_data(dst))
	intrinsics.unaligned_store(&out[0], row_a)
	intrinsics.unaligned_store(&out[1], row_d)
}
|
||||
@@ -0,0 +1,319 @@
|
||||
//+build amd64
|
||||
package chacha20_simd256
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_chacha20"
|
||||
import chacha_simd128 "core:crypto/_chacha20/simd128"
|
||||
import "core:simd"
|
||||
import "core:sys/info"
|
||||
|
||||
// This is loosely based on Ted Krovetz's public domain C intrinsic
|
||||
// implementations. While written using `core:simd`, this is currently
|
||||
// amd64 specific because we do not have a way to detect ARM SVE.
|
||||
//
|
||||
// See:
|
||||
// supercop-20230530/crypto_stream/chacha20/krovetz/vec128
|
||||
// supercop-20230530/crypto_stream/chacha20/krovetz/avx2
|
||||
|
||||
#assert(ODIN_ENDIAN == .Little)
|
||||
|
||||
// Per-lane shift amounts used to build 32-bit left rotations out of a
// left shift and a right shift: `_ROT_<n>L` holds n and `_ROT_<n>R`
// holds 32 - n.  A rotation by 16 uses the same amount in both
// directions, so `_ROT_16` serves as both.
@(private = "file")
|
||||
_ROT_7L: simd.u32x8 : {7, 7, 7, 7, 7, 7, 7, 7}
|
||||
@(private = "file")
|
||||
_ROT_7R: simd.u32x8 : {25, 25, 25, 25, 25, 25, 25, 25}
|
||||
@(private = "file")
|
||||
_ROT_12L: simd.u32x8 : {12, 12, 12, 12, 12, 12, 12, 12}
|
||||
@(private = "file")
|
||||
_ROT_12R: simd.u32x8 : {20, 20, 20, 20, 20, 20, 20, 20}
|
||||
@(private = "file")
|
||||
_ROT_8L: simd.u32x8 : {8, 8, 8, 8, 8, 8, 8, 8}
|
||||
@(private = "file")
|
||||
_ROT_8R: simd.u32x8 : {24, 24, 24, 24, 24, 24, 24, 24}
|
||||
@(private = "file")
|
||||
_ROT_16: simd.u32x8 : {16, 16, 16, 16, 16, 16, 16, 16}
|
||||
// Added (as 64-bit lanes) to the last state row: bumps the counter held
// in the upper 128-bit half by one and leaves the lower half untouched.
@(private = "file")
|
||||
_VEC_ZERO_ONE: simd.u64x4 : {0, 0, 1, 0}
|
||||
// Added (as 64-bit lanes) to the last state row: bumps the counter in
// both 128-bit halves by two.
@(private = "file")
|
||||
_VEC_TWO: simd.u64x4 : {2, 0, 2, 0}
|
||||
|
||||
// is_performant returns true iff the target and current host both support
// "enough" SIMD (AVX and AVX2) to make this implementation performant.
// If the host CPU features are unavailable, it returns false.
is_performant :: proc "contextless" () -> bool {
	if detected, ok := info.cpu_features.?; ok {
		// Superset test: every required feature must be present.
		return detected >= info.CPU_Features{.avx, .avx2}
	}
	return false
}
|
||||
|
||||
// _dq_round_simd256 applies one double round to the four state rows and
// returns the updated rows.  Each 256-bit row carries two independent
// 128-bit halves, so all lane rotations below stay within a half.
@(private = "file")
_dq_round_simd256 :: #force_inline proc "contextless" (
	v0, v1, v2, v3: simd.u32x8,
) -> (
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
) {
	a, b, c, d := v0, v1, v2, v3

	// --- First pass, rows as given. ---

	// a += b; d ^= a; d <<<= 16
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_16), simd.shr(d, _ROT_16))

	// c += d; b ^= c; b <<<= 12
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_12L), simd.shr(b, _ROT_12R))

	// a += b; d ^= a; d <<<= 8
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_8L), simd.shr(d, _ROT_8R))

	// c += d; b ^= c; b <<<= 7
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_7L), simd.shr(b, _ROT_7R))

	// Rotate the lanes of b, c, d by 1, 2, 3 within each half.
	b = simd.shuffle(b, b, 1, 2, 3, 0, 5, 6, 7, 4)
	c = simd.shuffle(c, c, 2, 3, 0, 1, 6, 7, 4, 5)
	d = simd.shuffle(d, d, 3, 0, 1, 2, 7, 4, 5, 6)

	// --- Second pass, on the rotated rows. ---

	// a += b; d ^= a; d <<<= 16
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_16), simd.shr(d, _ROT_16))

	// c += d; b ^= c; b <<<= 12
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_12L), simd.shr(b, _ROT_12R))

	// a += b; d ^= a; d <<<= 8
	a = simd.add(a, b)
	d = simd.bit_xor(d, a)
	d = simd.bit_xor(simd.shl(d, _ROT_8L), simd.shr(d, _ROT_8R))

	// c += d; b ^= c; b <<<= 7
	c = simd.add(c, d)
	b = simd.bit_xor(b, c)
	b = simd.bit_xor(simd.shl(b, _ROT_7L), simd.shr(b, _ROT_7R))

	// Rotate the lanes back: b, c, d by 3, 2, 1 within each half.
	b = simd.shuffle(b, b, 3, 0, 1, 2, 7, 4, 5, 6)
	c = simd.shuffle(c, c, 2, 3, 0, 1, 6, 7, 4, 5)
	d = simd.shuffle(d, d, 1, 2, 3, 0, 5, 6, 7, 4)

	return a, b, c, d
}
|
||||
|
||||
// _add_and_permute_state_simd256 adds the input state rows (s0 .. s3) to
// the worked rows (v0 .. v3), then regroups the sums so that the first
// two results hold the first block and the last two hold the second.
@(private = "file")
_add_and_permute_state_simd256 :: #force_inline proc "contextless" (
	v0, v1, v2, v3, s0, s1, s2, s3: simd.u32x8,
) -> (
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
) {
	// Big Endian would byteswap the sums here.
	row0 := simd.add(v0, s0)
	row1 := simd.add(v1, s1)
	row2 := simd.add(v2, s2)
	row3 := simd.add(v3, s3)

	// Every row carries 128 bits of keystream for each of 2 blocks:
	// lanes 0..3 belong to the first block, lanes 4..7 to the second.
	// Gather the matching halves of row pairs into contiguous output.
	blk0_front := simd.shuffle(row0, row1, 0, 1, 2, 3, 8, 9, 10, 11)
	blk0_back := simd.shuffle(row2, row3, 0, 1, 2, 3, 8, 9, 10, 11)
	blk1_front := simd.shuffle(row0, row1, 4, 5, 6, 7, 12, 13, 14, 15)
	blk1_back := simd.shuffle(row2, row3, 4, 5, 6, 7, 12, 13, 14, 15)

	return blk0_front, blk0_back, blk1_front, blk1_back
}
|
||||
|
||||
// _xor_simd256 XORs v0 .. v3 with the four consecutive 256-bit vectors
// starting at src (loaded without alignment requirements) and returns
// the results.
@(private = "file")
_xor_simd256 :: #force_inline proc "contextless" (
	src: [^]simd.u32x8,
	v0, v1, v2, v3: simd.u32x8,
) -> (
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
	simd.u32x8,
) {
	in0 := intrinsics.unaligned_load(&src[0])
	in1 := intrinsics.unaligned_load(&src[1])
	in2 := intrinsics.unaligned_load(&src[2])
	in3 := intrinsics.unaligned_load(&src[3])

	return simd.bit_xor(v0, in0), simd.bit_xor(v1, in1), simd.bit_xor(v2, in2), simd.bit_xor(v3, in3)
}
|
||||
|
||||
// _xor_simd256_x1 is the two-vector variant of the XOR helper: it XORs
// v0 and v1 with the two consecutive 256-bit vectors starting at src
// (loaded without alignment requirements) and returns the results.
@(private = "file")
_xor_simd256_x1 :: #force_inline proc "contextless" (
	src: [^]simd.u32x8,
	v0, v1: simd.u32x8,
) -> (
	simd.u32x8,
	simd.u32x8,
) {
	in0 := intrinsics.unaligned_load(&src[0])
	in1 := intrinsics.unaligned_load(&src[1])

	return simd.bit_xor(v0, in0), simd.bit_xor(v1, in1)
}
|
||||
|
||||
// _store_simd256 writes v0 .. v3 to the four consecutive 256-bit slots
// starting at dst, using unaligned stores.
@(private = "file")
_store_simd256 :: #force_inline proc "contextless" (
	dst: [^]simd.u32x8,
	v0, v1, v2, v3: simd.u32x8,
) {
	intrinsics.unaligned_store(&dst[0], v0)
	intrinsics.unaligned_store(&dst[1], v1)
	intrinsics.unaligned_store(&dst[2], v2)
	intrinsics.unaligned_store(&dst[3], v3)
}
|
||||
|
||||
// _store_simd256_x1 is the two-vector variant of the store helper: it
// writes v0 and v1 to the two consecutive 256-bit slots starting at dst,
// using unaligned stores.
@(private = "file")
_store_simd256_x1 :: #force_inline proc "contextless" (
	dst: [^]simd.u32x8,
	v0, v1: simd.u32x8,
) {
	intrinsics.unaligned_store(&dst[0], v0)
	intrinsics.unaligned_store(&dst[1], v1)
}
|
||||
|
||||
// stream_blocks generates nr_blocks blocks of keystream from ctx, working
// on two blocks per 256-bit vector.  When src is non-nil the keystream is
// XORed with src before being stored to dst; otherwise the raw keystream
// is stored.  On return the counter held in ctx._s[12] and ctx._s[13] has
// been advanced by nr_blocks.
//
// NOTE(review): dst (and src, when non-nil) are accessed through raw
// pointers without bounds checks, so callers are presumably required to
// supply at least nr_blocks whole blocks - confirm against the callers.
@(enable_target_feature = "sse2,ssse3,avx,avx2")
|
||||
stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) {
|
||||
// Enforce the maximum consumed keystream per IV.
|
||||
_chacha20.check_counter_limit(ctx, nr_blocks)
|
||||
|
||||
dst_v := ([^]simd.u32x8)(raw_data(dst))
|
||||
src_v := ([^]simd.u32x8)(raw_data(src))
|
||||
|
||||
x := &ctx._s
|
||||
n := nr_blocks
|
||||
|
||||
// The state vector is an array of uint32s in native byte-order.
|
||||
// Setup s0 .. s3 such that each register stores 2 copies of the
|
||||
// state.
|
||||
x_v := ([^]simd.u32x4)(raw_data(x))
|
||||
t0 := intrinsics.unaligned_load((^simd.u32x4)(x_v[0:]))
|
||||
t1 := intrinsics.unaligned_load((^simd.u32x4)(x_v[1:]))
|
||||
t2 := intrinsics.unaligned_load((^simd.u32x4)(x_v[2:]))
|
||||
t3 := intrinsics.unaligned_load((^simd.u32x4)(x_v[3:]))
|
||||
s0 := simd.swizzle(t0, 0, 1, 2, 3, 0, 1, 2, 3)
|
||||
s1 := simd.swizzle(t1, 0, 1, 2, 3, 0, 1, 2, 3)
|
||||
s2 := simd.swizzle(t2, 0, 1, 2, 3, 0, 1, 2, 3)
|
||||
s3 := simd.swizzle(t3, 0, 1, 2, 3, 0, 1, 2, 3)
|
||||
|
||||
// Advance the counter in the 2nd copy of the state by one.
|
||||
s3 = transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s3, _VEC_ZERO_ONE)
|
||||
|
||||
// 8 blocks at a time.
|
||||
for ; n >= 8; n = n - 8 {
|
||||
v0, v1, v2, v3 := s0, s1, s2, s3
|
||||
|
||||
// s7, s11 and s15 are the last state row for the following block
|
||||
// pairs; each is the previous pair's row with both counters
|
||||
// advanced by two.
s7 := transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s3, _VEC_TWO)
|
||||
v4, v5, v6, v7 := s0, s1, s2, s7
|
||||
|
||||
s11 := transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s7, _VEC_TWO)
|
||||
v8, v9, v10, v11 := s0, s1, s2, s11
|
||||
|
||||
s15 := transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s11, _VEC_TWO)
|
||||
v12, v13, v14, v15 := s0, s1, s2, s15
|
||||
|
||||
for i := _chacha20.ROUNDS; i > 0; i = i - 2 {
|
||||
v0, v1, v2, v3 = _dq_round_simd256(v0, v1, v2, v3)
|
||||
v4, v5, v6, v7 = _dq_round_simd256(v4, v5, v6, v7)
|
||||
v8, v9, v10, v11 = _dq_round_simd256(v8, v9, v10, v11)
|
||||
v12, v13, v14, v15 = _dq_round_simd256(v12, v13, v14, v15)
|
||||
}
|
||||
|
||||
v0, v1, v2, v3 = _add_and_permute_state_simd256(v0, v1, v2, v3, s0, s1, s2, s3)
|
||||
v4, v5, v6, v7 = _add_and_permute_state_simd256(v4, v5, v6, v7, s0, s1, s2, s7)
|
||||
v8, v9, v10, v11 = _add_and_permute_state_simd256(v8, v9, v10, v11, s0, s1, s2, s11)
|
||||
v12, v13, v14, v15 = _add_and_permute_state_simd256(v12, v13, v14, v15, s0, s1, s2, s15)
|
||||
|
||||
#no_bounds_check {
|
||||
if src != nil {
|
||||
v0, v1, v2, v3 = _xor_simd256(src_v, v0, v1, v2, v3)
|
||||
v4, v5, v6, v7 = _xor_simd256(src_v[4:], v4, v5, v6, v7)
|
||||
v8, v9, v10, v11 = _xor_simd256(src_v[8:], v8, v9, v10, v11)
|
||||
v12, v13, v14, v15 = _xor_simd256(src_v[12:], v12, v13, v14, v15)
|
||||
src_v = src_v[16:]
|
||||
}
|
||||
|
||||
_store_simd256(dst_v, v0, v1, v2, v3)
|
||||
_store_simd256(dst_v[4:], v4, v5, v6, v7)
|
||||
_store_simd256(dst_v[8:], v8, v9, v10, v11)
|
||||
_store_simd256(dst_v[12:], v12, v13, v14, v15)
|
||||
dst_v = dst_v[16:]
|
||||
}
|
||||
|
||||
// s15 holds the most recent counters, so the next iteration starts
|
||||
// from s15 advanced by two.
s3 = transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s15, _VEC_TWO)
|
||||
}
|
||||
|
||||
|
||||
// 2 (or 1) block at a time.
|
||||
for ; n > 0; n = n - 2 {
|
||||
v0, v1, v2, v3 := s0, s1, s2, s3
|
||||
|
||||
for i := _chacha20.ROUNDS; i > 0; i = i - 2 {
|
||||
v0, v1, v2, v3 = _dq_round_simd256(v0, v1, v2, v3)
|
||||
}
|
||||
v0, v1, v2, v3 = _add_and_permute_state_simd256(v0, v1, v2, v3, s0, s1, s2, s3)
|
||||
|
||||
if n == 1 {
|
||||
// Note: No need to advance src_v, dst_v, or increment the counter
|
||||
// since this is guaranteed to be the final block.
|
||||
// Only v0 and v1 (the first block of the pair) are consumed.
#no_bounds_check {
|
||||
if src != nil {
|
||||
v0, v1 = _xor_simd256_x1(src_v, v0, v1)
|
||||
}
|
||||
|
||||
_store_simd256_x1(dst_v, v0, v1)
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
#no_bounds_check {
|
||||
if src != nil {
|
||||
v0, v1, v2, v3 = _xor_simd256(src_v, v0, v1, v2, v3)
|
||||
src_v = src_v[4:]
|
||||
}
|
||||
|
||||
_store_simd256(dst_v, v0, v1, v2, v3)
|
||||
dst_v = dst_v[4:]
|
||||
}
|
||||
|
||||
s3 = transmute(simd.u32x8)simd.add(transmute(simd.u64x4)s3, _VEC_TWO)
|
||||
}
|
||||
|
||||
// Write back the counter. Doing it this way, saves having to
|
||||
// pull out the correct counter value from s3.
|
||||
new_ctr := ((u64(ctx._s[13]) << 32) | u64(ctx._s[12])) + u64(nr_blocks)
|
||||
ctx._s[12] = u32(new_ctr)
|
||||
ctx._s[13] = u32(new_ctr >> 32)
|
||||
}
|
||||
|
||||
// hchacha20 forwards to the simd128 implementation, compiled here with
// AVX enabled.  dst, key and iv are passed through unchanged.
@(enable_target_feature = "sse2,ssse3,avx")
|
||||
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
|
||||
// We can just enable AVX and call the simd128 code, as going
|
||||
// wider has 0 performance benefit, but VEX encoded instructions
|
||||
// are nice.
|
||||
#force_inline chacha_simd128.hchacha20(dst, key, iv)
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
//+build !amd64
|
||||
package chacha20_simd256
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_chacha20"
|
||||
|
||||
// is_performant always returns false in this build: this file is only
// compiled for non-amd64 targets, where the simd256 implementation is
// unsupported.
is_performant :: proc "contextless" () -> bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// stream_blocks is a stub for non-amd64 targets; it unconditionally
// panics and never touches its arguments.
stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int) {
|
||||
panic("crypto/chacha20: simd256 implementation unsupported")
|
||||
}
|
||||
|
||||
// hchacha20 is a stub for non-amd64 targets; it unconditionally traps.
// NOTE(review): a trap is presumably used instead of panic because this
// procedure is "contextless" - confirm.
hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
|
||||
intrinsics.trap()
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
package aead
|
||||
|
||||
// seal_oneshot encrypts the plaintext and authenticates the aad and ciphertext,
|
||||
// with the provided algorithm, key, and iv, stores the output in dst and tag.
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
//
// A temporary Context is initialized for the call and reset before
// returning.  impl is forwarded to init; nil selects the default
// implementation for the algorithm.
seal_oneshot :: proc(algo: Algorithm, dst, tag, key, iv, aad, plaintext: []byte, impl: Implementation = nil) {
|
||||
ctx: Context
|
||||
init(&ctx, algo, key, impl)
|
||||
defer reset(&ctx)
|
||||
seal_ctx(&ctx, dst, tag, iv, aad, plaintext)
|
||||
}
|
||||
|
||||
// open_oneshot authenticates the aad and ciphertext, and decrypts the ciphertext,
|
||||
// with the provided algorithm, key, iv, and tag, and stores the output in dst,
|
||||
// returning true iff the authentication was successful. If authentication
|
||||
// fails, the destination buffer will be zeroed.
|
||||
//
|
||||
// dst and ciphertext MUST alias exactly or not at all.
|
||||
//
// A temporary Context is initialized for the call and reset before
// returning.  impl is forwarded to init; nil selects the default
// implementation for the algorithm.
@(require_results)
|
||||
open_oneshot :: proc(algo: Algorithm, dst, key, iv, aad, ciphertext, tag: []byte, impl: Implementation = nil) -> bool {
|
||||
ctx: Context
|
||||
init(&ctx, algo, key, impl)
|
||||
defer reset(&ctx)
|
||||
return open_ctx(&ctx, dst, iv, aad, ciphertext, tag)
|
||||
}
|
||||
|
||||
// seal is a procedure group that dispatches to seal_ctx (when given a
// Context) or seal_oneshot (when given an Algorithm and key).
seal :: proc {
|
||||
seal_ctx,
|
||||
seal_oneshot,
|
||||
}
|
||||
|
||||
// open is a procedure group that dispatches to open_ctx (when given a
// Context) or open_oneshot (when given an Algorithm and key).
open :: proc {
|
||||
open_ctx,
|
||||
open_oneshot,
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
package aead provides a generic interface to the supported Authenticated
|
||||
Encryption with Associated Data algorithms.
|
||||
|
||||
Both a one-shot and context based interface are provided, with similar
|
||||
usage. If multiple messages are to be sealed/opened via the same key,
|
||||
the context based interface may be more efficient, depending on the
|
||||
algorithm.
|
||||
|
||||
WARNING: Reusing the same key + iv to seal (encrypt) multiple messages
|
||||
results in catastrophic loss of security for most algorithms.
|
||||
|
||||
```odin
|
||||
package aead_example
|
||||
|
||||
import "core:bytes"
|
||||
import "core:crypto"
|
||||
import "core:crypto/aead"
|
||||
|
||||
main :: proc() {
|
||||
algo := aead.Algorithm.XCHACHA20POLY1305
|
||||
|
||||
// The example additional associated data (AAD) and plaintext.
|
||||
aad_str := "Get your ass in gear boys."
|
||||
pt_str := "They're immanetizing the Eschaton."
|
||||
|
||||
aad := transmute([]byte)aad_str
|
||||
plaintext := transmute([]byte)pt_str
|
||||
pt_len := len(plaintext)
|
||||
|
||||
// Generate a random key for the purposes of illustration.
|
||||
key := make([]byte, aead.KEY_SIZES[algo])
|
||||
defer delete(key)
|
||||
crypto.rand_bytes(key)
|
||||
|
||||
// `ciphertext || tag`, is a common way data is transmitted, so
|
||||
// demonstrate that.
|
||||
buf := make([]byte, pt_len + aead.TAG_SIZES[algo])
|
||||
defer delete(buf)
|
||||
ciphertext, tag := buf[:pt_len], buf[pt_len:]
|
||||
|
||||
// Seal the AAD + Plaintext.
|
||||
iv := make([]byte, aead.IV_SIZES[algo])
|
||||
defer delete(iv)
|
||||
crypto.rand_bytes(iv) // Random IVs are safe with XChaCha20-Poly1305.
|
||||
aead.seal(algo, ciphertext, tag, key, iv, aad, plaintext)
|
||||
|
||||
// Open the AAD + Ciphertext.
|
||||
opened_pt := buf[:pt_len]
|
||||
if ok := aead.open(algo, opened_pt, key, iv, aad, ciphertext, tag); !ok {
|
||||
panic("aead example: failed to open")
|
||||
}
|
||||
|
||||
assert(bytes.equal(opened_pt, plaintext))
|
||||
}
|
||||
```
|
||||
*/
|
||||
package aead
|
||||
@@ -0,0 +1,187 @@
|
||||
package aead
|
||||
|
||||
import "core:crypto/aes"
|
||||
import "core:crypto/chacha20"
|
||||
import "core:crypto/chacha20poly1305"
|
||||
import "core:reflect"
|
||||
|
||||
// Implementation is an AEAD implementation. Most callers will not need
|
||||
// to use this as the package will automatically select the most performant
|
||||
// implementation available.
|
||||
Implementation :: union {
|
||||
aes.Implementation,
|
||||
chacha20.Implementation,
|
||||
}
|
||||
|
||||
// MAX_TAG_SIZE is the maximum size tag that can be returned by any of the
|
||||
// Algorithms supported via this package.
|
||||
MAX_TAG_SIZE :: 16
|
||||
|
||||
// Algorithm is the algorithm identifier associated with a given Context.
|
||||
Algorithm :: enum {
|
||||
Invalid,
|
||||
AES_GCM_128,
|
||||
AES_GCM_192,
|
||||
AES_GCM_256,
|
||||
CHACHA20POLY1305,
|
||||
XCHACHA20POLY1305,
|
||||
}
|
||||
|
||||
// ALGORITHM_NAMES is the Algorithm to algorithm name string.
|
||||
ALGORITHM_NAMES := [Algorithm]string {
|
||||
.Invalid = "Invalid",
|
||||
.AES_GCM_128 = "AES-GCM-128",
|
||||
.AES_GCM_192 = "AES-GCM-192",
|
||||
.AES_GCM_256 = "AES-GCM-256",
|
||||
.CHACHA20POLY1305 = "chacha20poly1305",
|
||||
.XCHACHA20POLY1305 = "xchacha20poly1305",
|
||||
}
|
||||
|
||||
// TAG_SIZES is the Algorithm to tag size in bytes.
|
||||
TAG_SIZES := [Algorithm]int {
|
||||
.Invalid = 0,
|
||||
.AES_GCM_128 = aes.GCM_TAG_SIZE,
|
||||
.AES_GCM_192 = aes.GCM_TAG_SIZE,
|
||||
.AES_GCM_256 = aes.GCM_TAG_SIZE,
|
||||
.CHACHA20POLY1305 = chacha20poly1305.TAG_SIZE,
|
||||
.XCHACHA20POLY1305 = chacha20poly1305.TAG_SIZE,
|
||||
}
|
||||
|
||||
// KEY_SIZES is the Algorithm to key size in bytes.
|
||||
KEY_SIZES := [Algorithm]int {
|
||||
.Invalid = 0,
|
||||
.AES_GCM_128 = aes.KEY_SIZE_128,
|
||||
.AES_GCM_192 = aes.KEY_SIZE_192,
|
||||
.AES_GCM_256 = aes.KEY_SIZE_256,
|
||||
.CHACHA20POLY1305 = chacha20poly1305.KEY_SIZE,
|
||||
.XCHACHA20POLY1305 = chacha20poly1305.KEY_SIZE,
|
||||
}
|
||||
|
||||
// IV_SIZES is the Algorithm to initialization vector size in bytes.
|
||||
//
|
||||
// Note: Some algorithms (such as AES-GCM) support variable IV sizes.
|
||||
IV_SIZES := [Algorithm]int {
|
||||
.Invalid = 0,
|
||||
.AES_GCM_128 = aes.GCM_IV_SIZE,
|
||||
.AES_GCM_192 = aes.GCM_IV_SIZE,
|
||||
.AES_GCM_256 = aes.GCM_IV_SIZE,
|
||||
.CHACHA20POLY1305 = chacha20poly1305.IV_SIZE,
|
||||
.XCHACHA20POLY1305 = chacha20poly1305.XIV_SIZE,
|
||||
}
|
||||
|
||||
// Context is a concrete instantiation of a specific AEAD algorithm.
|
||||
//
// _algo records which Algorithm the Context was initialized with, and
// _impl holds the underlying algorithm-specific context.  _impl is nil
// until init is called, and again after reset.
Context :: struct {
|
||||
_algo: Algorithm,
|
||||
_impl: union {
|
||||
aes.Context_GCM,
|
||||
chacha20poly1305.Context,
|
||||
},
|
||||
}
|
||||
|
||||
// _IMPL_IDS is the Algorithm to typeid of the Context._impl union variant
// that backs it.  init uses it to select the union variant in place.
@(private)
|
||||
_IMPL_IDS := [Algorithm]typeid {
|
||||
.Invalid = nil,
|
||||
.AES_GCM_128 = typeid_of(aes.Context_GCM),
|
||||
.AES_GCM_192 = typeid_of(aes.Context_GCM),
|
||||
.AES_GCM_256 = typeid_of(aes.Context_GCM),
|
||||
.CHACHA20POLY1305 = typeid_of(chacha20poly1305.Context),
|
||||
.XCHACHA20POLY1305 = typeid_of(chacha20poly1305.Context),
|
||||
}
|
||||
|
||||
// init initializes a Context with a specific AEAD Algorithm.
|
||||
//
// If ctx already holds an implementation, it is reset first.  init panics
// if len(key) does not equal KEY_SIZES[algorithm], or if algorithm is
// .Invalid.
//
// impl optionally selects the underlying implementation; nil selects the
// algorithm's default.  A non-nil impl is type-asserted to the variant
// matching the algorithm (aes.Implementation for the AES-GCM algorithms,
// chacha20.Implementation for the ChaCha20 based ones).
// NOTE(review): a mismatched variant presumably panics in the type
// assertion - confirm.
init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementation = nil) {
|
||||
if ctx._impl != nil {
|
||||
reset(ctx)
|
||||
}
|
||||
|
||||
if len(key) != KEY_SIZES[algorithm] {
|
||||
panic("crypto/aead: invalid key size")
|
||||
}
|
||||
|
||||
// Directly specialize the union by setting the type ID (save a copy).
|
||||
reflect.set_union_variant_typeid(
|
||||
ctx._impl,
|
||||
_IMPL_IDS[algorithm],
|
||||
)
|
||||
// Initialize the freshly selected variant in place.
switch algorithm {
|
||||
case .AES_GCM_128, .AES_GCM_192, .AES_GCM_256:
|
||||
impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION
|
||||
aes.init_gcm(&ctx._impl.(aes.Context_GCM), key, impl_)
|
||||
case .CHACHA20POLY1305:
|
||||
impl_ := impl != nil ? impl.(chacha20.Implementation) : chacha20.DEFAULT_IMPLEMENTATION
|
||||
chacha20poly1305.init(&ctx._impl.(chacha20poly1305.Context), key, impl_)
|
||||
case .XCHACHA20POLY1305:
|
||||
impl_ := impl != nil ? impl.(chacha20.Implementation) : chacha20.DEFAULT_IMPLEMENTATION
|
||||
chacha20poly1305.init_xchacha(&ctx._impl.(chacha20poly1305.Context), key, impl_)
|
||||
case .Invalid:
|
||||
panic("crypto/aead: uninitialized algorithm")
|
||||
case:
|
||||
panic("crypto/aead: invalid algorithm")
|
||||
}
|
||||
|
||||
ctx._algo = algorithm
|
||||
}
|
||||
|
||||
// seal_ctx encrypts the plaintext and authenticates the aad and ciphertext,
|
||||
// with the provided Context and iv, stores the output in dst and tag.
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
//
// The call is forwarded to the algorithm-specific seal routine for
// whichever implementation ctx holds.  seal_ctx panics if ctx has not
// been initialized (or has been reset).
seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
|
||||
switch &impl in ctx._impl {
|
||||
case aes.Context_GCM:
|
||||
aes.seal_gcm(&impl, dst, tag, iv, aad, plaintext)
|
||||
case chacha20poly1305.Context:
|
||||
chacha20poly1305.seal(&impl, dst, tag, iv, aad, plaintext)
|
||||
case:
|
||||
panic("crypto/aead: uninitialized algorithm")
|
||||
}
|
||||
}
|
||||
|
||||
// open_ctx authenticates the aad and ciphertext, and decrypts the ciphertext,
|
||||
// with the provided Context, iv, and tag, and stores the output in dst,
|
||||
// returning true iff the authentication was successful. If authentication
|
||||
// fails, the destination buffer will be zeroed.
|
||||
//
|
||||
// dst and ciphertext MUST alias exactly or not at all.
|
||||
//
// The call is forwarded to the algorithm-specific open routine for
// whichever implementation ctx holds.  open_ctx panics if ctx has not
// been initialized (or has been reset).
@(require_results)
|
||||
open_ctx :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
switch &impl in ctx._impl {
|
||||
case aes.Context_GCM:
|
||||
return aes.open_gcm(&impl, dst, iv, aad, ciphertext, tag)
|
||||
case chacha20poly1305.Context:
|
||||
return chacha20poly1305.open(&impl, dst, iv, aad, ciphertext, tag)
|
||||
case:
|
||||
panic("crypto/aead: uninitialized algorithm")
|
||||
}
|
||||
}
|
||||
|
||||
// reset sanitizes the Context. The Context must be re-initialized to
|
||||
// be used again.
|
||||
//
// The algorithm-specific reset routine is invoked on the held
// implementation (if any), after which the Context is returned to its
// uninitialized state: _algo is .Invalid and _impl is nil.
reset :: proc(ctx: ^Context) {
|
||||
switch &impl in ctx._impl {
|
||||
case aes.Context_GCM:
|
||||
aes.reset_gcm(&impl)
|
||||
case chacha20poly1305.Context:
|
||||
chacha20poly1305.reset(&impl)
|
||||
case:
|
||||
// Calling reset repeatedly is fine.
|
||||
}
|
||||
|
||||
ctx._algo = .Invalid
|
||||
ctx._impl = nil
|
||||
}
|
||||
|
||||
// algorithm returns the Algorithm used by a Context instance.
|
||||
// A Context that has been reset reports .Invalid.
algorithm :: proc(ctx: ^Context) -> Algorithm {
|
||||
return ctx._algo
|
||||
}
|
||||
|
||||
// iv_size returns the IV size of a Context instance in bytes.
|
||||
// The value is looked up in IV_SIZES, so a Context that has been reset
// (algorithm .Invalid) reports 0.
iv_size :: proc(ctx: ^Context) -> int {
|
||||
return IV_SIZES[ctx._algo]
|
||||
}
|
||||
|
||||
// tag_size returns the tag size of a Context instance in bytes.
|
||||
// The value is looked up in TAG_SIZES, so a Context that has been reset
// (algorithm .Invalid) reports 0.
tag_size :: proc(ctx: ^Context) -> int {
|
||||
return TAG_SIZES[ctx._algo]
|
||||
}
|
||||
@@ -6,7 +6,6 @@ See:
|
||||
- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf
|
||||
- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
|
||||
*/
|
||||
|
||||
package aes
|
||||
|
||||
import "core:crypto/_aes"
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package aes
|
||||
|
||||
import "core:bytes"
|
||||
import "core:crypto/_aes/ct64"
|
||||
import "core:encoding/endian"
|
||||
import "core:math/bits"
|
||||
@@ -19,7 +20,7 @@ Context_CTR :: struct {
|
||||
}
|
||||
|
||||
// init_ctr initializes a Context_CTR with the provided key and IV.
|
||||
init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := Implementation.Hardware) {
|
||||
init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) {
|
||||
if len(iv) != CTR_IV_SIZE {
|
||||
panic("crypto/aes: invalid CTR IV size")
|
||||
}
|
||||
@@ -37,15 +38,16 @@ init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := Implementation.Hard
|
||||
xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
// TODO: Enforcing that dst and src alias exactly or not at all
|
||||
// is a good idea, though odd aliasing should be extremely uncommon.
|
||||
|
||||
src, dst := src, dst
|
||||
if dst_len := len(dst); dst_len < len(src) {
|
||||
src = src[:dst_len]
|
||||
}
|
||||
|
||||
for remaining := len(src); remaining > 0; {
|
||||
if bytes.alias_inexactly(dst, src) {
|
||||
panic("crypto/aes: dst and src alias inexactly")
|
||||
}
|
||||
|
||||
#no_bounds_check for remaining := len(src); remaining > 0; {
|
||||
// Process multiple blocks at once
|
||||
if ctx._off == BLOCK_SIZE {
|
||||
if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
|
||||
@@ -83,7 +85,7 @@ keystream_bytes_ctr :: proc(ctx: ^Context_CTR, dst: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
dst := dst
|
||||
for remaining := len(dst); remaining > 0; {
|
||||
#no_bounds_check for remaining := len(dst); remaining > 0; {
|
||||
// Process multiple blocks at once
|
||||
if ctx._off == BLOCK_SIZE {
|
||||
if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
|
||||
@@ -123,8 +125,8 @@ reset_ctr :: proc "contextless" (ctx: ^Context_CTR) {
|
||||
ctx._is_initialized = false
|
||||
}
|
||||
|
||||
@(private)
|
||||
ctr_blocks :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) {
|
||||
@(private = "file")
|
||||
ctr_blocks :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) #no_bounds_check {
|
||||
// Use the optimized hardware implementation if available.
|
||||
if _, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
|
||||
ctr_blocks_hw(ctx, dst, src, nr_blocks)
|
||||
@@ -183,17 +185,17 @@ xor_blocks :: #force_inline proc "contextless" (dst, src: []byte, blocks: [][]by
|
||||
// performance of this implementation matters to where that
|
||||
// optimization would be worth it, use chacha20poly1305, or a
|
||||
// CPU that isn't e-waste.
|
||||
if src != nil {
|
||||
#no_bounds_check {
|
||||
for i in 0 ..< len(blocks) {
|
||||
off := i * BLOCK_SIZE
|
||||
for j in 0 ..< BLOCK_SIZE {
|
||||
blocks[i][j] ~= src[off + j]
|
||||
#no_bounds_check {
|
||||
if src != nil {
|
||||
for i in 0 ..< len(blocks) {
|
||||
off := i * BLOCK_SIZE
|
||||
for j in 0 ..< BLOCK_SIZE {
|
||||
blocks[i][j] ~= src[off + j]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for i in 0 ..< len(blocks) {
|
||||
copy(dst[i * BLOCK_SIZE:], blocks[i])
|
||||
}
|
||||
}
|
||||
for i in 0 ..< len(blocks) {
|
||||
copy(dst[i * BLOCK_SIZE:], blocks[i])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
//+build amd64
|
||||
package aes
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_aes"
|
||||
import "core:math/bits"
|
||||
import "core:mem"
|
||||
import "core:simd/x86"
|
||||
|
||||
// CTR_STRIDE_HW is the number of blocks the hardware CTR routine
// processes per iteration of its main loop.
@(private)
|
||||
CTR_STRIDE_HW :: 4
|
||||
// CTR_STRIDE_BYTES_HW is the size of one such stride in bytes.
@(private)
|
||||
CTR_STRIDE_BYTES_HW :: CTR_STRIDE_HW * BLOCK_SIZE
|
||||
|
||||
// ctr_blocks_hw produces nr_blocks blocks of AES-CTR output using the
// x86 AES instructions.  For each block, the 128-bit counter
// (ctx._ctr_hi, ctx._ctr_lo) is byte-swapped into a block, encrypted with
// the expanded round keys, and handed to xor_blocks_hw together with dst
// and src.  Blocks are processed CTR_STRIDE_HW at a time, then one at a
// time, and the advanced counter is written back to ctx on completion.
//
// NOTE(review): bounds checks are disabled, so dst (and src, when
// non-nil) are presumably required to hold at least nr_blocks blocks -
// confirm against the caller.
@(private, enable_target_feature = "sse2,aes")
|
||||
ctr_blocks_hw :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) #no_bounds_check {
|
||||
hw_ctx := ctx._impl.(Context_Impl_Hardware)
|
||||
|
||||
// Load the round keys that are in use (0 ..= _num_rounds) into
|
||||
// locals; the remaining entries are left uninitialized.
sks: [15]x86.__m128i = ---
|
||||
for i in 0 ..= hw_ctx._num_rounds {
|
||||
sks[i] = intrinsics.unaligned_load((^x86.__m128i)(&hw_ctx._sk_exp_enc[i]))
|
||||
}
|
||||
|
||||
// hw_inc_ctr returns the current counter as a block (each half
|
||||
// byte-swapped), followed by the counter incremented by one, with
|
||||
// the carry out of lo propagated into hi.
hw_inc_ctr := #force_inline proc "contextless" (hi, lo: u64) -> (x86.__m128i, u64, u64) {
|
||||
ret := x86.__m128i{
|
||||
i64(intrinsics.byte_swap(hi)),
|
||||
i64(intrinsics.byte_swap(lo)),
|
||||
}
|
||||
|
||||
hi, lo := hi, lo
|
||||
carry: u64
|
||||
|
||||
lo, carry = bits.add_u64(lo, 1, 0)
|
||||
hi, _ = bits.add_u64(hi, 0, carry)
|
||||
return ret, hi, lo
|
||||
}
|
||||
|
||||
// The latency of AESENC depends on mfg and microarchitecture:
|
||||
// - 7 -> up to Broadwell
|
||||
// - 4 -> AMD and Skylake - Cascade Lake
|
||||
// - 3 -> Ice Lake and newer
|
||||
//
|
||||
// This implementation does 4 blocks at once, since performance
|
||||
// should be "adequate" across most CPUs.
|
||||
|
||||
src, dst := src, dst
|
||||
nr_blocks := nr_blocks
|
||||
ctr_hi, ctr_lo := ctx._ctr_hi, ctx._ctr_lo
|
||||
|
||||
blks: [CTR_STRIDE_HW]x86.__m128i = ---
|
||||
for nr_blocks >= CTR_STRIDE_HW {
|
||||
#unroll for i in 0..< CTR_STRIDE_HW {
|
||||
blks[i], ctr_hi, ctr_lo = hw_inc_ctr(ctr_hi, ctr_lo)
|
||||
}
|
||||
|
||||
#unroll for i in 0 ..< CTR_STRIDE_HW {
|
||||
blks[i] = x86._mm_xor_si128(blks[i], sks[0])
|
||||
}
|
||||
// Rounds 1 ..= 9 are common to every key size; the rounds that
|
||||
// follow depend on _num_rounds.
#unroll for i in 1 ..= 9 {
|
||||
#unroll for j in 0 ..< CTR_STRIDE_HW {
|
||||
blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
|
||||
}
|
||||
}
|
||||
switch hw_ctx._num_rounds {
|
||||
case _aes.ROUNDS_128:
|
||||
#unroll for i in 0 ..< CTR_STRIDE_HW {
|
||||
blks[i] = x86._mm_aesenclast_si128(blks[i], sks[10])
|
||||
}
|
||||
case _aes.ROUNDS_192:
|
||||
#unroll for i in 10 ..= 11 {
|
||||
#unroll for j in 0 ..< CTR_STRIDE_HW {
|
||||
blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
|
||||
}
|
||||
}
|
||||
#unroll for i in 0 ..< CTR_STRIDE_HW {
|
||||
blks[i] = x86._mm_aesenclast_si128(blks[i], sks[12])
|
||||
}
|
||||
case _aes.ROUNDS_256:
|
||||
#unroll for i in 10 ..= 13 {
|
||||
#unroll for j in 0 ..< CTR_STRIDE_HW {
|
||||
blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
|
||||
}
|
||||
}
|
||||
#unroll for i in 0 ..< CTR_STRIDE_HW {
|
||||
blks[i] = x86._mm_aesenclast_si128(blks[i], sks[14])
|
||||
}
|
||||
}
|
||||
|
||||
xor_blocks_hw(dst, src, blks[:])
|
||||
|
||||
if src != nil {
|
||||
src = src[CTR_STRIDE_BYTES_HW:]
|
||||
}
|
||||
dst = dst[CTR_STRIDE_BYTES_HW:]
|
||||
nr_blocks -= CTR_STRIDE_HW
|
||||
}
|
||||
|
||||
// Handle the remainder.
|
||||
for nr_blocks > 0 {
|
||||
blks[0], ctr_hi, ctr_lo = hw_inc_ctr(ctr_hi, ctr_lo)
|
||||
|
||||
blks[0] = x86._mm_xor_si128(blks[0], sks[0])
|
||||
#unroll for i in 1 ..= 9 {
|
||||
blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
|
||||
}
|
||||
switch hw_ctx._num_rounds {
|
||||
case _aes.ROUNDS_128:
|
||||
blks[0] = x86._mm_aesenclast_si128(blks[0], sks[10])
|
||||
case _aes.ROUNDS_192:
|
||||
#unroll for i in 10 ..= 11 {
|
||||
blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
|
||||
}
|
||||
blks[0] = x86._mm_aesenclast_si128(blks[0], sks[12])
|
||||
case _aes.ROUNDS_256:
|
||||
#unroll for i in 10 ..= 13 {
|
||||
blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
|
||||
}
|
||||
blks[0] = x86._mm_aesenclast_si128(blks[0], sks[14])
|
||||
}
|
||||
|
||||
xor_blocks_hw(dst, src, blks[:1])
|
||||
|
||||
if src != nil {
|
||||
src = src[BLOCK_SIZE:]
|
||||
}
|
||||
dst = dst[BLOCK_SIZE:]
|
||||
nr_blocks -= 1
|
||||
}
|
||||
|
||||
// Write back the counter.
|
||||
ctx._ctr_hi, ctx._ctr_lo = ctr_hi, ctr_lo
|
||||
|
||||
// Scrub the local keystream blocks and round-key copies.
mem.zero_explicit(&blks, size_of(blks))
|
||||
mem.zero_explicit(&sks, size_of(sks))
|
||||
}
|
||||
|
||||
// xor_blocks_hw writes the given blocks to dst, one BLOCK_SIZE slot per
// block.  When src is non-nil, each block is first XORed (in place, in
// the blocks slice) with the corresponding BLOCK_SIZE bytes of src.
// All loads and stores are unaligned and performed without bounds checks.
@(private, enable_target_feature = "sse2")
xor_blocks_hw :: proc(dst, src: []byte, blocks: []x86.__m128i) #no_bounds_check {
	// Fold the input into every block before anything is stored.
	if src != nil {
		for &blk, idx in blocks {
			in_ptr := (^x86.__m128i)(raw_data(src[idx * BLOCK_SIZE:]))
			blk = x86._mm_xor_si128(blk, intrinsics.unaligned_load(in_ptr))
		}
	}

	// Emit each block to its slot in dst.
	for blk, idx in blocks {
		out_ptr := (^x86.__m128i)(raw_data(dst[idx * BLOCK_SIZE:]))
		intrinsics.unaligned_store(out_ptr, blk)
	}
}
|
||||
@@ -12,7 +12,7 @@ Context_ECB :: struct {
|
||||
}
|
||||
|
||||
// init_ecb initializes a Context_ECB with the provided key.
|
||||
init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := Implementation.Hardware) {
|
||||
init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := DEFAULT_IMPLEMENTATION) {
|
||||
init_impl(&ctx._impl, key, impl)
|
||||
ctx._is_initialized = true
|
||||
}
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
//+build amd64
|
||||
package aes
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_aes"
|
||||
import "core:simd/x86"
|
||||
|
||||
// encrypt_block_hw encrypts one 128-bit block read from src using the
// expanded encryption round keys in ctx._sk_exp_enc, and stores the
// result to dst.  src, dst and the round keys are all accessed with
// unaligned 128-bit loads/stores.
@(private, enable_target_feature = "sse2,aes")
|
||||
encrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
|
||||
blk := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src)))
|
||||
|
||||
// XOR in round key 0, then run rounds 1 through 9, which are performed
// regardless of ctx._num_rounds.
blk = x86._mm_xor_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[0])))
|
||||
#unroll for i in 1 ..= 9 {
|
||||
blk = x86._mm_aesenc_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i])))
|
||||
}
|
||||
// The remaining rounds depend on ctx._num_rounds; each case ends with
// aesenclast using the final round key.
// NOTE(review): there is no default case, so an unexpected _num_rounds
// value would skip the final round -- presumably unreachable after
// initialization; confirm.
switch ctx._num_rounds {
|
||||
case _aes.ROUNDS_128:
|
||||
blk = x86._mm_aesenclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[10])))
|
||||
case _aes.ROUNDS_192:
|
||||
#unroll for i in 10 ..= 11 {
|
||||
blk = x86._mm_aesenc_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i])))
|
||||
}
|
||||
blk = x86._mm_aesenclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[12])))
|
||||
case _aes.ROUNDS_256:
|
||||
#unroll for i in 10 ..= 13 {
|
||||
blk = x86._mm_aesenc_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i])))
|
||||
}
|
||||
blk = x86._mm_aesenclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[14])))
|
||||
}
|
||||
|
||||
intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst)), blk)
|
||||
}
|
||||
|
||||
// decrypt_block_hw decrypts one 128-bit block read from src using the
// expanded decryption round keys in ctx._sk_exp_dec, and stores the
// result to dst.  src, dst and the round keys are all accessed with
// unaligned 128-bit loads/stores.
@(private, enable_target_feature = "sse2,aes")
|
||||
decrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
|
||||
blk := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src)))
|
||||
|
||||
// XOR in decryption round key 0, then run rounds 1 through 9, which are
// performed regardless of ctx._num_rounds.
blk = x86._mm_xor_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[0])))
|
||||
#unroll for i in 1 ..= 9 {
|
||||
blk = x86._mm_aesdec_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[i])))
|
||||
}
|
||||
// The remaining rounds depend on ctx._num_rounds; each case ends with
// aesdeclast using the final round key.
// NOTE(review): there is no default case, so an unexpected _num_rounds
// value would skip the final round -- presumably unreachable after
// initialization; confirm.
switch ctx._num_rounds {
|
||||
case _aes.ROUNDS_128:
|
||||
blk = x86._mm_aesdeclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[10])))
|
||||
case _aes.ROUNDS_192:
|
||||
#unroll for i in 10 ..= 11 {
|
||||
blk = x86._mm_aesdec_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[i])))
|
||||
}
|
||||
blk = x86._mm_aesdeclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[12])))
|
||||
case _aes.ROUNDS_256:
|
||||
#unroll for i in 10 ..= 13 {
|
||||
blk = x86._mm_aesdec_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[i])))
|
||||
}
|
||||
blk = x86._mm_aesdeclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[14])))
|
||||
}
|
||||
|
||||
intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst)), blk)
|
||||
}
|
||||
@@ -1,13 +1,16 @@
|
||||
package aes
|
||||
|
||||
import "core:bytes"
|
||||
import "core:crypto"
|
||||
import "core:crypto/_aes"
|
||||
import "core:crypto/_aes/ct64"
|
||||
import "core:encoding/endian"
|
||||
import "core:mem"
|
||||
|
||||
// GCM_NONCE_SIZE is the size of the GCM nonce in bytes.
|
||||
GCM_NONCE_SIZE :: 12
|
||||
// GCM_IV_SIZE is the default size of the GCM IV in bytes.
|
||||
GCM_IV_SIZE :: 12
|
||||
// GCM_IV_SIZE_MAX is the maximum size of the GCM IV in bytes.
|
||||
GCM_IV_SIZE_MAX :: 0x2000000000000000 // floor((2^64 - 1) / 8) bits
|
||||
// GCM_TAG_SIZE is the size of a GCM tag in bytes.
|
||||
GCM_TAG_SIZE :: _aes.GHASH_TAG_SIZE
|
||||
|
||||
@@ -23,69 +26,79 @@ Context_GCM :: struct {
|
||||
}
|
||||
|
||||
// init_gcm initializes a Context_GCM with the provided key.
|
||||
init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := Implementation.Hardware) {
|
||||
init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := DEFAULT_IMPLEMENTATION) {
|
||||
init_impl(&ctx._impl, key, impl)
|
||||
ctx._is_initialized = true
|
||||
}
|
||||
|
||||
// seal_gcm encrypts the plaintext and authenticates the aad and ciphertext,
|
||||
// with the provided Context_GCM and nonce, stores the output in dst and tag.
|
||||
// with the provided Context_GCM and iv, stores the output in dst and tag.
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
|
||||
seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, iv, aad, plaintext: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
gcm_validate_common_slice_sizes(tag, nonce, aad, plaintext)
|
||||
gcm_validate_common_slice_sizes(tag, iv, aad, plaintext)
|
||||
if len(dst) != len(plaintext) {
|
||||
panic("crypto/aes: invalid destination ciphertext size")
|
||||
}
|
||||
if bytes.alias_inexactly(dst, plaintext) {
|
||||
panic("crypto/aes: dst and plaintext alias inexactly")
|
||||
}
|
||||
|
||||
if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
|
||||
gcm_seal_hw(&impl, dst, tag, nonce, aad, plaintext)
|
||||
gcm_seal_hw(&impl, dst, tag, iv, aad, plaintext)
|
||||
return
|
||||
}
|
||||
|
||||
h: [_aes.GHASH_KEY_SIZE]byte
|
||||
j0: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
s: [_aes.GHASH_TAG_SIZE]byte
|
||||
init_ghash_ct64(ctx, &h, &j0, nonce)
|
||||
init_ghash_ct64(ctx, &h, &j0, &j0_enc, iv)
|
||||
|
||||
// Note: Our GHASH implementation handles appending padding.
|
||||
ct64.ghash(s[:], h[:], aad)
|
||||
gctr_ct64(ctx, dst, &s, plaintext, &h, nonce, true)
|
||||
final_ghash_ct64(&s, &h, &j0, len(aad), len(plaintext))
|
||||
gctr_ct64(ctx, dst, &s, plaintext, &h, &j0, true)
|
||||
final_ghash_ct64(&s, &h, &j0_enc, len(aad), len(plaintext))
|
||||
copy(tag, s[:])
|
||||
|
||||
mem.zero_explicit(&h, len(h))
|
||||
mem.zero_explicit(&j0, len(j0))
|
||||
mem.zero_explicit(&j0_enc, len(j0_enc))
|
||||
}
|
||||
|
||||
// open_gcm authenticates the aad and ciphertext, and decrypts the ciphertext,
|
||||
// with the provided Context_GCM, nonce, and tag, and stores the output in dst,
|
||||
// with the provided Context_GCM, iv, and tag, and stores the output in dst,
|
||||
// returning true iff the authentication was successful. If authentication
|
||||
// fails, the destination buffer will be zeroed.
|
||||
//
|
||||
// dst and ciphertext MUST alias exactly or not at all.
|
||||
open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
|
||||
@(require_results)
|
||||
open_gcm :: proc(ctx: ^Context_GCM, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
gcm_validate_common_slice_sizes(tag, nonce, aad, ciphertext)
|
||||
gcm_validate_common_slice_sizes(tag, iv, aad, ciphertext)
|
||||
if len(dst) != len(ciphertext) {
|
||||
panic("crypto/aes: invalid destination plaintext size")
|
||||
}
|
||||
if bytes.alias_inexactly(dst, ciphertext) {
|
||||
panic("crypto/aes: dst and ciphertext alias inexactly")
|
||||
}
|
||||
|
||||
if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
|
||||
return gcm_open_hw(&impl, dst, nonce, aad, ciphertext, tag)
|
||||
return gcm_open_hw(&impl, dst, iv, aad, ciphertext, tag)
|
||||
}
|
||||
|
||||
h: [_aes.GHASH_KEY_SIZE]byte
|
||||
j0: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
s: [_aes.GHASH_TAG_SIZE]byte
|
||||
init_ghash_ct64(ctx, &h, &j0, nonce)
|
||||
init_ghash_ct64(ctx, &h, &j0, &j0_enc, iv)
|
||||
|
||||
ct64.ghash(s[:], h[:], aad)
|
||||
gctr_ct64(ctx, dst, &s, ciphertext, &h, nonce, false)
|
||||
final_ghash_ct64(&s, &h, &j0, len(aad), len(ciphertext))
|
||||
gctr_ct64(ctx, dst, &s, ciphertext, &h, &j0, false)
|
||||
final_ghash_ct64(&s, &h, &j0_enc, len(aad), len(ciphertext))
|
||||
|
||||
ok := crypto.compare_constant_time(s[:], tag) == 1
|
||||
if !ok {
|
||||
@@ -94,32 +107,28 @@ open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) ->
|
||||
|
||||
mem.zero_explicit(&h, len(h))
|
||||
mem.zero_explicit(&j0, len(j0))
|
||||
mem.zero_explicit(&j0_enc, len(j0_enc))
|
||||
mem.zero_explicit(&s, len(s))
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
// reset_ctr sanitizes the Context_GCM. The Context_GCM must be
|
||||
// reset_gcm sanitizes the Context_GCM. The Context_GCM must be
|
||||
// re-initialized to be used again.
|
||||
reset_gcm :: proc "contextless" (ctx: ^Context_GCM) {
|
||||
reset_impl(&ctx._impl)
|
||||
ctx._is_initialized = false
|
||||
}
|
||||
|
||||
@(private)
|
||||
gcm_validate_common_slice_sizes :: proc(tag, nonce, aad, text: []byte) {
|
||||
@(private = "file")
|
||||
gcm_validate_common_slice_sizes :: proc(tag, iv, aad, text: []byte) {
|
||||
if len(tag) != GCM_TAG_SIZE {
|
||||
panic("crypto/aes: invalid GCM tag size")
|
||||
}
|
||||
|
||||
// The specification supports nonces in the range [1, 2^64) bits
|
||||
// however per NIST SP 800-38D 5.2.1.1:
|
||||
//
|
||||
// > For IVs, it is recommended that implementations restrict support
|
||||
// > to the length of 96 bits, to promote interoperability, efficiency,
|
||||
// > and simplicity of design.
|
||||
if len(nonce) != GCM_NONCE_SIZE {
|
||||
panic("crypto/aes: invalid GCM nonce size")
|
||||
// The specification supports IVs in the range [1, 2^64) bits.
|
||||
if l := len(iv); l == 0 || u64(l) >= GCM_IV_SIZE_MAX {
|
||||
panic("crypto/aes: invalid GCM IV size")
|
||||
}
|
||||
|
||||
if aad_len := u64(len(aad)); aad_len > GCM_A_MAX {
|
||||
@@ -135,19 +144,33 @@ init_ghash_ct64 :: proc(
|
||||
ctx: ^Context_GCM,
|
||||
h: ^[_aes.GHASH_KEY_SIZE]byte,
|
||||
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
nonce: []byte,
|
||||
j0_enc: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
iv: []byte,
|
||||
) {
|
||||
impl := &ctx._impl.(ct64.Context)
|
||||
|
||||
// 1. Let H = CIPH(k, 0^128)
|
||||
ct64.encrypt_block(impl, h[:], h[:])
|
||||
|
||||
// Define a block, J0, as follows:
|
||||
if l := len(iv); l == GCM_IV_SIZE {
|
||||
// if len(IV) = 96, then let J0 = IV || 0^31 || 1
|
||||
copy(j0[:], iv)
|
||||
j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
|
||||
} else {
|
||||
// If len(IV) != 96, then let s = 128 ceil(len(IV)/128) - len(IV),
|
||||
// and let J0 = GHASH_H(IV || 0^(s+64) || [len(IV)]_64).
|
||||
ct64.ghash(j0[:], h[:], iv)
|
||||
|
||||
tmp: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
endian.unchecked_put_u64be(tmp[8:], u64(l) * 8)
|
||||
ct64.ghash(j0[:], h[:], tmp[:])
|
||||
}
|
||||
|
||||
// ECB encrypt j0, so that we can just XOR with the tag. In theory
|
||||
// this could be processed along with the final GCTR block, to
|
||||
// potentially save a call to AES-ECB, but... just use AES-NI.
|
||||
copy(j0[:], nonce)
|
||||
j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
|
||||
ct64.encrypt_block(impl, j0[:], j0[:])
|
||||
ct64.encrypt_block(impl, j0_enc[:], j0[:])
|
||||
}
|
||||
|
||||
@(private = "file")
|
||||
@@ -175,33 +198,27 @@ gctr_ct64 :: proc(
|
||||
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
src: []byte,
|
||||
h: ^[_aes.GHASH_KEY_SIZE]byte,
|
||||
nonce: []byte,
|
||||
iv: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
is_seal: bool,
|
||||
) {
|
||||
) #no_bounds_check {
|
||||
ct64_inc_ctr32 := #force_inline proc "contextless" (dst: []byte, ctr: u32) -> u32 {
|
||||
endian.unchecked_put_u32be(dst[12:], ctr)
|
||||
return ctr + 1
|
||||
}
|
||||
|
||||
// 2. Define a block J_0 as follows:
|
||||
// if len(IV) = 96, then let J0 = IV || 0^31 || 1
|
||||
//
|
||||
// Note: We only support 96 bit IVs.
|
||||
// Setup the counter blocks.
|
||||
tmp, tmp2: [ct64.STRIDE][BLOCK_SIZE]byte = ---, ---
|
||||
ctrs, blks: [ct64.STRIDE][]byte = ---, ---
|
||||
ctr: u32 = 2
|
||||
ctr := endian.unchecked_get_u32be(iv[GCM_IV_SIZE:]) + 1
|
||||
for i in 0 ..< ct64.STRIDE {
|
||||
// Setup scratch space for the keystream.
|
||||
blks[i] = tmp2[i][:]
|
||||
|
||||
// Pre-copy the IV to all the counter blocks.
|
||||
ctrs[i] = tmp[i][:]
|
||||
copy(ctrs[i], nonce)
|
||||
copy(ctrs[i], iv[:GCM_IV_SIZE])
|
||||
}
|
||||
|
||||
// We stitch the GCTR and GHASH operations together, so that only
|
||||
// one pass over the ciphertext is required.
|
||||
|
||||
impl := &ctx._impl.(ct64.Context)
|
||||
src, dst := src, dst
|
||||
|
||||
|
||||
@@ -0,0 +1,243 @@
|
||||
//+build amd64
|
||||
package aes
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto"
|
||||
import "core:crypto/_aes"
|
||||
import "core:crypto/_aes/hw_intel"
|
||||
import "core:encoding/endian"
|
||||
import "core:mem"
|
||||
import "core:simd/x86"
|
||||
|
||||
// gcm_seal_hw is the hardware accelerated AES-GCM seal operation.
//
// It derives the GHASH key and J0 blocks from ctx and iv, hashes aad,
// runs gctr_hw over plaintext in seal mode (writing to dst), finalizes
// the GHASH state with the aad/plaintext lengths, and copies the result
// into tag.  The temporaries h, j0 and j0_enc are wiped before
// returning.  No argument validation happens in this procedure.
@(private)
|
||||
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, iv, aad, plaintext: []byte) {
|
||||
h: [_aes.GHASH_KEY_SIZE]byte
|
||||
j0: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
s: [_aes.GHASH_TAG_SIZE]byte
|
||||
init_ghash_hw(ctx, &h, &j0, &j0_enc, iv)
|
||||
|
||||
// Note: Our GHASH implementation handles appending padding.
|
||||
hw_intel.ghash(s[:], h[:], aad)
|
||||
gctr_hw(ctx, dst, &s, plaintext, &h, &j0, true)
|
||||
final_ghash_hw(&s, &h, &j0_enc, len(aad), len(plaintext))
|
||||
copy(tag, s[:])
|
||||
|
||||
mem.zero_explicit(&h, len(h))
|
||||
mem.zero_explicit(&j0, len(j0))
|
||||
mem.zero_explicit(&j0_enc, len(j0_enc))
|
||||
}
|
||||
|
||||
// gcm_open_hw is the hardware accelerated AES-GCM open operation.
//
// It derives the GHASH key and J0 blocks from ctx and iv, hashes aad,
// runs gctr_hw over ciphertext in open mode (writing to dst), and
// finalizes the GHASH state with the aad/ciphertext lengths.  The
// computed tag is compared against tag with
// crypto.compare_constant_time; the return value is true iff they
// match.  On mismatch dst is zeroed.  The temporaries h, j0, j0_enc and
// s are wiped before returning.  No argument validation happens in this
// procedure.
@(private)
|
||||
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
h: [_aes.GHASH_KEY_SIZE]byte
|
||||
j0: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
s: [_aes.GHASH_TAG_SIZE]byte
|
||||
init_ghash_hw(ctx, &h, &j0, &j0_enc, iv)
|
||||
|
||||
hw_intel.ghash(s[:], h[:], aad)
|
||||
gctr_hw(ctx, dst, &s, ciphertext, &h, &j0, false)
|
||||
final_ghash_hw(&s, &h, &j0_enc, len(aad), len(ciphertext))
|
||||
|
||||
ok := crypto.compare_constant_time(s[:], tag) == 1
|
||||
if !ok {
|
||||
// Authentication failed: do not leave the decrypted output in dst.
mem.zero_explicit(raw_data(dst), len(dst))
|
||||
}
|
||||
|
||||
mem.zero_explicit(&h, len(h))
|
||||
mem.zero_explicit(&j0, len(j0))
|
||||
mem.zero_explicit(&j0_enc, len(j0_enc))
|
||||
mem.zero_explicit(&s, len(s))
|
||||
|
||||
return ok
|
||||
}
|
||||
|
||||
// init_ghash_hw derives the per-message GCM values from ctx and iv:
//
// - h receives the encryption of its own initial contents (callers in
//   this file pass a zero-initialized array, giving H = CIPH(k, 0^128)).
// - j0 receives the pre-counter block J0 computed from iv.
// - j0_enc receives the encryption of j0.
//
// j0 is expected to be zero-initialized by the caller, as only the
// iv-derived bytes and the trailing byte are written in the 96 bit case.
@(private = "file")
|
||||
init_ghash_hw :: proc(
|
||||
ctx: ^Context_Impl_Hardware,
|
||||
h: ^[_aes.GHASH_KEY_SIZE]byte,
|
||||
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
j0_enc: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
iv: []byte,
|
||||
) {
|
||||
// 1. Let H = CIPH(k, 0^128)
|
||||
encrypt_block_hw(ctx, h[:], h[:])
|
||||
|
||||
// Define a block, J0, as follows:
|
||||
if l := len(iv); l == GCM_IV_SIZE {
|
||||
// if len(IV) = 96, then let J0 = IV || 0^31 || 1
|
||||
copy(j0[:], iv)
|
||||
j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
|
||||
} else {
|
||||
// If len(IV) != 96, then let s = 128 ceil(len(IV)/128) - len(IV),
|
||||
// and let J0 = GHASH_H(IV || 0^(s+64) || [len(IV)]_64).
|
||||
hw_intel.ghash(j0[:], h[:], iv)
|
||||
|
||||
// tmp is 0^64 || [len(IV)]_64, with the IV length expressed in bits.
tmp: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
endian.unchecked_put_u64be(tmp[8:], u64(l) * 8)
|
||||
hw_intel.ghash(j0[:], h[:], tmp[:])
|
||||
}
|
||||
|
||||
// ECB encrypt j0, so that we can just XOR with the tag.
|
||||
encrypt_block_hw(ctx, j0_enc[:], j0[:])
|
||||
}
|
||||
|
||||
// final_ghash_hw finishes the tag computation in s.
//
// It hashes one final block holding a_len and t_len (byte counts,
// converted to bit counts and encoded as big-endian 64-bit integers)
// into s using key h, then XORs s with j0 in place.  Callers in this
// file pass the encrypted J0 block as j0.
@(private = "file", enable_target_feature = "sse2")
|
||||
final_ghash_hw :: proc(
|
||||
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
h: ^[_aes.GHASH_KEY_SIZE]byte,
|
||||
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
a_len: int,
|
||||
t_len: int,
|
||||
) {
|
||||
blk: [_aes.GHASH_BLOCK_SIZE]byte
|
||||
endian.unchecked_put_u64be(blk[0:], u64(a_len) * 8)
|
||||
endian.unchecked_put_u64be(blk[8:], u64(t_len) * 8)
|
||||
|
||||
hw_intel.ghash(s[:], h[:], blk[:])
|
||||
j0_vec := intrinsics.unaligned_load((^x86.__m128i)(j0))
|
||||
s_vec := intrinsics.unaligned_load((^x86.__m128i)(s))
|
||||
s_vec = x86._mm_xor_si128(s_vec, j0_vec)
|
||||
intrinsics.unaligned_store((^x86.__m128i)(s), s_vec)
|
||||
}
|
||||
|
||||
// gctr_hw XORs src with an AES counter-mode keystream, writing the
// result to dst, and folds the ciphertext into the GHASH state s using
// key h.
//
// When is_seal is true the output (dst) is hashed after it is produced;
// otherwise the input (src) is hashed before it is processed.  iv is the
// J0 block: the counter starts at the big-endian u32 stored at
// iv[GCM_IV_SIZE:] plus one, and is inserted into a copy of iv for each
// keystream block via hw_inc_ctr32.
//
// Full strides of CTR_STRIDE_HW blocks are processed first, then the
// remainder one block at a time, including a final partial block.
// The local round key and keystream copies are wiped before returning.
// Bounds checking is disabled; dst presumably must be at least len(src)
// bytes -- NOTE(review): confirm against callers.
@(private = "file", enable_target_feature = "sse2,sse4.1,aes")
|
||||
gctr_hw :: proc(
|
||||
ctx: ^Context_Impl_Hardware,
|
||||
dst: []byte,
|
||||
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
src: []byte,
|
||||
h: ^[_aes.GHASH_KEY_SIZE]byte,
|
||||
iv: ^[_aes.GHASH_BLOCK_SIZE]byte,
|
||||
is_seal: bool,
|
||||
) #no_bounds_check {
|
||||
// Load round keys 0 ..= ctx._num_rounds into local vectors; the
// remaining entries of sks are left uninitialized and are not read by
// the matching switch cases below.
sks: [15]x86.__m128i = ---
|
||||
for i in 0 ..= ctx._num_rounds {
|
||||
sks[i] = intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i]))
|
||||
}
|
||||
|
||||
// Setup the counter block
|
||||
ctr_blk := intrinsics.unaligned_load((^x86.__m128i)(iv))
|
||||
ctr := endian.unchecked_get_u32be(iv[GCM_IV_SIZE:]) + 1
|
||||
|
||||
src, dst := src, dst
|
||||
|
||||
// Note: Instead of doing GHASH and CTR separately, it is more
|
||||
// performant to interleave (stitch) the two operations together.
|
||||
// This results in an unreadable mess, so we opt for simplicity
|
||||
// as performance is adequate.
|
||||
|
||||
blks: [CTR_STRIDE_HW]x86.__m128i = ---
|
||||
nr_blocks := len(src) / BLOCK_SIZE
|
||||
for nr_blocks >= CTR_STRIDE_HW {
|
||||
// When opening, hash the ciphertext (src) before it is decrypted.
if !is_seal {
|
||||
hw_intel.ghash(s[:], h[:], src[:CTR_STRIDE_BYTES_HW])
|
||||
}
|
||||
|
||||
// Generate CTR_STRIDE_HW consecutive counter blocks.
#unroll for i in 0 ..< CTR_STRIDE_HW {
|
||||
blks[i], ctr = hw_inc_ctr32(&ctr_blk, ctr)
|
||||
}
|
||||
|
||||
// Encrypt the counter blocks, applying each round to every block in
// the stride before moving to the next round.
#unroll for i in 0 ..< CTR_STRIDE_HW {
|
||||
blks[i] = x86._mm_xor_si128(blks[i], sks[0])
|
||||
}
|
||||
#unroll for i in 1 ..= 9 {
|
||||
#unroll for j in 0 ..< CTR_STRIDE_HW {
|
||||
blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
|
||||
}
|
||||
}
|
||||
switch ctx._num_rounds {
|
||||
case _aes.ROUNDS_128:
|
||||
#unroll for i in 0 ..< CTR_STRIDE_HW {
|
||||
blks[i] = x86._mm_aesenclast_si128(blks[i], sks[10])
|
||||
}
|
||||
case _aes.ROUNDS_192:
|
||||
#unroll for i in 10 ..= 11 {
|
||||
#unroll for j in 0 ..< CTR_STRIDE_HW {
|
||||
blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
|
||||
}
|
||||
}
|
||||
#unroll for i in 0 ..< CTR_STRIDE_HW {
|
||||
blks[i] = x86._mm_aesenclast_si128(blks[i], sks[12])
|
||||
}
|
||||
case _aes.ROUNDS_256:
|
||||
#unroll for i in 10 ..= 13 {
|
||||
#unroll for j in 0 ..< CTR_STRIDE_HW {
|
||||
blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
|
||||
}
|
||||
}
|
||||
#unroll for i in 0 ..< CTR_STRIDE_HW {
|
||||
blks[i] = x86._mm_aesenclast_si128(blks[i], sks[14])
|
||||
}
|
||||
}
|
||||
|
||||
xor_blocks_hw(dst, src, blks[:])
|
||||
|
||||
// When sealing, hash the ciphertext (dst) after it is produced.
if is_seal {
|
||||
hw_intel.ghash(s[:], h[:], dst[:CTR_STRIDE_BYTES_HW])
|
||||
}
|
||||
|
||||
src = src[CTR_STRIDE_BYTES_HW:]
|
||||
dst = dst[CTR_STRIDE_BYTES_HW:]
|
||||
nr_blocks -= CTR_STRIDE_HW
|
||||
}
|
||||
|
||||
// Handle the remainder.
|
||||
for n := len(src); n > 0; {
|
||||
// l is BLOCK_SIZE for full blocks, or the trailing partial length.
l := min(n, BLOCK_SIZE)
|
||||
if !is_seal {
|
||||
hw_intel.ghash(s[:], h[:], src[:l])
|
||||
}
|
||||
|
||||
blks[0], ctr = hw_inc_ctr32(&ctr_blk, ctr)
|
||||
|
||||
blks[0] = x86._mm_xor_si128(blks[0], sks[0])
|
||||
#unroll for i in 1 ..= 9 {
|
||||
blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
|
||||
}
|
||||
switch ctx._num_rounds {
|
||||
case _aes.ROUNDS_128:
|
||||
blks[0] = x86._mm_aesenclast_si128(blks[0], sks[10])
|
||||
case _aes.ROUNDS_192:
|
||||
#unroll for i in 10 ..= 11 {
|
||||
blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
|
||||
}
|
||||
blks[0] = x86._mm_aesenclast_si128(blks[0], sks[12])
|
||||
case _aes.ROUNDS_256:
|
||||
#unroll for i in 10 ..= 13 {
|
||||
blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
|
||||
}
|
||||
blks[0] = x86._mm_aesenclast_si128(blks[0], sks[14])
|
||||
}
|
||||
|
||||
if l == BLOCK_SIZE {
|
||||
xor_blocks_hw(dst, src, blks[:1])
|
||||
} else {
|
||||
// Partial final block: xor_blocks_hw always loads and stores whole
// 16-byte vectors, so stage the data in a full-size scratch block and
// copy only the l valid bytes to dst.
blk: [BLOCK_SIZE]byte
|
||||
copy(blk[:], src)
|
||||
xor_blocks_hw(blk[:], blk[:], blks[:1])
|
||||
copy(dst, blk[:l])
|
||||
}
|
||||
if is_seal {
|
||||
hw_intel.ghash(s[:], h[:], dst[:l])
|
||||
}
|
||||
|
||||
dst = dst[l:]
|
||||
src = src[l:]
|
||||
n -= l
|
||||
}
|
||||
|
||||
mem.zero_explicit(&blks, size_of(blks))
|
||||
mem.zero_explicit(&sks, size_of(sks))
|
||||
}
|
||||
|
||||
// hw_inc_ctr32 returns a copy of src^ with its 32-bit lane at index 3
// replaced by the byte-swapped value of ctr, together with ctr + 1 as
// the next counter value.  src^ itself is not modified.
//
// BUG: Sticking this in gctr_hw (like the other implementations) crashes
|
||||
// the compiler.
|
||||
//
|
||||
// src/check_expr.cpp(7892): Assertion Failure: `c->curr_proc_decl->entity`
|
||||
@(private = "file", enable_target_feature = "sse4.1")
|
||||
hw_inc_ctr32 :: #force_inline proc "contextless" (src: ^x86.__m128i, ctr: u32) -> (x86.__m128i, u32) {
|
||||
ret := x86._mm_insert_epi32(src^, i32(intrinsics.byte_swap(ctr)), 3)
|
||||
return ret, ctr + 1
|
||||
}
|
||||
@@ -10,6 +10,10 @@ Context_Impl :: union {
|
||||
Context_Impl_Hardware,
|
||||
}
|
||||
|
||||
// DEFAULT_IMPLEMENTATION is the implementation that will be used by
|
||||
// default if possible.
|
||||
DEFAULT_IMPLEMENTATION :: Implementation.Hardware
|
||||
|
||||
// Implementation is an AES implementation. Most callers will not need
|
||||
// to use this as the package will automatically select the most performant
|
||||
// implementation available (See `is_hardware_accelerated()`).
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
//+build !amd64
|
||||
package aes
|
||||
|
||||
@(private = "file")
|
||||
@@ -33,11 +34,11 @@ ctr_blocks_hw :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) {
|
||||
}
|
||||
|
||||
@(private)
|
||||
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext: []byte) {
|
||||
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, iv, aad, plaintext: []byte) {
|
||||
panic(ERR_HW_NOT_SUPPORTED)
|
||||
}
|
||||
|
||||
@(private)
|
||||
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
|
||||
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
panic(ERR_HW_NOT_SUPPORTED)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
//+build amd64
|
||||
package aes
|
||||
|
||||
import "core:crypto/_aes/hw_intel"
|
||||
|
||||
// is_hardware_accelerated returns true iff hardware accelerated AES
|
||||
// is supported.  On this (amd64) build the answer is whatever
// hw_intel.is_supported() reports.
|
||||
is_hardware_accelerated :: proc "contextless" () -> bool {
|
||||
return hw_intel.is_supported()
|
||||
}
|
||||
|
||||
// Context_Impl_Hardware is the hardware accelerated implementation's
// context type; on this (amd64) build it is hw_intel.Context.
@(private)
|
||||
Context_Impl_Hardware :: hw_intel.Context
|
||||
|
||||
// init_impl_hw initializes the hardware context ctx with key by
// delegating to hw_intel.init.
@(private, enable_target_feature = "sse2,aes")
|
||||
init_impl_hw :: proc(ctx: ^Context_Impl_Hardware, key: []byte) {
|
||||
hw_intel.init(ctx, key)
|
||||
}
|
||||
@@ -7,134 +7,84 @@ See:
|
||||
*/
|
||||
package chacha20
|
||||
|
||||
import "core:encoding/endian"
|
||||
import "core:math/bits"
|
||||
import "core:bytes"
|
||||
import "core:crypto/_chacha20"
|
||||
import "core:mem"
|
||||
|
||||
// KEY_SIZE is the (X)ChaCha20 key size in bytes.
|
||||
KEY_SIZE :: 32
|
||||
// NONCE_SIZE is the ChaCha20 nonce size in bytes.
|
||||
NONCE_SIZE :: 12
|
||||
// XNONCE_SIZE is the XChaCha20 nonce size in bytes.
|
||||
XNONCE_SIZE :: 24
|
||||
|
||||
@(private)
|
||||
_MAX_CTR_IETF :: 0xffffffff
|
||||
|
||||
@(private)
|
||||
_BLOCK_SIZE :: 64
|
||||
@(private)
|
||||
_STATE_SIZE_U32 :: 16
|
||||
@(private)
|
||||
_ROUNDS :: 20
|
||||
|
||||
@(private)
|
||||
_SIGMA_0: u32 : 0x61707865
|
||||
@(private)
|
||||
_SIGMA_1: u32 : 0x3320646e
|
||||
@(private)
|
||||
_SIGMA_2: u32 : 0x79622d32
|
||||
@(private)
|
||||
_SIGMA_3: u32 : 0x6b206574
|
||||
KEY_SIZE :: _chacha20.KEY_SIZE
|
||||
// IV_SIZE is the ChaCha20 IV size in bytes.
|
||||
IV_SIZE :: _chacha20.IV_SIZE
|
||||
// XIV_SIZE is the XChaCha20 IV size in bytes.
|
||||
XIV_SIZE :: _chacha20.XIV_SIZE
|
||||
|
||||
// Context is a ChaCha20 or XChaCha20 instance.
|
||||
Context :: struct {
|
||||
_s: [_STATE_SIZE_U32]u32,
|
||||
_buffer: [_BLOCK_SIZE]byte,
|
||||
_off: int,
|
||||
_is_ietf_flavor: bool,
|
||||
_is_initialized: bool,
|
||||
_state: _chacha20.Context,
|
||||
_impl: Implementation,
|
||||
}
|
||||
|
||||
// init initializes a Context for ChaCha20 or XChaCha20 with the provided
|
||||
// key and nonce.
|
||||
init :: proc(ctx: ^Context, key, nonce: []byte) {
|
||||
// key and iv.
|
||||
init :: proc(ctx: ^Context, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) {
|
||||
if len(key) != KEY_SIZE {
|
||||
panic("crypto/chacha20: invalid ChaCha20 key size")
|
||||
panic("crypto/chacha20: invalid (X)ChaCha20 key size")
|
||||
}
|
||||
if n_len := len(nonce); n_len != NONCE_SIZE && n_len != XNONCE_SIZE {
|
||||
panic("crypto/chacha20: invalid (X)ChaCha20 nonce size")
|
||||
if l := len(iv); l != IV_SIZE && l != XIV_SIZE {
|
||||
panic("crypto/chacha20: invalid (X)ChaCha20 IV size")
|
||||
}
|
||||
|
||||
k, n := key, nonce
|
||||
k, n := key, iv
|
||||
|
||||
// Derive the XChaCha20 subkey and sub-nonce via HChaCha20.
|
||||
is_xchacha := len(nonce) == XNONCE_SIZE
|
||||
init_impl(ctx, impl)
|
||||
|
||||
is_xchacha := len(iv) == XIV_SIZE
|
||||
if is_xchacha {
|
||||
sub_key := ctx._buffer[:KEY_SIZE]
|
||||
_hchacha20(sub_key, k, n)
|
||||
sub_iv: [IV_SIZE]byte
|
||||
sub_key := ctx._state._buffer[:KEY_SIZE]
|
||||
hchacha20(sub_key, k, n, ctx._impl)
|
||||
k = sub_key
|
||||
n = n[16:24]
|
||||
copy(sub_iv[4:], n[16:])
|
||||
n = sub_iv[:]
|
||||
}
|
||||
|
||||
ctx._s[0] = _SIGMA_0
|
||||
ctx._s[1] = _SIGMA_1
|
||||
ctx._s[2] = _SIGMA_2
|
||||
ctx._s[3] = _SIGMA_3
|
||||
ctx._s[4] = endian.unchecked_get_u32le(k[0:4])
|
||||
ctx._s[5] = endian.unchecked_get_u32le(k[4:8])
|
||||
ctx._s[6] = endian.unchecked_get_u32le(k[8:12])
|
||||
ctx._s[7] = endian.unchecked_get_u32le(k[12:16])
|
||||
ctx._s[8] = endian.unchecked_get_u32le(k[16:20])
|
||||
ctx._s[9] = endian.unchecked_get_u32le(k[20:24])
|
||||
ctx._s[10] = endian.unchecked_get_u32le(k[24:28])
|
||||
ctx._s[11] = endian.unchecked_get_u32le(k[28:32])
|
||||
ctx._s[12] = 0
|
||||
if !is_xchacha {
|
||||
ctx._s[13] = endian.unchecked_get_u32le(n[0:4])
|
||||
ctx._s[14] = endian.unchecked_get_u32le(n[4:8])
|
||||
ctx._s[15] = endian.unchecked_get_u32le(n[8:12])
|
||||
} else {
|
||||
ctx._s[13] = 0
|
||||
ctx._s[14] = endian.unchecked_get_u32le(n[0:4])
|
||||
ctx._s[15] = endian.unchecked_get_u32le(n[4:8])
|
||||
_chacha20.init(&ctx._state, k, n, is_xchacha)
|
||||
|
||||
if is_xchacha {
|
||||
// The sub-key is stored in the keystream buffer. While
|
||||
// this will be overwritten in most circumstances, explicitly
|
||||
// clear it out early.
|
||||
mem.zero_explicit(&ctx._buffer, KEY_SIZE)
|
||||
mem.zero_explicit(&ctx._state._buffer, KEY_SIZE)
|
||||
}
|
||||
|
||||
ctx._off = _BLOCK_SIZE
|
||||
ctx._is_ietf_flavor = !is_xchacha
|
||||
ctx._is_initialized = true
|
||||
}
|
||||
|
||||
// seek seeks the (X)ChaCha20 stream counter to the specified block.
|
||||
seek :: proc(ctx: ^Context, block_nr: u64) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
if ctx._is_ietf_flavor {
|
||||
if block_nr > _MAX_CTR_IETF {
|
||||
panic("crypto/chacha20: attempted to seek past maximum counter")
|
||||
}
|
||||
} else {
|
||||
ctx._s[13] = u32(block_nr >> 32)
|
||||
}
|
||||
ctx._s[12] = u32(block_nr)
|
||||
ctx._off = _BLOCK_SIZE
|
||||
_chacha20.seek(&ctx._state, block_nr)
|
||||
}
|
||||
|
||||
// xor_bytes XORs each byte in src with bytes taken from the (X)ChaCha20
|
||||
// keystream, and writes the resulting output to dst. Dst and src MUST
|
||||
// alias exactly or not at all.
|
||||
xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
|
||||
// TODO: Enforcing that dst and src alias exactly or not at all
|
||||
// is a good idea, though odd aliasing should be extremely uncommon.
|
||||
assert(ctx._state._is_initialized)
|
||||
|
||||
src, dst := src, dst
|
||||
if dst_len := len(dst); dst_len < len(src) {
|
||||
src = src[:dst_len]
|
||||
}
|
||||
|
||||
for remaining := len(src); remaining > 0; {
|
||||
if bytes.alias_inexactly(dst, src) {
|
||||
panic("crypto/chacha20: dst and src alias inexactly")
|
||||
}
|
||||
|
||||
st := &ctx._state
|
||||
#no_bounds_check for remaining := len(src); remaining > 0; {
|
||||
// Process multiple blocks at once
|
||||
if ctx._off == _BLOCK_SIZE {
|
||||
if nr_blocks := remaining / _BLOCK_SIZE; nr_blocks > 0 {
|
||||
direct_bytes := nr_blocks * _BLOCK_SIZE
|
||||
_do_blocks(ctx, dst, src, nr_blocks)
|
||||
if st._off == _chacha20.BLOCK_SIZE {
|
||||
if nr_blocks := remaining / _chacha20.BLOCK_SIZE; nr_blocks > 0 {
|
||||
direct_bytes := nr_blocks * _chacha20.BLOCK_SIZE
|
||||
stream_blocks(ctx, dst, src, nr_blocks)
|
||||
remaining -= direct_bytes
|
||||
if remaining == 0 {
|
||||
return
|
||||
@@ -145,17 +95,17 @@ xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
|
||||
// If there is a partial block, generate and buffer 1 block
|
||||
// worth of keystream.
|
||||
_do_blocks(ctx, ctx._buffer[:], nil, 1)
|
||||
ctx._off = 0
|
||||
stream_blocks(ctx, st._buffer[:], nil, 1)
|
||||
st._off = 0
|
||||
}
|
||||
|
||||
// Process partial blocks from the buffered keystream.
|
||||
to_xor := min(_BLOCK_SIZE - ctx._off, remaining)
|
||||
buffered_keystream := ctx._buffer[ctx._off:]
|
||||
to_xor := min(_chacha20.BLOCK_SIZE - st._off, remaining)
|
||||
buffered_keystream := st._buffer[st._off:]
|
||||
for i := 0; i < to_xor; i = i + 1 {
|
||||
dst[i] = buffered_keystream[i] ~ src[i]
|
||||
}
|
||||
ctx._off += to_xor
|
||||
st._off += to_xor
|
||||
dst = dst[to_xor:]
|
||||
src = src[to_xor:]
|
||||
remaining -= to_xor
|
||||
@@ -164,15 +114,15 @@ xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
|
||||
|
||||
// keystream_bytes fills dst with the raw (X)ChaCha20 keystream output.
|
||||
keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
|
||||
assert(ctx._is_initialized)
|
||||
assert(ctx._state._is_initialized)
|
||||
|
||||
dst := dst
|
||||
for remaining := len(dst); remaining > 0; {
|
||||
dst, st := dst, &ctx._state
|
||||
#no_bounds_check for remaining := len(dst); remaining > 0; {
|
||||
// Process multiple blocks at once
|
||||
if ctx._off == _BLOCK_SIZE {
|
||||
if nr_blocks := remaining / _BLOCK_SIZE; nr_blocks > 0 {
|
||||
direct_bytes := nr_blocks * _BLOCK_SIZE
|
||||
_do_blocks(ctx, dst, nil, nr_blocks)
|
||||
if st._off == _chacha20.BLOCK_SIZE {
|
||||
if nr_blocks := remaining / _chacha20.BLOCK_SIZE; nr_blocks > 0 {
|
||||
direct_bytes := nr_blocks * _chacha20.BLOCK_SIZE
|
||||
stream_blocks(ctx, dst, nil, nr_blocks)
|
||||
remaining -= direct_bytes
|
||||
if remaining == 0 {
|
||||
return
|
||||
@@ -182,15 +132,15 @@ keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
|
||||
|
||||
// If there is a partial block, generate and buffer 1 block
|
||||
// worth of keystream.
|
||||
_do_blocks(ctx, ctx._buffer[:], nil, 1)
|
||||
ctx._off = 0
|
||||
stream_blocks(ctx, st._buffer[:], nil, 1)
|
||||
st._off = 0
|
||||
}
|
||||
|
||||
// Process partial blocks from the buffered keystream.
|
||||
to_copy := min(_BLOCK_SIZE - ctx._off, remaining)
|
||||
buffered_keystream := ctx._buffer[ctx._off:]
|
||||
to_copy := min(_chacha20.BLOCK_SIZE - st._off, remaining)
|
||||
buffered_keystream := st._buffer[st._off:]
|
||||
copy(dst[:to_copy], buffered_keystream[:to_copy])
|
||||
ctx._off += to_copy
|
||||
st._off += to_copy
|
||||
dst = dst[to_copy:]
|
||||
remaining -= to_copy
|
||||
}
|
||||
@@ -199,366 +149,5 @@ keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
|
||||
// reset sanitizes the Context. The Context must be re-initialized to
|
||||
// be used again.
|
||||
reset :: proc(ctx: ^Context) {
|
||||
mem.zero_explicit(&ctx._s, size_of(ctx._s))
|
||||
mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
|
||||
|
||||
ctx._is_initialized = false
|
||||
}
|
||||
|
||||
@(private)
|
||||
_do_blocks :: proc(ctx: ^Context, dst, src: []byte, nr_blocks: int) {
|
||||
// Enforce the maximum consumed keystream per nonce.
|
||||
//
|
||||
// While all modern "standard" definitions of ChaCha20 use
|
||||
// the IETF 32-bit counter, for XChaCha20 most common
|
||||
// implementations allow for a 64-bit counter.
|
||||
//
|
||||
// Honestly, the answer here is "use a MRAE primitive", but
|
||||
// go with common practice in the case of XChaCha20.
|
||||
if ctx._is_ietf_flavor {
|
||||
if u64(ctx._s[12]) + u64(nr_blocks) > 0xffffffff {
|
||||
panic("crypto/chacha20: maximum ChaCha20 keystream per nonce reached")
|
||||
}
|
||||
} else {
|
||||
ctr := (u64(ctx._s[13]) << 32) | u64(ctx._s[12])
|
||||
if _, carry := bits.add_u64(ctr, u64(nr_blocks), 0); carry != 0 {
|
||||
panic("crypto/chacha20: maximum XChaCha20 keystream per nonce reached")
|
||||
}
|
||||
}
|
||||
|
||||
dst, src := dst, src
|
||||
x := &ctx._s
|
||||
for n := 0; n < nr_blocks; n = n + 1 {
|
||||
x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
|
||||
x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
|
||||
|
||||
for i := _ROUNDS; i > 0; i = i - 2 {
|
||||
// Even when forcing inlining, manually inlining all of
|
||||
// these is decently faster.
|
||||
|
||||
// quarterround(x, 0, 4, 8, 12)
|
||||
x0 += x4
|
||||
x12 ~= x0
|
||||
x12 = bits.rotate_left32(x12, 16)
|
||||
x8 += x12
|
||||
x4 ~= x8
|
||||
x4 = bits.rotate_left32(x4, 12)
|
||||
x0 += x4
|
||||
x12 ~= x0
|
||||
x12 = bits.rotate_left32(x12, 8)
|
||||
x8 += x12
|
||||
x4 ~= x8
|
||||
x4 = bits.rotate_left32(x4, 7)
|
||||
|
||||
// quarterround(x, 1, 5, 9, 13)
|
||||
x1 += x5
|
||||
x13 ~= x1
|
||||
x13 = bits.rotate_left32(x13, 16)
|
||||
x9 += x13
|
||||
x5 ~= x9
|
||||
x5 = bits.rotate_left32(x5, 12)
|
||||
x1 += x5
|
||||
x13 ~= x1
|
||||
x13 = bits.rotate_left32(x13, 8)
|
||||
x9 += x13
|
||||
x5 ~= x9
|
||||
x5 = bits.rotate_left32(x5, 7)
|
||||
|
||||
// quarterround(x, 2, 6, 10, 14)
|
||||
x2 += x6
|
||||
x14 ~= x2
|
||||
x14 = bits.rotate_left32(x14, 16)
|
||||
x10 += x14
|
||||
x6 ~= x10
|
||||
x6 = bits.rotate_left32(x6, 12)
|
||||
x2 += x6
|
||||
x14 ~= x2
|
||||
x14 = bits.rotate_left32(x14, 8)
|
||||
x10 += x14
|
||||
x6 ~= x10
|
||||
x6 = bits.rotate_left32(x6, 7)
|
||||
|
||||
// quarterround(x, 3, 7, 11, 15)
|
||||
x3 += x7
|
||||
x15 ~= x3
|
||||
x15 = bits.rotate_left32(x15, 16)
|
||||
x11 += x15
|
||||
x7 ~= x11
|
||||
x7 = bits.rotate_left32(x7, 12)
|
||||
x3 += x7
|
||||
x15 ~= x3
|
||||
x15 = bits.rotate_left32(x15, 8)
|
||||
x11 += x15
|
||||
x7 ~= x11
|
||||
x7 = bits.rotate_left32(x7, 7)
|
||||
|
||||
// quarterround(x, 0, 5, 10, 15)
|
||||
x0 += x5
|
||||
x15 ~= x0
|
||||
x15 = bits.rotate_left32(x15, 16)
|
||||
x10 += x15
|
||||
x5 ~= x10
|
||||
x5 = bits.rotate_left32(x5, 12)
|
||||
x0 += x5
|
||||
x15 ~= x0
|
||||
x15 = bits.rotate_left32(x15, 8)
|
||||
x10 += x15
|
||||
x5 ~= x10
|
||||
x5 = bits.rotate_left32(x5, 7)
|
||||
|
||||
// quarterround(x, 1, 6, 11, 12)
|
||||
x1 += x6
|
||||
x12 ~= x1
|
||||
x12 = bits.rotate_left32(x12, 16)
|
||||
x11 += x12
|
||||
x6 ~= x11
|
||||
x6 = bits.rotate_left32(x6, 12)
|
||||
x1 += x6
|
||||
x12 ~= x1
|
||||
x12 = bits.rotate_left32(x12, 8)
|
||||
x11 += x12
|
||||
x6 ~= x11
|
||||
x6 = bits.rotate_left32(x6, 7)
|
||||
|
||||
// quarterround(x, 2, 7, 8, 13)
|
||||
x2 += x7
|
||||
x13 ~= x2
|
||||
x13 = bits.rotate_left32(x13, 16)
|
||||
x8 += x13
|
||||
x7 ~= x8
|
||||
x7 = bits.rotate_left32(x7, 12)
|
||||
x2 += x7
|
||||
x13 ~= x2
|
||||
x13 = bits.rotate_left32(x13, 8)
|
||||
x8 += x13
|
||||
x7 ~= x8
|
||||
x7 = bits.rotate_left32(x7, 7)
|
||||
|
||||
// quarterround(x, 3, 4, 9, 14)
|
||||
x3 += x4
|
||||
x14 ~= x3
|
||||
x14 = bits.rotate_left32(x14, 16)
|
||||
x9 += x14
|
||||
x4 ~= x9
|
||||
x4 = bits.rotate_left32(x4, 12)
|
||||
x3 += x4
|
||||
x14 ~= x3
|
||||
x14 = bits.rotate_left32(x14, 8)
|
||||
x9 += x14
|
||||
x4 ~= x9
|
||||
x4 = bits.rotate_left32(x4, 7)
|
||||
}
|
||||
|
||||
x0 += _SIGMA_0
|
||||
x1 += _SIGMA_1
|
||||
x2 += _SIGMA_2
|
||||
x3 += _SIGMA_3
|
||||
x4 += x[4]
|
||||
x5 += x[5]
|
||||
x6 += x[6]
|
||||
x7 += x[7]
|
||||
x8 += x[8]
|
||||
x9 += x[9]
|
||||
x10 += x[10]
|
||||
x11 += x[11]
|
||||
x12 += x[12]
|
||||
x13 += x[13]
|
||||
x14 += x[14]
|
||||
x15 += x[15]
|
||||
|
||||
// While the "correct" answer to getting more performance out of
|
||||
// this is "use vector operations", support for that is currently
|
||||
// a work in progress/to be designed.
|
||||
//
|
||||
// In the meantime:
|
||||
// - The caller(s) ensure that src/dst are valid.
|
||||
// - The compiler knows if the target is picky about alignment.
|
||||
|
||||
#no_bounds_check {
|
||||
if src != nil {
|
||||
endian.unchecked_put_u32le(dst[0:4], endian.unchecked_get_u32le(src[0:4]) ~ x0)
|
||||
endian.unchecked_put_u32le(dst[4:8], endian.unchecked_get_u32le(src[4:8]) ~ x1)
|
||||
endian.unchecked_put_u32le(dst[8:12], endian.unchecked_get_u32le(src[8:12]) ~ x2)
|
||||
endian.unchecked_put_u32le(dst[12:16], endian.unchecked_get_u32le(src[12:16]) ~ x3)
|
||||
endian.unchecked_put_u32le(dst[16:20], endian.unchecked_get_u32le(src[16:20]) ~ x4)
|
||||
endian.unchecked_put_u32le(dst[20:24], endian.unchecked_get_u32le(src[20:24]) ~ x5)
|
||||
endian.unchecked_put_u32le(dst[24:28], endian.unchecked_get_u32le(src[24:28]) ~ x6)
|
||||
endian.unchecked_put_u32le(dst[28:32], endian.unchecked_get_u32le(src[28:32]) ~ x7)
|
||||
endian.unchecked_put_u32le(dst[32:36], endian.unchecked_get_u32le(src[32:36]) ~ x8)
|
||||
endian.unchecked_put_u32le(dst[36:40], endian.unchecked_get_u32le(src[36:40]) ~ x9)
|
||||
endian.unchecked_put_u32le(dst[40:44], endian.unchecked_get_u32le(src[40:44]) ~ x10)
|
||||
endian.unchecked_put_u32le(dst[44:48], endian.unchecked_get_u32le(src[44:48]) ~ x11)
|
||||
endian.unchecked_put_u32le(dst[48:52], endian.unchecked_get_u32le(src[48:52]) ~ x12)
|
||||
endian.unchecked_put_u32le(dst[52:56], endian.unchecked_get_u32le(src[52:56]) ~ x13)
|
||||
endian.unchecked_put_u32le(dst[56:60], endian.unchecked_get_u32le(src[56:60]) ~ x14)
|
||||
endian.unchecked_put_u32le(dst[60:64], endian.unchecked_get_u32le(src[60:64]) ~ x15)
|
||||
src = src[_BLOCK_SIZE:]
|
||||
} else {
|
||||
endian.unchecked_put_u32le(dst[0:4], x0)
|
||||
endian.unchecked_put_u32le(dst[4:8], x1)
|
||||
endian.unchecked_put_u32le(dst[8:12], x2)
|
||||
endian.unchecked_put_u32le(dst[12:16], x3)
|
||||
endian.unchecked_put_u32le(dst[16:20], x4)
|
||||
endian.unchecked_put_u32le(dst[20:24], x5)
|
||||
endian.unchecked_put_u32le(dst[24:28], x6)
|
||||
endian.unchecked_put_u32le(dst[28:32], x7)
|
||||
endian.unchecked_put_u32le(dst[32:36], x8)
|
||||
endian.unchecked_put_u32le(dst[36:40], x9)
|
||||
endian.unchecked_put_u32le(dst[40:44], x10)
|
||||
endian.unchecked_put_u32le(dst[44:48], x11)
|
||||
endian.unchecked_put_u32le(dst[48:52], x12)
|
||||
endian.unchecked_put_u32le(dst[52:56], x13)
|
||||
endian.unchecked_put_u32le(dst[56:60], x14)
|
||||
endian.unchecked_put_u32le(dst[60:64], x15)
|
||||
}
|
||||
dst = dst[_BLOCK_SIZE:]
|
||||
}
|
||||
|
||||
// Increment the counter. Overflow checking is done upon
|
||||
// entry into the routine, so a 64-bit increment safely
|
||||
// covers both cases.
|
||||
new_ctr := ((u64(ctx._s[13]) << 32) | u64(ctx._s[12])) + 1
|
||||
x[12] = u32(new_ctr)
|
||||
x[13] = u32(new_ctr >> 32)
|
||||
}
|
||||
}
|
||||
|
||||
@(private)
|
||||
_hchacha20 :: proc "contextless" (dst, key, nonce: []byte) {
|
||||
x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
|
||||
x4 := endian.unchecked_get_u32le(key[0:4])
|
||||
x5 := endian.unchecked_get_u32le(key[4:8])
|
||||
x6 := endian.unchecked_get_u32le(key[8:12])
|
||||
x7 := endian.unchecked_get_u32le(key[12:16])
|
||||
x8 := endian.unchecked_get_u32le(key[16:20])
|
||||
x9 := endian.unchecked_get_u32le(key[20:24])
|
||||
x10 := endian.unchecked_get_u32le(key[24:28])
|
||||
x11 := endian.unchecked_get_u32le(key[28:32])
|
||||
x12 := endian.unchecked_get_u32le(nonce[0:4])
|
||||
x13 := endian.unchecked_get_u32le(nonce[4:8])
|
||||
x14 := endian.unchecked_get_u32le(nonce[8:12])
|
||||
x15 := endian.unchecked_get_u32le(nonce[12:16])
|
||||
|
||||
for i := _ROUNDS; i > 0; i = i - 2 {
|
||||
// quarterround(x, 0, 4, 8, 12)
|
||||
x0 += x4
|
||||
x12 ~= x0
|
||||
x12 = bits.rotate_left32(x12, 16)
|
||||
x8 += x12
|
||||
x4 ~= x8
|
||||
x4 = bits.rotate_left32(x4, 12)
|
||||
x0 += x4
|
||||
x12 ~= x0
|
||||
x12 = bits.rotate_left32(x12, 8)
|
||||
x8 += x12
|
||||
x4 ~= x8
|
||||
x4 = bits.rotate_left32(x4, 7)
|
||||
|
||||
// quarterround(x, 1, 5, 9, 13)
|
||||
x1 += x5
|
||||
x13 ~= x1
|
||||
x13 = bits.rotate_left32(x13, 16)
|
||||
x9 += x13
|
||||
x5 ~= x9
|
||||
x5 = bits.rotate_left32(x5, 12)
|
||||
x1 += x5
|
||||
x13 ~= x1
|
||||
x13 = bits.rotate_left32(x13, 8)
|
||||
x9 += x13
|
||||
x5 ~= x9
|
||||
x5 = bits.rotate_left32(x5, 7)
|
||||
|
||||
// quarterround(x, 2, 6, 10, 14)
|
||||
x2 += x6
|
||||
x14 ~= x2
|
||||
x14 = bits.rotate_left32(x14, 16)
|
||||
x10 += x14
|
||||
x6 ~= x10
|
||||
x6 = bits.rotate_left32(x6, 12)
|
||||
x2 += x6
|
||||
x14 ~= x2
|
||||
x14 = bits.rotate_left32(x14, 8)
|
||||
x10 += x14
|
||||
x6 ~= x10
|
||||
x6 = bits.rotate_left32(x6, 7)
|
||||
|
||||
// quarterround(x, 3, 7, 11, 15)
|
||||
x3 += x7
|
||||
x15 ~= x3
|
||||
x15 = bits.rotate_left32(x15, 16)
|
||||
x11 += x15
|
||||
x7 ~= x11
|
||||
x7 = bits.rotate_left32(x7, 12)
|
||||
x3 += x7
|
||||
x15 ~= x3
|
||||
x15 = bits.rotate_left32(x15, 8)
|
||||
x11 += x15
|
||||
x7 ~= x11
|
||||
x7 = bits.rotate_left32(x7, 7)
|
||||
|
||||
// quarterround(x, 0, 5, 10, 15)
|
||||
x0 += x5
|
||||
x15 ~= x0
|
||||
x15 = bits.rotate_left32(x15, 16)
|
||||
x10 += x15
|
||||
x5 ~= x10
|
||||
x5 = bits.rotate_left32(x5, 12)
|
||||
x0 += x5
|
||||
x15 ~= x0
|
||||
x15 = bits.rotate_left32(x15, 8)
|
||||
x10 += x15
|
||||
x5 ~= x10
|
||||
x5 = bits.rotate_left32(x5, 7)
|
||||
|
||||
// quarterround(x, 1, 6, 11, 12)
|
||||
x1 += x6
|
||||
x12 ~= x1
|
||||
x12 = bits.rotate_left32(x12, 16)
|
||||
x11 += x12
|
||||
x6 ~= x11
|
||||
x6 = bits.rotate_left32(x6, 12)
|
||||
x1 += x6
|
||||
x12 ~= x1
|
||||
x12 = bits.rotate_left32(x12, 8)
|
||||
x11 += x12
|
||||
x6 ~= x11
|
||||
x6 = bits.rotate_left32(x6, 7)
|
||||
|
||||
// quarterround(x, 2, 7, 8, 13)
|
||||
x2 += x7
|
||||
x13 ~= x2
|
||||
x13 = bits.rotate_left32(x13, 16)
|
||||
x8 += x13
|
||||
x7 ~= x8
|
||||
x7 = bits.rotate_left32(x7, 12)
|
||||
x2 += x7
|
||||
x13 ~= x2
|
||||
x13 = bits.rotate_left32(x13, 8)
|
||||
x8 += x13
|
||||
x7 ~= x8
|
||||
x7 = bits.rotate_left32(x7, 7)
|
||||
|
||||
// quarterround(x, 3, 4, 9, 14)
|
||||
x3 += x4
|
||||
x14 ~= x3
|
||||
x14 = bits.rotate_left32(x14, 16)
|
||||
x9 += x14
|
||||
x4 ~= x9
|
||||
x4 = bits.rotate_left32(x4, 12)
|
||||
x3 += x4
|
||||
x14 ~= x3
|
||||
x14 = bits.rotate_left32(x14, 8)
|
||||
x9 += x14
|
||||
x4 ~= x9
|
||||
x4 = bits.rotate_left32(x4, 7)
|
||||
}
|
||||
|
||||
endian.unchecked_put_u32le(dst[0:4], x0)
|
||||
endian.unchecked_put_u32le(dst[4:8], x1)
|
||||
endian.unchecked_put_u32le(dst[8:12], x2)
|
||||
endian.unchecked_put_u32le(dst[12:16], x3)
|
||||
endian.unchecked_put_u32le(dst[16:20], x12)
|
||||
endian.unchecked_put_u32le(dst[20:24], x13)
|
||||
endian.unchecked_put_u32le(dst[24:28], x14)
|
||||
endian.unchecked_put_u32le(dst[28:32], x15)
|
||||
_chacha20.reset(&ctx._state)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,56 @@
|
||||
package chacha20
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/_chacha20/ref"
|
||||
import "core:crypto/_chacha20/simd128"
|
||||
import "core:crypto/_chacha20/simd256"
|
||||
|
||||
// DEFAULT_IMPLEMENTATION is the implementation that will be used by
|
||||
// default if possible.
|
||||
DEFAULT_IMPLEMENTATION :: Implementation.Simd256
|
||||
|
||||
// Implementation is a ChaCha20 implementation. Most callers will not need
|
||||
// to use this as the package will automatically select the most performant
|
||||
// implementation available.
|
||||
Implementation :: enum {
|
||||
Portable,
|
||||
Simd128,
|
||||
Simd256,
|
||||
}
|
||||
|
||||
@(private)
|
||||
init_impl :: proc(ctx: ^Context, impl: Implementation) {
|
||||
impl := impl
|
||||
if impl == .Simd256 && !simd256.is_performant() {
|
||||
impl = .Simd128
|
||||
}
|
||||
if impl == .Simd128 && !simd128.is_performant() {
|
||||
impl = .Portable
|
||||
}
|
||||
|
||||
ctx._impl = impl
|
||||
}
|
||||
|
||||
@(private)
|
||||
stream_blocks :: proc(ctx: ^Context, dst, src: []byte, nr_blocks: int) {
|
||||
switch ctx._impl {
|
||||
case .Simd256:
|
||||
simd256.stream_blocks(&ctx._state, dst, src, nr_blocks)
|
||||
case .Simd128:
|
||||
simd128.stream_blocks(&ctx._state, dst, src, nr_blocks)
|
||||
case .Portable:
|
||||
ref.stream_blocks(&ctx._state, dst, src, nr_blocks)
|
||||
}
|
||||
}
|
||||
|
||||
@(private)
|
||||
hchacha20 :: proc "contextless" (dst, key, iv: []byte, impl: Implementation) {
|
||||
switch impl {
|
||||
case .Simd256:
|
||||
simd256.hchacha20(dst, key, iv)
|
||||
case .Simd128:
|
||||
simd128.hchacha20(dst, key, iv)
|
||||
case .Portable:
|
||||
ref.hchacha20(dst, key, iv)
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,11 @@
|
||||
/*
|
||||
package chacha20poly1305 implements the AEAD_CHACHA20_POLY1305 Authenticated
|
||||
Encryption with Additional Data algorithm.
|
||||
package chacha20poly1305 implements the AEAD_CHACHA20_POLY1305 and
|
||||
AEAD_XChaCha20_Poly1305 Authenticated Encryption with Additional Data
|
||||
algorithms.
|
||||
|
||||
See:
|
||||
- https://www.rfc-editor.org/rfc/rfc8439
|
||||
- https://datatracker.ietf.org/doc/html/draft-arciszewski-xchacha-03
|
||||
*/
|
||||
package chacha20poly1305
|
||||
|
||||
@@ -15,8 +17,10 @@ import "core:mem"
|
||||
|
||||
// KEY_SIZE is the chacha20poly1305 key size in bytes.
|
||||
KEY_SIZE :: chacha20.KEY_SIZE
|
||||
// NONCE_SIZE is the chacha20poly1305 nonce size in bytes.
|
||||
NONCE_SIZE :: chacha20.NONCE_SIZE
|
||||
// IV_SIZE is the chacha20poly1305 IV size in bytes.
|
||||
IV_SIZE :: chacha20.IV_SIZE
|
||||
// XIV_SIZE is the xchacha20poly1305 IV size in bytes.
|
||||
XIV_SIZE :: chacha20.XIV_SIZE
|
||||
// TAG_SIZE is the chacha20poly1305 tag size in bytes.
|
||||
TAG_SIZE :: poly1305.TAG_SIZE
|
||||
|
||||
@@ -24,15 +28,13 @@ TAG_SIZE :: poly1305.TAG_SIZE
|
||||
_P_MAX :: 64 * 0xffffffff // 64 * (2^32-1)
|
||||
|
||||
@(private)
|
||||
_validate_common_slice_sizes :: proc (tag, key, nonce, aad, text: []byte) {
|
||||
_validate_common_slice_sizes :: proc (tag, iv, aad, text: []byte, is_xchacha: bool) {
|
||||
if len(tag) != TAG_SIZE {
|
||||
panic("crypto/chacha20poly1305: invalid destination tag size")
|
||||
}
|
||||
if len(key) != KEY_SIZE {
|
||||
panic("crypto/chacha20poly1305: invalid key size")
|
||||
}
|
||||
if len(nonce) != NONCE_SIZE {
|
||||
panic("crypto/chacha20poly1305: invalid nonce size")
|
||||
expected_iv_len := is_xchacha ? XIV_SIZE : IV_SIZE
|
||||
if len(iv) != expected_iv_len {
|
||||
panic("crypto/chacha20poly1305: invalid IV size")
|
||||
}
|
||||
|
||||
#assert(size_of(int) == 8 || size_of(int) <= 4)
|
||||
@@ -59,18 +61,52 @@ _update_mac_pad16 :: #force_inline proc (ctx: ^poly1305.Context, x_len: int) {
|
||||
}
|
||||
}
|
||||
|
||||
// encrypt encrypts the plaintext and authenticates the aad and ciphertext,
|
||||
// with the provided key and nonce, stores the output in ciphertext and tag.
|
||||
encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
|
||||
_validate_common_slice_sizes(tag, key, nonce, aad, plaintext)
|
||||
// Context is a keyed (X)Chacha20Poly1305 instance.
|
||||
Context :: struct {
|
||||
_key: [KEY_SIZE]byte,
|
||||
_impl: chacha20.Implementation,
|
||||
_is_xchacha: bool,
|
||||
_is_initialized: bool,
|
||||
}
|
||||
|
||||
// init initializes a Context with the provided key, for AEAD_CHACHA20_POLY1305.
|
||||
init :: proc(ctx: ^Context, key: []byte, impl := chacha20.DEFAULT_IMPLEMENTATION) {
|
||||
if len(key) != KEY_SIZE {
|
||||
panic("crypto/chacha20poly1305: invalid key size")
|
||||
}
|
||||
|
||||
copy(ctx._key[:], key)
|
||||
ctx._impl = impl
|
||||
ctx._is_xchacha = false
|
||||
ctx._is_initialized = true
|
||||
}
|
||||
|
||||
// init_xchacha initializes a Context with the provided key, for
|
||||
// AEAD_XChaCha20_Poly1305.
|
||||
//
|
||||
// Note: While there are multiple definitions of XChaCha20-Poly1305
|
||||
// this sticks to the IETF draft and uses a 32-bit counter.
|
||||
init_xchacha :: proc(ctx: ^Context, key: []byte, impl := chacha20.DEFAULT_IMPLEMENTATION) {
|
||||
init(ctx, key, impl)
|
||||
ctx._is_xchacha = true
|
||||
}
|
||||
|
||||
// seal encrypts the plaintext and authenticates the aad and ciphertext,
|
||||
// with the provided Context and iv, stores the output in dst and tag.
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
|
||||
ciphertext := dst
|
||||
_validate_common_slice_sizes(tag, iv, aad, plaintext, ctx._is_xchacha)
|
||||
if len(ciphertext) != len(plaintext) {
|
||||
panic("crypto/chacha20poly1305: invalid destination ciphertext size")
|
||||
}
|
||||
|
||||
stream_ctx: chacha20.Context = ---
|
||||
chacha20.init(&stream_ctx, key, nonce)
|
||||
chacha20.init(&stream_ctx, ctx._key[:],iv, ctx._impl)
|
||||
stream_ctx._state._is_ietf_flavor = true
|
||||
|
||||
// otk = poly1305_key_gen(key, nonce)
|
||||
// otk = poly1305_key_gen(key, iv)
|
||||
otk: [poly1305.KEY_SIZE]byte = ---
|
||||
chacha20.keystream_bytes(&stream_ctx, otk[:])
|
||||
mac_ctx: poly1305.Context = ---
|
||||
@@ -87,7 +123,7 @@ encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
|
||||
poly1305.update(&mac_ctx, aad)
|
||||
_update_mac_pad16(&mac_ctx, aad_len)
|
||||
|
||||
// ciphertext = chacha20_encrypt(key, 1, nonce, plaintext)
|
||||
// ciphertext = chacha20_encrypt(key, 1, iv, plaintext)
|
||||
chacha20.seek(&stream_ctx, 1)
|
||||
chacha20.xor_bytes(&stream_ctx, ciphertext, plaintext)
|
||||
chacha20.reset(&stream_ctx) // Don't need the stream context anymore.
|
||||
@@ -107,13 +143,16 @@ encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
|
||||
poly1305.final(&mac_ctx, tag) // Implicitly sanitizes context.
|
||||
}
|
||||
|
||||
// decrypt authenticates the aad and ciphertext, and decrypts the ciphertext,
|
||||
// with the provided key, nonce, and tag, and stores the output in plaintext,
|
||||
// returning true iff the authentication was successful.
|
||||
// open authenticates the aad and ciphertext, and decrypts the ciphertext,
|
||||
// with the provided Context, iv, and tag, and stores the output in dst,
|
||||
// returning true iff the authentication was successful. If authentication
|
||||
// fails, the destination buffer will be zeroed.
|
||||
//
|
||||
// If authentication fails, the destination plaintext buffer will be zeroed.
|
||||
decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
|
||||
_validate_common_slice_sizes(tag, key, nonce, aad, ciphertext)
|
||||
// dst and ciphertext MUST alias exactly or not at all.
|
||||
@(require_results)
|
||||
open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
plaintext := dst
|
||||
_validate_common_slice_sizes(tag, iv, aad, ciphertext, ctx._is_xchacha)
|
||||
if len(ciphertext) != len(plaintext) {
|
||||
panic("crypto/chacha20poly1305: invalid destination plaintext size")
|
||||
}
|
||||
@@ -123,9 +162,10 @@ decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
|
||||
// points where needed.
|
||||
|
||||
stream_ctx: chacha20.Context = ---
|
||||
chacha20.init(&stream_ctx, key, nonce)
|
||||
chacha20.init(&stream_ctx, ctx._key[:], iv, ctx._impl)
|
||||
stream_ctx._state._is_ietf_flavor = true
|
||||
|
||||
// otk = poly1305_key_gen(key, nonce)
|
||||
// otk = poly1305_key_gen(key, iv)
|
||||
otk: [poly1305.KEY_SIZE]byte = ---
|
||||
chacha20.keystream_bytes(&stream_ctx, otk[:])
|
||||
defer chacha20.reset(&stream_ctx)
|
||||
@@ -160,9 +200,17 @@ decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// plaintext = chacha20_decrypt(key, 1, nonce, ciphertext)
|
||||
// plaintext = chacha20_decrypt(key, 1, iv, ciphertext)
|
||||
chacha20.seek(&stream_ctx, 1)
|
||||
chacha20.xor_bytes(&stream_ctx, plaintext, ciphertext)
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// reset sanitizes the Context. The Context must be
|
||||
// re-initialized to be used again.
|
||||
reset :: proc "contextless" (ctx: ^Context) {
|
||||
mem.zero_explicit(&ctx._key, len(ctx._key))
|
||||
ctx._is_xchacha = false
|
||||
ctx._is_initialized = false
|
||||
}
|
||||
|
||||
@@ -60,7 +60,11 @@ rand_bytes :: proc (dst: []byte) {
|
||||
_rand_bytes(dst)
|
||||
}
|
||||
|
||||
|
||||
// random_generator returns a `runtime.Random_Generator` backed by the
|
||||
// system entropy source.
|
||||
//
|
||||
// Support for the system entropy source can be checked with the
|
||||
// `HAS_RAND_BYTES` boolean constant.
|
||||
random_generator :: proc() -> runtime.Random_Generator {
|
||||
return {
|
||||
procedure = proc(data: rawptr, mode: runtime.Random_Generator_Mode, p: []byte) {
|
||||
@@ -79,4 +83,4 @@ random_generator :: proc() -> runtime.Random_Generator {
|
||||
},
|
||||
data = nil,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ PUBLIC_KEY_SIZE :: 32
|
||||
SIGNATURE_SIZE :: 64
|
||||
|
||||
@(private)
|
||||
NONCE_SIZE :: 32
|
||||
HDIGEST2_SIZE :: 32
|
||||
|
||||
// Private_Key is an Ed25519 private key.
|
||||
Private_Key :: struct {
|
||||
@@ -33,7 +33,7 @@ Private_Key :: struct {
|
||||
// See: https://github.com/MystenLabs/ed25519-unsafe-libs
|
||||
_b: [PRIVATE_KEY_SIZE]byte,
|
||||
_s: grp.Scalar,
|
||||
_nonce: [NONCE_SIZE]byte,
|
||||
_hdigest2: [HDIGEST2_SIZE]byte,
|
||||
_pub_key: Public_Key,
|
||||
_is_initialized: bool,
|
||||
}
|
||||
@@ -63,7 +63,7 @@ private_key_set_bytes :: proc(priv_key: ^Private_Key, b: []byte) -> bool {
|
||||
sha2.final(&ctx, h_bytes[:])
|
||||
|
||||
copy(priv_key._b[:], b)
|
||||
copy(priv_key._nonce[:], h_bytes[32:])
|
||||
copy(priv_key._hdigest2[:], h_bytes[32:])
|
||||
grp.sc_set_bytes_rfc8032(&priv_key._s, h_bytes[:32])
|
||||
|
||||
// Derive the corresponding public key.
|
||||
@@ -116,7 +116,7 @@ sign :: proc(priv_key: ^Private_Key, msg, sig: []byte) {
|
||||
ctx: sha2.Context_512 = ---
|
||||
digest_bytes: [sha2.DIGEST_SIZE_512]byte = ---
|
||||
sha2.init_512(&ctx)
|
||||
sha2.update(&ctx, priv_key._nonce[:])
|
||||
sha2.update(&ctx, priv_key._hdigest2[:])
|
||||
sha2.update(&ctx, msg)
|
||||
sha2.final(&ctx, digest_bytes[:])
|
||||
|
||||
|
||||
@@ -28,20 +28,26 @@ hash_bytes :: proc(algorithm: Algorithm, data: []byte, allocator := context.allo
|
||||
|
||||
// hash_string_to_buffer will hash the given input and assign the
|
||||
// computed digest to the third parameter. It requires that the
|
||||
// destination buffer is at least as big as the digest size.
|
||||
hash_string_to_buffer :: proc(algorithm: Algorithm, data: string, hash: []byte) {
|
||||
hash_bytes_to_buffer(algorithm, transmute([]byte)(data), hash)
|
||||
// destination buffer is at least as big as the digest size. The
|
||||
// provided destination buffer is returned to match the behavior of
|
||||
// `hash_string`.
|
||||
hash_string_to_buffer :: proc(algorithm: Algorithm, data: string, hash: []byte) -> []byte {
|
||||
return hash_bytes_to_buffer(algorithm, transmute([]byte)(data), hash)
|
||||
}
|
||||
|
||||
// hash_bytes_to_buffer will hash the given input and write the
|
||||
// computed digest into the third parameter. It requires that the
|
||||
// destination buffer is at least as big as the digest size.
|
||||
hash_bytes_to_buffer :: proc(algorithm: Algorithm, data, hash: []byte) {
|
||||
// destination buffer is at least as big as the digest size. The
|
||||
// provided destination buffer is returned to match the behavior of
|
||||
// `hash_bytes`.
|
||||
hash_bytes_to_buffer :: proc(algorithm: Algorithm, data, hash: []byte) -> []byte {
|
||||
ctx: Context
|
||||
|
||||
init(&ctx, algorithm)
|
||||
update(&ctx, data)
|
||||
final(&ctx, hash)
|
||||
|
||||
return hash
|
||||
}
|
||||
|
||||
// hash_stream will incrementally fully consume a stream, and return the
|
||||
|
||||
@@ -8,9 +8,9 @@ HAS_RAND_BYTES :: true
|
||||
|
||||
@(private)
|
||||
_rand_bytes :: proc(dst: []byte) {
|
||||
ret := (os.Errno)(win32.BCryptGenRandom(nil, raw_data(dst), u32(len(dst)), win32.BCRYPT_USE_SYSTEM_PREFERRED_RNG))
|
||||
if ret != os.ERROR_NONE {
|
||||
switch ret {
|
||||
ret := os.Platform_Error(win32.BCryptGenRandom(nil, raw_data(dst), u32(len(dst)), win32.BCRYPT_USE_SYSTEM_PREFERRED_RNG))
|
||||
if ret != nil {
|
||||
#partial switch ret {
|
||||
case os.ERROR_INVALID_HANDLE:
|
||||
// The handle to the first parameter is invalid.
|
||||
// This should not happen here, since we explicitly pass nil to it
|
||||
|
||||
+5
-13
@@ -16,15 +16,12 @@ Library :: distinct rawptr
|
||||
Loads a dynamic library from the filesystem. The parameter `global_symbols` makes the symbols in the loaded
|
||||
library available to resolve references in subsequently loaded libraries.
|
||||
|
||||
The paramater `global_symbols` is only used for the platforms `linux`, `darwin`, `freebsd` and `openbsd`.
|
||||
The parameter `global_symbols` is only used for the platforms `linux`, `darwin`, `freebsd` and `openbsd`.
|
||||
On `windows` this parameter is ignored.
|
||||
|
||||
The underlying behaviour is platform specific.
|
||||
On `linux`, `darwin`, `freebsd` and `openbsd` refer to `dlopen`.
|
||||
On `windows` refer to `LoadLibraryW`.
|
||||
|
||||
**Implicit Allocators**
|
||||
`context.temp_allocator`
|
||||
On `windows` refer to `LoadLibraryW`. Also temporarily needs an allocator to convert a string.
|
||||
|
||||
Example:
|
||||
import "core:dynlib"
|
||||
@@ -79,10 +76,7 @@ Loads the address of a procedure/variable from a dynamic library.
|
||||
|
||||
The underlying behaviour is platform specific.
|
||||
On `linux`, `darwin`, `freebsd` and `openbsd` refer to `dlsym`.
|
||||
On `windows` refer to `GetProcAddress`.
|
||||
|
||||
**Implicit Allocators**
|
||||
`context.temp_allocator`
|
||||
On `windows` refer to `GetProcAddress`. Also temporarily needs an allocator to convert a string.
|
||||
|
||||
Example:
|
||||
import "core:dynlib"
|
||||
@@ -177,9 +171,7 @@ initialize_symbols :: proc(
|
||||
return count, count > 0
|
||||
}
|
||||
|
||||
/*
|
||||
Returns an error message for the last failed procedure call.
|
||||
*/
|
||||
// Returns an error message for the last failed procedure call.
|
||||
last_error :: proc() -> string {
|
||||
return _last_error()
|
||||
}
|
||||
}
|
||||
@@ -16,4 +16,4 @@ _symbol_address :: proc(library: Library, symbol: string) -> (ptr: rawptr, found
|
||||
|
||||
_last_error :: proc() -> string {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
@@ -26,4 +26,4 @@ _symbol_address :: proc(library: Library, symbol: string) -> (ptr: rawptr, found
|
||||
_last_error :: proc() -> string {
|
||||
err := os.dlerror()
|
||||
return "unknown" if err == "" else err
|
||||
}
|
||||
}
|
||||
@@ -4,14 +4,12 @@ package dynlib
|
||||
|
||||
import win32 "core:sys/windows"
|
||||
import "core:strings"
|
||||
import "base:runtime"
|
||||
import "core:reflect"
|
||||
|
||||
_load_library :: proc(path: string, global_symbols := false) -> (Library, bool) {
|
||||
_load_library :: proc(path: string, global_symbols := false, allocator := context.temp_allocator) -> (Library, bool) {
|
||||
// NOTE(bill): 'global_symbols' is here only for consistency with POSIX which has RTLD_GLOBAL
|
||||
|
||||
runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
|
||||
wide_path := win32.utf8_to_wstring(path, context.temp_allocator)
|
||||
wide_path := win32.utf8_to_wstring(path, allocator)
|
||||
defer free(wide_path, allocator)
|
||||
handle := cast(Library)win32.LoadLibraryW(wide_path)
|
||||
return handle, handle != nil
|
||||
}
|
||||
@@ -21,9 +19,9 @@ _unload_library :: proc(library: Library) -> bool {
|
||||
return bool(ok)
|
||||
}
|
||||
|
||||
_symbol_address :: proc(library: Library, symbol: string) -> (ptr: rawptr, found: bool) {
|
||||
runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
|
||||
c_str := strings.clone_to_cstring(symbol, context.temp_allocator)
|
||||
_symbol_address :: proc(library: Library, symbol: string, allocator := context.temp_allocator) -> (ptr: rawptr, found: bool) {
|
||||
c_str := strings.clone_to_cstring(symbol, allocator)
|
||||
defer delete(c_str, allocator)
|
||||
ptr = win32.GetProcAddress(cast(win32.HMODULE)library, c_str)
|
||||
found = ptr != nil
|
||||
return
|
||||
@@ -33,4 +31,4 @@ _last_error :: proc() -> string {
|
||||
err := win32.System_Error(win32.GetLastError())
|
||||
err_msg := reflect.enum_string(err)
|
||||
return "unknown" if err_msg == "" else err_msg
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,7 @@ package encoding_cbor
|
||||
import "base:intrinsics"
|
||||
|
||||
import "core:encoding/json"
|
||||
import "core:encoding/hex"
|
||||
import "core:io"
|
||||
import "core:mem"
|
||||
import "core:strconv"
|
||||
@@ -399,11 +400,11 @@ to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> i
|
||||
io.write_string(w, str) or_return
|
||||
|
||||
case bool: io.write_string(w, "true" if v else "false") or_return
|
||||
case Nil: io.write_string(w, "nil") or_return
|
||||
case Nil: io.write_string(w, "null") or_return
|
||||
case Undefined: io.write_string(w, "undefined") or_return
|
||||
case ^Bytes:
|
||||
io.write_string(w, "h'") or_return
|
||||
for b in v { io.write_int(w, int(b), 16) or_return }
|
||||
hex.encode_into_writer(w, v^) or_return
|
||||
io.write_string(w, "'") or_return
|
||||
case ^Text:
|
||||
io.write_string(w, `"`) or_return
|
||||
|
||||
@@ -77,8 +77,11 @@ You can look at the default tags provided for pointers on how these implementati
|
||||
Example:
|
||||
package main
|
||||
|
||||
import "base:intrinsics"
|
||||
|
||||
import "core:encoding/cbor"
|
||||
import "core:fmt"
|
||||
import "core:reflect"
|
||||
import "core:time"
|
||||
|
||||
Possibilities :: union {
|
||||
@@ -93,9 +96,32 @@ Example:
|
||||
ignore_this: ^Data `cbor:"-"`, // Ignored by implementation.
|
||||
renamed: f32 `cbor:"renamed :)"`, // Renamed when encoded.
|
||||
my_union: Possibilities, // Union support.
|
||||
|
||||
my_raw: [8]u32 `cbor_tag:"raw"`, // Custom tag that just writes the value as bytes.
|
||||
}
|
||||
|
||||
main :: proc() {
|
||||
// Example custom tag implementation that instead of breaking down all parts,
|
||||
// just writes the value as a big byte blob. This is an advanced feature but very powerful.
|
||||
RAW_TAG_NR :: 200
|
||||
cbor.tag_register_number({
|
||||
marshal = proc(_: ^cbor.Tag_Implementation, e: cbor.Encoder, v: any) -> cbor.Marshal_Error {
|
||||
cbor._encode_u8(e.writer, RAW_TAG_NR, .Tag) or_return
|
||||
return cbor.err_conv(cbor._encode_bytes(e, reflect.as_bytes(v)))
|
||||
},
|
||||
unmarshal = proc(_: ^cbor.Tag_Implementation, d: cbor.Decoder, _: cbor.Tag_Number, v: any) -> (cbor.Unmarshal_Error) {
|
||||
hdr := cbor._decode_header(d.reader) or_return
|
||||
maj, add := cbor._header_split(hdr)
|
||||
if maj != .Bytes {
|
||||
return .Bad_Tag_Value
|
||||
}
|
||||
|
||||
bytes := cbor.err_conv(cbor._decode_bytes(d, add, maj)) or_return
|
||||
intrinsics.mem_copy_non_overlapping(v.data, raw_data(bytes), len(bytes))
|
||||
return nil
|
||||
},
|
||||
}, RAW_TAG_NR, "raw")
|
||||
|
||||
now := time.Time{_nsec = 1701117968 * 1e9}
|
||||
|
||||
data := Data{
|
||||
@@ -105,21 +131,22 @@ Example:
|
||||
ignore_this = &Data{},
|
||||
renamed = 123123.125,
|
||||
my_union = 3,
|
||||
my_raw = {1=1, 2=2, 3=3},
|
||||
}
|
||||
|
||||
|
||||
// Marshal the struct into binary CBOR.
|
||||
binary, err := cbor.marshal(data, cbor.ENCODE_FULLY_DETERMINISTIC)
|
||||
assert(err == nil)
|
||||
fmt.assertf(err == nil, "marshal error: %v", err)
|
||||
defer delete(binary)
|
||||
|
||||
|
||||
// Decode the binary data into a `cbor.Value`.
|
||||
decoded, derr := cbor.decode(string(binary))
|
||||
assert(derr == nil)
|
||||
fmt.assertf(derr == nil, "decode error: %v", derr)
|
||||
defer cbor.destroy(decoded)
|
||||
|
||||
// Turn the CBOR into a human readable representation defined as the diagnostic format in [[RFC 8949 Section 8;https://www.rfc-editor.org/rfc/rfc8949.html#name-diagnostic-notation]].
|
||||
diagnosis, eerr := cbor.to_diagnostic_format(decoded)
|
||||
assert(eerr == nil)
|
||||
fmt.assertf(eerr == nil, "to diagnostic error: %v", eerr)
|
||||
defer delete(diagnosis)
|
||||
|
||||
fmt.println(diagnosis)
|
||||
@@ -127,6 +154,7 @@ Example:
|
||||
|
||||
Output:
|
||||
{
|
||||
"my_raw": 200(h'00001000200030000000000000000000'),
|
||||
"my_union": 1010([
|
||||
"int",
|
||||
3
|
||||
|
||||
@@ -54,7 +54,7 @@ marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.a
|
||||
|
||||
defer if err != nil { strings.builder_destroy(&b) }
|
||||
|
||||
if err = marshal_into_builder(&b, v, flags, temp_allocator, loc=loc); err != nil {
|
||||
if err = marshal_into_builder(&b, v, flags, temp_allocator); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -63,20 +63,20 @@ marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.a
|
||||
|
||||
// Marshals the given value into a CBOR byte stream written to the given builder.
|
||||
// See docs on the `marshal_into` proc group for more info.
|
||||
marshal_into_builder :: proc(b: ^strings.Builder, v: any, flags := ENCODE_SMALL, temp_allocator := context.temp_allocator, loc := #caller_location) -> Marshal_Error {
|
||||
return marshal_into_writer(strings.to_writer(b), v, flags, temp_allocator, loc=loc)
|
||||
marshal_into_builder :: proc(b: ^strings.Builder, v: any, flags := ENCODE_SMALL, temp_allocator := context.temp_allocator) -> Marshal_Error {
|
||||
return marshal_into_writer(strings.to_writer(b), v, flags, temp_allocator)
|
||||
}
|
||||
|
||||
// Marshals the given value into a CBOR byte stream written to the given writer.
|
||||
// See docs on the `marshal_into` proc group for more info.
|
||||
marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL, temp_allocator := context.temp_allocator, loc := #caller_location) -> Marshal_Error {
|
||||
marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL, temp_allocator := context.temp_allocator) -> Marshal_Error {
|
||||
encoder := Encoder{flags, w, temp_allocator}
|
||||
return marshal_into_encoder(encoder, v, loc=loc)
|
||||
return marshal_into_encoder(encoder, v)
|
||||
}
|
||||
|
||||
// Marshals the given value into a CBOR byte stream written to the given encoder.
|
||||
// See docs on the `marshal_into` proc group for more info.
|
||||
marshal_into_encoder :: proc(e: Encoder, v: any, loc := #caller_location) -> (err: Marshal_Error) {
|
||||
marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) {
|
||||
e := e
|
||||
|
||||
if e.temp_allocator.procedure == nil {
|
||||
@@ -97,11 +97,14 @@ marshal_into_encoder :: proc(e: Encoder, v: any, loc := #caller_location) -> (e
|
||||
return impl->marshal(e, v)
|
||||
}
|
||||
|
||||
ti := runtime.type_info_base(type_info_of(v.id))
|
||||
a := any{v.data, ti.id}
|
||||
ti := runtime.type_info_core(type_info_of(v.id))
|
||||
return _marshal_into_encoder(e, v, ti)
|
||||
}
|
||||
|
||||
_marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (err: Marshal_Error) {
|
||||
a := any{v.data, ti.id}
|
||||
#partial switch info in ti.variant {
|
||||
case runtime.Type_Info_Named:
|
||||
case runtime.Type_Info_Named, runtime.Type_Info_Enum, runtime.Type_Info_Bit_Field:
|
||||
unreachable()
|
||||
|
||||
case runtime.Type_Info_Pointer:
|
||||
@@ -223,18 +226,38 @@ marshal_into_encoder :: proc(e: Encoder, v: any, loc := #caller_location) -> (e
|
||||
}
|
||||
|
||||
err_conv(_encode_u64(e, u64(info.count), .Array)) or_return
|
||||
|
||||
if impl, ok := _tag_implementations_type[info.elem.id]; ok {
|
||||
for i in 0..<info.count {
|
||||
data := uintptr(v.data) + uintptr(i*info.elem_size)
|
||||
impl->marshal(e, any{rawptr(data), info.elem.id}) or_return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
elem_ti := runtime.type_info_core(type_info_of(info.elem.id))
|
||||
for i in 0..<info.count {
|
||||
data := uintptr(v.data) + uintptr(i*info.elem_size)
|
||||
marshal_into(e, any{rawptr(data), info.elem.id}) or_return
|
||||
_marshal_into_encoder(e, any{rawptr(data), info.elem.id}, elem_ti) or_return
|
||||
}
|
||||
return
|
||||
|
||||
case runtime.Type_Info_Enumerated_Array:
|
||||
// index := runtime.type_info_base(info.index).variant.(runtime.Type_Info_Enum)
|
||||
err_conv(_encode_u64(e, u64(info.count), .Array)) or_return
|
||||
|
||||
if impl, ok := _tag_implementations_type[info.elem.id]; ok {
|
||||
for i in 0..<info.count {
|
||||
data := uintptr(v.data) + uintptr(i*info.elem_size)
|
||||
impl->marshal(e, any{rawptr(data), info.elem.id}) or_return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
elem_ti := runtime.type_info_core(type_info_of(info.elem.id))
|
||||
for i in 0..<info.count {
|
||||
data := uintptr(v.data) + uintptr(i*info.elem_size)
|
||||
marshal_into(e, any{rawptr(data), info.elem.id}) or_return
|
||||
_marshal_into_encoder(e, any{rawptr(data), info.elem.id}, elem_ti) or_return
|
||||
}
|
||||
return
|
||||
|
||||
@@ -246,9 +269,19 @@ marshal_into_encoder :: proc(e: Encoder, v: any, loc := #caller_location) -> (e
|
||||
|
||||
array := (^mem.Raw_Dynamic_Array)(v.data)
|
||||
err_conv(_encode_u64(e, u64(array.len), .Array)) or_return
|
||||
|
||||
if impl, ok := _tag_implementations_type[info.elem.id]; ok {
|
||||
for i in 0..<array.len {
|
||||
data := uintptr(array.data) + uintptr(i*info.elem_size)
|
||||
impl->marshal(e, any{rawptr(data), info.elem.id}) or_return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
elem_ti := runtime.type_info_core(type_info_of(info.elem.id))
|
||||
for i in 0..<array.len {
|
||||
data := uintptr(array.data) + uintptr(i*info.elem_size)
|
||||
marshal_into(e, any{rawptr(data), info.elem.id}) or_return
|
||||
_marshal_into_encoder(e, any{rawptr(data), info.elem.id}, elem_ti) or_return
|
||||
}
|
||||
return
|
||||
|
||||
@@ -260,9 +293,19 @@ marshal_into_encoder :: proc(e: Encoder, v: any, loc := #caller_location) -> (e
|
||||
|
||||
array := (^mem.Raw_Slice)(v.data)
|
||||
err_conv(_encode_u64(e, u64(array.len), .Array)) or_return
|
||||
|
||||
if impl, ok := _tag_implementations_type[info.elem.id]; ok {
|
||||
for i in 0..<array.len {
|
||||
data := uintptr(array.data) + uintptr(i*info.elem_size)
|
||||
impl->marshal(e, any{rawptr(data), info.elem.id}) or_return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
elem_ti := runtime.type_info_core(type_info_of(info.elem.id))
|
||||
for i in 0..<array.len {
|
||||
data := uintptr(array.data) + uintptr(i*info.elem_size)
|
||||
marshal_into(e, any{rawptr(data), info.elem.id}) or_return
|
||||
_marshal_into_encoder(e, any{rawptr(data), info.elem.id}, elem_ti) or_return
|
||||
}
|
||||
return
|
||||
|
||||
@@ -308,7 +351,8 @@ marshal_into_encoder :: proc(e: Encoder, v: any, loc := #caller_location) -> (e
|
||||
builder := strings.builder_from_slice(res[:])
|
||||
e.writer = strings.to_stream(&builder)
|
||||
|
||||
assert(_encode_u64(e, u64(len(str)), .Text) == nil)
|
||||
err := _encode_u64(e, u64(len(str)), .Text)
|
||||
assert(err == nil)
|
||||
res[9] = u8(len(builder.buf))
|
||||
assert(res[9] < 10)
|
||||
return
|
||||
@@ -437,9 +481,7 @@ marshal_into_encoder :: proc(e: Encoder, v: any, loc := #caller_location) -> (e
|
||||
}
|
||||
}
|
||||
|
||||
marshal_entry :: #force_inline proc(e: Encoder, info: runtime.Type_Info_Struct, v: any, name: string, i: int) -> Marshal_Error {
|
||||
err_conv(_encode_text(e, name)) or_return
|
||||
|
||||
marshal_entry :: #force_inline proc(e: Encoder, info: runtime.Type_Info_Struct, v: any, i: int) -> Marshal_Error {
|
||||
id := info.types[i].id
|
||||
data := rawptr(uintptr(v.data) + info.offsets[i])
|
||||
field_any := any{data, id}
|
||||
@@ -463,7 +505,7 @@ marshal_into_encoder :: proc(e: Encoder, v: any, loc := #caller_location) -> (e
|
||||
}
|
||||
|
||||
n: u64; {
|
||||
for _, i in info.names {
|
||||
for _, i in info.names[:info.field_count] {
|
||||
if field_name(info, i) != "-" {
|
||||
n += 1
|
||||
}
|
||||
@@ -473,37 +515,41 @@ marshal_into_encoder :: proc(e: Encoder, v: any, loc := #caller_location) -> (e
|
||||
|
||||
if .Deterministic_Map_Sorting in e.flags {
|
||||
Name :: struct {
|
||||
name: string,
|
||||
name: []byte,
|
||||
field: int,
|
||||
}
|
||||
entries := make([dynamic]Name, 0, n, e.temp_allocator) or_return
|
||||
defer delete(entries)
|
||||
|
||||
for _, i in info.names {
|
||||
for _, i in info.names[:info.field_count] {
|
||||
fname := field_name(info, i)
|
||||
if fname == "-" {
|
||||
continue
|
||||
}
|
||||
|
||||
append(&entries, Name{fname, i}) or_return
|
||||
key_builder := strings.builder_make(e.temp_allocator) or_return
|
||||
err_conv(_encode_text(Encoder{e.flags, strings.to_stream(&key_builder), e.temp_allocator}, fname)) or_return
|
||||
append(&entries, Name{key_builder.buf[:], i}) or_return
|
||||
}
|
||||
|
||||
// Sort lexicographic on the bytes of the key.
|
||||
slice.sort_by_cmp(entries[:], proc(a, b: Name) -> slice.Ordering {
|
||||
return slice.Ordering(bytes.compare(transmute([]byte)a.name, transmute([]byte)b.name))
|
||||
return slice.Ordering(bytes.compare(a.name, b.name))
|
||||
})
|
||||
|
||||
for entry in entries {
|
||||
marshal_entry(e, info, v, entry.name, entry.field) or_return
|
||||
io.write_full(e.writer, entry.name) or_return
|
||||
marshal_entry(e, info, v, entry.field) or_return
|
||||
}
|
||||
} else {
|
||||
for _, i in info.names {
|
||||
for _, i in info.names[:info.field_count] {
|
||||
fname := field_name(info, i)
|
||||
if fname == "-" {
|
||||
continue
|
||||
}
|
||||
|
||||
marshal_entry(e, info, v, fname, i) or_return
|
||||
err_conv(_encode_text(e, fname)) or_return
|
||||
marshal_entry(e, info, v, i) or_return
|
||||
}
|
||||
}
|
||||
return
|
||||
@@ -542,9 +588,6 @@ marshal_into_encoder :: proc(e: Encoder, v: any, loc := #caller_location) -> (e
|
||||
|
||||
return marshal_into(e, any{v.data, vti.id})
|
||||
|
||||
case runtime.Type_Info_Enum:
|
||||
return marshal_into(e, any{v.data, info.base.id})
|
||||
|
||||
case runtime.Type_Info_Bit_Set:
|
||||
// Store bit_set as big endian just like the protocol.
|
||||
do_byte_swap := !reflect.bit_set_is_big_endian(v)
|
||||
|
||||
@@ -96,7 +96,8 @@ _unmarshal_value :: proc(d: Decoder, v: any, hdr: Header, allocator := context.a
|
||||
ti = reflect.type_info_base(variant)
|
||||
if !reflect.is_pointer_internally(variant) {
|
||||
tag := any{rawptr(uintptr(v.data) + u.tag_offset), u.tag_type.id}
|
||||
assert(_assign_int(tag, 1))
|
||||
assigned := _assign_int(tag, 1)
|
||||
assert(assigned)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -520,9 +521,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header
|
||||
return
|
||||
|
||||
case reflect.Type_Info_Array:
|
||||
_, scap := err_conv(_decode_len_container(d, add)) or_return
|
||||
length := min(scap, t.count)
|
||||
|
||||
length, _ := err_conv(_decode_len_container(d, add)) or_return
|
||||
if length > t.count {
|
||||
return _unsupported(v, hdr)
|
||||
}
|
||||
@@ -534,9 +533,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header
|
||||
return
|
||||
|
||||
case reflect.Type_Info_Enumerated_Array:
|
||||
_, scap := err_conv(_decode_len_container(d, add)) or_return
|
||||
length := min(scap, t.count)
|
||||
|
||||
length, _ := err_conv(_decode_len_container(d, add)) or_return
|
||||
if length > t.count {
|
||||
return _unsupported(v, hdr)
|
||||
}
|
||||
@@ -548,9 +545,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header
|
||||
return
|
||||
|
||||
case reflect.Type_Info_Complex:
|
||||
_, scap := err_conv(_decode_len_container(d, add)) or_return
|
||||
length := min(scap, 2)
|
||||
|
||||
length, _ := err_conv(_decode_len_container(d, add)) or_return
|
||||
if length > 2 {
|
||||
return _unsupported(v, hdr)
|
||||
}
|
||||
@@ -570,9 +565,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header
|
||||
return
|
||||
|
||||
case reflect.Type_Info_Quaternion:
|
||||
_, scap := err_conv(_decode_len_container(d, add)) or_return
|
||||
length := min(scap, 4)
|
||||
|
||||
length, _ := err_conv(_decode_len_container(d, add)) or_return
|
||||
if length > 4 {
|
||||
return _unsupported(v, hdr)
|
||||
}
|
||||
@@ -626,14 +619,14 @@ _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header,
|
||||
|
||||
#partial switch t in ti.variant {
|
||||
case reflect.Type_Info_Struct:
|
||||
if t.is_raw_union {
|
||||
if .raw_union in t.flags {
|
||||
return _unsupported(v, hdr)
|
||||
}
|
||||
|
||||
length, _ := err_conv(_decode_len_container(d, add)) or_return
|
||||
unknown := length == -1
|
||||
fields := reflect.struct_fields_zipped(ti.id)
|
||||
|
||||
|
||||
for idx := 0; idx < len(fields) && (unknown || idx < length); idx += 1 {
|
||||
// Decode key, keys can only be strings.
|
||||
key: string
|
||||
@@ -646,7 +639,7 @@ _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header,
|
||||
key = keyv
|
||||
}
|
||||
defer delete(key, context.temp_allocator)
|
||||
|
||||
|
||||
// Find matching field.
|
||||
use_field_idx := -1
|
||||
{
|
||||
|
||||
@@ -13,13 +13,14 @@ iterate_csv_from_string :: proc(filename: string) {
|
||||
r.reuse_record_buffer = true // Without it you have to delete each of the fields within it
|
||||
defer csv.reader_destroy(&r)
|
||||
|
||||
if csv_data, ok := os.read_entire_file(filename); ok {
|
||||
csv_data, ok := os.read_entire_file(filename)
|
||||
if ok {
|
||||
csv.reader_init_with_string(&r, string(csv_data))
|
||||
defer delete(csv_data)
|
||||
} else {
|
||||
fmt.printfln("Unable to open file: %v", filename)
|
||||
return
|
||||
}
|
||||
defer delete(csv_data)
|
||||
|
||||
for r, i, err in csv.iterator_next(&r) {
|
||||
if err != nil { /* Do something with error */ }
|
||||
@@ -38,9 +39,9 @@ iterate_csv_from_stream :: proc(filename: string) {
|
||||
r.reuse_record_buffer = true // Without it you have to delete each of the fields within it
|
||||
defer csv.reader_destroy(&r)
|
||||
|
||||
handle, errno := os.open(filename)
|
||||
if errno != os.ERROR_NONE {
|
||||
fmt.printfln("Error opening file: %v", filename)
|
||||
handle, err := os.open(filename)
|
||||
if err != nil {
|
||||
fmt.eprintfln("Error opening file: %v", filename)
|
||||
return
|
||||
}
|
||||
defer os.close(handle)
|
||||
@@ -62,13 +63,14 @@ read_csv_from_string :: proc(filename: string) {
|
||||
r.reuse_record_buffer = true // Without it you have to delete each of the fields within it
|
||||
defer csv.reader_destroy(&r)
|
||||
|
||||
if csv_data, ok := os.read_entire_file(filename); ok {
|
||||
csv_data, ok := os.read_entire_file(filename)
|
||||
if ok {
|
||||
csv.reader_init_with_string(&r, string(csv_data))
|
||||
defer delete(csv_data)
|
||||
} else {
|
||||
fmt.printfln("Unable to open file: %v", filename)
|
||||
return
|
||||
}
|
||||
defer delete(csv_data)
|
||||
|
||||
records, err := csv.read_all(&r)
|
||||
if err != nil { /* Do something with CSV parse error */ }
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package encoding_hex
|
||||
|
||||
import "core:io"
|
||||
import "core:strings"
|
||||
|
||||
encode :: proc(src: []byte, allocator := context.allocator, loc := #caller_location) -> []byte #no_bounds_check {
|
||||
@@ -14,6 +15,12 @@ encode :: proc(src: []byte, allocator := context.allocator, loc := #caller_locat
|
||||
return dst
|
||||
}
|
||||
|
||||
// Hex-encodes `src` and streams the result to `dst`.
//
// Each input byte becomes two characters looked up in `HEXTABLE`: the one for
// the upper four bits first, then the one for the lower four bits.
// Stops at, and returns, the first error reported by `io.write`; returns `nil`
// once every byte of `src` has been written.
encode_into_writer :: proc(dst: io.Writer, src: []byte) -> io.Error {
	for octet in src {
		hi := HEXTABLE[octet>>4]
		lo := HEXTABLE[octet&0x0f]
		pair := [2]byte{hi, lo}
		io.write(dst, pair[:]) or_return
	}
	return nil
}
|
||||
|
||||
decode :: proc(src: []byte, allocator := context.allocator, loc := #caller_location) -> (dst: []byte, ok: bool) #no_bounds_check {
|
||||
if len(src) % 2 == 1 {
|
||||
|
||||
@@ -82,15 +82,17 @@ Map :: distinct map[string]map[string]string
|
||||
|
||||
load_map_from_string :: proc(src: string, allocator: runtime.Allocator, options := DEFAULT_OPTIONS) -> (m: Map, err: runtime.Allocator_Error) {
|
||||
unquote :: proc(val: string) -> (string, runtime.Allocator_Error) {
|
||||
v, allocated, ok := strconv.unquote_string(val)
|
||||
if !ok {
|
||||
return strings.clone(val)
|
||||
if len(val) > 0 && (val[0] == '"' || val[0] == '\'') {
|
||||
v, allocated, ok := strconv.unquote_string(val)
|
||||
if !ok {
|
||||
return strings.clone(val)
|
||||
}
|
||||
if allocated {
|
||||
return v, nil
|
||||
}
|
||||
return strings.clone(v), nil
|
||||
}
|
||||
if allocated {
|
||||
return v, nil
|
||||
}
|
||||
return strings.clone(v)
|
||||
|
||||
return strings.clone(val)
|
||||
}
|
||||
|
||||
context.allocator = allocator
|
||||
@@ -121,7 +123,7 @@ load_map_from_path :: proc(path: string, allocator: runtime.Allocator, options :
|
||||
data := os.read_entire_file(path, allocator) or_return
|
||||
defer delete(data, allocator)
|
||||
m, err = load_map_from_string(string(data), allocator, options)
|
||||
ok = err != nil
|
||||
ok = err == nil
|
||||
defer if !ok {
|
||||
delete_map(m)
|
||||
}
|
||||
@@ -142,6 +144,7 @@ delete_map :: proc(m: Map) {
|
||||
delete(value, allocator)
|
||||
}
|
||||
delete(section)
|
||||
delete(pairs)
|
||||
}
|
||||
delete(m)
|
||||
}
|
||||
|
||||
@@ -100,38 +100,7 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
|
||||
|
||||
case runtime.Type_Info_Integer:
|
||||
buf: [40]byte
|
||||
u: u128
|
||||
switch i in a {
|
||||
case i8: u = u128(i)
|
||||
case i16: u = u128(i)
|
||||
case i32: u = u128(i)
|
||||
case i64: u = u128(i)
|
||||
case i128: u = u128(i)
|
||||
case int: u = u128(i)
|
||||
case u8: u = u128(i)
|
||||
case u16: u = u128(i)
|
||||
case u32: u = u128(i)
|
||||
case u64: u = u128(i)
|
||||
case u128: u = u128(i)
|
||||
case uint: u = u128(i)
|
||||
case uintptr: u = u128(i)
|
||||
|
||||
case i16le: u = u128(i)
|
||||
case i32le: u = u128(i)
|
||||
case i64le: u = u128(i)
|
||||
case u16le: u = u128(i)
|
||||
case u32le: u = u128(i)
|
||||
case u64le: u = u128(i)
|
||||
case u128le: u = u128(i)
|
||||
|
||||
case i16be: u = u128(i)
|
||||
case i32be: u = u128(i)
|
||||
case i64be: u = u128(i)
|
||||
case u16be: u = u128(i)
|
||||
case u32be: u = u128(i)
|
||||
case u64be: u = u128(i)
|
||||
case u128be: u = u128(i)
|
||||
}
|
||||
u := cast_any_int_to_u128(a)
|
||||
|
||||
s: string
|
||||
|
||||
@@ -310,7 +279,12 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
|
||||
case cstring: name = string(s)
|
||||
}
|
||||
opt_write_key(w, opt, name) or_return
|
||||
|
||||
case runtime.Type_Info_Integer:
|
||||
buf: [40]byte
|
||||
u := cast_any_int_to_u128(ka)
|
||||
name = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*kti.size, "0123456789", nil)
|
||||
|
||||
opt_write_key(w, opt, name) or_return
|
||||
case: return .Unsupported_Type
|
||||
}
|
||||
}
|
||||
@@ -406,10 +380,15 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
|
||||
ti := runtime.type_info_base(type_info_of(v.id))
|
||||
info := ti.variant.(runtime.Type_Info_Struct)
|
||||
first_iteration := true
|
||||
for name, i in info.names {
|
||||
for name, i in info.names[:info.field_count] {
|
||||
omitempty := false
|
||||
|
||||
json_name, extra := json_name_from_tag_value(reflect.struct_tag_get(reflect.Struct_Tag(info.tags[i]), "json"))
|
||||
|
||||
if json_name == "-" {
|
||||
continue
|
||||
}
|
||||
|
||||
for flag in strings.split_iterator(&extra, ",") {
|
||||
switch flag {
|
||||
case "omitempty":
|
||||
@@ -657,3 +636,41 @@ opt_write_indentation :: proc(w: io.Writer, opt: ^Marshal_Options) -> (err: io.E
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Widens the integer stored in `any_int_value` to a `u128` using a plain
// `u128(...)` cast of the concrete value.
//
// All fixed-width, pointer-sized and explicit-endian integer types listed in
// the switch below are handled, including `i128le` and `i128be`, which were
// previously missing and therefore fell through to the zero result.
//
// Signedness is not encoded in the result; callers that need it must carry it
// separately.
//
// Returns 0 when `any_int_value` does not hold one of the listed types.
@(private)
cast_any_int_to_u128 :: proc(any_int_value: any) -> u128 {
	u: u128 = 0
	switch i in any_int_value {
	// Native-endian integers.
	case i8:      u = u128(i)
	case i16:     u = u128(i)
	case i32:     u = u128(i)
	case i64:     u = u128(i)
	case i128:    u = u128(i)
	case int:     u = u128(i)
	case u8:      u = u128(i)
	case u16:     u = u128(i)
	case u32:     u = u128(i)
	case u64:     u = u128(i)
	case u128:    u = u128(i)
	case uint:    u = u128(i)
	case uintptr: u = u128(i)

	// Little-endian integers.
	case i16le:   u = u128(i)
	case i32le:   u = u128(i)
	case i64le:   u = u128(i)
	case i128le:  u = u128(i)
	case u16le:   u = u128(i)
	case u32le:   u = u128(i)
	case u64le:   u = u128(i)
	case u128le:  u = u128(i)

	// Big-endian integers.
	case i16be:   u = u128(i)
	case i32be:   u = u128(i)
	case i64be:   u = u128(i)
	case i128be:  u = u128(i)
	case u16be:   u = u128(i)
	case u32be:   u = u128(i)
	case u64be:   u = u128(i)
	case u128be:  u = u128(i)
	}

	return u
}
|
||||
@@ -363,12 +363,11 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
|
||||
}
|
||||
|
||||
v := v
|
||||
v = reflect.any_base(v)
|
||||
ti := type_info_of(v.id)
|
||||
ti := reflect.type_info_base(type_info_of(v.id))
|
||||
|
||||
#partial switch t in ti.variant {
|
||||
case reflect.Type_Info_Struct:
|
||||
if t.is_raw_union {
|
||||
if .raw_union in t.flags {
|
||||
return UNSUPPORTED_TYPE
|
||||
}
|
||||
|
||||
@@ -475,7 +474,7 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
|
||||
}
|
||||
|
||||
case reflect.Type_Info_Map:
|
||||
if !reflect.is_string(t.key) {
|
||||
if !reflect.is_string(t.key) && !reflect.is_integer(t.key) {
|
||||
return UNSUPPORTED_TYPE
|
||||
}
|
||||
raw_map := (^mem.Raw_Map)(v.data)
|
||||
@@ -492,25 +491,39 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
|
||||
key, _ := parse_object_key(p, p.allocator)
|
||||
unmarshal_expect_token(p, .Colon)
|
||||
|
||||
|
||||
|
||||
mem.zero_slice(elem_backing)
|
||||
if uerr := unmarshal_value(p, map_backing_value); uerr != nil {
|
||||
delete(key, p.allocator)
|
||||
return uerr
|
||||
}
|
||||
|
||||
key_ptr := rawptr(&key)
|
||||
key_ptr: rawptr
|
||||
|
||||
key_cstr: cstring
|
||||
if reflect.is_cstring(t.key) {
|
||||
key_cstr = cstring(raw_data(key))
|
||||
key_ptr = &key_cstr
|
||||
#partial switch tk in t.key.variant {
|
||||
case runtime.Type_Info_String:
|
||||
key_ptr = rawptr(&key)
|
||||
key_cstr: cstring
|
||||
if reflect.is_cstring(t.key) {
|
||||
key_cstr = cstring(raw_data(key))
|
||||
key_ptr = &key_cstr
|
||||
}
|
||||
case runtime.Type_Info_Integer:
|
||||
i, ok := strconv.parse_i128(key)
|
||||
if !ok { return UNSUPPORTED_TYPE }
|
||||
key_ptr = rawptr(&i)
|
||||
case: return UNSUPPORTED_TYPE
|
||||
}
|
||||
|
||||
|
||||
set_ptr := runtime.__dynamic_map_set_without_hash(raw_map, t.map_info, key_ptr, map_backing_value.data)
|
||||
if set_ptr == nil {
|
||||
delete(key, p.allocator)
|
||||
}
|
||||
|
||||
// there's no need to keep string value on the heap, since it was copied into map
|
||||
if reflect.is_integer(t.key) {
|
||||
delete(key, p.allocator)
|
||||
}
|
||||
|
||||
if parse_comma(p) {
|
||||
break map_loop
|
||||
|
||||
@@ -11,7 +11,7 @@ Write a UUID in the 8-4-4-4-12 format.
|
||||
This procedure performs error checking with every byte written.
|
||||
|
||||
If you can guarantee beforehand that your stream has enough space to hold the
|
||||
UUID (32 bytes), then it is better to use `unsafe_write` instead as that will
|
||||
UUID (36 bytes), then it is better to use `unsafe_write` instead as that will
|
||||
be faster.
|
||||
|
||||
Inputs:
|
||||
@@ -22,7 +22,7 @@ Returns:
|
||||
- error: An `io` error, if one occurred, otherwise `nil`.
|
||||
*/
|
||||
write :: proc(w: io.Writer, id: Identifier) -> (error: io.Error) #no_bounds_check {
|
||||
write_octet :: proc (w: io.Writer, octet: u8) -> io.Error #no_bounds_check {
|
||||
write_octet :: proc(w: io.Writer, octet: u8) -> io.Error #no_bounds_check {
|
||||
high_nibble := octet >> 4
|
||||
low_nibble := octet & 0xF
|
||||
|
||||
@@ -31,15 +31,15 @@ write :: proc(w: io.Writer, id: Identifier) -> (error: io.Error) #no_bounds_chec
|
||||
return nil
|
||||
}
|
||||
|
||||
for index in 0 ..< 4 { write_octet(w, id[index]) or_return }
|
||||
for index in 0 ..< 4 {write_octet(w, id[index]) or_return}
|
||||
io.write_byte(w, '-') or_return
|
||||
for index in 4 ..< 6 { write_octet(w, id[index]) or_return }
|
||||
for index in 4 ..< 6 {write_octet(w, id[index]) or_return}
|
||||
io.write_byte(w, '-') or_return
|
||||
for index in 6 ..< 8 { write_octet(w, id[index]) or_return }
|
||||
for index in 6 ..< 8 {write_octet(w, id[index]) or_return}
|
||||
io.write_byte(w, '-') or_return
|
||||
for index in 8 ..< 10 { write_octet(w, id[index]) or_return }
|
||||
for index in 8 ..< 10 {write_octet(w, id[index]) or_return}
|
||||
io.write_byte(w, '-') or_return
|
||||
for index in 10 ..< 16 { write_octet(w, id[index]) or_return }
|
||||
for index in 10 ..< 16 {write_octet(w, id[index]) or_return}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -54,7 +54,7 @@ Inputs:
|
||||
- id: The identifier to convert.
|
||||
*/
|
||||
unsafe_write :: proc(w: io.Writer, id: Identifier) #no_bounds_check {
|
||||
write_octet :: proc (w: io.Writer, octet: u8) #no_bounds_check {
|
||||
write_octet :: proc(w: io.Writer, octet: u8) #no_bounds_check {
|
||||
high_nibble := octet >> 4
|
||||
low_nibble := octet & 0xF
|
||||
|
||||
@@ -62,15 +62,15 @@ unsafe_write :: proc(w: io.Writer, id: Identifier) #no_bounds_check {
|
||||
io.write_byte(w, strconv.digits[low_nibble])
|
||||
}
|
||||
|
||||
for index in 0 ..< 4 { write_octet(w, id[index]) }
|
||||
for index in 0 ..< 4 {write_octet(w, id[index])}
|
||||
io.write_byte(w, '-')
|
||||
for index in 4 ..< 6 { write_octet(w, id[index]) }
|
||||
for index in 4 ..< 6 {write_octet(w, id[index])}
|
||||
io.write_byte(w, '-')
|
||||
for index in 6 ..< 8 { write_octet(w, id[index]) }
|
||||
for index in 6 ..< 8 {write_octet(w, id[index])}
|
||||
io.write_byte(w, '-')
|
||||
for index in 8 ..< 10 { write_octet(w, id[index]) }
|
||||
for index in 8 ..< 10 {write_octet(w, id[index])}
|
||||
io.write_byte(w, '-')
|
||||
for index in 10 ..< 16 { write_octet(w, id[index]) }
|
||||
for index in 10 ..< 16 {write_octet(w, id[index])}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -106,7 +106,7 @@ Convert a UUID to a string in the 8-4-4-4-12 format.
|
||||
|
||||
Inputs:
|
||||
- id: The identifier to convert.
|
||||
- buffer: A byte buffer to store the result. Must be at least 32 bytes large.
|
||||
- buffer: A byte buffer to store the result. Must be at least 36 bytes large.
|
||||
- loc: The caller location for debugging purposes (default: #caller_location)
|
||||
|
||||
Returns:
|
||||
@@ -119,7 +119,11 @@ to_string_buffer :: proc(
|
||||
) -> (
|
||||
str: string,
|
||||
) {
|
||||
assert(len(buffer) >= EXPECTED_LENGTH, "The buffer provided is not at least 32 bytes large.", loc)
|
||||
assert(
|
||||
len(buffer) >= EXPECTED_LENGTH,
|
||||
"The buffer provided is not at least 36 bytes large.",
|
||||
loc,
|
||||
)
|
||||
builder := strings.builder_from_bytes(buffer)
|
||||
unsafe_write(strings.to_writer(&builder), id)
|
||||
return strings.to_string(builder)
|
||||
@@ -129,3 +133,4 @@ to_string :: proc {
|
||||
to_string_allocated,
|
||||
to_string_buffer,
|
||||
}
|
||||
|
||||
|
||||
@@ -126,7 +126,7 @@ error :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) {
|
||||
t.error_count += 1
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
advance_rune :: proc(t: ^Tokenizer) {
|
||||
#no_bounds_check {
|
||||
/*
|
||||
@@ -170,7 +170,7 @@ peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte {
|
||||
return 0
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
skip_whitespace :: proc(t: ^Tokenizer) {
|
||||
for {
|
||||
switch t.ch {
|
||||
@@ -182,7 +182,7 @@ skip_whitespace :: proc(t: ^Tokenizer) {
|
||||
}
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
is_letter :: proc(r: rune) -> bool {
|
||||
if r < utf8.RUNE_SELF {
|
||||
switch r {
|
||||
@@ -296,7 +296,7 @@ skip_cdata :: proc(t: ^Tokenizer) -> (err: Error) {
|
||||
return
|
||||
}
|
||||
|
||||
@(optimization_mode="speed")
|
||||
@(optimization_mode="favor_size")
|
||||
scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close := false, multiline := true) -> (value: string, err: Error) {
|
||||
err = .None
|
||||
|
||||
@@ -414,4 +414,4 @@ scan :: proc(t: ^Tokenizer, multiline_string := false) -> Token {
|
||||
lit = string(t.src[offset : t.offset])
|
||||
}
|
||||
return Token{kind, lit, pos}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ IMPORTING_TIME :: #config(ODIN_CORE_FLAGS_USE_TIME, time.IS_SUPPORTED)
|
||||
|
||||
// Override support for parsing `net` types.
|
||||
// TODO: Update this when the BSDs are supported.
|
||||
IMPORTING_NET :: #config(ODIN_CORE_FLAGS_USE_NET, ODIN_OS == .Windows || ODIN_OS == .Linux || ODIN_OS == .Darwin)
|
||||
IMPORTING_NET :: #config(ODIN_CORE_FLAGS_USE_NET, ODIN_OS == .Windows || ODIN_OS == .Linux || ODIN_OS == .Darwin || ODIN_OS == .FreeBSD)
|
||||
|
||||
TAG_ARGS :: "args"
|
||||
SUBTAG_NAME :: "name"
|
||||
|
||||
@@ -28,7 +28,7 @@ Parse_Error :: struct {
|
||||
// Provides more granular information than what just a string could hold.
|
||||
Open_File_Error :: struct {
|
||||
filename: string,
|
||||
errno: os.Errno,
|
||||
errno: os.Error,
|
||||
mode: int,
|
||||
perms: int,
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
//+build freebsd, netbsd, openbsd
|
||||
//+build netbsd, openbsd
|
||||
package flags
|
||||
|
||||
import "base:runtime"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
//+build !freebsd !netbsd !openbsd
|
||||
//+build !netbsd !openbsd
|
||||
package flags
|
||||
|
||||
import "base:runtime"
|
||||
|
||||
@@ -12,7 +12,7 @@ import "core:reflect"
|
||||
|
||||
// Push a positional argument onto a data struct, checking for specified
|
||||
// positionals first before adding it to a fallback field.
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
push_positional :: #force_no_inline proc (model: ^$T, parser: ^Parser, arg: string) -> (error: Error) {
|
||||
if bit_array.get(&parser.filled_pos, parser.filled_pos.max_index) {
|
||||
// The max index is set, which means we're out of space.
|
||||
@@ -74,7 +74,7 @@ register_field :: proc(parser: ^Parser, field: reflect.Struct_Field, index: int)
|
||||
}
|
||||
|
||||
// Set a `-flag` argument, Odin-style.
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
set_odin_flag :: proc(model: ^$T, parser: ^Parser, name: string) -> (error: Error) {
|
||||
// We make a special case for help requests.
|
||||
switch name {
|
||||
@@ -100,7 +100,7 @@ set_odin_flag :: proc(model: ^$T, parser: ^Parser, name: string) -> (error: Erro
|
||||
}
|
||||
|
||||
// Set a `-flag` argument, UNIX-style.
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
set_unix_flag :: proc(model: ^$T, parser: ^Parser, name: string) -> (future_args: int, error: Error) {
|
||||
// We make a special case for help requests.
|
||||
switch name {
|
||||
@@ -137,7 +137,7 @@ set_unix_flag :: proc(model: ^$T, parser: ^Parser, name: string) -> (future_args
|
||||
}
|
||||
|
||||
// Set a `-flag:option` argument.
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
set_option :: proc(model: ^$T, parser: ^Parser, name, option: string) -> (error: Error) {
|
||||
field, index := get_field_by_name(model, name) or_return
|
||||
|
||||
@@ -176,7 +176,7 @@ set_option :: proc(model: ^$T, parser: ^Parser, name, option: string) -> (error:
|
||||
}
|
||||
|
||||
// Set a `-map:key=value` argument.
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
set_key_value :: proc(model: ^$T, parser: ^Parser, name, key, value: string) -> (error: Error) {
|
||||
field, index := get_field_by_name(model, name) or_return
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ import "core:strings"
|
||||
@require import "core:time/datetime"
|
||||
import "core:unicode/utf8"
|
||||
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
parse_and_set_pointer_by_base_type :: proc(ptr: rawptr, str: string, type_info: ^runtime.Type_Info) -> bool {
|
||||
bounded_int :: proc(value, min, max: i128) -> (result: i128, ok: bool) {
|
||||
return value, min <= value && value <= max
|
||||
@@ -202,7 +202,7 @@ parse_and_set_pointer_by_base_type :: proc(ptr: rawptr, str: string, type_info:
|
||||
// especially with files.
|
||||
//
|
||||
// We want to provide as informative as an error as we can.
|
||||
@(optimization_mode="size", disabled=NO_CORE_NAMED_TYPES)
|
||||
@(optimization_mode="favor_size", disabled=NO_CORE_NAMED_TYPES)
|
||||
parse_and_set_pointer_by_named_type :: proc(ptr: rawptr, str: string, data_type: typeid, arg_tag: string, out_error: ^Error) {
|
||||
// Core types currently supported:
|
||||
//
|
||||
@@ -254,8 +254,8 @@ parse_and_set_pointer_by_named_type :: proc(ptr: rawptr, str: string, data_type:
|
||||
}
|
||||
|
||||
handle, errno := os.open(str, mode, perms)
|
||||
if errno != 0 {
|
||||
// NOTE(Feoramund): os.Errno is system-dependent, and there's
|
||||
if errno != nil {
|
||||
// NOTE(Feoramund): os.Error is system-dependent, and there's
|
||||
// currently no good way to translate them all into strings.
|
||||
//
|
||||
// The upcoming `os2` package will hopefully solve this.
|
||||
@@ -320,7 +320,7 @@ parse_and_set_pointer_by_named_type :: proc(ptr: rawptr, str: string, data_type:
|
||||
}
|
||||
}
|
||||
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
set_unbounded_integer_by_type :: proc(ptr: rawptr, value: $T, data_type: typeid) where intrinsics.type_is_integer(T) {
|
||||
switch data_type {
|
||||
case i8: (^i8) (ptr)^ = cast(i8) value
|
||||
@@ -367,7 +367,7 @@ set_unbounded_integer_by_type :: proc(ptr: rawptr, value: $T, data_type: typeid)
|
||||
}
|
||||
}
|
||||
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
parse_and_set_pointer_by_type :: proc(ptr: rawptr, str: string, type_info: ^runtime.Type_Info, arg_tag: string) -> (error: Error) {
|
||||
#partial switch specific_type_info in type_info.variant {
|
||||
case runtime.Type_Info_Named:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//+private
|
||||
//+build !freebsd !netbsd !openbsd
|
||||
//+build !netbsd !openbsd
|
||||
package flags
|
||||
|
||||
import "core:net"
|
||||
|
||||
@@ -11,7 +11,7 @@ package flags
|
||||
@require import "core:strings"
|
||||
|
||||
// This proc is used to assert that `T` meets the expectations of the library.
|
||||
@(optimization_mode="size", disabled=ODIN_DISABLE_ASSERT)
|
||||
@(optimization_mode="favor_size", disabled=ODIN_DISABLE_ASSERT)
|
||||
validate_structure :: proc(model_type: $T, style: Parsing_Style, loc := #caller_location) {
|
||||
positionals_assigned_so_far: bit_array.Bit_Array
|
||||
defer bit_array.destroy(&positionals_assigned_so_far)
|
||||
@@ -162,7 +162,7 @@ validate_structure :: proc(model_type: $T, style: Parsing_Style, loc := #caller_
|
||||
|
||||
// Validate that all the required arguments are set and that the set arguments
|
||||
// are up to the program's expectations.
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
validate_arguments :: proc(model: ^$T, parser: ^Parser) -> Error {
|
||||
check_fields: for field, index in reflect.struct_fields_zipped(T) {
|
||||
was_set := bit_array.get(&parser.fields_set, index)
|
||||
|
||||
@@ -32,7 +32,7 @@ Inputs:
|
||||
Returns:
|
||||
- error: A union of errors; parsing, file open, a help request, or validation.
|
||||
*/
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
parse :: proc(
|
||||
model: ^$T,
|
||||
args: []string,
|
||||
|
||||
@@ -17,7 +17,7 @@ Inputs:
|
||||
- program: The name of the program, usually the first argument to `os.args`.
|
||||
- style: The argument parsing style, required to show flags in the proper style.
|
||||
*/
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
write_usage :: proc(out: io.Writer, data_type: typeid, program: string = "", style: Parsing_Style = .Odin) {
|
||||
// All flags get their tags parsed so they can be reasoned about later.
|
||||
Flag :: struct {
|
||||
|
||||
@@ -19,7 +19,7 @@ Inputs:
|
||||
- allocator: (default: context.allocator)
|
||||
- loc: The caller location for debugging purposes (default: #caller_location)
|
||||
*/
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
parse_or_exit :: proc(
|
||||
model: ^$T,
|
||||
program_args: []string,
|
||||
@@ -63,7 +63,7 @@ Inputs:
|
||||
- error: The error returned from `parse`.
|
||||
- style: The argument parsing style, required to show flags in the proper style, when usage is shown.
|
||||
*/
|
||||
@(optimization_mode="size")
|
||||
@(optimization_mode="favor_size")
|
||||
print_errors :: proc(data_type: typeid, error: Error, program: string, style: Parsing_Style = .Odin) {
|
||||
stderr := os.stream_from_handle(os.stderr)
|
||||
stdout := os.stream_from_handle(os.stdout)
|
||||
|
||||
+49
-29
@@ -334,6 +334,27 @@ panicf :: proc(fmt: string, args: ..any, loc := #caller_location) -> ! {
|
||||
message := tprintf(fmt, ..args)
|
||||
p("Panic", message, loc)
|
||||
}
|
||||
|
||||
// Creates a formatted C string
|
||||
//
|
||||
// *Allocates Using Context's Allocator*
|
||||
//
|
||||
// Inputs:
|
||||
// - args: A variadic list of arguments to be formatted.
|
||||
// - sep: An optional separator string (default is a single space).
|
||||
//
|
||||
// Returns: A formatted C string.
|
||||
//
|
||||
@(require_results)
|
||||
caprint :: proc(args: ..any, sep := " ", allocator := context.allocator) -> cstring {
|
||||
str: strings.Builder
|
||||
strings.builder_init(&str, allocator)
|
||||
sbprint(&str, ..args, sep=sep)
|
||||
strings.write_byte(&str, 0)
|
||||
s := strings.to_string(str)
|
||||
return cstring(raw_data(s))
|
||||
}
|
||||
|
||||
// Creates a formatted C string
|
||||
//
|
||||
// *Allocates Using Context's Allocator*
|
||||
@@ -346,9 +367,9 @@ panicf :: proc(fmt: string, args: ..any, loc := #caller_location) -> ! {
|
||||
// Returns: A formatted C string
|
||||
//
|
||||
@(require_results)
|
||||
caprintf :: proc(format: string, args: ..any, newline := false) -> cstring {
|
||||
caprintf :: proc(format: string, args: ..any, allocator := context.allocator, newline := false) -> cstring {
|
||||
str: strings.Builder
|
||||
strings.builder_init(&str)
|
||||
strings.builder_init(&str, allocator)
|
||||
sbprintf(&str, format, ..args, newline=newline)
|
||||
strings.write_byte(&str, 0)
|
||||
s := strings.to_string(str)
|
||||
@@ -365,8 +386,8 @@ caprintf :: proc(format: string, args: ..any, newline := false) -> cstring {
|
||||
// Returns: A formatted C string
|
||||
//
|
||||
@(require_results)
|
||||
caprintfln :: proc(format: string, args: ..any) -> cstring {
|
||||
return caprintf(format, ..args, newline=true)
|
||||
caprintfln :: proc(format: string, args: ..any, allocator := context.allocator) -> cstring {
|
||||
return caprintf(format, ..args, allocator=allocator, newline=true)
|
||||
}
|
||||
// Creates a formatted C string
|
||||
//
|
||||
@@ -380,12 +401,7 @@ caprintfln :: proc(format: string, args: ..any) -> cstring {
|
||||
//
|
||||
@(require_results)
|
||||
ctprint :: proc(args: ..any, sep := " ") -> cstring {
|
||||
str: strings.Builder
|
||||
strings.builder_init(&str, context.temp_allocator)
|
||||
sbprint(&str, ..args, sep=sep)
|
||||
strings.write_byte(&str, 0)
|
||||
s := strings.to_string(str)
|
||||
return cstring(raw_data(s))
|
||||
return caprint(args=args, sep=sep, allocator=context.temp_allocator)
|
||||
}
|
||||
// Creates a formatted C string
|
||||
//
|
||||
@@ -400,12 +416,7 @@ ctprint :: proc(args: ..any, sep := " ") -> cstring {
|
||||
//
|
||||
@(require_results)
|
||||
ctprintf :: proc(format: string, args: ..any, newline := false) -> cstring {
|
||||
str: strings.Builder
|
||||
strings.builder_init(&str, context.temp_allocator)
|
||||
sbprintf(&str, format, ..args, newline=newline)
|
||||
strings.write_byte(&str, 0)
|
||||
s := strings.to_string(str)
|
||||
return cstring(raw_data(s))
|
||||
return caprintf(format=format, args=args, allocator=context.temp_allocator, newline=newline)
|
||||
}
|
||||
// Creates a formatted C string, followed by a newline.
|
||||
//
|
||||
@@ -419,7 +430,7 @@ ctprintf :: proc(format: string, args: ..any, newline := false) -> cstring {
|
||||
//
|
||||
@(require_results)
|
||||
ctprintfln :: proc(format: string, args: ..any) -> cstring {
|
||||
return ctprintf(format, ..args, newline=true)
|
||||
return caprintf(format=format, args=args, allocator=context.temp_allocator, newline=true)
|
||||
}
|
||||
// Formats using the default print settings and writes to the given strings.Builder
|
||||
//
|
||||
@@ -951,10 +962,10 @@ fmt_bad_verb :: proc(fi: ^Info, verb: rune) {
|
||||
io.write_string(fi.writer, "%!", &fi.n)
|
||||
io.write_rune(fi.writer, verb, &fi.n)
|
||||
io.write_byte(fi.writer, '(', &fi.n)
|
||||
if fi.arg.id != nil {
|
||||
reflect.write_typeid(fi.writer, fi.arg.id, &fi.n)
|
||||
if arg := fi.arg; arg != nil {
|
||||
reflect.write_typeid(fi.writer, arg.id, &fi.n)
|
||||
io.write_byte(fi.writer, '=', &fi.n)
|
||||
fmt_value(fi, fi.arg, 'v')
|
||||
fmt_value(fi, arg, 'v')
|
||||
} else {
|
||||
io.write_string(fi.writer, "<nil>", &fi.n)
|
||||
}
|
||||
@@ -1861,7 +1872,7 @@ handle_tag :: proc(state: ^Info_State, data: rawptr, info: reflect.Type_Info_Str
|
||||
if optional_len == nil {
|
||||
return
|
||||
}
|
||||
for f, i in info.names {
|
||||
for f, i in info.names[:info.field_count] {
|
||||
if f != field_name {
|
||||
continue
|
||||
}
|
||||
@@ -1965,7 +1976,7 @@ fmt_struct :: proc(fi: ^Info, v: any, the_verb: rune, info: runtime.Type_Info_St
|
||||
fmt_bad_verb(fi, the_verb)
|
||||
return
|
||||
}
|
||||
if info.is_raw_union {
|
||||
if .raw_union in info.flags {
|
||||
if type_name == "" {
|
||||
io.write_string(fi.writer, "(raw union)", &fi.n)
|
||||
} else {
|
||||
@@ -1989,7 +2000,7 @@ fmt_struct :: proc(fi: ^Info, v: any, the_verb: rune, info: runtime.Type_Info_St
|
||||
// fi.hash = false;
|
||||
fi.indent += 1
|
||||
|
||||
is_empty := len(info.names) == 0
|
||||
is_empty := info.field_count == 0
|
||||
|
||||
if !is_soa && hash && !is_empty {
|
||||
io.write_byte(fi.writer, '\n', &fi.n)
|
||||
@@ -2010,17 +2021,17 @@ fmt_struct :: proc(fi: ^Info, v: any, the_verb: rune, info: runtime.Type_Info_St
|
||||
base_type_name = v.name
|
||||
}
|
||||
|
||||
actual_field_count := len(info.names)
|
||||
actual_field_count := info.field_count
|
||||
|
||||
n := uintptr(info.soa_len)
|
||||
|
||||
if info.soa_kind == .Slice {
|
||||
actual_field_count = len(info.names)-1 // len
|
||||
actual_field_count = info.field_count-1 // len
|
||||
|
||||
n = uintptr((^int)(uintptr(v.data) + info.offsets[actual_field_count])^)
|
||||
|
||||
} else if info.soa_kind == .Dynamic {
|
||||
actual_field_count = len(info.names)-3 // len, cap, allocator
|
||||
actual_field_count = info.field_count-3 // len, cap, allocator
|
||||
|
||||
n = uintptr((^int)(uintptr(v.data) + info.offsets[actual_field_count])^)
|
||||
}
|
||||
@@ -2099,7 +2110,7 @@ fmt_struct :: proc(fi: ^Info, v: any, the_verb: rune, info: runtime.Type_Info_St
|
||||
}
|
||||
} else {
|
||||
field_count := -1
|
||||
for name, i in info.names {
|
||||
for name, i in info.names[:info.field_count] {
|
||||
optional_len: int = -1
|
||||
use_nul_termination: bool = false
|
||||
verb := the_verb if the_verb == 'w' else 'v'
|
||||
@@ -2605,7 +2616,7 @@ fmt_bit_field :: proc(fi: ^Info, v: any, verb: rune, info: runtime.Type_Info_Bit
|
||||
|
||||
|
||||
field_count := -1
|
||||
for name, i in info.names {
|
||||
for name, i in info.names[:info.field_count] {
|
||||
field_verb := verb
|
||||
if handle_bit_field_tag(v.data, info, i, &field_verb) {
|
||||
continue
|
||||
@@ -2717,7 +2728,8 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) {
|
||||
}
|
||||
|
||||
case runtime.Type_Info_Struct,
|
||||
runtime.Type_Info_Union:
|
||||
runtime.Type_Info_Union,
|
||||
runtime.Type_Info_Bit_Field:
|
||||
if ptr == nil {
|
||||
io.write_string(fi.writer, "<nil>", &fi.n)
|
||||
return
|
||||
@@ -2751,9 +2763,11 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) {
|
||||
elem := runtime.type_info_base(info.elem)
|
||||
if elem != nil {
|
||||
if n, ok := fi.optional_len.?; ok {
|
||||
fi.optional_len = nil
|
||||
fmt_array(fi, ptr, n, elem.size, elem, verb)
|
||||
return
|
||||
} else if fi.use_nul_termination {
|
||||
fi.use_nul_termination = false
|
||||
fmt_array_nul_terminated(fi, ptr, -1, elem.size, elem, verb)
|
||||
return
|
||||
}
|
||||
@@ -2855,8 +2869,10 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) {
|
||||
n := info.count
|
||||
ptr := v.data
|
||||
if ol, ok := fi.optional_len.?; ok {
|
||||
fi.optional_len = nil
|
||||
n = min(n, ol)
|
||||
} else if fi.use_nul_termination {
|
||||
fi.use_nul_termination = false
|
||||
fmt_array_nul_terminated(fi, ptr, n, info.elem_size, info.elem, verb)
|
||||
return
|
||||
}
|
||||
@@ -2867,8 +2883,10 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) {
|
||||
n := slice.len
|
||||
ptr := slice.data
|
||||
if ol, ok := fi.optional_len.?; ok {
|
||||
fi.optional_len = nil
|
||||
n = min(n, ol)
|
||||
} else if fi.use_nul_termination {
|
||||
fi.use_nul_termination = false
|
||||
fmt_array_nul_terminated(fi, ptr, n, info.elem_size, info.elem, verb)
|
||||
return
|
||||
}
|
||||
@@ -2879,8 +2897,10 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) {
|
||||
n := array.len
|
||||
ptr := array.data
|
||||
if ol, ok := fi.optional_len.?; ok {
|
||||
fi.optional_len = nil
|
||||
n = min(n, ol)
|
||||
} else if fi.use_nul_termination {
|
||||
fi.use_nul_termination = false
|
||||
fmt_array_nul_terminated(fi, ptr, n, info.elem_size, info.elem, verb)
|
||||
return
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user