diff --git a/core/math/big/basic.odin b/core/math/big/basic.odin
index 1ba5807f4..751e2aedf 100644
--- a/core/math/big/basic.odin
+++ b/core/math/big/basic.odin
@@ -25,8 +25,8 @@ import "core:intrinsics"
 */
 int_add :: proc(dest, a, b: ^Int) -> (err: Error) {
 	dest := dest; x := a; y := b;
-	if err = clear_if_uninitialized(a);    err != .None { return err; }
-	if err = clear_if_uninitialized(b);    err != .None { return err; }
+	if err = clear_if_uninitialized(x); err != .None { return err; }
+	if err = clear_if_uninitialized(y); err != .None { return err; }
 	if err = clear_if_uninitialized(dest); err != .None { return err; }
 	/*
 		All parameters have been initialized.
@@ -773,6 +773,9 @@ int_factorial :: proc(res: ^Int, n: DIGIT) -> (err: Error) {
 }
 factorial :: proc { int_factorial, };
 
+
+
+
 /*
 	==========================
 		Low-level routines    
@@ -1264,6 +1267,92 @@ _int_div_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT) -> (remain
 }
 
 
+/*
+	Greatest Common Divisor using the binary method.
+
+	TODO(Jeroen):
+	- Maybe combine with LCM and have an `_int_gcd_lcm` proc that can return both with work shared.
+*/
+int_gcd :: proc(res, a, b: ^Int) -> (err: Error) {
+	if err = clear_if_uninitialized(a, b, res);    err != .None { return err; }
+
+	/*
+		If either `a` or `b`, return the other one.
+	*/
+	if z, _ := is_zero(a); z { return abs(res, b); }
+	if z, _ := is_zero(b); z { return abs(res, a); }
+
+ 	/*
+ 		Get copies of `a` and `b` we can modify.
+ 	*/
+	u, v := &Int{}, &Int{};
+	defer destroy(u, v);
+	if err = copy(u, a); err != .None { return err; }
+	if err = copy(v, b); err != .None { return err; }
+
+ 	/*
+ 		Must be positive for the remainder of the algorithm.
+ 	*/
+	u.sign = .Zero_or_Positive; v.sign = .Zero_or_Positive;
+
+ 	/*
+ 		B1.  Find the common power of two for `u` and `v`.
+ 	*/
+ 	u_lsb, _ := count_lsb(u);
+ 	v_lsb, _ := count_lsb(v);
+ 	k        := min(u_lsb, v_lsb);
+
+	if k > 0 {
+		/*
+			Divide the power of two out.
+		*/
+		if err = shr(u, u, k); err != .None { return err; }
+		if err = shr(v, v, k); err != .None { return err; }
+	}
+
+	/*
+		Divide any remaining factors of two out.
+	*/
+	if u_lsb != k {
+		if err = shr(u, u, u_lsb - k); err != .None { return err; }
+	}
+	if v_lsb != k {
+		if err = shr(v, v, v_lsb - k); err != .None { return err; }
+	}
+
+	for v.used != 0 {
+		/*
+			Make sure `v` is the largest.
+		*/
+		if c, _ := cmp_mag(u, v); c == 1 {
+			/*
+				Swap `u` and `v` to make sure `v` is >= `u`.
+			*/
+			swap(u, v);
+		}
+
+		/*
+			Subtract smallest from largest.
+		*/
+		if err = sub(v, v, u); err != .None { return err; }
+
+		/*
+			Divide out all factors of two.
+		*/
+		b, _ := count_lsb(v);
+		if err = shr(v, v, b); err != .None { return err; }
+	}
+
+ 	/*
+ 		Multiply by 2**k which we divided out at the beginning.
+ 	*/
+ 	if err = shl(res, u, k); err != .None { return err; }
+ 	res.sign = .Zero_or_Positive;
+	return err;
+}
+gcd :: proc { int_gcd, };
+
+
 when size_of(rawptr) == 8 {
 	_factorial_table := [35]_WORD{
 /* f(00): */                                                   1,
diff --git a/core/math/big/helpers.odin b/core/math/big/helpers.odin
index d5cf16b33..dc846b4b9 100644
--- a/core/math/big/helpers.odin
+++ b/core/math/big/helpers.odin
@@ -504,48 +504,36 @@ count_bits :: proc(a: ^Int) -> (count: int, err: Error) {
 }
 
 /*
-	Counts the number of LSBs which are zero before the first zero bit
+	Returns the number of trailing zeroes before the first one.
+	Differs from regular `ctz` in that 0 returns 0.
 */
-count_lsb :: proc(a: ^Int) -> (count: int, err: Error) {
-	if err = clear_if_uninitialized(a); err != .None {
-		return 0, err;
-	}
-
-	lnz := []u8{4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
-
-	q: DIGIT;
+int_count_lsb :: proc(a: ^Int) -> (count: int, err: Error) {
+	if err = clear_if_uninitialized(a); err != .None { return -1, err; }
 
+	_ctz :: intrinsics.count_trailing_zeros;
 	/*
-		Early out for zero.
+		Easy out.
 	*/
-	if z, _ := is_zero(a); z {
-		return 0, .None;
-	}
+	if z, _ := is_zero(a); z { return 0, .None; }
 
 	/*
 		Scan lower digits until non-zero.
 	*/
-	for count = 0; (count < a.used && a.digit[count] == 0); count += 1 {}
-	q = a.digit[count];
-	count *= _DIGIT_BITS;
+	x: int;
+	for x = 0; x < a.used && a.digit[x] == 0; x += 1 {}
 
-	/*
-		Now scan this digit until a 1 is found.
-	*/
-	if q & 1 == 0 {
-		p: DIGIT;
-		for {
-			p = q & 15;
-			count += int(lnz[p]);
-			q >>= 4;
-			if p != 0 {
-				break;
-			}
-		}
-	}
-	return count, .None;
+	q := a.digit[x];
+	x *= _DIGIT_BITS;
+	return x + count_lsb(q), .None;
 }
 
+platform_count_lsb :: #force_inline proc(a: $T) -> (count: int)
+	where intrinsics.type_is_integer(T) && intrinsics.type_is_unsigned(T) {
+	return int(intrinsics.count_trailing_zeros(a)) if a > 0 else 0;
+}
+
+count_lsb :: proc { int_count_lsb, platform_count_lsb, };
+
 int_random_digit :: proc(r: ^rnd.Rand = nil) -> (res: DIGIT) {
 	when _DIGIT_BITS == 60 { // DIGIT = u64
 		return DIGIT(rnd.uint64(r)) & _MASK;
@@ -602,14 +590,24 @@ _zero_unused :: proc(a: ^Int) {
 	}
 }
 
-clear_if_uninitialized :: proc(dest: ^Int, minimize := false) -> (err: Error) {
-	if !is_initialized(dest) {
-		return grow(dest, _MIN_DIGIT_COUNT if minimize else _DEFAULT_DIGIT_COUNT);
+clear_if_uninitialized_single :: proc(arg: ^Int) -> (err: Error) {
+	if !is_initialized(arg) {
+		return grow(arg, _DEFAULT_DIGIT_COUNT);
 	}
-	return .None;
+	return err;
 }
 
+clear_if_uninitialized_multi :: proc(args: ..^Int) -> (err: Error) {
+	for i in args {
+		if i != nil && !is_initialized(i) {
+			e := grow(i, _DEFAULT_DIGIT_COUNT);
+			if e != .None { err = e; }
+		}
+	}
+	return err;
+}
 
+clear_if_uninitialized :: proc {clear_if_uninitialized_single, clear_if_uninitialized_multi, };
 
 /*
 	Allocates several `Int`s at once.
diff --git a/core/math/big/test.py b/core/math/big/test.py
index 75152821f..96e0be227 100644
--- a/core/math/big/test.py
+++ b/core/math/big/test.py
@@ -475,6 +475,9 @@ if __name__ == '__main__':
 			TIMINGS    = {}
 
 			UNTIL_ITERS = ITERATIONS
+			if test_proc == test_root_n and BITS == 1_200:
+				UNTIL_ITERS /= 10
+
 			UNTIL_TIME  = TOTAL_TIME + BITS / TIMED_BITS_PER_SECOND
 			# We run each test for a second per 20k bits