From 112e4230eef004b4db3ec65ee5ae0c23d3f8ed54 Mon Sep 17 00:00:00 2001
From: bytegrrrl <bytegrrrl@proton.me>
Date: Thu, 24 Apr 2025 01:34:35 -0400
Subject: [PATCH 1/4] Add pure FixedPoint implementations and tests for sin(),
 cos(), and tan()

---
 libopenage/util/fixed_point.h        | 125 +++++++++++++++++++++++++--
 libopenage/util/fixed_point_test.cpp |  67 ++++++++++++++
 2 files changed, 183 insertions(+), 9 deletions(-)

diff --git a/libopenage/util/fixed_point.h b/libopenage/util/fixed_point.h
index c751238cdf..cb8d5ca476 100644
--- a/libopenage/util/fixed_point.h
+++ b/libopenage/util/fixed_point.h
@@ -413,6 +413,18 @@ class FixedPoint {
 		return *this;
 	}
 
+	/**
+	 * FixedPoint *= FixedPoint
+	 *
+	 * This shares the same caveats as FixedPint * FixedPoint.
+	 *
+	 * Use a larger intermediate type to avoid overflow.
+	 */
+	constexpr FixedPoint operator*=(const FixedPoint &rhs) {
+		*this = *this *rhs;
+		return *this;
+	}
+
 	/**
 	 * FixedPoint /= N
 	 */
@@ -499,16 +511,111 @@ class FixedPoint {
 		return std::atan2(this->to_double(), n.to_double());
 	}
 
-	constexpr double sin() {
-		return std::sin(this->to_double());
+	/**
+	 * Calculate pure FixedPoint sine using a power series approximation.
+	 *
+	 * This may lose absolute precision when the fractional part is small. Because trig functions are
+	 * cyclic, we don't lose precision for large integer values. For best results, you would want
+	 * intermediate_size to be twice the size of raw_type.
+	 */
+	constexpr FixedPoint sin() {
+		size_t order = 10;
+		FixedPoint x = *this;
+
+		// Sine is an odd function, so we can pull out the sign.
+		bool negative = x < 0;
+		if (negative) {
+			x = -x;
+		}
+
+		// Ensure we're in the interval (-pi, pi)
+		// Since this is a series expansion approximation around 0, this interval is where we are most accurate
+		if (x > FixedPoint::pi()) {
+			size_t n = round((x / FixedPoint::tau()).to_double());
+			x -= FixedPoint::tau() * n;
+		}
+
+		FixedPoint pow_x = x;
+		FixedPoint sin_x = 0;
+		size_t factorial = 1;
+		bool term_sign = 0;
+		for (size_t i = 0; i < order; i++) {
+			sin_x += pow_x * (-2 * term_sign + 1) / factorial;
+			term_sign = !term_sign;
+			pow_x *= x * x;
+			factorial *= (2 * i + 2) * (2 * i + 3);
+		}
+
+		return negative ? -sin_x : sin_x;
 	}
 
-	constexpr double cos() {
-		return std::cos(this->to_double());
+	/**
+	 * Calculate pure FixedPoint cosine using a power series approximation.
+	 *
+	 * This may lose absolute precision when the fractional part is small. Because trig functions are
+	 * cyclic, we don't lose precision for large integer values. For best results, you would want
+	 * intermediate_size to be twice the size of raw_type.
+	 */
+	constexpr FixedPoint cos() {
+		size_t order = 10;
+		FixedPoint x = *this;
+
+		// Cosine is an even function so we can drop the sign
+		if (x < 0) {
+			x = -x;
+		}
+
+		// Ensure we're in the interval (-pi, pi)
+		// Since this is a series expansion approximation around 0, this interval is where we are most accurate
+		if (x > FixedPoint::pi()) {
+			size_t n = round((x / FixedPoint::tau()).to_double());
+			x -= FixedPoint::tau() * n;
+		}
+
+		FixedPoint pow_x = 1;
+		FixedPoint cos_x = 0;
+		size_t factorial = 1;
+		bool term_sign = 0;
+		for (size_t i = 0; i < order; i++) {
+			cos_x += pow_x * (-2 * term_sign + 1) / factorial;
+			term_sign = !term_sign;
+			pow_x *= x * x;
+			factorial *= (2 * i + 1) * (2 * i + 2);
+		}
+
+		return cos_x;
 	}
 
-	constexpr double tan() {
-		return std::tan(this->to_double());
+	/**
+	 * Calculate pure FixedPoint tangent using sin() and cos().
+	 *
+	 * This may lose absolute precision when the fractional part is small. Because trig functions are
+	 * cyclic, we don't lose precision for large integer values. For best results, you would want
+	 * intermediate_size to be twice the size of raw_type.
+	 * This is guaranteed to lose precision when approaching its asymptotes (k * pi/2 for odd k).
+	 */
+	constexpr FixedPoint tan() {
+		FixedPoint x = *this;
+
+		// Tangent is odd, we can pull out the sign
+		bool negative = x < 0;
+		if (negative) {
+			x = -x;
+		}
+
+		// Ensure we are in the interval (-pi/2, pi/2) for maximum accuracy
+		if (x > FixedPoint::pi_2()) {
+			size_t n = round((x / FixedPoint::pi()).to_double());
+			x -= FixedPoint::pi() * n;
+		}
+
+		FixedPoint cos_x = x.cos();
+
+		// Raise an exception when too near an asymptote.
+		ENSURE(cos_x != std::clamp(cos_x.to_double(), -1e-7, 1e-7), "FixedPoint::tan() approaches +/- infinity for this value.");
+		FixedPoint tan_x = x.sin() / cos_x;
+
+		return negative ? -tan_x : tan_x;
 	}
 };
 
@@ -629,17 +736,17 @@ constexpr double atan2(openage::util::FixedPoint<I, F, Inter> x, openage::util::
 
 template <typename I, unsigned F, typename Inter>
 constexpr double sin(openage::util::FixedPoint<I, F, Inter> n) {
-	return n.sin();
+	return static_cast<double>(n.sin());
 }
 
 template <typename I, unsigned F, typename Inter>
 constexpr double cos(openage::util::FixedPoint<I, F, Inter> n) {
-	return n.cos();
+	return static_cast<double>(n.cos());
 }
 
 template <typename I, unsigned F, typename Inter>
 constexpr double tan(openage::util::FixedPoint<I, F, Inter> n) {
-	return n.tan();
+	return static_cast<double>(n.tan());
 }
 
 template <typename I, unsigned F, typename Inter>
diff --git a/libopenage/util/fixed_point_test.cpp b/libopenage/util/fixed_point_test.cpp
index b3690b8d15..3f975d7974 100644
--- a/libopenage/util/fixed_point_test.cpp
+++ b/libopenage/util/fixed_point_test.cpp
@@ -203,6 +203,73 @@ void fixed_point() {
 		TESTNOEXCEPT((FixedPoint<int64_t, 32>::from_float(3.25).sqrt()));
 		TESTNOEXCEPT((FixedPoint<uint64_t, 32>::from_float(-3.25).sqrt()));
 	}
+
+	// Pure FixedPoint trig tests
+	{
+		using TrigType = FixedPoint<int64_t, 32, __int128>;
+
+		// Testing sin() and cos()
+		for (int i = -100'000; i <= 100'000; i++) {
+			TrigType x = TrigType::pi() * (0.001 * i);
+
+			// Test std overloads
+			TESTEQUALS(x.sin(), std::sin(x));
+			TESTEQUALS(x.cos(), std::cos(x));
+
+			// Test vs standard library implementation for doubles
+			TESTEQUALS_FLOAT(std::sin(x), std::sin(x.to_double()), 1e-7);
+			TESTEQUALS_FLOAT(std::cos(x), std::cos(x.to_double()), 1e-7);
+
+			// Test some trig identities
+			TESTEQUALS_FLOAT(std::cos(x), std::sin(TrigType::pi_2() - x), 1e-7);
+			TESTEQUALS_FLOAT(std::cos(TrigType::pi_2() - x), std::sin(x), 1e-7);
+			TESTEQUALS_FLOAT(std::sin(x) * std::sin(x) + std::cos(x) * std::cos(x), 1.0, 1e-7);
+			TESTEQUALS_FLOAT(std::sin(x * 2), std::sin(x) * std::cos(x) * 2, 1e-7);
+			TESTEQUALS_FLOAT(std::cos(x * 2), -std::sin(x) * std::sin(x) * 2 + 1, 1e-7);
+			TESTEQUALS_FLOAT(std::cos(x * 2), std::cos(x) * std::cos(x) * 2 - 1, 1e-7);
+		}
+
+		// Testing tan(), note the reduced precision
+		for (int i = -100'000; i <= 100'000; i++) {
+			TrigType x = TrigType::pi_2() * (0.001 * i);
+
+			// Skip values where tan(x) trends to infinity
+			if (i % 1000 == 0 and i % 2000 != 0) [[unlikely]] {
+				continue;
+			}
+
+			int asymptote_distance = abs(1000 - abs(i % 2000));
+			if (asymptote_distance <= 1) [[unlikely]] {
+				TESTEQUALS_FLOAT(std::tan(x), std::tan(x.to_double()), 1e-2);
+			}
+			else if (asymptote_distance <= 5) [[unlikely]] {
+				TESTEQUALS_FLOAT(std::tan(x), std::tan(x.to_double()), 1e-3);
+			}
+			else if (asymptote_distance <= 16) [[unlikely]] {
+				TESTEQUALS_FLOAT(std::tan(x), std::tan(x.to_double()), 1e-4);
+			}
+			else if (asymptote_distance <= 60) [[unlikely]] {
+				TESTEQUALS_FLOAT(std::tan(x), std::tan(x.to_double()), 1e-5);
+			}
+			else {
+				TESTEQUALS_FLOAT(std::tan(x), std::tan(x.to_double()), 1e-6);
+			}
+		}
+
+		// Testing tan() at asymptotes
+		TESTTHROWS(TrigType::pi_2().tan());
+		TESTTHROWS((-TrigType::pi_2()).tan());
+
+		// testing tan() vs atan2()
+		for (int i = 1; i <= 500; i++) {
+			TrigType x(0.01 * std::numbers::pi * i);
+			for (int j = -1000; j <= 1000; j++) {
+				TrigType y(j * 0.01);
+				TESTEQUALS_FLOAT(TrigType(y.atan2(x)).tan() * x, y, 1e-5);
+				TESTEQUALS_FLOAT(TrigType(y.atan2(-x)).tan() * -x, y, 1e-5);
+			}
+		}
+	}
 }
 
 }}} // openage::util::tests

From bec85cd18282ac0825af6218f94590ff0b6e7dbf Mon Sep 17 00:00:00 2001
From: bytegrrrl <bytegrrrl@proton.me>
Date: Thu, 24 Apr 2025 20:37:44 -0400
Subject: [PATCH 2/4] Implement pure FixedPoint round() function and helpers

---
 libopenage/util/fixed_point.h | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/libopenage/util/fixed_point.h b/libopenage/util/fixed_point.h
index cb8d5ca476..733353cadc 100644
--- a/libopenage/util/fixed_point.h
+++ b/libopenage/util/fixed_point.h
@@ -175,6 +175,13 @@ class FixedPoint {
 		return FixedPoint::from_int(0);
 	}
 
+	/**
+	 * FixedPoint value that is preinitialized to one.
+	 */
+	static constexpr FixedPoint one() {
+		return FixedPoint::from_int(1);
+	}
+
 	/**
 	 * Math constants represented in FixedPoint
 	 */
@@ -366,6 +373,26 @@ class FixedPoint {
 			static_cast<FixedPoint::unsigned_int_type>(this->raw_value) & std::integral_constant<int_type, FixedPoint::fractional_part_bitmask()>::value);
 	}
 
+	/**
+	 * Converter to retrieve the integral (pre-decimal) part of the number.
+	 */
+	constexpr FixedPoint get_integral_part() const {
+		// similar to the get_fractional_part() implementation, but the opposite bits.
+		return FixedPoint::from_raw_value(this->raw_value & ~std::integral_constant<int_type, FixedPoint::fractional_part_bitmask()>::value);
+	}
+
+	/**
+	 * Round to the nearest integer.
+	 */
+	constexpr FixedPoint round() const {
+		if (this->get_fractional_part() < same_type_but_unsigned(0.5)) {
+			return this->get_integral_part();
+		}
+		else {
+			return this->get_integral_part() + one();
+		}
+	}
+
 	// Comparison operators for comparison with other
 	constexpr auto operator<=>(const FixedPoint &o) const = default;
 
@@ -531,7 +558,7 @@ class FixedPoint {
 		// Ensure we're in the interval (-pi, pi)
 		// Since this is a series expansion approximation around 0, this interval is where we are most accurate
 		if (x > FixedPoint::pi()) {
-			size_t n = round((x / FixedPoint::tau()).to_double());
+			int_type n = (x / FixedPoint::tau()).round().to_int();
 			x -= FixedPoint::tau() * n;
 		}
 
@@ -568,7 +595,7 @@ class FixedPoint {
 		// Ensure we're in the interval (-pi, pi)
 		// Since this is a series expansion approximation around 0, this interval is where we are most accurate
 		if (x > FixedPoint::pi()) {
-			size_t n = round((x / FixedPoint::tau()).to_double());
+			int_type n = (x / FixedPoint::tau()).round().to_int();
 			x -= FixedPoint::tau() * n;
 		}
 
@@ -605,7 +632,7 @@ class FixedPoint {
 
 		// Ensure we are in the interval (-pi/2, pi/2) for maximum accuracy
 		if (x > FixedPoint::pi_2()) {
-			size_t n = round((x / FixedPoint::pi()).to_double());
+			int_type n = (x / FixedPoint::pi()).round().to_int();
 			x -= FixedPoint::pi() * n;
 		}
 

From 78ef08ed4e4d3ff56d7e48aa5eed949f377e0b05 Mon Sep 17 00:00:00 2001
From: bytegrrrl <bytegrrrl@proton.me>
Date: Fri, 25 Apr 2025 02:35:26 -0400
Subject: [PATCH 3/4] Implement precision-preserving multiplication and
 division

---
 libopenage/util/fixed_point.h        | 96 ++++++++++++++++++++++++++--
 libopenage/util/fixed_point_test.cpp |  9 ++-
 2 files changed, 95 insertions(+), 10 deletions(-)

diff --git a/libopenage/util/fixed_point.h b/libopenage/util/fixed_point.h
index 733353cadc..3132d6a2aa 100644
--- a/libopenage/util/fixed_point.h
+++ b/libopenage/util/fixed_point.h
@@ -546,7 +546,7 @@ class FixedPoint {
 	 * intermediate_size to be twice the size of raw_type.
 	 */
 	constexpr FixedPoint sin() {
-		size_t order = 10;
+		size_t order = this->approx_decimal_places;
 		FixedPoint x = *this;
 
 		// Sine is an odd function, so we can pull out the sign.
@@ -584,7 +584,7 @@ class FixedPoint {
 	 * intermediate_size to be twice the size of raw_type.
 	 */
 	constexpr FixedPoint cos() {
-		size_t order = 10;
+		size_t order = this->approx_decimal_places;
 		FixedPoint x = *this;
 
 		// Cosine is an even function so we can drop the sign
@@ -709,10 +709,56 @@ typename std::enable_if<std::is_arithmetic<N>::value, FixedPoint<I, F, Inter>>::
  */
 template <typename I, unsigned int F, typename Inter>
 constexpr FixedPoint<I, F, Inter> operator*(const FixedPoint<I, F, Inter> lhs, const FixedPoint<I, F, Inter> rhs) {
-	Inter ret = static_cast<Inter>(lhs.get_raw_value()) * static_cast<Inter>(rhs.get_raw_value());
-	ret >>= F;
+	using uInter = typename std::make_unsigned<Inter>::type;
+
+	// An optimization that can prevent overflows.
+	// This only helps overflows from the actual operations happening here, but not an ordinary overflow of int_type
+	// This is essentially just Karatsuba
+	if constexpr (sizeof(I) == sizeof(Inter)) {
+		constexpr int hwidth = sizeof(Inter) * 4;
+		uInter lower_mask = ~static_cast<uInter>(0) >> hwidth;
+
+		// Store half of each value in each variable
+		bool l_pos = lhs > 0;
+		uInter lhs_lower = static_cast<uInter>(std::abs(lhs.get_raw_value()));
+		Inter lhs_upper = lhs_lower >> hwidth;
+		lhs_lower = lhs_lower & lower_mask;
+
+		bool r_pos = rhs > 0;
+		uInter rhs_lower = static_cast<uInter>(std::abs(rhs.get_raw_value()));
+		Inter rhs_upper = rhs_lower >> hwidth;
+		rhs_lower = rhs_lower & lower_mask;
+
+		// Calculate the multiplication piecewise
+		uInter result_lower = lhs_lower * rhs_lower;
+		Inter result_mid = lhs_lower * rhs_upper + lhs_upper * rhs_lower;
+		Inter result_upper = lhs_upper * rhs_upper;
+
+		// And recombine.
+		I result = result_lower >> F;
+		if constexpr (F > hwidth) {
+			result += result_mid >> (F - hwidth);
+		}
+		else {
+			result += result_mid << (hwidth - F);
+		}
+		if constexpr (F > 2 * hwidth) {
+			result += result_upper >> (F - 2 * hwidth);
+		}
+		else {
+			// These are the bits that would have been lost. We still may lose some, but there are some we save
+			result += result_upper << (2 * hwidth - F);
+		}
+		result = l_pos ^ r_pos ? -result : result;
 
-	return FixedPoint<I, F, Inter>::from_raw_value(static_cast<I>(ret));
+		return FixedPoint<I, F, Inter>::from_raw_value(result);
+	}
+	else {
+		Inter ret = static_cast<Inter>(lhs.get_raw_value()) * static_cast<Inter>(rhs.get_raw_value());
+		ret >>= F;
+
+		return FixedPoint<I, F, Inter>::from_raw_value(static_cast<I>(ret));
+	}
 }
 
 
@@ -721,8 +767,44 @@ constexpr FixedPoint<I, F, Inter> operator*(const FixedPoint<I, F, Inter> lhs, c
  */
 template <typename I, unsigned int F, typename Inter>
 constexpr FixedPoint<I, F, Inter> operator/(const FixedPoint<I, F, Inter> lhs, const FixedPoint<I, F, Inter> rhs) {
-	Inter ret = div((static_cast<Inter>(lhs.get_raw_value()) << F), static_cast<Inter>(rhs.get_raw_value()));
-	return FixedPoint<I, F, Inter>::from_raw_value(static_cast<I>(ret));
+	using uInter = typename std::make_unsigned<Inter>::type;
+
+	// Implementation that doesn't lose bits using small intermediate values.
+	if constexpr (sizeof(I) == sizeof(Inter)) {
+		Inter mask = static_cast<Inter>(1);
+
+		// Save the signs for later
+		bool l_pos = lhs > 0;
+		bool r_pos = rhs > 0;
+
+		uInter r = 0;
+		uInter q = static_cast<Inter>(std::abs(lhs.get_raw_value()));
+		uInter d = static_cast<Inter>(std::abs(rhs.get_raw_value()));
+
+		// basically just doing long division
+		for (size_t i = 0; i < sizeof(Inter) * 8 + F; i++) {
+			q = std::rotl(q, 1);
+			r = std::rotl(r, 1);
+
+			// "shift" on onto the second Inter
+			r = r | (q & mask);
+			q = q & ~mask;
+
+			// Subtract if large enough
+			if (r >= d) {
+				r = r - d;
+				q = q | mask;
+			}
+		}
+
+		q = l_pos ^ r_pos ? -q : q;
+
+		return FixedPoint<I, F, Inter>::from_raw_value(static_cast<I>(q));
+	}
+	else {
+		Inter ret = div((static_cast<Inter>(lhs.get_raw_value()) << F), static_cast<Inter>(rhs.get_raw_value()));
+		return FixedPoint<I, F, Inter>::from_raw_value(static_cast<I>(ret));
+	}
 }
 
 
diff --git a/libopenage/util/fixed_point_test.cpp b/libopenage/util/fixed_point_test.cpp
index 3f975d7974..f873ff9452 100644
--- a/libopenage/util/fixed_point_test.cpp
+++ b/libopenage/util/fixed_point_test.cpp
@@ -127,7 +127,8 @@ void fixed_point() {
 		using T = FixedPoint<uint16_t, 7U, uint64_t>;
 
 		auto a = S::from_int(16U);
-		TESTNOTEQUALS((a*a).to_int(), 256U);
+		// This test case is now equal with the improved multiplication algorithm
+		TESTEQUALS((a*a).to_int(), 256U);
 
 		auto b = T::from_int(16U);
 		TESTEQUALS((b*b).to_int(), 256U);
@@ -139,7 +140,8 @@ void fixed_point() {
 		using S = FixedPoint<int32_t, 12U>;
 		auto a = S::from_int(256);
 		auto b = S::from_int(8);
-		TESTNOTEQUALS((a/b).to_int(), 32);
+		// This test case is now equal with the improved division algorithm
+		TESTEQUALS((a/b).to_int(), 32);
 
 
 		using T = FixedPoint<int32_t, 12, int64_t>;
@@ -206,7 +208,7 @@ void fixed_point() {
 
 	// Pure FixedPoint trig tests
 	{
-		using TrigType = FixedPoint<int64_t, 32, __int128>;
+		using TrigType = FixedPoint<int64_t, 32, int64_t>;
 
 		// Testing sin() and cos()
 		for (int i = -100'000; i <= 100'000; i++) {
@@ -222,6 +224,7 @@ void fixed_point() {
 
 			// Test some trig identities
 			TESTEQUALS_FLOAT(std::cos(x), std::sin(TrigType::pi_2() - x), 1e-7);
+
 			TESTEQUALS_FLOAT(std::cos(TrigType::pi_2() - x), std::sin(x), 1e-7);
 			TESTEQUALS_FLOAT(std::sin(x) * std::sin(x) + std::cos(x) * std::cos(x), 1.0, 1e-7);
 			TESTEQUALS_FLOAT(std::sin(x * 2), std::sin(x) * std::cos(x) * 2, 1e-7);

From 37ec5dc32fc9e9eab9d06316da1e573b78d77289 Mon Sep 17 00:00:00 2001
From: bytegrrrl <bytegrrrl@proton.me>
Date: Fri, 25 Apr 2025 08:17:04 -0400
Subject: [PATCH 4/4] Optimize FixedPoint multiplication

---
 libopenage/util/fixed_point.h | 53 ++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/libopenage/util/fixed_point.h b/libopenage/util/fixed_point.h
index 3132d6a2aa..5301d1fe59 100644
--- a/libopenage/util/fixed_point.h
+++ b/libopenage/util/fixed_point.h
@@ -768,38 +768,47 @@ constexpr FixedPoint<I, F, Inter> operator*(const FixedPoint<I, F, Inter> lhs, c
 template <typename I, unsigned int F, typename Inter>
 constexpr FixedPoint<I, F, Inter> operator/(const FixedPoint<I, F, Inter> lhs, const FixedPoint<I, F, Inter> rhs) {
 	using uInter = typename std::make_unsigned<Inter>::type;
+	using FP = FixedPoint<I, F, Inter>;
 
 	// Implementation that doesn't lose bits using small intermediate values.
 	if constexpr (sizeof(I) == sizeof(Inter)) {
-		Inter mask = static_cast<Inter>(1);
+		constexpr uInter lower_mask = ~static_cast<uInter>(0) >> (sizeof(Inter) * 8 - F);
 
-		// Save the signs for later
+		// Store the integral and fractional parts in the upper and lower "halves"
 		bool l_pos = lhs > 0;
-		bool r_pos = rhs > 0;
-
-		uInter r = 0;
-		uInter q = static_cast<Inter>(std::abs(lhs.get_raw_value()));
-		uInter d = static_cast<Inter>(std::abs(rhs.get_raw_value()));
-
-		// basically just doing long division
-		for (size_t i = 0; i < sizeof(Inter) * 8 + F; i++) {
-			q = std::rotl(q, 1);
-			r = std::rotl(r, 1);
+		uInter lhs_lower = static_cast<uInter>(std::abs(lhs.get_raw_value()));
+		Inter lhs_upper = lhs_lower >> F;
+		lhs_lower = lhs_lower & lower_mask;
 
-			// "shift" on onto the second Inter
-			r = r | (q & mask);
-			q = q & ~mask;
+		bool r_pos = rhs > 0;
+		uInter rhs_lower = static_cast<uInter>(std::abs(rhs.get_raw_value()));
+		Inter rhs_upper = rhs_lower >> F;
+		rhs_lower = rhs_lower & lower_mask;
 
-			// Subtract if large enough
-			if (r >= d) {
-				r = r - d;
-				q = q | mask;
-			}
+		// special case when integeral part of rhs is 0
+		if (rhs_upper == 0) {
+			// It's very likely this upper term is zero, but we should consider it regardless.
+			FP upper_term = FP::from_raw_value((lhs_upper << (2 * F)) / rhs_lower);
+			FP lower_term = FP::from_raw_value((lhs_lower << F) / rhs_lower);
+			FP result = upper_term + lower_term;
+			return l_pos ^ r_pos ? -result : result;
 		}
 
-		q = l_pos ^ r_pos ? -q : q;
+		// Calculate the multiplication piecewise
+		FP upper_term = FixedPoint<I, F, Inter>::from_raw_value((lhs_upper << F) / rhs_upper);
+		FP lower_term = FP::from_raw_value(((lhs_lower << F) / rhs_upper) >> F);
+		FP mixed_term = FixedPoint<I, F, Inter>::from_raw_value((rhs_lower) / rhs_upper);
+
+		// Basically doing a power series expansion here for (lhs_upper + lhs_lower) / (rhs_upper + rhs_lower)
+		FP result = FP::zero();
+		FP term = FP::one();
+		for (size_t i = 0; i < 8; i++) {
+			result += term * upper_term;
+			result += term * lower_term;
+			term *= -mixed_term;
+		}
 
-		return FixedPoint<I, F, Inter>::from_raw_value(static_cast<I>(q));
+		return l_pos ^ r_pos ? -result : result;
 	}
 	else {
 		Inter ret = div((static_cast<Inter>(lhs.get_raw_value()) << F), static_cast<Inter>(rhs.get_raw_value()));