From e903a976f4562930f9d2b57e9627890d2bf047b5 Mon Sep 17 00:00:00 2001 From: Pratyush Mishra Date: Tue, 30 May 2023 12:53:37 -0700 Subject: [PATCH 1/3] Initial commit --- Cargo.toml | 2 + ff/Cargo.toml | 1 + ff/src/fields/mod.rs | 65 +------------------ ff/src/fields/models/cubic_extension.rs | 3 +- ff/src/fields/models/fp/mod.rs | 3 +- ff/src/fields/models/fp12_2over3over2.rs | 3 +- ff/src/fields/models/quadratic_extension.rs | 3 +- group/Cargo.toml | 37 +++++++++++ group/src/lib.rs | 70 +++++++++++++++++++++ 9 files changed, 119 insertions(+), 68 deletions(-) create mode 100644 group/Cargo.toml create mode 100644 group/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 6baa587a7..5c7dc4aee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,8 @@ members = [ "serialize", "serialize-derive", + "group", + "ff-macros", "ff-asm", "ff", diff --git a/ff/Cargo.toml b/ff/Cargo.toml index 69eeeafdb..c4c806975 100644 --- a/ff/Cargo.toml +++ b/ff/Cargo.toml @@ -14,6 +14,7 @@ edition = "2021" rust-version = "1.63" [dependencies] +ark-group = { version = "0.4.2", path = "../group" } ark-ff-asm = { version = "0.4.2", path = "../ff-asm" } ark-ff-macros = { version = "0.4.2", path = "../ff-macros" } ark-std = { version = "0.4.0", default-features = false } diff --git a/ff/src/fields/mod.rs b/ff/src/fields/mod.rs index 08e3ebc45..ecb5005c4 100644 --- a/ff/src/fields/mod.rs +++ b/ff/src/fields/mod.rs @@ -1,6 +1,7 @@ use core::iter::Product; use crate::UniformRand; +use ark_group::AdditiveGroup; use ark_serialize::{ CanonicalDeserialize, CanonicalDeserializeWithFlags, CanonicalSerialize, CanonicalSerializeWithFlags, EmptyFlags, Flags, @@ -44,70 +45,6 @@ use ark_std::cmp::max; #[cfg(feature = "parallel")] use rayon::prelude::*; -pub trait AdditiveGroup: - Eq - + 'static - + Sized - + CanonicalSerialize - + CanonicalDeserialize - + Copy - + Clone - + Default - + Send - + Sync - + Hash - + Debug - + Display - + UniformRand - + Zeroize - + Zero - + Neg - + Add - + Sub - + Mul<::Scalar, Output = Self> - + AddAssign - + SubAssign - + MulAssign<::Scalar> - + for<'a> Add<&'a Self, Output = Self> - + for<'a> Sub<&'a Self, Output = Self> - + for<'a> Mul<&'a ::Scalar, Output = Self> - + for<'a> AddAssign<&'a Self> - + for<'a> SubAssign<&'a Self> - + for<'a> MulAssign<&'a ::Scalar> - + for<'a> Add<&'a mut Self, Output = Self> - + for<'a> Sub<&'a mut Self, Output = Self> - + for<'a> Mul<&'a mut ::Scalar, Output = Self> - + for<'a> AddAssign<&'a mut Self> - + for<'a> SubAssign<&'a mut Self> - + for<'a> MulAssign<&'a mut ::Scalar> - + ark_std::iter::Sum - + for<'a> ark_std::iter::Sum<&'a Self> -{ - type Scalar: Field; - - /// The additive identity of the field. - const ZERO: Self; - - /// Doubles `self`. - #[must_use] - fn double(&self) -> Self { - let mut copy = *self; - copy.double_in_place(); - copy - } - /// Doubles `self` in place. - fn double_in_place(&mut self) -> &mut Self { - self.add_assign(*self); - self - } - - /// Negates `self` in place. - fn neg_in_place(&mut self) -> &mut Self { - *self = -(*self); - self - } -} - /// The interface for a generic field. /// Types implementing [`Field`] support common field operations such as addition, subtraction, multiplication, and inverses. 
/// diff --git a/ff/src/fields/models/cubic_extension.rs b/ff/src/fields/models/cubic_extension.rs index 8369706be..55ad44483 100644 --- a/ff/src/fields/models/cubic_extension.rs +++ b/ff/src/fields/models/cubic_extension.rs @@ -1,3 +1,4 @@ +use ark_group::AdditiveGroup; use ark_serialize::{ CanonicalDeserialize, CanonicalDeserializeWithFlags, CanonicalSerialize, CanonicalSerializeWithFlags, Compress, EmptyFlags, Flags, SerializationError, Valid, Validate, @@ -21,7 +22,7 @@ use ark_std::rand::{ use crate::{ fields::{Field, PrimeField}, - AdditiveGroup, LegendreSymbol, SqrtPrecomputation, ToConstraintField, UniformRand, + LegendreSymbol, SqrtPrecomputation, ToConstraintField, UniformRand, }; /// Defines a Cubic extension field from a cubic non-residue. diff --git a/ff/src/fields/models/fp/mod.rs b/ff/src/fields/models/fp/mod.rs index 1113c1222..0a603937b 100644 --- a/ff/src/fields/models/fp/mod.rs +++ b/ff/src/fields/models/fp/mod.rs @@ -1,5 +1,6 @@ use core::iter; +use ark_group::AdditiveGroup; use ark_serialize::{ buffer_byte_size, CanonicalDeserialize, CanonicalDeserializeWithFlags, CanonicalSerialize, CanonicalSerializeWithFlags, Compress, EmptyFlags, Flags, SerializationError, Valid, Validate, @@ -19,7 +20,7 @@ mod montgomery_backend; pub use montgomery_backend::*; use crate::{ - AdditiveGroup, BigInt, BigInteger, FftField, Field, LegendreSymbol, PrimeField, + BigInt, BigInteger, FftField, Field, LegendreSymbol, PrimeField, SqrtPrecomputation, }; /// A trait that specifies the configuration of a prime field. diff --git a/ff/src/fields/models/fp12_2over3over2.rs b/ff/src/fields/models/fp12_2over3over2.rs index 16f0e2ba0..60cbe1f07 100644 --- a/ff/src/fields/models/fp12_2over3over2.rs +++ b/ff/src/fields/models/fp12_2over3over2.rs @@ -1,9 +1,10 @@ +use ark_group::AdditiveGroup; use ark_std::Zero; use super::quadratic_extension::*; use crate::{ fields::{fp6_3over2::*, Field, Fp2, Fp2Config as Fp2ConfigTrait}, - AdditiveGroup, CyclotomicMultSubgroup, + CyclotomicMultSubgroup, }; use core::{ marker::PhantomData, diff --git a/ff/src/fields/models/quadratic_extension.rs b/ff/src/fields/models/quadratic_extension.rs index 5d7f6de96..98f569bd7 100644 --- a/ff/src/fields/models/quadratic_extension.rs +++ b/ff/src/fields/models/quadratic_extension.rs @@ -1,3 +1,4 @@ +use ark_group::AdditiveGroup; use ark_serialize::{ CanonicalDeserialize, CanonicalDeserializeWithFlags, CanonicalSerialize, CanonicalSerializeWithFlags, Compress, EmptyFlags, Flags, SerializationError, Valid, Validate, @@ -22,7 +23,7 @@ use ark_std::rand::{ use crate::{ biginteger::BigInteger, fields::{Field, LegendreSymbol, PrimeField}, - AdditiveGroup, SqrtPrecomputation, ToConstraintField, UniformRand, + SqrtPrecomputation, ToConstraintField, UniformRand, }; /// Defines a Quadratic extension field from a quadratic non-residue. 
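The patch below extracts `AdditiveGroup` into a new `ark-group` crate. As a minimal sketch of what downstream generic code over the extracted trait looks like (not part of the patch; `double_sum` is a hypothetical helper, and any `AdditiveGroup` implementor works for `G`):

```rust
use ark_group::AdditiveGroup;

/// Sums a slice and doubles the result: 2 * (x_1 + ... + x_n).
/// Uses only the surface defined by the trait below: `ZERO`,
/// `AddAssign<&G>`, and `double`.
fn double_sum<G: AdditiveGroup>(xs: &[G]) -> G {
    let mut acc = G::ZERO;
    for x in xs {
        acc += x;
    }
    acc.double()
}
```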
diff --git a/group/Cargo.toml b/group/Cargo.toml new file mode 100644 index 000000000..c550aab69 --- /dev/null +++ b/group/Cargo.toml @@ -0,0 +1,37 @@
+[package]
+name = "ark-group"
+version = "0.4.2"
+authors = [ "arkworks contributors" ]
+description = "A library for finite groups"
+homepage = "https://arkworks.rs"
+repository = "https://github.com/arkworks-rs/algebra"
+documentation = "https://docs.rs/ark-group/"
+keywords = ["cryptography", "groups" ]
+categories = ["cryptography"]
+include = ["Cargo.toml", "build.rs", "src", "doc", "README.md", "LICENSE-APACHE", "LICENSE-MIT"]
+license = "MIT/Apache-2.0"
+edition = "2021"
+rust-version = "1.63"
+
+[dependencies]
+ark-std = { version = "0.4.0", default-features = false }
+ark-serialize = { version = "0.4.2", path = "../serialize", default-features = false }
+derivative = { version = "2", features = ["use_core"] }
+num-traits = { version = "0.2", default-features = false }
+rayon = { version = "1", optional = true }
+zeroize = { version = "1", default-features = false, features = ["zeroize_derive"] }
+num-bigint = { version = "0.4", default-features = false }
+itertools = { version = "0.10", default-features = false }
+
+[dev-dependencies]
+ark-test-curves = { version = "0.4.2", path = "../test-curves", default-features = false, features = [ "bls12_381_curve", "mnt6_753", "secp256k1"] }
+sha2 = { version = "0.10", default-features = false }
+hex = "0.4"
+
+[features]
+default = []
+std = [ "ark-std/std", "ark-serialize/std", "itertools/use_std" ]
+parallel = [ "std", "rayon", "ark-std/parallel" ]
+
+[package.metadata.docs.rs]
+rustdoc-args = ["--html-in-header", "./doc/katex-header.html"]
\ No newline at end of file
diff --git a/group/src/lib.rs b/group/src/lib.rs new file mode 100644 index 000000000..a8be36bf5 --- /dev/null +++ b/group/src/lib.rs @@ -0,0 +1,70 @@
+use ark_std::{ops::{Add, AddAssign, MulAssign, SubAssign, Sub, Mul, Neg}, fmt::{Display, Debug}, hash::Hash};
+use ark_std::UniformRand;
+
+use ark_serialize::{CanonicalSerialize, CanonicalDeserialize};
+use num_traits::Zero;
+use zeroize::Zeroize;
+
+pub trait AdditiveGroup:
+    Eq
+    + 'static
+    + Sized
+    + CanonicalSerialize
+    + CanonicalDeserialize
+    + Copy
+    + Clone
+    + Default
+    + Send
+    + Sync
+    + Hash
+    + Debug
+    + Display
+    + UniformRand
+    + Zeroize
+    + Zero
+    + Neg<Output = Self>
+    + Add<Self, Output = Self>
+    + Sub<Self, Output = Self>
+    + Mul<<Self as AdditiveGroup>::Scalar, Output = Self>
+    + AddAssign<Self>
+    + SubAssign<Self>
+    + MulAssign<<Self as AdditiveGroup>::Scalar>
+    + for<'a> Add<&'a Self, Output = Self>
+    + for<'a> Sub<&'a Self, Output = Self>
+    + for<'a> Mul<&'a <Self as AdditiveGroup>::Scalar, Output = Self>
+    + for<'a> AddAssign<&'a Self>
+    + for<'a> SubAssign<&'a Self>
+    + for<'a> MulAssign<&'a <Self as AdditiveGroup>::Scalar>
+    + for<'a> Add<&'a mut Self, Output = Self>
+    + for<'a> Sub<&'a mut Self, Output = Self>
+    + for<'a> Mul<&'a mut <Self as AdditiveGroup>::Scalar, Output = Self>
+    + for<'a> AddAssign<&'a mut Self>
+    + for<'a> SubAssign<&'a mut Self>
+    + for<'a> MulAssign<&'a mut <Self as AdditiveGroup>::Scalar>
+    + ark_std::iter::Sum<Self>
+    + for<'a> ark_std::iter::Sum<&'a Self>
+{
+    type Scalar;
+
+    /// The additive identity of the group.
+    const ZERO: Self;
+
+    /// Doubles `self`.
+    #[must_use]
+    fn double(&self) -> Self {
+        let mut copy = *self;
+        copy.double_in_place();
+        copy
+    }
+    /// Doubles `self` in place.
+    fn double_in_place(&mut self) -> &mut Self {
+        self.add_assign(*self);
+        self
+    }
+
+    /// Negates `self` in place.
+ fn neg_in_place(&mut self) -> &mut Self { + *self = -(*self); + self + } +} From 47cd419d95ceb3975a41a2ae8343b4a80ce2eb71 Mon Sep 17 00:00:00 2001 From: Pratyush Mishra Date: Mon, 24 Jul 2023 18:23:53 -0400 Subject: [PATCH 2/3] WIP --- .github/workflows/ci.yml | 2 +- Cargo.toml | 4 +- {group => algebra-core}/Cargo.toml | 9 +- .../src/biginteger/arithmetic.rs | 2 + {ff => algebra-core}/src/biginteger/mod.rs | 49 +- {ff => algebra-core}/src/biginteger/tests.rs | 0 {ff => algebra-core}/src/bits.rs | 0 algebra-core/src/const_helpers.rs | 322 ++++++++++++ algebra-core/src/field/arithmetic.rs | 329 +++++++++++++ algebra-core/src/field/cyclotomic.rs | 121 +++++ algebra-core/src/field/fft_friendly.rs | 98 ++++ algebra-core/src/field/mod.rs | 459 ++++++++++++++++++ algebra-core/src/field/prime.rs | 97 ++++ algebra-core/src/field/sqrt.rs | 149 ++++++ algebra-core/src/group/mod.rs | 206 ++++++++ algebra-core/src/lib.rs | 15 + algebra-core/src/module/mod.rs | 152 ++++++ algebra-core/src/module/scalar.rs | 137 ++++++ algebra-core/src/ring.rs | 22 + algebra-core/src/scalar_mul/fixed_base.rs | 189 ++++++++ algebra-core/src/scalar_mul/mod.rs | 13 + .../src/scalar_mul/variable_base/mod.rs | 296 +++++++++++ .../variable_base/stream_pippenger.rs | 125 +++++ algebra-core/src/scalar_mul/wnaf.rs | 87 ++++ {ff-macros => common-macros}/Cargo.toml | 2 +- {ff-macros => common-macros}/LICENSE-APACHE | 0 {ff-macros => common-macros}/LICENSE-MIT | 0 {ff-macros => common-macros}/src/lib.rs | 0 .../src/montgomery/add.rs | 0 .../src/montgomery/biginteger.rs | 0 .../src/montgomery/double.rs | 0 .../src/montgomery/mod.rs | 0 .../src/montgomery/mul.rs | 0 .../src/montgomery/square.rs | 0 .../src/montgomery/sum_of_products.rs | 0 {ff-macros => common-macros}/src/unroll.rs | 0 {ff-macros => common-macros}/src/utils.rs | 0 ff/Cargo.toml | 4 +- ff/src/lib.rs | 5 +- group/src/lib.rs | 70 --- 40 files changed, 2875 insertions(+), 89 deletions(-) rename {group => algebra-core}/Cargo.toml (81%) rename {ff => algebra-core}/src/biginteger/arithmetic.rs (99%) rename {ff => algebra-core}/src/biginteger/mod.rs (95%) rename {ff => algebra-core}/src/biginteger/tests.rs (100%) rename {ff => algebra-core}/src/bits.rs (100%) create mode 100644 algebra-core/src/const_helpers.rs create mode 100644 algebra-core/src/field/arithmetic.rs create mode 100644 algebra-core/src/field/cyclotomic.rs create mode 100644 algebra-core/src/field/fft_friendly.rs create mode 100644 algebra-core/src/field/mod.rs create mode 100644 algebra-core/src/field/prime.rs create mode 100644 algebra-core/src/field/sqrt.rs create mode 100644 algebra-core/src/group/mod.rs create mode 100644 algebra-core/src/lib.rs create mode 100644 algebra-core/src/module/mod.rs create mode 100644 algebra-core/src/module/scalar.rs create mode 100644 algebra-core/src/ring.rs create mode 100644 algebra-core/src/scalar_mul/fixed_base.rs create mode 100644 algebra-core/src/scalar_mul/mod.rs create mode 100644 algebra-core/src/scalar_mul/variable_base/mod.rs create mode 100644 algebra-core/src/scalar_mul/variable_base/stream_pippenger.rs create mode 100644 algebra-core/src/scalar_mul/wnaf.rs rename {ff-macros => common-macros}/Cargo.toml (96%) rename {ff-macros => common-macros}/LICENSE-APACHE (100%) rename {ff-macros => common-macros}/LICENSE-MIT (100%) rename {ff-macros => common-macros}/src/lib.rs (100%) rename {ff-macros => common-macros}/src/montgomery/add.rs (100%) rename {ff-macros => common-macros}/src/montgomery/biginteger.rs (100%) rename {ff-macros => 
common-macros}/src/montgomery/double.rs (100%) rename {ff-macros => common-macros}/src/montgomery/mod.rs (100%) rename {ff-macros => common-macros}/src/montgomery/mul.rs (100%) rename {ff-macros => common-macros}/src/montgomery/square.rs (100%) rename {ff-macros => common-macros}/src/montgomery/sum_of_products.rs (100%) rename {ff-macros => common-macros}/src/unroll.rs (100%) rename {ff-macros => common-macros}/src/utils.rs (100%) delete mode 100644 group/src/lib.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 66ac97290..66731b59b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -248,7 +248,7 @@ jobs: echo "[patch.crates-io]"; echo "ark-ff = { path = 'algebra/ff' }"; echo "ark-serialize = { path = 'algebra/serialize' }"; - echo "ark-ff-macros = { path = 'algebra/ff-macros' }"; + echo "ark-algebra-macros = { path = 'algebra/common-macros' }"; echo "ark-ff-asm = { path = 'algebra/ff-asm' }"; echo "ark-ec = { path = 'algebra/ec' }"; echo "ark-algebra-bench-templates = { path = 'algebra/bench-templates' }" diff --git a/Cargo.toml b/Cargo.toml index 5c7dc4aee..69225629a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,9 +4,9 @@ members = [ "serialize", "serialize-derive", - "group", + "algebra-core", - "ff-macros", + "common-macros", "ff-asm", "ff", diff --git a/group/Cargo.toml b/algebra-core/Cargo.toml similarity index 81% rename from group/Cargo.toml rename to algebra-core/Cargo.toml index c550aab69..210aa9bd0 100644 --- a/group/Cargo.toml +++ b/algebra-core/Cargo.toml @@ -1,11 +1,11 @@ [package] -name = "ark-group" +name = "ark-algebra-core" version = "0.4.2" authors = [ "arkworks contributors" ] -description = "A library for finite groups" +description = "A library for core algebraic structures used in arkworks" homepage = "https://arkworks.rs" repository = "https://github.com/arkworks-rs/algebra" -documentation = "https://docs.rs/ark-group/" +documentation = "https://docs.rs/ark-algebra-core/" keywords = ["cryptography", "groups" ] categories = ["cryptography"] include = ["Cargo.toml", "build.rs", "src", "doc", "README.md", "LICENSE-APACHE", "LICENSE-MIT"] @@ -16,12 +16,13 @@ rust-version = "1.63" [dependencies] ark-std = { version = "0.4.0", default-features = false } ark-serialize = { version = "0.4.2", path = "../serialize", default-features = false } +ark-algebra-macros = { version = "0.4.2", path = "../common-macros", default-features = false } derivative = { version = "2", features = ["use_core"] } num-traits = { version = "0.2", default-features = false } rayon = { version = "1", optional = true } zeroize = { version = "1", default-features = false, features = ["zeroize_derive"] } num-bigint = { version = "0.4", default-features = false } -itertools = { version = "0.10", default-features = false } +itertools = { version = "0.11", default-features = false } [dev-dependencies] ark-test-curves = { version = "0.4.2", path = "../test-curves", default-features = false, features = [ "bls12_381_curve", "mnt6_753", "secp256k1"] } diff --git a/ff/src/biginteger/arithmetic.rs b/algebra-core/src/biginteger/arithmetic.rs similarity index 99% rename from ff/src/biginteger/arithmetic.rs rename to algebra-core/src/biginteger/arithmetic.rs index 9b4ca702f..6be87affc 100644 --- a/ff/src/biginteger/arithmetic.rs +++ b/algebra-core/src/biginteger/arithmetic.rs @@ -101,6 +101,7 @@ pub fn mac_discard(a: u64, b: u64, c: u64, carry: &mut u64) { *carry = (tmp >> 64) as u64; } +#[macro_export] macro_rules! 
mac_with_carry {
     ($a:expr, $b:expr, $c:expr, &mut $carry:expr$(,)?) => {{
         let tmp = ($a as u128) + ($b as u128 * $c as u128) + ($carry as u128);
@@ -109,6 +110,7 @@ macro_rules! mac_with_carry {
     }};
 }
 
+#[macro_export]
 macro_rules! mac {
     ($a:expr, $b:expr, $c:expr, &mut $carry:expr$(,)?) => {{
         let tmp = ($a as u128) + ($b as u128 * $c as u128);
diff --git a/ff/src/biginteger/mod.rs b/algebra-core/src/biginteger/mod.rs similarity index 95% rename from ff/src/biginteger/mod.rs rename to algebra-core/src/biginteger/mod.rs index 9ece8ee06..37e62b463 100644 --- a/ff/src/biginteger/mod.rs +++ b/algebra-core/src/biginteger/mod.rs @@ -1,9 +1,10 @@
 use crate::{
     bits::{BitIteratorBE, BitIteratorLE},
-    const_for, UniformRand,
+    const_for,
+    module::{Scalar, Sign},
 };
 #[allow(unused)]
-use ark_ff_macros::unroll_for_loops;
+use ark_algebra_macros::unroll_for_loops;
 use ark_serialize::{
     CanonicalDeserialize, CanonicalSerialize, Compress, SerializationError, Valid, Validate,
 };
@@ -16,6 +17,7 @@ use ark_std::{
         Rng,
     },
     vec::Vec,
+    UniformRand,
 };
 use num_bigint::BigUint;
 use zeroize::Zeroize;
@@ -26,6 +28,34 @@ pub mod arithmetic;
 #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, Zeroize)]
 pub struct BigInt<const N: usize>(pub [u64; N]);
 
+impl<const N: usize> Scalar for BigInt<N> {
+    const MAX_BIT_SIZE: Option<u32> = Some(N as u32 * 64);
+    type U64Ref = [u64; N];
+    type U8Ref = ScalarU8Buffer<N>;
+
+    fn as_bytes(&self) -> (Sign, ScalarU8Buffer<N>) {
+        let mut buf = ScalarU8Buffer::<N>([[0u8; 8]; N]);
+        for (i, limb) in self.0.iter().enumerate() {
+            buf.0[i] = limb.to_le_bytes();
+        }
+        (Sign::Positive, buf)
+    }
+
+    fn as_u64s(&self) -> (Sign, Self::U64Ref) {
+        (Sign::Positive, self.0)
+    }
+}
+
+#[doc(hidden)]
+#[repr(C, align(1))]
+pub struct ScalarU8Buffer<const N: usize>(pub [[u8; 8]; N]);
+
+impl<const N: usize> AsRef<[u8]> for ScalarU8Buffer<N> {
+    fn as_ref(&self) -> &[u8] {
+        unsafe { ark_std::slice::from_raw_parts(self as *const Self as *const u8, N * 8) }
+    }
+}
+
 impl<const N: usize> Default for BigInt<N> {
     fn default() -> Self {
         Self([0u64; N])
     }
 }
@@ -229,7 +259,8 @@ impl<const N: usize> BigInt<N> {
     }
 
     #[inline]
-    pub(crate) const fn const_sub_with_borrow(mut self, other: &Self) -> (Self, bool) {
+    #[doc(hidden)]
+    pub const fn const_sub_with_borrow(mut self, other: &Self) -> (Self, bool) {
         let mut borrow = 0;
 
         const_for!((i in 0..N) {
@@ -240,7 +271,8 @@ impl<const N: usize> BigInt<N> {
     }
 
     #[inline]
-    pub(crate) const fn const_add_with_carry(mut self, other: &Self) -> (Self, bool) {
+    #[doc(hidden)]
+    pub const fn const_add_with_carry(mut self, other: &Self) -> (Self, bool) {
         let mut carry = 0;
 
         crate::const_for!((i in 0..N) {
@@ -250,7 +282,9 @@ impl<const N: usize> BigInt<N> {
         (self, carry != 0)
     }
 
-    const fn const_mul2_with_carry(mut self) -> (Self, bool) {
+    #[inline]
+    #[doc(hidden)]
+    pub const fn const_mul2_with_carry(mut self) -> (Self, bool) {
         let mut last = 0;
         crate::const_for!((i in 0..N) {
             let a = self.0[i];
@@ -262,7 +296,9 @@ impl<const N: usize> BigInt<N> {
         (self, last != 0)
     }
 
-    pub(crate) const fn const_is_zero(&self) -> bool {
+    #[inline]
+    #[doc(hidden)]
+    pub const fn const_is_zero(&self) -> bool {
         let mut is_zero = true;
         crate::const_for!((i in 0..N) {
             is_zero &= self.0[i] == 0;
@@ -737,6 +773,7 @@ pub trait BigInteger:
     + From<u64>
     + TryFrom<BigUint, Error = ()>
     + Into<BigUint>
+    + Scalar
 {
     /// Number of 64-bit limbs representing `Self`.
     const NUM_LIMBS: usize;
diff --git a/ff/src/biginteger/tests.rs b/algebra-core/src/biginteger/tests.rs similarity index 100% rename from ff/src/biginteger/tests.rs rename to algebra-core/src/biginteger/tests.rs diff --git a/ff/src/bits.rs b/algebra-core/src/bits.rs similarity index 100% rename from ff/src/bits.rs rename to algebra-core/src/bits.rs diff --git a/algebra-core/src/const_helpers.rs b/algebra-core/src/const_helpers.rs new file mode 100644 index 000000000..b5a4b283e --- /dev/null +++ b/algebra-core/src/const_helpers.rs @@ -0,0 +1,322 @@
+use ark_serialize::{Read, Write};
+use ark_std::ops::{Index, IndexMut};
+
+use crate::biginteger::BigInt;
+
+/// A helper macro for emulating `for` loops in a `const` context.
+/// # Usage
+/// ```rust
+/// # use ark_ff::const_for;
+/// const fn for_in_const() {
+///     let mut array = [0usize; 4];
+///     const_for!((i in 0..(array.len())) { // We need to wrap the `array.len()` in parenthesis.
+///         array[i] = i;
+///     });
+///     assert!(array[0] == 0);
+///     assert!(array[1] == 1);
+///     assert!(array[2] == 2);
+///     assert!(array[3] == 3);
+/// }
+/// ```
+#[macro_export]
+macro_rules! const_for {
+    (($i:ident in $start:tt..$end:tt) $code:expr ) => {{
+        let mut $i = $start;
+        while $i < $end {
+            $code
+            $i += 1;
+        }
+    }};
+}
+
+/// A buffer to hold values of size 2 * N. This is mostly
+/// a hack that's necessary until `generic_const_exprs` is stable.
+#[derive(Copy, Clone)]
+#[repr(C, align(8))]
+#[doc(hidden)]
+pub struct MulBuffer<const N: usize> {
+    pub(super) b0: [u64; N],
+    pub(super) b1: [u64; N],
+}
+
+impl<const N: usize> MulBuffer<N> {
+    #[doc(hidden)]
+    #[inline]
+    pub const fn new(b0: [u64; N], b1: [u64; N]) -> Self {
+        Self { b0, b1 }
+    }
+
+    #[doc(hidden)]
+    #[inline]
+    pub const fn zeroed() -> Self {
+        let b = [0u64; N];
+        Self::new(b, b)
+    }
+
+    #[doc(hidden)]
+    #[inline(always)]
+    pub const fn get(&self, index: usize) -> &u64 {
+        if index < N {
+            &self.b0[index]
+        } else {
+            &self.b1[index - N]
+        }
+    }
+
+    #[inline(always)]
+    pub(super) fn get_mut(&mut self, index: usize) -> &mut u64 {
+        if index < N {
+            &mut self.b0[index]
+        } else {
+            &mut self.b1[index - N]
+        }
+    }
+}
+
+impl<const N: usize> Index<usize> for MulBuffer<N> {
+    type Output = u64;
+    #[inline(always)]
+    fn index(&self, index: usize) -> &Self::Output {
+        self.get(index)
+    }
+}
+
+impl<const N: usize> IndexMut<usize> for MulBuffer<N> {
+    #[inline(always)]
+    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
+        self.get_mut(index)
+    }
+}
+
+/// A buffer to hold values of size 8 * N + 1 bytes. This is mostly
+/// a hack that's necessary until `generic_const_exprs` is stable.
+#[derive(Copy, Clone)]
+#[repr(C, align(1))]
+#[doc(hidden)]
+pub struct SerBuffer<const N: usize> {
+    pub(super) buffers: [[u8; 8]; N],
+    pub(super) last: u8,
+}
+
+impl<const N: usize> SerBuffer<N> {
+    #[inline]
+    #[doc(hidden)]
+    pub const fn zeroed() -> Self {
+        Self {
+            buffers: [[0u8; 8]; N],
+            last: 0u8,
+        }
+    }
+
+    #[inline(always)]
+    #[doc(hidden)]
+    pub const fn get(&self, index: usize) -> &u8 {
+        if index == 8 * N {
+            &self.last
+        } else {
+            let part = index / 8;
+            let in_buffer_index = index % 8;
+            &self.buffers[part][in_buffer_index]
+        }
+    }
+
+    #[inline(always)]
+    #[doc(hidden)]
+    pub fn get_mut(&mut self, index: usize) -> &mut u8 {
+        if index == 8 * N {
+            &mut self.last
+        } else {
+            let part = index / 8;
+            let in_buffer_index = index % 8;
+            &mut self.buffers[part][in_buffer_index]
+        }
+    }
+
+    #[allow(unsafe_code)]
+    #[doc(hidden)]
+    pub fn as_slice(&self) -> &[u8] {
+        unsafe { ark_std::slice::from_raw_parts((self as *const Self) as *const u8, 8 * N + 1) }
+    }
+
+    #[inline(always)]
+    #[doc(hidden)]
+    pub fn last_n_plus_1_bytes_mut(&mut self) -> impl Iterator<Item = &mut u8> {
+        self.buffers[N - 1]
+            .iter_mut()
+            .chain(ark_std::iter::once(&mut self.last))
+    }
+
+    #[inline(always)]
+    #[doc(hidden)]
+    pub fn copy_from_u8_slice(&mut self, other: &[u8]) {
+        other.chunks(8).enumerate().for_each(|(i, chunk)| {
+            if i < N {
+                self.buffers[i][..chunk.len()].copy_from_slice(chunk);
+            } else {
+                self.last = chunk[0]
+            }
+        });
+    }
+
+    #[inline(always)]
+    #[doc(hidden)]
+    pub fn copy_from_u64_slice(&mut self, other: &[u64; N]) {
+        other
+            .iter()
+            .zip(&mut self.buffers)
+            .for_each(|(other, this)| *this = other.to_le_bytes());
+    }
+
+    #[inline(always)]
+    #[doc(hidden)]
+    pub fn to_bigint(self) -> BigInt<N> {
+        let mut self_integer = BigInt::from(0u64);
+        self_integer
+            .0
+            .iter_mut()
+            .zip(self.buffers)
+            .for_each(|(other, this)| *other = u64::from_le_bytes(this));
+        self_integer
+    }
+
+    #[inline(always)]
+    #[doc(hidden)]
+    /// Write up to `num_bytes` bytes from `self` to `other`.
+    /// `num_bytes` is allowed to range from `8 * (N - 1) + 1` to `8 * N + 1`.
+    pub fn write_up_to(&self, mut other: impl Write, num_bytes: usize) -> ark_std::io::Result<()> {
+        debug_assert!(num_bytes <= 8 * N + 1, "index too large");
+        debug_assert!(num_bytes > 8 * (N - 1), "index too small");
+        // unconditionally write first `N - 1` limbs.
+        for i in 0..(N - 1) {
+            other.write_all(&self.buffers[i])?;
+        }
+        // for the `N`-th limb, depending on `index`, we can write anywhere from
+        // 1 to all bytes.
+        let remaining_bytes = num_bytes - (8 * (N - 1));
+        let write_last_byte = remaining_bytes > 8;
+        let num_last_limb_bytes = ark_std::cmp::min(8, remaining_bytes);
+        other.write_all(&self.buffers[N - 1][..num_last_limb_bytes])?;
+        if write_last_byte {
+            other.write_all(&[self.last])?;
+        }
+        Ok(())
+    }
+
+    #[inline(always)]
+    #[doc(hidden)]
+    /// Read up to `num_bytes` bytes from `other` to `self`.
+    /// `num_bytes` is allowed to range from `8 * (N - 1)` to `8 * N + 1`.
+    pub fn read_exact_up_to(
+        &mut self,
+        mut other: impl Read,
+        num_bytes: usize,
+    ) -> ark_std::io::Result<()> {
+        debug_assert!(num_bytes <= 8 * N + 1, "index too large");
+        debug_assert!(num_bytes > 8 * (N - 1), "index too small");
+        // unconditionally read first `N - 1` limbs.
+        for i in 0..(N - 1) {
+            other.read_exact(&mut self.buffers[i])?;
+        }
+        // for the `N`-th limb, depending on `index`, we can read anywhere from
+        // 1 to all bytes.
        let remaining_bytes = num_bytes - (8 * (N - 1));
+        let write_last_byte = remaining_bytes > 8;
+        let num_last_limb_bytes = ark_std::cmp::min(8, remaining_bytes);
+        other.read_exact(&mut self.buffers[N - 1][..num_last_limb_bytes])?;
+        if write_last_byte {
+            let mut last = [0u8; 1];
+            other.read_exact(&mut last)?;
+            self.last = last[0];
+        }
+        Ok(())
+    }
+}
+
+impl<const N: usize> Index<usize> for SerBuffer<N> {
+    type Output = u8;
+    #[inline(always)]
+    fn index(&self, index: usize) -> &Self::Output {
+        self.get(index)
+    }
+}
+
+impl<const N: usize> IndexMut<usize> for SerBuffer<N> {
+    #[inline(always)]
+    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
+        self.get_mut(index)
+    }
+}
+
+pub(super) struct RBuffer<const N: usize>(pub [u64; N], pub u64);
+
+impl<const N: usize> RBuffer<N> {
+    /// Find the number of bits in the binary decomposition of `self`.
+    pub(super) const fn num_bits(&self) -> u32 {
+        (N * 64) as u32 + (64 - self.1.leading_zeros())
+    }
+
+    /// Returns the `i`-th bit where bit 0 is the least significant one.
+    /// In other words, the bit with weight `2^i`.
+    pub(super) const fn get_bit(&self, i: usize) -> bool {
+        let d = i / 64;
+        let b = i % 64;
+        if d == N {
+            (self.1 >> b) & 1 == 1
+        } else {
+            (self.0[d] >> b) & 1 == 1
+        }
+    }
+}
+
+pub(super) struct R2Buffer<const N: usize>(pub [u64; N], pub [u64; N], pub u64);
+
+impl<const N: usize> R2Buffer<N> {
+    /// Find the number of bits in the binary decomposition of `self`.
+    pub(super) const fn num_bits(&self) -> u32 {
+        ((2 * N) * 64) as u32 + (64 - self.2.leading_zeros())
+    }
+
+    /// Returns the `i`-th bit where bit 0 is the least significant one.
+    /// In other words, the bit with weight `2^i`.
+    pub(super) const fn get_bit(&self, i: usize) -> bool {
+        let d = i / 64;
+        let b = i % 64;
+        if d == 2 * N {
+            (self.2 >> b) & 1 == 1
+        } else if d >= N {
+            (self.1[d - N] >> b) & 1 == 1
+        } else {
+            (self.0[d] >> b) & 1 == 1
+        }
+    }
+}
+
+mod tests {
+    #[test]
+    fn test_mul_buffer_correctness() {
+        use super::*;
+        type Buf = MulBuffer<10>;
+        let temp = Buf::new([10u64; 10], [20u64; 10]);
+
+        for i in 0..20 {
+            if i < 10 {
+                assert_eq!(temp[i], 10);
+            } else {
+                assert_eq!(temp[i], 20);
+            }
+        }
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_mul_buffer_soundness() {
+        use super::*;
+        type Buf = MulBuffer<10>;
+        let temp = Buf::new([10u64; 10], [10u64; 10]);
+
+        for i in 20..21 {
+            // indexing `temp[20]` should panic
+            assert_eq!(temp[i], 10);
+        }
+    }
+}
diff --git a/algebra-core/src/field/arithmetic.rs b/algebra-core/src/field/arithmetic.rs new file mode 100644 index 000000000..431cab879 --- /dev/null +++ b/algebra-core/src/field/arithmetic.rs @@ -0,0 +1,329 @@
+// Implements AddAssign on Self by deferring to an implementation on &Self
+#[macro_export]
+macro_rules! impl_additive_ops_from_ref {
+    ($type: ident, $params: ident) => {
+        #[allow(unused_qualifications)]
+        impl<P: $params> core::ops::Add<Self> for $type<P> {
+            type Output = Self;
+
+            #[inline]
+            fn add(self, other: Self) -> Self {
+                let mut result = self;
+                result.add_assign(&other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, P: $params> core::ops::Add<&'a mut Self> for $type<P> {
+            type Output = Self;
+
+            #[inline]
+            fn add(self, other: &'a mut Self) -> Self {
+                let mut result = self;
+                result.add_assign(&*other);
+                result
+            }
+        }
+
+        impl<'b, P: $params> core::ops::Add<$type<P>> for &'b $type<P> {
+            type Output = $type<P>;
+
+            #[inline]
+            fn add(self, mut other: $type<P>) -> $type<P> {
+                other.add_assign(self);
+                other
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, 'b, P: $params> core::ops::Add<&'a $type<P>> for &'b $type<P> {
+            type Output = $type<P>;
+
+            #[inline]
+            fn add(self, other: &'a $type<P>) -> $type<P> {
+                let mut result = *self;
+                result.add_assign(&*other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, 'b, P: $params> core::ops::Add<&'a mut $type<P>> for &'b $type<P> {
+            type Output = $type<P>;
+
+            #[inline]
+            fn add(self, other: &'a mut $type<P>) -> $type<P> {
+                let mut result = *self;
+                result.add_assign(&*other);
+                result
+            }
+        }
+
+        impl<'b, P: $params> core::ops::Sub<$type<P>> for &'b $type<P> {
+            type Output = $type<P>;
+
+            #[inline]
+            fn sub(self, other: $type<P>) -> $type<P> {
+                let mut result = *self;
+                result.sub_assign(&other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, 'b, P: $params> core::ops::Sub<&'a $type<P>> for &'b $type<P> {
+            type Output = $type<P>;
+
+            #[inline]
+            fn sub(self, other: &'a $type<P>) -> $type<P> {
+                let mut result = *self;
+                result.sub_assign(&*other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, 'b, P: $params> core::ops::Sub<&'a mut $type<P>> for &'b $type<P> {
+            type Output = $type<P>;
+
+            #[inline]
+            fn sub(self, other: &'a mut $type<P>) -> $type<P> {
+                let mut result = *self;
+                result.sub_assign(&*other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<P: $params> core::ops::Sub<Self> for $type<P> {
+            type Output = Self;
+
+            #[inline]
+            fn sub(self, other: Self) -> Self {
+                let mut result = self;
+                result.sub_assign(&other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, P: $params> core::ops::Sub<&'a mut Self> for $type<P> {
+            type Output = Self;
+
+            #[inline]
+            fn sub(self, other: &'a mut Self) -> Self {
+                let mut result = self;
+                result.sub_assign(&*other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<P: $params> core::iter::Sum<Self> for $type<P> {
+            fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
+                iter.fold(Self::zero(), core::ops::Add::add)
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, P: $params> core::iter::Sum<&'a Self> for $type<P> {
+            fn sum<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
+                iter.fold(Self::zero(), core::ops::Add::add)
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<P: $params> core::ops::AddAssign<Self> for $type<P> {
+            fn add_assign(&mut self, other: Self) {
+                self.add_assign(&other)
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<P: $params> core::ops::SubAssign<Self> for $type<P> {
+            fn sub_assign(&mut self, other: Self) {
+                self.sub_assign(&other)
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, P: $params> core::ops::AddAssign<&'a mut Self> for $type<P> {
+            fn add_assign(&mut self, other: &'a mut Self) {
+                self.add_assign(&*other)
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, P: $params> core::ops::SubAssign<&'a mut Self> for $type<P> {
+            fn sub_assign(&mut self, other: &'a mut Self) {
+                self.sub_assign(&*other)
+            }
+        }
+    };
+}
+
+// Implements `MulAssign` and `DivAssign` by deferring to an implementation on &Self
+#[macro_export]
+macro_rules! impl_multiplicative_ops_from_ref {
+    ($type: ident, $params: ident) => {
+        #[allow(unused_qualifications)]
+        impl<P: $params> core::ops::Mul<Self> for $type<P> {
+            type Output = Self;
+
+            #[inline]
+            fn mul(self, other: Self) -> Self {
+                let mut result = self;
+                result.mul_assign(&other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<P: $params> core::ops::Div<Self> for $type<P> {
+            type Output = Self;
+
+            #[inline]
+            fn div(self, other: Self) -> Self {
+                let mut result = self;
+                result.div_assign(&other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, P: $params> core::ops::Mul<&'a mut Self> for $type<P> {
+            type Output = Self;
+
+            #[inline]
+            fn mul(self, other: &'a mut Self) -> Self {
+                let mut result = self;
+                result.mul_assign(&*other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, P: $params> core::ops::Div<&'a mut Self> for $type<P> {
+            type Output = Self;
+
+            #[inline]
+            fn div(self, other: &'a mut Self) -> Self {
+                let mut result = self;
+                result.div_assign(&*other);
+                result
+            }
+        }
+
+        impl<'b, P: $params> core::ops::Mul<$type<P>> for &'b $type<P> {
+            type Output = $type<P>;
+
+            #[inline]
+            fn mul(self, mut other: $type<P>) -> $type<P> {
+                other.mul_assign(self);
+                other
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, 'b, P: $params> core::ops::Mul<&'a $type<P>> for &'b $type<P> {
+            type Output = $type<P>;
+
+            #[inline]
+            fn mul(self, other: &'a $type<P>) -> $type<P> {
+                let mut result = *self;
+                result.mul_assign(&*other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, 'b, P: $params> core::ops::Mul<&'a mut $type<P>> for &'b $type<P> {
+            type Output = $type<P>;
+
+            #[inline]
+            fn mul(self, other: &'a mut $type<P>) -> $type<P> {
+                let mut result = *self;
+                result.mul_assign(&*other);
+                result
+            }
+        }
+
+        impl<'b, P: $params> core::ops::Div<$type<P>> for &'b $type<P> {
+            type Output = $type<P>;
+
+            #[inline]
+            fn div(self, other: $type<P>) -> $type<P> {
+                let mut result = *self;
+                result.div_assign(&other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, 'b, P: $params> core::ops::Div<&'a $type<P>> for &'b $type<P> {
+            type Output = $type<P>;
+
+            #[inline]
+            fn div(self, other: &'a $type<P>) -> $type<P> {
+                let mut result = *self;
+                result.div_assign(&*other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, 'b, P: $params> core::ops::Div<&'a mut $type<P>> for &'b $type<P> {
+            type Output = $type<P>;
+
+            #[inline]
+            fn div(self, other: &'a mut $type<P>) -> $type<P> {
+                let mut result = *self;
+                result.div_assign(&*other);
+                result
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<P: $params> core::iter::Product<Self> for $type<P> {
+            fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
+                iter.fold(Self::one(), core::ops::Mul::mul)
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, P: $params> core::iter::Product<&'a Self> for $type<P> {
+            fn product<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
+                iter.fold(Self::one(), Mul::mul)
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<P: $params> core::ops::MulAssign<Self> for $type<P> {
+            fn mul_assign(&mut self, other: Self) {
+                self.mul_assign(&other)
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, P: $params> core::ops::DivAssign<&'a mut Self> for $type<P> {
+            fn div_assign(&mut self, other: &'a mut Self) {
+                self.div_assign(&*other)
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<'a, P: $params> core::ops::MulAssign<&'a mut Self> for $type<P> {
+            fn mul_assign(&mut self, other: &'a mut Self) {
+                self.mul_assign(&*other)
+            }
+        }
+
+        #[allow(unused_qualifications)]
+        impl<P: $params> core::ops::DivAssign<Self> for $type<P> {
+            fn div_assign(&mut self, other: Self) {
+                self.div_assign(&other)
+            }
+        }
+    };
+}
diff --git a/algebra-core/src/field/cyclotomic.rs b/algebra-core/src/field/cyclotomic.rs new file mode 100644 index 000000000..a9b4a156c --- /dev/null +++ b/algebra-core/src/field/cyclotomic.rs @@ -0,0 +1,121 @@
+use crate::MultiplicativeGroup;
+
+/// Fields that have a cyclotomic multiplicative subgroup, and which can
+/// leverage efficient inversion and squaring algorithms for elements in this subgroup.
+/// If a field has multiplicative order p^d - 1, the cyclotomic subgroups refer to subgroups of order φ_n(p),
+/// for any n < d, where φ_n is the [n-th cyclotomic polynomial](https://en.wikipedia.org/wiki/Cyclotomic_polynomial).
+///
+/// ## Note
+///
+/// Note that this trait is unrelated to the `Group` trait from the `ark_ec` crate. That trait
+/// denotes an *additive* group, while this trait denotes a *multiplicative* group.
+pub trait CyclotomicMultSubgroup: MultiplicativeGroup {
+    /// Is the inverse fast to compute? For example, in quadratic extensions, the inverse
+    /// can be computed at the cost of negating one coordinate, which is much faster than
+    /// standard inversion.
+    /// By default this is `false`, but should be set to `true` for quadratic extensions.
+    const INVERSE_IS_FAST: bool = false;
+
+    /// Compute a square in the cyclotomic subgroup. By default this is computed using [`Field::square`](crate::Field::square), but for
+    /// degree 12 extensions, this can be computed faster than normal squaring.
+    ///
+    /// # Warning
+    ///
+    /// This method should be invoked only when `self` is in the cyclotomic subgroup.
+    fn cyclotomic_square(&self) -> Self {
+        let mut result = *self;
+        *result.cyclotomic_square_in_place()
+    }
+
+    /// Square `self` in place. By default this is computed using
+    /// [`Field::square_in_place`](crate::Field::square_in_place), but for degree 12 extensions,
+    /// this can be computed faster than normal squaring.
+    ///
+    /// # Warning
+    ///
+    /// This method should be invoked only when `self` is in the cyclotomic subgroup.
+    fn cyclotomic_square_in_place(&mut self) -> &mut Self {
+        self.square_in_place()
+    }
+
+    /// Compute the inverse of `self`. See [`Self::INVERSE_IS_FAST`] for details.
+    /// Returns [`None`] if `self.is_zero()`, and [`Some`] otherwise.
+    ///
+    /// # Warning
+    ///
+    /// This method should be invoked only when `self` is in the cyclotomic subgroup.
+    fn cyclotomic_inverse(&self) -> Option<Self> {
+        let mut result = *self;
+        result.cyclotomic_inverse_in_place().copied()
+    }
+
+    /// Compute the inverse of `self`. See [`Self::INVERSE_IS_FAST`] for details.
+    /// Returns [`None`] if `self.is_zero()`, and [`Some`] otherwise.
+    ///
+    /// # Warning
+    ///
+    /// This method should be invoked only when `self` is in the cyclotomic subgroup.
+    fn cyclotomic_inverse_in_place(&mut self) -> Option<&mut Self> {
+        Some(self.invert_in_place())
+    }
+
+    /// Compute a cyclotomic exponentiation of `self` with respect to `e`.
+    ///
+    /// # Warning
+    ///
+    /// This method should be invoked only when `self` is in the cyclotomic subgroup.
+    fn cyclotomic_exp(&self, e: impl AsRef<[u64]>) -> Self {
+        let mut result = *self;
+        result.cyclotomic_exp_in_place(e);
+        result
+    }
+
+    /// Set `self` to be the result of exponentiating `self` by `e`,
+    /// using efficient cyclotomic algorithms.
+    ///
+    /// # Warning
+    ///
+    /// This method should be invoked only when `self` is in the cyclotomic subgroup.
+    fn cyclotomic_exp_in_place(&mut self, e: impl AsRef<[u64]>) {
+        if Self::INVERSE_IS_FAST {
+            // We only use NAF-based exponentiation if inverses are fast to compute.
+            let naf = crate::biginteger::arithmetic::find_naf(e.as_ref());
+            exp_loop(self, naf.into_iter().rev())
+        } else {
+            exp_loop(
+                self,
+                crate::bits::BitIteratorBE::without_leading_zeros(e.as_ref()).map(|e| e as i8),
+            )
+        };
+    }
+}
+
+/// Helper function to calculate the double-and-add loop for exponentiation.
+fn exp_loop<F: CyclotomicMultSubgroup, I: Iterator<Item = i8>>(f: &mut F, e: I) {
+    // If the inverse is fast and we're using naf, we compute the inverse of the base.
+    // Otherwise we do nothing with the variable, so we default it to one.
+    let self_inverse = if F::INVERSE_IS_FAST {
+        f.cyclotomic_inverse().unwrap() // The inverse must exist because self is not zero.
+    } else {
+        F::one()
+    };
+    let mut res = F::one();
+    let mut found_nonzero = false;
+    for value in e {
+        if found_nonzero {
+            res.cyclotomic_square_in_place();
+        }
+
+        if value != 0 {
+            found_nonzero = true;
+
+            if value > 0 {
+                res *= &*f;
+            } else if F::INVERSE_IS_FAST {
+                // only use naf if inversion is fast.
+                res *= &self_inverse;
+            }
+        }
+    }
+    *f = res;
+}
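The signed-digit (NAF) path in `cyclotomic_exp_in_place` pays off because a `-1` digit costs only one cheap inversion up front plus a multiplication, while cutting the expected number of nonzero digits. A minimal sketch of the same square-and-multiply pattern as `exp_loop`, written over plain `f64` arithmetic (a stand-in for a cyclotomic element, since floats have a trivially fast inverse; not part of the patch):

```rust
// Digits are MSB-first after `.rev()`, matching `exp_loop` above.
// A "-1" digit multiplies by the precomputed inverse, which is the
// analogue of `cyclotomic_inverse` when `INVERSE_IS_FAST` holds.
fn signed_digit_pow(base: f64, lsb_first_digits: &[i8]) -> f64 {
    let base_inv = 1.0 / base; // one cheap inversion up front
    let mut res = 1.0;
    for &d in lsb_first_digits.iter().rev() {
        res *= res; // squaring step, cf. `cyclotomic_square_in_place`
        if d == 1 {
            res *= base;
        } else if d == -1 {
            res *= base_inv;
        }
    }
    res
}

// e.g. 5 = 101 in binary, so NAF digits (LSB-first) are [1, 0, 1]:
// signed_digit_pow(2.0, &[1, 0, 1]) == 32.0
```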
diff --git a/algebra-core/src/field/fft_friendly.rs b/algebra-core/src/field/fft_friendly.rs new file mode 100644 index 000000000..f513dcd89 --- /dev/null +++ b/algebra-core/src/field/fft_friendly.rs @@ -0,0 +1,98 @@
+/// The interface for fields that are able to be used in FFTs.
+pub trait FftField: crate::Field {
+    /// The generator of the multiplicative group of the field
+    const GENERATOR: Self;
+
+    /// Let `N` be the size of the multiplicative group defined by the field.
+    /// Then `TWO_ADICITY` is the two-adicity of `N`, i.e. the integer `s`
+    /// such that `N = 2^s * t` for some odd integer `t`.
+    const TWO_ADICITY: u32;
+
+    /// 2^s root of unity computed by GENERATOR^t
+    const TWO_ADIC_ROOT_OF_UNITY: Self;
+
+    /// An integer `b` such that there exists a multiplicative subgroup
+    /// of size `b^k` for some integer `k`.
+    const SMALL_SUBGROUP_BASE: Option<u32> = None;
+
+    /// The integer `k` such that there exists a multiplicative subgroup
+    /// of size `Self::SMALL_SUBGROUP_BASE^k`.
+    const SMALL_SUBGROUP_BASE_ADICITY: Option<u32> = None;
+
+    /// GENERATOR^((MODULUS-1) / (2^s *
+    /// SMALL_SUBGROUP_BASE^SMALL_SUBGROUP_BASE_ADICITY)) Used for mixed-radix
+    /// FFT.
+    const LARGE_SUBGROUP_ROOT_OF_UNITY: Option<Self> = None;
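These constants encode the factorization `N = 2^s * t` of the multiplicative group order. A sketch of a sanity check one could write against them (assumes the bls12-381 `Fr` from `ark-test-curves`, which this workspace already uses as a dev dependency; the `ark_test_curves::ark_ff` re-export path matches the tests later in this file):

```rust
use ark_test_curves::ark_ff::{FftField, Field};
use ark_test_curves::bls12_381::Fr;
use ark_std::One;

/// `TWO_ADIC_ROOT_OF_UNITY` has order dividing 2^TWO_ADICITY,
/// so squaring it TWO_ADICITY times must land on 1.
fn two_adic_root_sanity_check() {
    let mut w = Fr::TWO_ADIC_ROOT_OF_UNITY;
    for _ in 0..Fr::TWO_ADICITY {
        w.square_in_place();
    }
    assert!(w.is_one());
}
```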
+    /// Returns the root of unity of order n, if one exists.
+    /// If no small multiplicative subgroup is defined, this is the 2-adic root
+    /// of unity of order n (for n a power of 2).
+    /// If a small multiplicative subgroup is defined, this is the root of unity
+    /// of order n for the larger subgroup generated by
+    /// `FftConfig::LARGE_SUBGROUP_ROOT_OF_UNITY`
+    /// (for n = 2^i * FftConfig::SMALL_SUBGROUP_BASE^j for some i, j).
+    fn get_root_of_unity(n: u64) -> Option<Self> {
+        let mut omega: Self;
+        if let Some(large_subgroup_root_of_unity) = Self::LARGE_SUBGROUP_ROOT_OF_UNITY {
+            let q = Self::SMALL_SUBGROUP_BASE.expect(
+                "LARGE_SUBGROUP_ROOT_OF_UNITY should only be set in conjunction with SMALL_SUBGROUP_BASE",
+            ) as u64;
+            let small_subgroup_base_adicity = Self::SMALL_SUBGROUP_BASE_ADICITY.expect(
+                "LARGE_SUBGROUP_ROOT_OF_UNITY should only be set in conjunction with SMALL_SUBGROUP_BASE_ADICITY",
+            );
+
+            let q_adicity = k_adicity(q, n);
+            let q_part = q.checked_pow(q_adicity)?;
+
+            let two_adicity = k_adicity(2, n);
+            let two_part = 2u64.checked_pow(two_adicity)?;
+
+            if n != two_part * q_part
+                || (two_adicity > Self::TWO_ADICITY)
+                || (q_adicity > small_subgroup_base_adicity)
+            {
+                return None;
+            }
+
+            omega = large_subgroup_root_of_unity;
+            for _ in q_adicity..small_subgroup_base_adicity {
+                omega = omega.pow(q);
+            }
+
+            for _ in two_adicity..Self::TWO_ADICITY {
+                omega.square_in_place();
+            }
+        } else {
+            // Compute the next power of 2.
+            let size = n.next_power_of_two() as u64;
+            let log_size_of_group = ark_std::log2(usize::try_from(size).expect("too large"));
+
+            if n != size || log_size_of_group > Self::TWO_ADICITY {
+                return None;
+            }
+
+            // Compute the generator for the multiplicative subgroup.
+            // It should be 2^(log_size_of_group) root of unity.
+            omega = Self::TWO_ADIC_ROOT_OF_UNITY;
+            for _ in log_size_of_group..Self::TWO_ADICITY {
+                omega.square_in_place();
+            }
+        }
+        Some(omega)
+    }
+}
+
+/// Calculates the k-adicity of n, i.e., the number of trailing 0s in a base-k
+/// representation.
+pub fn k_adicity(k: u64, mut n: u64) -> u32 {
+    let mut r = 0;
+    while n > 1 {
+        if n % k == 0 {
+            r += 1;
+            n /= k;
+        } else {
+            return r;
+        }
+    }
+    r
+}
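A few concrete values showing the intended behavior of `k_adicity` above (a sketch; `k_adicity_examples` is just an illustrative wrapper, not part of the patch):

```rust
fn k_adicity_examples() {
    assert_eq!(k_adicity(2, 12), 2); // 12 = 2^2 * 3
    assert_eq!(k_adicity(3, 12), 1); // 12 = 3^1 * 4
    assert_eq!(k_adicity(3, 8), 0); // 8 has no factor of 3
    assert_eq!(k_adicity(2, 1024), 10); // 1024 = 2^10
}
```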
diff --git a/algebra-core/src/field/mod.rs b/algebra-core/src/field/mod.rs new file mode 100644 index 000000000..327785224 --- /dev/null +++ b/algebra-core/src/field/mod.rs @@ -0,0 +1,459 @@
+use crate::ring::Ring;
+use ark_serialize::{
+    CanonicalDeserializeWithFlags, CanonicalSerializeWithFlags, EmptyFlags, Flags,
+};
+use ark_std::vec::Vec;
+
+pub use ark_algebra_macros;
+
+#[macro_use]
+pub mod arithmetic;
+
+mod prime;
+pub use prime::*;
+
+mod fft_friendly;
+pub use fft_friendly::*;
+
+mod cyclotomic;
+pub use cyclotomic::*;
+
+mod sqrt;
+pub use sqrt::*;
+
+#[cfg(feature = "parallel")]
+use ark_std::cmp::max;
+#[cfg(feature = "parallel")]
+use rayon::prelude::*;
+
+/// The interface for a generic field.
+/// Types implementing [`Field`] support common field operations such as addition, subtraction, multiplication, and inverses.
+///
+/// ## Defining your own field
+/// To demonstrate the various field operations, we can first define a prime ordered field $\mathbb{F}_{p}$ with $p = 17$. When defining a field $\mathbb{F}_p$, we need to provide the modulus (the $p$ in $\mathbb{F}_p$) and a generator. Recall that a generator $g \in \mathbb{F}_p$ is a field element whose powers comprise the entire field: $\mathbb{F}_p =\\{g, g^1, \ldots, g^{p-1}\\}$.
+/// We can then manually construct the field element associated with an integer with `Fp::from` and perform field addition, subtraction, multiplication, and inversion on it.
+/// ```rust
+/// use ark_ff::{AdditiveGroup, fields::{Field, Fp64, MontBackend, MontConfig}};
+///
+/// #[derive(MontConfig)]
+/// #[modulus = "17"]
+/// #[generator = "3"]
+/// pub struct FqConfig;
+/// pub type Fq = Fp64<MontBackend<FqConfig, 1>>;
+///
+/// # fn main() {
+/// let a = Fq::from(9);
+/// let b = Fq::from(10);
+///
+/// assert_eq!(a, Fq::from(26)); // 26 = 9 mod 17
+/// assert_eq!(a - b, Fq::from(16)); // -1 = 16 mod 17
+/// assert_eq!(a + b, Fq::from(2)); // 19 = 2 mod 17
+/// assert_eq!(a * b, Fq::from(5)); // 90 = 5 mod 17
+/// assert_eq!(a.square(), Fq::from(13)); // 81 = 13 mod 17
+/// assert_eq!(b.double(), Fq::from(3)); // 20 = 3 mod 17
+/// assert_eq!(a / b, a * b.inverse().unwrap()); // need to unwrap since `b` could be 0 which is not invertible
+/// # }
+/// ```
+///
+/// ## Using pre-defined fields
+/// In the following example, we'll use the field associated with the BLS12-381 pairing-friendly group.
+/// ```rust
+/// use ark_ff::{AdditiveGroup, Field};
+/// use ark_test_curves::bls12_381::Fq as F;
+/// use ark_std::{One, UniformRand, test_rng};
+///
+/// let mut rng = test_rng();
+/// // Let's sample uniformly random field elements:
+/// let a = F::rand(&mut rng);
+/// let b = F::rand(&mut rng);
+///
+/// let c = a + b;
+/// let d = a - b;
+/// assert_eq!(c + d, a.double());
+///
+/// let e = c * d;
+/// assert_eq!(e, a.square() - b.square()); // (a + b)(a - b) = a^2 - b^2
+/// assert_eq!(a.inverse().unwrap() * a, F::one()); // Euler-Fermat theorem tells us: a * a^{-1} = 1 mod p
+/// ```
+pub trait Field:
+    Ring
+    + CanonicalSerializeWithFlags
+    + CanonicalDeserializeWithFlags
+    + From<u128>
+    + From<u64>
+    + From<u32>
+    + From<u16>
+    + From<u8>
+    + From<i128>
+    + From<i64>
+    + From<i32>
+    + From<i16>
+    + From<i8>
+    + From<bool>
+{
+    type BasePrimeField: PrimeField;
+
+    type BasePrimeFieldIter: Iterator<Item = Self::BasePrimeField>;
+
+    /// Determines the algorithm for computing square roots.
+    const SQRT_PRECOMP: Option<SqrtPrecomputation<Self>>;
+
+    /// Returns the characteristic of the field,
+    /// in little-endian representation.
+    fn characteristic() -> &'static [u64] {
+        Self::BasePrimeField::characteristic()
+    }
+
+    /// Returns the extension degree of this field with respect
+    /// to `Self::BasePrimeField`.
+    fn extension_degree() -> u64;
+
+    fn to_base_prime_field_elements(&self) -> Self::BasePrimeFieldIter;
+
+    /// Convert a slice of base prime field elements into a field element.
+    /// If the slice length != Self::extension_degree(), must return None.
+    fn from_base_prime_field_elems(elems: &[Self::BasePrimeField]) -> Option<Self>;
+
+    /// Constructs a field element from a single base prime field element.
+    /// ```
+    /// # use ark_ff::Field;
+    /// # use ark_test_curves::bls12_381::Fq as F;
+    /// # use ark_test_curves::bls12_381::Fq2 as F2;
+    /// # use ark_std::One;
+    /// assert_eq!(F2::from_base_prime_field(F::one()), F2::one());
+    /// ```
+    fn from_base_prime_field(elem: Self::BasePrimeField) -> Self;
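As a short sketch of how the three methods above fit together — an extension element decomposes into `extension_degree()` base prime field coordinates and can be rebuilt from them (assumes the bls12-381 `Fq`/`Fq2` from `ark-test-curves`, as in the doc examples above):

```rust
use ark_test_curves::ark_ff::Field;
use ark_test_curves::bls12_381::{Fq, Fq2};
use ark_std::{test_rng, UniformRand, vec::Vec};

fn base_prime_field_round_trip() {
    let mut rng = test_rng();
    let x = Fq2::rand(&mut rng);
    assert_eq!(Fq2::extension_degree(), 2);
    // Decompose into base prime field coordinates...
    let coords: Vec<Fq> = x.to_base_prime_field_elements().collect();
    // ...and rebuild; `from_base_prime_field_elems` returns `None`
    // only when the slice length is not the extension degree.
    let y = Fq2::from_base_prime_field_elems(&coords).unwrap();
    assert_eq!(x, y);
}
```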
+    /// Attempt to deserialize a field element. Returns `None` if the
+    /// deserialization fails.
+    ///
+    /// This function is primarily intended for sampling random field elements
+    /// from a hash-function or RNG output.
+    fn from_random_bytes(bytes: &[u8]) -> Option<Self> {
+        Self::from_random_bytes_with_flags::<EmptyFlags>(bytes).map(|f| f.0)
+    }
+
+    /// Attempt to deserialize a field element, splitting the bitflags metadata
+    /// according to `F` specification. Returns `None` if the deserialization
+    /// fails.
+    ///
+    /// This function is primarily intended for sampling random field elements
+    /// from a hash-function or RNG output.
+    fn from_random_bytes_with_flags<F: Flags>(bytes: &[u8]) -> Option<(Self, F)>;
+
+    /// Returns a `LegendreSymbol`, which indicates whether this field element
+    /// is 1 : a quadratic residue
+    /// 0 : equal to 0
+    /// -1 : a quadratic non-residue
+    fn legendre(&self) -> LegendreSymbol;
+
+    /// Returns the square root of self, if it exists.
+    #[must_use]
+    fn sqrt(&self) -> Option<Self> {
+        match Self::SQRT_PRECOMP {
+            Some(tv) => tv.sqrt(self),
+            None => unimplemented!(),
+        }
+    }
+
+    /// Sets `self` to be the square root of `self`, if it exists.
+    fn sqrt_in_place(&mut self) -> Option<&mut Self> {
+        (*self).sqrt().map(|sqrt| {
+            *self = sqrt;
+            self
+        })
+    }
+
+    /// Returns `sum([a_i * b_i])`.
+    #[inline]
+    fn sum_of_products<const T: usize>(a: &[Self; T], b: &[Self; T]) -> Self {
+        let mut sum = Self::zero();
+        for i in 0..a.len() {
+            sum += a[i] * b[i];
+        }
+        sum
+    }
+
+    /// Sets `self` to `self^s`, where `s = Self::BasePrimeField::MODULUS^power`.
+    /// This is also called the Frobenius automorphism.
+    fn frobenius_map_in_place(&mut self, power: usize);
+
+    /// Returns `self^s`, where `s = Self::BasePrimeField::MODULUS^power`.
+    /// This is also called the Frobenius automorphism.
+    #[must_use]
+    fn frobenius_map(&self, power: usize) -> Self {
+        let mut this = *self;
+        this.frobenius_map_in_place(power);
+        this
+    }
+}
+
+// Given a vector of field elements {v_i}, compute the vector {v_i^(-1)}
+pub fn batch_inversion<F: Field>(v: &mut [F]) {
+    batch_inversion_and_mul(v, &F::one());
+}
+
+#[cfg(not(feature = "parallel"))]
+// Given a vector of field elements {v_i}, compute the vector {coeff * v_i^(-1)}
+pub fn batch_inversion_and_mul<F: Field>(v: &mut [F], coeff: &F) {
+    serial_batch_inversion_and_mul(v, coeff);
+}
+
+#[cfg(feature = "parallel")]
+// Given a vector of field elements {v_i}, compute the vector {coeff * v_i^(-1)}
+pub fn batch_inversion_and_mul<F: Field>(v: &mut [F], coeff: &F) {
+    // Divide the vector v evenly between all available cores
+    let min_elements_per_thread = 1;
+    let num_cpus_available = rayon::current_num_threads();
+    let num_elems = v.len();
+    let num_elem_per_thread = max(num_elems / num_cpus_available, min_elements_per_thread);
+
+    // Batch invert in parallel, without copying the vector
+    v.par_chunks_mut(num_elem_per_thread).for_each(|mut chunk| {
+        serial_batch_inversion_and_mul(&mut chunk, coeff);
+    });
+}
+
+/// Given a vector of field elements {v_i}, compute the vector {coeff * v_i^(-1)}.
+/// This method is explicitly single-threaded.
+fn serial_batch_inversion_and_mul<F: Field>(v: &mut [F], coeff: &F) {
+    // Montgomery's Trick and Fast Implementation of Masked AES
+    // Genelle, Prouff and Quisquater
+    // Section 3.2
+    // but with an optimization to multiply every element in the returned vector by
+    // coeff
+
+    // First pass: compute [a, ab, abc, ...]
+    let mut prod = Vec::with_capacity(v.len());
+    let mut tmp = F::one();
+    for f in v.iter().filter(|f| !f.is_zero()) {
+        tmp.mul_assign(f);
+        prod.push(tmp);
+    }
+
+    // Invert `tmp`.
+    tmp = tmp.invert(); // Guaranteed to be nonzero.
+
+    // Multiply product by coeff, so all inverses will be scaled by coeff
+    tmp *= coeff;
+
+    // Second pass: iterate backwards to compute inverses
+    for (f, s) in v.iter_mut()
+        // Backwards
+        .rev()
+        // Ignore normalized elements
+        .filter(|f| !f.is_zero())
+        // Backwards, skip last element, fill in one for last term.
+        .zip(prod.into_iter().rev().skip(1).chain(Some(F::one())))
+    {
+        // tmp := tmp * f; f := tmp * s = 1/f
+        let new_tmp = tmp * *f;
+        *f = tmp * &s;
+        tmp = new_tmp;
+    }
+}
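Montgomery's trick above replaces `n` field inversions with one inversion plus roughly `3(n - 1)` multiplications. A sketch of typical usage (assumes the bls12-381 `Fr` from `ark-test-curves`, matching the tests below):

```rust
use ark_test_curves::bls12_381::Fr;
use ark_std::{test_rng, One, UniformRand, vec::Vec};

fn batch_inversion_example() {
    let mut rng = test_rng();
    let xs: Vec<Fr> = (0..64).map(|_| Fr::rand(&mut rng)).collect();
    let mut inv = xs.clone();
    // One field inversion total, instead of 64.
    batch_inversion(&mut inv);
    for (x, xi) in xs.iter().zip(&inv) {
        assert_eq!(*x * xi, Fr::one());
    }
}
```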
+#[cfg(all(test, feature = "std"))]
+mod std_tests {
+    use crate::BitIteratorLE;
+
+    #[test]
+    fn bit_iterator_le() {
+        let bits = BitIteratorLE::new(&[0, 1 << 10]).collect::<Vec<_>>();
+        dbg!(&bits);
+        assert!(bits[74]);
+        for (i, bit) in bits.into_iter().enumerate() {
+            if i != 74 {
+                assert!(!bit)
+            } else {
+                assert!(bit)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod no_std_tests {
+    use super::*;
+    use ark_std::{str::FromStr, test_rng};
+    use num_bigint::*;
+
+    // TODO: only Fr & FrConfig should need to be imported.
+    // The rest of imports are caused by cargo not resolving the deps properly
+    // from this crate and from ark_test_curves
+    use ark_test_curves::{
+        ark_ff::{batch_inversion, batch_inversion_and_mul, PrimeField},
+        bls12_381::Fr,
+    };
+
+    #[test]
+    fn test_batch_inversion() {
+        let mut random_coeffs = Vec::<Fr>::new();
+        let vec_size = 1000;
+
+        for _ in 0..=vec_size {
+            random_coeffs.push(Fr::rand(&mut test_rng()));
+        }
+
+        let mut random_coeffs_inv = random_coeffs.clone();
+        batch_inversion::<Fr>(&mut random_coeffs_inv);
+        for i in 0..=vec_size {
+            assert_eq!(random_coeffs_inv[i] * random_coeffs[i], Fr::one());
+        }
+        let rand_multiplier = Fr::rand(&mut test_rng());
+        let mut random_coeffs_inv_shifted = random_coeffs.clone();
+        batch_inversion_and_mul(&mut random_coeffs_inv_shifted, &rand_multiplier);
+        for i in 0..=vec_size {
+            assert_eq!(
+                random_coeffs_inv_shifted[i] * random_coeffs[i],
+                rand_multiplier
+            );
+        }
+    }
+
+    #[test]
+    pub fn test_from_ints() {
+        let felt2 = Fr::one() + Fr::one();
+        let felt16 = felt2 * felt2 * felt2 * felt2;
+
+        assert_eq!(Fr::from(1u8), Fr::one());
+        assert_eq!(Fr::from(1u16), Fr::one());
+        assert_eq!(Fr::from(1u32), Fr::one());
+        assert_eq!(Fr::from(1u64), Fr::one());
+        assert_eq!(Fr::from(1u128), Fr::one());
+        assert_eq!(Fr::from(-1i8), -Fr::one());
+        assert_eq!(Fr::from(-1i64), -Fr::one());
+
+        assert_eq!(Fr::from(0), Fr::zero());
+
+        assert_eq!(Fr::from(-16i32), -felt16);
+        assert_eq!(Fr::from(16u32), felt16);
+        assert_eq!(Fr::from(16i64), felt16);
+
+        assert_eq!(Fr::from(-2i128), -felt2);
+        assert_eq!(Fr::from(2u16), felt2);
+    }
+
+    #[test]
+    fn test_from_into_biguint() {
+        let mut rng = ark_std::test_rng();
+
+        let modulus_bits = Fr::MODULUS_BIT_SIZE;
+        let modulus: num_bigint::BigUint = Fr::MODULUS.into();
+
+        let mut rand_bytes = Vec::new();
+        for _ in 0..(2 * modulus_bits / 8) {
+            rand_bytes.push(u8::rand(&mut rng));
+        }
+
+        let rand = BigUint::from_bytes_le(&rand_bytes);
+
+        let a: BigUint = Fr::from(rand.clone()).into();
+        let b = rand % modulus;
+
+        assert_eq!(a, b);
+    }
+
+    #[test]
+    fn test_from_be_bytes_mod_order() {
+        // Each test vector is a byte array,
+        // and it's tested by parsing it with from_bytes_mod_order, and the num-bigint
+        // library. The bytes are currently generated from scripts/test_vectors.py.
+        // TODO: Eventually generate all the test vector bytes via computation with the
+        // modulus
+        use ark_std::{rand::Rng, string::ToString};
+        use ark_test_curves::ark_ff::BigInteger;
+        use num_bigint::BigUint;
+
+        let ref_modulus = BigUint::from_bytes_be(&Fr::MODULUS.to_bytes_be());
+
+        let mut test_vectors = vec![
+            // 0
+            vec![0u8],
+            // 1
+            vec![1u8],
+            // 255
+            vec![255u8],
+            // 256
+            vec![1u8, 0u8],
+            // 65791
+            vec![1u8, 0u8, 255u8],
+            // 204827637402836681560342736360101429053478720705186085244545541796635082752
+            vec![
+                115u8, 237u8, 167u8, 83u8, 41u8, 157u8, 125u8, 72u8, 51u8, 57u8, 216u8, 8u8, 9u8,
+                161u8, 216u8, 5u8, 83u8, 189u8, 164u8, 2u8, 255u8, 254u8, 91u8, 254u8, 255u8,
+                255u8, 255u8, 255u8, 0u8, 0u8, 0u8,
+            ],
+            // 204827637402836681560342736360101429053478720705186085244545541796635082753
+            vec![
+                115u8, 237u8, 167u8, 83u8, 41u8, 157u8, 125u8, 72u8, 51u8, 57u8, 216u8, 8u8, 9u8,
+                161u8, 216u8, 5u8, 83u8, 189u8, 164u8, 2u8, 255u8, 254u8, 91u8, 254u8, 255u8,
+                255u8, 255u8, 255u8, 0u8, 0u8, 1u8,
+            ],
+            // 52435875175126190479447740508185965837690552500527637822603658699938581184512
+            vec![
+                115u8, 237u8, 167u8, 83u8, 41u8, 157u8, 125u8, 72u8, 51u8, 57u8, 216u8, 8u8, 9u8,
+                161u8, 216u8, 5u8, 83u8, 189u8, 164u8, 2u8, 255u8, 254u8, 91u8, 254u8, 255u8,
+                255u8, 255u8, 255u8, 0u8, 0u8, 0u8, 0u8,
+            ],
+            // 52435875175126190479447740508185965837690552500527637822603658699938581184513
+            vec![
+                115u8, 237u8, 167u8, 83u8, 41u8, 157u8, 125u8, 72u8, 51u8, 57u8, 216u8, 8u8, 9u8,
+                161u8, 216u8, 5u8, 83u8, 189u8, 164u8, 2u8, 255u8, 254u8, 91u8, 254u8, 255u8,
+                255u8, 255u8, 255u8, 0u8, 0u8, 0u8, 1u8,
+            ],
+            // 52435875175126190479447740508185965837690552500527637822603658699938581184514
+            vec![
+                115u8, 237u8, 167u8, 83u8, 41u8, 157u8, 125u8, 72u8, 51u8, 57u8, 216u8, 8u8, 9u8,
+                161u8, 216u8, 5u8, 83u8, 189u8, 164u8, 2u8, 255u8, 254u8, 91u8, 254u8, 255u8,
+                255u8, 255u8, 255u8, 0u8, 0u8, 0u8, 2u8,
+            ],
+            // 104871750350252380958895481016371931675381105001055275645207317399877162369026
+            vec![
+                231u8, 219u8, 78u8, 166u8, 83u8, 58u8, 250u8, 144u8, 102u8, 115u8, 176u8, 16u8,
+                19u8, 67u8, 176u8, 10u8, 167u8, 123u8, 72u8, 5u8, 255u8, 252u8, 183u8, 253u8,
+                255u8, 255u8, 255u8, 254u8, 0u8, 0u8, 0u8, 2u8,
+            ],
+            // 13423584044832304762738621570095607254448781440135075282586536627184276783235328
+            vec![
+                115u8, 237u8, 167u8, 83u8, 41u8, 157u8, 125u8, 72u8, 51u8, 57u8, 216u8, 8u8, 9u8,
+                161u8, 216u8, 5u8, 83u8, 189u8, 164u8, 2u8, 255u8, 254u8, 91u8, 254u8, 255u8,
+                255u8, 255u8, 255u8, 0u8, 0u8, 0u8, 1u8, 0u8,
+            ],
+            // 115792089237316195423570985008687907853269984665640564039457584007913129639953
+            vec![
+                1u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8,
+                0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8,
+                17u8,
+            ],
+            // 168227964412442385903018725516873873690960537166168201862061242707851710824468
+            vec![
+                1u8, 115u8, 237u8, 167u8, 83u8, 41u8, 157u8, 125u8, 72u8, 51u8, 57u8, 216u8, 8u8,
+                9u8, 161u8, 216u8, 5u8, 83u8, 189u8, 164u8, 2u8, 255u8, 254u8, 91u8, 254u8, 255u8,
+                255u8, 255u8, 255u8, 0u8, 0u8, 0u8, 20u8,
+            ],
+            // 29695210719928072218913619902732290376274806626904512031923745164725699769008210
+            vec![
+                1u8, 0u8, 115u8, 237u8, 167u8, 83u8, 41u8, 157u8, 125u8, 72u8, 51u8, 57u8, 216u8,
+                8u8, 9u8, 161u8, 216u8, 5u8, 83u8, 189u8, 164u8, 2u8, 255u8, 254u8, 91u8, 254u8,
+                255u8, 255u8, 255u8, 255u8, 0u8, 0u8, 0u8, 82u8,
+            ],
+        ];
+        // Add random bytestrings to the test vector list.
+        // (The RNG is created once, outside the loop, so each bytestring is distinct.)
+        let mut rng = test_rng();
+        for i in 1..512 {
+            let data: Vec<u8> = (0..i).map(|_| rng.gen()).collect();
+            test_vectors.push(data);
+        }
+        for i in test_vectors {
+            let mut expected_biguint = BigUint::from_bytes_be(&i);
+            // Reduce expected_biguint using the modpow API (raising to the power 1 mod p)
+            expected_biguint =
+                expected_biguint.modpow(&BigUint::from_bytes_be(&[1u8]), &ref_modulus);
+            let expected_string = expected_biguint.to_string();
+            let expected = Fr::from_str(&expected_string).unwrap();
+            let actual = Fr::from_be_bytes_mod_order(&i);
+            assert_eq!(expected, actual, "failed on test {:?}", i);
+        }
+    }
+}
diff --git a/algebra-core/src/field/prime.rs b/algebra-core/src/field/prime.rs
new file mode 100644
index 000000000..107988f29
--- /dev/null
+++ b/algebra-core/src/field/prime.rs
@@ -0,0 +1,97 @@
+use crate::{biginteger::BigInteger, module::Scalar, FftField, Field};
+
+use ark_std::{cmp::min, str::FromStr};
+use num_bigint::BigUint;
+
+/// The interface for a prime field, i.e. the field of integers modulo a prime $p$.
+/// In the following example we'll use the prime field underlying the BLS12-381 G1 curve.
+/// ```rust
+/// use ark_ff::{BigInteger, Field, PrimeField};
+/// use ark_std::{test_rng, One, UniformRand, Zero};
+/// use ark_test_curves::bls12_381::Fq as F;
+///
+/// let mut rng = test_rng();
+/// let a = F::rand(&mut rng);
+/// // We can access the prime modulus associated with `F`:
+/// let modulus = <F as PrimeField>::MODULUS;
+/// assert_eq!(a.pow(&modulus), a); // Fermat's little theorem tells us: a^p = a mod p
+///
+/// // We can convert field elements to integers in the range [0, MODULUS - 1]:
+/// let one: num_bigint::BigUint = F::one().into();
+/// assert_eq!(one, num_bigint::BigUint::one());
+///
+/// // We can construct field elements from an arbitrary sequence of bytes:
+/// let n = F::from_le_bytes_mod_order(&modulus.to_bytes_le());
+/// assert_eq!(n, F::zero());
+/// ```
+pub trait PrimeField:
+    Field
+    + FftField
+    + FromStr
+    + From<<Self as PrimeField>::BigInt>
+    + Into<<Self as PrimeField>::BigInt>
+    + From<BigUint>
+    + Into<BigUint>
+    + Scalar
+{
+    /// A `BigInteger` type that can represent elements of this field.
+    type BigInt: BigInteger;
+
+    /// The modulus `p`.
+    const MODULUS: Self::BigInt;
+
+    /// The value `(p - 1) / 2`.
+    const MODULUS_MINUS_ONE_DIV_TWO: Self::BigInt;
+
+    /// The size of the modulus in bits.
+    const MODULUS_BIT_SIZE: u32;
+
+    /// The trace of the field is defined as the smallest odd integer `t` such that
+    /// `2^s * t = p - 1` for some integer `s`.
+    const TRACE: Self::BigInt;
+    /// The value `(t - 1) / 2`.
+    const TRACE_MINUS_ONE_DIV_TWO: Self::BigInt;
+
+    /// Construct a prime field element from an integer in the range `0..=(p - 1)`.
+    fn from_bigint(repr: Self::BigInt) -> Option<Self>;
+
+    /// Converts an element of the prime field into an integer in the range `0..=(p - 1)`.
+    fn into_bigint(self) -> Self::BigInt;
+
+    /// Reads bytes in big-endian, and converts them to a field element.
+    /// If the integer represented by `bytes` is larger than the modulus `p`, this method
+    /// performs the appropriate reduction.
+    fn from_be_bytes_mod_order(bytes: &[u8]) -> Self {
+        let mut bytes_copy = bytes.to_vec();
+        bytes_copy.reverse();
+        Self::from_le_bytes_mod_order(&bytes_copy)
+    }
+
+    /// Reads bytes in little-endian, and converts them to a field element.
+    /// If the integer represented by `bytes` is larger than the modulus `p`, this method
+    /// performs the appropriate reduction.
+    fn from_le_bytes_mod_order(bytes: &[u8]) -> Self {
+        let num_modulus_bytes = ((Self::MODULUS_BIT_SIZE + 7) / 8) as usize;
+        let num_bytes_to_directly_convert = min(num_modulus_bytes - 1, bytes.len());
+        // Directly convert the most significant bytes (the tail of the
+        // little-endian input) into a field element.
+        // The number of bytes directly converted must be less than the
+        // number of bytes needed to represent the modulus, as we must begin
+        // modular reduction once the data is of the same number of bytes as the
+        // modulus.
+        let (bytes, bytes_to_directly_convert) =
+            bytes.split_at(bytes.len() - num_bytes_to_directly_convert);
+        // Guaranteed to not be None, as the input is less than the modulus size.
+        let mut res = Self::from_random_bytes(&bytes_to_directly_convert).unwrap();
+
+        // Update the result, byte by byte.
+        // We go through existing field arithmetic, which handles the reduction.
+        // TODO: If we need higher speeds, parse more bytes at once, or implement
+        // modular multiplication by a u64
+        let window_size = Self::from(256u64);
+        for byte in bytes.iter().rev() {
+            res *= window_size;
+            res += Self::from(*byte);
+        }
+        res
+    }
+}
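As a quick illustration of these conversions, here is a small sketch against the `ark_ff`/`ark_test_curves` APIs exercised by the doc-test above (the concrete field `Fr` is only an example choice):

    use ark_ff::{BigInteger, PrimeField};
    use ark_test_curves::bls12_381::Fr;

    fn main() {
        // The big-endian bytes [1, 0] encode 256; no reduction is needed.
        assert_eq!(Fr::from_be_bytes_mod_order(&[1u8, 0u8]), Fr::from(256u64));

        // Feeding the modulus itself must reduce to zero.
        let modulus_bytes = Fr::MODULUS.to_bytes_be();
        assert_eq!(Fr::from_be_bytes_mod_order(&modulus_bytes), Fr::from(0u64));
    }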
diff --git a/algebra-core/src/field/sqrt.rs b/algebra-core/src/field/sqrt.rs
new file mode 100644
index 000000000..4f589856c
--- /dev/null
+++ b/algebra-core/src/field/sqrt.rs
@@ -0,0 +1,149 @@
+/// Indication of the field element's quadratic residuosity.
+///
+/// # Examples
+/// ```
+/// # use ark_std::test_rng;
+/// # use ark_std::UniformRand;
+/// # use ark_test_curves::{LegendreSymbol, Field, bls12_381::Fq as Fp};
+/// let a: Fp = Fp::rand(&mut test_rng());
+/// let b = a.square();
+/// assert_eq!(b.legendre(), LegendreSymbol::QuadraticResidue);
+/// ```
+#[derive(Debug, PartialEq, Eq)]
+pub enum LegendreSymbol {
+    Zero = 0,
+    QuadraticResidue = 1,
+    QuadraticNonResidue = -1,
+}
+
+impl LegendreSymbol {
+    /// Returns `true` if `self` is `LegendreSymbol::Zero`.
+    ///
+    /// # Examples
+    /// ```
+    /// # use ark_std::test_rng;
+    /// # use ark_std::UniformRand;
+    /// # use ark_test_curves::{LegendreSymbol, Field, bls12_381::Fq as Fp};
+    /// let a: Fp = Fp::rand(&mut test_rng());
+    /// let b: Fp = a.square();
+    /// assert!(!b.legendre().is_zero());
+    /// ```
+    pub fn is_zero(&self) -> bool {
+        *self == LegendreSymbol::Zero
+    }
+
+    /// Returns true if `self` is a quadratic non-residue.
+    ///
+    /// # Examples
+    /// ```
+    /// # use ark_test_curves::{Fp2Config, Field, LegendreSymbol, bls12_381::{Fq, Fq2Config}};
+    /// let a: Fq = Fq2Config::NONRESIDUE;
+    /// assert!(a.legendre().is_qnr());
+    /// ```
+    pub fn is_qnr(&self) -> bool {
+        *self == LegendreSymbol::QuadraticNonResidue
+    }
+
+    /// Returns true if `self` is a quadratic residue.
+    /// # Examples
+    /// ```
+    /// # use ark_std::test_rng;
+    /// # use ark_test_curves::bls12_381::Fq as Fp;
+    /// # use ark_std::UniformRand;
+    /// # use ark_ff::{LegendreSymbol, Field};
+    /// let a: Fp = Fp::rand(&mut test_rng());
+    /// let b: Fp = a.square();
+    /// assert!(b.legendre().is_qr());
+    /// ```
+    pub fn is_qr(&self) -> bool {
+        *self == LegendreSymbol::QuadraticResidue
+    }
+}
+
+/// Precomputation that makes computing square roots faster.
+/// A particular variant should only be instantiated if the modulus satisfies
+/// the corresponding condition.
+#[non_exhaustive]
+pub enum SqrtPrecomputation<F: crate::Field> {
+    /// The Tonelli-Shanks algorithm works for all elements, no matter what the modulus is.
+    TonelliShanks {
+        two_adicity: u32,
+        quadratic_nonresidue_to_trace: F,
+        trace_of_modulus_minus_one_div_two: &'static [u64],
+    },
+    /// To be used when the modulus is 3 mod 4.
+    Case3Mod4 {
+        modulus_plus_one_div_four: &'static [u64],
+    },
+}
+
+impl<F: crate::Field> SqrtPrecomputation<F> {
+    pub fn sqrt(&self, elem: &F) -> Option<F> {
+        match self {
+            Self::TonelliShanks {
+                two_adicity,
+                quadratic_nonresidue_to_trace,
+                trace_of_modulus_minus_one_div_two,
+            } => {
+                // https://eprint.iacr.org/2012/685.pdf (page 12, algorithm 5)
+                // Actually this is just normal Tonelli-Shanks; since `P::Generator`
+                // is a quadratic non-residue, `P::ROOT_OF_UNITY = P::GENERATOR ^ t`
+                // is also a quadratic non-residue (since `t` is odd).
+                if elem.is_zero() {
+                    return Some(F::zero());
+                }
+                // Try computing the square root (x at the end of the algorithm)
+                // Check at the end of the algorithm if x was a square root
+                // Begin Tonelli-Shanks
+                let mut z = *quadratic_nonresidue_to_trace;
+                let mut w = elem.pow(*trace_of_modulus_minus_one_div_two);
+                let mut x = w * elem;
+                let mut b = x * &w;
+
+                let mut v = *two_adicity as usize;
+
+                while !b.is_one() {
+                    let mut k = 0usize;
+
+                    let mut b2k = b;
+                    while !b2k.is_one() {
+                        // invariant: b2k = b^(2^k) after entering this loop
+                        b2k.square_in_place();
+                        k += 1;
+                    }
+
+                    if k == (*two_adicity as usize) {
+                        // We are in the case where self^(T * 2^k) = x^(P::MODULUS - 1) = 1,
+                        // which means that no square root exists.
+                        return None;
+                    }
+                    let j = v - k;
+                    w = z;
+                    for _ in 1..j {
+                        w.square_in_place();
+                    }
+
+                    z = w.square();
+                    b *= &z;
+                    x *= &w;
+                    v = k;
+                }
+                // Is x the square root? If so, return it.
+                if x.square() == *elem {
+                    Some(x)
+                } else {
+                    // Consistency check that if no square root is found,
+                    // it is because none exists.
+                    debug_assert!(!matches!(elem.legendre(), LegendreSymbol::QuadraticResidue));
+                    None
+                }
+            },
+            Self::Case3Mod4 {
+                modulus_plus_one_div_four,
+            } => {
+                let result = elem.pow(modulus_plus_one_div_four.as_ref());
+                (result.square() == *elem).then_some(result)
+            },
+        }
+    }
+}
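The `Case3Mod4` branch relies on the identity that, for p ≡ 3 (mod 4) and a quadratic residue a, (a^((p+1)/4))^2 = a^((p+1)/2) = a * a^((p-1)/2) = a by Euler's criterion; the final `result.square() == *elem` check rejects non-residues. A toy sanity check over plain `u64` arithmetic (p = 7, so (p + 1)/4 = 2; overflow-safe only for small moduli):

    fn pow_mod(mut b: u64, mut e: u64, m: u64) -> u64 {
        let mut acc = 1;
        while e > 0 {
            if e & 1 == 1 {
                acc = acc * b % m;
            }
            b = b * b % m;
            e >>= 1;
        }
        acc
    }

    fn main() {
        let (p, a) = (7u64, 2u64); // 2 = 4^2 mod 7, so 2 is a quadratic residue
        let candidate = pow_mod(a, (p + 1) / 4, p);
        assert_eq!(candidate * candidate % p, a);
    }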
diff --git a/algebra-core/src/group/mod.rs b/algebra-core/src/group/mod.rs
new file mode 100644
index 000000000..9e7c799a7
--- /dev/null
+++ b/algebra-core/src/group/mod.rs
@@ -0,0 +1,206 @@
+use ark_serialize::{CanonicalDeserialize, CanonicalSerialize};
+use ark_std::{
+    cmp::max,
+    fmt::{Debug, Display},
+    hash::Hash,
+    ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign},
+    UniformRand,
+};
+use num_traits::{One, Zero};
+use zeroize::Zeroize;
+
+#[cfg(feature = "parallel")]
+use rayon::prelude::*;
+
+pub trait AdditiveGroup:
+    Eq
+    + 'static
+    + Sized
+    + CanonicalSerialize
+    + CanonicalDeserialize
+    + Copy
+    + Clone
+    + Default
+    + Send
+    + Sync
+    + Hash
+    + Debug
+    + Display
+    + UniformRand
+    + Zeroize
+    + Zero
+    + Neg<Output = Self>
+    + Add<Self, Output = Self>
+    + Sub<Self, Output = Self>
+    + AddAssign<Self>
+    + SubAssign<Self>
+    + for<'a> Add<&'a Self, Output = Self>
+    + for<'a> Sub<&'a Self, Output = Self>
+    + for<'a> AddAssign<&'a Self>
+    + for<'a> SubAssign<&'a Self>
+    + for<'a> Add<&'a mut Self, Output = Self>
+    + for<'a> Sub<&'a mut Self, Output = Self>
+    + for<'a> AddAssign<&'a mut Self>
+    + for<'a> SubAssign<&'a mut Self>
+    + ark_std::iter::Sum
+    + for<'a> ark_std::iter::Sum<&'a Self>
+{
+    /// The additive identity of the group.
+    const ZERO: Self;
+
+    /// Doubles `self`.
+    #[must_use]
+    fn double(&self) -> Self {
+        let mut copy = *self;
+        copy.double_in_place();
+        copy
+    }
+    /// Doubles `self` in place.
+    fn double_in_place(&mut self) -> &mut Self {
+        self.add_assign(*self);
+        self
+    }
+
+    /// Negates `self` in place.
+    fn neg_in_place(&mut self) -> &mut Self {
+        *self = -(*self);
+        self
+    }
+}
+
+/// A multiplicative group with an optional zero element.
+/// If the multiplicative group has an associated zero element, then the
+/// group law applies only to non-zero elements, and the inverse of the zero element
+/// is defined as zero.
+///
+/// Furthermore, since multiplicative groups with zero usually arise in the context of
+/// rings, we assume that the zero element is the additive identity of the ring.
+pub trait MultiplicativeGroup:
+    Eq
+    + 'static
+    + Sized
+    + CanonicalSerialize
+    + CanonicalDeserialize
+    + Copy
+    + Clone
+    + Default
+    + Send
+    + Sync
+    + Hash
+    + Debug
+    + Display
+    + UniformRand
+    + Zeroize
+    + One
+    + Mul<Self, Output = Self>
+    + Div<Self, Output = Self>
+    + MulAssign<Self>
+    + DivAssign<Self>
+    + for<'a> Mul<&'a Self, Output = Self>
+    + for<'a> MulAssign<&'a Self>
+    + for<'a> Mul<&'a mut Self, Output = Self>
+    + for<'a> MulAssign<&'a mut Self>
+    + for<'a> Div<&'a Self, Output = Self>
+    + for<'a> DivAssign<&'a Self>
+    + for<'a> Div<&'a mut Self, Output = Self>
+    + for<'a> DivAssign<&'a mut Self>
+    + ark_std::iter::Product
+    + for<'a> ark_std::iter::Product<&'a Self>
+{
+    const ONE: Self;
+
+    #[doc(hidden)]
+    fn _is_zero(&self) -> bool;
+
+    /// Squares `self`.
+    #[must_use]
+    fn square(&self) -> Self {
+        let mut copy = *self;
+        copy.square_in_place();
+        copy
+    }
+    /// Squares `self` in place.
+    fn square_in_place(&mut self) -> &mut Self {
+        *self *= *self;
+        self
+    }
+
+    /// Inverts `self` in place.
+    fn invert_in_place(&mut self) -> &mut Self;
+
+    /// Returns the multiplicative inverse of `self`.
+    fn invert(&self) -> Self {
+        let mut copy = *self;
+        copy.invert_in_place();
+        copy
+    }
+
+    fn invert_batch_in_place(v: &mut [Self]) {
+        invert_and_mul_batch(v, &Self::one());
+    }
+
+    fn invert_batch(v: &[Self]) {
+        let mut v = v.to_vec();
+        Self::invert_batch(&mut v);
+    }
+}
+
+#[cfg(not(feature = "parallel"))]
+// Given a vector of group elements {v_i}, compute the vector {coeff * v_i^(-1)}
+pub fn invert_and_mul_batch<G: MultiplicativeGroup>(v: &mut [G], coeff: &G) {
+    serial_invert_and_mul_batch(v, coeff);
+}
+
+#[cfg(feature = "parallel")]
+// Given a vector of group elements {v_i}, compute the vector {coeff * v_i^(-1)}
+pub fn invert_and_mul_batch<G: MultiplicativeGroup>(v: &mut [G], coeff: &G) {
+    // Divide the vector v evenly between all available cores
+    let min_elements_per_thread = 1;
+    let num_cpus_available = rayon::current_num_threads();
+    let num_elems = v.len();
+    let num_elem_per_thread = max(num_elems / num_cpus_available, min_elements_per_thread);
+
+    // Batch invert in parallel, without copying the vector
+    v.par_chunks_mut(num_elem_per_thread).for_each(|mut chunk| {
+        serial_invert_and_mul_batch(&mut chunk, coeff);
+    });
+}
+
+/// Given a vector of group elements {v_i}, compute the vector {coeff * v_i^(-1)}.
+/// This method is explicitly single-threaded.
+fn serial_invert_and_mul_batch<G: MultiplicativeGroup>(v: &mut [G], coeff: &G) {
+    // Montgomery's Trick and Fast Implementation of Masked AES
+    // Genelle, Prouff and Quisquater
+    // Section 3.2
+    // but with an optimization to multiply every element in the returned vector by
+    // coeff
+
+    // First pass: compute [a, ab, abc, ...]
+    let mut prod = Vec::with_capacity(v.len());
+    let mut tmp = G::one();
+    for f in v.iter().filter(|f| !f._is_zero()) {
+        tmp.mul_assign(f);
+        prod.push(tmp);
+    }
+
+    // Invert `tmp`.
+    tmp = tmp.invert(); // Guaranteed to be nonzero.
+
+    // Multiply product by coeff, so all inverses will be scaled by coeff
+    tmp *= coeff;
+
+    // Second pass: iterate backwards to compute inverses
+    for (f, s) in v.iter_mut()
+        // Backwards
+        .rev()
+        // Ignore zero elements
+        .filter(|f| !f._is_zero())
+        // Backwards, skip last element, fill in one for last term.
+        .zip(prod.into_iter().rev().skip(1).chain(Some(G::one())))
+    {
+        // tmp := tmp * f; f := tmp * s = coeff / f
+        let new_tmp = tmp * *f;
+        *f = tmp * &s;
+        tmp = new_tmp;
+    }
+}
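Montgomery's trick trades one inversion per element for a single inversion plus roughly three multiplications per element. A self-contained sketch over integers modulo a small prime (illustrative helper names, not part of the patch; the inverse of the running product is taken with Fermat's little theorem):

    fn pow_mod(mut b: u64, mut e: u64, m: u64) -> u64 {
        let mut acc = 1;
        while e > 0 {
            if e & 1 == 1 {
                acc = acc * b % m;
            }
            b = b * b % m;
            e >>= 1;
        }
        acc
    }

    fn main() {
        let p = 97u64;
        let v = [3u64, 5, 8];
        // First pass: prefix products [3, 15, 120 mod 97].
        let mut prod = Vec::with_capacity(v.len());
        let mut tmp = 1u64;
        for &f in &v {
            tmp = tmp * f % p;
            prod.push(tmp);
        }
        // One inversion of the total product.
        tmp = pow_mod(tmp, p - 2, p);
        // Second pass: peel off one factor at a time, back to front.
        let mut inv = [0u64; 3];
        for i in (0..v.len()).rev() {
            let s = if i == 0 { 1 } else { prod[i - 1] };
            inv[i] = tmp * s % p;
            tmp = tmp * v[i] % p;
        }
        for (f, f_inv) in v.iter().zip(&inv) {
            assert_eq!(f * f_inv % p, 1);
        }
    }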
diff --git a/algebra-core/src/lib.rs b/algebra-core/src/lib.rs
new file mode 100644
index 000000000..da961fb59
--- /dev/null
+++ b/algebra-core/src/lib.rs
@@ -0,0 +1,15 @@
+#[macro_use]
+pub mod biginteger;
+pub mod bits;
+pub mod const_helpers;
+
+pub mod group;
+pub use group::{AdditiveGroup, MultiplicativeGroup};
+
+pub mod ring;
+
+pub mod module;
+pub mod scalar_mul;
+
+pub mod field;
+pub use field::{FftField, Field, PrimeField};
diff --git a/algebra-core/src/module/mod.rs b/algebra-core/src/module/mod.rs
new file mode 100644
index 000000000..c377a2035
--- /dev/null
+++ b/algebra-core/src/module/mod.rs
@@ -0,0 +1,152 @@
+use ark_std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign};
+
+use crate::{AdditiveGroup, MultiplicativeGroup, PrimeField};
+
+mod scalar;
+use scalar::Scalar as Sc;
+pub use scalar::{Scalar, Sign};
+
+pub trait ScalarMul<Scalar: Sc>:
+    AdditiveGroup
+    + 'static
+    + Mul<Scalar, Output = Self>
+    + for<'a> Mul<&'a Scalar, Output = Self>
+    + for<'a> Mul<&'a mut Scalar, Output = Self>
+    + MulAssign<Scalar>
+    + for<'a> MulAssign<&'a Scalar>
+    + for<'a> MulAssign<&'a mut Scalar>
+    + Add<Self::MulBase, Output = Self>
+    + AddAssign<Self::MulBase>
+    + for<'a> Add<&'a Self::MulBase, Output = Self>
+    + for<'a> AddAssign<&'a Self::MulBase>
+    + Sub<Self::MulBase, Output = Self>
+    + SubAssign<Self::MulBase>
+    + for<'a> Sub<&'a Self::MulBase, Output = Self>
+    + for<'a> SubAssign<&'a Self::MulBase>
+    + From<Self::MulBase>
+{
+    type MulBase: Send
+        + Sync
+        + Copy
+        + Eq
+        + core::hash::Hash
+        + Mul<Scalar, Output = Self>
+        + for<'a> Mul<&'a Scalar, Output = Self>
+        + Neg<Output = Self::MulBase>
+        + From<Self>;
+
+    const NEGATION_IS_CHEAP: bool;
+
+    fn batch_convert_to_mul_base(bases: &[Self]) -> Vec<Self::MulBase>;
+}
+
+pub trait ScalarExp<Exponent: Sc>:
+    MultiplicativeGroup
+    + Mul<Self::ExpBase, Output = Self>
+    + MulAssign<Self::ExpBase>
+    + for<'a> Mul<&'a Self::ExpBase, Output = Self>
+    + for<'a> MulAssign<&'a Self::ExpBase>
+    + Div<Self::ExpBase, Output = Self>
+    + DivAssign<Self::ExpBase>
+    + for<'a> Div<&'a Self::ExpBase, Output = Self>
+    + for<'a> DivAssign<&'a Self::ExpBase>
+    + From<Self::ExpBase>
+{
+    type ExpBase: Send + Sync + Copy + Eq + core::hash::Hash;
+
+    const INVERSION_IS_FAST: bool;
+
+    /// Returns `self^exp`, where `exp` is a scalar.
+    #[must_use]
+    fn pow(&self, exp: Exponent) -> Self {
+        let mut res = Self::one();
+        let (sign, exp) = exp.as_u64s();
+
+        for bit in crate::bits::BitIteratorBE::without_leading_zeros(exp) {
+            res.square_in_place();
+
+            if bit {
+                res *= self;
+            }
+        }
+        if sign.is_negative() {
+            res.invert_in_place();
+        }
+        res
+    }
+
+    /// Exponentiates an element `f` by a number represented with `u64` limbs,
+    /// using a precomputed table of the successive squares of `f`: `powers_of_2`
+    /// should equal `&[f, f^2, f^4, ..., f^(2^n)]` when `exp` has at most `n` bits.
+    ///
+    /// This returns `None` when a power is missing from the table.
+    #[must_use]
+    fn pow_with_table(powers_of_2: &[Self], exp: Exponent) -> Option<Self> {
+        let mut res = Self::one();
+        let (sign, exp) = exp.as_u64s();
+        for (pow, bit) in crate::bits::BitIteratorLE::without_trailing_zeros(exp).enumerate() {
+            if bit {
+                res *= powers_of_2.get(pow)?;
+            }
+        }
+        if sign.is_negative() {
+            res.invert_in_place();
+        }
+        Some(res)
+    }
+
+    /// Returns `base^exp`, where `exp` is a scalar.
+    #[must_use]
+    fn pow_exp_base(base: &Self::ExpBase, exp: Exponent) -> Self {
+        let mut res = Self::one();
+        let (sign, exp) = exp.as_u64s();
+        for (i, bit) in crate::bits::BitIteratorBE::without_leading_zeros(exp).enumerate() {
+            res.square_in_place();
+            if bit {
+                res *= base;
+            }
+        }
+        if sign.is_negative() {
+            res.invert_in_place();
+        }
+        res
+    }
+
+    /// Exponentiates an element `f` of `Self::ExpBase` by a number represented
+    /// with `u64` limbs, using a precomputed table of the successive squares of
+    /// `f`: `powers_of_2` should equal `&[f, f^2, f^4, ..., f^(2^n)]` when `exp`
+    /// has at most `n` bits.
+    ///
+    /// This returns `None` when a power is missing from the table.
+    #[must_use]
+    fn pow_exp_base_with_table(powers_of_2: &[Self::ExpBase], exp: Exponent) -> Option<Self> {
+        let mut res = Self::one();
+        let (sign, exp) = exp.as_u64s();
+        for (pow, bit) in crate::bits::BitIteratorLE::without_trailing_zeros(exp).enumerate() {
+            if bit {
+                res *= powers_of_2.get(pow)?;
+            }
+        }
+        if sign.is_negative() {
+            res.invert_in_place();
+        }
+        Some(res)
+    }
+
+    fn batch_convert_to_exp_base(bases: &[Self]) -> Vec<Self::ExpBase>;
+}
+
+pub trait PrimeScalarMul<F: PrimeField>: ScalarMul<F> + ScalarMul<F::BigInt> {
+    /// Returns a fixed generator of this group.
+    #[must_use]
+    fn generator() -> Self;
+}
+
+pub trait PrimeScalarExp<F: PrimeField>: ScalarExp<F> + ScalarExp<F::BigInt> {
+    /// Returns a fixed generator of this group.
+    #[must_use]
+    fn generator() -> Self;
+}
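The `pow` loop above is standard MSB-first square-and-multiply. A standalone sketch of the same bit scan over `u64` arithmetic (hypothetical helper, not part of the patch):

    fn pow_u64(base: u64, exp: u64, modulus: u64) -> u64 {
        let mut res = 1u64;
        // Scan exponent bits from most to least significant,
        // squaring at every step and multiplying on set bits.
        for i in (0..64 - exp.leading_zeros() as u64).rev() {
            res = res * res % modulus;
            if (exp >> i) & 1 == 1 {
                res = res * base % modulus;
            }
        }
        res
    }

    fn main() {
        // 3^13 = 1_594_323 = 594_320 (mod 1_000_003).
        assert_eq!(pow_u64(3, 13, 1_000_003), 594_320);
    }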
diff --git a/algebra-core/src/module/scalar.rs b/algebra-core/src/module/scalar.rs
new file mode 100644
index 000000000..4077a4481
--- /dev/null
+++ b/algebra-core/src/module/scalar.rs
@@ -0,0 +1,137 @@
+use ark_std::fmt::Debug;
+
+#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)]
+pub enum Sign {
+    Negative = -1,
+    #[default]
+    Positive = 1,
+}
+
+impl Sign {
+    pub fn is_negative(&self) -> bool {
+        matches!(self, Sign::Negative)
+    }
+
+    pub fn is_positive(&self) -> bool {
+        matches!(self, Sign::Positive)
+    }
+}
+
+pub trait Scalar: Send + Sync + Copy + Debug {
+    const MAX_BIT_SIZE: Option<u32>;
+    type U8Ref: AsRef<[u8]>;
+    type U64Ref: AsRef<[u64]>;
+
+    fn as_bytes(&self) -> (Sign, Self::U8Ref);
+
+    fn as_u64s(&self) -> (Sign, Self::U64Ref);
+}
+
+macro_rules! impl_scalar_unsigned {
+    ($t:ty) => {
+        impl Scalar for $t {
+            const MAX_BIT_SIZE: Option<u32> = Some(core::mem::size_of::<$t>() as u32 * 8);
+            type U8Ref = [u8; core::mem::size_of::<$t>()];
+            type U64Ref = [u64; (core::mem::size_of::<$t>() + 7) / 8];
+
+            fn as_bytes(&self) -> (Sign, Self::U8Ref) {
+                (Sign::Positive, self.to_le_bytes())
+            }
+
+            fn as_u64s(&self) -> (Sign, Self::U64Ref) {
+                let mut res = [0u64; (core::mem::size_of::<$t>() + 7) / 8];
+                for (chunk, res) in self.to_le_bytes().chunks_mut(8).zip(&mut res) {
+                    chunk.reverse();
+                    *res = chunk.iter().fold(0u64, |acc, &x| (acc << 8) | x as u64);
+                }
+                (Sign::Positive, res)
+            }
+        }
+    };
+}
+
+impl_scalar_unsigned!(u8);
+impl_scalar_unsigned!(u16);
+impl_scalar_unsigned!(u32);
+impl_scalar_unsigned!(u64);
+impl_scalar_unsigned!(u128);
+
+macro_rules! impl_scalar_signed {
+    ($t:ty) => {
+        impl Scalar for $t {
+            const MAX_BIT_SIZE: Option<u32> = Some(core::mem::size_of::<$t>() as u32 * 8);
+            type U8Ref = [u8; core::mem::size_of::<$t>()];
+            type U64Ref = [u64; (core::mem::size_of::<$t>() + 7) / 8];
+
+            fn as_bytes(&self) -> (Sign, Self::U8Ref) {
+                let sign = if *self < 0 {
+                    Sign::Negative
+                } else {
+                    Sign::Positive
+                };
+                let val = self.unsigned_abs();
+                (sign, val.to_le_bytes())
+            }
+
+            fn as_u64s(&self) -> (Sign, Self::U64Ref) {
+                let sign = if *self < 0 {
+                    Sign::Negative
+                } else {
+                    Sign::Positive
+                };
+                let mut res = [0u64; (core::mem::size_of::<$t>() + 7) / 8];
+                let val = self.unsigned_abs();
+                for (chunk, res) in val.to_le_bytes().chunks_mut(8).zip(&mut res) {
+                    chunk.reverse();
+                    *res = chunk.iter().fold(0u64, |acc, &x| (acc << 8) | x as u64);
+                }
+                (sign, res)
+            }
+        }
+    };
+}
+
+impl_scalar_signed!(i8);
+impl_scalar_signed!(i16);
+impl_scalar_signed!(i32);
+impl_scalar_signed!(i64);
+impl_scalar_signed!(i128);
+
+impl<'a> Scalar for &'a [u64] {
+    const MAX_BIT_SIZE: Option<u32> = None;
+    type U8Ref = Vec<u8>;
+    type U64Ref = Self;
+
+    fn as_bytes(&self) -> (Sign, Self::U8Ref) {
+        (
+            Sign::Positive,
+            self.iter().map(|x| x.to_le_bytes()).flatten().collect(),
+        )
+    }
+
+    fn as_u64s(&self) -> (Sign, Self::U64Ref) {
+        (Sign::Positive, self)
+    }
+}
+
+impl<'a, S: Scalar> Scalar for &'a S {
+    const MAX_BIT_SIZE: Option<u32> = S::MAX_BIT_SIZE;
+    type U8Ref = S::U8Ref;
+    type U64Ref = S::U64Ref;
+
+    fn as_bytes(&self) -> (Sign, Self::U8Ref) {
+        (*self).as_bytes()
+    }
+
+    fn as_u64s(&self) -> (Sign, Self::U64Ref) {
+        (*self).as_u64s()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    fn test() {
+        todo!()
+    }
+}
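The limb conversion in these macros just repacks little-endian bytes into little-endian `u64` limbs. A hypothetical standalone check of the intended behaviour, using `u64::from_le_bytes` as an equivalent of the reverse-then-fold in the macro bodies:

    fn main() {
        // 2^64 + 5 as a u128 splits into limbs [5, 1] (little-endian).
        let x: u128 = (1u128 << 64) + 5;
        let mut limbs = [0u64; 2];
        for (chunk, limb) in x.to_le_bytes().chunks(8).zip(&mut limbs) {
            *limb = u64::from_le_bytes(chunk.try_into().unwrap());
        }
        // With the `Scalar` impl above, `x.as_u64s()` would yield
        // (Sign::Positive, [5, 1]).
        assert_eq!(limbs, [5u64, 1u64]);
    }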
diff --git a/algebra-core/src/ring.rs b/algebra-core/src/ring.rs
new file mode 100644
index 000000000..495e41b37
--- /dev/null
+++ b/algebra-core/src/ring.rs
@@ -0,0 +1,22 @@
+use crate::{module::ScalarExp, AdditiveGroup};
+
+pub trait Ring:
+    AdditiveGroup
+    + ScalarExp<u8>
+    + ScalarExp<u16>
+    + ScalarExp<u32>
+    + ScalarExp<u64>
+    + ScalarExp<u128>
+    + ScalarExp<i8>
+    + ScalarExp<i16>
+    + ScalarExp<i32>
+    + ScalarExp<i64>
+    + ScalarExp<i128>
+    + for<'a> ScalarExp<&'a [u64]>
+{
+    /// The order of the additive group.
+    const ADDITIVE_ORDER: &'static [u64];
+
+    /// The order of the multiplicative group.
+    const MULTIPLICATIVE_ORDER: &'static [u64];
+}
diff --git a/algebra-core/src/scalar_mul/fixed_base.rs b/algebra-core/src/scalar_mul/fixed_base.rs
new file mode 100644
index 000000000..8bab283af
--- /dev/null
+++ b/algebra-core/src/scalar_mul/fixed_base.rs
@@ -0,0 +1,189 @@
+use ark_std::{cfg_iter, cfg_iter_mut, vec::Vec};
+
+use itertools::Itertools;
+#[cfg(feature = "parallel")]
+use rayon::prelude::*;
+
+use crate::{
+    bits::BitIteratorLE,
+    module::{Scalar, ScalarExp, ScalarMul},
+};
+
+pub struct FixedBase;
+
+impl FixedBase {
+    pub fn get_mul_window_size(num_scalars: usize) -> usize {
+        if num_scalars < 32 {
+            3
+        } else {
+            super::ln_without_floats(num_scalars)
+        }
+    }
+
+    pub fn get_window_table_additive<T: ScalarMul<S>, S: Scalar>(
+        scalar_size: usize,
+        window: usize,
+        g: T,
+    ) -> Vec<Vec<T::MulBase>> {
+        let in_window = 1 << window;
+        let outerc = (scalar_size + window - 1) / window;
+        let last_in_window = 1 << (scalar_size - (outerc - 1) * window);
+
+        let mut multiples_of_g = vec![vec![T::zero(); in_window]; outerc];
+
+        let mut g_outer = g;
+        let mut g_outers = Vec::with_capacity(outerc);
+        for _ in 0..outerc {
+            g_outers.push(g_outer);
+            for _ in 0..window {
+                g_outer.double_in_place();
+            }
+        }
+        cfg_iter_mut!(multiples_of_g)
+            .enumerate()
+            .take(outerc)
+            .zip(g_outers)
+            .for_each(|((outer, multiples_of_g), g_outer)| {
+                let cur_in_window = if outer == outerc - 1 {
+                    last_in_window
+                } else {
+                    in_window
+                };
+
+                let mut g_inner = T::zero();
+                for inner in multiples_of_g.iter_mut().take(cur_in_window) {
+                    *inner = g_inner;
+                    g_inner += &g_outer;
+                }
+            });
+        cfg_iter!(multiples_of_g)
+            .map(|s| T::batch_convert_to_mul_base(s))
+            .collect()
+    }
+
+    pub fn windowed_mul<T: ScalarMul<S>, S: Scalar>(
+        outerc: usize,
+        window: usize,
+        multiples_of_g: &[Vec<T::MulBase>],
+        scalar: &S,
+    ) -> T {
+        let modulus_size =
+            S::MAX_BIT_SIZE.expect("can only multiply by fixed-size scalars") as usize;
+        let (sign, scalar_u64s) = scalar.as_u64s();
+        let scalar_val = BitIteratorLE::new(scalar_u64s).collect_vec();
+
+        let mut res = T::from(multiples_of_g[0][0]);
+        for outer in 0..outerc {
+            let mut inner = 0usize;
+            for i in 0..window {
+                if outer * window + i < modulus_size && scalar_val[outer * window + i] {
+                    inner |= 1 << i;
+                }
+            }
+            res += &multiples_of_g[outer][inner];
+        }
+        if sign.is_negative() {
+            res.neg_in_place();
+        }
+        res
+    }
+
+    // TODO use const-generics for the scalar size and window
+    // TODO use iterators of iterators of T::Affine instead of taking owned Vec
+    pub fn msm<T: ScalarMul<S>, S: Scalar>(
+        scalar_size: usize,
+        window: usize,
+        table: &[Vec<T::MulBase>],
+        v: &[S],
+    ) -> Vec<T> {
+        let outerc = (scalar_size + window - 1) / window;
+        assert!(outerc <= table.len());
+
+        cfg_iter!(v)
+            .map(|e| Self::windowed_mul(outerc, window, table, e))
+            .collect::<Vec<_>>()
+    }
+}
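In the table built above, entry `table[outer][inner]` holds `inner * 2^(window * outer) * g`, so `windowed_mul` only has to slice the scalar into `window`-bit chunks and add one table entry per chunk. A small sketch of that decomposition over plain integers (no group arithmetic involved):

    fn main() {
        // k = sum_i inner_i * 2^(w * i), with each inner_i taking w bits.
        let (k, w) = (0b110_101_011u64, 3usize);
        let windows: Vec<u64> = (0..3).map(|i| (k >> (w * i)) & ((1u64 << w) - 1)).collect();
        assert_eq!(windows, [0b011u64, 0b101, 0b110]);
        // Recombining the windows recovers the scalar.
        let recombined: u64 = windows
            .iter()
            .enumerate()
            .map(|(i, inner)| *inner << (w * i))
            .sum();
        assert_eq!(recombined, k);
    }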
+impl FixedBase {
+    pub fn get_window_table_multiplicative<T: ScalarExp<S>, S: Scalar>(
+        scalar_size: usize,
+        window: usize,
+        g: T,
+    ) -> Vec<Vec<T::ExpBase>> {
+        let in_window = 1 << window;
+        let outerc = (scalar_size + window - 1) / window;
+        let last_in_window = 1 << (scalar_size - (outerc - 1) * window);
+
+        let mut multiples_of_g = vec![vec![T::one(); in_window]; outerc];
+
+        let mut g_outer = g;
+        let mut g_outers = Vec::with_capacity(outerc);
+        for _ in 0..outerc {
+            g_outers.push(g_outer);
+            for _ in 0..window {
+                g_outer.square_in_place();
+            }
+        }
+        cfg_iter_mut!(multiples_of_g)
+            .enumerate()
+            .take(outerc)
+            .zip(g_outers)
+            .for_each(|((outer, multiples_of_g), g_outer)| {
+                let cur_in_window = if outer == outerc - 1 {
+                    last_in_window
+                } else {
+                    in_window
+                };
+
+                let mut g_inner = T::one();
+                for inner in multiples_of_g.iter_mut().take(cur_in_window) {
+                    *inner = g_inner;
+                    g_inner *= &g_outer;
+                }
+            });
+        cfg_iter!(multiples_of_g)
+            .map(|s| T::batch_convert_to_exp_base(s))
+            .collect()
+    }
+
+    pub fn windowed_exp<T: ScalarExp<S>, S: Scalar>(
+        outerc: usize,
+        window: usize,
+        powers_of_g: &[Vec<T::ExpBase>],
+        exp: &S,
+    ) -> T {
+        let modulus_size =
+            S::MAX_BIT_SIZE.expect("can only exponentiate with fixed-size scalars") as usize;
+        let (sign, scalar_u64s) = exp.as_u64s();
+        let scalar_val = BitIteratorLE::new(scalar_u64s).collect_vec();
+
+        let mut res = T::from(powers_of_g[0][0]);
+        for outer in 0..outerc {
+            let mut inner = 0usize;
+            for i in 0..window {
+                if outer * window + i < modulus_size && scalar_val[outer * window + i] {
+                    inner |= 1 << i;
+                }
+            }
+            res *= &powers_of_g[outer][inner];
+        }
+        // Negative exponents invert the result, mirroring the negation in
+        // `windowed_mul` above.
+        if sign.is_negative() {
+            res.invert_in_place();
+        }
+        res
+    }
+
+    // TODO use const-generics for the scalar size and window
+    // TODO use iterators of iterators of T::Affine instead of taking owned Vec
+    pub fn mexp<T: ScalarExp<S>, S: Scalar>(
+        exp_size: usize,
+        window: usize,
+        table: &[Vec<T::ExpBase>],
+        v: &[S],
+    ) -> Vec<T> {
+        let outerc = (exp_size + window - 1) / window;
+        assert!(outerc <= table.len());
+
+        cfg_iter!(v)
+            .map(|e| Self::windowed_exp::<T, S>(outerc, window, table, e))
+            .collect::<Vec<_>>()
+    }
+}
diff --git a/algebra-core/src/scalar_mul/mod.rs b/algebra-core/src/scalar_mul/mod.rs
new file mode 100644
index 000000000..97ec6ec29
--- /dev/null
+++ b/algebra-core/src/scalar_mul/mod.rs
@@ -0,0 +1,13 @@
+pub mod wnaf;
+
+pub mod fixed_base;
+// pub mod variable_base;
+
+/// The result of this function is only approximately `ln(a)`.
+/// [`Explanation of usage`]
+///
+/// [`Explanation of usage`]: https://github.com/scipr-lab/zexe/issues/79#issue-556220473
+fn ln_without_floats(a: usize) -> usize {
+    // log2(a) * ln(2)
+    (ark_std::log2(a) * 69 / 100) as usize
+}
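The `69 / 100` constant is a fixed-point ln(2) = 0.693..., so `log2(a) * 69 / 100` approximates `ln(a)` without floating point. A quick check against a standalone re-implementation (which may differ from `ark_std::log2` only in rounding):

    fn approx_ln(a: usize) -> usize {
        // floor(log2(a)) * ln(2), in integer arithmetic
        ((usize::BITS - 1 - a.leading_zeros()) * 69 / 100) as usize
    }

    fn main() {
        // ln(1024) = 6.93...; the approximation gives 10 * 69 / 100 = 6.
        assert_eq!(approx_ln(1024), 6);
    }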
diff --git a/algebra-core/src/scalar_mul/variable_base/mod.rs b/algebra-core/src/scalar_mul/variable_base/mod.rs
new file mode 100644
index 000000000..597eec1e5
--- /dev/null
+++ b/algebra-core/src/scalar_mul/variable_base/mod.rs
@@ -0,0 +1,296 @@
+use ark_ff::{prelude::*, PrimeField};
+use ark_std::{borrow::Borrow, iterable::Iterable, vec::Vec};
+
+#[cfg(feature = "parallel")]
+use rayon::prelude::*;
+
+pub mod stream_pippenger;
+pub use stream_pippenger::*;
+
+use crate::{biginteger::BigInteger, module::ScalarMul};
+
+pub trait VariableBaseMSM: ScalarMul {
+    /// Computes an inner product between the [`PrimeField`] elements in `scalars`
+    /// and the corresponding group elements in `bases`.
+    ///
+    /// If the elements have different length, it will chop the slices to the
+    /// shortest length between `scalars.len()` and `bases.len()`.
+    ///
+    /// Reference: [`VariableBaseMSM::msm`]
+    fn msm_unchecked(bases: &[Self::MulBase], scalars: &[Self::ScalarField]) -> Self {
+        let bigints = cfg_into_iter!(scalars)
+            .map(|s| s.into_bigint())
+            .collect::<Vec<_>>();
+        Self::msm_bigint(bases, &bigints)
+    }
+
+    /// Performs multi-scalar multiplication.
+    ///
+    /// # Warning
+    ///
+    /// This method checks that `bases` and `scalars` have the same length.
+    /// If they are unequal, it returns an error containing
+    /// the shortest length over which the MSM can be performed.
+    fn msm(bases: &[Self::MulBase], scalars: &[Self::ScalarField]) -> Result<Self, usize> {
+        (bases.len() == scalars.len())
+            .then(|| Self::msm_unchecked(bases, scalars))
+            .ok_or(bases.len().min(scalars.len()))
+    }
+
+    /// Optimized implementation of multi-scalar multiplication.
+    ///
+    /// # Note
+    ///
+    /// This method does *not* check that the provided bigintegers are reduced modulo
+    /// the group order; instead, it assumes that this is the case. It is the
+    /// responsibility of the caller to ensure this.
+    fn msm_bigint(
+        bases: &[Self::MulBase],
+        bigints: &[<Self::ScalarField as PrimeField>::BigInt],
+    ) -> Self {
+        if Self::NEGATION_IS_CHEAP {
+            msm_bigint_wnaf(bases, bigints)
+        } else {
+            msm_bigint(bases, bigints)
+        }
+    }
+
+    /// Streaming multi-scalar multiplication algorithm with hard-coded chunk
+    /// size.
+    fn msm_chunks<J, I>(bases_stream: &J, scalars_stream: &I) -> Self
+    where
+        I: Iterable,
+        I::Item: Borrow<Self::ScalarField>,
+        J: Iterable,
+        J::Item: Borrow<Self::MulBase>,
+    {
+        assert!(scalars_stream.len() <= bases_stream.len());
+
+        // remove offset
+        let bases_init = bases_stream.iter();
+        let mut scalars = scalars_stream.iter();
+
+        // align the streams
+        // TODO: change `skip` to `advance_by` once rust-lang/rust#77404 is fixed.
+        // See <https://github.com/rust-lang/rust/issues/77404>
+        let mut bases = bases_init.skip(bases_stream.len() - scalars_stream.len());
+        let step: usize = 1 << 20;
+        let mut result = Self::zero();
+        for _ in 0..(scalars_stream.len() + step - 1) / step {
+            let bases_step = (&mut bases)
+                .take(step)
+                .map(|b| *b.borrow())
+                .collect::<Vec<_>>();
+            let scalars_step = (&mut scalars)
+                .take(step)
+                .map(|s| s.borrow().into_bigint())
+                .collect::<Vec<_>>();
+            result += Self::msm_bigint(bases_step.as_slice(), scalars_step.as_slice());
+        }
+        result
+    }
+}
+
+// Compute msm using windowed non-adjacent form
+fn msm_bigint_wnaf<V: VariableBaseMSM>(
+    bases: &[V::MulBase],
+    bigints: &[<V::ScalarField as PrimeField>::BigInt],
+) -> V {
+    let size = ark_std::cmp::min(bases.len(), bigints.len());
+    let scalars = &bigints[..size];
+    let bases = &bases[..size];
+
+    let c = if size < 32 {
+        3
+    } else {
+        super::ln_without_floats(size) + 2
+    };
+
+    let num_bits = V::ScalarField::MODULUS_BIT_SIZE as usize;
+    let digits_count = (num_bits + c - 1) / c;
+    let scalar_digits = scalars
+        .iter()
+        .flat_map(|s| make_digits(s, c, num_bits))
+        .collect::<Vec<_>>();
+    let zero = V::zero();
+    let window_sums: Vec<_> = ark_std::cfg_into_iter!(0..digits_count)
+        .map(|i| {
+            let mut buckets = vec![zero; 1 << c];
+            for (digits, base) in scalar_digits.chunks(digits_count).zip(bases) {
+                use ark_std::cmp::Ordering;
+                // `digits` holds all `digits_count` signed digits of one scalar;
+                // window `i` uses the `i`-th digit.
+                let scalar = digits[i];
+                match 0.cmp(&scalar) {
+                    Ordering::Less => buckets[(scalar - 1) as usize] += base,
+                    Ordering::Greater => buckets[(-scalar - 1) as usize] -= base,
+                    Ordering::Equal => (),
+                }
+            }
+
+            let mut running_sum = V::zero();
+            let mut res = V::zero();
+            buckets.into_iter().rev().for_each(|b| {
+                running_sum += &b;
+                res += &running_sum;
+            });
+            res
+        })
+        .collect();
+
+    // We store the sum for the lowest window.
+    let lowest = *window_sums.first().unwrap();
+
+    // We're traversing windows from high to low.
+    lowest
+        + &window_sums[1..]
+            .iter()
+            .rev()
+            .fold(zero, |mut total, sum_i| {
+                total += sum_i;
+                for _ in 0..c {
+                    total.double_in_place();
+                }
+                total
+            })
+}
+
+/// Optimized implementation of multi-scalar multiplication.
+fn msm_bigint<V: VariableBaseMSM>(
+    bases: &[V::MulBase],
+    bigints: &[<V::ScalarField as PrimeField>::BigInt],
+) -> V {
+    let size = ark_std::cmp::min(bases.len(), bigints.len());
+    let scalars = &bigints[..size];
+    let bases = &bases[..size];
+    let scalars_and_bases_iter = scalars.iter().zip(bases).filter(|(s, _)| !s.is_zero());
+
+    let c = if size < 32 {
+        3
+    } else {
+        super::ln_without_floats(size) + 2
+    };
+
+    let num_bits = V::ScalarField::MODULUS_BIT_SIZE as usize;
+    let one = V::ScalarField::one().into_bigint();
+
+    let zero = V::zero();
+    let window_starts: Vec<_> = (0..num_bits).step_by(c).collect();
+
+    // Each window is of size `c`.
+    // We divide up the bits 0..num_bits into windows of size `c`, and
+    // in parallel process each such window.
+    let window_sums: Vec<_> = ark_std::cfg_into_iter!(window_starts)
+        .map(|w_start| {
+            let mut res = zero;
+            // We don't need the "zero" bucket, so we only have 2^c - 1 buckets.
+            let mut buckets = vec![zero; (1 << c) - 1];
+            // This clone is cheap, because the iterator contains just a
+            // pointer and an index into the original vectors.
+            scalars_and_bases_iter.clone().for_each(|(&scalar, base)| {
+                if scalar == one {
+                    // We only process unit scalars once in the first window.
+                    if w_start == 0 {
+                        res += base;
+                    }
+                } else {
+                    let mut scalar = scalar;
+
+                    // We right-shift by w_start, thus getting rid of the
+                    // lower bits.
+                    scalar.divn(w_start as u32);
+
+                    // We mod the remaining bits by 2^{window size}, thus taking `c` bits.
+                    let scalar = scalar.as_ref()[0] % (1 << c);
+
+                    // If the scalar is non-zero, we update the corresponding
+                    // bucket.
+                    // (Recall that `buckets` doesn't have a zero bucket.)
+                    if scalar != 0 {
+                        buckets[(scalar - 1) as usize] += base;
+                    }
+                }
+            });
+
+            // Compute sum_{i in 0..num_buckets} (sum_{j in i..num_buckets} bucket[j])
+            // This is computed below for b buckets, using 2b curve additions.
+            //
+            // We could first normalize `buckets` and then use mixed-addition
+            // here, but that's slower for the kinds of groups we care about
+            // (Short Weierstrass curves and Twisted Edwards curves).
+            // In the case of Short Weierstrass curves,
+            // mixed addition saves ~4 field multiplications per addition.
+            // However normalization (with the inversion batched) takes ~6
+            // field multiplications per element,
+            // hence batch normalization is a slowdown.
+
+            // `running_sum` = sum_{j in i..num_buckets} bucket[j],
+            // where we iterate backward from i = num_buckets to 0.
+            let mut running_sum = V::zero();
+            buckets.into_iter().rev().for_each(|b| {
+                running_sum += &b;
+                res += &running_sum;
+            });
+            res
+        })
+        .collect();
+
+    // We store the sum for the lowest window.
+    let lowest = *window_sums.first().unwrap();
+
+    // We're traversing windows from high to low.
+    lowest
+        + &window_sums[1..]
+            .iter()
+            .rev()
+            .fold(zero, |mut total, sum_i| {
+                total += sum_i;
+                for _ in 0..c {
+                    total.double_in_place();
+                }
+                total
+            })
+}
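The bucket trick in both MSM variants computes each window sum in one pass: after dropping every base into the bucket of its digit, the backwards running-sum loop yields sum_j (j + 1) * bucket[j], i.e. every base weighted by its digit. A toy version where the "group" is plain integer addition (illustrative only):

    fn main() {
        // One window, c = 2 bits: digits in 1..=3 go into buckets [0..3).
        let digits_and_bases = [(3u64, 10u64), (1, 20), (2, 30), (3, 40)];
        let mut buckets = [0u64; 3]; // no bucket for digit 0
        for &(digit, base) in &digits_and_bases {
            buckets[(digit - 1) as usize] += base;
        }
        // Suffix sums weight bucket[j] by (j + 1).
        let (mut running_sum, mut res) = (0u64, 0u64);
        for b in buckets.into_iter().rev() {
            running_sum += b;
            res += running_sum;
        }
        let expected: u64 = digits_and_bases.iter().map(|&(d, b)| d * b).sum();
        assert_eq!(res, expected); // 230 both ways
    }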
+// From: https://github.com/arkworks-rs/gemini/blob/main/src/kzg/msm/variable_base.rs#L20
+fn make_digits(a: &impl BigInteger, w: usize, num_bits: usize) -> Vec<i64> {
+    let scalar = a.as_ref();
+    let radix: u64 = 1 << w;
+    let window_mask: u64 = radix - 1;
+
+    let mut carry = 0u64;
+    let num_bits = if num_bits == 0 {
+        a.num_bits() as usize
+    } else {
+        num_bits
+    };
+    let digits_count = (num_bits + w - 1) / w;
+    let mut digits = vec![0i64; digits_count];
+    for (i, digit) in digits.iter_mut().enumerate() {
+        // Construct a buffer of bits of the scalar, starting at `bit_offset`.
+        let bit_offset = i * w;
+        let u64_idx = bit_offset / 64;
+        let bit_idx = bit_offset % 64;
+        // Read the bits from the scalar
+        let bit_buf = if bit_idx < 64 - w || u64_idx == scalar.len() - 1 {
+            // This window's bits are contained in a single u64,
+            // or it's the last u64 anyway.
+            scalar[u64_idx] >> bit_idx
+        } else {
+            // Combine the current u64's bits with the bits from the next u64
+            (scalar[u64_idx] >> bit_idx) | (scalar[1 + u64_idx] << (64 - bit_idx))
+        };
+
+        // Read the actual coefficient value from the window
+        let coef = carry + (bit_buf & window_mask); // coef = [0, 2^w)
+
+        // Recenter coefficients from [0, 2^w) to [-2^w/2, 2^w/2)
+        carry = (coef + radix / 2) >> w;
+        *digit = (coef as i64) - (carry << w) as i64;
+    }
+
+    digits[digits_count - 1] += (carry << w) as i64;
+
+    digits
+}
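Concretely, here is the recoding `make_digits` performs on a small scalar, as a hypothetical standalone rewrite using the same carry arithmetic (w = 3, radix 8, digits recentered into [-4, 4)):

    fn main() {
        // 300 = -4 + (-2)*8 + 5*64; the final carry folds into the top digit.
        let (mut e, w) = (300u64, 3u32);
        let radix = 1u64 << w;
        let digits_count = 3; // ceil(9 bits / 3)
        let mut digits = vec![0i64; digits_count];
        let mut carry = 0u64;
        for d in digits.iter_mut() {
            let coef = carry + (e & (radix - 1)); // low w bits plus carry
            carry = (coef + radix / 2) >> w;
            *d = coef as i64 - (carry << w) as i64;
            e >>= w;
        }
        digits[digits_count - 1] += (carry << w) as i64;
        assert_eq!(digits, vec![-4, -2, 5]);
        // Recombining the signed digits recovers the scalar.
        let value: i64 = digits.iter().rev().fold(0, |acc, &d| (acc << w) + d);
        assert_eq!(value, 300);
    }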
diff --git a/algebra-core/src/scalar_mul/variable_base/stream_pippenger.rs b/algebra-core/src/scalar_mul/variable_base/stream_pippenger.rs
new file mode 100644
index 000000000..dc62ba4cb
--- /dev/null
+++ b/algebra-core/src/scalar_mul/variable_base/stream_pippenger.rs
@@ -0,0 +1,125 @@
+//! A space-efficient implementation of Pippenger's algorithm.
+use ark_ff::{PrimeField, Zero};
+
+use ark_std::{borrow::Borrow, vec::Vec};
+use hashbrown::HashMap;
+
+use super::VariableBaseMSM;
+
+/// Struct for the chunked Pippenger algorithm.
+pub struct ChunkedPippenger<G: VariableBaseMSM> {
+    scalars_buffer: Vec<<G::ScalarField as PrimeField>::BigInt>,
+    bases_buffer: Vec<G::MulBase>,
+    result: G,
+    buf_size: usize,
+}
+
+impl<G: VariableBaseMSM> ChunkedPippenger<G> {
+    /// Initialize a chunked Pippenger instance with the given maximum buffer size.
+    pub fn new(max_msm_buffer: usize) -> Self {
+        Self {
+            scalars_buffer: Vec::with_capacity(max_msm_buffer),
+            bases_buffer: Vec::with_capacity(max_msm_buffer),
+            result: G::zero(),
+            buf_size: max_msm_buffer,
+        }
+    }
+
+    /// Initialize a chunked Pippenger instance with the given buffer size.
+    pub fn with_size(buf_size: usize) -> Self {
+        Self {
+            scalars_buffer: Vec::with_capacity(buf_size),
+            bases_buffer: Vec::with_capacity(buf_size),
+            result: G::zero(),
+            buf_size,
+        }
+    }
+
+    /// Add a new (base, scalar) pair into the instance.
+    #[inline(always)]
+    pub fn add<B, S>(&mut self, base: B, scalar: S)
+    where
+        B: Borrow<G::MulBase>,
+        S: Borrow<<G::ScalarField as PrimeField>::BigInt>,
+    {
+        self.scalars_buffer.push(*scalar.borrow());
+        self.bases_buffer.push(*base.borrow());
+        if self.scalars_buffer.len() == self.buf_size {
+            self.result.add_assign(G::msm_bigint(
+                self.bases_buffer.as_slice(),
+                self.scalars_buffer.as_slice(),
+            ));
+            self.scalars_buffer.clear();
+            self.bases_buffer.clear();
+        }
+    }
+
+    /// Output the final Pippenger algorithm result.
+    #[inline(always)]
+    pub fn finalize(mut self) -> G {
+        if !self.scalars_buffer.is_empty() {
+            self.result +=
+                G::msm_bigint(self.bases_buffer.as_slice(), self.scalars_buffer.as_slice());
+        }
+        self.result
+    }
+}
+
+/// Hash map struct for Pippenger algorithm.
+pub struct HashMapPippenger<G: VariableBaseMSM> {
+    buffer: HashMap<G::MulBase, G::ScalarField>,
+    result: G,
+    buf_size: usize,
+}
+
+impl<G: VariableBaseMSM> HashMapPippenger<G> {
+    /// Produce a new hash map with the maximum msm buffer size.
+    pub fn new(max_msm_buffer: usize) -> Self {
+        Self {
+            buffer: HashMap::with_capacity(max_msm_buffer),
+            result: G::zero(),
+            buf_size: max_msm_buffer,
+        }
+    }
+
+    /// Add a new (base, scalar) pair into the hash map.
+    #[inline(always)]
+    pub fn add<B, S>(&mut self, base: B, scalar: S)
+    where
+        B: Borrow<G::MulBase>,
+        S: Borrow<G::ScalarField>,
+    {
+        // Update the entry, guarding the possibility that it has already been set.
+        let entry = self
+            .buffer
+            .entry(*base.borrow())
+            .or_insert(G::ScalarField::zero());
+        *entry += *scalar.borrow();
+        if self.buffer.len() == self.buf_size {
+            let bases = self.buffer.keys().cloned().collect::<Vec<_>>();
+            let scalars = self
+                .buffer
+                .values()
+                .map(|s| s.into_bigint())
+                .collect::<Vec<_>>();
+            self.result += G::msm_bigint(&bases, &scalars);
+            self.buffer.clear();
+        }
+    }
+
+    /// Update the final result with the (base, scalar) pairs remaining in the hash map.
+    #[inline(always)]
+    pub fn finalize(mut self) -> G {
+        if !self.buffer.is_empty() {
+            let bases = self.buffer.keys().cloned().collect::<Vec<_>>();
+            let scalars = self
+                .buffer
+                .values()
+                .map(|s| s.into_bigint())
+                .collect::<Vec<_>>();
+
+            self.result += G::msm_bigint(&bases, &scalars);
+        }
+        self.result
+    }
+}
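A typical streaming use of `ChunkedPippenger`, sketched against the interface above (the function and its parameters are illustrative, not part of the patch; assumes `use ark_ff::PrimeField;`):

    // Accumulate an unbounded stream of pairs with bounded memory.
    fn accumulate<G: VariableBaseMSM>(
        pairs: impl Iterator<Item = (G::MulBase, <G::ScalarField as PrimeField>::BigInt)>,
    ) -> G {
        // Off-load a partial MSM whenever 2^16 pairs have been buffered.
        let mut acc = ChunkedPippenger::<G>::new(1 << 16);
        for (base, scalar) in pairs {
            acc.add(base, scalar);
        }
        acc.finalize()
    }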
diff --git a/algebra-core/src/scalar_mul/wnaf.rs b/algebra-core/src/scalar_mul/wnaf.rs
new file mode 100644
index 000000000..b35cf5c89
--- /dev/null
+++ b/algebra-core/src/scalar_mul/wnaf.rs
@@ -0,0 +1,87 @@
+use crate::{
+    module::{Scalar, ScalarMul},
+    AdditiveGroup,
+};
+use ark_std::vec::Vec;
+
+/// A helper type that contains all the context required for computing
+/// a window NAF multiplication of a group element by a scalar.
+pub struct WnafContext {
+    pub window_size: usize,
+}
+
+impl WnafContext {
+    /// Constructs a new context for a window of size `window_size`.
+    ///
+    /// # Panics
+    ///
+    /// This function will panic unless `2 <= window_size < 64`.
+    pub fn new(window_size: usize) -> Self {
+        assert!(window_size >= 2);
+        assert!(window_size < 64);
+        Self { window_size }
+    }
+}
+
+impl WnafContext {
+    pub fn additive_table<G: AdditiveGroup>(&self, mut base: G) -> Vec<G> {
+        let mut table = Vec::with_capacity(1 << (self.window_size - 1));
+        let dbl = base.double();
+
+        for _ in 0..(1 << (self.window_size - 1)) {
+            table.push(base);
+            base += &dbl;
+        }
+        table
+    }
+
+    /// Computes scalar multiplication of a group element `g` by `scalar`.
+    ///
+    /// This method uses the wNAF algorithm to perform the scalar
+    /// multiplication; first, it uses `Self::additive_table` to calculate an
+    /// appropriate table of multiples of `g`, and then uses the wNAF
+    /// algorithm to compute the scalar multiple.
+    pub fn mul<G: ScalarMul<S>, S: Scalar>(&self, g: G, scalar: &S) -> G {
+        let table = self.additive_table(g);
+        self.mul_with_table(&table, scalar).unwrap()
+    }
+
+    /// Computes scalar multiplication of a group element by `scalar`.
+    /// `base_table` holds precomputed multiples of the group element; it can be
+    /// generated using `Self::additive_table`. `scalar` is any type implementing
+    /// `Scalar`.
+    ///
+    /// Returns `None` if the table is too small.
+    pub fn mul_with_table<G: ScalarMul<S>, S: Scalar>(
+        &self,
+        base_table: &[G],
+        scalar: &S,
+    ) -> Option<G> {
+        if 1 << (self.window_size - 1) > base_table.len() {
+            return None;
+        }
+        let scalar_wnaf = scalar.into_bigint().find_wnaf(self.window_size).unwrap();
+
+        let mut result = G::zero();
+
+        let mut found_non_zero = false;
+
+        for n in scalar_wnaf.iter().rev() {
+            if found_non_zero {
+                result.double_in_place();
+            }
+
+            if *n != 0 {
+                found_non_zero = true;
+
+                if *n > 0 {
+                    result += &base_table[(n / 2) as usize];
+                } else {
+                    result -= &base_table[((-n) / 2) as usize];
+                }
+            }
+        }
+
+        Some(result)
+    }
+}
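The table built by `additive_table` holds exactly the odd multiples g, 3g, 5g, ..., (2^w - 1)g, which is why a (necessarily odd) wNAF digit n indexes entry n / 2. A mirror of that layout over plain integers (g "=" 7, window 3):

    fn main() {
        let (g, window_size) = (7i64, 3usize);
        let dbl = 2 * g;
        let mut table = Vec::new();
        let mut base = g;
        for _ in 0..(1 << (window_size - 1)) {
            table.push(base);
            base += dbl;
        }
        // Odd multiples only: 1g, 3g, 5g, 7g.
        assert_eq!(table, vec![7, 21, 35, 49]);
        // A digit n = 5 selects table[n / 2] = 5 * g.
        assert_eq!(table[5 / 2], 5 * 7);
    }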
diff --git a/ff-macros/Cargo.toml b/common-macros/Cargo.toml
similarity index 96%
rename from ff-macros/Cargo.toml
rename to common-macros/Cargo.toml
index 7aef85931..f562fc342 100644
--- a/ff-macros/Cargo.toml
+++ b/common-macros/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "ark-ff-macros"
+name = "ark-algebra-macros"
 version = "0.4.2"
 authors = [ "arkworks contributors" ]
 description = "A library for generating x86-64 assembly for finite field multiplication"
diff --git a/ff-macros/LICENSE-APACHE b/common-macros/LICENSE-APACHE
similarity index 100%
rename from ff-macros/LICENSE-APACHE
rename to common-macros/LICENSE-APACHE
diff --git a/ff-macros/LICENSE-MIT b/common-macros/LICENSE-MIT
similarity index 100%
rename from ff-macros/LICENSE-MIT
rename to common-macros/LICENSE-MIT
diff --git a/ff-macros/src/lib.rs b/common-macros/src/lib.rs
similarity index 100%
rename from ff-macros/src/lib.rs
rename to common-macros/src/lib.rs
diff --git a/ff-macros/src/montgomery/add.rs b/common-macros/src/montgomery/add.rs
similarity index 100%
rename from ff-macros/src/montgomery/add.rs
rename to common-macros/src/montgomery/add.rs
diff --git a/ff-macros/src/montgomery/biginteger.rs b/common-macros/src/montgomery/biginteger.rs
similarity index 100%
rename from ff-macros/src/montgomery/biginteger.rs
rename to common-macros/src/montgomery/biginteger.rs
diff --git a/ff-macros/src/montgomery/double.rs b/common-macros/src/montgomery/double.rs
similarity index 100%
rename from ff-macros/src/montgomery/double.rs
rename to common-macros/src/montgomery/double.rs
diff --git a/ff-macros/src/montgomery/mod.rs b/common-macros/src/montgomery/mod.rs
similarity index 100%
rename from ff-macros/src/montgomery/mod.rs
rename to common-macros/src/montgomery/mod.rs
diff --git a/ff-macros/src/montgomery/mul.rs b/common-macros/src/montgomery/mul.rs
similarity index 100%
rename from ff-macros/src/montgomery/mul.rs
rename to common-macros/src/montgomery/mul.rs
diff --git a/ff-macros/src/montgomery/square.rs b/common-macros/src/montgomery/square.rs
similarity index 100%
rename from ff-macros/src/montgomery/square.rs
rename to common-macros/src/montgomery/square.rs
diff --git a/ff-macros/src/montgomery/sum_of_products.rs b/common-macros/src/montgomery/sum_of_products.rs
similarity index 100%
rename from ff-macros/src/montgomery/sum_of_products.rs
rename to common-macros/src/montgomery/sum_of_products.rs
diff --git a/ff-macros/src/unroll.rs b/common-macros/src/unroll.rs
similarity index 100%
rename from ff-macros/src/unroll.rs
rename to common-macros/src/unroll.rs
diff --git a/ff-macros/src/utils.rs b/common-macros/src/utils.rs
similarity index 100%
rename from ff-macros/src/utils.rs
rename to common-macros/src/utils.rs
diff --git a/ff/Cargo.toml b/ff/Cargo.toml
index 3336c15f4..40da270e2 100644
--- a/ff/Cargo.toml
+++ b/ff/Cargo.toml
@@ -14,9 +14,9 @@ edition = "2021"
 rust-version = "1.63"
 
 [dependencies]
-ark-group = { version = "0.4.2", path = "../group" }
+ark-algebra-core = { version = "0.4.2", path = "../algebra-core" }
 ark-ff-asm = { version = "0.4.2", path = "../ff-asm" }
-ark-ff-macros = { version = "0.4.2", path = "../ff-macros" }
+ark-algebra-macros = { version = "0.4.2", path = "../common-macros" }
 ark-std = { version = "0.4.0", default-features = false }
 ark-serialize = { version = "0.4.2", path = "../serialize", default-features = false }
 derivative = { version = "2", features = ["use_core"] }
diff --git a/ff/src/lib.rs b/ff/src/lib.rs
index fce601ab8..eaff71b14 100644
--- a/ff/src/lib.rs
+++ b/ff/src/lib.rs
@@ -16,9 +16,8 @@ extern crate ark_std;
 #[macro_use]
 extern crate derivative;
 
-#[macro_use]
-pub mod biginteger;
-pub use biginteger::{
+pub use ark_algebra_core::biginteger;
+pub use ark_algebra_core::biginteger::{
     signed_mod_reduction, BigInt, BigInteger, BigInteger128, BigInteger256, BigInteger320,
     BigInteger384, BigInteger448, BigInteger64, BigInteger768, BigInteger832,
 };
diff --git a/group/src/lib.rs b/group/src/lib.rs
deleted file mode 100644
index a8be36bf5..000000000
--- a/group/src/lib.rs
+++ /dev/null
@@ -1,70 +0,0 @@
-use ark_std::{ops::{Add, AddAssign, MulAssign, SubAssign, Sub, Mul, Neg}, fmt::{Display, Debug}, hash::Hash};
-use ark_std::UniformRand;
-
-use ark_serialize::{CanonicalSerialize, CanonicalDeserialize};
-use num_traits::Zero;
-use zeroize::Zeroize;
-
-pub trait AdditiveGroup:
-    Eq
-    + 'static
-    + Sized
-    + CanonicalSerialize
-    + CanonicalDeserialize
-    + Copy
-    + Clone
-    + Default
-    + Send
-    + Sync
-    + Hash
-    + Debug
-    + Display
-    + UniformRand
-    + Zeroize
-    + Zero
-    + Neg<Output = Self>
-    + Add<Self, Output = Self>
-    + Sub<Self, Output = Self>
-    + Mul<<Self as AdditiveGroup>::Scalar, Output = Self>
-    + AddAssign<Self>
-    + SubAssign<Self>
-    + MulAssign<<Self as AdditiveGroup>::Scalar>
-    + for<'a> Add<&'a Self, Output = Self>
-    + for<'a> Sub<&'a Self, Output = Self>
-    + for<'a> Mul<&'a <Self as AdditiveGroup>::Scalar, Output = Self>
-    + for<'a> AddAssign<&'a Self>
-    + for<'a> SubAssign<&'a Self>
-    + for<'a> MulAssign<&'a <Self as AdditiveGroup>::Scalar>
-    + for<'a> Add<&'a mut Self, Output = Self>
-    + for<'a> Sub<&'a mut Self, Output = Self>
-    + for<'a> Mul<&'a mut <Self as AdditiveGroup>::Scalar, Output = Self>
-    + for<'a> AddAssign<&'a mut Self>
-    + for<'a> SubAssign<&'a mut Self>
-    + for<'a> MulAssign<&'a mut <Self as AdditiveGroup>::Scalar>
-    + ark_std::iter::Sum
-    + for<'a> ark_std::iter::Sum<&'a Self>
-{
-    type Scalar;
-
-    /// The additive identity of the field.
-    const ZERO: Self;
-
-    /// Doubles `self`.
-    #[must_use]
-    fn double(&self) -> Self {
-        let mut copy = *self;
-        copy.double_in_place();
-        copy
-    }
-    /// Doubles `self` in place.
-    fn double_in_place(&mut self) -> &mut Self {
-        self.add_assign(*self);
-        self
-    }
-
-    /// Negates `self` in place.
-    fn neg_in_place(&mut self) -> &mut Self {
-        *self = -(*self);
-        self
-    }
-}

From 04b410656d1fdc7e93117900edcf0c977efe1f2e Mon Sep 17 00:00:00 2001
From: Pratyush Mishra
Date: Thu, 3 Aug 2023 12:57:37 -0400
Subject: [PATCH 3/3] Work

---
 algebra-core/src/group/mod.rs       |  5 +-
 algebra-core/src/module/mod.rs      |  2 +-
 algebra-core/src/module/scalar.rs   | 64 ++++++++++++++++++++++++
 algebra-core/src/scalar_mul/wnaf.rs | 75 +++++++++++++++++++++++++++--
 ff/src/fields/utils.rs              | 14 ------
 5 files changed, 140 insertions(+), 20 deletions(-)

diff --git a/algebra-core/src/group/mod.rs b/algebra-core/src/group/mod.rs
index 9e7c799a7..03222a812 100644
--- a/algebra-core/src/group/mod.rs
+++ b/algebra-core/src/group/mod.rs
@@ -139,9 +139,10 @@ pub trait MultiplicativeGroup:
         invert_and_mul_batch(v, &Self::one());
     }
 
-    fn invert_batch(v: &[Self]) {
+    fn invert_batch(v: &[Self]) -> Vec<Self> {
         let mut v = v.to_vec();
-        Self::invert_batch(&mut v);
+        Self::invert_batch_in_place(&mut v);
+        v
     }
 }
diff --git a/algebra-core/src/module/mod.rs b/algebra-core/src/module/mod.rs
index c377a2035..8c80ea7c0 100644
--- a/algebra-core/src/module/mod.rs
+++ b/algebra-core/src/module/mod.rs
@@ -102,7 +102,7 @@ pub trait ScalarExp<Exponent: Sc>:
     fn pow_exp_base(base: &Self::ExpBase, exp: Exponent) -> Self {
         let mut res = Self::one();
         let (sign, exp) = exp.as_u64s();
-        for (i, bit) in crate::bits::BitIteratorBE::without_leading_zeros(exp).enumerate() {
+        for bit in crate::bits::BitIteratorBE::without_leading_zeros(exp) {
             res.square_in_place();
             if bit {
                 res *= base;
diff --git a/algebra-core/src/module/scalar.rs b/algebra-core/src/module/scalar.rs
index 4077a4481..235b1f4c3 100644
--- a/algebra-core/src/module/scalar.rs
+++ b/algebra-core/src/module/scalar.rs
@@ -1,5 +1,7 @@
 use ark_std::fmt::Debug;
 
+use crate::biginteger::{signed_mod_reduction, arithmetic::{sbb_for_sub_with_borrow, adc_for_add_with_carry}};
+
 #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)]
 pub enum Sign {
     Negative = -1,
@@ -25,6 +27,68 @@ pub trait Scalar: Send + Sync + Copy + Debug {
     fn as_bytes(&self) -> (Sign, Self::U8Ref);
 
     fn as_u64s(&self) -> (Sign, Self::U64Ref);
+
+    /// Returns the windowed non-adjacent form of `self`, for a window of size `w`.
+    fn find_wnaf(&self, w: usize) -> Option<Vec<i64>> {
+        // `w` must lie in 2..64: wNAF needs w >= 2 by definition, and w < 64
+        // makes sure that `i64` can fit each signed digit
+        if (2..64).contains(&w) {
+            let mut res = vec![];
+            let mut e = self.as_u64s().1.as_ref().to_vec();
+
+            while !is_zero(&e) {
+                let z: i64;
+                if is_odd(&e) {
+                    z = signed_mod_reduction(e[0], 1 << w);
+                    if z >= 0 {
+                        sub_with_borrow(&mut e, z as u64);
+                    } else {
+                        add_with_carry(&mut e, (-z) as u64);
+                    }
+                } else {
+                    z = 0;
+                }
+                res.push(z);
+                div2(&mut e);
+            }
+
+            Some(res)
+        } else {
+            None
+        }
+    }
 }
+
+fn is_zero(a: &[u64]) -> bool {
+    a.iter().all(|x| *x == 0)
+}
+
+fn is_odd(a: &[u64]) -> bool {
+    a[0] % 2 == 1
+}
+
+fn sub_with_borrow(a: &mut [u64], b: u64) {
+    let mut borrow = sbb_for_sub_with_borrow(&mut a[0], b, 0);
+    for a in &mut a[1..] {
+        borrow = sbb_for_sub_with_borrow(a, 0, borrow);
+    }
+}
+
+fn add_with_carry(a: &mut [u64], b: u64) {
+    let mut carry = adc_for_add_with_carry(&mut a[0], b, 0);
+    for a in &mut a[1..] {
+        carry = adc_for_add_with_carry(a, 0, carry);
+    }
+}
+
+fn div2(a: &mut [u64]) {
+    let mut t = 0;
+    for a in a.iter_mut().rev() {
+        let t2 = *a << 63;
+        *a >>= 1;
+        *a |= t;
+        t = t2;
+    }
+}
 
 macro_rules! impl_scalar_unsigned {
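For intuition, the digit stream `find_wnaf` produces for 7 with `w = 3` is `[-1, 0, 0, 1]`, least-significant digit first, i.e. 7 = -1 + 2^3. A toy check of that representation and the wNAF invariant:

    fn main() {
        // wNAF digits of 7 for w = 3: 7 = -1 + 0*2 + 0*4 + 1*8.
        let digits = [-1i64, 0, 0, 1];
        let value: i64 = digits
            .iter()
            .enumerate()
            .map(|(i, d)| d * (1i64 << i))
            .sum();
        assert_eq!(value, 7);
        // Every non-zero digit is odd; that is the wNAF invariant that lets
        // the multiplication routines index tables of odd multiples only.
        assert!(digits.iter().all(|d| d % 2 != 0 || *d == 0));
    }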
diff --git a/algebra-core/src/scalar_mul/wnaf.rs b/algebra-core/src/scalar_mul/wnaf.rs
index b35cf5c89..d238f3fcb 100644
--- a/algebra-core/src/scalar_mul/wnaf.rs
+++ b/algebra-core/src/scalar_mul/wnaf.rs
@@ -1,6 +1,6 @@
 use crate::{
-    module::{Scalar, ScalarMul},
-    AdditiveGroup,
+    module::{Scalar, ScalarMul, ScalarExp},
+    AdditiveGroup, MultiplicativeGroup,
 };
 use ark_std::vec::Vec;
 
@@ -60,7 +60,7 @@ impl WnafContext {
         if 1 << (self.window_size - 1) > base_table.len() {
             return None;
         }
-        let scalar_wnaf = scalar.into_bigint().find_wnaf(self.window_size).unwrap();
+        let scalar_wnaf = scalar.find_wnaf(self.window_size).unwrap();
 
         let mut result = G::zero();
 
@@ -85,3 +85,72 @@ impl WnafContext {
         Some(result)
     }
 }
+
+
+impl WnafContext {
+    pub fn multiplicative_table<G: MultiplicativeGroup>(&self, mut base: G) -> Vec<G> {
+        let mut table = Vec::with_capacity(1 << (self.window_size - 1));
+        let sqr = base.square();
+
+        for _ in 0..(1 << (self.window_size - 1)) {
+            table.push(base);
+            base *= &sqr;
+        }
+        table
+    }
+
+    /// Computes the exponentiation of a group element `g` by `scalar`.
+    ///
+    /// This method uses the wNAF algorithm to perform the exponentiation;
+    /// first, it uses `Self::multiplicative_table` to calculate an
+    /// appropriate table of powers of `g`, and then uses the wNAF
+    /// algorithm to compute the result.
+    pub fn exp<G: ScalarExp<S>, S: Scalar>(&self, g: G, scalar: &S) -> G {
+        let table = self.multiplicative_table(g);
+        self.exp_with_table(&table, scalar).unwrap()
+    }
+
+    /// Computes the exponentiation of a group element by `scalar`.
+    /// `base_table` holds precomputed powers of the group element; it can be
+    /// generated using `Self::multiplicative_table`. `scalar` is any type
+    /// implementing `Scalar`.
+    ///
+    /// Returns `None` if the table is too small.
+    pub fn exp_with_table<G: ScalarExp<S>, S: Scalar>(
+        &self,
+        base_table: &[G],
+        scalar: &S,
+    ) -> Option<G> {
+        if 1 << (self.window_size - 1) > base_table.len() {
+            return None;
+        }
+        let scalar_wnaf = scalar.find_wnaf(self.window_size).unwrap();
+        let inv_table = if G::INVERSION_IS_FAST {
+            vec![]
+        } else {
+            G::invert_batch(base_table)
+        };
+
+        let mut result = G::one();
+
+        let mut found_non_zero = false;
+
+        for n in scalar_wnaf.iter().rev() {
+            if found_non_zero {
+                result.square_in_place();
+            }
+
+            if *n != 0 {
+                found_non_zero = true;
+
+                if *n > 0 {
+                    result *= &base_table[(n / 2) as usize];
+                } else if G::INVERSION_IS_FAST {
+                    // Inversion is cheap for this group, so divide directly
+                    // instead of consulting the (empty) inverse table.
+                    result /= &base_table[((-n) / 2) as usize];
+                } else {
+                    result *= &inv_table[((-n) / 2) as usize];
+                }
+            }
+        }
+
+        Some(result)
+    }
+}
\ No newline at end of file
diff --git a/ff/src/fields/utils.rs b/ff/src/fields/utils.rs
index e670317b3..e69de29bb 100644
--- a/ff/src/fields/utils.rs
+++ b/ff/src/fields/utils.rs
@@ -1,14 +0,0 @@
-/// Calculates the k-adicity of n, i.e., the number of trailing 0s in a base-k
-/// representation.
-pub fn k_adicity(k: u64, mut n: u64) -> u32 {
-    let mut r = 0;
-    while n > 1 {
-        if n % k == 0 {
-            r += 1;
-            n /= k;
-        } else {
-            return r;
-        }
-    }
-    r
-}