Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds PresenceBitmap #895

Merged
merged 3 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions src/main/java/com/amazon/ion/impl/bin/FixedInt.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl.bin

import java.lang.Long.numberOfLeadingZeros

/**
 * Encoding helpers shared by the FixedInt and FixedUInt binary representations.
 *
 * Typical flow: ask [fixedIntLength] or [fixedUIntLength] how many bytes a value occupies, then hand that count
 * to [writeFixedIntOrUIntInto]. Sizing and writing are kept as separate steps so that a caller can act on the
 * encoded size before any bytes are written.
 */
object FixedInt {

    /**
     * Stores the [numBytes] least-significant bytes of [value] into [data], least-significant byte first,
     * beginning at index [offset].
     *
     * The value for [numBytes] normally comes from [fixedIntLength] or [fixedUIntLength]. Lengths 1 through 4
     * are written without a loop; every other length goes through a loop, which writes nothing when [numBytes]
     * is zero or negative.
     */
    @JvmStatic
    inline fun writeFixedIntOrUIntInto(data: ByteArray, offset: Int, value: Long, numBytes: Int) {
        if (numBytes < 1 || numBytes > 4) {
            // General case: emit one byte per iteration, lowest-order byte first.
            var byteIndex = 0
            while (byteIndex < numBytes) {
                data[offset + byteIndex] = (value shr (byteIndex * 8)).toByte()
                byteIndex++
            }
            return
        }
        // Short encodings: fall through one byte at a time and stop once numBytes bytes are written.
        data[offset] = value.toByte()
        if (numBytes == 1) return
        data[offset + 1] = (value shr 8).toByte()
        if (numBytes == 2) return
        data[offset + 2] = (value shr 16).toByte()
        if (numBytes == 3) return
        data[offset + 3] = (value shr 24).toByte()
    }

    /**
     * Returns the number of bytes needed to hold [value] as a FixedUInt.
     *
     * The result is always at least 1; a value with no set bits still occupies one byte.
     */
    @JvmStatic
    fun fixedUIntLength(value: Long): Int {
        val significantBits = 64 - numberOfLeadingZeros(value)
        return (significantBits - 1) / 8 + 1
    }

    /**
     * Returns the number of bytes needed to hold [value] as a FixedInt.
     *
     * One bit beyond the magnitude bits is always accounted for, so a value whose magnitude fills a whole
     * number of bytes is given one more byte.
     */
    @JvmStatic
    fun fixedIntLength(value: Long): Int {
        // For a negative value, inverting the bits turns its leading ones into leading zeros.
        val magnitudeSource = if (value < 0) value.inv() else value
        val magnitudeBits = 64 - numberOfLeadingZeros(magnitudeSource)
        return magnitudeBits / 8 + 1
    }
}
2 changes: 1 addition & 1 deletion src/main/java/com/amazon/ion/impl/bin/IonEncoder_1_1.java
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ public static int writeIntValue(WriteBuffer buffer, final long value) {
buffer.writeByte(OpCodes.INTEGER_ZERO_LENGTH);
return 1;
}
int length = WriteBuffer.fixedIntLength(value);
int length = FixedInt.fixedIntLength(value);
buffer.writeByte((byte) (OpCodes.INTEGER_ZERO_LENGTH + length));
buffer.writeFixedInt(value);
return 1 + length;
Expand Down
236 changes: 236 additions & 0 deletions src/main/java/com/amazon/ion/impl/bin/PresenceBitmap.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl.bin

import com.amazon.ion.*
import com.amazon.ion.impl.macro.*
import com.amazon.ion.impl.macro.Macro.*

/**
* Utility class for setting, storing, reading, and writing presence bits.
*
* This class provides an API that maps 1:1 with parameters, with a maximum of 128 parameters.
*
* ### Usage – Binary Writer
* When stepping into an E-Expression, obtain a [PresenceBitmap] instance, [initialize] using the macro signature, and
* then reserve the correct number of bytes (see [byteSize]) to later encode the presence bits.
* While in the E-Expression, track the number of expressions or expression groups that have been written with that
* E-Expression as the immediate parent—this is the _parameter_ index. For each expression or expression group that is
* written directly in that container, call [PresenceBitmap.set] with the _parameter_ index and one of [VOID],
* [EXPRESSION], or [GROUP]. To omit an argument, callers to the binary writer will need to write an empty expression
* group (which should be elided and the corresponding presence bits set to `00`) or the binary writer must expose a
* `writeNoExpression()` method or similar.
* When stepping out of the E-Expression, use [PresenceBitmap.writeTo] to encode them into the appropriate location.
*
* ### Usage – Binary Reader
* When stepping into an E-Expression, obtain a [PresenceBitmap] instance, [initialize] using the macro signature, ensure
* that [byteSize] number of bytes is available in the reader's buffer, and call [readFrom] to populate the
* [PresenceBitmap] instance. Then, the presence bits for each parameter can be accessed by its _parameter_ index.
*
* ### Implementation Notes
*
* - We pretend that all parameters (including `!` (required) parameters) will get presence bits, and when reading we
* set the bits for the positions of the `!` parameters to `01` (single expression).
* - Since all the parameter cardinalities (other than `!`) use the same presence bit semantics, the writer doesn't
* need to inspect the signature to figure out what bits to put in our presence bits buffer.
* - Because we have dummy bits for `!` parameters, [PresenceBitmap] can present an API that corresponds 1:1 with
* parameters, so we don't need to separately keep track of a presence bit index and the parameter count.
* - Why longs instead of an array?
* - An array would add another level of indirection
* - An array would require a loop in order to reset all the bytes to zero.
* - Why only 128 parameters?
* - Until proven otherwise, we should not assume that an arbitrarily large number of parameters MUST be supported.
* - The number of parameters could be increased (within limits). It seems reasonable to try to keep this class small
* enough to fit in a single cache line for a modern system—typically 64 bytes.
*
* TODO: Consider whether we can "compile" a specific function that can read the presence bits when we compile a macro.
* That _might_ be more efficient than this approach.
*/
internal class PresenceBitmap {

    companion object {
        /** Presence bits (`00`) for an argument that is omitted. */
        const val VOID = 0b00L
        /** Presence bits (`01`) for an argument that is a single expression. */
        const val EXPRESSION = 0b01L
        /** Presence bits (`10`) for an argument that is an expression group. */
        const val GROUP = 0b10L
        /** Presence bits (`11`) that [validate] rejects for every parameter cardinality. */
        const val RESERVED = 0b11L

        /** Mask selecting a single two-bit slot once it has been shifted into the low bits. */
        private const val TWO_BIT_MASK = 0b11L
        /** Presence bits are only encoded when there are more non-required parameters than this (or a tagless parameter). */
        private const val PRESENCE_BITS_SIZE_THRESHOLD = 2
        private const val PB_SLOTS_PER_BYTE = 4
        private const val PB_SLOTS_PER_LONG = 32
        private const val PB_BITS_PER_SLOT = 2

        /** Four longs of 32 slots each. */
        const val MAX_SUPPORTED_PARAMETERS = PB_SLOTS_PER_LONG * 4
    }

    private var signature: List<Parameter> = emptyList()

    /** The number of parameters for which presence bits must be written. */
    private var size: Int = 0

    /** The total number of parameters in the macro signature */
    val totalParameterCount: Int
        get() = signature.size

    /** The first 32 presence bits slots */
    private var a: Long = 0
    /** The second 32 presence bits slots */
    private var b: Long = 0
    /** The third 32 presence bits slots */
    private var c: Long = 0
    /** The fourth 32 presence bits slots */
    private var d: Long = 0

    /** The number of bytes required to encode this [PresenceBitmap] */
    val byteSize: Int
        get() = size divideByRoundingUp PB_SLOTS_PER_BYTE

    /**
     * Resets this [PresenceBitmap] for the given macro [signature], clearing every slot.
     *
     * Presence bits are only counted (see [byteSize]) when the signature has more than
     * `PRESENCE_BITS_SIZE_THRESHOLD` non-required parameters or contains a tagless parameter.
     *
     * @throws IonException if [signature] has more than [MAX_SUPPORTED_PARAMETERS] parameters.
     */
    fun initialize(signature: List<Parameter>) {
        if (signature.size > MAX_SUPPORTED_PARAMETERS) throw IonException("Macros with more than 128 parameters are not supported by this implementation.")
        this.signature = signature
        a = 0
        b = 0
        c = 0
        d = 0
        // TODO – performance: consider calculating this once for a macro when it is compiled
        // Calculate the actual number of presence bits that will be encoded for the given signature.
        val nonRequiredParametersCount = signature.count { it.cardinality != ParameterCardinality.One }
        val usePresenceBits = nonRequiredParametersCount > PRESENCE_BITS_SIZE_THRESHOLD || signature.any { it.type.isTagless }
        size = if (usePresenceBits) nonRequiredParametersCount else 0
    }

    /**
     * Checks that all presence bits are valid for their corresponding parameters.
     * Throws [IonException] if any are not.
     */
    fun validate() {
        val parameters = signature.iterator()
        var i = 0
        while (parameters.hasNext()) {
            val p = parameters.next()
            val v = getUnchecked(i++)
            val isValid = when (p.cardinality) {
                ParameterCardinality.AtMostOne -> v == VOID || v == EXPRESSION
                ParameterCardinality.One -> v == EXPRESSION
                ParameterCardinality.AtLeastOne -> v == EXPRESSION || v == GROUP
                ParameterCardinality.Any -> v != RESERVED
            }
            if (!isValid) throw IonException("Invalid argument for parameter: $p")
        }
    }

    /**
     * Populates this [PresenceBitmap] from [bytes], where [startInclusive] is the index of the first byte that
     * (potentially) contains presence bits.
     *
     * Required (`!`) parameters have no encoded bits; their slots are set to [EXPRESSION]. Every other parameter
     * takes the next two bits, starting from the least significant bits of each byte.
     *
     * Slots are combined using bitwise OR, so call [initialize] first to clear any earlier contents. This method
     * does not report how many bytes it consumed.
     *
     * NOTE(review): a byte is read for the first non-required parameter even when [byteSize] is 0 (few
     * non-required parameters and no tagless ones), whereas [writeTo] writes nothing in that case. Confirm
     * whether callers are expected to skip [readFrom] for such signatures.
     */
    fun readFrom(bytes: ByteArray, startInclusive: Int) {
        // Doesn't always contain the full byte. We shift the bits over every time we read a value
        // so that the next value is always the least significant bits.
        var currentByte: Long = -1
        var currentPosition: Int = startInclusive
        var bitmapIndex = 0
        var i = 0

        val parameters = signature.iterator()
        while (parameters.hasNext()) {
            val p = parameters.next()
            if (p.cardinality == ParameterCardinality.One) {
                setUnchecked(i++, EXPRESSION)
            } else {
                // Fetch a fresh byte once the four slots of the previous one have been consumed.
                if (bitmapIndex % PB_SLOTS_PER_BYTE == 0) {
                    currentByte = bytes[currentPosition++].toLong()
                }
                setUnchecked(i++, currentByte and TWO_BIT_MASK)
                currentByte = currentByte shr PB_BITS_PER_SLOT
                bitmapIndex++
            }
        }
    }

    /**
     * Gets by _parameter_ index, which includes _required_ parameters that have no presence bits.
     * After [readFrom], the slot of a required parameter holds [EXPRESSION]; otherwise a slot holds whatever
     * has been stored with [set] since the last [initialize] ([VOID] if nothing was stored).
     *
     * @throws IndexOutOfBoundsException if [index] is negative or not less than [totalParameterCount].
     */
    operator fun get(index: Int): Long {
        if (index >= totalParameterCount || index < 0) throw IndexOutOfBoundsException("$index")
        return getUnchecked(index)
    }

    /** Gets a presence bits "slot" without any bounds checking. See [get]. */
    private inline fun getUnchecked(index: Int): Long {
        val shift = (index % PB_SLOTS_PER_LONG) * PB_BITS_PER_SLOT
        val word = when (index / PB_SLOTS_PER_LONG) {
            0 -> a
            1 -> b
            2 -> c
            3 -> d
            // Callers only pass indices below the signature size, which initialize() caps at 128.
            else -> error("Unreachable: presence bits index $index is out of range")
        }
        return (word shr shift) and TWO_BIT_MASK
    }

    /**
     * Sets a presence bits "slot" using bitwise OR with the existing contents.
     *
     * It is not possible to reset individual presence bits; use [initialize] to clear all of them.
     *
     * @throws IndexOutOfBoundsException if [index] is negative or not less than [totalParameterCount].
     * @throws IllegalArgumentException if [value] is not one of [VOID], [EXPRESSION], [GROUP], or [RESERVED].
     */
    operator fun set(index: Int, value: Long) {
        if (index >= totalParameterCount || index < 0) throw IndexOutOfBoundsException("$index")
        // A wider value would be OR-ed into the neighbouring slots and silently corrupt them.
        require(value in VOID..RESERVED) { "Presence bits value must fit in two bits: $value" }
        setUnchecked(index, value)
    }

    /** Sets a presence bits "slot" without any bounds checking. See [set]. */
    private inline fun setUnchecked(index: Int, value: Long) {
        val shiftedBits = (value shl ((index % PB_SLOTS_PER_LONG) * PB_BITS_PER_SLOT))
        when (index / PB_SLOTS_PER_LONG) {
            0 -> a = a or shiftedBits
            1 -> b = b or shiftedBits
            2 -> c = c or shiftedBits
            3 -> d = d or shiftedBits
            // Mirrors getUnchecked: fail loudly rather than dropping the bits.
            else -> error("Unreachable: presence bits index $index is out of range")
        }
    }

    /**
     * Writes this [PresenceBitmap] to [buffer] at the given [position].
     *
     * Only the slots of non-required parameters are written, packed in parameter order starting from the least
     * significant bits. Nothing is written when no presence bits are needed for the current signature.
     */
    fun writeTo(buffer: WriteBuffer, position: Long) {
        if (size == 0) return
        var resultBuffer: Long = 0
        var resultPosition = 0
        var writePosition = position
        var i = 0
        val parameters = signature.iterator()

        while (parameters.hasNext()) {
            val parameter = parameters.next()
            val bits = getUnchecked(i++)
            // Required parameters occupy a slot in memory but have no encoded presence bits.
            if (parameter.cardinality == ParameterCardinality.One) continue
            val destShift = resultPosition * PB_BITS_PER_SLOT
            resultBuffer = resultBuffer or (bits shl destShift)
            resultPosition++
            // Flush each time a whole long (32 slots) has been filled.
            if (resultPosition == PB_SLOTS_PER_LONG) {
                buffer.writeFixedIntOrUIntAt(writePosition, resultBuffer, Long.SIZE_BYTES)
                writePosition += Long.SIZE_BYTES
                resultPosition = 0
                resultBuffer = 0
            }
        }

        // Flush the remaining slots, rounded up to whole bytes.
        val numBytes = resultPosition divideByRoundingUp PB_SLOTS_PER_BYTE
        if (numBytes > 0) buffer.writeFixedIntOrUIntAt(writePosition, resultBuffer, numBytes)
    }

    /**
     * Integer division that rounds up instead of down.
     * E.g.:
     * - 0/4 = 0
     * - 1/4 = 1
     * - ...
     * - 4/4 = 1
     * - 5/4 = 2
     */
    private infix fun Int.divideByRoundingUp(other: Int): Int = (this + (other - 1)) / other
}
47 changes: 24 additions & 23 deletions src/main/java/com/amazon/ion/impl/bin/WriteBuffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -1408,33 +1408,13 @@ public int writeFlexUInt(final BigInteger value) {
return numBytes;
}

/** Get the length of FixedInt for the provided value. */
public static int fixedIntLength(final long value) {
int numMagnitudeBitsRequired;
if (value < 0) {
int numLeadingOnes = Long.numberOfLeadingZeros(~value);
numMagnitudeBitsRequired = 64 - numLeadingOnes;
} else {
int numLeadingZeros = Long.numberOfLeadingZeros(value);
numMagnitudeBitsRequired = 64 - numLeadingZeros;
}
return numMagnitudeBitsRequired / 8 + 1;
}

/**
* Writes a FixedInt to this WriteBuffer, using the minimum number of bytes needed to represent the number.
* Returns the number of bytes that were needed to encode the value.
*/
public int writeFixedInt(final long value) {
int numBytes = fixedIntLength(value);
return _writeFixedIntOrUInt(value, numBytes);
}

/** Get the length of FixedUInt for the provided value. */
public static int fixedUIntLength(final long value) {
int numLeadingZeros = Long.numberOfLeadingZeros(value);
int numMagnitudeBitsRequired = 64 - numLeadingZeros;
return (numMagnitudeBitsRequired - 1) / 8 + 1;
int numBytes = FixedInt.fixedIntLength(value);
return writeFixedIntOrUInt(value, numBytes);
}

/**
Expand All @@ -1445,7 +1425,7 @@ public int writeFixedUInt(final long value) {
if (value < 0) {
throw new IllegalArgumentException("Attempted to write a FixedUInt for " + value);
}
int numBytes = fixedUIntLength(value);
int numBytes = FixedInt.fixedUIntLength(value);
return _writeFixedIntOrUInt(value, numBytes);
}

Expand Down Expand Up @@ -1502,6 +1482,27 @@ private int _writeFixedIntOrUInt(final long value, final int numBytes) {
return numBytes;
}

/**
 * Writes a FixedInt or FixedUInt to this WriteBuffer at the specified position.
 * If the allocator's block size is ever less than 8 bytes, this may throw an IndexOutOfBoundsException.
 *
 * @param position the position in this buffer at which the first (least significant) byte is written
 * @param value the value to encode
 * @param numBytes the number of bytes to write
 */
public void writeFixedIntOrUIntAt(final long position, final long value, final int numBytes) {
    int index = index(position);
    Block block = blocks.get(index);
    int dataOffset = offset(position);
    // The last byte written is at dataOffset + numBytes - 1, so an exact fit (== capacity) still belongs on
    // the fast path. NOTE(review): assumes block.capacity() is the usable length of block.data; confirm.
    if (dataOffset + numBytes <= block.capacity()) {
        FixedInt.writeFixedIntOrUIntInto(block.data, dataOffset, value, numBytes);
    } else {
        // The value spills past the end of this block: encode it into scratch, then copy byte by byte so
        // that each byte is routed by writeByteAt according to its own position.
        FixedInt.writeFixedIntOrUIntInto(scratch, 0, value, numBytes);
        if (index == blocks.size() - 1) {
            // The spill-over bytes need a following block to land in.
            allocateNewBlock();
        }
        for (int i = 0; i < numBytes; i++) {
            writeByteAt(position + i, scratch[i]);
        }
    }
}

/**
* Writes a FixedInt or FixedUInt for an arbitrarily large integer that is represented
* as a byte array in which the most significant byte is the first in the array, and the least
Expand Down
Loading
Loading