Skip to content

Commit

Permalink
Write support for Ion 1.1 system symbols (#941)
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt authored Sep 20, 2024
1 parent 822f572 commit 4b373e5
Show file tree
Hide file tree
Showing 10 changed files with 204 additions and 77 deletions.
6 changes: 6 additions & 0 deletions src/main/java/com/amazon/ion/impl/IonRawTextWriter_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ class IonRawTextWriter_1_1 internal constructor(
numAnnotations += annotations.size
}

/**
 * Adds a system symbol as an annotation for the next value.
 * Delegates to the text overload of [writeAnnotations], passing the symbol's text.
 */
override fun writeAnnotations(annotation0: SystemSymbols_1_1) {
    writeAnnotations(annotation0.text)
}

override fun writeAnnotations(annotation0: CharSequence) {
ensureAnnotationSpace(numAnnotations + 1)
annotationsTextBuffer[numAnnotations++] = annotation0
Expand Down Expand Up @@ -237,6 +239,8 @@ class IonRawTextWriter_1_1 internal constructor(
hasFieldName = true
}

/**
 * Writes a system symbol as the field name for the next value.
 * Delegates to the text overload of [writeFieldName], passing the symbol's text.
 */
override fun writeFieldName(symbol: SystemSymbols_1_1) {
    writeFieldName(symbol.text)
}

override fun writeNull() = writeScalar {
output.appendAscii("null")
}
Expand Down Expand Up @@ -306,6 +310,8 @@ class IonRawTextWriter_1_1 internal constructor(
}
}

/**
 * Writes a system symbol as a symbol value.
 * Delegates to the text overload of [writeSymbol], passing the symbol's text.
 */
override fun writeSymbol(symbol: SystemSymbols_1_1) {
    writeSymbol(symbol.text)
}

/** Writes [value] as a scalar by printing it to the output with `printString`. */
override fun writeString(value: CharSequence) {
    writeScalar {
        output.printString(value)
    }
}

/**
 * Writes a blob as a scalar by passing [value], [start] and [length],
 * together with the writer's options, to `printBlob` on the output.
 */
override fun writeBlob(value: ByteArray, start: Int, length: Int) {
    writeScalar {
        output.printBlob(options, value, start, length)
    }
}
Expand Down
13 changes: 13 additions & 0 deletions src/main/java/com/amazon/ion/impl/IonRawWriter_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ interface IonRawWriter_1_1 {
*/
fun _private_hasFirstAnnotation(sid: Int, text: String?): Boolean

/**
 * Writes one annotation for the next value, given as an Ion 1.1 system symbol.
 * [writeAnnotations] may be called more than once to build up a list of annotations.
 *
 * @param annotation0 the system symbol to add as an annotation.
 */
fun writeAnnotations(annotation0: SystemSymbols_1_1)

/**
* Writes one annotation for the next value.
* [writeAnnotations] may be called more than once to build up a list of annotations.
Expand Down Expand Up @@ -106,6 +112,12 @@ interface IonRawWriter_1_1 {
*/
fun _private_hasFieldName(): Boolean

/**
 * Writes the field name for the next value, given as an Ion 1.1 system symbol.
 * Must be called while in a struct and must be called before [writeAnnotations].
 *
 * @param symbol the system symbol to use as the field name.
 * @throws com.amazon.ion.IonException if annotations are already written for the value or if not in a struct.
 */
fun writeFieldName(symbol: SystemSymbols_1_1)

/**
* Writes the field name for the next value. Must be called while in a struct and must be called before [writeAnnotations].
* @throws com.amazon.ion.IonException if annotations are already written for the value or if not in a struct.
Expand Down Expand Up @@ -195,6 +207,7 @@ interface IonRawWriter_1_1 {

/** Writes a symbol value identified by its symbol ID. */
fun writeSymbol(id: Int)
/** Writes a symbol value identified by its text. */
fun writeSymbol(text: CharSequence)
/** Writes a symbol value identified by an Ion 1.1 system symbol. */
fun writeSymbol(symbol: SystemSymbols_1_1)

fun writeString(value: CharSequence)

Expand Down
56 changes: 26 additions & 30 deletions src/main/java/com/amazon/ion/impl/bin/IonManagedWriter_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,10 @@ internal class IonManagedWriter_1_1(
}

companion object {
private val SYSTEM_SYMBOL_TABLE_MAP = hashMapOf<String, Int>()

init {
var id = 1
Symbols.systemSymbolTable().iterateDeclaredSymbolNames().forEach {
SYSTEM_SYMBOL_TABLE_MAP[it] = id++
}
}

private val ION_VERSION_MARKER_REGEX = Regex("^\\\$ion_\\d+_\\d+$")

private const val TDL_EXPRESSION_GROUP_START = ";"

// These are chosen subjectively to be neither too big nor too small.
private const val MAX_PARAMETERS_IN_ONE_LINE_SIGNATURE = 4
private const val MAX_SYMBOLS_IN_SINGLE_LINE_SYMBOL_TABLE = 10
Expand Down Expand Up @@ -123,7 +116,7 @@ internal class IonManagedWriter_1_1(
// plus a list of symbols added by the current encoding context.

/** The symbol table for the prior encoding context */
private var symbolTable: HashMap<String, Int> = HashMap(SYSTEM_SYMBOL_TABLE_MAP)
private var symbolTable: HashMap<String, Int> = HashMap()
/** Symbols to be interned since the prior encoding context. */
private var newSymbols: HashMap<String, Int> = LinkedHashMap() // Preserves insertion order.

Expand Down Expand Up @@ -264,7 +257,6 @@ internal class IonManagedWriter_1_1(
// in order to avoid writing a data stream with leaky context.
if (depth != 0) throw IllegalStateException("Cannot reset the encoding context while stepped in any value.")
symbolTable.clear()
symbolTable.putAll(SYSTEM_SYMBOL_TABLE_MAP)
macroNames.clear()
macrosById.clear()
macroTable.clear()
Expand All @@ -286,7 +278,7 @@ internal class IonManagedWriter_1_1(
private fun writeEncodingDirective() {
if (newSymbols.isEmpty() && newMacros.isEmpty()) return

systemData.writeAnnotations(SystemSymbols.ION_ENCODING)
systemData.writeAnnotations(SystemSymbols_1_1.ION_ENCODING)
writeSystemSexp {
writeSymbolTableClause()
writeMacroTableClause()
Expand All @@ -307,17 +299,17 @@ internal class IonManagedWriter_1_1(
*/
private fun writeSymbolTableClause() {
val hasSymbolsToAdd = newSymbols.isNotEmpty()
val hasSymbolsToRetain = symbolTable.size > SystemSymbols.ION_1_0_MAX_ID
val hasSymbolsToRetain = symbolTable.isNotEmpty()
if (!hasSymbolsToAdd && !hasSymbolsToRetain) return

writeSystemSexp {
forceNoNewlines(true)
systemData.writeSymbol(SystemSymbols.SYMBOL_TABLE)
systemData.writeSymbol(SystemSymbols_1_1.SYMBOL_TABLE)

// Add previous symbol table
if (hasSymbolsToRetain) {
if (newSymbols.size > 0) forceNoNewlines(false)
writeSymbol(SystemSymbols.ION_ENCODING)
writeSymbol(SystemSymbols_1_1.ION_ENCODING)
}

// Add new symbols
Expand All @@ -344,10 +336,10 @@ internal class IonManagedWriter_1_1(

writeSystemSexp {
forceNoNewlines(true)
writeSymbol(SystemSymbols.MACRO_TABLE)
writeSymbol(SystemSymbols_1_1.MACRO_TABLE)
if (newMacros.size > 0) forceNoNewlines(false)
if (hasMacrosToRetain) {
writeSymbol(SystemSymbols.ION_ENCODING)
writeSymbol(SystemSymbols_1_1.ION_ENCODING)
}
forceNoNewlines(false)
newMacros.forEach { (macro, address) ->
Expand All @@ -366,8 +358,8 @@ internal class IonManagedWriter_1_1(
// TODO: Support for aliases
writeSystemSexp {
forceNoNewlines(true)
writeSymbol(SystemSymbols.EXPORT)
writeAnnotations(SystemSymbols.ION)
writeSymbol(SystemSymbols_1_1.EXPORT)
writeAnnotations(SystemSymbols_1_1.ION)
writeSymbol(macro.macroName)
}
systemData.forceNoNewlines(false)
Expand All @@ -376,7 +368,7 @@ internal class IonManagedWriter_1_1(
private fun writeMacroDefinition(name: String?, macro: TemplateMacro) {
writeSystemSexp {
forceNoNewlines(true)
writeSymbol(SystemSymbols.MACRO)
writeSymbol(SystemSymbols_1_1.MACRO)
if (name != null) writeSymbol(name) else writeNull()

if (macro.signature.size > MAX_PARAMETERS_IN_ONE_LINE_SIGNATURE) forceNoNewlines(false)
Expand Down Expand Up @@ -441,7 +433,7 @@ internal class IonManagedWriter_1_1(
IonType.TIMESTAMP -> writeTimestamp((expression as Expression.TimestampValue).value)
IonType.SYMBOL -> {
writeSystemSexp {
writeSymbol(SystemSymbols.LITERAL)
writeSymbol(SystemSymbols_1_1.LITERAL)
expression.annotations.forEach {
if (it.text != null) {
// TODO: If it's already in the symbol table we could check the
Expand Down Expand Up @@ -474,11 +466,11 @@ internal class IonManagedWriter_1_1(
if (expression.annotations.isNotEmpty()) {
stepInSExp(usingLengthPrefix = false)
numberOfTimesToStepOut[expression.endExclusive]++
writeSymbol(SystemSymbols.ANNOTATE)
writeSymbol(SystemSymbols_1_1.ANNOTATE)

// Write the annotations as symbols within an expression group
writeSystemSexp {
writeSymbol(SystemSymbols.TDL_EXPRESSION_GROUP)
writeSymbol(TDL_EXPRESSION_GROUP_START)
expression.annotations.forEach {
if (it.text != null) {
// TODO: If it's already in the symbol table we could check the
Expand All @@ -490,7 +482,7 @@ internal class IonManagedWriter_1_1(
} else {
// TODO: See if there is a less verbose way to use SIDs in TDL
writeSystemSexp {
writeSymbol(SystemSymbols.LITERAL)
writeSymbol(SystemSymbols_1_1.LITERAL)
writeSymbol(it.sid)
}
}
Expand All @@ -500,7 +492,7 @@ internal class IonManagedWriter_1_1(
// Start a `(make_sexp [ ...` invocation
stepInSExp(usingLengthPrefix = false)
numberOfTimesToStepOut[expression.endExclusive]++
writeSymbol(SystemSymbols.MAKE_SEXP)
writeSymbol(SystemSymbols_1_1.MAKE_SEXP)

if (expression.startInclusive != expression.endExclusive) {
stepInList(usingLengthPrefix = false)
Expand All @@ -527,7 +519,7 @@ internal class IonManagedWriter_1_1(
is Expression.ExpressionGroup -> {
stepInSExp(usingLengthPrefix = false)
numberOfTimesToStepOut[expression.endExclusive]++
writeSymbol(SystemSymbols.TDL_EXPRESSION_GROUP)
writeSymbol(TDL_EXPRESSION_GROUP_START)
}
is Expression.MacroInvocation -> {
stepInSExp(usingLengthPrefix = false)
Expand Down Expand Up @@ -598,7 +590,7 @@ internal class IonManagedWriter_1_1(
IonType.LIST -> userData.stepInList(options.writeLengthPrefix(ContainerType.LIST, newDepth))
IonType.SEXP -> userData.stepInSExp(options.writeLengthPrefix(ContainerType.SEXP, newDepth))
IonType.STRUCT -> {
if (depth == 0 && userData._private_hasFirstAnnotation(SystemSymbols.ION_SYMBOL_TABLE_SID, SystemSymbols.ION_SYMBOL_TABLE)) {
if (depth == 0 && userData._private_hasFirstAnnotation(SystemSymbols_1_1.ION_SYMBOL_TABLE.id, SystemSymbols_1_1.ION_SYMBOL_TABLE.text)) {
throw IonException("User-defined symbol tables not permitted by the Ion 1.1 managed writer.")
}
userData.stepInStruct(options.writeLengthPrefix(ContainerType.STRUCT, newDepth))
Expand Down Expand Up @@ -632,8 +624,8 @@ internal class IonManagedWriter_1_1(
userData.writeNull(IonType.SYMBOL)
} else {
val text: String? = content.text
if (content.sid == SystemSymbols.ION_1_0_SID) throw IonException("Can't write a top-level symbol that is the same as the IVM.")
if (text == SystemSymbols.ION_1_0) throw IonException("Can't write a top-level symbol that is the same as the IVM.")
// TODO: Check to see if the SID refers to a user symbol with text that looks like an IVM
if (text == SystemSymbols_1_1.ION_1_0.text && depth == 0) throw IonException("Can't write a top-level symbol that is the same as the IVM.")
handleSymbolToken(content.sid, content.text, SymbolKind.VALUE, userData)
}
}
Expand Down Expand Up @@ -780,7 +772,11 @@ internal class IonManagedWriter_1_1(
// from Ion 1.0, we will have to adjust any SIDs that we are writing.

reader.typeAnnotationSymbols.forEach {
handleSymbolToken(it.sid, it.text, SymbolKind.ANNOTATION, userData, preserveEncoding = true)
if (it.text == SystemSymbols_1_1.ION_SYMBOL_TABLE.text) {
userData.writeAnnotations(SystemSymbols_1_1.ION_SYMBOL_TABLE)
} else {
handleSymbolToken(it.sid, it.text, SymbolKind.ANNOTATION, userData, preserveEncoding = true)
}
}
if (isInStruct) {
// TODO: Can't use reader.fieldId, reader.fieldName because it will throw UnknownSymbolException.
Expand Down
55 changes: 39 additions & 16 deletions src/main/java/com/amazon/ion/impl/bin/IonRawBinaryWriter_1_1.kt
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class IonRawBinaryWriter_1_1 internal constructor(

private val utf8StringEncoder = Utf8StringEncoderPool.getInstance().getOrCreate()

private var annotationsTextBuffer = arrayOfNulls<CharSequence>(8)
private var annotationsFlexSymBuffer = arrayOfNulls<Any>(8)
private var annotationsIdBuffer = IntArray(8)
private var numAnnotations = 0
/**
Expand Down Expand Up @@ -195,16 +195,22 @@ class IonRawBinaryWriter_1_1 internal constructor(
* are being added one by one.
*/
private inline fun ensureAnnotationSpace(n: Int) {
if (annotationsIdBuffer.size < n || annotationsTextBuffer.size < n) {
if (annotationsIdBuffer.size < n || annotationsFlexSymBuffer.size < n) {
val oldIds = annotationsIdBuffer
annotationsIdBuffer = IntArray(n + 8)
oldIds.copyInto(annotationsIdBuffer)
val oldText = annotationsTextBuffer
annotationsTextBuffer = arrayOfNulls(n + 8)
oldText.copyInto(annotationsTextBuffer)
val oldText = annotationsFlexSymBuffer
annotationsFlexSymBuffer = arrayOfNulls(n + 8)
oldText.copyInto(annotationsFlexSymBuffer)
}
}

/**
 * Buffers a system symbol as an annotation for the next value.
 *
 * The symbol object itself (not its text) is stored in the FlexSym annotations buffer,
 * and the pending annotations are marked as requiring FlexSym encoding.
 */
override fun writeAnnotations(annotation0: SystemSymbols_1_1) {
    val slot = numAnnotations
    // Grow the buffers (if needed) before storing into the next free slot.
    ensureAnnotationSpace(slot + 1)
    annotationsFlexSymBuffer[slot] = annotation0
    numAnnotations = slot + 1
    annotationFlexSymFlag = FLEX_SYMS_REQUIRED
}

override fun writeAnnotations(annotation0: Int) {
confirm(annotation0 >= 0) { "Invalid SID: $annotation0" }
ensureAnnotationSpace(numAnnotations + 1)
Expand All @@ -227,21 +233,21 @@ class IonRawBinaryWriter_1_1 internal constructor(

override fun writeAnnotations(annotation0: CharSequence) {
ensureAnnotationSpace(numAnnotations + 1)
annotationsTextBuffer[numAnnotations++] = annotation0
annotationsFlexSymBuffer[numAnnotations++] = annotation0
annotationFlexSymFlag = FLEX_SYMS_REQUIRED
}

override fun writeAnnotations(annotation0: CharSequence, annotation1: CharSequence) {
ensureAnnotationSpace(numAnnotations + 2)
annotationsTextBuffer[numAnnotations++] = annotation0
annotationsTextBuffer[numAnnotations++] = annotation1
annotationsFlexSymBuffer[numAnnotations++] = annotation0
annotationsFlexSymBuffer[numAnnotations++] = annotation1
annotationFlexSymFlag = FLEX_SYMS_REQUIRED
}

override fun writeAnnotations(annotations: Array<CharSequence>) {
if (annotations.isEmpty()) return
ensureAnnotationSpace(numAnnotations + annotations.size)
annotations.copyInto(annotationsTextBuffer, numAnnotations)
annotations.copyInto(annotationsFlexSymBuffer, numAnnotations)
numAnnotations += annotations.size
annotationFlexSymFlag = FLEX_SYMS_REQUIRED
}
Expand All @@ -251,15 +257,15 @@ class IonRawBinaryWriter_1_1 internal constructor(
annotationFlexSymFlag = 0
// erase the first entries to ensure old values don't leak into `_private_hasFirstAnnotation()`
annotationsIdBuffer[0] = -1
annotationsTextBuffer[0] = null
annotationsFlexSymBuffer[0] = null
}

override fun _private_hasFirstAnnotation(sid: Int, text: String?): Boolean {
if (numAnnotations == 0) return false
if (sid >= 0 && annotationsIdBuffer[0] == sid) {
return true
}
if (text != null && annotationsTextBuffer[0] == text) {
if (text != null && annotationsFlexSymBuffer[0] == text) {
return true
}
return false
Expand Down Expand Up @@ -296,8 +302,8 @@ class IonRawBinaryWriter_1_1 internal constructor(
// If there's only one annotation, and we know that at least one has text, we don't need to check
// whether this is SID.
buffer.writeByte(OpCodes.ANNOTATIONS_1_FLEX_SYM)
annotationsTotalLength += buffer.writeFlexSym(utf8StringEncoder.encode(annotationsTextBuffer[0].toString()))
annotationsTextBuffer[0] = null
annotationsTotalLength += writeFlexSymFromAnnotationsBuffer(0)
annotationsFlexSymBuffer[0] = null
}
-3 -> {
buffer.writeByte(OpCodes.ANNOTATIONS_2_FLEX_SYM)
Expand All @@ -318,10 +324,14 @@ class IonRawBinaryWriter_1_1 internal constructor(
* Writes a FlexSym annotation for the specified position in the annotations buffers.
*/
private fun writeFlexSymFromAnnotationsBuffer(i: Int): Int {
val annotationText = annotationsTextBuffer[i]
val annotationText = annotationsFlexSymBuffer[i]
return if (annotationText != null) {
annotationsTextBuffer[i] = null
buffer.writeFlexSym(utf8StringEncoder.encode(annotationText.toString()))
annotationsFlexSymBuffer[i] = null
if (annotationText is SystemSymbols_1_1) {
buffer.writeFlexSym(annotationText)
} else {
buffer.writeFlexSym(utf8StringEncoder.encode(annotationText.toString()))
}
} else {
buffer.writeFlexSym(annotationsIdBuffer[i])
}
Expand Down Expand Up @@ -421,6 +431,13 @@ class IonRawBinaryWriter_1_1 internal constructor(
hasFieldName = true
}

/**
 * Writes a system symbol as the field name for the next value.
 *
 * The current container must be a struct. If the struct is not already using FlexSym
 * field names it is switched over first, then the symbol is written as a FlexSym and
 * the value returned by `writeFlexSym` is added to the container's length.
 */
override fun writeFieldName(symbol: SystemSymbols_1_1) {
    confirm(currentContainer.type == STRUCT) { "Can only write a field name inside of a struct." }
    if (!currentContainer.usesFlexSym) {
        switchCurrentStructToFlexSym()
    }
    val container = currentContainer
    container.length = container.length + buffer.writeFlexSym(symbol)
    hasFieldName = true
}

override fun _private_hasFieldName(): Boolean = hasFieldName

private fun switchCurrentStructToFlexSym() {
Expand Down Expand Up @@ -578,6 +595,12 @@ class IonRawBinaryWriter_1_1 internal constructor(
}
)

/**
 * Writes a system symbol as a symbol value.
 *
 * Emits the `SYSTEM_SYMBOL` op code followed by the symbol's id as a single byte.
 */
override fun writeSymbol(symbol: SystemSymbols_1_1) {
    writeScalar {
        buffer.writeByte(OpCodes.SYSTEM_SYMBOL)
        buffer.writeByte(symbol.id.toByte())
        // Result of the lambda: presumably the number of bytes written (op code + id) -- confirm against writeScalar.
        2
    }
}

/** Writes [value] as a string scalar, encoding its text with the writer's UTF-8 string encoder. */
override fun writeString(value: CharSequence) {
    writeScalar {
        writeStringValue(buffer, utf8StringEncoder.encode(value.toString()))
    }
}

/** Writes a blob scalar by passing [value], [start] and [length] to `writeBlobValue` along with the buffer. */
override fun writeBlob(value: ByteArray, start: Int, length: Int) {
    writeScalar {
        writeBlobValue(buffer, value, start, length)
    }
}
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/com/amazon/ion/impl/bin/Ion_1_1_Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
public class Ion_1_1_Constants {
private Ion_1_1_Constants() {}

/**
 * Offset applied to the SID when writing a system symbol (or $0) in a flex sym.
 * The SID must be offset to avoid clashing with E-Expression op codes.
 */
public static final int FLEX_SYM_SYSTEM_SYMBOL_OFFSET = 0x60;

static final int FIRST_2_BYTE_SYMBOL_ADDRESS = 256;
static final int FIRST_MANY_BYTE_SYMBOL_ADDRESS = 65792;

Expand Down
Loading

0 comments on commit 4b373e5

Please sign in to comment.