|
| 1 | +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | +package com.amazon.ion.bytecode.bin10 |
| 4 | + |
| 5 | +import com.amazon.ion.IonException |
| 6 | +import com.amazon.ion.SystemSymbols |
| 7 | +import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_ADD_SYMBOLS |
| 8 | +import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_SET_SYMBOLS |
| 9 | +import com.amazon.ion.bytecode.ir.Instructions.I_DIRECTIVE_USE |
| 10 | +import com.amazon.ion.bytecode.ir.Instructions.I_END_CONTAINER |
| 11 | +import com.amazon.ion.bytecode.ir.Instructions.I_INT_I32 |
| 12 | +import com.amazon.ion.bytecode.ir.Instructions.I_NULL_NULL |
| 13 | +import com.amazon.ion.bytecode.ir.Instructions.I_STRING_CP |
| 14 | +import com.amazon.ion.bytecode.ir.Instructions.I_SYMBOL_CP |
| 15 | +import com.amazon.ion.bytecode.ir.Instructions.packInstructionData |
| 16 | +import com.amazon.ion.bytecode.ir.OperationKind |
| 17 | +import com.amazon.ion.bytecode.util.AppendableConstantPoolView |
| 18 | +import com.amazon.ion.bytecode.util.BytecodeBuffer |
| 19 | +import com.amazon.ion.bytecode.util.unsignedToInt |
| 20 | +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings |
| 21 | + |
| 22 | +/** |
| 23 | + * Helper to generate Bytecode instructions for Ion 1.0 style symbol tables. |
| 24 | + * |
| 25 | + * We cannot meaningfully read a partial symbol table, so it seems a reasonable requirement that the entire symbol |
| 26 | + * table must be buffered before we generate bytecode for it. Therefore, this is re-usable for both continuable and |
| 27 | + * non-continuable bytecode generators. |
| 28 | + * |
| 29 | + * The Bytecode uses Ion 1.1 style directives, which don't quite align with Ion 1.0 directives. |
| 30 | + * So, we have two options: |
| 31 | + * 1. We can output the bytecode that is functionally equivalent to "classic" symbol tables |
| 32 | + * 2. We can add a "classic" symbol table directive to the bytecode |
| 33 | + * |
| 34 | + * For option 1, the generated bytecode is as follows: |
| 35 | + * - if there's no LST append, and no imports, then we can generate a SET_SYMBOLS instruction |
| 36 | + * - if there's LST append, and no imports, then we can generate an ADD_SYMBOLS instruction |
| 37 | + * - if there are imports, there cannot also be LST append. |
| 38 | + * - if there are imports, and it's not LST append, we can generate an empty SET_SYMBOLS, followed by a USE for all the |
| 39 | + * imports, followed by ADD_SYMBOLS with the local symbols. |
| 40 | + * - if there's LST append with no local symbols added, there is nothing to add, so we elide the ADD_SYMBOLS |
| 41 | + * completely and emit nothing |
| 42 | + * - if there are imports and no local symbols, we can generate an empty SET_SYMBOLS, followed by USE for all the imports. |
| 43 | + * - if there are no imports, no LST append, and no local symbols, we can generate an empty SET_SYMBOLS. |
| 44 | + * |
| 45 | + * This means we need to buffer some data before emitting the bytecode, but that's okay. |
| 46 | + * We can put the strings into the constant pool, and keep track of the min and max, since there will be nothing else |
| 47 | + * that we would put in the constant pool while we're processing a symbol table. |
| 48 | + * |
| 49 | + * It's actually beneficial to put the symbol text strings in the constant pool now. They can be added to the symbol |
| 50 | + * table from the constant pool comparatively cheaply, and we can decode the strings eagerly without having as much |
| 51 | + * overhead from the control flow of calling `readTextReference()` over and over. |
| 52 | + * |
| 53 | + * Logic is roughly this: |
| 54 | + * |
| 55 | + * ```pseudocode |
| 56 | + * let symbolsStartInclusive = 0 |
| 57 | + * let symbolsEndExclusive = 0 |
| 58 | + * let isAppend = false |
| 59 | + * while hasMoreFields(): |
| 60 | + * let fieldName = readFieldName() |
| 61 | + * switch(fieldName): |
| 62 | + * "imports": |
| 63 | + * let valueType = readValueType() |
| 64 | + * switch(valueType): |
| 65 | + * symbol: |
| 66 | + * readAndValidate "$ion_symbol_table" |
| 67 | + * isAppend = true |
| 68 | + * list: |
| 69 | + * bytecode.add2(SET_SYMBOLS, END_CONTAINER) |
| 70 | + * bytecode.add(USE) |
| 71 | + * while hasMoreListElements(): |
| 72 | + * compileImport() |
| 73 | + * bytecode.add(END_CONTAINER) |
| 74 | + * isAppend = true |
| 75 | + * "symbols": |
| 76 | + * symbolsStartInclusive = sizeOf(constantPool) |
| 77 | + * for value in list: |
| 78 | + * if value is string: |
| 79 | + * constantPool.add(readText()) |
| 80 | + * else: |
| 81 | + * constantPool.add(null) |
| 82 | + * symbolsEndExclusive = sizeOf(constantPool) |
| 83 | + * |
| 84 | + * if (isAppend): |
| 85 | + * bytecode.add(ADD_SYMBOLS) |
| 86 | + * else: |
| 87 | + * bytecode.add(SET_SYMBOLS) |
| 88 | + * for i in symbolsStartInclusive .. symbolsEndExclusive: |
| 89 | + * bytecode.add(SYMBOL_CP(i)) |
| 90 | + * bytecode.add(END_CONTAINER) |
| 91 | + * ``` |
| 92 | + */ |
| 93 | +internal object SymbolTableHelper { |
| 94 | + |
| 95 | + private const val ONE_BYTE_MASK = 0xFF |
| 96 | + private const val ONE_BYTE_SHIFT = 8 |
| 97 | + |
/**
 * Compiles one Ion 1.0 local symbol table struct into directive bytecode.
 *
 * The fields of the struct are scanned once:
 * - an `imports` field whose value is a symbol with SID [SystemSymbols.ION_SYMBOL_TABLE_SID] marks the table
 *   as an append to the current symbols;
 * - an `imports` field whose value is a list is handed to [readImportsList], which writes its own instructions
 *   to [dest] immediately; the local symbols are then appended after those imports;
 * - a `symbols` field whose value is a list has one entry per element added to [cp], and only the resulting
 *   range of constant pool indices is remembered until the scan is complete.
 * Any other field, and any other value type for these two fields, is ignored.
 *
 * Once the scan is done a single directive is written: `ADD_SYMBOLS` when appending, `SET_SYMBOLS` otherwise,
 * followed by one `SYMBOL_CP` per remembered constant pool index and a closing `END_CONTAINER`.
 * When appending and no symbols were collected, this final directive is skipped entirely.
 *
 * @param source buffer that contains the symbol table struct
 * @param position index in [source] of the first field of the struct
 * @param structLength number of bytes of fields to read, starting at [position]
 * @param dest buffer that receives the generated instructions
 * @param cp constant pool that receives the symbol text (and the names of any imports)
 * @throws IonException if the struct contains more than one `imports` field or more than one `symbols` field
 */
@JvmStatic
@SuppressFBWarnings("SF_SWITCH_NO_DEFAULT")
fun compileSymbolTable(source: ByteArray, position: Int, structLength: Int, dest: BytecodeBuffer, cp: AppendableConstantPoolView) {
    var importsFieldSeen = false
    var symbolsFieldSeen = false
    var appendLocalSymbols = false

    // Half-open range [firstSymbolCpIndex, symbolCpIndexLimit) of constant pool slots that hold the local symbols.
    var firstSymbolCpIndex = 0
    var symbolCpIndexLimit = 0

    iterateStruct(source, position, structLength) { fieldSid, fieldTid, valueStart, valueLength ->
        val valueKind = TypeIdHelper.operationKindForTypeId(fieldTid)
        when (fieldSid) {
            SystemSymbols.SYMBOLS_SID -> {
                if (symbolsFieldSeen) throw IonException("Multiple symbols fields found within a single local symbol table.")
                symbolsFieldSeen = true
                when (valueKind) {
                    OperationKind.LIST -> {
                        // Nothing else touches the constant pool while the list is read, so the size
                        // before and after delimit exactly the entries added for this list.
                        firstSymbolCpIndex = cp.size
                        readSymbolsList(source, valueStart, valueLength, cp)
                        symbolCpIndexLimit = cp.size
                    }
                }
            }
            SystemSymbols.IMPORTS_SID -> {
                if (importsFieldSeen) throw IonException("Multiple imports fields found within a single local symbol table.")
                importsFieldSeen = true
                when (valueKind) {
                    OperationKind.LIST -> {
                        readImportsList(source, valueStart, valueLength, dest, cp)
                        appendLocalSymbols = true
                    }
                    OperationKind.SYMBOL -> {
                        val importedSid = readUInt(source, valueStart, valueLength).toInt()
                        if (importedSid == SystemSymbols.ION_SYMBOL_TABLE_SID) appendLocalSymbols = true
                    }
                }
            }
        }
    }

    if (appendLocalSymbols) {
        // The new local symbols are "appended" to the imports using ADD_SYMBOLS; with nothing to append
        // there is nothing to emit.
        if (firstSymbolCpIndex == symbolCpIndexLimit) return
        dest.add(I_DIRECTIVE_ADD_SYMBOLS)
    } else {
        dest.add(I_DIRECTIVE_SET_SYMBOLS)
    }

    var cpIndex = firstSymbolCpIndex
    while (cpIndex < symbolCpIndexLimit) {
        dest.add(I_SYMBOL_CP.packInstructionData(cpIndex))
        cpIndex++
    }
    dest.add(I_END_CONTAINER)
}
| 155 | + |
/**
 * Compiles the `imports` list of a local symbol table.
 *
 * An empty `SET_SYMBOLS` directive (`SET_SYMBOLS`, `END_CONTAINER`) is always written first, followed by the
 * opening of a `USE` directive. Each list element that is a non-null struct is passed to [readImportStruct].
 * If none of them wrote any instruction, the `USE` opening is removed again, so only the empty `SET_SYMBOLS`
 * remains; otherwise the `USE` directive is closed with `END_CONTAINER`.
 *
 * @param source buffer that contains the list
 * @param listStart index in [source] of the first element of the list
 * @param listLength number of bytes of list content
 * @param dest buffer that receives the generated instructions
 * @param cp constant pool that receives the names of the imported tables
 */
@JvmStatic
private fun readImportsList(
    source: ByteArray,
    listStart: Int,
    listLength: Int,
    dest: BytecodeBuffer,
    cp: AppendableConstantPoolView
) {
    // Clear the current symbols, then open the USE directive that will hold the imports.
    dest.add3(I_DIRECTIVE_SET_SYMBOLS, I_END_CONTAINER, I_DIRECTIVE_USE)
    val sizeAfterUseOpening = dest.size()

    iterateList(source, listStart, listLength) { elementTid, elementStart, elementLength ->
        if (TypeIdHelper.isNonNullStruct(elementTid)) {
            readImportStruct(source, elementStart, elementLength, dest, cp)
        }
    }

    val wroteAnyImport = dest.size() != sizeAfterUseOpening
    if (wroteAnyImport) {
        // Close the USE directive
        dest.add(I_END_CONTAINER)
    } else {
        // Nothing was imported: drop the USE opening that was written just before the checkpoint.
        dest.truncate(sizeAfterUseOpening - 1)
    }
}
| 184 | + |
/**
 * Compiles a single import struct, as described at https://amazon-ion.github.io/ion-docs/docs/symbols.html#imports
 *
 * Only the `name`, `version` and `max_id` fields are examined; all other fields are ignored.
 * - `name` is used only when it is a non-null string.
 * - `version` and `max_id` are used only when [TypeIdHelper.isNonNullPositiveInt] accepts their type id.
 *
 * If the name is missing, empty, or `$ion`, nothing is written. Otherwise three values are written to [dest]:
 * the name (as a constant pool reference), the version (values below 1, including a missing version, become 1),
 * and the max id (a null when no usable `max_id` was found).
 *
 * @param source buffer that contains the import struct
 * @param contentStart index in [source] of the first field of the struct
 * @param contentLength number of bytes of fields
 * @param dest buffer that receives the generated instructions
 * @param cp constant pool that receives the import name
 * @throws IonException if `name`, `version` or `max_id` occurs more than once in the struct
 */
@JvmStatic
@SuppressFBWarnings("SF_SWITCH_NO_DEFAULT")
private fun readImportStruct(
    source: ByteArray,
    contentStart: Int,
    contentLength: Int,
    dest: BytecodeBuffer,
    cp: AppendableConstantPoolView
) {
    var nameFieldSeen = false
    var versionFieldSeen = false
    var maxIdFieldSeen = false

    // -1 / null mean "no usable value was found".
    var importName: String? = null
    var importVersion = -1
    var importMaxId = -1

    iterateStruct(source, contentStart, contentLength) { fieldSid, fieldTid, valueStart, valueLength ->
        when (fieldSid) {
            SystemSymbols.MAX_ID_SID -> {
                if (maxIdFieldSeen) throw IonException("Multiple max_id fields found within a single import.")
                maxIdFieldSeen = true
                if (TypeIdHelper.isNonNullPositiveInt(fieldTid)) {
                    importMaxId = readUInt(source, valueStart, valueLength).toInt()
                }
            }
            SystemSymbols.VERSION_SID -> {
                if (versionFieldSeen) throw IonException("Multiple version fields found within a single import.")
                versionFieldSeen = true
                if (TypeIdHelper.isNonNullPositiveInt(fieldTid)) {
                    importVersion = readUInt(source, valueStart, valueLength).toInt()
                }
            }
            SystemSymbols.NAME_SID -> {
                if (nameFieldSeen) throw IonException("Multiple name fields found within a single import.")
                nameFieldSeen = true
                if (TypeIdHelper.isNonNullString(fieldTid)) {
                    importName = String(source, valueStart, valueLength, Charsets.UTF_8)
                }
            }
        }
    }

    // No name, empty name, or $ion, so we ignore the import clause
    if (importName.isNullOrEmpty() || importName == "\$ion") return

    dest.add(I_STRING_CP.packInstructionData(cp.add(importName)))
    dest.add2(I_INT_I32, importVersion.coerceAtLeast(1))
    if (importMaxId >= 0) {
        dest.add2(I_INT_I32, importMaxId)
    } else {
        dest.add(I_NULL_NULL)
    }
}
| 244 | + |
/**
 * Adds one constant pool entry per element of a `symbols` list.
 *
 * An element that is a non-null string contributes its UTF-8 decoded text. Every other element contributes
 * `null`, which stands for a symbol with unknown text, so the number of entries added always matches the
 * number of elements visited.
 *
 * @param source buffer that contains the list
 * @param position index in [source] of the first element of the list
 * @param listLength number of bytes of list content
 * @param cp constant pool that receives the entries
 */
@JvmStatic
private fun readSymbolsList(source: ByteArray, position: Int, listLength: Int, cp: AppendableConstantPoolView) {
    iterateList(source, position, listLength) { elementTid, textStart, textLength ->
        val symbolText = if (TypeIdHelper.isNonNullString(elementTid)) {
            String(source, textStart, textLength, Charsets.UTF_8)
        } else {
            null
        }
        cp.add(symbolText)
    }
}
| 259 | + |
| 260 | + // ==== General helpers for traversing through the symbol table struct ==== |
| 261 | + |
/**
 * Walks the fields of a struct, invoking [fieldHandler] once for every field that is not a nop.
 *
 * Annotations are ignored in symbol table and import structs: when a field value is wrapped in annotations,
 * the annotations are skipped and the handler is given the wrapped value instead.
 *
 * The handler receives the field name SID, the type id of the value, the index in [source] at which the
 * value's content starts, and the length of that content.
 *
 * @param source buffer that contains the struct
 * @param start index in [source] of the first field
 * @param length number of bytes of fields
 */
@JvmStatic
@SuppressFBWarnings("SF_SWITCH_NO_DEFAULT")
private inline fun iterateStruct(
    source: ByteArray,
    start: Int,
    length: Int,
    fieldHandler: (fieldSid: Int, valueTid: Int, pos: Int, len: Int) -> Unit
) {
    val limit = start + length
    var cursor = start

    while (cursor < limit) {
        // Packed result: the low byte is the number of bytes that were read, the remaining bits are the value.
        val packedFieldSid = VarIntHelper.readVarUIntValueAndLength(source, cursor)
        cursor += packedFieldSid.toInt().and(ONE_BYTE_MASK)
        val fieldSid = packedFieldSid.shr(ONE_BYTE_SHIFT).toInt()

        var valueTid = source[cursor++].unsignedToInt()

        when (TypeIdHelper.operationKindForTypeId(valueTid)) {
            // We ignore annotations inside all symbol table structs and import structs,
            // so step over them and continue with the type id of the annotated value.
            OperationKind.ANNOTATIONS -> {
                cursor += skipAnnotations(valueTid, source, cursor)
                valueTid = source[cursor++].unsignedToInt()
            }
            // This is a nop, so we skip this field: step over its length bytes and its content.
            OperationKind.UNSET -> {
                val packedNopLength = getLengthForTypeId(valueTid, source, cursor)
                cursor += packedNopLength.toInt().and(ONE_BYTE_MASK) + packedNopLength.shr(ONE_BYTE_SHIFT).toInt()
                continue
            }
        }

        val packedValueLength = getLengthForTypeId(valueTid, source, cursor)
        val valueLength = packedValueLength.shr(ONE_BYTE_SHIFT).toInt()
        cursor += packedValueLength.toInt().and(ONE_BYTE_MASK)
        fieldHandler(fieldSid, valueTid, cursor, valueLength)
        cursor += valueLength
    }
}
| 308 | + |
/**
 * Walks the elements of a list, invoking [valueHandler] once for every element that is not a nop.
 *
 * Annotations are ignored in symbols and imports lists: an annotation wrapper is stepped over so that the
 * value it wraps is visited as an ordinary element on the next pass through the loop.
 *
 * The handler receives the type id of the element, the index in [source] at which the element's content
 * starts, and the length of that content.
 *
 * @param source buffer that contains the list
 * @param position index in [source] of the first element
 * @param length number of bytes of list content
 */
@JvmStatic
private inline fun iterateList(
    source: ByteArray,
    position: Int,
    length: Int,
    valueHandler: (typeId: Int, position: Int, length: Int) -> Unit
) {
    val limit = position + length
    var cursor = position

    while (cursor < limit) {
        val elementTid = source[cursor++].unsignedToInt()

        when (TypeIdHelper.operationKindForTypeId(elementTid)) {
            // We ignore annotations on anything inside a local symbol table.
            // The cursor is left on the annotated value, which the next iteration reads.
            OperationKind.ANNOTATIONS -> {
                cursor += skipAnnotations(elementTid, source, cursor)
                continue
            }
            // This is a nop, so we skip it: step over its length bytes and its content.
            OperationKind.UNSET -> {
                val packedNopLength = getLengthForTypeId(elementTid, source, cursor)
                cursor += packedNopLength.toInt().and(ONE_BYTE_MASK) + packedNopLength.shr(ONE_BYTE_SHIFT).toInt()
                continue
            }
        }

        val packedElementLength = getLengthForTypeId(elementTid, source, cursor)
        val elementLength = packedElementLength.shr(ONE_BYTE_SHIFT).toInt()
        cursor += packedElementLength.toInt().and(ONE_BYTE_MASK)
        valueHandler(elementTid, cursor, elementLength)
        cursor += elementLength
    }
}
| 348 | + |
/**
 * Computes how many bytes lie between [position] and the annotated value of an annotation wrapper.
 *
 * The annotations themselves are not read; only their encoded size is measured. The annotated value is not
 * skipped.
 *
 * @param typeId type id of the annotation wrapper
 * @param source buffer that contains the wrapper
 * @param position index in [source] immediately after the wrapper's type id byte
 * @return the number of bytes to advance from [position] to reach the annotated value
 */
@JvmStatic
private fun skipAnnotations(typeId: Int, source: ByteArray, position: Int): Int {
    // A variable-length wrapper starts with a VarUInt holding its overall length; only its size matters here.
    val wrapperLengthSize = if (TypeIdHelper.isVariableLength(typeId)) {
        VarIntHelper.readVarUIntValueAndLength(source, position).toInt().and(ONE_BYTE_MASK)
    } else {
        0
    }

    // Next comes a VarUInt giving the length of the annotations, followed by that many bytes of annotations.
    val packedAnnotationsLength = VarIntHelper.readVarUIntValueAndLength(source, position + wrapperLengthSize)
    val annotationsLengthSize = packedAnnotationsLength.toInt().and(ONE_BYTE_MASK)
    val annotationsLength = packedAnnotationsLength.shr(ONE_BYTE_SHIFT).toInt()

    return wrapperLengthSize + annotationsLengthSize + annotationsLength
}
| 361 | + |
/**
 * Gets the length for the given type id, reading a VarUInt length from [source] if needed.
 *
 * The result is packed the way callers in this file unpack it: the low byte is the number of bytes occupied
 * by the length itself (zero when the length is implied by the type id), and the remaining bits, shifted
 * right by [ONE_BYTE_SHIFT], are the length of the value's content.
 *
 * NOTE(review): an earlier version of this comment said -1 is returned when there is not enough data to read
 * the full VarUInt length. Nothing in this function produces that value, so it could only come from
 * [VarIntHelper.readVarUIntValueAndLength] - confirm before relying on it.
 *
 * @param typeId type id of the value
 * @param source buffer that contains the value
 * @param position index in [source] immediately after the type id byte
 * @throws IonException if the typeId is not a legal typeId in Ion 1.0
 */
@JvmStatic
private fun getLengthForTypeId(typeId: Int, source: ByteArray, position: Int): Long =
    when (val tableLength = TypeIdHelper.TYPE_LENGTHS[typeId]) {
        // -2 marks a type id that is not legal.
        -2 -> throw IonException("Invalid Type ID: $typeId")
        // -1 marks a length that has to be read from the data.
        -1 -> VarIntHelper.readVarUIntValueAndLength(source, position)
        // Any other entry is the length itself; no extra bytes are consumed, so the low byte stays zero.
        else -> tableLength.toLong().shl(ONE_BYTE_SHIFT)
    }
| 376 | +} |
0 commit comments