From c2db7a149e06aa780704b5a2dfb57f0579825474 Mon Sep 17 00:00:00 2001 From: Ruslan Iushchenko Date: Mon, 21 Jul 2025 09:23:58 +0200 Subject: [PATCH 1/7] #769 Add a placeholder for encoders to be available in AST. --- .../cobol/parser/antlr/ParserVisitor.scala | 7 ++-- .../cobrix/cobol/parser/ast/Primitive.scala | 28 ++++++++++------ .../asttransform/NonTerminalsAdder.scala | 4 ++- .../parser/decoders/EncoderSelector.scala | 33 +++++++++++++++++++ .../parser/extract/BinaryExtractorSpec.scala | 4 +-- 5 files changed, 60 insertions(+), 16 deletions(-) create mode 100644 cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/EncoderSelector.scala diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ParserVisitor.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ParserVisitor.scala index 5e5ed87ff..8cda89193 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ParserVisitor.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ParserVisitor.scala @@ -23,7 +23,7 @@ import za.co.absa.cobrix.cobol.parser.CopybookParser.CopybookAST import za.co.absa.cobrix.cobol.parser.ast.datatype._ import za.co.absa.cobrix.cobol.parser.ast.{Group, Primitive} import za.co.absa.cobrix.cobol.parser.common.Constants -import za.co.absa.cobrix.cobol.parser.decoders.DecoderSelector +import za.co.absa.cobrix.cobol.parser.decoders.{DecoderSelector, EncoderSelector} import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPointFormat import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage import za.co.absa.cobrix.cobol.parser.encoding._ @@ -855,8 +855,9 @@ class ParserVisitor(enc: Encoding, Map(), isDependee = false, identifier.toUpperCase() == Constants.FILLER, - DecoderSelector.getDecoder(pic.value, stringTrimmingPolicy, isDisplayAlwaysString, effectiveEbcdicCodePage, effectiveAsciiCharset, isUtf16BigEndian = isUtf16BigEndian, floatingPointFormat, strictSignOverpunch = strictSignOverpunch, improvedNullDetection = improvedNullDetection, strictIntegralPrecision = strictIntegralPrecision) - ) (Some(parent)) + DecoderSelector.getDecoder(pic.value, stringTrimmingPolicy, isDisplayAlwaysString, effectiveEbcdicCodePage, effectiveAsciiCharset, isUtf16BigEndian = isUtf16BigEndian, floatingPointFormat, strictSignOverpunch = strictSignOverpunch, improvedNullDetection = improvedNullDetection, strictIntegralPrecision = strictIntegralPrecision), + EncoderSelector.getEncoder(pic.value, effectiveEbcdicCodePage, effectiveAsciiCharset), + )(Some(parent)) parent.children.append(prim) diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/ast/Primitive.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/ast/Primitive.scala index a302d3ab2..0d1e3e249 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/ast/Primitive.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/ast/Primitive.scala @@ -17,19 +17,26 @@ package za.co.absa.cobrix.cobol.parser.ast import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, CobolType, Decimal, Integral} -import za.co.absa.cobrix.cobol.parser.decoders.{BinaryUtils, DecoderSelector} +import za.co.absa.cobrix.cobol.parser.decoders.{BinaryUtils, DecoderSelector, EncoderSelector} /** An abstraction of the statements describing fields of primitive data types in the COBOL copybook * - * @param level A level for the statement - * @param name An identifier - * @param originalName Original name of the AST 
element (before the conversion to the Spark-compatible name)
- * @param lineNumber   An line number in the copybook
- * @param redefines    A name of a field which is redefined by this one
- * @param occurs       The number of elements in an fixed size array / minimum items in variable-sized array
- * @param to           The maximum number of items in a variable size array
- * @param dependingOn  A field which specifies size of the array in a record
- * @param parent       A parent node
+ * @param level               A level for the statement
+ * @param name                An identifier
+ * @param originalName        Original name of the AST element (before the conversion to the Spark-compatible name)
+ * @param lineNumber          A line number in the copybook
+ * @param redefines           A name of a field which is redefined by this one
+ * @param isRedefined         A flag indicating if the field is redefined
+ * @param occurs              The number of elements in a fixed-size array / minimum items in a variable-sized array
+ * @param to                  The maximum number of items in a variable-size array
+ * @param dependingOn         A field which specifies size of the array in a record
+ * @param dependingOnHandlers A map of handlers for the dependingOn field
+ * @param isDependee          A flag indicating if the field is a dependee
+ * @param isFiller            A flag indicating if the field is a filler
+ * @param decode              A decoder for the field to convert from raw data to a JVM data type
+ * @param encode              An optional encoder for the field to convert from a JVM data type to raw data
+ * @param binaryProperties    Binary properties of the field, such as size in bits, alignment, etc.
+ * @param parent              A parent node
   */
 case class Primitive(
                       level: Int,
@@ -46,6 +53,7 @@ case class Primitive(
                       isDependee: Boolean = false,
                       isFiller: Boolean = false,
                       decode: DecoderSelector.Decoder,
+                      encode: Option[EncoderSelector.Encoder],
                       binaryProperties: BinaryProperties = BinaryProperties(0, 0, 0)
                     )
                     (val parent: Option[Group] = None)
diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/asttransform/NonTerminalsAdder.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/asttransform/NonTerminalsAdder.scala
index bb652df28..8c0c7d7df 100644
--- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/asttransform/NonTerminalsAdder.scala
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/asttransform/NonTerminalsAdder.scala
@@ -20,7 +20,7 @@ import za.co.absa.cobrix.cobol.parser.CopybookParser.CopybookAST
 import za.co.absa.cobrix.cobol.parser.ast.datatype.AlphaNumeric
 import za.co.absa.cobrix.cobol.parser.ast.{Group, Primitive, Statement}
 import za.co.absa.cobrix.cobol.parser.common.Constants
-import za.co.absa.cobrix.cobol.parser.decoders.DecoderSelector
+import za.co.absa.cobrix.cobol.parser.decoders.{DecoderSelector, EncoderSelector}
 import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPointFormat
 import za.co.absa.cobrix.cobol.parser.encoding.Encoding
 import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage
@@ -74,6 +74,7 @@ class NonTerminalsAdder(
       val sz = g.binaryProperties.actualSize
       val dataType = AlphaNumeric(s"X($sz)", sz, enc = Some(enc))
       val decode = DecoderSelector.getDecoder(dataType, stringTrimmingPolicy, isDisplayAlwaysString = false, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat, strictSignOverpunch, improvedNullDetection)
+      val encode = EncoderSelector.getEncoder(dataType, ebcdicCodePage, asciiCharset)
       val newName = getNonTerminalName(g.name, g.parent.get)
       newChildren.append(
         Primitive(
dataType, redefines = Some(g.name), decode = decode, + encode = encode, binaryProperties = g.binaryProperties )(g.parent) ) diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/EncoderSelector.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/EncoderSelector.scala new file mode 100644 index 000000000..072deca58 --- /dev/null +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/EncoderSelector.scala @@ -0,0 +1,33 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package za.co.absa.cobrix.cobol.parser.decoders + +import za.co.absa.cobrix.cobol.parser.ast.datatype.CobolType +import za.co.absa.cobrix.cobol.parser.encoding.codepage.{CodePage, CodePageCommon} + +import java.nio.charset.{Charset, StandardCharsets} + +object EncoderSelector { + type Encoder = Any => Array[Byte] + + def getEncoder(dataType: CobolType, + ebcdicCodePage: CodePage = new CodePageCommon, + asciiCharset: Charset = StandardCharsets.US_ASCII): Option[Encoder] = { + None + } + +} diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala index 7b547b19c..ef573297a 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala @@ -20,7 +20,7 @@ import org.scalatest.funsuite.AnyFunSuite import za.co.absa.cobrix.cobol.parser.CopybookParser import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, CobolType} import za.co.absa.cobrix.cobol.parser.ast.{BinaryProperties, Group, Primitive} -import za.co.absa.cobrix.cobol.parser.decoders.DecoderSelector +import za.co.absa.cobrix.cobol.parser.decoders.{DecoderSelector, EncoderSelector} import za.co.absa.cobrix.cobol.parser.encoding.EBCDIC class BinaryExtractorSpec extends AnyFunSuite { @@ -160,7 +160,7 @@ class BinaryExtractorSpec extends AnyFunSuite { val binaryProperties: BinaryProperties = BinaryProperties(2, 10, 10) val primitive: Primitive = Primitive(level, name, name, lineNumber, dataType, redefines, isRedefined, - occurs, to, dependingOn, Map(), isDependee, isFiller, DecoderSelector.getDecoder(dataType), binaryProperties)(None) + occurs, to, dependingOn, Map(), isDependee, isFiller, DecoderSelector.getDecoder(dataType), EncoderSelector.getEncoder(dataType), binaryProperties)(None) val result2: Any = copybook.extractPrimitiveField(primitive, bytes, startOffset) assert(result2.asInstanceOf[String] === "EXAMPLE4") } From b703a4023e1d7bb07e6d924609cd46cdef3c188c Mon Sep 17 00:00:00 2001 From: Ruslan Iushchenko Date: Tue, 22 Jul 2025 08:52:35 +0200 Subject: [PATCH 2/7] #769 Add the encoder for EBCDIC common code page. 
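With this encoder, alphanumeric (PIC X) fields can be updated in place in a binary record. A minimal sketch of the intended usage, assuming an EBCDIC record buffer (the copybook and the values below are illustrative, not part of this patch):

    import za.co.absa.cobrix.cobol.parser.CopybookParser

    val copybook = CopybookParser.parseTree(
      """       01  RECORD.
        |          05  SHORT-NAME  PIC X(10).
        |""".stripMargin)

    // One EBCDIC-encoded record; 0x40 is the EBCDIC space character.
    val record = Array.fill[Byte](10)(0x40.toByte)

    // setFieldValueByName() encodes the value and patches the bytes in place,
    // getFieldValueByName() decodes it back.
    copybook.setFieldValueByName("SHORT-NAME", record, "NEWNAME", 0)
    val roundTrip = copybook.getFieldValueByName("SHORT-NAME", record, 0)

Only DISPLAY alphanumeric fields get an encoder at this point. For all other types Primitive.encode stays None and setPrimitiveField() throws an IllegalStateException, as the new unit test demonstrates.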
---
 .../absa/cobrix/cobol/parser/Copybook.scala   | 70 +++++++++++++++----
 .../cobol/parser/antlr/ParserVisitor.scala    |  2 +-
 .../cobrix/cobol/parser/ast/Primitive.scala   |  1 -
 .../parser/decoders/EncoderSelector.scala     | 51 +++++++++++++-
 .../encoding/codepage/CodePageCommon.scala    | 24 +++++++
 .../parser/extract/BinaryExtractorSpec.scala  | 13 ++++
 6 files changed, 145 insertions(+), 16 deletions(-)

diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala
index f69a5a2e2..664507912 100644
--- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala
@@ -71,6 +71,44 @@ class Copybook(val ast: CopybookAST) extends Logging with Serializable {
     }
   }
 
+  /**
+    * Get value of a field of the copybook record by name
+    *
+    * Nested field names can contain '.' to identify the exact field.
+    * If the field name is unique '.' is not required.
+    *
+    * @param fieldName   A field name
+    * @param recordBytes Binary encoded data of the record
+    * @param startOffset An offset where the record starts in the data (in bytes).
+    * @return The value of the field
+    */
+  def getFieldValueByName(fieldName: String, recordBytes: Array[Byte], startOffset: Int = 0): Any = {
+    val ast = getFieldByName(fieldName)
+    ast match {
+      case s: Primitive => extractPrimitiveField(s, recordBytes, startOffset)
+      case _ => throw new IllegalStateException(s"$fieldName is not a primitive field, cannot extract its value.")
+    }
+  }
+
+  /**
+    * Set value of a field of the copybook record by name
+    *
+    * Nested field names can contain '.' to identify the exact field.
+    * If the field name is unique '.' is not required.
+    *
+    * @param fieldName   A field name
+    * @param recordBytes Binary encoded data of the record
+    * @param value       A value to set the field to
+    * @param startOffset An offset where the record starts in the data (in bytes).
+    */
+  def setFieldValueByName(fieldName: String, recordBytes: Array[Byte], value: Any, startOffset: Int = 0): Unit = {
+    val ast = getFieldByName(fieldName)
+    ast match {
+      case s: Primitive => setPrimitiveField(s, recordBytes, value, startOffset)
+      case _ => throw new IllegalStateException(s"$fieldName is not a primitive field, cannot set its value.")
+    }
+  }
+
   /**
     * Get the AST object of a field by name.
     *
@@ -81,7 +119,6 @@ class Copybook(val ast: CopybookAST) extends Logging with Serializable {
     * @return An AST object of the field. Throws an IllegalStateException if not found of found multiple.
     *
     */
-  @throws(classOf[IllegalArgumentException])
   def getFieldByName(fieldName: String): Statement = {
 
     def getFieldByNameInGroup(group: Group, fieldName: String): Seq[Statement] = {
@@ -171,31 +208,40 @@ class Copybook(val ast: CopybookAST) extends Logging with Serializable {
     * @return The value of the field
     *
     */
-  @throws(classOf[Exception])
   def extractPrimitiveField(field: Primitive, bytes: Array[Byte], startOffset: Int = 0): Any = {
     val slicedBytes = bytes.slice(field.binaryProperties.offset + startOffset, field.binaryProperties.offset + startOffset + field.binaryProperties.actualSize)
     field.decodeTypeValue(0, slicedBytes)
   }
 
   /**
-    * Get value of a field of the copybook record by name
+    * Set value of a field of the copybook record by the AST object of the field
     *
     * Nested field names can contain '.' to identify the exact field.
     * If the field name is unique '.' is not required.
* - * @param fieldName A field name + * @param field The AST object of the field * @param bytes Binary encoded data of the record - * @param startOffset An offset where the record starts in the data (in bytes). + * @param startOffset An offset to the beginning of the field in the data (in bytes). * @return The value of the field * */ - @throws(classOf[IllegalStateException]) - @throws(classOf[Exception]) - def getFieldValueByName(fieldName: String, bytes: Array[Byte], startOffset: Int = 0): Any = { - val ast = getFieldByName(fieldName) - ast match { - case s: Primitive => extractPrimitiveField(s, bytes, startOffset) - case _ => throw new IllegalStateException(s"$fieldName is not a primitive field, cannot extract it's value.") + def setPrimitiveField(field: Primitive, recordBytes: Array[Byte], value: Any, startOffset: Int = 0): Unit = { + field.encode match { + case Some(encode) => + val fieldBytes = encode(value) + val startByte = field.binaryProperties.offset + startOffset + val endByte = field.binaryProperties.offset + startOffset + field.binaryProperties.actualSize + + if (startByte < 0 || endByte > recordBytes.length) { + throw new IllegalArgumentException(s"Cannot set value for field '${field.name}' because the field is out of bounds of the record.") + } + if (fieldBytes.length != field.binaryProperties.dataSize) { + throw new IllegalArgumentException(s"Cannot set value for field '${field.name}' because the encoded value has a different size than the field size.") + } + + System.arraycopy(fieldBytes, 0, recordBytes, startByte, fieldBytes.length) + case None => + throw new IllegalStateException(s"Cannot set value for field '${field.name}' because it does not have an encoder defined.") } } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ParserVisitor.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ParserVisitor.scala index 8cda89193..12db12d70 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ParserVisitor.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ParserVisitor.scala @@ -856,7 +856,7 @@ class ParserVisitor(enc: Encoding, isDependee = false, identifier.toUpperCase() == Constants.FILLER, DecoderSelector.getDecoder(pic.value, stringTrimmingPolicy, isDisplayAlwaysString, effectiveEbcdicCodePage, effectiveAsciiCharset, isUtf16BigEndian = isUtf16BigEndian, floatingPointFormat, strictSignOverpunch = strictSignOverpunch, improvedNullDetection = improvedNullDetection, strictIntegralPrecision = strictIntegralPrecision), - EncoderSelector.getEncoder(pic.value, effectiveEbcdicCodePage, effectiveAsciiCharset), + EncoderSelector.getEncoder(pic.value, effectiveEbcdicCodePage, effectiveAsciiCharset) )(Some(parent)) parent.children.append(prim) diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/ast/Primitive.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/ast/Primitive.scala index 0d1e3e249..d8de9e458 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/ast/Primitive.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/ast/Primitive.scala @@ -108,7 +108,6 @@ case class Primitive( * @param itOffset An offset of the field inside the binary data * @param record A record in a binary format represented as a vector of bits */ - @throws(classOf[Exception]) def decodeTypeValue(itOffset: Int, record: Array[Byte]): Any = { val bytesCount = binaryProperties.dataSize val idx = itOffset diff --git 
a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/EncoderSelector.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/EncoderSelector.scala
index 072deca58..d1656317d 100644
--- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/EncoderSelector.scala
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/EncoderSelector.scala
@@ -16,8 +16,9 @@
 
 package za.co.absa.cobrix.cobol.parser.decoders
 
-import za.co.absa.cobrix.cobol.parser.ast.datatype.CobolType
+import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, CobolType}
 import za.co.absa.cobrix.cobol.parser.encoding.codepage.{CodePage, CodePageCommon}
+import za.co.absa.cobrix.cobol.parser.encoding.{ASCII, EBCDIC, Encoding}
 
 import java.nio.charset.{Charset, StandardCharsets}
 
@@ -27,7 +28,53 @@ object EncoderSelector {
   def getEncoder(dataType: CobolType,
                  ebcdicCodePage: CodePage = new CodePageCommon,
                  asciiCharset: Charset = StandardCharsets.US_ASCII): Option[Encoder] = {
-    None
+    dataType match {
+      case alphaNumeric: AlphaNumeric if alphaNumeric.compact.isEmpty =>
+        getStringEncoder(alphaNumeric.enc.getOrElse(EBCDIC), ebcdicCodePage, asciiCharset, alphaNumeric.length)
+      case _ =>
+        None
+    }
+  }
+
+  /** Gets an encoder function for a string data type. The encoder is chosen depending on whether the output encoding is EBCDIC or ASCII. */
+  private def getStringEncoder(encoding: Encoding,
+                               ebcdicCodePage: CodePage,
+                               asciiCharset: Charset,
+                               fieldLength: Int
+                              ): Option[Encoder] = {
+    encoding match {
+      case EBCDIC =>
+        val encoder = (a: Any) => {
+          encodeEbcdicString(a.toString, CodePageCommon.asciiToEbcdicMapping, fieldLength)
+        }
+        Option(encoder)
+      case ASCII =>
+        None
+      case _ =>
+        None
+    }
+  }
+
+  /**
+    * An encoder from a basic ASCII string to an EBCDIC byte array.
+    *
+    * @param string          An input string
+    * @param conversionTable A conversion table to use to convert from ASCII to EBCDIC
+    * @param length          The length of the output (in bytes)
+    * @return An EBCDIC-encoded byte array of the requested length
+    */
+  def encodeEbcdicString(string: String, conversionTable: Array[Byte], length: Int): Array[Byte] = {
+    require(length >= 0, s"Field length cannot be negative, got $length")
+
+    var i = 0
+    val buf = new Array[Byte](length)
+
+    while (i < string.length && i < length) {
+      val asciiByte = string(i).toByte
+      buf(i) = conversionTable((asciiByte + 256) % 256)
+      i = i + 1
+    }
+    buf
   }
 
 }
diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageCommon.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageCommon.scala
index 1394d1310..54d6f38a4 100644
--- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageCommon.scala
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageCommon.scala
@@ -55,4 +55,28 @@ object CodePageCommon {
     }
     ebcdic2ascii
   }
+
+  /**
+    * This is the table for converting basic ASCII symbols to the EBCDIC common code page.
+    */
+  def asciiToEbcdicMapping: Array[Byte] = {
+    Array[Byte](
+      0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x0D.toByte, 0x00.toByte, 0x00.toByte, 0x25.toByte, 0x00.toByte, 0x00.toByte, // 0 - 15
+      0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 16 - 31
+ 0x40.toByte, 0x5A.toByte, 0x7F.toByte, 0x7B.toByte, 0x5B.toByte, 0x6C.toByte, 0x50.toByte, 0x7D.toByte, 0x4D.toByte, 0x5D.toByte, 0x5C.toByte, 0x4E.toByte, 0x6B.toByte, 0x60.toByte, 0x4B.toByte, 0x61.toByte, // 32 - 47 + 0xF0.toByte, 0xF1.toByte, 0xF2.toByte, 0xF3.toByte, 0xF4.toByte, 0xF5.toByte, 0xF6.toByte, 0xF7.toByte, 0xF8.toByte, 0xF9.toByte, 0x7A.toByte, 0x5E.toByte, 0x4C.toByte, 0x7E.toByte, 0x6E.toByte, 0x6F.toByte, // 48 - 63 + 0x7C.toByte, 0xC1.toByte, 0xC2.toByte, 0xC3.toByte, 0xC4.toByte, 0xC5.toByte, 0xC6.toByte, 0xC7.toByte, 0xC8.toByte, 0xC9.toByte, 0xD1.toByte, 0xD2.toByte, 0xD3.toByte, 0xD4.toByte, 0xD5.toByte, 0xD6.toByte, // 64 - 79 + 0xD7.toByte, 0xD8.toByte, 0xD9.toByte, 0xE2.toByte, 0xE3.toByte, 0xE4.toByte, 0xE5.toByte, 0xE6.toByte, 0xE7.toByte, 0xE8.toByte, 0xE9.toByte, 0xBA.toByte, 0xE0.toByte, 0xBB.toByte, 0xB0.toByte, 0x6D.toByte, // 80 - 95 + 0x79.toByte, 0x81.toByte, 0x82.toByte, 0x83.toByte, 0x84.toByte, 0x85.toByte, 0x86.toByte, 0x87.toByte, 0x88.toByte, 0x89.toByte, 0x91.toByte, 0x92.toByte, 0x93.toByte, 0x94.toByte, 0x95.toByte, 0x96.toByte, // 96 - 111 + 0x97.toByte, 0x98.toByte, 0x99.toByte, 0xA2.toByte, 0xA3.toByte, 0xA4.toByte, 0xA5.toByte, 0xA6.toByte, 0xA7.toByte, 0xA8.toByte, 0xA9.toByte, 0xC0.toByte, 0x6A.toByte, 0xD0.toByte, 0xA1.toByte, 0x00.toByte, // 112 - 127 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 128 - 143 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 144 - 159 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 160 - 175 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 176 - 191 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 192 - 207 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 208 - 223 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 224 - 239 + 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte // 240 - 255 + ) + } } diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala index ef573297a..51a4cf1a5 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala 
@@ -204,4 +204,17 @@ class BinaryExtractorSpec extends AnyFunSuite { } assert(thrown4.getMessage === s"'$notPrimitiveName2' is a GROUP and not a primitive field. Cannot extract it's value.") } + + test("Test set field value by name") { + val fieldName1: String = "COMPANY.SHORT-NAME" + val newValue1: String = "NEWNAME" + copybook.setFieldValueByName(fieldName1, bytes, newValue1, startOffset) + val result1: Any = copybook.getFieldValueByName(fieldName1, bytes, startOffset) + assert(result1.asInstanceOf[String] === "NEWNAME") + + val fieldName2: String = "COMPANY.COMPANY-ID-NUM" + val fields2 = copybook.getFieldByName(fieldName2) + assert(fields2.isInstanceOf[Primitive]) + assert(fields2.asInstanceOf[Primitive].encode.isEmpty) + } } From 2c068b74da9f6b3e872eb4e752e40cd82b41cda5 Mon Sep 17 00:00:00 2001 From: Ruslan Iushchenko Date: Wed, 23 Jul 2025 08:38:36 +0200 Subject: [PATCH 3/7] #769 Move reader parameters parser to the parser module to support non-Spark readers. --- .../cobol/reader}/parameters/CobolParametersParser.scala | 3 +-- .../co/absa/cobrix/cobol/reader}/parameters/Parameters.scala | 2 +- .../co/absa/cobrix/spark/cobol/builder/RddReaderParams.scala | 5 ++--- .../za/co/absa/cobrix/spark/cobol/schema/CobolSchema.scala | 4 ++-- .../za/co/absa/cobrix/spark/cobol/source/DefaultSource.scala | 5 ++--- .../cobol/source/parameters/CobolParametersValidator.scala | 2 +- .../cobrix/spark/cobol/source/streaming/CobolStreamer.scala | 2 +- .../absa/cobrix/spark/cobol/source/DefaultSourceSpec.scala | 2 +- .../cobrix/spark/cobol/source/ParametersParsingSpec.scala | 2 +- 9 files changed, 12 insertions(+), 15 deletions(-) rename {spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol => cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader}/parameters/CobolParametersParser.scala (99%) rename {spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol => cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader}/parameters/Parameters.scala (98%) diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/parameters/CobolParametersParser.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/parameters/CobolParametersParser.scala similarity index 99% rename from spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/parameters/CobolParametersParser.scala rename to cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/parameters/CobolParametersParser.scala index 91cb0c4e7..51454dd98 100644 --- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/parameters/CobolParametersParser.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/parameters/CobolParametersParser.scala @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package za.co.absa.cobrix.spark.cobol.parameters +package za.co.absa.cobrix.cobol.reader.parameters import za.co.absa.cobrix.cobol.internal.Logging import za.co.absa.cobrix.cobol.parser.CopybookParser @@ -26,7 +26,6 @@ import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy.StringTrimmi import za.co.absa.cobrix.cobol.parser.policies._ import za.co.absa.cobrix.cobol.parser.recordformats.RecordFormat import za.co.absa.cobrix.cobol.parser.recordformats.RecordFormat._ -import za.co.absa.cobrix.cobol.reader.parameters._ import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy.SchemaRetentionPolicy diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/parameters/Parameters.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/parameters/Parameters.scala similarity index 98% rename from spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/parameters/Parameters.scala rename to cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/parameters/Parameters.scala index 943e251e6..4524470d0 100644 --- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/parameters/Parameters.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/parameters/Parameters.scala @@ -14,7 +14,7 @@ * limitations under the License. */ -package za.co.absa.cobrix.spark.cobol.parameters +package za.co.absa.cobrix.cobol.reader.parameters import scala.collection.mutable diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/builder/RddReaderParams.scala b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/builder/RddReaderParams.scala index 4f58a2ae9..8ada19652 100644 --- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/builder/RddReaderParams.scala +++ b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/builder/RddReaderParams.scala @@ -16,9 +16,8 @@ package za.co.absa.cobrix.spark.cobol.builder -import za.co.absa.cobrix.cobol.reader.parameters.{CobolParameters, ReaderParameters} -import za.co.absa.cobrix.spark.cobol.parameters.CobolParametersParser._ -import za.co.absa.cobrix.spark.cobol.parameters.{CobolParametersParser, Parameters} +import za.co.absa.cobrix.cobol.reader.parameters.{CobolParameters, CobolParametersParser, Parameters, ReaderParameters} +import za.co.absa.cobrix.cobol.reader.parameters.CobolParametersParser._ object RddReaderParams { def forBinary(options: Map[String, String]): ReaderParameters = { diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/schema/CobolSchema.scala b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/schema/CobolSchema.scala index c67a55071..c781b6419 100644 --- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/schema/CobolSchema.scala +++ b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/schema/CobolSchema.scala @@ -24,12 +24,12 @@ import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, COMP1, COMP2, import za.co.absa.cobrix.cobol.parser.common.Constants import za.co.absa.cobrix.cobol.parser.encoding.RAW import za.co.absa.cobrix.cobol.parser.policies.MetadataPolicy +import za.co.absa.cobrix.cobol.reader.parameters.{CobolParametersParser, Parameters} import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy.SchemaRetentionPolicy import za.co.absa.cobrix.cobol.reader.schema.{CobolSchema => CobolReaderSchema} -import 
za.co.absa.cobrix.spark.cobol.parameters.CobolParametersParser.getReaderProperties +import za.co.absa.cobrix.cobol.reader.parameters.CobolParametersParser.getReaderProperties import za.co.absa.cobrix.spark.cobol.parameters.MetadataFields.{MAX_ELEMENTS, MAX_LENGTH, MIN_ELEMENTS} -import za.co.absa.cobrix.spark.cobol.parameters.{CobolParametersParser, Parameters} import scala.collection.mutable import scala.collection.mutable.ArrayBuffer diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSource.scala b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSource.scala index 71c22d61b..dd4ccb3d7 100644 --- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSource.scala +++ b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSource.scala @@ -21,9 +21,8 @@ import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationP import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{SQLContext, SparkSession} import za.co.absa.cobrix.cobol.internal.Logging -import za.co.absa.cobrix.cobol.reader.parameters.CobolParameters -import za.co.absa.cobrix.spark.cobol.parameters.CobolParametersParser._ -import za.co.absa.cobrix.spark.cobol.parameters.{CobolParametersParser, Parameters} +import za.co.absa.cobrix.cobol.reader.parameters.{CobolParameters, CobolParametersParser, Parameters} +import za.co.absa.cobrix.cobol.reader.parameters.CobolParametersParser._ import za.co.absa.cobrix.spark.cobol.reader._ import za.co.absa.cobrix.spark.cobol.source.copybook.CopybookContentLoader import za.co.absa.cobrix.spark.cobol.source.parameters._ diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/parameters/CobolParametersValidator.scala b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/parameters/CobolParametersValidator.scala index 8941b9230..dff41b35c 100644 --- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/parameters/CobolParametersValidator.scala +++ b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/parameters/CobolParametersValidator.scala @@ -22,7 +22,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.SparkConf import za.co.absa.cobrix.cobol.reader.parameters.CobolParameters -import za.co.absa.cobrix.spark.cobol.parameters.CobolParametersParser._ +import za.co.absa.cobrix.cobol.reader.parameters.CobolParametersParser._ import za.co.absa.cobrix.spark.cobol.utils.ResourceUtils.getClass import za.co.absa.cobrix.spark.cobol.utils.{FileNameUtils, FsType} diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/streaming/CobolStreamer.scala b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/streaming/CobolStreamer.scala index 6c9414fbd..856329ee7 100644 --- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/streaming/CobolStreamer.scala +++ b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/streaming/CobolStreamer.scala @@ -25,7 +25,7 @@ import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage import za.co.absa.cobrix.cobol.parser.policies.{FillerNamingPolicy, StringTrimmingPolicy} import za.co.absa.cobrix.cobol.reader.parameters.ReaderParameters import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy -import za.co.absa.cobrix.spark.cobol.parameters.CobolParametersParser._ +import za.co.absa.cobrix.cobol.reader.parameters.CobolParametersParser._ import 
za.co.absa.cobrix.spark.cobol.reader.{FixedLenNestedReader, FixedLenReader} import za.co.absa.cobrix.spark.cobol.source.parameters.CobolParametersValidator import za.co.absa.cobrix.spark.cobol.utils.HDFSUtils diff --git a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSourceSpec.scala b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSourceSpec.scala index c1c416680..096ad3820 100644 --- a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSourceSpec.scala +++ b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSourceSpec.scala @@ -26,7 +26,7 @@ import za.co.absa.cobrix.spark.cobol.source.utils.SourceTestUtils class DefaultSourceSpec extends SparkCobolTestBase { import SourceTestUtils._ - import za.co.absa.cobrix.spark.cobol.parameters.CobolParametersParser._ + import za.co.absa.cobrix.cobol.reader.parameters.CobolParametersParser._ private var defaultSource: DefaultSource = _ diff --git a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/ParametersParsingSpec.scala b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/ParametersParsingSpec.scala index 7fbc3049c..5534f5d2f 100644 --- a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/ParametersParsingSpec.scala +++ b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/ParametersParsingSpec.scala @@ -17,7 +17,7 @@ package za.co.absa.cobrix.spark.cobol.source import org.scalatest.funsuite.AnyFunSuite -import za.co.absa.cobrix.spark.cobol.parameters.{CobolParametersParser, Parameters} +import za.co.absa.cobrix.cobol.reader.parameters.{CobolParametersParser, Parameters} import scala.collection.immutable.HashMap From a0f4b2ae4495e021787817e2acb740b80daedf7c Mon Sep 17 00:00:00 2001 From: Ruslan Iushchenko Date: Thu, 24 Jul 2025 09:00:32 +0200 Subject: [PATCH 4/7] #769 Add library methods to implement ebcdic to ebcdic file processor. 
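The pieces are meant to be wired together roughly as below. This is a hedged sketch, not part of the patch: the file names are illustrative, the processor is an identity transform, and 'VB' (variable block) is used as the record format because a raw record extractor already exists for it; plain fixed-length files are covered by a later commit in this series. The option keys are the same ones 'spark-cobol' accepts.

    import java.io.{BufferedOutputStream, FileOutputStream}
    import za.co.absa.cobrix.cobol.parser.Copybook
    import za.co.absa.cobrix.cobol.processor.{RawRecordProcessor, RecordProcessorBuilder}
    import za.co.absa.cobrix.cobol.reader.stream.FSStream

    val copybookText =
      """       01  RECORD.
        |          05  FIELD  PIC X(10).
        |""".stripMargin

    // A pass-through processor; a real implementation would return the transformed
    // bytes in the same layout as the input record.
    val passThrough = new RawRecordProcessor {
      override def processRecord(copybook: Copybook,
                                 options: Map[String, String],
                                 record: Array[Byte],
                                 offset: Long): Array[Byte] = record
    }

    val output = new BufferedOutputStream(new FileOutputStream("data_out.dat"))
    try {
      RecordProcessorBuilder
        .copybookContents(copybookText)
        .option("record_format", "VB")
        .process(new FSStream("data_in.dat"), output)(passThrough)
    } finally {
      output.close()
    }

Since record headers and the file footer are copied over verbatim by StreamProcessor, the output stays byte-compatible with the input except for the records the processor changes.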
---
 .../cobol/processor/ArrayOfAnyHandler.scala   |  33 +++++
 .../cobol/processor/MapOfAnyHandler.scala     |  40 ++++++
 .../cobol/processor/RawRecordProcessor.scala  |  31 +++++
 .../processor/RecordProcessorBuilder.scala    | 114 ++++++++++++++++++
 .../cobol/processor/StreamProcessor.scala     |  69 +++++++++++
 .../cobrix/cobol/reader/stream/FSStream.scala |   8 +-
 .../cobol/reader/stream/SimpleStream.scala    |   3 +
 .../cobrix/cobol/mock/ByteStreamMock.scala    |   8 +-
 .../reader/memorystream/TestByteStream.scala  |   5 +
 .../memorystream/TestStringStream.scala       |   4 +
 .../cobol/source/streaming/FileStreamer.scala |   6 +-
 11 files changed, 317 insertions(+), 4 deletions(-)
 create mode 100644 cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/ArrayOfAnyHandler.scala
 create mode 100644 cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/MapOfAnyHandler.scala
 create mode 100644 cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RawRecordProcessor.scala
 create mode 100644 cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilder.scala
 create mode 100644 cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/StreamProcessor.scala

diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/ArrayOfAnyHandler.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/ArrayOfAnyHandler.scala
new file mode 100644
index 000000000..ffa305879
--- /dev/null
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/ArrayOfAnyHandler.scala
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.cobrix.cobol.processor
+
+import za.co.absa.cobrix.cobol.parser.ast.Group
+import za.co.absa.cobrix.cobol.reader.extractors.record.RecordHandler
+
+/**
+  * A handler for processing COBOL records and mapping them to JVM data structures.
+  *
+  * This implementation uses an array to hold the field values of struct (group) fields.
+  */
+class ArrayOfAnyHandler extends RecordHandler[scala.Array[Any]] {
+  override def create(values: Array[Any], group: Group): Array[Any] = values
+
+  override def toSeq(record: Array[Any]): Seq[Any] = record.toSeq
+
+  override def foreach(record: Array[Any])(f: Any => Unit): Unit = record.foreach(f)
+}
diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/MapOfAnyHandler.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/MapOfAnyHandler.scala
new file mode 100644
index 000000000..52ec1178b
--- /dev/null
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/MapOfAnyHandler.scala
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.cobrix.cobol.processor
+
+import za.co.absa.cobrix.cobol.parser.ast.Group
+import za.co.absa.cobrix.cobol.reader.extractors.record.RecordHandler
+
+/**
+  * A handler for processing COBOL records and mapping them to JVM data structures.
+  *
+  * This implementation uses a map from field name to value to represent struct fields of data records.
+  */
+class MapOfAnyHandler extends RecordHandler[Map[String, Any]] {
+  override def create(values: Array[Any], group: Group): Map[String, Any] = {
+    (group.children zip values).map(t => t._1.name -> (t._2 match {
+      case s: Array[Any] => s.toSeq
+      case s => s
+    })).toMap
+  }
+
+  override def toSeq(record: Map[String, Any]): Seq[Any] = {
+    record.values.toSeq
+  }
+
+  override def foreach(record: Map[String, Any])(f: Any => Unit): Unit = record.values.foreach(f)
+}
diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RawRecordProcessor.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RawRecordProcessor.scala
new file mode 100644
index 000000000..e56f5b68a
--- /dev/null
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RawRecordProcessor.scala
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.cobrix.cobol.processor
+
+import za.co.absa.cobrix.cobol.parser.Copybook
+
+/**
+  * A trait that defines a processor for raw COBOL records.
+  * It provides a method to process a single COBOL record based on the provided copybook and options.
+  */
+trait RawRecordProcessor {
+  def processRecord(copybook: Copybook,
+                    options: Map[String, String],
+                    record: Array[Byte],
+                    offset: Long): Array[Byte]
+
+}
diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilder.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilder.scala
new file mode 100644
index 000000000..5dc9d1622
--- /dev/null
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilder.scala
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.cobrix.cobol.processor
+
+import za.co.absa.cobrix.cobol.reader.VarLenNestedReader
+import za.co.absa.cobrix.cobol.reader.extractors.raw.RawRecordExtractor
+import za.co.absa.cobrix.cobol.reader.parameters.{CobolParametersParser, Parameters, ReaderParameters}
+import za.co.absa.cobrix.cobol.reader.schema.CobolSchema
+import za.co.absa.cobrix.cobol.reader.stream.SimpleStream
+
+import java.io.OutputStream
+import scala.collection.mutable
+import scala.reflect.ClassTag
+
+object RecordProcessorBuilder {
+  /**
+    * Creates a new instance of the RecordProcessorBuilder with the given copybook content.
+    * The instance is used to create a COBOL data processor that allows applying changes to a mainframe file
+    * without changing the original format.
+    *
+    * @param copybookContent the COBOL copybook content as a string.
+    * @return a new RecordProcessorBuilder instance.
+    */
+  def copybookContents(copybookContent: String): RecordProcessorBuilder = {
+    new RecordProcessorBuilder(copybookContent)
+  }
+}
+
+class RecordProcessorBuilder(copybookContents: String) {
+  private val caseInsensitiveOptions = new mutable.HashMap[String, String]()
+
+  /**
+    * Adds a single option to the builder.
+    *
+    * @param key   the option key.
+    * @param value the option value.
+    * @return this builder instance for method chaining.
+    */
+  def option(key: String, value: String): RecordProcessorBuilder = {
+    caseInsensitiveOptions += (key.toLowerCase -> value)
+    this
+  }
+
+  /**
+    * Adds multiple options to the builder.
+    *
+    * @param options a map of option key-value pairs.
+    * @return this builder instance for method chaining.
+    */
+  def options(options: Map[String, String]): RecordProcessorBuilder = {
+    caseInsensitiveOptions ++= options.map(kv => (kv._1.toLowerCase(), kv._2))
+    this
+  }
+
+  /**
+    * Processes the input stream of COBOL records and writes the output to the specified output stream.
+    *
+    * @param inputStream        the input stream containing raw COBOL records.
+    * @param outputStream       the output stream where processed records will be written.
+    * @param rawRecordProcessor the processor that processes each raw record.
+ */ + def process(inputStream: SimpleStream, + outputStream: OutputStream) + (rawRecordProcessor: RawRecordProcessor): Unit = { + val readerParameters = getReaderParameters + val cobolSchema = getCobolSchema(readerParameters) + val recordExtractor = getRecordExtractor(readerParameters, inputStream) + + val dataStream = inputStream.copyStream() + try { + StreamProcessor.processStream(cobolSchema.copybook, + caseInsensitiveOptions.toMap, + dataStream, + recordExtractor, + rawRecordProcessor, + outputStream) + } finally { + dataStream.close() + } + } + + private def getCobolSchema(readerParameters: ReaderParameters): CobolSchema = { + CobolSchema.fromReaderParameters(Seq(copybookContents), readerParameters) + } + + private def getReaderParameters: ReaderParameters = { + val cobolParameters = CobolParametersParser.parse(new Parameters(caseInsensitiveOptions.toMap)) + + CobolParametersParser.getReaderProperties(cobolParameters, None) + } + + private def getRecordExtractor[T: ClassTag](readerParameters: ReaderParameters, inputStream: SimpleStream): RawRecordExtractor = { + val dataStream = inputStream.copyStream() + val headerStream = inputStream.copyStream() + + val reader = new VarLenNestedReader[Array[Any]](Seq(copybookContents), readerParameters, new ArrayOfAnyHandler) + + reader.recordExtractor(0, dataStream, headerStream).get + } +} diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/StreamProcessor.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/StreamProcessor.scala new file mode 100644 index 000000000..4155972ae --- /dev/null +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/StreamProcessor.scala @@ -0,0 +1,69 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package za.co.absa.cobrix.cobol.processor + +import za.co.absa.cobrix.cobol.parser.Copybook +import za.co.absa.cobrix.cobol.reader.extractors.raw.RawRecordExtractor +import za.co.absa.cobrix.cobol.reader.stream.SimpleStream + +import java.io.OutputStream + +object StreamProcessor { + /** + * Processes a stream of COBOL raw records and writes it back in the same format as the input data. + * + * @param copybook the COBOL copybook that describes the schema of the records. + * @param options arbitrary options used for splitting input data into records. Same as options to 'spark-cobol'. Can contain custom options as well. + * @param inputStream the input stream containing the raw COBOL records. + * @param recordExtractor the extractor that extracts raw records from the input stream. + * @param recordProcessor the per-record processing logic implementation. + * @param outputStream the output stream where the processed records will be written. 
+ */ + def processStream(copybook: Copybook, + options: Map[String, String], + inputStream: SimpleStream, + recordExtractor: RawRecordExtractor, + recordProcessor: RawRecordProcessor, + outputStream: OutputStream): Unit = { + var i = 0 + while (recordExtractor.hasNext) { + i += 1 + val record = recordExtractor.next() + val recordSize = record.length + + val updatedRecord = if (recordExtractor.hasNext) { + recordProcessor.processRecord(copybook, options, record, recordExtractor.offset) + } else { + record + } + + val headerSize = recordExtractor.offset - recordSize - inputStream.offset + if (headerSize > 0) { + val header = inputStream.next(headerSize.toInt) + outputStream.write(header) + } + inputStream.next(recordSize) + outputStream.write(updatedRecord) + } + + val footerSize = inputStream.size - inputStream.offset + if (footerSize > 0) { + val footer = inputStream.next(footerSize.toInt) + outputStream.write(footer) + } + } +} diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/stream/FSStream.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/stream/FSStream.scala index 8767ac62b..46f814173 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/stream/FSStream.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/stream/FSStream.scala @@ -16,7 +16,7 @@ package za.co.absa.cobrix.cobol.reader.stream -import java.io.{BufferedInputStream, File, FileInputStream, IOException} +import java.io.{BufferedInputStream, File, FileInputStream, FileNotFoundException, IOException} class FSStream (fileName: String) extends SimpleStream { val bytesStream = new BufferedInputStream(new FileInputStream(fileName)) @@ -33,7 +33,6 @@ class FSStream (fileName: String) extends SimpleStream { override def inputFileName: String = fileName - @throws(classOf[IllegalArgumentException]) @throws(classOf[IOException]) override def next(numberOfBytes: Int): Array[Byte] = { if (numberOfBytes <= 0) throw new IllegalArgumentException("Value of numberOfBytes should be greater than zero.") @@ -55,4 +54,9 @@ class FSStream (fileName: String) extends SimpleStream { isClosed = true } } + + @throws(classOf[FileNotFoundException]) + override def copyStream(): SimpleStream = { + new FSStream(fileName) + } } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/stream/SimpleStream.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/stream/SimpleStream.scala index b4dbcd247..561116121 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/stream/SimpleStream.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/stream/SimpleStream.scala @@ -29,6 +29,9 @@ trait SimpleStream { def isEndOfStream: Boolean = offset >= size + @throws(classOf[Exception]) + def copyStream(): SimpleStream + @throws(classOf[Exception]) def next(numberOfBytes: Int): Array[Byte] @throws(classOf[Exception]) def close(): Unit diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/mock/ByteStreamMock.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/mock/ByteStreamMock.scala index 4f0b9c889..70f5dae6c 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/mock/ByteStreamMock.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/mock/ByteStreamMock.scala @@ -16,7 +16,9 @@ package za.co.absa.cobrix.cobol.mock -import za.co.absa.cobrix.cobol.reader.stream.SimpleStream +import za.co.absa.cobrix.cobol.reader.stream.{FSStream, SimpleStream} + +import java.io.FileNotFoundException class 
ByteStreamMock(bytes: Array[Byte]) extends SimpleStream{ @@ -48,4 +50,8 @@ class ByteStreamMock(bytes: Array[Byte]) extends SimpleStream{ } override def close(): Unit = position = sz + + override def copyStream(): SimpleStream = { + new ByteStreamMock(bytes) + } } diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/memorystream/TestByteStream.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/memorystream/TestByteStream.scala index 034e8cae4..3de8bc3ec 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/memorystream/TestByteStream.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/memorystream/TestByteStream.scala @@ -16,6 +16,7 @@ package za.co.absa.cobrix.cobol.reader.memorystream +import za.co.absa.cobrix.cobol.mock.ByteStreamMock import za.co.absa.cobrix.cobol.reader.stream.SimpleStream class TestByteStream(bytes: Array[Byte]) extends SimpleStream{ @@ -48,4 +49,8 @@ class TestByteStream(bytes: Array[Byte]) extends SimpleStream{ } override def close(): Unit = position = sz + + override def copyStream(): SimpleStream = { + new TestByteStream(bytes) + } } diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/memorystream/TestStringStream.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/memorystream/TestStringStream.scala index 2b0935498..eaedebcbd 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/memorystream/TestStringStream.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/memorystream/TestStringStream.scala @@ -48,4 +48,8 @@ class TestStringStream(str: String) extends SimpleStream{ } override def close(): Unit = position = sz + + override def copyStream(): SimpleStream = { + new TestStringStream(str) + } } diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/streaming/FileStreamer.scala b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/streaming/FileStreamer.scala index c84effb3b..5a1dbd8a6 100644 --- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/streaming/FileStreamer.scala +++ b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/streaming/FileStreamer.scala @@ -111,6 +111,10 @@ class FileStreamer(filePath: String, fileSystem: FileSystem, startOffset: Long = } } + override def copyStream(): SimpleStream = { + new FileStreamer(filePath, fileSystem, startOffset, maximumBytes) + } + /** * Gets a Hadoop [[Path]] (HDFS, S3, DBFS, etc) to the file. * @@ -128,4 +132,4 @@ class FileStreamer(filePath: String, fileSystem: FileSystem, startOffset: Long = val cSummary: ContentSummary = fileSystem.getContentSummary(hadoopPath) cSummary.getLength } -} \ No newline at end of file +} From 2cad3deda83935aa31f15a95b1157004821477e3 Mon Sep 17 00:00:00 2001 From: Ruslan Iushchenko Date: Mon, 28 Jul 2025 11:53:17 +0200 Subject: [PATCH 5/7] #769 Fix the processor not processing the last record, add unit tests. 
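Besides calling processRecord() for the last record as well, this adds a dedicated extractor so that fixed-record-length files can be processed too. A sketch of the extractor in isolation, assuming a file of 10-byte records (the null header decoders mirror how RecordProcessorBuilder itself constructs RawRecordContext):

    import za.co.absa.cobrix.cobol.reader.extractors.raw.{FixedRecordLengthRawRecordExtractor, RawRecordContext}
    import za.co.absa.cobrix.cobol.reader.stream.FSStream

    // 'copybook' is assumed to be parsed beforehand with CopybookParser.
    val dataStream = new FSStream("data.dat")
    val headerStream = new FSStream("data.dat") // closed by the extractor right away
    val ctx = RawRecordContext(0, dataStream, headerStream, copybook, null, null, "")
    val extractor = new FixedRecordLengthRawRecordExtractor(ctx, Some(10))

    while (extractor.hasNext) {
      val record = extractor.next() // exactly 10 bytes, including the final record
    }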
--- .../processor/RecordProcessorBuilder.scala | 24 +++- .../cobol/processor/StreamProcessor.scala | 6 +- .../FixedRecordLengthRawRecordExtractor.scala | 56 +++++++++ .../RecordProcessorBuilderSuite.scala | 118 ++++++++++++++++++ 4 files changed, 194 insertions(+), 10 deletions(-) create mode 100644 cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedRecordLengthRawRecordExtractor.scala create mode 100644 cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilderSuite.scala diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilder.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilder.scala index 5dc9d1622..783cfacd1 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilder.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilder.scala @@ -16,8 +16,9 @@ package za.co.absa.cobrix.cobol.processor +import za.co.absa.cobrix.cobol.parser.recordformats.RecordFormat.FixedLength import za.co.absa.cobrix.cobol.reader.VarLenNestedReader -import za.co.absa.cobrix.cobol.reader.extractors.raw.RawRecordExtractor +import za.co.absa.cobrix.cobol.reader.extractors.raw.{FixedRecordLengthRawRecordExtractor, RawRecordContext, RawRecordExtractor} import za.co.absa.cobrix.cobol.reader.parameters.{CobolParametersParser, Parameters, ReaderParameters} import za.co.absa.cobrix.cobol.reader.schema.CobolSchema import za.co.absa.cobrix.cobol.reader.stream.SimpleStream @@ -93,22 +94,35 @@ class RecordProcessorBuilder(copybookContents: String) { } } - private def getCobolSchema(readerParameters: ReaderParameters): CobolSchema = { + private[processor] def getCobolSchema(readerParameters: ReaderParameters): CobolSchema = { CobolSchema.fromReaderParameters(Seq(copybookContents), readerParameters) } - private def getReaderParameters: ReaderParameters = { + private[processor] def getReaderParameters: ReaderParameters = { val cobolParameters = CobolParametersParser.parse(new Parameters(caseInsensitiveOptions.toMap)) CobolParametersParser.getReaderProperties(cobolParameters, None) } - private def getRecordExtractor[T: ClassTag](readerParameters: ReaderParameters, inputStream: SimpleStream): RawRecordExtractor = { + private[processor] def getRecordExtractor[T: ClassTag](readerParameters: ReaderParameters, inputStream: SimpleStream): RawRecordExtractor = { val dataStream = inputStream.copyStream() val headerStream = inputStream.copyStream() val reader = new VarLenNestedReader[Array[Any]](Seq(copybookContents), readerParameters, new ArrayOfAnyHandler) - reader.recordExtractor(0, dataStream, headerStream).get + reader.recordExtractor(0, dataStream, headerStream) match { + case Some(extractor) => extractor + case None if readerParameters.recordFormat == FixedLength => + val dataStream = inputStream.copyStream() + val headerStream = inputStream.copyStream() + val ctx = RawRecordContext(0, dataStream, headerStream, getCobolSchema(readerParameters).copybook, null, null, "") + new FixedRecordLengthRawRecordExtractor(ctx, readerParameters.recordLength) + case None => + throw new IllegalArgumentException(s"Cannot create a record extractor for the given reader parameters. " + + "Please check the copybook and the reader parameters." 
+ ) + } } + + private[processor] def getOptions: Map[String, String] = caseInsensitiveOptions.toMap } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/StreamProcessor.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/StreamProcessor.scala index 4155972ae..2dcdf3b57 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/StreamProcessor.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/StreamProcessor.scala @@ -45,11 +45,7 @@ object StreamProcessor { val record = recordExtractor.next() val recordSize = record.length - val updatedRecord = if (recordExtractor.hasNext) { - recordProcessor.processRecord(copybook, options, record, recordExtractor.offset) - } else { - record - } + val updatedRecord = recordProcessor.processRecord(copybook, options, record, recordExtractor.offset) val headerSize = recordExtractor.offset - recordSize - inputStream.offset if (headerSize > 0) { diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedRecordLengthRawRecordExtractor.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedRecordLengthRawRecordExtractor.scala new file mode 100644 index 000000000..2c0e79458 --- /dev/null +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedRecordLengthRawRecordExtractor.scala @@ -0,0 +1,56 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package za.co.absa.cobrix.cobol.reader.extractors.raw + +class FixedRecordLengthRawRecordExtractor(ctx: RawRecordContext, fixedRecordLength: Option[Int]) extends Serializable with RawRecordExtractor { + private var byteOffset: Long = ctx.inputStream.offset + private val recordSize = fixedRecordLength.getOrElse(ctx.copybook.getRecordSize) + private var currentRecordOpt: Option[Array[Byte]] = None + + ctx.headerStream.close() + + override def offset: Long = byteOffset + + override def hasNext: Boolean = { + if (currentRecordOpt.isEmpty) { + readNextRecord() + } + currentRecordOpt.nonEmpty + } + + private def readNextRecord(): Unit = { + if (!ctx.inputStream.isEndOfStream) { + val nextRecord = ctx.inputStream.next(recordSize) + + if (nextRecord.length > 0) { + currentRecordOpt = Some(nextRecord) + } + } + } + + + @throws[NoSuchElementException] + override def next(): Array[Byte] = { + if (!hasNext) { + throw new NoSuchElementException + } + val record = currentRecordOpt.get + byteOffset += record.length + currentRecordOpt = None + record + } +} diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilderSuite.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilderSuite.scala new file mode 100644 index 000000000..ef8dc3a15 --- /dev/null +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilderSuite.scala @@ -0,0 +1,118 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package za.co.absa.cobrix.cobol.processor + +import org.scalatest.wordspec.AnyWordSpec +import za.co.absa.cobrix.cobol.mock.ByteStreamMock +import za.co.absa.cobrix.cobol.parser.Copybook +import za.co.absa.cobrix.cobol.parser.recordformats.RecordFormat +import za.co.absa.cobrix.cobol.reader.extractors.raw.{FixedRecordLengthRawRecordExtractor, TextFullRecordExtractor} +import za.co.absa.cobrix.cobol.reader.parameters.ReaderParameters + +import java.io.ByteArrayOutputStream + +class RecordProcessorBuilderSuite extends AnyWordSpec { + private val copybook = + """ 01 RECORD. + | 05 T PIC X. 
+ |""".stripMargin + "process" should { + "process an input data stream into an output stream" in { + val is = new ByteStreamMock(Array(0xF1, 0xF2, 0xF3, 0xF4).map(_.toByte)) + val os = new ByteArrayOutputStream(10) + val builder = RecordProcessorBuilder.copybookContents(copybook) + + val processor = new RawRecordProcessor { + override def processRecord(copybook: Copybook, options: Map[String, String], record: Array[Byte], offset: Long): Array[Byte] = { + record.map(v => (v - 1).toByte) + } + } + + builder.process(is, os)(processor) + + val outputArray = os.toByteArray + + assert(outputArray.head == -16) + assert(outputArray(1) == -15) + assert(outputArray(2) == -14) + assert(outputArray(3) == -13) + } + } + + "getCobolSchema" should { + "return the schema of the copybook provided" in { + val builder = RecordProcessorBuilder.copybookContents(copybook) + + val cobolSchema = builder.getCobolSchema(ReaderParameters()) + + assert(cobolSchema.copybook.ast.children.length == 1) + } + } + + "getReaderParameters" should { + "return a reader according to passed options" in { + val builder = RecordProcessorBuilder.copybookContents(copybook) + .option("record_format", "D") + + assert(builder.getReaderParameters.recordFormat == RecordFormat.AsciiText) + assert(builder.getOptions.contains("record_format")) + } + } + + "getRecordExtractor" should { + "work for an fixed-record-length files" in { + val stream = new ByteStreamMock(Array(0xF1, 0xF2, 0xF3, 0xF4).map(_.toByte)) + val builder = RecordProcessorBuilder.copybookContents(copybook) + + val ext = builder.getRecordExtractor(ReaderParameters(recordLength = Some(2)), stream) + + assert(ext.isInstanceOf[FixedRecordLengthRawRecordExtractor]) + + assert(ext.hasNext) + assert(ext.next().sameElements(Array(0xF1, 0xF2).map(_.toByte))) + assert(ext.next().sameElements(Array(0xF3, 0xF4).map(_.toByte))) + assert(!ext.hasNext) + } + + "work for an variable-record-length files" in { + val stream = new ByteStreamMock(Array(0xF1, 0xF2, 0xF3, 0xF4).map(_.toByte)) + val builder = RecordProcessorBuilder.copybookContents(copybook) + + val ext = builder.getRecordExtractor(ReaderParameters( + recordFormat = RecordFormat.VariableLength, + isText = true + ), stream) + + assert(ext.isInstanceOf[TextFullRecordExtractor]) + } + + "throw an exception on a non-supported record format for processing" in { + val stream = new ByteStreamMock(Array(0xF1, 0xF2, 0xF3, 0xF4).map(_.toByte)) + val builder = RecordProcessorBuilder.copybookContents(copybook) + + val ex = intercept[IllegalArgumentException] { + builder.getRecordExtractor(ReaderParameters( + recordFormat = RecordFormat.VariableLength, + isRecordSequence = true + ), stream) + } + + assert(ex.getMessage.contains("Cannot create a record extractor for the given reader parameters.")) + } + } + +} From d99038f797b18dff8ea65c8a52d44b2995dea574 Mon Sep 17 00:00:00 2001 From: Ruslan Iushchenko Date: Wed, 30 Jul 2025 10:36:36 +0200 Subject: [PATCH 6/7] #769 Remove redundant import. 
---
 .../absa/cobrix/cobol/reader/memorystream/TestByteStream.scala | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/memorystream/TestByteStream.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/memorystream/TestByteStream.scala
index 3de8bc3ec..5353756cb 100644
--- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/memorystream/TestByteStream.scala
+++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/memorystream/TestByteStream.scala
@@ -16,7 +16,6 @@
 
 package za.co.absa.cobrix.cobol.reader.memorystream
 
-import za.co.absa.cobrix.cobol.mock.ByteStreamMock
 import za.co.absa.cobrix.cobol.reader.stream.SimpleStream
 
 class TestByteStream(bytes: Array[Byte]) extends SimpleStream{

From bfe7d70336d6ed4f950948901105eaca430e9419 Mon Sep 17 00:00:00 2001
From: Ruslan Iushchenko
Date: Thu, 31 Jul 2025 11:19:02 +0200
Subject: [PATCH 7/7] #769 Address nitpick suggestions from the Copilot PR review.
---
 .../co/absa/cobrix/cobol/parser/Copybook.scala | 18 ++++++++++--------
 .../processor/RecordProcessorBuilder.scala     |  2 +-
 .../cobrix/cobol/mock/ByteStreamMock.scala     |  4 +---
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala
index 664507912..f8d9781cd 100644
--- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala
@@ -86,26 +86,28 @@ class Copybook(val ast: CopybookAST) extends Logging with Serializable {
     val ast = getFieldByName(fieldName)
     ast match {
       case s: Primitive => extractPrimitiveField(s, recordBytes, startOffset)
-      case _ => throw new IllegalStateException(s"$fieldName is not a primitive field, cannot extract it's value.")
+      case _ => throw new IllegalStateException(s"$fieldName is not a primitive field, cannot extract its value.")
     }
   }
 
   /**
-    * Set value of a field of the copybook record by name
+    * Sets the value of a copybook record field specified by name.
     *
     * Nested field names can contain '.' to identify the exact field.
-    * If the field name is unique '.' is not required.
+    * If the field name is unique, '.' is not required.
+    *
+    * This method modifies the record in place and does not return a value.
     *
     * @param fieldName   A field name
     * @param recordBytes Binary encoded data of the record
-    * @param startOffset An offset where the record starts in the data (in bytes).
-    * @return The value of the field
+    * @param value       The value to set
+    * @param startOffset An offset where the record starts in the data (in bytes)
     */
-  def setFieldValueByName(fieldName: String, recordBytes: Array[Byte], value: Any, startOffset: Int = 0): Any = {
+  def setFieldValueByName(fieldName: String, recordBytes: Array[Byte], value: Any, startOffset: Int = 0): Unit = {
     val ast = getFieldByName(fieldName)
     ast match {
       case s: Primitive => setPrimitiveField(s, recordBytes, value, startOffset)
-      case _ => throw new IllegalStateException(s"$fieldName is not a primitive field, cannot set it's value.")
+      case _ => throw new IllegalStateException(s"$fieldName is not a primitive field, cannot set its value.")
     }
   }
 
@@ -113,7 +115,7 @@ class Copybook(val ast: CopybookAST) extends Logging with Serializable {
    * Get the AST object of a field by name.
    *
    * Nested field names can contain '.' to identify the exact field.
-   * If the field name is unique '.' is not required.
+   * If the field name is unique, '.' is not required.
    *
    * @param fieldName A field name
    * @return An AST object of the field. Throws an IllegalStateException if not found or found multiple.

diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilder.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilder.scala
index 783cfacd1..e92ee0c98 100644
--- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilder.scala
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/RecordProcessorBuilder.scala
@@ -104,7 +104,7 @@ class RecordProcessorBuilder(copybookContents: String) {
     CobolParametersParser.getReaderProperties(cobolParameters, None)
   }
 
-  private[processor] def getRecordExtractor[T: ClassTag](readerParameters: ReaderParameters, inputStream: SimpleStream): RawRecordExtractor = {
+  private[processor] def getRecordExtractor(readerParameters: ReaderParameters, inputStream: SimpleStream): RawRecordExtractor = {
     val dataStream = inputStream.copyStream()
     val headerStream = inputStream.copyStream()
 
diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/mock/ByteStreamMock.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/mock/ByteStreamMock.scala
index 70f5dae6c..e7533dccd 100644
--- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/mock/ByteStreamMock.scala
+++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/mock/ByteStreamMock.scala
@@ -16,9 +16,7 @@
 
 package za.co.absa.cobrix.cobol.mock
 
-import za.co.absa.cobrix.cobol.reader.stream.{FSStream, SimpleStream}
-
-import java.io.FileNotFoundException
+import za.co.absa.cobrix.cobol.reader.stream.SimpleStream
 
 class ByteStreamMock(bytes: Array[Byte]) extends SimpleStream{
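
A short sketch of the revised Copybook accessors after this series;
CopybookParser.parseTree and getFieldValueByName are assumed from the
library's existing public API, and the field name "T" comes from the
test copybook above:

    val copybook = CopybookParser.parseTree(copybookContents) // assumed parse entry point

    // Reads the value of a primitive field from the raw record bytes.
    val value = copybook.getFieldValueByName("T", recordBytes)

    // Writes a value into the raw record bytes. As of this series the
    // method mutates recordBytes in place and returns Unit.
    copybook.setFieldValueByName("T", recordBytes, "A")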