Skip to content

#769 Add EBCDIC processor as a library routine #771

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,55 @@ class Copybook(val ast: CopybookAST) extends Logging with Serializable {
}

/**
* Get the AST object of a field by name.
* Get value of a field of the copybook record by name
*
* Nested field names can contain '.' to identify the exact field.
* If the field name is unique '.' is not required.
*
* @param fieldName A field name
* @param recordBytes Binary encoded data of the record
* @param startOffset An offset where the record starts in the data (in bytes).
* @return The value of the field
*/
def getFieldValueByName(fieldName: String, recordBytes: Array[Byte], startOffset: Int = 0): Any = {
  // Resolve the field's AST node first; only primitive (leaf) fields carry a decodable value.
  getFieldByName(fieldName) match {
    case primitive: Primitive =>
      extractPrimitiveField(primitive, recordBytes, startOffset)
    case _ =>
      throw new IllegalStateException(s"$fieldName is not a primitive field, cannot extract its value.")
  }
}

/**
* Sets the value of a copybook record field specified by name.
*
* Nested field names can contain '.' to identify the exact field.
* If the field name is unique, '.' is not required.
*
* This method modifies the record in place and does not return a value.
*
* @param fieldName A field name
* @param recordBytes Binary encoded data of the record
* @param value The value to set
* @param startOffset An offset where the record starts in the data (in bytes)
*/
def setFieldValueByName(fieldName: String, recordBytes: Array[Byte], value: Any, startOffset: Int = 0): Unit =
  // Only primitive (leaf) fields have encoders, so anything else is rejected up front.
  getFieldByName(fieldName) match {
    case primitive: Primitive =>
      setPrimitiveField(primitive, recordBytes, value, startOffset)
    case _ =>
      throw new IllegalStateException(s"$fieldName is not a primitive field, cannot set its value.")
  }

/**
* Get the AST object of a field by name.
*
* Nested field names can contain '.' to identify the exact field.
* If the field name is unique, '.' is not required.
*
* @param fieldName A field name
* @return An AST object of the field. Throws an IllegalStateException if not found or multiple fields are found.
*
*/
@throws(classOf[IllegalArgumentException])
def getFieldByName(fieldName: String): Statement = {

def getFieldByNameInGroup(group: Group, fieldName: String): Seq[Statement] = {
Expand Down Expand Up @@ -171,31 +210,40 @@ class Copybook(val ast: CopybookAST) extends Logging with Serializable {
* @return The value of the field
*
*/
@throws(classOf[Exception])
def extractPrimitiveField(field: Primitive, bytes: Array[Byte], startOffset: Int = 0): Any = {
  // The field's payload begins at its declared offset shifted by where the record starts in the buffer.
  val fieldStart = field.binaryProperties.offset + startOffset
  val fieldEnd = fieldStart + field.binaryProperties.actualSize
  // slice() is tolerant of out-of-range bounds, matching the original behavior for short buffers.
  field.decodeTypeValue(0, bytes.slice(fieldStart, fieldEnd))
}

/**
* Get value of a field of the copybook record by name
* Set value of a field of the copybook record by the AST object of the field
*
* Nested field names can contain '.' to identify the exact field.
* If the field name is unique '.' is not required.
*
* @param fieldName A field name
* @param field The AST object of the field
* @param bytes Binary encoded data of the record
* @param startOffset An offset where the record starts in the data (in bytes).
* @param startOffset An offset to the beginning of the field in the data (in bytes).
* @return The value of the field
*
*/
@throws(classOf[IllegalStateException])
@throws(classOf[Exception])
def getFieldValueByName(fieldName: String, bytes: Array[Byte], startOffset: Int = 0): Any = {
val ast = getFieldByName(fieldName)
ast match {
case s: Primitive => extractPrimitiveField(s, bytes, startOffset)
case _ => throw new IllegalStateException(s"$fieldName is not a primitive field, cannot extract it's value.")
def setPrimitiveField(field: Primitive, recordBytes: Array[Byte], value: Any, startOffset: Int = 0): Unit = {
  field.encode match {
    case None =>
      // No encoder is available for this field's data type (e.g. an unsupported PIC clause).
      throw new IllegalStateException(s"Cannot set value for field '${field.name}' because it does not have an encoder defined.")
    case Some(encoder) =>
      // Encode first (may itself fail), then validate placement before touching the record.
      val encodedBytes = encoder(value)
      val firstByte = field.binaryProperties.offset + startOffset
      val byteAfterLast = firstByte + field.binaryProperties.actualSize

      if (firstByte < 0 || byteAfterLast > recordBytes.length) {
        throw new IllegalArgumentException(s"Cannot set value for field '${field.name}' because the field is out of bounds of the record.")
      }
      if (encodedBytes.length != field.binaryProperties.dataSize) {
        throw new IllegalArgumentException(s"Cannot set value for field '${field.name}' because the encoded value has a different size than the field size.")
      }

      // In-place update of the record buffer.
      System.arraycopy(encodedBytes, 0, recordBytes, firstByte, encodedBytes.length)
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import za.co.absa.cobrix.cobol.parser.CopybookParser.CopybookAST
import za.co.absa.cobrix.cobol.parser.ast.datatype._
import za.co.absa.cobrix.cobol.parser.ast.{Group, Primitive}
import za.co.absa.cobrix.cobol.parser.common.Constants
import za.co.absa.cobrix.cobol.parser.decoders.DecoderSelector
import za.co.absa.cobrix.cobol.parser.decoders.{DecoderSelector, EncoderSelector}
import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPointFormat
import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage
import za.co.absa.cobrix.cobol.parser.encoding._
Expand Down Expand Up @@ -855,8 +855,9 @@ class ParserVisitor(enc: Encoding,
Map(),
isDependee = false,
identifier.toUpperCase() == Constants.FILLER,
DecoderSelector.getDecoder(pic.value, stringTrimmingPolicy, isDisplayAlwaysString, effectiveEbcdicCodePage, effectiveAsciiCharset, isUtf16BigEndian = isUtf16BigEndian, floatingPointFormat, strictSignOverpunch = strictSignOverpunch, improvedNullDetection = improvedNullDetection, strictIntegralPrecision = strictIntegralPrecision)
) (Some(parent))
DecoderSelector.getDecoder(pic.value, stringTrimmingPolicy, isDisplayAlwaysString, effectiveEbcdicCodePage, effectiveAsciiCharset, isUtf16BigEndian = isUtf16BigEndian, floatingPointFormat, strictSignOverpunch = strictSignOverpunch, improvedNullDetection = improvedNullDetection, strictIntegralPrecision = strictIntegralPrecision),
EncoderSelector.getEncoder(pic.value, effectiveEbcdicCodePage, effectiveAsciiCharset)
)(Some(parent))

parent.children.append(prim)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,26 @@
package za.co.absa.cobrix.cobol.parser.ast

import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, CobolType, Decimal, Integral}
import za.co.absa.cobrix.cobol.parser.decoders.{BinaryUtils, DecoderSelector}
import za.co.absa.cobrix.cobol.parser.decoders.{BinaryUtils, DecoderSelector, EncoderSelector}

/** An abstraction of the statements describing fields of primitive data types in the COBOL copybook
*
* @param level A level for the statement
* @param name An identifier
* @param originalName Original name of the AST element (before the conversion to the Spark-compatible name)
* @param lineNumber An line number in the copybook
* @param redefines A name of a field which is redefined by this one
* @param occurs The number of elements in an fixed size array / minimum items in variable-sized array
* @param to The maximum number of items in a variable size array
* @param dependingOn A field which specifies size of the array in a record
* @param parent A parent node
* @param level A level for the statement
* @param name An identifier
* @param originalName Original name of the AST element (before the conversion to the Spark-compatible name)
* @param lineNumber A line number in the copybook
* @param redefines A name of a field which is redefined by this one
* @param isRedefined A flag indicating if the field is redefined
* @param occurs The number of elements in a fixed size array / minimum items in variable-sized array
* @param to The maximum number of items in a variable size array
* @param dependingOn A field which specifies size of the array in a record
* @param dependingOnHandlers A map of handlers for the dependingOn field
* @param isDependee A flag indicating if the field is a dependee
* @param isFiller A flag indicating if the field is a filler
* @param decode A decoder for the field to convert from raw data to a JVM data type
* @param encode An optional encoder for the field to convert from a JVM data type to raw data
* @param binaryProperties Binary properties of the field, such as size in bits, alignment, etc.
* @param parent A parent node
*/
case class Primitive(
level: Int,
Expand All @@ -46,6 +53,7 @@ case class Primitive(
isDependee: Boolean = false,
isFiller: Boolean = false,
decode: DecoderSelector.Decoder,
encode: Option[EncoderSelector.Encoder],
binaryProperties: BinaryProperties = BinaryProperties(0, 0, 0)
)
(val parent: Option[Group] = None)
Expand Down Expand Up @@ -100,7 +108,6 @@ case class Primitive(
* @param itOffset An offset of the field inside the binary data
* @param record A record in a binary format represented as a vector of bits
*/
@throws(classOf[Exception])
def decodeTypeValue(itOffset: Int, record: Array[Byte]): Any = {
val bytesCount = binaryProperties.dataSize
val idx = itOffset
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import za.co.absa.cobrix.cobol.parser.CopybookParser.CopybookAST
import za.co.absa.cobrix.cobol.parser.ast.datatype.AlphaNumeric
import za.co.absa.cobrix.cobol.parser.ast.{Group, Primitive, Statement}
import za.co.absa.cobrix.cobol.parser.common.Constants
import za.co.absa.cobrix.cobol.parser.decoders.DecoderSelector
import za.co.absa.cobrix.cobol.parser.decoders.{DecoderSelector, EncoderSelector}
import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPointFormat
import za.co.absa.cobrix.cobol.parser.encoding.Encoding
import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage
Expand Down Expand Up @@ -74,13 +74,15 @@ class NonTerminalsAdder(
val sz = g.binaryProperties.actualSize
val dataType = AlphaNumeric(s"X($sz)", sz, enc = Some(enc))
val decode = DecoderSelector.getDecoder(dataType, stringTrimmingPolicy, isDisplayAlwaysString = false, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat, strictSignOverpunch, improvedNullDetection)
val encode = EncoderSelector.getEncoder(dataType, ebcdicCodePage, asciiCharset)
val newName = getNonTerminalName(g.name, g.parent.get)
newChildren.append(
Primitive(
g.level, newName, "", g.lineNumber,
dataType,
redefines = Some(g.name),
decode = decode,
encode = encode,
binaryProperties = g.binaryProperties
)(g.parent)
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.cobrix.cobol.parser.decoders

import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, CobolType}
import za.co.absa.cobrix.cobol.parser.encoding.codepage.{CodePage, CodePageCommon}
import za.co.absa.cobrix.cobol.parser.encoding.{ASCII, EBCDIC, Encoding}

import java.nio.charset.{Charset, StandardCharsets}

object EncoderSelector {
  type Encoder = Any => Array[Byte]

  /**
    * Returns an encoder for the given COBOL data type, if encoding of that type is supported.
    *
    * Currently only uncompressed (PIC X) alphanumeric fields are supported; all other types yield None.
    *
    * @param dataType       The COBOL data type of the field
    * @param ebcdicCodePage The EBCDIC code page to use for string fields
    * @param asciiCharset   The charset to use for ASCII string fields
    * @return An encoder function, or None if the data type cannot be encoded
    */
  def getEncoder(dataType: CobolType,
                 ebcdicCodePage: CodePage = new CodePageCommon,
                 asciiCharset: Charset = StandardCharsets.US_ASCII): Option[Encoder] = {
    dataType match {
      case alphaNumeric: AlphaNumeric if alphaNumeric.compact.isEmpty =>
        getStringEncoder(alphaNumeric.enc.getOrElse(EBCDIC), ebcdicCodePage, asciiCharset, alphaNumeric.length)
      case _ =>
        None
    }
  }

  /** Gets an encoder function for a string data type. The encoder is chosen depending on whether the field encoding is EBCDIC or ASCII. */
  private def getStringEncoder(encoding: Encoding,
                               ebcdicCodePage: CodePage,
                               asciiCharset: Charset,
                               fieldLength: Int
                              ): Option[Encoder] = {
    encoding match {
      case EBCDIC =>
        // NOTE(review): 'ebcdicCodePage' is not consulted here — the common code page mapping is
        // always used. Confirm whether per-code-page encoding tables are planned.
        val encoder = (a: Any) => {
          encodeEbcdicString(a.toString, CodePageCommon.asciiToEbcdicMapping, fieldLength)
        }
        Option(encoder)
      case ASCII =>
        // ASCII encoding is not supported yet.
        None
      case _ =>
        None
    }
  }

  /**
    * Encodes an ASCII string as a fixed-length EBCDIC byte array.
    *
    * The output is truncated if the string is longer than the field, and is padded with
    * EBCDIC spaces (0x40) if it is shorter.
    *
    * @param string          An input string
    * @param conversionTable A conversion table to use to convert from ASCII to EBCDIC
    * @param length          The length of the output (in bytes)
    * @return An EBCDIC representation of the string
    */
  def encodeEbcdicString(string: String, conversionTable: Array[Byte], length: Int): Array[Byte] = {
    require(length >= 0, s"Field length cannot be negative, got $length")

    val buf = new Array[Byte](length)
    // Fix: pad the remainder with EBCDIC spaces (0x40) rather than the NUL (0x00) bytes a fresh
    // array contains, so values shorter than the field decode back as space-padded text,
    // matching COBOL PIC X semantics.
    java.util.Arrays.fill(buf, 0x40.toByte)

    var i = 0
    while (i < string.length && i < length) {
      val asciiByte = string(i).toByte
      // Normalize signed bytes (0x80-0xFF map to negative values) into a 0-255 table index.
      buf(i) = conversionTable((asciiByte + 256) % 256)
      i = i + 1
    }
    buf
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,28 @@ object CodePageCommon {
}
ebcdic2ascii
}

/**
  * The table for converting basic ASCII symbols to the EBCDIC common code page.
  *
  * The array is indexed by the unsigned ASCII byte value (0-255). Characters that have no
  * mapping are converted to 0x00.
  *
  * Fix: ASCII LF (0x0A) now maps to EBCDIC LF (0x25) and ASCII CR (0x0D) to EBCDIC CR (0x0D);
  * the two entries were previously swapped, breaking the round trip with the EBCDIC-to-ASCII table.
  */
def asciiToEbcdicMapping: Array[Byte] = {
  Array[Byte](
    0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x25.toByte, 0x00.toByte, 0x00.toByte, 0x0D.toByte, 0x00.toByte, 0x00.toByte, // 0 - 15
    0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 16 - 31
    0x40.toByte, 0x5A.toByte, 0x7F.toByte, 0x7B.toByte, 0x5B.toByte, 0x6C.toByte, 0x50.toByte, 0x7D.toByte, 0x4D.toByte, 0x5D.toByte, 0x5C.toByte, 0x4E.toByte, 0x6B.toByte, 0x60.toByte, 0x4B.toByte, 0x61.toByte, // 32 - 47
    0xF0.toByte, 0xF1.toByte, 0xF2.toByte, 0xF3.toByte, 0xF4.toByte, 0xF5.toByte, 0xF6.toByte, 0xF7.toByte, 0xF8.toByte, 0xF9.toByte, 0x7A.toByte, 0x5E.toByte, 0x4C.toByte, 0x7E.toByte, 0x6E.toByte, 0x6F.toByte, // 48 - 63
    0x7C.toByte, 0xC1.toByte, 0xC2.toByte, 0xC3.toByte, 0xC4.toByte, 0xC5.toByte, 0xC6.toByte, 0xC7.toByte, 0xC8.toByte, 0xC9.toByte, 0xD1.toByte, 0xD2.toByte, 0xD3.toByte, 0xD4.toByte, 0xD5.toByte, 0xD6.toByte, // 64 - 79
    0xD7.toByte, 0xD8.toByte, 0xD9.toByte, 0xE2.toByte, 0xE3.toByte, 0xE4.toByte, 0xE5.toByte, 0xE6.toByte, 0xE7.toByte, 0xE8.toByte, 0xE9.toByte, 0xBA.toByte, 0xE0.toByte, 0xBB.toByte, 0xB0.toByte, 0x6D.toByte, // 80 - 95
    0x79.toByte, 0x81.toByte, 0x82.toByte, 0x83.toByte, 0x84.toByte, 0x85.toByte, 0x86.toByte, 0x87.toByte, 0x88.toByte, 0x89.toByte, 0x91.toByte, 0x92.toByte, 0x93.toByte, 0x94.toByte, 0x95.toByte, 0x96.toByte, // 96 - 111
    0x97.toByte, 0x98.toByte, 0x99.toByte, 0xA2.toByte, 0xA3.toByte, 0xA4.toByte, 0xA5.toByte, 0xA6.toByte, 0xA7.toByte, 0xA8.toByte, 0xA9.toByte, 0xC0.toByte, 0x6A.toByte, 0xD0.toByte, 0xA1.toByte, 0x00.toByte, // 112 - 127
    0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 128 - 143
    0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 144 - 159
    0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 160 - 175
    0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 176 - 191
    0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 192 - 207
    0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 208 - 223
    0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 224 - 239
    0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte // 240 - 255
  )
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.cobrix.cobol.processor

import za.co.absa.cobrix.cobol.parser.ast.Group
import za.co.absa.cobrix.cobol.reader.extractors.record.RecordHandler

/**
  * A handler for processing COBOL records and mapping it to JVM data structures.
  *
  * This implementation uses an array to group data fields of struct fields.
  */
class ArrayOfAnyHandler extends RecordHandler[scala.Array[Any]] {
  // Group values are already materialized as an array, so they are passed through as-is.
  override def create(values: Array[Any], group: Group): Array[Any] = values

  // Exposes the record as an immutable sequence view of its fields.
  override def toSeq(record: Array[Any]): Seq[Any] = record.toSeq

  // Applies the given side-effecting function to every field of the record, in order.
  override def foreach(record: Array[Any])(f: Any => Unit): Unit = {
    for (element <- record) {
      f(element)
    }
  }
}
Loading
Loading