Skip to content

Commit bccaff4

Browse files
committed
Java iceberg table tests
1 parent 7dfdc50 commit bccaff4

File tree

6 files changed

+95
-8
lines changed

6 files changed

+95
-8
lines changed

src/avro.data.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ export function avroData({ reader, metadata, syncMarker }) {
3131
reader.offset += 16
3232
for (let i = 0; i < 16; i++) {
3333
if (blockSync[i] !== syncMarker[i]) {
34-
throw new Error('Sync marker does not match')
34+
throw new Error('sync marker does not match')
3535
}
3636
}
3737
const codec = metadata['avro.codec']
@@ -40,7 +40,7 @@ export function avroData({ reader, metadata, syncMarker }) {
4040
if (codec === 'deflate') {
4141
data = gunzip(data)
4242
} else if (codec !== 'null') {
43-
throw new Error(`Unsupported codec: ${codec}`)
43+
throw new Error(`unsupported codec: ${codec}`)
4444
}
4545

4646
// Decode according to binary or json encoding
@@ -61,7 +61,7 @@ export function avroData({ reader, metadata, syncMarker }) {
6161
}
6262

6363
/**
64-
* @import {AvroType} from './types.js'
64+
* @import {AvroType} from '../src/types.js'
6565
* @param {DataReader} reader
6666
* @param {AvroType} type
6767
* @returns {any}
@@ -123,6 +123,6 @@ function readType(reader, type) {
123123
return text
124124
} else {
125125
// enum, fixed, null, map
126-
throw new Error(`Unsupported type: ${type}`)
126+
throw new Error(`unsupported type: ${type}`)
127127
}
128128
}

src/iceberg.fetch.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ export function translateS3Url(url) {
2828
* Position deletes are grouped by target data file.
2929
* Equality deletes are grouped by sequence number.
3030
*
31-
* @import {ManifestEntry} from './types.js'
31+
* @import {ManifestEntry} from '../src/types.js'
3232
* @param {ManifestEntry[]} deleteEntries
3333
* @returns {Promise<{positionDeletesMap: Map<string, Set<bigint>>, equalityDeletesMap: Map<bigint, Record<string, any>[]>}>}
3434
*/

src/iceberg.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ export { avroData } from './avro.data.js'
1616
* TODO:
1717
* - Sequence number checks when filtering deletes
1818
*
19-
* @import {IcebergMetadata} from './types.js'
19+
* @import {IcebergMetadata} from '../src/types.js'
2020
* @param {object} options
2121
* @param {string} options.tableUrl - Base S3 URL of the table.
2222
* @param {number} [options.rowStart] - The starting global row index to fetch (inclusive).

src/iceberg.manifest.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { fetchAvroRecords } from './iceberg.fetch.js'
33
/**
44
* Returns manifest entries for the current snapshot.
55
*
6-
* @import {IcebergMetadata, Manifest, ManifestEntry} from './types.js'
6+
* @import {IcebergMetadata, Manifest, ManifestEntry} from '../src/types.js'
77
* @typedef {{ url: string, entries: ManifestEntry[] }[]} ManifestList
88
* @param {IcebergMetadata} metadata
99
* @returns {Promise<ManifestList>}

src/iceberg.metadata.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ export function icebergLatestVersion(tableUrl) {
1717
* Fetches the Iceberg table metadata.
1818
* If metadataFileName is not provided, uses icebergLatestVersion to get the version hint.
1919
*
20-
* @import {IcebergMetadata} from './types.js'
20+
* @import {IcebergMetadata} from '../src/types.js'
2121
* @param {string} tableUrl - Base URL of the table (e.g. "s3://my-bucket/path/to/table")
2222
* @param {string} [metadataFileName] - Name of the metadata JSON file
2323
* @returns {Promise<IcebergMetadata>} The table metadata as a JSON object

test/iceberg.java.test.js

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import { describe, expect, it } from 'vitest'
2+
import { icebergRead } from '../src/iceberg.js'
3+
4+
describe.concurrent('icebergRead from java iceberg table', () => {
5+
const tableUrl = 'https://s3.amazonaws.com/hyperparam-iceberg/java/bunnies'
6+
7+
it('reads data from iceberg table', async () => {
8+
const data = await icebergRead({ tableUrl, metadataFileName: 'v2.metadata.json' })
9+
10+
// Verify we got correct number of rows
11+
expect(data).toBeInstanceOf(Array)
12+
expect(data.length).toBe(21)
13+
14+
// Verify first row has expected structure
15+
expect(data[0]).toEqual({
16+
'Breed Name': 'Holland Lop',
17+
'Average Weight': 1.8,
18+
'Fur Length': 3,
19+
Lifespan: 7n,
20+
'Origin Country': 'The Netherlands',
21+
'Ear Type': 'Lop',
22+
Temperament: 'Friendly',
23+
'Popularity Rank': 1n,
24+
})
25+
26+
// Check we have all expected properties
27+
const expectedProperties = [
28+
'Breed Name',
29+
'Average Weight',
30+
'Fur Length',
31+
'Lifespan',
32+
'Origin Country',
33+
'Ear Type',
34+
'Temperament',
35+
'Popularity Rank',
36+
]
37+
data.forEach(row => {
38+
expectedProperties.forEach(prop => {
39+
expect(row).toHaveProperty(prop)
40+
})
41+
})
42+
})
43+
44+
it('reads data v3 with added column', async () => {
45+
const data = await icebergRead({ tableUrl, metadataFileName: 'v3.metadata.json' })
46+
47+
expect(data.length).toBe(21)
48+
expect(data[2]).toEqual({
49+
'Breed Name': 'Flemish Giant',
50+
'Average Weight': 4.5,
51+
'Fur Length': 4,
52+
Lifespan: 5n,
53+
'Origin Country': 'Belgium',
54+
'Ear Type': 'Lop',
55+
Temperament: 'Calm',
56+
'Popularity Rank': 3n,
57+
__happy__: undefined,
58+
})
59+
})
60+
61+
it('reads data v4 with deleted rows', async () => {
62+
const data = await icebergRead({ tableUrl, metadataFileName: 'v4.metadata.json' })
63+
64+
expect(data.length).toBe(15)
65+
expect(data[2]).toEqual({
66+
'Breed Name': 'American Fuzzy Lop',
67+
'Average Weight': 1.4,
68+
'Fur Length': 5,
69+
Lifespan: 8n,
70+
'Origin Country': 'USA',
71+
'Ear Type': 'Lop',
72+
Temperament: 'Sociable',
73+
'Popularity Rank': 8n,
74+
})
75+
const newZealandRow = data.find(row => row['Breed Name'] === 'New Zealand')
76+
expect(newZealandRow).toEqual({
77+
'Breed Name': 'New Zealand',
78+
'Average Weight': 4,
79+
'Fur Length': 2.7,
80+
Lifespan: 8n,
81+
'Origin Country': 'New Zealand',
82+
'Ear Type': 'Erect',
83+
Temperament: 'Affectionate',
84+
'Popularity Rank': 21n,
85+
})
86+
})
87+
})

0 commit comments

Comments
 (0)