Skip to content

Commit 3b22739

Browse files
committed
feat(data): add HEIC/HEIF image support and normalize MIME types (#1127)
Because

- The codebase lacked support for the modern HEIC/HEIF image formats, which are widely used on mobile devices
- Users were getting "audio format audio/x-m4a is not supported" errors even though M4A is supported, because non-standard MIME type variants were not recognized

This commit

- **Adds comprehensive HEIC/HEIF image format support** using the [libheif](https://github.com/strukturag/libheif) library with full encode/decode capabilities
- **Implements MIME type normalization** to handle equivalent formats (e.g., `audio/x-m4a` → `audio/mp4`, `video/mov` → `video/quicktime`)
- **Updates Docker configurations** (both production and development) with libheif dependencies
- **Adds comprehensive test coverage** for HEIC/HEIF processing and MIME type normalization
1 parent a6046cd commit 3b22739

File tree

17 files changed

+612
-32
lines changed

17 files changed

+612
-32
lines changed

.env

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,5 +27,5 @@ ONNX_MODEL_FOLDER_PATH=${PWD}/pkg/component/resources/onnx
2727
# that will be rolled back on cleanup, but there might be cases where this
2828
# isn't possible (e.g. lock tests). We want to keep these queries isolated from
2929
# the main database.
30-
TEST_DBHOST=localhost
30+
TEST_DBHOST=pg_sql
3131
TEST_DBNAME=pipeline_test

.github/workflows/coverage.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -122,13 +122,13 @@ jobs:
122122
123123
- name: Generate coverage report
124124
env:
125-
CFG_DATABASE_HOST: localhost
126-
CFG_DATABASE_NAME: pipeline_test
125+
TEST_DBHOST: localhost
126+
TEST_DBNAME: pipeline_test
127127
CFG_DATABASE_USERNAME: postgres
128128
CFG_DATABASE_PASSWORD: password
129129
CFG_DATABASE_PORT: 5432
130130
run: |
131-
make coverage DBTEST=true OCR=true ONNX=true
131+
make coverage DBTEST=true OCR=true ONNX=true DOCKER_NETWORK=host
132132
133133
- name: Upload coverage report
134134
uses: codecov/codecov-action@v2

.github/workflows/golangci-lint.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,10 +25,10 @@ jobs:
2525
with:
2626
go-version: ${{ env.GOLANG_VERSION }}
2727
cache: false
28-
- name: Install sorx
28+
- name: Install dependencies
2929
run: |
3030
sudo apt-get update
31-
sudo apt-get install -y libsoxr-dev
31+
sudo apt-get install -y libsoxr-dev libheif-dev libde265-dev libx265-dev libaom-dev libdav1d-dev
3232
- name: Install onnxruntime library and headers
3333
run: |
3434
export ONNXRUNTIME_ROOT_PATH=$GITHUB_WORKSPACE/onnxruntime

Dockerfile

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,12 @@ RUN apt-get update && apt-get install -y \
1313
libleptonica-dev \
1414
libtesseract-dev \
1515
libsoxr-dev \
16+
libheif-dev \
17+
libde265-dev \
18+
libx265-dev \
19+
libaom-dev \
20+
libdav1d-dev \
21+
pkg-config \
1622
wget \
1723
jq \
1824
&& rm -rf /var/lib/apt/lists/*
@@ -110,6 +116,7 @@ RUN apt update && \
110116
libtesseract-dev \
111117
libreoffice \
112118
libsoxr-dev \
119+
libheif1 \
113120
chromium \
114121
qpdf && \
115122
rm -rf /var/lib/apt/lists/*

Dockerfile.dev

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,13 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
2828
libtesseract-dev \
2929
libreoffice \
3030
libsoxr-dev \
31+
libde265-dev \
32+
libx265-dev \
33+
libaom-dev \
34+
libdav1d-dev \
35+
libheif1 \
36+
libheif-dev \
37+
pkg-config \
3138
chromium \
3239
qpdf \
3340
jq

Makefile

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,9 @@ export
88

99
GOTEST_FLAGS := CFG_DATABASE_HOST=${TEST_DBHOST} CFG_DATABASE_NAME=${TEST_DBNAME}
1010

11+
# Default network for local development, can be overridden
12+
DOCKER_NETWORK ?= instill-network
13+
1114
#============================================================================
1215

1316
.PHONY: dev
@@ -70,7 +73,14 @@ go-gen: ## Generate codes
7073

7174
.PHONY: dbtest-pre
7275
dbtest-pre:
73-
@${GOTEST_FLAGS} go run ./cmd/migration
76+
@docker run --rm \
77+
-v $(PWD):/${SERVICE_NAME} \
78+
--user $(id -u):$(id -g) \
79+
-e GOTEST_FLAGS="${GOTEST_FLAGS}" \
80+
--network ${DOCKER_NETWORK} \
81+
--entrypoint= \
82+
instill/${SERVICE_NAME}:dev \
83+
go run ./cmd/migration
7484

7585
.PHONY: coverage
7686
coverage: ## Generate coverage report
@@ -79,6 +89,7 @@ coverage: ## Generate coverage report
7989
-v $(PWD):/${SERVICE_NAME} \
8090
--user $(id -u):$(id -g) \
8191
-e GOTEST_FLAGS="${GOTEST_FLAGS}" \
92+
--network ${DOCKER_NETWORK} \
8293
--entrypoint= \
8394
instill/${SERVICE_NAME}:dev \
8495
go test -v -race ${GOTEST_TAGS} -coverpkg=./... -coverprofile=coverage.out -covermode=atomic -timeout 30m ./...

pkg/data/audio.go

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -61,13 +61,15 @@ func NewAudioFromURL(ctx context.Context, binaryFetcher external.BinaryFetcher,
6161
}
6262

6363
func createAudioData(b []byte, contentType, filename string, isUnified bool) (*audioData, error) {
64-
finalContentType := contentType
64+
// Normalize MIME type first
65+
normalizedContentType := normalizeMIMEType(contentType)
66+
finalContentType := normalizedContentType
6567

6668
// If the audio should be unified, convert it to OGG (the internal unified audio format)
6769
if isUnified {
68-
if contentType != OGG {
70+
if normalizedContentType != OGG {
6971
var err error
70-
b, err = convertAudio(b, contentType, OGG)
72+
b, err = convertAudio(b, normalizedContentType, OGG)
7173
if err != nil {
7274
return nil, err
7375
}

pkg/data/audio_test.go

Lines changed: 61 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ func TestNewAudioFromBytes(t *testing.T) {
2828
{"Valid AAC audio", "small_sample.aac", "audio/aac", 1.0},
2929
{"Valid FLAC audio", "small_sample.flac", "audio/flac", 1.0},
3030
{"Valid M4A audio", "small_sample.m4a", "audio/mp4", 1.0},
31+
{"Valid M4A audio (non-standard MIME)", "small_sample.m4a", "audio/x-m4a", 1.0},
3132
{"Valid WMA audio", "small_sample.wma", "audio/x-ms-wma", 1.0},
3233
{"Valid AIFF audio", "small_sample.aiff", "audio/aiff", 1.0},
3334
{"Invalid file type", "sample_640_426.png", "", 0.0},
@@ -104,6 +105,7 @@ func TestAudioProperties(t *testing.T) {
104105
{"AAC audio", "small_sample.aac", "audio/aac", 1.0},
105106
{"FLAC audio", "small_sample.flac", "audio/flac", 1.0},
106107
{"M4A audio", "small_sample.m4a", "audio/mp4", 1.0},
108+
{"M4A audio (non-standard MIME)", "small_sample.m4a", "audio/x-m4a", 1.0},
107109
{"WMA audio", "small_sample.wma", "audio/x-ms-wma", 1.0},
108110
{"AIFF audio", "small_sample.aiff", "audio/aiff", 1.0},
109111
}
@@ -191,6 +193,7 @@ func TestNewAudioFromBytesUnified(t *testing.T) {
191193
{"AAC as unified", "small_sample.aac", "audio/aac", 1.0},
192194
{"FLAC as unified", "small_sample.flac", "audio/flac", 1.0},
193195
{"M4A as unified", "small_sample.m4a", "audio/mp4", 1.0},
196+
{"M4A as unified (non-standard MIME)", "small_sample.m4a", "audio/x-m4a", 1.0},
194197
{"WMA as unified", "small_sample.wma", "audio/x-ms-wma", 1.0},
195198
{"AIFF as unified", "small_sample.aiff", "audio/aiff", 1.0},
196199
}
@@ -206,10 +209,15 @@ func TestNewAudioFromBytesUnified(t *testing.T) {
206209
c.Assert(audio.ContentType().String(), qt.Equals, "audio/ogg")
207210
c.Assert(audio.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.1)), tc.duration)
208211

209-
// Test as non-unified (should preserve original format)
212+
// Test as non-unified (should preserve original format, but normalized)
210213
audioOriginal, err := NewAudioFromBytes(audioBytes, tc.contentType, tc.filename, false)
211214
c.Assert(err, qt.IsNil)
212-
c.Assert(audioOriginal.ContentType().String(), qt.Equals, tc.contentType)
215+
expectedContentType := tc.contentType
216+
// Handle MIME type normalization for non-standard types
217+
if tc.contentType == "audio/x-m4a" {
218+
expectedContentType = "audio/mp4"
219+
}
220+
c.Assert(audioOriginal.ContentType().String(), qt.Equals, expectedContentType)
213221
c.Assert(audioOriginal.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.1)), tc.duration)
214222
})
215223
}
@@ -306,3 +314,54 @@ func TestAllSupportedAudioFormats(t *testing.T) {
306314
})
307315
}
308316
}
317+
318+
func TestAudioMIMETypeNormalization(t *testing.T) {
319+
t.Parallel()
320+
c := qt.New(t)
321+
322+
// Test that audio/x-m4a is properly normalized to audio/mp4
323+
c.Run("audio/x-m4a normalization", func(c *qt.C) {
324+
audioBytes, err := os.ReadFile("testdata/small_sample.m4a")
325+
c.Assert(err, qt.IsNil)
326+
327+
// Create audio with non-standard MIME type
328+
audioXM4A, err := NewAudioFromBytes(audioBytes, "audio/x-m4a", "test.m4a", false)
329+
c.Assert(err, qt.IsNil)
330+
331+
// Create audio with standard MIME type
332+
audioMP4, err := NewAudioFromBytes(audioBytes, "audio/mp4", "test.m4a", false)
333+
c.Assert(err, qt.IsNil)
334+
335+
// Both should have the same normalized content type
336+
c.Assert(audioXM4A.ContentType().String(), qt.Equals, "audio/mp4")
337+
c.Assert(audioMP4.ContentType().String(), qt.Equals, "audio/mp4")
338+
c.Assert(audioXM4A.ContentType().String(), qt.Equals, audioMP4.ContentType().String())
339+
340+
// Both should have the same duration and properties
341+
c.Assert(audioXM4A.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.1)), audioMP4.Duration().Float64())
342+
c.Assert(audioXM4A.SampleRate().Integer(), qt.Equals, audioMP4.SampleRate().Integer())
343+
})
344+
345+
// Test that audio/mp3 is properly normalized to audio/mpeg
346+
c.Run("audio/mp3 normalization", func(c *qt.C) {
347+
audioBytes, err := os.ReadFile("testdata/small_sample.mp3")
348+
c.Assert(err, qt.IsNil)
349+
350+
// Create audio with non-standard MIME type
351+
audioMP3, err := NewAudioFromBytes(audioBytes, "audio/mp3", "test.mp3", false)
352+
c.Assert(err, qt.IsNil)
353+
354+
// Create audio with standard MIME type
355+
audioMPEG, err := NewAudioFromBytes(audioBytes, "audio/mpeg", "test.mp3", false)
356+
c.Assert(err, qt.IsNil)
357+
358+
// Both should have the same normalized content type
359+
c.Assert(audioMP3.ContentType().String(), qt.Equals, "audio/mpeg")
360+
c.Assert(audioMPEG.ContentType().String(), qt.Equals, "audio/mpeg")
361+
c.Assert(audioMP3.ContentType().String(), qt.Equals, audioMPEG.ContentType().String())
362+
363+
// Both should have the same duration and properties
364+
c.Assert(audioMP3.Duration().Float64(), qt.CmpEquals(cmpopts.EquateApprox(0, 0.1)), audioMPEG.Duration().Float64())
365+
c.Assert(audioMP3.SampleRate().Integer(), qt.Equals, audioMPEG.SampleRate().Integer())
366+
})
367+
}

0 commit comments

Comments
 (0)