diff --git a/flow/api/parse/pdf/pdftotext.cc b/flow/api/parse/pdf/pdftotext.cc
index 03ed1430e..fa52a7dee 100644
--- a/flow/api/parse/pdf/pdftotext.cc
+++ b/flow/api/parse/pdf/pdftotext.cc
@@ -1,40 +1,62 @@
 #include <cstring>
 #include <string>
+#include <memory>
+#include <mutex>
 
-#include "poppler/cpp/poppler-document.h"
-#include "poppler/cpp/poppler-page.h"
+#include <poppler/cpp/poppler-document.h>
+#include <poppler/cpp/poppler-page.h>
 
-void donothing(const std::string &, void *) {}
+using std::size_t;
+using OnceFlag = std::once_flag;
+using String = std::string;
+template <typename T, typename Del = std::default_delete<T>>
+using UniquePtr = std::unique_ptr<T, Del>;
 
-extern "C" {
-  const char *pdftotext(const char *data, int data_size)
-  {
-      static bool has_reset_error_function = false;
-      if (!has_reset_error_function) {
+using ByteArray = poppler::byte_array;
+using Document = poppler::document;
+using Page = poppler::page;
+
+namespace {
+    OnceFlag errorFnFlag;
+
+    void initErrorFunction() {
         // Do not log errors from poppler to stderr
-        poppler::set_debug_error_function(donothing, nullptr);
-        has_reset_error_function = true;
-      }
-
-      const auto *doc = poppler::document::load_from_raw_data(data, data_size);
-      if (doc == nullptr) {
-        return nullptr;
-      }
-      const int N = doc->pages();
-
-      std::vector<char> contents[N];
-      int text_length = 0;
-      for (int i = 0; i < N; ++i) {
-        contents[i] = doc->create_page(i)->text().to_utf8();
-        text_length += contents[i].size();
-      }
-
-      char *buffer = (char *)std::malloc(text_length + 1);
-      for (int i = 0, offset = 0; i < N; offset += contents[i].size(), ++i) {
-        std::memcpy(buffer + offset, contents[i].data(), contents[i].size());
-      }
-      buffer[text_length] = '\0';
-
-      return buffer;
-  }
+        poppler::set_debug_error_function(
+            []([[maybe_unused]] const String& s, [[maybe_unused]] void* p) -> void {}, 
+            nullptr
+        );
+    }
+}
+
+extern "C" {
+    // Extracts the UTF-8 text of all pages of the PDF stored in data[0, dataSize).
+    // Returns a malloc()ed, NUL-terminated buffer that the caller must free(), or
+    // nullptr on failure (null/oversized input, unloadable PDF, out of memory).
+    [[nodiscard]]
+    const char* pdfToText(const char* data, size_t dataSize) noexcept {
+        // load_from_raw_data() takes an int length; reject sizes that do not fit
+        // instead of letting the implicit size_t -> int conversion truncate them.
+        const int size = static_cast<int>(dataSize);
+        if (!data || size < 0 || static_cast<size_t>(size) != dataSize) {
+            return nullptr;
+        }
+        try { // an exception escaping this noexcept function would call std::terminate
+            std::call_once(errorFnFlag, initErrorFunction);
+            UniquePtr<Document> doc(Document::load_from_raw_data(data, size));
+            if (!doc) return nullptr;
+            String result;
+            for (int i = 0, pageCount = doc->pages(); i < pageCount; ++i) {
+                UniquePtr<Page> page(doc->create_page(i));
+                if (!page) continue; // skip invalid pages
+                const ByteArray pageText = page->text().to_utf8();
+                result.append(pageText.begin(), pageText.end());
+            }
+            char* buffer = static_cast<char*>(std::malloc(result.size() + 1));
+            if (!buffer) return nullptr;
+            std::memcpy(buffer, result.c_str(), result.size() + 1); // includes the NUL
+            return buffer;
+        } catch (...) {
+            return nullptr;
+        }
+    }
 }
diff --git a/flow/api/parse/pdf/pdftotext.go b/flow/api/parse/pdf/pdftotext.go
index 944935d2a..9e7ad8db7 100644
--- a/flow/api/parse/pdf/pdftotext.go
+++ b/flow/api/parse/pdf/pdftotext.go
@@ -3,21 +3,27 @@ package pdf
 // #cgo CFLAGS: -O2 -Wall -I/usr/include/poppler/cpp
 // #cgo LDFLAGS: -lpoppler-cpp
 // #include <stdlib.h>
-// const char *pdftotext(const char *data, int data_size);
+// const char* pdfToText(const char* data, size_t data_size);
 import "C"
 import (
 	"errors"
+	"runtime"
 	"unsafe"
 )
 
 func ToText(data []byte) (string, error) {
 	// Is this safe? Kind of: `data`, a []byte is a continguous array in Go,
-	// so we can safely point a C-land (const char *) to it,
+	// so we can safely point a C-land (const char*) to it,
 	// *provided* that C code does not attempt to find the end of the string,
 	// as []byte need not be zero-terminated.
-	// This is true for us, as C.pdftotext treats its first argument as bytes.
+	// This is true for us, as C.pdfToText treats its first argument as bytes.
+	if len(data) == 0 {
+		return "", errors.New("empty PDF data")
+	}
+
 	cData := (*C.char)(unsafe.Pointer(&data[0]))
-	result := C.pdftotext(cData, C.int(len(data)))
+	result := C.pdfToText(cData, C.size_t(len(data)))
+	runtime.KeepAlive(data)
 	if result != nil {
 		converted := C.GoString(result)
 		C.free(unsafe.Pointer(result))