-
Notifications
You must be signed in to change notification settings - Fork 39
Open
Description
Using SCALE for CUDA compatibility, I was able to patch the build so that it compiles for AMD.
The main issue was that I had to build CCV outside of Bazel: the existing ruleset was copied from TensorFlow, which enforces a number of CUDA dependency checks that we don't need and that SCALE does not support.
While the build below runs and sets up a GPU graph, some of the command backends require cuDNN, which SCALE does not support yet. To get a fully functional GPU build, we need to either provide alternate command backends or wait for cuDNN support in SCALE.
# syntax=docker/dockerfile:1
# Base image for the whole build: Ubuntu 22.04.
FROM ubuntu:22.04
# Build-time only: keeps apt non-interactive inside RUN steps without leaking
# the setting into the environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive
# Every RUN step executes through a bash login shell.
SHELL ["/bin/bash", "-lc"]
# AMD GPU architecture passed to SCALE's scaleenv script. Defaults to gfx1151;
# override with: docker build --build-arg AMD_GPU_ARCH=<arch> .
ARG AMD_GPU_ARCH=gfx1151
ENV AMD_GPU_ARCH=${AMD_GPU_ARCH}
# -----------------------
# Base system dependencies
# -----------------------
# Compilers, build tools, image codec headers, BLAS and download utilities.
# One package per line, sorted alphabetically, so future diffs stay small.
# The apt lists are removed in the same layer to keep the image lean.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      bash \
      build-essential \
      ca-certificates \
      clang \
      cmake \
      curl \
      git \
      gnupg \
      libbsd-dev \
      libjpeg-dev \
      libopenblas-dev \
      libpng-dev \
      libtiff-dev \
      libwebp-dev \
      linux-libc-dev \
      llvm \
      lsb-release \
      ninja-build \
      pkg-config \
      python-is-python3 \
      python3 \
      systemtap-sdt-dev \
      wget \
 && rm -rf /var/lib/apt/lists/*
# -----------------------
# Swift
# -----------------------
# Download the Swift 6.0.3 toolchain tarball, unpack it and move it to /usr/local/swift.
# curl flags: -f makes the step fail on an HTTP error instead of saving the error page
# (which would only surface later as a confusing tar failure), -L follows redirects,
# -sS hides the progress meter but still prints errors.
RUN curl -fsSLO https://download.swift.org/swift-6.0.3-release/ubuntu2204/swift-6.0.3-RELEASE/swift-6.0.3-RELEASE-ubuntu22.04.tar.gz \
 && tar xzf swift-6.0.3-RELEASE-ubuntu22.04.tar.gz \
 && rm swift-6.0.3-RELEASE-ubuntu22.04.tar.gz \
 && mv swift-6.0.3-RELEASE-ubuntu22.04 /usr/local/swift
# Add Swift to PATH permanently
ENV PATH="/usr/local/swift/usr/bin:${PATH}"
# NOTE(review): the toolchain is moved to /usr/local/swift above, so this directory
# does not appear to exist in the image. Confirm what consumes SWIFT_PATH and whether
# it should be /usr/local/swift/usr/bin instead.
ENV SWIFT_PATH="/usr/share/swift/usr/bin"
# Shared-library search path: LLVM 14 libraries plus the Swift toolchain libraries.
ENV LD_LIBRARY_PATH="/usr/lib/llvm-14/lib/:/usr/local/swift/usr/lib:/usr/local/swift/usr/lib/swift/linux/"
# -----------------------
# Bazel
# -----------------------
# Install the Bazelisk v1.27.0 release package. The downloaded .deb is deleted in the
# same layer so it does not stay behind in /root of the final image.
RUN cd /root && wget https://github.com/bazelbuild/bazelisk/releases/download/v1.27.0/bazelisk-amd64.deb \
 && apt-get update && apt-get install -y ./bazelisk-amd64.deb \
 && rm -f /root/bazelisk-amd64.deb \
 && rm -rf /var/lib/apt/lists/*
# -----------------------
# Install SCALE (for CUDA emulation on AMD for CCV)
# -----------------------
# First install the scale-repos package (then delete the downloaded file), refresh the
# package index, and install `scale` with SCALE_LICENSE_ACCEPT=1 set for that command.
RUN wget -P /root https://pkgs.scale-lang.com/deb/dists/jammy/main/binary-all/scale-repos.deb \
 && apt-get install -y /root/scale-repos.deb \
 && rm /root/scale-repos.deb \
 && apt-get update \
 && SCALE_LICENSE_ACCEPT=1 apt-get install -y scale \
 && rm -rf /var/lib/apt/lists/*
# -----------------------
# Build CCV with SCALE CUDA (outside Bazel)
#
# CCV is built by hand instead of through Bazel: the current ruleset was copied from
# TensorFlow and enforces checks for several CUDA dependencies that CCV does not even
# use and that SCALE does not implement.
# -----------------------
# Fetch the `unstable` branch of CCV into /usr/local/src/ccv
RUN git clone -b unstable https://github.com/liuliu/ccv.git /usr/local/src/ccv
# --- DEBUG PATCH: instrument CCV backend selection (inline, no helpers) ---
# Rationale: Helps monitor command backend matching to the running tensor graph while
# we work on dependencies.
# The embedded Python script rewrites lib/nnc/ccv_nnc_cmd.c: the first occurrence of the
# backend lookup + assert is replaced by the same code plus stderr diagnostics that fire
# when no backend matches. The build step fails if the expected code is not found.
RUN set -eu; cd /usr/local/src/ccv; \
    python3 - "$PWD/lib/nnc/ccv_nnc_cmd.c" << 'PY'
import sys, pathlib

# Path of the C file to patch, passed as the first script argument.
path = pathlib.Path(sys.argv[1])
txt = path.read_text()

# Exact text expected in the upstream source (tab-indented).
old_block = (
    "backend = ccv_nnc_cmd_find_backend(cmd, tensor_memory, tensor_formats, tensor_datatypes);\n"
    "\t}\n"
    "\tassert(backend != CCV_NNC_NO_BACKEND);"
)

# Replacement: same lookup and assert, with a diagnostic dump in between.
# Raw string so that the C escape sequences (\n) reach the C file untouched.
new_block = r'''backend = ccv_nnc_cmd_find_backend(cmd, tensor_memory, tensor_formats, tensor_datatypes);
		if (backend == CCV_NNC_NO_BACKEND) {
			fprintf(stderr,
				"CCV DEBUG: NO BACKEND for cmd=%d (cmd_idx=%d) "
				"tensor_memory=0x%x tensor_formats=0x%x tensor_dtypes=0x%x\n",
				cmd.cmd, cmd_idx, tensor_memory, tensor_formats, tensor_datatypes);
			/* Dump all registered backends for this command. */
			for (int bi = 0; bi < CCV_NNC_BACKEND_COUNT; bi++) {
				const ccv_nnc_cmd_backend_registry_t* b =
					&init_map[cmd_idx].backends[bi];
				if (!b->exec)
					continue;
				fprintf(stderr,
					"CCV DEBUG: backend[%d]: mem=0x%x fmt=0x%x dtype=0x%x\n",
					bi,
					b->tensor_memory,
					b->tensor_formats,
					b->tensor_datatypes);
			}
		}
	}
	assert(backend != CCV_NNC_NO_BACKEND);'''

# Abort the image build if upstream changed and the anchor text is gone.
if old_block not in txt:
    print("ERROR: Failed to locate patch target in ccv_nnc_cmd.c", file=sys.stderr)
    sys.exit(1)

# Replace only the first occurrence.
txt = txt.replace(old_block, new_block, 1)
path.write_text(txt)
PY
# --- END DEBUG PATCH ---
# --- PATCH: General BF16 / cuda_bf16.h handling for SCALE ---
# Rationale: Any `.cu` file that uses `__nv_bfloat16` needs to include `cuda_bf16.h`.
# No idea why it worked without the include on CUDA, but it does not on SCALE.
# The embedded Python script walks every *.cu file below /usr/local/src/ccv and inserts a
# C++-guarded `#include <cuda_bf16.h>` before the first `extern "C"` where needed.
RUN set -eu; cd /usr/local/src/ccv; \
    python3 - << 'PY'
from pathlib import Path

root = Path(".")  # run from /usr/local/src/ccv
cu_files = list(root.rglob("*.cu"))

for path in cu_files:
    text = path.read_text()
    lines = text.splitlines()

    # If this file already mentions cuda_bf16.h, don't touch it.
    if any("cuda_bf16.h" in line for line in lines):
        print(f"SKIP (already has cuda_bf16.h): {path}")
        continue

    # We care about files that either:
    #  - use __nv_bfloat16 directly (need the BF16 header), or
    #  - have extern "C" (to ensure cuda_bf16.h is seen OUTSIDE that block in case
    #    some included header uses BF16 and would otherwise be pulled in inside it).
    if "__nv_bfloat16" not in text and 'extern "C"' not in text:
        # No BF16 usage, no extern "C" concerns: leave the file alone.
        continue

    # Find the last #include that appears before the first extern "C".
    # The scan stops at extern "C", so the insertion point can never be inside it.
    last_include_before_extern = -1
    for i, line in enumerate(lines):
        if 'extern "C"' in line:
            break
        if line.strip().startswith("#include"):
            last_include_before_extern = i

    # Insertion point: right after that include, or the top of the file (index 0)
    # when no include precedes extern "C".
    insert_idx = last_include_before_extern + 1

    block = [
        "#ifdef __cplusplus",
        "#include <cuda_bf16.h>",
        "#endif",
    ]
    lines[insert_idx:insert_idx] = block

    # splitlines() drops line terminators; add the final newline back so the patched
    # file does not end without one.
    path.write_text("\n".join(lines) + "\n")
    print(f"PATCHED: inserted cuda_bf16.h (C++-guarded) into {path} at line {insert_idx}")
PY
# --- END PATCH ---
# --- PATCH: Skip download of sample model ---
# Rationale: the sample model is not needed and downloading it slows the build down.
# The substitution deletes the `cd ... && wget ...image-net-2012-vgg-d.sqlite3` text
# from the download script.
RUN set -e; cd /usr/local/src/ccv/samples; \
    sed -i 's|cd `dirname $0` && wget .*image-net-2012-vgg-d.sqlite3||' download-vgg-d-model.sh
# --- END PATCH ---
# --- PATCH: Replace GNU statement-expression macros in ccv.h with portable versions ---
# Rationale: The defines that were in place seemed to be GNU expansions which failed under CLANG
# Each substitution swaps the existing definition line for an #undef followed by a plain
# ternary-expression macro. Note that the new macros mention their arguments more than
# once, so an argument with side effects is evaluated more than once.
# One substitution per macro is enough: the pattern matches the definition line
# regardless of the spacing inside its parameter list.
RUN set -ue; cd /usr/local/src/ccv/lib; \
    sed -i 's/#define ccv_clamp.*/#undef ccv_clamp\n#define ccv_clamp(x,a,b) ((x)<(a)?(a):((x)>(b)?(b):(x)))/' ccv.h; \
    sed -i 's/#define ccv_max.*/#undef ccv_max\n#define ccv_max(a,b) ((a)>(b)?(a):(b))/' ccv.h; \
    sed -i 's/#define ccv_min.*/#undef ccv_min\n#define ccv_min(a,b) ((a)<(b)?(a):(b))/' ccv.h
# --- END PATCH ---
# --- PATCH: Disable cuFile dependencies ---
# Rationale: cuFile is not supported by SCALE
# Two in-place edits of nnc/gpu/ccv_nnc_compat.cu (relative to /usr/local/src/ccv/lib):
#   1. the `#include <cufile.h>` line is turned into a comment;
#   2. the whole body of cufileread() is swapped for an mmap + cumemcpy implementation.
RUN set -ue; cd /usr/local/src/ccv/lib; \
# Step 1: replace every `#include <cufile.h>` with a `//` comment so the header is no longer required
sed -i 's|#include <cufile.h>|// GPUDirect disabled for SCALE: no cufile.h|g' \
nnc/gpu/ccv_nnc_compat.cu; \
\
# Step 2: replace cufileread() so it always uses the CPU mmap + cumemcpy path.
# The sed range runs from the line starting with the function signature to the next line
# starting with `}`; the `c` command substitutes the text that follows for that range.
# The replacement maps the requested file region read-only, copies it to `buf` with
# cumemcpy (CPU memory -> GPU memory), then unmaps it; on mmap failure it logs and returns.
sed -i '/^void cufileread(const int fd, const off_t file_offset, void\* const buf, const size_t size)/,/^}/c\
void cufileread(const int fd, const off_t file_offset, void* const buf, const size_t size)\n\
{\n\
\t// SCALE/AMD: GPUDirect Storage (cuFile) is not available.\n\
\t// Fallback to CPU mmap + cumemcpy into GPU memory.\n\
\tvoid* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, file_offset);\n\
\tif (bufptr == MAP_FAILED)\n\
\t{\n\
\t\tPRINT(CCV_CLI_ERROR, \"[%s:%d]: mmap failed in cufileread\\n\", __FILE__, __LINE__);\n\
\t\treturn;\n\
\t}\n\
\tmadvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);\n\
\tcumemcpy(buf, CCV_TENSOR_GPU_MEMORY, bufptr, CCV_TENSOR_CPU_MEMORY, size);\n\
\tmunmap(bufptr, size);\n\
}\n' nnc/gpu/ccv_nnc_compat.cu
# --- END PATCH ---
# --- PATCH: Remove the locally-defined half-precision log() that conflicts with cuda_fp16.h ---
# Deletes everything from the helper's signature line through the next line that
# starts with `}`.
RUN set -ue; \
    sed -i '/static inline __device__ __half log(const half v)/,/^}/d' \
      /usr/local/src/ccv/lib/nnc/cmd/loss/gpu/ccv_nnc_categorical_crossentropy_gpu_ref.cu
# --- END PATCH ---
# --- PATCH: Suppress warnings for unused values and command line parameters ---
# Rationale: The warnings cluttered the log, and at the time I thought they were
# interfering with populating `.dep.mk`.
# Rewrites the NVFLAGS line of config.mk.in so that nvcc always receives
# -Wno-unused-value and -Wno-unused-command-line-argument.
RUN set -ue; \
    sed -i 's|^NVFLAGS := --use_fast_math @NV_SM_FLAGS@ $(DEFINE_MACROS)$|NVFLAGS := --use_fast_math -Wno-unused-value -Wno-unused-command-line-argument @NV_SM_FLAGS@ $(DEFINE_MACROS)|' \
      /usr/local/src/ccv/lib/config.mk.in
# --- END PATCH ---
# --- PATCH: Don't populate .dep.mk ---
# Rationale: I could not get the `nvcc` output to parse successfully as input for `.dep.mk`.
# It is not needed for the build, so the workaround is to disable it.
# For every file named `makefile` below lib/ (including nnc, nnc/cmd, etc.):
#   1) delete the existing .dep.mk rule (from its target line up to the next empty line)
#   2) append a stub rule that only writes a placeholder comment into .dep.mk
RUN set -ue; cd /usr/local/src/ccv/lib; \
    for mk in $(find . -name 'makefile'); do \
        sed -i -e '/^\.dep\.mk:/,/^$/d' "${mk}"; \
        printf '\n.dep.mk:\n\t@echo "# dummy deps" > .dep.mk\n' >> "${mk}"; \
    done
# --- END PATCH ---
# --- BUILD ---
# Configure and compile CCV inside the SCALE environment, then install the headers and
# the static library under /opt/ccv-scale and delete the source tree in the same layer.
# CUDA_HOME is not set anywhere in this file; presumably scaleenv exports it — with
# `set -u` the step aborts if it does not.
RUN set -ue; cd /usr/local/src/ccv/lib; \
    # Enter the SCALE environment for the selected GPU architecture
    . /opt/scale/bin/scaleenv ${AMD_GPU_ARCH}; \
    \
    # Configure CCV against the SCALE-provided CUDA location
    ./configure --with-cuda=${CUDA_HOME}; \
    \
    # --- PATCH: Force cuDNN OFF: remove HAVE_CUDNN and -lcudnn from config.mk ---
    # Rationale: SCALE includes a placeholder cuDNN with no implementation, so the build
    # system detects it and then fails when trying to use it.
    sed -i -e 's/-D HAVE_CUDNN //g' -e 's/-lcudnn//g' config.mk; \
    # --- END PATCH ---
    \
    # Compile using all available cores
    make -j"$(nproc)"; \
    \
    # Install: copy every header (keeping its relative directory) and the static library
    mkdir -p /opt/ccv-scale/include /opt/ccv-scale/lib; \
    find . -type f -name "*.h" -exec cp --parents {} /opt/ccv-scale/include/ \; ; \
    cp libccv.a /opt/ccv-scale/lib/; \
    rm -rf /usr/local/src/ccv
# Make the installed CCV library directory and headers visible to later steps.
ENV LD_LIBRARY_PATH=/opt/ccv-scale/lib:${LD_LIBRARY_PATH} \
    CPATH=/opt/ccv-scale/include
# --- END BUILD ---
# -----------------------
# Draw Things env
# -----------------------
# --- PATCH: Link llvm-config so Bazel can find it ---
# Expose llvm-config under /usr/local/bin, where rules_swift expects to find it
# (Ubuntu installs versioned binaries, e.g. /usr/bin/llvm-config-18).
# Candidates are tried in order of preference; the first one found on PATH is linked.
# Nothing is linked when none of them exists.
RUN for candidate in llvm-config-18 llvm-config-14 llvm-config; do \
        if command -v "${candidate}" >/dev/null 2>&1; then \
            ln -sf "$(command -v "${candidate}")" /usr/local/bin/llvm-config; \
            break; \
        fi; \
    done
# --- END PATCH ---
# -----------------------
# Draw Things repo
# -----------------------
# Clone the community repository and make it the working directory for the remaining steps.
RUN git clone https://github.com/drawthingsai/draw-things-community.git /usr/local/src/draw-things-community
WORKDIR /usr/local/src/draw-things-community
# -----------------------
# Wire prebuilt CCV into Bazel
# -----------------------
# Bazel workspace: put a local_repository named "ccv" at the very top of WORKSPACE.linux
# so that it overrides downstream definitions of the repository, such as the one from s4nnc.
# The new content is assembled in a temporary file, which then replaces WORKSPACE.linux.
RUN set -eu; cd /usr/local/src/draw-things-community; \
    merged="$(mktemp)"; \
    { \
        printf '%s\n' \
            'local_repository(' \
            ' name = "ccv",' \
            ' path = "third_party/ccv",' \
            ')' \
            ''; \
        cat WORKSPACE.linux; \
    } > "${merged}"; \
    mv "${merged}" WORKSPACE.linux
# Bazel workspace: remove obsolete and problematic CUDA and CCV configuration.
# A single sed pass deletes, in this order:
#   1) the load() of @ccv//config:ccv.bzl
#   2) the ccv_deps() call (a line containing only that call)
#   3) the load() of rules_cuda's cuda_configure.bzl and the cuda_configure(...) block
#   4) the ccv_setting(...) block
# Block deletions run from the opening call to the next line holding only `)`.
RUN set -eu; cd /usr/local/src/draw-things-community; \
    sed -i \
        -e '/^load("@ccv\/\/config:ccv.bzl", "ccv_deps", "ccv_setting")/d' \
        -e '/^[[:space:]]*ccv_deps()[[:space:]]*$/d' \
        -e '/^load("@build_bazel_rules_cuda\/\/gpus:cuda_configure.bzl", "cuda_configure")/d' \
        -e '/^[[:space:]]*cuda_configure(/,/^[[:space:]]*)[[:space:]]*$/d' \
        -e '/^[[:space:]]*ccv_setting(/,/^[[:space:]]*)[[:space:]]*$/d' \
        WORKSPACE.linux
# Create the local ccv repository that wraps the SCALE-built CCV:
#   - include/      -> symlink to the installed headers in /opt/ccv-scale/include
#   - lib/libccv.a  -> symlink to the installed static library
#   - WORKSPACE     -> minimal file so Bazel treats third_party/ccv as @ccv
RUN set -eu; cd /usr/local/src/draw-things-community; \
    repo=third_party/ccv; \
    mkdir -p "${repo}/lib"; \
    ln -s /opt/ccv-scale/include "${repo}/include"; \
    ln -s /opt/ccv-scale/lib/libccv.a "${repo}/lib/libccv.a"; \
    printf '%s\n' 'workspace(name = "ccv")' > "${repo}/WORKSPACE"
# Generate the top-level BUILD.bazel of the local ccv repository: a cc_library that
# exposes the CCV + NNC headers and links the prebuilt static library.
# The heredoc delimiter is deliberately unquoted so the shell substitutes ${CUDA_HOME}
# into the -L and rpath options. CUDA_HOME is not set in this file; presumably scaleenv
# exports it — `set -u` aborts the step otherwise.
RUN set -eu; cd /usr/local/src/draw-things-community; \
    . /opt/scale/bin/scaleenv "${AMD_GPU_ARCH}"; \
    cat > third_party/ccv/BUILD.bazel <<EOF
cc_library(
name = "ccv_core",
deps = ["//lib:ccv_static"],
hdrs = glob(["include/**/*.h"]),
includes = [
"include",
"include/3rdparty/sqlite3",
],
linkstatic = 1,
linkopts = [
# Image
"-ljpeg",
"-lpng",
# Zlib
"-lz",
# BLAS
"-lopenblas", # or "-lblas" depending on what you have
# SCALE / CUDA stack
"-lredscale",
"-lcublas",
"-lcudart",
"-lcuda",
# Search + runtime paths for SCALE/CUDA libs:
"-L${CUDA_HOME}/lib",
"-Wl,-rpath,${CUDA_HOME}/lib",
],
visibility = ["//visibility:public"],
)
EOF
# Provide the compatibility labels @ccv//lib:ccv and @ccv//lib:nnc.
# lib/BUILD.bazel imports the static library and defines both labels as aliases of
# the top-level //:ccv_core target.
RUN set -eu; cd /usr/local/src/draw-things-community; \
    pkg_dir=third_party/ccv/lib; \
    mkdir -p "${pkg_dir}"; \
    cat > "${pkg_dir}/BUILD.bazel" <<'EOF'
cc_import(
name = "ccv_static",
static_library = "libccv.a", # this is lib/libccv.a in the repo, but from this package it's just "libccv.a"
visibility = ["//visibility:public"],
)
alias(
name = "ccv",
actual = "//:ccv_core",
visibility = ["//visibility:public"],
)
alias(
name = "nnc",
actual = "//:ccv_core",
visibility = ["//visibility:public"],
)
EOF
# -----------------------
# Configure Bazel
# -----------------------
# Write .bazelrc.local into the Draw Things checkout (the current WORKDIR), without the
# obsolete TF config. The quoted heredoc delimiter keeps the content literal.
RUN cat > /usr/local/src/draw-things-community/.bazelrc.local <<'EOF'
# Host compilers
build --action_env HOST_CXX_COMPILER="/usr/bin/clang++"
build --action_env HOST_C_COMPILER="/usr/bin/clang"
build --action_env GCC_HOST_COMPILER_PATH="/usr/bin/clang"
# Keep clang config, but drop `--config=cuda` since we already built the CUDA dependencies.
build --config=clang
# Avoid the start/stop section garbage collection linker flags (kept from README but fixed for CLANG)
build --linkopt="-Wl,-z,nostart-stop-gc"
build --host_linkopt="-Wl,-z,nostart-stop-gc"
EOF
# -----------------------
# Bazel build
# -----------------------
# Run the repository's install script from the checkout root.
RUN set -eux; ./Scripts/install.sh
# Build the gRPC server CLI inside the SCALE environment: optimized build, local spawn
# strategy, compiler warnings silenced for C, C++ (target and host) and Swift.
RUN set -eux; \
    . /opt/scale/bin/scaleenv ${AMD_GPU_ARCH} && \
    bazel build Apps:gRPCServerCLI \
        --compilation_mode=opt \
        --keep_going \
        --spawn_strategy=local \
        --copt=-w --cxxopt=-w \
        --host_copt=-w --host_cxxopt=-w \
        --swiftcopt=-suppress-warnings
# Documents the port of the container (presumably the one gRPCServerCLI listens on —
# confirm); EXPOSE does not publish it by itself.
EXPOSE 7859
# Exec-form entrypoint: run the server binary built above with /grpc-models as its first
# argument. Arguments given to `docker run` are appended after it.
ENTRYPOINT ["/usr/local/src/draw-things-community/bazel-bin/Apps/gRPCServerCLI", "/grpc-models"]
Metadata
Metadata
Assignees
Labels
No labels