Skip to content

gRPCServerCLI: AMD Support #49

@aarononeal

Description

@aarononeal

Using SCALE for CUDA compatibility, I was able to patch the build so that it compiles for AMD GPUs.

The main issue was that I had to build CCV outside of Bazel, because the existing ruleset was copied from TensorFlow and enforces a number of CUDA dependency checks that we don't need and that SCALE does not support.

While this build below runs and sets up a GPU graph, some of the command backends require cuDNN which SCALE does not support yet. To get a fully functional GPU build we need to either provide alternate command backends or wait for cuDNN in SCALE.

# Base image: Ubuntu 22.04 (jammy).
FROM ubuntu:22.04

# Build-time only: keeps apt non-interactive during RUN steps without baking the
# setting into the runtime environment of the final container.
ARG DEBIAN_FRONTEND=noninteractive
# Every RUN instruction executes through a bash login shell.
SHELL ["/bin/bash", "-lc"]
# Target AMD GPU architecture passed to SCALE's `scaleenv` in later steps.
# Override with `--build-arg AMD_GPU_ARCH=<arch>`; the default keeps the original value,
# and the ENV keeps the variable visible to later RUN steps and at runtime as before.
ARG AMD_GPU_ARCH=gfx1151
ENV AMD_GPU_ARCH=${AMD_GPU_ARCH}

# -----------------------
#  Base system deps
# -----------------------
# Toolchain, image codecs, BLAS and build tools needed by the later steps.
# One package per line, sorted alphabetically, so diffs stay readable.
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update \
  && apt-get install -y --no-install-recommends \
       bash \
       build-essential \
       ca-certificates \
       clang \
       cmake \
       curl \
       git \
       gnupg \
       libbsd-dev \
       libjpeg-dev \
       libopenblas-dev \
       libpng-dev \
       libtiff-dev \
       libwebp-dev \
       linux-libc-dev \
       llvm \
       lsb-release \
       ninja-build \
       pkg-config \
       python-is-python3 \
       python3 \
       systemtap-sdt-dev \
       wget \
  && rm -rf /var/lib/apt/lists/*

# -----------------------
#  Swift
# -----------------------
# Download the Swift 6.0.3 toolchain for Ubuntu 22.04, unpack it, and move it to
# /usr/local/swift. The tarball is removed in the same layer.
# curl flags: -f fails on HTTP errors (instead of saving an error page that tar would
# then reject), -sS is quiet but still prints errors, -L follows redirects.
RUN curl -fsSLO https://download.swift.org/swift-6.0.3-release/ubuntu2204/swift-6.0.3-RELEASE/swift-6.0.3-RELEASE-ubuntu22.04.tar.gz \
  && tar xzf swift-6.0.3-RELEASE-ubuntu22.04.tar.gz \
  && rm swift-6.0.3-RELEASE-ubuntu22.04.tar.gz \
  && mv swift-6.0.3-RELEASE-ubuntu22.04 /usr/local/swift

# Add Swift to PATH permanently
ENV PATH="/usr/local/swift/usr/bin:${PATH}"
# SWIFT_PATH must point at the toolchain installed by the step above
# (/usr/local/swift); /usr/share/swift is never created in this image.
ENV SWIFT_PATH="/usr/local/swift/usr/bin"
# Runtime library search path: LLVM 14 libraries plus the Swift runtime libraries.
ENV LD_LIBRARY_PATH="/usr/lib/llvm-14/lib/:/usr/local/swift/usr/lib:/usr/local/swift/usr/lib/swift/linux/"

# -----------------------
#  Bazel
# -----------------------
# Install Bazelisk v1.27.0 from its release .deb.
# The package file is downloaded to /tmp and deleted in the same layer so it does not
# remain in the image; the apt lists are cleaned up in the same layer as well.
RUN wget -O /tmp/bazelisk-amd64.deb https://github.com/bazelbuild/bazelisk/releases/download/v1.27.0/bazelisk-amd64.deb \
  && apt-get update && apt-get install -y /tmp/bazelisk-amd64.deb \
  && rm -f /tmp/bazelisk-amd64.deb \
  && rm -rf /var/lib/apt/lists/*

# -----------------------
#  Install SCALE (for CUDA emulation on AMD for CCV)
# -----------------------
# 1) Fetch and install the package that registers the SCALE apt repository.
# 2) Refresh the package index and install `scale`, accepting its license through
#    the SCALE_LICENSE_ACCEPT environment variable.
# 3) Remove the downloaded .deb and the apt lists in the same layer.
RUN cd /root \
  && wget https://pkgs.scale-lang.com/deb/dists/jammy/main/binary-all/scale-repos.deb \
  && apt-get install -y ./scale-repos.deb \
  && rm /root/scale-repos.deb \
  && apt-get update \
  && SCALE_LICENSE_ACCEPT=1 apt-get install -y scale \
  && rm -rf /var/lib/apt/lists/*

# -----------------------
#  Build CCV with SCALE CUDA (outside Bazel)
#
# We cannot use Bazel to build CCV because the current ruleset was copied from TensorFlow which
# enforces checks for several CUDA dependencies that aren't even being used by CCV and also
# are not implemented by SCALE.
# -----------------------

# Clone the CCV repo (tip of the `unstable` branch only).
# A shallow clone is enough: the tree is patched and built once by the steps below and
# then deleted, so the full history would only add download time and layer size.
RUN git clone --depth 1 --branch unstable https://github.com/liuliu/ccv.git /usr/local/src/ccv

# --- DEBUG PATCH: instrument CCV backend selection (inline, no helpers) ---
# Rationale: Helps monitor command backend matching to the running tensor graph while
# we work on dependencies.
#
# The inline Python script below rewrites lib/nnc/ccv_nnc_cmd.c: right after the call to
# ccv_nnc_cmd_find_backend() it inserts an fprintf() dump that fires when no backend was
# found, just before the existing assert. The build fails if the expected source snippet
# is not present (e.g. because upstream changed the file).
RUN set -eu; cd /usr/local/src/ccv; \
  python3 - "$PWD/lib/nnc/ccv_nnc_cmd.c" << 'PY'
import sys, pathlib

# Path of the C file to patch, passed as the first script argument.
path = pathlib.Path(sys.argv[1])
txt = path.read_text()

# Exact snippet to look for (tab-indented, as in the upstream source).
old_block = (
    "backend = ccv_nnc_cmd_find_backend(cmd, tensor_memory, tensor_formats, tensor_datatypes);\n"
    "\t}\n"
    "\tassert(backend != CCV_NNC_NO_BACKEND);"
)

# Replacement: same call and assert, with diagnostic output in between.
new_block = r'''backend = ccv_nnc_cmd_find_backend(cmd, tensor_memory, tensor_formats, tensor_datatypes);
        if (backend == CCV_NNC_NO_BACKEND) {
            fprintf(stderr,
                "CCV DEBUG: NO BACKEND for cmd=%d (cmd_idx=%d) "
                "tensor_memory=0x%x tensor_formats=0x%x tensor_dtypes=0x%x\n",
                cmd.cmd, cmd_idx, tensor_memory, tensor_formats, tensor_datatypes);

            /* Dump all registered backends for this command. */
            for (int bi = 0; bi < CCV_NNC_BACKEND_COUNT; bi++) {
                const ccv_nnc_cmd_backend_registry_t* b =
                    &init_map[cmd_idx].backends[bi];

                if (!b->exec)
                    continue;

                fprintf(stderr,
                    "CCV DEBUG:   backend[%d]: mem=0x%x fmt=0x%x dtype=0x%x\n",
                    bi,
                    b->tensor_memory,
                    b->tensor_formats,
                    b->tensor_datatypes);
            }
        }
    }
    assert(backend != CCV_NNC_NO_BACKEND);'''

if old_block not in txt:
    # sys.exit(<str>) writes the message to stderr and exits with status 1,
    # which aborts this RUN step.
    sys.exit("ERROR: Failed to locate patch target in ccv_nnc_cmd.c")

# Replace only the first occurrence.
txt = txt.replace(old_block, new_block, 1)
path.write_text(txt)
PY
# --- END DEBUG PATCH ---

# --- PATCH: General BF16 / cuda_bf16.h handling for SCALE ---
# Rationale: Any `.cu` file with `__nv_bfloat16` needs include `cuda_bf16.h`.
# No idea why it worked without on CUDA but it does not on SCALE.
#
# The inline Python script walks every *.cu file under /usr/local/src/ccv and inserts a
# C++-guarded `#include <cuda_bf16.h>` ahead of the first `extern "C"` line (after the
# last #include that precedes it, or at the top of the file when there is none).
RUN set -eu; cd /usr/local/src/ccv; \
  python3 - << 'PY'
from pathlib import Path

root = Path(".")  # run from /usr/local/src/ccv
# Materialize the list first so rewriting files cannot disturb the directory walk.
cu_files = list(root.rglob("*.cu"))

# Lines inserted into each patched file.
block = [
    "#ifdef __cplusplus",
    "#include <cuda_bf16.h>",
    "#endif",
]

for path in cu_files:
    text = path.read_text()
    lines = text.splitlines()

    # If this file already mentions cuda_bf16.h, don't touch it.
    if any("cuda_bf16.h" in line for line in lines):
        print(f"SKIP (already has cuda_bf16.h): {path}")
        continue

    # We care about files that either:
    #  - use __nv_bfloat16 directly (need BF16 header), or
    #  - have extern "C" (to ensure cuda_bf16.h is seen OUTSIDE that block
    #    in case some included header uses BF16 and would otherwise be pulled
    #    in inside extern "C").
    if "__nv_bfloat16" not in text and 'extern "C"' not in text:
        # No BF16 usage, no extern "C" concerns: leave file alone.
        continue

    last_include_before_extern = -1
    first_extern_idx = None

    for i, line in enumerate(lines):
        if 'extern "C"' in line and first_extern_idx is None:
            first_extern_idx = i
            # We stop tracking includes after this; we never want to insert inside extern "C"
            break
        if line.strip().startswith("#include"):
            last_include_before_extern = i

    # Compute insertion point:
    #  - default: top of file (0)
    #  - if we saw includes before extern "C": insert after last include
    insert_idx = 0
    if last_include_before_extern >= 0:
        insert_idx = last_include_before_extern + 1

    # Make sure we never insert AFTER extern "C" if we saw it
    if first_extern_idx is not None and insert_idx > first_extern_idx:
        insert_idx = first_extern_idx

    lines[insert_idx:insert_idx] = block

    # splitlines() discards the final line terminator, so re-add it when the original
    # file ended with one; otherwise every patched file would lose its trailing newline.
    patched = "\n".join(lines)
    if text.endswith("\n"):
        patched += "\n"
    path.write_text(patched)

    print(f"PATCHED: inserted cuda_bf16.h (C++-guarded) into {path} at line {insert_idx}")
PY
# --- END PATCH ---

# --- PATCH: Skip download of sample model ---
# Rationale: The sample model is not needed here and downloading it slows the build.
# The sed expression blanks out the `cd ... && wget ...` command in the download script.
RUN cd /usr/local/src/ccv/lib \
  && sed -i \
       's|cd `dirname $0` && wget .*image-net-2012-vgg-d.sqlite3||' \
       ../samples/download-vgg-d-model.sh
# --- END PATCH ---

# --- PATCH: Replace GNU statement-expression macros in ccv.h with portable versions ---
# Rationale: The defines that were in place seemed to be GNU expansions which failed under CLANG
#
# Each sed rewrites the `#define` line of one macro into an `#undef` followed by a plain
# ternary definition. The patterns are anchored on the opening parenthesis so that only
# the function-like macro with exactly that name is rewritten, not any other macro whose
# name merely starts with the same prefix.
# NOTE(review): unlike statement expressions, the ternary forms evaluate their arguments
# more than once; confirm no caller passes expressions with side effects.
RUN set -ue; cd /usr/local/src/ccv/lib; \
  sed -i 's/#define ccv_clamp(.*/#undef ccv_clamp\n#define ccv_clamp(x,a,b) ((x)<(a)?(a):((x)>(b)?(b):(x)))/' ccv.h; \
  sed -i 's/#define ccv_max(.*/#undef ccv_max\n#define ccv_max(a,b) ((a)>(b)?(a):(b))/' ccv.h; \
  sed -i 's/#define ccv_min(.*/#undef ccv_min\n#define ccv_min(a,b) ((a)<(b)?(a):(b))/' ccv.h
# --- END PATCH ---

# --- PATCH: Disable cuFile dependencies ---
# Rationale: cuFile is not supported by SCALE
#
# Two edits to nnc/gpu/ccv_nnc_compat.cu:
#  1) the `#include <cufile.h>` line is turned into a comment;
#  2) the whole body of cufileread() (from its signature line to the next line that
#     starts with `}`) is replaced by an mmap + cumemcpy implementation.
# NOTE(review): the replacement passes `file_offset` straight to mmap(); mmap requires
# the offset to be a multiple of the page size, so confirm callers only pass aligned
# offsets. On mmap failure the replacement logs and returns without filling `buf`.
RUN set -ue; cd /usr/local/src/ccv/lib; \
  # Disable cufile
  sed -i 's|#include <cufile.h>|// GPUDirect disabled for SCALE: no cufile.h|g' \
      nnc/gpu/ccv_nnc_compat.cu; \
  \
  # Replace cufileread() to always use the CPU mmap + cumemcpy path
  sed -i '/^void cufileread(const int fd, const off_t file_offset, void\* const buf, const size_t size)/,/^}/c\
void cufileread(const int fd, const off_t file_offset, void* const buf, const size_t size)\n\
{\n\
\t// SCALE/AMD: GPUDirect Storage (cuFile) is not available.\n\
\t// Fallback to CPU mmap + cumemcpy into GPU memory.\n\
\tvoid* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, file_offset);\n\
\tif (bufptr == MAP_FAILED)\n\
\t{\n\
\t\tPRINT(CCV_CLI_ERROR, \"[%s:%d]: mmap failed in cufileread\\n\", __FILE__, __LINE__);\n\
\t\treturn;\n\
\t}\n\
\tmadvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);\n\
\tcumemcpy(buf, CCV_TENSOR_GPU_MEMORY, bufptr, CCV_TENSOR_CPU_MEMORY, size);\n\
\tmunmap(bufptr, size);\n\
}\n' nnc/gpu/ccv_nnc_compat.cu
# --- END PATCH ---

# --- PATCH: Remove the locally-defined half-precision log() that conflicts with cuda_fp16.h ---
# Deletes everything from the function's signature line through the next line that
# starts with `}`.
RUN cd /usr/local/src/ccv/lib \
  && sed -i \
       '/static inline __device__ __half log(const half v)/,/^}/d' \
       nnc/cmd/loss/gpu/ccv_nnc_categorical_crossentropy_gpu_ref.cu
# --- END PATCH ---

# --- PATCH: Suppress warnings for unused values and command line parameters ---
# Rationale: The warnings cluttered the log, and at the time I thought they were
# interfering with populating `.dep.mk`.
# The NVFLAGS line in config.mk.in is rewritten so that nvcc always receives the
# warning-suppression flags.
RUN cd /usr/local/src/ccv/lib \
  && sed -i \
       's/^NVFLAGS := --use_fast_math @NV_SM_FLAGS@ $(DEFINE_MACROS)$/NVFLAGS := --use_fast_math -Wno-unused-value -Wno-unused-command-line-argument @NV_SM_FLAGS@ $(DEFINE_MACROS)/' \
       config.mk.in
# --- END PATCH ---

# --- PATCH: Don't populate .dep.mk ---
# Rationale: I could not get the `nvcc` output to successfully parse to be used as input to `.dep.mk`.
# Since it isn't needed for build, workaround by disabling it.
RUN set -ue; cd /usr/local/src/ccv/lib; \
  # Replacement rule appended to each makefile: writes a placeholder .dep.mk
  dummy_rule='\n.dep.mk:\n\t@echo "# dummy deps" > .dep.mk\n'; \
  # Visit every file named `makefile` below lib/ (nnc, nnc/cmd, ...):
  #  - delete the existing .dep.mk rule (from its target line to the next empty line)
  #  - append the placeholder rule
  for mk in $(find . -name 'makefile'); do \
    sed -i '/^\.dep\.mk:/,/^$/d' "$mk"; \
    printf "$dummy_rule" >> "$mk"; \
  done
# --- END PATCH ---

# --- BUILD ---
# Configure, build and install CCV with the SCALE toolchain, in one RUN step:
#  1) source SCALE's `scaleenv` for the target GPU architecture;
#  2) run ./configure against ${CUDA_HOME} (presumably exported by scaleenv; `set -u`
#     aborts the step if it is not set);
#  3) strip the cuDNN define and link flag from the generated config.mk;
#  4) build with make, then copy all headers (keeping their relative paths) and
#     libccv.a into /opt/ccv-scale;
#  5) delete the source tree.
# NOTE(review): the source tree was created in earlier layers, so removing it here
# does not reduce the final image size.
RUN set -ue; cd /usr/local/src/ccv/lib; \
  # Enter SCALE
  . /opt/scale/bin/scaleenv ${AMD_GPU_ARCH}; \
  \
  # Configure CCV
  ./configure --with-cuda=${CUDA_HOME}; \
  \
  # --- PATCH: Force cuDNN OFF: remove HAVE_CUDNN and -lcudnn from config.mk ---
  # Rationale: SCALE includes a placeholder cuDNN with no implementation which causes
  # the build system to see it but then fail trying to use it.
  sed -i 's/-D HAVE_CUDNN //g' config.mk; \
  sed -i 's/-lcudnn//g' config.mk; \
  # --- END PATCH ---
  \
  # Make
  make -j"$(nproc)"; \
  \
  # Install
  mkdir -p /opt/ccv-scale/include /opt/ccv-scale/lib; \
  find . -type f -name "*.h" -exec cp --parents {} /opt/ccv-scale/include/ \; ;\
  cp libccv.a /opt/ccv-scale/lib/; \
  rm -rf /usr/local/src/ccv

# Prepend the CCV install location to the library search path and make its headers
# visible to compilers through CPATH.
ENV LD_LIBRARY_PATH=/opt/ccv-scale/lib:${LD_LIBRARY_PATH}
ENV CPATH=/opt/ccv-scale/include
# --- END BUILD ---

# -----------------------
#  Draw Things env
# -----------------------

# --- PATCH: Link llvm-config so Bazel can find it ---
# Make llvm-config visible where rules_swift expects it
# (Ubuntu will install e.g. /usr/bin/llvm-config-18)
# The candidates are tried in order of preference; the first one found on PATH is
# symlinked to /usr/local/bin/llvm-config. If none is found, nothing is linked and the
# step still succeeds.
RUN set -e; \
  for candidate in llvm-config-18 llvm-config-14 llvm-config; do \
    if resolved="$(command -v "$candidate" 2>/dev/null)"; then \
      ln -sf "$resolved" /usr/local/bin/llvm-config; \
      break; \
    fi; \
  done
# --- END PATCH ---

# -----------------------
#  Draw Things repo
# -----------------------
# Clone the repository straight into its target directory and make that directory the
# working directory for all following instructions.
RUN git clone https://github.com/drawthingsai/draw-things-community.git /usr/local/src/draw-things-community
WORKDIR /usr/local/src/draw-things-community

# -----------------------
#  Wire prebuilt CCV into Bazel
# -----------------------

# Bazel Workspace: put a local_repository() for "ccv" at the very top of WORKSPACE.linux
# so that it takes precedence over downstream definitions of the same repository
# (such as the one defined by s4ncc).
# The new header and the existing file are written to a temp file, which then replaces
# WORKSPACE.linux.
RUN set -eu; cd /usr/local/src/draw-things-community; \
  merged="$(mktemp)"; \
  { \
    printf '%s\n' \
      'local_repository(' \
      '    name = "ccv",' \
      '    path = "third_party/ccv",' \
      ')' \
      ''; \
    cat WORKSPACE.linux; \
  } > "$merged"; \
  mv "$merged" WORKSPACE.linux

# Bazel Workspace: Remove obsolete and problematic CUDA and CCV configuration
# Each sed below deletes either one exact line (a load() statement or a bare call) or a
# range running from a call's opening line to the next line that contains only `)`.
# sed does not fail when a pattern is absent, so a missing entry is silently skipped.
RUN set -eu; cd /usr/local/src/draw-things-community; \
  # Strip CCV auto-config + CUDA auto-config from Draw Things WORKSPACE
  # 1) Remove the ccv config load
  sed -i '/^load("@ccv\/\/config:ccv.bzl", "ccv_deps", "ccv_setting")/d' WORKSPACE.linux; \
  \
  # 2) Remove the ccv_deps() call (likely a single line)
  sed -i '/^[[:space:]]*ccv_deps()[[:space:]]*$/d' WORKSPACE.linux; \
  \
  # 3) Remove the rules_cuda load + cuda_configure call
  sed -i '/^load("@build_bazel_rules_cuda\/\/gpus:cuda_configure.bzl", "cuda_configure")/d' WORKSPACE.linux; \
  sed -i '/^[[:space:]]*cuda_configure(/,/^[[:space:]]*)[[:space:]]*$/d' WORKSPACE.linux; \
  \
  # 4) Remove the ccv_setting(...) block
  sed -i '/^[[:space:]]*ccv_setting(/,/^[[:space:]]*)[[:space:]]*$/d' WORKSPACE.linux

# Create the local ccv repo and workspace
RUN set -eu; cd /usr/local/src/draw-things-community; \
  # Directory of the Bazel "ccv" repo that wraps the SCALE-built CCV
  repo=third_party/ccv; \
  mkdir -p "$repo/lib"; \
  \
  # Headers and static library are symlinked from /opt/ccv-scale
  ln -s /opt/ccv-scale/include "$repo/include"; \
  ln -s /opt/ccv-scale/lib/libccv.a "$repo/lib/libccv.a"; \
  \
  # One-line WORKSPACE so Bazel treats third_party/ccv as @ccv
  printf '%s\n' 'workspace(name = "ccv")' > "$repo/WORKSPACE"

# Create the Bazel build
# Writes third_party/ccv/BUILD.bazel, defining the `ccv_core` cc_library that wraps the
# prebuilt static library, exposes the headers, and carries the linker options.
# The heredoc delimiter is deliberately unquoted so that the shell expands ${CUDA_HOME}
# in the linkopts while the file is written; scaleenv is sourced first, presumably to
# export CUDA_HOME (`set -u` aborts the step if it is unset).
RUN set -eu; cd /usr/local/src/draw-things-community; \
  . /opt/scale/bin/scaleenv ${AMD_GPU_ARCH}; \
  \
  # Main BUILD: expose CCV + NNC headers and lib
  cat > third_party/ccv/BUILD.bazel <<EOF
cc_library(
    name = "ccv_core",
    deps = ["//lib:ccv_static"],
    hdrs = glob(["include/**/*.h"]),
    includes = [
      "include",
      "include/3rdparty/sqlite3",
    ],
    linkstatic = 1,
    linkopts = [
        # Image
        "-ljpeg",
        "-lpng",
        # Zlib
        "-lz",
        # BLAS
        "-lopenblas",     # or "-lblas" depending on what you have
        # SCALE / CUDA stack
        "-lredscale",
        "-lcublas",
        "-lcudart",
        "-lcuda",
        # Search + runtime paths for SCALE/CUDA libs:
        "-L${CUDA_HOME}/lib",
        "-Wl,-rpath,${CUDA_HOME}/lib",
    ],
    visibility = ["//visibility:public"],
)
EOF

# Expose to Bazel as ccv and nnc
# Writes third_party/ccv/lib/BUILD.bazel with:
#  - `ccv_static`: a cc_import of the prebuilt libccv.a;
#  - `ccv` and `nnc`: aliases that both resolve to //:ccv_core, so existing references
#    to @ccv//lib:ccv and @ccv//lib:nnc keep working.
# The heredoc delimiter is quoted, so the content is written verbatim.
RUN set -eu; cd /usr/local/src/draw-things-community; \
  # Compatibility labels: @ccv//lib:ccv and @ccv//lib:nnc
  mkdir -p third_party/ccv/lib; \
  cat > third_party/ccv/lib/BUILD.bazel <<'EOF'
cc_import(
    name = "ccv_static",
    static_library = "libccv.a",  # this is lib/libccv.a in the repo, but from this package it's just "libccv.a"
    visibility = ["//visibility:public"],
)

alias(
    name = "ccv",
    actual = "//:ccv_core",
    visibility = ["//visibility:public"],
)

alias(
    name = "nnc",
    actual = "//:ccv_core",
    visibility = ["//visibility:public"],
)
EOF

# -----------------------
# Configure Bazel
# -----------------------

# Configure .bazelrc.local without obsolete TF config
# The file is written relative to the current WORKDIR
# (/usr/local/src/draw-things-community). The heredoc delimiter is quoted, so the
# content is written verbatim.
RUN cat > .bazelrc.local <<'EOF'
# Host compilers
build --action_env HOST_CXX_COMPILER="/usr/bin/clang++"
build --action_env HOST_C_COMPILER="/usr/bin/clang"
build --action_env GCC_HOST_COMPILER_PATH="/usr/bin/clang"

# Keep clang config, but drop `--config=cuda` since we already built the CUDA dependencies.
build --config=clang

# Avoid the start/stop section garbage collection linker flags (kept from README but fixed for CLANG)
build --linkopt="-Wl,-z,nostart-stop-gc"
build --host_linkopt="-Wl,-z,nostart-stop-gc"
EOF

# -----------------------
#  Bazel build
# -----------------------
# Run the repository's install script from the current WORKDIR.
# `set -eux` aborts on the first error or unset variable and echoes each command.
RUN set -eux; \
  ./Scripts/install.sh

# Build the gRPCServerCLI target inside the SCALE environment.
# Flags: optimized build (`--compilation_mode=opt`), actions run locally without
# sandboxing (`--spawn_strategy=local`), continue past failing actions where possible
# (`--keep_going`), and all C/C++/Swift compiler warnings suppressed for both target and
# host configurations.
RUN set -eux; \
  . /opt/scale/bin/scaleenv ${AMD_GPU_ARCH} && \
  bazel build Apps:gRPCServerCLI \
  --keep_going \
  --spawn_strategy=local \
  --compilation_mode=opt \
  --copt=-w \
  --cxxopt=-w \
  --host_copt=-w \
  --host_cxxopt=-w \
  --swiftcopt=-suppress-warnings

# EXPOSE only documents the port (presumably the one gRPCServerCLI listens on — confirm);
# publish it at run time with `-p`.
EXPOSE 7859
# Exec-form entrypoint: runs the built binary directly with "/grpc-models" as its first
# argument (presumably the models directory — mount a volume there; confirm against the
# CLI's usage). Arguments given to `docker run` are appended after it.
ENTRYPOINT ["/usr/local/src/draw-things-community/bazel-bin/Apps/gRPCServerCLI", "/grpc-models"]

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions