Train via Kaggle and publish versioned model to Hugging Face

Train via Kaggle and publish versioned model to Hugging Face #3

Workflow file for this run

.github/workflows/train-via-kaggle-to-hf.yml at f9b8133

	# Manually triggered pipeline: trains on Kaggle, then publishes the resulting
	# model directory to Hugging Face under a versioned name.
	name: Train via Kaggle and publish versioned model to Hugging Face

	on:
	  workflow_dispatch:
	    # Every default is quoted so it is passed on as a string, never as a
	    # YAML number (e.g. 1e-4 would otherwise be read as a float).
	    inputs:
	      # Suffix of the published model name (TinyModel<version>).
	      version:
	        description: 'Version number (e.g. 1, 2, 3)'
	        default: '1'
	        required: true
	      # Hugging Face user or organisation that receives the model.
	      namespace:
	        description: 'Hugging Face namespace (user or org)'
	        default: 'HyperlinksSpace'
	        required: true
	      # The remaining inputs are forwarded to the training script as
	      # command-line flags.
	      max_train_samples:
	        description: 'Training sample count'
	        default: '3000'
	        required: true
	      max_eval_samples:
	        description: 'Evaluation sample count'
	        default: '600'
	        required: true
	      epochs:
	        description: 'Number of epochs'
	        default: '2'
	        required: true
	      batch_size:
	        description: 'Batch size'
	        default: '16'
	        required: true
	      learning_rate:
	        description: 'Learning rate'
	        default: '1e-4'
	        required: true

	# Single job: push a generated notebook to Kaggle, wait for it to finish,
	# download its output and publish the model directory to Hugging Face.
	jobs:
	  train-kaggle-publish-hf:
	    runs-on: ubuntu-latest
	    permissions:
	      contents: read
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4

	      - name: Setup Python
	        uses: actions/setup-python@v5
	        with:
	          python-version: "3.11"

	      - name: Install dependencies
	        run: pip install --upgrade kaggle huggingface_hub

	      # Fail fast with a readable message when either secret is missing.
	      - name: Configure Kaggle API credentials
	        env:
	          KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
	          KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
	        run: |
	          python - <<'PY'
	          import os
	          if not os.getenv("KAGGLE_USERNAME") or not os.getenv("KAGGLE_KEY"):
	              raise SystemExit("Missing KAGGLE_USERNAME or KAGGLE_KEY GitHub secrets.")
	          print("Kaggle credentials present.")
	          PY

	      # The Kaggle CLI authenticates from KAGGLE_USERNAME + KAGGLE_KEY, so the
	      # authenticated owner is exactly KAGGLE_USERNAME. Reading it from the
	      # environment avoids parsing `kaggle config view`, whose output is
	      # plain text rather than JSON.
	      - name: Resolve authenticated Kaggle owner
	        env:
	          KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
	        run: |
	          python - <<'PY'
	          import os

	          owner = os.environ.get("KAGGLE_USERNAME", "").strip()
	          if not owner:
	              raise SystemExit("Unable to resolve authenticated Kaggle username from kaggle config.")
	          print(f"Authenticated Kaggle username: {owner}")
	          with open("kaggle-owner.txt", "w", encoding="utf-8") as f:
	              f.write(owner + "\n")
	          PY
	          owner="$(tr -d '\r\n' < kaggle-owner.txt)"
	          echo "KAGGLE_OWNER=$owner" >> "$GITHUB_ENV"

	      # Generates a one-cell notebook plus kernel-metadata.json and pushes
	      # them as a private GPU kernel. Exports KAGGLE_KERNEL_SLUG for the
	      # following steps.
	      - name: Build and push Kaggle training kernel
	        env:
	          INPUT_VERSION: ${{ github.event.inputs.version }}
	          KAGGLE_OWNER: ${{ env.KAGGLE_OWNER }}
	          # Both variables are required for env-based Kaggle authentication.
	          KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
	          KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
	          INPUT_MAX_TRAIN_SAMPLES: ${{ github.event.inputs.max_train_samples }}
	          INPUT_MAX_EVAL_SAMPLES: ${{ github.event.inputs.max_eval_samples }}
	          INPUT_EPOCHS: ${{ github.event.inputs.epochs }}
	          INPUT_BATCH_SIZE: ${{ github.event.inputs.batch_size }}
	          INPUT_LEARNING_RATE: ${{ github.event.inputs.learning_rate }}
	          GITHUB_REPOSITORY: ${{ github.repository }}
	          GITHUB_SHA: ${{ github.sha }}
	        run: |
	          python - <<'PY'
	          import json
	          import math
	          import os
	          from pathlib import Path


	          def positive_int(env_name, label):
	              """Return the raw input string after checking it is a positive ASCII integer."""
	              raw = os.environ[env_name].strip()
	              if not (raw.isascii() and raw.isdigit()) or int(raw) < 1:
	                  raise SystemExit(f"{label} must be a positive integer.")
	              return raw


	          def positive_float(env_name, label):
	              """Return the raw input string after checking it is a finite number > 0."""
	              raw = os.environ[env_name].strip()
	              try:
	                  value = float(raw) if raw.isascii() else math.nan
	              except ValueError:
	                  value = math.nan
	              if not math.isfinite(value) or value <= 0:
	                  raise SystemExit(f"{label} must be a positive number.")
	              return raw


	          # These values are spliced into a shell command inside the notebook,
	          # so each one is validated before use.
	          version = positive_int("INPUT_VERSION", "version")
	          max_train_samples = positive_int("INPUT_MAX_TRAIN_SAMPLES", "max_train_samples")
	          max_eval_samples = positive_int("INPUT_MAX_EVAL_SAMPLES", "max_eval_samples")
	          epochs = positive_int("INPUT_EPOCHS", "epochs")
	          batch_size = positive_int("INPUT_BATCH_SIZE", "batch_size")
	          learning_rate = positive_float("INPUT_LEARNING_RATE", "learning_rate")

	          owner = os.environ["KAGGLE_OWNER"].strip()
	          if not owner:
	              raise SystemExit("Missing resolved KAGGLE_OWNER.")
	          repo = os.environ["GITHUB_REPOSITORY"].strip()
	          sha = os.environ["GITHUB_SHA"].strip()

	          slug = f"tinymodel-train-v{version}-{sha[:8]}"
	          # Kaggle derives a kernel's slug from its title (lowercased, spaces
	          # turned into dashes), so the title must resolve to `slug`;
	          # otherwise the status/output calls below would look up a kernel
	          # that does not exist.
	          title = f"TinyModel train v{version} {sha[:8]}"
	          workspace = Path(".kaggle_kernel")
	          workspace.mkdir(parents=True, exist_ok=True)
	          out_dir = f"/kaggle/working/TinyModel{version}"
	          notebook = {
	              "cells": [
	                  {
	                      "cell_type": "code",
	                      "execution_count": None,
	                      "metadata": {},
	                      "outputs": [],
	                      "source": [
	                          "!pip -q install -U huggingface_hub transformers datasets torch tokenizers\n",
	                          f"!git clone https://github.com/{repo}.git /kaggle/working/TinyModel\n",
	                          "!cd /kaggle/working/TinyModel && git checkout " + sha + "\n",
	                          f"!python /kaggle/working/TinyModel/scripts/train_tinymodel1_agnews.py --output-dir {out_dir} --max-train-samples {max_train_samples} --max-eval-samples {max_eval_samples} --epochs {epochs} --batch-size {batch_size} --learning-rate {learning_rate}\n",
	                          f"!ls -la {out_dir}\n",
	                      ],
	                  }
	              ],
	              "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}},
	              "nbformat": 4,
	              "nbformat_minor": 5,
	          }
	          (workspace / "notebook.ipynb").write_text(json.dumps(notebook), encoding="utf-8")

	          kernel_meta = {
	              "id": f"{owner}/{slug}",
	              "title": title,
	              "code_file": "notebook.ipynb",
	              "language": "python",
	              "kernel_type": "notebook",
	              "is_private": "true",
	              "enable_gpu": "true",
	              "enable_internet": "true",
	              "dataset_sources": [],
	              "competition_sources": [],
	              "kernel_sources": [],
	          }
	          (workspace / "kernel-metadata.json").write_text(json.dumps(kernel_meta, indent=2), encoding="utf-8")
	          print(f"Created kernel {owner}/{slug}")
	          (workspace / "kernel-slug.txt").write_text(slug + "\n", encoding="utf-8")
	          PY
	          slug="$(tr -d '\r\n' < .kaggle_kernel/kernel-slug.txt)"
	          echo "KAGGLE_KERNEL_SLUG=$slug" >> "$GITHUB_ENV"
	          kaggle kernels push -p ".kaggle_kernel"

	      # Polls the kernel every 30 seconds for up to three hours.
	      - name: Wait for Kaggle kernel completion
	        env:
	          KAGGLE_OWNER: ${{ env.KAGGLE_OWNER }}
	          KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
	          KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
	        run: |
	          python - <<'PY'
	          import os
	          import re
	          import subprocess
	          import time

	          owner = os.environ["KAGGLE_OWNER"].strip()
	          slug = os.environ["KAGGLE_KERNEL_SLUG"].strip()
	          ref = f"{owner}/{slug}"
	          print(f"Waiting for kernel: {ref}")

	          # `kaggle kernels status` prints a text line of the form
	          #   <ref> has status "<status>"
	          # not JSON. Depending on the CLI version the status is either a bare
	          # word ("complete") or an enum name ("KernelWorkerStatus.COMPLETE").
	          status_re = re.compile(r'has status "([^"]+)"')

	          timeout_sec = 3 * 60 * 60
	          start = time.monotonic()
	          while True:
	              out = subprocess.check_output(["kaggle", "kernels", "status", ref], text=True)
	              match = status_re.search(out)
	              if match is None:
	                  raise SystemExit(f"Unexpected output from 'kaggle kernels status': {out!r}")
	              # Keep only the part after the enum prefix, case-insensitively.
	              value = match.group(1).rsplit(".", 1)[-1].lower()
	              print(f"status={value}")
	              if value == "complete":
	                  break
	              if value in {"error", "failed", "cancelled"} or value.startswith("cancel"):
	                  raise SystemExit(f"Kaggle kernel failed with status={value}")
	              if time.monotonic() - start > timeout_sec:
	                  raise SystemExit("Timed out waiting for Kaggle kernel completion.")
	              time.sleep(30)
	          PY

	      # Downloads the kernel output and fails the job when the expected
	      # model directory is absent.
	      - name: Download Kaggle output artifacts
	        env:
	          KAGGLE_OWNER: ${{ env.KAGGLE_OWNER }}
	          KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
	          KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
	          INPUT_VERSION: ${{ github.event.inputs.version }}
	        run: |
	          mkdir -p ".kaggle_output"
	          kaggle kernels output "${KAGGLE_OWNER}/${KAGGLE_KERNEL_SLUG}" -p ".kaggle_output"
	          test -d ".kaggle_output/TinyModel${INPUT_VERSION}"

	      # User-supplied inputs reach the shell through environment variables
	      # only; they are never expanded into the script text itself.
	      - name: Publish TinyModel${{ github.event.inputs.version }} to Hugging Face
	        env:
	          HF_TOKEN: ${{ secrets.HF_TOKEN }}
	          INPUT_NAMESPACE: ${{ github.event.inputs.namespace }}
	          INPUT_VERSION: ${{ github.event.inputs.version }}
	        run: |
	          python scripts/publish_hf_artifact.py \
	            --namespace "${INPUT_NAMESPACE}" \
	            --name "TinyModel${INPUT_VERSION}" \
	            --repo-type model \
	            --source-dir ".kaggle_output/TinyModel${INPUT_VERSION}"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Train via Kaggle and publish versioned model to Hugging Face #3

Workflow file

Train via Kaggle and publish versioned model to Hugging Face #3

Uh oh!

Workflow file for this run