diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9a09c28 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +data_and_model/** +secrets/** diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..cf8c898 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,26 @@ +FROM nvcr.io/nvidia/tensorrt:25.01-py3 + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_NO_CACHE_DIR=1 \ + DEBIAN_FRONTEND=noninteractive \ + LC_ALL="C.UTF-8" \ + LANG="C.UTF-8" \ + TZ=Asia/Tokyo + +WORKDIR /workspace + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + tzdata \ + curl \ + build-essential && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt /app/requirements.txt +RUN python3 -m pip install --upgrade pip && \ + python3 -m pip install -r /app/requirements.txt + +CMD ["/bin/bash"] diff --git a/README.md b/README.md index c663331..9dbc034 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,48 @@ # psv-utils Utilities related to PackedSfenValue format + +# How to use with Docker + +Build the Docker image + +```bash +./docker/docker_build.sh +``` + +Run a container + +```bash +./docker/docker_run.sh --name rescore --gpus all +``` + +Run the rescore script in the container + +```bash +python rescore_with_dlshogi.py --model-path <MODEL_PATH> --batch-size 4096 --score-scaling 1200.0 --blend-ratio 1.0 --enable-cuda --enable-tensorrt +``` + +# Download & upload files from/to Google Drive + +After building the Docker image, run a container with a published port (used for the OAuth callback) + +```bash +./docker/docker_run.sh --name download --port 8765 +``` + +Download: + +```bash +python3 google_drive_utils/download_from_drive.py --port 8765 --out-dir <OUT_DIR> --file-id <FILE_ID> +``` + +Then, follow the instructions given in your terminal. +(e.g. auth, login, etc.) + +Upload: + +```bash +python3 google_drive_utils/upload_into_drive.py --port 8765 --file <FILE_PATH> --folder-id <FOLDER_ID> +``` + +Then, follow the instructions given in your terminal. +(e.g. auth, login, etc.) 
def main():
    """Download one Google Drive file into ``--out-dir``, resuming if possible.

    Command-line options:
        --file-id: ID of the Drive file to download (required).
        --port: local port for the OAuth callback server (default 8100).
        --out-dir: destination directory, created if missing (default ".").

    The payload is fetched with HTTP Range requests and appended to
    ``<name>.part``; an existing ``.part`` file is continued from its current
    size. Once every byte has arrived the file is renamed to ``<name>``.

    Exits with status 1 when the client credentials file is missing, when
    Drive reports no size for the file, when the existing ``.part`` file is
    larger than the remote file, on a non-retryable HTTP error, or after
    ``MAX_RETRIES`` consecutive failed requests that made no progress.
    """
    CREDENTIALS = "secrets/credentials.json"
    TOKEN = "secrets/token.json"

    DRIVE_API = "https://www.googleapis.com/drive/v3/files"
    CHUNK = 128 * 1024 * 1024  # bytes requested per Range request (128MB)
    STREAM_CHUNK = 8 * 1024 * 1024  # bytes written per streamed piece (8MB)
    TIMEOUT = (10, 60)  # (connect, read)
    RETRY_SLEEP = 5  # seconds to wait before retrying a failed request
    MAX_RETRIES = 20  # consecutive failures without progress before giving up
    # 4xx statuses that are still worth retrying (rate limiting / timeouts).
    RETRYABLE_CLIENT_ERRORS = {403, 408, 429}

    p = argparse.ArgumentParser()
    p.add_argument("--file-id", required=True)
    p.add_argument("--port", default=8100, type=int)
    p.add_argument("--out-dir", default=".")
    args = p.parse_args()

    # When token.json exists it is used non-interactively (refresh included).
    try:
        creds = get_drive_credentials(
            port=args.port, credentials_path=CREDENTIALS, token_path=TOKEN
        )
    except FileNotFoundError:
        print(f"[ERROR] {CREDENTIALS} が見つかりません。")
        sys.exit(1)

    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # Metadata (name, size)
    meta = request_with_refresh(
        lambda token: requests.get(
            f"{DRIVE_API}/{args.file_id}",
            params={"fields": "name,size"},
            headers={"Authorization": f"Bearer {token}"},
            timeout=TIMEOUT,
        ),
        creds,
        TOKEN,
    )
    meta.raise_for_status()
    meta = meta.json()
    # The remote name is untrusted input: keep only its final path component
    # so it cannot point outside out_dir.
    name = Path(meta["name"]).name or args.file_id
    if "size" not in meta:
        print(f"[ERROR] Drive reports no size for '{name}'; cannot download it.")
        sys.exit(1)
    size = int(meta["size"])

    part = out_dir / (name + ".part")
    pos = part.stat().st_size if part.exists() else 0
    if pos > size:
        # A leftover .part from some other download; appending would corrupt it.
        print(f"[ERROR] {part} is larger ({pos} B) than the remote file ({size} B).")
        sys.exit(1)

    # Payload (alt=media + Range)
    url = f"{DRIVE_API}/{args.file_id}"
    params = {"alt": "media"}

    with open(part, "ab") as f, tqdm(
        total=size, initial=pos, unit="B", unit_scale=True, desc=name
    ) as pbar:
        failures = 0
        while pos < size:
            start = pos
            end = min(start + CHUNK - 1, size - 1)

            try:
                r = request_with_refresh(
                    lambda token: requests.get(
                        url,
                        params=params,
                        headers={
                            "Authorization": f"Bearer {token}",
                            "Range": f"bytes={start}-{end}",
                        },
                        stream=True,
                        timeout=TIMEOUT,
                    ),
                    creds,
                    TOKEN,
                )
                # Close the streamed response even when reading fails midway.
                with r:
                    r.raise_for_status()
                    if r.status_code != 206 and start > 0:
                        # The Range header was ignored: the body starts at byte
                        # 0 and must not be appended to a partial file.
                        tqdm.write(
                            f"[ERROR] Server ignored the Range request "
                            f"(status {r.status_code}); cannot resume."
                        )
                        sys.exit(1)

                    for chunk in r.iter_content(STREAM_CHUNK):
                        if chunk:
                            f.write(chunk)
                            pos += len(chunk)
                            pbar.update(len(chunk))

                f.flush()
                os.fsync(f.fileno())
                failures = 0

            except requests.RequestException as exc:
                status = getattr(
                    getattr(exc, "response", None), "status_code", None
                )
                if (
                    status is not None
                    and 400 <= status < 500
                    and status not in RETRYABLE_CLIENT_ERRORS
                ):
                    tqdm.write(f"[ERROR] Download failed with status {status}: {exc}")
                    sys.exit(1)

                # Only failures that made no progress count toward the limit.
                failures = 0 if pos > start else failures + 1
                if failures >= MAX_RETRIES:
                    tqdm.write(
                        f"[ERROR] Giving up after {failures} consecutive "
                        f"failures: {exc}"
                    )
                    sys.exit(1)
                tqdm.write(f"[WARN] Request failed ({exc}); retrying in {RETRY_SLEEP}s")
                time.sleep(RETRY_SLEEP)

    part.rename(out_dir / name)
# Scopes that allow both download and upload operations
DEFAULT_DRIVE_SCOPES = [
    "https://www.googleapis.com/auth/drive.readonly",
    "https://www.googleapis.com/auth/drive.file",
]


def get_drive_credentials(
    *,
    port: int = 8100,
    scopes=None,
    credentials_path: str = "secrets/credentials.json",
    token_path: str = "secrets/token.json",
):
    """Return Drive credentials, running the OAuth flow only when needed.

    Args:
        port: local port the OAuth callback server listens on.
        scopes: OAuth scopes to request; defaults to ``DEFAULT_DRIVE_SCOPES``.
        credentials_path: path of the OAuth client secrets file.
        token_path: path of the cached user token; created or overwritten
            after a successful interactive authorization.

    Returns:
        The credentials object, either loaded from ``token_path`` (refreshed
        if expired) or freshly obtained from the interactive flow.

    Raises:
        FileNotFoundError: if ``credentials_path`` does not exist.
    """
    if scopes is None:
        scopes = DEFAULT_DRIVE_SCOPES

    if not os.path.exists(credentials_path):
        raise FileNotFoundError(f"Credentials not found: {credentials_path}")

    creds = None
    if os.path.exists(token_path):
        try:
            creds = Credentials.from_authorized_user_file(token_path, scopes)
        except ValueError:
            # Corrupt or incomplete token cache: discard it and re-authorize
            # instead of crashing.
            os.remove(token_path)
            creds = None
        # NOTE(review): the credentials are loaded with `scopes` passed in, so
        # this check may always succeed -- confirm against google-auth.
        if creds is not None and not creds.has_scopes(scopes):
            os.remove(token_path)
            creds = None

    if creds and creds.expired and creds.refresh_token:
        if not _refresh_token(creds, token_path):
            creds = None

    if not creds or not creds.valid:
        flow = InstalledAppFlow.from_client_secrets_file(credentials_path, scopes)
        # open_browser=False: the authorization URL is shown in the terminal.
        creds = flow.run_local_server(
            open_browser=False, host="localhost", bind_addr="0.0.0.0", port=int(port)
        )
        token_file = Path(token_path)
        token_file.parent.mkdir(parents=True, exist_ok=True)
        token_file.write_text(creds.to_json())

    return creds


def request_with_refresh(request_fn, creds, token_path: str):
    """Call ``request_fn`` with the access token, retrying once after a 401.

    Args:
        request_fn: callable taking the access token string and returning a
            response object with a ``status_code`` attribute.
        creds: credentials providing ``token`` and ``refresh_token``.
        token_path: path of the cached token, updated when a refresh succeeds.

    Returns:
        The response of the first call, or of the second call when the first
        was a 401 and the token could be refreshed.
    """
    response = request_fn(creds.token)

    if response.status_code == 401 and creds.refresh_token:
        if _refresh_token(creds, token_path):
            # Release the rejected (possibly streaming) response before the
            # retry so its connection is not leaked.
            response.close()
            response = request_fn(creds.token)

    return response


def _refresh_token(creds, token_path: str) -> bool:
    """Refresh ``creds`` in place and persist them; return True on success.

    The cached token file is deleted only when the refresh itself is rejected
    (``RefreshError``). Any other failure, such as a network problem, leaves
    the cached token in place so a later run can try again.
    """
    # Function-scope import: only this helper needs the exception type.
    from google.auth.exceptions import RefreshError

    try:
        creds.refresh(Request())
    except RefreshError:
        if os.path.exists(token_path):
            os.remove(token_path)
        return False
    except Exception as exc:
        print(f"[WARN] Token refresh failed, keeping cached token: {exc}")
        return False

    try:
        token_file = Path(token_path)
        token_file.parent.mkdir(parents=True, exist_ok=True)
        token_file.write_text(creds.to_json())
    except OSError as exc:
        # The refreshed credentials are still valid in memory.
        print(f"[WARN] Could not save refreshed token to {token_path}: {exc}")
    return True
def _next_upload_offset(range_header):
    """Return the offset of the first byte the server has not stored yet.

    ``range_header`` is the ``Range`` header of a 308 response, for example
    ``"bytes=0-262143"``. A missing or empty header is treated as nothing
    stored and yields 0. Raises ``ValueError`` if the header is malformed.
    """
    if not range_header:
        return 0
    last_byte = range_header.rpartition("-")[2]
    return int(last_byte) + 1


def main():
    """Upload a local file into a Google Drive folder via a resumable upload.

    Command-line options:
        --file: path of the file to upload (required).
        --folder-id: ID of the destination Drive folder (required).
        --port: local port for the OAuth callback server (default 8100).

    The upload session is opened with the file's name, parent folder and total
    size; the content is then sent in ``CHUNK``-sized PUT requests. After each
    308 response the next offset is taken from the ``Range`` header rather
    than assumed, and a zero-byte file is finalized with an empty request.

    Exits with status 1 when the file or the client credentials are missing,
    or when the server answers a chunk with an unexpected status or reports no
    progress. Raises ``requests.HTTPError`` if opening the session fails.
    """
    CREDENTIALS = "secrets/credentials.json"
    TOKEN = "secrets/token.json"

    UPLOAD_API = "https://www.googleapis.com/upload/drive/v3/files"
    CHUNK = 32 * 1024 * 1024  # 32MB chunks for resumable upload
    TIMEOUT = (10, 60)  # (connect, read)

    p = argparse.ArgumentParser(description="Upload file to Google Drive folder")
    p.add_argument("--file", required=True, help="Path to file to upload")
    p.add_argument("--folder-id", required=True, help="Google Drive folder ID")
    p.add_argument("--port", default=8100, type=int, help="Port for OAuth callback")
    args = p.parse_args()

    file_path = Path(args.file)
    # is_file() also rejects directories, which cannot be opened for upload.
    if not file_path.is_file():
        print(f"[ERROR] File not found: {file_path}")
        sys.exit(1)

    # Authentication (shared token with download)
    try:
        creds = get_drive_credentials(
            port=args.port, credentials_path=CREDENTIALS, token_path=TOKEN
        )
    except FileNotFoundError:
        print(f"[ERROR] File not found: {CREDENTIALS}")
        sys.exit(1)

    file_size = file_path.stat().st_size
    file_name = file_path.name

    # Step 1: Initiate resumable upload
    metadata = {"name": file_name, "parents": [args.folder_id]}

    init_response = request_with_refresh(
        lambda token: requests.post(
            f"{UPLOAD_API}?uploadType=resumable&supportsAllDrives=true",
            json=metadata,
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json; charset=UTF-8",
                "X-Upload-Content-Length": str(file_size),
            },
            timeout=TIMEOUT,
        ),
        creds,
        TOKEN,
    )

    if not init_response.ok:
        print(f"[ERROR] Init upload failed with status {init_response.status_code}")
        print(f"        Response: {init_response.text}")
        init_response.raise_for_status()
    upload_url = init_response.headers["Location"]

    # Step 2: Upload file in chunks with progress bar
    with open(file_path, "rb") as f, tqdm(
        total=file_size, unit="B", unit_scale=True, desc=f"Uploading {file_name}"
    ) as pbar:
        uploaded = 0
        while True:
            # Re-position every time: the server may have stored fewer bytes
            # than were sent in the previous request.
            f.seek(uploaded)
            chunk_data = f.read(min(CHUNK, file_size - uploaded))

            if chunk_data:
                last = uploaded + len(chunk_data) - 1
                content_range = f"bytes {uploaded}-{last}/{file_size}"
            else:
                # Nothing left to send (e.g. a zero-byte file): an empty
                # request with "*" as the range finalizes the session.
                content_range = f"bytes */{file_size}"

            headers = {
                "Content-Length": str(len(chunk_data)),
                "Content-Range": content_range,
            }

            upload_response = requests.put(
                upload_url, data=chunk_data, headers=headers, timeout=TIMEOUT
            )
            status = upload_response.status_code

            if status in (200, 201):
                # Upload complete
                pbar.update(file_size - uploaded)
                result = upload_response.json()
                break

            if status != 308:
                pbar.close()
                print(f"\n[ERROR] Upload failed with status {status}")
                print(f"        Response: {upload_response.text}")
                sys.exit(1)

            # 308 Resume Incomplete: continue from what the server reports.
            try:
                next_offset = _next_upload_offset(upload_response.headers.get("Range"))
            except ValueError:
                pbar.close()
                print(
                    "\n[ERROR] Unparseable Range header in 308 response: "
                    f"{upload_response.headers.get('Range')!r}"
                )
                sys.exit(1)

            if next_offset <= uploaded:
                # No progress: stop instead of resending the same bytes forever.
                pbar.close()
                print(f"\n[ERROR] Server stored no new data (offset {next_offset})")
                sys.exit(1)

            pbar.update(next_offset - uploaded)
            uploaded = next_offset

    print(f"\n[SUCCESS] File uploaded: {result['name']}")
    print(f"          File ID: {result['id']}")
    print(f"          Link: https://drive.google.com/file/d/{result['id']}/view")