Hi, thanks for your great work!
I have a question about the inference speed test.
I found that the original tools/test_runtime.py has some bugs (it raises a TypeError: 'NoneType' object is not subscriptable). I have fixed it and tested the model speed.
My test results are:
======================================
Inference Time: 72.34 ms (paper: 52 ms)
Params: 44.17 M
FLOPS: 72.71 G
Params and FLOPS are exactly the same as the paper, but the inference time is higher than reported.
Could you please release the official test_runtime.py script used in the paper? So that we can correctly reproduce the inference speed.
Here is my fixed test_runtime.py:
"""Standalone runtime benchmark.

Measures average single-image inference latency, parameter count, and FLOPs
for the model built by ``build_model`` from ``configs/test.yaml``.
"""
from thop import profile
import torch
import os
import yaml
import sys
import time
import warnings

warnings.filterwarnings("ignore")

# Make the project root importable when this script lives in tools/.
# Fixed: the paste had bare `file`, which is undefined — must be `__file__`.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(BASE_DIR)
sys.path.append(ROOT_DIR)
from lib.helpers.model_helper import build_model

# NOTE(review): set before any CUDA call so torch's lazy CUDA init honors it;
# torch is already imported but has not touched the GPU yet at this point.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Use a context manager so the config file handle is closed deterministically.
with open('./configs/test.yaml', 'r') as f:
    cfg = yaml.load(f, Loader=yaml.Loader)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model, loss = build_model(cfg['model'])
model = model.to(device)
model.eval()

# Dummy inputs: one 3x384x1280 image, a 3x4 calibration matrix, and an
# image-size tensor; the final positional arg to the model is None here.
input_img = torch.randn(1, 3, 384, 1280).to(device)
calib = torch.randn(1, 3, 4).to(device)
sizes = torch.randn(1, 2).to(device)

# ===================== Warm up =====================
# A few untimed passes so CUDA kernel compilation/caching does not skew timing.
with torch.no_grad():
    for _ in range(10):
        model(input_img, calib, sizes, None)

# ===================== Test speed =====================
torch.cuda.synchronize()  # drain pending GPU work before starting the clock
start = time.time()
with torch.no_grad():
    for _ in range(100):
        model(input_img, calib, sizes, None)
torch.cuda.synchronize()  # wait for all 100 forwards to actually finish
end = time.time()
infer_time = (end - start) / 100 * 1000  # average milliseconds per forward

# ===================== Stats =====================
total_params = sum(p.numel() for p in model.parameters())
flops, _ = profile(model, inputs=(input_img, calib, sizes, None))
print(f"\n======================================")
print(f"Inference Time: {infer_time:.2f} ms (paper≈52ms)")
print(f"Params: {total_params / 1e6:.2f} M")
print(f"FLOPS: {flops / 1e9:.2f} G")
print(f"======================================\n")
Thank you very much!
Hi, thanks for your great work!
I have a question about the inference speed test.
I found that the original tools/test_runtime.py has some bugs (it raises a TypeError: 'NoneType' object is not subscriptable). I have fixed it and tested the model speed.
My test results are:
======================================
Inference Time: 72.34 ms (paper: 52 ms)
Params: 44.17 M
FLOPS: 72.71 G
Params and FLOPS are exactly the same as the paper, but the inference time is higher than reported.
Could you please release the official test_runtime.py script used in the paper? So that we can correctly reproduce the inference speed.
Here is my fixed test_runtime.py:
"""Standalone runtime benchmark.

Measures average single-image inference latency, parameter count, and FLOPs
for the model built by ``build_model`` from ``configs/test.yaml``.
"""
from thop import profile
import torch
import os
import yaml
import sys
import time
import warnings

warnings.filterwarnings("ignore")

# Make the project root importable when this script lives in tools/.
# Fixed: the paste had bare `file`, which is undefined — must be `__file__`.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(BASE_DIR)
sys.path.append(ROOT_DIR)
from lib.helpers.model_helper import build_model

# NOTE(review): set before any CUDA call so torch's lazy CUDA init honors it;
# torch is already imported but has not touched the GPU yet at this point.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Use a context manager so the config file handle is closed deterministically.
with open('./configs/test.yaml', 'r') as f:
    cfg = yaml.load(f, Loader=yaml.Loader)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model, loss = build_model(cfg['model'])
model = model.to(device)
model.eval()

# Dummy inputs: one 3x384x1280 image, a 3x4 calibration matrix, and an
# image-size tensor; the final positional arg to the model is None here.
input_img = torch.randn(1, 3, 384, 1280).to(device)
calib = torch.randn(1, 3, 4).to(device)
sizes = torch.randn(1, 2).to(device)

# ===================== Warm up =====================
# A few untimed passes so CUDA kernel compilation/caching does not skew timing.
with torch.no_grad():
    for _ in range(10):
        model(input_img, calib, sizes, None)

# ===================== Test speed =====================
torch.cuda.synchronize()  # drain pending GPU work before starting the clock
start = time.time()
with torch.no_grad():
    for _ in range(100):
        model(input_img, calib, sizes, None)
torch.cuda.synchronize()  # wait for all 100 forwards to actually finish
end = time.time()
infer_time = (end - start) / 100 * 1000  # average milliseconds per forward

# ===================== Stats =====================
total_params = sum(p.numel() for p in model.parameters())
flops, _ = profile(model, inputs=(input_img, calib, sizes, None))
print(f"\n======================================")
print(f"Inference Time: {infer_time:.2f} ms (paper≈52ms)")
print(f"Params: {total_params / 1e6:.2f} M")
print(f"FLOPS: {flops / 1e9:.2f} G")
print(f"======================================\n")
Thank you very much!