Skip to content
54 changes: 27 additions & 27 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,51 +7,56 @@ For more information: [STL Benchmark Comparison: C++ vs. Julia](https://aaronang
## Getting Started

```console
$ git clone --recurse-submodules git@github.com:aaronang/stl-benchmark.git
git clone --recurse-submodules git@github.com:pddshk/stl-benchmark.git
```

NB: to build the C++ benchmark, you first need to add `#include <limits>` to `cpp/benchmark/src/benchmark_register.h`.

From the `cpp` directory:

```console
$ export CC=/usr/bin/clang
$ export CXX=/usr/bin/clang++
$ mkdir build
$ cd build
$ cmake -DCMAKE_BUILD_TYPE=Release ..
$ make -j
$ ./stl_benchmark
2018-10-06 12:35:31
export CC=/usr/bin/clang
export CXX=/usr/bin/clang++
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j
./stl_benchmark
```

Sample output

```console
Running ./stl_benchmark
Run on (8 X 2300 MHz CPU s)
Run on (12 X 4500 MHz CPU s)
CPU Caches:
L1 Data 32K (x4)
L1 Instruction 32K (x4)
L2 Unified 262K (x4)
L3 Unified 6291K (x1)
L1 Data 32K (x6)
L1 Instruction 32K (x6)
L2 Unified 256K (x6)
L3 Unified 12288K (x1)
--------------------------------------------------
Benchmark Time CPU Iterations
--------------------------------------------------
ParseStl 429234 ns 409210 ns 1729
ParseStl 237056 ns 236744 ns 2912
```

From the `julia` directory:

```console
$ julia -O3
julia> ]
(v1.0) pkg> activate .
(benchmark) pkg> ^C
(v1.6) pkg> add BenchmarkTools
julia> using BenchmarkTools
julia> include("stl.jl")
julia> @btime STL.parse("nist.stl")
211.641 μs (9 allocations: 347.06 KiB)
110.646 μs (14 allocations: 347.30 KiB)
```

From the `python` directory:

```console
$ python benchmark.py
25150.930999999986 μs
13513.513993530069 μs
```

> Note: Python 3.7.0 is required.
Expand All @@ -60,11 +65,6 @@ $ python benchmark.py

| Language | Time |
|----------|------------|
| C++ | 409.210 μs |
| Julia | 211.641 μs |
| Python | 25150.9 μs |

## Disclaimer

I am neither a C++ nor Julia expert. Please let me know if I biased the results
by implementing something obviously inefficiently.
| C++ | 237.056 μs |
| Julia | 110.646 μs |
| Python | 13513.5 μs |
52 changes: 23 additions & 29 deletions cpp/main.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
#include <fstream>
#include <cstdio>
#include <iostream>

#include "benchmark/benchmark.h"

// Three single-precision components (x, y, z).
struct point {
    float x, y, z;

    // Defaulted constructor: performs no initialisation of its own.
    point() = default;

    // Builds a point from its three components, in x, y, z order.
    point(float xp, float yp, float zp) : x{xp}, y{yp}, z{zp} {}
};

Expand All @@ -16,46 +18,38 @@ struct triangle {
point v2;
point v3;


triangle() = default;
triangle(point normalp, point v1p, point v2p, point v3p) :
normal(normalp), v1(v1p), v2(v2p), v3(v3p) {}
};

// Reads a binary file at stl_path and returns its contents as triangles.
// Layout consumed below: 80 header bytes, a 4-byte triangle count, then per
// triangle 48 bytes of data followed by 2 bytes that are discarded.
// NOTE(review): this span contains two interleaved variants of parse_stl (one
// based on std::ifstream taking const std::string&, one based on C stdio
// taking const char*); the comments describe each statement as written.
std::vector<triangle> parse_stl(const std::string &stl_path) {
std::ifstream stl_file(stl_path.c_str(), std::ios::in | std::ios::binary);

// skip header
char header_info[80];
stl_file.read(header_info, 80);

// read the 4-byte count and reinterpret the raw bytes as an unsigned int
char n_triangles[4];
stl_file.read(n_triangles, 4);
auto *r = (unsigned int *) n_triangles;
unsigned int num_triangles = *r;
std::vector<triangle> parse_stl(const char* stl_path) {
using std::fread;
// NOTE(review): the result of fopen is used without a null check — confirm
// callers guarantee the file exists.
auto std_file = std::fopen(stl_path, "rb");

// read and discard the 80-byte header
char dummy[80];
fread(dummy, 1, 80, std_file);

// stream variant: reserve capacity up front, elements are appended in the loop
std::vector<triangle> triangles;
triangles.reserve(num_triangles);
// stdio variant: read the 4-byte count directly into a fixed-width integer
// NOTE(review): fread return values are not checked anywhere in this function.
std::uint32_t n_triangles;
fread(&n_triangles, 4, 1, std_file);

for (unsigned int i = 0; i < num_triangles; i++) {
// 12 floats = 48 bytes: normal followed by the three vertices
float fs[12];
stl_file.read((char *) fs, 48);
point normal{fs[0], fs[1], fs[2]};
point v1{fs[3], fs[4], fs[5]};
point v2{fs[6], fs[7], fs[8]};
point v3{fs[9], fs[10], fs[11]};
triangles.emplace_back(normal, v1, v2, v3);

// skip attribute byte count
// stdio variant: size the vector up front so each record can be read in place
std::vector<triangle> triangles(n_triangles);

for (auto& tr : triangles){
// read 48 bytes straight into the triangle object
// NOTE(review): assumes triangle is exactly 48 bytes with no padding — confirm.
fread(&tr, 48, 1, std_file);
// discard the 2 bytes that follow each 48-byte record
char dummy[2];
stl_file.read(dummy, 2);
fread(dummy, 1, 2, std_file);
}

// stream variant: expect no data left after the last record
assert(stl_file.peek() == EOF);

std::fclose(std_file);
return triangles;
}

constexpr auto stl = "nist.stl";

static void ParseStl(benchmark::State& state) {
std::string stl = "nist.stl";
for (auto _ : state) {
parse_stl(stl);
}
Expand Down
43 changes: 30 additions & 13 deletions julia/stl.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
module STL

export parse, parse_malloc

struct Vertex
x::Float32
y::Float32
Expand All @@ -15,21 +17,36 @@ struct Triangle
v3::Vertex
end

# Reads the file at `path` and returns its triangles.
# Layout consumed below: 80 header bytes are skipped, a UInt32 triangle count
# is read, then one Triangle record per count with 2 bytes skipped per record.
# NOTE(review): this span contains two interleaved variants of `parse` (one
# built on `map` + `read!` into a Ref, one built on `unsafe_read` into a
# preallocated vector); the comments describe each statement as written.
function parse(path::AbstractString)
open(path) do stl
skip(stl, 80) # skip header
trianglecount = read(stl, UInt32)
# single reusable slot that each record is read into
ref = Ref{Triangle}()
triangles = map(1:trianglecount) do i
read!(stl, ref)
skip(stl, 2) # skip attribute byte count
ref[]
function parse(path)
open(path; lock = false) do io
skip(io, 80) # skip header
triangle_count = read(io, UInt32)
triangles = Vector{Triangle}(undef, triangle_count) # preallocate memory for triangles
# NOTE(review): `triangles` is not wrapped in GC.@preserve while the raw
# pointer `dest` is in use — confirm this is safe.
dest = Base.unsafe_convert(Ptr{Triangle}, triangles) # destination pointer
# NOTE(review): this read happens even when triangle_count == 0, i.e. into an
# empty vector — confirm inputs always contain at least one triangle.
unsafe_read(io, dest, sizeof(Triangle)) # copying first triangle
for _ in 2:triangle_count
skip(io, 2) # skip the 2 bytes between consecutive records
dest += sizeof(Triangle) # moving to the next triangle in dest
unsafe_read(io, dest, sizeof(Triangle))
end
# map variant: the stream must be fully consumed after the last record
@assert eof(stl)
return triangles
triangles
end
end

export parse
"""
    parse_malloc(path)

Read the binary STL file at `path` and return its triangles as a `Vector{Triangle}`
whose storage is a `Libc.malloc`-allocated buffer owned by the returned array.

The layout consumed is: an 80-byte header (skipped), a `UInt32` triangle count, then
one `sizeof(Triangle)`-byte record per triangle with 2 bytes skipped between
consecutive records. The 2 bytes after the last record are left unread.

Returns an empty vector when the count is zero. Throws `OutOfMemoryError` if the
buffer cannot be allocated. Any error raised while reading (e.g. `EOFError` on a
truncated file) is rethrown after the buffer has been freed.
"""
function parse_malloc(path)
    open(path; lock = false) do io
        skip(io, 80) # skip header
        triangle_count = Int(read(io, UInt32))
        # Nothing to read; also avoids writing a record into a zero-byte allocation.
        triangle_count == 0 && return Triangle[]
        stride = sizeof(Triangle)
        triangles = convert(Ptr{Triangle}, Base.Libc.malloc(stride * triangle_count))
        triangles == C_NULL && throw(OutOfMemoryError())
        try
            dest = triangles
            unsafe_read(io, dest, stride) # copy the first triangle
            for _ in 2:triangle_count
                skip(io, 2) # skip the 2 bytes between consecutive records
                dest += stride # advance to the next triangle slot
                unsafe_read(io, dest, stride)
            end
        catch
            # The buffer is not yet owned by an Array, so release it here.
            Base.Libc.free(triangles)
            rethrow()
        end
        return Base.unsafe_wrap(Array, triangles, triangle_count; own = true)
    end
end

end # module
end #module STL