Skip to content

Commit 742a733

Browse files
authored
feat: add cpu rng (#977)
1 parent e8eb379 commit 742a733

File tree

6 files changed

+157
-3
lines changed

6 files changed

+157
-3
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,9 @@ API and command-line option may change frequently.***
8181
- [`DPM++ 2M v2`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457)
8282
- `DPM++ 2S a`
8383
- [`LCM`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/13952)
84-
- Cross-platform reproducibility (`--rng cuda`, consistent with the `stable-diffusion-webui GPU RNG`)
84+
- Cross-platform reproducibility
85+
- `--rng cuda`, default, consistent with the `stable-diffusion-webui GPU RNG`
86+
- `--rng cpu`, consistent with the `comfyui RNG`
8587
- Embeds generation parameters into PNG output as a webui-compatible text string
8688

8789
## Quick Start

examples/cli/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ Options:
9494
-M, --mode run mode, one of [img_gen, vid_gen, upscale, convert], default: img_gen
9595
--type weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K). If not specified, the default is the
9696
type of the weight file
97-
--rng RNG, one of [std_default, cuda], default: cuda
97+
--rng RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)
9898
-s, --seed RNG seed (default: 42, use random seed for < 0)
9999
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
100100
tcd] (default: euler for Flux/SD3/Wan, euler_a otherwise)

examples/cli/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1124,7 +1124,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
11241124
on_type_arg},
11251125
{"",
11261126
"--rng",
1127-
"RNG, one of [std_default, cuda], default: cuda",
1127+
"RNG, one of [std_default, cuda, cpu], default: cuda(sd-webui), cpu(comfyui)",
11281128
on_rng_arg},
11291129
{"-s",
11301130
"--seed",

rng_mt19937.hpp

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
#ifndef __RNG_MT19937_HPP__
2+
#define __RNG_MT19937_HPP__
3+
4+
#include <array>
#include <cmath>
#include <cstdint>
#include <limits>
#include <vector>

#include "rng.hpp"
8+
9+
// RNG imitiating torch cpu randn on CPU.
10+
// Port from pytorch, original license: https://github.com/pytorch/pytorch/blob/d01a7b0241ed1c4cded7e7ca097249feb343f072/LICENSE
11+
// Ref: https://github.com/pytorch/pytorch/blob/d01a7b0241ed1c4cded7e7ca097249feb343f072/aten/src/ATen/core/TransformationHelper.h, for uniform_real
12+
// Ref: https://github.com/pytorch/pytorch/blob/d01a7b0241ed1c4cded7e7ca097249feb343f072/aten/src/ATen/native/cpu/DistributionTemplates.h, for normal_kernel/normal_fill/normal_fill_16
13+
// Ref: https://github.com/pytorch/pytorch/blob/d01a7b0241ed1c4cded7e7ca097249feb343f072/aten/src/ATen/core/MT19937RNGEngine.h, for mt19937_engine
14+
// Ref: https://github.com/pytorch/pytorch/blob/d01a7b0241ed1c4cded7e7ca097249feb343f072/aten/src/ATen/core/DistributionsHelper.h, for uniform_real_distribution/normal_distribution
15+
class MT19937RNG : public RNG {
16+
static const int N = 624;
17+
static const int M = 397;
18+
static const uint32_t MATRIX_A = 0x9908b0dfU;
19+
static const uint32_t UMASK = 0x80000000U;
20+
static const uint32_t LMASK = 0x7fffffffU;
21+
22+
struct State {
23+
uint64_t seed_;
24+
int left_;
25+
bool seeded_;
26+
uint32_t next_;
27+
std::array<uint32_t, N> state_;
28+
bool has_next_gauss = false;
29+
double next_gauss = 0.0f;
30+
};
31+
32+
State s;
33+
34+
uint32_t mix_bits(uint32_t u, uint32_t v) { return (u & UMASK) | (v & LMASK); }
35+
uint32_t twist(uint32_t u, uint32_t v) { return (mix_bits(u, v) >> 1) ^ ((v & 1) ? MATRIX_A : 0); }
36+
void next_state() {
37+
uint32_t* p = s.state_.data();
38+
s.left_ = N;
39+
s.next_ = 0;
40+
for (int j = N - M + 1; --j; p++)
41+
p[0] = p[M] ^ twist(p[0], p[1]);
42+
for (int j = M; --j; p++)
43+
p[0] = p[M - N] ^ twist(p[0], p[1]);
44+
p[0] = p[M - N] ^ twist(p[0], s.state_[0]);
45+
}
46+
47+
uint32_t rand_uint32() {
48+
if (--s.left_ == 0)
49+
next_state();
50+
uint32_t y = s.state_[s.next_++];
51+
y ^= (y >> 11);
52+
y ^= (y << 7) & 0x9d2c5680U;
53+
y ^= (y << 15) & 0xefc60000U;
54+
y ^= (y >> 18);
55+
return y;
56+
}
57+
58+
uint64_t rand_uint64() {
59+
uint64_t high = (uint64_t)rand_uint32();
60+
uint64_t low = (uint64_t)rand_uint32();
61+
return (high << 32) | low;
62+
}
63+
64+
template <typename T, typename V>
65+
T uniform_real(V val, T from, T to) {
66+
constexpr auto MASK = static_cast<V>((static_cast<uint64_t>(1) << std::numeric_limits<T>::digits) - 1);
67+
constexpr auto DIVISOR = static_cast<T>(1) / (static_cast<uint64_t>(1) << std::numeric_limits<T>::digits);
68+
T x = (val & MASK) * DIVISOR;
69+
return (x * (to - from) + from);
70+
}
71+
72+
double normal_double_value(double mean, double std) {
73+
if (s.has_next_gauss) {
74+
s.has_next_gauss = false;
75+
return s.next_gauss;
76+
}
77+
double u1 = uniform_real(rand_uint64(), 0., 1.); // double
78+
double u2 = uniform_real(rand_uint64(), 0., 1.); // double
79+
80+
double r = std::sqrt(-2.0 * std::log1p(-u2));
81+
double theta = 2.0 * 3.14159265358979323846 * u1;
82+
double value = r * std::cos(theta) * std + mean;
83+
s.next_gauss = r * std::sin(theta) * std + mean;
84+
s.has_next_gauss = true;
85+
return value;
86+
}
87+
88+
void normal_fill_16(float* data, float mean, float std) {
89+
for (int j = 0; j < 8; ++j) {
90+
float u1 = 1.0f - data[j];
91+
float u2 = data[j + 8];
92+
float r = std::sqrt(-2.0f * std::log(u1));
93+
float theta = 2.0f * 3.14159265358979323846 * u2;
94+
data[j] = r * std::cos(theta) * std + mean;
95+
data[j + 8] = r * std::sin(theta) * std + mean;
96+
}
97+
}
98+
99+
void randn(float* data, int64_t size, float mean = 0.0f, float std = 1.0f) {
100+
if (size >= 16) {
101+
for (int64_t i = 0; i < size; i++) {
102+
data[i] = uniform_real(rand_uint32(), 0.f, 1.f);
103+
}
104+
for (int64_t i = 0; i < size - 15; i += 16) {
105+
normal_fill_16(data + i, mean, std);
106+
}
107+
if (size % 16 != 0) {
108+
// Recompute the last 16 values.
109+
data = data + size - 16;
110+
for (int64_t i = 0; i < 16; i++) {
111+
data[i] = uniform_real(rand_uint32(), 0.f, 1.f);
112+
}
113+
normal_fill_16(data, mean, std);
114+
}
115+
} else {
116+
// Strange handling, hard to understand, but keeping it consistent with PyTorch.
117+
for (int64_t i = 0; i < size; i++) {
118+
data[i] = (float)normal_double_value(mean, std);
119+
}
120+
}
121+
}
122+
123+
public:
124+
MT19937RNG(uint64_t seed = 0) { manual_seed(seed); }
125+
126+
void manual_seed(uint64_t seed) override {
127+
s.seed_ = seed;
128+
s.seeded_ = true;
129+
s.state_[0] = (uint32_t)(seed & 0xffffffffU);
130+
for (int j = 1; j < N; j++) {
131+
uint32_t prev = s.state_[j - 1];
132+
s.state_[j] = 1812433253U * (prev ^ (prev >> 30)) + j;
133+
}
134+
s.left_ = 1;
135+
s.next_ = 0;
136+
s.has_next_gauss = false;
137+
}
138+
139+
std::vector<float> randn(uint32_t n) override {
140+
std::vector<float> out;
141+
out.resize(n);
142+
randn((float*)out.data(), out.size());
143+
return out;
144+
}
145+
};
146+
147+
#endif // __RNG_MT19937_HPP__

stable-diffusion.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "model.h"
44
#include "rng.hpp"
5+
#include "rng_mt19937.hpp"
56
#include "rng_philox.hpp"
67
#include "stable-diffusion.h"
78
#include "util.h"
@@ -200,6 +201,8 @@ class StableDiffusionGGML {
200201
rng = std::make_shared<STDDefaultRNG>();
201202
} else if (sd_ctx_params->rng_type == CUDA_RNG) {
202203
rng = std::make_shared<PhiloxRNG>();
204+
} else if (sd_ctx_params->rng_type == CPU_RNG) {
205+
rng = std::make_shared<MT19937RNG>();
203206
}
204207

205208
ggml_log_set(ggml_log_callback_default, nullptr);
@@ -2131,6 +2134,7 @@ enum sd_type_t str_to_sd_type(const char* str) {
21312134
// Command-line / display names of the RNG types. The entries are listed in the
// same order as the enumerators of rng_type_t (STD_DEFAULT_RNG, CUDA_RNG,
// CPU_RNG).
// NOTE(review): presumably indexed by rng_type_t when converting to/from
// strings -- confirm; if so, every new enumerator needs a matching entry here.
const char* rng_type_to_str[] = {
21322135
"std_default",
21332136
"cuda",
2137+
"cpu",
21342138
};
21352139

21362140
const char* sd_rng_type_name(enum rng_type_t rng_type) {

stable-diffusion.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ extern "C" {
3131
// Selects which random number generator implementation is created for a
// context.
enum rng_type_t {
3232
// Handled by STDDefaultRNG (`--rng std_default`).
STD_DEFAULT_RNG,
3333
// Handled by PhiloxRNG (`--rng cuda`); described in the README as consistent
// with the stable-diffusion-webui GPU RNG.
CUDA_RNG,
34+
// Handled by MT19937RNG (`--rng cpu`); described in the README as consistent
// with the comfyui RNG.
CPU_RNG,
3435
// Declared last, so its value equals the number of RNG types above.
RNG_TYPE_COUNT
3536
};
3637

0 commit comments

Comments
 (0)