
Commit 84f03d6

Beautifier layers doc (#2117)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent d101fdd commit 84f03d6

12 files changed: +107, -109 lines

12 files changed

+107
-109
lines changed

tensorflow_addons/layers/multihead_attention.py

Lines changed: 20 additions & 21 deletions
@@ -20,49 +20,48 @@
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class MultiHeadAttention(tf.keras.layers.Layer):
-    r"""
-    MultiHead Attention layer.
+    r"""MultiHead Attention layer.
 
     Defines the MultiHead Attention operation as described in
     [Attention Is All You Need](https://arxiv.org/abs/1706.03762) which takes
     in the tensors `query`, `key`, and `value`, and returns the dot-product attention
     between them:
 
-        ```python
-        mha = MultiHeadAttention(head_size=128, num_heads=12)
+    ```python
+    mha = MultiHeadAttention(head_size=128, num_heads=12)
 
-        query = tf.random.uniform((32, 20, 200)) # (batch_size, query_elements, query_depth)
-        key = tf.random.uniform((32, 15, 300)) # (batch_size, key_elements, key_depth)
-        value = tf.random.uniform((32, 15, 400)) # (batch_size, key_elements, value_depth)
+    query = tf.random.uniform((32, 20, 200)) # (batch_size, query_elements, query_depth)
+    key = tf.random.uniform((32, 15, 300)) # (batch_size, key_elements, key_depth)
+    value = tf.random.uniform((32, 15, 400)) # (batch_size, key_elements, value_depth)
 
-        attention = mha([query, key, value]) # (batch_size, query_elements, value_depth)
-        ```
+    attention = mha([query, key, value]) # (batch_size, query_elements, value_depth)
+    ```
 
     If `value` is not given then internally `value = key` will be used:
 
-        ```python
-        mha = MultiHeadAttention(head_size=128, num_heads=12)
+    ```python
+    mha = MultiHeadAttention(head_size=128, num_heads=12)
 
-        query = tf.random.uniform((32, 20, 200)) # (batch_size, query_elements, query_depth)
-        key = tf.random.uniform((32, 15, 300)) # (batch_size, key_elements, key_depth)
+    query = tf.random.uniform((32, 20, 200)) # (batch_size, query_elements, query_depth)
+    key = tf.random.uniform((32, 15, 300)) # (batch_size, key_elements, key_depth)
 
-        attention = mha([query, key]) # (batch_size, query_elements, key_depth)
-        ```
+    attention = mha([query, key]) # (batch_size, query_elements, key_depth)
+    ```
 
     Arguments:
         head_size: int, dimensionality of the `query`, `key` and `value` tensors
-          after the linear transformation.
+            after the linear transformation.
         num_heads: int, number of attention heads.
         output_size: int, dimensionality of the output space, if `None` then the
-          input dimension of
-          `value` or `key` will be used, default `None`.
+            input dimension of `value` or `key` will be used,
+            default `None`.
         dropout: float, `rate` parameter for the dropout layer that is
-          applied to attention after softmax,
+            applied to attention after softmax,
             default `0`.
         use_projection_bias: bool, whether to use a bias term after the linear
-          output projection.
+            output projection.
         return_attn_coef: bool, if `True`, return the attention coefficients as
-          an additional output argument.
+            an additional output argument.
         kernel_initializer: initializer, initializer for the kernel weights.
         kernel_regularizer: regularizer, regularizer for the kernel weights.
         kernel_constraint: constraint, constraint for the kernel weights.
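The `return_attn_coef` flag documented above is easy to miss; a minimal sketch of it, assuming the `tfa.layers` export and an attention-coefficient shape of `(batch_size, num_heads, query_elements, key_elements)`:

```python
import tensorflow as tf
import tensorflow_addons as tfa

mha = tfa.layers.MultiHeadAttention(head_size=64, num_heads=8, return_attn_coef=True)

query = tf.random.uniform((32, 20, 200))  # (batch_size, query_elements, query_depth)
key = tf.random.uniform((32, 15, 300))    # (batch_size, key_elements, key_depth)

# value is omitted, so internally value = key and the output depth is key_depth.
attention, attn_coef = mha([query, key])
print(attention.shape)  # (32, 20, 300)
print(attn_coef.shape)  # assumed: (32, 8, 20, 15)
```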

tensorflow_addons/layers/netvlad.py

Lines changed: 4 additions & 6 deletions
@@ -23,13 +23,11 @@
 class NetVLAD(tf.keras.layers.Layer):
     """Applies NetVLAD to the input.
 
-    This is a fully-differentiable version of "Vector of Locally Aggregated Descriptors" commonly used in image
-    retrieval. It is also used in audio retrieval, and audio represenation learning (ex
-    "Towards Learning a Universal Non-Semantic Representation of Speech", https://arxiv.org/abs/2002.12764).
+    This is a fully-differentiable version of "Vector of Locally Aggregated Descriptors" commonly used in image
+    retrieval.
 
-    "NetVLAD: CNN architecture for weakly supervised place recognition"
-    Relja Arandjelovic, Petr Gronat, Akihiko Torii, Tomas Pajdla, Josef Sivic.
-    https://arxiv.org/abs/1511.07247
+    See [NetVLAD: CNN architecture for weakly supervised place recognition](https://arxiv.org/abs/1511.07247) and
+    [Towards Learning a Universal Non-Semantic Representation of Speech](https://arxiv.org/abs/2002.12764).
 
     Arguments:
         num_clusters: The number of clusters to use.
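A minimal usage sketch, assuming the layer is exported as `tfa.layers.NetVLAD` and flattens the per-cluster residuals into a single `(batch_size, num_clusters * feature_dim)` descriptor:

```python
import tensorflow as tf
import tensorflow_addons as tfa

frames = tf.random.normal((8, 100, 128))   # (batch_size, num_frames, feature_dim)
vlad = tfa.layers.NetVLAD(num_clusters=16)
descriptor = vlad(frames)
print(descriptor.shape)                    # assumed: (8, 16 * 128) = (8, 2048)
```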

tensorflow_addons/layers/normalizations.py

Lines changed: 5 additions & 4 deletions
@@ -42,7 +42,7 @@ class GroupNormalization(tf.keras.layers.Layer):
     to number of channels), then this operation becomes
     identical to Instance Normalization.
 
-    Arguments
+    Arguments:
         groups: Integer, the number of groups for Group Normalization.
             Can be in the range [1, N] where N is the input dimension.
             The input dimension must be divisible by the number of groups.
@@ -59,14 +59,15 @@ class GroupNormalization(tf.keras.layers.Layer):
         beta_constraint: Optional constraint for the beta weight.
         gamma_constraint: Optional constraint for the gamma weight.
 
-    Input shape
+    Input shape:
         Arbitrary. Use the keyword argument `input_shape`
         (tuple of integers, does not include the samples axis)
         when using this layer as the first layer in a model.
 
-    Output shape
+    Output shape:
         Same shape as input.
-    References
+
+    References:
         - [Group Normalization](https://arxiv.org/abs/1803.08494)
     """
 
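A short sketch of the `groups` argument; setting `groups` equal to the channel count recovers Instance Normalization, as the docstring notes:

```python
import tensorflow as tf
import tensorflow_addons as tfa

x = tf.random.normal((4, 28, 28, 8))                   # channels-last input
gn = tfa.layers.GroupNormalization(groups=4, axis=-1)  # 4 groups of 2 channels
y = gn(x)
print(y.shape)  # (4, 28, 28, 8) -- same shape as the input
```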

tensorflow_addons/layers/optical_flow.py

Lines changed: 3 additions & 7 deletions
@@ -34,10 +34,7 @@ def _correlation_cost(
 ):
     """Correlation Cost Volume computation.
 
-    "FlowNet: Learning Optical Flow with Convolutional Networks"
-    Philipp Fischer, Alexey Dosovitskiy, Eddy Ilg, Philip Hausser,
-    Caner Hazirbas, Vladimir Golkov, Patrick van der Smagt,
-    Daniel Cremers, Thomas Brox. https://arxiv.org/abs/1504.06852
+    See [FlowNet: Learning Optical Flow with Convolutional Networks](https://arxiv.org/abs/1504.06852).
 
     Computes a cost volume using correlation for two inputs. For feature
     maps A, B with spatial dimensions w, h, c it computes
@@ -142,9 +139,8 @@ def _correlation_cost_grad(op, grad_output):
 class CorrelationCost(tf.keras.layers.Layer):
     """Correlation Cost Layer.
 
-    This layer implements the correlation operation from FlowNet Learning
-    Optical Flow with Convolutional Networks (Fischer et al.):
-    https://arxiv.org/abs/1504.06
+    This layer implements the correlation operation from [FlowNet: Learning
+    Optical Flow with Convolutional Networks](https://arxiv.org/abs/1504.06852) (Fischer et al.).
 
     Args:
         kernel_size: An integer specifying the height and width of the
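A hedged sketch of the layer on two feature maps; the argument list mirrors the `kernel_size` entry above, and the displacement/stride/pad values are illustrative FlowNet-style assumptions:

```python
import tensorflow as tf
import tensorflow_addons as tfa

a = tf.random.normal((1, 32, 32, 64))  # feature map A
b = tf.random.normal((1, 32, 32, 64))  # feature map B

corr = tfa.layers.CorrelationCost(
    kernel_size=1,
    max_displacement=4,
    stride_1=1,
    stride_2=1,
    pad=4,
    data_format="channels_last",
)
# One output channel per displacement: (2 * 4 / 1 + 1) ** 2 = 81 here.
cost_volume = corr([a, b])
```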

tensorflow_addons/layers/poincare.py

Lines changed: 3 additions & 5 deletions
@@ -21,12 +21,10 @@
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class PoincareNormalize(tf.keras.layers.Layer):
-    """Project into the Poincare ball with norm <= 1.0 - epsilon.
+    """Project into the Poincare ball with `norm <= 1.0 - epsilon`.
 
-    https://en.wikipedia.org/wiki/Poincare_ball_model
-
-    Used in Poincare Embeddings for Learning Hierarchical Representations
-    Maximilian Nickel, Douwe Kiela https://arxiv.org/pdf/1705.08039.pdf
+    See [Poincaré Embeddings for Learning Hierarchical Representations](https://arxiv.org/pdf/1705.08039.pdf)
+    and the [Poincaré ball model](https://en.wikipedia.org/wiki/Poincare_ball_model) on Wikipedia.
 
     For a 1-D tensor with `axis = 0`, computes
 
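A small sketch of the projection, assuming the `axis` and `epsilon` keyword arguments of the layer:

```python
import tensorflow as tf
import tensorflow_addons as tfa

x = tf.constant([[3.0, 4.0],    # norm 5.0 -> scaled onto the ball boundary
                 [0.3, 0.4]])   # norm 0.5 -> left unchanged
out = tfa.layers.PoincareNormalize(axis=1)(x)
# Rows with norm > 1 - epsilon end up with norm = 1 - epsilon;
# rows already inside the ball keep their norm.
print(tf.norm(out, axis=1))  # ~[1.0, 0.5]
```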

tensorflow_addons/layers/polynomial.py

Lines changed: 14 additions & 15 deletions
@@ -32,24 +32,23 @@ class PolynomialCrossing(tf.keras.layers.Layer):
     is the output of the previous `PolynomialCrossing` layer in the stack, i.e.,
     the i-th `PolynomialCrossing` layer.
 
-    The output is x_{i+1} = x0 .* (W * x_i + diag_scale * x_i) + bias + xi, where .* designates elementwise
-    multiplication, W could be a full rank matrix, or a low rank matrix U*V to reduce the computational cost,
+    The output is `x[i+1] = x0 .* (W * x[i] + diag_scale * x[i]) + bias + x[i]`, where `.*` designates elementwise
+    multiplication, `W` could be a full rank matrix, or a low rank matrix `U*V` to reduce the computational cost,
     and diag_scale increases the diagonal of W to improve training stability (especially for the low rank case).
 
-    References
-    See [R. Wang](https://arxiv.org/pdf/1708.05123.pdf)
+    See [Deep & Cross Network for Ad Click Predictions](https://arxiv.org/pdf/1708.05123.pdf).
 
     Example:
 
-        ```python
-        # after embedding layer in a functional model:
-        input = tf.keras.Input(shape=(None,), name='index', dtype=tf.int64)
-        x0 = tf.keras.layers.Embedding(input_dim=32, output_dim=6))
-        x1 = PolynomialCrossing(projection_dim=None)((x0, x0))
-        x2 = PolynomialCrossing(projection_dim=None)((x0, x1))
-        logits = tf.keras.layers.Dense(units=10)(x2)
-        model = tf.keras.Model(input, logits)
-        ```
+    ```python
+    # after embedding layer in a functional model:
+    input = tf.keras.Input(shape=(None,), name='index', dtype=tf.int64)
+    x0 = tf.keras.layers.Embedding(input_dim=32, output_dim=6)(input)
+    x1 = PolynomialCrossing(projection_dim=None)((x0, x0))
+    x2 = PolynomialCrossing(projection_dim=None)((x0, x1))
+    logits = tf.keras.layers.Dense(units=10)(x2)
+    model = tf.keras.Model(input, logits)
+    ```
 
     Arguments:
         projection_dim: project dimension to reduce the computational cost.
@@ -69,10 +68,10 @@ class PolynomialCrossing(tf.keras.layers.Layer):
         bias_regularizer: Regularizer instance to use on bias vector.
 
     Input shape:
-        A tuple of 2 (batch_size, `input_dim`) dimensional inputs.
+        A tuple of 2 `(batch_size, input_dim)` dimensional inputs.
 
     Output shape:
-        A single (batch_size, `input_dim`) dimensional output.
+        A single `(batch_size, input_dim)` dimensional output.
     """
 
     @typechecked
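The docstring example produces a 3-D embedding while the documented input shape is a pair of 2-D `(batch_size, input_dim)` tensors, so a runnable variant needs to collapse the sequence axis first; a sketch (the pooling step is an illustrative assumption):

```python
import tensorflow as tf
from tensorflow_addons.layers import PolynomialCrossing

index = tf.keras.Input(shape=(None,), name="index", dtype=tf.int64)
emb = tf.keras.layers.Embedding(input_dim=32, output_dim=6)(index)
x0 = tf.keras.layers.GlobalAveragePooling1D()(emb)      # (batch_size, 6)
x1 = PolynomialCrossing(projection_dim=None)((x0, x0))
x2 = PolynomialCrossing(projection_dim=None)((x0, x1))
logits = tf.keras.layers.Dense(units=10)(x2)
model = tf.keras.Model(index, logits)
```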

tensorflow_addons/layers/snake.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 class Snake(tf.keras.layers.Layer):
     """Snake layer to learn periodic functions with the trainable `frequency` scalar.
 
-    https://arxiv.org/abs/2006.08195
+    See [Neural Networks Fail to Learn Periodic Functions and How to Fix It](https://arxiv.org/abs/2006.08195).
 
     Arguments:
         frequency_initializer: Initializer for the `frequency` scalar.
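A minimal sketch; per the paper linked above, the activation is `x + sin^2(frequency * x) / frequency` with `frequency` trainable. It assumes Keras-style string names are accepted for `frequency_initializer`:

```python
import tensorflow as tf
import tensorflow_addons as tfa

x = tf.linspace(-3.0, 3.0, 7)[:, None]  # (7, 1) input
snake = tfa.layers.Snake(frequency_initializer="ones")
y = snake(x)                            # same shape as the input
```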

tensorflow_addons/layers/sparsemax.py

Lines changed: 2 additions & 2 deletions
@@ -20,11 +20,11 @@
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class Sparsemax(tf.keras.layers.Layer):
-    """Sparsemax activation function [1].
+    """Sparsemax activation function.
 
     The output shape is the same as the input shape.
 
-    [1]: https://arxiv.org/abs/1602.02068
+    See [From Softmax to Sparsemax: A Sparse Model of Attention and Multi-Label Classification](https://arxiv.org/abs/1602.02068).
 
     Arguments:
         axis: Integer, axis along which the sparsemax normalization is applied.
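A small sketch of what "sparse" means here: unlike softmax, sparsemax can assign exactly zero probability to low-scoring classes:

```python
import tensorflow as tf
import tensorflow_addons as tfa

logits = tf.constant([[1.0, 2.0, 0.1]])
probs = tfa.layers.Sparsemax(axis=-1)(logits)
print(probs)  # [[0., 1., 0.]] -- the low logits are truncated to exactly 0
```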

tensorflow_addons/layers/spatial_pyramid_pooling.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 class SpatialPyramidPooling2D(tf.keras.layers.Layer):
     """Performs Spatial Pyramid Pooling.
 
-    Original Paper: https://arxiv.org/pdf/1406.4729.pdf
+    See [Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition](https://arxiv.org/pdf/1406.4729.pdf).
 
     Spatial Pyramid Pooling generates a fixed-length representation
     regardless of input size/scale. It is typically used before a layer
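A hedged usage sketch, assuming a `bins` argument listing the pyramid's grid sizes; pooling into 1x1 + 2x2 + 4x4 grids would yield 21 bins per channel regardless of the input's height and width:

```python
import tensorflow as tf
import tensorflow_addons as tfa

images = tf.random.normal((2, 37, 53, 8))                # arbitrary H and W
spp = tfa.layers.SpatialPyramidPooling2D(bins=[1, 2, 4])
features = spp(images)
print(features.shape)  # assumed: (2, 21, 8), independent of H and W
```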

tensorflow_addons/layers/spectral_normalization.py

Lines changed: 25 additions & 21 deletions
@@ -19,30 +19,33 @@
 
 @tf.keras.utils.register_keras_serializable(package="Addons")
 class SpectralNormalization(tf.keras.layers.Wrapper):
-    """This wrapper controls the Lipschitz constant of the layer by
-    constraining its spectral norm.
-    This stabilizes the training of GANs.
-    Spectral Normalization for Generative Adversarial Networks:
-    https://arxiv.org/abs/1802.05957
-    Takeru Miyato, Toshiki Kataoka, Masanori Koyama, Yuichi Yoshida (2018)
-    SpectralNormalization wrapper works for keras and tf layers.
+    """Performs spectral normalization on weights.
+
+    This wrapper controls the Lipschitz constant of the layer by
+    constraining its spectral norm, which can stabilize the training of GANs.
+
+    See [Spectral Normalization for Generative Adversarial Networks](https://arxiv.org/abs/1802.05957).
+
     ```python
-        net = SpectralNormalization(
-            tf.keras.layers.Conv2D(2, 2, activation="relu"),
-            input_shape=(32, 32, 3))(x)
-        net = SpectralNormalization(
-            tf.keras.layers.Conv2D(16, 5, activation="relu"))(net)
-        net = SpectralNormalization(
-            tf.keras.layers.Dense(120, activation="relu"))(net)
-        net = SpectralNormalization(
-            tf.keras.layers.Dense(n_classes))(net)
+    net = SpectralNormalization(
+        tf.keras.layers.Conv2D(2, 2, activation="relu"),
+        input_shape=(32, 32, 3))(x)
+    net = SpectralNormalization(
+        tf.keras.layers.Conv2D(16, 5, activation="relu"))(net)
+    net = SpectralNormalization(
+        tf.keras.layers.Dense(120, activation="relu"))(net)
+    net = SpectralNormalization(
+        tf.keras.layers.Dense(n_classes))(net)
     ```
+
     Arguments:
-        layer: a layer instance.
+        layer: A `tf.keras.layers.Layer` instance that
+            has either a `kernel` or an `embeddings` attribute.
+        power_iterations: `int`, the number of iterations during normalization.
     Raises:
         AssertionError: If not initialized with a `Layer` instance.
-        ValueError: If initialized with negative `power_iterations`
-        AttributeError: If `Layer` does not contain a `kernel` or `embeddings` of weights
+        ValueError: If initialized with negative `power_iterations`.
+        AttributeError: If `layer` does not have a `kernel` or `embeddings` attribute.
     """
 
     @typechecked
@@ -99,8 +102,9 @@ def compute_output_shape(self, input_shape):
     @tf.function
     def normalize_weights(self):
         """Generate spectral normalized weights.
-        This method will update the value of self.w with the
-        spectral normalized value, so that the layer is ready for call().
+
+        This method will update the value of `self.w` with the
+        spectral normalized value, so that the layer is ready for `call()`.
         """
 
         w = tf.reshape(self.w, [-1, self.w_shape[-1]])
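The reshape above feeds a power iteration; a minimal standalone sketch of what `normalize_weights()` computes, assuming one power iteration and a persistent estimate `u` of the dominant singular vector:

```python
import tensorflow as tf

w = tf.random.normal((64, 32))  # flattened kernel: (-1, w_shape[-1])
u = tf.random.normal((1, 32))   # running singular-vector estimate

for _ in range(1):  # power_iterations
    v = tf.math.l2_normalize(tf.matmul(u, w, transpose_b=True))  # (1, 64)
    u = tf.math.l2_normalize(tf.matmul(v, w))                    # (1, 32)

sigma = tf.matmul(tf.matmul(v, w), u, transpose_b=True)  # approx. largest singular value
w_sn = w / sigma  # spectrally normalized weights, assigned back to the wrapped kernel
```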
