From dbc999f857d459edc60d7e7aeb19154efc415d42 Mon Sep 17 00:00:00 2001
From: gdarkwah <61522557+gdarkwah@users.noreply.github.com>
Date: Wed, 2 Mar 2022 13:15:56 -0600
Subject: [PATCH 001/636] update to checkpoint callback options
 (save_frequency)

Introduced a number-of-batches ('n_batches') option for the save frequency
instead of 'batch_size'. Using 'batch_size' works in this tutorial only
because the training data has 1000 samples, which rounds to ~32 batches per
epoch, coincidentally matching the batch size of 32. When the number of
samples is not 1000, the model would save at epoch frequencies other than
every 5 epochs.

The definition of 'save_freq'
(https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/ModelCheckpoint#args)
clearly refers to the number of batches ('n_batches' in this context), not
the number of samples in a batch ('batch_size').
---
 site/en/tutorials/keras/save_and_load.ipynb | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/site/en/tutorials/keras/save_and_load.ipynb b/site/en/tutorials/keras/save_and_load.ipynb
index 6e48a08a4a6..a75e01116ba 100644
--- a/site/en/tutorials/keras/save_and_load.ipynb
+++ b/site/en/tutorials/keras/save_and_load.ipynb
@@ -385,12 +385,17 @@
     "\n",
     "batch_size = 32\n",
     "\n",
+    "# Calculate the number of batches per epoch\n",
+    "import math\n",
+    "n_batches = len(train_images) / batch_size\n",
+    "n_batches = math.ceil(n_batches)   # round up to the nearest whole number of batches\n",
+    "\n",
     "# Create a callback that saves the model's weights every 5 epochs\n",
     "cp_callback = tf.keras.callbacks.ModelCheckpoint(\n",
     "    filepath=checkpoint_path, \n",
     "    verbose=1, \n",
     "    save_weights_only=True,\n",
-    "    save_freq=5*batch_size)\n",
+    "    save_freq=5*n_batches)\n",
     "\n",
     "# Create a new model instance\n",
     "model = create_model()\n",

From 8d7775038aa7714f0ce7c70844c7f1587e2535a3 Mon Sep 17 00:00:00 2001
From: Anas Neumann
Date: Wed, 25 May 2022 09:41:48 +0100
Subject: [PATCH 002/636] Update text_classification.ipynb

This line was a mistake; it probably comes from this page:
https://www.tensorflow.org/text/guide/word_embeddings (the model there is
almost the same, except for the Dense(16, activation='relu') layer).
---
 site/en/tutorials/keras/text_classification.ipynb | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/site/en/tutorials/keras/text_classification.ipynb b/site/en/tutorials/keras/text_classification.ipynb
index 3dabeeff095..1d39a064b13 100644
--- a/site/en/tutorials/keras/text_classification.ipynb
+++ b/site/en/tutorials/keras/text_classification.ipynb
@@ -650,8 +650,7 @@
     "\n",
     "1. The first layer is an `Embedding` layer. This layer takes the integer-encoded reviews and looks up an embedding vector for each word-index. These vectors are learned as the model trains. The vectors add a dimension to the output array. The resulting dimensions are: `(batch, sequence, embedding)`. To learn more about embeddings, check out the [Word embeddings](https://www.tensorflow.org/text/guide/word_embeddings) tutorial.\n",
     "2. Next, a `GlobalAveragePooling1D` layer returns a fixed-length output vector for each example by averaging over the sequence dimension. This allows the model to handle input of variable length, in the simplest way possible.\n",
-    "3. This fixed-length output vector is piped through a fully-connected (`Dense`) layer with 16 hidden units. \n",
-    "4. The last layer is densely connected with a single output node."
+    "3. 
The last layer is densely connected with a single output node." ] }, { From eaef2e3cfe61d3940846d00ba022b44db6c96996 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 1 Aug 2022 09:25:54 -0700 Subject: [PATCH 003/636] Make py_module_names a dict to capture the `{short_name: long_name}` mappings. PiperOrigin-RevId: 464552675 --- .../api_generator/generate_lib.py | 2 +- .../api_generator/reference_resolver.py | 17 +++++++++++------ .../api_generator/reference_resolver_test.py | 6 +++--- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/tools/tensorflow_docs/api_generator/generate_lib.py b/tools/tensorflow_docs/api_generator/generate_lib.py index 21bf617ceff..7eb441a0d13 100644 --- a/tools/tensorflow_docs/api_generator/generate_lib.py +++ b/tools/tensorflow_docs/api_generator/generate_lib.py @@ -384,7 +384,7 @@ def __init__( def make_reference_resolver(self, visitor): return reference_resolver_lib.ReferenceResolver.from_visitor( - visitor, py_module_names=[self._short_name]) + visitor, py_module_names={self._short_name: self._py_module.__name__}) def make_parser_config(self, visitor: doc_generator_visitor.DocGeneratorVisitor): diff --git a/tools/tensorflow_docs/api_generator/reference_resolver.py b/tools/tensorflow_docs/api_generator/reference_resolver.py index 268d23c97f7..af5d2a03d76 100644 --- a/tools/tensorflow_docs/api_generator/reference_resolver.py +++ b/tools/tensorflow_docs/api_generator/reference_resolver.py @@ -22,7 +22,7 @@ import posixpath import re -from typing import Dict, List, Optional +from typing import Optional, Union from tensorflow_docs.api_generator import parser @@ -81,11 +81,11 @@ class ReferenceResolver: def __init__( self, *, - duplicate_of: Dict[str, str], - is_fragment: Dict[str, bool], - py_module_names: List[str], + duplicate_of: dict[str, str], + is_fragment: dict[str, bool], + py_module_names: Union[list[str], dict[str, str]], link_prefix: Optional[str] = None, - physical_path: Optional[Dict[str, str]] = None, + physical_path: Optional[dict[str, str]] = None, ): """Initializes a Reference Resolver. @@ -95,7 +95,9 @@ def __init__( is_fragment: A map from full names to bool for each symbol. If True the object lives at a page fragment `tf.a.b.c` --> `tf/a/b#c`. If False object has a page to itself: `tf.a.b.c` --> `tf/a/b/c`. - py_module_names: A list of string names of Python modules. + py_module_names: A dict from short name to module name Like + `{'tf': 'tensorflow'}`. Or [deprecated] a list of short-names like + `['tf']`. link_prefix: The website to which these symbols should link to. A prefix is added before the links to enable cross-site linking if `link_prefix` is not None. 
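In other words, a short display name now maps explicitly to the importable module it documents. A minimal sketch of the two accepted forms, using the example values from the docstring above (the normalization line mirrors the conversion this change adds to `__init__`):

```python
# New, preferred form: {short_name: long_name}.
py_module_names = {'tf': 'tensorflow'}

# Deprecated list form, normalized internally so each short name maps to itself.
legacy = ['tf']
py_module_names = {short: short for short in legacy}  # -> {'tf': 'tf'}
```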
@@ -105,7 +107,10 @@ def __init__( self._duplicate_of = duplicate_of self._is_fragment = is_fragment self._physical_path = physical_path + if isinstance(py_module_names, list): + py_module_names = {short: short for short in py_module_names} self._py_module_names = py_module_names + self._link_prefix = link_prefix self._all_names = set(is_fragment.keys()) diff --git a/tools/tensorflow_docs/api_generator/reference_resolver_test.py b/tools/tensorflow_docs/api_generator/reference_resolver_test.py index fc31602eae0..b8184e88983 100644 --- a/tools/tensorflow_docs/api_generator/reference_resolver_test.py +++ b/tools/tensorflow_docs/api_generator/reference_resolver_test.py @@ -47,7 +47,7 @@ def testSaveReferenceResolver(self): 'tf.AClass2': False, 'tf.function': False } - py_module_names = ['tf', 'tfdbg'] + py_module_names = {'tf': 'tensorflow'} resolver = reference_resolver_lib.ReferenceResolver( duplicate_of=duplicate_of, @@ -79,7 +79,7 @@ def test_duplicate_fragment(self): 'tf.Class2': False, 'tf.sub.Class2': False } - py_module_names = ['tf'] + py_module_names = {'tf': 'tensorflow'} reference_resolver = reference_resolver_lib.ReferenceResolver( duplicate_of=duplicate_of, @@ -133,7 +133,7 @@ def test_partial_symbol_references(self, string, link): 'tf.contrib.y.z': False, } - py_module_names = ['tf'] + py_module_names = {'tf': 'tensorflow'} resolver = reference_resolver_lib.ReferenceResolver( duplicate_of=duplicate_of, From 75971c26d34b6a820efa3d62e522e13aff373747 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 1 Aug 2022 13:52:56 -0700 Subject: [PATCH 004/636] updated TF basics overview guide to use tf.module functionality PiperOrigin-RevId: 464615889 --- site/en/guide/basics.ipynb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/site/en/guide/basics.ipynb b/site/en/guide/basics.ipynb index 7e8a556e0a7..8ee26c5703a 100644 --- a/site/en/guide/basics.ipynb +++ b/site/en/guide/basics.ipynb @@ -698,7 +698,6 @@ " self.w_q = tf.Variable(rand_init[0])\n", " self.w_l = tf.Variable(rand_init[1])\n", " self.b = tf.Variable(rand_init[2])\n", - " self.vars = [self.w_q, self.w_l, self.b]\n", " \n", " @tf.function\n", " def __call__(self, x):\n", @@ -820,8 +819,8 @@ " with tf.GradientTape() as tape:\n", " batch_loss = mse_loss(quad_model(x_batch), y_batch)\n", " # Update parameters with respect to the gradient calculations\n", - " grads = tape.gradient(batch_loss, quad_model.vars)\n", - " for g,v in zip(grads, quad_model.vars):\n", + " grads = tape.gradient(batch_loss, quad_model.variables)\n", + " for g,v in zip(grads, quad_model.variables):\n", " v.assign_sub(learning_rate*g)\n", " # Keep track of model loss per epoch\n", " loss = mse_loss(quad_model(x), y)\n", From c7e6c58022db8248b0bf29771091dc83ba6a7225 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 2 Aug 2022 13:40:47 -0700 Subject: [PATCH 005/636] Use fewer epochs for CycleGan. PiperOrigin-RevId: 464875766 --- site/en/tutorials/generative/cyclegan.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/site/en/tutorials/generative/cyclegan.ipynb b/site/en/tutorials/generative/cyclegan.ipynb index 4dae5b77791..56ee1ae382c 100644 --- a/site/en/tutorials/generative/cyclegan.ipynb +++ b/site/en/tutorials/generative/cyclegan.ipynb @@ -634,7 +634,7 @@ "source": [ "## Training\n", "\n", - "Note: This example model is trained for fewer epochs (40) than the paper (200) to keep training time reasonable for this tutorial. Predictions may be less accurate. 
" + "Note: This example model is trained for fewer epochs (10) than the paper (200) to keep training time reasonable for this tutorial. The generated images will have much lower quality." ] }, { @@ -645,7 +645,7 @@ }, "outputs": [], "source": [ - "EPOCHS = 40" + "EPOCHS = 10" ] }, { From 336b3db9731ecefa99c02b478fbe2639364d93ce Mon Sep 17 00:00:00 2001 From: Olzhas Akpambetov Date: Wed, 3 Aug 2022 13:41:18 -0700 Subject: [PATCH 006/636] Fix external link formatting, lint NMT with attention tutorial PiperOrigin-RevId: 465137234 --- site/en/install/gpu_plugins.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/site/en/install/gpu_plugins.md b/site/en/install/gpu_plugins.md index 290cd48cf76..1083daa949a 100644 --- a/site/en/install/gpu_plugins.md +++ b/site/en/install/gpu_plugins.md @@ -4,7 +4,7 @@ Note: This page is for non-NVIDIA® GPU devices. For NVIDIA® GPU support, go to the [Install TensorFlow with pip](./pip.md) guide. TensorFlow's -[pluggable device](https://github.com/tensorflow/community/blob/master/rfcs/20200624-pluggable-device-for-tensorflow.md){.external} +[pluggable device](https://github.com/tensorflow/community/blob/master/rfcs/20200624-pluggable-device-for-tensorflow.md){:.external} architecture adds new device support as separate plug-in packages that are installed alongside the official TensorFlow package. @@ -64,5 +64,5 @@ Metal `PluggableDevice` for macOS GPUs: DirectML `PluggableDevice` for Windows and WSL (preview): * [PyPI wheel](https://pypi.org/project/tensorflow-directml-plugin/){:.external}. -* [GitHub repo](https://github.com/microsoft/tensorflow-directml-plugin){.external}. +* [GitHub repo](https://github.com/microsoft/tensorflow-directml-plugin){:.external}. * For questions, feedback or to raise issues, please visit the [Issues page of `tensorflow-directml-plugin` on GitHub](https://github.com/microsoft/tensorflow-directml-plugin/issues){:.external}. From 1a1909b87979d77988e5ec98edb55b1a2a19f768 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Thu, 4 Aug 2022 08:58:57 -0700 Subject: [PATCH 007/636] Update pip.md --- site/en/install/pip.md | 223 ++++++++++++++++++++++++----------------- 1 file changed, 133 insertions(+), 90 deletions(-) diff --git a/site/en/install/pip.md b/site/en/install/pip.md index 6bc4a61e5c8..0e69a84c09f 100644 --- a/site/en/install/pip.md +++ b/site/en/install/pip.md @@ -3,10 +3,10 @@ This guide is for the latest stable version of TensorFlow. For the preview build *(nightly)*, please use the pip package named `tf-nightly`. Refer to [these tables](./source#tested_build_configurations) for -older TensorFlow version requirements. For TensorFlow 1.x users, please refer to -the [migration guide](../guide/migrate) to upgrade to TensorFlow 2. +older TensorFlow version requirements. For the CPU-only build use the pip +package named `tensorflow-cpu` -Here is a lookup table for the install commands. Scroll down for the +Here are the quick versions of the install commands. Scroll down for the step-by-step instructions. * {Linux} @@ -28,10 +28,37 @@ step-by-step instructions. python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" ``` -* {Windows} +* {Windows Native} + Caution: The current TensorFlow version, `2.10`, is the **last** TensorFlow + release that will support GPU on native-Windows. 
+ Starting with TensorFlow `2.11`, you will need to install + [TensorFlow in WSL2](https://tensorflow.org/install/pip#windows-wsl2), + or install `tensorflow_cpu` and, optionally, try the + [TensorFlow-DirectML-Plugin](https://github.com/microsoft/tensorflow-directml-plugin#tensorflow-directml-plugin-) + + ```bash + conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 + python3 -m pip install tensorflow + # Verify install: + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + +* {Windows WSL2} + + Note: TensorFlow is supported on WSL2 on Windows 10 19044 or higher with GPU + access is now available. This corresponds to Windows 10 version + 21H2, the November 2021 update. You can get the latest update from here: + [Download Windows 10](https://www.microsoft.com/en-us/software-download/windows10){:.external}. + For instructions, please see + [Install WSL2](https://docs.microsoft.com/en-us/windows/wsl/install){:.external} + and + [NVIDIA’s setup docs](https://docs.nvidia.com/cuda/wsl-user-guide/index.html){:.external} + for CUDA in WSL. + ```bash conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/ python3 -m pip install tensorflow # Verify install: python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" @@ -105,120 +132,123 @@ The following NVIDIA® software are only required for GPU support. ## Step-by-step instructions +{% setvar linux_instructions %} +We only officially support Ubuntu. However, the following instructions may +also work for other Linux distros. -* {Linux} +We recommend using +[Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} to +create a separate environment to avoid changing any installed software in +your system. This is also the easiest way to install the required software, +especially for the GPU setup. - We only officially support Ubuntu. However, the following instructions may - also work for other Linux distros. +### 1. Install Miniconda - We recommend using - [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} to - create a separate environment to avoid changing any installed software in - your system. This is also the easiest way to install the required software, - especially for the GPU setup. +You can use the following command to install Miniconda. During installation, +you may need to press enter and type "yes". - ### 1. Install Miniconda +```bash +curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -o Miniconda3-latest-Linux-x86_64.sh +bash Miniconda3-latest-Linux-x86_64.sh +``` - You can use the following command to install Miniconda. During installation, - you may need to press enter and type "yes". +You may need to restart your terminal or `source ~/.bashrc` to enable the +`conda` command. Use `conda -V` to test if it is installed successfully. - ```bash - curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -o Miniconda3-latest-Linux-x86_64.sh - bash Miniconda3-latest-Linux-x86_64.sh - ``` +### 2. Create a conda environment - You may need to restart your terminal or `source ~/.bashrc` to enable the - `conda` command. Use `conda -V` to test if it is installed successfully. + Create a new conda environment named `tf` with the following command. - ### 2. Create a conda environment +```bash +conda create --name tf python=3.9 +``` - Create a new conda environment named `tf` with the following command. 
+You can deactivate and activate it with the following commands. - ```bash - conda create --name tf python=3.9 - ``` +```bash +conda deactivate +conda activate tf +``` - You can deactivate and activate it with the following commands. +Please make sure it is activated for the rest of the installation. - ```bash - conda deactivate - conda activate tf - ``` +### 3. GPU setup - Please make sure it is activated for the rest of the installation. +You can skip this section if you only run TensorFlow on CPU. - ### 3. GPU setup +First, we need to install +[NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx){:.external} +if you have not. You can use the following command to verify it is +installed. - You can skip this section if you only run TensorFlow on CPU. +```bash +nvidia-smi +``` - First, we need to install - [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx){:.external} - if you have not. You can use the following command to verify it is - installed. +Then, we install the CUDA, cuDNN with conda. - ```bash - nvidia-smi - ``` +```bash +conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 +``` - Then, we install the CUDA, cuDNN with conda. +Configure the system paths. You can do it with following command everytime +your start a new terminal after activating your conda environment. - ```bash - conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 - ``` +```bash +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/ +``` - Configure the system paths. You can do it with following command everytime - your start a new terminal after activating your conda environment. +However, for your convenience, we recommend automating it with the following +commands. The system paths will be automatically configured when you +activate this conda environment. - ```bash - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/ - ``` +```bash +mkdir -p $CONDA_PREFIX/etc/conda/activate.d +echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/' > $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh +``` - However, for your convenience, we recommend automating it with the following - commands. The system paths will be automatically configured when you - activate this conda environment. +### 4. Install TensorFlow - ```bash - mkdir -p $CONDA_PREFIX/etc/conda/activate.d - echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/' > $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh - ``` +TensorFlow requires a recent version of pip, so upgrade your pip +installation to be sure you're running the latest version. - ### 4. Install TensorFlow +```bash +pip install --upgrade pip +``` - TensorFlow requires a recent version of pip, so upgrade your pip - installation to be sure you're running the latest version. +Then, install TensorFlow with pip. - ```bash - pip install --upgrade pip - ``` +Note: Do not install with conda. It may not have the latest stable +version. We recommend using pip since TensorFlow is only +officially released to PyPI. - Then, install TensorFlow with pip. +```bash +pip install tensorflow +``` - Note: Do not install with conda. It may not have the latest stable - version. We recommend using pip since TensorFlow is only - officially released to PyPI. +### 5. Verify install - ```bash - pip install tensorflow - ``` +Verify the CPU setup: - ### 5. Verify install +```bash +python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" +``` - Verify the CPU setup: +If a tensor is returned, you've installed TensorFlow successfully. 
- ```bash - python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" - ``` +Verify the GPU setup: - If a tensor is returned, you've installed TensorFlow successfully. +```bash +python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" +``` - Verify the GPU setup: +If a list of GPU devices is returned, you've installed TensorFlow +successfully. +{% endsetvar %} - ```bash - python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" - ``` +* {Linux} - If a list of GPU devices is returned, you've installed TensorFlow - successfully. + {{ linux_instructions }} * {MacOS} @@ -309,15 +339,14 @@ The following NVIDIA® software are only required for GPU support. If a tensor is returned, you've installed TensorFlow successfully. -* {Windows} +* {Windows Native} - Note: Experimental support for WSL2 on Windows 10 19044 or higher with GPU - access is now available. This corresponds to Windows 10 version - 21H2, the November 2021 update. You can get the latest update from here: - [Download Windows 10](https://www.microsoft.com/en-us/software-download/windows10){:.external}. - For instructions, please see - [NVIDIA’s setup docs](https://docs.nvidia.com/cuda/wsl-user-guide/index.html){:.external} - for CUDA in WSL. + Caution: The current TensorFlow version, `2.10`, is the **last** TensorFlow + release that will support GPU on native-Windows. + Starting with TensorFlow `2.11`, you will need to install + [TensorFlow in WSL2](https://tensorflow.org/install/pip#windows-[wsl2]), + or install `tensorflow_cpu` and, optionally, try the + [TensorFlow-DirectML-Plugin](https://github.com/microsoft/tensorflow-directml-plugin#tensorflow-directml-plugin-) ### 1. Install Microsoft Visual C++ Redistributable @@ -418,6 +447,20 @@ The following NVIDIA® software are only required for GPU support. If a list of GPU devices is returned, you've installed TensorFlow successfully. +* {Windows WSL2} + + Note: TensorFlow is supported for WSL2 on Windows 10 19044 or higher with GPU + access is now available. This corresponds to Windows 10 version + 21H2, the November 2021 update. You can get the latest update from here: + [Download Windows 10](https://www.microsoft.com/en-us/software-download/windows10){:.external}. + For instructions, please see + [Install WSL2](https://docs.microsoft.com/en-us/windows/wsl/install){:.external} + and + [NVIDIA’s setup docs](https://docs.nvidia.com/cuda/wsl-user-guide/index.html){:.external} + for CUDA in WSL. + + {{ linux_instructions }} + ## Package location A few installation mechanisms require the URL of the TensorFlow Python package. From a30144caa1df397fe676ac871b3c168f3aa31269 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Thu, 4 Aug 2022 09:02:00 -0700 Subject: [PATCH 008/636] formatting --- site/en/install/pip.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/site/en/install/pip.md b/site/en/install/pip.md index 0e69a84c09f..1499a1b11e3 100644 --- a/site/en/install/pip.md +++ b/site/en/install/pip.md @@ -133,6 +133,7 @@ The following NVIDIA® software are only required for GPU support. ## Step-by-step instructions {% setvar linux_instructions %} + We only officially support Ubuntu. However, the following instructions may also work for other Linux distros. @@ -244,6 +245,7 @@ python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU' If a list of GPU devices is returned, you've installed TensorFlow successfully. 
+ {% endsetvar %} * {Linux} From 6ca36ba030f34532ffba61419b49a682db82acf6 Mon Sep 17 00:00:00 2001 From: 8bitmp3 <19637339+8bitmp3@users.noreply.github.com> Date: Thu, 4 Aug 2022 15:07:16 -0700 Subject: [PATCH 009/636] Lint Install TensorFlow with pip guide --- site/en/install/pip.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/site/en/install/pip.md b/site/en/install/pip.md index 1499a1b11e3..7a7dc847629 100644 --- a/site/en/install/pip.md +++ b/site/en/install/pip.md @@ -134,10 +134,10 @@ The following NVIDIA® software are only required for GPU support. {% setvar linux_instructions %} -We only officially support Ubuntu. However, the following instructions may -also work for other Linux distros. +TensorFlow only officially supports Ubuntu. However, the following instructions may +also work for other Linux distributions. -We recommend using +It's recommended to use [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} to create a separate environment to avoid changing any installed software in your system. This is also the easiest way to install the required software, @@ -175,9 +175,9 @@ Please make sure it is activated for the rest of the installation. ### 3. GPU setup -You can skip this section if you only run TensorFlow on CPU. +You can skip this section if you only run TensorFlow on the CPU. -First, we need to install +First, you need to install the [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx){:.external} if you have not. You can use the following command to verify it is installed. @@ -192,14 +192,14 @@ Then, we install the CUDA, cuDNN with conda. conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 ``` -Configure the system paths. You can do it with following command everytime +Configure the system paths. You can do it using the following command every time your start a new terminal after activating your conda environment. ```bash export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/ ``` -However, for your convenience, we recommend automating it with the following +However, for your convenience, it's recommended to automate it with the following commands. The system paths will be automatically configured when you activate this conda environment. @@ -219,8 +219,8 @@ pip install --upgrade pip Then, install TensorFlow with pip. -Note: Do not install with conda. It may not have the latest stable -version. We recommend using pip since TensorFlow is only +Note: Do not install with conda because it may not have the latest stable +version. It's recommended to use pip because TensorFlow is only officially released to PyPI. ```bash From 5a55ff570908dc6b08ead380b4db99ce692d3d57 Mon Sep 17 00:00:00 2001 From: 8bitmp3 <19637339+8bitmp3@users.noreply.github.com> Date: Thu, 4 Aug 2022 15:10:12 -0700 Subject: [PATCH 010/636] Lint Install TensorFlow with pip guide --- site/en/install/pip.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/site/en/install/pip.md b/site/en/install/pip.md index 7a7dc847629..5ba07f321aa 100644 --- a/site/en/install/pip.md +++ b/site/en/install/pip.md @@ -22,7 +22,7 @@ step-by-step instructions. * {MacOS} ```bash - # Currently, we do not have official GPU support for MacOS. + # Currently, there is no official GPU support for MacOS. python3 -m pip install tensorflow # Verify install: python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" @@ -186,7 +186,7 @@ installed. 
nvidia-smi ``` -Then, we install the CUDA, cuDNN with conda. +Then, install the CUDA, cuDNN with conda. ```bash conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 @@ -263,8 +263,8 @@ successfully. you need those libraries, you will have to use TensorFlow with x86 emulation and Rosetta. - Currently, we do not have official GPU support for running TensorFlow on - MacOS. The following is instructions are for running on CPU. + Currently, there is no official GPU support for running TensorFlow on + MacOS. The following is instructions are for running on the CPU. ### 1. Check Python version @@ -278,7 +278,7 @@ successfully. ``` If you have the correct version of Python and pip, you may skip the next two - steps and go to "4. Install TensorFlow". However, we still recommend not + steps and go to "4. Install TensorFlow". However, it's still recommended not skipping the steps. Use [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} to install Python and pip. It create a separate environment to avoid @@ -326,7 +326,7 @@ successfully. Then, install TensorFlow with pip. Note: Do not install with conda. It may not have the latest stable - version. We recommend using pip since TensorFlow is only + version. It's recommended to use pip since TensorFlow is only officially released to PyPI. ```bash @@ -370,7 +370,7 @@ successfully. ### 2. Install Miniconda - We recommend using + It's recommended to use [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} to create a separate environment to avoid changing any installed software in your system. This is also the easiest way to install the required software, @@ -401,11 +401,11 @@ successfully. You can skip this section if you only run TensorFlow on CPU. - First, we need to install + First, you need to install [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx){:.external} if you have not. - Then, we install the CUDA, cuDNN with conda. + Then, install the CUDA, cuDNN with conda. ```bash conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 @@ -423,7 +423,7 @@ successfully. Then, install TensorFlow with pip. Note: Do not install with conda. It may not have the latest stable - version. We recommend using pip since TensorFlow is only + version. It's recommended to use pip since TensorFlow is only officially released to PyPI. ```bash From 7e3850c919523bd9080eab45d27d93795129f953 Mon Sep 17 00:00:00 2001 From: 8bitmp3 <19637339+8bitmp3@users.noreply.github.com> Date: Thu, 4 Aug 2022 15:15:03 -0700 Subject: [PATCH 011/636] Fix minor typos/grammar in Install TensorFlow with pip guide --- site/en/install/pip.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/site/en/install/pip.md b/site/en/install/pip.md index 5ba07f321aa..8b2ccc152fd 100644 --- a/site/en/install/pip.md +++ b/site/en/install/pip.md @@ -193,7 +193,7 @@ conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 ``` Configure the system paths. You can do it using the following command every time -your start a new terminal after activating your conda environment. +you start a new terminal after activating your conda environment. ```bash export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/ @@ -264,7 +264,7 @@ successfully. and Rosetta. Currently, there is no official GPU support for running TensorFlow on - MacOS. The following is instructions are for running on the CPU. + MacOS. The following instructions are for running on the CPU. ### 1. Check Python version @@ -281,7 +281,7 @@ successfully. steps and go to "4. 
Install TensorFlow". However, it's still recommended not skipping the steps. Use [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} to - install Python and pip. It create a separate environment to avoid + install Python and pip. It creates a separate environment to avoid changing any installed software in your system. ### 2. Install Miniconda @@ -399,7 +399,7 @@ successfully. ### 4. GPU setup - You can skip this section if you only run TensorFlow on CPU. + You can skip this section if you only run TensorFlow on the CPU. First, you need to install [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx){:.external} From df2261fb31b07753e63d70a1c27c92464c7c787b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 5 Aug 2022 09:25:46 -0700 Subject: [PATCH 012/636] updated TF basics overview guide to use cleaner math notation PiperOrigin-RevId: 465580297 --- site/en/guide/basics.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/site/en/guide/basics.ipynb b/site/en/guide/basics.ipynb index 8ee26c5703a..dce5bdfa440 100644 --- a/site/en/guide/basics.ipynb +++ b/site/en/guide/basics.ipynb @@ -761,9 +761,9 @@ "source": [ "Now, define a loss for your model:\n", "\n", - "Given that this model is intended to predict continuous values, the mean squared error (MSE) is a good choice for the loss function. The MSE is defined as the mean of the squared differences between the predicted values and the ground truth. \n", + "Given that this model is intended to predict continuous values, the mean squared error (MSE) is a good choice for the loss function. Given a vector of predictions, $\\hat{y}$, and a vector of true targets, $y$, the MSE is defined as the mean of the squared differences between the predicted values and the ground truth.\n", "\n", - "$MSE = \\frac{1}{n}\\sum_{i=1}^{n}({y_{pred\\_i}}-y_i)^2$" + "$MSE = \\frac{1}{m}\\sum_{i=1}^{m}(\\hat{y}_i -y_i)^2$" ] }, { From ccbc0e50126ad431028b064d52c306014aa1c251 Mon Sep 17 00:00:00 2001 From: William Muir Date: Sat, 6 Aug 2022 06:09:18 -0500 Subject: [PATCH 013/636] Bump libtensorflow to v2.9.1 --- site/en/install/lang_c.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/site/en/install/lang_c.ipynb b/site/en/install/lang_c.ipynb index da4bbf03fc6..b5d308551f4 100644 --- a/site/en/install/lang_c.ipynb +++ b/site/en/install/lang_c.ipynb @@ -130,25 +130,25 @@ " Linux\n", " \n", " Linux CPU only\n", - " https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-linux-x86_64-2.8.0.tar.gz\n", + " https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-linux-x86_64-2.9.1.tar.gz\n", " \n", " \n", " Linux GPU support\n", - " https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-2.8.0.tar.gz\n", + " https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-2.9.1.tar.gz\n", " \n", " macOS\n", " \n", " macOS CPU only\n", - " https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-darwin-x86_64-2.8.0.tar.gz\n", + " https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-darwin-x86_64-2.9.1.tar.gz\n", " \n", " Windows\n", " \n", " Windows CPU only\n", - " https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-windows-x86_64-2.8.0.zip\n", + " https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-windows-x86_64-2.9.1.zip\n", " \n", " \n", " Windows GPU only\n", - " 
https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-windows-x86_64-2.8.0.zip\n", + " https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-windows-x86_64-2.9.1.zip\n", " \n", "" ] @@ -174,7 +174,7 @@ "outputs": [], "source": [ "%%bash\n", - "FILENAME=libtensorflow-cpu-linux-x86_64-2.8.0.tar.gz\n", + "FILENAME=libtensorflow-cpu-linux-x86_64-2.9.1.tar.gz\n", "wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/libtensorflow/${FILENAME}\n", "sudo tar -C /usr/local -xzf ${FILENAME}" ] From 84adf2c04d03b7a2ff5a94608a19120761be4f6f Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 8 Aug 2022 17:11:17 -0700 Subject: [PATCH 014/636] Resolve all comments. --- site/en/install/pip.md | 174 +++++++++++++++++++++++------------------ 1 file changed, 97 insertions(+), 77 deletions(-) diff --git a/site/en/install/pip.md b/site/en/install/pip.md index 8b2ccc152fd..aee909a2d4e 100644 --- a/site/en/install/pip.md +++ b/site/en/install/pip.md @@ -22,7 +22,7 @@ step-by-step instructions. * {MacOS} ```bash - # Currently, there is no official GPU support for MacOS. + # There is currently no official GPU support for MacOS. python3 -m pip install tensorflow # Verify install: python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" @@ -46,12 +46,12 @@ step-by-step instructions. * {Windows WSL2} - Note: TensorFlow is supported on WSL2 on Windows 10 19044 or higher with GPU - access is now available. This corresponds to Windows 10 version - 21H2, the November 2021 update. You can get the latest update from here: - [Download Windows 10](https://www.microsoft.com/en-us/software-download/windows10){:.external}. + Note: TensorFlow with GPU access is supported for WSL2 on Windows 10 19044 or + higher. This corresponds to Windows 10 version 21H2, the November 2021 + update. You can get the latest update from here: + [Download Windows 10](https://www.microsoft.com/software-download/windows10){:.external}. For instructions, please see - [Install WSL2](https://docs.microsoft.com/en-us/windows/wsl/install){:.external} + [Install WSL2](https://docs.microsoft.com/windows/wsl/install){:.external} and [NVIDIA’s setup docs](https://docs.nvidia.com/cuda/wsl-user-guide/index.html){:.external} for CUDA in WSL. @@ -108,7 +108,8 @@ enable compute capabilities by [building TensorFlow from source](./source.md). * Ubuntu 16.04 or higher (64-bit) * macOS 10.12.6 (Sierra) or higher (64-bit) *(no GPU support)* -* Windows 7 or higher (64-bit) +* Windows Native - Windows 7 or higher (64-bit) +* Windows WSL2 - Windows 10 19044 or higher (64-bit) Note: GPU support is available for Ubuntu and Windows with CUDA®-enabled cards. @@ -116,10 +117,11 @@ Note: GPU support is available for Ubuntu and Windows with CUDA®-enabled cards. * Python 3.7–3.10 * pip version 19.0 or higher for Linux (requires `manylinux2010` support) and - Windows, version 20.3 or higher for macOS -* Windows Requires + Windows. pip version 20.3 or higher for macOS. +* Windows Native Requires [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads){:.external} + The following NVIDIA® software are only required for GPU support. * [NVIDIA® GPU drivers](https://www.nvidia.com/drivers){:.external} @@ -134,16 +136,13 @@ The following NVIDIA® software are only required for GPU support. {% setvar linux_instructions %} -TensorFlow only officially supports Ubuntu. 
However, the following instructions may -also work for other Linux distributions. - -It's recommended to use -[Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} to -create a separate environment to avoid changing any installed software in -your system. This is also the easiest way to install the required software, -especially for the GPU setup. +### 2. Install Miniconda -### 1. Install Miniconda +[Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} is the +recommended approach for installing TensorFlow with GPU support. +It creates a separate environment to avoid changing any installed +software in your system. This is also the easiest way to install the required +software especially for the GPU setup. You can use the following command to install Miniconda. During installation, you may need to press enter and type "yes". @@ -156,7 +155,7 @@ bash Miniconda3-latest-Linux-x86_64.sh You may need to restart your terminal or `source ~/.bashrc` to enable the `conda` command. Use `conda -V` to test if it is installed successfully. -### 2. Create a conda environment +### 3. Create a conda environment Create a new conda environment named `tf` with the following command. @@ -173,11 +172,11 @@ conda activate tf Please make sure it is activated for the rest of the installation. -### 3. GPU setup +### 4. GPU setup -You can skip this section if you only run TensorFlow on the CPU. +You can skip this section if you only run TensorFlow on CPU. -First, you need to install the +First install the [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx){:.external} if you have not. You can use the following command to verify it is installed. @@ -186,20 +185,20 @@ installed. nvidia-smi ``` -Then, install the CUDA, cuDNN with conda. +Then install CUDA and cuDNN with conda. ```bash conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 ``` -Configure the system paths. You can do it using the following command every time -you start a new terminal after activating your conda environment. +Configure the system paths. You can do it with following command everytime +your start a new terminal after activating your conda environment. ```bash export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/ ``` -However, for your convenience, it's recommended to automate it with the following +For your convenience it is recommended that you automate it with the following commands. The system paths will be automatically configured when you activate this conda environment. @@ -208,7 +207,7 @@ mkdir -p $CONDA_PREFIX/etc/conda/activate.d echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/' > $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh ``` -### 4. Install TensorFlow +### 5. Install TensorFlow TensorFlow requires a recent version of pip, so upgrade your pip installation to be sure you're running the latest version. @@ -219,15 +218,15 @@ pip install --upgrade pip Then, install TensorFlow with pip. -Note: Do not install with conda because it may not have the latest stable -version. It's recommended to use pip because TensorFlow is only -officially released to PyPI. +Note: Do not install TensorFlow with conda. It may not have the latest stable +version. pip is recommended since TensorFlow is only officially released to +PyPI. ```bash pip install tensorflow ``` -### 5. Verify install +### 6. 
Verify install Verify the CPU setup: @@ -245,15 +244,31 @@ python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU' If a list of GPU devices is returned, you've installed TensorFlow successfully. - {% endsetvar %} * {Linux} + ### 1. System requirements + + * Ubuntu 16.04 or higher (64-bit) + + TensorFlow only officially support Ubuntu. However, the following + instructions may also work for other Linux distros. + + Note: Linux Aarch64 TensorFlow builds are build and tested by a third party. + TensorFlow will endeavor to maintain availability and integrity of this + binary on a best-effort basis. + + + {{ linux_instructions }} * {MacOS} + ### 1. System requirements + + * macOS 10.12.6 (Sierra) or higher (64-bit) + Note: For users of Apple M1 computers, to get native performance, you'll want to follow the instructions found [here](https://developer.apple.com/metal/tensorflow-plugin/){:.external}. @@ -263,10 +278,10 @@ successfully. you need those libraries, you will have to use TensorFlow with x86 emulation and Rosetta. - Currently, there is no official GPU support for running TensorFlow on - MacOS. The following instructions are for running on the CPU. + Currently there is no official GPU support for running TensorFlow on + MacOS. The following is instructions are for running on CPU. - ### 1. Check Python version + ### 2. Check Python version Check if your Python environment is already configured: @@ -277,17 +292,13 @@ successfully. python3 -m pip --version ``` - If you have the correct version of Python and pip, you may skip the next two - steps and go to "4. Install TensorFlow". However, it's still recommended not - skipping the steps. Use - [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} to - install Python and pip. It creates a separate environment to avoid - changing any installed software in your system. - - ### 2. Install Miniconda + ### 2. Install Miniconda - You can use the following command to install Miniconda. During installation, - you may need to press enter and type "yes". + [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} + is the recommended approach for installing TensorFlow with GPU support. + It creates a separate environment to avoid changing any installed + software in your system. This is also the easiest way to install the required + software especially for the GPU setup. ```bash curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o Miniconda3-latest-MacOSX-x86_64.sh @@ -297,7 +308,7 @@ successfully. You may need to restart your terminal or `source ~/.bashrc` to enable the `conda` command. Use `conda -V` to test if it is installed successfully. - ### 3. Create a conda environment + ### 4. Create a conda environment Create a new conda environment named `tf` with the following command. @@ -314,7 +325,7 @@ successfully. Please make sure it is activated for the rest of the installation. - ### 4. Install TensorFlow + ### 5. Install TensorFlow TensorFlow requires a recent version of pip, so upgrade your pip installation to be sure you're running the latest version. @@ -325,15 +336,15 @@ successfully. Then, install TensorFlow with pip. - Note: Do not install with conda. It may not have the latest stable - version. It's recommended to use pip since TensorFlow is only - officially released to PyPI. + Note: Do not install TensorFlow with conda. It may not have the latest stable + version. pip is recommended since TensorFlow is only officially released to + PyPI. 
```bash pip install tensorflow ``` - ### 5. Verify install + ### 6. Verify install ```bash python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" @@ -350,7 +361,15 @@ successfully. or install `tensorflow_cpu` and, optionally, try the [TensorFlow-DirectML-Plugin](https://github.com/microsoft/tensorflow-directml-plugin#tensorflow-directml-plugin-) - ### 1. Install Microsoft Visual C++ Redistributable + ## 1. System requirements + + * Windows 7 or higher (64-bit) + + Note: Windows CPU TensorFlow builds are built and tested by a third party. + TensorFlow will endeavor to maintain availability and integrity of this + binary on a best-effort basis. + + ### 2. Install Microsoft Visual C++ Redistributable Install the *Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017, and 2019*. Starting with the TensorFlow 2.1.0 version, the @@ -368,19 +387,19 @@ successfully. [long paths are enabled](https://superuser.com/questions/1119883/windows-10-enable-ntfs-long-paths-policy-option-missing){:.external} on Windows. - ### 2. Install Miniconda + ### 3. Install Miniconda - It's recommended to use - [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} to - create a separate environment to avoid changing any installed software in - your system. This is also the easiest way to install the required software, - especially for the GPU setup. + [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} + is the recommended approach for installing TensorFlow with GPU support. + It creates a separate environment to avoid changing any installed + software in your system. This is also the easiest way to install the + required software especially for the GPU setup. Download the [Miniconda Windows Installer](https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe){:.external}. Double-click the downloaded file and follow the instructions on the screen. - ### 3. Create a conda environment + ### 4. Create a conda environment Create a new conda environment named `tf` with the following command. @@ -397,21 +416,21 @@ successfully. Please make sure it is activated for the rest of the installation. - ### 4. GPU setup + ### 5. GPU setup - You can skip this section if you only run TensorFlow on the CPU. + You can skip this section if you only run TensorFlow on CPU. - First, you need to install + First install [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx){:.external} if you have not. - Then, install the CUDA, cuDNN with conda. + Then install the CUDA, cuDNN with conda. ```bash conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 ``` - ### 5. Install TensorFlow + ### 6. Install TensorFlow TensorFlow requires a recent version of pip, so upgrade your pip installation to be sure you're running the latest version. @@ -422,15 +441,15 @@ successfully. Then, install TensorFlow with pip. - Note: Do not install with conda. It may not have the latest stable - version. It's recommended to use pip since TensorFlow is only - officially released to PyPI. + Note: Do not install TensorFlow with conda. It may not have the latest stable + version. pip is recommended since TensorFlow is only officially released to + PyPI. ```bash pip install tensorflow ``` - ### 6. Verify install + ### 7. Verify install Verify the CPU setup: @@ -451,15 +470,16 @@ successfully. * {Windows WSL2} - Note: TensorFlow is supported for WSL2 on Windows 10 19044 or higher with GPU - access is now available. 
This corresponds to Windows 10 version - 21H2, the November 2021 update. You can get the latest update from here: - [Download Windows 10](https://www.microsoft.com/en-us/software-download/windows10){:.external}. - For instructions, please see - [Install WSL2](https://docs.microsoft.com/en-us/windows/wsl/install){:.external} - and - [NVIDIA’s setup docs](https://docs.nvidia.com/cuda/wsl-user-guide/index.html){:.external} - for CUDA in WSL. + ### 1. System requirements + + * Windows 10 19044 or higher (64-bit). This corresponds to Windows 10 + version 21H2, the November 2021 update. + + See the following documents to: + + * [Download the latest Windows 10 update](https://www.microsoft.com/software-download/windows10){:.external}. + * [Install WSL2](https://docs.microsoft.com/windows/wsl/install){:.external} + * [Setup NVIDIA® GPU support in WSL2](https://docs.nvidia.com/cuda/wsl-user-guide/index.html){:.external} {{ linux_instructions }} From a08cac3899626fe742fd42c7cffd1cf05af7d31e Mon Sep 17 00:00:00 2001 From: tfdocsbot Date: Tue, 9 Aug 2022 00:12:41 +0000 Subject: [PATCH 015/636] nbfmt --- site/en/guide/basics.ipynb | 1 - 1 file changed, 1 deletion(-) diff --git a/site/en/guide/basics.ipynb b/site/en/guide/basics.ipynb index 8ee26c5703a..e63a1398af3 100644 --- a/site/en/guide/basics.ipynb +++ b/site/en/guide/basics.ipynb @@ -956,7 +956,6 @@ "colab": { "collapsed_sections": [], "name": "basics.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { From 4525d20756d1dfcc228e88d5ea35e8a44c6e85c3 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 8 Aug 2022 17:16:54 -0700 Subject: [PATCH 016/636] Clear trailing white space. --- site/en/install/pip.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/site/en/install/pip.md b/site/en/install/pip.md index aee909a2d4e..b4826465243 100644 --- a/site/en/install/pip.md +++ b/site/en/install/pip.md @@ -50,12 +50,12 @@ step-by-step instructions. higher. This corresponds to Windows 10 version 21H2, the November 2021 update. You can get the latest update from here: [Download Windows 10](https://www.microsoft.com/software-download/windows10){:.external}. - For instructions, please see + For instructions, please see [Install WSL2](https://docs.microsoft.com/windows/wsl/install){:.external} and [NVIDIA’s setup docs](https://docs.nvidia.com/cuda/wsl-user-guide/index.html){:.external} for CUDA in WSL. - + ```bash conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/ @@ -364,7 +364,7 @@ successfully. ## 1. System requirements * Windows 7 or higher (64-bit) - + Note: Windows CPU TensorFlow builds are built and tested by a third party. TensorFlow will endeavor to maintain availability and integrity of this binary on a best-effort basis. From 1529486a44d867cc742d79a904aa994f695db17a Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Thu, 11 Aug 2022 10:58:59 -0700 Subject: [PATCH 017/636] Update install for changes to windows builds. Split "Windows Native" and "Windows WSL2" into two separate tabs. Add a caution that "TensorFlow version, 2.10, is the last TensorFlow release that will support GPU on native-Windows". Copy the linux instructions into the Windows WSL2 tab. 
PiperOrigin-RevId: 466998549
---
 site/en/install/pip.md | 364 ++++++++++++++++++++++-------------------
 1 file changed, 198 insertions(+), 166 deletions(-)

diff --git a/site/en/install/pip.md b/site/en/install/pip.md
index 6bc4a61e5c8..f11c94b4175 100644
--- a/site/en/install/pip.md
+++ b/site/en/install/pip.md

# Install TensorFlow with pip

This guide is for the latest stable version of TensorFlow. For the preview build
*(nightly)*, please use the pip package named `tf-nightly`. Refer to
[these tables](./source#tested_build_configurations) for older TensorFlow
version requirements. For the CPU-only build use the pip package named
`tensorflow-cpu`

Here are the quick versions of the install commands. Scroll down for the
step-by-step instructions.

* {Linux}

    ```bash
    conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/
    python3 -m pip install tensorflow
    # Verify install:
    python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))"
    ```

* {MacOS}

    ```bash
    # There is currently no official GPU support for MacOS.
    python3 -m pip install tensorflow
    # Verify install:
    python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))"
    ```

* {Windows Native}

    Caution: The current TensorFlow version, `2.10`, is the **last** TensorFlow
    release that will support GPU on native-Windows. Starting with TensorFlow
    `2.11`, you will need to install
    [TensorFlow in WSL2](https://tensorflow.org/install/pip#windows-wsl2), or
    install `tensorflow_cpu` and, optionally, try the
    [TensorFlow-DirectML-Plugin](https://github.com/microsoft/tensorflow-directml-plugin#tensorflow-directml-plugin-)

    ```bash
    conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0
    python3 -m pip install tensorflow
    # Verify install:
    python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))"
    ```

* Note: TensorFlow with GPU access is supported for WSL2 on Windows 10 19044
  or higher. This corresponds to Windows 10 version 21H2, the November 2021
  update. You can get the latest update from here:
  [Download Windows 10](https://www.microsoft.com/software-download/windows10){:.external}.
+ For instructions, please see + [Install WSL2](https://docs.microsoft.com/windows/wsl/install){:.external} + and + [NVIDIA’s setup docs](https://docs.nvidia.com/cuda/wsl-user-guide/index.html){:.external} + for CUDA in WSL. {Windows WSL2} - ```bash - python3 -m pip install tensorflow - # Verify install: - python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" - ``` + ```bash + conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/ + python3 -m pip install tensorflow + # Verify install: + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` -* {Nightly} +* ~~~bash + python3 -m pip install tensorflow + # Verify install: + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` {CPU} + ~~~ - ```bash - python3 -m pip install tf-nightly - # Verify install: - python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" - ``` +* ~~~bash python3 -m pip install tf-nightly # Verify install: python3 -c + "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, + 1000])))" ``` {Nightly} ~~~ ## Hardware requirements @@ -81,7 +90,8 @@ enable compute capabilities by [building TensorFlow from source](./source.md). * Ubuntu 16.04 or higher (64-bit) * macOS 10.12.6 (Sierra) or higher (64-bit) *(no GPU support)* -* Windows 7 or higher (64-bit) +* Windows Native - Windows 7 or higher (64-bit) +* Windows WSL2 - Windows 10 19044 or higher (64-bit) Note: GPU support is available for Ubuntu and Windows with CUDA®-enabled cards. @@ -89,9 +99,10 @@ Note: GPU support is available for Ubuntu and Windows with CUDA®-enabled cards. * Python 3.7–3.10 * pip version 19.0 or higher for Linux (requires `manylinux2010` support) and - Windows, version 20.3 or higher for macOS -* Windows Requires - [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads){:.external} + Windows. pip version 20.3 or higher for macOS. +* Windows Native Requires + [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads) + {:.external} The following NVIDIA® software are only required for GPU support. @@ -105,122 +116,130 @@ The following NVIDIA® software are only required for GPU support. ## Step-by-step instructions +{% setvar linux_instructions %} -* {Linux} +### 2. Install Miniconda - We only officially support Ubuntu. However, the following instructions may - also work for other Linux distros. +[Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} is the +recommended approach for installing TensorFlow with GPU support. It creates a +separate environment to avoid changing any installed software in your system. +This is also the easiest way to install the required software especially for the +GPU setup. - We recommend using - [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} to - create a separate environment to avoid changing any installed software in - your system. This is also the easiest way to install the required software, - especially for the GPU setup. +You can use the following command to install Miniconda. During installation, you +may need to press enter and type "yes". - ### 1. 
Install Miniconda

-  You can use the following command to install Miniconda. During installation,
-  you may need to press enter and type "yes".
+```bash
+curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -o Miniconda3-latest-Linux-x86_64.sh
+bash Miniconda3-latest-Linux-x86_64.sh
+```

-  ```bash
-  curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -o Miniconda3-latest-Linux-x86_64.sh
-  bash Miniconda3-latest-Linux-x86_64.sh
-  ```
+You may need to restart your terminal or `source ~/.bashrc` to enable the
+`conda` command. Use `conda -V` to test if it is installed successfully.

-  You may need to restart your terminal or `source ~/.bashrc` to enable the
-  `conda` command. Use `conda -V` to test if it is installed successfully.
+### 3. Create a conda environment

-  ### 2. Create a conda environment
+Create a new conda environment named `tf` with the following command.

-  Create a new conda environment named `tf` with the following command.
+```bash
+conda create --name tf python=3.9
+```

-  ```bash
-  conda create --name tf python=3.9
-  ```
+You can deactivate and activate it with the following commands.

-  You can deactivate and activate it with the following commands.
+```bash
+conda deactivate
+conda activate tf
+```

-  ```bash
-  conda deactivate
-  conda activate tf
-  ```
+Please make sure it is activated for the rest of the installation.

-  Please make sure it is activated for the rest of the installation.
+### 4. GPU setup

-  ### 3. GPU setup
+You can skip this section if you only run TensorFlow on CPU.

-  You can skip this section if you only run TensorFlow on CPU.
+First install the
+[NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx){:.external} if
+you have not. You can use the following command to verify it is installed.

-  First, we need to install
-  [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx){:.external}
-  if you have not. You can use the following command to verify it is
-  installed.
+```bash
+nvidia-smi
+```

-  ```bash
-  nvidia-smi
-  ```
+Then install CUDA and cuDNN with conda.

-  Then, we install the CUDA, cuDNN with conda.
+```bash
+conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0
+```

-  ```bash
-  conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0
-  ```
+Configure the system paths. You can do it with the following command every time
+you start a new terminal after activating your conda environment.

-  Configure the system paths. You can do it with following command everytime
-  your start a new terminal after activating your conda environment.
+```bash
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/
+```

-  ```bash
-  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/
-  ```
+For your convenience, it is recommended that you automate it with the following
+commands. The system paths will be automatically configured when you activate
+this conda environment.

-  However, for your convenience, we recommend automating it with the following
-  commands. The system paths will be automatically configured when you
-  activate this conda environment.
+```bash
+mkdir -p $CONDA_PREFIX/etc/conda/activate.d
+echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/' > $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh
+```

-  ```bash
-  mkdir -p $CONDA_PREFIX/etc/conda/activate.d
-  echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/' > $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh
-  ```

-  ### 4. Install TensorFlow
+### 5. 
Install TensorFlow - ```bash - mkdir -p $CONDA_PREFIX/etc/conda/activate.d - echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/' > $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh - ``` +TensorFlow requires a recent version of pip, so upgrade your pip installation to +be sure you're running the latest version. - ### 4. Install TensorFlow +```bash +pip install --upgrade pip +``` - TensorFlow requires a recent version of pip, so upgrade your pip - installation to be sure you're running the latest version. +Then, install TensorFlow with pip. - ```bash - pip install --upgrade pip - ``` +Note: Do not install TensorFlow with conda. It may not have the latest stable +version. pip is recommended since TensorFlow is only officially released to +PyPI. - Then, install TensorFlow with pip. +```bash +pip install tensorflow +``` - Note: Do not install with conda. It may not have the latest stable - version. We recommend using pip since TensorFlow is only - officially released to PyPI. +### 6. Verify install - ```bash - pip install tensorflow - ``` +Verify the CPU setup: - ### 5. Verify install +```bash +python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" +``` - Verify the CPU setup: +If a tensor is returned, you've installed TensorFlow successfully. - ```bash - python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" - ``` +Verify the GPU setup: - If a tensor is returned, you've installed TensorFlow successfully. +```bash +python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" +``` - Verify the GPU setup: +If a list of GPU devices is returned, you've installed TensorFlow successfully. +{% endsetvar %} - ```bash - python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" - ``` +* ### 1. System requirements {Linux} - If a list of GPU devices is returned, you've installed TensorFlow - successfully. + * Ubuntu 16.04 or higher (64-bit) + + TensorFlow only officially support Ubuntu. However, the following + instructions may also work for other Linux distros. + + {{ linux_instructions }} -* {MacOS} +* ### 1. System requirements {MacOS} + + * macOS 10.12.6 (Sierra) or higher (64-bit) Note: For users of Apple M1 computers, to get native performance, you'll want to follow the instructions found @@ -231,10 +250,10 @@ The following NVIDIA® software are only required for GPU support. you need those libraries, you will have to use TensorFlow with x86 emulation and Rosetta. - Currently, we do not have official GPU support for running TensorFlow on - MacOS. The following is instructions are for running on CPU. + Currently there is no official GPU support for running TensorFlow on MacOS. + The following is instructions are for running on CPU. - ### 1. Check Python version + ### 2. Check Python version Check if your Python environment is already configured: @@ -245,17 +264,13 @@ The following NVIDIA® software are only required for GPU support. python3 -m pip --version ``` - If you have the correct version of Python and pip, you may skip the next two - steps and go to "4. Install TensorFlow". However, we still recommend not - skipping the steps. Use - [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} to - install Python and pip. It create a separate environment to avoid - changing any installed software in your system. - ### 2. Install Miniconda - You can use the following command to install Miniconda. 
During installation, - you may need to press enter and type "yes". + [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} is + the recommended approach for installing TensorFlow with GPU support. It + creates a separate environment to avoid changing any installed software in + your system. This is also the easiest way to install the required software + especially for the GPU setup. ```bash curl https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -o Miniconda3-latest-MacOSX-x86_64.sh @@ -265,7 +280,7 @@ The following NVIDIA® software are only required for GPU support. You may need to restart your terminal or `source ~/.bashrc` to enable the `conda` command. Use `conda -V` to test if it is installed successfully. - ### 3. Create a conda environment + ### 4. Create a conda environment Create a new conda environment named `tf` with the following command. @@ -282,7 +297,7 @@ The following NVIDIA® software are only required for GPU support. Please make sure it is activated for the rest of the installation. - ### 4. Install TensorFlow + ### 5. Install TensorFlow TensorFlow requires a recent version of pip, so upgrade your pip installation to be sure you're running the latest version. @@ -293,15 +308,15 @@ The following NVIDIA® software are only required for GPU support. Then, install TensorFlow with pip. - Note: Do not install with conda. It may not have the latest stable - version. We recommend using pip since TensorFlow is only - officially released to PyPI. + Note: Do not install TensorFlow with conda. It may not have the latest + stable version. pip is recommended since TensorFlow is only officially + released to PyPI. ```bash pip install tensorflow ``` - ### 5. Verify install + ### 6. Verify install ```bash python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" @@ -309,17 +324,19 @@ The following NVIDIA® software are only required for GPU support. If a tensor is returned, you've installed TensorFlow successfully. -* {Windows} +* Caution: The current TensorFlow version, `2.10`, is the **last** TensorFlow + release that will support GPU on native-Windows. Starting with TensorFlow + `2.11`, you will need to install + [TensorFlow in WSL2](https://tensorflow.org/install/pip#windows-[wsl2]), or + install `tensorflow_cpu` and, optionally, try the + [TensorFlow-DirectML-Plugin](https://github.com/microsoft/tensorflow-directml-plugin#tensorflow-directml-plugin-) + {Windows Native} - Note: Experimental support for WSL2 on Windows 10 19044 or higher with GPU - access is now available. This corresponds to Windows 10 version - 21H2, the November 2021 update. You can get the latest update from here: - [Download Windows 10](https://www.microsoft.com/en-us/software-download/windows10){:.external}. - For instructions, please see - [NVIDIA’s setup docs](https://docs.nvidia.com/cuda/wsl-user-guide/index.html){:.external} - for CUDA in WSL. + ## 1. System requirements - ### 1. Install Microsoft Visual C++ Redistributable + * Windows 7 or higher (64-bit) + + ### 2. Install Microsoft Visual C++ Redistributable Install the *Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017, and 2019*. Starting with the TensorFlow 2.1.0 version, the @@ -337,19 +354,19 @@ The following NVIDIA® software are only required for GPU support. [long paths are enabled](https://superuser.com/questions/1119883/windows-10-enable-ntfs-long-paths-policy-option-missing){:.external} on Windows. - ### 2. Install Miniconda + ### 3. 
Install Miniconda - We recommend using - [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} to - create a separate environment to avoid changing any installed software in - your system. This is also the easiest way to install the required software, + [Miniconda](https://docs.conda.io/en/latest/miniconda.html){:.external} is + the recommended approach for installing TensorFlow with GPU support. It + creates a separate environment to avoid changing any installed software in + your system. This is also the easiest way to install the required software especially for the GPU setup. Download the [Miniconda Windows Installer](https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe){:.external}. Double-click the downloaded file and follow the instructions on the screen. - ### 3. Create a conda environment + ### 4. Create a conda environment Create a new conda environment named `tf` with the following command. @@ -366,21 +383,21 @@ The following NVIDIA® software are only required for GPU support. Please make sure it is activated for the rest of the installation. - ### 4. GPU setup + ### 5. GPU setup You can skip this section if you only run TensorFlow on CPU. - First, we need to install + First install [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx){:.external} if you have not. - Then, we install the CUDA, cuDNN with conda. + Then install the CUDA, cuDNN with conda. ```bash conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 ``` - ### 5. Install TensorFlow + ### 6. Install TensorFlow TensorFlow requires a recent version of pip, so upgrade your pip installation to be sure you're running the latest version. @@ -391,15 +408,15 @@ The following NVIDIA® software are only required for GPU support. Then, install TensorFlow with pip. - Note: Do not install with conda. It may not have the latest stable - version. We recommend using pip since TensorFlow is only - officially released to PyPI. + Note: Do not install TensorFlow with conda. It may not have the latest + stable version. pip is recommended since TensorFlow is only officially + released to PyPI. ```bash pip install tensorflow ``` - ### 6. Verify install + ### 7. Verify install Verify the CPU setup: @@ -418,6 +435,21 @@ The following NVIDIA® software are only required for GPU support. If a list of GPU devices is returned, you've installed TensorFlow successfully. +* ### 1. System requirements {Windows WSL2} + + * Windows 10 19044 or higher (64-bit). This corresponds to Windows 10 + version 21H2, the November 2021 update. + + See the following documents to: + + * [Download the latest Windows 10 update](https://www.microsoft.com/software-download/windows10){:.external}. + * [Install WSL2](https://docs.microsoft.com/windows/wsl/install) + {:.external} + * [Setup NVIDIA® GPU support in WSL2](https://docs.nvidia.com/cuda/wsl-user-guide/index.html) + {:.external} + + {{ linux_instructions }} + ## Package location A few installation mechanisms require the URL of the TensorFlow Python package. 
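One extra check that can save debugging time, sketched below as an editor's aside rather than an official install step: TensorFlow can report the CUDA and cuDNN versions it was compiled against, which should line up with the `cudatoolkit=11.2` and `cudnn=8.1.0` packages installed above.

```python
# Sketch: query the CUDA/cuDNN versions this TensorFlow build expects.
import tensorflow as tf

build_info = tf.sysconfig.get_build_info()
print("TensorFlow:", tf.__version__)
print("Built against CUDA:", build_info.get("cuda_version"))
print("Built against cuDNN:", build_info.get("cudnn_version"))
```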
From 344da5b2cb9c6a831d7719f78aff91a27e630bf3 Mon Sep 17 00:00:00 2001 From: Olzhas Akpambetov Date: Thu, 11 Aug 2022 13:07:01 -0700 Subject: [PATCH 018/636] Add TF Lite step to Image classification PiperOrigin-RevId: 467029865 --- site/en/tutorials/images/classification.ipynb | 240 ++++++++++++++++-- 1 file changed, 213 insertions(+), 27 deletions(-) diff --git a/site/en/tutorials/images/classification.ipynb b/site/en/tutorials/images/classification.ipynb index 80f950b1c0c..e027826de73 100644 --- a/site/en/tutorials/images/classification.ipynb +++ b/site/en/tutorials/images/classification.ipynb @@ -68,7 +68,8 @@ "id": "gN7G9GFmVrVY" }, "source": [ - "This tutorial shows how to classify images of flowers. It creates an image classifier using a `tf.keras.Sequential` model, and loads data using `tf.keras.utils.image_dataset_from_directory`. You will gain practical experience with the following concepts:\n", + "This tutorial shows how to classify images of flowers using a `tf.keras.Sequential` model and load data using `tf.keras.utils.image_dataset_from_directory`. It demonstrates the following concepts:\n", + "\n", "\n", "* Efficiently loading a dataset off disk.\n", "* Identifying overfitting and applying techniques to mitigate it, including data augmentation and dropout.\n", @@ -80,7 +81,9 @@ "3. Build the model\n", "4. Train the model\n", "5. Test the model\n", - "6. Improve the model and repeat the process" + "6. Improve the model and repeat the process\n", + "\n", + "In addition, the notebook demonstrates how to convert a [saved model](../../../guide/saved_model.ipynb) to a [TensorFlow Lite](https://www.tensorflow.org/lite/) model for on-device machine learning on mobile, embedded, and IoT devices." ] }, { @@ -89,7 +92,9 @@ "id": "zF9uvbXNVrVY" }, "source": [ - "## Import TensorFlow and other libraries" + "## Setup\n", + "\n", + "Import TensorFlow and other necessary libraries:" ] }, { @@ -102,7 +107,6 @@ "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", - "import os\n", "import PIL\n", "import tensorflow as tf\n", "\n", @@ -243,9 +247,9 @@ "id": "gIjgz7_JIo_m" }, "source": [ - "# Load data using a Keras utility\n", + "## Load data using a Keras utility\n", "\n", - "Let's load these images off disk using the helpful `tf.keras.utils.image_dataset_from_directory` utility. This will take you from a directory of images on disk to a `tf.data.Dataset` in just a couple lines of code. If you like, you can also write your own data loading code from scratch by visiting the [Load and preprocess images](../load_data/images.ipynb) tutorial." + "Next, load these images off disk using the helpful `tf.keras.utils.image_dataset_from_directory` utility. This will take you from a directory of images on disk to a `tf.data.Dataset` in just a couple lines of code. If you like, you can also write your own data loading code from scratch by visiting the [Load and preprocess images](../load_data/images.ipynb) tutorial." ] }, { @@ -254,7 +258,7 @@ "id": "xyDNn9MbIzfT" }, "source": [ - "## Create a dataset" + "### Create a dataset" ] }, { @@ -285,7 +289,7 @@ "id": "pFBhRrrEI49z" }, "source": [ - "It's good practice to use a validation split when developing your model. Let's use 80% of the images for training, and 20% for validation." + "It's good practice to use a validation split when developing your model. Use 80% of the images for training and 20% for validation." 
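The notebook cells that actually perform this split are unchanged by the patch and so do not appear in the diff. As a sketch of what the sentence describes, an 80/20 split with `tf.keras.utils.image_dataset_from_directory` looks roughly like the following; the directory path, image size, and batch size are stand-in values rather than the tutorial's own definitions.

```python
import pathlib
import tensorflow as tf

# Stand-in values; the tutorial defines its own equivalents earlier.
data_dir = pathlib.Path("flower_photos")
img_height = img_width = 180
batch_size = 32

train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,   # hold out 20% of the images
    subset="training",      # this call yields the 80% training split
    seed=123,               # the same seed keeps the two splits disjoint
    image_size=(img_height, img_width),
    batch_size=batch_size)

val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",    # this call yields the 20% validation split
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)
```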
] }, { @@ -379,7 +383,7 @@ "id": "5M6BXtXFJdW0" }, "source": [ - "You will train a model using these datasets by passing them to `Model.fit` in a moment. If you like, you can also manually iterate over the dataset and retrieve batches of images:" + "You will pass these datasets to the Keras `Model.fit` method for training later in this tutorial. If you like, you can also manually iterate over the dataset and retrieve batches of images:" ] }, { @@ -415,7 +419,7 @@ "source": [ "## Configure the dataset for performance\n", "\n", - "Let's make sure to use buffered prefetching so you can yield data from disk without having I/O become blocking. These are two important methods you should use when loading data:\n", + "Make sure to use buffered prefetching, so you can yield data from disk without having I/O become blocking. These are two important methods you should use when loading data:\n", "\n", "- `Dataset.cache` keeps the images in memory after they're loaded off disk during the first epoch. This will ensure the dataset does not become a bottleneck while training your model. If your dataset is too large to fit into memory, you can also use this method to create a performant on-disk cache.\n", "- `Dataset.prefetch` overlaps data preprocessing and model execution while training.\n", @@ -489,7 +493,7 @@ "image_batch, labels_batch = next(iter(normalized_ds))\n", "first_image = image_batch[0]\n", "# Notice the pixel values are now in `[0,1]`.\n", - "print(np.min(first_image), np.max(first_image)) " + "print(np.min(first_image), np.max(first_image))" ] }, { @@ -498,7 +502,7 @@ "id": "XWEOmRSBJ9J8" }, "source": [ - "Or, you can include the layer inside your model definition, which can simplify deployment. Let's use the second approach here." + "Or, you can include the layer inside your model definition, which can simplify deployment. Use the second approach here." ] }, { @@ -516,9 +520,11 @@ "id": "WcUTyDOPKucd" }, "source": [ - "# Create the model\n", + "## A basic Keras model\n", "\n", - "The [Sequential](https://www.tensorflow.org/guide/keras/sequential_model) model consists of three convolution blocks (`tf.keras.layers.Conv2D`) with a max pooling layer (`tf.keras.layers.MaxPooling2D`) in each of them. There's a fully-connected layer (`tf.keras.layers.Dense`) with 128 units on top of it that is activated by a ReLU activation function (`'relu'`). This model has not been tuned for high accuracy—the goal of this tutorial is to show a standard approach." + "### Create the model\n", + "\n", + "The Keras [Sequential](https://www.tensorflow.org/guide/keras/sequential_model) model consists of three convolution blocks (`tf.keras.layers.Conv2D`) with a max pooling layer (`tf.keras.layers.MaxPooling2D`) in each of them. There's a fully-connected layer (`tf.keras.layers.Dense`) with 128 units on top of it that is activated by a ReLU activation function (`'relu'`). This model has not been tuned for high accuracy; the goal of this tutorial is to show a standard approach." ] }, { @@ -551,7 +557,7 @@ "id": "EaKFzz72Lqpg" }, "source": [ - "## Compile the model\n", + "### Compile the model\n", "\n", "For this tutorial, choose the `tf.keras.optimizers.Adam` optimizer and `tf.keras.losses.SparseCategoricalCrossentropy` loss function. To view training and validation accuracy for each training epoch, pass the `metrics` argument to `Model.compile`." 
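The compile cell itself is unchanged by this patch and therefore not shown in the diff. A sketch of the configuration the sentence describes, using a throwaway stand-in model:

```python
import tensorflow as tf

# Stand-in model; the tutorial compiles the Sequential model it built earlier.
model = tf.keras.Sequential([tf.keras.layers.Dense(5)])

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'])  # reported for every training and validation epoch
```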
] @@ -575,9 +581,9 @@ "id": "aMJ4DnuJL55A" }, "source": [ - "## Model summary\n", + "### Model summary\n", "\n", - "View all the layers of the network using the model's `Model.summary` method:" + "View all the layers of the network using the Keras `Model.summary` method:" ] }, { @@ -597,7 +603,16 @@ "id": "NiYHcbvaL9H-" }, "source": [ - "## Train the model" + "### Train the model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j30F69T4sIVN" + }, + "source": [ + "Train the model for 10 epochs with the Keras `Model.fit` method:" ] }, { @@ -631,7 +646,7 @@ "id": "dFvOvmAmMK9w" }, "source": [ - "Create plots of loss and accuracy on the training and validation sets:" + "Create plots of the loss and accuracy on the training and validation sets:" ] }, { @@ -673,7 +688,7 @@ "source": [ "The plots show that training accuracy and validation accuracy are off by large margins, and the model has achieved only around 60% accuracy on the validation set.\n", "\n", - "Let's inspect what went wrong and try to increase the overall performance of the model." + "The following tutorial sections show how to inspect what went wrong and try to increase the overall performance of the model." ] }, { @@ -695,7 +710,7 @@ "\n", "When there are a small number of training examples, the model sometimes learns from noises or unwanted details from training examples—to an extent that it negatively impacts the performance of the model on new examples. This phenomenon is known as overfitting. It means that the model will have a difficult time generalizing on a new dataset.\n", "\n", - "There are multiple ways to fight overfitting in the training process. In this tutorial, you'll use *data augmentation* and add *Dropout* to your model." + "There are multiple ways to fight overfitting in the training process. In this tutorial, you'll use *data augmentation* and add *dropout* to your model." ] }, { @@ -744,7 +759,7 @@ "id": "PN4k1dK3S6eV" }, "source": [ - "Let's visualize what a few augmented examples look like by applying data augmentation to the same image several times:" + "Visualize a few augmented examples by applying data augmentation to the same image several times:" ] }, { @@ -770,7 +785,7 @@ "id": "tsjXCBLYYNs5" }, "source": [ - "You will use data augmentation to train a model in a moment." + "You will add data augmentation to your model before training in the next step." ] }, { @@ -781,11 +796,11 @@ "source": [ "## Dropout\n", "\n", - "Another technique to reduce overfitting is to introduce [dropout](https://developers.google.com/machine-learning/glossary#dropout_regularization) regularization to the network.\n", + "Another technique to reduce overfitting is to introduce [dropout](https://developers.google.com/machine-learning/glossary#dropout_regularization){:.external} regularization to the network.\n", "\n", "When you apply dropout to a layer, it randomly drops out (by setting the activation to zero) a number of output units from the layer during the training process. Dropout takes a fractional number as its input value, in the form such as 0.1, 0.2, 0.4, etc. 
This means dropping out 10%, 20% or 40% of the output units randomly from the applied layer.\n", "\n", - "Let's create a new neural network with `tf.keras.layers.Dropout` before training it using the augmented images:" + "Create a new neural network with `tf.keras.layers.Dropout` before training it using the augmented images:" ] }, { @@ -808,7 +823,7 @@ " layers.Dropout(0.2),\n", " layers.Flatten(),\n", " layers.Dense(128, activation='relu'),\n", - " layers.Dense(num_classes)\n", + " layers.Dense(num_classes, name=\"outputs\")\n", "])" ] }, @@ -918,7 +933,7 @@ "id": "10buWpJbcCQz" }, "source": [ - "Finally, let's use our model to classify an image that wasn't included in the training or validation sets." + "Use your model to classify an image that wasn't included in the training or validation sets." ] }, { @@ -955,6 +970,177 @@ " .format(class_names[np.argmax(score)], 100 * np.max(score))\n", ")" ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aOc3PZ2N2r18" + }, + "source": [ + "## Use TensorFlow Lite\n", + "\n", + "TensorFlow Lite is a set of tools that enables on-device machine learning by helping developers run their models on mobile, embedded, and edge devices." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cThu25rh4LPP" + }, + "source": [ + "### Convert the Keras Sequential model to a TensorFlow Lite model\n", + "\n", + "To use the trained model with on-device applications, first [convert it](https://www.tensorflow.org/lite/models/convert) to a smaller and more efficient model format called a [TensorFlow Lite](https://www.tensorflow.org/lite/) model.\n", + "\n", + "In this example, take the trained Keras Sequential model and use `tf.lite.TFLiteConverter.from_keras_model` to generate a [TensorFlow Lite](https://www.tensorflow.org/lite/) model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mXo6ftuL2ufx" + }, + "outputs": [], + "source": [ + "# Convert the model.\n", + "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n", + "tflite_model = converter.convert()\n", + "\n", + "# Save the model.\n", + "with open('model.tflite', 'wb') as f:\n", + " f.write(tflite_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4R26OU4gGKhh" + }, + "source": [ + "The TensorFlow Lite model you saved in the previous step can contain several function signatures. The Keras model converter API uses the default signature automatically. Learn more about [TensorFlow Lite signatures](https://www.tensorflow.org/lite/guide/signatures)." 
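A small aside, not part of the tutorial text: once the flatbuffer has been written, checking its size on disk makes the "smaller and more efficient" claim concrete. This assumes `model.tflite` was produced by the conversion cell above.

```python
import os

# Assumes the conversion cell above has written 'model.tflite'.
size_bytes = os.path.getsize('model.tflite')
print(f'TensorFlow Lite model size: {size_bytes / 2**20:.2f} MiB')
```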
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7fjQfXaV2l-5" + }, + "source": [ + "### Run the TensorFlow Lite model\n", + "\n", + "You can access the TensorFlow Lite saved model signatures in Python via the `tf.lite.Interpreter` class.\n", + "\n", + "Load the model with the `Interpreter`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cHYcip_FOaHq" + }, + "outputs": [], + "source": [ + "TF_MODEL_FILE_PATH = 'model.tflite' # The default path to the saved TensorFlow Lite model\n", + "\n", + "interpreter = tf.lite.Interpreter(model_path=TF_MODEL_FILE_PATH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nPUXY6BdHDHo" + }, + "source": [ + "Print the signatures from the converted model to obtain the names of the inputs (and outputs):\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZdDl00E2OaHq" + }, + "outputs": [], + "source": [ + "interpreter.get_signature_list()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4eVFqT0je3YG" + }, + "source": [ + "In this example, you have one default signature called `serving_default`. In addition, the name of the `'inputs'` is `'sequential_1_input'`, while the `'outputs'` are called `'outputs'`. You can look up these first and last Keras layer names when running `Model.summary`, as demonstrated earlier in this tutorial.\n", + "\n", + "Now you can test the loaded TensorFlow Model by performing inference on a sample image with `tf.lite.Interpreter.get_signature_runner` by passing the signature name as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yFoT_7W_OaHq" + }, + "outputs": [], + "source": [ + "classify_lite = interpreter.get_signature_runner('serving_default')\n", + "classify_lite" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b1mfRcBOnEx0" + }, + "source": [ + "Similar to what you did earlier in the tutorial, you can use the TensorFlow Lite model to classify images that weren't included in the training or validation sets.\n", + "\n", + "You have already tensorized that image and saved it as `img_array`. Now, pass it to the first argument (the name of the `'inputs'`) of the loaded TensorFlow Lite model (`predictions_lite`), compute softmax activations, and then print the prediction for the class with the highest computed probability." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sEqR27YcnFvc" + }, + "outputs": [], + "source": [ + "predictions_lite = classify_lite(sequential_1_input=img_array)['outputs']\n", + "score_lite = tf.nn.softmax(predictions_lite)\n", + "\n", + "assert np.allclose(predictions, predictions_lite)\n", + "\n", + "print(\n", + " \"This image most likely belongs to {} with a {:.2f} percent confidence.\"\n", + " .format(class_names[np.argmax(score_lite)], 100 * np.max(score_lite))\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5hJzY8XijM7N" + }, + "source": [ + "Of the five classes—`'daisy'`, `'dandelion'`, `'roses'`, `'sunflowers'`, and `'tulips'`—the model should predict the image belongs to sunflowers, which is the same result as before the TensorFlow Lite conversion.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1RlfCY9v2_ir" + }, + "source": [ + "## Next steps\n", + "\n", + "This tutorial showed how to train a model for image classification, test it, convert it to the TensorFlow Lite format for on-device applications (such as an image classification app), and perform inference with the TensorFlow Lite model with the Python API.\n", + "\n", + "You can learn more about TensorFlow Lite through [tutorials](https://www.tensorflow.org/lite/tutorials) and [guides](https://www.tensorflow.org/lite/guide)." + ] } ], "metadata": { From 74eb0587a8933433f8de591116098337f82f0c41 Mon Sep 17 00:00:00 2001 From: artemboiko1 <108079274+artemboiko1@users.noreply.github.com> Date: Sat, 13 Aug 2022 21:29:54 -0400 Subject: [PATCH 019/636] Update imbalanced_data.ipynb remove unneeded symbol --- site/en/tutorials/structured_data/imbalanced_data.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/en/tutorials/structured_data/imbalanced_data.ipynb b/site/en/tutorials/structured_data/imbalanced_data.ipynb index 4d7e0f22cab..e51e0fee5b4 100644 --- a/site/en/tutorials/structured_data/imbalanced_data.ipynb +++ b/site/en/tutorials/structured_data/imbalanced_data.ipynb @@ -814,7 +814,7 @@ " else:\n", " plt.ylim([0,1])\n", "\n", - " plt.legend();" + " plt.legend()" ] }, { From 19aaa6932b44643db28549ef50b8b813fddb42a6 Mon Sep 17 00:00:00 2001 From: Mark McDonald Date: Sun, 14 Aug 2022 22:21:15 -0700 Subject: [PATCH 020/636] Helper for copying cells between notebooks. Basic tool for copying notebook code cells from one book to another. It copies code cells using matching cell IDs, but does not yet support inserting, deleting or reordering cells - these will need to be done by a human. It will also run `nbfmt` to make sure the notebook has the correct whitespace, etc. Users can pass `--oss` if operating directly on github files. Tested with [en/guide/distributed_training.ipynb](https://github.com/tensorflow/docs/blob/master/site/en/guide/distributed_training.ipynb) and [zh_cn/guide/distributed_training.ipynb](https://github.com/tensorflow/docs-l10n/blob/master/site/zh-cn/guide/distributed_training.ipynb). 
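For anyone who prefers to drive the copy from Python rather than the CLI, a rough sketch using the `copy_code_cells` helper introduced in this patch (the file names are placeholders):

```python
import nbformat
from tensorflow_docs.tools.nbfmt import notebook_utils

# Placeholder paths; any pair of structurally similar notebooks works.
with open('src_notebook.ipynb') as f:
    source = nbformat.read(f, nbformat.NO_CONVERT)
with open('dest_notebook.ipynb') as f:
    target = nbformat.read(f, nbformat.NO_CONVERT)

stats = notebook_utils.copy_code_cells(source, target)  # matches cells by ID
print(f'{stats.updated_cells} of {stats.processed_cells} code cells updated')

with open('dest_notebook.ipynb', 'w') as f:
    nbformat.write(target, f)
```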
PiperOrigin-RevId: 467593397 --- tools/nbcp/__init__.py | 14 ++ tools/nbcp/__main__.py | 92 ++++++++++ .../tools/nbfmt/notebook_utils.py | 65 ++++++- .../tools/nbfmt/notebook_utils_test.py | 169 ++++++++++++++++++ 4 files changed, 338 insertions(+), 2 deletions(-) create mode 100644 tools/nbcp/__init__.py create mode 100644 tools/nbcp/__main__.py create mode 100644 tools/tensorflow_docs/tools/nbfmt/notebook_utils_test.py diff --git a/tools/nbcp/__init__.py b/tools/nbcp/__init__.py new file mode 100644 index 00000000000..78cb171abba --- /dev/null +++ b/tools/nbcp/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== diff --git a/tools/nbcp/__main__.py b/tools/nbcp/__main__.py new file mode 100644 index 00000000000..c85b22129d1 --- /dev/null +++ b/tools/nbcp/__main__.py @@ -0,0 +1,92 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utility for copying cells between notebooks.""" +import pathlib +import sys +import textwrap + +from absl import app +import nbformat + +from tensorflow_docs.tools.nbfmt import __main__ as nbfmt +from tensorflow_docs.tools.nbfmt import notebook_utils + + +def process_stats(stats: notebook_utils.CellCopyStats) -> bool: + """Displays summary stats to the user. Returns True if any warnings.""" + print( + textwrap.dedent(f""" + Notebook copy complete. 
+ - Total code cells processed: {stats.processed_cells} + - Cells updated: {stats.updated_cells} + """)) + + has_warnings = any(( + stats.unmatched_target_cells, + stats.unmatched_source_cells, + stats.out_of_order_target_cells, + )) + if has_warnings: + print('Warnings:') + + if stats.unmatched_target_cells: + notebook_utils.warn( + '- Cells in source notebook that are not in the destination: ' + f'{" ".join(stats.unmatched_target_cells)}') + + if stats.unmatched_source_cells: + notebook_utils.warn( + '- Cells in destination notebook that are not in the source: ' + f'{" ".join(stats.unmatched_source_cells)}') + + if stats.out_of_order_target_cells: + notebook_utils.warn( + '- Cells found earlier in destination notebook than source: ' + f'{" ".join(stats.out_of_order_target_cells)}') + + print() + + return has_warnings + + +def main(args: list[str]) -> int: + if len(args) != 3: + notebook_utils.warn('nbcp requires 2 notebooks as arguments:') + notebook_utils.warn(' $ ...nbcp src_notebook.ipynb dest_notebook.ipynb' + ' [--nbfmt --args --supported]') + sys.exit(1) + + src = pathlib.Path(args[1]) + dest = pathlib.Path(args[2]) + + # Open files and copy cells. + with src.open('rt') as src_fh, dest.open('rt') as dest_fh: + dest_nb = nbformat.read(dest_fh, nbformat.NO_CONVERT) + stats = notebook_utils.copy_code_cells( + nbformat.read(src_fh, nbformat.NO_CONVERT), dest_nb) + + # Write over destination file. + with dest.open('wt') as dest_fh: + nbformat.write(dest_nb, dest_fh) + + warnings = process_stats(stats) + + # Format the notebook. + nbfmt.main(['', str(dest)]) + + return int(warnings) + + +if __name__ == '__main__': + app.run(main) diff --git a/tools/tensorflow_docs/tools/nbfmt/notebook_utils.py b/tools/tensorflow_docs/tools/nbfmt/notebook_utils.py index 577f2e1e18e..6e5e8a36553 100644 --- a/tools/tensorflow_docs/tools/nbfmt/notebook_utils.py +++ b/tools/tensorflow_docs/tools/nbfmt/notebook_utils.py @@ -12,15 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -r"""A collection of utilties for working with notebook files.""" -import json +"""A collection of utilities for working with notebook files.""" +import dataclasses import hashlib +import json +import logging import pathlib import sys import textwrap from typing import Any, Dict, List, Optional, Tuple, Union +from nbformat import notebooknode + def collect_notebook_paths( filepaths: List[Union[str, pathlib.Path]] @@ -109,3 +113,60 @@ def del_entries_except(data: Dict[str, Any], keep: List[str]) -> None: to_delete = set(data.keys()) - frozenset(keep) for key in to_delete: del data[key] + + +@dataclasses.dataclass +class CellCopyStats: + processed_cells: int = 0 + updated_cells: int = 0 + unmatched_target_cells: list[str] = dataclasses.field(default_factory=list) + unmatched_source_cells: list[str] = dataclasses.field(default_factory=list) + out_of_order_target_cells: list[str] = dataclasses.field(default_factory=list) + + +def copy_code_cells(source: notebooknode.NotebookNode, + target: notebooknode.NotebookNode) -> CellCopyStats: + """Copies code cell source and outputs from source to target.""" + stats = CellCopyStats() + if len(source.cells) != len(target.cells): + logging.warning('Source and target notebook have unequal cell counts.') + + target_indices = {c['metadata']['id']: i for i, c in enumerate(target.cells)} + + last_target_idx = -1 + for cell in source.cells: + cell_id = cell['metadata']['id'] + + if cell.get('cell_type') != 'code': + target_indices.pop(cell_id, None) + continue + + if cell_id not in target_indices: + logging.warning('Cell %s is not present in the target notebook.', cell_id) + stats.unmatched_target_cells.append(cell_id) + continue + + stats.processed_cells += 1 + + if last_target_idx > (target_idx := target_indices.pop(cell_id)): + logging.warning( + 'Cell %s has been moved earlier in the notebook than expected.', + cell_id) + stats.out_of_order_target_cells.append(cell_id) + + target_cell = target.cells[target_idx] + modified = False + for field in 'source', 'outputs': + new_value = cell.get(field) + if target_cell.get(field) != new_value: + target_cell[field] = new_value + modified = True + + stats.updated_cells += modified + last_target_idx = target_idx + + stats.unmatched_source_cells = [ + c for c, i in target_indices.items() + if target.cells[i].get('cell_type') == 'code' + ] + return stats diff --git a/tools/tensorflow_docs/tools/nbfmt/notebook_utils_test.py b/tools/tensorflow_docs/tools/nbfmt/notebook_utils_test.py new file mode 100644 index 00000000000..4866dcb21b8 --- /dev/null +++ b/tools/tensorflow_docs/tools/nbfmt/notebook_utils_test.py @@ -0,0 +1,169 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Unit tests for notebook_utils.""" + +from absl.testing import absltest +from nbformat import notebooknode + +from tensorflow_docs.tools.nbfmt import notebook_utils + + +class NotebookUtilsTest(absltest.TestCase): + + def test_copy_code_cells(self): + source_notebook = notebooknode.NotebookNode({ + "cells": [{ + "cell_type": "markdown", + "metadata": { + "id": "id1" + }, + "source": ["## Some text"] + }, { + "cell_type": "code", + "metadata": { + "id": "id2" + }, + "source": ["# some python\n", "print('hi')"] + }] + }) + target_notebook = notebooknode.NotebookNode({ + "cells": [{ + "cell_type": "markdown", + "metadata": { + "id": "id1" + }, + "source": ["## Different text"] + }, { + "cell_type": "code", + "metadata": { + "id": "id2" + }, + "source": ["# some old python\n", "print 'hi'"] + }] + }) + + stat = notebook_utils.copy_code_cells(source_notebook, target_notebook) + + # Ensure we have the expected contents (markdown untouched, code copied) + self.assertIn("## Different text", target_notebook.cells[0]["source"]) + self.assertIn("print('hi')", target_notebook.cells[1]["source"]) + + # Ensure only the code cell was updated + self.assertEqual(1, stat.updated_cells) + self.assertEqual(1, stat.processed_cells) + + def test_missing_target_cell(self): + source_notebook = notebooknode.NotebookNode({ + "cells": [{ + "cell_type": "code", + "metadata": { + "id": "cell1" + }, + "source": ["# some python\n", "print('hi')"] + }, { + "cell_type": "markdown", + "metadata": { + "id": "md1" + }, + "source": ["## text"] + }] + }) + target_notebook = notebooknode.NotebookNode({ + "cells": [{ + "cell_type": "code", + "metadata": { + "id": "cell2" + }, + "source": ["# some old python\n", "print 'hi'"] + }] + }) + + stat = notebook_utils.copy_code_cells(source_notebook, target_notebook) + + self.assertEqual(0, stat.updated_cells) + self.assertEqual(0, stat.processed_cells) + self.assertEqual(["cell1"], stat.unmatched_target_cells) + + def test_missing_source_cell(self): + source_notebook = notebooknode.NotebookNode({ + "cells": [{ + "cell_type": "code", + "metadata": { + "id": "cell1" + }, + "source": ["# some python\n", "print('hi')"] + }] + }) + target_notebook = notebooknode.NotebookNode({ + "cells": [{ + "cell_type": "code", + "metadata": { + "id": "cell2" + }, + "source": ["# some old python\n", "print 'hi'"] + }, { + "cell_type": "markdown", + "metadata": { + "id": "text1" + }, + "source": ["## texty texty"] + }] + }) + + stat = notebook_utils.copy_code_cells(source_notebook, target_notebook) + + self.assertEqual(0, stat.updated_cells) + self.assertEqual(0, stat.processed_cells) + self.assertEqual(["cell2"], stat.unmatched_source_cells) + + def test_cell_ordering(self): + source_notebook = notebooknode.NotebookNode({ + "cells": [{ + "cell_type": "code", + "metadata": { + "id": "cell1" + }, + "source": ["# first code\n"] + }, { + "cell_type": "code", + "metadata": { + "id": "cell2" + }, + "source": ["# second code\n"] + }] + }) + target_notebook = notebooknode.NotebookNode({ + "cells": [{ + "cell_type": "code", + "metadata": { + "id": "cell2" + }, + "source": ["# update me\n"] + }, { + "cell_type": "code", + "metadata": { + "id": "cell1" + }, + "source": ["# update me\n"] + }] + }) + + stat = notebook_utils.copy_code_cells(source_notebook, target_notebook) + + self.assertEqual(2, stat.updated_cells) + self.assertIn("cell2", stat.out_of_order_target_cells) + + +if __name__ == "__main__": + absltest.main() From e513e01d0f202f697b9e27209f6078e888417e0a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Johannes=20Ball=C3=A9?= Date: Mon, 15 Aug 2022 09:26:34 -0700 Subject: [PATCH 021/636] Adds model compression tutorial. PiperOrigin-RevId: 467687576 --- site/en/tutorials/_toc.yaml | 12 + .../tutorials/optimization/compression.ipynb | 1183 +++++++++++++++++ 2 files changed, 1195 insertions(+) create mode 100644 site/en/tutorials/optimization/compression.ipynb diff --git a/site/en/tutorials/_toc.yaml b/site/en/tutorials/_toc.yaml index 7c629d69dea..188a0975aa0 100644 --- a/site/en/tutorials/_toc.yaml +++ b/site/en/tutorials/_toc.yaml @@ -135,6 +135,7 @@ toc: status: external - title: "Image captioning" path: /tutorials/text/image_captioning + - title: "Audio" style: accordion section: @@ -186,6 +187,16 @@ toc: path: /tutorials/generative/data_compression status: new +- title: "Model optimization" + style: accordion + section: + - title: "Scalable model compression with EPR" + path: /tutorials/optimization/compression + status: new + - title: "TensorFlow model optimization" + status: external + path: /model_optimization + - title: "Model Understanding" style: accordion section: @@ -196,6 +207,7 @@ toc: - title: "Probabilistic regression" path: /probability/examples/Probabilistic_Layers_Regression status: external + - title: "Reinforcement learning" style: accordion section: diff --git a/site/en/tutorials/optimization/compression.ipynb b/site/en/tutorials/optimization/compression.ipynb new file mode 100644 index 00000000000..75b5517aa57 --- /dev/null +++ b/site/en/tutorials/optimization/compression.ipynb @@ -0,0 +1,1183 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Compression Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qFdPvlXBOdUN" + }, + "source": [ + "# Scalable model compression" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " View on TensorFlow.org\n", + " \n", + " \n", + " \n", + " Run in Google Colab\n", + " \n", + " \n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xHxb-dlhMIzW" + }, + "source": [ + "## Overview\n", + "\n", + "This notebook shows how to compress a model using [TensorFlow Compression](https://github.com/tensorflow/compression).\n", + "\n", + "In the example below, we compress the weights of an MNIST classifier to a much smaller size than their floating point representation, while retaining classification accuracy. This is done by a two step process, based on the paper [Scalable Model Compression by Entropy Penalized Reparameterization](https://arxiv.org/abs/1906.06624):\n", + "\n", + "- Training a \"compressible\" model with an explicit **entropy penalty** during training, which encourages compressibility of the model parameters. The weight on this penalty, $\\lambda$, enables continuously controlling the trade-off between the compressed model size and its accuracy.\n", + "\n", + "- Encoding the compressible model into a compressed model using a coding scheme that is matched with the penalty, meaning that the penalty is a good predictor for model size. This ensures that the method doesn't require multiple iterations of training, compressing, and re-training the model for fine-tuning.\n", + "\n", + "This method is strictly concerned with compressed model size, not with computational complexity. It can be combined with a technique like model pruning to reduce size and complexity.\n", + "\n", + "Example compression results on various models:\n", + "\n", + "Model (dataset) | Model size | Comp. ratio | Top-1 error comp. (uncomp.)\n", + "------------------------|------------|-------------|------------\n", + "LeNet300-100 (MNIST) | 8.56 KB | 124x | 1.9% (1.6%)\n", + "LeNet5-Caffe (MNIST) | 2.84 KB | 606x | 1.0% (0.7%)\n", + "VGG-16 (CIFAR-10) | 101 KB | 590x | 10.0% (6.6%)\n", + "ResNet-20-4 (CIFAR-10) | 128 KB | 134x | 8.8% (5.0%)\n", + "ResNet-18 (ImageNet) | 1.97 MB | 24x | 30.0% (30.0%)\n", + "ResNet-50 (ImageNet) | 5.49 MB | 19x | 26.0% (25.0%)\n", + "\n", + "Applications include:\n", + "- Deploying/broadcasting models to edge devices on a large scale, saving bandwidth in transit.\n", + "- Communicating global model state to clients in federated learning. The model architecture (number of hidden units, etc.) is unchanged from the initial model, and clients can continue learning on the decompressed model.\n", + "- Performing inference on extremely memory limited clients. During inference, the weights of each layer can be sequentially decompressed, and discarded right after the activations are computed." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MUXex9ctTuDB" + }, + "source": [ + "## Setup\n", + "\n", + "Install Tensorflow Compression via `pip`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K489KsEgxuLI" + }, + "outputs": [], + "source": [ + "!pip install tensorflow-compression~=$(pip show tensorflow | perl -p -0777 -e 's/.*Version: (\\d\\.\\d).*/\\1.0/sg')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WfVAmHCVxpTS" + }, + "source": [ + "Import library dependencies." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IqR2PQG4ZaZ0" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import tensorflow as tf\n", + "import tensorflow_compression as tfc\n", + "import tensorflow_datasets as tfds\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wsncKT2iymgQ" + }, + "source": [ + "## Define and train a basic MNIST classifier\n", + "\n", + "In order to effectively compress dense and convolutional layers, we need to define custom layer classes. These are analogous to the layers under `tf.keras.layers`, but we will subclass them later to effectively implement Entropy Penalized Reparameterization (EPR). For this purpose, we also add a copy constructor.\n", + "\n", + "First, we define a standard dense layer:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "n_7ZRqiaO1WQ" + }, + "outputs": [], + "source": [ + "class CustomDense(tf.keras.layers.Layer):\n", + "\n", + " def __init__(self, filters, name=\"dense\"):\n", + " super().__init__(name=name)\n", + " self.filters = filters\n", + "\n", + " @classmethod\n", + " def copy(cls, other, **kwargs):\n", + " \"\"\"Returns an instantiated and built layer, initialized from `other`.\"\"\"\n", + " self = cls(filters=other.filters, name=other.name, **kwargs)\n", + " self.build(None, other=other)\n", + " return self\n", + "\n", + " def build(self, input_shape, other=None):\n", + " \"\"\"Instantiates weights, optionally initializing them from `other`.\"\"\"\n", + " if other is None:\n", + " kernel_shape = (input_shape[-1], self.filters)\n", + " kernel = tf.keras.initializers.GlorotUniform()(shape=kernel_shape)\n", + " bias = tf.keras.initializers.Zeros()(shape=(self.filters,))\n", + " else:\n", + " kernel, bias = other.kernel, other.bias\n", + " self.kernel = tf.Variable(\n", + " tf.cast(kernel, self.variable_dtype), name=\"kernel\")\n", + " self.bias = tf.Variable(\n", + " tf.cast(bias, self.variable_dtype), name=\"bias\")\n", + " self.built = True\n", + "\n", + " def call(self, inputs):\n", + " outputs = tf.linalg.matvec(self.kernel, inputs, transpose_a=True)\n", + " outputs = tf.nn.bias_add(outputs, self.bias)\n", + " return tf.nn.leaky_relu(outputs)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RUZkcXegc0yR" + }, + "source": [ + "And similarly, a 2D convolutional layer:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RDibtb8EWCSj" + }, + "outputs": [], + "source": [ + "class CustomConv2D(tf.keras.layers.Layer):\n", + "\n", + " def __init__(self, filters, kernel_size,\n", + " strides=1, padding=\"SAME\", name=\"conv2d\"):\n", + " super().__init__(name=name)\n", + " self.filters = filters\n", + " self.kernel_size = kernel_size\n", + " self.strides = strides\n", + " self.padding = padding\n", + "\n", + " @classmethod\n", + " def copy(cls, other, **kwargs):\n", + " \"\"\"Returns an instantiated and built layer, initialized from `other`.\"\"\"\n", + " self = cls(filters=other.filters, kernel_size=other.kernel_size,\n", + " strides=other.strides, padding=other.padding, name=other.name,\n", + " **kwargs)\n", + " self.build(None, other=other)\n", + " return self\n", + "\n", + " def build(self, input_shape, other=None):\n", + " \"\"\"Instantiates weights, optionally initializing them from `other`.\"\"\"\n", + " if other is None:\n", + " kernel_shape = 2 * (self.kernel_size,) + (input_shape[-1], self.filters)\n", + " kernel = 
tf.keras.initializers.GlorotUniform()(shape=kernel_shape)\n", + " bias = tf.keras.initializers.Zeros()(shape=(self.filters,))\n", + " else:\n", + " kernel, bias = other.kernel, other.bias\n", + " self.kernel = tf.Variable(\n", + " tf.cast(kernel, self.variable_dtype), name=\"kernel\")\n", + " self.bias = tf.Variable(\n", + " tf.cast(bias, self.variable_dtype), name=\"bias\")\n", + " self.built = True\n", + "\n", + " def call(self, inputs):\n", + " outputs = tf.nn.convolution(\n", + " inputs, self.kernel, strides=self.strides, padding=self.padding)\n", + " outputs = tf.nn.bias_add(outputs, self.bias)\n", + " return tf.nn.leaky_relu(outputs)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6xWa1hHMdCpG" + }, + "source": [ + "Before we continue with model compression, let's check that we can successfully train a regular classifier.\n", + "\n", + "Define the model architecture:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8yZESLgW-vp1" + }, + "outputs": [], + "source": [ + "classifier = tf.keras.Sequential([\n", + " CustomConv2D(20, 5, strides=2, name=\"conv_1\"),\n", + " CustomConv2D(50, 5, strides=2, name=\"conv_2\"),\n", + " tf.keras.layers.Flatten(),\n", + " CustomDense(500, name=\"fc_1\"),\n", + " CustomDense(10, name=\"fc_2\"),\n", + "], name=\"classifier\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9iRSvt_CdUuY" + }, + "source": [ + "Load the training data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L4bsA3HFF2k0" + }, + "outputs": [], + "source": [ + "def normalize_img(image, label):\n", + " \"\"\"Normalizes images: `uint8` -> `float32`.\"\"\"\n", + " return tf.cast(image, tf.float32) / 255., label\n", + "\n", + "training_dataset, validation_dataset = tfds.load(\n", + " \"mnist\",\n", + " split=[\"train\", \"test\"],\n", + " shuffle_files=True,\n", + " as_supervised=True,\n", + " with_info=False,\n", + ")\n", + "training_dataset = training_dataset.map(normalize_img)\n", + "validation_dataset = validation_dataset.map(normalize_img)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rR9WYjt_daRG" + }, + "source": [ + "Finally, train the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ROn2DbzsBirI" + }, + "outputs": [], + "source": [ + "def train_model(model, training_data, validation_data, **kwargs):\n", + " model.compile(\n", + " optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),\n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],\n", + " # Uncomment this to ease debugging:\n", + " # run_eagerly=True,\n", + " )\n", + " kwargs.setdefault(\"epochs\", 5)\n", + " kwargs.setdefault(\"verbose\", 1)\n", + " log = model.fit(\n", + " training_data.batch(128).prefetch(8),\n", + " validation_data=validation_data.batch(128).cache(),\n", + " validation_freq=1,\n", + " **kwargs,\n", + " )\n", + " return log.history[\"val_sparse_categorical_accuracy\"][-1]\n", + "\n", + "classifier_accuracy = train_model(\n", + " classifier, training_dataset, validation_dataset)\n", + "\n", + "print(f\"Accuracy: {classifier_accuracy:0.4f}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QupWKZ91di-y" + }, + "source": [ + "Success! The model trained fine, and reached an accuracy of over 98% on the validation set within 5 epochs." 
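As a point of reference for the compression results that follow, it helps to fix the uncompressed footprint of this baseline. A back-of-the-envelope sketch (assuming the weights are stored as plain float32, four bytes per parameter):

```python
# Rough uncompressed-size baseline for the classifier trained above
# (assumes four bytes per parameter, i.e. float32 storage).
num_params = classifier.count_params()
print(f"{num_params} parameters, about {num_params * 4 / 2**20:.2f} MiB uncompressed")
```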
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yRqZFwb5dqQm" + }, + "source": [ + "## Train a compressible classifier\n", + "\n", + "Entropy Penalized Reparameterization (EPR) has two main ingredients:\n", + "\n", + "- Applying a **penalty** to the model weights during training which corresponds to their entropy under a probabilistic model, which is matched with the encoding scheme of the weights. Below, we define a Keras `Regularizer` which implements this penalty.\n", + "\n", + "- **Reparameterizing** the weights, i.e. bringing them into a latent representation which is more compressible (yields a better trade-off between compressibility and model performance). For convolutional kernels, [it has been shown](https://arxiv.org/abs/1906.06624) that the Fourier domain is a good representation. For other parameters, the below example simply uses scalar quantization (rounding) with a varying quantization step size." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e4jmnqEmO6eB" + }, + "source": [ + "First, define the penalty.\n", + "\n", + "The example below uses a code/probabilistic model implemented in the `tfc.PowerLawEntropyModel` class, inspired by the paper [Optimizing the Communication-Accuracy Trade-off in Federated Learning with Rate-Distortion Theory](https://arxiv.org/abs/2201.02664). The penalty is defined as:\n", + "$$ \\log \\Bigl(\\frac {|x| + \\alpha} \\alpha\\Bigr), $$\n", + "where $x$ is one element of the model parameter or its latent representation, and $\\alpha$ is a small constant for numerical stability around values of 0." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hh57nxjuwocc" + }, + "outputs": [], + "source": [ + "_ = tf.linspace(-5., 5., 501)\n", + "plt.plot(_, tfc.PowerLawEntropyModel(0).penalty(_));\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Gr3-6vLrwo-H" + }, + "source": [ + "The penalty is effectively a regularization loss (sometimes called \"weight loss\"). The fact that it is concave with a cusp at zero encourages weight sparsity. The coding scheme applied for compressing the weights, an [Elias gamma code](https://en.wikipedia.org/wiki/Elias_gamma_coding), produces codes of length $ 1 + \\lfloor \\log_2 |x| \\rfloor $ bits for the magnitude of the element. That is, it is matched to the penalty, and applying the penalty thus minimizes the expected code length." 
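One way to make the matching concrete (an informal check, not from the original text): for a quantized magnitude $|x| \gg \alpha$,

$$ \log \Bigl(\frac{|x| + \alpha}{\alpha}\Bigr) \approx \log |x| - \log \alpha = (\log 2)\,\log_2 |x| - \log \alpha, $$

so up to an affine transformation the penalty tracks $\log_2 |x|$, which stays within one bit of the $1 + \lfloor \log_2 |x| \rfloor$ bits the code spends on that magnitude. Minimizing the penalty therefore minimizes the expected code length as well.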
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H1Yt6e1ub6pU" + }, + "outputs": [], + "source": [ + "class PowerLawRegularizer(tf.keras.regularizers.Regularizer):\n", + "\n", + "  def __init__(self, lmbda):\n", + "    super().__init__()\n", + "    self.lmbda = lmbda\n", + "\n", + "  def __call__(self, variable):\n", + "    em = tfc.PowerLawEntropyModel(coding_rank=variable.shape.rank)\n", + "    return self.lmbda * em.penalty(variable)\n", + "\n", + "# Normalizing the weight of the penalty by the number of model parameters is a\n", + "# good rule of thumb to produce comparable results across models.\n", + "regularizer = PowerLawRegularizer(lmbda=2./classifier.count_params())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kyQc35QTf8Aq" + }, + "source": [ + "Second, define subclasses of `CustomDense` and `CustomConv2D` which have the following additional functionality:\n", + "\n", + "- They take an instance of the above regularizer and apply it to the kernels and biases during training.\n", + "- They define `kernel` and `bias` as `@property`s that perform quantization with straight-through gradients whenever the variables are accessed. This accurately reflects the computation that is carried out later in the compressed model.\n", + "- They define additional `log_step` variables, which represent the logarithm of the quantization step size. The coarser the quantization, the smaller the model size, but the lower the accuracy. The quantization step sizes are trainable for each model parameter, so that performing optimization on the penalized loss function will determine what quantization step size is best.\n", + "\n", + "The quantization operation is defined as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "60fMt3avgSFw" + }, + "outputs": [], + "source": [ + "def quantize(latent, log_step):\n", + "  step = tf.exp(log_step)\n", + "  return tfc.round_st(latent / step) * step\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "stKrchp7mB0b" + }, + "source": [ + "With that, we can define the dense layer:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ciz1F1WsXre_" + }, + "outputs": [], + "source": [ + "class CompressibleDense(CustomDense):\n", + "\n", + "  def __init__(self, regularizer, *args, **kwargs):\n", + "    super().__init__(*args, **kwargs)\n", + "    self.regularizer = regularizer\n", + "\n", + "  def build(self, input_shape, other=None):\n", + "    \"\"\"Instantiates weights, optionally initializing them from `other`.\"\"\"\n", + "    super().build(input_shape, other=other)\n", + "    if other is not None and hasattr(other, \"kernel_log_step\"):\n", + "      kernel_log_step = other.kernel_log_step\n", + "      bias_log_step = other.bias_log_step\n", + "    else:\n", + "      kernel_log_step = bias_log_step = -4.\n", + "    self.kernel_log_step = tf.Variable(\n", + "        tf.cast(kernel_log_step, self.variable_dtype), name=\"kernel_log_step\")\n", + "    self.bias_log_step = tf.Variable(\n", + "        tf.cast(bias_log_step, self.variable_dtype), name=\"bias_log_step\")\n", + "    self.add_loss(lambda: self.regularizer(\n", + "        self.kernel_latent / tf.exp(self.kernel_log_step)))\n", + "    self.add_loss(lambda: self.regularizer(\n", + "        self.bias_latent / tf.exp(self.bias_log_step)))\n", + "\n", + "  @property\n", + "  def kernel(self):\n", + "    return quantize(self.kernel_latent, self.kernel_log_step)\n", + "\n", + "  @kernel.setter\n", + "  def kernel(self, kernel):\n", + "    self.kernel_latent = 
tf.Variable(kernel, name=\"kernel_latent\")\n", + "\n", + " @property\n", + " def bias(self):\n", + " return quantize(self.bias_latent, self.bias_log_step)\n", + "\n", + " @bias.setter\n", + " def bias(self, bias):\n", + " self.bias_latent = tf.Variable(bias, name=\"bias_latent\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CsykbQO0hxzW" + }, + "source": [ + "The convolutional layer is analogous. In addition, the convolution kernel is stored as its real-valued discrete Fourier transform (RDFT) whenever the kernel is set, and the transform is inverted whenever the kernel is used. Since the different frequency components of the kernel tend to be more or less compressible, each of them gets its own quantization step size assigned.\n", + "\n", + "Define the Fourier transform and its inverse as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rUFMKGHDguJS" + }, + "outputs": [], + "source": [ + "def to_rdft(kernel, kernel_size):\n", + " # The kernel has shape (H, W, I, O) -> transpose to take DFT over last two\n", + " # dimensions.\n", + " kernel = tf.transpose(kernel, (2, 3, 0, 1))\n", + " # The RDFT has type complex64 and shape (I, O, FH, FW).\n", + " kernel_rdft = tf.signal.rfft2d(kernel)\n", + " # Map real and imaginary parts into regular floats. The result is float32\n", + " # and has shape (I, O, FH, FW, 2).\n", + " kernel_rdft = tf.stack(\n", + " [tf.math.real(kernel_rdft), tf.math.imag(kernel_rdft)], axis=-1)\n", + " # Divide by kernel size to make the DFT orthonormal (length-preserving).\n", + " return kernel_rdft / kernel_size\n", + "\n", + "def from_rdft(kernel_rdft, kernel_size):\n", + " # Undoes the transformations in to_rdft.\n", + " kernel_rdft *= kernel_size\n", + " kernel_rdft = tf.dtypes.complex(*tf.unstack(kernel_rdft, axis=-1))\n", + " kernel = tf.signal.irfft2d(kernel_rdft, fft_length=2 * (kernel_size,))\n", + " return tf.transpose(kernel, (2, 3, 0, 1))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "esZZrJ5ImVDY" + }, + "source": [ + "With that, define the convolutional layer as:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YKzXBNCO7bjB" + }, + "outputs": [], + "source": [ + "class CompressibleConv2D(CustomConv2D):\n", + "\n", + " def __init__(self, regularizer, *args, **kwargs):\n", + " super().__init__(*args, **kwargs)\n", + " self.regularizer = regularizer\n", + "\n", + " def build(self, input_shape, other=None):\n", + " \"\"\"Instantiates weights, optionally initializing them from `other`.\"\"\"\n", + " super().build(input_shape, other=other)\n", + " if other is not None and hasattr(other, \"kernel_log_step\"):\n", + " kernel_log_step = other.kernel_log_step\n", + " bias_log_step = other.bias_log_step\n", + " else:\n", + " kernel_log_step = tf.fill(self.kernel_latent.shape[2:], -4.)\n", + " bias_log_step = -4.\n", + " self.kernel_log_step = tf.Variable(\n", + " tf.cast(kernel_log_step, self.variable_dtype), name=\"kernel_log_step\")\n", + " self.bias_log_step = tf.Variable(\n", + " tf.cast(bias_log_step, self.variable_dtype), name=\"bias_log_step\")\n", + " self.add_loss(lambda: self.regularizer(\n", + " self.kernel_latent / tf.exp(self.kernel_log_step)))\n", + " self.add_loss(lambda: self.regularizer(\n", + " self.bias_latent / tf.exp(self.bias_log_step)))\n", + "\n", + " @property\n", + " def kernel(self):\n", + " kernel_rdft = quantize(self.kernel_latent, self.kernel_log_step)\n", + " return from_rdft(kernel_rdft, 
self.kernel_size)\n", + "\n", + " @kernel.setter\n", + " def kernel(self, kernel):\n", + " kernel_rdft = to_rdft(kernel, self.kernel_size)\n", + " self.kernel_latent = tf.Variable(kernel_rdft, name=\"kernel_latent\")\n", + "\n", + " @property\n", + " def bias(self):\n", + " return quantize(self.bias_latent, self.bias_log_step)\n", + "\n", + " @bias.setter\n", + " def bias(self, bias):\n", + " self.bias_latent = tf.Variable(bias, name=\"bias_latent\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1-ekDDQ9jidI" + }, + "source": [ + "Define a classifier model with the same architecture as above, but using these modified layers:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TQgp84L7qalw" + }, + "outputs": [], + "source": [ + "def make_mnist_classifier(regularizer):\n", + " return tf.keras.Sequential([\n", + " CompressibleConv2D(regularizer, 20, 5, strides=2, name=\"conv_1\"),\n", + " CompressibleConv2D(regularizer, 50, 5, strides=2, name=\"conv_2\"),\n", + " tf.keras.layers.Flatten(),\n", + " CompressibleDense(regularizer, 500, name=\"fc_1\"),\n", + " CompressibleDense(regularizer, 10, name=\"fc_2\"),\n", + " ], name=\"classifier\")\n", + "\n", + "compressible_classifier = make_mnist_classifier(regularizer)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hJ-TMHE1kNFc" + }, + "source": [ + "And train the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6L5ZJAX4EiXW" + }, + "outputs": [], + "source": [ + "penalized_accuracy = train_model(\n", + " compressible_classifier, training_dataset, validation_dataset)\n", + "\n", + "print(f\"Accuracy: {penalized_accuracy:0.4f}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZuE4NeY_kTDz" + }, + "source": [ + "The compressible model has reached a similar accuracy as the plain classifier.\n", + "\n", + "However, the model is not actually compressed yet. To do this, we define another set of subclasses which store the kernels and biases in their compressed form – as a sequence of bits." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AZhj8A2gnBkD" + }, + "source": [ + "## Compress the classifier\n", + "\n", + "The subclasses of `CustomDense` and `CustomConv2D` defined below convert the weights of a compressible dense layer into binary strings. In addition, they store the logarithm of the quantization step size at half precision to save space. 
Whenever the kernel or bias is accessed through the `@property`, they are decompressed from their string representation and dequantized.\n", + "\n", + "First, define functions to compress and decompress a model parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xS19FhDajeto" + }, + "outputs": [], + "source": [ + "def compress_latent(latent, log_step, name):\n", + " em = tfc.PowerLawEntropyModel(latent.shape.rank)\n", + " compressed = em.compress(latent / tf.exp(log_step))\n", + " compressed = tf.Variable(compressed, name=f\"{name}_compressed\")\n", + " log_step = tf.cast(log_step, tf.float16)\n", + " log_step = tf.Variable(log_step, name=f\"{name}_log_step\")\n", + " return compressed, log_step\n", + "\n", + "def decompress_latent(compressed, shape, log_step):\n", + " latent = tfc.PowerLawEntropyModel(len(shape)).decompress(compressed, shape)\n", + " step = tf.exp(tf.cast(log_step, latent.dtype))\n", + " return latent * step\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bPPABE9fjqHJ" + }, + "source": [ + "With these, we can define `CompressedDense`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CnaiNzhgaZ7s" + }, + "outputs": [], + "source": [ + "class CompressedDense(CustomDense):\n", + "\n", + " def build(self, input_shape, other=None):\n", + " assert isinstance(other, CompressibleDense)\n", + " self.input_channels = other.kernel.shape[0]\n", + " self.kernel_compressed, self.kernel_log_step = compress_latent(\n", + " other.kernel_latent, other.kernel_log_step, \"kernel\")\n", + " self.bias_compressed, self.bias_log_step = compress_latent(\n", + " other.bias_latent, other.bias_log_step, \"bias\")\n", + " self.built = True\n", + "\n", + " @property\n", + " def kernel(self):\n", + " kernel_shape = (self.input_channels, self.filters)\n", + " return decompress_latent(\n", + " self.kernel_compressed, kernel_shape, self.kernel_log_step)\n", + "\n", + " @property\n", + " def bias(self):\n", + " bias_shape = (self.filters,)\n", + " return decompress_latent(\n", + " self.bias_compressed, bias_shape, self.bias_log_step)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tzvMCM0El2iW" + }, + "source": [ + "The convolutional layer class is analogous to the above." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hS-2ADA6iWeQ" + }, + "outputs": [], + "source": [ + "class CompressedConv2D(CustomConv2D):\n", + "\n", + " def build(self, input_shape, other=None):\n", + " assert isinstance(other, CompressibleConv2D)\n", + " self.input_channels = other.kernel.shape[2]\n", + " self.kernel_compressed, self.kernel_log_step = compress_latent(\n", + " other.kernel_latent, other.kernel_log_step, \"kernel\")\n", + " self.bias_compressed, self.bias_log_step = compress_latent(\n", + " other.bias_latent, other.bias_log_step, \"bias\")\n", + " self.built = True\n", + "\n", + " @property\n", + " def kernel(self):\n", + " rdft_shape = (self.input_channels, self.filters,\n", + " self.kernel_size, self.kernel_size // 2 + 1, 2)\n", + " kernel_rdft = decompress_latent(\n", + " self.kernel_compressed, rdft_shape, self.kernel_log_step)\n", + " return from_rdft(kernel_rdft, self.kernel_size)\n", + "\n", + " @property\n", + " def bias(self):\n", + " bias_shape = (self.filters,)\n", + " return decompress_latent(\n", + " self.bias_compressed, bias_shape, self.bias_log_step)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cJLCPoe3l8jG" + }, + "source": [ + "To turn the compressible model into a compressed one, we can conveniently use the `clone_model` function. `compress_layer` converts any compressible layer into a compressed one, and simply passes through any other types of layers (such as `Flatten`, etc.).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WEHroUyhG56m" + }, + "outputs": [], + "source": [ + "def compress_layer(layer):\n", + " if isinstance(layer, CompressibleDense):\n", + " return CompressedDense.copy(layer)\n", + " if isinstance(layer, CompressibleConv2D):\n", + " return CompressedConv2D.copy(layer)\n", + " return type(layer).from_config(layer.get_config())\n", + "\n", + "compressed_classifier = tf.keras.models.clone_model(\n", + " compressible_classifier, clone_function=compress_layer)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b3wbN1XQmkDg" + }, + "source": [ + "Now, let's validate that the compressed model still performs as expected:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R95kuURITpa9" + }, + "outputs": [], + "source": [ + "compressed_classifier.compile(metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])\n", + "_, compressed_accuracy = compressed_classifier.evaluate(validation_dataset.batch(128))\n", + "\n", + "print(f\"Accuracy of the compressible classifier: {penalized_accuracy:0.4f}\")\n", + "print(f\"Accuracy of the compressed classifier: {compressed_accuracy:0.4f}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KtFhpXh6uaIY" + }, + "source": [ + "The classification accuracy of the compressed model is identical to the one achieved during training!\n", + "\n", + "In addition, the size of the compressed model weights is much smaller than the original model size:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qp-ecfuYufbs" + }, + "outputs": [], + "source": [ + "def get_weight_size_in_bytes(weight):\n", + " if weight.dtype == tf.string:\n", + " return tf.reduce_sum(tf.strings.length(weight, unit=\"BYTE\"))\n", + " else:\n", + " return tf.size(weight) * weight.dtype.size\n", + "\n", + "original_size = sum(map(get_weight_size_in_bytes, classifier.weights))\n", + "compressed_size = sum(map(get_weight_size_in_bytes, 
compressed_classifier.weights))\n", + "\n", + "print(f\"Size of original model weights: {original_size} bytes\")\n", + "print(f\"Size of compressed model weights: {compressed_size} bytes\")\n", + "print(f\"Compression ratio: {(original_size/compressed_size):0.0f}x\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K8A8v0df6TR2" + }, + "source": [ + "Storing the models on disk requires some overhead for the model architecture, function graphs, and so on.\n", + "\n", + "Lossless compression methods such as ZIP are good at compressing this type of data, but not the weights themselves. That is why there is still a significant benefit of EPR when counting model size inclusive of that overhead, after also applying ZIP compression:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4hunDYxH1zqb" + }, + "outputs": [], + "source": [ + "import os\n", + "import shutil\n", + "\n", + "def get_disk_size(model, path):\n", + "  model.save(path)\n", + "  zip_path = shutil.make_archive(path, \"zip\", path)\n", + "  return os.path.getsize(zip_path)\n", + "\n", + "original_zip_size = get_disk_size(classifier, \"/tmp/classifier\")\n", + "compressed_zip_size = get_disk_size(\n", + "    compressed_classifier, \"/tmp/compressed_classifier\")\n", + "\n", + "print(f\"Original on-disk size (ZIP compressed): {original_zip_size} bytes\")\n", + "print(f\"Compressed on-disk size (ZIP compressed): {compressed_zip_size} bytes\")\n", + "print(f\"Compression ratio: {(original_zip_size/compressed_zip_size):0.0f}x\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FSITvJrlAhZs" + }, + "source": [ + "## Regularization effect and size–accuracy trade-off\n", + "\n", + "Above, the $\lambda$ hyperparameter was set to 2 (normalized by the number of parameters in the model). As we increase $\lambda$, the penalty on the weights' entropy grows heavier and heavier, pushing the weights to become more and more compressible.\n", + "\n", + "For low values, the penalty can act like a weight regularizer. It actually has a beneficial effect on the generalization performance of the classifier, and can lead to a slightly higher accuracy on the validation dataset:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "4rhmKu98FdPJ" + }, + "outputs": [], + "source": [ + "#@title\n", + "\n", + "print(f\"Accuracy of the vanilla classifier: {classifier_accuracy:0.4f}\")\n", + "print(f\"Accuracy of the penalized classifier: {penalized_accuracy:0.4f}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9UCfC4LQFdjL" + }, + "source": [ + "For higher values, we see a smaller and smaller model size, but also a gradually diminishing accuracy. To see this, let's train a few models and plot their size vs. 
accuracy:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "diApPKHbAIqa" + }, + "outputs": [], + "source": [ + "def compress_and_evaluate_model(lmbda):\n", + " print(f\"lambda={lmbda:0.0f}: training...\", flush=True)\n", + " regularizer = PowerLawRegularizer(lmbda=lmbda/classifier.count_params())\n", + " compressible_classifier = make_mnist_classifier(regularizer)\n", + " train_model(\n", + " compressible_classifier, training_dataset, validation_dataset, verbose=0)\n", + " print(\"compressing...\", flush=True)\n", + " compressed_classifier = tf.keras.models.clone_model(\n", + " compressible_classifier, clone_function=compress_layer)\n", + " compressed_size = sum(map(\n", + " get_weight_size_in_bytes, compressed_classifier.weights))\n", + " compressed_zip_size = float(get_disk_size(\n", + " compressed_classifier, \"/tmp/compressed_classifier\"))\n", + " print(\"evaluating...\", flush=True)\n", + " compressed_classifier = tf.keras.models.load_model(\n", + " \"/tmp/compressed_classifier\")\n", + " compressed_classifier.compile(\n", + " metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])\n", + " _, compressed_accuracy = compressed_classifier.evaluate(\n", + " validation_dataset.batch(128), verbose=0)\n", + " print()\n", + " return compressed_size, compressed_zip_size, compressed_accuracy\n", + "\n", + "lambdas = (2., 5., 10., 20., 50.)\n", + "metrics = [compress_and_evaluate_model(l) for l in lambdas]\n", + "metrics = tf.convert_to_tensor(metrics, tf.float32)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "bhAi85KzGqTz" + }, + "outputs": [], + "source": [ + "#@title\n", + "\n", + "def plot_broken_xaxis(ax, compressed_sizes, original_size, original_accuracy):\n", + " xticks = list(range(\n", + " int(tf.math.floor(min(compressed_sizes) / 5) * 5),\n", + " int(tf.math.ceil(max(compressed_sizes) / 5) * 5) + 1,\n", + " 5))\n", + " xticks.append(xticks[-1] + 10)\n", + " ax.set_xlim(xticks[0], xticks[-1] + 2)\n", + " ax.set_xticks(xticks[1:])\n", + " ax.set_xticklabels(xticks[1:-1] + [f\"{original_size:0.2f}\"])\n", + " ax.plot(xticks[-1], original_accuracy, \"o\", label=\"float32\")\n", + "\n", + "sizes, zip_sizes, accuracies = tf.transpose(metrics)\n", + "sizes /= 1024\n", + "zip_sizes /= 1024\n", + "\n", + "fig, (axl, axr) = plt.subplots(1, 2, sharey=True, figsize=(10, 4))\n", + "axl.plot(sizes, accuracies, \"o-\", label=\"EPR compressed\")\n", + "axr.plot(zip_sizes, accuracies, \"o-\", label=\"EPR compressed\")\n", + "plot_broken_xaxis(axl, sizes, original_size/1024, classifier_accuracy)\n", + "plot_broken_xaxis(axr, zip_sizes, original_zip_size/1024, classifier_accuracy)\n", + "\n", + "axl.set_xlabel(\"size of model weights [kbytes]\")\n", + "axr.set_xlabel(\"ZIP compressed on-disk model size [kbytes]\")\n", + "axl.set_ylabel(\"accuracy\")\n", + "axl.legend(loc=\"lower right\")\n", + "axr.legend(loc=\"lower right\")\n", + "axl.grid()\n", + "axr.grid()\n", + "for i in range(len(lambdas)):\n", + " axl.annotate(f\"$\\lambda = {lambdas[i]:0.0f}$\", (sizes[i], accuracies[i]),\n", + " xytext=(10, -5), xycoords=\"data\", textcoords=\"offset points\")\n", + " axr.annotate(f\"$\\lambda = {lambdas[i]:0.0f}$\", (zip_sizes[i], accuracies[i]),\n", + " xytext=(10, -5), xycoords=\"data\", textcoords=\"offset points\")\n", + "plt.tight_layout()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ajrHaFTAaLd2" + }, + "source": [ + "The plot should ideally show an elbow-shaped 
size–accuracy trade-off, but it is normal for accuracy metrics to be somewhat noisy. Depending on initialization, the curve can exhibit some kinks.\n", + "\n", + "Due to the regularization effect, the EPR compressed model is more accurate on the test set than the original model for small values of $\\lambda$. The EPR compressed model is also many times smaller, even if we compare the sizes after additional ZIP compression." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-RBhdXZTzoWw" + }, + "source": [ + "## Decompress the classifier\n", + "\n", + "`CompressedDense` and `CompressedConv2D` decompress their weights on every forward pass. This makes them ideal for memory-limited devices, but the decompression can be computationally expensive, especially for small batch sizes.\n", + "\n", + "To decompress the model once, and use it for further training or inference, we can convert it back into a model using regular or compressible layers. This can be useful in model deployment or federated learning scenarios.\n", + "\n", + "First, converting back into a plain model, we can do inference, and/or continue regular training without a compression penalty:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QBB2-X5XzvwB" + }, + "outputs": [], + "source": [ + "def decompress_layer(layer):\n", + " if isinstance(layer, CompressedDense):\n", + " return CustomDense.copy(layer)\n", + " if isinstance(layer, CompressedConv2D):\n", + " return CustomConv2D.copy(layer)\n", + " return type(layer).from_config(layer.get_config())\n", + "\n", + "decompressed_classifier = tf.keras.models.clone_model(\n", + " compressed_classifier, clone_function=decompress_layer)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ehE2ov8U0p0G" + }, + "outputs": [], + "source": [ + "decompressed_accuracy = train_model(\n", + " decompressed_classifier, training_dataset, validation_dataset, epochs=1)\n", + "\n", + "print(f\"Accuracy of the compressed classifier: {compressed_accuracy:0.4f}\")\n", + "print(f\"Accuracy of the decompressed classifier after one more epoch of training: {decompressed_accuracy:0.4f}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jiSCvemQ04o8" + }, + "source": [ + "Note that the validation accuracy drops after training for an additional epoch, since the training is done without regularization.\n", + "\n", + "Alternatively, we can convert the model back into a \"compressible\" one, for inference and/or further training with a compression penalty:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JDppVUdx1BvY" + }, + "outputs": [], + "source": [ + "def decompress_layer_with_penalty(layer):\n", + " if isinstance(layer, CompressedDense):\n", + " return CompressibleDense.copy(layer, regularizer=regularizer)\n", + " if isinstance(layer, CompressedConv2D):\n", + " return CompressibleConv2D.copy(layer, regularizer=regularizer)\n", + " return type(layer).from_config(layer.get_config())\n", + "\n", + "decompressed_classifier = tf.keras.models.clone_model(\n", + " compressed_classifier, clone_function=decompress_layer_with_penalty)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AJcnyOFW2IcK" + }, + "outputs": [], + "source": [ + "decompressed_accuracy = train_model(\n", + " decompressed_classifier, training_dataset, validation_dataset, epochs=1)\n", + "\n", + "print(f\"Accuracy of the compressed classifier: 
{compressed_accuracy:0.4f}\")\n", + "print(f\"Accuracy of the decompressed classifier after one more epoch of training: {decompressed_accuracy:0.4f}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ciol315T_TwQ" + }, + "source": [ + "Here, the accuracy improves after training for an additional epoch." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "Tce3stUlHN0L", + "xHxb-dlhMIzW" + ], + "name": "compression.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 2e756845962c4ad735744ceb543a3b28b1729c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Ball=C3=A9?= Date: Mon, 15 Aug 2022 09:26:50 -0700 Subject: [PATCH 022/636] Shows how to sample from a compression model by feeding random bits. PiperOrigin-RevId: 467687646 --- .../generative/data_compression.ipynb | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/site/en/tutorials/generative/data_compression.ipynb b/site/en/tutorials/generative/data_compression.ipynb index f8a5fe3d201..b8edf946ae3 100644 --- a/site/en/tutorials/generative/data_compression.ipynb +++ b/site/en/tutorials/generative/data_compression.ipynb @@ -825,6 +825,60 @@ "\n", "This demonstrates that this model is agnostic to human perceptions of error, it just measures the absolute deviation in terms of pixel values. To achieve a better perceived image quality, we would need to replace the pixel loss with a perceptual loss." ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v9cWHtH0LP_r" + }, + "source": [ + "## Use the decoder as a generative model.\n", + "\n", + "If we feed the decoder random bits, this will effectively sample from the distribution that the model learned to represent digits.\n", + "\n", + "First, re-instantiate the compressor/decompressor without a sanity check that would detect if the input string isn't completely decoded." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qnic8YsM0_ke" + }, + "outputs": [], + "source": [ + "compressor, decompressor = make_mnist_codec(trainer, decode_sanity_check=False)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "86uc9_Is1eeo" + }, + "source": [ + "Now, feed long enough random strings into the decompressor so that it can decode/sample digits from them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o4fP7BkqKCHY" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "strings = tf.constant([os.urandom(8) for _ in range(16)])\n", + "samples = decompressor(strings)\n", + "\n", + "fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(5, 5))\n", + "axes = axes.ravel()\n", + "for i in range(len(axes)):\n", + " axes[i].imshow(tf.squeeze(samples[i]))\n", + " axes[i].axis(\"off\")\n", + "plt.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)\n" + ] } ], "metadata": { From 3715a3b51162ab5ef15689414a5134937e92b308 Mon Sep 17 00:00:00 2001 From: Mark McDonald Date: Mon, 15 Aug 2022 15:57:25 -0700 Subject: [PATCH 023/636] Move nbcp. 
PiperOrigin-RevId: 467782554 --- tools/{ => tensorflow_docs/tools}/nbcp/__init__.py | 0 tools/{ => tensorflow_docs/tools}/nbcp/__main__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tools/{ => tensorflow_docs/tools}/nbcp/__init__.py (100%) rename tools/{ => tensorflow_docs/tools}/nbcp/__main__.py (100%) diff --git a/tools/nbcp/__init__.py b/tools/tensorflow_docs/tools/nbcp/__init__.py similarity index 100% rename from tools/nbcp/__init__.py rename to tools/tensorflow_docs/tools/nbcp/__init__.py diff --git a/tools/nbcp/__main__.py b/tools/tensorflow_docs/tools/nbcp/__main__.py similarity index 100% rename from tools/nbcp/__main__.py rename to tools/tensorflow_docs/tools/nbcp/__main__.py From 64a1a97afb999cefcfd4c464448272d0ac15b102 Mon Sep 17 00:00:00 2001 From: Mark McDonald Date: Mon, 15 Aug 2022 17:16:45 -0700 Subject: [PATCH 024/636] Add `nbformat` dependency to tensorflow docs package. It's only required for type checking in `nbfmt`, but it's a runtime dep for `nbcp`, so install it as a dep. PiperOrigin-RevId: 467798529 --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 3a726be637b..ea4a4c24c79 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ 'astor', 'absl-py', 'jinja2', + 'nbformat', # TODO(b/182876485): Protobuf 3.20 results in linker errors on Windows # Protobuf 4.0 is binary incompatible with what C++ TF uses. # We need ~1 quarter to update properly. From 1eadb62fe32913a6bb17a6bc5cd696d3af99a033 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 16 Aug 2022 14:01:24 -0700 Subject: [PATCH 025/636] added core quickstart tutorial PiperOrigin-RevId: 468019110 --- site/en/guide/_toc.yaml | 7 + site/en/guide/core/index.md | 113 +++++ site/en/guide/core/quickstart_core.ipynb | 581 +++++++++++++++++++++++ 3 files changed, 701 insertions(+) create mode 100644 site/en/guide/core/index.md create mode 100644 site/en/guide/core/quickstart_core.ipynb diff --git a/site/en/guide/_toc.yaml b/site/en/guide/_toc.yaml index 1f98745faee..58c5d810cd6 100644 --- a/site/en/guide/_toc.yaml +++ b/site/en/guide/_toc.yaml @@ -21,6 +21,13 @@ toc: - heading: "Keras" - include: /guide/keras/_toc.yaml +- heading: "Build with Core" + status: new +- title: "Overview" + path: /guide/core/index +- title: "Quickstart for Core" + path: /guide/core/quickstart_core + - heading: "TensorFlow in depth" - title: "Tensor slicing" path: /guide/tensor_slicing diff --git a/site/en/guide/core/index.md b/site/en/guide/core/index.md new file mode 100644 index 00000000000..cfe3b19ad27 --- /dev/null +++ b/site/en/guide/core/index.md @@ -0,0 +1,113 @@ +# TensorFlow Core APIs overview + +The TensorFlow Core APIs provide a set of low-level APIs for high-performance +(distributed & accelerated) computation, primarily aimed at building machine +learning (ML) models as well as authoring ML workflow tools and frameworks +within the TensorFlow platform. These APIs provide a foundation for creating +highly configurable models with fine-grain control and new frameworks from the +ground up. + +The Core APIs can be used as an alternative to high-level machine learning APIs +such as Keras. These high-level APIs are best suited for general machine +learning needs. They offer a variety of modules that abstract away the +complexities of ML while also offering functionalities for customization through +subclassing. 
If you are looking for an overview of TensorFlow using Keras, see +the Quickstarts and Keras sections in the +[tutorials](https://www.tensorflow.org/tutorials). + +## Core API developer audience + +The TensorFlow Core low-level APIs are designed with the following ML +developers in mind: + +* Researchers building complex models with high levels of configurability +* Developers interested in using TensorFlow as a high-performance scientific + computing platform +* Framework authors building tools on top of the TensorFlow platform +* High-level API users interested in: + * Adding additional functionalities to their machine learning workflows + such as custom layers, losses, models, and optimizers + * Learning more about the inner workings of their models + +## Core API applications + +The TensorFlow Core APIs provide access to low-level functionality within the +TensorFlow ecosystem. These APIs provide more flexibility and control for +building ML models, applications, and tools, compared to high-level APIs, such +as Keras. + +### Build models and workflows + +The Core APIs are most commonly used to build highly customizable and optimized +machine learning models and workflows. Here are some of the ways that the +TensorFlow Core APIs can improve your machine learning models and workflow +development: + +* Building non-traditional models or layers that do not fully fit the + structures supported by high-level APIs +* Building custom layers, losses, models, and optimizers within Keras +* Implementing new optimization techniques to expedite convergence during + training +* Creating custom metrics for performance evaluation +* Designing highly-configurable training loops with support for features like + batching, cross-validation, and distribution strategies + +### Build frameworks and tools + +The TensorFlow Core APIs can also serve as the building blocks for new +high-level frameworks. Here are some examples of tools and frameworks that are +created with the low-level APIs: + +* [Keras](https://keras.io): deep learning for humans +* [TensorFlow Model Optimization Toolkit](https://www.tensorflow.org/model_optimization): + a suite of tools to optimize ML models for deployment and execution +* [TensorFlow Graphics](https://www.tensorflow.org/graphics): a library for + making useful graphics functions widely accessible + +### Build for scientific computing + +The TensorFlow Core APIs can also be applied outside the realm of machine +learning. Here are a few general-purpose use cases of TensorFlow for scientific +computing: + +* Physics simulations for solid mechanics and + [fluid dynamics](https://arxiv.org/abs/2108.11076) problems +* Graphics rendering applications like + [ray tracing](https://github.com/BachiLi/redner) +* Solving + [constrained optimization problems](https://github.com/google-research/tensorflow_constrained_optimization/blob/master/README.md) + +## Core API components + +Here are some of the fundamental components that comprise TensorFlow Core’s +low-level APIs. 
Note that this is not an all-encompassing list: + +* Data structures: `tf.Tensor`, `tf.Variable`, `tf.TensorArray` +* Primitive APIs: `tf.shape`, + [slicing](https://www.tensorflow.org/guide/tensor_slicing), `tf.concat`, + `tf.bitwise` +* Numerical: `tf.math`, `tf.linalg`, `tf.random` +* Functional components: `tf.function`, `tf.GradientTape` +* Distribution: [DTensor](https://www.tensorflow.org/guide/dtensor_overview) +* Export: `tf.saved_model` + +## Next steps + +The *Build with Core* documentation provides tutorials that cover basic machine +learning concepts from scratch. The tutorials in this section help you get +comfortable with writing low-level code with the Core APIs, which you can then +apply to more complex use cases of your own. + +Note: You should not use the Core APIs simply to re-implement the high-level +APIs; rather, keep in mind that it is possible to use high-level APIs, such as +Keras, together with the Core APIs. + +To get started with the Core APIs and learn more about them, check out the +[Quickstart for TensorFlow Core](https://www.tensorflow.org/guide/core/quickstart_core). diff --git a/site/en/guide/core/quickstart_core.ipynb b/site/en/guide/core/quickstart_core.ipynb new file mode 100644 index 00000000000..aad634aea4c --- /dev/null +++ b/site/en/guide/core/quickstart_core.ipynb @@ -0,0 +1,581 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "rX8mhOLljYeM" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "BZSlp3DAjdYf" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3wF5wszaj97Y" + }, + "source": [ + "# Quickstart for the TensorFlow Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DUNzJc4jTj6G" + }, + "source": [ + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "04QgGZc9bF5D" + }, + "source": [ + "This short introduction uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to walk through the following stages of a machine learning example:\n", + "\n", + "1. Load a prebuilt dataset.\n", + "1. Build a multiple linear regression model that predicts fuel efficiency.\n", + "2. Train this multiple linear regression model.\n", + "3. Evaluate the performance of the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hiH7AC-NTniF" + }, + "source": [ + "This tutorial is a [Google Colaboratory](https://colab.research.google.com/notebooks/welcome.ipynb) notebook. Python programs are run directly in the browser—a great way to learn and use TensorFlow. To follow this tutorial, run the notebook in Google Colab by clicking the button at the top of this page.\n", + "\n", + "1. In Colab, connect to a Python runtime: At the top-right of the menu bar, select *CONNECT*.\n", + "2. Run all the notebook code cells: Select *Runtime* > *Run all*." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nnrWf3PCEzXL" + }, + "source": [ + "## Set up TensorFlow\n", + "\n", + "Import TensorFlow and pandas into your program to get started:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0trJmd6DjqBZ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import pandas as pd\n", + "print(\"TensorFlow version:\", tf.__version__)\n", + "# Set a random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7NAbSZiaoJ4z" + }, + "source": [ + "If you are following along in your own development environment, rather than [Colab](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/guide/core/quickstart_core.ipynb), see the [install guide](https://www.tensorflow.org/install) for setting up TensorFlow for development.\n", + "\n", + "Note: Make sure you have upgraded to the latest `pip` to install the TensorFlow 2 package if you are using your own development environment. See the [install guide](https://www.tensorflow.org/install) for details.\n", + "\n", + "## Load a dataset\n", + "\n", + "Load and prepare the [Auto MPG dataset](https://archive.ics.uci.edu/ml/datasets/auto+mpg) from the UCI Machine Learning Repository. This dataset uses a variety of quantitative and categorical features to predict the fuel efficiencies of automobiles in the late-1970s and early 1980s. Make sure to drop any missing values in the dataset and convert it to a tf.float32 tensor with the `tf.convert_to_tensor` and `tf.cast` functions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HglhDsUfrJ98" + }, + "outputs": [], + "source": [ + "url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'\n", + "column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',\n", + " 'Acceleration', 'Model Year', 'Origin']\n", + "\n", + "dataset = pd.read_csv(url, names=column_names, na_values='?', comment='\\t',\n", + " sep=' ', skipinitialspace=True)\n", + "\n", + "dataset = dataset.dropna()\n", + "dataset_tf = tf.convert_to_tensor(dataset, dtype=tf.float32)\n", + "dataset.tail()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0vgoDL3hYesB" + }, + "source": [ + "Next split the dataset into [training and testing groups](https://developers.google.com/machine-learning/crash-course/training-and-test-sets/splitting-data). Make sure to shuffle the dataset with `tf.random.shuffle` to avoid biased splits." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0mJU4kt6YiAp" + }, + "outputs": [], + "source": [ + "dataset_shuffled = tf.random.shuffle(dataset_tf, seed=22)\n", + "train_data, test_data = dataset_shuffled[100:], dataset_shuffled[:100]\n", + "x_train, y_train = train_data[:, 1:], train_data[:, 0]\n", + "x_test, y_test = test_data[:, 1:], test_data[:, 0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bscb2Vsbi3TE" + }, + "source": [ + "Perform basic [feature engineering](https://developers.google.com/machine-learning/crash-course/representation/feature-engineering) by one-hot-encoding the `\"Origin\"` feature. The `tf.one_hot` function is useful for transforming this categorical column into 3 separate binary columns. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_B8N9IV1i6IV" + }, + "outputs": [], + "source": [ + "def onehot_origin(x):\n", + " origin = tf.cast(x[:, -1], tf.int32)\n", + " # Use origin - 1 to account for 1-indexed feature\n", + " origin_oh = tf.one_hot(origin - 1, 3)\n", + " x_ohe = tf.concat([x[:, :-1], origin_oh], axis = 1)\n", + " return x_ohe\n", + "\n", + "x_train_ohe, x_test_ohe = onehot_origin(x_train), onehot_origin(x_test)\n", + "x_train_ohe.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qnoCDzzedite" + }, + "source": [ + "This example shows a multiple regression problem with predictors or features on vastly different scales. Therefore, it is beneficial to standardize the data so that each feature has zero mean and unit variance. Use the `tf.reduce_mean` and `tf.math.reduce_std` functions for standardization. The regression model's prediction can then be unstandardized to obtain its value in terms of the original units." 
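+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "standardization-formulas" + }, + "source": [ + "Concretely, given a feature mean $\mu$ and standard deviation $\sigma$ computed from the training data, standardization and its inverse are:\n", + "\n", + "$$x' = \frac{x - \mu}{\sigma}, \qquad x = x'\sigma + \mu$$\n", + "\n", + "The `Normalize` module below implements this pair of transformations.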
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dJJFdvqydhyp" + }, + "outputs": [], + "source": [ + "class Normalize(tf.Module):\n", + " def __init__(self, x):\n", + " # Initialize the mean and standard deviation for normalization\n", + " self.mean = tf.math.reduce_mean(x, axis=0)\n", + " self.std = tf.math.reduce_std(x, axis=0)\n", + "\n", + " def norm(self, x):\n", + " # Normalize the input\n", + " return (x - self.mean)/self.std\n", + "\n", + " def unnorm(self, x):\n", + " # Unnormalize the input\n", + " return (x * self.std) + self.mean" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5BONV6fYYwZb" + }, + "outputs": [], + "source": [ + "norm_x = Normalize(x_train_ohe)\n", + "norm_y = Normalize(y_train)\n", + "x_train_norm, y_train_norm = norm_x.norm(x_train_ohe), norm_y.norm(y_train)\n", + "x_test_norm, y_test_norm = norm_x.norm(x_test_ohe), norm_y.norm(y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BPZ68wASog_I" + }, + "source": [ + "## Build a machine learning model\n", + "\n", + "Build a linear regression model with the TensorFlow Core APIs. The equation for multiple linear regression is as follows:\n", + "\n", + "$${\\mathrm{Y}} = {\\mathrm{X}}w + b$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{m\\times 1}{\\mathrm{Y}}$: target vector\n", + "* $\\underset{m\\times n}{\\mathrm{X}}$: feature matrix\n", + "* $\\underset{n\\times 1}w$: weight vector\n", + "* $b$: bias\n", + "\n", + "By using the `@tf.function` decorator, the corresponding Python code is traced to generate a callable TensorFlow graph. This approach is beneficial for saving and loading the model after training. It can also provide a performance boost for models with many layers and complex operations. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "h3IKyzTCDNGo" + }, + "outputs": [], + "source": [ + "class LinearRegression(tf.Module):\n", + "\n", + " def __init__(self):\n", + " self.built = False\n", + "\n", + " @tf.function\n", + " def __call__(self, x):\n", + " # Initialize the model parameters on the first call\n", + " if not self.built:\n", + " # Randomly generate the weight vector and bias term\n", + " rand_w = tf.random.uniform(shape=[x.shape[-1], 1])\n", + " rand_b = tf.random.uniform(shape=[])\n", + " self.w = tf.Variable(rand_w)\n", + " self.b = tf.Variable(rand_b)\n", + " self.built = True\n", + " y = tf.add(tf.matmul(x, self.w), self.b)\n", + " return tf.squeeze(y, axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l2hiez2eIUz8" + }, + "source": [ + "For each example, the model returns a prediction for the input automobile's MPG by computing the weighted sum of its features plus a bias term. This prediction can then be unstandardized to obtain its value in terms of the original units." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OeOrNdnkEEcR" + }, + "outputs": [], + "source": [ + "lin_reg = LinearRegression()\n", + "prediction = lin_reg(x_train_norm[:1])\n", + "prediction_unnorm = norm_y.unnorm(prediction)\n", + "prediction_unnorm.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FIHANxNSvWr9" + }, + "source": [ + "## Defining a loss function\n", + "\n", + "Now define a loss function to evaluate the model's performance during the training process.\n", + "\n", + "Since regression problems deal with continuous outputs, the mean squared error (MSE) is an ideal choice for the loss function. The MSE is defined by the following equation:\n", + "\n", + "$$MSE = \\frac{1}{m}\\sum_{i=1}^{m}(\\hat{y}_i -y_i)^2$$\n", + "\n", + "where\n", + "\n", + "* $\\hat{y}$: vector of predictions\n", + "* $y$: vector of true targets\n", + "\n", + "The goal of this regression problem is to find the optimal weight vector, $w$, and bias, $b$, that minimizes the MSE loss function. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8tYNVUkmw35s" + }, + "outputs": [], + "source": [ + "def mse_loss(y_pred, y):\n", + " return tf.reduce_mean(tf.square(y_pred - y))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ix4mEL65on-w" + }, + "source": [ + "## Train and evaluate your model\n", + "\n", + "Write a training loop to iteratively update your model's parameters by making use of the MSE loss function and its gradients with respect to the input parameters.\n", + "\n", + "This iterative method is referred to as gradient descent. At each iteration, the model's parameters are updated by taking a step in the opposite direction of their computed gradients. The size of this step is determined by the learning rate, which is a configurable hyperparameter. Recall that the gradient of a function indicates the direction of its steepest ascent; therefore, taking a step in the opposite direction indicates the direction of steepest descent, which ultimately helps to minimize the MSE loss function.\n", + "\n", + "Using mini-batches for training provides both memory efficiency and faster convergence. The `tf.data.Dataset` API has useful functions for batching and shuffling." 
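+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gradient-descent-update" + }, + "source": [ + "For reference, with learning rate $\eta$, each gradient descent update takes the form:\n", + "\n", + "$$w \leftarrow w - \eta \frac{\partial L}{\partial w}, \qquad b \leftarrow b - \eta \frac{\partial L}{\partial b}$$\n", + "\n", + "which is exactly what the `assign_sub` calls in the training loop below compute.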
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xS9hB17NY3sf" + }, + "outputs": [], + "source": [ + "batch_size = 64\n", + "dataset = tf.data.Dataset.from_tensor_slices((x_train_norm, y_train_norm))\n", + "dataset = dataset.shuffle(buffer_size=x_train.shape[0]).batch(batch_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y7suUbJXVLqP" + }, + "outputs": [], + "source": [ + "# Set training parameters\n", + "epochs = 100\n", + "learning_rate = 0.01\n", + "train_losses, test_losses = [], []\n", + "\n", + "# Run the training loop\n", + "for epoch in range(epochs):\n", + "  # Collect the losses of all batches in the current epoch\n", + "  batch_losses = []\n", + "  for x_batch, y_batch in dataset:\n", + "    with tf.GradientTape() as tape:\n", + "      y_pred_batch = lin_reg(x_batch)\n", + "      batch_loss = mse_loss(y_pred_batch, y_batch)\n", + "    # Update parameters with respect to the gradient calculations\n", + "    grads = tape.gradient(batch_loss, lin_reg.variables)\n", + "    for g, v in zip(grads, lin_reg.variables):\n", + "      v.assign_sub(learning_rate * g)\n", + "    # Keep track of batch-level model performance\n", + "    batch_losses.append(batch_loss)\n", + "  # Keep track of epoch-level model performance\n", + "  train_loss = tf.reduce_mean(batch_losses)\n", + "  test_loss = mse_loss(lin_reg(x_test_norm), y_test_norm)\n", + "  train_losses.append(train_loss)\n", + "  test_losses.append(test_loss)\n", + "  if epoch % 10 == 0:\n", + "    print(f'Mean squared error for epoch {epoch}: {train_loss.numpy():0.3f}')\n", + "\n", + "# Output final losses\n", + "print(f\"\\nFinal train loss: {train_loss:0.3f}\")\n", + "print(f\"Final test loss: {test_loss:0.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4mDAAPFqVVgn" + }, + "source": [ + "Plot the changes in MSE loss over time. Calculating performance metrics on a designated [validation set](https://developers.google.com/machine-learning/glossary#validation-set) or [test set](https://developers.google.com/machine-learning/glossary#test-set) ensures the model does not overfit to the training dataset and can generalize well to unseen data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F7dTAzgHDUh7" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]\n", + "\n", + "plt.plot(range(epochs), train_losses, label=\"Training loss\")\n", + "plt.plot(range(epochs), test_losses, label=\"Testing loss\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Mean squared error loss\")\n", + "plt.legend()\n", + "plt.title(\"MSE loss vs training iterations\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Aj8NrlzlJqDG" + }, + "source": [ + "It seems like the model does a good job of fitting the training data while also generalizing well to the unseen test data."
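+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mse-in-mpg-units" + }, + "source": [ + "Keep in mind that these losses are computed on standardized targets. To interpret the error in the original units, note that since unstandardization is linear, the root-mean-squared error in miles per gallon is the target's standard deviation times the normalized RMSE: $\text{RMSE}_{\text{MPG}} = \sigma_y \sqrt{\text{MSE}}$, where $\sigma_y$ is available here as `norm_y.std`.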
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AUNIPubuPYDR" + }, + "source": [ + "## Saving your model\n", + "\n", + "Start by making an export module that takes in raw data and performs the following operations:\n", + "- Feature extraction \n", + "- Normalization \n", + "- Prediction\n", + "- Unnormalization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "g-uOrGa9ZehG" + }, + "outputs": [], + "source": [ + "class ExportModule(tf.Module):\n", + " def __init__(self, model, extract_features, norm_x, norm_y):\n", + " # Initialize pre and postprocessing functions\n", + " self.model = model\n", + " self.extract_features = extract_features\n", + " self.norm_x = norm_x\n", + " self.norm_y = norm_y\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.float32)]) \n", + " def __call__(self, x):\n", + " # Run the ExportModule for new data points\n", + " x = self.extract_features(x)\n", + " x = self.norm_x.norm(x)\n", + " y = self.model(x)\n", + " y = self.norm_y.unnorm(y)\n", + " return y " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YPYYLQ8EZiU8" + }, + "outputs": [], + "source": [ + "lin_reg_export = ExportModule(model=lin_reg,\n", + " extract_features=onehot_origin,\n", + " norm_x=norm_x,\n", + " norm_y=norm_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6v8xi06XZWiC" + }, + "source": [ + "If you want to save the model at its current state, you can do so with the `tf.saved_model.save` function. To load a saved model and make predictions, use the `tf.saved_model.load` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K1IvMoHbptht" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "import os\n", + "\n", + "models = tempfile.mkdtemp()\n", + "save_path = os.path.join(models, 'lin_reg_export')\n", + "tf.saved_model.save(lin_reg_export, save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rYb6DrEH0GMv" + }, + "outputs": [], + "source": [ + "lin_reg_loaded = tf.saved_model.load(save_path)\n", + "test_preds = lin_reg_loaded(x_test)\n", + "test_preds[:10].numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-47O6_GLdRuT" + }, + "source": [ + "## Conclusion\n", + "\n", + "Congratulations! You have trained a machine learning model with a prebuilt dataset using the [TensorFlow Core APIs](https://www.tensorflow.org/guide/core). If you want learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv).\n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "rX8mhOLljYeM" + ], + "name": "quickstart_core.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 9233d91ae135b495ab347b5c3c92c8d524273d33 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 16 Aug 2022 14:09:29 -0700 Subject: [PATCH 026/636] added core logistic regression tutorial PiperOrigin-RevId: 468021272 --- site/en/guide/_toc.yaml | 2 + .../guide/core/logistic_regression_core.ipynb | 872 ++++++++++++++++++ 2 files changed, 874 insertions(+) create mode 100644 site/en/guide/core/logistic_regression_core.ipynb diff --git a/site/en/guide/_toc.yaml b/site/en/guide/_toc.yaml index 58c5d810cd6..bb583ac1338 100644 --- a/site/en/guide/_toc.yaml +++ b/site/en/guide/_toc.yaml @@ -27,6 +27,8 @@ toc: path: /guide/core/index - title: "Quickstart for Core" path: /guide/core/quickstart_core +- title: "Logistic regression" + path: /guide/core/logistic_regression_core - heading: "TensorFlow in depth" - title: "Tensor slicing" diff --git a/site/en/guide/core/logistic_regression_core.ipynb b/site/en/guide/core/logistic_regression_core.ipynb new file mode 100644 index 00000000000..23a82f10264 --- /dev/null +++ b/site/en/guide/core/logistic_regression_core.ipynb @@ -0,0 +1,872 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Logistic regression for binary classification with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DauaqJ7WhIhO" + }, + "source": [ + "## Introduction\n", + "\n", + "This notebook uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to build an end-to-end machine learning workflow from scratch. Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AHp3M9ZmrIxj" + }, + "source": [ + "[Logistic regression](https://developers.google.com/machine-learning/crash-course/logistic-regression/) is one of the most popular algorithms for binary classification. Given a set of examples with features, the goal of logistic regression is to output values between 0 and 1, which can be interpreted as the probabilities of each example belonging to a particular class. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5lZoUK6AVTos" + }, + "outputs": [], + "source": [ + "# Use seaborn for pairplot.\n", + "!pip install -q seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "import seaborn as sns\n", + "import tempfile\n", + "import os\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "print(tf.__version__)\n", + "# Set random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_72b0LCNbjx" + }, + "source": [ + "## The Wisconsin Breast Cancer Dataset\n", + "\n", + "This tutorial uses the [Wisconsin Breast Cancer Dataset](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)), and demonstrates how to build a logistic regression model that can classify tumors as benign or malignant given features such as a tumor's radius, texture, and concavity.\n", + "The dataset is available from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gFh9ne3FZ-On" + }, + "source": [ + "### Get the data\n", + "First download and import the dataset using pandas." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CiX2FI4gZtTt" + }, + "outputs": [], + "source": [ + "url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data'\n", + "\n", + "features = ['radius', 'texture', 'perimieter','area', 'smoothness', 'compactness', \n", + " 'concavity', 'concave_poinits','symmetry', 'fractal_dimension'] \n", + "column_names = ['id', 'diagnosis']\n", + "\n", + "for attr in ['mean', 'ste', 'largest']:\n", + " for feature in features:\n", + " column_names.append(feature + \"_\" + attr)\n", + "\n", + "dataset = pd.read_csv(url, names=column_names)\n", + "dataset.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s4-Wn2jzVC1W" + }, + "source": [ + "### Split the data into training and test sets\n", + "\n", + "Now, split the dataset into a training set and a test set. 
Make sure to split the features from the target labels. The test set is used to evaluate your model's generalizability to unseen data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m2O60B-IVG9Q" + }, + "outputs": [], + "source": [ + "train_dataset = dataset.sample(frac=0.75)\n", + "test_dataset = dataset.drop(train_dataset.index)\n", + "# The `id` column can be dropped since each row is unique\n", + "x_train, y_train = train_dataset.iloc[:, 2:], train_dataset.iloc[:, 1]\n", + "x_test, y_test = test_dataset.iloc[:, 2:], test_dataset.iloc[:, 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3MWuJTKEDM-f" + }, + "source": [ + "### Data preprocessing\n", + "\n", + "This dataset contains the mean, standard error, and largest values for each of the 10 tumor measurements collected per example. The `\"diagnosis\"` target column is a categorical variable, with `'M'` indicating a malignant tumor and `'B'` indicating a benign tumor diagnosis. This column needs to be converted into a numerical binary format for model training. The dataset should also be converted to a tensor with the `tf.convert_to_tensor` function after the preprocessing is complete." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JEJHhN65a2VV" + }, + "outputs": [], + "source": [ + "y_train, y_test = y_train.map({'B': 0, 'M': 1}), y_test.map({'B': 0, 'M': 1})\n", + "x_train, y_train = tf.convert_to_tensor(x_train, dtype=tf.float32), tf.convert_to_tensor(y_train, dtype=tf.float32)\n", + "x_test, y_test = tf.convert_to_tensor(x_test, dtype=tf.float32), tf.convert_to_tensor(y_test, dtype=tf.float32)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J4ubs136WLNp" + }, + "source": [ + "### Inspect the data\n", + "\n", + "Review the joint distribution of a few pairs of mean-based features from the training set and observe how they relate to the target." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oRKO_x8gWKv-" + }, + "outputs": [], + "source": [ + "sns.pairplot(train_dataset.iloc[:, 1:6], hue = 'diagnosis', diag_kind='kde');" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5YOG5iKYKW_3" + }, + "source": [ + "This pairplot demonstrates that certain features such as radius, perimeter, and area are highly correlated. This is expected since the tumor radius is directly involved in the computation of both perimeter and area. Additionally, note that malignant diagnoses seem to be more right-skewed for many of the features." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gavKO_6DWRMP" + }, + "source": [ + "Make sure to also check the overall statistics. Note how each feature covers a vastly different range of values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yi2FzC3T21jR" + }, + "outputs": [], + "source": [ + "train_dataset.describe().transpose()[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_8pDCIFjMla8" + }, + "source": [ + "### Standardize the data\n", + "\n", + "Given the inconsistent ranges, it is beneficial to standardize the data so that each feature has zero mean and unit variance. The target variable is binary; therefore, it does not require any scaling."
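+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Concretely, standardization maps each raw feature value $x$ to\n", + "\n", + "$$x' = \\frac{x - \\mu}{\\sigma}$$\n", + "\n", + "where $\\mu$ and $\\sigma$ are the feature's mean and standard deviation computed on the training set. Build a normalizer class to handle this standardization."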
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FrzKNFNjLQDl" + }, + "outputs": [], + "source": [ + "class Normalize(tf.Module):\n", + " def __init__(self, x):\n", + " # Initialize the mean and standard deviation for normalization\n", + " self.mean = tf.Variable(tf.math.reduce_mean(x, axis=0))\n", + " self.std = tf.Variable(tf.math.reduce_std(x, axis=0))\n", + "\n", + " def norm(self, x):\n", + " # Normalize the input\n", + " return (x - self.mean)/self.std\n", + "\n", + " def unnorm(self, x):\n", + " # Unnormalize the input\n", + " return (x * self.std) + self.mean\n", + "\n", + "norm_x = Normalize(x_train)\n", + "x_train_norm, x_test_norm = norm_x.norm(x_train), norm_x.norm(x_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6o3CrycBXA2s" + }, + "source": [ + "## Logistic regression\n", + "\n", + "Before building a logistic regression model, it is crucial to understand the method's differences compared to traditional linear regression." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lFby9n0tnHkw" + }, + "source": [ + "### Logistic regression fundamentals review\n", + "\n", + "Linear regression takes a linear combination of its inputs and outputs continuous values in the range, $(-∞, ∞)$. Recall that logistic regression is intended to output probabilities in the range, $(0, 1)$, for a binary classification problem. \n", + "\n", + "Logistic regression maps the continuous outputs of traditional linear regression, $(-∞, ∞)$, to probabilities, $(0, 1)$. This transformation is also symmetric, so that flipping the sign of the linear output results in the complement of the original probability. \n", + "Let $Y$ denote the probability of being in class 1 (malignant diagnosis). The desired mapping can be achieved by interpreting the linear regression output as the log odds ratio of being in class 1 as opposed to class 0: \n", + "\n", + "$$\\ln(\\frac{Y}{1-Y}) = wX + b$$\n", + "\n", + "By setting $wX + b = z$, this equation can then be solved for $Y$:\n", + "\n", + "$$Y = \\frac{e^{z}}{1 + e^{z}} = \\frac{1}{1 + e^{-z}}$$ \n", + "\n", + "The expression, $\\frac{1}{1 + e^{-z}}$, is known as the sigmoid function, $\\sigma(z)$. Hence, the equation for logistic regression can be written as $Y = \\sigma(wX + b)$.\n", + "\n", + "This dataset deals with a high-dimensional feature matrix; therefore, the above equation must be rewritten in matrix-vector form:\n", + "\n", + "$${\\mathrm{Y}} = \\sigma({\\mathrm{X}}w + b)$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{m\\times 1}{\\mathrm{Y}}$: target vector\n", + "* $\\underset{m\\times n}{\\mathrm{X}}$: feature matrix\n", + "* $\\underset{n\\times 1}w$: weight vector\n", + "* $b$: bias\n", + "* $\\sigma$: sigmoid function applied to each element of the output vector\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "n3N3pMC0uesW" + }, + "source": [ + "Start by visualizing the sigmoid function. It transforms the outputs of traditional linear regression, $(-∞, ∞)$, to fall between 0 and 1. The sigmoid function is available in `tf.math.sigmoid`."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ThHaV_RmucZl" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-10, 10, 500)\n", + "x = tf.cast(x, tf.float32)\n", + "plt.plot(x, tf.math.sigmoid(x))\n", + "plt.ylim((-0.1,1.1))\n", + "plt.title(\"Sigmoid Function\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VMXEhrZuKECV" + }, + "source": [ + "### The log loss function\n", + "\n", + "The log loss, or binary cross-entropy loss, is the ideal loss function for a binary classification problem with logistic regression. For each example, the log loss quantifies the similarity between a predicted probability and the example's true value. It is determined by the following equation:\n", + "\n", + "$$L = -\\frac{1}{m}\\sum_{i=1}^{m}\\left[y_i\\cdot\\log(\\hat{y}_i) + (1- y_i)\\cdot\\log(1 - \\hat{y}_i)\\right]$$\n", + "\n", + "where\n", + "\n", + "* $\\hat{y}$: vector of predicted probabilities\n", + "* $y$: vector of true targets\n", + "\n", + "The `tf.nn.sigmoid_cross_entropy_with_logits` function can be used to compute the log loss. This function automatically applies the sigmoid activation to the regression output." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JVBInnSqS36W" + }, + "outputs": [], + "source": [ + "def log_loss(y_pred, y):\n", + " # Compute the log loss\n", + " ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_pred)\n", + " return tf.reduce_mean(ce)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q_mutLj0KNUb" + }, + "source": [ + "### The gradient descent update rule\n", + "\n", + "The TensorFlow Core APIs support automatic differentiation with `tf.GradientTape`. If you are curious about the math behind the logistic regression gradient updates, here is the derivation:\n", + "\n", + "In the above equation for the log loss, recall that each $\\hat{y}_i$ can be rewritten in terms of the inputs as $\\sigma({\\mathrm{X_i}}w + b)$. \n", + "\n", + "The goal is to find a $w^*$ and $b^*$ that minimize the log loss: \n", + "\n", + "$$L = -\\frac{1}{m}\\sum_{i=1}^{m}\\left[y_i\\cdot\\log(\\sigma({\\mathrm{X_i}}w + b)) + (1- y_i)\\cdot\\log(1 - \\sigma({\\mathrm{X_i}}w + b))\\right]$$ \n", + "\n", + "By taking the gradient of $L$ with respect to $w$, you get the following: \n", + "\n", + "$$\\frac{\\partial L}{\\partial w} = \\frac{1}{m}{\\mathrm{X}}^T(\\sigma({\\mathrm{X}}w + b) - y)$$\n", + "\n", + "By taking the gradient of $L$ with respect to $b$, you get the following: \n", + "\n", + "$$\\frac{\\partial L}{\\partial b} = \\frac{1}{m}\\sum_{i=1}^{m}(\\sigma({\\mathrm{X_i}}w + b) - y_i)$$" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uTCndUecKZho" + }, + "source": [ + "Before building the model, you can sanity-check these gradient formulas numerically. The following sketch is an illustrative aside that compares the analytic gradient for $w$ against `tf.GradientTape` on small random tensors:"
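+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Illustrative aside: compare the analytic gradient for w with tf.GradientTape\n", + "x_chk = tf.random.uniform(shape=[8, 3])\n", + "y_chk = tf.cast(tf.random.uniform(shape=[8]) > 0.5, tf.float32)\n", + "w_chk = tf.Variable(tf.random.uniform(shape=[3, 1]))\n", + "b_chk = tf.Variable(0.)\n", + "with tf.GradientTape() as tape:\n", + " z_chk = tf.squeeze(x_chk @ w_chk, axis=1) + b_chk\n", + " loss_chk = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_chk, logits=z_chk))\n", + "grad_w = tape.gradient(loss_chk, w_chk)\n", + "# Analytic gradient: (1/m) * X^T (sigmoid(Xw + b) - y)\n", + "analytic_w = tf.transpose(x_chk) @ tf.expand_dims(tf.sigmoid(z_chk) - y_chk, axis=1) / 8.\n", + "# The maximum elementwise difference should be near zero\n", + "print(tf.reduce_max(tf.abs(grad_w - analytic_w)).numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, build the logistic regression model."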
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c0sXM7qLlKfZ" + }, + "outputs": [], + "source": [ + "class LogisticRegression(tf.Module):\n", + "\n", + " def __init__(self):\n", + " self.built = False\n", + " \n", + " def __call__(self, x, train=True):\n", + " # Initialize the model parameters on the first call\n", + " if not self.built:\n", + " # Randomly generate the weight vector and bias term\n", + " rand_w = tf.random.uniform(shape=[x.shape[-1], 1], seed=22)\n", + " rand_b = tf.random.uniform(shape=[], seed=22)\n", + " self.w = tf.Variable(rand_w)\n", + " self.b = tf.Variable(rand_b)\n", + " self.built = True\n", + " # Compute model output\n", + " z = tf.add(tf.matmul(x, self.w), self.b)\n", + " z = tf.squeeze(z, axis=1)\n", + " if train:\n", + " return z\n", + " return tf.sigmoid(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eObQu9fDnXGL" + }, + "source": [ + "As a sanity check, make sure the untrained model outputs values in the range of (0,1) for a small subset of the training data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5bIovC0Z4QHJ" + }, + "outputs": [], + "source": [ + "log_reg = LogisticRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QJ2ievISyf0p" + }, + "outputs": [], + "source": [ + "y_pred = log_reg(x_train_norm[:5], train=False)\n", + "y_pred.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PribnwDHUksC" + }, + "source": [ + "Next, write an accuracy function to calculate the proportion of correct classifications during training. In order to retrieve the classifications from the predicted probabilities, set a threshold for which all probabilities higher than the threshold belong to class 1. This is a configurable hyperparameter that can be set to 0.5 as a default." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ssnVcKg7oMe6" + }, + "outputs": [], + "source": [ + "def predict_class(y_pred, thresh=0.5):\n", + " # Return a tensor with 1.0 where y_pred_i > thresh, and 0.0 otherwise\n", + " return tf.cast(y_pred > thresh, tf.float32)\n", + "\n", + "def accuracy(y_pred, y):\n", + " # Return the proportion of matches between y_pred and y\n", + " y_pred = tf.math.sigmoid(y_pred)\n", + " y_pred_class = predict_class(y_pred)\n", + " check_equal = tf.cast(y_pred_class == y,tf.float32)\n", + " acc_val = tf.reduce_mean(check_equal)\n", + " return acc_val" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J_0KHQ25_2dF" + }, + "source": [ + "### Model training\n", + "\n", + "Now write a training loop for the logistic regression model. The loop utilizes the log loss function and its gradients with respect to the model's parameters in order to iteratively update them. Using mini-batches for training provides both memory efficiency and faster convergence. 
The `tf.data.Dataset` API has useful functions for batching and shuffling.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vJD7-4U0etqa" + }, + "outputs": [], + "source": [ + "batch_size = 64\n", + "dataset = tf.data.Dataset.from_tensor_slices((x_train_norm, y_train))\n", + "dataset = dataset.shuffle(buffer_size=x_train.shape[0]).batch(batch_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jNC3D1DGsGgK" + }, + "outputs": [], + "source": [ + "# Set training parameters\n", + "epochs = 200\n", + "learning_rate = 0.01\n", + "train_losses, test_losses = [], []\n", + "train_accs, test_accs = [], []\n", + "\n", + "# Format training loop\n", + "for epoch in range(epochs):\n", + " batch_losses, batch_accs = [], []\n", + " for x_batch, y_batch in dataset:\n", + " with tf.GradientTape() as tape:\n", + " y_pred_batch = log_reg(x_batch)\n", + " batch_loss = log_loss(y_pred_batch, y_batch)\n", + " batch_acc = accuracy(y_pred_batch, y_batch)\n", + " # Update parameters with respect to the gradient calculations\n", + " grads = tape.gradient(batch_loss, log_reg.variables)\n", + " for g,v in zip(grads, log_reg.variables):\n", + " v.assign_sub(learning_rate * g)\n", + " # Keep track of batch-level model performance \n", + " batch_losses.append(batch_loss)\n", + " batch_accs.append(batch_acc)\n", + " # Keep track of epoch-level model performance\n", + " train_loss, train_acc = tf.reduce_mean(batch_losses), tf.reduce_mean(batch_accs)\n", + " y_pred_test = log_reg(x_test_norm)\n", + " test_loss, test_acc = log_loss(y_pred_test, y_test), accuracy(y_pred_test, y_test)\n", + " train_losses.append(train_loss)\n", + " train_accs.append(train_acc)\n", + " test_losses.append(test_loss)\n", + " test_accs.append(test_acc)\n", + " if epoch % 20 == 0:\n", + " print(f\"Epoch: {epoch}, Training log loss: {train_loss:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NoLiAg7fYft7" + }, + "source": [ + "### Performance evaluation\n", + "\n", + "Observe the changes in your model's loss and accuracy over time. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mv3oCQPvWhr0" + }, + "outputs": [], + "source": [ + "plt.plot(range(epochs), train_losses, label = \"Training loss\")\n", + "plt.plot(range(epochs), test_losses, label = \"Testing loss\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Log loss\")\n", + "plt.legend()\n", + "plt.title(\"Log loss vs training iterations\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D2HDVGLPODIE" + }, + "outputs": [], + "source": [ + "plt.plot(range(epochs), train_accs, label = \"Training accuracy\")\n", + "plt.plot(range(epochs), test_accs, label = \"Testing accuracy\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Accuracy (%)\")\n", + "plt.legend()\n", + "plt.title(\"Accuracy vs training iterations\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jonKhUzuPyfa" + }, + "outputs": [], + "source": [ + "print(f\"Final training log loss: {train_losses[-1]:.3f}\")\n", + "print(f\"Final Testing Log Loss: {test_losses[-1]:.3f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d3DF4qyrPyke" + }, + "outputs": [], + "source": [ + "print(f\"Final training accuracy: {train_accs[-1]:.3f}\")\n", + "print(f\"Final testing accuracy: {test_accs[-1]:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yrj1TbOJasjA" + }, + "source": [ + "The model does a great job of classifying tumors in the training dataset and also generalizes well to unseen data. To go one step further, you can explore error rates that give more insight beyond the overall accuracy score. The two most popular error rates for binary classification problems are the false positive rate (FPR) and the false negative rate (FNR). \n", + "\n", + "For this problem, the FPR is the proportion of malignant tumor predictions amongst tumors that are actually benign. Conversely, the FNR is the proportion of benign tumor predictions among tumors that are actually malignant. These metrics can be visualized with a confusion matrix. Scikit-learn has some useful tools for visualizing confusion matrices." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OJO7YkA8ZDMU" + }, + "outputs": [], + "source": [ + "import sklearn.metrics as sk_metrics\n", + "\n", + "def show_confusion_matrix(y, y_classes, typ):\n", + " # Compute the confusion matrix and normalize it by the true (row) counts\n", + " plt.figure(figsize=(10,10))\n", + " confusion = sk_metrics.confusion_matrix(y.numpy(), y_classes.numpy())\n", + " confusion_normalized = confusion / confusion.sum(axis=1, keepdims=True)\n", + " axis_labels = range(2)\n", + " ax = sns.heatmap(\n", + " confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels,\n", + " cmap='Blues', annot=True, fmt='.4f', square=True)\n", + " plt.title(f\"Confusion matrix: {typ}\")\n", + " plt.ylabel(\"True label\")\n", + " plt.xlabel(\"Predicted label\")\n", + "\n", + "y_pred_train, y_pred_test = log_reg(x_train_norm, train=False), log_reg(x_test_norm, train=False)\n", + "train_classes, test_classes = predict_class(y_pred_train), predict_class(y_pred_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OQ5DFcleiDFm" + }, + "outputs": [], + "source": [ + "show_confusion_matrix(y_train, train_classes, 'train')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gtfcsAp_iCNR" + }, + "outputs": [], + "source": [ + "show_confusion_matrix(y_test, test_classes, 'test')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DlivxaDmTnGq" + }, + "source": [ + "Observe the error rate measurements and interpret their significance in the context of this example. In many medical testing studies such as cancer detection, having a high false positive rate to ensure a low false negative rate is perfectly acceptable and in fact encouraged, since the risk of missing a malignant tumor diagnosis (false negative) is a lot worse than misclassifying a benign tumor as malignant (false positive).\n", + "\n", + "In order to control the FPR and FNR, try changing the threshold hyperparameter before classifying the probability predictions. A lower threshold increases the model's overall chances of making a malignant tumor classification. This inevitably increases the number of false positives and the FPR, but it also helps to decrease the number of false negatives and the FNR."
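+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As a quick illustration of this trade-off, the following sketch sweeps a few candidate thresholds and recomputes both error rates. It is an illustrative aside that assumes the cells above have already run, reusing `y_pred_test`, `predict_class`, and `sk_metrics`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Illustrative aside: sweep the decision threshold and recompute FPR and FNR\n", + "for thresh in [0.25, 0.5, 0.75]:\n", + " classes = predict_class(y_pred_test, thresh=thresh)\n", + " tn, fp, fn, tp = sk_metrics.confusion_matrix(y_test.numpy(), classes.numpy()).ravel()\n", + " print(f'Threshold {thresh}: FPR = {fp/(fp + tn):.3f}, FNR = {fn/(fn + tp):.3f}')"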
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7ADEN2rb4Nhj" + }, + "source": [ + "## Saving your model\n", + "\n", + "Start by making an export module that takes in raw data and performs the following operations:\n", + "- Normalization \n", + "- Probability prediction\n", + "- Class prediction\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6KPRHCzg4ZxH" + }, + "outputs": [], + "source": [ + "class ExportModule(tf.Module):\n", + " def __init__(self, model, norm_x, class_pred):\n", + " # Initialize pre and postprocessing functions\n", + " self.model = model\n", + " self.norm_x = norm_x\n", + " self.class_pred = class_pred\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.float32)]) \n", + " def __call__(self, x):\n", + " # Run the ExportModule for new data points\n", + " x = self.norm_x.norm(x)\n", + " y = self.model(x, train=False)\n", + " y = self.class_pred(y)\n", + " return y " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2YzRclo5-yjO" + }, + "outputs": [], + "source": [ + "log_reg_export = ExportModule(model=log_reg,\n", + " norm_x=norm_x,\n", + " class_pred=predict_class)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gtofGIBN_qFd" + }, + "source": [ + "If you want to save the model at its current state, you can do so with the `tf.saved_model.save` function. To load a saved model and make predictions, use the `tf.saved_model.load` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a4Qum1Ts_pmF" + }, + "outputs": [], + "source": [ + "models = tempfile.mkdtemp()\n", + "save_path = os.path.join(models, 'log_reg_export')\n", + "tf.saved_model.save(log_reg_export, save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3KPILr1i_M_c" + }, + "outputs": [], + "source": [ + "log_reg_loaded = tf.saved_model.load(save_path)\n", + "test_preds = log_reg_loaded(x_test)\n", + "test_preds[:10].numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vgGQuV-yqYZH" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced a few techniques to handle a logistic regression problem. Here are a few more tips that may help:\n", + "\n", + "- The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be used to build machine learning workflows with high levels of configurability.\n", + "- Analyzing error rates is a great way to gain more insight about a classification model's performance beyond its overall accuracy score. For more information on classification error rates, visit the following [crash course](https://developers.google.com/machine-learning/crash-course/classification/true-false-positive-negative).\n", + "- Overfitting is another common problem for logistic regression models, though it wasn't a problem for this tutorial. Visit the [Overfit and underfit](overfit_and_underfit.ipynb) tutorial for more help with this.\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv)."
+ ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "FhGuhbZ6M5tl" + ], + "name": "logistic_regression_core.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 859ccf4e853edbf09eb78b3044fb4e5ac9846824 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 16 Aug 2022 14:21:09 -0700 Subject: [PATCH 027/636] added core multilayer perceptrons tutorial PiperOrigin-RevId: 468024089 --- site/en/guide/_toc.yaml | 2 + site/en/guide/core/mlp_core.ipynb | 1002 +++++++++++++++++++++++++++++ 2 files changed, 1004 insertions(+) create mode 100644 site/en/guide/core/mlp_core.ipynb diff --git a/site/en/guide/_toc.yaml b/site/en/guide/_toc.yaml index bb583ac1338..fc22705f95b 100644 --- a/site/en/guide/_toc.yaml +++ b/site/en/guide/_toc.yaml @@ -29,6 +29,8 @@ toc: path: /guide/core/quickstart_core - title: "Logistic regression" path: /guide/core/logistic_regression_core +- title: "Multilayer perceptrons" + path: /guide/core/mlp_core - heading: "TensorFlow in depth" - title: "Tensor slicing" diff --git a/site/en/guide/core/mlp_core.ipynb b/site/en/guide/core/mlp_core.ipynb new file mode 100644 index 00000000000..dcd2997a723 --- /dev/null +++ b/site/en/guide/core/mlp_core.ipynb @@ -0,0 +1,1002 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Multilayer perceptrons for digit recognition with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SjAxxRpBzVYg" + }, + "source": [ + "## Introduction\n", + "\n", + "This notebook uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to build an end-to-end machine learning workflow from scratch. Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AHp3M9ZmrIxj" + }, + "source": [ + "The Multilayer Perceptron (MLP) is a special type of feedforward neural network used to approach [multiclass classification](https://developers.google.com/machine-learning/crash-course/multi-class-neural-networks/video-lecture) problems. \n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GHVMVIFHSzl1" + }, + "source": [ + "## Multilayer perceptron (MLP) overview\n", + "\n", + "Before building an MLP, it is crucial to understand the concepts of perceptrons, layers, and activation functions.\n", + "\n", + "Multilayer Perceptrons are made up of functional units called perceptrons. The equation of a perceptron is as follows:\n", + "\n", + "$$Z = \\vec{w}⋅\\mathrm{X} + b$$\n", + "\n", + "where\n", + "\n", + "* $Z$: perceptron output\n", + "* $\\mathrm{X}$: feature matrix\n", + "* $\\vec{w}$: weight vector\n", + "* $b$: bias\n", + "\n", + "When these perceptrons are stacked, they form structures called dense layers which can then be connected to build a neural network. A dense layer's equation is similar to that of a perceptron's but uses a weight matrix and a bias vector instead: \n", + "\n", + "$$Y = \\mathrm{W}⋅\\mathrm{X} + \\vec{b}$$\n", + "\n", + "where\n", + "\n", + "* $Z$: dense layer output\n", + "* $\\mathrm{X}$: feature matrix\n", + "* $\\mathrm{W}$: weight matrix\n", + "* $\\vec{b}$: bias vector\n", + "\n", + "\n", + "In an MLP, multiple dense layers are connected in such a way that the outputs of one layer are fully connected to the inputs of the next layer. Adding non-linear activation functions to the outputs of dense layers can help the MLP classifier learn complex decision boundaries and generalize well to unseen data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mSfgqmwBagw_" + }, + "outputs": [], + "source": [ + "# Use seaborn for countplot.\n", + "!pip install -q seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "import seaborn as sns\n", + "import tempfile\n", + "import os\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "print(tf.__version__)\n", + "# Set random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_72b0LCNbjx" + }, + "source": [ + "## The MNIST Dataset\n", + "\n", + "This tutorial uses the [MNIST Dataset](http://yann.lecun.com/exdb/mnist), and demonstrates how to build an MLP model that can classify handwritten digits. 
The dataset is available from [TensorFlow Datasets](https://www.tensorflow.org/datasets/catalog/mnist).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gFh9ne3FZ-On" + }, + "source": [ + "### Get the data\n", + "\n", + "Split the MNIST dataset into training, validation, and testing sets. The validation set can be used to gauge the model's generalizability during training so that the test set can serve as a final unbiased estimator for the model's performance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uiuh0B098_3p" + }, + "outputs": [], + "source": [ + "train_data, val_data, test_data = tfds.load(\"mnist\", \n", + " split=['train[10000:]', 'train[0:10000]', 'test'],\n", + " as_supervised=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U3kn85nzHhQN" + }, + "source": [ + "### Visualizing the data\n", + "\n", + "The MNIST dataset consists of handwritten digits and their corresponding true labels. Visualize a couple of examples below. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7b3uzLGIHlNc" + }, + "outputs": [], + "source": [ + "train_viz = train_data.map(lambda x, y: (tf.reshape(x, shape=[28, 28]), y))\n", + "for i, (x, y) in enumerate(train_viz):\n", + " plt.subplot(3,3,1+i)\n", + " plt.axis('off')\n", + " plt.imshow(x, cmap='gray')\n", + " plt.title(f\"True Label: {y}\")\n", + " plt.subplots_adjust(hspace=.5)\n", + " if i == 8:\n", + " break\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9ljx31IWJCwx" + }, + "source": [ + "Also review the distribution of digits in the training data to verify that each class is well represented in the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YLnpnaLIJDh7" + }, + "outputs": [], + "source": [ + "x_train, y_train = [data for data in train_data.batch(len(train_data))][0]\n", + "sns.countplot(y_train.numpy());\n", + "plt.xlabel('Digits')\n", + "plt.title(\"MNIST Digit Distribution\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z-JYDIjx-gg5" + }, + "source": [ + "### Pre-Processing the data\n", + "\n", + "First, reshape the feature matrices to be 2-dimensional by flattening the images. Next, rescale the data so that the pixel values of [0,255] fit into the range of [0,1]. This step ensures that the input pixels have similar distributions and helps with training convergence." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2PBHcHtR-_Jn" + }, + "outputs": [], + "source": [ + "def preprocess(x, y):\n", + " # Reshaping the data\n", + " x = tf.reshape(x, shape=[-1, 784])\n", + " # Remove dimension of size 1\n", + " x = tf.squeeze(x, axis=0)\n", + " # Rescaling the data\n", + " x = x/255\n", + " return x, y\n", + "\n", + "train_data, val_data = train_data.map(preprocess), val_data.map(preprocess)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6o3CrycBXA2s" + }, + "source": [ + "## Build the MLP \n", + "\n", + "Start by visualizing the [ReLU](https://developers.google.com/machine-learning/glossary#ReLU) and [Softmax](https://developers.google.com/machine-learning/glossary#softmax) activation functions. Both functions are available in `tf.nn.relu` and `tf.nn.softmax` respectively. 
The ReLU is a non-linear activation function that outputs the input if it is positive and 0 otherwise: \n", + "\n", + "$$\\text{ReLU}(X) = max(0, X)$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hYunzt3UyT9G" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-2, 2, 201)\n", + "x = tf.cast(x, tf.float32)\n", + "plt.plot(x, tf.nn.relu(x));\n", + "plt.xlabel('x')\n", + "plt.ylabel('ReLU(x)')\n", + "plt.title('ReLU activation function');" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fuGrM9jMwsRM" + }, + "source": [ + "The softmax activation function is a normalized exponential function that converts $m$ real numbers into a probability distribution with $m$ outcomes/classes. This is useful for predicting class probabilities from a neural network's output:\n", + "\n", + "$$\\text{Softmax}(X) = \\frac{e^{X}}{\\sum_{i=1}^{m}e^{X_i}}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fVM8pvhWwuwI" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-4, 4, 201)\n", + "x = tf.cast(x, tf.float32)\n", + "plt.plot(x, tf.nn.softmax(x, axis=0));\n", + "plt.xlabel('x')\n", + "plt.ylabel('Softmax(x)')\n", + "plt.title('Softmax activation function');" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OHW6Yvg2yS6H" + }, + "source": [ + "### The dense layer\n", + "\n", + "Create a class for the dense layer. By definition, the outputs of one layer are fully connected to the inputs of the next layer in an MLP. Therefore, the input dimension for a dense layer can be inferred based on the output dimension of its previous layer and does not need to be specified upfront during its initialization. The weights should also be initialized properly to prevent activation outputs from becoming too large or small. One of the most popular weight initialization methods is the Xavier scheme, where each element of the weight matrix is sampled in the following manner:\n", + "\n", + "$$W_{ij} \\sim \\text{Uniform}(-\\frac{\\sqrt{6}}{\\sqrt{n + m}},\\frac{\\sqrt{6}}{\\sqrt{n + m}})$$\n", + "\n", + "The bias vector can be initialized to zeros." 
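+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As a quick worked example of this formula, the first hidden layer built below maps 784 inputs to 700 outputs, so its Xavier bound is $\\sqrt{6/(784 + 700)} \\approx 0.064$. The following illustrative one-liner confirms this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Illustrative aside: the Xavier bound for a 784-by-700 weight matrix\n", + "print(tf.sqrt(6. / (784. + 700.)).numpy())"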
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "re1SSFyBdMrS" + }, + "outputs": [], + "source": [ + "def xavier_init(shape):\n", + " # Computes the Xavier initialization values for a weight matrix\n", + " in_dim, out_dim = shape\n", + " xavier_lim = tf.sqrt(6.)/tf.sqrt(tf.cast(in_dim + out_dim, tf.float32))\n", + " weight_vals = tf.random.uniform(shape=(in_dim, out_dim), \n", + " minval=-xavier_lim, maxval=xavier_lim, seed=22)\n", + " return weight_vals" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "otDFX4u6e6ml" + }, + "source": [ + "The Xavier initialization method can also be implemented with `tf.keras.initializers.GlorotUniform`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IM0yJos25FG5" + }, + "outputs": [], + "source": [ + "class DenseLayer(tf.Module):\n", + "\n", + " def __init__(self, out_dim, weight_init=xavier_init, activation=tf.identity):\n", + " # Initialize the dimensions and activation functions\n", + " self.out_dim = out_dim\n", + " self.weight_init = weight_init\n", + " self.activation = activation\n", + " self.built = False\n", + "\n", + " def __call__(self, x):\n", + " if not self.built:\n", + " # Infer the input dimension based on first call\n", + " self.in_dim = x.shape[1]\n", + " # Initialize the weights and biases with the configured initializer\n", + " self.w = tf.Variable(self.weight_init(shape=(self.in_dim, self.out_dim)))\n", + " self.b = tf.Variable(tf.zeros(shape=(self.out_dim,)))\n", + " self.built = True\n", + " # Compute the forward pass\n", + " z = tf.add(tf.matmul(x, self.w), self.b)\n", + " return self.activation(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X-7MzpjgyHg6" + }, + "source": [ + "Next, build a class for the MLP model that executes layers sequentially.\n", + "Remember that the model variables are only available after the first sequence of dense layer calls due to dimension inference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6XisRWiCyHAb" + }, + "outputs": [], + "source": [ + "class MLP(tf.Module):\n", + "\n", + " def __init__(self, layers):\n", + " self.layers = layers\n", + " \n", + " @tf.function\n", + " def __call__(self, x): \n", + " # Execute the model's layers sequentially\n", + " for layer in self.layers:\n", + " x = layer(x)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "luXKup-43nd7" + }, + "source": [ + "### The MLP architecture\n", + "\n", + "Build an MLP model with the following architecture:\n", + "\n", + "Input Size = Image Size = 784
\n", + "Hidden Layer 1 Size = 700
\n", + "Hidden Layer 2 Size = 500
\n", + "Output Size = Number of Classes = 10\n", + "\n", + "Forward Pass: ReLU(784 x 700) x ReLU(700 x 500) x Softmax(500 x 10)\n", + "\n", + "The softmax activation function does not need to be applied by the MLP. It is computed separately in the loss and prediction functions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VmlACuki3oPi" + }, + "outputs": [], + "source": [ + "mlp_model = MLP([\n", + " DenseLayer(out_dim=700, activation=tf.nn.relu),\n", + " DenseLayer(out_dim=500, activation=tf.nn.relu),\n", + " DenseLayer(out_dim=10)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tyBATDoRmDkg" + }, + "source": [ + "### Defining a loss function\n", + "\n", + "The cross-entropy loss function is a great choice for multiclass classification problems since it measures the negative-log-likelihood of the data according to the model's probability predictions. The higher the probability assigned to the true class, the lower the loss. The equation for the cross-entropy loss is as follows:\n", + "\n", + "$$L = -\\frac{1}{n}\\sum_{i=1}^{n}\\sum_{i=j}^{n} {y_j}^{[i]}⋅\\log(\\hat{{y_j}}^{[i]})$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{n\\times m}{\\hat{y}}$: predicted class distributions\n", + "* $\\underset{n\\times m}{y}$: one hot encoded matrix of true classes\n", + "\n", + "The `tf.nn.sparse_softmax_cross_entropy_with_logits` function can be used to compute the cross-entropy loss. This function does not require the model's last layer to apply the softmax activation function nor does it require the class labels to be one hot encoded" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rskOYA7FVCwg" + }, + "outputs": [], + "source": [ + "def cross_entropy_loss(y_pred, y):\n", + " # Compute cross entropy loss with a sparse operation\n", + " sparse_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_pred)\n", + " return tf.reduce_mean(sparse_ce)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BvWxED1km8jh" + }, + "source": [ + "Write a basic accuracy function that calculates the proportion of correct classifications during training. In order to generate class predictions from softmax outputs, return the index that corresponds to the largest class probability. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jPJMWx2UgiBm" + }, + "outputs": [], + "source": [ + "def accuracy(y_pred, y):\n", + " # Compute accuracy after extracting class predictions\n", + " class_preds = tf.argmax(tf.nn.softmax(y_pred), axis=1)\n", + " is_equal = tf.equal(y, class_preds)\n", + " return tf.reduce_mean(tf.cast(is_equal, tf.float32))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JSiNRhTOnKZr" + }, + "source": [ + "### Model training\n", + "\n", + "Using an optimizer can result in significantly faster convergence compared to standard gradient descent. The Adam optimizer is implemented below. Visit the [Optimizers](https://www.tensorflow.org/guide/core/optimizers_core) guide to learn more about designing custom optimizers with TensorFlow Core." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iGIBDk3cAv6a" + }, + "outputs": [], + "source": [ + "class Adam:\n", + "\n", + " def __init__(self, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, ep=1e-7):\n", + " # Initialize optimizer parameters and variable slots\n", + " self.beta_1 = beta_1\n", + " self.beta_2 = beta_2\n", + " self.learning_rate = learning_rate\n", + " self.ep = ep\n", + " self.t = 1.\n", + " self.v_dvar, self.s_dvar = [], []\n", + " self.built = False\n", + "\n", + " def reset(self):\n", + " # Reset variables after each epoch\n", + " for v in self.v_dvar:\n", + " v.assign(tf.zeros(shape=v.shape))\n", + " for s in self.s_dvar:\n", + " s.assign(tf.zeros(shape=s.shape))\n", + " self.t = 1.\n", + " return\n", + " \n", + " def apply_gradients(self, grads, vars):\n", + " # Initialize variables on the first call\n", + " if not self.built:\n", + " for var in vars:\n", + " v = tf.Variable(tf.zeros(shape=var.shape))\n", + " s = tf.Variable(tf.zeros(shape=var.shape))\n", + " self.v_dvar.append(v)\n", + " self.s_dvar.append(s)\n", + " self.built = True\n", + " # Update the model variables given their gradients\n", + " for i, (d_var, var) in enumerate(zip(grads, vars)):\n", + " self.v_dvar[i].assign(self.beta_1*self.v_dvar[i] + (1-self.beta_1)*d_var)\n", + " self.s_dvar[i].assign(self.beta_2*self.s_dvar[i] + (1-self.beta_2)*tf.square(d_var))\n", + " v_dvar_bc = self.v_dvar[i]/(1-(self.beta_1**self.t))\n", + " s_dvar_bc = self.s_dvar[i]/(1-(self.beta_2**self.t))\n", + " var.assign_sub(self.learning_rate*(v_dvar_bc/(tf.sqrt(s_dvar_bc) + self.ep)))\n", + " self.t += 1.\n", + " return " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osEK3rqpYfKd" + }, + "source": [ + "Now, write a custom training loop that updates the MLP parameters with mini-batch gradient descent. Using mini-batches for training provides both memory efficiency and faster convergence." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CJLeY2ao1aw6" + }, + "outputs": [], + "source": [ + "def train_step(x_batch, y_batch, loss, acc, model, optimizer):\n", + " # Update the model state given a batch of data\n", + " with tf.GradientTape() as tape:\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " batch_acc = acc(y_pred, y_batch)\n", + " grads = tape.gradient(batch_loss, model.variables)\n", + " optimizer.apply_gradients(grads, model.variables)\n", + " return batch_loss, batch_acc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oC85kuZgmh3q" + }, + "outputs": [], + "source": [ + "def train_model(mlp, train_data, val_data, loss, acc, optimizer, epochs):\n", + " # Initialize data structures\n", + " train_losses, val_losses = [], []\n", + " train_accs, val_accs = [], []\n", + " x_val, y_val = [data for data in val_data][0]\n", + "\n", + " # Training loop\n", + " for epoch in range(epochs):\n", + " optimizer.reset()\n", + " batch_losses, batch_accs = [], []\n", + " for x_batch, y_batch in train_data:\n", + " # Compute gradients and update the model's parameters\n", + " batch_loss, batch_acc = train_step(x_batch, y_batch, loss, acc, mlp, optimizer)\n", + " # Keep track of batch-level model performance\n", + " batch_losses.append(batch_loss)\n", + " batch_accs.append(batch_acc)\n", + " # Keep track of epoch-level model performance\n", + " train_loss, train_acc = tf.reduce_mean(batch_losses), tf.reduce_mean(batch_accs)\n", + " val_pred = mlp(x_val)\n", + " val_loss, val_acc = loss(val_pred, y_val), acc(val_pred, y_val)\n", + " train_losses.append(train_loss)\n", + " train_accs.append(train_acc)\n", + " val_losses.append(val_loss)\n", + " val_accs.append(val_acc)\n", + " print(f\"Epoch: {epoch}\")\n", + " print(f\"Training loss: {train_loss:.3f}, Training accuracy: {train_acc:.3f}\")\n", + " print(f\"Validation loss: {val_loss:.3f}, Validation accuracy: {val_acc:.3f}\")\n", + " return train_losses, train_accs, val_losses, val_accs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FvbfXlN5lwwB" + }, + "source": [ + "Train the MLP model for 10 epochs with a batch size of 128. Hardware accelerators like GPUs or TPUs can also help reduce training time. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ijx0jkVmO47S" + }, + "outputs": [], + "source": [ + "batch_size = 128\n", + "train_data = train_data.shuffle(len(train_data)).batch(batch_size)\n", + "val_data = val_data.batch(len(val_data))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zPlT8QfxptYl" + }, + "outputs": [], + "source": [ + "train_losses, train_accs, val_losses, val_accs = train_model(mlp_model, train_data, val_data, \n", + " loss=cross_entropy_loss, acc=accuracy,\n", + " optimizer=Adam(), epochs=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j_RVmt43G12R" + }, + "source": [ + "### Performance evaluation\n", + "\n", + "Start by writing a plotting function to visualize the model's loss and accuracy during training. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VXTCYVtNDjAM" + }, + "outputs": [], + "source": [ + "def plot_metrics(train_metric, val_metric, metric_type):\n", + " # Visualize metrics vs training Epochs\n", + " plt.figure()\n", + " plt.plot(range(len(train_metric)), train_metric, label = f\"Training {metric_type}\")\n", + " plt.plot(range(len(val_metric)), val_metric, label = f\"Validation {metric_type}\")\n", + " plt.xlabel(\"Epochs\")\n", + " plt.ylabel(metric_type)\n", + " plt.legend()\n", + " plt.title(f\"{metric_type} vs Training epochs\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DC-qIvZbHo0G" + }, + "outputs": [], + "source": [ + "plot_metrics(train_losses, val_losses, \"cross entropy loss\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P-w2xk2PIDve" + }, + "outputs": [], + "source": [ + "plot_metrics(train_accs, val_accs, \"accuracy\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tbrJJaFrD_XR" + }, + "source": [ + "## Saving your model\n", + "\n", + "Start by making an export module that takes in raw data and performs the following operations:\n", + "- Data preprocessing \n", + "- Probability prediction\n", + "- Class prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1sszfWuJJZoo" + }, + "outputs": [], + "source": [ + "class ExportModule(tf.Module):\n", + " def __init__(self, model, preprocess, class_pred):\n", + " # Initialize pre and postprocessing functions\n", + " self.model = model\n", + " self.preprocess = preprocess\n", + " self.class_pred = class_pred\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=[None, None, None, None], dtype=tf.uint8)]) \n", + " def __call__(self, x):\n", + " # Run the ExportModule for new data points\n", + " x = self.preprocess(x)\n", + " y = self.model(x)\n", + " y = self.class_pred(y)\n", + " return y " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p8x6gjTDVi5d" + }, + "outputs": [], + "source": [ + "def preprocess_test(x):\n", + " # The export module takes in unprocessed and unlabeled data\n", + " x = tf.reshape(x, shape=[-1, 784])\n", + " x = x/255\n", + " return x\n", + "\n", + "def class_pred_test(y):\n", + " # Generate class predictions from MLP output\n", + " return tf.argmax(tf.nn.softmax(y), axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vu9H5STrJzdo" + }, + "source": [ + "This export module can now be saved with the `tf.saved_model.save` function. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fN9pPBQTKTe3" + }, + "outputs": [], + "source": [ + "mlp_model_export = ExportModule(model=mlp_model,\n", + " preprocess=preprocess_test,\n", + " class_pred=class_pred_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "idS7rQKbKwRS" + }, + "outputs": [], + "source": [ + "models = tempfile.mkdtemp()\n", + "save_path = os.path.join(models, 'mlp_model_export')\n", + "tf.saved_model.save(mlp_model_export, save_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_zZxO8iqBGZ-" + }, + "source": [ + "\n", + "Load the saved model with `tf.saved_model.load` and examine its performance on the unseen test data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W5cwBTUqxldW" + }, + "outputs": [], + "source": [ + "mlp_loaded = tf.saved_model.load(save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bmv0u6j_b5OC" + }, + "outputs": [], + "source": [ + "def accuracy_score(y_pred, y):\n", + " # Generic accuracy function\n", + " is_equal = tf.equal(y_pred, y)\n", + " return tf.reduce_mean(tf.cast(is_equal, tf.float32))\n", + "\n", + "x_test, y_test = [data for data in test_data.batch(len(test_data))][0]\n", + "test_classes = mlp_loaded(x_test)\n", + "test_acc = accuracy_score(test_classes, y_test)\n", + "print(f\"Test Accuracy: {test_acc:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j5t9vgv_ciQ_" + }, + "source": [ + "The model does a great job of classifying handwritten digits in the training dataset and also generalizes well to unseen data. Now, examine the model's class-wise accuracy to ensure good performance for each digit. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UD8YiC1Vfeyp" + }, + "outputs": [], + "source": [ + "print(\"Accuracy breakdown by digit:\")\n", + "print(\"---------------------------\")\n", + "label_accs = {}\n", + "for label in range(10):\n", + " label_ind = (y_test == label)\n", + " # Extract predictions for the specific true label\n", + " pred_label = test_classes[label_ind]\n", + " label_filled = tf.cast(tf.fill(pred_label.shape[0], label), tf.int64)\n", + " # Compute class-wise accuracy\n", + " label_accs[accuracy_score(pred_label, label_filled).numpy()] = label\n", + "for key in sorted(label_accs):\n", + " print(f\"Digit {label_accs[key]}: {key:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rcykuJFhdGb0" + }, + "source": [ + "It looks like the model struggles with some digits a little more than others, which is quite common in many multiclass classification problems. As a final exercise, plot a confusion matrix of the model's predictions and its corresponding true labels to gather more class-level insights. Scikit-learn and seaborn have functions for generating and visualizing confusion matrices. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JqCaqPwwh1tN" + }, + "outputs": [], + "source": [ + "import sklearn.metrics as sk_metrics\n", + "\n", + "def show_confusion_matrix(test_labels, test_classes):\n", + " # Compute the confusion matrix and normalize it by the true (row) counts\n", + " plt.figure(figsize=(10,10))\n", + " confusion = sk_metrics.confusion_matrix(test_labels.numpy(), \n", + " test_classes.numpy())\n", + " confusion_normalized = confusion / confusion.sum(axis=1, keepdims=True)\n", + " axis_labels = range(10)\n", + " ax = sns.heatmap(\n", + " confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels,\n", + " cmap='Blues', annot=True, fmt='.4f', square=True)\n", + " plt.title(\"Confusion matrix\")\n", + " plt.ylabel(\"True label\")\n", + " plt.xlabel(\"Predicted label\")\n", + "\n", + "show_confusion_matrix(y_test, test_classes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JT-WA7GVda6d" + }, + "source": [ + "Class-level insights can help identify reasons for misclassifications and improve model performance in future training cycles."
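+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For example, it can be instructive to look at a few of the misclassified digits directly. The sketch below is an illustrative aside that reuses `x_test`, `y_test`, and `test_classes` from the cells above:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Illustrative aside: visualize a few misclassified test digits\n", + "wrong_ind = tf.where(test_classes != y_test)[:, 0]\n", + "for i, ind in enumerate(wrong_ind[:9]):\n", + " plt.subplot(3,3,1+i)\n", + " plt.axis('off')\n", + " plt.imshow(tf.reshape(x_test[ind], shape=[28, 28]), cmap='gray')\n", + " plt.title(f'True: {y_test[ind].numpy()}, Pred: {test_classes[ind].numpy()}')\n", + " plt.subplots_adjust(hspace=.5)\n", + "plt.show()"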
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFLfEH4ManbW" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced a few techniques to handle a multiclass classification problem with an [MLP](https://developers.google.com/machine-learning/crash-course/multi-class-neural-networks/softmax). Here are a few more tips that may help:\n", + "\n", + "- The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be used to build machine learning workflows with high levels of configurability.\n", + "- Initialization schemes can help prevent model parameters from vanishing or exploding during training.\n", + "- Overfitting is another common problem for neural networks, though it wasn't a problem for this tutorial. Visit the [Overfit and underfit](overfit_and_underfit.ipynb) tutorial for more help with this.\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv)." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "FhGuhbZ6M5tl" + ], + "name": "mlp_core.ipynb", + "provenance": [], + "toc_visible": true + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}

From 5f5641f084771d582622afe40523420e6b524b92 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 16 Aug 2022 14:32:24 -0700 Subject: [PATCH 028/636] added core image compression tutorial PiperOrigin-RevId: 468026822 --- site/en/guide/_toc.yaml | 2 + site/en/guide/core/matrix_core.ipynb | 731 +++++++++++++++++++++++++++ 2 files changed, 733 insertions(+) create mode 100644 site/en/guide/core/matrix_core.ipynb

diff --git a/site/en/guide/_toc.yaml b/site/en/guide/_toc.yaml index fc22705f95b..c48689f554b 100644 --- a/site/en/guide/_toc.yaml +++ b/site/en/guide/_toc.yaml @@ -31,6 +31,8 @@ toc: path: /guide/core/logistic_regression_core - title: "Multilayer perceptrons" path: /guide/core/mlp_core +- title: "Matrix approximation" + path: /guide/core/matrix_core - heading: "TensorFlow in depth" - title: "Tensor slicing" diff --git a/site/en/guide/core/matrix_core.ipynb b/site/en/guide/core/matrix_core.ipynb new file mode 100644 index 00000000000..491f373cf89 --- /dev/null +++ b/site/en/guide/core/matrix_core.ipynb @@ -0,0 +1,731 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Matrix approximation with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qGw8TF2vtzru" + }, + "source": [ + "## Introduction \n", + "\n", + "This notebook uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to showcase TensorFlow's capabilities as a high-performance scientific computing platform. Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases.\n", + "\n", + "This tutorial explores the technique of [singular value decomposition](https://developers.google.com/machine-learning/recommendation/collaborative/matrix) (SVD) and its applications for low-rank approximation problems. The SVD is used to factorize real or complex matrices and has a variety of use cases in data science such as image compression. The images for this tutorial come from Google Brain's [Imagen](https://imagen.research.google/) project. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5_FdwaovEkCC" + }, + "source": [ + ">![svd_intro](http://tensorflow.org/images/core/svd_intro.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib.image import imread\n", + "from matplotlib import pyplot as plt\n", + "import requests\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [16, 9]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "print(tf.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "so_ewq3gAoEI" + }, + "source": [ + "## SVD fundamentals\n", + "\n", + "The singular value decomposition of a matrix, ${\\mathrm{A}}$, is determined by the following factorization:\n", + "\n", + "$${\\mathrm{A}} = {\\mathrm{U}} \\Sigma {\\mathrm{V}}^T$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{m \\times n}{\\mathrm{A}}$: input matrix where $m \\geq n$\n", + "* $\\underset{m \\times n}{\\mathrm{U}}$: orthogonal matrix, ${\\mathrm{U}}^T{\\mathrm{U}} = {\\mathrm{I}}$, with each column, $u_i$, denoting a left singular vector of ${\\mathrm{A}}$\n", + "* $\\underset{n \\times n}{\\Sigma}$: diagonal matrix with each diagonal entry, $\\sigma_i$, denoting a singular value of ${\\mathrm{A}}$\n", + "* $\\underset{n \\times n}{{\\mathrm{V}}^T}$: orthogonal matrix, ${\\mathrm{V}}^T{\\mathrm{V}} = {\\mathrm{I}}$, with each row, $v_i$, denoting a right singular vector of ${\\mathrm{A}}$\n", + "\n", + "When $m < n$, ${\\mathrm{U}}$ and $\\Sigma$ both have dimension $(m \\times m)$, and ${\\mathrm{V}}^T$ has dimension $(m \\times n)$." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "enGGGXCQKNv8" + }, + "source": [ + ">![svd_full](http://tensorflow.org/images/core/svd_full.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NlP-cBdSKLtc" + }, + "source": [ + "TensorFlow's linear algebra package has a function, `tf.linalg.svd`, which can be used to compute the singular value decomposition of one or more matrices. 
Start by defining a simple matrix and computing its SVD factorization.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "C3QAcgyoeIpv"
+ },
+ "outputs": [],
+ "source": [
+ "A = tf.random.uniform(shape=[40,30])\n",
+ "# Compute the SVD factorization\n",
+ "s, U, V = tf.linalg.svd(A)\n",
+ "# Define Sigma and V Transpose\n",
+ "S = tf.linalg.diag(s)\n",
+ "V_T = tf.transpose(V)\n",
+ "# Reconstruct the original matrix\n",
+ "A_svd = U@S@V_T\n",
+ "# Visualize the singular values\n",
+ "plt.bar(range(len(s)), s);\n",
+ "plt.xlabel(\"Singular value rank\")\n",
+ "plt.ylabel(\"Singular value\")\n",
+ "plt.title(\"Bar graph of singular values\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "6H_C9WhFACm4"
+ },
+ "source": [
+ "The `tf.einsum` function can be used to directly compute the matrix reconstruction from the outputs of `tf.linalg.svd`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "TPE6QeMtADUn"
+ },
+ "outputs": [],
+ "source": [
+ "A_svd = tf.einsum('s,us,vs -> uv',s,U,V)\n",
+ "print('\\nReconstructed Matrix, A_svd', A_svd)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "x1m6JIsM9DLP"
+ },
+ "source": [
+ "## Low rank approximation with the SVD\n",
+ "\n",
+ "The rank of a matrix, ${\\mathrm{A}}$, is determined by the dimension of the vector space spanned by its columns. \n",
+ "The SVD can be used to approximate a matrix with a lower rank, which ultimately decreases the dimensionality of data required to store the information represented by the matrix.\n",
+ "\n",
+ "The rank-r approximation of ${\\mathrm{A}}$ in terms of the SVD is defined by the formula:\n",
+ "\n",
+ "$${\\mathrm{A_r}} = {\\mathrm{U_r}} \\Sigma_r {\\mathrm{V_r}}^T$$\n",
+ "\n",
+ "where\n",
+ "\n",
+ "* $\\underset{m \\times r}{\\mathrm{U_r}}$: matrix consisting of the first $r$ columns of ${\\mathrm{U}}$\n",
+ "* $\\underset{r \\times r}{\\Sigma_r}$: diagonal matrix consisting of the first $r$ singular values in $\\Sigma$\n",
+ "* $\\underset{r \\times n}{\\mathrm{V_r}}^T$: matrix consisting of the first $r$ rows of ${\\mathrm{V}}^T$"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "nJWMJu36QyUV"
+ },
+ "source": [
+ ">![svd_approx](http://tensorflow.org/images/core/svd_approx.png)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "TkiVUxeaQybq"
+ },
+ "source": [
+ "Start by writing a function to compute the rank-r approximation of a given matrix. This low-rank approximation procedure is used for image compression; therefore, it is also helpful to compute the physical data sizes for each approximation. For simplicity, assume that the data size for a rank-r approximated matrix is equal to the total number of elements required to compute the approximation. Next, write a function to visualize the original matrix, $\\mathrm{A}$, its rank-r approximation, $\\mathrm{A}_r$, and the error matrix, $|\\mathrm{A} - \\mathrm{A}_r|$."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "2oY3pMPagJrO"
+ },
+ "outputs": [],
+ "source": [
+ "def rank_r_approx(s, U, V, r, verbose=False):\n",
+ "  # Compute the matrices necessary for a rank-r approximation\n",
+ "  s_r, U_r, V_r = s[..., :r], U[..., :, :r], V[..., :, :r] # ... implies any number of extra batch axes\n",
+ "  # Compute the low-rank approximation and its size\n",
+ "  A_r = tf.einsum('...s,...us,...vs->...uv',s_r,U_r,V_r)\n",
+ "  A_r_size = tf.size(U_r) + tf.size(s_r) + tf.size(V_r)\n",
+ "  if verbose:\n",
+ "    print(f\"Approximation Size: {A_r_size}\")\n",
+ "  return A_r, A_r_size\n",
+ "\n",
+ "def viz_approx(A, A_r):\n",
+ "  # Plot A, A_r, and A - A_r\n",
+ "  vmin, vmax = 0, tf.reduce_max(A)\n",
+ "  fig, ax = plt.subplots(1,3)\n",
+ "  mats = [A, A_r, abs(A - A_r)]\n",
+ "  titles = ['Original A', 'Approximated A_r', 'Error |A - A_r|']\n",
+ "  for i, (mat, title) in enumerate(zip(mats, titles)):\n",
+ "    ax[i].pcolormesh(mat, vmin=vmin, vmax=vmax)\n",
+ "    ax[i].set_title(title)\n",
+ "    ax[i].axis('off')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "O3ZRkYCkX2FQ"
+ },
+ "outputs": [],
+ "source": [
+ "print(f\"Original Size of A: {tf.size(A)}\")\n",
+ "s, U, V = tf.linalg.svd(A)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "S1DR83VMX4cM"
+ },
+ "outputs": [],
+ "source": [
+ "# Rank-15 approximation\n",
+ "A_15, A_15_size = rank_r_approx(s, U, V, 15, verbose = True)\n",
+ "viz_approx(A, A_15)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "KgFT70XFX57E"
+ },
+ "outputs": [],
+ "source": [
+ "# Rank-3 approximation\n",
+ "A_3, A_3_size = rank_r_approx(s, U, V, 3, verbose = True)\n",
+ "viz_approx(A, A_3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DS4XoSlTJgX0"
+ },
+ "source": [
+ "As expected, using lower ranks results in less-accurate approximations. However, the quality of these low-rank approximations is often good enough in real-world scenarios. Also note that the main goal of low-rank approximation with the SVD\n",
+ "is to reduce the dimensionality of the data, not to reduce the disk space of the data itself. However, as the input matrices become higher-dimensional, many low-rank approximations also end up benefiting from reduced data size. This reduction benefit is why the process is applicable to image compression problems."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "IhsaiOnnZs6M"
+ },
+ "source": [
+ "## Image loading\n",
+ "\n",
+ "The following image is available on the [Imagen](https://imagen.research.google/) home page. Imagen is a text-to-image diffusion model developed by Google Research's Brain team. An AI created this image based on the prompt: \"A photo of a Corgi dog riding a bike in Times Square. It is wearing sunglasses and a beach hat.\" How cool is that! You can also change the URL below to any .jpg link to load a custom image of your choice.\n",
+ "\n",
+ "Start by reading in and visualizing the image. After reading a JPEG file, Matplotlib outputs a matrix, ${\\mathrm{I}}$, of shape $(m \\times n \\times 3)$, which represents a 2-dimensional image with 3 color channels for red, green, and blue, respectively.\n",
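+ "\n",
+ "If you would rather skip the download and use a local file, `imread` (imported in the setup above) also accepts a file path directly — a minimal sketch, where the file name is hypothetical:\n",
+ "\n",
+ "```python\n",
+ "# Hypothetical local alternative to the URL download below\n",
+ "I = imread(\"my_image.jpg\")  # JPEGs load as an (m, n, 3) uint8 array\n",
+ "```"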
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "OVsZOQUAZ2C7"
+ },
+ "outputs": [],
+ "source": [
+ "img_link = \"https://imagen.research.google/main_gallery_images/a-photo-of-a-corgi-dog-riding-a-bike-in-times-square.jpg\"\n",
+ "img_path = requests.get(img_link, stream=True).raw\n",
+ "I = imread(img_path, 0)\n",
+ "print(\"Input Image Shape:\", I.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Qvs7uftcZ54x"
+ },
+ "outputs": [],
+ "source": [
+ "def show_img(I):\n",
+ "  # Display the image in matplotlib\n",
+ "  img = plt.imshow(I)\n",
+ "  plt.axis('off')\n",
+ "  return"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ZbesXO3HZ6Qs"
+ },
+ "outputs": [],
+ "source": [
+ "show_img(I)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tdnUBVg_JoOa"
+ },
+ "source": [
+ "## The image compression algorithm\n",
+ "\n",
+ "Now, use the SVD to compute low-rank approximations of the sample image. Recall that the image is of shape $(1024 \\times 1024 \\times 3)$ and that the SVD, as presented above, only applies to 2-dimensional matrices. This means that the sample image has to be batched into 3 equal-size matrices that correspond to each of the 3 color channels. This can be done by transposing the matrix to be of shape $(3 \\times 1024 \\times 1024)$. In order to clearly visualize the approximation error, rescale the RGB values of the image from $[0,255]$ to $[0,1]$. Remember to clip the approximated values to fall within this interval before visualizing them. The `tf.clip_by_value` function is useful for this."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "i7DDp0h7oSIk"
+ },
+ "outputs": [],
+ "source": [
+ "def compress_image(I, r, verbose=False):\n",
+ "  # Compress an image with the SVD given a rank\n",
+ "  I_size = tf.size(I)\n",
+ "  print(f\"Original size of image: {I_size}\")\n",
+ "  # Compute SVD of image\n",
+ "  I = tf.convert_to_tensor(I)/255\n",
+ "  I_batched = tf.transpose(I, [2, 0, 1]) # einops.rearrange(I, 'h w c -> c h w')\n",
+ "  s, U, V = tf.linalg.svd(I_batched)\n",
+ "  # Compute low-rank approximation of image across each RGB channel\n",
+ "  I_r, I_r_size = rank_r_approx(s, U, V, r)\n",
+ "  I_r = tf.transpose(I_r, [1, 2, 0]) # einops.rearrange(I_r, 'c h w -> h w c')\n",
+ "  I_r_prop = (I_r_size / I_size)\n",
+ "  if verbose:\n",
+ "    # Display compressed image and attributes\n",
+ "    print(f\"Number of singular values used in compression: {r}\")\n",
+ "    print(f\"Compressed image size: {I_r_size}\")\n",
+ "    print(f\"Proportion of original size: {I_r_prop:.3f}\")\n",
+ "    ax_1 = plt.subplot(1,2,1)\n",
+ "    show_img(tf.clip_by_value(I_r,0.,1.))\n",
+ "    ax_1.set_title(\"Approximated image\")\n",
+ "    ax_2 = plt.subplot(1,2,2)\n",
+ "    show_img(tf.clip_by_value(0.5+abs(I-I_r),0.,1.))\n",
+ "    ax_2.set_title(\"Error\")\n",
+ "  return I_r, I_r_prop"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "RGQ_rTyKDX9F"
+ },
+ "source": [
+ "Now, compute rank-r approximations for the following ranks: 100, 50, and 10."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "7GlKkVLGDjre"
+ },
+ "outputs": [],
+ "source": [
+ "I_100, I_100_prop = compress_image(I, 100, verbose=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "XdvUkF5_E75D"
+ },
+ "outputs": [],
+ "source": [
+ "I_50, I_50_prop = compress_image(I, 50, verbose=True)"
+ ]
+ },
+ {
+ "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MsCNZ8416Sbk" + }, + "outputs": [], + "source": [ + "I_10, I_10_prop = compress_image(I, 10, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RfYYBhcuNkvH" + }, + "source": [ + "## Evaluating approximations\n", + "\n", + "There are a variety of interesting methods to measure the effectiveness and have more control over matrix approximations." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D2Lotde9Zg7v" + }, + "source": [ + "### Compression factor vs rank\n", + "\n", + "For each of the above approximations, observe how the data sizes change with the rank." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O1ariNQe6Wbl" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(11,6))\n", + "plt.plot([100, 50, 10], [I_100_prop, I_50_prop, I_10_prop])\n", + "plt.xlabel(\"Rank\")\n", + "plt.ylabel(\"Proportion of original image size\")\n", + "plt.title(\"Compression factor vs rank\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dvHcLRj2QoDg" + }, + "source": [ + "Based on this plot, there is a linear relationship between an approximated image's compression factor and its rank. To explore this further, recall that the data size of an approximated matrix, ${\\mathrm{A}}_r$, is defined as the total number of elements required for its computation. The following equations can be used to find the relationship between compression factor and rank:\n", + "\n", + "$$x = (m \\times r) + r + (r \\times n) = r \\times (m + n + 1)$$\n", + "\n", + "$$c = \\large \\frac{x}{y} = \\frac{r \\times (m + n + 1)}{m \\times n}$$\n", + "\n", + "where\n", + "\n", + "* $x$: size of ${\\mathrm{A_r}}$\n", + "* $y$: size of ${\\mathrm{A}}$\n", + "* $c = \\frac{x}{y}$: compression factor\n", + "* $r$: rank of the approximation\n", + "* $m$ and $n$: row and column dimensions of ${\\mathrm{A}}$\n", + "\n", + "In order to find the rank, $r$, that is necessary to compress an image to a desired factor, $c$, the above equation can be rearranged to solve for $r$:\n", + "\n", + "$$r = ⌊{\\large\\frac{c \\times m \\times n}{m + n + 1}}⌋$$\n", + "\n", + "Note that this formula is independent of the color channel dimension since each of the RGB approximations do not affect each other. Now, write a function to compress an input image given a desired compression factor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "viVO-I60QynI" + }, + "outputs": [], + "source": [ + "def compress_image_with_factor(I, compression_factor, verbose=False):\n", + " # Returns a compressed image based on a desired compression factor\n", + " m,n,o = I.shape\n", + " r = int((compression_factor * m * n)/(m + n + 1))\n", + " I_r, I_r_prop = compress_image(I, r, verbose=verbose)\n", + " return I_r" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gWSv58J6LSRQ" + }, + "source": [ + "Compress an image to 15% of its original size." 
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "HVeeloIwQ1b6"
+ },
+ "outputs": [],
+ "source": [
+ "compression_factor = 0.15\n",
+ "I_r_img = compress_image_with_factor(I, compression_factor, verbose=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "LkeRyms7jZMd"
+ },
+ "source": [
+ "### Cumulative sum of singular values\n",
+ "\n",
+ "The cumulative sum of singular values can be a useful indicator of the amount of energy captured by a rank-r approximation. Visualize the RGB-averaged cumulative proportion of singular values in the sample image. The `tf.cumsum` function can be useful for this."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "CteJ6VbKlndu"
+ },
+ "outputs": [],
+ "source": [
+ "def viz_energy(I):\n",
+ "  # Visualize the energy captured based on rank\n",
+ "  # Computing SVD\n",
+ "  I = tf.convert_to_tensor(I)/255\n",
+ "  I_batched = tf.transpose(I, [2, 0, 1])\n",
+ "  s, U, V = tf.linalg.svd(I_batched)\n",
+ "  # Plotting average proportion across RGB channels\n",
+ "  props_rgb = tf.map_fn(lambda x: tf.cumsum(x)/tf.reduce_sum(x), s)\n",
+ "  props_rgb_mean = tf.reduce_mean(props_rgb, axis=0)\n",
+ "  plt.figure(figsize=(11,6))\n",
+ "  # One point per singular value (also correct for non-square images)\n",
+ "  plt.plot(range(len(props_rgb_mean)), props_rgb_mean, color='k')\n",
+ "  plt.xlabel(\"Rank / singular value number\")\n",
+ "  plt.ylabel(\"Cumulative proportion of singular values\")\n",
+ "  plt.title(\"RGB-averaged proportion of energy captured by the first 'r' singular values\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Vl9PKow-GgCp"
+ },
+ "outputs": [],
+ "source": [
+ "viz_energy(I)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "vQtwimKuQP19"
+ },
+ "source": [
+ "It looks like over 90% of the energy in this image is captured within the first 200-250 singular values. Now, write a function to compress an input image given a desired energy retention factor."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "fum5Cvm7R5vH"
+ },
+ "outputs": [],
+ "source": [
+ "def compress_image_with_energy(I, energy_factor, verbose=False):\n",
+ "  # Returns a compressed image based on a desired energy factor\n",
+ "  # Computing SVD\n",
+ "  I_rescaled = tf.convert_to_tensor(I)/255\n",
+ "  I_batched = tf.transpose(I_rescaled, [2, 0, 1])\n",
+ "  s, U, V = tf.linalg.svd(I_batched)\n",
+ "  # Extracting singular values\n",
+ "  props_rgb = tf.map_fn(lambda x: tf.cumsum(x)/tf.reduce_sum(x), s)\n",
+ "  props_rgb_mean = tf.reduce_mean(props_rgb, axis=0)\n",
+ "  # Find closest r that corresponds to the energy factor\n",
+ "  r = tf.argmin(tf.abs(props_rgb_mean - energy_factor)) + 1\n",
+ "  actual_ef = props_rgb_mean[r - 1] # cumulative proportion through the first r values\n",
+ "  I_r, I_r_prop = compress_image(I, r, verbose=verbose)\n",
+ "  print(f\"Proportion of energy captured by the first {r} singular values: {actual_ef:.3f}\")\n",
+ "  return I_r"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Y_rChG0OLby1"
+ },
+ "source": [
+ "Compress an image to retain 75% of its energy.\n",
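+ "\n",
+ "As an aside, because the cumulative proportions are non-decreasing, the smallest rank that reaches a target energy could also be found with `tf.searchsorted` instead of the `tf.argmin` search above — a minimal alternative sketch (not part of the original algorithm), assuming `props_rgb_mean` is computed as in `compress_image_with_energy`:\n",
+ "\n",
+ "```python\n",
+ "# Smallest r such that props_rgb_mean[r - 1] >= energy_factor\n",
+ "r = tf.searchsorted(props_rgb_mean, [energy_factor], side='left')[0] + 1\n",
+ "```"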
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "xDXBaZQ4c5jF"
+ },
+ "outputs": [],
+ "source": [
+ "energy_factor = 0.75\n",
+ "I_r_img = compress_image_with_energy(I, energy_factor, verbose=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "2tmqTW0CYX-v"
+ },
+ "source": [
+ "### Error and singular values\n",
+ "\n",
+ "There is also an interesting relationship between the approximation error and the singular values. It turns out that the squared Frobenius norm of the approximation error, ${\\mathrm{A}} - {\\mathrm{A}}_r$, is equal to the sum of the squares of the singular values that were left out of the approximation:\n",
+ "\n",
+ "$${||A - A_r||}_F^2 = \\sum_{i=r+1}^{R}\\sigma_i^2$$\n",
+ "\n",
+ "where $R$ is the total number of singular values of ${\\mathrm{A}}$. Test out this relationship with a rank-10 approximation of the example matrix from the beginning of this tutorial."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "hctOvN8BckiS"
+ },
+ "outputs": [],
+ "source": [
+ "s, U, V = tf.linalg.svd(A)\n",
+ "A_10, A_10_size = rank_r_approx(s, U, V, 10)\n",
+ "squared_norm = tf.norm(A - A_10)**2\n",
+ "s_squared_sum = tf.reduce_sum(s[10:]**2)\n",
+ "print(f\"Squared Frobenius norm: {squared_norm:.3f}\")\n",
+ "print(f\"Sum of squared singular values left out: {s_squared_sum:.3f}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "vgGQuV-yqYZH"
+ },
+ "source": [
+ "## Conclusion\n",
+ "\n",
+ "This notebook introduced the process of implementing the singular value decomposition with TensorFlow and applying it to write an image compression algorithm. Here are a few more tips that may help:\n",
+ "\n",
+ "* The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be used for a variety of high-performance scientific computing use cases.\n",
+ "* To learn more about TensorFlow's linear algebra functionalities, visit the docs for the [linalg module](https://www.tensorflow.org/api_docs/python/tf/linalg).\n",
+ "* The SVD can also be applied to build [recommendation systems](https://developers.google.com/machine-learning/recommendation/labs/movie-rec-programming-exercise).\n",
+ "\n",
+ "\n",
+ "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv)."
+ ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "matrix_core.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 8e7b0c3d29502b1aba25cf94dffd6656c29ecba4 Mon Sep 17 00:00:00 2001 From: synandi <98147397+synandi@users.noreply.github.com> Date: Wed, 17 Aug 2022 09:36:13 +0530 Subject: [PATCH 029/636] Updated a link in pix2pix.ipynb Changed the link in the line 81 --- site/en/tutorials/generative/pix2pix.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/en/tutorials/generative/pix2pix.ipynb b/site/en/tutorials/generative/pix2pix.ipynb index 5c97053c50b..5b96a011c54 100644 --- a/site/en/tutorials/generative/pix2pix.ipynb +++ b/site/en/tutorials/generative/pix2pix.ipynb @@ -78,7 +78,7 @@ "\n", "The architecture of your network will contain:\n", "\n", - "- A generator with a [U-Net]([U-Net](https://arxiv.org/abs/1505.04597))-based architecture.\n", + "- A generator with a [U-Net](https://arxiv.org/abs/1505.04597)-based architecture.\n", "- A discriminator represented by a convolutional PatchGAN classifier (proposed in the [pix2pix paper](https://arxiv.org/abs/1611.07004)).\n", "\n", "Note that each epoch can take around 15 seconds on a single V100 GPU.\n", From 875db999374298afcc8d7f429a33eea51d43c9f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Ball=C3=A9?= Date: Wed, 17 Aug 2022 08:00:34 -0700 Subject: [PATCH 030/636] Fixes one-line scripts to install TFC. PiperOrigin-RevId: 468198363 --- site/en/tutorials/generative/data_compression.ipynb | 5 ++++- site/en/tutorials/optimization/compression.ipynb | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/site/en/tutorials/generative/data_compression.ipynb b/site/en/tutorials/generative/data_compression.ipynb index b8edf946ae3..b6c043c0598 100644 --- a/site/en/tutorials/generative/data_compression.ipynb +++ b/site/en/tutorials/generative/data_compression.ipynb @@ -104,8 +104,11 @@ }, "outputs": [], "source": [ + "%%bash\n", "# Installs the latest version of TFC compatible with the installed TF version.\n", - "!pip install tensorflow-compression~=$(pip show tensorflow | perl -p -0777 -e 's/.*Version: (\\d\\.\\d).*/\\1.0/sg')\n" + "\n", + "read MAJOR MINOR <<< \"$(pip show tensorflow | perl -p -0777 -e 's/.*Version: (\\d+)\\.(\\d+).*/\\1 \\2/sg')\"\n", + "pip install \"tensorflow-compression<$MAJOR.$(($MINOR+1))\"\n" ] }, { diff --git a/site/en/tutorials/optimization/compression.ipynb b/site/en/tutorials/optimization/compression.ipynb index 75b5517aa57..b94ecaf6476 100644 --- a/site/en/tutorials/optimization/compression.ipynb +++ b/site/en/tutorials/optimization/compression.ipynb @@ -122,7 +122,11 @@ }, "outputs": [], "source": [ - "!pip install tensorflow-compression~=$(pip show tensorflow | perl -p -0777 -e 's/.*Version: (\\d\\.\\d).*/\\1.0/sg')\n" + "%%bash\n", + "# Installs the latest version of TFC compatible with the installed TF version.\n", + "\n", + "read MAJOR MINOR <<< \"$(pip show tensorflow | perl -p -0777 -e 's/.*Version: (\\d+)\\.(\\d+).*/\\1 \\2/sg')\"\n", + "pip install \"tensorflow-compression<$MAJOR.$(($MINOR+1))\"\n" ] }, { From 59912d4f2f1be3851638b632b42d3422e7285464 Mon Sep 17 00:00:00 2001 From: 8bitmp3 <19637339+8bitmp3@users.noreply.github.com> Date: Wed, 17 Aug 2022 10:18:58 -0700 Subject: [PATCH 031/636] Lint links in conditional GAN pix2pix tutorial --- site/en/tutorials/generative/pix2pix.ipynb | 25 
+++++++++++----------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/site/en/tutorials/generative/pix2pix.ipynb b/site/en/tutorials/generative/pix2pix.ipynb index 5b96a011c54..3026ae5500f 100644 --- a/site/en/tutorials/generative/pix2pix.ipynb +++ b/site/en/tutorials/generative/pix2pix.ipynb @@ -70,16 +70,16 @@ "id": "ITZuApL56Mny" }, "source": [ - "This tutorial demonstrates how to build and train a conditional generative adversarial network (cGAN) called pix2pix that learns a mapping from input images to output images, as described in [Image-to-image translation with conditional adversarial networks](https://arxiv.org/abs/1611.07004) by Isola et al. (2017). pix2pix is not application specific—it can be applied to a wide range of tasks, including synthesizing photos from label maps, generating colorized photos from black and white images, turning Google Maps photos into aerial images, and even transforming sketches into photos.\n", + "This tutorial demonstrates how to build and train a conditional generative adversarial network (cGAN) called pix2pix that learns a mapping from input images to output images, as described in [Image-to-image translation with conditional adversarial networks](https://arxiv.org/abs/1611.07004){:.external} by Isola et al. (2017). pix2pix is not application specific—it can be applied to a wide range of tasks, including synthesizing photos from label maps, generating colorized photos from black and white images, turning Google Maps photos into aerial images, and even transforming sketches into photos.\n", "\n", - "In this example, your network will generate images of building facades using the [CMP Facade Database](http://cmp.felk.cvut.cz/~tylecr1/facade/) provided by the [Center for Machine Perception](http://cmp.felk.cvut.cz/) at the [Czech Technical University in Prague](https://www.cvut.cz/). To keep it short, you will use a [preprocessed copy]((https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/)) of this dataset created by the pix2pix authors.\n", + "In this example, your network will generate images of building facades using the [CMP Facade Database](http://cmp.felk.cvut.cz/~tylecr1/facade/) provided by the [Center for Machine Perception](http://cmp.felk.cvut.cz/){:.external} at the [Czech Technical University in Prague](https://www.cvut.cz/){:.external}. To keep it short, you will use a [preprocessed copy](https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/){:.external} of this dataset created by the pix2pix authors.\n", "\n", "In the pix2pix cGAN, you condition on input images and generate corresponding output images. cGANs were first proposed in [Conditional Generative Adversarial Nets](https://arxiv.org/abs/1411.1784) (Mirza and Osindero, 2014)\n", "\n", "The architecture of your network will contain:\n", "\n", - "- A generator with a [U-Net](https://arxiv.org/abs/1505.04597)-based architecture.\n", - "- A discriminator represented by a convolutional PatchGAN classifier (proposed in the [pix2pix paper](https://arxiv.org/abs/1611.07004)).\n", + "- A generator with a [U-Net](https://arxiv.org/abs/1505.04597){:.external}-based architecture.\n", + "- A discriminator represented by a convolutional PatchGAN classifier (proposed in the [pix2pix paper](https://arxiv.org/abs/1611.07004){:.external}).\n", "\n", "Note that each epoch can take around 15 seconds on a single V100 GPU.\n", "\n", @@ -125,7 +125,7 @@ "source": [ "## Load the dataset\n", "\n", - "Download the CMP Facade Database data (30MB). 
Additional datasets are available in the same format [here](http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/). In Colab you can select other datasets from the drop-down menu. Note that some of the other datasets are significantly larger (`edges2handbags` is 8GB). " + "Download the CMP Facade Database data (30MB). Additional datasets are available in the same format [here](http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/){:.external}. In Colab you can select other datasets from the drop-down menu. Note that some of the other datasets are significantly larger (`edges2handbags` is 8GB in size). " ] }, { @@ -274,7 +274,7 @@ "id": "PVuZQTfI_c-s" }, "source": [ - "As described in the [pix2pix paper](https://arxiv.org/abs/1611.07004), you need to apply random jittering and mirroring to preprocess the training set.\n", + "As described in the [pix2pix paper](https://arxiv.org/abs/1611.07004){:.external}, you need to apply random jittering and mirroring to preprocess the training set.\n", "\n", "Define several functions that:\n", "\n", @@ -490,7 +490,7 @@ "source": [ "## Build the generator\n", "\n", - "The generator of your pix2pix cGAN is a _modified_ [U-Net](https://arxiv.org/abs/1505.04597). A U-Net consists of an encoder (downsampler) and decoder (upsampler). (You can find out more about it in the [Image segmentation](https://www.tensorflow.org/tutorials/images/segmentation) tutorial and on the [U-Net project website](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/).)\n", + "The generator of your pix2pix cGAN is a _modified_ [U-Net](https://arxiv.org/abs/1505.04597){:.external}. A U-Net consists of an encoder (downsampler) and decoder (upsampler). (You can find out more about it in the [Image segmentation](../images/segmentation.ipynb) tutorial and on the [U-Net project website](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/){:.external}.)\n", "\n", "- Each block in the encoder is: Convolution -> Batch normalization -> Leaky ReLU\n", "- Each block in the decoder is: Transposed convolution -> Batch normalization -> Dropout (applied to the first 3 blocks) -> ReLU\n", @@ -722,7 +722,7 @@ "source": [ "### Define the generator loss\n", "\n", - "GANs learn a loss that adapts to the data, while cGANs learn a structured loss that penalizes a possible structure that differs from the network output and the target image, as described in the [pix2pix paper](https://arxiv.org/abs/1611.07004).\n", + "GANs learn a loss that adapts to the data, while cGANs learn a structured loss that penalizes a possible structure that differs from the network output and the target image, as described in the [pix2pix paper](https://arxiv.org/abs/1611.07004){:.external}.\n", "\n", "- The generator loss is a sigmoid cross-entropy loss of the generated images and an **array of ones**.\n", "- The pix2pix paper also mentions the L1 loss, which is a MAE (mean absolute error) between the generated image and the target image.\n", @@ -797,7 +797,7 @@ "source": [ "## Build the discriminator\n", "\n", - "The discriminator in the pix2pix cGAN is a convolutional PatchGAN classifier—it tries to classify if each image _patch_ is real or not real, as described in the [pix2pix paper](https://arxiv.org/abs/1611.07004).\n", + "The discriminator in the pix2pix cGAN is a convolutional PatchGAN classifier—it tries to classify if each image _patch_ is real or not real, as described in the [pix2pix paper](https://arxiv.org/abs/1611.07004){:.external}.\n", "\n", "- Each block in the discriminator is: Convolution 
-> Batch normalization -> Leaky ReLU.\n", "- The shape of the output after the last layer is `(batch_size, 30, 30, 1)`.\n", @@ -937,7 +937,7 @@ "source": [ "The training procedure for the discriminator is shown below.\n", "\n", - "To learn more about the architecture and the hyperparameters you can refer to the [pix2pix paper](https://arxiv.org/abs/1611.07004)." + "To learn more about the architecture and the hyperparameters you can refer to the [pix2pix paper](https://arxiv.org/abs/1611.07004){:.external}." ] }, { @@ -1007,8 +1007,7 @@ "id": "Rb0QQFHF-JfS" }, "source": [ - "Note: The `training=True` is intentional here since\n", - "you want the batch statistics, while running the model on the test dataset. If you use `training=False`, you get the accumulated statistics learned from the training dataset (which you don't want)." + "Note: The `training=True` is intentional here since you want the batch statistics, while running the model on the test dataset. If you use `training=False`, you get the accumulated statistics learned from the training dataset (which you don't want)." ] }, { @@ -1248,7 +1247,7 @@ "source": [ "You can view the [results of a previous run](https://tensorboard.dev/experiment/lZ0C6FONROaUMfjYkVyJqw) of this notebook on [TensorBoard.dev](https://tensorboard.dev/).\n", "\n", - "TensorBoard.dev is a managed experience for hosting, tracking, and sharing ML experiments with everyone.\n", + "[TensorBoard.dev](https://tensorboard.dev){:.external} is a managed experience for hosting, tracking, and sharing ML experiments with everyone.\n", "\n", "It can also included inline using an `