From c6e072496a0a545aa8820f2770c5eac95a63eb8e Mon Sep 17 00:00:00 2001 From: gaina Date: Mon, 30 Aug 2021 11:08:29 +0800 Subject: [PATCH 1/7] linear_reg_with_uci add privc --- .../examples/linear_reg_with_uci/README.md | 43 ++++++----- .../examples/linear_reg_with_uci/README_CN.md | 47 +++++++----- .../linear_reg_with_uci/decrypt_save.py | 2 + .../examples/linear_reg_with_uci/prepare.py | 4 +- .../linear_reg_with_uci/process_data.py | 5 +- .../linear_reg_with_uci/run_standalone.sh | 74 +++++++++++++++++++ .../examples/linear_reg_with_uci/uci_demo.py | 4 +- 7 files changed, 137 insertions(+), 42 deletions(-) create mode 100755 python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone.sh diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/README.md b/python/paddle_fl/mpc/examples/linear_reg_with_uci/README.md index 4f48aaa9..ef9701ec 100644 --- a/python/paddle_fl/mpc/examples/linear_reg_with_uci/README.md +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/README.md @@ -8,10 +8,12 @@ This document introduces how to run UCI Housing demo based on Paddle-MPC, which #### (1). Prepare Data -Generate encrypted data utilizing `generate_encrypted_data()` in `process_data.py` script. For example, users can write the following code into a python script named `prepare.py`, and then run the script with command `python prepare.py`. +Generate encrypted data utilizing `generate_encrypted_data()` in `process_data.py` script. For example, users can write the following code into a python script named `prepare.py`, and then run the script with command `python3 prepare.py aby3` or `python3 prepare.py privc`. Users can choose to use ABY3 protocol or PrivC protocol according to the needs of parties. ```python +import sys import process_data +process_data.protocol = sys.argv[1] process_data.generate_encrypted_data() ``` @@ -31,18 +33,21 @@ export REDIS_PORT=/your/redis/port Launch demo with the `run_standalone.sh` script. 
The concrete command is: ```bash -bash run_standalone.sh uci_demo.py +`if ABY3` +bash run_standalone.sh uci_demo.py aby3 +`if PrivC` +bash run_standalone.sh uci_demo.py privc ``` -The loss with cypher text format will be displayed on screen while training. At the same time, the loss data would be also save in `/tmp` directory, and the format of file name is similar to what is described in Step 1. +The loss with cypher text format will be displayed on screen while training. At the same time, the loss data would be also save in `./mpc_infer_data/` directory, and the format of file name is similar to what is described in Step 1. -Besides, predictions would be made in this demo once training is finished. The predictions with cypher text format would also be save in `/tmp` directory. +Besides, predictions would be made in this demo once training is finished. The predictions with cypher text format would also be save in `./mpc_infer_data/` directory. #### (3). Decrypt Data Finally, using `load_decrypt_data()` in `process_data.py` script, this demo would decrypt and print the loss and predictions, which can be compared with related results of Paddle plain text model. -For example, users can write the following code into a python script named `decrypt_save.py`, and then run the script with command `python decrypt_save.py decrypt_loss_file decrypt_prediction_file`. The decrypted loss and prediction results would be saved into two files correspondingly. +For example, users can write the following code into a python script named `decrypt_save.py`, and then run the script with command `python3 decrypt_save.py decrypt_loss_file decrypt_prediction_file aby3` or `python3 decrypt_save.py decrypt_loss_file decrypt_prediction_file privc`. The decrypted loss and prediction results would be saved into two files correspondingly. 
```python import sys @@ -52,17 +57,18 @@ import process_data decrypt_loss_file=sys.argv[1] decrypt_prediction_file=sys.argv[2] +process_data.protocol = sys.argv[3] BATCH_SIZE=10 -process_data.load_decrypt_data("/tmp/uci_loss", (1, ), decrypt_loss_file) -process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file) +process_data.load_decrypt_data("./mpc_infer_data/uci_loss", (1, ), decrypt_loss_file) +process_data.load_decrypt_data("./mpc_infer_data/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file) ``` -**Note** that remember to delete the loss and prediction files in `/tmp` directory generated in last running, in case of any influence on the decrypted results of current running. For simplifying users operations, we provide the following commands in `run_standalone.sh`, which can delete the files mentioned above when running this script. +**Note** that remember to delete the loss and prediction files in `./mpc_infer_data` directory generated in last running, in case of any influence on the decrypted results of current running. For simplifying users operations, you can write the following commands in `run_standalone.sh`, which can delete the files mentioned above when running this script. ```bash # remove temp data generated in last time -LOSS_FILE="/tmp/uci_loss.*" -PRED_FILE="/tmp/uci_prediction.*" +LOSS_FILE="./mpc_infer_data/uci_loss.*" +PRED_FILE="./mpc_infer_data/uci_prediction.*" if [ "$LOSS_FILE" ]; then rm -rf $LOSS_FILE fi @@ -93,7 +99,7 @@ Each computation party makes the following modifications on `uci_demo.py` accord Modify `localhost` in the following code as the IP address of the machine. ```python - pfl_mpc.init("aby3", int(role), "localhost", server, int(port)) + pfl_mpc.init(mpc_protocol_name, int(role), "localhost", server, int(port)) ``` #### (4). 
Launch Demo on Each Party @@ -107,18 +113,18 @@ $REDIS_BIN -h $SERVER -p $PORT flushall Launch demo on each computation party with the following command, ``` -$PYTHON_EXECUTABLE uci_demo.py $PARTY_ID $SERVER $PORT +$PYTHON_EXECUTABLE uci_demo.py $PARTY_ID $SERVER $PORT $PROTOCOL ``` -where PYTHON_EXECUTABLE is the python which installs PaddleFL, PARTY_ID is the ID of computation party, which is 0, 1, or 2, SERVER and PORT represent the IP and port of Redis server respectively. +where PYTHON_EXECUTABLE is the python which installs PaddleFL, PARTY_ID is the ID of computation party, which is 0, 1, or 2, SERVER and PORT represent the IP and port of Redis server respectively. PROTOCOL is the MPC protocol that users choose. -Similarly, training loss with cypher text format would be printed on the screen of each computation party. And at the same time, the loss and predictions would be saved in `/tmp` directory. +Similarly, training loss with cypher text format would be printed on the screen of each computation party. And at the same time, the loss and predictions would be saved in `./mpc_infer_data` directory. -**Note** that remember to delete the loss and prediction files in `/tmp` directory generated in last running, in case of any influence on the decrypted results of current running. +**Note** that remember to delete the loss and prediction files in `./mpc_infer_data` directory generated in last running, in case of any influence on the decrypted results of current running. #### (5). Decrypt Loss and Prediction Data -Each computation party sends `uci_loss.part` and `uci_prediction.part` files in `/tmp` directory to the `/tmp` directory of data owner. Data owner decrypts and gets the plain text of loss and predictions with ` load_decrypt_data()` in `process_data.py`. +Each computation party sends `uci_loss.part` and `uci_prediction.part` files in `./mpc_infer_data` directory to the `./mpc_infer_data` directory of data owner. 
Data owner decrypts and gets the plain text of loss and predictions with ` load_decrypt_data()` in `process_data.py`. For example, the following code can be written into a python script to decrypt and print training loss and predictions. @@ -130,9 +136,10 @@ import process_data decrypt_loss_file=sys.argv[1] decrypt_prediction_file=sys.argv[2] +process_data.protocol = sys.argv[3] BATCH_SIZE=10 -process_data.load_decrypt_data("/tmp/uci_loss", (1, ), decrypt_loss_file) -process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file) +process_data.load_decrypt_data("./mpc_infer_data/uci_loss", (1, ), decrypt_loss_file) +process_data.load_decrypt_data("./mpc_infer_data/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file) ``` ### 3. Convergence of paddle_fl.mpc vs paddle diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md b/python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md index 5fea5ff4..e3771f50 100644 --- a/python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md @@ -8,14 +8,16 @@ #### 1. 准备数据 -使用`process_data.py`脚本中的`generate_encrypted_data()`产生加密数据,比如将如下内容写到一个`prepare.py`脚本中,然后`python prepare.py` +使用`process_data.py`脚本中的`generate_encrypted_data()`产生加密数据,比如将如下内容写到一个`prepare.py`脚本中,然后`python3 prepare.py aby3` 或者 `python3 prepare.py privc`,用户可根据多方计算实体数目需求选择采用ABY3协议或者PrivC协议。 ```python +import sys import process_data +process_data.protocol = sys.argv[1] process_data.generate_encrypted_data() ``` -将在/tmp目录下生成对应于3个计算party的feature和label的加密数据文件,以后缀名区分属于不同party的数据。比如,`house_feature.part0`表示属于party0的feature数据。 +以ABY3协议为例,将在/tmp目录下生成对应于3个计算party的feature和label的加密数据文件,以后缀名区分属于不同party的数据。比如,`house_feature.part0`表示属于party0的feature数据。 #### 2. 
使用shell脚本启动demo @@ -31,17 +33,20 @@ export REDIS_PORT=/your/redis/port 然后使用`run_standalone.sh`脚本,启动并运行demo,命令如下: ```bash  -bash run_standalone.sh uci_demo.py +`若采用ABY3` +bash run_standalone.sh uci_demo.py aby3 +`若采用PrivC` +bash run_standalone.sh uci_demo.py privc ``` -运行之后将在屏幕上打印训练过程中的密文loss数据,同时,对应的密文loss数据将会保存到/tmp目录下的文件中,文件命名格式类似于步骤1中所述。 +运行之后将在屏幕上打印训练过程中的密文loss数据,同时,对应的密文loss数据将会保存到./mpc_infer_data/目录下的文件中,文件命名格式类似于步骤1中所述。 -此外,在完成训练之后,demo会继续进行预测,并将预测密文结果也保存到/tmp目录下的文件中。 +此外,在完成训练之后,demo会继续进行预测,并将预测密文结果也保存到./mpc_infer_data/目录下的文件中。 #### 3. 解密数据 最后,demo会使用`process_data.py`脚本中的`load_decrypt_data()`,恢复并打印出明文的loss数据和prediction结果,用以和明文Paddle模型结果进行对比。 -例如,将下面的内容写到一个decrypt_save.py脚本中,然后python decrypt_save.py decrypt_loss_file decrypt_prediction_file,将把明文losss数据和预测结果分别保存在文件中。 +例如,将下面的内容写到一个decrypt_save.py脚本中,然后python3 decrypt_save.py decrypt_loss_file decrypt_prediction_file aby3,将把明文losss数据和预测结果分别保存在文件中。 ```python import sys @@ -52,16 +57,17 @@ import process_data decrypt_loss_file=sys.argv[1] decrypt_prediction_file=sys.argv[2] BATCH_SIZE=10 -process_data.load_decrypt_data("/tmp/uci_loss", (1, ), decrypt_loss_file) -process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file) +process_data.protocol = sys.argv[3] +process_data.load_decrypt_data("./mpc_infer_data/uci_loss", (1, ), decrypt_loss_file) +process_data.load_decrypt_data("./mpc_infer_data/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file) ``` -**注意**:再次启动运行demo之前,请先将上次在`/tmp`保存的loss和prediction文件删除,以免影响本次密文数据的恢复结果。为了简化用户操作,我们在`run_standalone.sh`脚本中加入了如下的内容,可以在执行脚本时删除上次数据。 +**注意**:再次启动运行demo之前,请先将上次在`./mpc_infer_data`保存的loss和prediction文件删除,以免影响本次密文数据的恢复结果。为了简化用户操作,可以在`run_standalone.sh`脚本中加入如下的内容,在执行脚本时删除上次数据。 ```bash # remove temp data generated in last time -LOSS_FILE="/tmp/uci_loss.*" -PRED_FILE="/tmp/uci_prediction.*" +LOSS_FILE="./mpc_infer_data/uci_loss.*" +PRED_FILE="./mpc_infer_data/uci_prediction.*" if [ "$LOSS_FILE" ]; then rm -rf $LOSS_FILE fi @@ -94,7 +100,7 
@@ fi 将脚本如下内容中的`localhost`修改为自己的IP地址: ```python - pfl_mpc.init("aby3", int(role), "localhost", server, int(port)) + pfl_mpc.init(mpc_protocol_name, int(role), "localhost", server, int(port)) ``` @@ -109,20 +115,20 @@ $REDIS_BIN -h $SERVER -p $PORT flushall 在各计算party分别执行以下命令,启动demo: ``` -$PYTHON_EXECUTABLE uci_demo.py $PARTY_ID $SERVER $PORT +$PYTHON_EXECUTABLE uci_demo.py $PARTY_ID $SERVER $PORT $PROTOCOL ``` -其中,PYTHON_EXECUTABLE表示自己安装了PaddleFL的python,PARTY_ID表示计算party的编号,值为0、1或2,SERVER和PORT分别表示redis server的IP地址和端口号。 +其中,PYTHON_EXECUTABLE表示自己安装了PaddleFL的python,PARTY_ID表示计算party的编号,值为0、1或2,SERVER和PORT分别表示redis server的IP地址和端口号,PROTOCOL表示采用的多方安全计算协议。 -同样地,运行之后将在各计算party的屏幕上打印训练过程中的密文loss数据。同时,对应的密文loss和prediction数据将会保存到`/tmp`目录下的文件中,文件命名格式类似于步骤1中所述。 +同样地,运行之后将在各计算party的屏幕上打印训练过程中的密文loss数据。同时,对应的密文loss和prediction数据将会保存到`./mpc_infer_data`目录下的文件中,文件命名格式类似于步骤1中所述。 -**注意**:再次启动运行demo之前,请先将上次在`/tmp`保存的loss和prediction文件删除,以免影响本次密文数据的恢复结果。 +**注意**:再次启动运行demo之前,请先将上次在`./mpc_infer_data`保存的loss和prediction文件删除,以免影响本次密文数据的恢复结果。 #### 5. 
数据方解密loss和prediction -各计算party将`/tmp`目录下的`uci_loss.part`和`uci_prediction.part`文件发送到数据方的/tmp目录下。数据方使用process_data.py脚本中的load_decrypt_data()解密恢复出loss数据和prediction数据。 +各计算party将`./mpc_infer_data`目录下的`uci_loss.part`和`uci_prediction.part`文件发送到数据方的./mpc_infer_data目录下。数据方使用process_data.py脚本中的load_decrypt_data()解密恢复出loss数据和prediction数据。 -例如,将下面的内容写到一个decrypt_save.py脚本中,然后python decrypt_save.py decrypt_loss_file decrypt_prediction_file,将把明文losss数据和预测结果分别保存在文件中。 +例如,将下面的内容写到一个decrypt_save.py脚本中,然后python3 decrypt_save.py decrypt_loss_file decrypt_prediction_file aby3,将把明文losss数据和预测结果分别保存在文件中。 ```python import sys @@ -132,9 +138,10 @@ import process_data decrypt_loss_file=sys.argv[1] decrypt_prediction_file=sys.argv[2] +process_data.protocol = sys.argv[3] BATCH_SIZE=10 -process_data.load_decrypt_data("/tmp/uci_loss", (1, ), decrypt_loss_file) -process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file) +process_data.load_decrypt_data("./mpc_infer_data/uci_loss", (1, ), decrypt_loss_file) +process_data.load_decrypt_data("./mpc_infer_data/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file) ``` ### 三. 
单机精度测试 diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/decrypt_save.py b/python/paddle_fl/mpc/examples/linear_reg_with_uci/decrypt_save.py index 0b514067..5e8b8ca9 100644 --- a/python/paddle_fl/mpc/examples/linear_reg_with_uci/decrypt_save.py +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/decrypt_save.py @@ -22,6 +22,8 @@ decrypt_loss_file=sys.argv[1] decrypt_prediction_file=sys.argv[2] BATCH_SIZE=10 + +process_data.protocol = sys.argv[3] process_data.load_decrypt_data("./mpc_infer_data/uci_loss", (1, ), decrypt_loss_file) print("uci_loss done") process_data.load_decrypt_data("./mpc_infer_data/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file) diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/prepare.py b/python/paddle_fl/mpc/examples/linear_reg_with_uci/prepare.py index b9c00741..a664892c 100644 --- a/python/paddle_fl/mpc/examples/linear_reg_with_uci/prepare.py +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/prepare.py @@ -14,7 +14,9 @@ """ Prepare data for UCI Housing. 
""" -import process_data +import sys +import process_data +process_data.protocol = sys.argv[1] process_data.generate_encrypted_data() diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/process_data.py b/python/paddle_fl/mpc/examples/linear_reg_with_uci/process_data.py index 58c2f18a..3fc02933 100644 --- a/python/paddle_fl/mpc/examples/linear_reg_with_uci/process_data.py +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/process_data.py @@ -22,7 +22,8 @@ import paddle_fl.mpc as pfl_mpc from paddle_fl.mpc.data_utils.data_utils import get_datautils -mpc_du = get_datautils('aby3') +protocol = 'aby3' +mpc_du = get_datautils(protocol) sample_reader = paddle.dataset.uci_housing.train() @@ -65,7 +66,7 @@ def encrypted_data_generator(data_location_party, sample_reader, index1, index2_ main_program = fluid.Program() startup_program = fluid.Program() with fluid.program_guard(main_program, startup_program): - pfl_mpc.init("aby3", int(role), "localhost", server, int(port)) + pfl_mpc.init(protocol, int(role), "localhost", server, int(port)) input = fluid.data(name='input', shape=[feature_num], dtype='float32') out = pfl_mpc.layers.share(input, party_id=data_location_party) diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone.sh b/python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone.sh new file mode 100755 index 00000000..171533d5 --- /dev/null +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone.sh @@ -0,0 +1,74 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash +# +# A tools to faciliate the parallel running of fluid_encrypted test scrips. +# A test script is EXPECTED to accepted arguments in the following format: +# +# SCRIPT_NAME $ROLE $SERVER $PORT +# ROLE: the role of the running party +# SERVER: the address of the party discovering service +# PORT: the port of the party discovering service +# +# This tool will try to fill the above three argument to the test script, +# so that totally three processes running the script will be started, to +# simulate run of three party in a standalone machine. +# +# Usage of this script: +# +# bash run_standalone.sh TEST_SCRIPT_NAME +# + +# please set the following environment vars according in your environment +PYTHON=${PYTHON} +REDIS_HOME=${PATH_TO_REDIS_BIN} +SERVER=${LOCALHOST} +PORT=${REDIS_PORT} + +PROTOCOL=$2 + +echo "redis home in ${REDIS_HOME}, server is ${SERVER}, port is ${PORT}, protocol is ${PROTOCOL}" +function usage() { + echo 'run_standalone.sh SCRIPT_NAME [ARG...]' + exit 0 +} + +if [ $# -lt 1 ]; then + usage +fi + +SCRIPT=$1 +if [ ! -f $SCRIPT ]; then + echo 'Could not find script of '$SCRIPT + exit 1 +fi + +REDIS_BIN=$REDIS_HOME/redis-cli +if [ ! 
-f $REDIS_BIN ]; then + echo 'Could not find redis cli in '$REDIS_HOME + exit 1 +fi + +# clear the redis cache +$REDIS_BIN -h $SERVER -p $PORT flushall + +# kick off script with roles of 1 and 2, and redirect output to /dev/null +for role in {1..2}; do + $PYTHON $SCRIPT $role $SERVER $PORT $PROTOCOL 2>&1 >/dev/null & +done + +# for party of role 0, run in a foreground mode and show the output +$PYTHON $SCRIPT 0 $SERVER $PORT $PROTOCOL + diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/uci_demo.py b/python/paddle_fl/mpc/examples/linear_reg_with_uci/uci_demo.py index 54575f77..e68656ca 100644 --- a/python/paddle_fl/mpc/examples/linear_reg_with_uci/uci_demo.py +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/uci_demo.py @@ -26,8 +26,10 @@ from paddle_fl.mpc.data_utils.data_utils import get_datautils import process_data -mpc_protocol_name = 'aby3' + +mpc_protocol_name = sys.argv[4] mpc_du = get_datautils(mpc_protocol_name) +process_data.protocol = sys.argv[4] role, server, port = sys.argv[1], sys.argv[2], sys.argv[3] pfl_mpc.init(mpc_protocol_name, int(role), "localhost", server, int(port)) From 54a5799cc685ef0badc786de390111719e3f6e3d Mon Sep 17 00:00:00 2001 From: gaina Date: Mon, 30 Aug 2021 11:38:09 +0800 Subject: [PATCH 2/7] change --- README | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 README diff --git a/README b/README new file mode 100644 index 00000000..e69de29b From 9aaecc4cfa8889cd569d133b0fa2197a6d5a3663 Mon Sep 17 00:00:00 2001 From: gaina <74752800+gaina99@users.noreply.github.com> Date: Mon, 30 Aug 2021 11:46:56 +0800 Subject: [PATCH 3/7] Delete README --- README | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 README diff --git a/README b/README deleted file mode 100644 index e69de29b..00000000 From c2ca5ff077adcaef78e51b51c9500118d8a23e1d Mon Sep 17 00:00:00 2001 From: gaina Date: Mon, 30 Aug 2021 14:55:33 +0800 Subject: [PATCH 4/7] linear with privc --- 
.../examples/linear_reg_with_uci/README.md | 10 +-- .../examples/linear_reg_with_uci/README_CN.md | 14 ++-- .../linear_reg_with_uci/process_data.py | 31 +++++--- .../run_standalone_aby3.sh | 75 +++++++++++++++++++ .../run_standalone_privc.sh | 72 ++++++++++++++++++ .../examples/linear_reg_with_uci/uci_demo.py | 16 ++-- 6 files changed, 187 insertions(+), 31 deletions(-) create mode 100755 python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone_aby3.sh create mode 100755 python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone_privc.sh diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/README.md b/python/paddle_fl/mpc/examples/linear_reg_with_uci/README.md index ef9701ec..eb76090e 100644 --- a/python/paddle_fl/mpc/examples/linear_reg_with_uci/README.md +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/README.md @@ -17,7 +17,7 @@ process_data.protocol = sys.argv[1] process_data.generate_encrypted_data() ``` -Encrypted data files of feature and label would be generated and saved in `/tmp` directory. Different suffix names are used for these files to indicate the ownership of different computation parties. For instance, a file named `house_feature.part0` means it is a feature file of party 0. +Encrypted data files of feature and label would be generated and saved in `./mpc_data` directory. Different suffix names are used for these files to indicate the ownership of different computation parties. For instance, a file named `house_feature.part0` means it is a feature file of party 0. #### (2). Launch Demo with A Shell Script @@ -30,13 +30,13 @@ export LOCALHOST=/your/localhost export REDIS_PORT=/your/redis/port ``` -Launch demo with the `run_standalone.sh` script. The concrete command is: +Launch demo with the `run_standalone_*.sh` script. 
The concrete command is: ```bash `if ABY3` -bash run_standalone.sh uci_demo.py aby3 +bash run_standalone_aby3.sh uci_demo.py `if PrivC` -bash run_standalone.sh uci_demo.py privc +bash run_standalone_privc.sh uci_demo.py ``` The loss with cypher text format will be displayed on screen while training. At the same time, the loss data would be also save in `./mpc_infer_data/` directory, and the format of file name is similar to what is described in Step 1. @@ -88,7 +88,7 @@ Data owner encrypts data. Concrete operations are consistent with “Prepare Dat #### (2). Distribute Encrypted Data -According to the suffix of file name, distribute encrypted data files to `/tmp ` directories of all 3 computation parties. For example, send `house_feature.part0` and `house_label.part0` to `/tmp` of party 0 with `scp` command. +According to the suffix of file name, distribute encrypted data files to `./mpc_data ` directories of all 3 computation parties. For example, send `house_feature.part0` and `house_label.part0` to `./mpc_data` of party 0 with `scp` command. #### (3). Modify uci_demo.py diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md b/python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md index e3771f50..032fa802 100644 --- a/python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md @@ -17,7 +17,7 @@ process_data.protocol = sys.argv[1] process_data.generate_encrypted_data() ``` -以ABY3协议为例,将在/tmp目录下生成对应于3个计算party的feature和label的加密数据文件,以后缀名区分属于不同party的数据。比如,`house_feature.part0`表示属于party0的feature数据。 +以ABY3协议为例,将在./mpc_data目录下生成对应于3个计算party的feature和label的加密数据文件,以后缀名区分属于不同party的数据。比如,`house_feature.part0`表示属于party0的feature数据。 #### 2. 
使用shell脚本启动demo @@ -30,13 +30,13 @@ export LOCALHOST=/your/localhost export REDIS_PORT=/your/redis/port ``` -然后使用`run_standalone.sh`脚本,启动并运行demo,命令如下: +然后使用`run_standalone_**.sh`脚本,启动并运行demo,命令如下: ```bash  `若采用ABY3` -bash run_standalone.sh uci_demo.py aby3 +bash run_standalone_aby3.sh uci_demo.py `若采用PrivC` -bash run_standalone.sh uci_demo.py privc +bash run_standalone_privc.sh uci_demo.py ``` 运行之后将在屏幕上打印训练过程中的密文loss数据,同时,对应的密文loss数据将会保存到./mpc_infer_data/目录下的文件中,文件命名格式类似于步骤1中所述。 @@ -46,7 +46,7 @@ bash run_standalone.sh uci_demo.py privc #### 3. 解密数据 最后,demo会使用`process_data.py`脚本中的`load_decrypt_data()`,恢复并打印出明文的loss数据和prediction结果,用以和明文Paddle模型结果进行对比。 -例如,将下面的内容写到一个decrypt_save.py脚本中,然后python3 decrypt_save.py decrypt_loss_file decrypt_prediction_file aby3,将把明文losss数据和预测结果分别保存在文件中。 +例如,将下面的内容写到一个decrypt_save.py脚本中,然后python3 decrypt_save.py decrypt_loss_file decrypt_prediction_file aby3 或者 python3 decrypt_save.py decrypt_loss_file decrypt_prediction_file privc,将把明文losss数据和预测结果分别保存在文件中。 ```python import sys @@ -87,9 +87,9 @@ fi #### 2. 分发数据 -按照后缀名,将步骤1中准备好的数据分别发送到对应的计算party的/tmp目录下。比如,使用scp命令,将 +按照后缀名,将步骤1中准备好的数据分别发送到对应的计算party的./mpc_data目录下。比如,使用scp命令,将 -`house_feature.part0`和`house_label.part0`发送到party0的/tmp目录下。 +`house_feature.part0`和`house_label.part0`发送到party0的./mpc_data目录下。 #### 3. 
计算party修改uci_demo.py脚本 diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/process_data.py b/python/paddle_fl/mpc/examples/linear_reg_with_uci/process_data.py index 3fc02933..9a478d43 100644 --- a/python/paddle_fl/mpc/examples/linear_reg_with_uci/process_data.py +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/process_data.py @@ -22,7 +22,7 @@ import paddle_fl.mpc as pfl_mpc from paddle_fl.mpc.data_utils.data_utils import get_datautils -protocol = 'aby3' +protocol="aby3" mpc_du = get_datautils(protocol) sample_reader = paddle.dataset.uci_housing.train() @@ -31,7 +31,8 @@ def generate_encrypted_data(): """ generate encrypted samples """ - + global protocol + mpc_du = get_datautils(protocol) def encrypted_housing_features(): """ feature reader @@ -46,8 +47,8 @@ def encrypted_housing_labels(): for instance in sample_reader(): yield mpc_du.make_shares(instance[1]) - mpc_du.save_shares(encrypted_housing_features, "/tmp/house_feature") - mpc_du.save_shares(encrypted_housing_labels, "/tmp/house_label") + mpc_du.save_shares(encrypted_housing_features, "./mpc_data/house_feature") + mpc_du.save_shares(encrypted_housing_labels, "./mpc_data/house_label") def generate_encrypted_data_online(role, server, port): @@ -106,16 +107,24 @@ def load_decrypt_data(filepath, shape, decrypted_file): """ load the encrypted data and reconstruct """ + global protocol + mpc_du = get_datautils(protocol) if os.path.exists(decrypted_file): os.remove(decrypted_file) part_readers = [] - for id in six.moves.range(3): - part_readers.append( - mpc_du.load_shares( - filepath, id=id, shape=shape)) - mpc_share_reader = paddle.reader.compose(part_readers[0], part_readers[1], - part_readers[2]) - + if protocol == "aby3": + for id in six.moves.range(3): + part_readers.append( + mpc_du.load_shares( + filepath, id=id, shape=shape)) + mpc_share_reader = paddle.reader.compose(part_readers[0], part_readers[1], + part_readers[2]) + elif protocol == "privc": + for id in six.moves.range(2): + 
part_readers.append( + mpc_du.load_shares( + filepath, id=id, shape=shape)) + mpc_share_reader = paddle.reader.compose(part_readers[0], part_readers[1]) for instance in mpc_share_reader(): p = mpc_du.reconstruct(np.array(instance)) with open(decrypted_file, 'a+') as f: diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone_aby3.sh b/python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone_aby3.sh new file mode 100755 index 00000000..aa716de1 --- /dev/null +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone_aby3.sh @@ -0,0 +1,75 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash +# +# A tool to facilitate the parallel running of fluid_encrypted test scripts. +# A test script is EXPECTED to accept arguments in the following format: +# +# SCRIPT_NAME $ROLE $SERVER $PORT +# ROLE: the role of the running party +# SERVER: the address of the party discovering service +# PORT: the port of the party discovering service +# +# This tool fills in the above three arguments for the test script, +# so that a total of three processes running the script will be started, to +# simulate a run of three parties on a standalone machine. 
+# +# Usage of this script: +# +# bash run_standalone_aby3.sh TEST_SCRIPT_NAME +# + +# please set the following environment vars according to your environment +PYTHON=${PYTHON} +REDIS_HOME=${PATH_TO_REDIS_BIN} +SERVER=${LOCALHOST} +PORT=${REDIS_PORT} + +PROTOCOL="aby3" + +echo "redis home in ${REDIS_HOME}, server is ${SERVER}, port is ${PORT}, protocol is ${PROTOCOL}" +function usage() { + echo 'run_standalone.sh SCRIPT_NAME [ARG...]' + exit 0 +} + +if [ $# -lt 1 ]; then + usage +fi + +SCRIPT=$1 +if [ ! -f $SCRIPT ]; then + echo 'Could not find script of '$SCRIPT + exit 1 +fi + +REDIS_BIN=$REDIS_HOME/redis-cli +if [ ! -f $REDIS_BIN ]; then + echo 'Could not find redis cli in '$REDIS_HOME + exit 1 +fi + +# clear the redis cache +$REDIS_BIN -h $SERVER -p $PORT flushall + +# kick off script with roles of 1 and 2 in the background; NOTE: `2>&1 >/dev/null` discards only stdout, stderr still reaches the terminal +for role in {1..2}; do + $PYTHON $SCRIPT $role $SERVER $PORT $PROTOCOL 2>&1 >/dev/null & +done + + +# for party of role 0, run in a foreground mode and show the output +$PYTHON $SCRIPT 0 $SERVER $PORT $PROTOCOL + diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone_privc.sh b/python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone_privc.sh new file mode 100755 index 00000000..78808f48 --- /dev/null +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone_privc.sh @@ -0,0 +1,72 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash +# +# A tool to facilitate the parallel running of fluid_encrypted test scripts. +# A test script is EXPECTED to accept arguments in the following format: +# +# SCRIPT_NAME $ROLE $SERVER $PORT +# ROLE: the role of the running party +# SERVER: the address of the party discovering service +# PORT: the port of the party discovering service +# +# This tool fills in the above three arguments for the test script, +# so that a total of two processes running the script will be started, to +# simulate a run of two parties on a standalone machine. +# +# Usage of this script: +# +# bash run_standalone_privc.sh TEST_SCRIPT_NAME +# + +# please set the following environment vars according to your environment +PYTHON=${PYTHON} +REDIS_HOME=${PATH_TO_REDIS_BIN} +SERVER=${LOCALHOST} +PORT=${REDIS_PORT} + +PROTOCOL="privc" + +echo "redis home in ${REDIS_HOME}, server is ${SERVER}, port is ${PORT}, protocol is ${PROTOCOL}" +function usage() { + echo 'run_standalone.sh SCRIPT_NAME [ARG...]' + exit 0 +} + +if [ $# -lt 1 ]; then + usage +fi + +SCRIPT=$1 +if [ ! -f $SCRIPT ]; then + echo 'Could not find script of '$SCRIPT + exit 1 +fi + +REDIS_BIN=$REDIS_HOME/redis-cli +if [ ! 
-f $REDIS_BIN ]; then + echo 'Could not find redis cli in '$REDIS_HOME + exit 1 +fi + +# clear the redis cache +$REDIS_BIN -h $SERVER -p $PORT flushall + +# kick off script with roles of 1, and redirect output to log.txt +$PYTHON $SCRIPT 1 $SERVER $PORT $PROTOCOL > log.txt 2>&1 & + +# for party of role 0, run in a foreground mode and show the output +$PYTHON $SCRIPT 0 $SERVER $PORT $PROTOCOL + diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/uci_demo.py b/python/paddle_fl/mpc/examples/linear_reg_with_uci/uci_demo.py index e68656ca..2fc53921 100644 --- a/python/paddle_fl/mpc/examples/linear_reg_with_uci/uci_demo.py +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/uci_demo.py @@ -43,14 +43,14 @@ mpc_data_dir = "./mpc_data/" # generate share online -feature_reader, label_reader = process_data.generate_encrypted_data_online(role, server, port) +#feature_reader, label_reader = process_data.generate_encrypted_data_online(role, server, port) -""" -# load shares from file + +#load shares from file feature_reader = mpc_du.load_shares( mpc_data_dir + "house_feature", id=role, shape=(13, )) label_reader = mpc_du.load_shares(mpc_data_dir + "house_label", id=role, shape=(1, )) -""" + batch_feature = mpc_du.batch(feature_reader, BATCH_SIZE, drop_last=True) batch_label = mpc_du.batch(label_reader, BATCH_SIZE, drop_last=True) @@ -113,7 +113,7 @@ #print('Epoch={}, Step={}, batch_cost={:.4f} s, Loss={},'.format( # epoch_id, step, (step_end - step_start), mpc_loss)) with open(loss_file, 'ab') as f: - f.write(np.array(mpc_loss).tostring()) + f.write(np.array(mpc_loss).tobytes()) step += 1 end_time = time.time() @@ -126,6 +126,6 @@ feed=sample, fetch_list=[y_pre]) with open(prediction_file, 'ab') as f: - f.write(np.array(prediction).tostring()) - print("revealed result: {}".format(process_data.decrypt_online(prediction, (2, BATCH_SIZE)))) - break + f.write(np.array(prediction).tobytes()) + #print("revealed result: {}".format(process_data.decrypt_online(prediction, 
(2, BATCH_SIZE)))) + #break From f39f5af372d7bfd07504f7bd5afba7e587d34a5c Mon Sep 17 00:00:00 2001 From: gaina <74752800+gaina99@users.noreply.github.com> Date: Mon, 30 Aug 2021 14:59:18 +0800 Subject: [PATCH 5/7] Delete run_standalone.sh --- .../linear_reg_with_uci/run_standalone.sh | 74 ------------------- 1 file changed, 74 deletions(-) delete mode 100755 python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone.sh diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone.sh b/python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone.sh deleted file mode 100755 index 171533d5..00000000 --- a/python/paddle_fl/mpc/examples/linear_reg_with_uci/run_standalone.sh +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#!/bin/bash -# -# A tools to faciliate the parallel running of fluid_encrypted test scrips. -# A test script is EXPECTED to accepted arguments in the following format: -# -# SCRIPT_NAME $ROLE $SERVER $PORT -# ROLE: the role of the running party -# SERVER: the address of the party discovering service -# PORT: the port of the party discovering service -# -# This tool will try to fill the above three argument to the test script, -# so that totally three processes running the script will be started, to -# simulate run of three party in a standalone machine. 
-# -# Usage of this script: -# -# bash run_standalone.sh TEST_SCRIPT_NAME -# - -# please set the following environment vars according in your environment -PYTHON=${PYTHON} -REDIS_HOME=${PATH_TO_REDIS_BIN} -SERVER=${LOCALHOST} -PORT=${REDIS_PORT} - -PROTOCOL=$2 - -echo "redis home in ${REDIS_HOME}, server is ${SERVER}, port is ${PORT}, protocol is ${PROTOCOL}" -function usage() { - echo 'run_standalone.sh SCRIPT_NAME [ARG...]' - exit 0 -} - -if [ $# -lt 1 ]; then - usage -fi - -SCRIPT=$1 -if [ ! -f $SCRIPT ]; then - echo 'Could not find script of '$SCRIPT - exit 1 -fi - -REDIS_BIN=$REDIS_HOME/redis-cli -if [ ! -f $REDIS_BIN ]; then - echo 'Could not find redis cli in '$REDIS_HOME - exit 1 -fi - -# clear the redis cache -$REDIS_BIN -h $SERVER -p $PORT flushall - -# kick off script with roles of 1 and 2, and redirect output to /dev/null -for role in {1..2}; do - $PYTHON $SCRIPT $role $SERVER $PORT $PROTOCOL 2>&1 >/dev/null & -done - -# for party of role 0, run in a foreground mode and show the output -$PYTHON $SCRIPT 0 $SERVER $PORT $PROTOCOL - From 415eff41a9cdc4bd6abee62be73080013007921a Mon Sep 17 00:00:00 2001 From: gaina Date: Mon, 30 Aug 2021 16:30:15 +0800 Subject: [PATCH 6/7] logistic with privc --- .../examples/logistic_with_mnist/README.md | 21 ++++-- .../examples/logistic_with_mnist/README_CN.md | 23 +++--- .../logistic_with_mnist/decrypt_save.py | 9 ++- .../logistic_with_mnist/process_data.py | 38 ++++++++-- .../run_standalone_aby3.sh | 74 +++++++++++++++++++ .../run_standalone_privc.sh | 72 ++++++++++++++++++ .../logistic_with_mnist/train_fc_sigmoid.py | 8 +- .../logistic_with_mnist/train_fc_softmax.py | 4 +- 8 files changed, 217 insertions(+), 32 deletions(-) create mode 100755 python/paddle_fl/mpc/examples/logistic_with_mnist/run_standalone_aby3.sh create mode 100755 python/paddle_fl/mpc/examples/logistic_with_mnist/run_standalone_privc.sh diff --git a/python/paddle_fl/mpc/examples/logistic_with_mnist/README.md 
b/python/paddle_fl/mpc/examples/logistic_with_mnist/README.md index dae0d4ee..1f5336df 100644 --- a/python/paddle_fl/mpc/examples/logistic_with_mnist/README.md +++ b/python/paddle_fl/mpc/examples/logistic_with_mnist/README.md @@ -8,7 +8,7 @@ This document introduces how to run MNIST demo based on Paddle-MPC, which has tw #### (1). Prepare Data -Generate encrypted training and testing data utilizing `generate_encrypted_data()` and `generate_encrypted_test_data()` in `process_data.py` script. Users can run the script with command `python process_data.py` to generate encrypted feature and label in given directory, e.g., `./mpc_data/`. Users can specify `class_num` (2 or 10) to determine the encrypted data is for `logisticfc_sigmoid`(two classes) or `lenet` and `logistic_fc_softmax`(10 classes) network. Different suffix names are used for these files to indicate the ownership of different computation parties. For instance, a file named `mnist2_feature.part0` means it is a feature file of party 0. +Generate encrypted training and testing data utilizing `generate_encrypted_data()` and `generate_encrypted_test_data()` in `process_data.py` script. Users can run the script with command `python3 process_data.py aby3` or `python3 process_data.py privc` to generate encrypted feature and label in given directory, e.g., `./mpc_data/`. Users can specify `class_num` (2 or 10) to determine the encrypted data is for `logisticfc_sigmoid`(two classes) or `lenet` and `logistic_fc_softmax`(10 classes) network. Different suffix names are used for these files to indicate the ownership of different computation parties. For instance, a file named `mnist2_feature.part0` means it is a feature file of party 0. #### (2). Launch Demo with A Shell Script @@ -21,10 +21,13 @@ export LOCALHOST=/your/localhost export REDIS_PORT=/your/redis/port ``` -Launch demo with the `run_standalone.sh` script. The concrete command is: +Launch demo with the `run_standalone_**.sh` script. 
The concrete command is: ```bash -bash run_standalone.sh train_fc_sigmoid.py +`if aby3` +bash run_standalone_aby3.sh train_fc_sigmoid.py +`if privc` +bash run_standalone_privc.sh train_fc_sigmoid.py ``` The information of current epoch and step will be displayed on screen while training, as well as the total cost time when traning finished. @@ -33,14 +36,15 @@ Besides, predictions would be made in this demo once training is finished. The p #### (3). Decrypt Data -Decrypt the saved prediction data and save the decrypted prediction results into a specified file using `decrypt_data_to_file()` in `process_data.py` script. For example, users can write the following code into a python script named `decrypt_save.py`, and then run the script with command `python decrypt_save.py decrypt_file`. The decrypted prediction results would be saved into `decrypt_file`. +Decrypt the saved prediction data and save the decrypted prediction results into a specified file using `decrypt_data_to_file()` in `process_data.py` script. For example, users can write the following code into a python script named `decrypt_save.py`, and then run the script with command `python3 decrypt_save.py decrypt_file aby3` or `python3 decrypt_save.py decrypt_file privc`. The decrypted prediction results would be saved into `decrypt_file`. 
```python import sys decrypt_file=sys.argv[1] import process_data -process_data.decrypt_data_to_file("/tmp/mnist_output_prediction", (BATCH_SIZE,), decrypt_file) +process_data.protocol=sys.argv[2] +process_data.decrypt_data_to_file("./mpc_infer_data/mnist_output_prediction", (BATCH_SIZE,), decrypt_file) ``` @@ -73,22 +77,23 @@ $REDIS_BIN -h $SERVER -p $PORT flushall Launch demo on each computation party with the following command, ``` -$PYTHON_EXECUTABLE train_fc_sigmoid.py $PARTY_ID $SERVER $PORT +$PYTHON_EXECUTABLE train_fc_sigmoid.py $PARTY_ID $SERVER $PORT $PROTOCOL ``` -where PYTHON_EXECUTABLE is the python which installs PaddleFL, PARTY_ID is the ID of computation party, which is 0, 1, or 2, SERVER and PORT represent the IP and port of Redis server respectively. +where PYTHON_EXECUTABLE is the python which installs PaddleFL, PARTY_ID is the ID of computation party, which is 0, 1, or 2, SERVER and PORT represent the IP and port of Redis server respectively, PROTOCOL is the MPC protocol that users choose. Similarly, predictions with cypher text format would be saved in `./mpc_infer_data/` directory, for example, a file named `mnist_output_prediction.part0` for party 0. #### (5). Decrypt Prediction Data -Each computation party sends `mnist_output_prediction.part` file in `./mpc_infer_data/` directory to the `./mpc_infer_data/` directory of data owner. Data owner decrypts the prediction data and saves the decrypted prediction results into a specified file using `decrypt_data_to_file()` in `process_data.py` script. For example, users can write the following code into a python script named `decrypt_save.py`, and then run the script with command `python decrypt_save.py decrypt_file`. The decrypted prediction results would be saved into file `decrypt_file`. +Each computation party sends `mnist_output_prediction.part` file in `./mpc_infer_data/` directory to the `./mpc_infer_data/` directory of data owner. 
Data owner decrypts the prediction data and saves the decrypted prediction results into a specified file using `decrypt_data_to_file()` in `process_data.py` script. For example, users can write the following code into a python script named `decrypt_save.py`, and then run the script with command `python3 decrypt_save.py decrypt_file aby3` or `python3 decrypt_save.py decrypt_file privc`. The decrypted prediction results would be saved into file `decrypt_file`. ```python import sys decrypt_file=sys.argv[1] import process_data +process_data.protocol=sys.argv[2] process_data.decrypt_data_to_file("./mpc_infer_data/mnist_output_prediction", (BATCH_SIZE,), decrypt_file) ``` diff --git a/python/paddle_fl/mpc/examples/logistic_with_mnist/README_CN.md b/python/paddle_fl/mpc/examples/logistic_with_mnist/README_CN.md index 33ebcfc7..8efa241e 100644 --- a/python/paddle_fl/mpc/examples/logistic_with_mnist/README_CN.md +++ b/python/paddle_fl/mpc/examples/logistic_with_mnist/README_CN.md @@ -8,7 +8,7 @@ #### 1. 准备数据 -使用`process_data.py`脚本中的`generate_encrypted_data()`和`generate_encrypted_test_data()`产生加密训练数据和测试数据,用户可以直接运行脚本`python process_data.py`在指定的目录下(比如`./mpc_data/`)产生加密特征和标签。用户可以通过参数`class_num`指定label的类别数目,从而产生适用于`logistic_fc_sigmoid`(二分类)或`lenet``logistic_fc_softmax`(十分类)网络的加密数据。在指定目录下生成对应于3个计算party的feature和label的加密数据文件,以后缀名区分属于不同party的数据。比如,`mnist2_feature.part0`表示属于party0的feature数据。 +使用`process_data.py`脚本中的`generate_encrypted_data()`和`generate_encrypted_test_data()`产生加密训练数据和测试数据,用户可以直接运行脚本`python3 process_data.py aby3`或`python3 process_data.py privc`在指定的目录下(比如`./mpc_data/`)产生加密特征和标签。用户可以通过参数`class_num`指定label的类别数目,从而产生适用于`logistic_fc_sigmoid`(二分类)或`lenet``logistic_fc_softmax`(十分类)网络的加密数据。在指定目录下生成对应于3个(ABY3)/2个(PrivC)计算party的feature和label的加密数据文件,以后缀名区分属于不同party的数据。比如,`mnist2_feature.part0`表示属于party0的feature数据。 #### 2. 
使用shell脚本启动demo @@ -21,10 +21,13 @@ export LOCALHOST=/your/localhost export REDIS_PORT=/your/redis/port ``` -然后使用`run_standalone.sh`脚本,启动并运行demo,命令如下: +然后使用`run_standalone_**.sh`脚本,启动并运行demo,命令如下: ```bash  -bash run_standalone.sh train_fc_sigmoid.py +`如果采用ABY3` +bash run_standalone_aby3.sh train_fc_sigmoid.py +`如果采用PrivC` +bash run_standalone_privc.sh train_fc_sigmoid.py ``` 运行之后将在屏幕上打印训练过程中所处的epoch和step,并在完成训练后打印出训练花费的时间。 @@ -33,13 +36,14 @@ bash run_standalone.sh train_fc_sigmoid.py #### 3. 解密数据 -使用`process_data.py`脚本中的`decrypt_data_to_file()`,将保存的密文预测结果进行解密,并且将解密得到的明文预测结果保存到指定文件中。例如,将下面的内容写到一个`decrypt_save.py`脚本中,然后`python decrypt_save.py decrypt_file`,将把明文预测结果保存在`decrypt_file`文件中。 +使用`process_data.py`脚本中的`decrypt_data_to_file()`,将保存的密文预测结果进行解密,并且将解密得到的明文预测结果保存到指定文件中。例如,将下面的内容写到一个`decrypt_save.py`脚本中,然后`python3 decrypt_save.py decrypt_file aby3`或者`python3 decrypt_save.py decrypt_file privc`,将把明文预测结果保存在`decrypt_file`文件中。 ```python import sys -decrypt_file=sys.argv[1] +decrypt_file = sys.argv[1] import process_data +process_data.protocol = sys.argv[2] process_data.decrypt_data_to_file("./mpc_infer_data/mnist_output_prediction", (BATCH_SIZE,), decrypt_file) ``` @@ -75,22 +79,23 @@ $REDIS_BIN -h $SERVER -p $PORT flushall 在各计算party分别执行以下命令,启动demo: ``` -$PYTHON_EXECUTABLE train_fc_sigmoid.py $PARTY_ID $SERVER $PORT +$PYTHON_EXECUTABLE train_fc_sigmoid.py $PARTY_ID $SERVER $PORT $PROTOCOL ``` -其中,PYTHON_EXECUTABLE表示自己安装了PaddleFL的python,PARTY_ID表示计算party的编号,值为0、1或2,SERVER和PORT分别表示redis server的IP地址和端口号。 +其中,PYTHON_EXECUTABLE表示自己安装了PaddleFL的python,PARTY_ID表示计算party的编号,值为0、1或2,SERVER和PORT分别表示redis server的IP地址和端口号, PROTOCOL表示采用的MPC协议。 同样地,密文prediction数据将会保存到`./mpc_infer_data/`目录下的文件中。比如,在party0中将保存为文件`mnist_output_prediction.part0`. #### 5. 
解密预测数据 -各计算party将`./mpc_infer_data/`目录下的`mnist_output_prediction.part`文件发送到数据方的`./mpc_infer_data/`目录下。数据方使用`process_data.py`脚本中的`decrypt_data_to_file()`,将密文预测结果进行解密,并且将解密得到的明文预测结果保存到指定文件中。例如,将下面的内容写到一个`decrypt_save.py`脚本中,然后`python decrypt_save.py decrypt_file`,将把明文预测结果保存在`decrypt_file`文件中。 +各计算party将`./mpc_infer_data/`目录下的`mnist_output_prediction.part`文件发送到数据方的`./mpc_infer_data/`目录下。数据方使用`process_data.py`脚本中的`decrypt_data_to_file()`,将密文预测结果进行解密,并且将解密得到的明文预测结果保存到指定文件中。例如,将下面的内容写到一个`decrypt_save.py`脚本中,然后`python3 decrypt_save.py decrypt_file aby3` 或者`python3 decrypt_save.py decrypt_file privc`,将把明文预测结果保存在`decrypt_file`文件中。 ```python import sys -decrypt_file=sys.argv[1] +decrypt_file = sys.argv[1] import process_data +process_data.protocol = sys.argv[2] process_data.decrypt_data_to_file("./mpc_infer_data/mnist_output_prediction", (BATCH_SIZE,), decrypt_file) ``` diff --git a/python/paddle_fl/mpc/examples/logistic_with_mnist/decrypt_save.py b/python/paddle_fl/mpc/examples/logistic_with_mnist/decrypt_save.py index ac50b8b5..27afb786 100644 --- a/python/paddle_fl/mpc/examples/logistic_with_mnist/decrypt_save.py +++ b/python/paddle_fl/mpc/examples/logistic_with_mnist/decrypt_save.py @@ -16,7 +16,8 @@ """ import sys import os -from process_data import decrypt_data_to_file +import process_data + decrypt_file=sys.argv[1] BATCH_SIZE=128 @@ -25,13 +26,15 @@ mpc_infer_data_dir = "./mpc_infer_data/" prediction_file = mpc_infer_data_dir + "mnist_debug_prediction" +process_data.protocol = sys.argv[2] + if os.path.exists(decrypt_file): os.remove(decrypt_file) if class_num == 2: - decrypt_data_to_file(prediction_file, (BATCH_SIZE,), decrypt_file) + process_data.decrypt_data_to_file(prediction_file, (BATCH_SIZE,), decrypt_file) elif class_num == 10: - decrypt_data_to_file(prediction_file, (BATCH_SIZE, 10), decrypt_file) + process_data.decrypt_data_to_file(prediction_file, (BATCH_SIZE, 10), decrypt_file) else: raise ValueError("class_num should be 2 or 10, but received 
{}.".format(class_num)) diff --git a/python/paddle_fl/mpc/examples/logistic_with_mnist/process_data.py b/python/paddle_fl/mpc/examples/logistic_with_mnist/process_data.py index ca29e2b6..92fd256e 100644 --- a/python/paddle_fl/mpc/examples/logistic_with_mnist/process_data.py +++ b/python/paddle_fl/mpc/examples/logistic_with_mnist/process_data.py @@ -19,6 +19,7 @@ import logging import numpy as np import six +import sys import paddle from paddle_fl.mpc.data_utils.data_utils import get_datautils @@ -27,8 +28,8 @@ logger = logging.getLogger("fluid") logger.setLevel(logging.INFO) - -mpc_du = get_datautils('aby3') +protocol = "aby3" +mpc_du = get_datautils(protocol) sample_reader = paddle.dataset.mnist.train() test_reader = paddle.dataset.mnist.test() @@ -37,7 +38,8 @@ def generate_encrypted_train_data(mpc_data_dir, class_num): """ generate encrypted samples """ - + global protocol + mpc_du = get_datautils(protocol) def encrypted_mnist_features(): """ feature reader @@ -66,7 +68,8 @@ def generate_encrypted_test_data(mpc_data_dir, class_num, label_mnist_filepath): """ generate encrypted samples """ - + global protocol + mpc_du = get_datautils(protocol) def encrypted_mnist_features(): """ feature reader @@ -99,6 +102,8 @@ def load_decrypt_data(filepath, shape): """ load the encrypted data and reconstruct """ + global protocol + mpc_du = get_datautils(protocol) part_readers = [] for id in six.moves.range(3): part_readers.append(mpc_du.load_shares(filepath, id=id, shape=shape)) @@ -113,6 +118,8 @@ def load_decrypt_bs_data(filepath, shape): """ load the encrypted data and reconstruct """ + global protocol + mpc_du = get_datautils(protocol) part_readers = [] for id in six.moves.range(3): part_readers.append(mpc_du.load_shares(filepath, id=id, shape=shape)) @@ -128,12 +135,25 @@ def decrypt_data_to_file(filepath, shape, decrypted_filepath): """ load the encrypted data (arithmetic share) and reconstruct to a file """ + global protocol + mpc_du = get_datautils(protocol) if 
os.path.exists(decrypted_filepath): os.remove(decrypted_filepath) part_readers = [] - for id in six.moves.range(3): - part_readers.append(mpc_du.load_shares(filepath, id=id, shape=shape)) - mpc_share_reader = paddle.reader.compose(part_readers[0], part_readers[1], part_readers[2]) + + if protocol == "aby3": + for id in six.moves.range(3): + part_readers.append( + mpc_du.load_shares( + filepath, id=id, shape=shape)) + mpc_share_reader = paddle.reader.compose(part_readers[0], part_readers[1], + part_readers[2]) + elif protocol == "privc": + for id in six.moves.range(2): + part_readers.append( + mpc_du.load_shares( + filepath, id=id, shape=shape)) + mpc_share_reader = paddle.reader.compose(part_readers[0], part_readers[1]) for instance in mpc_share_reader(): p = mpc_du.reconstruct(np.array(instance)) @@ -146,6 +166,8 @@ def decrypt_bs_data_to_file(filepath, shape, decrypted_filepath): """ load the encrypted data (boolean share) and reconstruct to a file """ + global protocol + mpc_du = get_datautils(protocol) if os.path.exists(decrypted_filepath): os.remove(decrypted_filepath) part_readers = [] @@ -164,6 +186,8 @@ def decrypt_bs_data_to_file(filepath, shape, decrypted_filepath): if __name__ == '__main__': mpc_data_dir = './mpc_data/' label_mnist_filepath = mpc_data_dir + "label_mnist" + protocol = sys.argv[1] + mpc_du = get_datautils(protocol) if not os.path.exists(mpc_data_dir): os.mkdir(mpc_data_dir) if os.path.exists(label_mnist_filepath): diff --git a/python/paddle_fl/mpc/examples/logistic_with_mnist/run_standalone_aby3.sh b/python/paddle_fl/mpc/examples/logistic_with_mnist/run_standalone_aby3.sh new file mode 100755 index 00000000..f47fb38c --- /dev/null +++ b/python/paddle_fl/mpc/examples/logistic_with_mnist/run_standalone_aby3.sh @@ -0,0 +1,74 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash +# +# A tools to faciliate the parallel running of fluid_encrypted test scrips. +# A test script is EXPECTED to accepted arguments in the following format: +# +# SCRIPT_NAME $ROLE $SERVER $PORT +# ROLE: the role of the running party +# SERVER: the address of the party discovering service +# PORT: the port of the party discovering service +# +# This tool will try to fill the above three argument to the test script, +# so that totally three processes running the script will be started, to +# simulate run of three party in a standalone machine. +# +# Usage of this script: +# +# bash run_standalone.sh TEST_SCRIPT_NAME +# + +# please set the following environment vars according in your environment +PYTHON=${PYTHON} +REDIS_HOME=${PATH_TO_REDIS_BIN} +SERVER=${LOCALHOST} +PORT=${REDIS_PORT} + +PROTOCOL="aby3" + +echo "redis home in ${REDIS_HOME}, server is ${SERVER}, port is ${PORT}, protocol is ${PROTOCOL}" +function usage() { + echo 'run_standalone.sh SCRIPT_NAME [ARG...]' + exit 0 +} + +if [ $# -lt 1 ]; then + usage +fi + +SCRIPT=$1 +if [ ! -f $SCRIPT ]; then + echo 'Could not find script of '$SCRIPT + exit 1 +fi + +REDIS_BIN=$REDIS_HOME/redis-cli +if [ ! 
-f $REDIS_BIN ]; then + echo 'Could not find redis cli in '$REDIS_HOME + exit 1 +fi + +# clear the redis cache +$REDIS_BIN -h $SERVER -p $PORT flushall + +# kick off script with roles of 1 and 2, and redirect output to /dev/null +for role in {1..2}; do + $PYTHON $SCRIPT $role $SERVER $PORT $PROTOCOL 2>&1 >/dev/null & +done + +# for party of role 0, run in a foreground mode and show the output +$PYTHON $SCRIPT 0 $SERVER $PORT $PROTOCOL + diff --git a/python/paddle_fl/mpc/examples/logistic_with_mnist/run_standalone_privc.sh b/python/paddle_fl/mpc/examples/logistic_with_mnist/run_standalone_privc.sh new file mode 100755 index 00000000..78808f48 --- /dev/null +++ b/python/paddle_fl/mpc/examples/logistic_with_mnist/run_standalone_privc.sh @@ -0,0 +1,72 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash +# +# A tools to faciliate the parallel running of fluid_encrypted test scrips. +# A test script is EXPECTED to accepted arguments in the following format: +# +# SCRIPT_NAME $ROLE $SERVER $PORT +# ROLE: the role of the running party +# SERVER: the address of the party discovering service +# PORT: the port of the party discovering service +# +# This tool will try to fill the above three argument to the test script, +# so that totally three processes running the script will be started, to +# simulate run of three party in a standalone machine. 
+# +# Usage of this script: +# +# bash run_standalone.sh TEST_SCRIPT_NAME +# + +# please set the following environment vars according in your environment +PYTHON=${PYTHON} +REDIS_HOME=${PATH_TO_REDIS_BIN} +SERVER=${LOCALHOST} +PORT=${REDIS_PORT} + +PROTOCOL="privc" + +echo "redis home in ${REDIS_HOME}, server is ${SERVER}, port is ${PORT}, protocol is ${PROTOCOL}" +function usage() { + echo 'run_standalone.sh SCRIPT_NAME [ARG...]' + exit 0 +} + +if [ $# -lt 1 ]; then + usage +fi + +SCRIPT=$1 +if [ ! -f $SCRIPT ]; then + echo 'Could not find script of '$SCRIPT + exit 1 +fi + +REDIS_BIN=$REDIS_HOME/redis-cli +if [ ! -f $REDIS_BIN ]; then + echo 'Could not find redis cli in '$REDIS_HOME + exit 1 +fi + +# clear the redis cache +$REDIS_BIN -h $SERVER -p $PORT flushall + +# kick off script with roles of 1, and redirect output to log.txt +$PYTHON $SCRIPT 1 $SERVER $PORT $PROTOCOL > log.txt 2>&1 & + +# for party of role 0, run in a foreground mode and show the output +$PYTHON $SCRIPT 0 $SERVER $PORT $PROTOCOL + diff --git a/python/paddle_fl/mpc/examples/logistic_with_mnist/train_fc_sigmoid.py b/python/paddle_fl/mpc/examples/logistic_with_mnist/train_fc_sigmoid.py index 43093973..476cb47a 100644 --- a/python/paddle_fl/mpc/examples/logistic_with_mnist/train_fc_sigmoid.py +++ b/python/paddle_fl/mpc/examples/logistic_with_mnist/train_fc_sigmoid.py @@ -27,9 +27,11 @@ import paddle_fl.mpc as pfl_mpc from paddle_fl.mpc.data_utils.data_utils import get_datautils -mpc_protocol_name = 'aby3' -mpc_du= get_datautils(mpc_protocol_name) + role, server, port = sys.argv[1], sys.argv[2], sys.argv[3] +mpc_protocol_name = sys.argv[4] +mpc_du = get_datautils(mpc_protocol_name) + # modify host(localhost). 
pfl_mpc.init(mpc_protocol_name, int(role), "localhost", server, int(port)) role = int(role) @@ -116,4 +118,4 @@ for sample in test_loader(): prediction = exe.run(program=infer_program, feed=sample, fetch_list=[cost]) with open(prediction_file, 'ab') as f: - f.write(np.array(prediction).tostring()) + f.write(np.array(prediction).tobytes()) diff --git a/python/paddle_fl/mpc/examples/logistic_with_mnist/train_fc_softmax.py b/python/paddle_fl/mpc/examples/logistic_with_mnist/train_fc_softmax.py index 59362ce4..23660351 100644 --- a/python/paddle_fl/mpc/examples/logistic_with_mnist/train_fc_softmax.py +++ b/python/paddle_fl/mpc/examples/logistic_with_mnist/train_fc_softmax.py @@ -35,7 +35,7 @@ logger = logging.getLogger("fluid") logger.setLevel(logging.INFO) -mpc_protocol_name = 'aby3' +mpc_protocol_name = sys.argv[4] mpc_du = get_datautils(mpc_protocol_name) role, server, port = sys.argv[1], sys.argv[2], sys.argv[3] @@ -115,7 +115,7 @@ def infer(): step += 1 prediction = exe.run(program=infer_program, feed=sample, fetch_list=[softmax]) with open(prediction_file_part, 'ab') as f: - f.write(np.array(prediction).tostring()) + f.write(np.array(prediction).tobytes()) if step % 10 == 0: end_time = time.time() logger.info('MPC infer of step={}, cost time in seconds:{}'.format(step, (end_time - start_time))) From a1c8d26187bb5c50cfa824fcb360666e16763481 Mon Sep 17 00:00:00 2001 From: gaina <74752800+gaina99@users.noreply.github.com> Date: Mon, 30 Aug 2021 16:38:40 +0800 Subject: [PATCH 7/7] Update README_CN.md --- python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md b/python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md index 032fa802..90510189 100644 --- a/python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md +++ b/python/paddle_fl/mpc/examples/linear_reg_with_uci/README_CN.md @@ -46,7 +46,7 @@ bash 
run_standalone_privc.sh uci_demo.py #### 3. 解密数据 最后,demo会使用`process_data.py`脚本中的`load_decrypt_data()`,恢复并打印出明文的loss数据和prediction结果,用以和明文Paddle模型结果进行对比。 -例如,将下面的内容写到一个decrypt_save.py脚本中,然后python3 decrypt_save.py decrypt_loss_file decrypt_prediction_file aby3 或者 python3 decrypt_save.py decrypt_loss_file decrypt_prediction_file privc,将把明文losss数据和预测结果分别保存在文件中。 +例如,将下面的内容写到一个decrypt_save.py脚本中,然后python3 decrypt_save.py decrypt_loss_file decrypt_prediction_file aby3 或者 python3 decrypt_save.py decrypt_loss_file decrypt_prediction_file privc,将把明文loss数据和预测结果分别保存在文件中。 ```python import sys