3434/*
3535 This example demonstrates how to set up a beacon on an encrypted attribute,
3636 put an item with the beacon, and query against that beacon.
37- This example follows a use case of a database that stores customer location data .
37+ This example follows a use case of a database that stores unit inspection information .
3838
39- Running this example requires access to a DDB table with the
40- following primary key configuration:
41- - Partition key is named "customer_id " with type (S)
42- - Sort key is named "create_time " with type (S)
43- This table must have a Global Secondary Index (GSI) configured named "state-zip -index":
44- - Partition key is named "aws_dbe_b_state " with type (S)
45- - Sort key is named "aws_dbe_b_zip " with type (S)
39+ Running this example requires access to a DDB table with the
40+ following key configuration:
41+ - Partition key is named "work_id " with type (S)
42+ - Sort key is named "inspection_date " with type (S)
43+ This table must have a Global Secondary Index (GSI) configured named "last4-unit -index":
44+ - Partition key is named "aws_dbe_b_inspector_id_last4 " with type (S)
45+ - Sort key is named "aws_dbe_b_unit " with type (S)
4646
47- In this example for storing customer location data, this schema is utilized for the data:
48- - "customer_id" stores a unique customer identifier
49- - "create_time" stores a Unix timestamp
50- - "state" stores an encrypted 2-letter US state or territory abbreviation
51- (https://www.faa.gov/air_traffic/publications/atpubs/cnt_html/appendix_a.html)
52- - "zip" stores an encrypted 5-digit US zipcode (00000 - 99999)
47+ In this example for storing unit inspection information, this schema is utilized for the data:
48+ - "work_id" stores a unique identifier for a unit inspection work order (v4 UUID)
49+ - "inspection_date" stores an ISO 8601 date for the inspection (YYYY-MM-DD)
50+ - "inspector_id_last4" stores the last 4 digits of the ID of the inspector performing the work
51+ - "unit" stores a 12-digit serial number for the unit being inspected
5352
5453 The example requires the following ordered input command line parameters:
5554 1. DDB table name for table to put/query data from
@@ -61,10 +60,9 @@ This table must have a Global Secondary Index (GSI) configured named "state-zip-
6160 */
6261
6362public class BasicSearchableEncryptionExample {
64-
65- static String GSI_NAME = "state-zip-index" ;
66-
67- public static void PutItemQueryItemWithBeacon (String ddbTableName , String branchKeyId , String branchKeyWrappingKmsKeyArn , String branchKeyDdbTableName ) {
63+ static String GSI_NAME = "last4-unit-index" ;
64+ public static void PutItemQueryItemWithBeacon (String ddbTableName , String branchKeyId ,
65+ String branchKeyWrappingKmsKeyArn , String branchKeyDdbTableName ) {
6866
6967 // 1. Configure Beacons.
7068 // The beacon name must be the name of a table attribute that will be encrypted.
@@ -73,56 +71,90 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
7371 // https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
7472 List <StandardBeacon > standardBeaconList = new ArrayList <>();
7573
76- // The configured DDB table has a GSI on the `aws_dbe_b_state ` AttributeName
77- // Since this field is assumed to hold a well-distributed US 2-letter state abbreviation
78- // (56 = 50 states + 6 territories),
79- // we follow the guidance in the link above to determine acceptable bounds for beacon length:
80- // - min: log(sqrt(56))/log(2) ~= 2.9, round up to 3
81- // - max: log((56/2))/log(2) ~= 4.8, round up to 5
82- // We can safely choose a beacon length between 3 and 5:
83- // - Closer to 3, the underlying data is better obfuscated, but more "false positives" are returned in
84- // queries, leading to more decrypt calls and worse performance
85- // - Closer to 5, fewer "false positives" are returned in queries, leading to fewer decrypt calls and
86- // better performance, but it is easier to distinguish unique plaintext values
87- // As an example, we will choose 4 .
88- // Values stored in aws_dbe_b_state will be 4 bits long (0x0 - 0xf)
89- // There will be 2^4 = 16 possible HMAC values.
90- // With well-distributed plaintext data (56 values), we expect (56/16) = 3.5 abbrevations sharing the same beacon
91- // value.
92- // NOTE: This example assumes that the field values are well-distributed. In practice, this will not be true.
93- // Some flaws in this assumption:
94- // - More populous states would be expected to have more records; those beacons will be overused
95- // - States where a business is not operating would expect no customer records for that state; those
96- // beacons will be underused
97- // This is a streamlined example and should not be used as a basis for determining beacon length
98- // in production. Users should analyze their specific dataset to determine acceptable beacon length bounds.
99- StandardBeacon stringBeacon = StandardBeacon . builder ()
100- . name ( "state" )
101- . length ( 4 )
102- . build ();
103- standardBeaconList . add ( stringBeacon );
104-
105- // The configured DDB table has a GSI on the `aws_dbe_b_zip` AttributeName
106- // Since this field holds a well-distributed zipcode (100,000 possible values, of which ~42,000 are valid;
107- // see: https://facts.usps.com/42000-zip-codes/) ,
108- // we follow the guidance in the link above to determine acceptable bounds for beacon length:
109- // - min: log(sqrt(42,000))/log(2) ~= 7.7, round up to 8
110- // - max: log((42,000/2))/log(2) ~= 14.3, round up to 15
111- // We can safely choose a beacon length between 8 and 15:
112- // - Closer to 8, the underlying data is better obfuscated, but more "false positives" are returned in
113- // queries, leading to more decrypt calls and worse performance
114- // - Closer to 15, fewer "false positives" are returned in queries, leading to fewer decrypt calls and
115- // better performance, but it is easier to distinguish unique plaintext values
74+ // The configured DDB table has a GSI on the `aws_dbe_b_inspector_id_last4 ` AttributeName.
75+ // This field holds the last 4 digits of an inspector ID.
76+ // For our example, this field may range from 0 to 9,999 (10,000 possible values).
77+ // For our example, we assume a full inspector ID is an integer
78+ // ranging from 0 to 99,999,999. We do not assume that the full inspector ID's
79+ // values are uniformly distributed across its range of possible values.
80+ // In many use cases, the prefix of an identifier encodes some information
81+ // about that identifier (e.g. zipcode and SSN prefixes encode geographic
82+ // information), while the suffix does not and is more uniformly distributed.
83+ // We will assume that the inspector ID field matches a similar use case.
84+ // So for this example, we only store and use the last
85+ // 4 digits of the inspector ID, which we assume is uniformly distributed .
86+ // Since the full ID's range is evenly divisible by the range of the last 4 digits,
87+ // each 4-digit suffix corresponds to the same number of possible full IDs, so we treat
88+ // the last 4 digits of the inspector ID as uniformly distributed over the range from 0 to 9,999.
89+ // See our documentation for why you should avoid creating beacons over non-uniform distributions
90+ // https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/searchable-encryption.html#are-beacons-right-for-me
91+ // A single inspector ID suffix may be assigned to multiple `work_id`s.
92+ //
93+ // This link provides guidance for choosing a beacon length:
94+ // https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
95+ // We follow the guidance in the link above to determine reasonable bounds
96+ // for the length of a beacon on the last 4 digits of an inspector ID:
97+ // - min: log(sqrt(10,000))/log(2) ~= 6.6, round up to 7
98+ // - max: log((10,000/2))/log(2) ~= 12.3, round down to 12
99+ // The computed bounds are generally not integers, so you must round them to a nearby integer.
100+ // We choose to round to the nearest integer; you might consider a different rounding approach.
101+ // Rounding up will return fewer expected "false positives" in queries,
102+ // leading to fewer decrypt calls and better performance,
103+ // but it is easier to identify which beacon values encode distinct plaintexts.
104+ // Rounding down will return more expected "false positives" in queries,
105+ // leading to more decrypt calls and worse performance ,
106+ // but it is harder to identify which beacon values encode distinct plaintexts.
107+ // We can choose a beacon length between 7 and 12:
108+ // - Closer to 7, we expect more "false positives" to be returned,
109+ // making it harder to identify which beacon values encode distinct plaintexts,
110+ // but leading to more decrypt calls and worse performance
111+ // - Closer to 12, we expect fewer "false positives" returned in queries,
112+ // leading to fewer decrypt calls and better performance,
113+ // but it is easier to identify which beacon values encode distinct plaintexts.
116114 // As an example, we will choose 10.
117- // Values stored in aws_dbe_b_zip will be 10 bits long (0x000 - 0x3ff).
118- // There will be 2^10 = 1024 possible HMAC values.
119- // With well-distributed plaintext data (100,000 values), we expect (42,000/1024) ~= 41 zipcodes sharing the same
120- // beacon value.
121- StandardBeacon numberBeacon = StandardBeacon .builder ()
122- .name ("zip" )
115+ //
116+ // Values stored in aws_dbe_b_inspector_id_last4 will be 10 bits long (0x000 - 0x3ff)
117+ // There will be 2^10 = 1,024 possible HMAC values.
118+ // With a sufficiently large number of well-distributed inspector IDs,
119+ // for a particular beacon we expect (10,000/1,024) ~= 9.8 4-digit inspector ID suffixes
120+ // sharing that beacon value.
121+ StandardBeacon last4Beacon = StandardBeacon .builder ()
122+ .name ("inspector_id_last4" )
123123 .length (10 )
124124 .build ();
125- standardBeaconList .add (numberBeacon );
125+ standardBeaconList .add (last4Beacon );
126+
127+ // The configured DDB table has a GSI on the `aws_dbe_b_unit` AttributeName.
128+ // This field holds a unit serial number.
129+ // For this example, this is a 12-digit integer from 0 to 999,999,999,999 (10^12 possible values).
130+ // We will assume values for this attribute are uniformly distributed across this range.
131+ // A single unit serial number may be assigned to multiple `work_id`s.
132+ //
133+ // This link provides guidance for choosing a beacon length:
134+ // https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
135+ // We follow the guidance in the link above to determine reasonable bounds
136+ // for the length of a beacon on a unit serial number:
137+ // - min: log(sqrt(1,000,000,000,000))/log(2) ~= 19.9, round up to 20
138+ // - max: log((1,000,000,000,000/2))/log(2) ~= 38.9, round up to 39
139+ // We can choose a beacon length between 20 and 39:
140+ // - Closer to 20, we expect more "false positives" to be returned,
141+ // making it harder to identify which beacon values encode distinct plaintexts,
142+ // but leading to more decrypt calls and worse performance
143+ // - Closer to 39, we expect fewer "false positives" returned in queries,
144+ // leading to fewer decrypt calls and better performance,
145+ // but it is easier to identify which beacon values encode distinct plaintexts.
146+ // As an example, we will choose 30.
147+ //
148+ // Values stored in aws_dbe_b_unit will be 30 bits long (0x00000000 - 0x3fffffff)
149+ // There will be 2^30 = 1,073,741,824 ~= 1.1B possible HMAC values.
150+ // With a sufficiently large number of well-distributed unit serial numbers,
151+ // for a particular beacon we expect (10^12/2^30) ~= 931.3 unit serial numbers
152+ // sharing that beacon value.
153+ StandardBeacon unitBeacon = StandardBeacon .builder ()
154+ .name ("unit" )
155+ .length (30 )
156+ .build ();
157+ standardBeaconList .add (unitBeacon );
126158
127159 // 2. Configure Keystore.
128160 // The keystore is a separate DDB table where the client stores encryption and decryption materials.
@@ -196,18 +228,18 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
196228 // - DO_NOTHING: The attribute is not encrypted and not included in the signature
197229 // Any attributes that will be used in beacons must be configured as ENCRYPT_AND_SIGN.
198230 final Map <String , CryptoAction > attributeActionsOnEncrypt = new HashMap <>();
199- attributeActionsOnEncrypt .put ("customer_id " , CryptoAction .SIGN_ONLY ); // Our partition attribute must be SIGN_ONLY
200- attributeActionsOnEncrypt .put ("create_time " , CryptoAction .SIGN_ONLY ); // Our sort attribute must be SIGN_ONLY
201- attributeActionsOnEncrypt .put ("state " , CryptoAction .ENCRYPT_AND_SIGN ); // Beaconized attributes must be encrypted
202- attributeActionsOnEncrypt .put ("zip " , CryptoAction .ENCRYPT_AND_SIGN ); // Beaconized attributes must be encrypted
231+ attributeActionsOnEncrypt .put ("work_id " , CryptoAction .SIGN_ONLY ); // Our partition attribute must be SIGN_ONLY
232+ attributeActionsOnEncrypt .put ("inspection_date " , CryptoAction .SIGN_ONLY ); // Our sort attribute must be SIGN_ONLY
233+ attributeActionsOnEncrypt .put ("inspector_id_last4 " , CryptoAction .ENCRYPT_AND_SIGN ); // Beaconized attributes must be encrypted
234+ attributeActionsOnEncrypt .put ("unit " , CryptoAction .ENCRYPT_AND_SIGN ); // Beaconized attributes must be encrypted
203235
204236 // 6. Create the DynamoDb Encryption configuration for the table we will be writing to.
205237 // The beaconVersions are added to the search configuration.
206238 final Map <String , DynamoDbTableEncryptionConfig > tableConfigs = new HashMap <>();
207239 final DynamoDbTableEncryptionConfig config = DynamoDbTableEncryptionConfig .builder ()
208240 .logicalTableName (ddbTableName )
209- .partitionKeyName ("customer_id " )
210- .sortKeyName ("create_time " )
241+ .partitionKeyName ("work_id " )
242+ .sortKeyName ("inspection_date " )
211243 .attributeActionsOnEncrypt (attributeActionsOnEncrypt )
212244 .keyring (kmsKeyring )
213245 .search (SearchConfig .builder ()
@@ -235,17 +267,17 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
235267 // 9. Put an item into our table using the above client.
236268 // Before the item gets sent to DynamoDb, it will be encrypted
237269 // client-side, according to our configuration.
238- // Since our configuration includes beacons for `state ` and `zip `,
270+ // Since our configuration includes beacons for `inspector_id_last4 ` and `unit `,
239271 // the client will add two additional attributes to the item. These attributes will have names
240- // `aws_dbe_b_state ` and `aws_dbe_b_zip `. Their values will be HMACs
272+ // `aws_dbe_b_inspector_id_last4 ` and `aws_dbe_b_unit `. Their values will be HMACs
241273 // truncated to as many bits as the beacon's `length` parameter; e.g.
242- // aws_dbe_b_state = truncate(HMAC("WA "), 4 )
243- // aws_dbe_b_zip = truncate(HMAC("98101 "), 10 )
274+ // aws_dbe_b_inspector_id_last4 = truncate(HMAC("4321 "), 10 )
275+ // aws_dbe_b_unit = truncate(HMAC("123456789012 "), 30 )
244276 final HashMap <String , AttributeValue > item = new HashMap <>();
245- item .put ("customer_id " , AttributeValue .builder ().s ("ABCD-1234 " ).build ());
246- item .put ("create_time " , AttributeValue .builder ().n ( "1681495205 " ).build ());
247- item .put ("state " , AttributeValue .builder ().s ("WA " ).build ());
248- item .put ("zip " , AttributeValue .builder ().s ("98101 " ).build ());
277+ item .put ("work_id " , AttributeValue .builder ().s ("1313ba89-5661-41eb-ba6c-cb1b4cb67b2d " ).build ());
278+ item .put ("inspection_date " , AttributeValue .builder ().s ( "2023-06-13 " ).build ());
279+ item .put ("inspector_id_last4 " , AttributeValue .builder ().s ("4321 " ).build ());
280+ item .put ("unit " , AttributeValue .builder ().s ("123456789012 " ).build ());
249281
250282 final PutItemRequest putRequest = PutItemRequest .builder ()
251283 .tableName (ddbTableName )
@@ -262,23 +294,26 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
262294 // and transform the query to use the beaconized name and value.
263295 // Internally, the client will query for and receive all items with a matching HMAC value in the beacon field.
264296 // This may include a number of "false positives" with different ciphertext, but the same truncated HMAC.
265- // e.g. if truncate(HMAC("WA"), 4) == truncate(HMAC("DC"), 4), the query will return both items.
297+ // e.g. if truncate(HMAC("123456789012"), 30)
298+ // == truncate(HMAC("098765432109"), 30),
299+ // the query will return both items.
266300 // The client will decrypt all returned items to determine which ones have the expected attribute values,
267301 // and only surface items with the correct plaintext to the user.
268302 // This procedure is internal to the client and is abstracted away from the user;
269- // e.g. the user will only see "WA" and never "DC", though the actual query returned both.
303+ // e.g. the user will only see "123456789012" and never
304+ // "098765432109", though the actual query returned both.
270305 Map <String ,String > expressionAttributesNames = new HashMap <>();
271- expressionAttributesNames .put ("#s " , "state " );
272- expressionAttributesNames .put ("#z " , "zip " );
306+ expressionAttributesNames .put ("#last4 " , "inspector_id_last4 " );
307+ expressionAttributesNames .put ("#unit " , "unit " );
273308
274309 Map <String ,AttributeValue > expressionAttributeValues = new HashMap <>();
275- expressionAttributeValues .put (":s " , AttributeValue .builder ().s ("WA " ).build ());
276- expressionAttributeValues .put (":z " , AttributeValue .builder ().s ("98101 " ).build ());
310+ expressionAttributeValues .put (":last4 " , AttributeValue .builder ().s ("4321 " ).build ());
311+ expressionAttributeValues .put (":unit " , AttributeValue .builder ().s ("123456789012 " ).build ());
277312
278313 QueryRequest queryRequest = QueryRequest .builder ()
279314 .tableName (ddbTableName )
280315 .indexName (GSI_NAME )
281- .keyConditionExpression ("#s = :s and #z = :z " )
316+ .keyConditionExpression ("#last4 = :last4 and #unit = :unit " )
282317 .expressionAttributeNames (expressionAttributesNames )
283318 .expressionAttributeValues (expressionAttributeValues )
284319 .build ();
@@ -291,8 +326,8 @@ public static void PutItemQueryItemWithBeacon(String ddbTableName, String branch
291326 assert attributeValues .size () == 1 ;
292327 final Map <String , AttributeValue > returnedItem = attributeValues .get (0 );
293328 // Validate the item has the expected attributes
294- assert returnedItem .get ("state " ).s ().equals ("WA " );
295- assert returnedItem .get ("zip " ).s ().equals ("98101 " );
329+ assert returnedItem .get ("inspector_id_last4 " ).s ().equals ("4321 " );
330+ assert returnedItem .get ("unit " ).s ().equals ("123456789012 " );
296331 }
297332
298333 public static void main (final String [] args ) {
0 commit comments