-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdirect_client.py
More file actions
240 lines (192 loc) · 8.84 KB
/
direct_client.py
File metadata and controls
240 lines (192 loc) · 8.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
import subprocess
import socket
import argparse
import multiprocessing
import time
import os
import sys
import glob
from contextlib import contextmanager
# Make the repository root importable so `policy_compiler` resolves when this
# script is run directly from its own directory.
curr_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(curr_dir)
sys.path.insert(0, parent_dir)
from policy_compiler.helper import safe_open
# Directory holding the `<name>_load` / `<name>_run` workload trace files.
workload_trace_dir = os.path.join(curr_dir, '..', 'workload_traces')
# Sentinel query telling the server that this client connection is finished.
exit_query = "query(exit)"
# Every message on the socket is prefixed with a 4-byte big-endian length header.
msg_header_size = 4
def generate_value(size):
    """Return a dummy payload string of exactly `size` bytes (all 'x')."""
    return size * 'x'
def process_query(query, value):
    """Substitute `value` for the 'VAL' placeholder in `query`, then keep only
    the part before the first '&' — the direct client ignores any additional
    predicates appended after the base query.

    Note: the original version had a second triple-quoted string after the
    docstring, which was a dead expression statement; it is merged here.
    """
    return query.replace('VAL', value).split('&')[0]
def preprocess_queries(queries, value_size):
    """Expand the 'VAL' placeholder in every query with a dummy value of
    `value_size` bytes and strip trailing '&' predicates from each."""
    dummy = generate_value(value_size)

    def _fill(raw_query):
        return process_query(raw_query, dummy)

    return list(map(_fill, queries))
def get_workload_options():
    """List available workload names, derived from `*_run` trace files."""
    pattern = os.path.join(workload_trace_dir, '*_run')
    return [os.path.basename(path).replace('_run', '')
            for path in glob.glob(pattern)]
def safe_receive(sock, size):
    """Read exactly `size` bytes from `sock`.

    Returns the received bytes, or b"" if the peer closes the connection
    before `size` bytes have arrived.
    """
    chunks = []
    remaining = size
    while remaining > 0:
        piece = sock.recv(remaining)
        if not piece:
            # Peer closed mid-message: signal failure with an empty payload.
            return b""
        chunks.append(piece)
        remaining -= len(piece)
    return b"".join(chunks)
def load_workload(db_type, db_address, socket_path, workload_name, value_size):
    """Load phase: replay the `<workload_name>_load` trace against the
    already-running server listening on `socket_path`.

    Args:
        db_type: Database type (unused here; kept for interface symmetry —
            the server was already started with it).
        db_address: Database address (unused here, same reason).
        socket_path: Path of the server's Unix domain socket.
        workload_name: Base name of the workload trace files.
        value_size: Size in bytes of the dummy value substituted for 'VAL'.

    Fixes vs. original: the socket is now closed even if a send/recv raises
    (try/finally), and the bare `except:` is narrowed to OSError.
    """
    load_file = os.path.join(workload_trace_dir, f"{workload_name}_load")
    client_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        client_socket.connect(socket_path)
        value = generate_value(value_size)
        with safe_open(load_file, 'r') as file:
            for line in file:
                if line.startswith("#"):
                    continue  # comment line in the trace
                preprocessed_query = process_query(line, value)
                query_encoded = preprocessed_query.encode()
                # Length-prefixed framing: 4-byte big-endian size header.
                msg_size = len(query_encoded).to_bytes(msg_header_size, 'big')
                client_socket.sendall(msg_size + query_encoded)
                # Read (and discard) the response to keep the protocol in sync.
                response_size_data = safe_receive(client_socket, msg_header_size)
                response_size = int.from_bytes(response_size_data, 'big')
                safe_receive(client_socket, response_size)
        # Tell the server this connection is done.
        exit_msg_size = len(exit_query).to_bytes(msg_header_size, 'big')
        client_socket.sendall(exit_msg_size + exit_query.encode())
        # Drain the exit response; errors are non-fatal here (the server may
        # already have closed the connection), and load-phase timing is not
        # printed.
        try:
            response_size_data = safe_receive(client_socket, msg_header_size)
            if response_size_data:
                response_size = int.from_bytes(response_size_data, 'big')
                safe_receive(client_socket, response_size)
        except OSError:
            pass
    finally:
        client_socket.close()
    print(f"Workload {workload_name} loaded successfully.")
@contextmanager
def timer(time_dict, stage, breakdown):
    """Accumulate elapsed wall-clock time for `stage` into `time_dict`.

    When `breakdown` is falsy this is a no-op context manager, so callers
    can wrap sections unconditionally without paying for timing.
    """
    if not breakdown:
        yield
        return
    started = time.perf_counter()
    yield
    time_dict[stage] += time.perf_counter() - started
def send_queries(socket_path, queries, latency_results, time_breakdowns, client_num, breakdown):
    """Worker body for one client: send `queries` over the Unix socket,
    record the average per-query latency, and optionally a prep/send/wait
    time breakdown.

    Args:
        socket_path: Path of the server's Unix domain socket.
        queries: Preprocessed query strings to send.
        latency_results: Shared (manager) list; average latency is appended.
        time_breakdowns: Shared (manager) list; (prep, send, wait) tuple is
            appended when `breakdown` is truthy.
        client_num: Client index, used only for log output.
        breakdown: Whether to measure the per-stage time breakdown.

    Fixes vs. original: the socket is closed even if a send/recv raises
    (try/finally), and the bare `except:` is narrowed to OSError.
    """
    breakdown_dict = {'prep': 0, 'send': 0, 'wait': 0}
    total_latency = 0
    request_count = 0
    client_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        client_socket.connect(socket_path)
        for query in queries:
            start_time = time.perf_counter()
            # Time query preparation (encoding + length header).
            with timer(breakdown_dict, 'prep', breakdown):
                query_encoded = query.encode()
                msg_size = len(query_encoded).to_bytes(msg_header_size, 'big')
            # Time sending.
            with timer(breakdown_dict, 'send', breakdown):
                client_socket.sendall(msg_size + query_encoded)
            # Time waiting for and receiving the response.
            with timer(breakdown_dict, 'wait', breakdown):
                response_size_data = safe_receive(client_socket, msg_header_size)
                response_size = int.from_bytes(response_size_data, 'big')
                safe_receive(client_socket, response_size)
            total_latency += time.perf_counter() - start_time
            request_count += 1
        # Tell the server we are done, and read its final response.
        exit_msg_size = len(exit_query).to_bytes(msg_header_size, 'big')
        client_socket.sendall(exit_msg_size + exit_query.encode())
        try:
            response_size_data = safe_receive(client_socket, msg_header_size)
            if response_size_data:
                response_size = int.from_bytes(response_size_data, 'big')
                response = safe_receive(client_socket, response_size)
                if response:
                    print(f"[Client {client_num}] {response.decode()}")
        except OSError:
            pass  # server may already have closed the connection
    finally:
        client_socket.close()
    if request_count > 0:
        latency_results.append(total_latency / request_count)
    if breakdown:
        time_breakdowns.append((breakdown_dict['prep'], breakdown_dict['send'], breakdown_dict['wait']))
def create_client_process(socket_path, queries, latency_results, time_breakdowns, client_num, breakdown):
    """Spawn and start one worker process running `send_queries`; return it."""
    worker = multiprocessing.Process(
        target=send_queries,
        args=(socket_path, queries, latency_results, time_breakdowns,
              client_num, breakdown),
    )
    worker.start()
    return worker
def main():
    """Entry point: start the C++ proxy server, run the load phase, then fan
    the run-phase queries out across N client processes and report latency
    (and, optionally, a prep/send/wait time breakdown)."""
    parser = argparse.ArgumentParser(description='Start a Unix socket client.')
    parser.add_argument('--workload', help='Name of the workload trace', required=True, type=str, choices=get_workload_options())
    parser.add_argument('--db', help='Database type (redis or rocksdb)', required=True, type=str, choices=["redis", "rocksdb"])
    parser.add_argument('--db_address', help='Address of the database server', required=True, type=str)
    parser.add_argument('--socket_path', help='Path to Unix socket', default='/tmp/direct_kv_client.sock', type=str)
    parser.add_argument('--clients', help='Number of clients to spawn', default=1, type=int)
    parser.add_argument('--value_size', help='Size of the value in bytes for PUT queries', default=1024, type=int)
    parser.add_argument('--breakdown', help='Enable breakdown measurements', action='store_true')
    args = parser.parse_args()
    # Start the server for the run phase
    server_process = subprocess.Popen([
        os.path.join(os.path.dirname(os.path.abspath(__file__)), '../controller/build/direct_kv_client'),
        '--db', args.db,
        '--db_address', args.db_address,
        '--socket_path', args.socket_path
    ])
    # Give server time to start
    # NOTE(review): fixed 1s sleep assumes the server binds its socket in
    # time — a readiness check would be more robust; confirm before changing.
    time.sleep(1)
    # Perform the load phase
    load_workload(args.db, args.db_address, args.socket_path, args.workload, args.value_size)
    # Read the run phase
    run_file = os.path.join(workload_trace_dir, f"{args.workload}_run")
    with safe_open(run_file, 'r') as file:
        queries = [line for line in file if not line.startswith("#")]
    preprocessed_queries = preprocess_queries(queries, args.value_size)
    # Round-robin split: client i gets every `clients`-th query starting at i.
    queries_per_client = [preprocessed_queries[i::args.clients] for i in range(args.clients)]
    # Manager lists are shared across the worker processes.
    manager = multiprocessing.Manager()
    latency_results = manager.list()
    time_breakdowns = manager.list()
    processes = []
    # Start the time measurement before sending the workload
    start_time = time.perf_counter()
    for i, client_queries in enumerate(queries_per_client):
        process = create_client_process(args.socket_path, client_queries, latency_results, time_breakdowns, i, args.breakdown)
        processes.append(process)
    # Wait for all client processes to finish
    for process in processes:
        process.join()
    # End the timer after the controller has returned
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time
    # Terminate server
    server_process.terminate()
    server_process.wait()
    if len(latency_results) > 0:
        # Average of the per-client average latencies (not per-request).
        average_latency = sum(latency_results) / len(latency_results)
        print(f"Average Latency: {average_latency:.6f} seconds")
    else:
        print("Did not gather latency statistics --- experiment failed.")
    if args.breakdown and len(time_breakdowns) > 0:
        # Sum stage times across clients; percentages are relative to the
        # wall-clock elapsed time, so they can exceed 100% in aggregate.
        total_prep_time = sum(breakdown[0] for breakdown in time_breakdowns)
        total_send_time = sum(breakdown[1] for breakdown in time_breakdowns)
        total_wait_time = sum(breakdown[2] for breakdown in time_breakdowns)
        print(f"Query preparation time: {total_prep_time:.3f} seconds ({total_prep_time/elapsed_time*100:.2f}%)")
        print(f"Unix socket send time: {total_send_time:.3f} seconds ({total_send_time/elapsed_time*100:.2f}%)")
        print(f"Unix socket wait time: {total_wait_time:.3f} seconds ({total_wait_time/elapsed_time*100:.2f}%)")
    print(f"Elapsed time: {elapsed_time:.3f} seconds")
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()