From fa16273830635aabbd8907fe72fe23ecfc8cc866 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Sat, 6 Jul 2024 12:53:35 +0000 Subject: [PATCH 01/16] old bug? solve --- src/dbtest/src/mda_detect.py | 342 ++-------------------------- src/dbtest/src/mda_detect_modify.md | 31 +++ 2 files changed, 50 insertions(+), 323 deletions(-) create mode 100644 src/dbtest/src/mda_detect_modify.md diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 982361ea..ce68dd31 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -33,16 +33,9 @@ class Txn: def __init__(self): self.begin_ts = -1 self.end_ts = 99999999999999999999 + self.isolation = "" -""" -Find the total variable number. -Args: -- lines (list): A list of queries. - -Returns: -int: The maximum variable number found in the queries. -""" # find total variable number def get_total(lines): num = 0 @@ -56,17 +49,17 @@ def get_total(lines): break return num +# find total Txn number +def get_total_txn(lines): + num = 0 + for query in lines: + query = query.replace("\n", "") + query = query.replace(" ", "") + if query[0:1] == "Q" and query.find("T") != -1: + tmp = find_data(query, "T") + num = max(num, tmp) + return num -""" -Extract the data we need from a query. - -Args: -- query (str): The input query string. -- target (str): The target substring to search for. - -Returns: -int: The extracted data value, or -1 if not found. -""" # extract the data we need in query def find_data(query, target): pos = query.find(target) @@ -85,20 +78,6 @@ def find_data(query, target): return data_value -""" -When a statement is executed, this function sets the end time, modifies the transaction list, -and updates the version list as needed. - -Args: -- op_time (int): The operation time of the statement. -- data_op_list (list): A list of data operations. -- query (str): The query string containing information about the statement execution. 
-- txn (list): A list of transaction objects. -- version_list (list): A list of version lists for data operations. - -Returns: -None -""" # when a statement is executed, set the end time and modify the version list def set_finish_time(op_time, data_op_list, query, txn, version_list): pos = query.find("finishedat:") @@ -135,17 +114,6 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list): op.value = len(version_list[i]) - 1 -""" -Check if two transactions are concurrent based on their start and end times. - -Args: -- data1: Information about the first transaction. -- data2: Information about the second transaction. -- txn: A list of transaction objects. - -Returns: -bool: True if the transactions are concurrent, False otherwise. -""" # if both transactions are running # or the start time of the second transaction is less than the end time of the first transaction # we think they are concurrent @@ -158,20 +126,6 @@ def check_concurrency(data1, data2, txn): return False -""" -Determine the type of edge between two operations based on their read or write versions. - -Args: -- data1: Information about the first operation. -- data2: Information about the second operation. -- txn: A list of transaction objects. - -Returns: -tuple: A tuple containing three values: - - A string indicating the edge type ('R', 'W', 'CR', 'CW'). - - Information about the operation that comes first. - - Information about the operation that comes second. -""" # decide which operation comes first depending on the read or write version # if later operation happened after the first txn commit time, edge type will add "C" def get_edge_type(data1, data2, txn): @@ -191,22 +145,6 @@ def get_edge_type(data1, data2, txn): return before.op_type + state + after.op_type, before, after -""" -Build a directed graph representing the concurrency relationships between operations. 
- -Args: -- data_op_list: A list of lists, where each inner list contains information about operations for a specific transaction. -- indegree: A list representing the in-degrees of each operation node in the graph. -- edge: A list representing the edges (concurrency relationships) between operations. -- txn: A list of transaction objects. - -This function constructs a directed graph where nodes represent operations, and edges represent concurrency relationships -between operations. It iterates through the list of operations for each transaction and calls the 'insert_edge' function -to create edges in the graph based on concurrency relationships. - -Returns: -None -""" def build_graph(data_op_list, indegree, edge, txn): for list1 in data_op_list: for i, data in enumerate(list1): @@ -214,25 +152,6 @@ def build_graph(data_op_list, indegree, edge, txn): insert_edge(list1[j], data, indegree, edge, txn) -""" -Insert an edge into the directed graph representing concurrency relationships between operations. - -Args: -- data1: An operation object representing the first operation. -- data2: An operation object representing the second operation. -- indegree: A list representing the in-degrees of each transaction in the graph. -- edge: A list representing the edges (concurrency relationships) between operations for each transaction. -- txn: A list of transaction objects. - -This function inserts an edge into the directed graph to represent the concurrency relationship between 'data1' and 'data2'. -It first checks if the two operations are concurrent by calling the 'check_concurrency' function. If they are concurrent, it -determines the edge type using the 'get_edge_type' function and adds the edge to the 'edge' list. - -The 'indegree' list is updated to reflect the in-degree of the target transaction node when an edge is inserted. 
- -Returns: -None -""" def insert_edge(data1, data2, indegree, edge, txn): if check_concurrency(data1, data2, txn): edge_type, data1, data2 = get_edge_type(data1, data2, txn) @@ -241,42 +160,12 @@ def insert_edge(data1, data2, indegree, edge, txn): edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) -""" -Initialize a record in the version list based on the information in the query. - -Args: -- query: A query string that contains information about a record. -- version_list: A list of lists representing versioned records. - -This function initializes a record in the 'version_list' based on the information provided in the 'query'. It extracts the 'key' -and 'value' of the record from the query using the 'find_data' function and appends the 'value' to the corresponding version list. - -Returns: -None -""" def init_record(query, version_list): key = find_data(query, "(") value = find_data(query, ",") version_list[key].append(value) -""" -Read the versioned record based on the information in the query. - -Args: -- query (str): A query string that contains information about reading a versioned record. -- op_time (int): The operation time of the read operation. -- data_op_list (list): A list of lists representing data operations. -- version_list (list): A list of lists representing versioned records. - -This function reads the versioned record specified in the 'query'. It extracts the 'key' and 'value' from the query, which are -used to identify the record and version to read. The function checks if the specified version exists in the version list and -updates the 'op.value' accordingly. If the version doesn't exist or if the read operation is not successful, an error message -is returned. - -Returns: -str: An error message indicating the result of the read operation. An empty string means the read was successful. 
-""" def readVersion_record(query, op_time, data_op_list, version_list): error_message = "" data = query.split(")") @@ -323,27 +212,6 @@ def readVersion_record(query, op_time, data_op_list, version_list): -""" -Read records based on the information in the query and update data operations. - -Args: -- op_time (int): The operation time of the read operation. -- txn_num (int): The transaction number. -- total_num (int): The total number of records. -- txn (list): A list of transactions. -- data_op_list (list): A list of lists representing data operations. - -This function reads records specified in the query and updates the 'data_op_list' accordingly. It extracts information from -the 'query' to determine which records to read and what type of operation to perform (read or predicate). The function also -sets the 'begin_ts' of the transaction if it's not already set. - -The 'query' is analyzed to identify specific record keys or predicates and create corresponding 'Operation' objects in the -'data_op_list'. Depending on the structure of the query, this function handles various cases, such as reading single records, -handling predicates, and selecting all rows in a table. - -Returns: -None -""" def read_record(op_time, txn_num, total_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: txn[txn_num].begin_ts = op_time @@ -372,24 +240,6 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list): data_op_list[i].append(Operation("R", txn_num, op_time, i)) -""" -Write records based on the information in the query and update data operations. - -Args: -- op_time (int): The operation time of the write operation. -- txn_num (int): The transaction number. -- txn (list): A list of transactions. -- data_op_list (list): A list of lists representing data operations. - -This function writes records specified in the query and updates the 'data_op_list' accordingly. 
It extracts information from the -'query' to determine which records to write and what type of operation to perform (write). The function also sets the 'begin_ts' -of the transaction if it's not already set. - -The 'query' is analyzed to identify specific record keys and values, and it creates corresponding 'Operation' objects in the 'data_op_list'. - -Returns: -None -""" def write_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: txn[txn_num].begin_ts = op_time @@ -403,24 +253,6 @@ def write_record(op_time, txn_num, txn, data_op_list): data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value)) -""" -Delete records based on the information in the query and update data operations. - -Args: -- op_time (int): The operation time of the delete operation. -- txn_num (int): The transaction number. -- txn (list): A list of transactions. -- data_op_list (list): A list of lists representing data operations. - -This function deletes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the -'query' to determine which records to delete and what type of operation to perform (delete). The function also sets the 'begin_ts' -of the transaction if it's not already set. - -The 'query' is analyzed to identify specific record keys, and it creates corresponding 'Operation' objects in the 'data_op_list'. - -Returns: -None -""" def delete_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: txn[txn_num].begin_ts = op_time @@ -432,25 +264,6 @@ def delete_record(op_time, txn_num, txn, data_op_list): data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data)) -""" -Insert records based on the information in the query and update data operations. - -Args: -- op_time (int): The operation time of the insert operation. -- txn_num (int): The transaction number. -- txn (list): A list of transactions. -- data_op_list (list): A list of lists representing data operations. 
- -This function inserts records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the -'query' to determine which records to insert and what type of operation to perform (insert). The function also sets the 'begin_ts' -of the transaction if it's not already set. - -The 'query' is analyzed to identify specific record keys and their corresponding values, and it creates corresponding 'Operation' -objects in the 'data_op_list'. - -Returns: -None -""" def insert_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1 and op_time != 0: txn[txn_num].begin_ts = op_time @@ -459,44 +272,15 @@ def insert_record(op_time, txn_num, txn, data_op_list): data_op_list[key].append(Operation("I", txn_num, op_time, value)) -""" -Set the end timestamp for a transaction. - -Args: -- op_time (int): The operation time when the transaction ends. -- txn_num (int): The transaction number. -- txn (list): A list of transactions. - -This function sets the 'end_ts' attribute of a transaction specified by 'txn_num' to the given 'op_time'. It marks the end of the -transaction's execution. - -Returns: -None -""" def end_record(op_time, txn_num, txn): txn[txn_num].end_ts = op_time -""" -Record and process database operations. - -Args: -- total_num (int): The total number of database operations. -- query (str): The SQL query representing a database operation. -- txn (list): A list of transactions. -- data_op_list (list): A list of data operations. -- version_list (list): A list of version information for data operations. - -This function records and processes database operations based on the provided SQL query. It updates the transaction list, data -operation list, and version list accordingly. The 'total_num' parameter specifies the total number of database operations. - -Returns: -str: An error message (if any), or an empty string if the operation is successful. 
-""" def operation_record(total_num, query, txn, data_op_list, version_list): error_message = "" op_time = find_data(query, "Q") txn_num = find_data(query, "T") + # print("total_num:{}, query:{},optime: {}, txn_num: {}\n".format(total_num,query, op_time, txn_num)) if op_time == 0 and query.find("INSERT") != -1: init_record(query, version_list) return error_message @@ -528,18 +312,6 @@ def operation_record(total_num, query, txn, data_op_list, version_list): -""" -Remove unfinished operations from the data operation list. - -Args: -- data_op_list (list): A list of data operations. - -This function iterates through the data operation list and removes any unfinished operations based on their operation time. -Unfinished operations are those with an operation time less than 10,000,000. - -Returns: -None -""" # remove failed statements to prevent redundant edges from being built def remove_unfinished_operation(data_op_list): for list1 in data_op_list: @@ -547,20 +319,6 @@ def remove_unfinished_operation(data_op_list): if op.op_time < 10000000: list1.pop(i) -""" -Check for cycles in a directed graph using topological sorting. - -Args: -- edge (List[List[Edge]]): A list representing the directed edges in the graph. -- indegree (List[int]): A list representing the in-degrees of nodes in the graph. -- total (int): The total number of nodes in the graph. - -This function checks for cycles in a directed graph by performing topological sorting. It takes as input the directed edges (`edge`), -in-degrees of nodes (`indegree`), and the total number of nodes in the graph (`total`). - -Returns: -bool: True if a cycle is detected, False otherwise. -""" # toposort to determine whether there is a cycle def check_cycle(edge, indegree, total): q = Queue.Queue() @@ -580,25 +338,6 @@ def check_cycle(edge, indegree, total): return True -""" -Perform depth-first search (DFS) to find and print loops in a directed graph. 
- -Args: -- result_folder (str): The path to the folder where the results will be saved. -- ts_now (str): The current timestamp or identifier for result file naming. -- now (int): The current node being visited. -- type (str): The type of edge leading to the current node ('C' for commit, 'R' for read, 'W' for write, etc.). - -This function performs depth-first search (DFS) to find and print loops in a directed graph. It takes as input the result folder -path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), the current node being visited (`now`), -and the type of edge leading to the current node (`type`). - -The function recursively explores the graph, tracking the visited nodes and edges to detect loops. When a loop is found, it is printed -to a result file in the specified result folder. - -Note: This function assumes that global variables like 'visit', 'visit1', 'path', 'edge_type', and 'edge' are defined elsewhere. - -""" # for loop graphs, print the loop def dfs(result_folder, ts_now, now, type): visit1[now] = 1 @@ -624,21 +363,6 @@ def dfs(result_folder, ts_now, now, type): visit[now] = 0 -""" -Print the paths in a directed graph to a result file. - -Args: -- result_folder (str): The path to the folder where the results will be saved. -- ts_now (str): The current timestamp or identifier for result file naming. -- edge (list of lists): A list of lists representing the directed edges in the graph. - -This function prints the paths in a directed graph to a result file. It takes as input the result folder path (`result_folder`), -the current timestamp or identifier for result file naming (`ts_now`), and a list of lists (`edge`) representing the directed edges -in the graph. - -The function iterates through the edges and writes the paths to the result file in the specified result folder. 
- -""" def print_path(result_folder, ts_now, edge): with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: flag = 0 @@ -652,41 +376,11 @@ def print_path(result_folder, ts_now, edge): f.write("\n\n") -""" -Output the result of cycle detection to a result file. - -Args: -- file (str): The name of the file or input source being analyzed. -- result_folder (str): The path to the folder where the results will be saved. -- ts_now (str): The current timestamp or identifier for result file naming. -- IsCyclic (str): A string indicating whether a cycle was detected. - -This function outputs the result of cycle detection to a result file. It takes as input the name of the file or input source being -analyzed (`file`), the result folder path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), -and a string (`IsCyclic`) indicating whether a cycle was detected. - -The function writes the result, including the file name and the cyclic status, to the specified result file in the result folder. - -""" def output_result(file, result_folder, ts_now, IsCyclic): with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: f.write(file + ": " + IsCyclic + "\n") -""" -Print an error message to a result file. - -Args: -- result_folder (str): The path to the folder where the results will be saved. -- ts_now (str): The current timestamp or identifier for result file naming. -- error_message (str): The error message to be printed. - -This function prints an error message to a result file. It takes as input the result folder path (`result_folder`), the current -timestamp or identifier for result file naming (`ts_now`), and the error message (`error_message`) to be printed. - -The function appends the error message to the specified result file in the result folder and adds a newline for separation. 
- -""" def print_error(result_folder, ts_now, error_message): with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: f.write(error_message + "\n") @@ -714,12 +408,13 @@ def print_error(result_folder, ts_now, error_message): lines = f.readlines() total_num = get_total(lines) # total number of variables - txn = [Txn() for i in range(total_num + 2)] # total num of transaction + total_num_txn = get_total_txn(lines) # total number of txn + txn = [Txn() for i in range(total_num_txn + 2)] # total num of transaction data_op_list = [[] for i in range(total_num + 2)] # record every operation that occurs on the variable - edge = [[] for i in range(total_num + 2)] # all edges from the current point - indegree = [0] * (total_num + 2) # in-degree of each point - visit = [0] * (total_num + 2) # in dfs, whether the current point has been visited - visit1 = [0] * (total_num + 2) # we will only use unvisited points as the starting point of the dfs + edge = [[] for i in range(total_num_txn + 2)] # all edges from the current point + indegree = [0] * (total_num_txn + 2) # in-degree of each point + visit = [0] * (total_num_txn + 2) # in dfs, whether the current point has been visited + visit1 = [0] * (total_num_txn + 2) # we will only use unvisited points as the starting point of the dfs path = [] # points in cycle edge_type = [] # edge type of the cycle version_list = [[] for i in range(total_num + 2)] @@ -731,6 +426,7 @@ def print_error(result_folder, ts_now, error_message): query = query.replace(" ", "") if query.find("Rollback") != -1 or query.find("Timeout") != -1: go_end = True + print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt")) error_message = operation_record(total_num, query, txn, data_op_list, version_list) if error_message != "": break diff --git a/src/dbtest/src/mda_detect_modify.md b/src/dbtest/src/mda_detect_modify.md new file mode 100644 index 
00000000..3af9e9f3 --- /dev/null +++ b/src/dbtest/src/mda_detect_modify.md @@ -0,0 +1,31 @@ +[text](mda_detect.py) 修改日志 +# 思考 +1. 用于加边建立图的节点对应一个操作还是一个事务。 + +# 原来代码中问题 +### 数组访问越界问题 +现象 +```python +total_num = get_total(lines) # 统计的个数是插入数据的个数,不是事务的个数。 +txn = [Txn() for i in range(total_num + 2)] # 导致构造的 txn 数组较小 +.... # 还有 indegree edge 数组的大小应该是事务的个数。 +``` +解决:构造一个新函数获取事务个数。 +```python +# find total Txn number +def get_total_txn(lines): + num = 0 + for query in lines: + query = query.replace("\n", "") + query = query.replace(" ", "") + if query[0:1] == "Q" and query.find("T") != -1: + tmp = find_data(query, "T") + num = max(num, tmp) + return num +total_num_txn = get_total_txn(lines) # total number of txn +``` +效果:不同数据使用不同的初始长度 +```python +# total_num: data_op_list, version_list +# total_num_txn: txn, edge, total_num_txn, visit, visit1 +``` From c73c237c3c25e90e23d3023afb43698da9a8398d Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Sat, 6 Jul 2024 17:13:38 +0000 Subject: [PATCH 02/16] add txn.isolation --- src/dbtest/src/{ => doc}/mda_detect_modify.md | 6 ++++-- src/dbtest/src/mda_detect.py | 19 ++++++++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) rename src/dbtest/src/{ => doc}/mda_detect_modify.md (78%) diff --git a/src/dbtest/src/mda_detect_modify.md b/src/dbtest/src/doc/mda_detect_modify.md similarity index 78% rename from src/dbtest/src/mda_detect_modify.md rename to src/dbtest/src/doc/mda_detect_modify.md index 3af9e9f3..7e29b4e6 100644 --- a/src/dbtest/src/mda_detect_modify.md +++ b/src/dbtest/src/doc/mda_detect_modify.md @@ -1,6 +1,7 @@ [text](mda_detect.py) 修改日志 -# 思考 -1. 用于加边建立图的节点对应一个操作还是一个事务。 +# 思考&分析 +1. 用于加边建立图的节点对应一个操作还是一个事务。答:一个事务。 +2. 
目标:一个对数据库的操作文件(运行效果文件)中有多个事务,每一个事务有不同的隔离级别,通过执行结果和隔离级别判断是否满足一致性 # 原来代码中问题 ### 数组访问越界问题 @@ -29,3 +30,4 @@ total_num_txn = get_total_txn(lines) # total number of txn # total_num: data_op_list, version_list # total_num_txn: txn, edge, total_num_txn, visit, visit1 ``` + diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index ce68dd31..b859c38b 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -33,7 +33,7 @@ class Txn: def __init__(self): self.begin_ts = -1 self.end_ts = 99999999999999999999 - self.isolation = "" + self.isolation = "serializable" # find total variable number @@ -77,6 +77,16 @@ def find_data(query, target): data_value = int(data_value) return data_value +# extract the isolation from content +def find_isolation(query): + if query.find("read-uncommitted") != -1: + return "read-uncommitted" + if query.find("read-committed") != -1: + return "read-committed" + if query.find("repeatable-read") != -1: + return "repeatable-read" + if query.find("serializable") != -1: + return "serializable" # when a statement is executed, set the end time and modify the version list def set_finish_time(op_time, data_op_list, query, txn, version_list): @@ -120,7 +130,7 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list): def check_concurrency(data1, data2, txn): if txn[data2.txn_num].begin_ts < txn[data1.txn_num].end_ts: return True - elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: + elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: # TODO maybe a bug: don't need return True else: return False @@ -138,7 +148,7 @@ def get_edge_type(data1, data2, txn): # before, after = data2, data1 # else: # before, after = data1, data2 - if data2.op_time > txn[data1.txn_num].end_ts: + if data2.op_time > txn[data1.txn_num].end_ts: # TODO maybe a bug, before after state = "C" else: state = "" @@ -290,6 +300,9 @@ def operation_record(total_num, query, txn, data_op_list, version_list): if 
query.find("finished") != -1: set_finish_time(op_time, data_op_list, query, txn, version_list) return error_message + if op_time == -1 and txn_num != -1 and query.find("set_isolation") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: + # query such as "T2 set_isolation=serializable " + txn[txn_num].isolation = find_isolation(query) if op_time == -1 or txn_num == -1: return error_message if query.find("SELECT") != -1: From 67d5ecc94551103e16c3b52ef0d248237cc9fa2e Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Sun, 7 Jul 2024 15:01:35 +0000 Subject: [PATCH 03/16] demo implement --- src/dbtest/src/doc/mda_detect_modify.md | 17 +++++- src/dbtest/src/mda_detect.py | 71 ++++++++++++++++++++----- 2 files changed, 73 insertions(+), 15 deletions(-) diff --git a/src/dbtest/src/doc/mda_detect_modify.md b/src/dbtest/src/doc/mda_detect_modify.md index 7e29b4e6..e249f6bf 100644 --- a/src/dbtest/src/doc/mda_detect_modify.md +++ b/src/dbtest/src/doc/mda_detect_modify.md @@ -2,7 +2,7 @@ # 思考&分析 1. 用于加边建立图的节点对应一个操作还是一个事务。答:一个事务。 2. 目标:一个对数据库的操作文件(运行效果文件)中有多个事务,每一个事务有不同的隔离级别,通过执行结果和隔离级别判断是否满足一致性 - +3. 原来输出未所有事务是否满足一致性,现在输出为每个事务是否满足一致性? 单个整体报错 or 多个报错? 
多个,每个错误都识别,兼容单个整体报错(实现较难) # 原来代码中问题 ### 数组访问越界问题 现象 @@ -31,3 +31,18 @@ total_num_txn = get_total_txn(lines) # total number of txn # total_num_txn: txn, edge, total_num_txn, visit, visit1 ``` + +### 默认字符串少了空格 +```python + pos = query.find("finished at:") + pos += len("finished at:") +``` + +### "R" 类型的操作并没有修改 value 值为下标: +```python + if data1.value <= data2.value: + before, after = data1, data2 + else: + before, after = data2, data1 +``` + diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index b859c38b..0b3393c4 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -42,11 +42,11 @@ def get_total(lines): for query in lines: query = query.replace("\n", "") query = query.replace(" ", "") - if query[0:2] == "Q0" and query.find("INSERT") != -1: + if query.find("INSERT") != -1: # query[0:2] == "Q0" and tmp = find_data(query, "(") num = max(num, tmp) - elif query[0:2] == "Q1": - break + # elif query[0:2] == "Q1": + # break return num # find total Txn number @@ -90,8 +90,8 @@ def find_isolation(query): # when a statement is executed, set the end time and modify the version list def set_finish_time(op_time, data_op_list, query, txn, version_list): - pos = query.find("finishedat:") - pos += len("finishedat:") + pos = query.find("finished at:") + pos += len("finished at:") data_value = "" tmp, tmp1 = "", "" for i in range(pos, len(query)): @@ -123,7 +123,6 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list): version_list[i].append(op.value) op.value = len(version_list[i]) - 1 - # if both transactions are running # or the start time of the second transaction is less than the end time of the first transaction # we think they are concurrent @@ -136,19 +135,26 @@ def check_concurrency(data1, data2, txn): return False +def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type): + for e in edge[src_txn]: + if e.out == tar_txn and e.type[0] == src_type and e.type[-1] == tar_type: + return True + return False + # 
decide which operation comes first depending on the read or write version # if later operation happened after the first txn commit time, edge type will add "C" def get_edge_type(data1, data2, txn): - if data1.value <= data2.value: - before, after = data1, data2 - else: - before, after = data2, data1 + # if data1.value <= data2.value: + # before, after = data1, data2 + # else: + # before, after = data2, data1 + before, after = data1, data2 # if data1.op_type == "D" or data2.op_type == "D": # if data1.value < data2.value: # before, after = data2, data1 # else: # before, after = data1, data2 - if data2.op_time > txn[data1.txn_num].end_ts: # TODO maybe a bug, before after + if data2.op_time > txn[data1.txn_num].end_ts: state = "C" else: state = "" @@ -165,9 +171,45 @@ def build_graph(data_op_list, indegree, edge, txn): def insert_edge(data1, data2, indegree, edge, txn): if check_concurrency(data1, data2, txn): edge_type, data1, data2 = get_edge_type(data1, data2, txn) - if edge_type != "RR" and edge_type != "RCR" and data1.txn_num != data2.txn_num: + # if edge_type != "RR" and edge_type != "RCR" and data1.txn_num != data2.txn_num: + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + if edge_type == "WW" or edge_type == "WCW": indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + elif data1.isolation == "read-uncommitted": + if edge_type[0] != 'R' and not(data2.isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): + if edge_type[-1] == 'R': # not R -- R + if data2.isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + if data2.isolation == "repeatable-read" and edge_type[0]== 'W': + indegree[data2.txn_num] += 1 + 
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + if data2.isolation == "serializable": + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + elif edge_type[-1] != 'R': # not R -- not R + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + elif data1.isolation == "read-committed" or data1.isolation == "repeatable-read" or data1.isolation == "serializable": + if edge_type[0] != 'R' and not(data2.isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): + if edge_type[-1] == 'R': # not R -- R + if data2.isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + if data2.isolation == "repeatable-read" and edge_type[0]== 'W': + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + if data2.isolation == "serializable": + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + elif edge_type[-1] != 'R': # not R -- not R + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + elif edge_type[0] == 'R' and edge_type[-1] != 'R': + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) def init_record(query, version_list): @@ -238,12 +280,12 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list): left = find_data(query, "k>") + 1 right = find_data(query, "k<") for i in range(left, right): - data_op_list[i].append(Operation("P", txn_num, op_time, i)) + data_op_list[i].append(Operation("R", txn_num, op_time, i)) # P elif query.find("value1>") != -1: left = find_data(query, "value1>") + 1 right = find_data(query, "value1<") for i in range(left, right): - 
data_op_list[i].append(Operation("P", txn_num, op_time, i)) + data_op_list[i].append(Operation("R", txn_num, op_time, i)) # p else: # it means select all rows in table for i in range(total_num): @@ -303,6 +345,7 @@ def operation_record(total_num, query, txn, data_op_list, version_list): if op_time == -1 and txn_num != -1 and query.find("set_isolation") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: # query such as "T2 set_isolation=serializable " txn[txn_num].isolation = find_isolation(query) + return error_message if op_time == -1 or txn_num == -1: return error_message if query.find("SELECT") != -1: From 3f90a6ef2a6f4797304152e7b0e6e40ff0dd1f8d Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Tue, 9 Jul 2024 07:21:45 +0000 Subject: [PATCH 04/16] bugs --- src/dbtest/src/mda_detect.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 0b3393c4..0136e3fe 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + # /* # * Tencent is pleased to support the open source community by making 3TS available. 
# * @@ -177,31 +179,31 @@ def insert_edge(data1, data2, indegree, edge, txn): if edge_type == "WW" or edge_type == "WCW": indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif data1.isolation == "read-uncommitted": - if edge_type[0] != 'R' and not(data2.isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): + elif txn[data1.txn_num].isolation == "read-uncommitted": + if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): if edge_type[-1] == 'R': # not R -- R - if data2.isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 + if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'): # 可能脏读 indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if data2.isolation == "repeatable-read" and edge_type[0]== 'W': + if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if data2.isolation == "serializable": + if txn[data2.txn_num].isolation == "serializable": indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) elif edge_type[-1] != 'R': # not R -- not R indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif data1.isolation == "read-committed" or data1.isolation == "repeatable-read" or data1.isolation == "serializable": - if edge_type[0] != 'R' and not(data2.isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): + elif txn[data1.txn_num].isolation == 
"read-committed" or txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable": + if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): if edge_type[-1] == 'R': # not R -- R - if data2.isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 + if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if data2.isolation == "repeatable-read" and edge_type[0]== 'W': + if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if data2.isolation == "serializable": + if txn[data2.txn_num].isolation == "serializable": indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) elif edge_type[-1] != 'R': # not R -- not R @@ -345,6 +347,7 @@ def operation_record(total_num, query, txn, data_op_list, version_list): if op_time == -1 and txn_num != -1 and query.find("set_isolation") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: # query such as "T2 set_isolation=serializable " txn[txn_num].isolation = find_isolation(query) + print(str(txn_num)+"------------------"+txn[txn_num].isolation) return error_message if op_time == -1 or txn_num == -1: return error_message @@ -443,7 +446,7 @@ def print_error(result_folder, ts_now, error_message): f.write("\n\n") -run_result_folder = "pg/serializable" +run_result_folder = "pg/repeatable-read" result_folder = "check_result/" + run_result_folder do_test_list = "do_test_list.txt" #ts_now = "_2param_3txn_insert" From 
f0a40e929e7359d05fac50417b5a9692168d477e Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Tue, 9 Jul 2024 08:39:40 +0000 Subject: [PATCH 05/16] graph print --- src/dbtest/src/mda_detect.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 0136e3fe..b90e5d08 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -21,7 +21,8 @@ class Edge: def __init__(self, type, out): self.type = type self.out = out - + def __repr__(self): + return "Edge(type={}, out={})".format(self.type, self.out) class Operation: def __init__(self, op_type, txn_num, op_time, value): @@ -37,6 +38,12 @@ def __init__(self): self.end_ts = 99999999999999999999 self.isolation = "serializable" +# print edge after build graph +def print_graph(edge): + for i, edges in enumerate(edge): + print("Transaction {}:".format(i)) + for e in edges: + print(" {}".format(e)) # find total variable number def get_total(lines): @@ -485,7 +492,7 @@ def print_error(result_folder, ts_now, error_message): query = query.replace(" ", "") if query.find("Rollback") != -1 or query.find("Timeout") != -1: go_end = True - print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt")) + # print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt")) error_message = operation_record(total_num, query, txn, data_op_list, version_list) if error_message != "": break @@ -498,6 +505,7 @@ def print_error(result_folder, ts_now, error_message): cycle = False remove_unfinished_operation(data_op_list) build_graph(data_op_list, indegree, edge, txn) + print_graph(edge) if not go_end: cycle = check_cycle(edge, indegree, total_num + 2) if cycle: From 5338b6ac66119d072b7aa855f9750f90fd0c64ad Mon Sep 17 00:00:00 2001 From: 
dreamin <2534393465@qq.com> Date: Fri, 12 Jul 2024 02:58:45 +0000 Subject: [PATCH 06/16] implement --- src/dbtest/src/doc/mda_detect_modify.md | 3 + src/dbtest/src/doc/mda_detect_read.md | 36 ++++ src/dbtest/src/mda_detect.py | 221 +++++++++++++++--------- 3 files changed, 183 insertions(+), 77 deletions(-) create mode 100644 src/dbtest/src/doc/mda_detect_read.md diff --git a/src/dbtest/src/doc/mda_detect_modify.md b/src/dbtest/src/doc/mda_detect_modify.md index e249f6bf..3c64dbda 100644 --- a/src/dbtest/src/doc/mda_detect_modify.md +++ b/src/dbtest/src/doc/mda_detect_modify.md @@ -3,6 +3,9 @@ 1. 用于加边建立图的节点对应一个操作还是一个事务。答:一个事务。 2. 目标:一个对数据库的操作文件(运行效果文件)中有多个事务,每一个事务有不同的隔离级别,通过执行结果和隔离级别判断是否满足一致性 3. 原来输出未所有事务是否满足一致性,现在输出为每个事务是否满足一致性? 单个整体报错 or 多个报错? 多个,每个错误都识别,兼容单个整体报错(实现较难) +4. 有两个检测思路: + 1. 修改建立图的过程中加边策略,保留循环检测流程。(当前实现方式) + 2. 保留加边策略,修改冲突检测流程。 # 原来代码中问题 ### 数组访问越界问题 现象 diff --git a/src/dbtest/src/doc/mda_detect_read.md b/src/dbtest/src/doc/mda_detect_read.md new file mode 100644 index 00000000..ebf0b144 --- /dev/null +++ b/src/dbtest/src/doc/mda_detect_read.md @@ -0,0 +1,36 @@ + +## mda_detect.py 代码功能 +主要用于检测数据库事务之间的并发关系,以及是否存在循环依赖。具体来说,代码通过解析输入的SQL语句,构建事务操作的有向图,检测是否存在循环依赖(即事务之间是否存在无法解决的并发冲突),并输出结果。 +### 变量命名含义 + +| 名称 | 含义 | 备注 | +| ------------ | ------------------------------------- | ------------------- | +| data | 一次操作信息:type、txn_num、op_time、op_data | | +| data_op_list | 数据操作列表 【数据key:【操作...】, 数据key:【操作...】】 | | +| indegree | 下标:事务号;元素:依赖该事务的事务个数 | | +| edge | 下标:事务号;元素:该事务的出边 | | +| version_list | key:数据编号;value:数据的值的历史记录 | 下标是 value? 
| +| total_num | 数据库操作总数 | 对于数据库全表扫描类的操作有关键作用。 | + +### 函数列表 + +| 函数 | 功能 | 备注 | +| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------- | +| get_total | 获取一个测试文件中 Q0 部分插入的数据个数 | key 的个数 | +| find_data | 特定位置提取一个数字 | | +| set_finish_time | 在一个数据库操作语句执行完毕后更新相关的时间戳:finishedat
1. 所有事务中 begin_ts | end_ts 和 op_time 相等的进行替换
2. 所有操作中 op_time 和 op_time 相等的进行替换
3. version_list 保存操作历史值 op.value
4. op.value 保存 version_list 历史下标 | | +| check_concurrency | 检查两个事务和是否并发 | 通过开始时间和结束时间来判断,默认开始时间之间已经比较过了吗? | +| get_edge_type | 确定两个操作之间的边的类型,添加C 标志,跨事务操作,同时返回新的操作顺序 | data1 和 data2 是同一个 Key 的前后两个操作 | +| build_graph | 建立一个有向图,这个图表示不同操作之间的并发关系 | 只会在同一组操作之间建立边 | +| insert_edge | 具体的插边操作(check_concurrency 的前提下插入 get_edge_type 边):
不同事务之间有并发读写冲突; | data1 发生时间默认在data2 之前 | +| init_record | 根据查询中的信息初始化版本列表中的记录。 | | +| readVersion_record | 处理数据库查询操作 | 只是更改了: op.value | +| read_record | 根据查询中的信息读取记录并更新数据操作。增加到 data_op_list 中。 | | +| write_record | 根据查询和更新数据操作中的信息写入记录。增加到 data_op_list 中。 | | +| delete_record | 根据查询中的信息删除记录并更新数据操作。增加到 data_op_list 中。 | | +| insert_record | 根据查询中的信息插入记录并更新数据操作。 | | +| end_record | 设置事务的结束时间戳。 | | +| operation_record | 记录并且处理数据库操作。 | | +| remove_unfinished_operation | 删除失败的语句以防止构建冗余边 | | +| check_cycle | 在有向图中查找环 | | +| dfs | 在有环的有向图中找环 | | diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index b90e5d08..55e1f1c1 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -39,12 +39,22 @@ def __init__(self): self.isolation = "serializable" # print edge after build graph -def print_graph(edge): +def print_graph(edge,txn): for i, edges in enumerate(edge): - print("Transaction {}:".format(i)) + if i == 0 or i == len(edge)-1: + continue + print("Transaction {}:-----{}-----".format(i,txn[i].isolation)) for e in edges: print(" {}".format(e)) + +# print data_op_list +def print_data_op_list(data_op_list): + for k,list in enumerate(data_op_list): + print("\nk:{}---".format(k)) + for i, data in enumerate(list): + print("op:{}--{}-".format(data.op_type,data.txn_num)) + # find total variable number def get_total(lines): num = 0 @@ -99,20 +109,21 @@ def find_isolation(query): # when a statement is executed, set the end time and modify the version list def set_finish_time(op_time, data_op_list, query, txn, version_list): - pos = query.find("finished at:") - pos += len("finished at:") - data_value = "" - tmp, tmp1 = "", "" - for i in range(pos, len(query)): - if query[i].isdigit(): - tmp += query[i] - else: - for j in range(3 - len(tmp)): - tmp1 += "0" - tmp = tmp1 + tmp - data_value += tmp - tmp, tmp1 = "", "" - data_value = int(data_value) + # pos = query.find("finished at:") + # pos += len("finished at:") + # data_value = "" + # tmp, 
tmp1 = "", "" + # for i in range(pos, len(query)): + # if query[i].isdigit(): + # tmp += query[i] + # else: + # for j in range(3 - len(tmp)): + # tmp1 += "0" + # tmp = tmp1 + tmp + # data_value += tmp + # tmp, tmp1 = "", "" + # data_value = int(data_value) + data_value = int(op_time) for t in txn: if t.begin_ts == op_time: t.begin_ts = data_value @@ -177,48 +188,67 @@ def build_graph(data_op_list, indegree, edge, txn): insert_edge(list1[j], data, indegree, edge, txn) + def insert_edge(data1, data2, indegree, edge, txn): if check_concurrency(data1, data2, txn): edge_type, data1, data2 = get_edge_type(data1, data2, txn) - # if edge_type != "RR" and edge_type != "RCR" and data1.txn_num != data2.txn_num: - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if edge_type == "WW" or edge_type == "WCW": + if data1.txn_num == data2.txn_num or edge_type in ["RCR", "RR"]: + return + #* read-uncommitted: Dirty Write + # WI 不存在,如果有,那么一定会有 WD + DI 的等效边 + # II 不存在,如果有,那么一定会有 ID + DI 的等效边 + # DW 允许存在, UPDATE 时使用条件查询包含 D 的数据 + # DD 不存在,如果有,那么一定会有 DI + ID 的等效边 + if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]: + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + #* read-committed: Dirty Read + elif edge_type in ["WCR","WR"] and (txn[data2.txn_num].isolation == "read-committed" or txn[data2.txn_num].isolation == "repeatable-read" or txn[data2.txn_num].isolation == "serializable"): + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + #* repeatable-read: Unrepeatable Read + elif edge_type in ["RCW","RW"] and (txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable"): indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif txn[data1.txn_num].isolation == "read-uncommitted": - if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation 
== "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): - if edge_type[-1] == 'R': # not R -- R - if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'): # 可能脏读 - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if txn[data2.txn_num].isolation == "serializable": - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif edge_type[-1] != 'R': # not R -- not R - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif txn[data1.txn_num].isolation == "read-committed" or txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable": - if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): - if edge_type[-1] == 'R': # not R -- R - if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if txn[data2.txn_num].isolation == "serializable": - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif edge_type[-1] != 'R': # not R -- not R - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif edge_type[0] == 'R' and 
edge_type[-1] != 'R': - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + #* serializable: Phantom Read + elif edge_type in ["ICR","IR","DCR","DR","RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable": + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + + # 入边 + # elif txn[data1.txn_num].isolation == "read-uncommitted": + # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): + # if edge_type[-1] == 'R': # not R -- R + # if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'): # 可能脏读 + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # if txn[data2.txn_num].isolation == "serializable": + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # elif edge_type[-1] != 'R': # not R -- not R + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # elif txn[data1.txn_num].isolation == "read-committed" or txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable": + # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): + # if edge_type[-1] == 'R': # not R -- R + # if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 + # indegree[data2.txn_num] += 1 + # 
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # if txn[data2.txn_num].isolation == "serializable": + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # elif edge_type[-1] != 'R': # not R -- not R + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # elif edge_type[0] == 'R' and edge_type[-1] != 'R': + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) def init_record(query, version_list): @@ -297,7 +327,7 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list): data_op_list[i].append(Operation("R", txn_num, op_time, i)) # p else: # it means select all rows in table - for i in range(total_num): + for i in range(total_num+1): data_op_list[i].append(Operation("R", txn_num, op_time, i)) @@ -312,7 +342,21 @@ def write_record(op_time, txn_num, txn, data_op_list): op_data = find_data(query, "k=") op_value = find_data(query, "v=") data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value)) - + # for predicate cases + elif query.find("k>") != -1: + left = find_data(query, "k>") + 1 + right = find_data(query, "k<") + for i in range(left, right): + data_op_list[i].append(Operation("W", txn_num, op_time, i)) # P + elif query.find("value1>") != -1: + left = find_data(query, "value1>") + 1 + right = find_data(query, "value1<") + for i in range(left, right): + data_op_list[i].append(Operation("W", txn_num, op_time, i)) # p + else: + # it means select all rows in table + for i in range(total_num+1): + data_op_list[i].append(Operation("W", txn_num, op_time, i)) def delete_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: @@ -323,7 +367,21 @@ def delete_record(op_time, txn_num, txn, data_op_list): elif 
query.find("k=") != -1: op_data = find_data(query, "k=") data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data)) - + # for predicate cases + elif query.find("k>") != -1: + left = find_data(query, "k>") + 1 + right = find_data(query, "k<") + for i in range(left, right): + data_op_list[i].append(Operation("D", txn_num, op_time, i)) # P + elif query.find("value1>") != -1: + left = find_data(query, "value1>") + 1 + right = find_data(query, "value1<") + for i in range(left, right): + data_op_list[i].append(Operation("D", txn_num, op_time, i)) # p + else: + # it means select all rows in table + for i in range(total_num+1): + data_op_list[i].append(Operation("D", txn_num, op_time, i)) def insert_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1 and op_time != 0: @@ -337,6 +395,7 @@ def end_record(op_time, txn_num, txn): txn[txn_num].end_ts = op_time + def operation_record(total_num, query, txn, data_op_list, version_list): error_message = "" op_time = find_data(query, "Q") @@ -345,35 +404,28 @@ def operation_record(total_num, query, txn, data_op_list, version_list): if op_time == 0 and query.find("INSERT") != -1: init_record(query, version_list) return error_message - if query.find("returnresult") != -1: + if query.find("returnresult") != -1: #! 1"returnresult" maybe don't exist error_message = readVersion_record(query, op_time, data_op_list, version_list) return error_message - if query.find("finished") != -1: + if query.find("finished") != -1: #! 
"finished" maybe don't exist set_finish_time(op_time, data_op_list, query, txn, version_list) return error_message - if op_time == -1 and txn_num != -1 and query.find("set_isolation") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: - # query such as "T2 set_isolation=serializable " - txn[txn_num].isolation = find_isolation(query) - print(str(txn_num)+"------------------"+txn[txn_num].isolation) - return error_message if op_time == -1 or txn_num == -1: return error_message - if query.find("SELECT") != -1: + if query.find("BEGIN") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: + txn[txn_num].isolation = find_isolation(query) + elif query.find("SELECT") != -1: read_record(op_time, txn_num, total_num, txn, data_op_list) - return error_message elif query.find("UPDATE") != -1: write_record(op_time, txn_num, txn, data_op_list) - return error_message - elif query.find("DELETE") != -1: + elif query.find("DELETE") != -1: delete_record(op_time, txn_num, txn, data_op_list) - return error_message - elif query.find("INSERT") != -1: + elif query.find("INSERT") != -1: #! assume existing data will not be inserted ("Rollback") insert_record(op_time, txn_num, txn, data_op_list) - return error_message elif query.find("COMMIT") != -1: if op_time != 0: end_record(op_time, txn_num, txn) - return error_message + set_finish_time(op_time, data_op_list, query, txn, version_list) return error_message @@ -453,9 +505,21 @@ def print_error(result_folder, ts_now, error_message): f.write("\n\n") -run_result_folder = "pg/repeatable-read" + + + +#! 
------Some assumption------ +# 在任何隔离级别事务的修改互相可见,即等价于单一存储,无读写缓冲 +# 在输入文件中有设置各个事务隔离级别的语句,在 "BEGIN 之后" + # BEGIN T1 set_isolation=repeatable-read + # BEGIN T2 set_isolation=serializable + # BEGIN T3 set_isolation=read-uncommitted + # BEGIN T4 set_isolation=read-committed +# 假定插入的数据 key 是从 0 向上递增的顺序 + +run_result_folder = "pg/mda_detect_test" result_folder = "check_result/" + run_result_folder -do_test_list = "do_test_list.txt" +do_test_list = "mda_detect_test_list.txt" #ts_now = "_2param_3txn_insert" ts_now = time.strftime("%Y%m%d_%H%M%S", time.localtime()) if not os.path.exists(result_folder): @@ -503,16 +567,19 @@ def print_error(result_folder, ts_now, error_message): continue cycle = False - remove_unfinished_operation(data_op_list) + # remove_unfinished_operation(data_op_list) 动态测试中默认所有的执行时间 Qi 都没有 finish 字段 build_graph(data_op_list, indegree, edge, txn) - print_graph(edge) + print("--------file:{}--------".format(file)) + print_graph(edge,txn) + # print_data_op_list(data_op_list) if not go_end: - cycle = check_cycle(edge, indegree, total_num + 2) + cycle = check_cycle(edge, indegree, total_num_txn+2) if cycle: output_result(file, result_folder, ts_now, "Cyclic") - for i in range(total_num + 2): + for i in range(total_num_txn + 2): if visit1[i] == 0: dfs(result_folder, ts_now, i, "null") else: output_result(file, result_folder, ts_now, "Avoid") print_path(result_folder, ts_now, edge) + print("---------------------------------\n") \ No newline at end of file From f4909ca6c6a4add3778731851d7be63bc65ef0b2 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Fri, 12 Jul 2024 03:04:13 +0000 Subject: [PATCH 07/16] test case --- src/dbtest/mda_detect_test_list.txt | 10 +++++++ src/dbtest/pg/mda_detect_test/aa.txt | 20 +++++++++++++ src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt | 27 ++++++++++++++++++ .../pg/mda_detect_test/rc_rc_cyclic.txt | 28 +++++++++++++++++++ .../pg/mda_detect_test/rr_rr_cyclic.txt | 27 ++++++++++++++++++ 
.../mda_detect_test/rr_rr_cyclic_commit.txt | 25 +++++++++++++++++ .../pg/mda_detect_test/rr_s_s_avoid.txt | 25 +++++++++++++++++ src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt | 28 +++++++++++++++++++ src/dbtest/pg/mda_detect_test/s_s_avoid.txt | 25 +++++++++++++++++ src/dbtest/pg/mda_detect_test/s_s_cyclic.txt | 28 +++++++++++++++++++ .../pg/mda_detect_test/s_s_cyclic_pr.txt | 20 +++++++++++++ 11 files changed, 263 insertions(+) create mode 100644 src/dbtest/mda_detect_test_list.txt create mode 100755 src/dbtest/pg/mda_detect_test/aa.txt create mode 100755 src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt create mode 100755 src/dbtest/pg/mda_detect_test/rc_rc_cyclic.txt create mode 100755 src/dbtest/pg/mda_detect_test/rr_rr_cyclic.txt create mode 100755 src/dbtest/pg/mda_detect_test/rr_rr_cyclic_commit.txt create mode 100755 src/dbtest/pg/mda_detect_test/rr_s_s_avoid.txt create mode 100755 src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt create mode 100755 src/dbtest/pg/mda_detect_test/s_s_avoid.txt create mode 100755 src/dbtest/pg/mda_detect_test/s_s_cyclic.txt create mode 100755 src/dbtest/pg/mda_detect_test/s_s_cyclic_pr.txt diff --git a/src/dbtest/mda_detect_test_list.txt b/src/dbtest/mda_detect_test_list.txt new file mode 100644 index 00000000..82f9304a --- /dev/null +++ b/src/dbtest/mda_detect_test_list.txt @@ -0,0 +1,10 @@ + +ru_ru_avoid +rc_rc_cyclic +rc_rc_avoid +rr_rr_cyclic +rr_rr_cyclic_commit +s_s_cyclic +s_s_avoid +rr_s_s_avoid + diff --git a/src/dbtest/pg/mda_detect_test/aa.txt b/src/dbtest/pg/mda_detect_test/aa.txt new file mode 100755 index 00000000..2b19e7c5 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/aa.txt @@ -0,0 +1,20 @@ + + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 1); +Q0-T1-INSERT INTO t1 VALUES (2, 2); +Q0-T1-COMMIT; + +Q1-T1-BEGIN set_isolation=repeatable-read; +Q2-T1-SELECT * FROM t1; + + Q3-T2-BEGIN 
set_isolation=serializable; + Q4-T2-INSERT INTO t1 VALUES (3, 3); + Q5-T2-COMMIT; + +Q6-T1-SELECT * FROM t1 WHERE k=1; +Q7-T1-SELECT * FROM t1; +Q8-T1-COMMIT; diff --git a/src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt b/src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt new file mode 100755 index 00000000..b0f99e62 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt @@ -0,0 +1,27 @@ + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN set_isolation=read-committed; +Q2-T1-SELECT * FROM t1 WHERE k=1; + + Q3-T2-BEGIN set_isolation=read-committed; + Q4-T2-UPDATE t1 SET v=1 WHERE k=1; + +Q6-T1-SELECT * FROM t1 WHERE k=1; + + Q7-T2-COMMIT; + +Q8-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/rc_rc_cyclic.txt b/src/dbtest/pg/mda_detect_test/rc_rc_cyclic.txt new file mode 100755 index 00000000..3a6c5f09 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/rc_rc_cyclic.txt @@ -0,0 +1,28 @@ + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN set_isolation=read-committed; +Q2-T1-UPDATE t1 SET v=1 WHERE k=0; + + Q3-T2-BEGIN set_isolation=read-committed; + Q4-T2-UPDATE t1 SET v=1 WHERE k=1; + Q5-T2-SELECT * FROM t1 WHERE k=0; + +Q6-T1-SELECT * FROM t1 WHERE k=1; + + Q7-T2-COMMIT; + +Q8-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/rr_rr_cyclic.txt b/src/dbtest/pg/mda_detect_test/rr_rr_cyclic.txt new file mode 100755 index 00000000..8cf92abd --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/rr_rr_cyclic.txt @@ -0,0 +1,27 @@ + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN 
set_isolation=repeatable-read; +Q2-T1-SELECT * FROM t1 WHERE k=1; + + Q3-T2-BEGIN set_isolation=repeatable-read; + Q4-T2-UPDATE t1 SET v=1 WHERE k=1; + +Q6-T1-SELECT * FROM t1 WHERE k=1; + + Q7-T2-COMMIT; + +Q8-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/rr_rr_cyclic_commit.txt b/src/dbtest/pg/mda_detect_test/rr_rr_cyclic_commit.txt new file mode 100755 index 00000000..aded104d --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/rr_rr_cyclic_commit.txt @@ -0,0 +1,25 @@ + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN set_isolation=repeatable-read; +Q2-T1-SELECT * FROM t1 WHERE k=1; + + Q3-T2-BEGIN set_isolation=repeatable-read; + Q4-T2-UPDATE t1 SET v=1 WHERE k=1; + Q5-T2-COMMIT; + +Q6-T1-SELECT * FROM t1 WHERE k=1; +Q7-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/rr_s_s_avoid.txt b/src/dbtest/pg/mda_detect_test/rr_s_s_avoid.txt new file mode 100755 index 00000000..7ba19745 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/rr_s_s_avoid.txt @@ -0,0 +1,25 @@ + + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 1); +Q0-T1-INSERT INTO t1 VALUES (2, 2); +Q0-T1-COMMIT; + +Q1-T1-BEGIN set_isolation=repeatable-read; +Q2-T1-SELECT * FROM t1 WHERE k=1; + + Q3-T2-BEGIN set_isolation=serializable; + Q4-T2-INSERT INTO t1 VALUES (3, 3); + Q5-T2-COMMIT; + +Q6-T1-SELECT * FROM t1 WHERE k=1; +Q7-T1-SELECT * FROM t1; +Q8-T1-COMMIT; + + Q9-T3-BEGIN set_isolation=serializable ; + Q10-T3-SELECT * FROM t1 WHERE k=3; + Q11-T3-COMMIT; + diff --git a/src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt b/src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt new file mode 100755 index 00000000..09a35143 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt @@ -0,0 +1,28 @@ + + 
+Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN set_isolation=read-uncommitted; +Q2-T1-UPDATE t1 SET v=1 WHERE k=0; + + Q3-T2-BEGIN set_isolation=read-uncommitted; + Q4-T2-UPDATE t1 SET v=1 WHERE k=1; + Q5-T2-SELECT * FROM t1 WHERE k=0; + +Q6-T1-SELECT * FROM t1 WHERE k=1; + + Q7-T2-COMMIT; + +Q8-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/s_s_avoid.txt b/src/dbtest/pg/mda_detect_test/s_s_avoid.txt new file mode 100755 index 00000000..dc18cce5 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/s_s_avoid.txt @@ -0,0 +1,25 @@ + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN set_isolation=serializable ; +Q2-T1-UPDATE t1 SET v=1 WHERE k=0; +Q3-T1-SELECT * FROM t1 WHERE k=1; + Q4-T2-BEGIN set_isolation=serializable; + Q5-T2-UPDATE t1 SET v=1 WHERE k=1; + Q6-T2-SELECT * FROM t1 WHERE k=0; + Q7-T2-COMMIT; + +Q8-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/s_s_cyclic.txt b/src/dbtest/pg/mda_detect_test/s_s_cyclic.txt new file mode 100755 index 00000000..a2f24b06 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/s_s_cyclic.txt @@ -0,0 +1,28 @@ + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN set_isolation=serializable ; +Q2-T1-UPDATE t1 SET v=1 WHERE k=0; + + Q3-T2-BEGIN set_isolation=serializable; + Q4-T2-UPDATE t1 SET v=1 WHERE k=1; + Q5-T2-SELECT * FROM t1 WHERE k=0; + +Q6-T1-SELECT * FROM t1 WHERE k=1; + + Q7-T2-COMMIT; + +Q8-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/s_s_cyclic_pr.txt b/src/dbtest/pg/mda_detect_test/s_s_cyclic_pr.txt new file 
mode 100755 index 00000000..dc47f13f --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/s_s_cyclic_pr.txt @@ -0,0 +1,20 @@ + + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 1); +Q0-T1-INSERT INTO t1 VALUES (2, 2); +Q0-T1-COMMIT; + +Q1-T1-BEGIN set_isolation=serializable; +Q2-T1-SELECT * FROM t1; + + Q3-T2-BEGIN set_isolation=serializable; + Q4-T2-INSERT INTO t1 VALUES (3, 3); + Q5-T2-COMMIT; + +Q6-T1-SELECT * FROM t1 WHERE k=1; +Q7-T1-SELECT * FROM t1; +Q8-T1-COMMIT; From 8484c7b20a819a03615eb6a08d256ba4d428a456 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Mon, 15 Jul 2024 10:44:50 +0000 Subject: [PATCH 08/16] bug --- src/dbtest/src/mda_detect.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 55e1f1c1..5e885bb2 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -211,10 +211,13 @@ def insert_edge(data1, data2, indegree, edge, txn): indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) #* serializable: Phantom Read - elif edge_type in ["ICR","IR","DCR","DR","RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable": + elif edge_type in ["RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable": indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - + #* serializable: Phantom Read + elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable": + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) # 入边 # elif txn[data1.txn_num].isolation == "read-uncommitted": # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): From 
690703b23fab59ae232527ae3e26538c4dab5338 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Mon, 15 Jul 2024 11:01:04 +0000 Subject: [PATCH 09/16] comment translate --- src/dbtest/src/mda_detect.py | 57 +++++++----------------------------- 1 file changed, 11 insertions(+), 46 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 5e885bb2..79df82ca 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -195,10 +195,10 @@ def insert_edge(data1, data2, indegree, edge, txn): if data1.txn_num == data2.txn_num or edge_type in ["RCR", "RR"]: return #* read-uncommitted: Dirty Write - # WI 不存在,如果有,那么一定会有 WD + DI 的等效边 - # II 不存在,如果有,那么一定会有 ID + DI 的等效边 - # DW 允许存在, UPDATE 时使用条件查询包含 D 的数据 - # DD 不存在,如果有,那么一定会有 DI + ID 的等效边 + # WI does not exist. If it does, there must be an equivalent edge of WD + DI + # II does not exist. If it does, there must be an equivalent edge of ID + DI + # DW is allowed to exist. When UPDATE, use the condition to query the data containing D + # DD does not exist. 
If it does, there must be an equivalent edge of DI + ID if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]: indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) @@ -218,41 +218,6 @@ def insert_edge(data1, data2, indegree, edge, txn): elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable": indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # 入边 - # elif txn[data1.txn_num].isolation == "read-uncommitted": - # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): - # if edge_type[-1] == 'R': # not R -- R - # if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'): # 可能脏读 - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # if txn[data2.txn_num].isolation == "serializable": - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # elif edge_type[-1] != 'R': # not R -- not R - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # elif txn[data1.txn_num].isolation == "read-committed" or txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable": - # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): - # if edge_type[-1] == 'R': # not R -- R - # if txn[data2.txn_num].isolation == "read-committed" and 
edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # if txn[data2.txn_num].isolation == "serializable": - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # elif edge_type[-1] != 'R': # not R -- not R - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # elif edge_type[0] == 'R' and edge_type[-1] != 'R': - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - def init_record(query, version_list): key = find_data(query, "(") @@ -512,13 +477,13 @@ def print_error(result_folder, ts_now, error_message): #! ------Some assumption------ -# 在任何隔离级别事务的修改互相可见,即等价于单一存储,无读写缓冲 -# 在输入文件中有设置各个事务隔离级别的语句,在 "BEGIN 之后" - # BEGIN T1 set_isolation=repeatable-read - # BEGIN T2 set_isolation=serializable - # BEGIN T3 set_isolation=read-uncommitted - # BEGIN T4 set_isolation=read-committed -# 假定插入的数据 key 是从 0 向上递增的顺序 +# The modifications of transactions at any isolation level are mutually visible, which is equivalent to a single storage, without read-write buffer +# There are statements to set the isolation level of each transaction in the input file, after "BEGIN" + # BEGIN T1 set_isolation=repeatable-read + # BEGIN T2 set_isolation=serializable + # BEGIN T3 set_isolation=read-uncommitted + # BEGIN T4 set_isolation=read-committed +# Assume that the inserted data key is in ascending order from 0 run_result_folder = "pg/mda_detect_test" result_folder = "check_result/" + run_result_folder From 3286a99f2cc37642977364be0401321f90306040 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Mon, 15 Jul 2024 14:38:45 +0000 Subject: [PATCH 
10/16] loop detect optimize --- src/dbtest/src/mda_detect.py | 102 +++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 35 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 79df82ca..40d10c9f 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -18,11 +18,12 @@ class Edge: - def __init__(self, type, out): + def __init__(self, type, out, begin_time): self.type = type self.out = out + self.time = begin_time def __repr__(self): - return "Edge(type={}, out={})".format(self.type, self.out) + return "Edge(begin_time={}, type={}, out={})".format(self.time, self.type, self.out) class Operation: def __init__(self, op_type, txn_num, op_time, value): @@ -51,9 +52,10 @@ def print_graph(edge,txn): # print data_op_list def print_data_op_list(data_op_list): for k,list in enumerate(data_op_list): - print("\nk:{}---".format(k)) - for i, data in enumerate(list): - print("op:{}--{}-".format(data.op_type,data.txn_num)) + if k< len(data_op_list)-1: + print("\nk:{}---".format(k)) + for i, data in enumerate(list): + print("op:{}--{}-".format(data.op_type,data.txn_num)) # find total variable number def get_total(lines): @@ -164,17 +166,16 @@ def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type): # decide which operation comes first depending on the read or write version # if later operation happened after the first txn commit time, edge type will add "C" def get_edge_type(data1, data2, txn): - # if data1.value <= data2.value: - # before, after = data1, data2 - # else: - # before, after = data2, data1 - before, after = data1, data2 + if data1.op_time <= data2.op_time: + before, after = data1, data2 + else: + before, after = data2, data1 # if data1.op_type == "D" or data2.op_type == "D": # if data1.value < data2.value: # before, after = data2, data1 # else: # before, after = data1, data2 - if data2.op_time > txn[data1.txn_num].end_ts: + if after.op_time > txn[before.txn_num].end_ts: state = "C" else: 
state = "" @@ -201,23 +202,23 @@ def insert_edge(data1, data2, indegree, edge, txn): # DD does not exist. If it does, there must be an equivalent edge of DI + ID if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]: indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) #* read-committed: Dirty Read elif edge_type in ["WCR","WR"] and (txn[data2.txn_num].isolation == "read-committed" or txn[data2.txn_num].isolation == "repeatable-read" or txn[data2.txn_num].isolation == "serializable"): indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) #* repeatable-read: Unrepeatable Read elif edge_type in ["RCW","RW"] and (txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable"): indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) #* serializable: Phantom Read elif edge_type in ["RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable": indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) #* serializable: Phantom Read elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable": indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num,data1.op_time)) def init_record(query, version_list): key = find_data(query, "(") @@ -425,29 +426,59 @@ def check_cycle(edge, indegree, total): # for loop graphs, print the loop -def dfs(result_folder, ts_now, now, type): - visit1[now] = 1 - if visit[now] == 1: return - visit[now] = 1 
- path.append(now) - edge_type.append(type) - for v in edge[now]: +def dfs(result_folder, ts_now , e): + visit1[e.out] = 1 + if visit[e.out] == 1: return + visit[e.out] = 1 + path.append(e) + for v in edge[e.out]: if visit[v.out] == 0: - dfs(result_folder, ts_now, v.out, v.type) + dfs(result_folder, ts_now, v) else: - path.append(v.out) - edge_type.append(v.type) + path.append(v) with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: - for i in range(0, len(path)): - f.write(str(path[i])) - if i != len(path) - 1: f.write("->" + edge_type[i+1] + "->") - f.write("\n\n") + content = "" + list_loop = [] + for i in range(len(path) - 1, -1, -1): + if i != len(path) - 1 and path[i].out == path[len(path) - 1].out: + break + index = 0 + while(index < len(list_loop) and path[list_loop[index]].time < path[i].time): + index += 1 + list_loop.insert(index,i) + for idx in list_loop: + content = content + "->" + path[idx].type + "->" + str(path[idx].out) + content = str(path[list_loop[-1]].out) + content + "\n\n" + f.write(content) path.pop() - edge_type.pop() path.pop() - edge_type.pop() - visit[now] = 0 - + visit[e.out] = 0 + + +# # for loop graphs, print the loop +# # Contains redundant edge information and the starting point of the ring is unreasonable +# def dfs(result_folder, ts_now, now, type): +# visit1[now] = 1 +# if visit[now] == 1: return +# visit[now] = 1 +# path.append(now) +# edge_type.append(type) +# for v in edge[now]: +# if visit[v.out] == 0: +# dfs(result_folder, ts_now, v.out, v.type) +# else: +# path.append(v.out) +# edge_type.append(v.type) +# with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: +# for i in range(0, len(path)): +# f.write(str(path[i])) +# if i != len(path) - 1: f.write("->" + edge_type[i+1] + "->") +# f.write("\n\n") +# path.pop() +# edge_type.pop() +# path.pop() +# edge_type.pop() +# visit[now] = 0 def print_path(result_folder, ts_now, edge): with open(result_folder + "/check_result" + ts_now + ".txt", 
"a+") as f: @@ -546,7 +577,8 @@ def print_error(result_folder, ts_now, error_message): output_result(file, result_folder, ts_now, "Cyclic") for i in range(total_num_txn + 2): if visit1[i] == 0: - dfs(result_folder, ts_now, i, "null") + # dfs(result_folder, ts_now, i, "null") + dfs(result_folder, ts_now, Edge("null",i,-1)) else: output_result(file, result_folder, ts_now, "Avoid") print_path(result_folder, ts_now, edge) From 168ddc8f23f0d7ed4320d41e0fd6e9eb7d342486 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Mon, 29 Jul 2024 06:52:54 +0000 Subject: [PATCH 11/16] remove doc from commit --- src/dbtest/src/doc/mda_detect_modify.md | 51 ------------------------- src/dbtest/src/doc/mda_detect_read.md | 36 ----------------- 2 files changed, 87 deletions(-) delete mode 100644 src/dbtest/src/doc/mda_detect_modify.md delete mode 100644 src/dbtest/src/doc/mda_detect_read.md diff --git a/src/dbtest/src/doc/mda_detect_modify.md b/src/dbtest/src/doc/mda_detect_modify.md deleted file mode 100644 index 3c64dbda..00000000 --- a/src/dbtest/src/doc/mda_detect_modify.md +++ /dev/null @@ -1,51 +0,0 @@ -[text](mda_detect.py) 修改日志 -# 思考&分析 -1. 用于加边建立图的节点对应一个操作还是一个事务。答:一个事务。 -2. 目标:一个对数据库的操作文件(运行效果文件)中有多个事务,每一个事务有不同的隔离级别,通过执行结果和隔离级别判断是否满足一致性 -3. 原来输出未所有事务是否满足一致性,现在输出为每个事务是否满足一致性? 单个整体报错 or 多个报错? 多个,每个错误都识别,兼容单个整体报错(实现较难) -4. 有两个检测思路: - 1. 修改建立图的过程中加边策略,保留循环检测流程。(当前实现方式) - 2. 保留加边策略,修改冲突检测流程。 -# 原来代码中问题 -### 数组访问越界问题 -现象 -```python -total_num = get_total(lines) # 统计的个数是插入数据的个数,不是事务的个数。 -txn = [Txn() for i in range(total_num + 2)] # 导致构造的 txn 数组较小 -.... 
# 还有 indegree edge 数组的大小应该是事务的个数。 -``` -解决:构造一个新函数获取事务个数。 -```python -# find total Txn number -def get_total_txn(lines): - num = 0 - for query in lines: - query = query.replace("\n", "") - query = query.replace(" ", "") - if query[0:1] == "Q" and query.find("T") != -1: - tmp = find_data(query, "T") - num = max(num, tmp) - return num -total_num_txn = get_total_txn(lines) # total number of txn -``` -效果:不同数据使用不同的初始长度 -```python -# total_num: data_op_list, version_list -# total_num_txn: txn, edge, total_num_txn, visit, visit1 -``` - - -### 默认字符串少了空格 -```python - pos = query.find("finished at:") - pos += len("finished at:") -``` - -### "R" 类型的操作并没有修改 value 值为下标: -```python - if data1.value <= data2.value: - before, after = data1, data2 - else: - before, after = data2, data1 -``` - diff --git a/src/dbtest/src/doc/mda_detect_read.md b/src/dbtest/src/doc/mda_detect_read.md deleted file mode 100644 index ebf0b144..00000000 --- a/src/dbtest/src/doc/mda_detect_read.md +++ /dev/null @@ -1,36 +0,0 @@ - -## mda_detect.py 代码功能 -主要用于检测数据库事务之间的并发关系,以及是否存在循环依赖。具体来说,代码通过解析输入的SQL语句,构建事务操作的有向图,检测是否存在循环依赖(即事务之间是否存在无法解决的并发冲突),并输出结果。 -### 变量命名含义 - -| 名称 | 含义 | 备注 | -| ------------ | ------------------------------------- | ------------------- | -| data | 一次操作信息:type、txn_num、op_time、op_data | | -| data_op_list | 数据操作列表 【数据key:【操作...】, 数据key:【操作...】】 | | -| indegree | 下标:事务号;元素:依赖该事务的事务个数 | | -| edge | 下标:事务号;元素:该事务的出边 | | -| version_list | key:数据编号;value:数据的值的历史记录 | 下标是 value? | -| total_num | 数据库操作总数 | 对于数据库全表扫描类的操作有关键作用。 | - -### 函数列表 - -| 函数 | 功能 | 备注 | -| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------- | -| get_total | 获取一个测试文件中 Q0 部分插入的数据个数 | key 的个数 | -| find_data | 特定位置提取一个数字 | | -| set_finish_time | 在一个数据库操作语句执行完毕后更新相关的时间戳:finishedat
1. 所有事务中 begin_ts | end_ts 和 op_time 相等的进行替换
2. 所有操作中 op_time 和 op_time 相等的进行替换
3. version_list 保存操作历史值 op.value
4. op.value 保存 version_list 历史下标 | | -| check_concurrency | 检查两个事务和是否并发 | 通过开始时间和结束时间来判断,默认开始时间之间已经比较过了吗? | -| get_edge_type | 确定两个操作之间的边的类型,添加C 标志,跨事务操作,同时返回新的操作顺序 | data1 和 data2 是同一个 Key 的前后两个操作 | -| build_graph | 建立一个有向图,这个图表示不同操作之间的并发关系 | 只会在同一组操作之间建立边 | -| insert_edge | 具体的插边操作(check_concurrency 的前提下插入 get_edge_type 边):
不同事务之间有并发读写冲突; | data1 发生时间默认在data2 之前 | -| init_record | 根据查询中的信息初始化版本列表中的记录。 | | -| readVersion_record | 处理数据库查询操作 | 只是更改了: op.value | -| read_record | 根据查询中的信息读取记录并更新数据操作。增加到 data_op_list 中。 | | -| write_record | 根据查询和更新数据操作中的信息写入记录。增加到 data_op_list 中。 | | -| delete_record | 根据查询中的信息删除记录并更新数据操作。增加到 data_op_list 中。 | | -| insert_record | 根据查询中的信息插入记录并更新数据操作。 | | -| end_record | 设置事务的结束时间戳。 | | -| operation_record | 记录并且处理数据库操作。 | | -| remove_unfinished_operation | 删除失败的语句以防止构建冗余边 | | -| check_cycle | 在有向图中查找环 | | -| dfs | 在有环的有向图中找环 | | From 89d3f56a9e1fe5dc6f67bb6d8de16f2f6588ca68 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Mon, 29 Jul 2024 07:24:04 +0000 Subject: [PATCH 12/16] comments restore, delete and add --- src/dbtest/src/mda_detect.py | 377 ++++++++++++++++++++++++++++++++--- 1 file changed, 353 insertions(+), 24 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 40d10c9f..9b410f51 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -39,6 +39,17 @@ def __init__(self): self.end_ts = 99999999999999999999 self.isolation = "serializable" + +""" +Print the graph edges after building the graph. + +Args: +- edge (list): A list of Edge lists +- txn (list): A list of Txn objects + +Returns: +None +""" # print edge after build graph def print_graph(edge,txn): for i, edges in enumerate(edge): @@ -49,6 +60,15 @@ def print_graph(edge,txn): print(" {}".format(e)) +""" +Print the contents of the data operation list. + +Args: +- data_op_list (list): A list of Operation lists + +Returns: +None +""" # print data_op_list def print_data_op_list(data_op_list): for k,list in enumerate(data_op_list): @@ -57,6 +77,15 @@ def print_data_op_list(data_op_list): for i, data in enumerate(list): print("op:{}--{}-".format(data.op_type,data.txn_num)) +""" +Find the total variable number. + +Args: +- lines (list): A list of queries. + +Returns: +int: The maximum variable number found in the queries. 
+""" # find total variable number def get_total(lines): num = 0 @@ -81,6 +110,17 @@ def get_total_txn(lines): num = max(num, tmp) return num + +""" +Extract the data we need from a query. + +Args: +- query (str): The input query string. +- target (str): The target substring to search for. + +Returns: +int: The extracted data value, or -1 if not found. +""" # extract the data we need in query def find_data(query, target): pos = query.find(target) @@ -109,6 +149,20 @@ def find_isolation(query): if query.find("serializable") != -1: return "serializable" +""" +When a statement is executed, this function sets the end time, modifies the transaction list, +and updates the version list as needed. + +Args: +- op_time (int): The operation time of the statement. +- data_op_list (list): A list of data operations. +- query (str): The query string containing information about the statement execution. +- txn (list): A list of transaction objects. +- version_list (list): A list of version lists for data operations. + +Returns: +None +""" # when a statement is executed, set the end time and modify the version list def set_finish_time(op_time, data_op_list, query, txn, version_list): # pos = query.find("finished at:") @@ -145,6 +199,18 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list): version_list[i].append(op.value) op.value = len(version_list[i]) - 1 + +""" +Check if two transactions are concurrent based on their start and end times. + +Args: +- data1: Information about the first transaction. +- data2: Information about the second transaction. +- txn: A list of transaction objects. + +Returns: +bool: True if the transactions are concurrent, False otherwise. 
+""" # if both transactions are running # or the start time of the second transaction is less than the end time of the first transaction # we think they are concurrent @@ -157,12 +223,40 @@ def check_concurrency(data1, data2, txn): return False +""" +Check if a specific edge exists between two transactions in the graph. + +Args: +- edge (list): A list of lists, where each sublist contains edge objects representing the connections in the graph. +- src_txn (int): The source transaction number, which the edge originates from. +- src_type (str): The operation type (e.g., 'R', 'W') at the source of the edge. +- tar_txn (int): The target transaction number, which the edge points to. +- tar_type (str): The operation type (e.g., 'R', 'W') at the target of the edge. + +Returns: +bool: True if the specified edge exists, False otherwise. +""" def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type): for e in edge[src_txn]: if e.out == tar_txn and e.type[0] == src_type and e.type[-1] == tar_type: return True return False + +""" +Determine the type of edge between two operations based on their read or write versions. + +Args: +- data1: Information about the first operation. +- data2: Information about the second operation. +- txn: A list of transaction objects. + +Returns: +tuple: A tuple containing three values: + - A string indicating the edge type ('R', 'W', 'CR', 'CW'). + - Information about the operation that comes first. + - Information about the operation that comes second. +""" # decide which operation comes first depending on the read or write version # if later operation happened after the first txn commit time, edge type will add "C" def get_edge_type(data1, data2, txn): @@ -181,7 +275,22 @@ def get_edge_type(data1, data2, txn): state = "" return before.op_type + state + after.op_type, before, after +""" +Build a directed graph representing the concurrency relationships between operations. 
+ +Args: +- data_op_list: A list of lists, where each inner list contains information about operations for a specific transaction. +- indegree: A list representing the in-degrees of each operation node in the graph. +- edge: A list representing the edges (concurrency relationships) between operations. +- txn: A list of transaction objects. + +This function constructs a directed graph where nodes represent operations, and edges represent concurrency relationships +between operations. It iterates through the list of operations for each transaction and calls the 'insert_edge' function +to create edges in the graph based on concurrency relationships. +Returns: +None +""" def build_graph(data_op_list, indegree, edge, txn): for list1 in data_op_list: for i, data in enumerate(list1): @@ -189,7 +298,25 @@ def build_graph(data_op_list, indegree, edge, txn): insert_edge(list1[j], data, indegree, edge, txn) +""" +Insert an edge into the directed graph representing concurrency relationships between operations. + +Args: +- data1: An operation object representing the first operation. +- data2: An operation object representing the second operation. +- indegree: A list representing the in-degrees of each transaction in the graph. +- edge: A list representing the edges (concurrency relationships) between operations for each transaction. +- txn: A list of transaction objects. + +This function inserts an edge into the directed graph to represent the concurrency relationship between 'data1' and 'data2'. +It first checks if the two operations are concurrent by calling the 'check_concurrency' function. If they are concurrent, it +determines the edge type using the 'get_edge_type' function and adds the edge to the 'edge' list. +The 'indegree' list is updated to reflect the in-degree of the target transaction node when an edge is inserted. 
+ +Returns: +None +""" def insert_edge(data1, data2, indegree, edge, txn): if check_concurrency(data1, data2, txn): edge_type, data1, data2 = get_edge_type(data1, data2, txn) @@ -220,12 +347,42 @@ def insert_edge(data1, data2, indegree, edge, txn): indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num,data1.op_time)) +""" +Initialize a record in the version list based on the information in the query. + +Args: +- query: A query string that contains information about a record. +- version_list: A list of lists representing versioned records. + +This function initializes a record in the 'version_list' based on the information provided in the 'query'. It extracts the 'key' +and 'value' of the record from the query using the 'find_data' function and appends the 'value' to the corresponding version list. + +Returns: +None +""" def init_record(query, version_list): key = find_data(query, "(") value = find_data(query, ",") version_list[key].append(value) +""" +Read the versioned record based on the information in the query. + +Args: +- query (str): A query string that contains information about reading a versioned record. +- op_time (int): The operation time of the read operation. +- data_op_list (list): A list of lists representing data operations. +- version_list (list): A list of lists representing versioned records. + +This function reads the versioned record specified in the 'query'. It extracts the 'key' and 'value' from the query, which are +used to identify the record and version to read. The function checks if the specified version exists in the version list and +updates the 'op.value' accordingly. If the version doesn't exist or if the read operation is not successful, an error message +is returned. + +Returns: +str: An error message indicating the result of the read operation. An empty string means the read was successful. 
+""" def readVersion_record(query, op_time, data_op_list, version_list): error_message = "" data = query.split(")") @@ -272,6 +429,27 @@ def readVersion_record(query, op_time, data_op_list, version_list): +""" +Read records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the read operation. +- txn_num (int): The transaction number. +- total_num (int): The total number of records. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function reads records specified in the query and updates the 'data_op_list' accordingly. It extracts information from +the 'query' to determine which records to read and what type of operation to perform (read or predicate). The function also +sets the 'begin_ts' of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys or predicates and create corresponding 'Operation' objects in the +'data_op_list'. Depending on the structure of the query, this function handles various cases, such as reading single records, +handling predicates, and selecting all rows in a table. + +Returns: +None +""" def read_record(op_time, txn_num, total_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: txn[txn_num].begin_ts = op_time @@ -300,6 +478,24 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list): data_op_list[i].append(Operation("R", txn_num, op_time, i)) +""" +Write records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the write operation. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function writes records specified in the query and updates the 'data_op_list' accordingly. 
It extracts information from the +'query' to determine which records to write and what type of operation to perform (write). The function also sets the 'begin_ts' +of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys and values, and it creates corresponding 'Operation' objects in the 'data_op_list'. + +Returns: +None +""" def write_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: txn[txn_num].begin_ts = op_time @@ -327,6 +523,25 @@ def write_record(op_time, txn_num, txn, data_op_list): for i in range(total_num+1): data_op_list[i].append(Operation("W", txn_num, op_time, i)) + +""" +Delete records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the delete operation. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function deletes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the +'query' to determine which records to delete and what type of operation to perform (delete). The function also sets the 'begin_ts' +of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys, and it creates corresponding 'Operation' objects in the 'data_op_list'. + +Returns: +None +""" def delete_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: txn[txn_num].begin_ts = op_time @@ -352,6 +567,26 @@ def delete_record(op_time, txn_num, txn, data_op_list): for i in range(total_num+1): data_op_list[i].append(Operation("D", txn_num, op_time, i)) + +""" +Insert records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the insert operation. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. 
+- data_op_list (list): A list of lists representing data operations. + +This function inserts records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the +'query' to determine which records to insert and what type of operation to perform (insert). The function also sets the 'begin_ts' +of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys and their corresponding values, and it creates corresponding 'Operation' +objects in the 'data_op_list'. + +Returns: +None +""" def insert_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1 and op_time != 0: txn[txn_num].begin_ts = op_time @@ -360,11 +595,40 @@ def insert_record(op_time, txn_num, txn, data_op_list): data_op_list[key].append(Operation("I", txn_num, op_time, value)) +""" +Set the end timestamp for a transaction. + +Args: +- op_time (int): The operation time when the transaction ends. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. + +This function sets the 'end_ts' attribute of a transaction specified by 'txn_num' to the given 'op_time'. It marks the end of the +transaction's execution. + +Returns: +None +""" def end_record(op_time, txn_num, txn): txn[txn_num].end_ts = op_time +""" +Record and process database operations. + +Args: +- total_num (int): The total number of database operations. +- query (str): The SQL query representing a database operation. +- txn (list): A list of transactions. +- data_op_list (list): A list of data operations. +- version_list (list): A list of version information for data operations. +This function records and processes database operations based on the provided SQL query. It updates the transaction list, data +operation list, and version list accordingly. The 'total_num' parameter specifies the total number of database operations. + +Returns: +str: An error message (if any), or an empty string if the operation is successful. 
+""" def operation_record(total_num, query, txn, data_op_list, version_list): error_message = "" op_time = find_data(query, "Q") @@ -399,6 +663,18 @@ def operation_record(total_num, query, txn, data_op_list, version_list): +""" +Remove unfinished operations from the data operation list. + +Args: +- data_op_list (list): A list of data operations. + +This function iterates through the data operation list and removes any unfinished operations based on their operation time. +Unfinished operations are those with an operation time less than 10,000,000. + +Returns: +None +""" # remove failed statements to prevent redundant edges from being built def remove_unfinished_operation(data_op_list): for list1 in data_op_list: @@ -406,6 +682,20 @@ def remove_unfinished_operation(data_op_list): if op.op_time < 10000000: list1.pop(i) +""" +Check for cycles in a directed graph using topological sorting. + +Args: +- edge (List[List[Edge]]): A list representing the directed edges in the graph. +- indegree (List[int]): A list representing the in-degrees of nodes in the graph. +- total (int): The total number of nodes in the graph. + +This function checks for cycles in a directed graph by performing topological sorting. It takes as input the directed edges (`edge`), +in-degrees of nodes (`indegree`), and the total number of nodes in the graph (`total`). + +Returns: +bool: True if a cycle is detected, False otherwise. +""" # toposort to determine whether there is a cycle def check_cycle(edge, indegree, total): q = Queue.Queue() @@ -425,6 +715,25 @@ def check_cycle(edge, indegree, total): return True +""" +Perform depth-first search (DFS) to find and print loops in a directed graph. + +Args: +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- now (int): The current node being visited. 
+- type (str): The type of edge leading to the current node ('C' for commit, 'R' for read, 'W' for write, etc.). + +This function performs depth-first search (DFS) to find and print loops in a directed graph. It takes as input the result folder +path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), the current node being visited (`now`), +and the type of edge leading to the current node (`type`). + +The function recursively explores the graph, tracking the visited nodes and edges to detect loops. When a loop is found, it is printed +to a result file in the specified result folder. + +Note: This function assumes that global variables like 'visit', 'visit1', 'path', 'edge_type', and 'edge' are defined elsewhere. + +""" # for loop graphs, print the loop def dfs(result_folder, ts_now , e): visit1[e.out] = 1 @@ -455,31 +764,21 @@ def dfs(result_folder, ts_now , e): visit[e.out] = 0 -# # for loop graphs, print the loop -# # Contains redundant edge information and the starting point of the ring is unreasonable -# def dfs(result_folder, ts_now, now, type): -# visit1[now] = 1 -# if visit[now] == 1: return -# visit[now] = 1 -# path.append(now) -# edge_type.append(type) -# for v in edge[now]: -# if visit[v.out] == 0: -# dfs(result_folder, ts_now, v.out, v.type) -# else: -# path.append(v.out) -# edge_type.append(v.type) -# with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: -# for i in range(0, len(path)): -# f.write(str(path[i])) -# if i != len(path) - 1: f.write("->" + edge_type[i+1] + "->") -# f.write("\n\n") -# path.pop() -# edge_type.pop() -# path.pop() -# edge_type.pop() -# visit[now] = 0 +""" +Print the paths in a directed graph to a result file. + +Args: +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- edge (list of lists): A list of lists representing the directed edges in the graph. 
+ +This function prints the paths in a directed graph to a result file. It takes as input the result folder path (`result_folder`), +the current timestamp or identifier for result file naming (`ts_now`), and a list of lists (`edge`) representing the directed edges +in the graph. +The function iterates through the edges and writes the paths to the result file in the specified result folder. + +""" def print_path(result_folder, ts_now, edge): with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: flag = 0 @@ -493,11 +792,41 @@ def print_path(result_folder, ts_now, edge): f.write("\n\n") +""" +Output the result of cycle detection to a result file. + +Args: +- file (str): The name of the file or input source being analyzed. +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- IsCyclic (str): A string indicating whether a cycle was detected. + +This function outputs the result of cycle detection to a result file. It takes as input the name of the file or input source being +analyzed (`file`), the result folder path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), +and a string (`IsCyclic`) indicating whether a cycle was detected. + +The function writes the result, including the file name and the cyclic status, to the specified result file in the result folder. + +""" def output_result(file, result_folder, ts_now, IsCyclic): with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: f.write(file + ": " + IsCyclic + "\n") +""" +Print an error message to a result file. + +Args: +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- error_message (str): The error message to be printed. + +This function prints an error message to a result file. 
It takes as input the result folder path (`result_folder`), the current +timestamp or identifier for result file naming (`ts_now`), and the error message (`error_message`) to be printed. + +The function appends the error message to the specified result file in the result folder and adds a newline for separation. + +""" def print_error(result_folder, ts_now, error_message): with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: f.write(error_message + "\n") From 54930e6d1c1b371903a246d706916971e23e858b Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Mon, 29 Jul 2024 07:33:38 +0000 Subject: [PATCH 13/16] comments add --- src/dbtest/src/mda_detect.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 9b410f51..864fc82c 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -99,6 +99,15 @@ def get_total(lines): # break return num +""" +Find the total number of transactions based on transaction identifiers in queries. + +Args: +- lines (list): A list of query strings, each potentially containing transaction identifiers. + +Returns: +int: The highest transaction number found in the queries. +""" # find total Txn number def get_total_txn(lines): num = 0 From da83039c5b0058481d3ebaa2944a952b69032d8e Mon Sep 17 00:00:00 2001 From: dinream <2534393465@qq.com> Date: Fri, 25 Oct 2024 17:02:26 +0800 Subject: [PATCH 14/16] move mda_detect.py to mda_detect_mixed.py --- src/dbtest/src/mda_detect_mixed.py | 923 +++++++++++++++++++++++++++++ 1 file changed, 923 insertions(+) create mode 100644 src/dbtest/src/mda_detect_mixed.py diff --git a/src/dbtest/src/mda_detect_mixed.py b/src/dbtest/src/mda_detect_mixed.py new file mode 100644 index 00000000..2a44bf71 --- /dev/null +++ b/src/dbtest/src/mda_detect_mixed.py @@ -0,0 +1,923 @@ +# -*- coding: utf-8 -*- + +# /* +# * Tencent is pleased to support the open source community by making 3TS available. 
+# * +# * Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. The below software +# * in this distribution may have been modified by THL A29 Limited ("Tencent Modifications"). All +# * Tencent Modifications are Copyright (C) THL A29 Limited. +# * +# * Author: xenitchen axingguchen tsunaouyang (xenitchen,axingguchen,tsunaouyang@tencent.com) +# * +# */ + + +import queue +import os +import time + + +class Edge: + def __init__(self, type, out, begin_time): + self.type = type + self.out = out + self.time = begin_time + def __repr__(self): + return "Edge(begin_time={}, type={}, out={})".format(self.time, self.type, self.out) + +class Operation: + def __init__(self, op_type, txn_num, op_time, value): + self.op_type = op_type + self.txn_num = txn_num + self.op_time = op_time + self.value = value + + +class Txn: + def __init__(self): + self.begin_ts = -1 + self.end_ts = 99999999999999999999 + self.isolation = "serializable" + + +""" +Print the graph edges after building the graph. + +Args: +- edge (list): A list of Edge lists +- txn (list): A list of Txn objects + +Returns: +None +""" +# print edge after build graph +def print_graph(edge,txn): + for i, edges in enumerate(edge): + if i == 0 or i == len(edge)-1: + continue + print("Transaction {}:-----{}-----".format(i,txn[i].isolation)) + for e in edges: + print(" {}".format(e)) + + +""" +Print the contents of the data operation list. + +Args: +- data_op_list (list): A list of Operation lists + +Returns: +None +""" +# print data_op_list +def print_data_op_list(data_op_list): + for k,list in enumerate(data_op_list): + if k< len(data_op_list)-1: + print("\nk:{}---".format(k)) + for i, data in enumerate(list): + print("op:{}--{}-".format(data.op_type,data.txn_num)) + +""" +Find the total variable number. + +Args: +- lines (list): A list of queries. + +Returns: +int: The maximum variable number found in the queries. 
+""" +# find total variable number +def get_total(lines): + num = 0 + for query in lines: + query = query.replace("\n", "") + query = query.replace(" ", "") + if query.find("INSERT") != -1: # query[0:2] == "Q0" and + tmp = find_data(query, "(") + num = max(num, tmp) + # elif query[0:2] == "Q1": + # break + return num + +""" +Find the total number of transactions based on transaction identifiers in queries. + +Args: +- lines (list): A list of query strings, each potentially containing transaction identifiers. + +Returns: +int: The highest transaction number found in the queries. +""" +# find total Txn number +def get_total_txn(lines): + num = 0 + for query in lines: + query = query.replace("\n", "") + query = query.replace(" ", "") + if query[0:1] == "Q" and query.find("T") != -1: + tmp = find_data(query, "T") + num = max(num, tmp) + return num + + +""" +Extract the data we need from a query. + +Args: +- query (str): The input query string. +- target (str): The target substring to search for. + +Returns: +int: The extracted data value, or -1 if not found. +""" +# extract the data we need in query +def find_data(query, target): + pos = query.find(target) + if pos == -1: + return pos + pos += len(target) + data_value = "" + for i in range(pos, len(query)): + if query[i].isdigit(): + data_value += query[i] + else: + break + if data_value == "": + return -1 + data_value = int(data_value) + return data_value + +# extract the isolation from content +def find_isolation(query): + if query.find("read-uncommitted") != -1: + return "read-uncommitted" + if query.find("read-committed") != -1: + return "read-committed" + if query.find("repeatable-read") != -1: + return "repeatable-read" + if query.find("serializable") != -1: + return "serializable" + +""" +When a statement is executed, this function sets the end time, modifies the transaction list, +and updates the version list as needed. + +Args: +- op_time (int): The operation time of the statement. 
+- data_op_list (list): A list of data operations. +- query (str): The query string containing information about the statement execution. +- txn (list): A list of transaction objects. +- version_list (list): A list of version lists for data operations. + +Returns: +None +""" +# when a statement is executed, set the end time and modify the version list +def set_finish_time(op_time, data_op_list, query, txn, version_list): + # pos = query.find("finished at:") + # pos += len("finished at:") + # data_value = "" + # tmp, tmp1 = "", "" + # for i in range(pos, len(query)): + # if query[i].isdigit(): + # tmp += query[i] + # else: + # for j in range(3 - len(tmp)): + # tmp1 += "0" + # tmp = tmp1 + tmp + # data_value += tmp + # tmp, tmp1 = "", "" + # data_value = int(data_value) + data_value = int(op_time) + for t in txn: + if t.begin_ts == op_time: + t.begin_ts = data_value + if t.end_ts == op_time: + t.end_ts = data_value + for i, list1 in enumerate(data_op_list): + for op in list1: + if op.op_time == op_time: + op.op_time = data_value + if op.op_type == "W": + version_list[i].append(op.value) + op.value = len(version_list[i]) - 1 + elif op.op_type == "D": + version_list[i].append(-1) + op.value = len(version_list[i]) - 1 + elif op.op_type == "I": + version_list[i].append(op.value) + op.value = len(version_list[i]) - 1 + + +""" +Check if two transactions are concurrent based on their start and end times. + +Args: +- data1: Information about the first transaction. +- data2: Information about the second transaction. +- txn: A list of transaction objects. + +Returns: +bool: True if the transactions are concurrent, False otherwise. 
+""" +# if both transactions are running +# or the start time of the second transaction is less than the end time of the first transaction +# we think they are concurrent +def check_concurrency(data1, data2, txn): + if txn[data2.txn_num].begin_ts < txn[data1.txn_num].end_ts: + return True + elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: # TODO maybe a bug: don't need + return True + else: + return False + + +""" +Check if a specific edge exists between two transactions in the graph. + +Args: +- edge (list): A list of lists, where each sublist contains edge objects representing the connections in the graph. +- src_txn (int): The source transaction number, which the edge originates from. +- src_type (str): The operation type (e.g., 'R', 'W') at the source of the edge. +- tar_txn (int): The target transaction number, which the edge points to. +- tar_type (str): The operation type (e.g., 'R', 'W') at the target of the edge. + +Returns: +bool: True if the specified edge exists, False otherwise. +""" +def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type): + for e in edge[src_txn]: + if e.out == tar_txn and e.type[0] == src_type and e.type[-1] == tar_type: + return True + return False + + +""" +Determine the type of edge between two operations based on their read or write versions. + +Args: +- data1: Information about the first operation. +- data2: Information about the second operation. +- txn: A list of transaction objects. + +Returns: +tuple: A tuple containing three values: + - A string indicating the edge type ('R', 'W', 'CR', 'CW'). + - Information about the operation that comes first. + - Information about the operation that comes second. 
+""" +# decide which operation comes first depending on the read or write version +# if later operation happened after the first txn commit time, edge type will add "C" +def get_edge_type(data1, data2, txn): + if data1.op_time <= data2.op_time: + before, after = data1, data2 + else: + before, after = data2, data1 + # if data1.op_type == "D" or data2.op_type == "D": + # if data1.value < data2.value: + # before, after = data2, data1 + # else: + # before, after = data1, data2 + if after.op_time > txn[before.txn_num].end_ts: + state = "C" + else: + state = "" + return before.op_type + state + after.op_type, before, after + +""" +Build a directed graph representing the concurrency relationships between operations. + +Args: +- data_op_list: A list of lists, where each inner list contains information about operations for a specific transaction. +- indegree: A list representing the in-degrees of each operation node in the graph. +- edge: A list representing the edges (concurrency relationships) between operations. +- txn: A list of transaction objects. + +This function constructs a directed graph where nodes represent operations, and edges represent concurrency relationships +between operations. It iterates through the list of operations for each transaction and calls the 'insert_edge' function +to create edges in the graph based on concurrency relationships. + +Returns: +None +""" +def build_graph(data_op_list, indegree, edge, txn): + for list1 in data_op_list: + for i, data in enumerate(list1): + for j in range(0, i): + insert_edge(list1[j], data, indegree, edge, txn) + + +""" +Insert an edge into the directed graph representing concurrency relationships between operations. + +Args: +- data1: An operation object representing the first operation. +- data2: An operation object representing the second operation. +- indegree: A list representing the in-degrees of each transaction in the graph. 
+- edge: A list representing the edges (concurrency relationships) between operations for each transaction. +- txn: A list of transaction objects. + +This function inserts an edge into the directed graph to represent the concurrency relationship between 'data1' and 'data2'. +It first checks if the two operations are concurrent by calling the 'check_concurrency' function. If they are concurrent, it +determines the edge type using the 'get_edge_type' function and adds the edge to the 'edge' list. + +The 'indegree' list is updated to reflect the in-degree of the target transaction node when an edge is inserted. + +Returns: +None +""" +def insert_edge(data1, data2, indegree, edge, txn): + if check_concurrency(data1, data2, txn): + edge_type, data1, data2 = get_edge_type(data1, data2, txn) + if data1.txn_num == data2.txn_num or edge_type in ["RCR", "RR"]: + return + #* read-uncommitted: Dirty Write + # WI does not exist. If it does, there must be an equivalent edge of WD + DI + # II does not exist. If it does, there must be an equivalent edge of ID + DI + # DW is allowed to exist. When UPDATE, use the condition to query the data containing D + # DD does not exist. 
If it does, there must be an equivalent edge of DI + ID + if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]: + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) + #* read-committed: Dirty Read + elif edge_type in ["WCR","WR"] and (txn[data2.txn_num].isolation == "read-committed" or txn[data2.txn_num].isolation == "repeatable-read" or txn[data2.txn_num].isolation == "serializable"): + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) + #* repeatable-read: Unrepeatable Read + elif edge_type in ["RCW","RW"] and (txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable"): + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) + #* serializable: Phantom Read + elif edge_type in ["RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable": + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) + #* serializable: Phantom Read + elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable": + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num,data1.op_time)) + +""" +Initialize a record in the version list based on the information in the query. + +Args: +- query: A query string that contains information about a record. +- version_list: A list of lists representing versioned records. + +This function initializes a record in the 'version_list' based on the information provided in the 'query'. It extracts the 'key' +and 'value' of the record from the query using the 'find_data' function and appends the 'value' to the corresponding version list. 
+ +Returns: +None +""" +def init_record(query, version_list): + key = find_data(query, "(") + value = find_data(query, ",") + version_list[key].append(value) + + +""" +Read the versioned record based on the information in the query. + +Args: +- query (str): A query string that contains information about reading a versioned record. +- op_time (int): The operation time of the read operation. +- data_op_list (list): A list of lists representing data operations. +- version_list (list): A list of lists representing versioned records. + +This function reads the versioned record specified in the 'query'. It extracts the 'key' and 'value' from the query, which are +used to identify the record and version to read. The function checks if the specified version exists in the version list and +updates the 'op.value' accordingly. If the version doesn't exist or if the read operation is not successful, an error message +is returned. + +Returns: +str: An error message indicating the result of the read operation. An empty string means the read was successful. 
+""" +def readVersion_record(query, op_time, data_op_list, version_list): + error_message = "" + data = query.split(")") + if len(data) == 1: + for list1 in data_op_list: + for op in list1: + if op.op_time == op_time: + value = op.value + if len(version_list[value]) == 0: + op.value = -1 + else: + if -1 not in version_list[value]: + error_message = "Value exists, but did not successully read" + return error_message + pos = version_list[value].index(-1) + op.value = pos + else: + for s in data: + key = find_data(s, "(") + value = find_data(s, ",") + for i, list1 in enumerate(data_op_list): + for op in list1: + if key == i and op.op_time == op_time: + value1 = op.value + if len(version_list[value1]) == 0: + op.value = -1 + else: + if version_list[value1].count(value) == 0: + error_message = "Read version that does not exist" + return error_message + pos = version_list[value1].index(value) + op.value = pos + + return error_message + # for i, list1 in enumerate(data_op_list): + # print(i) + # if list1: + # print("") + # print(list1[0].txn_num) + # print(list1[0].op_type) + # print(list1[0].op_time) + # print(list1[0].op_value) + + + + +""" +Read records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the read operation. +- txn_num (int): The transaction number. +- total_num (int): The total number of records. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function reads records specified in the query and updates the 'data_op_list' accordingly. It extracts information from +the 'query' to determine which records to read and what type of operation to perform (read or predicate). The function also +sets the 'begin_ts' of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys or predicates and create corresponding 'Operation' objects in the +'data_op_list'. 
Depending on the structure of the query, this function handles various cases, such as reading single records, +handling predicates, and selecting all rows in a table. + +Returns: +None +""" +def read_record(op_time, txn_num, total_num, txn, data_op_list): + if txn[txn_num].begin_ts == -1: + txn[txn_num].begin_ts = op_time + # for some distributed cases which have 4 param, write part is same + if query.find("value1=") != -1: + op_data = find_data(query, "value1=") + data_op_list[op_data].append(Operation("R", txn_num, op_time, op_data)) + # for normal cases + elif query.find("k=") != -1: + op_data = find_data(query, "k=") + data_op_list[op_data].append(Operation("R", txn_num, op_time, op_data)) + # for predicate cases + elif query.find("k>") != -1: + left = find_data(query, "k>") + 1 + right = find_data(query, "k<") + for i in range(left, right): + data_op_list[i].append(Operation("R", txn_num, op_time, i)) # P + elif query.find("value1>") != -1: + left = find_data(query, "value1>") + 1 + right = find_data(query, "value1<") + for i in range(left, right): + data_op_list[i].append(Operation("R", txn_num, op_time, i)) # p + else: + # it means select all rows in table + for i in range(total_num+1): + data_op_list[i].append(Operation("R", txn_num, op_time, i)) + + +""" +Write records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the write operation. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function writes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the +'query' to determine which records to write and what type of operation to perform (write). The function also sets the 'begin_ts' +of the transaction if it's not already set. 
+ +The 'query' is analyzed to identify specific record keys and values, and it creates corresponding 'Operation' objects in the 'data_op_list'. + +Returns: +None +""" +def write_record(op_time, txn_num, txn, data_op_list): + if txn[txn_num].begin_ts == -1: + txn[txn_num].begin_ts = op_time + if query.find("value1=") != -1: + op_data = find_data(query, "value1=") + op_value = find_data(query, "value2=") + data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value)) + elif query.find("k=") != -1: + op_data = find_data(query, "k=") + op_value = find_data(query, "v=") + data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value)) + # for predicate cases + elif query.find("k>") != -1: + left = find_data(query, "k>") + 1 + right = find_data(query, "k<") + for i in range(left, right): + data_op_list[i].append(Operation("W", txn_num, op_time, i)) # P + elif query.find("value1>") != -1: + left = find_data(query, "value1>") + 1 + right = find_data(query, "value1<") + for i in range(left, right): + data_op_list[i].append(Operation("W", txn_num, op_time, i)) # p + else: + # it means select all rows in table + for i in range(total_num+1): + data_op_list[i].append(Operation("W", txn_num, op_time, i)) + + +""" +Delete records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the delete operation. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function deletes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the +'query' to determine which records to delete and what type of operation to perform (delete). The function also sets the 'begin_ts' +of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys, and it creates corresponding 'Operation' objects in the 'data_op_list'. 
+ +Returns: +None +""" +def delete_record(op_time, txn_num, txn, data_op_list): + if txn[txn_num].begin_ts == -1: + txn[txn_num].begin_ts = op_time + if query.find("value1=") != -1: + op_data = find_data(query, "value1=") + data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data)) + elif query.find("k=") != -1: + op_data = find_data(query, "k=") + data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data)) + # for predicate cases + elif query.find("k>") != -1: + left = find_data(query, "k>") + 1 + right = find_data(query, "k<") + for i in range(left, right): + data_op_list[i].append(Operation("D", txn_num, op_time, i)) # P + elif query.find("value1>") != -1: + left = find_data(query, "value1>") + 1 + right = find_data(query, "value1<") + for i in range(left, right): + data_op_list[i].append(Operation("D", txn_num, op_time, i)) # p + else: + # it means select all rows in table + for i in range(total_num+1): + data_op_list[i].append(Operation("D", txn_num, op_time, i)) + + +""" +Insert records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the insert operation. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function inserts records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the +'query' to determine which records to insert and what type of operation to perform (insert). The function also sets the 'begin_ts' +of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys and their corresponding values, and it creates corresponding 'Operation' +objects in the 'data_op_list'. 
+ +Returns: +None +""" +def insert_record(op_time, txn_num, txn, data_op_list): + if txn[txn_num].begin_ts == -1 and op_time != 0: + txn[txn_num].begin_ts = op_time + key = find_data(query, "(") + value = find_data(query, ",") + data_op_list[key].append(Operation("I", txn_num, op_time, value)) + + +""" +Set the end timestamp for a transaction. + +Args: +- op_time (int): The operation time when the transaction ends. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. + +This function sets the 'end_ts' attribute of a transaction specified by 'txn_num' to the given 'op_time'. It marks the end of the +transaction's execution. + +Returns: +None +""" +def end_record(op_time, txn_num, txn): + txn[txn_num].end_ts = op_time + + +""" +Record and process database operations. + +Args: +- total_num (int): The total number of database operations. +- query (str): The SQL query representing a database operation. +- txn (list): A list of transactions. +- data_op_list (list): A list of data operations. +- version_list (list): A list of version information for data operations. + +This function records and processes database operations based on the provided SQL query. It updates the transaction list, data +operation list, and version list accordingly. The 'total_num' parameter specifies the total number of database operations. + +Returns: +str: An error message (if any), or an empty string if the operation is successful. +""" +def operation_record(total_num, query, txn, data_op_list, version_list): + error_message = "" + op_time = find_data(query, "Q") + txn_num = find_data(query, "T") + # print("total_num:{}, query:{},optime: {}, txn_num: {}\n".format(total_num,query, op_time, txn_num)) + if op_time == 0 and query.find("INSERT") != -1: + init_record(query, version_list) + return error_message + if query.find("returnresult") != -1: #! 
1"returnresult" maybe don't exist + error_message = readVersion_record(query, op_time, data_op_list, version_list) + return error_message + if query.find("finished") != -1: #! "finished" maybe don't exist + set_finish_time(op_time, data_op_list, query, txn, version_list) + return error_message + if op_time == -1 or txn_num == -1: + return error_message + if query.find("BEGIN") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: + txn[txn_num].isolation = find_isolation(query) + elif query.find("SELECT") != -1: + read_record(op_time, txn_num, total_num, txn, data_op_list) + elif query.find("UPDATE") != -1: + write_record(op_time, txn_num, txn, data_op_list) + elif query.find("DELETE") != -1: + delete_record(op_time, txn_num, txn, data_op_list) + elif query.find("INSERT") != -1: #! assume existing data will not be inserted ("Rollback") + insert_record(op_time, txn_num, txn, data_op_list) + elif query.find("COMMIT") != -1: + if op_time != 0: + end_record(op_time, txn_num, txn) + set_finish_time(op_time, data_op_list, query, txn, version_list) + return error_message + + + +""" +Remove unfinished operations from the data operation list. + +Args: +- data_op_list (list): A list of data operations. + +This function iterates through the data operation list and removes any unfinished operations based on their operation time. +Unfinished operations are those with an operation time less than 10,000,000. + +Returns: +None +""" +# remove failed statements to prevent redundant edges from being built +def remove_unfinished_operation(data_op_list): + for list1 in data_op_list: + for i, op in enumerate(list1): + if op.op_time < 10000000: + list1.pop(i) + +""" +Check for cycles in a directed graph using topological sorting. + +Args: +- edge (List[List[Edge]]): A list representing the directed edges in the graph. +- indegree (List[int]): A list representing the in-degrees of nodes in the graph. 
+- total (int): The total number of nodes in the graph.
+
+This function checks for cycles in a directed graph by performing topological sorting. It takes as input the directed edges (`edge`),
+in-degrees of nodes (`indegree`), and the total number of nodes in the graph (`total`).
+
+Returns:
+bool: True if a cycle is detected, False otherwise.
+"""
+# toposort to determine whether there is a cycle
+def check_cycle(edge, indegree, total):
+    q = queue.Queue()
+    for i, degree in enumerate(indegree):
+        if degree == 0: q.put(i)
+    ans = []
+    while not q.empty():
+        now = q.get()
+        ans.append(now)
+        for val in edge[now]:
+            next_node = val.out
+            indegree[next_node] -= 1
+            if indegree[next_node] == 0:
+                q.put(next_node)
+    if len(ans) == total:
+        return False
+    return True
+
+
+"""
+Perform depth-first search (DFS) to find and print loops in a directed graph.
+
+Args:
+- result_folder (str): The path to the folder where the results will be saved.
+- ts_now (str): The current timestamp or identifier for result file naming.
+- e (Edge): The edge to traverse; `e.out` is the node being visited and
+  `e.type` is the edge's dependency type (a leading 'C' marks a post-commit edge).
+
+This function performs depth-first search (DFS) to find and print loops in a directed graph. It takes as input the result folder
+path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), and the incoming edge (`e`)
+whose `out` field is the next node to visit.
+
+The function recursively explores the graph, tracking the visited nodes and edges to detect loops. When a loop is found, it is printed
+to a result file in the specified result folder.
+
+Note: This function assumes that global variables like 'visit', 'visit1', 'path', and 'edge' are defined elsewhere. 
+ +""" +# for loop graphs, print the loop +def dfs(result_folder, ts_now , e): + visit1[e.out] = 1 + if visit[e.out] == 1: return + visit[e.out] = 1 + path.append(e) + for v in edge[e.out]: + if visit[v.out] == 0: + dfs(result_folder, ts_now, v) + else: + path.append(v) + with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: + content = "" + list_loop = [] + for i in range(len(path) - 1, -1, -1): + if i != len(path) - 1 and path[i].out == path[len(path) - 1].out: + break + index = 0 + while(index < len(list_loop) and path[list_loop[index]].time < path[i].time): + index += 1 + list_loop.insert(index,i) + for idx in list_loop: + content = content + "->" + path[idx].type + "->" + str(path[idx].out) + content = str(path[list_loop[-1]].out) + content + "\n\n" + f.write(content) + path.pop() + path.pop() + visit[e.out] = 0 + + +""" +Print the paths in a directed graph to a result file. + +Args: +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- edge (list of lists): A list of lists representing the directed edges in the graph. + +This function prints the paths in a directed graph to a result file. It takes as input the result folder path (`result_folder`), +the current timestamp or identifier for result file naming (`ts_now`), and a list of lists (`edge`) representing the directed edges +in the graph. + +The function iterates through the edges and writes the paths to the result file in the specified result folder. + +""" +def print_path(result_folder, ts_now, edge): + with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: + flag = 0 + for i in range(len(edge)): + for v in edge[i]: + if flag == 0: + flag = 1 + else: + f.write(", ") + f.write(str(i) + "->" + v.type + "->" + str(v.out)) + f.write("\n\n") + + +""" +Output the result of cycle detection to a result file. 
+ +Args: +- file (str): The name of the file or input source being analyzed. +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- IsCyclic (str): A string indicating whether a cycle was detected. + +This function outputs the result of cycle detection to a result file. It takes as input the name of the file or input source being +analyzed (`file`), the result folder path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), +and a string (`IsCyclic`) indicating whether a cycle was detected. + +The function writes the result, including the file name and the cyclic status, to the specified result file in the result folder. + +""" +def output_result(file, result_folder, ts_now, IsCyclic): + with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: + f.write(file + ": " + IsCyclic + "\n") + + +""" +Print an error message to a result file. + +Args: +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- error_message (str): The error message to be printed. + +This function prints an error message to a result file. It takes as input the result folder path (`result_folder`), the current +timestamp or identifier for result file naming (`ts_now`), and the error message (`error_message`) to be printed. + +The function appends the error message to the specified result file in the result folder and adds a newline for separation. + +""" +def print_error(result_folder, ts_now, error_message): + with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: + f.write(error_message + "\n") + f.write("\n\n") + + + + + +#! 
------Some assumption------ +# The modifications of transactions at any isolation level are mutually visible, which is equivalent to a single storage, without read-write buffer +# There are statements to set the isolation level of each transaction in the input file, after "BEGIN" + # BEGIN T1 set_isolation=repeatable-read + # BEGIN T2 set_isolation=serializable + # BEGIN T3 set_isolation=read-uncommitted + # BEGIN T4 set_isolation=read-committed +# Assume that the inserted data key is in ascending order from 0 + +run_result_folder = "pg/mda_detect_test" +result_folder = "check_result/" + run_result_folder +do_test_list = "mda_detect_test_list.txt" +#ts_now = "_2param_3txn_insert" +ts_now = time.strftime("%Y%m%d_%H%M%S", time.localtime()) +if not os.path.exists(result_folder): + os.makedirs(result_folder) + +with open(do_test_list, "r") as f: + files = f.readlines() +for file in files: + file = file.replace("\n", "") + file = file.replace(" ", "") + if file == "": + continue + if file[0] == "#": + continue + with open(run_result_folder + "/" + file + ".txt", "r") as f: + lines = f.readlines() + + total_num = get_total(lines) # total number of variables + total_num_txn = get_total_txn(lines) # total number of txn + txn = [Txn() for i in range(total_num_txn + 2)] # total num of transaction + data_op_list = [[] for i in range(total_num + 2)] # record every operation that occurs on the variable + edge = [[] for i in range(total_num_txn + 2)] # all edges from the current point + indegree = [0] * (total_num_txn + 2) # in-degree of each point + visit = [0] * (total_num_txn + 2) # in dfs, whether the current point has been visited + visit1 = [0] * (total_num_txn + 2) # we will only use unvisited points as the starting point of the dfs + path = [] # points in cycle + edge_type = [] # edge type of the cycle + version_list = [[] for i in range(total_num + 2)] + go_end = False # if test result is "Rollback" or "Timeout", we will don't check + + error_message = "" + for query in 
lines: + query = query.replace("\n", "") + query = query.replace(" ", "") + if query.find("Rollback") != -1 or query.find("Timeout") != -1: + go_end = True + # print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt")) + error_message = operation_record(total_num, query, txn, data_op_list, version_list) + if error_message != "": + break + + if error_message != "": + output_result(file, result_folder, ts_now, "Error") + print_error(result_folder, ts_now, error_message) + continue + + cycle = False + # remove_unfinished_operation(data_op_list) 动态测试中默认所有的执行时间 Qi 都没有 finish 字段 + build_graph(data_op_list, indegree, edge, txn) + print("--------file:{}--------".format(file)) + print_graph(edge,txn) + # print_data_op_list(data_op_list) + if not go_end: + cycle = check_cycle(edge, indegree, total_num_txn+2) + if cycle: + output_result(file, result_folder, ts_now, "Cyclic") + for i in range(total_num_txn + 2): + if visit1[i] == 0: + # dfs(result_folder, ts_now, i, "null") + dfs(result_folder, ts_now, Edge("null",i,-1)) + else: + output_result(file, result_folder, ts_now, "Avoid") + print_path(result_folder, ts_now, edge) + print("---------------------------------\n") \ No newline at end of file From 98bf22708551c346a6e28510dbd261fe147900be Mon Sep 17 00:00:00 2001 From: dinream <2534393465@qq.com> Date: Fri, 25 Oct 2024 17:03:58 +0800 Subject: [PATCH 15/16] Restore mda_detect.py version to 965b2be --- src/dbtest/src/mda_detect.py | 304 ++++++++--------------------------- 1 file changed, 68 insertions(+), 236 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 864fc82c..b97f5ef4 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - # /* # * Tencent is pleased to support the open source community by making 3TS available. 
# * @@ -18,12 +16,10 @@ class Edge: - def __init__(self, type, out, begin_time): + def __init__(self, type, out): self.type = type self.out = out - self.time = begin_time - def __repr__(self): - return "Edge(begin_time={}, type={}, out={})".format(self.time, self.type, self.out) + class Operation: def __init__(self, op_type, txn_num, op_time, value): @@ -37,45 +33,6 @@ class Txn: def __init__(self): self.begin_ts = -1 self.end_ts = 99999999999999999999 - self.isolation = "serializable" - - -""" -Print the graph edges after building the graph. - -Args: -- edge (list): A list of Edge lists -- txn (list): A list of Txn objects - -Returns: -None -""" -# print edge after build graph -def print_graph(edge,txn): - for i, edges in enumerate(edge): - if i == 0 or i == len(edge)-1: - continue - print("Transaction {}:-----{}-----".format(i,txn[i].isolation)) - for e in edges: - print(" {}".format(e)) - - -""" -Print the contents of the data operation list. - -Args: -- data_op_list (list): A list of Operation lists - -Returns: -None -""" -# print data_op_list -def print_data_op_list(data_op_list): - for k,list in enumerate(data_op_list): - if k< len(data_op_list)-1: - print("\nk:{}---".format(k)) - for i, data in enumerate(list): - print("op:{}--{}-".format(data.op_type,data.txn_num)) """ Find the total variable number. @@ -92,31 +49,11 @@ def get_total(lines): for query in lines: query = query.replace("\n", "") query = query.replace(" ", "") - if query.find("INSERT") != -1: # query[0:2] == "Q0" and + if query[0:2] == "Q0" and query.find("INSERT") != -1: tmp = find_data(query, "(") num = max(num, tmp) - # elif query[0:2] == "Q1": - # break - return num - -""" -Find the total number of transactions based on transaction identifiers in queries. - -Args: -- lines (list): A list of query strings, each potentially containing transaction identifiers. - -Returns: -int: The highest transaction number found in the queries. 
-""" -# find total Txn number -def get_total_txn(lines): - num = 0 - for query in lines: - query = query.replace("\n", "") - query = query.replace(" ", "") - if query[0:1] == "Q" and query.find("T") != -1: - tmp = find_data(query, "T") - num = max(num, tmp) + elif query[0:2] == "Q1": + break return num @@ -147,16 +84,6 @@ def find_data(query, target): data_value = int(data_value) return data_value -# extract the isolation from content -def find_isolation(query): - if query.find("read-uncommitted") != -1: - return "read-uncommitted" - if query.find("read-committed") != -1: - return "read-committed" - if query.find("repeatable-read") != -1: - return "repeatable-read" - if query.find("serializable") != -1: - return "serializable" """ When a statement is executed, this function sets the end time, modifies the transaction list, @@ -174,21 +101,20 @@ def find_isolation(query): """ # when a statement is executed, set the end time and modify the version list def set_finish_time(op_time, data_op_list, query, txn, version_list): - # pos = query.find("finished at:") - # pos += len("finished at:") - # data_value = "" - # tmp, tmp1 = "", "" - # for i in range(pos, len(query)): - # if query[i].isdigit(): - # tmp += query[i] - # else: - # for j in range(3 - len(tmp)): - # tmp1 += "0" - # tmp = tmp1 + tmp - # data_value += tmp - # tmp, tmp1 = "", "" - # data_value = int(data_value) - data_value = int(op_time) + pos = query.find("finishedat:") + pos += len("finishedat:") + data_value = "" + tmp, tmp1 = "", "" + for i in range(pos, len(query)): + if query[i].isdigit(): + tmp += query[i] + else: + for j in range(3 - len(tmp)): + tmp1 += "0" + tmp = tmp1 + tmp + data_value += tmp + tmp, tmp1 = "", "" + data_value = int(data_value) for t in txn: if t.begin_ts == op_time: t.begin_ts = data_value @@ -226,32 +152,12 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list): def check_concurrency(data1, data2, txn): if txn[data2.txn_num].begin_ts < txn[data1.txn_num].end_ts: 
return True - elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: # TODO maybe a bug: don't need + elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: return True else: return False -""" -Check if a specific edge exists between two transactions in the graph. - -Args: -- edge (list): A list of lists, where each sublist contains edge objects representing the connections in the graph. -- src_txn (int): The source transaction number, which the edge originates from. -- src_type (str): The operation type (e.g., 'R', 'W') at the source of the edge. -- tar_txn (int): The target transaction number, which the edge points to. -- tar_type (str): The operation type (e.g., 'R', 'W') at the target of the edge. - -Returns: -bool: True if the specified edge exists, False otherwise. -""" -def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type): - for e in edge[src_txn]: - if e.out == tar_txn and e.type[0] == src_type and e.type[-1] == tar_type: - return True - return False - - """ Determine the type of edge between two operations based on their read or write versions. @@ -269,7 +175,7 @@ def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type): # decide which operation comes first depending on the read or write version # if later operation happened after the first txn commit time, edge type will add "C" def get_edge_type(data1, data2, txn): - if data1.op_time <= data2.op_time: + if data1.value <= data2.value: before, after = data1, data2 else: before, after = data2, data1 @@ -278,12 +184,13 @@ def get_edge_type(data1, data2, txn): # before, after = data2, data1 # else: # before, after = data1, data2 - if after.op_time > txn[before.txn_num].end_ts: + if data2.op_time > txn[data1.txn_num].end_ts: state = "C" else: state = "" return before.op_type + state + after.op_type, before, after + """ Build a directed graph representing the concurrency relationships between operations. 
@@ -329,32 +236,10 @@ def build_graph(data_op_list, indegree, edge, txn): def insert_edge(data1, data2, indegree, edge, txn): if check_concurrency(data1, data2, txn): edge_type, data1, data2 = get_edge_type(data1, data2, txn) - if data1.txn_num == data2.txn_num or edge_type in ["RCR", "RR"]: - return - #* read-uncommitted: Dirty Write - # WI does not exist. If it does, there must be an equivalent edge of WD + DI - # II does not exist. If it does, there must be an equivalent edge of ID + DI - # DW is allowed to exist. When UPDATE, use the condition to query the data containing D - # DD does not exist. If it does, there must be an equivalent edge of DI + ID - if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]: + if edge_type != "RR" and edge_type != "RCR" and data1.txn_num != data2.txn_num: indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) - #* read-committed: Dirty Read - elif edge_type in ["WCR","WR"] and (txn[data2.txn_num].isolation == "read-committed" or txn[data2.txn_num].isolation == "repeatable-read" or txn[data2.txn_num].isolation == "serializable"): - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) - #* repeatable-read: Unrepeatable Read - elif edge_type in ["RCW","RW"] and (txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable"): - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) - #* serializable: Phantom Read - elif edge_type in ["RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable": - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) - #* serializable: Phantom Read - elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable": - indegree[data2.txn_num] += 1 - 
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num,data1.op_time)) + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + """ Initialize a record in the version list based on the information in the query. @@ -475,15 +360,15 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list): left = find_data(query, "k>") + 1 right = find_data(query, "k<") for i in range(left, right): - data_op_list[i].append(Operation("R", txn_num, op_time, i)) # P + data_op_list[i].append(Operation("P", txn_num, op_time, i)) elif query.find("value1>") != -1: left = find_data(query, "value1>") + 1 right = find_data(query, "value1<") for i in range(left, right): - data_op_list[i].append(Operation("R", txn_num, op_time, i)) # p + data_op_list[i].append(Operation("P", txn_num, op_time, i)) else: # it means select all rows in table - for i in range(total_num+1): + for i in range(total_num): data_op_list[i].append(Operation("R", txn_num, op_time, i)) @@ -516,21 +401,6 @@ def write_record(op_time, txn_num, txn, data_op_list): op_data = find_data(query, "k=") op_value = find_data(query, "v=") data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value)) - # for predicate cases - elif query.find("k>") != -1: - left = find_data(query, "k>") + 1 - right = find_data(query, "k<") - for i in range(left, right): - data_op_list[i].append(Operation("W", txn_num, op_time, i)) # P - elif query.find("value1>") != -1: - left = find_data(query, "value1>") + 1 - right = find_data(query, "value1<") - for i in range(left, right): - data_op_list[i].append(Operation("W", txn_num, op_time, i)) # p - else: - # it means select all rows in table - for i in range(total_num+1): - data_op_list[i].append(Operation("W", txn_num, op_time, i)) """ @@ -560,21 +430,6 @@ def delete_record(op_time, txn_num, txn, data_op_list): elif query.find("k=") != -1: op_data = find_data(query, "k=") data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data)) - # for predicate cases - elif 
query.find("k>") != -1: - left = find_data(query, "k>") + 1 - right = find_data(query, "k<") - for i in range(left, right): - data_op_list[i].append(Operation("D", txn_num, op_time, i)) # P - elif query.find("value1>") != -1: - left = find_data(query, "value1>") + 1 - right = find_data(query, "value1<") - for i in range(left, right): - data_op_list[i].append(Operation("D", txn_num, op_time, i)) # p - else: - # it means select all rows in table - for i in range(total_num+1): - data_op_list[i].append(Operation("D", txn_num, op_time, i)) """ @@ -642,32 +497,33 @@ def operation_record(total_num, query, txn, data_op_list, version_list): error_message = "" op_time = find_data(query, "Q") txn_num = find_data(query, "T") - # print("total_num:{}, query:{},optime: {}, txn_num: {}\n".format(total_num,query, op_time, txn_num)) if op_time == 0 and query.find("INSERT") != -1: init_record(query, version_list) return error_message - if query.find("returnresult") != -1: #! 1"returnresult" maybe don't exist + if query.find("returnresult") != -1: error_message = readVersion_record(query, op_time, data_op_list, version_list) return error_message - if query.find("finished") != -1: #! "finished" maybe don't exist + if query.find("finished") != -1: set_finish_time(op_time, data_op_list, query, txn, version_list) return error_message if op_time == -1 or txn_num == -1: return error_message - if query.find("BEGIN") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: - txn[txn_num].isolation = find_isolation(query) - elif query.find("SELECT") != -1: + if query.find("SELECT") != -1: read_record(op_time, txn_num, total_num, txn, data_op_list) + return error_message elif query.find("UPDATE") != -1: write_record(op_time, txn_num, txn, data_op_list) - elif query.find("DELETE") != -1: + return error_message + elif query.find("DELETE") != -1: delete_record(op_time, txn_num, txn, data_op_list) - elif query.find("INSERT") != -1: #! 
assume existing data will not be inserted ("Rollback") + return error_message + elif query.find("INSERT") != -1: insert_record(op_time, txn_num, txn, data_op_list) + return error_message elif query.find("COMMIT") != -1: if op_time != 0: end_record(op_time, txn_num, txn) - set_finish_time(op_time, data_op_list, query, txn, version_list) + return error_message return error_message @@ -744,33 +600,28 @@ def check_cycle(edge, indegree, total): """ # for loop graphs, print the loop -def dfs(result_folder, ts_now , e): - visit1[e.out] = 1 - if visit[e.out] == 1: return - visit[e.out] = 1 - path.append(e) - for v in edge[e.out]: +def dfs(result_folder, ts_now, now, type): + visit1[now] = 1 + if visit[now] == 1: return + visit[now] = 1 + path.append(now) + edge_type.append(type) + for v in edge[now]: if visit[v.out] == 0: - dfs(result_folder, ts_now, v) + dfs(result_folder, ts_now, v.out, v.type) else: - path.append(v) + path.append(v.out) + edge_type.append(v.type) with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: - content = "" - list_loop = [] - for i in range(len(path) - 1, -1, -1): - if i != len(path) - 1 and path[i].out == path[len(path) - 1].out: - break - index = 0 - while(index < len(list_loop) and path[list_loop[index]].time < path[i].time): - index += 1 - list_loop.insert(index,i) - for idx in list_loop: - content = content + "->" + path[idx].type + "->" + str(path[idx].out) - content = str(path[list_loop[-1]].out) + content + "\n\n" - f.write(content) + for i in range(0, len(path)): + f.write(str(path[i])) + if i != len(path) - 1: f.write("->" + edge_type[i+1] + "->") + f.write("\n\n") path.pop() + edge_type.pop() path.pop() - visit[e.out] = 0 + edge_type.pop() + visit[now] = 0 """ @@ -842,21 +693,9 @@ def print_error(result_folder, ts_now, error_message): f.write("\n\n") - - - -#! 
------Some assumption------ -# The modifications of transactions at any isolation level are mutually visible, which is equivalent to a single storage, without read-write buffer -# There are statements to set the isolation level of each transaction in the input file, after "BEGIN" - # BEGIN T1 set_isolation=repeatable-read - # BEGIN T2 set_isolation=serializable - # BEGIN T3 set_isolation=read-uncommitted - # BEGIN T4 set_isolation=read-committed -# Assume that the inserted data key is in ascending order from 0 - -run_result_folder = "pg/mda_detect_test" +run_result_folder = "pg/serializable" result_folder = "check_result/" + run_result_folder -do_test_list = "mda_detect_test_list.txt" +do_test_list = "do_test_list.txt" #ts_now = "_2param_3txn_insert" ts_now = time.strftime("%Y%m%d_%H%M%S", time.localtime()) if not os.path.exists(result_folder): @@ -875,13 +714,12 @@ def print_error(result_folder, ts_now, error_message): lines = f.readlines() total_num = get_total(lines) # total number of variables - total_num_txn = get_total_txn(lines) # total number of txn - txn = [Txn() for i in range(total_num_txn + 2)] # total num of transaction + txn = [Txn() for i in range(total_num + 2)] # total num of transaction data_op_list = [[] for i in range(total_num + 2)] # record every operation that occurs on the variable - edge = [[] for i in range(total_num_txn + 2)] # all edges from the current point - indegree = [0] * (total_num_txn + 2) # in-degree of each point - visit = [0] * (total_num_txn + 2) # in dfs, whether the current point has been visited - visit1 = [0] * (total_num_txn + 2) # we will only use unvisited points as the starting point of the dfs + edge = [[] for i in range(total_num + 2)] # all edges from the current point + indegree = [0] * (total_num + 2) # in-degree of each point + visit = [0] * (total_num + 2) # in dfs, whether the current point has been visited + visit1 = [0] * (total_num + 2) # we will only use unvisited points as the starting point of the dfs 
path = [] # points in cycle edge_type = [] # edge type of the cycle version_list = [[] for i in range(total_num + 2)] @@ -893,7 +731,6 @@ def print_error(result_folder, ts_now, error_message): query = query.replace(" ", "") if query.find("Rollback") != -1 or query.find("Timeout") != -1: go_end = True - # print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt")) error_message = operation_record(total_num, query, txn, data_op_list, version_list) if error_message != "": break @@ -904,20 +741,15 @@ def print_error(result_folder, ts_now, error_message): continue cycle = False - # remove_unfinished_operation(data_op_list) 动态测试中默认所有的执行时间 Qi 都没有 finish 字段 + remove_unfinished_operation(data_op_list) build_graph(data_op_list, indegree, edge, txn) - print("--------file:{}--------".format(file)) - print_graph(edge,txn) - # print_data_op_list(data_op_list) if not go_end: - cycle = check_cycle(edge, indegree, total_num_txn+2) + cycle = check_cycle(edge, indegree, total_num + 2) if cycle: output_result(file, result_folder, ts_now, "Cyclic") - for i in range(total_num_txn + 2): + for i in range(total_num + 2): if visit1[i] == 0: - # dfs(result_folder, ts_now, i, "null") - dfs(result_folder, ts_now, Edge("null",i,-1)) + dfs(result_folder, ts_now, i, "null") else: output_result(file, result_folder, ts_now, "Avoid") - print_path(result_folder, ts_now, edge) - print("---------------------------------\n") \ No newline at end of file + print_path(result_folder, ts_now, edge) \ No newline at end of file From 827b5079050c03a68bc210155f06a5733ea3f221 Mon Sep 17 00:00:00 2001 From: dinream <2534393465@qq.com> Date: Fri, 25 Oct 2024 17:16:34 +0800 Subject: [PATCH 16/16] delete no English commit --- src/dbtest/src/mda_detect_mixed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dbtest/src/mda_detect_mixed.py b/src/dbtest/src/mda_detect_mixed.py index 2a44bf71..a464a087 
100644 --- a/src/dbtest/src/mda_detect_mixed.py +++ b/src/dbtest/src/mda_detect_mixed.py @@ -904,7 +904,7 @@ def print_error(result_folder, ts_now, error_message): continue cycle = False - # remove_unfinished_operation(data_op_list) 动态测试中默认所有的执行时间 Qi 都没有 finish 字段 + # remove_unfinished_operation(data_op_list) build_graph(data_op_list, indegree, edge, txn) print("--------file:{}--------".format(file)) print_graph(edge,txn)