From fa16273830635aabbd8907fe72fe23ecfc8cc866 Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Sat, 6 Jul 2024 12:53:35 +0000
Subject: [PATCH 01/16] fix an old bug (array sizing by variable count instead of txn count)
---
src/dbtest/src/mda_detect.py | 342 ++--------------------------
src/dbtest/src/mda_detect_modify.md | 31 +++
2 files changed, 50 insertions(+), 323 deletions(-)
create mode 100644 src/dbtest/src/mda_detect_modify.md
diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py
index 982361ea..ce68dd31 100644
--- a/src/dbtest/src/mda_detect.py
+++ b/src/dbtest/src/mda_detect.py
@@ -33,16 +33,9 @@ class Txn:
def __init__(self):
self.begin_ts = -1
self.end_ts = 99999999999999999999
+ self.isolation = ""
-"""
-Find the total variable number.
-Args:
-- lines (list): A list of queries.
-
-Returns:
-int: The maximum variable number found in the queries.
-"""
# find total variable number
def get_total(lines):
num = 0
@@ -56,17 +49,17 @@ def get_total(lines):
break
return num
+# find total Txn number
+def get_total_txn(lines):
+ num = 0
+ for query in lines:
+ query = query.replace("\n", "")
+ query = query.replace(" ", "")
+ if query[0:1] == "Q" and query.find("T") != -1:
+ tmp = find_data(query, "T")
+ num = max(num, tmp)
+ return num
-"""
-Extract the data we need from a query.
-
-Args:
-- query (str): The input query string.
-- target (str): The target substring to search for.
-
-Returns:
-int: The extracted data value, or -1 if not found.
-"""
# extract the data we need in query
def find_data(query, target):
pos = query.find(target)
@@ -85,20 +78,6 @@ def find_data(query, target):
return data_value
-"""
-When a statement is executed, this function sets the end time, modifies the transaction list,
-and updates the version list as needed.
-
-Args:
-- op_time (int): The operation time of the statement.
-- data_op_list (list): A list of data operations.
-- query (str): The query string containing information about the statement execution.
-- txn (list): A list of transaction objects.
-- version_list (list): A list of version lists for data operations.
-
-Returns:
-None
-"""
# when a statement is executed, set the end time and modify the version list
def set_finish_time(op_time, data_op_list, query, txn, version_list):
pos = query.find("finishedat:")
@@ -135,17 +114,6 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list):
op.value = len(version_list[i]) - 1
-"""
-Check if two transactions are concurrent based on their start and end times.
-
-Args:
-- data1: Information about the first transaction.
-- data2: Information about the second transaction.
-- txn: A list of transaction objects.
-
-Returns:
-bool: True if the transactions are concurrent, False otherwise.
-"""
# if both transactions are running
# or the start time of the second transaction is less than the end time of the first transaction
# we think they are concurrent
@@ -158,20 +126,6 @@ def check_concurrency(data1, data2, txn):
return False
-"""
-Determine the type of edge between two operations based on their read or write versions.
-
-Args:
-- data1: Information about the first operation.
-- data2: Information about the second operation.
-- txn: A list of transaction objects.
-
-Returns:
-tuple: A tuple containing three values:
- - A string indicating the edge type ('R', 'W', 'CR', 'CW').
- - Information about the operation that comes first.
- - Information about the operation that comes second.
-"""
# decide which operation comes first depending on the read or write version
# if later operation happened after the first txn commit time, edge type will add "C"
def get_edge_type(data1, data2, txn):
@@ -191,22 +145,6 @@ def get_edge_type(data1, data2, txn):
return before.op_type + state + after.op_type, before, after
-"""
-Build a directed graph representing the concurrency relationships between operations.
-
-Args:
-- data_op_list: A list of lists, where each inner list contains information about operations for a specific transaction.
-- indegree: A list representing the in-degrees of each operation node in the graph.
-- edge: A list representing the edges (concurrency relationships) between operations.
-- txn: A list of transaction objects.
-
-This function constructs a directed graph where nodes represent operations, and edges represent concurrency relationships
-between operations. It iterates through the list of operations for each transaction and calls the 'insert_edge' function
-to create edges in the graph based on concurrency relationships.
-
-Returns:
-None
-"""
def build_graph(data_op_list, indegree, edge, txn):
for list1 in data_op_list:
for i, data in enumerate(list1):
@@ -214,25 +152,6 @@ def build_graph(data_op_list, indegree, edge, txn):
insert_edge(list1[j], data, indegree, edge, txn)
-"""
-Insert an edge into the directed graph representing concurrency relationships between operations.
-
-Args:
-- data1: An operation object representing the first operation.
-- data2: An operation object representing the second operation.
-- indegree: A list representing the in-degrees of each transaction in the graph.
-- edge: A list representing the edges (concurrency relationships) between operations for each transaction.
-- txn: A list of transaction objects.
-
-This function inserts an edge into the directed graph to represent the concurrency relationship between 'data1' and 'data2'.
-It first checks if the two operations are concurrent by calling the 'check_concurrency' function. If they are concurrent, it
-determines the edge type using the 'get_edge_type' function and adds the edge to the 'edge' list.
-
-The 'indegree' list is updated to reflect the in-degree of the target transaction node when an edge is inserted.
-
-Returns:
-None
-"""
def insert_edge(data1, data2, indegree, edge, txn):
if check_concurrency(data1, data2, txn):
edge_type, data1, data2 = get_edge_type(data1, data2, txn)
@@ -241,42 +160,12 @@ def insert_edge(data1, data2, indegree, edge, txn):
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
-"""
-Initialize a record in the version list based on the information in the query.
-
-Args:
-- query: A query string that contains information about a record.
-- version_list: A list of lists representing versioned records.
-
-This function initializes a record in the 'version_list' based on the information provided in the 'query'. It extracts the 'key'
-and 'value' of the record from the query using the 'find_data' function and appends the 'value' to the corresponding version list.
-
-Returns:
-None
-"""
def init_record(query, version_list):
key = find_data(query, "(")
value = find_data(query, ",")
version_list[key].append(value)
-"""
-Read the versioned record based on the information in the query.
-
-Args:
-- query (str): A query string that contains information about reading a versioned record.
-- op_time (int): The operation time of the read operation.
-- data_op_list (list): A list of lists representing data operations.
-- version_list (list): A list of lists representing versioned records.
-
-This function reads the versioned record specified in the 'query'. It extracts the 'key' and 'value' from the query, which are
-used to identify the record and version to read. The function checks if the specified version exists in the version list and
-updates the 'op.value' accordingly. If the version doesn't exist or if the read operation is not successful, an error message
-is returned.
-
-Returns:
-str: An error message indicating the result of the read operation. An empty string means the read was successful.
-"""
def readVersion_record(query, op_time, data_op_list, version_list):
error_message = ""
data = query.split(")")
@@ -323,27 +212,6 @@ def readVersion_record(query, op_time, data_op_list, version_list):
-"""
-Read records based on the information in the query and update data operations.
-
-Args:
-- op_time (int): The operation time of the read operation.
-- txn_num (int): The transaction number.
-- total_num (int): The total number of records.
-- txn (list): A list of transactions.
-- data_op_list (list): A list of lists representing data operations.
-
-This function reads records specified in the query and updates the 'data_op_list' accordingly. It extracts information from
-the 'query' to determine which records to read and what type of operation to perform (read or predicate). The function also
-sets the 'begin_ts' of the transaction if it's not already set.
-
-The 'query' is analyzed to identify specific record keys or predicates and create corresponding 'Operation' objects in the
-'data_op_list'. Depending on the structure of the query, this function handles various cases, such as reading single records,
-handling predicates, and selecting all rows in a table.
-
-Returns:
-None
-"""
def read_record(op_time, txn_num, total_num, txn, data_op_list):
if txn[txn_num].begin_ts == -1:
txn[txn_num].begin_ts = op_time
@@ -372,24 +240,6 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list):
data_op_list[i].append(Operation("R", txn_num, op_time, i))
-"""
-Write records based on the information in the query and update data operations.
-
-Args:
-- op_time (int): The operation time of the write operation.
-- txn_num (int): The transaction number.
-- txn (list): A list of transactions.
-- data_op_list (list): A list of lists representing data operations.
-
-This function writes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the
-'query' to determine which records to write and what type of operation to perform (write). The function also sets the 'begin_ts'
-of the transaction if it's not already set.
-
-The 'query' is analyzed to identify specific record keys and values, and it creates corresponding 'Operation' objects in the 'data_op_list'.
-
-Returns:
-None
-"""
def write_record(op_time, txn_num, txn, data_op_list):
if txn[txn_num].begin_ts == -1:
txn[txn_num].begin_ts = op_time
@@ -403,24 +253,6 @@ def write_record(op_time, txn_num, txn, data_op_list):
data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value))
-"""
-Delete records based on the information in the query and update data operations.
-
-Args:
-- op_time (int): The operation time of the delete operation.
-- txn_num (int): The transaction number.
-- txn (list): A list of transactions.
-- data_op_list (list): A list of lists representing data operations.
-
-This function deletes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the
-'query' to determine which records to delete and what type of operation to perform (delete). The function also sets the 'begin_ts'
-of the transaction if it's not already set.
-
-The 'query' is analyzed to identify specific record keys, and it creates corresponding 'Operation' objects in the 'data_op_list'.
-
-Returns:
-None
-"""
def delete_record(op_time, txn_num, txn, data_op_list):
if txn[txn_num].begin_ts == -1:
txn[txn_num].begin_ts = op_time
@@ -432,25 +264,6 @@ def delete_record(op_time, txn_num, txn, data_op_list):
data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data))
-"""
-Insert records based on the information in the query and update data operations.
-
-Args:
-- op_time (int): The operation time of the insert operation.
-- txn_num (int): The transaction number.
-- txn (list): A list of transactions.
-- data_op_list (list): A list of lists representing data operations.
-
-This function inserts records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the
-'query' to determine which records to insert and what type of operation to perform (insert). The function also sets the 'begin_ts'
-of the transaction if it's not already set.
-
-The 'query' is analyzed to identify specific record keys and their corresponding values, and it creates corresponding 'Operation'
-objects in the 'data_op_list'.
-
-Returns:
-None
-"""
def insert_record(op_time, txn_num, txn, data_op_list):
if txn[txn_num].begin_ts == -1 and op_time != 0:
txn[txn_num].begin_ts = op_time
@@ -459,44 +272,15 @@ def insert_record(op_time, txn_num, txn, data_op_list):
data_op_list[key].append(Operation("I", txn_num, op_time, value))
-"""
-Set the end timestamp for a transaction.
-
-Args:
-- op_time (int): The operation time when the transaction ends.
-- txn_num (int): The transaction number.
-- txn (list): A list of transactions.
-
-This function sets the 'end_ts' attribute of a transaction specified by 'txn_num' to the given 'op_time'. It marks the end of the
-transaction's execution.
-
-Returns:
-None
-"""
def end_record(op_time, txn_num, txn):
txn[txn_num].end_ts = op_time
-"""
-Record and process database operations.
-
-Args:
-- total_num (int): The total number of database operations.
-- query (str): The SQL query representing a database operation.
-- txn (list): A list of transactions.
-- data_op_list (list): A list of data operations.
-- version_list (list): A list of version information for data operations.
-
-This function records and processes database operations based on the provided SQL query. It updates the transaction list, data
-operation list, and version list accordingly. The 'total_num' parameter specifies the total number of database operations.
-
-Returns:
-str: An error message (if any), or an empty string if the operation is successful.
-"""
def operation_record(total_num, query, txn, data_op_list, version_list):
error_message = ""
op_time = find_data(query, "Q")
txn_num = find_data(query, "T")
+ # print("total_num:{}, query:{},optime: {}, txn_num: {}\n".format(total_num,query, op_time, txn_num))
if op_time == 0 and query.find("INSERT") != -1:
init_record(query, version_list)
return error_message
@@ -528,18 +312,6 @@ def operation_record(total_num, query, txn, data_op_list, version_list):
-"""
-Remove unfinished operations from the data operation list.
-
-Args:
-- data_op_list (list): A list of data operations.
-
-This function iterates through the data operation list and removes any unfinished operations based on their operation time.
-Unfinished operations are those with an operation time less than 10,000,000.
-
-Returns:
-None
-"""
# remove failed statements to prevent redundant edges from being built
def remove_unfinished_operation(data_op_list):
for list1 in data_op_list:
@@ -547,20 +319,6 @@ def remove_unfinished_operation(data_op_list):
if op.op_time < 10000000:
list1.pop(i)
-"""
-Check for cycles in a directed graph using topological sorting.
-
-Args:
-- edge (List[List[Edge]]): A list representing the directed edges in the graph.
-- indegree (List[int]): A list representing the in-degrees of nodes in the graph.
-- total (int): The total number of nodes in the graph.
-
-This function checks for cycles in a directed graph by performing topological sorting. It takes as input the directed edges (`edge`),
-in-degrees of nodes (`indegree`), and the total number of nodes in the graph (`total`).
-
-Returns:
-bool: True if a cycle is detected, False otherwise.
-"""
# toposort to determine whether there is a cycle
def check_cycle(edge, indegree, total):
q = Queue.Queue()
@@ -580,25 +338,6 @@ def check_cycle(edge, indegree, total):
return True
-"""
-Perform depth-first search (DFS) to find and print loops in a directed graph.
-
-Args:
-- result_folder (str): The path to the folder where the results will be saved.
-- ts_now (str): The current timestamp or identifier for result file naming.
-- now (int): The current node being visited.
-- type (str): The type of edge leading to the current node ('C' for commit, 'R' for read, 'W' for write, etc.).
-
-This function performs depth-first search (DFS) to find and print loops in a directed graph. It takes as input the result folder
-path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), the current node being visited (`now`),
-and the type of edge leading to the current node (`type`).
-
-The function recursively explores the graph, tracking the visited nodes and edges to detect loops. When a loop is found, it is printed
-to a result file in the specified result folder.
-
-Note: This function assumes that global variables like 'visit', 'visit1', 'path', 'edge_type', and 'edge' are defined elsewhere.
-
-"""
# for loop graphs, print the loop
def dfs(result_folder, ts_now, now, type):
visit1[now] = 1
@@ -624,21 +363,6 @@ def dfs(result_folder, ts_now, now, type):
visit[now] = 0
-"""
-Print the paths in a directed graph to a result file.
-
-Args:
-- result_folder (str): The path to the folder where the results will be saved.
-- ts_now (str): The current timestamp or identifier for result file naming.
-- edge (list of lists): A list of lists representing the directed edges in the graph.
-
-This function prints the paths in a directed graph to a result file. It takes as input the result folder path (`result_folder`),
-the current timestamp or identifier for result file naming (`ts_now`), and a list of lists (`edge`) representing the directed edges
-in the graph.
-
-The function iterates through the edges and writes the paths to the result file in the specified result folder.
-
-"""
def print_path(result_folder, ts_now, edge):
with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
flag = 0
@@ -652,41 +376,11 @@ def print_path(result_folder, ts_now, edge):
f.write("\n\n")
-"""
-Output the result of cycle detection to a result file.
-
-Args:
-- file (str): The name of the file or input source being analyzed.
-- result_folder (str): The path to the folder where the results will be saved.
-- ts_now (str): The current timestamp or identifier for result file naming.
-- IsCyclic (str): A string indicating whether a cycle was detected.
-
-This function outputs the result of cycle detection to a result file. It takes as input the name of the file or input source being
-analyzed (`file`), the result folder path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`),
-and a string (`IsCyclic`) indicating whether a cycle was detected.
-
-The function writes the result, including the file name and the cyclic status, to the specified result file in the result folder.
-
-"""
def output_result(file, result_folder, ts_now, IsCyclic):
with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
f.write(file + ": " + IsCyclic + "\n")
-"""
-Print an error message to a result file.
-
-Args:
-- result_folder (str): The path to the folder where the results will be saved.
-- ts_now (str): The current timestamp or identifier for result file naming.
-- error_message (str): The error message to be printed.
-
-This function prints an error message to a result file. It takes as input the result folder path (`result_folder`), the current
-timestamp or identifier for result file naming (`ts_now`), and the error message (`error_message`) to be printed.
-
-The function appends the error message to the specified result file in the result folder and adds a newline for separation.
-
-"""
def print_error(result_folder, ts_now, error_message):
with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
f.write(error_message + "\n")
@@ -714,12 +408,13 @@ def print_error(result_folder, ts_now, error_message):
lines = f.readlines()
total_num = get_total(lines) # total number of variables
- txn = [Txn() for i in range(total_num + 2)] # total num of transaction
+ total_num_txn = get_total_txn(lines) # total number of txn
+ txn = [Txn() for i in range(total_num_txn + 2)] # total num of transaction
data_op_list = [[] for i in range(total_num + 2)] # record every operation that occurs on the variable
- edge = [[] for i in range(total_num + 2)] # all edges from the current point
- indegree = [0] * (total_num + 2) # in-degree of each point
- visit = [0] * (total_num + 2) # in dfs, whether the current point has been visited
- visit1 = [0] * (total_num + 2) # we will only use unvisited points as the starting point of the dfs
+ edge = [[] for i in range(total_num_txn + 2)] # all edges from the current point
+ indegree = [0] * (total_num_txn + 2) # in-degree of each point
+ visit = [0] * (total_num_txn + 2) # in dfs, whether the current point has been visited
+ visit1 = [0] * (total_num_txn + 2) # we will only use unvisited points as the starting point of the dfs
path = [] # points in cycle
edge_type = [] # edge type of the cycle
version_list = [[] for i in range(total_num + 2)]
@@ -731,6 +426,7 @@ def print_error(result_folder, ts_now, error_message):
query = query.replace(" ", "")
if query.find("Rollback") != -1 or query.find("Timeout") != -1:
go_end = True
+ print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt"))
error_message = operation_record(total_num, query, txn, data_op_list, version_list)
if error_message != "":
break
diff --git a/src/dbtest/src/mda_detect_modify.md b/src/dbtest/src/mda_detect_modify.md
new file mode 100644
index 00000000..3af9e9f3
--- /dev/null
+++ b/src/dbtest/src/mda_detect_modify.md
@@ -0,0 +1,31 @@
+[text](mda_detect.py) 修改日志
+# 思考
+1. 用于加边建立图的节点对应一个操作还是一个事务。
+
+# 原来代码中问题
+### 数组访问越界问题
+现象
+```python
+total_num = get_total(lines) # 统计的个数是插入数据的个数,不是事务的个数。
+txn = [Txn() for i in range(total_num + 2)] # 导致构造的 txn 数组较小
+.... # 还有 indegree edge 数组的大小应该是事务的个数。
+```
+解决:构造一个新函数获取事务个数。
+```python
+# find total Txn number
+def get_total_txn(lines):
+ num = 0
+ for query in lines:
+ query = query.replace("\n", "")
+ query = query.replace(" ", "")
+ if query[0:1] == "Q" and query.find("T") != -1:
+ tmp = find_data(query, "T")
+ num = max(num, tmp)
+ return num
+total_num_txn = get_total_txn(lines) # total number of txn
+```
+效果:不同数据使用不同的初始长度
+```python
+# total_num: data_op_list, version_list
+# total_num_txn: txn, edge, total_num_txn, visit, visit1
+```
From c73c237c3c25e90e23d3023afb43698da9a8398d Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Sat, 6 Jul 2024 17:13:38 +0000
Subject: [PATCH 02/16] add txn.isolation
---
src/dbtest/src/{ => doc}/mda_detect_modify.md | 6 ++++--
src/dbtest/src/mda_detect.py | 19 ++++++++++++++++---
2 files changed, 20 insertions(+), 5 deletions(-)
rename src/dbtest/src/{ => doc}/mda_detect_modify.md (78%)
diff --git a/src/dbtest/src/mda_detect_modify.md b/src/dbtest/src/doc/mda_detect_modify.md
similarity index 78%
rename from src/dbtest/src/mda_detect_modify.md
rename to src/dbtest/src/doc/mda_detect_modify.md
index 3af9e9f3..7e29b4e6 100644
--- a/src/dbtest/src/mda_detect_modify.md
+++ b/src/dbtest/src/doc/mda_detect_modify.md
@@ -1,6 +1,7 @@
[text](mda_detect.py) 修改日志
-# 思考
-1. 用于加边建立图的节点对应一个操作还是一个事务。
+# 思考&分析
+1. 用于加边建立图的节点对应一个操作还是一个事务。答:一个事务。
+2. 目标:一个对数据库的操作文件(运行效果文件)中有多个事务,每一个事务有不同的隔离级别,通过执行结果和隔离级别判断是否满足一致性
# 原来代码中问题
### 数组访问越界问题
@@ -29,3 +30,4 @@ total_num_txn = get_total_txn(lines) # total number of txn
# total_num: data_op_list, version_list
# total_num_txn: txn, edge, total_num_txn, visit, visit1
```
+
diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py
index ce68dd31..b859c38b 100644
--- a/src/dbtest/src/mda_detect.py
+++ b/src/dbtest/src/mda_detect.py
@@ -33,7 +33,7 @@ class Txn:
def __init__(self):
self.begin_ts = -1
self.end_ts = 99999999999999999999
- self.isolation = ""
+ self.isolation = "serializable"
# find total variable number
@@ -77,6 +77,16 @@ def find_data(query, target):
data_value = int(data_value)
return data_value
+# extract the isolation from content
+def find_isolation(query):
+ if query.find("read-uncommitted") != -1:
+ return "read-uncommitted"
+ if query.find("read-committed") != -1:
+ return "read-committed"
+ if query.find("repeatable-read") != -1:
+ return "repeatable-read"
+ if query.find("serializable") != -1:
+ return "serializable"
# when a statement is executed, set the end time and modify the version list
def set_finish_time(op_time, data_op_list, query, txn, version_list):
@@ -120,7 +130,7 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list):
def check_concurrency(data1, data2, txn):
if txn[data2.txn_num].begin_ts < txn[data1.txn_num].end_ts:
return True
- elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts:
+ elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: # TODO maybe a bug: don't need
return True
else:
return False
@@ -138,7 +148,7 @@ def get_edge_type(data1, data2, txn):
# before, after = data2, data1
# else:
# before, after = data1, data2
- if data2.op_time > txn[data1.txn_num].end_ts:
+ if data2.op_time > txn[data1.txn_num].end_ts: # TODO maybe a bug, before after
state = "C"
else:
state = ""
@@ -290,6 +300,9 @@ def operation_record(total_num, query, txn, data_op_list, version_list):
if query.find("finished") != -1:
set_finish_time(op_time, data_op_list, query, txn, version_list)
return error_message
+ if op_time == -1 and txn_num != -1 and query.find("set_isolation") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.:
+ # query such as "T2 set_isolation=serializable "
+ txn[txn_num].isolation = find_isolation(query)
if op_time == -1 or txn_num == -1:
return error_message
if query.find("SELECT") != -1:
From 67d5ecc94551103e16c3b52ef0d248237cc9fa2e Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Sun, 7 Jul 2024 15:01:35 +0000
Subject: [PATCH 03/16] demo implement
---
src/dbtest/src/doc/mda_detect_modify.md | 17 +++++-
src/dbtest/src/mda_detect.py | 71 ++++++++++++++++++++-----
2 files changed, 73 insertions(+), 15 deletions(-)
diff --git a/src/dbtest/src/doc/mda_detect_modify.md b/src/dbtest/src/doc/mda_detect_modify.md
index 7e29b4e6..e249f6bf 100644
--- a/src/dbtest/src/doc/mda_detect_modify.md
+++ b/src/dbtest/src/doc/mda_detect_modify.md
@@ -2,7 +2,7 @@
# 思考&分析
1. 用于加边建立图的节点对应一个操作还是一个事务。答:一个事务。
2. 目标:一个对数据库的操作文件(运行效果文件)中有多个事务,每一个事务有不同的隔离级别,通过执行结果和隔离级别判断是否满足一致性
-
+3. 原来输出为所有事务是否满足一致性,现在输出为每个事务是否满足一致性? 单个整体报错 or 多个报错? 多个,每个错误都识别,兼容单个整体报错(实现较难)
# 原来代码中问题
### 数组访问越界问题
现象
@@ -31,3 +31,18 @@ total_num_txn = get_total_txn(lines) # total number of txn
# total_num_txn: txn, edge, total_num_txn, visit, visit1
```
+
+### 默认字符串少了空格
+```python
+ pos = query.find("finished at:")
+ pos += len("finished at:")
+```
+
+### "R" 类型的操作并没有修改 value 值为下标:
+```python
+ if data1.value <= data2.value:
+ before, after = data1, data2
+ else:
+ before, after = data2, data1
+```
+
diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py
index b859c38b..0b3393c4 100644
--- a/src/dbtest/src/mda_detect.py
+++ b/src/dbtest/src/mda_detect.py
@@ -42,11 +42,11 @@ def get_total(lines):
for query in lines:
query = query.replace("\n", "")
query = query.replace(" ", "")
- if query[0:2] == "Q0" and query.find("INSERT") != -1:
+ if query.find("INSERT") != -1: # query[0:2] == "Q0" and
tmp = find_data(query, "(")
num = max(num, tmp)
- elif query[0:2] == "Q1":
- break
+ # elif query[0:2] == "Q1":
+ # break
return num
# find total Txn number
@@ -90,8 +90,8 @@ def find_isolation(query):
# when a statement is executed, set the end time and modify the version list
def set_finish_time(op_time, data_op_list, query, txn, version_list):
- pos = query.find("finishedat:")
- pos += len("finishedat:")
+ pos = query.find("finished at:")
+ pos += len("finished at:")
data_value = ""
tmp, tmp1 = "", ""
for i in range(pos, len(query)):
@@ -123,7 +123,6 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list):
version_list[i].append(op.value)
op.value = len(version_list[i]) - 1
-
# if both transactions are running
# or the start time of the second transaction is less than the end time of the first transaction
# we think they are concurrent
@@ -136,19 +135,26 @@ def check_concurrency(data1, data2, txn):
return False
+def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type):
+ for e in edge[src_txn]:
+ if e.out == tar_txn and e.type[0] == src_type and e.type[-1] == tar_type:
+ return True
+ return False
+
# decide which operation comes first depending on the read or write version
# if later operation happened after the first txn commit time, edge type will add "C"
def get_edge_type(data1, data2, txn):
- if data1.value <= data2.value:
- before, after = data1, data2
- else:
- before, after = data2, data1
+ # if data1.value <= data2.value:
+ # before, after = data1, data2
+ # else:
+ # before, after = data2, data1
+ before, after = data1, data2
# if data1.op_type == "D" or data2.op_type == "D":
# if data1.value < data2.value:
# before, after = data2, data1
# else:
# before, after = data1, data2
- if data2.op_time > txn[data1.txn_num].end_ts: # TODO maybe a bug, before after
+ if data2.op_time > txn[data1.txn_num].end_ts:
state = "C"
else:
state = ""
@@ -165,9 +171,45 @@ def build_graph(data_op_list, indegree, edge, txn):
def insert_edge(data1, data2, indegree, edge, txn):
if check_concurrency(data1, data2, txn):
edge_type, data1, data2 = get_edge_type(data1, data2, txn)
- if edge_type != "RR" and edge_type != "RCR" and data1.txn_num != data2.txn_num:
+ # if edge_type != "RR" and edge_type != "RCR" and data1.txn_num != data2.txn_num:
+ # indegree[data2.txn_num] += 1
+ # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ if edge_type == "WW" or edge_type == "WCW":
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ elif data1.isolation == "read-uncommitted":
+ if edge_type[0] != 'R' and not(data2.isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
+ if edge_type[-1] == 'R': # not R -- R
+ if data2.isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ if data2.isolation == "repeatable-read" and edge_type[0]== 'W':
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ if data2.isolation == "serializable":
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ elif edge_type[-1] != 'R': # not R -- not R
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ elif data1.isolation == "read-committed" or data1.isolation == "repeatable-read" or data1.isolation == "serializable":
+ if edge_type[0] != 'R' and not(data2.isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
+ if edge_type[-1] == 'R': # not R -- R
+ if data2.isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ if data2.isolation == "repeatable-read" and edge_type[0]== 'W':
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ if data2.isolation == "serializable":
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ elif edge_type[-1] != 'R': # not R -- not R
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ elif edge_type[0] == 'R' and edge_type[-1] != 'R':
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
def init_record(query, version_list):
@@ -238,12 +280,12 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list):
left = find_data(query, "k>") + 1
right = find_data(query, "k<")
for i in range(left, right):
- data_op_list[i].append(Operation("P", txn_num, op_time, i))
+ data_op_list[i].append(Operation("R", txn_num, op_time, i)) # P
elif query.find("value1>") != -1:
left = find_data(query, "value1>") + 1
right = find_data(query, "value1<")
for i in range(left, right):
- data_op_list[i].append(Operation("P", txn_num, op_time, i))
+ data_op_list[i].append(Operation("R", txn_num, op_time, i)) # p
else:
# it means select all rows in table
for i in range(total_num):
@@ -303,6 +345,7 @@ def operation_record(total_num, query, txn, data_op_list, version_list):
if op_time == -1 and txn_num != -1 and query.find("set_isolation") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.:
# query such as "T2 set_isolation=serializable "
txn[txn_num].isolation = find_isolation(query)
+ return error_message
if op_time == -1 or txn_num == -1:
return error_message
if query.find("SELECT") != -1:
From 3f90a6ef2a6f4797304152e7b0e6e40ff0dd1f8d Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Tue, 9 Jul 2024 07:21:45 +0000
Subject: [PATCH 04/16] bugs
---
src/dbtest/src/mda_detect.py | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py
index 0b3393c4..0136e3fe 100644
--- a/src/dbtest/src/mda_detect.py
+++ b/src/dbtest/src/mda_detect.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
# /*
# * Tencent is pleased to support the open source community by making 3TS available.
# *
@@ -177,31 +179,31 @@ def insert_edge(data1, data2, indegree, edge, txn):
if edge_type == "WW" or edge_type == "WCW":
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- elif data1.isolation == "read-uncommitted":
- if edge_type[0] != 'R' and not(data2.isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
+ elif txn[data1.txn_num].isolation == "read-uncommitted":
+ if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
if edge_type[-1] == 'R': # not R -- R
- if data2.isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读
+ if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'): # 可能脏读
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- if data2.isolation == "repeatable-read" and edge_type[0]== 'W':
+ if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W':
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- if data2.isolation == "serializable":
+ if txn[data2.txn_num].isolation == "serializable":
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
elif edge_type[-1] != 'R': # not R -- not R
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- elif data1.isolation == "read-committed" or data1.isolation == "repeatable-read" or data1.isolation == "serializable":
- if edge_type[0] != 'R' and not(data2.isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
+ elif txn[data1.txn_num].isolation == "read-committed" or txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable":
+ if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
if edge_type[-1] == 'R': # not R -- R
- if data2.isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读
+ if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- if data2.isolation == "repeatable-read" and edge_type[0]== 'W':
+ if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W':
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- if data2.isolation == "serializable":
+ if txn[data2.txn_num].isolation == "serializable":
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
elif edge_type[-1] != 'R': # not R -- not R
@@ -345,6 +347,7 @@ def operation_record(total_num, query, txn, data_op_list, version_list):
if op_time == -1 and txn_num != -1 and query.find("set_isolation") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.:
# query such as "T2 set_isolation=serializable "
txn[txn_num].isolation = find_isolation(query)
+ print(str(txn_num)+"------------------"+txn[txn_num].isolation)
return error_message
if op_time == -1 or txn_num == -1:
return error_message
@@ -443,7 +446,7 @@ def print_error(result_folder, ts_now, error_message):
f.write("\n\n")
-run_result_folder = "pg/serializable"
+run_result_folder = "pg/repeatable-read"
result_folder = "check_result/" + run_result_folder
do_test_list = "do_test_list.txt"
#ts_now = "_2param_3txn_insert"
From f0a40e929e7359d05fac50417b5a9692168d477e Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Tue, 9 Jul 2024 08:39:40 +0000
Subject: [PATCH 05/16] graph print
---
src/dbtest/src/mda_detect.py | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py
index 0136e3fe..b90e5d08 100644
--- a/src/dbtest/src/mda_detect.py
+++ b/src/dbtest/src/mda_detect.py
@@ -21,7 +21,8 @@ class Edge:
def __init__(self, type, out):
self.type = type
self.out = out
-
+ def __repr__(self):
+ return "Edge(type={}, out={})".format(self.type, self.out)
class Operation:
def __init__(self, op_type, txn_num, op_time, value):
@@ -37,6 +38,12 @@ def __init__(self):
self.end_ts = 99999999999999999999
self.isolation = "serializable"
+# print edge after build graph
+def print_graph(edge):
+ for i, edges in enumerate(edge):
+ print("Transaction {}:".format(i))
+ for e in edges:
+ print(" {}".format(e))
# find total variable number
def get_total(lines):
@@ -485,7 +492,7 @@ def print_error(result_folder, ts_now, error_message):
query = query.replace(" ", "")
if query.find("Rollback") != -1 or query.find("Timeout") != -1:
go_end = True
- print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt"))
+ # print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt"))
error_message = operation_record(total_num, query, txn, data_op_list, version_list)
if error_message != "":
break
@@ -498,6 +505,7 @@ def print_error(result_folder, ts_now, error_message):
cycle = False
remove_unfinished_operation(data_op_list)
build_graph(data_op_list, indegree, edge, txn)
+ print_graph(edge)
if not go_end:
cycle = check_cycle(edge, indegree, total_num + 2)
if cycle:
From 5338b6ac66119d072b7aa855f9750f90fd0c64ad Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Fri, 12 Jul 2024 02:58:45 +0000
Subject: [PATCH 06/16] implement
---
src/dbtest/src/doc/mda_detect_modify.md | 3 +
src/dbtest/src/doc/mda_detect_read.md | 36 ++++
src/dbtest/src/mda_detect.py | 221 +++++++++++++++---------
3 files changed, 183 insertions(+), 77 deletions(-)
create mode 100644 src/dbtest/src/doc/mda_detect_read.md
diff --git a/src/dbtest/src/doc/mda_detect_modify.md b/src/dbtest/src/doc/mda_detect_modify.md
index e249f6bf..3c64dbda 100644
--- a/src/dbtest/src/doc/mda_detect_modify.md
+++ b/src/dbtest/src/doc/mda_detect_modify.md
@@ -3,6 +3,9 @@
1. 用于加边建立图的节点对应一个操作还是一个事务。答:一个事务。
2. 目标:一个对数据库的操作文件(运行效果文件)中有多个事务,每一个事务有不同的隔离级别,通过执行结果和隔离级别判断是否满足一致性
+3. 原来输出为所有事务是否满足一致性,现在输出为每个事务是否满足一致性? 单个整体报错 or 多个报错? 多个,每个错误都识别,兼容单个整体报错(实现较难)
+4. 有两个检测思路:
+ 1. 修改建立图的过程中加边策略,保留循环检测流程。(当前实现方式)
+ 2. 保留加边策略,修改冲突检测流程。
# 原来代码中问题
### 数组访问越界问题
现象
diff --git a/src/dbtest/src/doc/mda_detect_read.md b/src/dbtest/src/doc/mda_detect_read.md
new file mode 100644
index 00000000..ebf0b144
--- /dev/null
+++ b/src/dbtest/src/doc/mda_detect_read.md
@@ -0,0 +1,36 @@
+
+## mda_detect.py 代码功能
+主要用于检测数据库事务之间的并发关系,以及是否存在循环依赖。具体来说,代码通过解析输入的SQL语句,构建事务操作的有向图,检测是否存在循环依赖(即事务之间是否存在无法解决的并发冲突),并输出结果。
+### 变量命名含义
+
+| 名称 | 含义 | 备注 |
+| ------------ | ------------------------------------- | ------------------- |
+| data | 一次操作信息:type、txn_num、op_time、op_data | |
+| data_op_list | 数据操作列表 【数据key:【操作...】, 数据key:【操作...】】 | |
+| indegree | 下标:事务号;元素:依赖该事务的事务个数 | |
+| edge | 下标:事务号;元素:该事务的出边 | |
+| version_list | key:数据编号;value:数据的值的历史记录 | 下标是 value? |
+| total_num | 数据库操作总数 | 对于数据库全表扫描类的操作有关键作用。 |
+
+### 函数列表
+
+| 函数 | 功能 | 备注 |
+| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------- |
+| get_total | 获取一个测试文件中 Q0 部分插入的数据个数 | key 的个数 |
+| find_data | 特定位置提取一个数字 | |
+| set_finish_time | 在一个数据库操作语句执行完毕后更新相关的时间戳:finished at:<br>1. 所有事务中 begin_ts \| end_ts 和 op_time 相等的进行替换<br>2. 所有操作中 op_time 和 op_time 相等的进行替换<br>3. version_list 保存操作历史值 op.value<br>4. op.value 保存 version_list 历史下标 | |
+| check_concurrency | 检查两个事务和是否并发 | 通过开始时间和结束时间来判断,默认开始时间之间已经比较过了吗? |
+| get_edge_type | 确定两个操作之间的边的类型,添加C 标志,跨事务操作,同时返回新的操作顺序 | data1 和 data2 是同一个 Key 的前后两个操作 |
+| build_graph | 建立一个有向图,这个图表示不同操作之间的并发关系 | 只会在同一组操作之间建立边 |
+| insert_edge | 具体的插边操作(check_concurrency 的前提下插入 get_edge_type 边):<br>不同事务之间有并发读写冲突; | data1 发生时间默认在data2 之前 |
+| init_record | 根据查询中的信息初始化版本列表中的记录。 | |
+| readVersion_record | 处理数据库查询操作 | 只是更改了: op.value |
+| read_record | 根据查询中的信息读取记录并更新数据操作。增加到 data_op_list 中。 | |
+| write_record | 根据查询和更新数据操作中的信息写入记录。增加到 data_op_list 中。 | |
+| delete_record | 根据查询中的信息删除记录并更新数据操作。增加到 data_op_list 中。 | |
+| insert_record | 根据查询中的信息插入记录并更新数据操作。 | |
+| end_record | 设置事务的结束时间戳。 | |
+| operation_record | 记录并且处理数据库操作。 | |
+| remove_unfinished_operation | 删除失败的语句以防止构建冗余边 | |
+| check_cycle | 在有向图中查找环 | |
+| dfs | 在有环的有向图中找环 | |
diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py
index b90e5d08..55e1f1c1 100644
--- a/src/dbtest/src/mda_detect.py
+++ b/src/dbtest/src/mda_detect.py
@@ -39,12 +39,22 @@ def __init__(self):
self.isolation = "serializable"
# print edge after build graph
-def print_graph(edge):
+def print_graph(edge,txn):
for i, edges in enumerate(edge):
- print("Transaction {}:".format(i))
+ if i == 0 or i == len(edge)-1:
+ continue
+ print("Transaction {}:-----{}-----".format(i,txn[i].isolation))
for e in edges:
print(" {}".format(e))
+
+# print data_op_list
+def print_data_op_list(data_op_list):
+ for k,list in enumerate(data_op_list):
+ print("\nk:{}---".format(k))
+ for i, data in enumerate(list):
+ print("op:{}--{}-".format(data.op_type,data.txn_num))
+
# find total variable number
def get_total(lines):
num = 0
@@ -99,20 +109,21 @@ def find_isolation(query):
# when a statement is executed, set the end time and modify the version list
def set_finish_time(op_time, data_op_list, query, txn, version_list):
- pos = query.find("finished at:")
- pos += len("finished at:")
- data_value = ""
- tmp, tmp1 = "", ""
- for i in range(pos, len(query)):
- if query[i].isdigit():
- tmp += query[i]
- else:
- for j in range(3 - len(tmp)):
- tmp1 += "0"
- tmp = tmp1 + tmp
- data_value += tmp
- tmp, tmp1 = "", ""
- data_value = int(data_value)
+ # pos = query.find("finished at:")
+ # pos += len("finished at:")
+ # data_value = ""
+ # tmp, tmp1 = "", ""
+ # for i in range(pos, len(query)):
+ # if query[i].isdigit():
+ # tmp += query[i]
+ # else:
+ # for j in range(3 - len(tmp)):
+ # tmp1 += "0"
+ # tmp = tmp1 + tmp
+ # data_value += tmp
+ # tmp, tmp1 = "", ""
+ # data_value = int(data_value)
+ data_value = int(op_time)
for t in txn:
if t.begin_ts == op_time:
t.begin_ts = data_value
@@ -177,48 +188,67 @@ def build_graph(data_op_list, indegree, edge, txn):
insert_edge(list1[j], data, indegree, edge, txn)
+
def insert_edge(data1, data2, indegree, edge, txn):
if check_concurrency(data1, data2, txn):
edge_type, data1, data2 = get_edge_type(data1, data2, txn)
- # if edge_type != "RR" and edge_type != "RCR" and data1.txn_num != data2.txn_num:
- # indegree[data2.txn_num] += 1
- # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- if edge_type == "WW" or edge_type == "WCW":
+ if data1.txn_num == data2.txn_num or edge_type in ["RCR", "RR"]:
+ return
+ #* read-uncommitted: Dirty Write
+ # WI 不存在,如果有,那么一定会有 WD + DI 的等效边
+ # II 不存在,如果有,那么一定会有 ID + DI 的等效边
+ # DW 允许存在, UPDATE 时使用条件查询包含 D 的数据
+ # DD 不存在,如果有,那么一定会有 DI + ID 的等效边
+ if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]:
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ #* read-committed: Dirty Read
+ elif edge_type in ["WCR","WR"] and (txn[data2.txn_num].isolation == "read-committed" or txn[data2.txn_num].isolation == "repeatable-read" or txn[data2.txn_num].isolation == "serializable"):
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ #* repeatable-read: Unrepeatable Read
+ elif edge_type in ["RCW","RW"] and (txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable"):
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- elif txn[data1.txn_num].isolation == "read-uncommitted":
- if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
- if edge_type[-1] == 'R': # not R -- R
- if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'): # 可能脏读
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W':
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- if txn[data2.txn_num].isolation == "serializable":
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- elif edge_type[-1] != 'R': # not R -- not R
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- elif txn[data1.txn_num].isolation == "read-committed" or txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable":
- if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
- if edge_type[-1] == 'R': # not R -- R
- if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W':
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- if txn[data2.txn_num].isolation == "serializable":
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- elif edge_type[-1] != 'R': # not R -- not R
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- elif edge_type[0] == 'R' and edge_type[-1] != 'R':
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ #* serializable: Phantom Read
+ elif edge_type in ["ICR","IR","DCR","DR","RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable":
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+
+ # 入边
+ # elif txn[data1.txn_num].isolation == "read-uncommitted":
+ # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
+ # if edge_type[-1] == 'R': # not R -- R
+ # if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'): # 可能脏读
+ # indegree[data2.txn_num] += 1
+ # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ # if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W':
+ # indegree[data2.txn_num] += 1
+ # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ # if txn[data2.txn_num].isolation == "serializable":
+ # indegree[data2.txn_num] += 1
+ # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ # elif edge_type[-1] != 'R': # not R -- not R
+ # indegree[data2.txn_num] += 1
+ # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ # elif txn[data1.txn_num].isolation == "read-committed" or txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable":
+ # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
+ # if edge_type[-1] == 'R': # not R -- R
+ # if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读
+ # indegree[data2.txn_num] += 1
+ # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ # if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W':
+ # indegree[data2.txn_num] += 1
+ # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ # if txn[data2.txn_num].isolation == "serializable":
+ # indegree[data2.txn_num] += 1
+ # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ # elif edge_type[-1] != 'R': # not R -- not R
+ # indegree[data2.txn_num] += 1
+ # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ # elif edge_type[0] == 'R' and edge_type[-1] != 'R':
+ # indegree[data2.txn_num] += 1
+ # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
def init_record(query, version_list):
@@ -297,7 +327,7 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list):
data_op_list[i].append(Operation("R", txn_num, op_time, i)) # p
else:
# it means select all rows in table
- for i in range(total_num):
+ for i in range(total_num+1):
data_op_list[i].append(Operation("R", txn_num, op_time, i))
@@ -312,7 +342,21 @@ def write_record(op_time, txn_num, txn, data_op_list):
op_data = find_data(query, "k=")
op_value = find_data(query, "v=")
data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value))
-
+ # for predicate cases
+ elif query.find("k>") != -1:
+ left = find_data(query, "k>") + 1
+ right = find_data(query, "k<")
+ for i in range(left, right):
+ data_op_list[i].append(Operation("W", txn_num, op_time, i)) # P
+ elif query.find("value1>") != -1:
+ left = find_data(query, "value1>") + 1
+ right = find_data(query, "value1<")
+ for i in range(left, right):
+ data_op_list[i].append(Operation("W", txn_num, op_time, i)) # p
+ else:
+ # it means select all rows in table
+ for i in range(total_num+1):
+ data_op_list[i].append(Operation("W", txn_num, op_time, i))
def delete_record(op_time, txn_num, txn, data_op_list):
if txn[txn_num].begin_ts == -1:
@@ -323,7 +367,21 @@ def delete_record(op_time, txn_num, txn, data_op_list):
elif query.find("k=") != -1:
op_data = find_data(query, "k=")
data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data))
-
+ # for predicate cases
+ elif query.find("k>") != -1:
+ left = find_data(query, "k>") + 1
+ right = find_data(query, "k<")
+ for i in range(left, right):
+ data_op_list[i].append(Operation("D", txn_num, op_time, i)) # P
+ elif query.find("value1>") != -1:
+ left = find_data(query, "value1>") + 1
+ right = find_data(query, "value1<")
+ for i in range(left, right):
+ data_op_list[i].append(Operation("D", txn_num, op_time, i)) # p
+ else:
+ # it means select all rows in table
+ for i in range(total_num+1):
+ data_op_list[i].append(Operation("D", txn_num, op_time, i))
def insert_record(op_time, txn_num, txn, data_op_list):
if txn[txn_num].begin_ts == -1 and op_time != 0:
@@ -337,6 +395,7 @@ def end_record(op_time, txn_num, txn):
txn[txn_num].end_ts = op_time
+
def operation_record(total_num, query, txn, data_op_list, version_list):
error_message = ""
op_time = find_data(query, "Q")
@@ -345,35 +404,28 @@ def operation_record(total_num, query, txn, data_op_list, version_list):
if op_time == 0 and query.find("INSERT") != -1:
init_record(query, version_list)
return error_message
- if query.find("returnresult") != -1:
+ if query.find("returnresult") != -1: #! 1"returnresult" maybe don't exist
error_message = readVersion_record(query, op_time, data_op_list, version_list)
return error_message
- if query.find("finished") != -1:
+ if query.find("finished") != -1: #! "finished" maybe don't exist
set_finish_time(op_time, data_op_list, query, txn, version_list)
return error_message
- if op_time == -1 and txn_num != -1 and query.find("set_isolation") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.:
- # query such as "T2 set_isolation=serializable "
- txn[txn_num].isolation = find_isolation(query)
- print(str(txn_num)+"------------------"+txn[txn_num].isolation)
- return error_message
if op_time == -1 or txn_num == -1:
return error_message
- if query.find("SELECT") != -1:
+ if query.find("BEGIN") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.:
+ txn[txn_num].isolation = find_isolation(query)
+ elif query.find("SELECT") != -1:
read_record(op_time, txn_num, total_num, txn, data_op_list)
- return error_message
elif query.find("UPDATE") != -1:
write_record(op_time, txn_num, txn, data_op_list)
- return error_message
- elif query.find("DELETE") != -1:
+ elif query.find("DELETE") != -1:
delete_record(op_time, txn_num, txn, data_op_list)
- return error_message
- elif query.find("INSERT") != -1:
+ elif query.find("INSERT") != -1: #! assume existing data will not be inserted ("Rollback")
insert_record(op_time, txn_num, txn, data_op_list)
- return error_message
elif query.find("COMMIT") != -1:
if op_time != 0:
end_record(op_time, txn_num, txn)
- return error_message
+ set_finish_time(op_time, data_op_list, query, txn, version_list)
return error_message
@@ -453,9 +505,21 @@ def print_error(result_folder, ts_now, error_message):
f.write("\n\n")
-run_result_folder = "pg/repeatable-read"
+
+
+
+#! ------Some assumption------
+# 在任何隔离级别事务的修改互相可见,即等价于单一存储,无读写缓冲
+# 在输入文件中有设置各个事务隔离级别的语句,在 "BEGIN 之后"
+ # BEGIN T1 set_isolation=repeatable-read
+ # BEGIN T2 set_isolation=serializable
+ # BEGIN T3 set_isolation=read-uncommitted
+ # BEGIN T4 set_isolation=read-committed
+# 假定插入的数据 key 是从 0 向上递增的顺序
+
+run_result_folder = "pg/mda_detect_test"
result_folder = "check_result/" + run_result_folder
-do_test_list = "do_test_list.txt"
+do_test_list = "mda_detect_test_list.txt"
#ts_now = "_2param_3txn_insert"
ts_now = time.strftime("%Y%m%d_%H%M%S", time.localtime())
if not os.path.exists(result_folder):
@@ -503,16 +567,19 @@ def print_error(result_folder, ts_now, error_message):
continue
cycle = False
- remove_unfinished_operation(data_op_list)
+ # remove_unfinished_operation(data_op_list) 动态测试中默认所有的执行时间 Qi 都没有 finish 字段
build_graph(data_op_list, indegree, edge, txn)
- print_graph(edge)
+ print("--------file:{}--------".format(file))
+ print_graph(edge,txn)
+ # print_data_op_list(data_op_list)
if not go_end:
- cycle = check_cycle(edge, indegree, total_num + 2)
+ cycle = check_cycle(edge, indegree, total_num_txn+2)
if cycle:
output_result(file, result_folder, ts_now, "Cyclic")
- for i in range(total_num + 2):
+ for i in range(total_num_txn + 2):
if visit1[i] == 0:
dfs(result_folder, ts_now, i, "null")
else:
output_result(file, result_folder, ts_now, "Avoid")
print_path(result_folder, ts_now, edge)
+ print("---------------------------------\n")
\ No newline at end of file
From f4909ca6c6a4add3778731851d7be63bc65ef0b2 Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Fri, 12 Jul 2024 03:04:13 +0000
Subject: [PATCH 07/16] test case
---
src/dbtest/mda_detect_test_list.txt | 10 +++++++
src/dbtest/pg/mda_detect_test/aa.txt | 20 +++++++++++++
src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt | 27 ++++++++++++++++++
.../pg/mda_detect_test/rc_rc_cyclic.txt | 28 +++++++++++++++++++
.../pg/mda_detect_test/rr_rr_cyclic.txt | 27 ++++++++++++++++++
.../mda_detect_test/rr_rr_cyclic_commit.txt | 25 +++++++++++++++++
.../pg/mda_detect_test/rr_s_s_avoid.txt | 25 +++++++++++++++++
src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt | 28 +++++++++++++++++++
src/dbtest/pg/mda_detect_test/s_s_avoid.txt | 25 +++++++++++++++++
src/dbtest/pg/mda_detect_test/s_s_cyclic.txt | 28 +++++++++++++++++++
.../pg/mda_detect_test/s_s_cyclic_pr.txt | 20 +++++++++++++
11 files changed, 263 insertions(+)
create mode 100644 src/dbtest/mda_detect_test_list.txt
create mode 100755 src/dbtest/pg/mda_detect_test/aa.txt
create mode 100755 src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt
create mode 100755 src/dbtest/pg/mda_detect_test/rc_rc_cyclic.txt
create mode 100755 src/dbtest/pg/mda_detect_test/rr_rr_cyclic.txt
create mode 100755 src/dbtest/pg/mda_detect_test/rr_rr_cyclic_commit.txt
create mode 100755 src/dbtest/pg/mda_detect_test/rr_s_s_avoid.txt
create mode 100755 src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt
create mode 100755 src/dbtest/pg/mda_detect_test/s_s_avoid.txt
create mode 100755 src/dbtest/pg/mda_detect_test/s_s_cyclic.txt
create mode 100755 src/dbtest/pg/mda_detect_test/s_s_cyclic_pr.txt
diff --git a/src/dbtest/mda_detect_test_list.txt b/src/dbtest/mda_detect_test_list.txt
new file mode 100644
index 00000000..82f9304a
--- /dev/null
+++ b/src/dbtest/mda_detect_test_list.txt
@@ -0,0 +1,10 @@
+
+ru_ru_avoid
+rc_rc_cyclic
+rc_rc_avoid
+rr_rr_cyclic
+rr_rr_cyclic_commit
+s_s_cyclic
+s_s_avoid
+rr_s_s_avoid
+
diff --git a/src/dbtest/pg/mda_detect_test/aa.txt b/src/dbtest/pg/mda_detect_test/aa.txt
new file mode 100755
index 00000000..2b19e7c5
--- /dev/null
+++ b/src/dbtest/pg/mda_detect_test/aa.txt
@@ -0,0 +1,20 @@
+
+
+
+Q0-T1-DROP TABLE IF EXISTS t1;
+Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT);
+Q0-T1-INSERT INTO t1 VALUES (0, 0);
+Q0-T1-INSERT INTO t1 VALUES (1, 1);
+Q0-T1-INSERT INTO t1 VALUES (2, 2);
+Q0-T1-COMMIT;
+
+Q1-T1-BEGIN set_isolation=repeatable-read;
+Q2-T1-SELECT * FROM t1;
+
+ Q3-T2-BEGIN set_isolation=serializable;
+ Q4-T2-INSERT INTO t1 VALUES (3, 3);
+ Q5-T2-COMMIT;
+
+Q6-T1-SELECT * FROM t1 WHERE k=1;
+Q7-T1-SELECT * FROM t1;
+Q8-T1-COMMIT;
diff --git a/src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt b/src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt
new file mode 100755
index 00000000..b0f99e62
--- /dev/null
+++ b/src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt
@@ -0,0 +1,27 @@
+
+
+Q0-T1-DROP TABLE IF EXISTS t1;
+Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT);
+Q0-T1-INSERT INTO t1 VALUES (0, 0);
+Q0-T1-INSERT INTO t1 VALUES (1, 0);
+Q0-T1-COMMIT;
+
+
+Q1-T1-BEGIN set_isolation=read-committed;
+Q2-T1-SELECT * FROM t1 WHERE k=1;
+
+ Q3-T2-BEGIN set_isolation=read-committed;
+ Q4-T2-UPDATE t1 SET v=1 WHERE k=1;
+
+Q6-T1-SELECT * FROM t1 WHERE k=1;
+
+ Q7-T2-COMMIT;
+
+Q8-T1-COMMIT;
+
+
+
+
+
+Test Result:
+
diff --git a/src/dbtest/pg/mda_detect_test/rc_rc_cyclic.txt b/src/dbtest/pg/mda_detect_test/rc_rc_cyclic.txt
new file mode 100755
index 00000000..3a6c5f09
--- /dev/null
+++ b/src/dbtest/pg/mda_detect_test/rc_rc_cyclic.txt
@@ -0,0 +1,28 @@
+
+
+Q0-T1-DROP TABLE IF EXISTS t1;
+Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT);
+Q0-T1-INSERT INTO t1 VALUES (0, 0);
+Q0-T1-INSERT INTO t1 VALUES (1, 0);
+Q0-T1-COMMIT;
+
+
+Q1-T1-BEGIN set_isolation=read-committed;
+Q2-T1-UPDATE t1 SET v=1 WHERE k=0;
+
+ Q3-T2-BEGIN set_isolation=read-committed;
+ Q4-T2-UPDATE t1 SET v=1 WHERE k=1;
+ Q5-T2-SELECT * FROM t1 WHERE k=0;
+
+Q6-T1-SELECT * FROM t1 WHERE k=1;
+
+ Q7-T2-COMMIT;
+
+Q8-T1-COMMIT;
+
+
+
+
+
+Test Result:
+
diff --git a/src/dbtest/pg/mda_detect_test/rr_rr_cyclic.txt b/src/dbtest/pg/mda_detect_test/rr_rr_cyclic.txt
new file mode 100755
index 00000000..8cf92abd
--- /dev/null
+++ b/src/dbtest/pg/mda_detect_test/rr_rr_cyclic.txt
@@ -0,0 +1,27 @@
+
+
+Q0-T1-DROP TABLE IF EXISTS t1;
+Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT);
+Q0-T1-INSERT INTO t1 VALUES (0, 0);
+Q0-T1-INSERT INTO t1 VALUES (1, 0);
+Q0-T1-COMMIT;
+
+
+Q1-T1-BEGIN set_isolation=repeatable-read;
+Q2-T1-SELECT * FROM t1 WHERE k=1;
+
+ Q3-T2-BEGIN set_isolation=repeatable-read;
+ Q4-T2-UPDATE t1 SET v=1 WHERE k=1;
+
+Q6-T1-SELECT * FROM t1 WHERE k=1;
+
+ Q7-T2-COMMIT;
+
+Q8-T1-COMMIT;
+
+
+
+
+
+Test Result:
+
diff --git a/src/dbtest/pg/mda_detect_test/rr_rr_cyclic_commit.txt b/src/dbtest/pg/mda_detect_test/rr_rr_cyclic_commit.txt
new file mode 100755
index 00000000..aded104d
--- /dev/null
+++ b/src/dbtest/pg/mda_detect_test/rr_rr_cyclic_commit.txt
@@ -0,0 +1,25 @@
+
+
+Q0-T1-DROP TABLE IF EXISTS t1;
+Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT);
+Q0-T1-INSERT INTO t1 VALUES (0, 0);
+Q0-T1-INSERT INTO t1 VALUES (1, 0);
+Q0-T1-COMMIT;
+
+
+Q1-T1-BEGIN set_isolation=repeatable-read;
+Q2-T1-SELECT * FROM t1 WHERE k=1;
+
+ Q3-T2-BEGIN set_isolation=repeatable-read;
+ Q4-T2-UPDATE t1 SET v=1 WHERE k=1;
+ Q5-T2-COMMIT;
+
+Q6-T1-SELECT * FROM t1 WHERE k=1;
+Q7-T1-COMMIT;
+
+
+
+
+
+Test Result:
+
diff --git a/src/dbtest/pg/mda_detect_test/rr_s_s_avoid.txt b/src/dbtest/pg/mda_detect_test/rr_s_s_avoid.txt
new file mode 100755
index 00000000..7ba19745
--- /dev/null
+++ b/src/dbtest/pg/mda_detect_test/rr_s_s_avoid.txt
@@ -0,0 +1,25 @@
+
+
+
+Q0-T1-DROP TABLE IF EXISTS t1;
+Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT);
+Q0-T1-INSERT INTO t1 VALUES (0, 0);
+Q0-T1-INSERT INTO t1 VALUES (1, 1);
+Q0-T1-INSERT INTO t1 VALUES (2, 2);
+Q0-T1-COMMIT;
+
+Q1-T1-BEGIN set_isolation=repeatable-read;
+Q2-T1-SELECT * FROM t1 WHERE k=1;
+
+ Q3-T2-BEGIN set_isolation=serializable;
+ Q4-T2-INSERT INTO t1 VALUES (3, 3);
+ Q5-T2-COMMIT;
+
+Q6-T1-SELECT * FROM t1 WHERE k=1;
+Q7-T1-SELECT * FROM t1;
+Q8-T1-COMMIT;
+
+ Q9-T3-BEGIN set_isolation=serializable ;
+ Q10-T3-SELECT * FROM t1 WHERE k=3;
+ Q11-T3-COMMIT;
+
diff --git a/src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt b/src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt
new file mode 100755
index 00000000..09a35143
--- /dev/null
+++ b/src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt
@@ -0,0 +1,28 @@
+
+
+Q0-T1-DROP TABLE IF EXISTS t1;
+Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT);
+Q0-T1-INSERT INTO t1 VALUES (0, 0);
+Q0-T1-INSERT INTO t1 VALUES (1, 0);
+Q0-T1-COMMIT;
+
+
+Q1-T1-BEGIN set_isolation=read-uncommitted;
+Q2-T1-UPDATE t1 SET v=1 WHERE k=0;
+
+ Q3-T2-BEGIN set_isolation=read-uncommitted;
+ Q4-T2-UPDATE t1 SET v=1 WHERE k=1;
+ Q5-T2-SELECT * FROM t1 WHERE k=0;
+
+Q6-T1-SELECT * FROM t1 WHERE k=1;
+
+ Q7-T2-COMMIT;
+
+Q8-T1-COMMIT;
+
+
+
+
+
+Test Result:
+
diff --git a/src/dbtest/pg/mda_detect_test/s_s_avoid.txt b/src/dbtest/pg/mda_detect_test/s_s_avoid.txt
new file mode 100755
index 00000000..dc18cce5
--- /dev/null
+++ b/src/dbtest/pg/mda_detect_test/s_s_avoid.txt
@@ -0,0 +1,25 @@
+
+
+Q0-T1-DROP TABLE IF EXISTS t1;
+Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT);
+Q0-T1-INSERT INTO t1 VALUES (0, 0);
+Q0-T1-INSERT INTO t1 VALUES (1, 0);
+Q0-T1-COMMIT;
+
+
+Q1-T1-BEGIN set_isolation=serializable ;
+Q2-T1-UPDATE t1 SET v=1 WHERE k=0;
+Q3-T1-SELECT * FROM t1 WHERE k=1;
+ Q4-T2-BEGIN set_isolation=serializable;
+ Q5-T2-UPDATE t1 SET v=1 WHERE k=1;
+ Q6-T2-SELECT * FROM t1 WHERE k=0;
+ Q7-T2-COMMIT;
+
+Q8-T1-COMMIT;
+
+
+
+
+
+Test Result:
+
diff --git a/src/dbtest/pg/mda_detect_test/s_s_cyclic.txt b/src/dbtest/pg/mda_detect_test/s_s_cyclic.txt
new file mode 100755
index 00000000..a2f24b06
--- /dev/null
+++ b/src/dbtest/pg/mda_detect_test/s_s_cyclic.txt
@@ -0,0 +1,28 @@
+
+
+Q0-T1-DROP TABLE IF EXISTS t1;
+Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT);
+Q0-T1-INSERT INTO t1 VALUES (0, 0);
+Q0-T1-INSERT INTO t1 VALUES (1, 0);
+Q0-T1-COMMIT;
+
+
+Q1-T1-BEGIN set_isolation=serializable ;
+Q2-T1-UPDATE t1 SET v=1 WHERE k=0;
+
+ Q3-T2-BEGIN set_isolation=serializable;
+ Q4-T2-UPDATE t1 SET v=1 WHERE k=1;
+ Q5-T2-SELECT * FROM t1 WHERE k=0;
+
+Q6-T1-SELECT * FROM t1 WHERE k=1;
+
+ Q7-T2-COMMIT;
+
+Q8-T1-COMMIT;
+
+
+
+
+
+Test Result:
+
diff --git a/src/dbtest/pg/mda_detect_test/s_s_cyclic_pr.txt b/src/dbtest/pg/mda_detect_test/s_s_cyclic_pr.txt
new file mode 100755
index 00000000..dc47f13f
--- /dev/null
+++ b/src/dbtest/pg/mda_detect_test/s_s_cyclic_pr.txt
@@ -0,0 +1,20 @@
+
+
+
+Q0-T1-DROP TABLE IF EXISTS t1;
+Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT);
+Q0-T1-INSERT INTO t1 VALUES (0, 0);
+Q0-T1-INSERT INTO t1 VALUES (1, 1);
+Q0-T1-INSERT INTO t1 VALUES (2, 2);
+Q0-T1-COMMIT;
+
+Q1-T1-BEGIN set_isolation=serializable;
+Q2-T1-SELECT * FROM t1;
+
+ Q3-T2-BEGIN set_isolation=serializable;
+ Q4-T2-INSERT INTO t1 VALUES (3, 3);
+ Q5-T2-COMMIT;
+
+Q6-T1-SELECT * FROM t1 WHERE k=1;
+Q7-T1-SELECT * FROM t1;
+Q8-T1-COMMIT;
From 8484c7b20a819a03615eb6a08d256ba4d428a456 Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Mon, 15 Jul 2024 10:44:50 +0000
Subject: [PATCH 08/16] bug
---
src/dbtest/src/mda_detect.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py
index 55e1f1c1..5e885bb2 100644
--- a/src/dbtest/src/mda_detect.py
+++ b/src/dbtest/src/mda_detect.py
@@ -211,10 +211,13 @@ def insert_edge(data1, data2, indegree, edge, txn):
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
#* serializable: Phantom Read
- elif edge_type in ["ICR","IR","DCR","DR","RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable":
+ elif edge_type in ["RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable":
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
-
+ #* serializable: Phantom Read
+ elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable":
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
# 入边
# elif txn[data1.txn_num].isolation == "read-uncommitted":
# if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
From 690703b23fab59ae232527ae3e26538c4dab5338 Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Mon, 15 Jul 2024 11:01:04 +0000
Subject: [PATCH 09/16] comment translate
---
src/dbtest/src/mda_detect.py | 57 +++++++-----------------------------
1 file changed, 11 insertions(+), 46 deletions(-)
diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py
index 5e885bb2..79df82ca 100644
--- a/src/dbtest/src/mda_detect.py
+++ b/src/dbtest/src/mda_detect.py
@@ -195,10 +195,10 @@ def insert_edge(data1, data2, indegree, edge, txn):
if data1.txn_num == data2.txn_num or edge_type in ["RCR", "RR"]:
return
#* read-uncommitted: Dirty Write
- # WI 不存在,如果有,那么一定会有 WD + DI 的等效边
- # II 不存在,如果有,那么一定会有 ID + DI 的等效边
- # DW 允许存在, UPDATE 时使用条件查询包含 D 的数据
- # DD 不存在,如果有,那么一定会有 DI + ID 的等效边
+ # WI does not exist. If it does, there must be an equivalent edge of WD + DI
+ # II does not exist. If it does, there must be an equivalent edge of ID + DI
+ # DW is allowed to exist. When UPDATE, use the condition to query the data containing D
+ # DD does not exist. If it does, there must be an equivalent edge of DI + ID
if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]:
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
@@ -218,41 +218,6 @@ def insert_edge(data1, data2, indegree, edge, txn):
elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable":
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- # 入边
- # elif txn[data1.txn_num].isolation == "read-uncommitted":
- # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
- # if edge_type[-1] == 'R': # not R -- R
- # if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'): # 可能脏读
- # indegree[data2.txn_num] += 1
- # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- # if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W':
- # indegree[data2.txn_num] += 1
- # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- # if txn[data2.txn_num].isolation == "serializable":
- # indegree[data2.txn_num] += 1
- # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- # elif edge_type[-1] != 'R': # not R -- not R
- # indegree[data2.txn_num] += 1
- # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- # elif txn[data1.txn_num].isolation == "read-committed" or txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable":
- # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)):
- # if edge_type[-1] == 'R': # not R -- R
- # if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读
- # indegree[data2.txn_num] += 1
- # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- # if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W':
- # indegree[data2.txn_num] += 1
- # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- # if txn[data2.txn_num].isolation == "serializable":
- # indegree[data2.txn_num] += 1
- # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- # elif edge_type[-1] != 'R': # not R -- not R
- # indegree[data2.txn_num] += 1
- # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
- # elif edge_type[0] == 'R' and edge_type[-1] != 'R':
- # indegree[data2.txn_num] += 1
- # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
-
def init_record(query, version_list):
key = find_data(query, "(")
@@ -512,13 +477,13 @@ def print_error(result_folder, ts_now, error_message):
#! ------Some assumption------
-# 在任何隔离级别事务的修改互相可见,即等价于单一存储,无读写缓冲
-# 在输入文件中有设置各个事务隔离级别的语句,在 "BEGIN 之后"
- # BEGIN T1 set_isolation=repeatable-read
- # BEGIN T2 set_isolation=serializable
- # BEGIN T3 set_isolation=read-uncommitted
- # BEGIN T4 set_isolation=read-committed
-# 假定插入的数据 key 是从 0 向上递增的顺序
+# The modifications of transactions at any isolation level are mutually visible, which is equivalent to a single storage, without read-write buffer
+# There are statements to set the isolation level of each transaction in the input file, after "BEGIN"
+ # BEGIN T1 set_isolation=repeatable-read
+ # BEGIN T2 set_isolation=serializable
+ # BEGIN T3 set_isolation=read-uncommitted
+ # BEGIN T4 set_isolation=read-committed
+# Assume that the inserted data key is in ascending order from 0
run_result_folder = "pg/mda_detect_test"
result_folder = "check_result/" + run_result_folder
From 3286a99f2cc37642977364be0401321f90306040 Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Mon, 15 Jul 2024 14:38:45 +0000
Subject: [PATCH 10/16] loop detect optimize
---
src/dbtest/src/mda_detect.py | 102 +++++++++++++++++++++++------------
1 file changed, 67 insertions(+), 35 deletions(-)
diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py
index 79df82ca..40d10c9f 100644
--- a/src/dbtest/src/mda_detect.py
+++ b/src/dbtest/src/mda_detect.py
@@ -18,11 +18,12 @@
class Edge:
- def __init__(self, type, out):
+ def __init__(self, type, out, begin_time):
self.type = type
self.out = out
+ self.time = begin_time
def __repr__(self):
- return "Edge(type={}, out={})".format(self.type, self.out)
+ return "Edge(begin_time={}, type={}, out={})".format(self.time, self.type, self.out)
class Operation:
def __init__(self, op_type, txn_num, op_time, value):
@@ -51,9 +52,10 @@ def print_graph(edge,txn):
# print data_op_list
def print_data_op_list(data_op_list):
for k,list in enumerate(data_op_list):
- print("\nk:{}---".format(k))
- for i, data in enumerate(list):
- print("op:{}--{}-".format(data.op_type,data.txn_num))
+ if k< len(data_op_list)-1:
+ print("\nk:{}---".format(k))
+ for i, data in enumerate(list):
+ print("op:{}--{}-".format(data.op_type,data.txn_num))
# find total variable number
def get_total(lines):
@@ -164,17 +166,16 @@ def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type):
# decide which operation comes first depending on the read or write version
# if later operation happened after the first txn commit time, edge type will add "C"
def get_edge_type(data1, data2, txn):
- # if data1.value <= data2.value:
- # before, after = data1, data2
- # else:
- # before, after = data2, data1
- before, after = data1, data2
+ if data1.op_time <= data2.op_time:
+ before, after = data1, data2
+ else:
+ before, after = data2, data1
# if data1.op_type == "D" or data2.op_type == "D":
# if data1.value < data2.value:
# before, after = data2, data1
# else:
# before, after = data1, data2
- if data2.op_time > txn[data1.txn_num].end_ts:
+ if after.op_time > txn[before.txn_num].end_ts:
state = "C"
else:
state = ""
@@ -201,23 +202,23 @@ def insert_edge(data1, data2, indegree, edge, txn):
# DD does not exist. If it does, there must be an equivalent edge of DI + ID
if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]:
indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time))
#* read-committed: Dirty Read
elif edge_type in ["WCR","WR"] and (txn[data2.txn_num].isolation == "read-committed" or txn[data2.txn_num].isolation == "repeatable-read" or txn[data2.txn_num].isolation == "serializable"):
indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time))
#* repeatable-read: Unrepeatable Read
elif edge_type in ["RCW","RW"] and (txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable"):
indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time))
#* serializable: Phantom Read
elif edge_type in ["RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable":
indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time))
#* serializable: Phantom Read
elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable":
indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num,data1.op_time))
def init_record(query, version_list):
key = find_data(query, "(")
@@ -425,29 +426,59 @@ def check_cycle(edge, indegree, total):
# for loop graphs, print the loop
-def dfs(result_folder, ts_now, now, type):
- visit1[now] = 1
- if visit[now] == 1: return
- visit[now] = 1
- path.append(now)
- edge_type.append(type)
- for v in edge[now]:
+def dfs(result_folder, ts_now , e):
+ visit1[e.out] = 1
+ if visit[e.out] == 1: return
+ visit[e.out] = 1
+ path.append(e)
+ for v in edge[e.out]:
if visit[v.out] == 0:
- dfs(result_folder, ts_now, v.out, v.type)
+ dfs(result_folder, ts_now, v)
else:
- path.append(v.out)
- edge_type.append(v.type)
+ path.append(v)
with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
- for i in range(0, len(path)):
- f.write(str(path[i]))
- if i != len(path) - 1: f.write("->" + edge_type[i+1] + "->")
- f.write("\n\n")
+ content = ""
+ list_loop = []
+ for i in range(len(path) - 1, -1, -1):
+ if i != len(path) - 1 and path[i].out == path[len(path) - 1].out:
+ break
+ index = 0
+ while(index < len(list_loop) and path[list_loop[index]].time < path[i].time):
+ index += 1
+ list_loop.insert(index,i)
+ for idx in list_loop:
+ content = content + "->" + path[idx].type + "->" + str(path[idx].out)
+ content = str(path[list_loop[-1]].out) + content + "\n\n"
+ f.write(content)
path.pop()
- edge_type.pop()
path.pop()
- edge_type.pop()
- visit[now] = 0
-
+ visit[e.out] = 0
+
+
+# # for loop graphs, print the loop
+# # Contains redundant edge information and the starting point of the ring is unreasonable
+# def dfs(result_folder, ts_now, now, type):
+# visit1[now] = 1
+# if visit[now] == 1: return
+# visit[now] = 1
+# path.append(now)
+# edge_type.append(type)
+# for v in edge[now]:
+# if visit[v.out] == 0:
+# dfs(result_folder, ts_now, v.out, v.type)
+# else:
+# path.append(v.out)
+# edge_type.append(v.type)
+# with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
+# for i in range(0, len(path)):
+# f.write(str(path[i]))
+# if i != len(path) - 1: f.write("->" + edge_type[i+1] + "->")
+# f.write("\n\n")
+# path.pop()
+# edge_type.pop()
+# path.pop()
+# edge_type.pop()
+# visit[now] = 0
def print_path(result_folder, ts_now, edge):
with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
@@ -546,7 +577,8 @@ def print_error(result_folder, ts_now, error_message):
output_result(file, result_folder, ts_now, "Cyclic")
for i in range(total_num_txn + 2):
if visit1[i] == 0:
- dfs(result_folder, ts_now, i, "null")
+ # dfs(result_folder, ts_now, i, "null")
+ dfs(result_folder, ts_now, Edge("null",i,-1))
else:
output_result(file, result_folder, ts_now, "Avoid")
print_path(result_folder, ts_now, edge)
From 168ddc8f23f0d7ed4320d41e0fd6e9eb7d342486 Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Mon, 29 Jul 2024 06:52:54 +0000
Subject: [PATCH 11/16] remove doc from commit
---
src/dbtest/src/doc/mda_detect_modify.md | 51 -------------------------
src/dbtest/src/doc/mda_detect_read.md | 36 -----------------
2 files changed, 87 deletions(-)
delete mode 100644 src/dbtest/src/doc/mda_detect_modify.md
delete mode 100644 src/dbtest/src/doc/mda_detect_read.md
diff --git a/src/dbtest/src/doc/mda_detect_modify.md b/src/dbtest/src/doc/mda_detect_modify.md
deleted file mode 100644
index 3c64dbda..00000000
--- a/src/dbtest/src/doc/mda_detect_modify.md
+++ /dev/null
@@ -1,51 +0,0 @@
-[text](mda_detect.py) 修改日志
-# 思考&分析
-1. 用于加边建立图的节点对应一个操作还是一个事务。答:一个事务。
-2. 目标:一个对数据库的操作文件(运行效果文件)中有多个事务,每一个事务有不同的隔离级别,通过执行结果和隔离级别判断是否满足一致性
-3. 原来输出未所有事务是否满足一致性,现在输出为每个事务是否满足一致性? 单个整体报错 or 多个报错? 多个,每个错误都识别,兼容单个整体报错(实现较难)
-4. 有两个检测思路:
- 1. 修改建立图的过程中加边策略,保留循环检测流程。(当前实现方式)
- 2. 保留加边策略,修改冲突检测流程。
-# 原来代码中问题
-### 数组访问越界问题
-现象
-```python
-total_num = get_total(lines) # 统计的个数是插入数据的个数,不是事务的个数。
-txn = [Txn() for i in range(total_num + 2)] # 导致构造的 txn 数组较小
-.... # 还有 indegree edge 数组的大小应该是事务的个数。
-```
-解决:构造一个新函数获取事务个数。
-```python
-# find total Txn number
-def get_total_txn(lines):
- num = 0
- for query in lines:
- query = query.replace("\n", "")
- query = query.replace(" ", "")
- if query[0:1] == "Q" and query.find("T") != -1:
- tmp = find_data(query, "T")
- num = max(num, tmp)
- return num
-total_num_txn = get_total_txn(lines) # total number of txn
-```
-效果:不同数据使用不同的初始长度
-```python
-# total_num: data_op_list, version_list
-# total_num_txn: txn, edge, total_num_txn, visit, visit1
-```
-
-
-### 默认字符串少了空格
-```python
- pos = query.find("finished at:")
- pos += len("finished at:")
-```
-
-### "R" 类型的操作并没有修改 value 值为下标:
-```python
- if data1.value <= data2.value:
- before, after = data1, data2
- else:
- before, after = data2, data1
-```
-
diff --git a/src/dbtest/src/doc/mda_detect_read.md b/src/dbtest/src/doc/mda_detect_read.md
deleted file mode 100644
index ebf0b144..00000000
--- a/src/dbtest/src/doc/mda_detect_read.md
+++ /dev/null
@@ -1,36 +0,0 @@
-
-## mda_detect.py 代码功能
-主要用于检测数据库事务之间的并发关系,以及是否存在循环依赖。具体来说,代码通过解析输入的SQL语句,构建事务操作的有向图,检测是否存在循环依赖(即事务之间是否存在无法解决的并发冲突),并输出结果。
-### 变量命名含义
-
-| 名称 | 含义 | 备注 |
-| ------------ | ------------------------------------- | ------------------- |
-| data | 一次操作信息:type、txn_num、op_time、op_data | |
-| data_op_list | 数据操作列表 【数据key:【操作...】, 数据key:【操作...】】 | |
-| indegree | 下标:事务号;元素:依赖该事务的事务个数 | |
-| edge | 下标:事务号;元素:该事务的出边 | |
-| version_list | key:数据编号;value:数据的值的历史记录 | 下标是 value? |
-| total_num | 数据库操作总数 | 对于数据库全表扫描类的操作有关键作用。 |
-
-### 函数列表
-
-| 函数 | 功能 | 备注 |
-| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------- |
-| get_total | 获取一个测试文件中 Q0 部分插入的数据个数 | key 的个数 |
-| find_data | 特定位置提取一个数字 | |
-| set_finish_time | 在一个数据库操作语句执行完毕后更新相关的时间戳:finishedat
1. 所有事务中 begin_ts | end_ts 和 op_time 相等的进行替换
2. 所有操作中 op_time 和 op_time 相等的进行替换
3. version_list 保存操作历史值 op.value
4. op.value 保存 version_list 历史下标 | |
-| check_concurrency | 检查两个事务和是否并发 | 通过开始时间和结束时间来判断,默认开始时间之间已经比较过了吗? |
-| get_edge_type | 确定两个操作之间的边的类型,添加C 标志,跨事务操作,同时返回新的操作顺序 | data1 和 data2 是同一个 Key 的前后两个操作 |
-| build_graph | 建立一个有向图,这个图表示不同操作之间的并发关系 | 只会在同一组操作之间建立边 |
-| insert_edge | 具体的插边操作(check_concurrency 的前提下插入 get_edge_type 边):
不同事务之间有并发读写冲突; | data1 发生时间默认在data2 之前 |
-| init_record | 根据查询中的信息初始化版本列表中的记录。 | |
-| readVersion_record | 处理数据库查询操作 | 只是更改了: op.value |
-| read_record | 根据查询中的信息读取记录并更新数据操作。增加到 data_op_list 中。 | |
-| write_record | 根据查询和更新数据操作中的信息写入记录。增加到 data_op_list 中。 | |
-| delete_record | 根据查询中的信息删除记录并更新数据操作。增加到 data_op_list 中。 | |
-| insert_record | 根据查询中的信息插入记录并更新数据操作。 | |
-| end_record | 设置事务的结束时间戳。 | |
-| operation_record | 记录并且处理数据库操作。 | |
-| remove_unfinished_operation | 删除失败的语句以防止构建冗余边 | |
-| check_cycle | 在有向图中查找环 | |
-| dfs | 在有环的有向图中找环 | |
From 89d3f56a9e1fe5dc6f67bb6d8de16f2f6588ca68 Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Mon, 29 Jul 2024 07:24:04 +0000
Subject: [PATCH 12/16] comments restore, delete and add
---
src/dbtest/src/mda_detect.py | 377 ++++++++++++++++++++++++++++++++---
1 file changed, 353 insertions(+), 24 deletions(-)
diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py
index 40d10c9f..9b410f51 100644
--- a/src/dbtest/src/mda_detect.py
+++ b/src/dbtest/src/mda_detect.py
@@ -39,6 +39,17 @@ def __init__(self):
self.end_ts = 99999999999999999999
self.isolation = "serializable"
+
+"""
+Print the graph edges after building the graph.
+
+Args:
+- edge (list): A list of Edge lists
+- txn (list): A list of Txn objects
+
+Returns:
+None
+"""
# print edge after build graph
def print_graph(edge,txn):
for i, edges in enumerate(edge):
@@ -49,6 +60,15 @@ def print_graph(edge,txn):
print(" {}".format(e))
+"""
+Print the contents of the data operation list.
+
+Args:
+- data_op_list (list): A list of Operation lists
+
+Returns:
+None
+"""
# print data_op_list
def print_data_op_list(data_op_list):
for k,list in enumerate(data_op_list):
@@ -57,6 +77,15 @@ def print_data_op_list(data_op_list):
for i, data in enumerate(list):
print("op:{}--{}-".format(data.op_type,data.txn_num))
+"""
+Find the total variable number.
+
+Args:
+- lines (list): A list of queries.
+
+Returns:
+int: The maximum variable number found in the queries.
+"""
# find total variable number
def get_total(lines):
num = 0
@@ -81,6 +110,17 @@ def get_total_txn(lines):
num = max(num, tmp)
return num
+
+"""
+Extract the data we need from a query.
+
+Args:
+- query (str): The input query string.
+- target (str): The target substring to search for.
+
+Returns:
+int: The extracted data value, or -1 if not found.
+"""
# extract the data we need in query
def find_data(query, target):
pos = query.find(target)
@@ -109,6 +149,20 @@ def find_isolation(query):
if query.find("serializable") != -1:
return "serializable"
+"""
+When a statement is executed, this function sets the end time, modifies the transaction list,
+and updates the version list as needed.
+
+Args:
+- op_time (int): The operation time of the statement.
+- data_op_list (list): A list of data operations.
+- query (str): The query string containing information about the statement execution.
+- txn (list): A list of transaction objects.
+- version_list (list): A list of version lists for data operations.
+
+Returns:
+None
+"""
# when a statement is executed, set the end time and modify the version list
def set_finish_time(op_time, data_op_list, query, txn, version_list):
# pos = query.find("finished at:")
@@ -145,6 +199,18 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list):
version_list[i].append(op.value)
op.value = len(version_list[i]) - 1
+
+"""
+Check if two transactions are concurrent based on their start and end times.
+
+Args:
+- data1: Information about the first transaction.
+- data2: Information about the second transaction.
+- txn: A list of transaction objects.
+
+Returns:
+bool: True if the transactions are concurrent, False otherwise.
+"""
# if both transactions are running
# or the start time of the second transaction is less than the end time of the first transaction
# we think they are concurrent
@@ -157,12 +223,40 @@ def check_concurrency(data1, data2, txn):
return False
+"""
+Check if a specific edge exists between two transactions in the graph.
+
+Args:
+- edge (list): A list of lists, where each sublist contains edge objects representing the connections in the graph.
+- src_txn (int): The source transaction number, which the edge originates from.
+- src_type (str): The operation type (e.g., 'R', 'W') at the source of the edge.
+- tar_txn (int): The target transaction number, which the edge points to.
+- tar_type (str): The operation type (e.g., 'R', 'W') at the target of the edge.
+
+Returns:
+bool: True if the specified edge exists, False otherwise.
+"""
def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type):
for e in edge[src_txn]:
if e.out == tar_txn and e.type[0] == src_type and e.type[-1] == tar_type:
return True
return False
+
+"""
+Determine the type of edge between two operations based on their read or write versions.
+
+Args:
+- data1: Information about the first operation.
+- data2: Information about the second operation.
+- txn: A list of transaction objects.
+
+Returns:
+tuple: A tuple containing three values:
+ - A string indicating the edge type ('R', 'W', 'CR', 'CW').
+ - Information about the operation that comes first.
+ - Information about the operation that comes second.
+"""
# decide which operation comes first depending on the read or write version
# if later operation happened after the first txn commit time, edge type will add "C"
def get_edge_type(data1, data2, txn):
@@ -181,7 +275,22 @@ def get_edge_type(data1, data2, txn):
state = ""
return before.op_type + state + after.op_type, before, after
+"""
+Build a directed graph representing the concurrency relationships between operations.
+
+Args:
+- data_op_list: A list of lists, where each inner list contains information about operations for a specific transaction.
+- indegree: A list representing the in-degrees of each operation node in the graph.
+- edge: A list representing the edges (concurrency relationships) between operations.
+- txn: A list of transaction objects.
+
+This function constructs a directed graph where nodes represent operations, and edges represent concurrency relationships
+between operations. It iterates through the list of operations for each transaction and calls the 'insert_edge' function
+to create edges in the graph based on concurrency relationships.
+Returns:
+None
+"""
def build_graph(data_op_list, indegree, edge, txn):
for list1 in data_op_list:
for i, data in enumerate(list1):
@@ -189,7 +298,25 @@ def build_graph(data_op_list, indegree, edge, txn):
insert_edge(list1[j], data, indegree, edge, txn)
+"""
+Insert an edge into the directed graph representing concurrency relationships between operations.
+
+Args:
+- data1: An operation object representing the first operation.
+- data2: An operation object representing the second operation.
+- indegree: A list representing the in-degrees of each transaction in the graph.
+- edge: A list representing the edges (concurrency relationships) between operations for each transaction.
+- txn: A list of transaction objects.
+
+This function inserts an edge into the directed graph to represent the concurrency relationship between 'data1' and 'data2'.
+It first checks if the two operations are concurrent by calling the 'check_concurrency' function. If they are concurrent, it
+determines the edge type using the 'get_edge_type' function and adds the edge to the 'edge' list.
+The 'indegree' list is updated to reflect the in-degree of the target transaction node when an edge is inserted.
+
+Returns:
+None
+"""
def insert_edge(data1, data2, indegree, edge, txn):
if check_concurrency(data1, data2, txn):
edge_type, data1, data2 = get_edge_type(data1, data2, txn)
@@ -220,12 +347,42 @@ def insert_edge(data1, data2, indegree, edge, txn):
indegree[data2.txn_num] += 1
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num,data1.op_time))
+"""
+Initialize a record in the version list based on the information in the query.
+
+Args:
+- query: A query string that contains information about a record.
+- version_list: A list of lists representing versioned records.
+
+This function initializes a record in the 'version_list' based on the information provided in the 'query'. It extracts the 'key'
+and 'value' of the record from the query using the 'find_data' function and appends the 'value' to the corresponding version list.
+
+Returns:
+None
+"""
def init_record(query, version_list):
key = find_data(query, "(")
value = find_data(query, ",")
version_list[key].append(value)
+"""
+Read the versioned record based on the information in the query.
+
+Args:
+- query (str): A query string that contains information about reading a versioned record.
+- op_time (int): The operation time of the read operation.
+- data_op_list (list): A list of lists representing data operations.
+- version_list (list): A list of lists representing versioned records.
+
+This function reads the versioned record specified in the 'query'. It extracts the 'key' and 'value' from the query, which are
+used to identify the record and version to read. The function checks if the specified version exists in the version list and
+updates the 'op.value' accordingly. If the version doesn't exist or if the read operation is not successful, an error message
+is returned.
+
+Returns:
+str: An error message indicating the result of the read operation. An empty string means the read was successful.
+"""
def readVersion_record(query, op_time, data_op_list, version_list):
error_message = ""
data = query.split(")")
@@ -272,6 +429,27 @@ def readVersion_record(query, op_time, data_op_list, version_list):
+"""
+Read records based on the information in the query and update data operations.
+
+Args:
+- op_time (int): The operation time of the read operation.
+- txn_num (int): The transaction number.
+- total_num (int): The total number of records.
+- txn (list): A list of transactions.
+- data_op_list (list): A list of lists representing data operations.
+
+This function reads records specified in the query and updates the 'data_op_list' accordingly. It extracts information from
+the 'query' to determine which records to read and what type of operation to perform (read or predicate). The function also
+sets the 'begin_ts' of the transaction if it's not already set.
+
+The 'query' is analyzed to identify specific record keys or predicates and create corresponding 'Operation' objects in the
+'data_op_list'. Depending on the structure of the query, this function handles various cases, such as reading single records,
+handling predicates, and selecting all rows in a table.
+
+Returns:
+None
+"""
def read_record(op_time, txn_num, total_num, txn, data_op_list):
if txn[txn_num].begin_ts == -1:
txn[txn_num].begin_ts = op_time
@@ -300,6 +478,24 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list):
data_op_list[i].append(Operation("R", txn_num, op_time, i))
+"""
+Write records based on the information in the query and update data operations.
+
+Args:
+- op_time (int): The operation time of the write operation.
+- txn_num (int): The transaction number.
+- txn (list): A list of transactions.
+- data_op_list (list): A list of lists representing data operations.
+
+This function writes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the
+'query' to determine which records to write and what type of operation to perform (write). The function also sets the 'begin_ts'
+of the transaction if it's not already set.
+
+The 'query' is analyzed to identify specific record keys and values, and it creates corresponding 'Operation' objects in the 'data_op_list'.
+
+Returns:
+None
+"""
def write_record(op_time, txn_num, txn, data_op_list):
if txn[txn_num].begin_ts == -1:
txn[txn_num].begin_ts = op_time
@@ -327,6 +523,25 @@ def write_record(op_time, txn_num, txn, data_op_list):
for i in range(total_num+1):
data_op_list[i].append(Operation("W", txn_num, op_time, i))
+
+"""
+Delete records based on the information in the query and update data operations.
+
+Args:
+- op_time (int): The operation time of the delete operation.
+- txn_num (int): The transaction number.
+- txn (list): A list of transactions.
+- data_op_list (list): A list of lists representing data operations.
+
+This function deletes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the
+'query' to determine which records to delete and what type of operation to perform (delete). The function also sets the 'begin_ts'
+of the transaction if it's not already set.
+
+The 'query' is analyzed to identify specific record keys, and it creates corresponding 'Operation' objects in the 'data_op_list'.
+
+Returns:
+None
+"""
def delete_record(op_time, txn_num, txn, data_op_list):
if txn[txn_num].begin_ts == -1:
txn[txn_num].begin_ts = op_time
@@ -352,6 +567,26 @@ def delete_record(op_time, txn_num, txn, data_op_list):
for i in range(total_num+1):
data_op_list[i].append(Operation("D", txn_num, op_time, i))
+
+"""
+Insert records based on the information in the query and update data operations.
+
+Args:
+- op_time (int): The operation time of the insert operation.
+- txn_num (int): The transaction number.
+- txn (list): A list of transactions.
+- data_op_list (list): A list of lists representing data operations.
+
+This function inserts records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the
+'query' to determine which records to insert and what type of operation to perform (insert). The function also sets the 'begin_ts'
+of the transaction if it's not already set.
+
+The 'query' is analyzed to identify specific record keys and their corresponding values, and it creates corresponding 'Operation'
+objects in the 'data_op_list'.
+
+Returns:
+None
+"""
def insert_record(op_time, txn_num, txn, data_op_list):
if txn[txn_num].begin_ts == -1 and op_time != 0:
txn[txn_num].begin_ts = op_time
@@ -360,11 +595,40 @@ def insert_record(op_time, txn_num, txn, data_op_list):
data_op_list[key].append(Operation("I", txn_num, op_time, value))
+"""
+Set the end timestamp for a transaction.
+
+Args:
+- op_time (int): The operation time when the transaction ends.
+- txn_num (int): The transaction number.
+- txn (list): A list of transactions.
+
+This function sets the 'end_ts' attribute of a transaction specified by 'txn_num' to the given 'op_time'. It marks the end of the
+transaction's execution.
+
+Returns:
+None
+"""
def end_record(op_time, txn_num, txn):
txn[txn_num].end_ts = op_time
+"""
+Record and process database operations.
+
+Args:
+- total_num (int): The highest record (variable) key; records are indexed 0..total_num.
+- query (str): The SQL query representing a database operation.
+- txn (list): A list of transactions.
+- data_op_list (list): A list of data operations.
+- version_list (list): A list of version information for data operations.
+This function records and processes database operations based on the provided SQL query. It updates the transaction list, data
+operation list, and version list accordingly. The 'total_num' parameter gives the highest record key, used when a statement touches all rows.
+
+Returns:
+str: An error message (if any), or an empty string if the operation is successful.
+"""
def operation_record(total_num, query, txn, data_op_list, version_list):
error_message = ""
op_time = find_data(query, "Q")
@@ -399,6 +663,18 @@ def operation_record(total_num, query, txn, data_op_list, version_list):
+"""
+Remove unfinished operations from the data operation list.
+
+Args:
+- data_op_list (list): A list of data operations.
+
+This function iterates through the data operation list and removes any unfinished operations based on their operation time.
+Unfinished operations are those with an operation time less than 10,000,000.
+
+Returns:
+None
+"""
# remove failed statements to prevent redundant edges from being built
def remove_unfinished_operation(data_op_list):
for list1 in data_op_list:
@@ -406,6 +682,20 @@ def remove_unfinished_operation(data_op_list):
if op.op_time < 10000000:
list1.pop(i)
+"""
+Check for cycles in a directed graph using topological sorting.
+
+Args:
+- edge (List[List[Edge]]): A list representing the directed edges in the graph.
+- indegree (List[int]): A list representing the in-degrees of nodes in the graph.
+- total (int): The total number of nodes in the graph.
+
+This function checks for cycles in a directed graph by performing topological sorting. It takes as input the directed edges (`edge`),
+in-degrees of nodes (`indegree`), and the total number of nodes in the graph (`total`).
+
+Returns:
+bool: True if a cycle is detected, False otherwise.
+"""
# toposort to determine whether there is a cycle
def check_cycle(edge, indegree, total):
q = Queue.Queue()
@@ -425,6 +715,25 @@ def check_cycle(edge, indegree, total):
return True
+"""
+Perform depth-first search (DFS) to find and print loops in a directed graph.
+
+Args:
+- result_folder (str): The path to the folder where the results will be saved.
+- ts_now (str): The current timestamp or identifier for result file naming.
+- e (Edge): The edge being traversed; 'e.out' is the node it leads to and 'e.type'
+  labels the dependency (a leading 'C' marks a committed dependency, e.g. 'WCR').
+
+This function performs depth-first search (DFS) to find and print loops in a directed graph. It takes as input the result folder
+path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), and the edge to traverse next
+(`e`).
+
+The function recursively explores the graph, tracking the visited nodes and edges to detect loops. When a loop is found, it is printed
+to a result file in the specified result folder.
+
+Note: This function assumes that global variables like 'visit', 'visit1', 'path', 'edge_type', and 'edge' are defined elsewhere.
+
+"""
# for loop graphs, print the loop
def dfs(result_folder, ts_now , e):
visit1[e.out] = 1
@@ -455,31 +764,21 @@ def dfs(result_folder, ts_now , e):
visit[e.out] = 0
-# # for loop graphs, print the loop
-# # Contains redundant edge information and the starting point of the ring is unreasonable
-# def dfs(result_folder, ts_now, now, type):
-# visit1[now] = 1
-# if visit[now] == 1: return
-# visit[now] = 1
-# path.append(now)
-# edge_type.append(type)
-# for v in edge[now]:
-# if visit[v.out] == 0:
-# dfs(result_folder, ts_now, v.out, v.type)
-# else:
-# path.append(v.out)
-# edge_type.append(v.type)
-# with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
-# for i in range(0, len(path)):
-# f.write(str(path[i]))
-# if i != len(path) - 1: f.write("->" + edge_type[i+1] + "->")
-# f.write("\n\n")
-# path.pop()
-# edge_type.pop()
-# path.pop()
-# edge_type.pop()
-# visit[now] = 0
+"""
+Print the paths in a directed graph to a result file.
+
+Args:
+- result_folder (str): The path to the folder where the results will be saved.
+- ts_now (str): The current timestamp or identifier for result file naming.
+- edge (list of lists): A list of lists representing the directed edges in the graph.
+
+This function prints the paths in a directed graph to a result file. It takes as input the result folder path (`result_folder`),
+the current timestamp or identifier for result file naming (`ts_now`), and a list of lists (`edge`) representing the directed edges
+in the graph.
+The function iterates through the edges and writes the paths to the result file in the specified result folder.
+
+"""
def print_path(result_folder, ts_now, edge):
with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
flag = 0
@@ -493,11 +792,41 @@ def print_path(result_folder, ts_now, edge):
f.write("\n\n")
+"""
+Output the result of cycle detection to a result file.
+
+Args:
+- file (str): The name of the file or input source being analyzed.
+- result_folder (str): The path to the folder where the results will be saved.
+- ts_now (str): The current timestamp or identifier for result file naming.
+- IsCyclic (str): A string indicating whether a cycle was detected.
+
+This function outputs the result of cycle detection to a result file. It takes as input the name of the file or input source being
+analyzed (`file`), the result folder path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`),
+and a string (`IsCyclic`) indicating whether a cycle was detected.
+
+The function writes the result, including the file name and the cyclic status, to the specified result file in the result folder.
+
+"""
def output_result(file, result_folder, ts_now, IsCyclic):
with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
f.write(file + ": " + IsCyclic + "\n")
+"""
+Print an error message to a result file.
+
+Args:
+- result_folder (str): The path to the folder where the results will be saved.
+- ts_now (str): The current timestamp or identifier for result file naming.
+- error_message (str): The error message to be printed.
+
+This function prints an error message to a result file. It takes as input the result folder path (`result_folder`), the current
+timestamp or identifier for result file naming (`ts_now`), and the error message (`error_message`) to be printed.
+
+The function appends the error message to the specified result file in the result folder and adds a newline for separation.
+
+"""
def print_error(result_folder, ts_now, error_message):
with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
f.write(error_message + "\n")
From 54930e6d1c1b371903a246d706916971e23e858b Mon Sep 17 00:00:00 2001
From: dreamin <2534393465@qq.com>
Date: Mon, 29 Jul 2024 07:33:38 +0000
Subject: [PATCH 13/16] comments add
---
src/dbtest/src/mda_detect.py | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py
index 9b410f51..864fc82c 100644
--- a/src/dbtest/src/mda_detect.py
+++ b/src/dbtest/src/mda_detect.py
@@ -99,6 +99,15 @@ def get_total(lines):
# break
return num
+"""
+Find the total number of transactions based on transaction identifiers in queries.
+
+Args:
+- lines (list): A list of query strings, each potentially containing transaction identifiers.
+
+Returns:
+int: The highest transaction number found in the queries.
+"""
# find total Txn number
def get_total_txn(lines):
num = 0
From da83039c5b0058481d3ebaa2944a952b69032d8e Mon Sep 17 00:00:00 2001
From: dinream <2534393465@qq.com>
Date: Fri, 25 Oct 2024 17:02:26 +0800
Subject: [PATCH 14/16] move mda_detect.py to mda_detect_mixed.py
---
src/dbtest/src/mda_detect_mixed.py | 923 +++++++++++++++++++++++++++++
1 file changed, 923 insertions(+)
create mode 100644 src/dbtest/src/mda_detect_mixed.py
diff --git a/src/dbtest/src/mda_detect_mixed.py b/src/dbtest/src/mda_detect_mixed.py
new file mode 100644
index 00000000..2a44bf71
--- /dev/null
+++ b/src/dbtest/src/mda_detect_mixed.py
@@ -0,0 +1,923 @@
+# -*- coding: utf-8 -*-
+
+# /*
+# * Tencent is pleased to support the open source community by making 3TS available.
+# *
+# * Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. The below software
+# * in this distribution may have been modified by THL A29 Limited ("Tencent Modifications"). All
+# * Tencent Modifications are Copyright (C) THL A29 Limited.
+# *
+# * Author: xenitchen axingguchen tsunaouyang (xenitchen,axingguchen,tsunaouyang@tencent.com)
+# *
+# */
+
+
+import queue
+import os
+import time
+
+
+class Edge:
+ def __init__(self, type, out, begin_time):
+ self.type = type
+ self.out = out
+ self.time = begin_time
+ def __repr__(self):
+ return "Edge(begin_time={}, type={}, out={})".format(self.time, self.type, self.out)
+
+class Operation:
+ def __init__(self, op_type, txn_num, op_time, value):
+ self.op_type = op_type
+ self.txn_num = txn_num
+ self.op_time = op_time
+ self.value = value
+
+
+class Txn:
+ def __init__(self):
+ self.begin_ts = -1
+ self.end_ts = 99999999999999999999
+ self.isolation = "serializable"
+
+
+"""
+Print the graph edges after building the graph.
+
+Args:
+- edge (list): A list of Edge lists
+- txn (list): A list of Txn objects
+
+Returns:
+None
+"""
+# print edge after build graph
+def print_graph(edge,txn):
+ for i, edges in enumerate(edge):
+ if i == 0 or i == len(edge)-1:
+ continue
+ print("Transaction {}:-----{}-----".format(i,txn[i].isolation))
+ for e in edges:
+ print(" {}".format(e))
+
+
+"""
+Print the contents of the data operation list.
+
+Args:
+- data_op_list (list): A list of Operation lists
+
+Returns:
+None
+"""
+# print data_op_list
+def print_data_op_list(data_op_list):
+ for k,list in enumerate(data_op_list):
+ if k< len(data_op_list)-1:
+ print("\nk:{}---".format(k))
+ for i, data in enumerate(list):
+ print("op:{}--{}-".format(data.op_type,data.txn_num))
+
+"""
+Find the total variable number.
+
+Args:
+- lines (list): A list of queries.
+
+Returns:
+int: The maximum variable number found in the queries.
+"""
+# find total variable number
+def get_total(lines):
+ num = 0
+ for query in lines:
+ query = query.replace("\n", "")
+ query = query.replace(" ", "")
+ if query.find("INSERT") != -1: # query[0:2] == "Q0" and
+ tmp = find_data(query, "(")
+ num = max(num, tmp)
+ # elif query[0:2] == "Q1":
+ # break
+ return num
+
+"""
+Find the total number of transactions based on transaction identifiers in queries.
+
+Args:
+- lines (list): A list of query strings, each potentially containing transaction identifiers.
+
+Returns:
+int: The highest transaction number found in the queries.
+"""
+# find total Txn number
+def get_total_txn(lines):
+ num = 0
+ for query in lines:
+ query = query.replace("\n", "")
+ query = query.replace(" ", "")
+ if query[0:1] == "Q" and query.find("T") != -1:
+ tmp = find_data(query, "T")
+ num = max(num, tmp)
+ return num
+
+
+"""
+Extract the data we need from a query.
+
+Args:
+- query (str): The input query string.
+- target (str): The target substring to search for.
+
+Returns:
+int: The extracted data value, or -1 if not found.
+"""
+# extract the data we need in query
+def find_data(query, target):
+ pos = query.find(target)
+ if pos == -1:
+ return pos
+ pos += len(target)
+ data_value = ""
+ for i in range(pos, len(query)):
+ if query[i].isdigit():
+ data_value += query[i]
+ else:
+ break
+ if data_value == "":
+ return -1
+ data_value = int(data_value)
+ return data_value
+
+# extract the isolation from content
+def find_isolation(query):
+ if query.find("read-uncommitted") != -1:
+ return "read-uncommitted"
+ if query.find("read-committed") != -1:
+ return "read-committed"
+ if query.find("repeatable-read") != -1:
+ return "repeatable-read"
+ if query.find("serializable") != -1:
+ return "serializable"
+
+"""
+When a statement is executed, this function sets the end time, modifies the transaction list,
+and updates the version list as needed.
+
+Args:
+- op_time (int): The operation time of the statement.
+- data_op_list (list): A list of data operations.
+- query (str): The query string containing information about the statement execution.
+- txn (list): A list of transaction objects.
+- version_list (list): A list of version lists for data operations.
+
+Returns:
+None
+"""
+# when a statement is executed, set the end time and modify the version list
+def set_finish_time(op_time, data_op_list, query, txn, version_list):
+ # pos = query.find("finished at:")
+ # pos += len("finished at:")
+ # data_value = ""
+ # tmp, tmp1 = "", ""
+ # for i in range(pos, len(query)):
+ # if query[i].isdigit():
+ # tmp += query[i]
+ # else:
+ # for j in range(3 - len(tmp)):
+ # tmp1 += "0"
+ # tmp = tmp1 + tmp
+ # data_value += tmp
+ # tmp, tmp1 = "", ""
+ # data_value = int(data_value)
+ data_value = int(op_time)
+ for t in txn:
+ if t.begin_ts == op_time:
+ t.begin_ts = data_value
+ if t.end_ts == op_time:
+ t.end_ts = data_value
+ for i, list1 in enumerate(data_op_list):
+ for op in list1:
+ if op.op_time == op_time:
+ op.op_time = data_value
+ if op.op_type == "W":
+ version_list[i].append(op.value)
+ op.value = len(version_list[i]) - 1
+ elif op.op_type == "D":
+ version_list[i].append(-1)
+ op.value = len(version_list[i]) - 1
+ elif op.op_type == "I":
+ version_list[i].append(op.value)
+ op.value = len(version_list[i]) - 1
+
+
+"""
+Check if two transactions are concurrent based on their start and end times.
+
+Args:
+- data1: Information about the first transaction.
+- data2: Information about the second transaction.
+- txn: A list of transaction objects.
+
+Returns:
+bool: True if the transactions are concurrent, False otherwise.
+"""
+# if both transactions are running
+# or the start time of the second transaction is less than the end time of the first transaction
+# we think they are concurrent
+def check_concurrency(data1, data2, txn):
+ if txn[data2.txn_num].begin_ts < txn[data1.txn_num].end_ts:
+ return True
+ elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: # TODO(review): possibly redundant — the first branch may already cover this case; confirm before removing
+ return True
+ else:
+ return False
+
+
+"""
+Check if a specific edge exists between two transactions in the graph.
+
+Args:
+- edge (list): A list of lists, where each sublist contains edge objects representing the connections in the graph.
+- src_txn (int): The source transaction number, which the edge originates from.
+- src_type (str): The operation type (e.g., 'R', 'W') at the source of the edge.
+- tar_txn (int): The target transaction number, which the edge points to.
+- tar_type (str): The operation type (e.g., 'R', 'W') at the target of the edge.
+
+Returns:
+bool: True if the specified edge exists, False otherwise.
+"""
+def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type):
+ for e in edge[src_txn]:
+ if e.out == tar_txn and e.type[0] == src_type and e.type[-1] == tar_type:
+ return True
+ return False
+
+
+"""
+Determine the type of edge between two operations based on their read or write versions.
+
+Args:
+- data1: Information about the first operation.
+- data2: Information about the second operation.
+- txn: A list of transaction objects.
+
+Returns:
+tuple: A tuple containing three values:
+ - A string indicating the edge type ('R', 'W', 'CR', 'CW').
+ - Information about the operation that comes first.
+ - Information about the operation that comes second.
+"""
+# decide which operation comes first depending on the read or write version
+# if later operation happened after the first txn commit time, edge type will add "C"
+def get_edge_type(data1, data2, txn):
+ if data1.op_time <= data2.op_time:
+ before, after = data1, data2
+ else:
+ before, after = data2, data1
+ # if data1.op_type == "D" or data2.op_type == "D":
+ # if data1.value < data2.value:
+ # before, after = data2, data1
+ # else:
+ # before, after = data1, data2
+ if after.op_time > txn[before.txn_num].end_ts:
+ state = "C"
+ else:
+ state = ""
+ return before.op_type + state + after.op_type, before, after
+
+"""
+Build a directed graph representing the concurrency relationships between operations.
+
+Args:
+- data_op_list: A list of lists, where each inner list contains information about operations for a specific transaction.
+- indegree: A list representing the in-degrees of each operation node in the graph.
+- edge: A list representing the edges (concurrency relationships) between operations.
+- txn: A list of transaction objects.
+
+This function constructs a directed graph where nodes represent operations, and edges represent concurrency relationships
+between operations. It iterates through the list of operations for each transaction and calls the 'insert_edge' function
+to create edges in the graph based on concurrency relationships.
+
+Returns:
+None
+"""
+def build_graph(data_op_list, indegree, edge, txn):
+ for list1 in data_op_list:
+ for i, data in enumerate(list1):
+ for j in range(0, i):
+ insert_edge(list1[j], data, indegree, edge, txn)
+
+
+"""
+Insert an edge into the directed graph representing concurrency relationships between operations.
+
+Args:
+- data1: An operation object representing the first operation.
+- data2: An operation object representing the second operation.
+- indegree: A list representing the in-degrees of each transaction in the graph.
+- edge: A list representing the edges (concurrency relationships) between operations for each transaction.
+- txn: A list of transaction objects.
+
+This function inserts an edge into the directed graph to represent the concurrency relationship between 'data1' and 'data2'.
+It first checks if the two operations are concurrent by calling the 'check_concurrency' function. If they are concurrent, it
+determines the edge type using the 'get_edge_type' function and adds the edge to the 'edge' list.
+
+The 'indegree' list is updated to reflect the in-degree of the target transaction node when an edge is inserted.
+
+Returns:
+None
+"""
+def insert_edge(data1, data2, indegree, edge, txn):
+ if check_concurrency(data1, data2, txn):
+ edge_type, data1, data2 = get_edge_type(data1, data2, txn)
+ if data1.txn_num == data2.txn_num or edge_type in ["RCR", "RR"]:
+ return
+ #* read-uncommitted: Dirty Write
+ # WI does not exist. If it does, there must be an equivalent edge of WD + DI
+ # II does not exist. If it does, there must be an equivalent edge of ID + DI
+ # DW is allowed to exist. When UPDATE, use the condition to query the data containing D
+ # DD does not exist. If it does, there must be an equivalent edge of DI + ID
+ if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]:
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time))
+ #* read-committed: Dirty Read
+ elif edge_type in ["WCR","WR"] and (txn[data2.txn_num].isolation == "read-committed" or txn[data2.txn_num].isolation == "repeatable-read" or txn[data2.txn_num].isolation == "serializable"):
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time))
+ #* repeatable-read: Unrepeatable Read
+ elif edge_type in ["RCW","RW"] and (txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable"):
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time))
+ #* serializable: Phantom Read
+ elif edge_type in ["RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable":
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time))
+ #* serializable: Phantom Read
+ elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable":
+ indegree[data2.txn_num] += 1
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num,data1.op_time))
+
+"""
+Initialize a record in the version list based on the information in the query.
+
+Args:
+- query: A query string that contains information about a record.
+- version_list: A list of lists representing versioned records.
+
+This function initializes a record in the 'version_list' based on the information provided in the 'query'. It extracts the 'key'
+and 'value' of the record from the query using the 'find_data' function and appends the 'value' to the corresponding version list.
+
+Returns:
+None
+"""
+def init_record(query, version_list):
+ key = find_data(query, "(")
+ value = find_data(query, ",")
+ version_list[key].append(value)
+
+
+"""
+Read the versioned record based on the information in the query.
+
+Args:
+- query (str): A query string that contains information about reading a versioned record.
+- op_time (int): The operation time of the read operation.
+- data_op_list (list): A list of lists representing data operations.
+- version_list (list): A list of lists representing versioned records.
+
+This function reads the versioned record specified in the 'query'. It extracts the 'key' and 'value' from the query, which are
+used to identify the record and version to read. The function checks if the specified version exists in the version list and
+updates the 'op.value' accordingly. If the version doesn't exist or if the read operation is not successful, an error message
+is returned.
+
+Returns:
+str: An error message indicating the result of the read operation. An empty string means the read was successful.
+"""
+def readVersion_record(query, op_time, data_op_list, version_list):
+ error_message = ""
+ data = query.split(")")
+ if len(data) == 1:
+ for list1 in data_op_list:
+ for op in list1:
+ if op.op_time == op_time:
+ value = op.value
+ if len(version_list[value]) == 0:
+ op.value = -1
+ else:
+ if -1 not in version_list[value]:
+ error_message = "Value exists, but did not successully read"
+ return error_message
+ pos = version_list[value].index(-1)
+ op.value = pos
+ else:
+ for s in data:
+ key = find_data(s, "(")
+ value = find_data(s, ",")
+ for i, list1 in enumerate(data_op_list):
+ for op in list1:
+ if key == i and op.op_time == op_time:
+ value1 = op.value
+ if len(version_list[value1]) == 0:
+ op.value = -1
+ else:
+ if version_list[value1].count(value) == 0:
+ error_message = "Read version that does not exist"
+ return error_message
+ pos = version_list[value1].index(value)
+ op.value = pos
+
+ return error_message
+ # for i, list1 in enumerate(data_op_list):
+ # print(i)
+ # if list1:
+ # print("")
+ # print(list1[0].txn_num)
+ # print(list1[0].op_type)
+ # print(list1[0].op_time)
+ # print(list1[0].op_value)
+
+
+
+
+"""
+Read records based on the information in the query and update data operations.
+
+Args:
+- op_time (int): The operation time of the read operation.
+- txn_num (int): The transaction number.
+- total_num (int): The total number of records.
+- txn (list): A list of transactions.
+- data_op_list (list): A list of lists representing data operations.
+
+This function reads records specified in the query and updates the 'data_op_list' accordingly. It extracts information from
+the 'query' to determine which records to read and what type of operation to perform (read or predicate). The function also
+sets the 'begin_ts' of the transaction if it's not already set.
+
+The 'query' is analyzed to identify specific record keys or predicates and create corresponding 'Operation' objects in the
+'data_op_list'. Depending on the structure of the query, this function handles various cases, such as reading single records,
+handling predicates, and selecting all rows in a table.
+
+Returns:
+None
+"""
+def read_record(op_time, txn_num, total_num, txn, data_op_list):
+ if txn[txn_num].begin_ts == -1:
+ txn[txn_num].begin_ts = op_time
+ # for some distributed cases which have 4 param, write part is same
+ if query.find("value1=") != -1:
+ op_data = find_data(query, "value1=")
+ data_op_list[op_data].append(Operation("R", txn_num, op_time, op_data))
+ # for normal cases
+ elif query.find("k=") != -1:
+ op_data = find_data(query, "k=")
+ data_op_list[op_data].append(Operation("R", txn_num, op_time, op_data))
+ # for predicate cases
+ elif query.find("k>") != -1:
+ left = find_data(query, "k>") + 1
+ right = find_data(query, "k<")
+ for i in range(left, right):
+ data_op_list[i].append(Operation("R", txn_num, op_time, i)) # P
+ elif query.find("value1>") != -1:
+ left = find_data(query, "value1>") + 1
+ right = find_data(query, "value1<")
+ for i in range(left, right):
+ data_op_list[i].append(Operation("R", txn_num, op_time, i)) # p
+ else:
+ # it means select all rows in table
+ for i in range(total_num+1):
+ data_op_list[i].append(Operation("R", txn_num, op_time, i))
+
+
+"""
+Write records based on the information in the query and update data operations.
+
+Args:
+- op_time (int): The operation time of the write operation.
+- txn_num (int): The transaction number.
+- txn (list): A list of transactions.
+- data_op_list (list): A list of lists representing data operations.
+
+This function writes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the
+'query' to determine which records to write and what type of operation to perform (write). The function also sets the 'begin_ts'
+of the transaction if it's not already set.
+
+The 'query' is analyzed to identify specific record keys and values, and it creates corresponding 'Operation' objects in the 'data_op_list'.
+
+Returns:
+None
+"""
+def write_record(op_time, txn_num, txn, data_op_list):
+ if txn[txn_num].begin_ts == -1:
+ txn[txn_num].begin_ts = op_time
+ if query.find("value1=") != -1:
+ op_data = find_data(query, "value1=")
+ op_value = find_data(query, "value2=")
+ data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value))
+ elif query.find("k=") != -1:
+ op_data = find_data(query, "k=")
+ op_value = find_data(query, "v=")
+ data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value))
+ # for predicate cases
+ elif query.find("k>") != -1:
+ left = find_data(query, "k>") + 1
+ right = find_data(query, "k<")
+ for i in range(left, right):
+ data_op_list[i].append(Operation("W", txn_num, op_time, i)) # P
+ elif query.find("value1>") != -1:
+ left = find_data(query, "value1>") + 1
+ right = find_data(query, "value1<")
+ for i in range(left, right):
+ data_op_list[i].append(Operation("W", txn_num, op_time, i)) # p
+ else:
+ # it means select all rows in table
+ for i in range(total_num+1):
+ data_op_list[i].append(Operation("W", txn_num, op_time, i))
+
+
+"""
+Delete records based on the information in the query and update data operations.
+
+Args:
+- op_time (int): The operation time of the delete operation.
+- txn_num (int): The transaction number.
+- txn (list): A list of transactions.
+- data_op_list (list): A list of lists representing data operations.
+
+This function deletes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the
+'query' to determine which records to delete and what type of operation to perform (delete). The function also sets the 'begin_ts'
+of the transaction if it's not already set.
+
+The 'query' is analyzed to identify specific record keys, and it creates corresponding 'Operation' objects in the 'data_op_list'.
+
+Returns:
+None
+"""
+def delete_record(op_time, txn_num, txn, data_op_list):
+ if txn[txn_num].begin_ts == -1:
+ txn[txn_num].begin_ts = op_time
+ if query.find("value1=") != -1:
+ op_data = find_data(query, "value1=")
+ data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data))
+ elif query.find("k=") != -1:
+ op_data = find_data(query, "k=")
+ data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data))
+ # for predicate cases
+ elif query.find("k>") != -1:
+ left = find_data(query, "k>") + 1
+ right = find_data(query, "k<")
+ for i in range(left, right):
+ data_op_list[i].append(Operation("D", txn_num, op_time, i)) # P
+ elif query.find("value1>") != -1:
+ left = find_data(query, "value1>") + 1
+ right = find_data(query, "value1<")
+ for i in range(left, right):
+ data_op_list[i].append(Operation("D", txn_num, op_time, i)) # p
+ else:
+ # it means select all rows in table
+ for i in range(total_num+1):
+ data_op_list[i].append(Operation("D", txn_num, op_time, i))
+
+
+"""
+Insert records based on the information in the query and update data operations.
+
+Args:
+- op_time (int): The operation time of the insert operation.
+- txn_num (int): The transaction number.
+- txn (list): A list of transactions.
+- data_op_list (list): A list of lists representing data operations.
+
+This function inserts records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the
+'query' to determine which records to insert and what type of operation to perform (insert). The function also sets the 'begin_ts'
+of the transaction if it's not already set.
+
+The 'query' is analyzed to identify specific record keys and their corresponding values, and it creates corresponding 'Operation'
+objects in the 'data_op_list'.
+
+Returns:
+None
+"""
+def insert_record(op_time, txn_num, txn, data_op_list):
+ if txn[txn_num].begin_ts == -1 and op_time != 0:
+ txn[txn_num].begin_ts = op_time
+ key = find_data(query, "(")
+ value = find_data(query, ",")
+ data_op_list[key].append(Operation("I", txn_num, op_time, value))
+
+
+"""
+Set the end timestamp for a transaction.
+
+Args:
+- op_time (int): The operation time when the transaction ends.
+- txn_num (int): The transaction number.
+- txn (list): A list of transactions.
+
+This function sets the 'end_ts' attribute of a transaction specified by 'txn_num' to the given 'op_time'. It marks the end of the
+transaction's execution.
+
+Returns:
+None
+"""
+def end_record(op_time, txn_num, txn):
+ txn[txn_num].end_ts = op_time
+
+
+"""
+Record and process database operations.
+
+Args:
+- total_num (int): The highest record (variable) key; records are indexed 0..total_num.
+- query (str): The SQL query representing a database operation.
+- txn (list): A list of transactions.
+- data_op_list (list): A list of data operations.
+- version_list (list): A list of version information for data operations.
+
+This function records and processes database operations based on the provided SQL query. It updates the transaction list, data
+operation list, and version list accordingly. The 'total_num' parameter gives the highest record key, used when a statement touches all rows.
+
+Returns:
+str: An error message (if any), or an empty string if the operation is successful.
+"""
+def operation_record(total_num, query, txn, data_op_list, version_list):
+ error_message = ""
+ op_time = find_data(query, "Q")
+ txn_num = find_data(query, "T")
+ # print("total_num:{}, query:{},optime: {}, txn_num: {}\n".format(total_num,query, op_time, txn_num))
+ if op_time == 0 and query.find("INSERT") != -1:
+ init_record(query, version_list)
+ return error_message
+ if query.find("returnresult") != -1: #! "returnresult" may not be present in the log
+ error_message = readVersion_record(query, op_time, data_op_list, version_list)
+ return error_message
+ if query.find("finished") != -1: #! "finished" may not be present in the log
+ set_finish_time(op_time, data_op_list, query, txn, version_list)
+ return error_message
+ if op_time == -1 or txn_num == -1:
+ return error_message
+ if query.find("BEGIN") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.:
+ txn[txn_num].isolation = find_isolation(query)
+ elif query.find("SELECT") != -1:
+ read_record(op_time, txn_num, total_num, txn, data_op_list)
+ elif query.find("UPDATE") != -1:
+ write_record(op_time, txn_num, txn, data_op_list)
+ elif query.find("DELETE") != -1:
+ delete_record(op_time, txn_num, txn, data_op_list)
+ elif query.find("INSERT") != -1: #! assume existing data will not be inserted ("Rollback")
+ insert_record(op_time, txn_num, txn, data_op_list)
+ elif query.find("COMMIT") != -1:
+ if op_time != 0:
+ end_record(op_time, txn_num, txn)
+ set_finish_time(op_time, data_op_list, query, txn, version_list)
+ return error_message
+
+
+
+"""
+Remove unfinished operations from the data operation list.
+
+Args:
+- data_op_list (list): A list of data operations.
+
+This function iterates through the data operation list and removes any unfinished operations based on their operation time.
+Unfinished operations are those with an operation time less than 10,000,000.
+
+Returns:
+None
+"""
+# remove failed statements to prevent redundant edges from being built
+def remove_unfinished_operation(data_op_list):
+ for list1 in data_op_list:
+ for i, op in enumerate(list1):
+ if op.op_time < 10000000:
+ list1.pop(i)
+
+"""
+Check for cycles in a directed graph using topological sorting.
+
+Args:
+- edge (List[List[Edge]]): A list representing the directed edges in the graph.
+- indegree (List[int]): A list representing the in-degrees of nodes in the graph.
+- total (int): The total number of nodes in the graph.
+
+This function checks for cycles in a directed graph by performing topological sorting. It takes as input the directed edges (`edge`),
+in-degrees of nodes (`indegree`), and the total number of nodes in the graph (`total`).
+
+Returns:
+bool: True if a cycle is detected, False otherwise.
+"""
+# toposort to determine whether there is a cycle
+def check_cycle(edge, indegree, total):
+ q = queue.Queue()
+ for i, degree in enumerate(indegree):
+ if degree == 0: q.put(i)
+ ans = []
+ while not q.empty():
+ now = q.get()
+ ans.append(now)
+ for val in edge[now]:
+ next_node = val.out
+ indegree[next_node] -= 1
+ if indegree[next_node] == 0:
+ q.put(next_node)
+ if len(ans) == total:
+ return False
+ return True
+
+
+"""
+Perform depth-first search (DFS) to find and print loops in a directed graph.
+
+Args:
+- result_folder (str): The path to the folder where the results will be saved.
+- ts_now (str): The current timestamp or identifier for result file naming.
+- now (int): The current node being visited.
+- type (str): The type of edge leading to the current node ('C' for commit, 'R' for read, 'W' for write, etc.).
+
+This function performs depth-first search (DFS) to find and print loops in a directed graph. It takes as input the result folder
+path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), the current node being visited (`now`),
+and the type of edge leading to the current node (`type`).
+
+The function recursively explores the graph, tracking the visited nodes and edges to detect loops. When a loop is found, it is printed
+to a result file in the specified result folder.
+
+Note: This function assumes that global variables like 'visit', 'visit1', 'path', 'edge_type', and 'edge' are defined elsewhere.
+
+"""
+# for loop graphs, print the loop
+def dfs(result_folder, ts_now , e):
+ visit1[e.out] = 1
+ if visit[e.out] == 1: return
+ visit[e.out] = 1
+ path.append(e)
+ for v in edge[e.out]:
+ if visit[v.out] == 0:
+ dfs(result_folder, ts_now, v)
+ else:
+ path.append(v)
+ with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
+ content = ""
+ list_loop = []
+ for i in range(len(path) - 1, -1, -1):
+ if i != len(path) - 1 and path[i].out == path[len(path) - 1].out:
+ break
+ index = 0
+ while(index < len(list_loop) and path[list_loop[index]].time < path[i].time):
+ index += 1
+ list_loop.insert(index,i)
+ for idx in list_loop:
+ content = content + "->" + path[idx].type + "->" + str(path[idx].out)
+ content = str(path[list_loop[-1]].out) + content + "\n\n"
+ f.write(content)
+ path.pop()
+ path.pop()
+ visit[e.out] = 0
+
+
+"""
+Print the paths in a directed graph to a result file.
+
+Args:
+- result_folder (str): The path to the folder where the results will be saved.
+- ts_now (str): The current timestamp or identifier for result file naming.
+- edge (list of lists): A list of lists representing the directed edges in the graph.
+
+This function prints the paths in a directed graph to a result file. It takes as input the result folder path (`result_folder`),
+the current timestamp or identifier for result file naming (`ts_now`), and a list of lists (`edge`) representing the directed edges
+in the graph.
+
+The function iterates through the edges and writes the paths to the result file in the specified result folder.
+
+"""
+def print_path(result_folder, ts_now, edge):
+ with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
+ flag = 0
+ for i in range(len(edge)):
+ for v in edge[i]:
+ if flag == 0:
+ flag = 1
+ else:
+ f.write(", ")
+ f.write(str(i) + "->" + v.type + "->" + str(v.out))
+ f.write("\n\n")
+
+
+"""
+Output the result of cycle detection to a result file.
+
+Args:
+- file (str): The name of the file or input source being analyzed.
+- result_folder (str): The path to the folder where the results will be saved.
+- ts_now (str): The current timestamp or identifier for result file naming.
+- IsCyclic (str): A string indicating whether a cycle was detected.
+
+This function outputs the result of cycle detection to a result file. It takes as input the name of the file or input source being
+analyzed (`file`), the result folder path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`),
+and a string (`IsCyclic`) indicating whether a cycle was detected.
+
+The function writes the result, including the file name and the cyclic status, to the specified result file in the result folder.
+
+"""
+def output_result(file, result_folder, ts_now, IsCyclic):
+ with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
+ f.write(file + ": " + IsCyclic + "\n")
+
+
+"""
+Print an error message to a result file.
+
+Args:
+- result_folder (str): The path to the folder where the results will be saved.
+- ts_now (str): The current timestamp or identifier for result file naming.
+- error_message (str): The error message to be printed.
+
+This function prints an error message to a result file. It takes as input the result folder path (`result_folder`), the current
+timestamp or identifier for result file naming (`ts_now`), and the error message (`error_message`) to be printed.
+
+The function appends the error message to the specified result file in the result folder and adds a newline for separation.
+
+"""
+def print_error(result_folder, ts_now, error_message):
+ with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
+ f.write(error_message + "\n")
+ f.write("\n\n")
+
+
+
+
+
+#! ------Some assumption------
+# The modifications of transactions at any isolation level are mutually visible, which is equivalent to a single storage, without read-write buffer
+# There are statements to set the isolation level of each transaction in the input file, after "BEGIN"
+ # BEGIN T1 set_isolation=repeatable-read
+ # BEGIN T2 set_isolation=serializable
+ # BEGIN T3 set_isolation=read-uncommitted
+ # BEGIN T4 set_isolation=read-committed
+# Assume that the inserted data key is in ascending order from 0
+
+run_result_folder = "pg/mda_detect_test"
+result_folder = "check_result/" + run_result_folder
+do_test_list = "mda_detect_test_list.txt"
+#ts_now = "_2param_3txn_insert"
+ts_now = time.strftime("%Y%m%d_%H%M%S", time.localtime())
+if not os.path.exists(result_folder):
+ os.makedirs(result_folder)
+
+with open(do_test_list, "r") as f:
+ files = f.readlines()
+for file in files:
+ file = file.replace("\n", "")
+ file = file.replace(" ", "")
+ if file == "":
+ continue
+ if file[0] == "#":
+ continue
+ with open(run_result_folder + "/" + file + ".txt", "r") as f:
+ lines = f.readlines()
+
+ total_num = get_total(lines) # total number of variables
+ total_num_txn = get_total_txn(lines) # total number of txn
+ txn = [Txn() for i in range(total_num_txn + 2)] # total num of transaction
+ data_op_list = [[] for i in range(total_num + 2)] # record every operation that occurs on the variable
+ edge = [[] for i in range(total_num_txn + 2)] # all edges from the current point
+ indegree = [0] * (total_num_txn + 2) # in-degree of each point
+ visit = [0] * (total_num_txn + 2) # in dfs, whether the current point has been visited
+ visit1 = [0] * (total_num_txn + 2) # we will only use unvisited points as the starting point of the dfs
+ path = [] # points in cycle
+ edge_type = [] # edge type of the cycle
+ version_list = [[] for i in range(total_num + 2)]
+    go_end = False # if the test result is "Rollback" or "Timeout", we skip the cycle check
+
+ error_message = ""
+ for query in lines:
+ query = query.replace("\n", "")
+ query = query.replace(" ", "")
+ if query.find("Rollback") != -1 or query.find("Timeout") != -1:
+ go_end = True
+ # print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt"))
+ error_message = operation_record(total_num, query, txn, data_op_list, version_list)
+ if error_message != "":
+ break
+
+ if error_message != "":
+ output_result(file, result_folder, ts_now, "Error")
+ print_error(result_folder, ts_now, error_message)
+ continue
+
+ cycle = False
+ # remove_unfinished_operation(data_op_list) 动态测试中默认所有的执行时间 Qi 都没有 finish 字段
+ build_graph(data_op_list, indegree, edge, txn)
+ print("--------file:{}--------".format(file))
+ print_graph(edge,txn)
+ # print_data_op_list(data_op_list)
+ if not go_end:
+ cycle = check_cycle(edge, indegree, total_num_txn+2)
+ if cycle:
+ output_result(file, result_folder, ts_now, "Cyclic")
+ for i in range(total_num_txn + 2):
+ if visit1[i] == 0:
+ # dfs(result_folder, ts_now, i, "null")
+ dfs(result_folder, ts_now, Edge("null",i,-1))
+ else:
+ output_result(file, result_folder, ts_now, "Avoid")
+ print_path(result_folder, ts_now, edge)
+ print("---------------------------------\n")
\ No newline at end of file
From 98bf22708551c346a6e28510dbd261fe147900be Mon Sep 17 00:00:00 2001
From: dinream <2534393465@qq.com>
Date: Fri, 25 Oct 2024 17:03:58 +0800
Subject: [PATCH 15/16] Restore mda_detect.py version to 965b2be
---
src/dbtest/src/mda_detect.py | 304 ++++++++---------------------------
1 file changed, 68 insertions(+), 236 deletions(-)
diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py
index 864fc82c..b97f5ef4 100644
--- a/src/dbtest/src/mda_detect.py
+++ b/src/dbtest/src/mda_detect.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
# /*
# * Tencent is pleased to support the open source community by making 3TS available.
# *
@@ -18,12 +16,10 @@
class Edge:
- def __init__(self, type, out, begin_time):
+ def __init__(self, type, out):
self.type = type
self.out = out
- self.time = begin_time
- def __repr__(self):
- return "Edge(begin_time={}, type={}, out={})".format(self.time, self.type, self.out)
+
class Operation:
def __init__(self, op_type, txn_num, op_time, value):
@@ -37,45 +33,6 @@ class Txn:
def __init__(self):
self.begin_ts = -1
self.end_ts = 99999999999999999999
- self.isolation = "serializable"
-
-
-"""
-Print the graph edges after building the graph.
-
-Args:
-- edge (list): A list of Edge lists
-- txn (list): A list of Txn objects
-
-Returns:
-None
-"""
-# print edge after build graph
-def print_graph(edge,txn):
- for i, edges in enumerate(edge):
- if i == 0 or i == len(edge)-1:
- continue
- print("Transaction {}:-----{}-----".format(i,txn[i].isolation))
- for e in edges:
- print(" {}".format(e))
-
-
-"""
-Print the contents of the data operation list.
-
-Args:
-- data_op_list (list): A list of Operation lists
-
-Returns:
-None
-"""
-# print data_op_list
-def print_data_op_list(data_op_list):
- for k,list in enumerate(data_op_list):
- if k< len(data_op_list)-1:
- print("\nk:{}---".format(k))
- for i, data in enumerate(list):
- print("op:{}--{}-".format(data.op_type,data.txn_num))
"""
Find the total variable number.
@@ -92,31 +49,11 @@ def get_total(lines):
for query in lines:
query = query.replace("\n", "")
query = query.replace(" ", "")
- if query.find("INSERT") != -1: # query[0:2] == "Q0" and
+ if query[0:2] == "Q0" and query.find("INSERT") != -1:
tmp = find_data(query, "(")
num = max(num, tmp)
- # elif query[0:2] == "Q1":
- # break
- return num
-
-"""
-Find the total number of transactions based on transaction identifiers in queries.
-
-Args:
-- lines (list): A list of query strings, each potentially containing transaction identifiers.
-
-Returns:
-int: The highest transaction number found in the queries.
-"""
-# find total Txn number
-def get_total_txn(lines):
- num = 0
- for query in lines:
- query = query.replace("\n", "")
- query = query.replace(" ", "")
- if query[0:1] == "Q" and query.find("T") != -1:
- tmp = find_data(query, "T")
- num = max(num, tmp)
+ elif query[0:2] == "Q1":
+ break
return num
@@ -147,16 +84,6 @@ def find_data(query, target):
data_value = int(data_value)
return data_value
-# extract the isolation from content
-def find_isolation(query):
- if query.find("read-uncommitted") != -1:
- return "read-uncommitted"
- if query.find("read-committed") != -1:
- return "read-committed"
- if query.find("repeatable-read") != -1:
- return "repeatable-read"
- if query.find("serializable") != -1:
- return "serializable"
"""
When a statement is executed, this function sets the end time, modifies the transaction list,
@@ -174,21 +101,20 @@ def find_isolation(query):
"""
# when a statement is executed, set the end time and modify the version list
def set_finish_time(op_time, data_op_list, query, txn, version_list):
- # pos = query.find("finished at:")
- # pos += len("finished at:")
- # data_value = ""
- # tmp, tmp1 = "", ""
- # for i in range(pos, len(query)):
- # if query[i].isdigit():
- # tmp += query[i]
- # else:
- # for j in range(3 - len(tmp)):
- # tmp1 += "0"
- # tmp = tmp1 + tmp
- # data_value += tmp
- # tmp, tmp1 = "", ""
- # data_value = int(data_value)
- data_value = int(op_time)
+ pos = query.find("finishedat:")
+ pos += len("finishedat:")
+ data_value = ""
+ tmp, tmp1 = "", ""
+ for i in range(pos, len(query)):
+ if query[i].isdigit():
+ tmp += query[i]
+ else:
+ for j in range(3 - len(tmp)):
+ tmp1 += "0"
+ tmp = tmp1 + tmp
+ data_value += tmp
+ tmp, tmp1 = "", ""
+ data_value = int(data_value)
for t in txn:
if t.begin_ts == op_time:
t.begin_ts = data_value
@@ -226,32 +152,12 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list):
def check_concurrency(data1, data2, txn):
if txn[data2.txn_num].begin_ts < txn[data1.txn_num].end_ts:
return True
- elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: # TODO maybe a bug: don't need
+ elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts:
return True
else:
return False
-"""
-Check if a specific edge exists between two transactions in the graph.
-
-Args:
-- edge (list): A list of lists, where each sublist contains edge objects representing the connections in the graph.
-- src_txn (int): The source transaction number, which the edge originates from.
-- src_type (str): The operation type (e.g., 'R', 'W') at the source of the edge.
-- tar_txn (int): The target transaction number, which the edge points to.
-- tar_type (str): The operation type (e.g., 'R', 'W') at the target of the edge.
-
-Returns:
-bool: True if the specified edge exists, False otherwise.
-"""
-def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type):
- for e in edge[src_txn]:
- if e.out == tar_txn and e.type[0] == src_type and e.type[-1] == tar_type:
- return True
- return False
-
-
"""
Determine the type of edge between two operations based on their read or write versions.
@@ -269,7 +175,7 @@ def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type):
# decide which operation comes first depending on the read or write version
# if later operation happened after the first txn commit time, edge type will add "C"
def get_edge_type(data1, data2, txn):
- if data1.op_time <= data2.op_time:
+ if data1.value <= data2.value:
before, after = data1, data2
else:
before, after = data2, data1
@@ -278,12 +184,13 @@ def get_edge_type(data1, data2, txn):
# before, after = data2, data1
# else:
# before, after = data1, data2
- if after.op_time > txn[before.txn_num].end_ts:
+ if data2.op_time > txn[data1.txn_num].end_ts:
state = "C"
else:
state = ""
return before.op_type + state + after.op_type, before, after
+
"""
Build a directed graph representing the concurrency relationships between operations.
@@ -329,32 +236,10 @@ def build_graph(data_op_list, indegree, edge, txn):
def insert_edge(data1, data2, indegree, edge, txn):
if check_concurrency(data1, data2, txn):
edge_type, data1, data2 = get_edge_type(data1, data2, txn)
- if data1.txn_num == data2.txn_num or edge_type in ["RCR", "RR"]:
- return
- #* read-uncommitted: Dirty Write
- # WI does not exist. If it does, there must be an equivalent edge of WD + DI
- # II does not exist. If it does, there must be an equivalent edge of ID + DI
- # DW is allowed to exist. When UPDATE, use the condition to query the data containing D
- # DD does not exist. If it does, there must be an equivalent edge of DI + ID
- if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]:
+ if edge_type != "RR" and edge_type != "RCR" and data1.txn_num != data2.txn_num:
indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time))
- #* read-committed: Dirty Read
- elif edge_type in ["WCR","WR"] and (txn[data2.txn_num].isolation == "read-committed" or txn[data2.txn_num].isolation == "repeatable-read" or txn[data2.txn_num].isolation == "serializable"):
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time))
- #* repeatable-read: Unrepeatable Read
- elif edge_type in ["RCW","RW"] and (txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable"):
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time))
- #* serializable: Phantom Read
- elif edge_type in ["RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable":
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time))
- #* serializable: Phantom Read
- elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable":
- indegree[data2.txn_num] += 1
- edge[data1.txn_num].append(Edge(edge_type, data2.txn_num,data1.op_time))
+ edge[data1.txn_num].append(Edge(edge_type, data2.txn_num))
+
"""
Initialize a record in the version list based on the information in the query.
@@ -475,15 +360,15 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list):
left = find_data(query, "k>") + 1
right = find_data(query, "k<")
for i in range(left, right):
- data_op_list[i].append(Operation("R", txn_num, op_time, i)) # P
+ data_op_list[i].append(Operation("P", txn_num, op_time, i))
elif query.find("value1>") != -1:
left = find_data(query, "value1>") + 1
right = find_data(query, "value1<")
for i in range(left, right):
- data_op_list[i].append(Operation("R", txn_num, op_time, i)) # p
+ data_op_list[i].append(Operation("P", txn_num, op_time, i))
else:
# it means select all rows in table
- for i in range(total_num+1):
+ for i in range(total_num):
data_op_list[i].append(Operation("R", txn_num, op_time, i))
@@ -516,21 +401,6 @@ def write_record(op_time, txn_num, txn, data_op_list):
op_data = find_data(query, "k=")
op_value = find_data(query, "v=")
data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value))
- # for predicate cases
- elif query.find("k>") != -1:
- left = find_data(query, "k>") + 1
- right = find_data(query, "k<")
- for i in range(left, right):
- data_op_list[i].append(Operation("W", txn_num, op_time, i)) # P
- elif query.find("value1>") != -1:
- left = find_data(query, "value1>") + 1
- right = find_data(query, "value1<")
- for i in range(left, right):
- data_op_list[i].append(Operation("W", txn_num, op_time, i)) # p
- else:
- # it means select all rows in table
- for i in range(total_num+1):
- data_op_list[i].append(Operation("W", txn_num, op_time, i))
"""
@@ -560,21 +430,6 @@ def delete_record(op_time, txn_num, txn, data_op_list):
elif query.find("k=") != -1:
op_data = find_data(query, "k=")
data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data))
- # for predicate cases
- elif query.find("k>") != -1:
- left = find_data(query, "k>") + 1
- right = find_data(query, "k<")
- for i in range(left, right):
- data_op_list[i].append(Operation("D", txn_num, op_time, i)) # P
- elif query.find("value1>") != -1:
- left = find_data(query, "value1>") + 1
- right = find_data(query, "value1<")
- for i in range(left, right):
- data_op_list[i].append(Operation("D", txn_num, op_time, i)) # p
- else:
- # it means select all rows in table
- for i in range(total_num+1):
- data_op_list[i].append(Operation("D", txn_num, op_time, i))
"""
@@ -642,32 +497,33 @@ def operation_record(total_num, query, txn, data_op_list, version_list):
error_message = ""
op_time = find_data(query, "Q")
txn_num = find_data(query, "T")
- # print("total_num:{}, query:{},optime: {}, txn_num: {}\n".format(total_num,query, op_time, txn_num))
if op_time == 0 and query.find("INSERT") != -1:
init_record(query, version_list)
return error_message
- if query.find("returnresult") != -1: #! 1"returnresult" maybe don't exist
+ if query.find("returnresult") != -1:
error_message = readVersion_record(query, op_time, data_op_list, version_list)
return error_message
- if query.find("finished") != -1: #! "finished" maybe don't exist
+ if query.find("finished") != -1:
set_finish_time(op_time, data_op_list, query, txn, version_list)
return error_message
if op_time == -1 or txn_num == -1:
return error_message
- if query.find("BEGIN") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.:
- txn[txn_num].isolation = find_isolation(query)
- elif query.find("SELECT") != -1:
+ if query.find("SELECT") != -1:
read_record(op_time, txn_num, total_num, txn, data_op_list)
+ return error_message
elif query.find("UPDATE") != -1:
write_record(op_time, txn_num, txn, data_op_list)
- elif query.find("DELETE") != -1:
+ return error_message
+ elif query.find("DELETE") != -1:
delete_record(op_time, txn_num, txn, data_op_list)
- elif query.find("INSERT") != -1: #! assume existing data will not be inserted ("Rollback")
+ return error_message
+ elif query.find("INSERT") != -1:
insert_record(op_time, txn_num, txn, data_op_list)
+ return error_message
elif query.find("COMMIT") != -1:
if op_time != 0:
end_record(op_time, txn_num, txn)
- set_finish_time(op_time, data_op_list, query, txn, version_list)
+ return error_message
return error_message
@@ -744,33 +600,28 @@ def check_cycle(edge, indegree, total):
"""
# for loop graphs, print the loop
-def dfs(result_folder, ts_now , e):
- visit1[e.out] = 1
- if visit[e.out] == 1: return
- visit[e.out] = 1
- path.append(e)
- for v in edge[e.out]:
+def dfs(result_folder, ts_now, now, type):
+ visit1[now] = 1
+ if visit[now] == 1: return
+ visit[now] = 1
+ path.append(now)
+ edge_type.append(type)
+ for v in edge[now]:
if visit[v.out] == 0:
- dfs(result_folder, ts_now, v)
+ dfs(result_folder, ts_now, v.out, v.type)
else:
- path.append(v)
+ path.append(v.out)
+ edge_type.append(v.type)
with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f:
- content = ""
- list_loop = []
- for i in range(len(path) - 1, -1, -1):
- if i != len(path) - 1 and path[i].out == path[len(path) - 1].out:
- break
- index = 0
- while(index < len(list_loop) and path[list_loop[index]].time < path[i].time):
- index += 1
- list_loop.insert(index,i)
- for idx in list_loop:
- content = content + "->" + path[idx].type + "->" + str(path[idx].out)
- content = str(path[list_loop[-1]].out) + content + "\n\n"
- f.write(content)
+ for i in range(0, len(path)):
+ f.write(str(path[i]))
+ if i != len(path) - 1: f.write("->" + edge_type[i+1] + "->")
+ f.write("\n\n")
path.pop()
+ edge_type.pop()
path.pop()
- visit[e.out] = 0
+ edge_type.pop()
+ visit[now] = 0
"""
@@ -842,21 +693,9 @@ def print_error(result_folder, ts_now, error_message):
f.write("\n\n")
-
-
-
-#! ------Some assumption------
-# The modifications of transactions at any isolation level are mutually visible, which is equivalent to a single storage, without read-write buffer
-# There are statements to set the isolation level of each transaction in the input file, after "BEGIN"
- # BEGIN T1 set_isolation=repeatable-read
- # BEGIN T2 set_isolation=serializable
- # BEGIN T3 set_isolation=read-uncommitted
- # BEGIN T4 set_isolation=read-committed
-# Assume that the inserted data key is in ascending order from 0
-
-run_result_folder = "pg/mda_detect_test"
+run_result_folder = "pg/serializable"
result_folder = "check_result/" + run_result_folder
-do_test_list = "mda_detect_test_list.txt"
+do_test_list = "do_test_list.txt"
#ts_now = "_2param_3txn_insert"
ts_now = time.strftime("%Y%m%d_%H%M%S", time.localtime())
if not os.path.exists(result_folder):
@@ -875,13 +714,12 @@ def print_error(result_folder, ts_now, error_message):
lines = f.readlines()
total_num = get_total(lines) # total number of variables
- total_num_txn = get_total_txn(lines) # total number of txn
- txn = [Txn() for i in range(total_num_txn + 2)] # total num of transaction
+ txn = [Txn() for i in range(total_num + 2)] # total num of transaction
data_op_list = [[] for i in range(total_num + 2)] # record every operation that occurs on the variable
- edge = [[] for i in range(total_num_txn + 2)] # all edges from the current point
- indegree = [0] * (total_num_txn + 2) # in-degree of each point
- visit = [0] * (total_num_txn + 2) # in dfs, whether the current point has been visited
- visit1 = [0] * (total_num_txn + 2) # we will only use unvisited points as the starting point of the dfs
+ edge = [[] for i in range(total_num + 2)] # all edges from the current point
+ indegree = [0] * (total_num + 2) # in-degree of each point
+ visit = [0] * (total_num + 2) # in dfs, whether the current point has been visited
+ visit1 = [0] * (total_num + 2) # we will only use unvisited points as the starting point of the dfs
path = [] # points in cycle
edge_type = [] # edge type of the cycle
version_list = [[] for i in range(total_num + 2)]
@@ -893,7 +731,6 @@ def print_error(result_folder, ts_now, error_message):
query = query.replace(" ", "")
if query.find("Rollback") != -1 or query.find("Timeout") != -1:
go_end = True
- # print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt"))
error_message = operation_record(total_num, query, txn, data_op_list, version_list)
if error_message != "":
break
@@ -904,20 +741,15 @@ def print_error(result_folder, ts_now, error_message):
continue
cycle = False
- # remove_unfinished_operation(data_op_list) 动态测试中默认所有的执行时间 Qi 都没有 finish 字段
+ remove_unfinished_operation(data_op_list)
build_graph(data_op_list, indegree, edge, txn)
- print("--------file:{}--------".format(file))
- print_graph(edge,txn)
- # print_data_op_list(data_op_list)
if not go_end:
- cycle = check_cycle(edge, indegree, total_num_txn+2)
+ cycle = check_cycle(edge, indegree, total_num + 2)
if cycle:
output_result(file, result_folder, ts_now, "Cyclic")
- for i in range(total_num_txn + 2):
+ for i in range(total_num + 2):
if visit1[i] == 0:
- # dfs(result_folder, ts_now, i, "null")
- dfs(result_folder, ts_now, Edge("null",i,-1))
+ dfs(result_folder, ts_now, i, "null")
else:
output_result(file, result_folder, ts_now, "Avoid")
- print_path(result_folder, ts_now, edge)
- print("---------------------------------\n")
\ No newline at end of file
+ print_path(result_folder, ts_now, edge)
\ No newline at end of file
From 827b5079050c03a68bc210155f06a5733ea3f221 Mon Sep 17 00:00:00 2001
From: dinream <2534393465@qq.com>
Date: Fri, 25 Oct 2024 17:16:34 +0800
Subject: [PATCH 16/16] Delete non-English comment
---
src/dbtest/src/mda_detect_mixed.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/dbtest/src/mda_detect_mixed.py b/src/dbtest/src/mda_detect_mixed.py
index 2a44bf71..a464a087 100644
--- a/src/dbtest/src/mda_detect_mixed.py
+++ b/src/dbtest/src/mda_detect_mixed.py
@@ -904,7 +904,7 @@ def print_error(result_folder, ts_now, error_message):
continue
cycle = False
- # remove_unfinished_operation(data_op_list) 动态测试中默认所有的执行时间 Qi 都没有 finish 字段
+ # remove_unfinished_operation(data_op_list)
build_graph(data_op_list, indegree, edge, txn)
print("--------file:{}--------".format(file))
print_graph(edge,txn)