From fa16273830635aabbd8907fe72fe23ecfc8cc866 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Sat, 6 Jul 2024 12:53:35 +0000 Subject: [PATCH 01/16] old bug? solve --- src/dbtest/src/mda_detect.py | 342 ++-------------------------- src/dbtest/src/mda_detect_modify.md | 31 +++ 2 files changed, 50 insertions(+), 323 deletions(-) create mode 100644 src/dbtest/src/mda_detect_modify.md diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 982361ea..ce68dd31 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -33,16 +33,9 @@ class Txn: def __init__(self): self.begin_ts = -1 self.end_ts = 99999999999999999999 + self.isolation = "" -""" -Find the total variable number. -Args: -- lines (list): A list of queries. - -Returns: -int: The maximum variable number found in the queries. -""" # find total variable number def get_total(lines): num = 0 @@ -56,17 +49,17 @@ def get_total(lines): break return num +# find total Txn number +def get_total_txn(lines): + num = 0 + for query in lines: + query = query.replace("\n", "") + query = query.replace(" ", "") + if query[0:1] == "Q" and query.find("T") != -1: + tmp = find_data(query, "T") + num = max(num, tmp) + return num -""" -Extract the data we need from a query. - -Args: -- query (str): The input query string. -- target (str): The target substring to search for. - -Returns: -int: The extracted data value, or -1 if not found. -""" # extract the data we need in query def find_data(query, target): pos = query.find(target) @@ -85,20 +78,6 @@ def find_data(query, target): return data_value -""" -When a statement is executed, this function sets the end time, modifies the transaction list, -and updates the version list as needed. - -Args: -- op_time (int): The operation time of the statement. -- data_op_list (list): A list of data operations. -- query (str): The query string containing information about the statement execution. 
-- txn (list): A list of transaction objects. -- version_list (list): A list of version lists for data operations. - -Returns: -None -""" # when a statement is executed, set the end time and modify the version list def set_finish_time(op_time, data_op_list, query, txn, version_list): pos = query.find("finishedat:") @@ -135,17 +114,6 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list): op.value = len(version_list[i]) - 1 -""" -Check if two transactions are concurrent based on their start and end times. - -Args: -- data1: Information about the first transaction. -- data2: Information about the second transaction. -- txn: A list of transaction objects. - -Returns: -bool: True if the transactions are concurrent, False otherwise. -""" # if both transactions are running # or the start time of the second transaction is less than the end time of the first transaction # we think they are concurrent @@ -158,20 +126,6 @@ def check_concurrency(data1, data2, txn): return False -""" -Determine the type of edge between two operations based on their read or write versions. - -Args: -- data1: Information about the first operation. -- data2: Information about the second operation. -- txn: A list of transaction objects. - -Returns: -tuple: A tuple containing three values: - - A string indicating the edge type ('R', 'W', 'CR', 'CW'). - - Information about the operation that comes first. - - Information about the operation that comes second. -""" # decide which operation comes first depending on the read or write version # if later operation happened after the first txn commit time, edge type will add "C" def get_edge_type(data1, data2, txn): @@ -191,22 +145,6 @@ def get_edge_type(data1, data2, txn): return before.op_type + state + after.op_type, before, after -""" -Build a directed graph representing the concurrency relationships between operations. 
- -Args: -- data_op_list: A list of lists, where each inner list contains information about operations for a specific transaction. -- indegree: A list representing the in-degrees of each operation node in the graph. -- edge: A list representing the edges (concurrency relationships) between operations. -- txn: A list of transaction objects. - -This function constructs a directed graph where nodes represent operations, and edges represent concurrency relationships -between operations. It iterates through the list of operations for each transaction and calls the 'insert_edge' function -to create edges in the graph based on concurrency relationships. - -Returns: -None -""" def build_graph(data_op_list, indegree, edge, txn): for list1 in data_op_list: for i, data in enumerate(list1): @@ -214,25 +152,6 @@ def build_graph(data_op_list, indegree, edge, txn): insert_edge(list1[j], data, indegree, edge, txn) -""" -Insert an edge into the directed graph representing concurrency relationships between operations. - -Args: -- data1: An operation object representing the first operation. -- data2: An operation object representing the second operation. -- indegree: A list representing the in-degrees of each transaction in the graph. -- edge: A list representing the edges (concurrency relationships) between operations for each transaction. -- txn: A list of transaction objects. - -This function inserts an edge into the directed graph to represent the concurrency relationship between 'data1' and 'data2'. -It first checks if the two operations are concurrent by calling the 'check_concurrency' function. If they are concurrent, it -determines the edge type using the 'get_edge_type' function and adds the edge to the 'edge' list. - -The 'indegree' list is updated to reflect the in-degree of the target transaction node when an edge is inserted. 
- -Returns: -None -""" def insert_edge(data1, data2, indegree, edge, txn): if check_concurrency(data1, data2, txn): edge_type, data1, data2 = get_edge_type(data1, data2, txn) @@ -241,42 +160,12 @@ def insert_edge(data1, data2, indegree, edge, txn): edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) -""" -Initialize a record in the version list based on the information in the query. - -Args: -- query: A query string that contains information about a record. -- version_list: A list of lists representing versioned records. - -This function initializes a record in the 'version_list' based on the information provided in the 'query'. It extracts the 'key' -and 'value' of the record from the query using the 'find_data' function and appends the 'value' to the corresponding version list. - -Returns: -None -""" def init_record(query, version_list): key = find_data(query, "(") value = find_data(query, ",") version_list[key].append(value) -""" -Read the versioned record based on the information in the query. - -Args: -- query (str): A query string that contains information about reading a versioned record. -- op_time (int): The operation time of the read operation. -- data_op_list (list): A list of lists representing data operations. -- version_list (list): A list of lists representing versioned records. - -This function reads the versioned record specified in the 'query'. It extracts the 'key' and 'value' from the query, which are -used to identify the record and version to read. The function checks if the specified version exists in the version list and -updates the 'op.value' accordingly. If the version doesn't exist or if the read operation is not successful, an error message -is returned. - -Returns: -str: An error message indicating the result of the read operation. An empty string means the read was successful. 
-""" def readVersion_record(query, op_time, data_op_list, version_list): error_message = "" data = query.split(")") @@ -323,27 +212,6 @@ def readVersion_record(query, op_time, data_op_list, version_list): -""" -Read records based on the information in the query and update data operations. - -Args: -- op_time (int): The operation time of the read operation. -- txn_num (int): The transaction number. -- total_num (int): The total number of records. -- txn (list): A list of transactions. -- data_op_list (list): A list of lists representing data operations. - -This function reads records specified in the query and updates the 'data_op_list' accordingly. It extracts information from -the 'query' to determine which records to read and what type of operation to perform (read or predicate). The function also -sets the 'begin_ts' of the transaction if it's not already set. - -The 'query' is analyzed to identify specific record keys or predicates and create corresponding 'Operation' objects in the -'data_op_list'. Depending on the structure of the query, this function handles various cases, such as reading single records, -handling predicates, and selecting all rows in a table. - -Returns: -None -""" def read_record(op_time, txn_num, total_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: txn[txn_num].begin_ts = op_time @@ -372,24 +240,6 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list): data_op_list[i].append(Operation("R", txn_num, op_time, i)) -""" -Write records based on the information in the query and update data operations. - -Args: -- op_time (int): The operation time of the write operation. -- txn_num (int): The transaction number. -- txn (list): A list of transactions. -- data_op_list (list): A list of lists representing data operations. - -This function writes records specified in the query and updates the 'data_op_list' accordingly. 
It extracts information from the -'query' to determine which records to write and what type of operation to perform (write). The function also sets the 'begin_ts' -of the transaction if it's not already set. - -The 'query' is analyzed to identify specific record keys and values, and it creates corresponding 'Operation' objects in the 'data_op_list'. - -Returns: -None -""" def write_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: txn[txn_num].begin_ts = op_time @@ -403,24 +253,6 @@ def write_record(op_time, txn_num, txn, data_op_list): data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value)) -""" -Delete records based on the information in the query and update data operations. - -Args: -- op_time (int): The operation time of the delete operation. -- txn_num (int): The transaction number. -- txn (list): A list of transactions. -- data_op_list (list): A list of lists representing data operations. - -This function deletes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the -'query' to determine which records to delete and what type of operation to perform (delete). The function also sets the 'begin_ts' -of the transaction if it's not already set. - -The 'query' is analyzed to identify specific record keys, and it creates corresponding 'Operation' objects in the 'data_op_list'. - -Returns: -None -""" def delete_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: txn[txn_num].begin_ts = op_time @@ -432,25 +264,6 @@ def delete_record(op_time, txn_num, txn, data_op_list): data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data)) -""" -Insert records based on the information in the query and update data operations. - -Args: -- op_time (int): The operation time of the insert operation. -- txn_num (int): The transaction number. -- txn (list): A list of transactions. -- data_op_list (list): A list of lists representing data operations. 
- -This function inserts records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the -'query' to determine which records to insert and what type of operation to perform (insert). The function also sets the 'begin_ts' -of the transaction if it's not already set. - -The 'query' is analyzed to identify specific record keys and their corresponding values, and it creates corresponding 'Operation' -objects in the 'data_op_list'. - -Returns: -None -""" def insert_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1 and op_time != 0: txn[txn_num].begin_ts = op_time @@ -459,44 +272,15 @@ def insert_record(op_time, txn_num, txn, data_op_list): data_op_list[key].append(Operation("I", txn_num, op_time, value)) -""" -Set the end timestamp for a transaction. - -Args: -- op_time (int): The operation time when the transaction ends. -- txn_num (int): The transaction number. -- txn (list): A list of transactions. - -This function sets the 'end_ts' attribute of a transaction specified by 'txn_num' to the given 'op_time'. It marks the end of the -transaction's execution. - -Returns: -None -""" def end_record(op_time, txn_num, txn): txn[txn_num].end_ts = op_time -""" -Record and process database operations. - -Args: -- total_num (int): The total number of database operations. -- query (str): The SQL query representing a database operation. -- txn (list): A list of transactions. -- data_op_list (list): A list of data operations. -- version_list (list): A list of version information for data operations. - -This function records and processes database operations based on the provided SQL query. It updates the transaction list, data -operation list, and version list accordingly. The 'total_num' parameter specifies the total number of database operations. - -Returns: -str: An error message (if any), or an empty string if the operation is successful. 
-""" def operation_record(total_num, query, txn, data_op_list, version_list): error_message = "" op_time = find_data(query, "Q") txn_num = find_data(query, "T") + # print("total_num:{}, query:{},optime: {}, txn_num: {}\n".format(total_num,query, op_time, txn_num)) if op_time == 0 and query.find("INSERT") != -1: init_record(query, version_list) return error_message @@ -528,18 +312,6 @@ def operation_record(total_num, query, txn, data_op_list, version_list): -""" -Remove unfinished operations from the data operation list. - -Args: -- data_op_list (list): A list of data operations. - -This function iterates through the data operation list and removes any unfinished operations based on their operation time. -Unfinished operations are those with an operation time less than 10,000,000. - -Returns: -None -""" # remove failed statements to prevent redundant edges from being built def remove_unfinished_operation(data_op_list): for list1 in data_op_list: @@ -547,20 +319,6 @@ def remove_unfinished_operation(data_op_list): if op.op_time < 10000000: list1.pop(i) -""" -Check for cycles in a directed graph using topological sorting. - -Args: -- edge (List[List[Edge]]): A list representing the directed edges in the graph. -- indegree (List[int]): A list representing the in-degrees of nodes in the graph. -- total (int): The total number of nodes in the graph. - -This function checks for cycles in a directed graph by performing topological sorting. It takes as input the directed edges (`edge`), -in-degrees of nodes (`indegree`), and the total number of nodes in the graph (`total`). - -Returns: -bool: True if a cycle is detected, False otherwise. -""" # toposort to determine whether there is a cycle def check_cycle(edge, indegree, total): q = Queue.Queue() @@ -580,25 +338,6 @@ def check_cycle(edge, indegree, total): return True -""" -Perform depth-first search (DFS) to find and print loops in a directed graph. 
- -Args: -- result_folder (str): The path to the folder where the results will be saved. -- ts_now (str): The current timestamp or identifier for result file naming. -- now (int): The current node being visited. -- type (str): The type of edge leading to the current node ('C' for commit, 'R' for read, 'W' for write, etc.). - -This function performs depth-first search (DFS) to find and print loops in a directed graph. It takes as input the result folder -path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), the current node being visited (`now`), -and the type of edge leading to the current node (`type`). - -The function recursively explores the graph, tracking the visited nodes and edges to detect loops. When a loop is found, it is printed -to a result file in the specified result folder. - -Note: This function assumes that global variables like 'visit', 'visit1', 'path', 'edge_type', and 'edge' are defined elsewhere. - -""" # for loop graphs, print the loop def dfs(result_folder, ts_now, now, type): visit1[now] = 1 @@ -624,21 +363,6 @@ def dfs(result_folder, ts_now, now, type): visit[now] = 0 -""" -Print the paths in a directed graph to a result file. - -Args: -- result_folder (str): The path to the folder where the results will be saved. -- ts_now (str): The current timestamp or identifier for result file naming. -- edge (list of lists): A list of lists representing the directed edges in the graph. - -This function prints the paths in a directed graph to a result file. It takes as input the result folder path (`result_folder`), -the current timestamp or identifier for result file naming (`ts_now`), and a list of lists (`edge`) representing the directed edges -in the graph. - -The function iterates through the edges and writes the paths to the result file in the specified result folder. 
- -""" def print_path(result_folder, ts_now, edge): with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: flag = 0 @@ -652,41 +376,11 @@ def print_path(result_folder, ts_now, edge): f.write("\n\n") -""" -Output the result of cycle detection to a result file. - -Args: -- file (str): The name of the file or input source being analyzed. -- result_folder (str): The path to the folder where the results will be saved. -- ts_now (str): The current timestamp or identifier for result file naming. -- IsCyclic (str): A string indicating whether a cycle was detected. - -This function outputs the result of cycle detection to a result file. It takes as input the name of the file or input source being -analyzed (`file`), the result folder path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), -and a string (`IsCyclic`) indicating whether a cycle was detected. - -The function writes the result, including the file name and the cyclic status, to the specified result file in the result folder. - -""" def output_result(file, result_folder, ts_now, IsCyclic): with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: f.write(file + ": " + IsCyclic + "\n") -""" -Print an error message to a result file. - -Args: -- result_folder (str): The path to the folder where the results will be saved. -- ts_now (str): The current timestamp or identifier for result file naming. -- error_message (str): The error message to be printed. - -This function prints an error message to a result file. It takes as input the result folder path (`result_folder`), the current -timestamp or identifier for result file naming (`ts_now`), and the error message (`error_message`) to be printed. - -The function appends the error message to the specified result file in the result folder and adds a newline for separation. 
- -""" def print_error(result_folder, ts_now, error_message): with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: f.write(error_message + "\n") @@ -714,12 +408,13 @@ def print_error(result_folder, ts_now, error_message): lines = f.readlines() total_num = get_total(lines) # total number of variables - txn = [Txn() for i in range(total_num + 2)] # total num of transaction + total_num_txn = get_total_txn(lines) # total number of txn + txn = [Txn() for i in range(total_num_txn + 2)] # total num of transaction data_op_list = [[] for i in range(total_num + 2)] # record every operation that occurs on the variable - edge = [[] for i in range(total_num + 2)] # all edges from the current point - indegree = [0] * (total_num + 2) # in-degree of each point - visit = [0] * (total_num + 2) # in dfs, whether the current point has been visited - visit1 = [0] * (total_num + 2) # we will only use unvisited points as the starting point of the dfs + edge = [[] for i in range(total_num_txn + 2)] # all edges from the current point + indegree = [0] * (total_num_txn + 2) # in-degree of each point + visit = [0] * (total_num_txn + 2) # in dfs, whether the current point has been visited + visit1 = [0] * (total_num_txn + 2) # we will only use unvisited points as the starting point of the dfs path = [] # points in cycle edge_type = [] # edge type of the cycle version_list = [[] for i in range(total_num + 2)] @@ -731,6 +426,7 @@ def print_error(result_folder, ts_now, error_message): query = query.replace(" ", "") if query.find("Rollback") != -1 or query.find("Timeout") != -1: go_end = True + print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt")) error_message = operation_record(total_num, query, txn, data_op_list, version_list) if error_message != "": break diff --git a/src/dbtest/src/mda_detect_modify.md b/src/dbtest/src/mda_detect_modify.md new file mode 100644 index 
00000000..3af9e9f3 --- /dev/null +++ b/src/dbtest/src/mda_detect_modify.md @@ -0,0 +1,31 @@ +[text](mda_detect.py) 修改日志 +# 思考 +1. 用于加边建立图的节点对应一个操作还是一个事务。 + +# 原来代码中问题 +### 数组访问越界问题 +现象 +```python +total_num = get_total(lines) # 统计的个数是插入数据的个数,不是事务的个数。 +txn = [Txn() for i in range(total_num + 2)] # 导致构造的 txn 数组较小 +.... # 还有 indegree edge 数组的大小应该是事务的个数。 +``` +解决:构造一个新函数获取事务个数。 +```python +# find total Txn number +def get_total_txn(lines): + num = 0 + for query in lines: + query = query.replace("\n", "") + query = query.replace(" ", "") + if query[0:1] == "Q" and query.find("T") != -1: + tmp = find_data(query, "T") + num = max(num, tmp) + return num +total_num_txn = get_total_txn(lines) # total number of txn +``` +效果:不同数据使用不同的初始长度 +```python +# total_num: data_op_list, version_list +# total_num_txn: txn, edge, total_num_txn, visit, visit1 +``` From c73c237c3c25e90e23d3023afb43698da9a8398d Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Sat, 6 Jul 2024 17:13:38 +0000 Subject: [PATCH 02/16] add txn.isolation --- src/dbtest/src/{ => doc}/mda_detect_modify.md | 6 ++++-- src/dbtest/src/mda_detect.py | 19 ++++++++++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) rename src/dbtest/src/{ => doc}/mda_detect_modify.md (78%) diff --git a/src/dbtest/src/mda_detect_modify.md b/src/dbtest/src/doc/mda_detect_modify.md similarity index 78% rename from src/dbtest/src/mda_detect_modify.md rename to src/dbtest/src/doc/mda_detect_modify.md index 3af9e9f3..7e29b4e6 100644 --- a/src/dbtest/src/mda_detect_modify.md +++ b/src/dbtest/src/doc/mda_detect_modify.md @@ -1,6 +1,7 @@ [text](mda_detect.py) 修改日志 -# 思考 -1. 用于加边建立图的节点对应一个操作还是一个事务。 +# 思考&分析 +1. 用于加边建立图的节点对应一个操作还是一个事务。答:一个事务。 +2. 
目标:一个对数据库的操作文件(运行效果文件)中有多个事务,每一个事务有不同的隔离级别,通过执行结果和隔离级别判断是否满足一致性 # 原来代码中问题 ### 数组访问越界问题 @@ -29,3 +30,4 @@ total_num_txn = get_total_txn(lines) # total number of txn # total_num: data_op_list, version_list # total_num_txn: txn, edge, total_num_txn, visit, visit1 ``` + diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index ce68dd31..b859c38b 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -33,7 +33,7 @@ class Txn: def __init__(self): self.begin_ts = -1 self.end_ts = 99999999999999999999 - self.isolation = "" + self.isolation = "serializable" # find total variable number @@ -77,6 +77,16 @@ def find_data(query, target): data_value = int(data_value) return data_value +# extract the isolation from content +def find_isolation(query): + if query.find("read-uncommitted") != -1: + return "read-uncommitted" + if query.find("read-committed") != -1: + return "read-committed" + if query.find("repeatable-read") != -1: + return "repeatable-read" + if query.find("serializable") != -1: + return "serializable" # when a statement is executed, set the end time and modify the version list def set_finish_time(op_time, data_op_list, query, txn, version_list): @@ -120,7 +130,7 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list): def check_concurrency(data1, data2, txn): if txn[data2.txn_num].begin_ts < txn[data1.txn_num].end_ts: return True - elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: + elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: # TODO maybe a bug: don't need return True else: return False @@ -138,7 +148,7 @@ def get_edge_type(data1, data2, txn): # before, after = data2, data1 # else: # before, after = data1, data2 - if data2.op_time > txn[data1.txn_num].end_ts: + if data2.op_time > txn[data1.txn_num].end_ts: # TODO maybe a bug, before after state = "C" else: state = "" @@ -290,6 +300,9 @@ def operation_record(total_num, query, txn, data_op_list, version_list): if 
query.find("finished") != -1: set_finish_time(op_time, data_op_list, query, txn, version_list) return error_message + if op_time == -1 and txn_num != -1 and query.find("set_isolation") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: + # query such as "T2 set_isolation=serializable " + txn[txn_num].isolation = find_isolation(query) if op_time == -1 or txn_num == -1: return error_message if query.find("SELECT") != -1: From 67d5ecc94551103e16c3b52ef0d248237cc9fa2e Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Sun, 7 Jul 2024 15:01:35 +0000 Subject: [PATCH 03/16] demo implement --- src/dbtest/src/doc/mda_detect_modify.md | 17 +++++- src/dbtest/src/mda_detect.py | 71 ++++++++++++++++++++----- 2 files changed, 73 insertions(+), 15 deletions(-) diff --git a/src/dbtest/src/doc/mda_detect_modify.md b/src/dbtest/src/doc/mda_detect_modify.md index 7e29b4e6..e249f6bf 100644 --- a/src/dbtest/src/doc/mda_detect_modify.md +++ b/src/dbtest/src/doc/mda_detect_modify.md @@ -2,7 +2,7 @@ # 思考&分析 1. 用于加边建立图的节点对应一个操作还是一个事务。答:一个事务。 2. 目标:一个对数据库的操作文件(运行效果文件)中有多个事务,每一个事务有不同的隔离级别,通过执行结果和隔离级别判断是否满足一致性 - +3. 原来输出未所有事务是否满足一致性,现在输出为每个事务是否满足一致性? 单个整体报错 or 多个报错? 
多个,每个错误都识别,兼容单个整体报错(实现较难) # 原来代码中问题 ### 数组访问越界问题 现象 @@ -31,3 +31,18 @@ total_num_txn = get_total_txn(lines) # total number of txn # total_num_txn: txn, edge, total_num_txn, visit, visit1 ``` + +### 默认字符串少了空格 +```python + pos = query.find("finished at:") + pos += len("finished at:") +``` + +### "R" 类型的操作并没有修改 value 值为下标: +```python + if data1.value <= data2.value: + before, after = data1, data2 + else: + before, after = data2, data1 +``` + diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index b859c38b..0b3393c4 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -42,11 +42,11 @@ def get_total(lines): for query in lines: query = query.replace("\n", "") query = query.replace(" ", "") - if query[0:2] == "Q0" and query.find("INSERT") != -1: + if query.find("INSERT") != -1: # query[0:2] == "Q0" and tmp = find_data(query, "(") num = max(num, tmp) - elif query[0:2] == "Q1": - break + # elif query[0:2] == "Q1": + # break return num # find total Txn number @@ -90,8 +90,8 @@ def find_isolation(query): # when a statement is executed, set the end time and modify the version list def set_finish_time(op_time, data_op_list, query, txn, version_list): - pos = query.find("finishedat:") - pos += len("finishedat:") + pos = query.find("finished at:") + pos += len("finished at:") data_value = "" tmp, tmp1 = "", "" for i in range(pos, len(query)): @@ -123,7 +123,6 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list): version_list[i].append(op.value) op.value = len(version_list[i]) - 1 - # if both transactions are running # or the start time of the second transaction is less than the end time of the first transaction # we think they are concurrent @@ -136,19 +135,26 @@ def check_concurrency(data1, data2, txn): return False +def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type): + for e in edge[src_txn]: + if e.out == tar_txn and e.type[0] == src_type and e.type[-1] == tar_type: + return True + return False + # 
decide which operation comes first depending on the read or write version # if later operation happened after the first txn commit time, edge type will add "C" def get_edge_type(data1, data2, txn): - if data1.value <= data2.value: - before, after = data1, data2 - else: - before, after = data2, data1 + # if data1.value <= data2.value: + # before, after = data1, data2 + # else: + # before, after = data2, data1 + before, after = data1, data2 # if data1.op_type == "D" or data2.op_type == "D": # if data1.value < data2.value: # before, after = data2, data1 # else: # before, after = data1, data2 - if data2.op_time > txn[data1.txn_num].end_ts: # TODO maybe a bug, before after + if data2.op_time > txn[data1.txn_num].end_ts: state = "C" else: state = "" @@ -165,9 +171,45 @@ def build_graph(data_op_list, indegree, edge, txn): def insert_edge(data1, data2, indegree, edge, txn): if check_concurrency(data1, data2, txn): edge_type, data1, data2 = get_edge_type(data1, data2, txn) - if edge_type != "RR" and edge_type != "RCR" and data1.txn_num != data2.txn_num: + # if edge_type != "RR" and edge_type != "RCR" and data1.txn_num != data2.txn_num: + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + if edge_type == "WW" or edge_type == "WCW": indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + elif data1.isolation == "read-uncommitted": + if edge_type[0] != 'R' and not(data2.isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): + if edge_type[-1] == 'R': # not R -- R + if data2.isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + if data2.isolation == "repeatable-read" and edge_type[0]== 'W': + indegree[data2.txn_num] += 1 + 
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + if data2.isolation == "serializable": + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + elif edge_type[-1] != 'R': # not R -- not R + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + elif data1.isolation == "read-committed" or data1.isolation == "repeatable-read" or data1.isolation == "serializable": + if edge_type[0] != 'R' and not(data2.isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): + if edge_type[-1] == 'R': # not R -- R + if data2.isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + if data2.isolation == "repeatable-read" and edge_type[0]== 'W': + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + if data2.isolation == "serializable": + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + elif edge_type[-1] != 'R': # not R -- not R + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + elif edge_type[0] == 'R' and edge_type[-1] != 'R': + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) def init_record(query, version_list): @@ -238,12 +280,12 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list): left = find_data(query, "k>") + 1 right = find_data(query, "k<") for i in range(left, right): - data_op_list[i].append(Operation("P", txn_num, op_time, i)) + data_op_list[i].append(Operation("R", txn_num, op_time, i)) # P elif query.find("value1>") != -1: left = find_data(query, "value1>") + 1 right = find_data(query, "value1<") for i in range(left, right): - 
data_op_list[i].append(Operation("P", txn_num, op_time, i)) + data_op_list[i].append(Operation("R", txn_num, op_time, i)) # p else: # it means select all rows in table for i in range(total_num): @@ -303,6 +345,7 @@ def operation_record(total_num, query, txn, data_op_list, version_list): if op_time == -1 and txn_num != -1 and query.find("set_isolation") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: # query such as "T2 set_isolation=serializable " txn[txn_num].isolation = find_isolation(query) + return error_message if op_time == -1 or txn_num == -1: return error_message if query.find("SELECT") != -1: From 3f90a6ef2a6f4797304152e7b0e6e40ff0dd1f8d Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Tue, 9 Jul 2024 07:21:45 +0000 Subject: [PATCH 04/16] bugs --- src/dbtest/src/mda_detect.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 0b3393c4..0136e3fe 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + # /* # * Tencent is pleased to support the open source community by making 3TS available. 
# * @@ -177,31 +179,31 @@ def insert_edge(data1, data2, indegree, edge, txn): if edge_type == "WW" or edge_type == "WCW": indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif data1.isolation == "read-uncommitted": - if edge_type[0] != 'R' and not(data2.isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): + elif txn[data1.txn_num].isolation == "read-uncommitted": + if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): if edge_type[-1] == 'R': # not R -- R - if data2.isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 + if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'): # 可能脏读 indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if data2.isolation == "repeatable-read" and edge_type[0]== 'W': + if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if data2.isolation == "serializable": + if txn[data2.txn_num].isolation == "serializable": indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) elif edge_type[-1] != 'R': # not R -- not R indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif data1.isolation == "read-committed" or data1.isolation == "repeatable-read" or data1.isolation == "serializable": - if edge_type[0] != 'R' and not(data2.isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): + elif txn[data1.txn_num].isolation == 
"read-committed" or txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable": + if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): if edge_type[-1] == 'R': # not R -- R - if data2.isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 + if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if data2.isolation == "repeatable-read" and edge_type[0]== 'W': + if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if data2.isolation == "serializable": + if txn[data2.txn_num].isolation == "serializable": indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) elif edge_type[-1] != 'R': # not R -- not R @@ -345,6 +347,7 @@ def operation_record(total_num, query, txn, data_op_list, version_list): if op_time == -1 and txn_num != -1 and query.find("set_isolation") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: # query such as "T2 set_isolation=serializable " txn[txn_num].isolation = find_isolation(query) + print(str(txn_num)+"------------------"+txn[txn_num].isolation) return error_message if op_time == -1 or txn_num == -1: return error_message @@ -443,7 +446,7 @@ def print_error(result_folder, ts_now, error_message): f.write("\n\n") -run_result_folder = "pg/serializable" +run_result_folder = "pg/repeatable-read" result_folder = "check_result/" + run_result_folder do_test_list = "do_test_list.txt" #ts_now = "_2param_3txn_insert" From 
f0a40e929e7359d05fac50417b5a9692168d477e Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Tue, 9 Jul 2024 08:39:40 +0000 Subject: [PATCH 05/16] graph print --- src/dbtest/src/mda_detect.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 0136e3fe..b90e5d08 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -21,7 +21,8 @@ class Edge: def __init__(self, type, out): self.type = type self.out = out - + def __repr__(self): + return "Edge(type={}, out={})".format(self.type, self.out) class Operation: def __init__(self, op_type, txn_num, op_time, value): @@ -37,6 +38,12 @@ def __init__(self): self.end_ts = 99999999999999999999 self.isolation = "serializable" +# print edge after build graph +def print_graph(edge): + for i, edges in enumerate(edge): + print("Transaction {}:".format(i)) + for e in edges: + print(" {}".format(e)) # find total variable number def get_total(lines): @@ -485,7 +492,7 @@ def print_error(result_folder, ts_now, error_message): query = query.replace(" ", "") if query.find("Rollback") != -1 or query.find("Timeout") != -1: go_end = True - print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt")) + # print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt")) error_message = operation_record(total_num, query, txn, data_op_list, version_list) if error_message != "": break @@ -498,6 +505,7 @@ def print_error(result_folder, ts_now, error_message): cycle = False remove_unfinished_operation(data_op_list) build_graph(data_op_list, indegree, edge, txn) + print_graph(edge) if not go_end: cycle = check_cycle(edge, indegree, total_num + 2) if cycle: From 5338b6ac66119d072b7aa855f9750f90fd0c64ad Mon Sep 17 00:00:00 2001 From: 
dreamin <2534393465@qq.com> Date: Fri, 12 Jul 2024 02:58:45 +0000 Subject: [PATCH 06/16] implement --- src/dbtest/src/doc/mda_detect_modify.md | 3 + src/dbtest/src/doc/mda_detect_read.md | 36 ++++ src/dbtest/src/mda_detect.py | 221 +++++++++++++++--------- 3 files changed, 183 insertions(+), 77 deletions(-) create mode 100644 src/dbtest/src/doc/mda_detect_read.md diff --git a/src/dbtest/src/doc/mda_detect_modify.md b/src/dbtest/src/doc/mda_detect_modify.md index e249f6bf..3c64dbda 100644 --- a/src/dbtest/src/doc/mda_detect_modify.md +++ b/src/dbtest/src/doc/mda_detect_modify.md @@ -3,6 +3,9 @@ 1. 用于加边建立图的节点对应一个操作还是一个事务。答:一个事务。 2. 目标:一个对数据库的操作文件(运行效果文件)中有多个事务,每一个事务有不同的隔离级别,通过执行结果和隔离级别判断是否满足一致性 3. 原来输出未所有事务是否满足一致性,现在输出为每个事务是否满足一致性? 单个整体报错 or 多个报错? 多个,每个错误都识别,兼容单个整体报错(实现较难) +4. 有两个检测思路: + 1. 修改建立图的过程中加边策略,保留循环检测流程。(当前实现方式) + 2. 保留加边策略,修改冲突检测流程。 # 原来代码中问题 ### 数组访问越界问题 现象 diff --git a/src/dbtest/src/doc/mda_detect_read.md b/src/dbtest/src/doc/mda_detect_read.md new file mode 100644 index 00000000..ebf0b144 --- /dev/null +++ b/src/dbtest/src/doc/mda_detect_read.md @@ -0,0 +1,36 @@ + +## mda_detect.py 代码功能 +主要用于检测数据库事务之间的并发关系,以及是否存在循环依赖。具体来说,代码通过解析输入的SQL语句,构建事务操作的有向图,检测是否存在循环依赖(即事务之间是否存在无法解决的并发冲突),并输出结果。 +### 变量命名含义 + +| 名称 | 含义 | 备注 | +| ------------ | ------------------------------------- | ------------------- | +| data | 一次操作信息:type、txn_num、op_time、op_data | | +| data_op_list | 数据操作列表 【数据key:【操作...】, 数据key:【操作...】】 | | +| indegree | 下标:事务号;元素:依赖该事务的事务个数 | | +| edge | 下标:事务号;元素:该事务的出边 | | +| version_list | key:数据编号;value:数据的值的历史记录 | 下标是 value? 
| +| total_num | 数据库操作总数 | 对于数据库全表扫描类的操作有关键作用。 | + +### 函数列表 + +| 函数 | 功能 | 备注 | +| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------- | +| get_total | 获取一个测试文件中 Q0 部分插入的数据个数 | key 的个数 | +| find_data | 特定位置提取一个数字 | | +| set_finish_time | 在一个数据库操作语句执行完毕后更新相关的时间戳:finishedat
1. 所有事务中 begin_ts | end_ts 和 op_time 相等的进行替换
2. 所有操作中 op_time 和 op_time 相等的进行替换
3. version_list 保存操作历史值 op.value
4. op.value 保存 version_list 历史下标 | | +| check_concurrency | 检查两个事务和是否并发 | 通过开始时间和结束时间来判断,默认开始时间之间已经比较过了吗? | +| get_edge_type | 确定两个操作之间的边的类型,添加C 标志,跨事务操作,同时返回新的操作顺序 | data1 和 data2 是同一个 Key 的前后两个操作 | +| build_graph | 建立一个有向图,这个图表示不同操作之间的并发关系 | 只会在同一组操作之间建立边 | +| insert_edge | 具体的插边操作(check_concurrency 的前提下插入 get_edge_type 边):
不同事务之间有并发读写冲突; | data1 发生时间默认在data2 之前 | +| init_record | 根据查询中的信息初始化版本列表中的记录。 | | +| readVersion_record | 处理数据库查询操作 | 只是更改了: op.value | +| read_record | 根据查询中的信息读取记录并更新数据操作。增加到 data_op_list 中。 | | +| write_record | 根据查询和更新数据操作中的信息写入记录。增加到 data_op_list 中。 | | +| delete_record | 根据查询中的信息删除记录并更新数据操作。增加到 data_op_list 中。 | | +| insert_record | 根据查询中的信息插入记录并更新数据操作。 | | +| end_record | 设置事务的结束时间戳。 | | +| operation_record | 记录并且处理数据库操作。 | | +| remove_unfinished_operation | 删除失败的语句以防止构建冗余边 | | +| check_cycle | 在有向图中查找环 | | +| dfs | 在有环的有向图中找环 | | diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index b90e5d08..55e1f1c1 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -39,12 +39,22 @@ def __init__(self): self.isolation = "serializable" # print edge after build graph -def print_graph(edge): +def print_graph(edge,txn): for i, edges in enumerate(edge): - print("Transaction {}:".format(i)) + if i == 0 or i == len(edge)-1: + continue + print("Transaction {}:-----{}-----".format(i,txn[i].isolation)) for e in edges: print(" {}".format(e)) + +# print data_op_list +def print_data_op_list(data_op_list): + for k,list in enumerate(data_op_list): + print("\nk:{}---".format(k)) + for i, data in enumerate(list): + print("op:{}--{}-".format(data.op_type,data.txn_num)) + # find total variable number def get_total(lines): num = 0 @@ -99,20 +109,21 @@ def find_isolation(query): # when a statement is executed, set the end time and modify the version list def set_finish_time(op_time, data_op_list, query, txn, version_list): - pos = query.find("finished at:") - pos += len("finished at:") - data_value = "" - tmp, tmp1 = "", "" - for i in range(pos, len(query)): - if query[i].isdigit(): - tmp += query[i] - else: - for j in range(3 - len(tmp)): - tmp1 += "0" - tmp = tmp1 + tmp - data_value += tmp - tmp, tmp1 = "", "" - data_value = int(data_value) + # pos = query.find("finished at:") + # pos += len("finished at:") + # data_value = "" + # tmp, 
tmp1 = "", "" + # for i in range(pos, len(query)): + # if query[i].isdigit(): + # tmp += query[i] + # else: + # for j in range(3 - len(tmp)): + # tmp1 += "0" + # tmp = tmp1 + tmp + # data_value += tmp + # tmp, tmp1 = "", "" + # data_value = int(data_value) + data_value = int(op_time) for t in txn: if t.begin_ts == op_time: t.begin_ts = data_value @@ -177,48 +188,67 @@ def build_graph(data_op_list, indegree, edge, txn): insert_edge(list1[j], data, indegree, edge, txn) + def insert_edge(data1, data2, indegree, edge, txn): if check_concurrency(data1, data2, txn): edge_type, data1, data2 = get_edge_type(data1, data2, txn) - # if edge_type != "RR" and edge_type != "RCR" and data1.txn_num != data2.txn_num: - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if edge_type == "WW" or edge_type == "WCW": + if data1.txn_num == data2.txn_num or edge_type in ["RCR", "RR"]: + return + #* read-uncommitted: Dirty Write + # WI 不存在,如果有,那么一定会有 WD + DI 的等效边 + # II 不存在,如果有,那么一定会有 ID + DI 的等效边 + # DW 允许存在, UPDATE 时使用条件查询包含 D 的数据 + # DD 不存在,如果有,那么一定会有 DI + ID 的等效边 + if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]: + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + #* read-committed: Dirty Read + elif edge_type in ["WCR","WR"] and (txn[data2.txn_num].isolation == "read-committed" or txn[data2.txn_num].isolation == "repeatable-read" or txn[data2.txn_num].isolation == "serializable"): + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + #* repeatable-read: Unrepeatable Read + elif edge_type in ["RCW","RW"] and (txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable"): indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif txn[data1.txn_num].isolation == "read-uncommitted": - if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation 
== "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): - if edge_type[-1] == 'R': # not R -- R - if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'): # 可能脏读 - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if txn[data2.txn_num].isolation == "serializable": - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif edge_type[-1] != 'R': # not R -- not R - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif txn[data1.txn_num].isolation == "read-committed" or txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable": - if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): - if edge_type[-1] == 'R': # not R -- R - if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - if txn[data2.txn_num].isolation == "serializable": - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif edge_type[-1] != 'R': # not R -- not R - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - elif edge_type[0] == 'R' and 
edge_type[-1] != 'R': - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + #* serializable: Phantom Read + elif edge_type in ["ICR","IR","DCR","DR","RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable": + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + + # 入边 + # elif txn[data1.txn_num].isolation == "read-uncommitted": + # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): + # if edge_type[-1] == 'R': # not R -- R + # if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'): # 可能脏读 + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # if txn[data2.txn_num].isolation == "serializable": + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # elif edge_type[-1] != 'R': # not R -- not R + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # elif txn[data1.txn_num].isolation == "read-committed" or txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable": + # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): + # if edge_type[-1] == 'R': # not R -- R + # if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 + # indegree[data2.txn_num] += 1 + # 
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # if txn[data2.txn_num].isolation == "serializable": + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # elif edge_type[-1] != 'R': # not R -- not R + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + # elif edge_type[0] == 'R' and edge_type[-1] != 'R': + # indegree[data2.txn_num] += 1 + # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) def init_record(query, version_list): @@ -297,7 +327,7 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list): data_op_list[i].append(Operation("R", txn_num, op_time, i)) # p else: # it means select all rows in table - for i in range(total_num): + for i in range(total_num+1): data_op_list[i].append(Operation("R", txn_num, op_time, i)) @@ -312,7 +342,21 @@ def write_record(op_time, txn_num, txn, data_op_list): op_data = find_data(query, "k=") op_value = find_data(query, "v=") data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value)) - + # for predicate cases + elif query.find("k>") != -1: + left = find_data(query, "k>") + 1 + right = find_data(query, "k<") + for i in range(left, right): + data_op_list[i].append(Operation("W", txn_num, op_time, i)) # P + elif query.find("value1>") != -1: + left = find_data(query, "value1>") + 1 + right = find_data(query, "value1<") + for i in range(left, right): + data_op_list[i].append(Operation("W", txn_num, op_time, i)) # p + else: + # it means select all rows in table + for i in range(total_num+1): + data_op_list[i].append(Operation("W", txn_num, op_time, i)) def delete_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: @@ -323,7 +367,21 @@ def delete_record(op_time, txn_num, txn, data_op_list): elif 
query.find("k=") != -1: op_data = find_data(query, "k=") data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data)) - + # for predicate cases + elif query.find("k>") != -1: + left = find_data(query, "k>") + 1 + right = find_data(query, "k<") + for i in range(left, right): + data_op_list[i].append(Operation("D", txn_num, op_time, i)) # P + elif query.find("value1>") != -1: + left = find_data(query, "value1>") + 1 + right = find_data(query, "value1<") + for i in range(left, right): + data_op_list[i].append(Operation("D", txn_num, op_time, i)) # p + else: + # it means select all rows in table + for i in range(total_num+1): + data_op_list[i].append(Operation("D", txn_num, op_time, i)) def insert_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1 and op_time != 0: @@ -337,6 +395,7 @@ def end_record(op_time, txn_num, txn): txn[txn_num].end_ts = op_time + def operation_record(total_num, query, txn, data_op_list, version_list): error_message = "" op_time = find_data(query, "Q") @@ -345,35 +404,28 @@ def operation_record(total_num, query, txn, data_op_list, version_list): if op_time == 0 and query.find("INSERT") != -1: init_record(query, version_list) return error_message - if query.find("returnresult") != -1: + if query.find("returnresult") != -1: #! 1"returnresult" maybe don't exist error_message = readVersion_record(query, op_time, data_op_list, version_list) return error_message - if query.find("finished") != -1: + if query.find("finished") != -1: #! 
"finished" maybe don't exist set_finish_time(op_time, data_op_list, query, txn, version_list) return error_message - if op_time == -1 and txn_num != -1 and query.find("set_isolation") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: - # query such as "T2 set_isolation=serializable " - txn[txn_num].isolation = find_isolation(query) - print(str(txn_num)+"------------------"+txn[txn_num].isolation) - return error_message if op_time == -1 or txn_num == -1: return error_message - if query.find("SELECT") != -1: + if query.find("BEGIN") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: + txn[txn_num].isolation = find_isolation(query) + elif query.find("SELECT") != -1: read_record(op_time, txn_num, total_num, txn, data_op_list) - return error_message elif query.find("UPDATE") != -1: write_record(op_time, txn_num, txn, data_op_list) - return error_message - elif query.find("DELETE") != -1: + elif query.find("DELETE") != -1: delete_record(op_time, txn_num, txn, data_op_list) - return error_message - elif query.find("INSERT") != -1: + elif query.find("INSERT") != -1: #! assume existing data will not be inserted ("Rollback") insert_record(op_time, txn_num, txn, data_op_list) - return error_message elif query.find("COMMIT") != -1: if op_time != 0: end_record(op_time, txn_num, txn) - return error_message + set_finish_time(op_time, data_op_list, query, txn, version_list) return error_message @@ -453,9 +505,21 @@ def print_error(result_folder, ts_now, error_message): f.write("\n\n") -run_result_folder = "pg/repeatable-read" + + + +#! 
------Some assumption------ +# 在任何隔离级别事务的修改互相可见,即等价于单一存储,无读写缓冲 +# 在输入文件中有设置各个事务隔离级别的语句,在 "BEGIN 之后" + # BEGIN T1 set_isolation=repeatable-read + # BEGIN T2 set_isolation=serializable + # BEGIN T3 set_isolation=read-uncommitted + # BEGIN T4 set_isolation=read-committed +# 假定插入的数据 key 是从 0 向上递增的顺序 + +run_result_folder = "pg/mda_detect_test" result_folder = "check_result/" + run_result_folder -do_test_list = "do_test_list.txt" +do_test_list = "mda_detect_test_list.txt" #ts_now = "_2param_3txn_insert" ts_now = time.strftime("%Y%m%d_%H%M%S", time.localtime()) if not os.path.exists(result_folder): @@ -503,16 +567,19 @@ def print_error(result_folder, ts_now, error_message): continue cycle = False - remove_unfinished_operation(data_op_list) + # remove_unfinished_operation(data_op_list) 动态测试中默认所有的执行时间 Qi 都没有 finish 字段 build_graph(data_op_list, indegree, edge, txn) - print_graph(edge) + print("--------file:{}--------".format(file)) + print_graph(edge,txn) + # print_data_op_list(data_op_list) if not go_end: - cycle = check_cycle(edge, indegree, total_num + 2) + cycle = check_cycle(edge, indegree, total_num_txn+2) if cycle: output_result(file, result_folder, ts_now, "Cyclic") - for i in range(total_num + 2): + for i in range(total_num_txn + 2): if visit1[i] == 0: dfs(result_folder, ts_now, i, "null") else: output_result(file, result_folder, ts_now, "Avoid") print_path(result_folder, ts_now, edge) + print("---------------------------------\n") \ No newline at end of file From f4909ca6c6a4add3778731851d7be63bc65ef0b2 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Fri, 12 Jul 2024 03:04:13 +0000 Subject: [PATCH 07/16] test case --- src/dbtest/mda_detect_test_list.txt | 10 +++++++ src/dbtest/pg/mda_detect_test/aa.txt | 20 +++++++++++++ src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt | 27 ++++++++++++++++++ .../pg/mda_detect_test/rc_rc_cyclic.txt | 28 +++++++++++++++++++ .../pg/mda_detect_test/rr_rr_cyclic.txt | 27 ++++++++++++++++++ 
.../mda_detect_test/rr_rr_cyclic_commit.txt | 25 +++++++++++++++++ .../pg/mda_detect_test/rr_s_s_avoid.txt | 25 +++++++++++++++++ src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt | 28 +++++++++++++++++++ src/dbtest/pg/mda_detect_test/s_s_avoid.txt | 25 +++++++++++++++++ src/dbtest/pg/mda_detect_test/s_s_cyclic.txt | 28 +++++++++++++++++++ .../pg/mda_detect_test/s_s_cyclic_pr.txt | 20 +++++++++++++ 11 files changed, 263 insertions(+) create mode 100644 src/dbtest/mda_detect_test_list.txt create mode 100755 src/dbtest/pg/mda_detect_test/aa.txt create mode 100755 src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt create mode 100755 src/dbtest/pg/mda_detect_test/rc_rc_cyclic.txt create mode 100755 src/dbtest/pg/mda_detect_test/rr_rr_cyclic.txt create mode 100755 src/dbtest/pg/mda_detect_test/rr_rr_cyclic_commit.txt create mode 100755 src/dbtest/pg/mda_detect_test/rr_s_s_avoid.txt create mode 100755 src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt create mode 100755 src/dbtest/pg/mda_detect_test/s_s_avoid.txt create mode 100755 src/dbtest/pg/mda_detect_test/s_s_cyclic.txt create mode 100755 src/dbtest/pg/mda_detect_test/s_s_cyclic_pr.txt diff --git a/src/dbtest/mda_detect_test_list.txt b/src/dbtest/mda_detect_test_list.txt new file mode 100644 index 00000000..82f9304a --- /dev/null +++ b/src/dbtest/mda_detect_test_list.txt @@ -0,0 +1,10 @@ + +ru_ru_avoid +rc_rc_cyclic +rc_rc_avoid +rr_rr_cyclic +rr_rr_cyclic_commit +s_s_cyclic +s_s_avoid +rr_s_s_avoid + diff --git a/src/dbtest/pg/mda_detect_test/aa.txt b/src/dbtest/pg/mda_detect_test/aa.txt new file mode 100755 index 00000000..2b19e7c5 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/aa.txt @@ -0,0 +1,20 @@ + + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 1); +Q0-T1-INSERT INTO t1 VALUES (2, 2); +Q0-T1-COMMIT; + +Q1-T1-BEGIN set_isolation=repeatable-read; +Q2-T1-SELECT * FROM t1; + + Q3-T2-BEGIN 
set_isolation=serializable; + Q4-T2-INSERT INTO t1 VALUES (3, 3); + Q5-T2-COMMIT; + +Q6-T1-SELECT * FROM t1 WHERE k=1; +Q7-T1-SELECT * FROM t1; +Q8-T1-COMMIT; diff --git a/src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt b/src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt new file mode 100755 index 00000000..b0f99e62 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/rc_rc_avoid.txt @@ -0,0 +1,27 @@ + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN set_isolation=read-committed; +Q2-T1-SELECT * FROM t1 WHERE k=1; + + Q3-T2-BEGIN set_isolation=read-committed; + Q4-T2-UPDATE t1 SET v=1 WHERE k=1; + +Q6-T1-SELECT * FROM t1 WHERE k=1; + + Q7-T2-COMMIT; + +Q8-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/rc_rc_cyclic.txt b/src/dbtest/pg/mda_detect_test/rc_rc_cyclic.txt new file mode 100755 index 00000000..3a6c5f09 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/rc_rc_cyclic.txt @@ -0,0 +1,28 @@ + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN set_isolation=read-committed; +Q2-T1-UPDATE t1 SET v=1 WHERE k=0; + + Q3-T2-BEGIN set_isolation=read-committed; + Q4-T2-UPDATE t1 SET v=1 WHERE k=1; + Q5-T2-SELECT * FROM t1 WHERE k=0; + +Q6-T1-SELECT * FROM t1 WHERE k=1; + + Q7-T2-COMMIT; + +Q8-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/rr_rr_cyclic.txt b/src/dbtest/pg/mda_detect_test/rr_rr_cyclic.txt new file mode 100755 index 00000000..8cf92abd --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/rr_rr_cyclic.txt @@ -0,0 +1,27 @@ + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN 
set_isolation=repeatable-read; +Q2-T1-SELECT * FROM t1 WHERE k=1; + + Q3-T2-BEGIN set_isolation=repeatable-read; + Q4-T2-UPDATE t1 SET v=1 WHERE k=1; + +Q6-T1-SELECT * FROM t1 WHERE k=1; + + Q7-T2-COMMIT; + +Q8-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/rr_rr_cyclic_commit.txt b/src/dbtest/pg/mda_detect_test/rr_rr_cyclic_commit.txt new file mode 100755 index 00000000..aded104d --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/rr_rr_cyclic_commit.txt @@ -0,0 +1,25 @@ + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN set_isolation=repeatable-read; +Q2-T1-SELECT * FROM t1 WHERE k=1; + + Q3-T2-BEGIN set_isolation=repeatable-read; + Q4-T2-UPDATE t1 SET v=1 WHERE k=1; + Q5-T2-COMMIT; + +Q6-T1-SELECT * FROM t1 WHERE k=1; +Q7-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/rr_s_s_avoid.txt b/src/dbtest/pg/mda_detect_test/rr_s_s_avoid.txt new file mode 100755 index 00000000..7ba19745 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/rr_s_s_avoid.txt @@ -0,0 +1,25 @@ + + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 1); +Q0-T1-INSERT INTO t1 VALUES (2, 2); +Q0-T1-COMMIT; + +Q1-T1-BEGIN set_isolation=repeatable-read; +Q2-T1-SELECT * FROM t1 WHERE k=1; + + Q3-T2-BEGIN set_isolation=serializable; + Q4-T2-INSERT INTO t1 VALUES (3, 3); + Q5-T2-COMMIT; + +Q6-T1-SELECT * FROM t1 WHERE k=1; +Q7-T1-SELECT * FROM t1; +Q8-T1-COMMIT; + + Q9-T3-BEGIN set_isolation=serializable ; + Q10-T3-SELECT * FROM t1 WHERE k=3; + Q11-T3-COMMIT; + diff --git a/src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt b/src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt new file mode 100755 index 00000000..09a35143 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/ru_ru_avoid.txt @@ -0,0 +1,28 @@ + + 
+Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN set_isolation=read-uncommitted; +Q2-T1-UPDATE t1 SET v=1 WHERE k=0; + + Q3-T2-BEGIN set_isolation=read-uncommitted; + Q4-T2-UPDATE t1 SET v=1 WHERE k=1; + Q5-T2-SELECT * FROM t1 WHERE k=0; + +Q6-T1-SELECT * FROM t1 WHERE k=1; + + Q7-T2-COMMIT; + +Q8-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/s_s_avoid.txt b/src/dbtest/pg/mda_detect_test/s_s_avoid.txt new file mode 100755 index 00000000..dc18cce5 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/s_s_avoid.txt @@ -0,0 +1,25 @@ + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN set_isolation=serializable ; +Q2-T1-UPDATE t1 SET v=1 WHERE k=0; +Q3-T1-SELECT * FROM t1 WHERE k=1; + Q4-T2-BEGIN set_isolation=serializable; + Q5-T2-UPDATE t1 SET v=1 WHERE k=1; + Q6-T2-SELECT * FROM t1 WHERE k=0; + Q7-T2-COMMIT; + +Q8-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/s_s_cyclic.txt b/src/dbtest/pg/mda_detect_test/s_s_cyclic.txt new file mode 100755 index 00000000..a2f24b06 --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/s_s_cyclic.txt @@ -0,0 +1,28 @@ + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 0); +Q0-T1-COMMIT; + + +Q1-T1-BEGIN set_isolation=serializable ; +Q2-T1-UPDATE t1 SET v=1 WHERE k=0; + + Q3-T2-BEGIN set_isolation=serializable; + Q4-T2-UPDATE t1 SET v=1 WHERE k=1; + Q5-T2-SELECT * FROM t1 WHERE k=0; + +Q6-T1-SELECT * FROM t1 WHERE k=1; + + Q7-T2-COMMIT; + +Q8-T1-COMMIT; + + + + + +Test Result: + diff --git a/src/dbtest/pg/mda_detect_test/s_s_cyclic_pr.txt b/src/dbtest/pg/mda_detect_test/s_s_cyclic_pr.txt new file 
mode 100755 index 00000000..dc47f13f --- /dev/null +++ b/src/dbtest/pg/mda_detect_test/s_s_cyclic_pr.txt @@ -0,0 +1,20 @@ + + + +Q0-T1-DROP TABLE IF EXISTS t1; +Q0-T1-CREATE TABLE t1 (k INT PRIMARY KEY, v INT); +Q0-T1-INSERT INTO t1 VALUES (0, 0); +Q0-T1-INSERT INTO t1 VALUES (1, 1); +Q0-T1-INSERT INTO t1 VALUES (2, 2); +Q0-T1-COMMIT; + +Q1-T1-BEGIN set_isolation=serializable; +Q2-T1-SELECT * FROM t1; + + Q3-T2-BEGIN set_isolation=serializable; + Q4-T2-INSERT INTO t1 VALUES (3, 3); + Q5-T2-COMMIT; + +Q6-T1-SELECT * FROM t1 WHERE k=1; +Q7-T1-SELECT * FROM t1; +Q8-T1-COMMIT; From 8484c7b20a819a03615eb6a08d256ba4d428a456 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Mon, 15 Jul 2024 10:44:50 +0000 Subject: [PATCH 08/16] bug --- src/dbtest/src/mda_detect.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 55e1f1c1..5e885bb2 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -211,10 +211,13 @@ def insert_edge(data1, data2, indegree, edge, txn): indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) #* serializable: Phantom Read - elif edge_type in ["ICR","IR","DCR","DR","RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable": + elif edge_type in ["RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable": indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - + #* serializable: Phantom Read + elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable": + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) # 入边 # elif txn[data1.txn_num].isolation == "read-uncommitted": # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): From 
690703b23fab59ae232527ae3e26538c4dab5338 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Mon, 15 Jul 2024 11:01:04 +0000 Subject: [PATCH 09/16] comment translate --- src/dbtest/src/mda_detect.py | 57 +++++++----------------------------- 1 file changed, 11 insertions(+), 46 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 5e885bb2..79df82ca 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -195,10 +195,10 @@ def insert_edge(data1, data2, indegree, edge, txn): if data1.txn_num == data2.txn_num or edge_type in ["RCR", "RR"]: return #* read-uncommitted: Dirty Write - # WI 不存在,如果有,那么一定会有 WD + DI 的等效边 - # II 不存在,如果有,那么一定会有 ID + DI 的等效边 - # DW 允许存在, UPDATE 时使用条件查询包含 D 的数据 - # DD 不存在,如果有,那么一定会有 DI + ID 的等效边 + # WI does not exist. If it does, there must be an equivalent edge of WD + DI + # II does not exist. If it does, there must be an equivalent edge of ID + DI + # DW is allowed to exist. When UPDATE, use the condition to query the data containing D + # DD does not exist. 
If it does, there must be an equivalent edge of DI + ID if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]: indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) @@ -218,41 +218,6 @@ def insert_edge(data1, data2, indegree, edge, txn): elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable": indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # 入边 - # elif txn[data1.txn_num].isolation == "read-uncommitted": - # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): - # if edge_type[-1] == 'R': # not R -- R - # if txn[data2.txn_num].isolation == "read-committed" and edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'): # 可能脏读 - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # if txn[data2.txn_num].isolation == "serializable": - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # elif edge_type[-1] != 'R': # not R -- not R - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # elif txn[data1.txn_num].isolation == "read-committed" or txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable": - # if edge_type[0] != 'R' and not(txn[data2.txn_num].isolation == "read-uncommitted" and edge_type[-1] == 'R'): #and (edge_type[-1] != 'R' or not check_edge_exit(edge,data2.txn_num,data1.txn_num)): - # if edge_type[-1] == 'R': # not R -- R - # if txn[data2.txn_num].isolation == "read-committed" and 
edge_type[0]== 'W' and not check_edge_exit(edge,data2.txn_num,'R',data1.txn_num,'W'):# 可能脏读 - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # if txn[data2.txn_num].isolation == "repeatable-read" and edge_type[0]== 'W': - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # if txn[data2.txn_num].isolation == "serializable": - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # elif edge_type[-1] != 'R': # not R -- not R - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - # elif edge_type[0] == 'R' and edge_type[-1] != 'R': - # indegree[data2.txn_num] += 1 - # edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) - def init_record(query, version_list): key = find_data(query, "(") @@ -512,13 +477,13 @@ def print_error(result_folder, ts_now, error_message): #! ------Some assumption------ -# 在任何隔离级别事务的修改互相可见,即等价于单一存储,无读写缓冲 -# 在输入文件中有设置各个事务隔离级别的语句,在 "BEGIN 之后" - # BEGIN T1 set_isolation=repeatable-read - # BEGIN T2 set_isolation=serializable - # BEGIN T3 set_isolation=read-uncommitted - # BEGIN T4 set_isolation=read-committed -# 假定插入的数据 key 是从 0 向上递增的顺序 +# The modifications of transactions at any isolation level are mutually visible, which is equivalent to a single storage, without read-write buffer +# There are statements to set the isolation level of each transaction in the input file, after "BEGIN" + # BEGIN T1 set_isolation=repeatable-read + # BEGIN T2 set_isolation=serializable + # BEGIN T3 set_isolation=read-uncommitted + # BEGIN T4 set_isolation=read-committed +# Assume that the inserted data key is in ascending order from 0 run_result_folder = "pg/mda_detect_test" result_folder = "check_result/" + run_result_folder From 3286a99f2cc37642977364be0401321f90306040 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Mon, 15 Jul 2024 14:38:45 +0000 Subject: [PATCH 
10/16] loop detect optimize --- src/dbtest/src/mda_detect.py | 102 +++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 35 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 79df82ca..40d10c9f 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -18,11 +18,12 @@ class Edge: - def __init__(self, type, out): + def __init__(self, type, out, begin_time): self.type = type self.out = out + self.time = begin_time def __repr__(self): - return "Edge(type={}, out={})".format(self.type, self.out) + return "Edge(begin_time={}, type={}, out={})".format(self.time, self.type, self.out) class Operation: def __init__(self, op_type, txn_num, op_time, value): @@ -51,9 +52,10 @@ def print_graph(edge,txn): # print data_op_list def print_data_op_list(data_op_list): for k,list in enumerate(data_op_list): - print("\nk:{}---".format(k)) - for i, data in enumerate(list): - print("op:{}--{}-".format(data.op_type,data.txn_num)) + if k< len(data_op_list)-1: + print("\nk:{}---".format(k)) + for i, data in enumerate(list): + print("op:{}--{}-".format(data.op_type,data.txn_num)) # find total variable number def get_total(lines): @@ -164,17 +166,16 @@ def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type): # decide which operation comes first depending on the read or write version # if later operation happened after the first txn commit time, edge type will add "C" def get_edge_type(data1, data2, txn): - # if data1.value <= data2.value: - # before, after = data1, data2 - # else: - # before, after = data2, data1 - before, after = data1, data2 + if data1.op_time <= data2.op_time: + before, after = data1, data2 + else: + before, after = data2, data1 # if data1.op_type == "D" or data2.op_type == "D": # if data1.value < data2.value: # before, after = data2, data1 # else: # before, after = data1, data2 - if data2.op_time > txn[data1.txn_num].end_ts: + if after.op_time > txn[before.txn_num].end_ts: state = "C" else: 
state = "" @@ -201,23 +202,23 @@ def insert_edge(data1, data2, indegree, edge, txn): # DD does not exist. If it does, there must be an equivalent edge of DI + ID if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]: indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) #* read-committed: Dirty Read elif edge_type in ["WCR","WR"] and (txn[data2.txn_num].isolation == "read-committed" or txn[data2.txn_num].isolation == "repeatable-read" or txn[data2.txn_num].isolation == "serializable"): indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) #* repeatable-read: Unrepeatable Read elif edge_type in ["RCW","RW"] and (txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable"): indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) #* serializable: Phantom Read elif edge_type in ["RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable": indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) #* serializable: Phantom Read elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable": indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num,data1.op_time)) def init_record(query, version_list): key = find_data(query, "(") @@ -425,29 +426,59 @@ def check_cycle(edge, indegree, total): # for loop graphs, print the loop -def dfs(result_folder, ts_now, now, type): - visit1[now] = 1 - if visit[now] == 1: return - visit[now] = 1 
- path.append(now) - edge_type.append(type) - for v in edge[now]: +def dfs(result_folder, ts_now , e): + visit1[e.out] = 1 + if visit[e.out] == 1: return + visit[e.out] = 1 + path.append(e) + for v in edge[e.out]: if visit[v.out] == 0: - dfs(result_folder, ts_now, v.out, v.type) + dfs(result_folder, ts_now, v) else: - path.append(v.out) - edge_type.append(v.type) + path.append(v) with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: - for i in range(0, len(path)): - f.write(str(path[i])) - if i != len(path) - 1: f.write("->" + edge_type[i+1] + "->") - f.write("\n\n") + content = "" + list_loop = [] + for i in range(len(path) - 1, -1, -1): + if i != len(path) - 1 and path[i].out == path[len(path) - 1].out: + break + index = 0 + while(index < len(list_loop) and path[list_loop[index]].time < path[i].time): + index += 1 + list_loop.insert(index,i) + for idx in list_loop: + content = content + "->" + path[idx].type + "->" + str(path[idx].out) + content = str(path[list_loop[-1]].out) + content + "\n\n" + f.write(content) path.pop() - edge_type.pop() path.pop() - edge_type.pop() - visit[now] = 0 - + visit[e.out] = 0 + + +# # for loop graphs, print the loop +# # Contains redundant edge information and the starting point of the ring is unreasonable +# def dfs(result_folder, ts_now, now, type): +# visit1[now] = 1 +# if visit[now] == 1: return +# visit[now] = 1 +# path.append(now) +# edge_type.append(type) +# for v in edge[now]: +# if visit[v.out] == 0: +# dfs(result_folder, ts_now, v.out, v.type) +# else: +# path.append(v.out) +# edge_type.append(v.type) +# with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: +# for i in range(0, len(path)): +# f.write(str(path[i])) +# if i != len(path) - 1: f.write("->" + edge_type[i+1] + "->") +# f.write("\n\n") +# path.pop() +# edge_type.pop() +# path.pop() +# edge_type.pop() +# visit[now] = 0 def print_path(result_folder, ts_now, edge): with open(result_folder + "/check_result" + ts_now + ".txt", 
"a+") as f: @@ -546,7 +577,8 @@ def print_error(result_folder, ts_now, error_message): output_result(file, result_folder, ts_now, "Cyclic") for i in range(total_num_txn + 2): if visit1[i] == 0: - dfs(result_folder, ts_now, i, "null") + # dfs(result_folder, ts_now, i, "null") + dfs(result_folder, ts_now, Edge("null",i,-1)) else: output_result(file, result_folder, ts_now, "Avoid") print_path(result_folder, ts_now, edge) From 168ddc8f23f0d7ed4320d41e0fd6e9eb7d342486 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Mon, 29 Jul 2024 06:52:54 +0000 Subject: [PATCH 11/16] remove doc from commit --- src/dbtest/src/doc/mda_detect_modify.md | 51 ------------------------- src/dbtest/src/doc/mda_detect_read.md | 36 ----------------- 2 files changed, 87 deletions(-) delete mode 100644 src/dbtest/src/doc/mda_detect_modify.md delete mode 100644 src/dbtest/src/doc/mda_detect_read.md diff --git a/src/dbtest/src/doc/mda_detect_modify.md b/src/dbtest/src/doc/mda_detect_modify.md deleted file mode 100644 index 3c64dbda..00000000 --- a/src/dbtest/src/doc/mda_detect_modify.md +++ /dev/null @@ -1,51 +0,0 @@ -[text](mda_detect.py) 修改日志 -# 思考&分析 -1. 用于加边建立图的节点对应一个操作还是一个事务。答:一个事务。 -2. 目标:一个对数据库的操作文件(运行效果文件)中有多个事务,每一个事务有不同的隔离级别,通过执行结果和隔离级别判断是否满足一致性 -3. 原来输出未所有事务是否满足一致性,现在输出为每个事务是否满足一致性? 单个整体报错 or 多个报错? 多个,每个错误都识别,兼容单个整体报错(实现较难) -4. 有两个检测思路: - 1. 修改建立图的过程中加边策略,保留循环检测流程。(当前实现方式) - 2. 保留加边策略,修改冲突检测流程。 -# 原来代码中问题 -### 数组访问越界问题 -现象 -```python -total_num = get_total(lines) # 统计的个数是插入数据的个数,不是事务的个数。 -txn = [Txn() for i in range(total_num + 2)] # 导致构造的 txn 数组较小 -.... 
# 还有 indegree edge 数组的大小应该是事务的个数。 -``` -解决:构造一个新函数获取事务个数。 -```python -# find total Txn number -def get_total_txn(lines): - num = 0 - for query in lines: - query = query.replace("\n", "") - query = query.replace(" ", "") - if query[0:1] == "Q" and query.find("T") != -1: - tmp = find_data(query, "T") - num = max(num, tmp) - return num -total_num_txn = get_total_txn(lines) # total number of txn -``` -效果:不同数据使用不同的初始长度 -```python -# total_num: data_op_list, version_list -# total_num_txn: txn, edge, total_num_txn, visit, visit1 -``` - - -### 默认字符串少了空格 -```python - pos = query.find("finished at:") - pos += len("finished at:") -``` - -### "R" 类型的操作并没有修改 value 值为下标: -```python - if data1.value <= data2.value: - before, after = data1, data2 - else: - before, after = data2, data1 -``` - diff --git a/src/dbtest/src/doc/mda_detect_read.md b/src/dbtest/src/doc/mda_detect_read.md deleted file mode 100644 index ebf0b144..00000000 --- a/src/dbtest/src/doc/mda_detect_read.md +++ /dev/null @@ -1,36 +0,0 @@ - -## mda_detect.py 代码功能 -主要用于检测数据库事务之间的并发关系,以及是否存在循环依赖。具体来说,代码通过解析输入的SQL语句,构建事务操作的有向图,检测是否存在循环依赖(即事务之间是否存在无法解决的并发冲突),并输出结果。 -### 变量命名含义 - -| 名称 | 含义 | 备注 | -| ------------ | ------------------------------------- | ------------------- | -| data | 一次操作信息:type、txn_num、op_time、op_data | | -| data_op_list | 数据操作列表 【数据key:【操作...】, 数据key:【操作...】】 | | -| indegree | 下标:事务号;元素:依赖该事务的事务个数 | | -| edge | 下标:事务号;元素:该事务的出边 | | -| version_list | key:数据编号;value:数据的值的历史记录 | 下标是 value? | -| total_num | 数据库操作总数 | 对于数据库全表扫描类的操作有关键作用。 | - -### 函数列表 - -| 函数 | 功能 | 备注 | -| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------- | -| get_total | 获取一个测试文件中 Q0 部分插入的数据个数 | key 的个数 | -| find_data | 特定位置提取一个数字 | | -| set_finish_time | 在一个数据库操作语句执行完毕后更新相关的时间戳:finishedat
1. 所有事务中 begin_ts | end_ts 和 op_time 相等的进行替换
2. 所有操作中 op_time 和 op_time 相等的进行替换
3. version_list 保存操作历史值 op.value
4. op.value 保存 version_list 历史下标 | | -| check_concurrency | 检查两个事务和是否并发 | 通过开始时间和结束时间来判断,默认开始时间之间已经比较过了吗? | -| get_edge_type | 确定两个操作之间的边的类型,添加C 标志,跨事务操作,同时返回新的操作顺序 | data1 和 data2 是同一个 Key 的前后两个操作 | -| build_graph | 建立一个有向图,这个图表示不同操作之间的并发关系 | 只会在同一组操作之间建立边 | -| insert_edge | 具体的插边操作(check_concurrency 的前提下插入 get_edge_type 边):
不同事务之间有并发读写冲突; | data1 发生时间默认在data2 之前 | -| init_record | 根据查询中的信息初始化版本列表中的记录。 | | -| readVersion_record | 处理数据库查询操作 | 只是更改了: op.value | -| read_record | 根据查询中的信息读取记录并更新数据操作。增加到 data_op_list 中。 | | -| write_record | 根据查询和更新数据操作中的信息写入记录。增加到 data_op_list 中。 | | -| delete_record | 根据查询中的信息删除记录并更新数据操作。增加到 data_op_list 中。 | | -| insert_record | 根据查询中的信息插入记录并更新数据操作。 | | -| end_record | 设置事务的结束时间戳。 | | -| operation_record | 记录并且处理数据库操作。 | | -| remove_unfinished_operation | 删除失败的语句以防止构建冗余边 | | -| check_cycle | 在有向图中查找环 | | -| dfs | 在有环的有向图中找环 | | From 89d3f56a9e1fe5dc6f67bb6d8de16f2f6588ca68 Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Mon, 29 Jul 2024 07:24:04 +0000 Subject: [PATCH 12/16] comments restore, delete and add --- src/dbtest/src/mda_detect.py | 377 ++++++++++++++++++++++++++++++++--- 1 file changed, 353 insertions(+), 24 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 40d10c9f..9b410f51 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -39,6 +39,17 @@ def __init__(self): self.end_ts = 99999999999999999999 self.isolation = "serializable" + +""" +Print the graph edges after building the graph. + +Args: +- edge (list): A list of Edge lists +- txn (list): A list of Txn objects + +Returns: +None +""" # print edge after build graph def print_graph(edge,txn): for i, edges in enumerate(edge): @@ -49,6 +60,15 @@ def print_graph(edge,txn): print(" {}".format(e)) +""" +Print the contents of the data operation list. + +Args: +- data_op_list (list): A list of Operation lists + +Returns: +None +""" # print data_op_list def print_data_op_list(data_op_list): for k,list in enumerate(data_op_list): @@ -57,6 +77,15 @@ def print_data_op_list(data_op_list): for i, data in enumerate(list): print("op:{}--{}-".format(data.op_type,data.txn_num)) +""" +Find the total variable number. + +Args: +- lines (list): A list of queries. + +Returns: +int: The maximum variable number found in the queries. 
+""" # find total variable number def get_total(lines): num = 0 @@ -81,6 +110,17 @@ def get_total_txn(lines): num = max(num, tmp) return num + +""" +Extract the data we need from a query. + +Args: +- query (str): The input query string. +- target (str): The target substring to search for. + +Returns: +int: The extracted data value, or -1 if not found. +""" # extract the data we need in query def find_data(query, target): pos = query.find(target) @@ -109,6 +149,20 @@ def find_isolation(query): if query.find("serializable") != -1: return "serializable" +""" +When a statement is executed, this function sets the end time, modifies the transaction list, +and updates the version list as needed. + +Args: +- op_time (int): The operation time of the statement. +- data_op_list (list): A list of data operations. +- query (str): The query string containing information about the statement execution. +- txn (list): A list of transaction objects. +- version_list (list): A list of version lists for data operations. + +Returns: +None +""" # when a statement is executed, set the end time and modify the version list def set_finish_time(op_time, data_op_list, query, txn, version_list): # pos = query.find("finished at:") @@ -145,6 +199,18 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list): version_list[i].append(op.value) op.value = len(version_list[i]) - 1 + +""" +Check if two transactions are concurrent based on their start and end times. + +Args: +- data1: Information about the first transaction. +- data2: Information about the second transaction. +- txn: A list of transaction objects. + +Returns: +bool: True if the transactions are concurrent, False otherwise. 
+""" # if both transactions are running # or the start time of the second transaction is less than the end time of the first transaction # we think they are concurrent @@ -157,12 +223,40 @@ def check_concurrency(data1, data2, txn): return False +""" +Check if a specific edge exists between two transactions in the graph. + +Args: +- edge (list): A list of lists, where each sublist contains edge objects representing the connections in the graph. +- src_txn (int): The source transaction number, which the edge originates from. +- src_type (str): The operation type (e.g., 'R', 'W') at the source of the edge. +- tar_txn (int): The target transaction number, which the edge points to. +- tar_type (str): The operation type (e.g., 'R', 'W') at the target of the edge. + +Returns: +bool: True if the specified edge exists, False otherwise. +""" def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type): for e in edge[src_txn]: if e.out == tar_txn and e.type[0] == src_type and e.type[-1] == tar_type: return True return False + +""" +Determine the type of edge between two operations based on their read or write versions. + +Args: +- data1: Information about the first operation. +- data2: Information about the second operation. +- txn: A list of transaction objects. + +Returns: +tuple: A tuple containing three values: + - A string indicating the edge type ('R', 'W', 'CR', 'CW'). + - Information about the operation that comes first. + - Information about the operation that comes second. +""" # decide which operation comes first depending on the read or write version # if later operation happened after the first txn commit time, edge type will add "C" def get_edge_type(data1, data2, txn): @@ -181,7 +275,22 @@ def get_edge_type(data1, data2, txn): state = "" return before.op_type + state + after.op_type, before, after +""" +Build a directed graph representing the concurrency relationships between operations. 
+ +Args: +- data_op_list: A list of lists, where each inner list contains information about operations for a specific transaction. +- indegree: A list representing the in-degrees of each operation node in the graph. +- edge: A list representing the edges (concurrency relationships) between operations. +- txn: A list of transaction objects. + +This function constructs a directed graph where nodes represent operations, and edges represent concurrency relationships +between operations. It iterates through the list of operations for each transaction and calls the 'insert_edge' function +to create edges in the graph based on concurrency relationships. +Returns: +None +""" def build_graph(data_op_list, indegree, edge, txn): for list1 in data_op_list: for i, data in enumerate(list1): @@ -189,7 +298,25 @@ def build_graph(data_op_list, indegree, edge, txn): insert_edge(list1[j], data, indegree, edge, txn) +""" +Insert an edge into the directed graph representing concurrency relationships between operations. + +Args: +- data1: An operation object representing the first operation. +- data2: An operation object representing the second operation. +- indegree: A list representing the in-degrees of each transaction in the graph. +- edge: A list representing the edges (concurrency relationships) between operations for each transaction. +- txn: A list of transaction objects. + +This function inserts an edge into the directed graph to represent the concurrency relationship between 'data1' and 'data2'. +It first checks if the two operations are concurrent by calling the 'check_concurrency' function. If they are concurrent, it +determines the edge type using the 'get_edge_type' function and adds the edge to the 'edge' list. +The 'indegree' list is updated to reflect the in-degree of the target transaction node when an edge is inserted. 
+ +Returns: +None +""" def insert_edge(data1, data2, indegree, edge, txn): if check_concurrency(data1, data2, txn): edge_type, data1, data2 = get_edge_type(data1, data2, txn) @@ -220,12 +347,42 @@ def insert_edge(data1, data2, indegree, edge, txn): indegree[data2.txn_num] += 1 edge[data1.txn_num].append(Edge(edge_type, data2.txn_num,data1.op_time)) +""" +Initialize a record in the version list based on the information in the query. + +Args: +- query: A query string that contains information about a record. +- version_list: A list of lists representing versioned records. + +This function initializes a record in the 'version_list' based on the information provided in the 'query'. It extracts the 'key' +and 'value' of the record from the query using the 'find_data' function and appends the 'value' to the corresponding version list. + +Returns: +None +""" def init_record(query, version_list): key = find_data(query, "(") value = find_data(query, ",") version_list[key].append(value) +""" +Read the versioned record based on the information in the query. + +Args: +- query (str): A query string that contains information about reading a versioned record. +- op_time (int): The operation time of the read operation. +- data_op_list (list): A list of lists representing data operations. +- version_list (list): A list of lists representing versioned records. + +This function reads the versioned record specified in the 'query'. It extracts the 'key' and 'value' from the query, which are +used to identify the record and version to read. The function checks if the specified version exists in the version list and +updates the 'op.value' accordingly. If the version doesn't exist or if the read operation is not successful, an error message +is returned. + +Returns: +str: An error message indicating the result of the read operation. An empty string means the read was successful. 
+""" def readVersion_record(query, op_time, data_op_list, version_list): error_message = "" data = query.split(")") @@ -272,6 +429,27 @@ def readVersion_record(query, op_time, data_op_list, version_list): +""" +Read records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the read operation. +- txn_num (int): The transaction number. +- total_num (int): The total number of records. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function reads records specified in the query and updates the 'data_op_list' accordingly. It extracts information from +the 'query' to determine which records to read and what type of operation to perform (read or predicate). The function also +sets the 'begin_ts' of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys or predicates and create corresponding 'Operation' objects in the +'data_op_list'. Depending on the structure of the query, this function handles various cases, such as reading single records, +handling predicates, and selecting all rows in a table. + +Returns: +None +""" def read_record(op_time, txn_num, total_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: txn[txn_num].begin_ts = op_time @@ -300,6 +478,24 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list): data_op_list[i].append(Operation("R", txn_num, op_time, i)) +""" +Write records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the write operation. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function writes records specified in the query and updates the 'data_op_list' accordingly. 
It extracts information from the +'query' to determine which records to write and what type of operation to perform (write). The function also sets the 'begin_ts' +of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys and values, and it creates corresponding 'Operation' objects in the 'data_op_list'. + +Returns: +None +""" def write_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: txn[txn_num].begin_ts = op_time @@ -327,6 +523,25 @@ def write_record(op_time, txn_num, txn, data_op_list): for i in range(total_num+1): data_op_list[i].append(Operation("W", txn_num, op_time, i)) + +""" +Delete records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the delete operation. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function deletes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the +'query' to determine which records to delete and what type of operation to perform (delete). The function also sets the 'begin_ts' +of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys, and it creates corresponding 'Operation' objects in the 'data_op_list'. + +Returns: +None +""" def delete_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1: txn[txn_num].begin_ts = op_time @@ -352,6 +567,26 @@ def delete_record(op_time, txn_num, txn, data_op_list): for i in range(total_num+1): data_op_list[i].append(Operation("D", txn_num, op_time, i)) + +""" +Insert records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the insert operation. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. 
+- data_op_list (list): A list of lists representing data operations. + +This function inserts records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the +'query' to determine which records to insert and what type of operation to perform (insert). The function also sets the 'begin_ts' +of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys and their corresponding values, and it creates corresponding 'Operation' +objects in the 'data_op_list'. + +Returns: +None +""" def insert_record(op_time, txn_num, txn, data_op_list): if txn[txn_num].begin_ts == -1 and op_time != 0: txn[txn_num].begin_ts = op_time @@ -360,11 +595,40 @@ def insert_record(op_time, txn_num, txn, data_op_list): data_op_list[key].append(Operation("I", txn_num, op_time, value)) +""" +Set the end timestamp for a transaction. + +Args: +- op_time (int): The operation time when the transaction ends. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. + +This function sets the 'end_ts' attribute of a transaction specified by 'txn_num' to the given 'op_time'. It marks the end of the +transaction's execution. + +Returns: +None +""" def end_record(op_time, txn_num, txn): txn[txn_num].end_ts = op_time +""" +Record and process database operations. + +Args: +- total_num (int): The total number of database operations. +- query (str): The SQL query representing a database operation. +- txn (list): A list of transactions. +- data_op_list (list): A list of data operations. +- version_list (list): A list of version information for data operations. +This function records and processes database operations based on the provided SQL query. It updates the transaction list, data +operation list, and version list accordingly. The 'total_num' parameter specifies the total number of database operations. + +Returns: +str: An error message (if any), or an empty string if the operation is successful. 
+""" def operation_record(total_num, query, txn, data_op_list, version_list): error_message = "" op_time = find_data(query, "Q") @@ -399,6 +663,18 @@ def operation_record(total_num, query, txn, data_op_list, version_list): +""" +Remove unfinished operations from the data operation list. + +Args: +- data_op_list (list): A list of data operations. + +This function iterates through the data operation list and removes any unfinished operations based on their operation time. +Unfinished operations are those with an operation time less than 10,000,000. + +Returns: +None +""" # remove failed statements to prevent redundant edges from being built def remove_unfinished_operation(data_op_list): for list1 in data_op_list: @@ -406,6 +682,20 @@ def remove_unfinished_operation(data_op_list): if op.op_time < 10000000: list1.pop(i) +""" +Check for cycles in a directed graph using topological sorting. + +Args: +- edge (List[List[Edge]]): A list representing the directed edges in the graph. +- indegree (List[int]): A list representing the in-degrees of nodes in the graph. +- total (int): The total number of nodes in the graph. + +This function checks for cycles in a directed graph by performing topological sorting. It takes as input the directed edges (`edge`), +in-degrees of nodes (`indegree`), and the total number of nodes in the graph (`total`). + +Returns: +bool: True if a cycle is detected, False otherwise. +""" # toposort to determine whether there is a cycle def check_cycle(edge, indegree, total): q = Queue.Queue() @@ -425,6 +715,25 @@ def check_cycle(edge, indegree, total): return True +""" +Perform depth-first search (DFS) to find and print loops in a directed graph. + +Args: +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- now (int): The current node being visited. 
+- type (str): The type of edge leading to the current node ('C' for commit, 'R' for read, 'W' for write, etc.). + +This function performs depth-first search (DFS) to find and print loops in a directed graph. It takes as input the result folder +path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), the current node being visited (`now`), +and the type of edge leading to the current node (`type`). + +The function recursively explores the graph, tracking the visited nodes and edges to detect loops. When a loop is found, it is printed +to a result file in the specified result folder. + +Note: This function assumes that global variables like 'visit', 'visit1', 'path', 'edge_type', and 'edge' are defined elsewhere. + +""" # for loop graphs, print the loop def dfs(result_folder, ts_now , e): visit1[e.out] = 1 @@ -455,31 +764,21 @@ def dfs(result_folder, ts_now , e): visit[e.out] = 0 -# # for loop graphs, print the loop -# # Contains redundant edge information and the starting point of the ring is unreasonable -# def dfs(result_folder, ts_now, now, type): -# visit1[now] = 1 -# if visit[now] == 1: return -# visit[now] = 1 -# path.append(now) -# edge_type.append(type) -# for v in edge[now]: -# if visit[v.out] == 0: -# dfs(result_folder, ts_now, v.out, v.type) -# else: -# path.append(v.out) -# edge_type.append(v.type) -# with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: -# for i in range(0, len(path)): -# f.write(str(path[i])) -# if i != len(path) - 1: f.write("->" + edge_type[i+1] + "->") -# f.write("\n\n") -# path.pop() -# edge_type.pop() -# path.pop() -# edge_type.pop() -# visit[now] = 0 +""" +Print the paths in a directed graph to a result file. + +Args: +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- edge (list of lists): A list of lists representing the directed edges in the graph. 
+ +This function prints the paths in a directed graph to a result file. It takes as input the result folder path (`result_folder`), +the current timestamp or identifier for result file naming (`ts_now`), and a list of lists (`edge`) representing the directed edges +in the graph. +The function iterates through the edges and writes the paths to the result file in the specified result folder. + +""" def print_path(result_folder, ts_now, edge): with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: flag = 0 @@ -493,11 +792,41 @@ def print_path(result_folder, ts_now, edge): f.write("\n\n") +""" +Output the result of cycle detection to a result file. + +Args: +- file (str): The name of the file or input source being analyzed. +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- IsCyclic (str): A string indicating whether a cycle was detected. + +This function outputs the result of cycle detection to a result file. It takes as input the name of the file or input source being +analyzed (`file`), the result folder path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), +and a string (`IsCyclic`) indicating whether a cycle was detected. + +The function writes the result, including the file name and the cyclic status, to the specified result file in the result folder. + +""" def output_result(file, result_folder, ts_now, IsCyclic): with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: f.write(file + ": " + IsCyclic + "\n") +""" +Print an error message to a result file. + +Args: +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- error_message (str): The error message to be printed. + +This function prints an error message to a result file. 
It takes as input the result folder path (`result_folder`), the current +timestamp or identifier for result file naming (`ts_now`), and the error message (`error_message`) to be printed. + +The function appends the error message to the specified result file in the result folder and adds a newline for separation. + +""" def print_error(result_folder, ts_now, error_message): with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: f.write(error_message + "\n") From 54930e6d1c1b371903a246d706916971e23e858b Mon Sep 17 00:00:00 2001 From: dreamin <2534393465@qq.com> Date: Mon, 29 Jul 2024 07:33:38 +0000 Subject: [PATCH 13/16] comments add --- src/dbtest/src/mda_detect.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 9b410f51..864fc82c 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -99,6 +99,15 @@ def get_total(lines): # break return num +""" +Find the total number of transactions based on transaction identifiers in queries. + +Args: +- lines (list): A list of query strings, each potentially containing transaction identifiers. + +Returns: +int: The highest transaction number found in the queries. +""" # find total Txn number def get_total_txn(lines): num = 0 From da83039c5b0058481d3ebaa2944a952b69032d8e Mon Sep 17 00:00:00 2001 From: dinream <2534393465@qq.com> Date: Fri, 25 Oct 2024 17:02:26 +0800 Subject: [PATCH 14/16] move mda_detect.py to mda_detect_mixed.py --- src/dbtest/src/mda_detect_mixed.py | 923 +++++++++++++++++++++++++++++ 1 file changed, 923 insertions(+) create mode 100644 src/dbtest/src/mda_detect_mixed.py diff --git a/src/dbtest/src/mda_detect_mixed.py b/src/dbtest/src/mda_detect_mixed.py new file mode 100644 index 00000000..2a44bf71 --- /dev/null +++ b/src/dbtest/src/mda_detect_mixed.py @@ -0,0 +1,923 @@ +# -*- coding: utf-8 -*- + +# /* +# * Tencent is pleased to support the open source community by making 3TS available. 
+# * +# * Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved. The below software +# * in this distribution may have been modified by THL A29 Limited ("Tencent Modifications"). All +# * Tencent Modifications are Copyright (C) THL A29 Limited. +# * +# * Author: xenitchen axingguchen tsunaouyang (xenitchen,axingguchen,tsunaouyang@tencent.com) +# * +# */ + + +import queue +import os +import time + + +class Edge: + def __init__(self, type, out, begin_time): + self.type = type + self.out = out + self.time = begin_time + def __repr__(self): + return "Edge(begin_time={}, type={}, out={})".format(self.time, self.type, self.out) + +class Operation: + def __init__(self, op_type, txn_num, op_time, value): + self.op_type = op_type + self.txn_num = txn_num + self.op_time = op_time + self.value = value + + +class Txn: + def __init__(self): + self.begin_ts = -1 + self.end_ts = 99999999999999999999 + self.isolation = "serializable" + + +""" +Print the graph edges after building the graph. + +Args: +- edge (list): A list of Edge lists +- txn (list): A list of Txn objects + +Returns: +None +""" +# print edge after build graph +def print_graph(edge,txn): + for i, edges in enumerate(edge): + if i == 0 or i == len(edge)-1: + continue + print("Transaction {}:-----{}-----".format(i,txn[i].isolation)) + for e in edges: + print(" {}".format(e)) + + +""" +Print the contents of the data operation list. + +Args: +- data_op_list (list): A list of Operation lists + +Returns: +None +""" +# print data_op_list +def print_data_op_list(data_op_list): + for k,list in enumerate(data_op_list): + if k< len(data_op_list)-1: + print("\nk:{}---".format(k)) + for i, data in enumerate(list): + print("op:{}--{}-".format(data.op_type,data.txn_num)) + +""" +Find the total variable number. + +Args: +- lines (list): A list of queries. + +Returns: +int: The maximum variable number found in the queries. 
+""" +# find total variable number +def get_total(lines): + num = 0 + for query in lines: + query = query.replace("\n", "") + query = query.replace(" ", "") + if query.find("INSERT") != -1: # query[0:2] == "Q0" and + tmp = find_data(query, "(") + num = max(num, tmp) + # elif query[0:2] == "Q1": + # break + return num + +""" +Find the total number of transactions based on transaction identifiers in queries. + +Args: +- lines (list): A list of query strings, each potentially containing transaction identifiers. + +Returns: +int: The highest transaction number found in the queries. +""" +# find total Txn number +def get_total_txn(lines): + num = 0 + for query in lines: + query = query.replace("\n", "") + query = query.replace(" ", "") + if query[0:1] == "Q" and query.find("T") != -1: + tmp = find_data(query, "T") + num = max(num, tmp) + return num + + +""" +Extract the data we need from a query. + +Args: +- query (str): The input query string. +- target (str): The target substring to search for. + +Returns: +int: The extracted data value, or -1 if not found. +""" +# extract the data we need in query +def find_data(query, target): + pos = query.find(target) + if pos == -1: + return pos + pos += len(target) + data_value = "" + for i in range(pos, len(query)): + if query[i].isdigit(): + data_value += query[i] + else: + break + if data_value == "": + return -1 + data_value = int(data_value) + return data_value + +# extract the isolation from content +def find_isolation(query): + if query.find("read-uncommitted") != -1: + return "read-uncommitted" + if query.find("read-committed") != -1: + return "read-committed" + if query.find("repeatable-read") != -1: + return "repeatable-read" + if query.find("serializable") != -1: + return "serializable" + +""" +When a statement is executed, this function sets the end time, modifies the transaction list, +and updates the version list as needed. + +Args: +- op_time (int): The operation time of the statement. 
+- data_op_list (list): A list of data operations. +- query (str): The query string containing information about the statement execution. +- txn (list): A list of transaction objects. +- version_list (list): A list of version lists for data operations. + +Returns: +None +""" +# when a statement is executed, set the end time and modify the version list +def set_finish_time(op_time, data_op_list, query, txn, version_list): + # pos = query.find("finished at:") + # pos += len("finished at:") + # data_value = "" + # tmp, tmp1 = "", "" + # for i in range(pos, len(query)): + # if query[i].isdigit(): + # tmp += query[i] + # else: + # for j in range(3 - len(tmp)): + # tmp1 += "0" + # tmp = tmp1 + tmp + # data_value += tmp + # tmp, tmp1 = "", "" + # data_value = int(data_value) + data_value = int(op_time) + for t in txn: + if t.begin_ts == op_time: + t.begin_ts = data_value + if t.end_ts == op_time: + t.end_ts = data_value + for i, list1 in enumerate(data_op_list): + for op in list1: + if op.op_time == op_time: + op.op_time = data_value + if op.op_type == "W": + version_list[i].append(op.value) + op.value = len(version_list[i]) - 1 + elif op.op_type == "D": + version_list[i].append(-1) + op.value = len(version_list[i]) - 1 + elif op.op_type == "I": + version_list[i].append(op.value) + op.value = len(version_list[i]) - 1 + + +""" +Check if two transactions are concurrent based on their start and end times. + +Args: +- data1: Information about the first transaction. +- data2: Information about the second transaction. +- txn: A list of transaction objects. + +Returns: +bool: True if the transactions are concurrent, False otherwise. 
+""" +# if both transactions are running +# or the start time of the second transaction is less than the end time of the first transaction +# we think they are concurrent +def check_concurrency(data1, data2, txn): + if txn[data2.txn_num].begin_ts < txn[data1.txn_num].end_ts: + return True + elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: # TODO maybe a bug: don't need + return True + else: + return False + + +""" +Check if a specific edge exists between two transactions in the graph. + +Args: +- edge (list): A list of lists, where each sublist contains edge objects representing the connections in the graph. +- src_txn (int): The source transaction number, which the edge originates from. +- src_type (str): The operation type (e.g., 'R', 'W') at the source of the edge. +- tar_txn (int): The target transaction number, which the edge points to. +- tar_type (str): The operation type (e.g., 'R', 'W') at the target of the edge. + +Returns: +bool: True if the specified edge exists, False otherwise. +""" +def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type): + for e in edge[src_txn]: + if e.out == tar_txn and e.type[0] == src_type and e.type[-1] == tar_type: + return True + return False + + +""" +Determine the type of edge between two operations based on their read or write versions. + +Args: +- data1: Information about the first operation. +- data2: Information about the second operation. +- txn: A list of transaction objects. + +Returns: +tuple: A tuple containing three values: + - A string indicating the edge type ('R', 'W', 'CR', 'CW'). + - Information about the operation that comes first. + - Information about the operation that comes second. 
+""" +# decide which operation comes first depending on the read or write version +# if later operation happened after the first txn commit time, edge type will add "C" +def get_edge_type(data1, data2, txn): + if data1.op_time <= data2.op_time: + before, after = data1, data2 + else: + before, after = data2, data1 + # if data1.op_type == "D" or data2.op_type == "D": + # if data1.value < data2.value: + # before, after = data2, data1 + # else: + # before, after = data1, data2 + if after.op_time > txn[before.txn_num].end_ts: + state = "C" + else: + state = "" + return before.op_type + state + after.op_type, before, after + +""" +Build a directed graph representing the concurrency relationships between operations. + +Args: +- data_op_list: A list of lists, where each inner list contains information about operations for a specific transaction. +- indegree: A list representing the in-degrees of each operation node in the graph. +- edge: A list representing the edges (concurrency relationships) between operations. +- txn: A list of transaction objects. + +This function constructs a directed graph where nodes represent operations, and edges represent concurrency relationships +between operations. It iterates through the list of operations for each transaction and calls the 'insert_edge' function +to create edges in the graph based on concurrency relationships. + +Returns: +None +""" +def build_graph(data_op_list, indegree, edge, txn): + for list1 in data_op_list: + for i, data in enumerate(list1): + for j in range(0, i): + insert_edge(list1[j], data, indegree, edge, txn) + + +""" +Insert an edge into the directed graph representing concurrency relationships between operations. + +Args: +- data1: An operation object representing the first operation. +- data2: An operation object representing the second operation. +- indegree: A list representing the in-degrees of each transaction in the graph. 
+- edge: A list representing the edges (concurrency relationships) between operations for each transaction. +- txn: A list of transaction objects. + +This function inserts an edge into the directed graph to represent the concurrency relationship between 'data1' and 'data2'. +It first checks if the two operations are concurrent by calling the 'check_concurrency' function. If they are concurrent, it +determines the edge type using the 'get_edge_type' function and adds the edge to the 'edge' list. + +The 'indegree' list is updated to reflect the in-degree of the target transaction node when an edge is inserted. + +Returns: +None +""" +def insert_edge(data1, data2, indegree, edge, txn): + if check_concurrency(data1, data2, txn): + edge_type, data1, data2 = get_edge_type(data1, data2, txn) + if data1.txn_num == data2.txn_num or edge_type in ["RCR", "RR"]: + return + #* read-uncommitted: Dirty Write + # WI does not exist. If it does, there must be an equivalent edge of WD + DI + # II does not exist. If it does, there must be an equivalent edge of ID + DI + # DW is allowed to exist. When UPDATE, use the condition to query the data containing D + # DD does not exist. 
If it does, there must be an equivalent edge of DI + ID + if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]: + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) + #* read-committed: Dirty Read + elif edge_type in ["WCR","WR"] and (txn[data2.txn_num].isolation == "read-committed" or txn[data2.txn_num].isolation == "repeatable-read" or txn[data2.txn_num].isolation == "serializable"): + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) + #* repeatable-read: Unrepeatable Read + elif edge_type in ["RCW","RW"] and (txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable"): + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) + #* serializable: Phantom Read + elif edge_type in ["RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable": + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) + #* serializable: Phantom Read + elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable": + indegree[data2.txn_num] += 1 + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num,data1.op_time)) + +""" +Initialize a record in the version list based on the information in the query. + +Args: +- query: A query string that contains information about a record. +- version_list: A list of lists representing versioned records. + +This function initializes a record in the 'version_list' based on the information provided in the 'query'. It extracts the 'key' +and 'value' of the record from the query using the 'find_data' function and appends the 'value' to the corresponding version list. 
+ +Returns: +None +""" +def init_record(query, version_list): + key = find_data(query, "(") + value = find_data(query, ",") + version_list[key].append(value) + + +""" +Read the versioned record based on the information in the query. + +Args: +- query (str): A query string that contains information about reading a versioned record. +- op_time (int): The operation time of the read operation. +- data_op_list (list): A list of lists representing data operations. +- version_list (list): A list of lists representing versioned records. + +This function reads the versioned record specified in the 'query'. It extracts the 'key' and 'value' from the query, which are +used to identify the record and version to read. The function checks if the specified version exists in the version list and +updates the 'op.value' accordingly. If the version doesn't exist or if the read operation is not successful, an error message +is returned. + +Returns: +str: An error message indicating the result of the read operation. An empty string means the read was successful. 
+""" +def readVersion_record(query, op_time, data_op_list, version_list): + error_message = "" + data = query.split(")") + if len(data) == 1: + for list1 in data_op_list: + for op in list1: + if op.op_time == op_time: + value = op.value + if len(version_list[value]) == 0: + op.value = -1 + else: + if -1 not in version_list[value]: + error_message = "Value exists, but did not successully read" + return error_message + pos = version_list[value].index(-1) + op.value = pos + else: + for s in data: + key = find_data(s, "(") + value = find_data(s, ",") + for i, list1 in enumerate(data_op_list): + for op in list1: + if key == i and op.op_time == op_time: + value1 = op.value + if len(version_list[value1]) == 0: + op.value = -1 + else: + if version_list[value1].count(value) == 0: + error_message = "Read version that does not exist" + return error_message + pos = version_list[value1].index(value) + op.value = pos + + return error_message + # for i, list1 in enumerate(data_op_list): + # print(i) + # if list1: + # print("") + # print(list1[0].txn_num) + # print(list1[0].op_type) + # print(list1[0].op_time) + # print(list1[0].op_value) + + + + +""" +Read records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the read operation. +- txn_num (int): The transaction number. +- total_num (int): The total number of records. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function reads records specified in the query and updates the 'data_op_list' accordingly. It extracts information from +the 'query' to determine which records to read and what type of operation to perform (read or predicate). The function also +sets the 'begin_ts' of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys or predicates and create corresponding 'Operation' objects in the +'data_op_list'. 
Depending on the structure of the query, this function handles various cases, such as reading single records, +handling predicates, and selecting all rows in a table. + +Returns: +None +""" +def read_record(op_time, txn_num, total_num, txn, data_op_list): + if txn[txn_num].begin_ts == -1: + txn[txn_num].begin_ts = op_time + # for some distributed cases which have 4 param, write part is same + if query.find("value1=") != -1: + op_data = find_data(query, "value1=") + data_op_list[op_data].append(Operation("R", txn_num, op_time, op_data)) + # for normal cases + elif query.find("k=") != -1: + op_data = find_data(query, "k=") + data_op_list[op_data].append(Operation("R", txn_num, op_time, op_data)) + # for predicate cases + elif query.find("k>") != -1: + left = find_data(query, "k>") + 1 + right = find_data(query, "k<") + for i in range(left, right): + data_op_list[i].append(Operation("R", txn_num, op_time, i)) # P + elif query.find("value1>") != -1: + left = find_data(query, "value1>") + 1 + right = find_data(query, "value1<") + for i in range(left, right): + data_op_list[i].append(Operation("R", txn_num, op_time, i)) # p + else: + # it means select all rows in table + for i in range(total_num+1): + data_op_list[i].append(Operation("R", txn_num, op_time, i)) + + +""" +Write records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the write operation. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function writes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the +'query' to determine which records to write and what type of operation to perform (write). The function also sets the 'begin_ts' +of the transaction if it's not already set. 
+ +The 'query' is analyzed to identify specific record keys and values, and it creates corresponding 'Operation' objects in the 'data_op_list'. + +Returns: +None +""" +def write_record(op_time, txn_num, txn, data_op_list): + if txn[txn_num].begin_ts == -1: + txn[txn_num].begin_ts = op_time + if query.find("value1=") != -1: + op_data = find_data(query, "value1=") + op_value = find_data(query, "value2=") + data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value)) + elif query.find("k=") != -1: + op_data = find_data(query, "k=") + op_value = find_data(query, "v=") + data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value)) + # for predicate cases + elif query.find("k>") != -1: + left = find_data(query, "k>") + 1 + right = find_data(query, "k<") + for i in range(left, right): + data_op_list[i].append(Operation("W", txn_num, op_time, i)) # P + elif query.find("value1>") != -1: + left = find_data(query, "value1>") + 1 + right = find_data(query, "value1<") + for i in range(left, right): + data_op_list[i].append(Operation("W", txn_num, op_time, i)) # p + else: + # it means select all rows in table + for i in range(total_num+1): + data_op_list[i].append(Operation("W", txn_num, op_time, i)) + + +""" +Delete records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the delete operation. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function deletes records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the +'query' to determine which records to delete and what type of operation to perform (delete). The function also sets the 'begin_ts' +of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys, and it creates corresponding 'Operation' objects in the 'data_op_list'. 
+ +Returns: +None +""" +def delete_record(op_time, txn_num, txn, data_op_list): + if txn[txn_num].begin_ts == -1: + txn[txn_num].begin_ts = op_time + if query.find("value1=") != -1: + op_data = find_data(query, "value1=") + data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data)) + elif query.find("k=") != -1: + op_data = find_data(query, "k=") + data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data)) + # for predicate cases + elif query.find("k>") != -1: + left = find_data(query, "k>") + 1 + right = find_data(query, "k<") + for i in range(left, right): + data_op_list[i].append(Operation("D", txn_num, op_time, i)) # P + elif query.find("value1>") != -1: + left = find_data(query, "value1>") + 1 + right = find_data(query, "value1<") + for i in range(left, right): + data_op_list[i].append(Operation("D", txn_num, op_time, i)) # p + else: + # it means select all rows in table + for i in range(total_num+1): + data_op_list[i].append(Operation("D", txn_num, op_time, i)) + + +""" +Insert records based on the information in the query and update data operations. + +Args: +- op_time (int): The operation time of the insert operation. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. +- data_op_list (list): A list of lists representing data operations. + +This function inserts records specified in the query and updates the 'data_op_list' accordingly. It extracts information from the +'query' to determine which records to insert and what type of operation to perform (insert). The function also sets the 'begin_ts' +of the transaction if it's not already set. + +The 'query' is analyzed to identify specific record keys and their corresponding values, and it creates corresponding 'Operation' +objects in the 'data_op_list'. 
+ +Returns: +None +""" +def insert_record(op_time, txn_num, txn, data_op_list): + if txn[txn_num].begin_ts == -1 and op_time != 0: + txn[txn_num].begin_ts = op_time + key = find_data(query, "(") + value = find_data(query, ",") + data_op_list[key].append(Operation("I", txn_num, op_time, value)) + + +""" +Set the end timestamp for a transaction. + +Args: +- op_time (int): The operation time when the transaction ends. +- txn_num (int): The transaction number. +- txn (list): A list of transactions. + +This function sets the 'end_ts' attribute of a transaction specified by 'txn_num' to the given 'op_time'. It marks the end of the +transaction's execution. + +Returns: +None +""" +def end_record(op_time, txn_num, txn): + txn[txn_num].end_ts = op_time + + +""" +Record and process database operations. + +Args: +- total_num (int): The total number of database operations. +- query (str): The SQL query representing a database operation. +- txn (list): A list of transactions. +- data_op_list (list): A list of data operations. +- version_list (list): A list of version information for data operations. + +This function records and processes database operations based on the provided SQL query. It updates the transaction list, data +operation list, and version list accordingly. The 'total_num' parameter specifies the total number of database operations. + +Returns: +str: An error message (if any), or an empty string if the operation is successful. +""" +def operation_record(total_num, query, txn, data_op_list, version_list): + error_message = "" + op_time = find_data(query, "Q") + txn_num = find_data(query, "T") + # print("total_num:{}, query:{},optime: {}, txn_num: {}\n".format(total_num,query, op_time, txn_num)) + if op_time == 0 and query.find("INSERT") != -1: + init_record(query, version_list) + return error_message + if query.find("returnresult") != -1: #! 
1"returnresult" maybe don't exist + error_message = readVersion_record(query, op_time, data_op_list, version_list) + return error_message + if query.find("finished") != -1: #! "finished" maybe don't exist + set_finish_time(op_time, data_op_list, query, txn, version_list) + return error_message + if op_time == -1 or txn_num == -1: + return error_message + if query.find("BEGIN") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: + txn[txn_num].isolation = find_isolation(query) + elif query.find("SELECT") != -1: + read_record(op_time, txn_num, total_num, txn, data_op_list) + elif query.find("UPDATE") != -1: + write_record(op_time, txn_num, txn, data_op_list) + elif query.find("DELETE") != -1: + delete_record(op_time, txn_num, txn, data_op_list) + elif query.find("INSERT") != -1: #! assume existing data will not be inserted ("Rollback") + insert_record(op_time, txn_num, txn, data_op_list) + elif query.find("COMMIT") != -1: + if op_time != 0: + end_record(op_time, txn_num, txn) + set_finish_time(op_time, data_op_list, query, txn, version_list) + return error_message + + + +""" +Remove unfinished operations from the data operation list. + +Args: +- data_op_list (list): A list of data operations. + +This function iterates through the data operation list and removes any unfinished operations based on their operation time. +Unfinished operations are those with an operation time less than 10,000,000. + +Returns: +None +""" +# remove failed statements to prevent redundant edges from being built +def remove_unfinished_operation(data_op_list): + for list1 in data_op_list: + for i, op in enumerate(list1): + if op.op_time < 10000000: + list1.pop(i) + +""" +Check for cycles in a directed graph using topological sorting. + +Args: +- edge (List[List[Edge]]): A list representing the directed edges in the graph. +- indegree (List[int]): A list representing the in-degrees of nodes in the graph. 
+- total (int): The total number of nodes in the graph.
+
+This function checks for cycles in a directed graph by performing topological sorting. It takes as input the directed edges (`edge`),
+in-degrees of nodes (`indegree`), and the total number of nodes in the graph (`total`).
+
+Returns:
+bool: True if a cycle is detected, False otherwise.
+"""
+# toposort to determine whether there is a cycle
+def check_cycle(edge, indegree, total):
+    q = queue.Queue()
+    for i, degree in enumerate(indegree):
+        if degree == 0: q.put(i)
+    ans = []
+    while not q.empty():
+        now = q.get()
+        ans.append(now)
+        for val in edge[now]:
+            next_node = val.out
+            indegree[next_node] -= 1
+            if indegree[next_node] == 0:
+                q.put(next_node)
+    if len(ans) == total:
+        return False
+    return True
+
+
+"""
+Perform depth-first search (DFS) to find and print loops in a directed graph.
+
+Args:
+- result_folder (str): The path to the folder where the results will be saved.
+- ts_now (str): The current timestamp or identifier for result file naming.
+- e (Edge): The edge to traverse; `e.out` is the node being visited and
+  `e.type` is the edge's dependency type (a leading 'C' marks a post-commit edge).
+
+This function performs depth-first search (DFS) to find and print loops in a directed graph. It takes as input the result folder
+path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), and the incoming edge (`e`)
+whose `out` field is the next node to visit.
+
+The function recursively explores the graph, tracking the visited nodes and edges to detect loops. When a loop is found, it is printed
+to a result file in the specified result folder.
+
+Note: This function assumes that global variables like 'visit', 'visit1', 'path', and 'edge' are defined elsewhere. 
+ +""" +# for loop graphs, print the loop +def dfs(result_folder, ts_now , e): + visit1[e.out] = 1 + if visit[e.out] == 1: return + visit[e.out] = 1 + path.append(e) + for v in edge[e.out]: + if visit[v.out] == 0: + dfs(result_folder, ts_now, v) + else: + path.append(v) + with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: + content = "" + list_loop = [] + for i in range(len(path) - 1, -1, -1): + if i != len(path) - 1 and path[i].out == path[len(path) - 1].out: + break + index = 0 + while(index < len(list_loop) and path[list_loop[index]].time < path[i].time): + index += 1 + list_loop.insert(index,i) + for idx in list_loop: + content = content + "->" + path[idx].type + "->" + str(path[idx].out) + content = str(path[list_loop[-1]].out) + content + "\n\n" + f.write(content) + path.pop() + path.pop() + visit[e.out] = 0 + + +""" +Print the paths in a directed graph to a result file. + +Args: +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- edge (list of lists): A list of lists representing the directed edges in the graph. + +This function prints the paths in a directed graph to a result file. It takes as input the result folder path (`result_folder`), +the current timestamp or identifier for result file naming (`ts_now`), and a list of lists (`edge`) representing the directed edges +in the graph. + +The function iterates through the edges and writes the paths to the result file in the specified result folder. + +""" +def print_path(result_folder, ts_now, edge): + with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: + flag = 0 + for i in range(len(edge)): + for v in edge[i]: + if flag == 0: + flag = 1 + else: + f.write(", ") + f.write(str(i) + "->" + v.type + "->" + str(v.out)) + f.write("\n\n") + + +""" +Output the result of cycle detection to a result file. 
+ +Args: +- file (str): The name of the file or input source being analyzed. +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- IsCyclic (str): A string indicating whether a cycle was detected. + +This function outputs the result of cycle detection to a result file. It takes as input the name of the file or input source being +analyzed (`file`), the result folder path (`result_folder`), the current timestamp or identifier for result file naming (`ts_now`), +and a string (`IsCyclic`) indicating whether a cycle was detected. + +The function writes the result, including the file name and the cyclic status, to the specified result file in the result folder. + +""" +def output_result(file, result_folder, ts_now, IsCyclic): + with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: + f.write(file + ": " + IsCyclic + "\n") + + +""" +Print an error message to a result file. + +Args: +- result_folder (str): The path to the folder where the results will be saved. +- ts_now (str): The current timestamp or identifier for result file naming. +- error_message (str): The error message to be printed. + +This function prints an error message to a result file. It takes as input the result folder path (`result_folder`), the current +timestamp or identifier for result file naming (`ts_now`), and the error message (`error_message`) to be printed. + +The function appends the error message to the specified result file in the result folder and adds a newline for separation. + +""" +def print_error(result_folder, ts_now, error_message): + with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: + f.write(error_message + "\n") + f.write("\n\n") + + + + + +#! 
------Some assumption------ +# The modifications of transactions at any isolation level are mutually visible, which is equivalent to a single storage, without read-write buffer +# There are statements to set the isolation level of each transaction in the input file, after "BEGIN" + # BEGIN T1 set_isolation=repeatable-read + # BEGIN T2 set_isolation=serializable + # BEGIN T3 set_isolation=read-uncommitted + # BEGIN T4 set_isolation=read-committed +# Assume that the inserted data key is in ascending order from 0 + +run_result_folder = "pg/mda_detect_test" +result_folder = "check_result/" + run_result_folder +do_test_list = "mda_detect_test_list.txt" +#ts_now = "_2param_3txn_insert" +ts_now = time.strftime("%Y%m%d_%H%M%S", time.localtime()) +if not os.path.exists(result_folder): + os.makedirs(result_folder) + +with open(do_test_list, "r") as f: + files = f.readlines() +for file in files: + file = file.replace("\n", "") + file = file.replace(" ", "") + if file == "": + continue + if file[0] == "#": + continue + with open(run_result_folder + "/" + file + ".txt", "r") as f: + lines = f.readlines() + + total_num = get_total(lines) # total number of variables + total_num_txn = get_total_txn(lines) # total number of txn + txn = [Txn() for i in range(total_num_txn + 2)] # total num of transaction + data_op_list = [[] for i in range(total_num + 2)] # record every operation that occurs on the variable + edge = [[] for i in range(total_num_txn + 2)] # all edges from the current point + indegree = [0] * (total_num_txn + 2) # in-degree of each point + visit = [0] * (total_num_txn + 2) # in dfs, whether the current point has been visited + visit1 = [0] * (total_num_txn + 2) # we will only use unvisited points as the starting point of the dfs + path = [] # points in cycle + edge_type = [] # edge type of the cycle + version_list = [[] for i in range(total_num + 2)] + go_end = False # if test result is "Rollback" or "Timeout", we will don't check + + error_message = "" + for query in 
lines: + query = query.replace("\n", "") + query = query.replace(" ", "") + if query.find("Rollback") != -1 or query.find("Timeout") != -1: + go_end = True + # print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt")) + error_message = operation_record(total_num, query, txn, data_op_list, version_list) + if error_message != "": + break + + if error_message != "": + output_result(file, result_folder, ts_now, "Error") + print_error(result_folder, ts_now, error_message) + continue + + cycle = False + # remove_unfinished_operation(data_op_list) 动态测试中默认所有的执行时间 Qi 都没有 finish 字段 + build_graph(data_op_list, indegree, edge, txn) + print("--------file:{}--------".format(file)) + print_graph(edge,txn) + # print_data_op_list(data_op_list) + if not go_end: + cycle = check_cycle(edge, indegree, total_num_txn+2) + if cycle: + output_result(file, result_folder, ts_now, "Cyclic") + for i in range(total_num_txn + 2): + if visit1[i] == 0: + # dfs(result_folder, ts_now, i, "null") + dfs(result_folder, ts_now, Edge("null",i,-1)) + else: + output_result(file, result_folder, ts_now, "Avoid") + print_path(result_folder, ts_now, edge) + print("---------------------------------\n") \ No newline at end of file From 98bf22708551c346a6e28510dbd261fe147900be Mon Sep 17 00:00:00 2001 From: dinream <2534393465@qq.com> Date: Fri, 25 Oct 2024 17:03:58 +0800 Subject: [PATCH 15/16] Restore mda_detect.py version to 965b2be --- src/dbtest/src/mda_detect.py | 304 ++++++++--------------------------- 1 file changed, 68 insertions(+), 236 deletions(-) diff --git a/src/dbtest/src/mda_detect.py b/src/dbtest/src/mda_detect.py index 864fc82c..b97f5ef4 100644 --- a/src/dbtest/src/mda_detect.py +++ b/src/dbtest/src/mda_detect.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - # /* # * Tencent is pleased to support the open source community by making 3TS available. 
# * @@ -18,12 +16,10 @@ class Edge: - def __init__(self, type, out, begin_time): + def __init__(self, type, out): self.type = type self.out = out - self.time = begin_time - def __repr__(self): - return "Edge(begin_time={}, type={}, out={})".format(self.time, self.type, self.out) + class Operation: def __init__(self, op_type, txn_num, op_time, value): @@ -37,45 +33,6 @@ class Txn: def __init__(self): self.begin_ts = -1 self.end_ts = 99999999999999999999 - self.isolation = "serializable" - - -""" -Print the graph edges after building the graph. - -Args: -- edge (list): A list of Edge lists -- txn (list): A list of Txn objects - -Returns: -None -""" -# print edge after build graph -def print_graph(edge,txn): - for i, edges in enumerate(edge): - if i == 0 or i == len(edge)-1: - continue - print("Transaction {}:-----{}-----".format(i,txn[i].isolation)) - for e in edges: - print(" {}".format(e)) - - -""" -Print the contents of the data operation list. - -Args: -- data_op_list (list): A list of Operation lists - -Returns: -None -""" -# print data_op_list -def print_data_op_list(data_op_list): - for k,list in enumerate(data_op_list): - if k< len(data_op_list)-1: - print("\nk:{}---".format(k)) - for i, data in enumerate(list): - print("op:{}--{}-".format(data.op_type,data.txn_num)) """ Find the total variable number. @@ -92,31 +49,11 @@ def get_total(lines): for query in lines: query = query.replace("\n", "") query = query.replace(" ", "") - if query.find("INSERT") != -1: # query[0:2] == "Q0" and + if query[0:2] == "Q0" and query.find("INSERT") != -1: tmp = find_data(query, "(") num = max(num, tmp) - # elif query[0:2] == "Q1": - # break - return num - -""" -Find the total number of transactions based on transaction identifiers in queries. - -Args: -- lines (list): A list of query strings, each potentially containing transaction identifiers. - -Returns: -int: The highest transaction number found in the queries. 
-""" -# find total Txn number -def get_total_txn(lines): - num = 0 - for query in lines: - query = query.replace("\n", "") - query = query.replace(" ", "") - if query[0:1] == "Q" and query.find("T") != -1: - tmp = find_data(query, "T") - num = max(num, tmp) + elif query[0:2] == "Q1": + break return num @@ -147,16 +84,6 @@ def find_data(query, target): data_value = int(data_value) return data_value -# extract the isolation from content -def find_isolation(query): - if query.find("read-uncommitted") != -1: - return "read-uncommitted" - if query.find("read-committed") != -1: - return "read-committed" - if query.find("repeatable-read") != -1: - return "repeatable-read" - if query.find("serializable") != -1: - return "serializable" """ When a statement is executed, this function sets the end time, modifies the transaction list, @@ -174,21 +101,20 @@ def find_isolation(query): """ # when a statement is executed, set the end time and modify the version list def set_finish_time(op_time, data_op_list, query, txn, version_list): - # pos = query.find("finished at:") - # pos += len("finished at:") - # data_value = "" - # tmp, tmp1 = "", "" - # for i in range(pos, len(query)): - # if query[i].isdigit(): - # tmp += query[i] - # else: - # for j in range(3 - len(tmp)): - # tmp1 += "0" - # tmp = tmp1 + tmp - # data_value += tmp - # tmp, tmp1 = "", "" - # data_value = int(data_value) - data_value = int(op_time) + pos = query.find("finishedat:") + pos += len("finishedat:") + data_value = "" + tmp, tmp1 = "", "" + for i in range(pos, len(query)): + if query[i].isdigit(): + tmp += query[i] + else: + for j in range(3 - len(tmp)): + tmp1 += "0" + tmp = tmp1 + tmp + data_value += tmp + tmp, tmp1 = "", "" + data_value = int(data_value) for t in txn: if t.begin_ts == op_time: t.begin_ts = data_value @@ -226,32 +152,12 @@ def set_finish_time(op_time, data_op_list, query, txn, version_list): def check_concurrency(data1, data2, txn): if txn[data2.txn_num].begin_ts < txn[data1.txn_num].end_ts: 
return True - elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: # TODO maybe a bug: don't need + elif txn[data1.txn_num].begin_ts < txn[data2.txn_num].end_ts: return True else: return False -""" -Check if a specific edge exists between two transactions in the graph. - -Args: -- edge (list): A list of lists, where each sublist contains edge objects representing the connections in the graph. -- src_txn (int): The source transaction number, which the edge originates from. -- src_type (str): The operation type (e.g., 'R', 'W') at the source of the edge. -- tar_txn (int): The target transaction number, which the edge points to. -- tar_type (str): The operation type (e.g., 'R', 'W') at the target of the edge. - -Returns: -bool: True if the specified edge exists, False otherwise. -""" -def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type): - for e in edge[src_txn]: - if e.out == tar_txn and e.type[0] == src_type and e.type[-1] == tar_type: - return True - return False - - """ Determine the type of edge between two operations based on their read or write versions. @@ -269,7 +175,7 @@ def check_edge_exit(edge,src_txn,src_type,tar_txn,tar_type): # decide which operation comes first depending on the read or write version # if later operation happened after the first txn commit time, edge type will add "C" def get_edge_type(data1, data2, txn): - if data1.op_time <= data2.op_time: + if data1.value <= data2.value: before, after = data1, data2 else: before, after = data2, data1 @@ -278,12 +184,13 @@ def get_edge_type(data1, data2, txn): # before, after = data2, data1 # else: # before, after = data1, data2 - if after.op_time > txn[before.txn_num].end_ts: + if data2.op_time > txn[data1.txn_num].end_ts: state = "C" else: state = "" return before.op_type + state + after.op_type, before, after + """ Build a directed graph representing the concurrency relationships between operations. 
@@ -329,32 +236,10 @@ def build_graph(data_op_list, indegree, edge, txn): def insert_edge(data1, data2, indegree, edge, txn): if check_concurrency(data1, data2, txn): edge_type, data1, data2 = get_edge_type(data1, data2, txn) - if data1.txn_num == data2.txn_num or edge_type in ["RCR", "RR"]: - return - #* read-uncommitted: Dirty Write - # WI does not exist. If it does, there must be an equivalent edge of WD + DI - # II does not exist. If it does, there must be an equivalent edge of ID + DI - # DW is allowed to exist. When UPDATE, use the condition to query the data containing D - # DD does not exist. If it does, there must be an equivalent edge of DI + ID - if edge_type in ["WCW", "WW", "WCD", "WD", "ICW","IW", "ICD", "ID", "DCW", "DW", "DCI", "DI"]: + if edge_type != "RR" and edge_type != "RCR" and data1.txn_num != data2.txn_num: indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) - #* read-committed: Dirty Read - elif edge_type in ["WCR","WR"] and (txn[data2.txn_num].isolation == "read-committed" or txn[data2.txn_num].isolation == "repeatable-read" or txn[data2.txn_num].isolation == "serializable"): - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) - #* repeatable-read: Unrepeatable Read - elif edge_type in ["RCW","RW"] and (txn[data1.txn_num].isolation == "repeatable-read" or txn[data1.txn_num].isolation == "serializable"): - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) - #* serializable: Phantom Read - elif edge_type in ["RCI","RI","RCD","RD"] and txn[data1.txn_num].isolation == "serializable": - indegree[data2.txn_num] += 1 - edge[data1.txn_num].append(Edge(edge_type, data2.txn_num, data1.op_time)) - #* serializable: Phantom Read - elif edge_type in ["ICR","IR","DCR","DR"] and txn[data2.txn_num].isolation == "serializable": - indegree[data2.txn_num] += 1 - 
edge[data1.txn_num].append(Edge(edge_type, data2.txn_num,data1.op_time)) + edge[data1.txn_num].append(Edge(edge_type, data2.txn_num)) + """ Initialize a record in the version list based on the information in the query. @@ -475,15 +360,15 @@ def read_record(op_time, txn_num, total_num, txn, data_op_list): left = find_data(query, "k>") + 1 right = find_data(query, "k<") for i in range(left, right): - data_op_list[i].append(Operation("R", txn_num, op_time, i)) # P + data_op_list[i].append(Operation("P", txn_num, op_time, i)) elif query.find("value1>") != -1: left = find_data(query, "value1>") + 1 right = find_data(query, "value1<") for i in range(left, right): - data_op_list[i].append(Operation("R", txn_num, op_time, i)) # p + data_op_list[i].append(Operation("P", txn_num, op_time, i)) else: # it means select all rows in table - for i in range(total_num+1): + for i in range(total_num): data_op_list[i].append(Operation("R", txn_num, op_time, i)) @@ -516,21 +401,6 @@ def write_record(op_time, txn_num, txn, data_op_list): op_data = find_data(query, "k=") op_value = find_data(query, "v=") data_op_list[op_data].append(Operation("W", txn_num, op_time, op_value)) - # for predicate cases - elif query.find("k>") != -1: - left = find_data(query, "k>") + 1 - right = find_data(query, "k<") - for i in range(left, right): - data_op_list[i].append(Operation("W", txn_num, op_time, i)) # P - elif query.find("value1>") != -1: - left = find_data(query, "value1>") + 1 - right = find_data(query, "value1<") - for i in range(left, right): - data_op_list[i].append(Operation("W", txn_num, op_time, i)) # p - else: - # it means select all rows in table - for i in range(total_num+1): - data_op_list[i].append(Operation("W", txn_num, op_time, i)) """ @@ -560,21 +430,6 @@ def delete_record(op_time, txn_num, txn, data_op_list): elif query.find("k=") != -1: op_data = find_data(query, "k=") data_op_list[op_data].append(Operation("D", txn_num, op_time, op_data)) - # for predicate cases - elif 
query.find("k>") != -1: - left = find_data(query, "k>") + 1 - right = find_data(query, "k<") - for i in range(left, right): - data_op_list[i].append(Operation("D", txn_num, op_time, i)) # P - elif query.find("value1>") != -1: - left = find_data(query, "value1>") + 1 - right = find_data(query, "value1<") - for i in range(left, right): - data_op_list[i].append(Operation("D", txn_num, op_time, i)) # p - else: - # it means select all rows in table - for i in range(total_num+1): - data_op_list[i].append(Operation("D", txn_num, op_time, i)) """ @@ -642,32 +497,33 @@ def operation_record(total_num, query, txn, data_op_list, version_list): error_message = "" op_time = find_data(query, "Q") txn_num = find_data(query, "T") - # print("total_num:{}, query:{},optime: {}, txn_num: {}\n".format(total_num,query, op_time, txn_num)) if op_time == 0 and query.find("INSERT") != -1: init_record(query, version_list) return error_message - if query.find("returnresult") != -1: #! 1"returnresult" maybe don't exist + if query.find("returnresult") != -1: error_message = readVersion_record(query, op_time, data_op_list, version_list) return error_message - if query.find("finished") != -1: #! "finished" maybe don't exist + if query.find("finished") != -1: set_finish_time(op_time, data_op_list, query, txn, version_list) return error_message if op_time == -1 or txn_num == -1: return error_message - if query.find("BEGIN") != -1: # TODO: Need a related interface, I assume that it is read from the do_test_list file.: - txn[txn_num].isolation = find_isolation(query) - elif query.find("SELECT") != -1: + if query.find("SELECT") != -1: read_record(op_time, txn_num, total_num, txn, data_op_list) + return error_message elif query.find("UPDATE") != -1: write_record(op_time, txn_num, txn, data_op_list) - elif query.find("DELETE") != -1: + return error_message + elif query.find("DELETE") != -1: delete_record(op_time, txn_num, txn, data_op_list) - elif query.find("INSERT") != -1: #! 
assume existing data will not be inserted ("Rollback") + return error_message + elif query.find("INSERT") != -1: insert_record(op_time, txn_num, txn, data_op_list) + return error_message elif query.find("COMMIT") != -1: if op_time != 0: end_record(op_time, txn_num, txn) - set_finish_time(op_time, data_op_list, query, txn, version_list) + return error_message return error_message @@ -744,33 +600,28 @@ def check_cycle(edge, indegree, total): """ # for loop graphs, print the loop -def dfs(result_folder, ts_now , e): - visit1[e.out] = 1 - if visit[e.out] == 1: return - visit[e.out] = 1 - path.append(e) - for v in edge[e.out]: +def dfs(result_folder, ts_now, now, type): + visit1[now] = 1 + if visit[now] == 1: return + visit[now] = 1 + path.append(now) + edge_type.append(type) + for v in edge[now]: if visit[v.out] == 0: - dfs(result_folder, ts_now, v) + dfs(result_folder, ts_now, v.out, v.type) else: - path.append(v) + path.append(v.out) + edge_type.append(v.type) with open(result_folder + "/check_result" + ts_now + ".txt", "a+") as f: - content = "" - list_loop = [] - for i in range(len(path) - 1, -1, -1): - if i != len(path) - 1 and path[i].out == path[len(path) - 1].out: - break - index = 0 - while(index < len(list_loop) and path[list_loop[index]].time < path[i].time): - index += 1 - list_loop.insert(index,i) - for idx in list_loop: - content = content + "->" + path[idx].type + "->" + str(path[idx].out) - content = str(path[list_loop[-1]].out) + content + "\n\n" - f.write(content) + for i in range(0, len(path)): + f.write(str(path[i])) + if i != len(path) - 1: f.write("->" + edge_type[i+1] + "->") + f.write("\n\n") path.pop() + edge_type.pop() path.pop() - visit[e.out] = 0 + edge_type.pop() + visit[now] = 0 """ @@ -842,21 +693,9 @@ def print_error(result_folder, ts_now, error_message): f.write("\n\n") - - - -#! 
------Some assumption------ -# The modifications of transactions at any isolation level are mutually visible, which is equivalent to a single storage, without read-write buffer -# There are statements to set the isolation level of each transaction in the input file, after "BEGIN" - # BEGIN T1 set_isolation=repeatable-read - # BEGIN T2 set_isolation=serializable - # BEGIN T3 set_isolation=read-uncommitted - # BEGIN T4 set_isolation=read-committed -# Assume that the inserted data key is in ascending order from 0 - -run_result_folder = "pg/mda_detect_test" +run_result_folder = "pg/serializable" result_folder = "check_result/" + run_result_folder -do_test_list = "mda_detect_test_list.txt" +do_test_list = "do_test_list.txt" #ts_now = "_2param_3txn_insert" ts_now = time.strftime("%Y%m%d_%H%M%S", time.localtime()) if not os.path.exists(result_folder): @@ -875,13 +714,12 @@ def print_error(result_folder, ts_now, error_message): lines = f.readlines() total_num = get_total(lines) # total number of variables - total_num_txn = get_total_txn(lines) # total number of txn - txn = [Txn() for i in range(total_num_txn + 2)] # total num of transaction + txn = [Txn() for i in range(total_num + 2)] # total num of transaction data_op_list = [[] for i in range(total_num + 2)] # record every operation that occurs on the variable - edge = [[] for i in range(total_num_txn + 2)] # all edges from the current point - indegree = [0] * (total_num_txn + 2) # in-degree of each point - visit = [0] * (total_num_txn + 2) # in dfs, whether the current point has been visited - visit1 = [0] * (total_num_txn + 2) # we will only use unvisited points as the starting point of the dfs + edge = [[] for i in range(total_num + 2)] # all edges from the current point + indegree = [0] * (total_num + 2) # in-degree of each point + visit = [0] * (total_num + 2) # in dfs, whether the current point has been visited + visit1 = [0] * (total_num + 2) # we will only use unvisited points as the starting point of the dfs 
path = [] # points in cycle edge_type = [] # edge type of the cycle version_list = [[] for i in range(total_num + 2)] @@ -893,7 +731,6 @@ def print_error(result_folder, ts_now, error_message): query = query.replace(" ", "") if query.find("Rollback") != -1 or query.find("Timeout") != -1: go_end = True - # print("total_num:{}, total_num_txn:{}, query:{},ts_now: {}, file: {}\n".format(total_num,total_num_txn,query,ts_now,run_result_folder + "/" + file + ".txt")) error_message = operation_record(total_num, query, txn, data_op_list, version_list) if error_message != "": break @@ -904,20 +741,15 @@ def print_error(result_folder, ts_now, error_message): continue cycle = False - # remove_unfinished_operation(data_op_list) 动态测试中默认所有的执行时间 Qi 都没有 finish 字段 + remove_unfinished_operation(data_op_list) build_graph(data_op_list, indegree, edge, txn) - print("--------file:{}--------".format(file)) - print_graph(edge,txn) - # print_data_op_list(data_op_list) if not go_end: - cycle = check_cycle(edge, indegree, total_num_txn+2) + cycle = check_cycle(edge, indegree, total_num + 2) if cycle: output_result(file, result_folder, ts_now, "Cyclic") - for i in range(total_num_txn + 2): + for i in range(total_num + 2): if visit1[i] == 0: - # dfs(result_folder, ts_now, i, "null") - dfs(result_folder, ts_now, Edge("null",i,-1)) + dfs(result_folder, ts_now, i, "null") else: output_result(file, result_folder, ts_now, "Avoid") - print_path(result_folder, ts_now, edge) - print("---------------------------------\n") \ No newline at end of file + print_path(result_folder, ts_now, edge) \ No newline at end of file From 827b5079050c03a68bc210155f06a5733ea3f221 Mon Sep 17 00:00:00 2001 From: dinream <2534393465@qq.com> Date: Fri, 25 Oct 2024 17:16:34 +0800 Subject: [PATCH 16/16] delete no English commit --- src/dbtest/src/mda_detect_mixed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dbtest/src/mda_detect_mixed.py b/src/dbtest/src/mda_detect_mixed.py index 2a44bf71..a464a087 
100644 --- a/src/dbtest/src/mda_detect_mixed.py +++ b/src/dbtest/src/mda_detect_mixed.py @@ -904,7 +904,7 @@ def print_error(result_folder, ts_now, error_message): continue cycle = False - # remove_unfinished_operation(data_op_list) 动态测试中默认所有的执行时间 Qi 都没有 finish 字段 + # remove_unfinished_operation(data_op_list) build_graph(data_op_list, indegree, edge, txn) print("--------file:{}--------".format(file)) print_graph(edge,txn)