-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
As mentioned previously: when two projects point to a common storage directory and the same data is added to both, the audit log ends up with duplicate entries; yet because the data hashes to the same value, the storage directory itself contains only one common entry.
Setup phase
Details
> if (!exists("dvs_workspace")) {
+ dvs_workspace <- getwd()
+ }
> source(file.path(dvs_workspace, "ui/scripts/R", "tree.R"), echo = TRUE)
> fs_manual_dir_tree <- function(path = ".", recurse = TRUE, ...) {
+ files <- fs::dir_ls(path, recurse = recurse, ...)
+ by_dir <- split(files, f .... [TRUNCATED]
> dvs_workspace
[1] "/Users/elea/Documents/a2ai_github/dvs2"
> withr::with_dir(
+ dvs_workspace,
+ system2(
+ "just",
+ "install-cli",
+ )
+ )
cargo install --force --locked --path=dvs-cli
The following initializes the two projects:
Details
> proj_root_a <- file.path(tempfile(), "projectA")
> proj_root_a
[1] "/var/folders/_x/bq8vb1b156sgl363l71by61h0000gn/T//Rtmp4Fdbri/file143597d2ac1/projectA"
> dir.create(proj_root_a, recursive = TRUE)
> dir.create(file.path(proj_root_a, ".git/"))
> message("define global storage directory")
define global storage directory
> storage_directory <- file.path(tempdir(), "dvs_data_directory")
> storage_directory
[1] "/var/folders/_x/bq8vb1b156sgl363l71by61h0000gn/T//Rtmp4Fdbri/dvs_data_directory"
> message("dvs repository for project A with a storage directory provided")
dvs repository for project A with a storage directory provided
> setwd(proj_root_a)
> system2(
+ "dvs",
+ c("init", storage_directory)
+ )
DVS Initialized
> proj_root_b <- file.path(tempfile(), "projectB")
> dir.create(proj_root_b, recursive = TRUE)
> dir.create(file.path(proj_root_b, ".git/"))
> message("dvs repository for project B with a storage directory provided")
dvs repository for project B with a storage directory provided
> setwd(proj_root_b)
> system2(
+ "dvs",
+ c("init", storage_directory)
+ )
DVS Initialized
> # create data directories
>
> fs::dir_create(proj_root_a, "data")
> fs::dir_create(proj_root_b, "data")
Add and store two distinct data files in each of the two projects.
Details
> # store one data file in the two projects
>
> write.table(
+ file = file.path(proj_root_a, "data", "theoph_head_15.tab"),
+ head(Theoph, 15),
+ eol = "\n"
+ )
> write.table(
+ file = file.path(proj_root_b, "data", "theoph_head_15.tab"),
+ head(Theoph, 15),
+ eol = "\n"
+ )
> # store another file in the two projects
>
> write.table(
+ file = file.path(proj_root_a, "data", "theoph_head_23.tab"),
+ head(Theoph, 23),
+ eol = "\n"
+ )
> write.table(
+ file = file.path(proj_root_b, "data", "theoph_head_23.tab"),
+ head(Theoph, 23),
+ eol = "\n"
+ )
> # add two 15/23 files to projects a and b
>
> setwd(proj_root_a)
> system2(
+ "dvs",
+ c(
+ "add",
+ file.path(proj_root_a, "data", "theoph_head_23.tab"),
+ "--message",
+ r"("added head(23) of theoph in tab format")"
+ )
+ )
Added: data/theoph_head_23.tab
> system2(
+ "dvs",
+ c(
+ "add",
+ file.path(proj_root_a, "data", "theoph_head_15.tab"),
+ "--message",
+ r"("added head(15) of theoph in tab format")"
+ )
+ )
Added: data/theoph_head_15.tab
Let's inspect the whole tree and the audit log:
> fs::dir_tree(
+ tempdir(),
+ recurse = TRUE,
+ all = TRUE,
+ # invert = TRUE,
+ # glob = "*.git"
+ )
/var/folders/_x/bq8vb1b156sgl363l71by61h0000gn/T//Rtmp4Fdbri
├── dvs_data_directory
│ ├── 07
│ │ └── 53308ffe7cb4bcefa85def0a4692da383510e3a5e1755c8cfea6558f595e32
│ ├── audit.log.jsonl
│ └── d1
│ └── aa2ed583c752d5212e4ac18cc3e2005048b92ce5e1fa43521c2046f5995a81
├── file143591981e527
│ └── projectB
│ ├── .dvs
│ ├── .git
│ ├── data
│ │ ├── theoph_head_15.tab
│ │ └── theoph_head_23.tab
│ └── dvs.toml
└── file143597d2ac1
└── projectA
├── .dvs
│ ├── .cache
│ │ └── dvs.db
│ ├── .gitignore
│ └── data
│ ├── theoph_head_15.tab.dvs
│ └── theoph_head_23.tab.dvs
├── .git
├── data
│ ├── .gitignore
│ ├── theoph_head_15.tab
│ └── theoph_head_23.tab
└── dvs.toml
> readLines(fs::path(storage_directory, "audit.log.jsonl")) |>
+ cat(sep = "\n")
{"operation_id":"ff3d0908-10a4-47d0-879a-05fba46bebe9","timestamp":1771932025,"user":"elea","file":{"path":"data/theoph_head_23.tab","hashes":{"blake3":"0753308ffe7cb4bcefa85def0a4692da383510e3a5e1755c8cfea6558f595e32","md5":"d407dfbc21154b4efc341e3ae8d03ca4"}},"action":"add"}
{"operation_id":"058f42b6-6cb1-48e4-a578-42a1d8c053c8","timestamp":1771932025,"user":"elea","file":{"path":"data/theoph_head_15.tab","hashes":{"blake3":"d1aa2ed583c752d5212e4ac18cc3e2005048b92ce5e1fa43521c2046f5995a81","md5":"2be6986c70a0208f6071c25d5d1a3353"}},"action":"add"}
> # <continue> add the two files to project B
>
> setwd(proj_root_b)
> system2(
+ "dvs",
+ c(
+ "add",
+ file.path(proj_root_b, "data", "theoph_head_23.tab"),
+ "--message",
+ r"("added head(23) of theoph in tab format")"
+ )
+ )
Added: data/theoph_head_23.tab
> system2(
+ "dvs",
+ c(
+ "add",
+ file.path(proj_root_b, "data", "theoph_head_15.tab"),
+ "--message",
+ r"("added head(15) of theoph in tab format")"
+ )
+ )
Added: data/theoph_head_15.tab
> fs::dir_tree(
+ tempdir(),
+ recurse = TRUE,
+ all = TRUE,
+ invert = TRUE,
+ glob = "*.git"
+ )
/var/folders/_x/bq8vb1b156sgl363l71by61h0000gn/T//Rtmp4Fdbri
├── dvs_data_directory
│ ├── 07
│ │ └── 53308ffe7cb4bcefa85def0a4692da383510e3a5e1755c8cfea6558f595e32
│ ├── audit.log.jsonl
│ └── d1
│ └── aa2ed583c752d5212e4ac18cc3e2005048b92ce5e1fa43521c2046f5995a81
├── file143591981e527
│ └── projectB
│ ├── .dvs
│ │ ├── .cache
│ │ │ └── dvs.db
│ │ ├── .gitignore
│ │ └── data
│ │ ├── theoph_head_15.tab.dvs
│ │ └── theoph_head_23.tab.dvs
│ ├── data
│ │ ├── .gitignore
│ │ ├── theoph_head_15.tab
│ │ └── theoph_head_23.tab
│ └── dvs.toml
└── file143597d2ac1
└── projectA
├── .dvs
│ ├── .cache
│ │ └── dvs.db
│ ├── .gitignore
│ └── data
│ ├── theoph_head_15.tab.dvs
│ └── theoph_head_23.tab.dvs
├── data
│ ├── .gitignore
│ ├── theoph_head_15.tab
│ └── theoph_head_23.tab
└── dvs.toml
> readLines(fs::path(storage_directory, "audit.log.jsonl")) |>
+ cat(sep = "\n")
{"operation_id":"ff3d0908-10a4-47d0-879a-05fba46bebe9","timestamp":1771932025,"user":"elea","file":{"path":"data/theoph_head_23.tab","hashes":{"blake3":"0753308ffe7cb4bcefa85def0a4692da383510e3a5e1755c8cfea6558f595e32","md5":"d407dfbc21154b4efc341e3ae8d03ca4"}},"action":"add"}
{"operation_id":"058f42b6-6cb1-48e4-a578-42a1d8c053c8","timestamp":1771932025,"user":"elea","file":{"path":"data/theoph_head_15.tab","hashes":{"blake3":"d1aa2ed583c752d5212e4ac18cc3e2005048b92ce5e1fa43521c2046f5995a81","md5":"2be6986c70a0208f6071c25d5d1a3353"}},"action":"add"}
{"operation_id":"6f95227d-b568-444d-a51d-8bc27e92304a","timestamp":1771932025,"user":"elea","file":{"path":"data/theoph_head_23.tab","hashes":{"blake3":"0753308ffe7cb4bcefa85def0a4692da383510e3a5e1755c8cfea6558f595e32","md5":"d407dfbc21154b4efc341e3ae8d03ca4"}},"action":"add"}
{"operation_id":"ed55f391-a77a-42e3-971f-c4f66694aa57","timestamp":1771932025,"user":"elea","file":{"path":"data/theoph_head_15.tab","hashes":{"blake3":"d1aa2ed583c752d5212e4ac18cc3e2005048b92ce5e1fa43521c2046f5995a81","md5":"2be6986c70a0208f6071c25d5d1a3353"}},"action":"add"}
> #' Add the Theoph(15) file, with the same message twice.
> #'
>
> setwd(proj_root_a)
> system2(
+ "dvs",
+ c(
+ "add",
+ file.path(proj_root_a, "data", "theoph_head_15.tab"),
+ "--message",
+ r"("added head(23) of theoph in tab format")"
+ )
+ )
Added: data/theoph_head_15.tab
> setwd(proj_root_b)
> system2(
+ "dvs",
+ c(
+ "add",
+ file.path(proj_root_b, "data", "theoph_head_15.tab"),
+ "--message",
+ r"("added head(15) of theoph in tab format")"
+ )
+ )
Added: data/theoph_head_15.tab
> fs::dir_tree(
+ tempdir(),
+ recurse = TRUE,
+ all = TRUE,
+ glob = "*/.git",
+ invert = TRUE
+ )
/var/folders/_x/bq8vb1b156sgl363l71by61h0000gn/T//Rtmp4Fdbri
├── dvs_data_directory
│ ├── 07
│ │ └── 53308ffe7cb4bcefa85def0a4692da383510e3a5e1755c8cfea6558f595e32
│ ├── audit.log.jsonl
│ └── d1
│ └── aa2ed583c752d5212e4ac18cc3e2005048b92ce5e1fa43521c2046f5995a81
├── file143591981e527
│ └── projectB
│ ├── .dvs
│ │ ├── .cache
│ │ │ └── dvs.db
│ │ ├── .gitignore
│ │ └── data
│ │ ├── theoph_head_15.tab.dvs
│ │ └── theoph_head_23.tab.dvs
│ ├── data
│ │ ├── .gitignore
│ │ ├── theoph_head_15.tab
│ │ └── theoph_head_23.tab
│ └── dvs.toml
└── file143597d2ac1
└── projectA
├── .dvs
│ ├── .cache
│ │ └── dvs.db
│ ├── .gitignore
│ └── data
│ ├── theoph_head_15.tab.dvs
│ └── theoph_head_23.tab.dvs
├── data
│ ├── .gitignore
│ ├── theoph_head_15.tab
│ └── theoph_head_23.tab
└── dvs.toml
> readLines(fs::path(storage_directory, "audit.log.jsonl")) |>
+ cat(sep = "\n")
{"operation_id":"ff3d0908-10a4-47d0-879a-05fba46bebe9","timestamp":1771932025,"user":"elea","file":{"path":"data/theoph_head_23.tab","hashes":{"blake3":"0753308ffe7cb4bcefa85def0a4692da383510e3a5e1755c8cfea6558f595e32","md5":"d407dfbc21154b4efc341e3ae8d03ca4"}},"action":"add"}
{"operation_id":"058f42b6-6cb1-48e4-a578-42a1d8c053c8","timestamp":1771932025,"user":"elea","file":{"path":"data/theoph_head_15.tab","hashes":{"blake3":"d1aa2ed583c752d5212e4ac18cc3e2005048b92ce5e1fa43521c2046f5995a81","md5":"2be6986c70a0208f6071c25d5d1a3353"}},"action":"add"}
{"operation_id":"6f95227d-b568-444d-a51d-8bc27e92304a","timestamp":1771932025,"user":"elea","file":{"path":"data/theoph_head_23.tab","hashes":{"blake3":"0753308ffe7cb4bcefa85def0a4692da383510e3a5e1755c8cfea6558f595e32","md5":"d407dfbc21154b4efc341e3ae8d03ca4"}},"action":"add"}
{"operation_id":"ed55f391-a77a-42e3-971f-c4f66694aa57","timestamp":1771932025,"user":"elea","file":{"path":"data/theoph_head_15.tab","hashes":{"blake3":"d1aa2ed583c752d5212e4ac18cc3e2005048b92ce5e1fa43521c2046f5995a81","md5":"2be6986c70a0208f6071c25d5d1a3353"}},"action":"add"}
A convenient way to open VS Code:
> message(
+ "open visual studio code in `tempdir()` (session constant)",
+ " to have an overview over all file changes"
+ )
open visual studio code in `tempdir()` (session constant) to have an overview over all file changes
> browseURL(url = tempdir(), browser = "code")
>
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels