Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions oxen-rust/src/cli/src/cmd/df.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ use liboxen::model::LocalRepository;
use liboxen::util::fs;

use crate::cmd::RunCmd;

pub mod add_image;

pub const NAME: &str = "df";
pub struct DFCmd;

Expand All @@ -20,10 +23,14 @@ impl RunCmd for DFCmd {

fn args(&self) -> Command {
// Sets up the CLI args for the command
let add_image_cmd = add_image::AddImageCmd;
Command::new(NAME)
.about("View and transform data frames. Supported types: csv, tsv, ndjson, jsonl, parquet.")
.arg(arg!(<PATH> ... "The DataFrame you want to process. If in the schema subcommand the schema ref."))
.arg_required_else_help(true)
.subcommand_negates_reqs(true)
.args_conflicts_with_subcommands(true)
.subcommand(add_image_cmd.args())
.arg(
Arg::new("write")
.long("write")
Expand Down Expand Up @@ -240,6 +247,21 @@ impl RunCmd for DFCmd {
}

async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> {
// Check for subcommands first
if let Some((name, sub_matches)) = args.subcommand() {
match name {
add_image::NAME => {
let cmd = add_image::AddImageCmd;
return cmd.run(sub_matches).await;
}
_ => {
return Err(OxenError::basic_str(format!(
"Unknown df subcommand: {name}"
)));
}
}
}

// Parse Args
let mut opts = DFCmd::parse_df_args(args);
let Some(path) = args.get_one::<String>("PATH") else {
Expand Down
131 changes: 131 additions & 0 deletions oxen-rust/src/cli/src/cmd/df/add_image.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
use std::path::PathBuf;

use async_trait::async_trait;
use clap::{Arg, Command};

use liboxen::command;
use liboxen::error::OxenError;
use liboxen::model::LocalRepository;

use crate::cmd::RunCmd;

/// Name of this subcommand as typed on the command line (`add-image`).
/// Used both to build the clap `Command` and to dispatch on the matched subcommand name.
pub const NAME: &str = "add-image";

/// Stateless handler for the `add-image` subcommand; its behavior lives in its `RunCmd` impl.
pub struct AddImageCmd;

#[async_trait]
impl RunCmd for AddImageCmd {
    /// Returns the subcommand name (`add-image`).
    fn name(&self) -> &str {
        NAME
    }

    /// Builds the clap definition for `add-image`.
    ///
    /// Arguments, in registration order:
    /// * `IMAGE_PATH` - one or more positional image paths (required).
    /// * `--file` / `-f` - the data frame file to add the image path(s) to (required).
    /// * `--dest` - optional destination directory or path in the repo for external images.
    /// * `--extension` - optional override of the data frame format.
    fn args(&self) -> Command {
        let image_paths = Arg::new("IMAGE_PATH")
            .help("Path(s) to image file(s) to add")
            .required(true)
            .num_args(1..);

        let data_frame = Arg::new("file")
            .long("file")
            .short('f')
            .help("The data frame file to add the image path(s) to")
            .required(true);

        let destination = Arg::new("dest")
            .long("dest")
            .help("Destination directory or path in the repo for external images");

        let format_override = Arg::new("extension")
            .long("extension")
            .help("Override the data frame format (e.g. csv, tsv, parquet)");

        Command::new(NAME)
            .about("Add image(s) to a data frame, copying external images into the repo and staging all files.")
            .arg(image_paths)
            .arg(data_frame)
            .arg(destination)
            .arg(format_override)
    }

    /// Executes `add-image` against the repository found from the current directory.
    ///
    /// Image paths are made absolute against the current working directory. The
    /// `--file` argument is converted to a path relative to the repository root
    /// before everything is handed to `command::df::add_images`.
    ///
    /// # Errors
    ///
    /// Returns an `OxenError` when the repository or the current directory cannot be
    /// determined, when a required argument is absent, when a path cannot be
    /// canonicalized or made repo-relative, or when `add_images` itself fails.
    async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> {
        let repo = LocalRepository::from_current_dir()?;

        let cwd = std::env::current_dir()
            .map_err(|e| OxenError::basic_str(format!("Failed to get current directory: {e}")))?;

        // Image paths: anchor at the CWD. `Path::join` hands back its argument
        // untouched when that argument is already absolute, so one call covers
        // both the absolute and the relative case.
        let Some(raw_images) = args.get_many::<String>("IMAGE_PATH") else {
            return Err(OxenError::basic_str("At least one IMAGE_PATH is required"));
        };
        let image_paths: Vec<PathBuf> = raw_images.map(|raw| cwd.join(raw)).collect();

        // Data frame path: must end up relative to the repository root.
        let Some(df_arg) = args.get_one::<String>("file") else {
            return Err(OxenError::basic_str("--file is required"));
        };
        let df_path = PathBuf::from(df_arg);

        // Canonicalizing the repo root is needed by both relative-path branches below;
        // it is kept lazy so each branch runs it at the same point it always did.
        let canonical_repo = || {
            repo.path.canonicalize().map_err(|e| {
                OxenError::basic_str(format!(
                    "Could not canonicalize repo path {:?}: {e}",
                    repo.path
                ))
            })
        };

        let df_repo_relative = if df_path.is_absolute() {
            // Absolute input is compared against the repo path as-is (no canonicalization).
            liboxen::util::fs::path_relative_to_dir(&df_path, &repo.path)?
        } else {
            let abs_df = cwd.join(&df_path);
            if abs_df.exists() {
                // The file is on disk: canonicalize both sides before relativizing.
                let df_canonical = abs_df.canonicalize().map_err(|e| {
                    OxenError::basic_str(format!("Could not canonicalize {abs_df:?}: {e}"))
                })?;
                let repo_root = canonical_repo()?;
                liboxen::util::fs::path_relative_to_dir(&df_canonical, &repo_root)?
            } else {
                // The file does not exist yet, so it cannot be canonicalized. Relativize
                // the CWD instead and append the argument exactly as it was given.
                // NOTE(review): any `..` components in the argument are kept as written.
                let repo_root = canonical_repo()?;
                let cwd_canonical = cwd.canonicalize().map_err(|e| {
                    OxenError::basic_str(format!("Could not canonicalize CWD: {e}"))
                })?;
                let cwd_in_repo =
                    liboxen::util::fs::path_relative_to_dir(&cwd_canonical, &repo_root)?;
                cwd_in_repo.join(&df_path)
            }
        };

        let dest: Option<PathBuf> = args.get_one::<String>("dest").map(PathBuf::from);
        let extension_override = args.get_one::<String>("extension").map(String::as_str);

        let added = command::df::add_images(
            &repo,
            &df_repo_relative,
            &image_paths,
            dest.as_deref(),
            extension_override,
        )
        .await?;

        println!(
            "Added {} image(s) to data frame '{}'",
            added.len(),
            df_repo_relative.display()
        );

        Ok(())
    }
}
20 changes: 19 additions & 1 deletion oxen-rust/src/lib/src/command/df.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//! Interact with DataFrames
//!

use std::path::Path;
use std::path::{Path, PathBuf};

use crate::core::df::tabular;
use crate::error::OxenError;
Expand Down Expand Up @@ -74,6 +74,24 @@ pub async fn add_row(path: &Path, data: &str) -> Result<(), OxenError> {
}
}

/// Add images to a dataframe, copying external images into the repo and staging all files.
///
/// This is a thin entry point: all work is delegated to
/// `repositories::data_frames::images::add_images_to_df`, and its result is returned
/// unchanged.
///
/// # Arguments
///
/// * `repo` - the local repository to operate on.
/// * `df_path` - path of the data frame file (the CLI caller passes it relative to the repo root).
/// * `image_paths` - paths of the image files to add.
/// * `dest` - optional destination inside the repo for images that live outside it.
/// * `extension_override` - optional data frame format to use instead of the file's extension.
///
/// # Errors
///
/// Propagates any `OxenError` produced by `add_images_to_df`.
pub async fn add_images(
    repo: &LocalRepository,
    df_path: &Path,
    image_paths: &[PathBuf],
    dest: Option<&Path>,
    extension_override: Option<&str>,
) -> Result<Vec<PathBuf>, OxenError> {
    let added = repositories::data_frames::images::add_images_to_df(
        repo,
        df_path,
        image_paths,
        dest,
        extension_override,
    )
    .await?;

    Ok(added)
}

/// Add a column to a dataframe
pub async fn add_column(path: &Path, data: &str) -> Result<(), OxenError> {
if util::fs::is_tabular(path) {
Expand Down
30 changes: 20 additions & 10 deletions oxen-rust/src/lib/src/core/df/tabular.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1477,20 +1477,30 @@ pub fn write_df(df: &mut DataFrame, path: impl AsRef<Path>) -> Result<(), OxenEr
let err = format!("Unknown file type write_df {path:?} {extension:?}");

match extension {
Some(extension) => match extension {
"ndjson" => write_df_jsonl(df, path),
"jsonl" => write_df_jsonl(df, path),
"json" => write_df_json(df, path),
"tsv" => write_df_csv(df, path, b'\t'),
"csv" => write_df_csv(df, path, b','),
"parquet" => write_df_parquet(df, path),
"arrow" => write_df_arrow(df, path),
_ => Err(OxenError::basic_str(err)),
},
Some(ext) => write_df_with_ext(df, path, ext),
None => Err(OxenError::basic_str(err)),
}
}

/// Writes `df` to `path` using the writer selected by `ext`, ignoring whatever
/// extension `path` itself carries.
///
/// Recognized values of `ext` (matched exactly, case-sensitively):
/// `ndjson`, `jsonl`, `json`, `tsv`, `csv`, `parquet`, `arrow`.
///
/// # Errors
///
/// Returns an `OxenError` naming the path and extension when `ext` is not one of
/// the values above; otherwise returns whatever the selected writer returns.
pub fn write_df_with_ext(
    df: &mut DataFrame,
    path: impl AsRef<Path>,
    ext: &str,
) -> Result<(), OxenError> {
    let path = path.as_ref();

    // Delimited-text formats share one writer and differ only in the separator byte.
    let delimiter = match ext {
        "csv" => Some(b','),
        "tsv" => Some(b'\t'),
        _ => None,
    };
    if let Some(separator) = delimiter {
        return write_df_csv(df, path, separator);
    }

    match ext {
        "jsonl" | "ndjson" => write_df_jsonl(df, path),
        "json" => write_df_json(df, path),
        "arrow" => write_df_arrow(df, path),
        "parquet" => write_df_parquet(df, path),
        unknown => {
            let message = format!("Unknown file type write_df_with_ext {path:?} {unknown:?}");
            Err(OxenError::basic_str(message))
        }
    }
}

pub async fn copy_df(
input: impl AsRef<Path>,
output: impl AsRef<Path>,
Expand Down
1 change: 1 addition & 0 deletions oxen-rust/src/lib/src/repositories/data_frames.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use crate::opts::DFOpts;

use std::path::Path;

pub mod images;
pub mod schemas;

pub async fn get_slice(
Expand Down
Loading