|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Process Markdown files with embedded Python code blocks, saving |
| 4 | +the output and images. |
| 5 | +""" |
| 6 | + |
| 7 | +import argparse |
| 8 | +from contextlib import redirect_stdout, redirect_stderr |
| 9 | +import io |
| 10 | +from pathlib import Path |
| 11 | +import plotly.graph_objects as go |
| 12 | +import sys |
| 13 | +import traceback |
| 14 | + |
| 15 | + |
def main():
    """Parse the command line and process each input file in the order given."""
    options = _parse_args()
    for path in map(Path, options.input):
        _do_file(options, path)
| 20 | + |
| 21 | + |
def _do_file(args, input_file):
    """Process a single Markdown file and write the annotated copy.

    Reads ``input_file``, runs every Python code block found by ``_parse_md``
    through ``_run_code``, and writes the Markdown produced by
    ``_generate_markdown`` into ``args.outdir`` under the input's file name.
    Progress messages go to stderr when ``args.verbose`` is set.

    Args:
        args: Parsed command-line namespace. ``outdir`` and ``verbose`` are
            read here; the whole namespace is forwarded to
            ``_generate_markdown``.
        input_file: ``Path`` of the Markdown file to process.

    Raises:
        SystemExit: With status 1 if the input is missing or unreadable, if
            the output would overwrite the input, or if the output directory
            or output file cannot be written.
    """
    # Validate input file
    if not input_file.exists():
        print(f"Error: '{input_file}' not found", file=sys.stderr)
        sys.exit(1)

    # The output keeps the input's file name, so it must not resolve to the
    # same path as the input.
    stem = input_file.stem
    output_file = args.outdir / input_file.name
    if input_file.resolve() == output_file.resolve():
        print(f"Error: output would overwrite input '{input_file}'", file=sys.stderr)
        sys.exit(1)

    # Read input
    try:
        content = input_file.read_text(encoding="utf-8")
    except (OSError, UnicodeError) as e:
        print(f"Error reading input file: {e}", file=sys.stderr)
        sys.exit(1)

    # Create the output directory up front: a file without any Python blocks
    # never reaches _run_code, which was the only place that created it.
    try:
        args.outdir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"Error creating output directory: {e}", file=sys.stderr)
        sys.exit(1)

    # Parse markdown and extract code blocks
    _report(args.verbose, f"Processing {input_file}...")
    code_blocks = _parse_md(content)
    _report(args.verbose, f"- Found {len(code_blocks)} code blocks")

    # Execute code blocks and collect results; the figure counter is threaded
    # through so figure file names stay unique across blocks of this file.
    execution_results = []
    figure_counter = 0
    for i, block in enumerate(code_blocks):
        _report(args.verbose, f"- Executing block {i + 1}/{len(code_blocks)}")
        figure_counter, result = _run_code(block["code"], args.outdir, stem, figure_counter)
        execution_results.append(result)
        _report(result["error"], f"  - Warning: block {i + 1} had an error")
        _report(result["images"], f"  - Generated {len(result['images'])} image(s)")

    # Generate and save output
    content = _generate_markdown(args, content, code_blocks, execution_results, args.outdir)
    try:
        output_file.write_text(content, encoding="utf-8")
    except (OSError, UnicodeError) as e:
        print(f"Error writing output file: {e}", file=sys.stderr)
        sys.exit(1)
    _report(args.verbose, f"- Output written to {output_file}")
    _report(any(result["images"] for result in execution_results), f"- Images saved to {args.outdir}")
| 70 | + |
| 71 | + |
def _capture_plotly_show(fig, counter, result, output_dir, stem):
    """Write a figure to PNG and HTML files and record them in ``result``.

    Files are named ``{stem}_{counter}.png`` / ``.html`` inside ``output_dir``.
    The file names (not full paths) are appended to ``result["images"]`` and
    ``result["html_files"]``; an embeddable HTML fragment is appended to
    ``result["html_content"]``, which is created on first use.
    """
    base_name = f"{stem}_{counter}"

    # Static image at a fixed 800x600 size.
    png_filename = base_name + ".png"
    fig.write_image(output_dir / png_filename, width=800, height=600)
    result["images"].append(png_filename)

    # Standalone HTML file next to the image.
    html_filename = base_name + ".html"
    fig.write_html(output_dir / html_filename, include_plotlyjs="cdn")

    # Fragment (not a full page) kept in memory for optional inlining.
    fragment = fig.to_html(
        include_plotlyjs="cdn",
        div_id=f"plotly-div-{counter}",
        full_html=False,
    )
    result["html_files"].append(html_filename)
    if "html_content" not in result:
        result["html_content"] = []
    result["html_content"].append(fragment)
| 87 | + |
| 88 | + |
def _fenced_section(title, text):
    """Return Markdown lines showing ``text`` in a fenced block under a bold title."""
    return ["", f"**{title}:**", "```", *text.rstrip().split("\n"), "```"]


def _generate_markdown(args, content, code_blocks, execution_results, output_dir):
    """Generate the output Markdown with execution results embedded.

    After the closing fence of each code block this inserts, in order: the
    captured stdout, the error text, the captured stderr, one image reference
    per generated image, and (only when ``args.inline`` is true) the HTML
    fragments of interactive plots. Blocks that produced nothing are left
    untouched.

    Args:
        args: Namespace whose ``inline`` attribute controls HTML embedding.
        content: Original Markdown text.
        code_blocks: Block dicts with ``start_line`` and ``end_line`` giving
            0-based line indices into ``content``.
        execution_results: One result dict per block, in the same order as
            ``code_blocks``, with keys ``stdout``, ``stderr``, ``error``,
            ``images`` and optionally ``html_content``.
        output_dir: Unused; kept so existing callers keep working.

    Returns:
        The annotated Markdown as a single string.
    """
    lines = content.split("\n")

    # Work from the last block to the first so that inserting lines never
    # shifts the positions of blocks that are still to be handled.
    ordered = sorted(
        enumerate(code_blocks), key=lambda pair: pair[1]["start_line"], reverse=True
    )

    for block_idx, block in ordered:
        result = execution_results[block_idx]
        insert_lines = []

        if result["stdout"].strip():
            insert_lines.extend(_fenced_section("Output", result["stdout"]))
        if result["error"]:
            insert_lines.extend(_fenced_section("Error", result["error"]))
        if result["stderr"].strip():
            insert_lines.extend(_fenced_section("Warnings/Messages", result["stderr"]))

        # Reference each image by bare file name: entries in result["images"]
        # are file names, not paths.
        for image in result["images"]:
            insert_lines.append("")
            insert_lines.append(f"![{image}]({image})")

        # Embed HTML content for plotly figures
        if args.inline:
            for html_content in result.get("html_content", []):
                insert_lines.extend(["", "**Interactive Plot:**", ""])
                insert_lines.extend(html_content.split("\n"))

        if insert_lines:
            # Insert directly after the closing ``` of the code block.
            insertion_point = block["end_line"] + 1
            lines[insertion_point:insertion_point] = insert_lines

    return "\n".join(lines)
| 147 | + |
| 148 | + |
def _parse_args():
    """Parse command-line arguments from ``sys.argv``.

    Returns:
        An ``argparse.Namespace`` with ``input`` (list of one or more path
        strings), ``inline`` (bool), ``outdir`` (``Path``) and ``verbose``
        (bool).

    Raises:
        SystemExit: Raised by argparse on invalid usage, including a missing
            ``--outdir``.
    """
    parser = argparse.ArgumentParser(description="Process Markdown files with code blocks")
    parser.add_argument("input", nargs="+", help="Input .md file")
    parser.add_argument("--inline", action="store_true", help="Inline HTML in .md")
    # Required: without it outdir would be None and the first path join on it
    # would fail with a TypeError instead of a usage message.
    parser.add_argument("--outdir", type=Path, required=True, help="Output directory")
    parser.add_argument("--verbose", action="store_true", help="Report progress")
    return parser.parse_args()
| 157 | + |
| 158 | + |
def _parse_md(content):
    """Collect the fenced Python code blocks of a Markdown document.

    A block opens on a line whose stripped text starts with ```` ```python ````
    and closes on the next line whose stripped text is exactly ```` ``` ````.

    Returns:
        A list of dicts, one per closed block, with ``start_line`` and
        ``end_line`` (0-based indices of the opening and closing fence
        lines), ``code`` (the lines in between joined with newlines) and
        ``type`` (always ``"python"``). A block that is never closed is not
        returned.
    """
    found = []
    pending = None  # block currently being collected; None outside a Python fence

    for lineno, text in enumerate(content.split("\n")):
        stripped = text.strip()

        if stripped.startswith("```python"):
            # An opening fence always starts a fresh block.
            pending = {
                "start_line": lineno,
                "end_line": None,
                "code": [],
                "type": "python",
            }
        elif pending is None:
            # Ordinary Markdown outside any Python block.
            continue
        elif stripped == "```":
            pending["end_line"] = lineno
            pending["code"] = "\n".join(pending["code"])
            found.append(pending)
            pending = None
        else:
            pending["code"].append(text)

    return found
| 190 | + |
| 191 | + |
def _report(condition, message):
    """Write ``message`` to stderr when ``condition`` is truthy; otherwise do nothing."""
    if not condition:
        return
    print(message, file=sys.stderr)
| 196 | + |
| 197 | + |
def _run_code(code, output_dir, stem, figure_counter):
    """Execute one code block, capturing its output and generated files.

    While the code runs, ``go.Figure.show`` is replaced by a function that
    saves the figure through ``_capture_plotly_show`` instead of displaying
    it. The original method is restored afterwards even if the code raises.

    Args:
        code: Python source text to execute.
        output_dir: ``Path`` of the directory that receives generated files;
            created if it does not exist.
        stem: Base name used for figure file names.
        figure_counter: Number of figures saved so far; incremented once per
            ``show()`` call made by the code.

    Returns:
        A ``(figure_counter, result)`` tuple. ``result`` is a dict with keys
        ``stdout``, ``stderr``, ``error`` (``None`` on success, otherwise the
        message plus traceback), ``images`` and ``html_files``.
    """
    stdout_buffer = io.StringIO()
    stderr_buffer = io.StringIO()

    # Snapshot the directory so PNG files written directly by the code (not
    # via show()) can be detected afterwards.
    output_dir.mkdir(parents=True, exist_ok=True)
    files_before = {f.name for f in output_dir.iterdir()}
    result = {"stdout": "", "stderr": "", "error": None, "images": [], "html_files": []}

    def patched_show(self, *args, **kwargs):
        """Save the figure instead of displaying it."""
        nonlocal figure_counter
        figure_counter += 1
        _capture_plotly_show(self, figure_counter, result, output_dir, stem)

    # Each block runs in its own fresh namespace.
    exec_globals = {
        "__name__": "__main__",
        "__file__": "<markdown_code>",
    }

    original_show = go.Figure.show
    go.Figure.show = patched_show
    try:
        with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
            # NOTE(security): this executes whatever code the Markdown file
            # contains; only run the tool on documents you trust.
            exec(code, exec_globals)
    except Exception as e:
        result["error"] = f"Error executing code: {str(e)}\n{traceback.format_exc()}"
    finally:
        # Always undo the patch. Without this, a failing block left the
        # patched method in place and the next call saved it as "original".
        go.Figure.show = original_show

    result["stdout"] = stdout_buffer.getvalue()
    result["stderr"] = stderr_buffer.getvalue()

    # Pick up any additional PNG files created during execution; sorted so
    # the order in the output does not depend on set iteration order.
    files_after = {f.name for f in output_dir.iterdir()}
    for name in sorted(files_after - files_before):
        if name not in result["images"] and name.lower().endswith(".png"):
            result["images"].append(name)

    return figure_counter, result
| 243 | + |
| 244 | + |
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments