diff --git a/docs/experiments/markdown-feeds.md b/docs/experiments/markdown-feeds.md new file mode 100644 index 00000000..510fb054 --- /dev/null +++ b/docs/experiments/markdown-feeds.md @@ -0,0 +1,45 @@ +# Markdown Feeds Experiment + +## Summary + +Adds Markdown representations of WordPress content: + +- Feed: `/?feed=markdown` (and `/feed/markdown/` once rewrite rules are flushed). +- Singular: `https://example.com/my-post.md` (optional). +- Singular content negotiation: `Accept: text/markdown` (optional). + +The output is intended to be a lightweight, text-first format that is easier for automated clients (including AI tooling) to ingest than full HTML. + +## Key Hooks & Entry Points + +- `init` -> registers a custom feed using `add_feed( 'markdown', ... )` (optional). +- `request` -> strips a trailing `.md` from the parsed slug query vars (`name`, `pagename`, `attachment`) so WordPress can resolve the underlying content (optional). +- `template_redirect` -> renders `text/markdown` for singular content when requested via `.md` or `Accept: text/markdown` (optional). +- Renderer -> converts rendered `the_content` HTML into Markdown using WordPress core’s HTML API (`WP_HTML_Processor`, with fallback to `WP_HTML_Tag_Processor`). +- Filters: + - `ai_experiments_markdown_feed_html` -> adjust HTML before conversion. + - `ai_experiments_markdown_feed_markdown` -> adjust Markdown after conversion. + - `ai_experiments_markdown_feed_post_sections` -> reorder or inject sections in each feed entry. + - `ai_experiments_markdown_singular_html` -> adjust HTML before conversion (singular). + - `ai_experiments_markdown_singular_markdown` -> adjust Markdown after conversion (singular). + - `ai_experiments_markdown_singular_post_sections` -> reorder or inject sections in each singular response. + +## Assets & Data Flow + +- No JS/CSS assets. +- Uses the main feed query loop (`have_posts()` / `the_post()`) and outputs Markdown for each item (title, URL, publish date, content). + +## Testing + +1. 
Enable **AI Experiments** globally and enable **Markdown Feeds**. +2. Visit `/?feed=markdown` and confirm a `text/markdown` response containing one or more posts. +3. Visit a single post or page with `.md` appended (e.g., `/hello-world.md`) and confirm a `text/markdown` response. +4. Make a request to a post or page with `Accept: text/markdown` and confirm a `text/markdown` response. +5. If pretty permalinks are enabled, flush rewrite rules (e.g., visit Settings → Permalinks) and confirm `/feed/markdown/` works. +6. Verify common content renders reasonably (headings, paragraphs, lists, links, images, code blocks). + +## Notes + +- The HTML-to-Markdown conversion is intentionally conservative and based on WordPress core’s HTML API (`WP_HTML_Processor`) rather than a bundled third-party parser. +- This experiment currently targets singular post content (title + metadata + content). It does not attempt to convert full theme templates or archive views. +- `.md` permalinks require pretty permalinks (a non-empty permalink structure). 
diff --git a/includes/Experiment_Loader.php b/includes/Experiment_Loader.php index eef246ac..9324e1ee 100644 --- a/includes/Experiment_Loader.php +++ b/includes/Experiment_Loader.php @@ -107,6 +107,7 @@ private function get_default_experiments(): array { \WordPress\AI\Experiments\Abilities_Explorer\Abilities_Explorer::class, \WordPress\AI\Experiments\Excerpt_Generation\Excerpt_Generation::class, \WordPress\AI\Experiments\Image_Generation\Image_Generation::class, + \WordPress\AI\Experiments\Markdown_Feeds\Markdown_Feeds::class, \WordPress\AI\Experiments\Summarization\Summarization::class, \WordPress\AI\Experiments\Title_Generation\Title_Generation::class, ); diff --git a/includes/Experiments/Markdown_Feeds/HTML_To_Markdown_Converter.php b/includes/Experiments/Markdown_Feeds/HTML_To_Markdown_Converter.php new file mode 100644 index 00000000..e0a5a6ea --- /dev/null +++ b/includes/Experiments/Markdown_Feeds/HTML_To_Markdown_Converter.php @@ -0,0 +1,464 @@ +create_processor( $html ); + if ( ! $processor ) { + return trim( wp_strip_all_tags( $html ) ); + } + + $markdown = $this->convert_with_processor( $processor ); + + if ( + $processor instanceof WP_HTML_Processor + && WP_HTML_Processor::ERROR_UNSUPPORTED === $processor->get_last_error() + && class_exists( WP_HTML_Tag_Processor::class ) + ) { + $markdown = $this->convert_with_processor( new WP_HTML_Tag_Processor( $html ) ); + } + + return trim( $this->cleanup( $markdown ) ); + } + + /** + * Creates the best available HTML processor for conversion. + * + * Uses the HTML Processor in fragment mode when available, and falls back to + * the Tag Processor for broader tag tolerance. + * + * @since x.x.x + * + * @param string $html HTML string. + * @return \WP_HTML_Tag_Processor|\WP_HTML_Processor|null Processor instance. + */ + private function create_processor( string $html ) { + $processor = null; + + if ( class_exists( WP_HTML_Processor::class ) ) { + $processor = WP_HTML_Processor::create_fragment( $html ); + + if ( ! 
$processor ) { + $processor = new WP_HTML_Processor( $html ); + } + } elseif ( class_exists( WP_HTML_Tag_Processor::class ) ) { + $processor = new WP_HTML_Tag_Processor( $html ); + } + + return $processor; + } + + /** + * Converts HTML into Markdown using a provided HTML API processor. + * + * @since x.x.x + * + * @param \WP_HTML_Tag_Processor|\WP_HTML_Processor $processor Processor instance. + * @return string Markdown output. + */ + private function convert_with_processor( $processor ): string { + $markdown = ''; + + $at_line_start = true; + $blockquote_depth = 0; + $in_pre = false; + + $link_stack = array(); + $list_stack = array(); + + // Table state. + $in_table = false; + $current_row = array(); + $is_header_row = false; + $header_row_done = false; + + while ( $processor->next_token() ) { + $token_name = $processor->get_token_name(); + + if ( '#text' === $token_name ) { + $text = (string) $processor->get_modifiable_text(); + + // If we're in a table cell, collect text for the cell. + if ( $in_table && ! empty( $current_row ) ) { + $cell_index = count( $current_row ) - 1; + $current_row[ $cell_index ] .= trim( (string) preg_replace( '/\s+/', ' ', $text ) ); + continue; + } + + $this->append_text( $markdown, $text, $at_line_start, $blockquote_depth, $in_pre ); + continue; + } + + // Skip script/style entirely. + if ( 'SCRIPT' === $token_name || 'STYLE' === $token_name ) { + continue; + } + + $is_closer = $processor->is_tag_closer(); + + if ( 'BR' === $token_name ) { + $this->append_newline( $markdown, $at_line_start ); + continue; + } + + if ( 'HR' === $token_name && ! $is_closer ) { + $this->ensure_blank_line( $markdown, $at_line_start ); + $this->append_line( $markdown, '---', $at_line_start, $blockquote_depth ); + $this->ensure_blank_line( $markdown, $at_line_start ); + continue; + } + + if ( ( 'P' === $token_name || 'DIV' === $token_name ) && $is_closer ) { + if ( ! 
$in_pre ) { + $this->ensure_blank_line( $markdown, $at_line_start ); + } + continue; + } + + if ( 'BLOCKQUOTE' === $token_name ) { + if ( $is_closer ) { + $blockquote_depth = max( 0, $blockquote_depth - 1 ); + $this->ensure_blank_line( $markdown, $at_line_start ); + } else { + ++$blockquote_depth; + $this->ensure_blank_line( $markdown, $at_line_start ); + } + continue; + } + + if ( 'PRE' === $token_name ) { + if ( $is_closer ) { + if ( ! $at_line_start ) { + $this->append_newline( $markdown, $at_line_start ); + } + $this->append_line( $markdown, '```', $at_line_start, $blockquote_depth ); + $this->ensure_blank_line( $markdown, $at_line_start ); + $in_pre = false; + } else { + $this->ensure_blank_line( $markdown, $at_line_start ); + $this->append_line( $markdown, '```', $at_line_start, $blockquote_depth ); + $in_pre = true; + } + continue; + } + + if ( 'CODE' === $token_name && ! $in_pre ) { + $markdown .= '`'; + $at_line_start = false; + continue; + } + + if ( 'STRONG' === $token_name || 'B' === $token_name ) { + $markdown .= '**'; + $at_line_start = false; + continue; + } + + if ( 'EM' === $token_name || 'I' === $token_name ) { + $markdown .= '*'; + $at_line_start = false; + continue; + } + + if ( 'A' === $token_name ) { + if ( $is_closer ) { + $href = array_pop( $link_stack ); + if ( $href ) { + $markdown .= '](' . $href . ')'; + } else { + $markdown .= ']'; + } + } else { + $link_stack[] = (string) $processor->get_attribute( 'href' ); + $markdown .= '['; + } + $at_line_start = false; + continue; + } + + if ( 'IMG' === $token_name && ! $is_closer ) { + $src = (string) $processor->get_attribute( 'src' ); + if ( '' === $src ) { + continue; + } + + $alt = (string) $processor->get_attribute( 'alt' ); + $this->append_text( $markdown, '![' . $alt . '](' . $src . ')', $at_line_start, $blockquote_depth, true ); + continue; + } + + // Figure element (contains image + optional caption). 
+ if ( 'FIGURE' === $token_name ) { + $this->ensure_blank_line( $markdown, $at_line_start ); + continue; + } + + // Figure caption - render as italic text on new line. + if ( 'FIGCAPTION' === $token_name ) { + if ( ! $is_closer ) { + $this->ensure_newline( $markdown, $at_line_start ); + $markdown .= '*'; + $at_line_start = false; + } else { + $markdown .= '*'; + } + continue; + } + + // Citation in blockquotes - prefix with em dash. + if ( 'CITE' === $token_name ) { + if ( ! $is_closer ) { + $markdown .= '— '; + $at_line_start = false; + } + continue; + } + + if ( 'UL' === $token_name || 'OL' === $token_name ) { + if ( $is_closer ) { + array_pop( $list_stack ); + $this->ensure_blank_line( $markdown, $at_line_start ); + } else { + $list_stack[] = array( + 'type' => $token_name, + 'index' => 0, + ); + $this->ensure_blank_line( $markdown, $at_line_start ); + } + continue; + } + + if ( 'LI' === $token_name && ! $is_closer ) { + $this->ensure_newline( $markdown, $at_line_start ); + + $depth = count( $list_stack ); + $indent = str_repeat( ' ', max( 0, $depth - 1 ) ); + + $marker = '-'; + if ( $depth > 0 && 'OL' === $list_stack[ $depth - 1 ]['type'] ) { + ++$list_stack[ $depth - 1 ]['index']; + $marker = (string) $list_stack[ $depth - 1 ]['index'] . '.'; + } + + $this->append_text( + $markdown, + $indent . $marker . ' ', + $at_line_start, + $blockquote_depth, + true + ); + + continue; + } + + // Table handling. + if ( 'TABLE' === $token_name ) { + if ( $is_closer ) { + $in_table = false; + $header_row_done = false; + $this->ensure_blank_line( $markdown, $at_line_start ); + } else { + $in_table = true; + $this->ensure_blank_line( $markdown, $at_line_start ); + } + continue; + } + + if ( 'THEAD' === $token_name || 'TBODY' === $token_name || 'TFOOT' === $token_name ) { + // Skip these structural elements; we detect headers via TH tags. + continue; + } + + if ( 'TR' === $token_name ) { + if ( $is_closer ) { + // Output the row. + if ( ! 
empty( $current_row ) ) { + $row_line = '| ' . implode( ' | ', $current_row ) . ' |'; + $this->append_line( $markdown, $row_line, $at_line_start, $blockquote_depth ); + + // Add separator after header row. + if ( $is_header_row && ! $header_row_done ) { + $separator = '|' . str_repeat( ' --- |', count( $current_row ) ); + $this->append_line( $markdown, $separator, $at_line_start, $blockquote_depth ); + $header_row_done = true; + } + } + $current_row = array(); + $is_header_row = false; + } + continue; + } + + if ( 'TH' === $token_name ) { + if ( ! $is_closer ) { + $current_row[] = ''; + $is_header_row = true; + } + continue; + } + + if ( 'TD' === $token_name ) { + if ( ! $is_closer ) { + $current_row[] = ''; + } + continue; + } + + if ( ! $token_name || ! preg_match( '/^H([1-6])$/', $token_name, $matches ) ) { + continue; + } + + if ( $is_closer ) { + $this->ensure_blank_line( $markdown, $at_line_start ); + } else { + $level = (int) $matches[1]; + $this->ensure_blank_line( $markdown, $at_line_start ); + $this->append_text( + $markdown, + str_repeat( '#', $level ) . ' ', + $at_line_start, + $blockquote_depth, + true + ); + } + continue; + } + + return $markdown; + } + + /** + * Appends plain text to the Markdown output. + * + * @since x.x.x + * + * @param string $markdown Markdown buffer. + * @param string $text Text to append. + * @param bool $at_line_start Whether output is at the start of a line. + * @param int $blockquote_depth Current blockquote depth. + * @param bool $preserve_whitespace Whether to preserve whitespace. + */ + private function append_text( string &$markdown, string $text, bool &$at_line_start, int $blockquote_depth, bool $preserve_whitespace = false ): void { + if ( '' === $text ) { + return; + } + + $text = str_replace( array( "\r\n", "\r" ), "\n", $text ); + + if ( ! 
$preserve_whitespace ) { + $text = preg_replace( '/\\s+/u', ' ', $text ); + } + + if ( $at_line_start && 0 < $blockquote_depth ) { + $markdown .= str_repeat( '> ', $blockquote_depth ); + } + + $markdown .= $text; + $at_line_start = false; + } + + /** + * Appends a newline. + * + * @since x.x.x + * + * @param string $markdown Markdown buffer. + * @param bool $at_line_start Whether output is at the start of a line. + */ + private function append_newline( string &$markdown, bool &$at_line_start ): void { + $markdown .= "\n"; + $at_line_start = true; + } + + /** + * Appends a full line and ensures the buffer ends at a new line. + * + * @since x.x.x + * + * @param string $markdown Markdown buffer. + * @param string $line Line content. + * @param bool $at_line_start Whether output is at the start of a line. + * @param int $blockquote_depth Current blockquote depth. + */ + private function append_line( string &$markdown, string $line, bool &$at_line_start, int $blockquote_depth ): void { + $this->ensure_newline( $markdown, $at_line_start ); + $this->append_text( $markdown, $line, $at_line_start, $blockquote_depth, true ); + $this->append_newline( $markdown, $at_line_start ); + } + + /** + * Ensures output starts on a new line. + * + * @since x.x.x + * + * @param string $markdown Markdown buffer. + * @param bool $at_line_start Whether output is at the start of a line. + */ + private function ensure_newline( string &$markdown, bool &$at_line_start ): void { + if ( $at_line_start ) { + return; + } + + $this->append_newline( $markdown, $at_line_start ); + } + + /** + * Ensures output ends with a blank line. + * + * @since x.x.x + * + * @param string $markdown Markdown buffer. + * @param bool $at_line_start Whether output is at the start of a line. 
+	 */
+	private function ensure_blank_line( string &$markdown, bool &$at_line_start ): void {
+		$markdown = rtrim( $markdown, "\n" );
+		$markdown .= "\n\n";
+		$at_line_start = true;
+	}
+
+	/**
+	 * Cleans up excessive whitespace and newlines.
+	 *
+	 * Works line by line so that fenced code blocks can be left alone:
+	 *
+	 * - Lines between ``` fences are emitted verbatim, so code keeps its
+	 *   indentation and its own blank lines.
+	 * - Outside fences, trailing whitespace is removed, leading whitespace is
+	 *   removed unless the line is an indented list item (the indent is what
+	 *   expresses list nesting), and runs of blank lines collapse to one.
+	 *
+	 * @since x.x.x
+	 *
+	 * @param string $markdown Markdown buffer.
+	 * @return string Cleaned buffer.
+	 */
+	private function cleanup( string $markdown ): string {
+		$cleaned = array();
+		$in_fence = false;
+		$previous_blank = false;
+
+		foreach ( explode( "\n", $markdown ) as $line ) {
+			// A fence line (optionally prefixed by blockquote markers) toggles code-block state.
+			if ( 1 === preg_match( '/^(?:>[ \t]?)*[ \t]*```/', $line ) ) {
+				$in_fence = ! $in_fence;
+				$previous_blank = false;
+				$cleaned[] = rtrim( $line, " \t" );
+				continue;
+			}
+
+			// Inside a fence whitespace is significant; do not touch the line.
+			if ( $in_fence ) {
+				$cleaned[] = $line;
+				continue;
+			}
+
+			$line = rtrim( $line, " \t" );
+
+			// Keep the indent of nested list items; strip any other leading whitespace.
+			if ( 1 !== preg_match( '/^[ \t]+(?:[-*+]|\d+\.)(?:[ \t]|$)/', $line ) ) {
+				$line = ltrim( $line, " \t" );
+			}
+
+			// Collapse consecutive blank lines into a single blank line.
+			$is_blank = '' === $line;
+			if ( $is_blank && $previous_blank ) {
+				continue;
+			}
+
+			$previous_blank = $is_blank;
+			$cleaned[] = $line;
+		}
+
+		return implode( "\n", $cleaned );
+	}
+}
diff --git a/includes/Experiments/Markdown_Feeds/Markdown_Feed_Renderer.php b/includes/Experiments/Markdown_Feeds/Markdown_Feed_Renderer.php
new file mode 100644
index 00000000..443a77c8
--- /dev/null
+++ b/includes/Experiments/Markdown_Feeds/Markdown_Feed_Renderer.php
@@ -0,0 +1,274 @@
+get_feed_last_modified();
+
+		// Check for conditional GET (304 Not Modified).
+		if ( $this->handle_conditional_get( $last_modified ) ) {
+			return;
+		}
+
+		$this->send_headers( $last_modified );
+
+		if ( $this->is_head_request() ) {
+			return;
+		}
+
+		$this->send_feed_header();
+		$this->send_posts();
+	}
+
+	/**
+	 * Sends HTTP headers for the Markdown feed response.
+	 *
+	 * @since x.x.x
+	 *
+	 * @param int $last_modified Unix timestamp of last modification.
+	 */
+	private function send_headers( int $last_modified ): void {
+		status_header( 200 );
+		header( 'Content-Type: text/markdown; charset=' . get_option( 'blog_charset' ), true );
+		header( 'X-Content-Type-Options: nosniff', true );
+
+		// Caching headers similar to core RSS feeds.
+		if ( $last_modified > 0 ) {
+			header( 'Last-Modified: ' . gmdate( 'D, d M Y H:i:s', $last_modified ) .
' GMT', true );
+
+			// Generate ETag from last modified time and feed URL.
+			$etag = md5( $last_modified . get_self_link() );
+			header( 'ETag: "' . $etag . '"', true );
+		}
+
+		// Allow caching for a short period (similar to core feeds behavior).
+		header( 'Cache-Control: max-age=300, must-revalidate', true );
+	}
+
+	/**
+	 * Handles conditional GET requests (If-None-Match, If-Modified-Since).
+	 *
+	 * When `If-None-Match` is present it alone decides the outcome and
+	 * `If-Modified-Since` is ignored; the date is only consulted when the
+	 * client sent no entity tag. Entity tags are compared weakly: a `W/`
+	 * prefix is disregarded, a comma-separated list is accepted, and `*`
+	 * matches any current representation.
+	 *
+	 * @since x.x.x
+	 *
+	 * @param int $last_modified Unix timestamp of last modification.
+	 * @return bool True if 304 response was sent, false otherwise.
+	 */
+	private function handle_conditional_get( int $last_modified ): bool {
+		if ( $last_modified <= 0 ) {
+			return false;
+		}
+
+		$client_etag = isset( $_SERVER['HTTP_IF_NONE_MATCH'] ) ? trim( (string) wp_unslash( $_SERVER['HTTP_IF_NONE_MATCH'] ) ) : ''; // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized
+		$client_last_modified = isset( $_SERVER['HTTP_IF_MODIFIED_SINCE'] ) ? trim( (string) wp_unslash( $_SERVER['HTTP_IF_MODIFIED_SINCE'] ) ) : ''; // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized
+
+		// Must be built exactly like the ETag emitted with the 200 response.
+		$etag = '"' . md5( $last_modified . get_self_link() ) . '"';
+
+		$not_modified = false;
+
+		if ( '' !== $client_etag ) {
+			// If-None-Match takes precedence; a stale date must not override a mismatching tag.
+			foreach ( explode( ',', $client_etag ) as $candidate ) {
+				$candidate = trim( $candidate );
+
+				// Weak comparison: the weakness indicator does not affect matching.
+				if ( 0 === strpos( $candidate, 'W/' ) ) {
+					$candidate = substr( $candidate, 2 );
+				}
+
+				if ( '*' === $candidate || $candidate === $etag ) {
+					$not_modified = true;
+					break;
+				}
+			}
+		} elseif ( '' !== $client_last_modified ) {
+			// strtotime() returns false for an unparseable date; treat that as "no validator".
+			$client_timestamp = strtotime( $client_last_modified );
+			$not_modified = false !== $client_timestamp && $client_timestamp >= $last_modified;
+		}
+
+		if ( ! $not_modified ) {
+			return false;
+		}
+
+		status_header( 304 );
+		header( 'Content-Type: text/markdown; charset=' . get_option( 'blog_charset' ), true );
+		// Repeat the validator so caches can keep associating it with the stored response.
+		header( 'ETag: ' . $etag, true );
+		return true;
+	}
+
+	/**
+	 * Gets the last modified timestamp for the feed.
+	 *
+	 * @since x.x.x
+	 *
+	 * @return int Unix timestamp, or 0 if unknown.
+	 */
+	private function get_feed_last_modified(): int {
+		global $wp_query;
+
+		if ( ! $wp_query instanceof WP_Query || empty( $wp_query->posts ) ) {
+			return 0;
+		}
+
+		$latest = 0;
+		foreach ( $wp_query->posts as $post ) {
+			if ( !
$post instanceof WP_Post ) { + continue; + } + + $modified = strtotime( $post->post_modified_gmt ); + if ( $modified <= $latest ) { + continue; + } + + $latest = $modified; + } + + return $latest; + } + + /** + * Outputs the feed header in Markdown. + * + * @since x.x.x + */ + private function send_feed_header(): void { + $site_name = wp_strip_all_tags( (string) get_bloginfo( 'name' ) ); + $site_desc = wp_strip_all_tags( (string) get_bloginfo( 'description' ) ); + $feed_url = esc_url_raw( get_self_link() ); + + echo '# ' . esc_html( $site_name ) . ' — ' . esc_html__( 'Markdown Feed', 'ai' ) . "\n\n"; + + if ( '' !== $site_desc ) { + echo esc_html( $site_desc ) . "\n\n"; + } + + echo esc_html__( 'Feed URL:', 'ai' ) . ' <' . esc_url( $feed_url ) . ">\n\n"; + } + + /** + * Outputs all posts in the current feed query. + * + * @since x.x.x + */ + private function send_posts(): void { + global $more; + + // Ensure full content is used. + // phpcs:ignore WordPress.WP.GlobalVariablesOverride.Prohibited -- Used intentionally to ensure full content in feed. + $more = 1; + + if ( ! have_posts() ) { + echo esc_html__( 'No posts found.', 'ai' ) . "\n"; + return; + } + + while ( have_posts() ) { + the_post(); + + $post = get_post(); + if ( ! $post instanceof WP_Post ) { + continue; + } + + $this->send_post( $post ); + } + } + + /** + * Outputs a single post block in Markdown. + * + * @since x.x.x + * + * @param \WP_Post $post Post object. + */ + private function send_post( WP_Post $post ): void { + $title = wp_strip_all_tags( (string) get_the_title( $post ) ); + $permalink = esc_url_raw( get_permalink( $post ) ); + $date_r = (string) get_post_time( 'r', true, $post ); + + $content = get_post_field( 'post_content', $post ); + $html = (string) apply_filters( 'the_content', $content ); + + $markdown = $this->convert_html_to_markdown( $html ); + $markdown = trim( $markdown ); + + $meta_lines = array( + esc_html__( 'URL:', 'ai' ) . ' <' . esc_url( $permalink ) . 
'>', + esc_html__( 'Published:', 'ai' ) . ' ' . esc_html( $date_r ), + ); + + $sections = array( + 'header' => '## ' . esc_html( $title ), + 'meta' => implode( "\n", $meta_lines ), + ); + + if ( '' !== $markdown ) { + $sections['content'] = $markdown; + } + + $sections['footer'] = '---'; + + /** + * Filters the Markdown feed entry sections. + * + * Allows reordering or inserting custom sections before output is emitted. + * + * @since x.x.x + * + * @param array $sections Markdown sections keyed by role. + * @param \WP_Post $post Post object. + */ + $sections = (array) apply_filters( 'ai_experiments_markdown_feed_post_sections', $sections, $post ); + + $entry = implode( "\n\n", $sections ); + if ( '' === $entry ) { + return; + } + + // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- Markdown response. + echo $entry . "\n\n"; + } + + /** + * Converts HTML to Markdown. + * + * @since x.x.x + * + * @param string $html HTML string. + * @return string Markdown string. + */ + private function convert_html_to_markdown( string $html ): string { + /** + * Filters the HTML input before conversion to Markdown. + * + * @since x.x.x + * + * @param string $html HTML to convert. + */ + $html = (string) apply_filters( 'ai_experiments_markdown_feed_html', $html ); + + $converter = new HTML_To_Markdown_Converter(); + $markdown = $converter->convert( $html ); + + /** + * Filters the Markdown output after conversion. + * + * @since x.x.x + * + * @param string $markdown Markdown output. + * @param string $html Original HTML input. + */ + return (string) apply_filters( 'ai_experiments_markdown_feed_markdown', $markdown, $html ); + } + + /** + * Checks whether the current HTTP request is a HEAD request. + * + * @since x.x.x + * + * @return bool + */ + private function is_head_request(): bool { + $method = isset( $_SERVER['REQUEST_METHOD'] ) ? 
sanitize_key( (string) $_SERVER['REQUEST_METHOD'] ) : ''; // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized + return 'head' === $method; + } +} diff --git a/includes/Experiments/Markdown_Feeds/Markdown_Feeds.php b/includes/Experiments/Markdown_Feeds/Markdown_Feeds.php new file mode 100644 index 00000000..93f15d13 --- /dev/null +++ b/includes/Experiments/Markdown_Feeds/Markdown_Feeds.php @@ -0,0 +1,571 @@ + 'markdown-feeds', + 'label' => esc_html__( 'Markdown Feeds', 'ai' ), + 'description' => esc_html__( 'Adds Markdown representations of posts and pages via feeds, .md URLs, and Accept header negotiation.', 'ai' ), + ); + } + + /** + * {@inheritDoc} + */ + public function register(): void { + add_action( 'init', array( $this, 'register_feed' ), 11 ); + add_filter( 'request', array( $this, 'filter_request_for_markdown_extension' ), 1 ); + add_filter( 'redirect_canonical', array( $this, 'filter_redirect_canonical' ), 10, 2 ); + add_filter( 'wp_headers', array( $this, 'filter_wp_headers' ) ); + add_action( 'template_redirect', array( $this, 'maybe_render_singular_markdown' ), 0 ); + + // Feed autodiscovery. + add_action( 'wp_head', array( $this, 'add_feed_autodiscovery_links' ) ); + } + + /** + * Registers the Markdown feed. + * + * @since x.x.x + */ + public function register_feed(): void { + $settings = $this->get_settings(); + if ( ! $settings['enable_feed'] ) { + return; + } + + add_feed( self::FEED_NAME, array( $this, 'render_feed' ) ); + } + + /** + * Outputs feed autodiscovery links in the HTML head. + * + * Mirrors the behavior of `feed_links()` and `feed_links_extra()` for RSS/Atom. + * + * @since x.x.x + */ + public function add_feed_autodiscovery_links(): void { + $settings = $this->get_settings(); + if ( ! $settings['enable_feed'] ) { + return; + } + + // Don't add discovery links on feeds themselves. + if ( is_feed() ) { + return; + } + + $site_name = get_bloginfo( 'name' ); + + // Main site feed. 
+		$feed_url = $this->get_markdown_feed_link();
+
+		/* translators: %s: Site name. */
+		$title = sprintf( __( '%s Markdown Feed', 'ai' ), $site_name );
+
+		printf(
+			'' . "\n",
+			esc_attr( $title ),
+			esc_url( $feed_url )
+		);
+
+		// Singular post/page: add link to .md version.
+		if ( ! is_singular() || ! $settings['enable_md_extension'] ) {
+			return;
+		}
+
+		$post = get_queried_object();
+		if ( ! ( $post instanceof \WP_Post ) || ! $this->is_post_accessible( $post ) ) {
+			return;
+		}
+
+		$md_url = $this->get_markdown_permalink( $post );
+		if ( '' === $md_url ) {
+			return;
+		}
+
+		/* translators: %s: Post title. */
+		$md_title = sprintf( __( '%s (Markdown)', 'ai' ), get_the_title( $post ) );
+
+		printf(
+			'' . "\n",
+			esc_attr( $md_title ),
+			esc_url( $md_url )
+		);
+	}
+
+	/**
+	 * Gets the Markdown feed URL.
+	 *
+	 * @since x.x.x
+	 *
+	 * @param string $context Optional. Feed context (empty for main feed, or 'category', 'tag', etc.).
+	 * @return string Feed URL.
+	 */
+	public function get_markdown_feed_link( string $context = '' ): string {
+		if ( '' === $context ) {
+			return get_feed_link( self::FEED_NAME );
+		}
+
+		// For archive feeds, use the base feed link with feed query parameter.
+		return add_query_arg( 'feed', self::FEED_NAME, $context );
+	}
+
+	/**
+	 * Gets the Markdown permalink for a post.
+	 *
+	 * Returns an empty string when no `.md` URL can be formed: when the
+	 * permalink structure is empty, when the permalink carries a query
+	 * string, or when the permalink is the site's home URL (a static front
+	 * page), where appending `.md` would alter the host name or the install
+	 * directory instead of adding an extension to a slug.
+	 *
+	 * @since x.x.x
+	 *
+	 * @param \WP_Post $post Post object.
+	 * @return string Markdown permalink, or an empty string if unavailable.
+	 */
+	public function get_markdown_permalink( \WP_Post $post ): string {
+		$permalink_structure = (string) get_option( 'permalink_structure' );
+		if ( '' === $permalink_structure ) {
+			return '';
+		}
+
+		$permalink = get_permalink( $post );
+		if ( ! $permalink || false !== strpos( $permalink, '?' ) ) {
+			return '';
+		}
+
+		$base = untrailingslashit( $permalink );
+
+		// `https://example.com/` must not become `https://example.com.md`.
+		if ( untrailingslashit( home_url( '/' ) ) === $base ) {
+			return '';
+		}
+
+		// Trailing slash removed above; add the .md extension.
+		return $base . '.md';
+	}
+
+	/**
+	 * {@inheritDoc}
+	 */
+	public function register_settings(): void {
+		register_setting(
+			Settings_Registration::OPTION_GROUP,
+			self::OPTION_ENABLE_FEED,
+			array(
+				'type' => 'boolean',
+				'default' => self::DEFAULT_ENABLE_FEED,
+				'sanitize_callback' => 'rest_sanitize_boolean',
+			)
+		);
+
+		register_setting(
+			Settings_Registration::OPTION_GROUP,
+			self::OPTION_ENABLE_MD_EXTENSION,
+			array(
+				'type' => 'boolean',
+				'default' => self::DEFAULT_ENABLE_MD_EXTENSION,
+				'sanitize_callback' => 'rest_sanitize_boolean',
+			)
+		);
+
+		register_setting(
+			Settings_Registration::OPTION_GROUP,
+			self::OPTION_ENABLE_ACCEPT_HEADERS,
+			array(
+				'type' => 'boolean',
+				'default' => self::DEFAULT_ENABLE_ACCEPT_HEADERS,
+				'sanitize_callback' => 'rest_sanitize_boolean',
+			)
+		);
+	}
+
+	/**
+	 * {@inheritDoc}
+	 */
+	public function render_settings_fields(): void {
+		$settings = $this->get_settings();
+		?>
+
+ + + + + +
+ render(); + } + + /** + * Filters parsed query vars to strip `.md` suffix from slug-based vars. + * + * This allows URLs like `/post-name.md` to resolve to the post with slug `post-name`. + * Works with the `request` filter which fires after WordPress parses the URL. + * + * @since x.x.x + * + * @param array $query_vars Parsed query vars. + * @return array + */ + public function filter_request_for_markdown_extension( array $query_vars ): array { + $settings = $this->get_settings(); + if ( ! $settings['enable_md_extension'] ) { + return $query_vars; + } + + if ( is_admin() ) { + return $query_vars; + } + + $method = $this->get_request_method(); + if ( 'get' !== $method && 'head' !== $method ) { + return $query_vars; + } + + // Query vars that contain post/page slugs which may have .md suffix. + $slug_vars = array( 'name', 'pagename', 'attachment' ); + + foreach ( $slug_vars as $var ) { + if ( empty( $query_vars[ $var ] ) ) { + continue; + } + + $value = (string) $query_vars[ $var ]; + if ( '.md' !== substr( $value, -3 ) ) { + continue; + } + + // Strip the .md suffix and mark this as a markdown request. + $query_vars[ $var ] = substr( $value, 0, -3 ); + $this->markdown_extension_request = true; + break; + } + + return $query_vars; + } + + /** + * Prevents canonical redirects for `.md` requests. + * + * @since x.x.x + * + * @param string|false $redirect_url The redirect URL. + * @param string $requested_url The requested URL. + * @return string|false + */ + public function filter_redirect_canonical( $redirect_url, string $requested_url ) { // phpcs:ignore VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable + if ( $this->markdown_extension_request ) { + return false; + } + + return $redirect_url; + } + + /** + * Adds `Vary: Accept` when Accept header negotiation is enabled. + * + * @since x.x.x + * + * @param array $headers Array of headers to send. 
+ * @return array + */ + public function filter_wp_headers( array $headers ): array { + $settings = $this->get_settings(); + if ( ! $settings['enable_accept_headers'] ) { + return $headers; + } + + if ( $this->markdown_extension_request ) { + return $headers; + } + + if ( ! is_singular() ) { + return $headers; + } + + $headers['Vary'] = $this->merge_vary_header( $headers['Vary'] ?? '', 'Accept' ); + return $headers; + } + + /** + * Renders Markdown for singular content when requested via `.md` or `Accept: text/markdown`. + * + * @since x.x.x + */ + public function maybe_render_singular_markdown(): void { + $settings = $this->get_settings(); + + $method = $this->get_request_method(); + if ( 'get' !== $method && 'head' !== $method ) { + return; + } + + $wants_markdown = false; + if ( $settings['enable_md_extension'] && $this->markdown_extension_request ) { + $wants_markdown = true; + } elseif ( $settings['enable_accept_headers'] && $this->client_accepts_markdown() ) { + $wants_markdown = true; + } + + if ( ! $wants_markdown ) { + return; + } + + $renderer = new Markdown_Singular_Renderer(); + + if ( ! is_singular() ) { + if ( $this->markdown_extension_request ) { + $renderer->render_not_found(); + exit; + } + + return; + } + + $post = get_queried_object(); + if ( ! $post instanceof \WP_Post ) { + $renderer->render_not_found(); + exit; + } + + // Check post accessibility (status, password protection, etc.). + if ( ! $this->is_post_accessible( $post ) ) { + if ( post_password_required( $post ) ) { + $renderer->render_password_required(); + } else { + $renderer->render_not_found(); + } + exit; + } + + // Send Link header pointing to canonical HTML version. + $canonical_url = get_permalink( $post ); + if ( $canonical_url ) { + header( 'Link: <' . esc_url( $canonical_url ) . '>; rel="canonical"', false ); + } + + $renderer->render( $post ); + exit; + } + + /** + * Checks whether a post is accessible for Markdown rendering. 
+	 *
+	 * Validates post status and password protection similar to core feed behavior.
+	 *
+	 * @since x.x.x
+	 *
+	 * @param \WP_Post $post Post object.
+	 * @return bool True if accessible, false otherwise.
+	 */
+	private function is_post_accessible( \WP_Post $post ): bool {
+		$status = get_post_status( $post );
+
+		// Published posts are accessible unless password-protected.
+		if ( 'publish' === $status ) {
+			return ! post_password_required( $post );
+		}
+
+		// Private posts require the read_private_posts capability.
+		if ( 'private' === $status ) {
+			$post_type_obj = get_post_type_object( $post->post_type );
+			if ( ! $post_type_obj ) {
+				return false;
+			}
+
+			return current_user_can( $post_type_obj->cap->read_private_posts ?? 'read_private_posts' ); // phpcs:ignore WordPress.WP.Capabilities.Undetermined
+		}
+
+		// Draft, pending, future, trash, etc. are not accessible.
+		return false;
+	}
+
+	/**
+	 * Checks whether the current request's `Accept` header includes Markdown.
+	 *
+	 * The header is split into media ranges and each range's type is compared
+	 * exactly against the supported Markdown media types. A range carrying a
+	 * quality value of zero (`q=0`) means "not acceptable" and is skipped.
+	 *
+	 * @since x.x.x
+	 *
+	 * @return bool True if a Markdown media type is listed with a non-zero quality.
+	 */
+	private function client_accepts_markdown(): bool {
+		$accept = isset( $_SERVER['HTTP_ACCEPT'] ) ? strtolower( (string) wp_unslash( $_SERVER['HTTP_ACCEPT'] ) ) : ''; // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized
+		if ( '' === $accept ) {
+			return false;
+		}
+
+		$types = array(
+			'text/markdown',
+			'text/x-markdown',
+			'application/markdown',
+		);
+
+		foreach ( explode( ',', $accept ) as $range ) {
+			// First segment is the media type; the rest are parameters such as `q=0.8`.
+			$params = array_map( 'trim', explode( ';', $range ) );
+			$type = (string) array_shift( $params );
+
+			if ( ! in_array( $type, $types, true ) ) {
+				continue;
+			}
+
+			// Quality defaults to 1 when no `q` parameter is given.
+			$quality = 1.0;
+			foreach ( $params as $param ) {
+				if ( 0 === strpos( $param, 'q=' ) ) {
+					$quality = (float) substr( $param, 2 );
+					break;
+				}
+			}
+
+			if ( $quality > 0 ) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Merges a token into a `Vary` header value.
+	 *
+	 * @since x.x.x
+	 *
+	 * @param string $current Existing Vary header.
+	 * @param string $token Token to add.
+	 * @return string
+	 */
+	private function merge_vary_header( string $current, string $token ): string {
+		$existing = trim( $current );
+		if ( '' === $existing ) {
+			return $token;
+		}
+
+		$tokens = array_map( 'trim', explode( ',', $existing ) );
+
+		// Tokens are compared without regard to letter case.
+		if ( in_array( strtolower( $token ), array_map( 'strtolower', $tokens ), true ) ) {
+			return $existing;
+		}
+
+		$tokens[] = $token;
+		return implode( ', ', $tokens );
+	}
+
+	/**
+	 * Loads the three experiment toggles from options, falling back to the class defaults.
+	 *
+	 * Every value is cast to a boolean before it is returned.
+	 *
+	 * @since x.x.x
+	 *
+	 * @return array{enable_feed: bool, enable_md_extension: bool, enable_accept_headers: bool}
+	 */
+	private function get_settings(): array {
+		$feed_enabled   = (bool) get_option( self::OPTION_ENABLE_FEED, self::DEFAULT_ENABLE_FEED );
+		$md_enabled     = (bool) get_option( self::OPTION_ENABLE_MD_EXTENSION, self::DEFAULT_ENABLE_MD_EXTENSION );
+		$accept_enabled = (bool) get_option( self::OPTION_ENABLE_ACCEPT_HEADERS, self::DEFAULT_ENABLE_ACCEPT_HEADERS );
+
+		return array(
+			'enable_feed'           => $feed_enabled,
+			'enable_md_extension'   => $md_enabled,
+			'enable_accept_headers' => $accept_enabled,
+		);
+	}
+
+	/**
+	 * Returns the HTTP request method passed through `sanitize_key()`.
+	 *
+	 * Falls back to `get` when the server did not provide a request method.
+	 *
+	 * @since x.x.x
+	 *
+	 * @return string
+	 */
+	private function get_request_method(): string {
+		if ( isset( $_SERVER['REQUEST_METHOD'] ) ) {
+			// phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized -- Request method used for routing only.
+			return sanitize_key( (string) wp_unslash( $_SERVER['REQUEST_METHOD'] ) );
+		}
+
+		return 'get';
+	}
+}
diff --git a/includes/Experiments/Markdown_Feeds/Markdown_Singular_Renderer.php b/includes/Experiments/Markdown_Feeds/Markdown_Singular_Renderer.php
new file mode 100644
index 00000000..06b421de
--- /dev/null
+++ b/includes/Experiments/Markdown_Feeds/Markdown_Singular_Renderer.php
@@ -0,0 +1,224 @@
+post_modified_gmt );
+
+		// Check for conditional GET (304 Not Modified).
+		if ( $last_modified && $this->handle_conditional_get( $last_modified, $post ) ) {
+			return;
+		}
+
+		$this->send_headers( 200, $last_modified, $post );
+
+		// HEAD responses stop after the headers.
+		if ( ! $this->is_head_request() ) {
+			$this->send_post( $post );
+		}
+	}
+
+	/**
+	 * Emits a 404 response with a short Markdown explanation.
+	 *
+	 * The body is omitted for HEAD requests.
+	 *
+	 * @since x.x.x
+	 */
+	public function render_not_found(): void {
+		$this->send_headers( 404 );
+
+		if ( ! $this->is_head_request() ) {
+			printf(
+				"# %s\n\n%s\n",
+				esc_html__( 'Not Found', 'ai' ),
+				esc_html__( 'No Markdown representation is available for this URL.', 'ai' )
+			);
+		}
+	}
+
+	/**
+	 * Emits a 401 response explaining that the content is password protected.
+	 *
+	 * The body is omitted for HEAD requests.
+	 *
+	 * @since x.x.x
+	 */
+	public function render_password_required(): void {
+		$this->send_headers( 401 );
+
+		if ( ! $this->is_head_request() ) {
+			printf(
+				"# %s\n\n%s\n",
+				esc_html__( 'Password Required', 'ai' ),
+				esc_html__( 'This content is password protected. Please provide the password to view the Markdown representation.', 'ai' )
+			);
+		}
+	}
+
+	/**
+	 * Sets the status line and response headers for a Markdown response.
+	 *
+	 * Content-Type and nosniff headers are always sent. Caching validators are added only for
+	 * a 200 response that comes with both a modification timestamp and a post.
+	 *
+	 * @since x.x.x
+	 *
+	 * @param int           $status_code   HTTP status code.
+	 * @param int|false     $last_modified Unix timestamp of last modification, or false.
+	 * @param \WP_Post|null $post          Post object for ETag generation.
+	 */
+	private function send_headers( int $status_code, $last_modified = false, ?WP_Post $post = null ): void {
+		status_header( $status_code );
+		header( 'Content-Type: text/markdown; charset=' . get_option( 'blog_charset' ), true );
+		header( 'X-Content-Type-Options: nosniff', true );
+
+		// Caching headers need a successful status, a timestamp and a post.
+		$cacheable = 200 === $status_code && $last_modified && $post;
+		if ( ! $cacheable ) {
+			return;
+		}
+
+		$http_date = gmdate( 'D, d M Y H:i:s', $last_modified ) . ' GMT';
+		header( 'Last-Modified: ' . $http_date, true );
+
+		// The ETag is an MD5 of the modification timestamp and the post ID.
+		$etag = md5( $last_modified . '-' . $post->ID );
+		header( 'ETag: "' . $etag . 
'"', true );
+
+		// Allow caching for a short period.
+		header( 'Cache-Control: max-age=300, must-revalidate', true );
+	}
+
+	/**
+	 * Handles conditional GET requests (If-Modified-Since, If-None-Match).
+	 *
+	 * When `If-None-Match` is present it alone decides the outcome and `If-Modified-Since` is
+	 * ignored. The header may hold a comma-separated list, weak validators (`W/"..."`) or `*`.
+	 * Without `If-None-Match`, a parseable `If-Modified-Since` date at or after the post's
+	 * modification time counts as a match.
+	 *
+	 * On a match, a 304 is sent together with the same validators and caching directive that
+	 * the 200 response carries.
+	 *
+	 * @since x.x.x
+	 *
+	 * @param int      $last_modified Unix timestamp of last modification.
+	 * @param \WP_Post $post          Post object.
+	 * @return bool True if 304 response was sent, false otherwise.
+	 */
+	private function handle_conditional_get( int $last_modified, WP_Post $post ): bool {
+		$client_etag          = isset( $_SERVER['HTTP_IF_NONE_MATCH'] ) ? trim( (string) wp_unslash( $_SERVER['HTTP_IF_NONE_MATCH'] ) ) : ''; // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized
+		$client_last_modified = isset( $_SERVER['HTTP_IF_MODIFIED_SINCE'] ) ? (string) wp_unslash( $_SERVER['HTTP_IF_MODIFIED_SINCE'] ) : ''; // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized
+
+		$etag = '"' . md5( $last_modified . '-' . $post->ID ) . '"';
+
+		$not_modified = false;
+
+		if ( '' !== $client_etag ) {
+			// If-None-Match takes precedence over If-Modified-Since.
+			foreach ( explode( ',', $client_etag ) as $candidate ) {
+				$candidate = trim( $candidate );
+
+				// Weak validators are compared by their opaque tag only.
+				if ( 0 === strpos( $candidate, 'W/' ) ) {
+					$candidate = substr( $candidate, 2 );
+				}
+
+				if ( '*' === $candidate || $candidate === $etag ) {
+					$not_modified = true;
+					break;
+				}
+			}
+		} elseif ( '' !== $client_last_modified ) {
+			// strtotime() returns false for an unparseable date; that never counts as a match.
+			$client_timestamp = strtotime( $client_last_modified );
+			$not_modified     = false !== $client_timestamp && $client_timestamp >= $last_modified;
+		}
+
+		if ( ! $not_modified ) {
+			return false;
+		}
+
+		status_header( 304 );
+		header( 'Content-Type: text/markdown; charset=' . get_option( 'blog_charset' ), true );
+
+		// Repeat the validators and caching directive so caches can refresh the stored response.
+		header( 'Last-Modified: ' . gmdate( 'D, d M Y H:i:s', $last_modified ) . ' GMT', true );
+		header( 'ETag: ' . $etag, true );
+		header( 'Cache-Control: max-age=300, must-revalidate', true );
+
+		return true;
+	}
+
+	/**
+	 * Outputs a single post block in Markdown.
+	 *
+	 * @since x.x.x
+	 *
+	 * @param \WP_Post $post Post object.
+	 */
+	private function send_post( WP_Post $post ): void {
+		$title     = wp_strip_all_tags( (string) get_the_title( $post ) );
+		$permalink = (string) get_permalink( $post );
+
+		$published = (string) get_post_time( 'r', true, $post );
+		$modified  = (string) get_post_modified_time( 'r', true, $post );
+
+		$content = get_post_field( 'post_content', $post );
+		$html    = (string) apply_filters( 'the_content', $content );
+
+		/**
+		 * Filters the HTML input before conversion to Markdown.
+		 *
+		 * @since x.x.x
+		 *
+		 * @param string   $html HTML to convert.
+		 * @param \WP_Post $post Post object.
+		 */
+		$html = (string) apply_filters( 'ai_experiments_markdown_singular_html', $html, $post );
+
+		// Convert the filtered HTML into Markdown.
+		$converter = new HTML_To_Markdown_Converter();
+		$markdown = $converter->convert( $html );
+
+		/**
+		 * Filters the Markdown output after conversion.
+		 *
+		 * @since x.x.x
+		 *
+		 * @param string   $markdown Markdown output.
+		 * @param string   $html     Original HTML input.
+		 * @param \WP_Post $post     Post object.
+		 */
+		$markdown = (string) apply_filters( 'ai_experiments_markdown_singular_markdown', $markdown, $html, $post );
+		// Trim so that whitespace-only output is treated as empty below.
+		$markdown = trim( $markdown );
+
+		// Metadata lines: the URL as a Markdown autolink, then the publish date.
+		// NOTE(review): esc_url() targets HTML output; confirm that entity-encoded ampersands
+		// are acceptable inside a Markdown autolink.
+		$meta_lines = array(
+			esc_html__( 'URL:', 'ai' ) . ' <' . esc_url( $permalink ) . '>',
+			esc_html__( 'Published:', 'ai' ) . ' ' . esc_html( $published ),
+		);
+
+		// The "Updated" line is only listed when it differs from the publish date.
+		if ( $modified !== $published ) {
+			$meta_lines[] = esc_html__( 'Updated:', 'ai' ) . ' ' . esc_html( $modified );
+		}
+
+		$sections = array(
+			'header' => '# ' . esc_html( $title ),
+			'meta' => implode( "\n", $meta_lines ),
+		);
+
+		// The content section is left out entirely when the converted Markdown is empty.
+		if ( '' !== $markdown ) {
+			$sections['content'] = $markdown;
+		}
+
+		/**
+		 * Filters the Markdown singular entry sections.
+		 *
+		 * Allows reordering or inserting custom sections before output is emitted.
+		 *
+		 * @since x.x.x
+		 *
+		 * @param array    $sections Markdown sections keyed by role.
+		 * @param \WP_Post $post     Post object.
+		 */
+		$sections = (array) apply_filters( 'ai_experiments_markdown_singular_post_sections', $sections, $post );
+
+		// Sections are separated by one blank line; nothing is printed if the result is empty.
+		$entry = implode( "\n\n", $sections );
+		if ( '' === $entry ) {
+			return;
+		}
+
+		// phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- Markdown response.
+		echo $entry . "\n";
+	}
+
+	/**
+	 * Checks whether the current HTTP request is a HEAD request.
+	 *
+	 * @since x.x.x
+	 *
+	 * @return bool
+	 */
+	private function is_head_request(): bool {
+		$method = isset( $_SERVER['REQUEST_METHOD'] ) ? 
sanitize_key( (string) $_SERVER['REQUEST_METHOD'] ) : ''; // phpcs:ignore WordPress.Security.ValidatedSanitizedInput.InputNotSanitized + return 'head' === $method; + } +} diff --git a/src/admin/settings/index.scss b/src/admin/settings/index.scss index cce638c0..85d2d470 100644 --- a/src/admin/settings/index.scss +++ b/src/admin/settings/index.scss @@ -135,4 +135,12 @@ margin-left: 1.5rem; } } + + /* Element: item settings */ + &__item-settings { + display: flex; + flex-direction: column; + gap: 0.75rem; + margin-left: 1.5rem; + } } diff --git a/tests/Integration/Includes/Experiment_LoaderTest.php b/tests/Integration/Includes/Experiment_LoaderTest.php index 617dddd4..5c0a60f4 100644 --- a/tests/Integration/Includes/Experiment_LoaderTest.php +++ b/tests/Integration/Includes/Experiment_LoaderTest.php @@ -127,6 +127,10 @@ public function test_register_default_experiments() { $this->registry->has_experiment( 'title-generation' ), 'Title generation experiment should be registered' ); + $this->assertTrue( + $this->registry->has_experiment( 'markdown-feeds' ), + 'Markdown feeds experiment should be registered' + ); $abilities_explorer_experiment = $this->registry->get_experiment( 'abilities-explorer' ); $this->assertNotNull( $abilities_explorer_experiment, 'Abilities explorer experiment should exist' ); @@ -147,6 +151,9 @@ public function test_register_default_experiments() { $title_experiment = $this->registry->get_experiment( 'title-generation' ); $this->assertNotNull( $title_experiment, 'Title generation experiment should exist' ); $this->assertEquals( 'title-generation', $title_experiment->get_id() ); + $markdown_experiment = $this->registry->get_experiment( 'markdown-feeds' ); + $this->assertNotNull( $markdown_experiment, 'Markdown feeds experiment should exist' ); + $this->assertEquals( 'markdown-feeds', $markdown_experiment->get_id() ); } /** diff --git a/tests/Integration/Includes/Experiments/Markdown_Feeds/HTML_To_Markdown_ConverterTest.php 
b/tests/Integration/Includes/Experiments/Markdown_Feeds/HTML_To_Markdown_ConverterTest.php new file mode 100644 index 00000000..97bc5fe1 --- /dev/null +++ b/tests/Integration/Includes/Experiments/Markdown_Feeds/HTML_To_Markdown_ConverterTest.php @@ -0,0 +1,354 @@ +converter = new HTML_To_Markdown_Converter(); + } + + /** + * Test that basic HTML is converted into Markdown. + * + * @since x.x.x + */ + public function test_basic_conversion(): void { + $code = "
echo \"hi\";\n
"; + + $html = '

Heading

' + . '

Hello world link.

' + . '
  • One
  • Two
' + . '

Alt text

' + . $code; + + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '## Heading', $markdown ); + $this->assertStringContainsString( 'Hello **world** [link](https://example.com).', $markdown ); + $this->assertStringContainsString( '- One', $markdown ); + $this->assertStringContainsString( '- Two', $markdown ); + $this->assertStringContainsString( '![Alt text](https://example.com/a.jpg)', $markdown ); + $this->assertStringContainsString( '```', $markdown ); + $this->assertStringContainsString( 'echo "hi";', $markdown ); + } + + /** + * Test conversion of all heading levels. + * + * @since x.x.x + */ + public function test_converts_all_heading_levels(): void { + $html = '

H1

H2

H3

H4

H5
H6
'; + + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '# H1', $markdown ); + $this->assertStringContainsString( '## H2', $markdown ); + $this->assertStringContainsString( '### H3', $markdown ); + $this->assertStringContainsString( '#### H4', $markdown ); + $this->assertStringContainsString( '##### H5', $markdown ); + $this->assertStringContainsString( '###### H6', $markdown ); + } + + /** + * Test conversion of bold text using both strong and b tags. + * + * @since x.x.x + */ + public function test_converts_bold_text(): void { + $html = '

This is bold and also bold.

'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '**bold**', $markdown ); + $this->assertStringContainsString( '**also bold**', $markdown ); + } + + /** + * Test conversion of italic text using both em and i tags. + * + * @since x.x.x + */ + public function test_converts_italic_text(): void { + $html = '

This is italic and also italic.

'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '*italic*', $markdown ); + $this->assertStringContainsString( '*also italic*', $markdown ); + } + + /** + * Test conversion of links with href attribute. + * + * @since x.x.x + */ + public function test_converts_links(): void { + $html = '

Visit Example Site for more info.

'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '[Example Site](https://example.com)', $markdown ); + } + + /** + * Test conversion of images with alt text. + * + * @since x.x.x + */ + public function test_converts_images_with_alt(): void { + $html = 'A beautiful sunset'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '![A beautiful sunset](https://example.com/photo.jpg)', $markdown ); + } + + /** + * Test conversion of unordered lists. + * + * @since x.x.x + */ + public function test_converts_unordered_lists(): void { + $html = '
  • Apple
  • Banana
  • Cherry
'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '- Apple', $markdown ); + $this->assertStringContainsString( '- Banana', $markdown ); + $this->assertStringContainsString( '- Cherry', $markdown ); + } + + /** + * Test conversion of ordered lists. + * + * @since x.x.x + */ + public function test_converts_ordered_lists(): void { + $html = '
  1. First
  2. Second
  3. Third
'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '1. First', $markdown ); + $this->assertStringContainsString( '2. Second', $markdown ); + $this->assertStringContainsString( '3. Third', $markdown ); + } + + /** + * Test conversion of blockquotes. + * + * @since x.x.x + */ + public function test_converts_blockquotes(): void { + $html = '
This is a quoted text.
'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '> This is a quoted text.', $markdown ); + } + + /** + * Test conversion of inline code. + * + * @since x.x.x + */ + public function test_converts_inline_code(): void { + $html = '

Use the console.log() function for debugging.

'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '`console.log()`', $markdown ); + } + + /** + * Test conversion of code blocks. + * + * @since x.x.x + */ + public function test_converts_code_blocks(): void { + $html = '
const x = 42;
'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '```', $markdown ); + $this->assertStringContainsString( 'const x = 42;', $markdown ); + } + + /** + * Test conversion of horizontal rules. + * + * @since x.x.x + */ + public function test_converts_horizontal_rules(): void { + $html = '

Above the line.


Below the line.

'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '---', $markdown ); + } + + /** + * Test conversion of line breaks. + * + * @since x.x.x + */ + public function test_converts_line_breaks(): void { + $html = '

Line one
Line two

'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( "Line one\nLine two", $markdown ); + } + + /** + * Test conversion of simple tables with thead and tbody. + * + * @since x.x.x + */ + public function test_converts_tables_with_thead(): void { + $html = ' + + + + + + + +
NameAge
Alice30
Bob25
'; + + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '| Name | Age |', $markdown ); + $this->assertStringContainsString( '| --- | --- |', $markdown ); + $this->assertStringContainsString( '| Alice | 30 |', $markdown ); + $this->assertStringContainsString( '| Bob | 25 |', $markdown ); + } + + /** + * Test conversion of tables using th tags without explicit thead. + * + * @since x.x.x + */ + public function test_converts_tables_with_th_no_thead(): void { + $html = ' + + +
ProductPrice
Widget$10
'; + + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '| Product | Price |', $markdown ); + $this->assertStringContainsString( '| --- | --- |', $markdown ); + $this->assertStringContainsString( '| Widget | $10 |', $markdown ); + } + + /** + * Test that script tags are completely removed. + * + * @since x.x.x + */ + public function test_removes_script_tags(): void { + $html = '

Safe content

More safe content

'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( 'Safe content', $markdown ); + $this->assertStringContainsString( 'More safe content', $markdown ); + $this->assertStringNotContainsString( 'malicious', $markdown ); + $this->assertStringNotContainsString( 'script', $markdown ); + } + + /** + * Test that style tags are completely removed. + * + * @since x.x.x + */ + public function test_removes_style_tags(): void { + $html = '

Content

'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( 'Content', $markdown ); + $this->assertStringNotContainsString( 'color', $markdown ); + $this->assertStringNotContainsString( 'style', $markdown ); + } + + /** + * Test that empty input returns empty output. + * + * @since x.x.x + */ + public function test_empty_input(): void { + $markdown = $this->converter->convert( '' ); + + $this->assertEmpty( $markdown ); + } + + /** + * Test that whitespace-only input is handled gracefully. + * + * @since x.x.x + */ + public function test_whitespace_only_input(): void { + $markdown = $this->converter->convert( ' ' ); + + $this->assertEmpty( trim( $markdown ) ); + } + + /** + * Test nested formatting (bold within italic, etc.). + * + * @since x.x.x + */ + public function test_nested_formatting(): void { + $html = '

This is bold italic text.

'; + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '***bold italic***', $markdown ); + } + + /** + * Test complex nested structure with blockquote containing formatted text. + * + * @since x.x.x + */ + public function test_complex_blockquote(): void { + $html = '
+

A quote with bold and italic text.

+
'; + + $markdown = $this->converter->convert( $html ); + + $this->assertStringContainsString( '>', $markdown ); + $this->assertStringContainsString( '**bold**', $markdown ); + $this->assertStringContainsString( '*italic*', $markdown ); + } + + /** + * Test that excessive whitespace is normalized. + * + * @since x.x.x + */ + public function test_normalizes_whitespace(): void { + $html = '

Multiple spaces here

'; + $markdown = $this->converter->convert( $html ); + + // Should not contain multiple consecutive spaces. + $this->assertStringNotContainsString( ' ', $markdown ); + $this->assertStringContainsString( 'Multiple spaces here', $markdown ); + } +} diff --git a/tests/Integration/Includes/Experiments/Markdown_Feeds/Markdown_FeedsTest.php b/tests/Integration/Includes/Experiments/Markdown_Feeds/Markdown_FeedsTest.php new file mode 100644 index 00000000..6a4d21ee --- /dev/null +++ b/tests/Integration/Includes/Experiments/Markdown_Feeds/Markdown_FeedsTest.php @@ -0,0 +1,240 @@ + 'test-api-key' ) ); + + // Mock has_valid_ai_credentials to return true for tests. + add_filter( 'ai_experiments_pre_has_valid_credentials_check', '__return_true' ); + + // Enable experiments globally and individually. + update_option( 'ai_experiments_enabled', true ); + update_option( 'ai_experiment_markdown-feeds_enabled', true ); + + $registry = new Experiment_Registry(); + $loader = new Experiment_Loader( $registry ); + $loader->register_default_experiments(); + + $experiment = $registry->get_experiment( 'markdown-feeds' ); + $this->assertInstanceOf( Markdown_Feeds::class, $experiment, 'Markdown feeds experiment should be registered in the registry.' ); + + $this->experiment = new Markdown_Feeds(); + $this->experiment->register(); + } + + /** + * Tear down test case. + * + * @since x.x.x + */ + public function tearDown(): void { + delete_option( 'ai_experiments_enabled' ); + delete_option( 'ai_experiment_markdown-feeds_enabled' ); + delete_option( 'wp_ai_client_provider_credentials' ); + remove_filter( 'ai_experiments_pre_has_valid_credentials_check', '__return_true' ); + parent::tearDown(); + } + + /** + * Test that the experiment is registered correctly. 
+	 *
+	 * @since x.x.x
+	 */
+	public function test_experiment_registration(): void {
+		$markdown_feeds = new Markdown_Feeds();
+
+		$this->assertEquals( 'markdown-feeds', $markdown_feeds->get_id() );
+		$this->assertEquals( 'Markdown Feeds', $markdown_feeds->get_label() );
+		$this->assertTrue( $markdown_feeds->is_enabled() );
+	}
+
+	/**
+	 * Registering the feed must add `markdown` to the rewrite feed list.
+	 *
+	 * @since x.x.x
+	 */
+	public function test_markdown_feed_is_registered(): void {
+		$this->experiment->register_feed();
+
+		// The global rewrite object holds the list of registered feeds.
+		$this->assertContains( 'markdown', $GLOBALS['wp_rewrite']->feeds );
+	}
+
+	/**
+	 * The feed link must mention both `feed` and `markdown`.
+	 *
+	 * @since x.x.x
+	 */
+	public function test_get_markdown_feed_link(): void {
+		$link = $this->experiment->get_markdown_feed_link();
+
+		foreach ( array( 'feed', 'markdown' ) as $fragment ) {
+			$this->assertStringContainsString( $fragment, $link );
+		}
+	}
+
+	/**
+	 * With pretty permalinks, a post's Markdown permalink ends in `.md` and keeps the slug.
+	 *
+	 * @since x.x.x
+	 */
+	public function test_get_markdown_permalink(): void {
+		global $wp_rewrite;
+
+		$previous_structure = (string) get_option( 'permalink_structure' );
+		$wp_rewrite->set_permalink_structure( '/%postname%/' );
+
+		try {
+			$created_id = self::factory()->post->create(
+				array(
+					'post_title'  => 'Test Post',
+					'post_name'   => 'test-post',
+					'post_status' => 'publish',
+				)
+			);
+
+			$markdown_url = $this->experiment->get_markdown_permalink( get_post( $created_id ) );
+
+			$this->assertStringEndsWith( '.md', $markdown_url );
+			$this->assertStringContainsString( 'test-post', $markdown_url );
+		} finally {
+			// Always put the original permalink structure back.
+			$wp_rewrite->set_permalink_structure( $previous_structure );
+		}
+	}
+
+	/**
+	 * Test that get_markdown_permalink returns empty without pretty permalinks.
+	 *
+	 * @since x.x.x
+	 */
+	public function test_get_markdown_permalink_returns_empty_with_plain_permalinks(): void {
+		global $wp_rewrite;
+
+		$previous_structure = (string) get_option( 'permalink_structure' );
+		$wp_rewrite->set_permalink_structure( '' );
+
+		try {
+			$created_id = self::factory()->post->create(
+				array(
+					'post_title'  => 'Plain Post',
+					'post_name'   => 'plain-post',
+					'post_status' => 'publish',
+				)
+			);
+
+			$markdown_url = $this->experiment->get_markdown_permalink( get_post( $created_id ) );
+			$this->assertSame( '', $markdown_url );
+		} finally {
+			// Always put the original permalink structure back.
+			$wp_rewrite->set_permalink_structure( $previous_structure );
+		}
+	}
+
+	/**
+	 * A trailing `.md` is removed from the `name` query var on GET requests.
+	 *
+	 * @since x.x.x
+	 */
+	public function test_filter_request_strips_md_from_name(): void {
+		$_SERVER['REQUEST_METHOD'] = 'GET';
+
+		$incoming = array( 'name' => 'test-post.md' );
+		$filtered = $this->experiment->filter_request_for_markdown_extension( $incoming );
+
+		$this->assertEquals( 'test-post', $filtered['name'] );
+	}
+
+	/**
+	 * A trailing `.md` is removed from the `pagename` query var on GET requests.
+	 *
+	 * @since x.x.x
+	 */
+	public function test_filter_request_strips_md_from_pagename(): void {
+		$_SERVER['REQUEST_METHOD'] = 'GET';
+
+		$incoming = array( 'pagename' => 'sample-page.md' );
+		$filtered = $this->experiment->filter_request_for_markdown_extension( $incoming );
+
+		$this->assertEquals( 'sample-page', $filtered['pagename'] );
+	}
+
+	/**
+	 * Extensions other than `.md` pass through the request filter unchanged.
+	 *
+	 * @since x.x.x
+	 */
+	public function test_filter_request_ignores_non_md_extensions(): void {
+		$_SERVER['REQUEST_METHOD'] = 'GET';
+
+		$incoming = array( 'name' => 'test-post.html' );
+		$filtered = $this->experiment->filter_request_for_markdown_extension( $incoming );
+
+		$this->assertEquals( 'test-post.html', $filtered['name'] );
+	}
+
+	/**
+	 * Test that request filter ignores POST requests.
+	 *
+	 * @since x.x.x
+	 */
+	public function test_filter_request_ignores_post_requests(): void {
+		$_SERVER['REQUEST_METHOD'] = 'POST';
+
+		$incoming = array( 'name' => 'test-post.md' );
+		$filtered = $this->experiment->filter_request_for_markdown_extension( $incoming );
+
+		$this->assertEquals( 'test-post.md', $filtered['name'] );
+	}
+
+	/**
+	 * Once a `.md` request has been seen, the canonical redirect filter returns false.
+	 *
+	 * @since x.x.x
+	 */
+	public function test_filter_redirect_canonical_prevents_redirect_for_md(): void {
+		$_SERVER['REQUEST_METHOD'] = 'GET';
+
+		// Run the request filter first so the experiment records the `.md` request.
+		$incoming = array( 'name' => 'test-post.md' );
+		$this->experiment->filter_request_for_markdown_extension( $incoming );
+
+		// The canonical redirect filter should now refuse to redirect.
+		$redirect = $this->experiment->filter_redirect_canonical( 'http://example.com/test-post/', 'http://example.com/test-post.md' );
+
+		$this->assertFalse( $redirect );
+	}
+}