diff --git a/CHANGELOG.md b/CHANGELOG.md index a60278e..d70bdc8 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added +- **`--log` flag** - Save outputs to log files in logs/ directory + - By default, only prints output to console (no files saved) + - When enabled, saves original text, markdown (if --markdown used), and comparison stats + - Usage: `python scripts/run.py ask_question.py --question "..." --log` + +- **`--markdown` flag** - Get formatted markdown output from NotebookLM + - Clicks the copy button in the NotebookLM UI to get clean markdown + - Useful for responses with code blocks, lists, tables, and other formatting + - Automatically falls back to plain text if copy button fails + - Usage: `python scripts/run.py ask_question.py --question "..." --markdown` + +### Changed +- **Logging behavior** - Logs are now opt-in via `--log` flag + - Previously, logs were always saved when `--markdown` was enabled + - Now, no files are created unless user explicitly uses `--log` + ## [1.3.0] - 2025-11-21 ### Added diff --git a/SKILL.md b/SKILL.md index 2be7e16..594a885 100755 --- a/SKILL.md +++ b/SKILL.md @@ -123,6 +123,9 @@ python scripts/run.py ask_question.py --question "..." --notebook-url "https://. # Show browser for debugging python scripts/run.py ask_question.py --question "..." --show-browser + +# Get formatted markdown output (for code, lists, tables, etc.) +python scripts/run.py ask_question.py --question "..." --markdown ``` ## Follow-Up Mechanism (CRITICAL) @@ -162,9 +165,13 @@ python scripts/run.py notebook_manager.py stats ### Question Interface (`ask_question.py`) ```bash -python scripts/run.py ask_question.py --question "..." 
[--notebook-id ID] [--notebook-url URL] [--show-browser] +python scripts/run.py ask_question.py --question "..." [--notebook-id ID] [--notebook-url URL] [--show-browser] [--markdown] [--log] ``` +**Options:** +- `--markdown` - Get formatted markdown output via copy button (recommended for longer responses with formatting) +- `--log` - Save outputs to log files in logs/ directory (original, markdown, and stats) + ### Data Cleanup (`cleanup_manager.py`) ```bash python scripts/run.py cleanup_manager.py # Preview cleanup diff --git a/scripts/ask_question.py b/scripts/ask_question.py index aa47e4b..0107cb5 100755 --- a/scripts/ask_question.py +++ b/scripts/ask_question.py @@ -20,10 +20,120 @@ # Add parent directory to path sys.path.insert(0, str(Path(__file__).parent)) +# Get project root for logs directory +PROJECT_ROOT = Path(__file__).parent.parent +LOGS_DIR = PROJECT_ROOT / "logs" +LOGS_DIR.mkdir(exist_ok=True) + from auth_manager import AuthManager from notebook_manager import NotebookLibrary from config import QUERY_INPUT_SELECTORS, RESPONSE_SELECTORS from browser_utils import BrowserFactory, StealthUtils +from logger import QueryLogger + + +def _try_copy_button(response_element, page) -> str: + """ + Try to click the copy button associated with a specific response element. 
+ + Args: + response_element: The Playwright element handle for the response + page: The Playwright page object + + Returns: + Clipboard text if successful, None otherwise + """ + try: + # Search for the copy button within the same container as the response + # This ensures we get the copy button for THIS response, not an old one + result = page.evaluate("""(element) => { + // Find the container of this response + let container = element; + + // Try different container levels + const possibleContainers = [ + element, + element.parentElement, // Parent + element.parentElement?.parentElement, // Grandparent + element.closest('.to-user-container'), // Closest message container + element.closest('[data-message-author="bot"]'), // Bot message container + element.closest('[data-message-author="assistant"]'), // Assistant container + ]; + + let copyButton = null; + + // Search for copy button in each container level + for (const cont of possibleContainers) { + if (!cont) continue; + + // Try multiple selectors for copy button + const selectors = [ + 'button[aria-label="Copy model response to clipboard"]', + 'button[aria-label*="copy" i]', + 'button[class*="copy" i]', + 'button[title*="copy" i]', + '.copy-button', + 'button[aria-label*="Copy"]', + ]; + + for (const selector of selectors) { + const buttons = cont.querySelectorAll(selector); + if (buttons.length > 0) { + // Get the first copy button in this container + copyButton = buttons[0]; + break; + } + } + + if (copyButton) break; + } + + if (!copyButton) { + return { found: false, error: 'No copy button found in response container' }; + } + + // Click the button + copyButton.click(); + + return { found: true, buttonHTML: copyButton.outerHTML }; + }""", response_element) + + if not result or not result.get('found'): + print(f" ! 
Copy button not found: {result.get('error', 'Unknown error')}") + return None + + print(" āœ“ Clicked copy button") + + # Wait for clipboard to be populated + StealthUtils.random_delay(500, 1000) + + # Read clipboard + clipboard_text = page.evaluate("() => navigator.clipboard.readText()") + + if not clipboard_text: + print(" ! Clipboard is empty") + return None + + print(f" šŸ“‹ Got clipboard content ({len(clipboard_text)} chars)") + + # Validate clipboard content matches response roughly + # The clipboard might have markdown formatting, so it could be longer + # But it shouldn't be drastically different + response_text = response_element.inner_text().strip() + clipboard_ratio = len(clipboard_text) / len(response_text) if len(response_text) > 0 else 0 + + # Accept clipboard if ratio is reasonable (0.3 to 5.0) + # Markdown formatting can make it significantly longer or shorter + if 0.3 <= clipboard_ratio <= 5.0: + print(f" āœ“ Clipboard content validated (ratio: {clipboard_ratio:.2f})") + return clipboard_text + else: + print(f" ! Clipboard content seems off (ratio: {clipboard_ratio:.2f}), ignoring") + return None + + except Exception as e: + print(f" ! 
Copy button error: {e}") + return None # Follow-up reminder (adapted from MCP server for stateless operation) @@ -37,7 +147,7 @@ ) -def ask_notebooklm(question: str, notebook_url: str, headless: bool = True) -> str: +def ask_notebooklm(question: str, notebook_url: str, headless: bool = True, use_markdown: bool = False) -> dict: """ Ask a question to NotebookLM @@ -45,9 +155,10 @@ def ask_notebooklm(question: str, notebook_url: str, headless: bool = True) -> s question: Question to ask notebook_url: NotebookLM notebook URL headless: Run browser in headless mode + use_markdown: If True, try to get formatted markdown via copy button Returns: - Answer text from NotebookLM + Dictionary with 'original' and 'markdown' keys (markdown may be None) """ auth = AuthManager() @@ -117,11 +228,23 @@ def ask_notebooklm(question: str, notebook_url: str, headless: bool = True) -> s # Wait for response (MCP approach: poll for stable text) print(" ā³ Waiting for answer...") - answer = None + result = { + 'original': None, + 'markdown': None, + 'success': False + } stable_count = 0 last_text = None deadline = time.time() + 120 # 2 minutes timeout + # Rate limit detection patterns + RATE_LIMIT_PATTERNS = [ + "The system was unable to answer", + "Unable to answer", + "Daily limit reached", + "Rate limit exceeded", + ] + while time.time() < deadline: # Check if NotebookLM is still thinking (most reliable indicator) try: @@ -133,38 +256,73 @@ def ask_notebooklm(question: str, notebook_url: str, headless: bool = True) -> s pass # Try to find response with MCP selectors + current_element = None for selector in RESPONSE_SELECTORS: try: elements = page.query_selector_all(selector) if elements: # Get last (newest) response - latest = elements[-1] - text = latest.inner_text().strip() + current_element = elements[-1] + text = current_element.inner_text().strip() if text: + # Check for rate limit messages + is_rate_limit = any(pattern.lower() in text.lower() for pattern in RATE_LIMIT_PATTERNS) + 
if is_rate_limit: + print(f" āš ļø Rate limit detected: {text}") + result['original'] = text + result['markdown'] = None + result['success'] = True + break + if text == last_text: stable_count += 1 if stable_count >= 3: # Stable for 3 polls - answer = text + print(f" āœ“ Response stable (length: {len(text)} chars)") + + # Always store original text + result['original'] = text + + # Try copy button if markdown is enabled and response is long enough + if use_markdown and len(text) >= 100: + print(" šŸ“‹ Trying copy button for clean markdown...") + markdown = _try_copy_button(current_element, page) + if markdown: + result['markdown'] = markdown + else: + print(" ! Copy button failed, using original text") + result['markdown'] = None + elif use_markdown: + print(" āœ“ Response too short for copy button (< 100 chars)") + result['markdown'] = None + + result['success'] = True break else: stable_count = 0 last_text = text + print(f" ā³ Response changing... (length: {len(text)} chars)") except: continue - if answer: + if result['success']: break time.sleep(1) - if not answer: + if not result['success']: print(" āŒ Timeout waiting for answer") return None print(" āœ… Got answer!") + # Add follow-up reminder to encourage Claude to ask more questions - return answer + FOLLOW_UP_REMINDER + if result['original']: + result['original'] = result['original'] + FOLLOW_UP_REMINDER + if result['markdown']: + result['markdown'] = result['markdown'] + FOLLOW_UP_REMINDER + + return result except Exception as e: print(f" āŒ Error: {e}") @@ -194,6 +352,8 @@ def main(): parser.add_argument('--notebook-url', help='NotebookLM notebook URL') parser.add_argument('--notebook-id', help='Notebook ID from library') parser.add_argument('--show-browser', action='store_true', help='Show browser') + parser.add_argument('--markdown', action='store_true', help='Get formatted markdown output via copy button (saves both original and markdown)') + parser.add_argument('--log', action='store_true', 
help='Save outputs to log files in logs/ directory') args = parser.parse_args() @@ -232,25 +392,49 @@ def main(): return 1 # Ask the question - answer = ask_notebooklm( + result = ask_notebooklm( question=args.question, notebook_url=notebook_url, - headless=not args.show_browser + headless=not args.show_browser, + use_markdown=args.markdown ) - if answer: - print("\n" + "=" * 60) - print(f"Question: {args.question}") - print("=" * 60) - print() - print(answer) - print() - print("=" * 60) - return 0 - else: + if not result: print("\nāŒ Failed to get answer") return 1 + # Determine which output to display + display_answer = result['markdown'] if (args.markdown and result['markdown']) else result['original'] + + # Print the answer to console + print("\n" + "=" * 60) + print(f"Question: {args.question}") + print("=" * 60) + if args.markdown and result['markdown']: + print("šŸ“‹ Output: Markdown (from copy button)") + elif args.markdown: + print("šŸ“„ Output: Original (copy button failed or response too short)") + else: + print("šŸ“„ Output: Original") + print("=" * 60) + print() + print(display_answer) + print() + print("=" * 60) + + # Save outputs to logs directory only if --log option is enabled + if args.log: + logger = QueryLogger(LOGS_DIR) + saved_files = logger.save_query_results( + question=args.question, + notebook_url=notebook_url, + result=result, + use_markdown=args.markdown + ) + logger.print_save_summary(saved_files) + + return 0 + if __name__ == "__main__": sys.exit(main()) diff --git a/scripts/browser_utils.py b/scripts/browser_utils.py index 60a1210..5d5018e 100755 --- a/scripts/browser_utils.py +++ b/scripts/browser_utils.py @@ -33,7 +33,8 @@ def launch_persistent_context( no_viewport=True, ignore_default_args=["--enable-automation"], user_agent=USER_AGENT, - args=BROWSER_ARGS + args=BROWSER_ARGS, + permissions=["clipboard-read", "clipboard-write"] # Grant clipboard permissions ) # Cookie Workaround for Playwright bug #36139 diff --git 
"""
Logging utilities for NotebookLM Skill
Handles saving query results and statistics to log files
"""

import json
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Union


class QueryLogger:
    """Write NotebookLM query results and comparison stats to a logs directory.

    Each call to :meth:`save_query_results` produces up to three files whose
    names share a ``YYYYmmdd_HHMMSS`` timestamp prefix and a sanitized copy of
    the question:

    * ``<ts>_original_<question>.txt`` - plain-text answer
    * ``<ts>_markdown_<question>.md``  - markdown answer (markdown mode only)
    * ``<ts>_stats_<question>.json``   - length comparison (markdown mode only)
    """

    # Human-readable labels used by print_save_summary; built once rather
    # than on every loop iteration.
    _FILE_LABELS = {
        'original': 'original output',
        'markdown': 'markdown output',
        'stats': 'comparison stats',
    }

    # Filename stem used when a question sanitizes down to nothing.
    _FALLBACK_NAME = 'query'

    def __init__(self, logs_dir: Union[str, Path]):
        """
        Initialize the logger with a logs directory

        Args:
            logs_dir: Path to the logs directory (``str`` or ``Path``). The
                directory, including any missing parents, is created if it
                does not exist yet.
        """
        self.logs_dir = Path(logs_dir)
        # parents=True so a not-yet-existing parent directory does not raise
        # FileNotFoundError.
        self.logs_dir.mkdir(parents=True, exist_ok=True)

    def _sanitize_filename(self, text: str, max_length: int = 50) -> str:
        """
        Sanitize text for use in filename

        Every character that is not alphanumeric, a space, ``-`` or ``_`` is
        replaced by ``_``; the result is truncated to ``max_length`` and
        stripped of surrounding whitespace.

        Args:
            text: Text to sanitize
            max_length: Maximum length of result

        Returns:
            Sanitized filename-safe string; ``"query"`` when nothing usable
            remains (empty or whitespace-only input).
        """
        safe = "".join(
            c if c.isalnum() or c in (' ', '-', '_') else '_'
            for c in text
        )
        safe = safe[:max_length].strip()
        return safe or self._FALLBACK_NAME

    def save_query_results(
        self,
        question: str,
        notebook_url: str,
        result: Dict[str, Any],
        use_markdown: bool = True
    ) -> Dict[str, Path]:
        """
        Save query results to log files

        Args:
            question: The question that was asked
            notebook_url: URL of the notebook used
            result: Dictionary with 'original', 'markdown', 'success' keys.
                Only 'original' and 'markdown' are read; missing or empty
                values are treated as "not available".
            use_markdown: Whether markdown mode was enabled. Stats (and the
                markdown file) are only written when this is True.

        Returns:
            Dictionary mapping file type ('original', 'markdown', 'stats')
            to the path of each file that was actually written.
        """
        # NOTE: the timestamp has one-second resolution, so two calls with
        # the same question inside the same second write to the same paths.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_question = self._sanitize_filename(question)

        original = result.get('original')
        markdown = result.get('markdown')

        saved_files: Dict[str, Path] = {}

        # Save the original text whenever there is any
        if original:
            original_file = self.logs_dir / f"{timestamp}_original_{safe_question}.txt"
            original_file.write_text(original, encoding='utf-8')
            saved_files['original'] = original_file

        # Markdown output and comparison stats are markdown-mode only
        if not use_markdown:
            return saved_files

        if markdown:
            markdown_file = self.logs_dir / f"{timestamp}_markdown_{safe_question}.md"
            markdown_file.write_text(markdown, encoding='utf-8')
            saved_files['markdown'] = markdown_file

        original_length = len(original) if original else 0
        markdown_length = len(markdown) if markdown else 0

        # One stats record covers both outcomes; 'reason' is only present
        # when markdown mode was requested but no markdown was captured.
        stats = {
            'question': question,
            'notebook_url': notebook_url,
            'original_length': original_length,
            'markdown_length': markdown_length,
            'ratio': (
                markdown_length / original_length
                if original and markdown else 0
            ),
            'timestamp': timestamp,
            'has_markdown': bool(markdown),
        }
        if not markdown:
            stats['reason'] = 'Copy button failed or response too short'

        stats_file = self.logs_dir / f"{timestamp}_stats_{safe_question}.json"
        stats_file.write_text(
            json.dumps(stats, indent=2, ensure_ascii=False),
            encoding='utf-8'
        )
        saved_files['stats'] = stats_file

        return saved_files

    def print_save_summary(self, saved_files: Dict[str, Path]):
        """
        Print a summary of saved files

        Args:
            saved_files: Dictionary mapping file type to saved file path.
                Unknown file types are printed using the key itself as label.
        """
        for file_type, file_path in saved_files.items():
            label = self._FILE_LABELS.get(file_type, file_type)
            print(f"💾 Saved {label} to: {file_path}")