From 0eea19cf9332a9ead252098f2c0046315f0e6366 Mon Sep 17 00:00:00 2001 From: Diana Velychko <68444337+VelDia@users.noreply.github.com> Date: Sun, 19 Oct 2025 13:54:52 -0400 Subject: [PATCH] Add CLI option to inspect journal categories --- README.md | 56 ++++++ digital_journal.py | 459 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 515 insertions(+) create mode 100644 digital_journal.py diff --git a/README.md b/README.md index af2ec58..91acce8 100644 --- a/README.md +++ b/README.md @@ -1 +1,57 @@ # StoryTell + +## Digital Journal + +This repository now includes a small command line helper that lets you record a +single, freeform entry about your day and automatically separates the text into +feelings, situations, events, health updates, and the people you mention. + +The script stores all entries in `data/journal_entries.json` so the information +remains structured and easy to review later. + +### Usage + +``` +python digital_journal.py --entry "I met Alex for coffee and felt really happy." +``` + +If you omit `--entry`, the script will prompt you to type (or paste) the entry +interactively. You can also provide an ISO timestamp if you want to override +the automatically generated time: + +``` +python digital_journal.py --entry "Visited the doctor for a checkup." --timestamp 2024-05-22T09:30 +``` + +After saving, the script prints the structured summary of what it detected. + +### Reviewing past entries + +To revisit what you've logged so far, use the `--history` flag. The script +prints the most recent entries first and echoes both the original text and the +detected categories: + +``` +python digital_journal.py --history +``` + +You can limit the output to the latest _N_ entries: + +``` +python digital_journal.py --history --limit 5 +``` + +### Viewing categories directly + +If you want to focus on specific categories across your saved entries, use +`--categories` followed by one or more category names. 
Combine it with +`--limit` to restrict the scan to the most recent entries and `--unique` to +collapse duplicate sentences: + +``` +python digital_journal.py --categories feelings health --limit 10 +python digital_journal.py --categories people --unique +``` + +The available category names are `feelings`, `events`, `health`, `situations`, +and `people`. diff --git a/digital_journal.py b/digital_journal.py new file mode 100644 index 0000000..aa1c0c3 --- /dev/null +++ b/digital_journal.py @@ -0,0 +1,459 @@ +"""Digital journal entry processor. + +This module provides a small command line interface that accepts a freeform +journal entry and automatically separates it into themed sections. The goal is +to let someone write about their day once and have the script extract useful +structured information that can be reviewed later. + +The script keeps a persistent JSON file (``data/journal_entries.json``) where +each entry stores the original text alongside the detected people, events, +feelings, situations, and health updates. 
from __future__ import annotations

import argparse
import json
import re
import sys
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Set


# The journal lives in ``data/`` relative to the current working directory.
DATA_PATH = Path("data")
JOURNAL_FILE = DATA_PATH / "journal_entries.json"

# Every category stored on an entry, in display order.  Shared by the argument
# parser and the history view so the lists cannot drift apart.
CATEGORY_NAMES = ("feelings", "events", "health", "situations", "people")


FEELING_KEYWORDS = {
    "happy",
    "joy",
    "joyful",
    "glad",
    "excited",
    "content",
    "peaceful",
    "sad",
    "upset",
    "angry",
    "frustrated",
    "anxious",
    "nervous",
    "worried",
    "calm",
    "relaxed",
    "tired",
    "exhausted",
    "stressed",
    "grateful",
    "thankful",
    "lonely",
    "confident",
    "proud",
    "hopeful",
}


EVENT_KEYWORDS = {
    "meeting",
    "birthday",
    "anniversary",
    "party",
    "presentation",
    "conference",
    "interview",
    "deadline",
    "vacation",
    "trip",
    "wedding",
    "graduation",
    "celebration",
    "appointment",
    "game",
    "practice",
    "ceremony",
}


HEALTH_KEYWORDS = {
    "exercise",
    "workout",
    "gym",
    "run",
    "running",
    "walk",
    "walking",
    "yoga",
    "meditation",
    "doctor",
    "dentist",
    "medicine",
    "medication",
    "pill",
    "headache",
    "stomach",
    "flu",
    "cold",
    "fever",
    "pain",
    "ache",
    "injury",
    "therapy",
    "sleep",
    "rest",
    "hydrate",
    "hydrated",
    "nutrition",
    "diet",
}


SITUATION_KEYWORDS = {
    "work",
    "office",
    "project",
    "school",
    "class",
    "lesson",
    "family",
    "friends",
    "home",
    "travel",
    "commute",
    "study",
    "exam",
    "shopping",
    "errand",
    "cooking",
    "cleaning",
    "house",
    "weather",
    "rain",
    "sunny",
    "storm",
}


# Capitalized words that are never treated as (part of) a person's name.
STOP_WORDS = {
    "I",
    "We",
    "My",
    "The",
    "A",
    "An",
    "It",
    "He",
    "She",
    "They",
    "Today",
    "Tonight",
    "Morning",
    "Evening",
    "Afternoon",
    "Later",
    "Yesterday",
    "Tomorrow",
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
}


# One or more capitalized words, each separated by a single whitespace char.
NAME_PATTERN = re.compile(r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)*)\b")


def sentence_split(text: str) -> List[str]:
    """Split freeform text into a list of sentences.

    A boundary is any run of whitespace that follows ``.``, ``!`` or ``?``;
    the punctuation stays attached to its sentence.  Empty fragments are
    dropped, so blank input yields an empty list.
    """

    sentences = re.split(r"(?<=[.!?])\s+", text.strip())
    return [sentence.strip() for sentence in sentences if sentence.strip()]


def detect_keywords(sentence: str, keywords: Iterable[str]) -> bool:
    """Return True if any keyword appears as a whole word in ``sentence``.

    The sentence is lower-cased before matching, so keywords are expected to
    be lower-case.
    """

    normalized = sentence.lower()
    return any(re.search(rf"\b{re.escape(word)}\b", normalized) for word in keywords)


def _strip_stop_words(candidate: str) -> str:
    """Drop leading/trailing ``STOP_WORDS`` tokens from a capitalized run."""

    tokens = candidate.split()
    while tokens and tokens[0] in STOP_WORDS:
        tokens.pop(0)
    while tokens and tokens[-1] in STOP_WORDS:
        tokens.pop()
    return " ".join(tokens)


def extract_people(text: str) -> List[str]:
    """Extract capitalized names from the text using a light-weight heuristic.

    Every run of capitalized words matched by ``NAME_PATTERN`` is a candidate.
    Stop words at either end of a run are trimmed, so "Today Alex" yields
    "Alex" instead of being reported as a two-word name, and runs made only of
    stop words are discarded.  The result is de-duplicated and sorted.
    """

    candidates: Set[str] = set()
    for match in NAME_PATTERN.finditer(text):
        name = _strip_stop_words(match.group(1))
        if name:
            candidates.add(name)
    # Sort to keep ordering predictable for tests and ease of reading.
    return sorted(candidates)


@dataclass
class JournalEntry:
    """One journal entry plus the sentences detected for each category."""

    raw_entry: str
    timestamp: str
    feelings: List[str] = field(default_factory=list)
    events: List[str] = field(default_factory=list)
    health: List[str] = field(default_factory=list)
    situations: List[str] = field(default_factory=list)
    people: List[str] = field(default_factory=list)

    @classmethod
    def from_text(cls, entry_text: str, timestamp: datetime | None = None) -> "JournalEntry":
        """Build an entry by classifying each sentence of ``entry_text``.

        ``timestamp`` defaults to ``datetime.now()`` and is stored as an ISO
        string with second precision.
        """

        timestamp = timestamp or datetime.now()
        sentences = sentence_split(entry_text)

        feelings = [s for s in sentences if detect_keywords(s, FEELING_KEYWORDS)]
        events = [s for s in sentences if detect_keywords(s, EVENT_KEYWORDS)]
        health = [s for s in sentences if detect_keywords(s, HEALTH_KEYWORDS)]

        # A situation is a sentence with general context keywords that is not
        # already stored as a feeling, event, or health update.
        categorized_sentences: Set[str] = set(feelings + events + health)
        situations = [
            s
            for s in sentences
            if s not in categorized_sentences and detect_keywords(s, SITUATION_KEYWORDS)
        ]

        return cls(
            raw_entry=entry_text.strip(),
            timestamp=timestamp.isoformat(timespec="seconds"),
            feelings=feelings,
            events=events,
            health=health,
            situations=situations,
            people=extract_people(entry_text),
        )

    def to_dict(self) -> Dict[str, Sequence[str] | str]:
        """Return the JSON-serialisable form written to the journal file."""

        return {
            "timestamp": self.timestamp,
            "raw_entry": self.raw_entry,
            "feelings": self.feelings,
            "events": self.events,
            "health": self.health,
            "situations": self.situations,
            "people": self.people,
        }


def load_journal() -> Dict[str, List[Dict[str, Sequence[str] | str]]]:
    """Load the journal from ``JOURNAL_FILE``.

    Returns ``{"entries": []}`` when the file does not exist.  A file that is
    not valid JSON makes ``json.load`` raise ``json.JSONDecodeError``.
    """

    if not JOURNAL_FILE.exists():
        return {"entries": []}
    with JOURNAL_FILE.open("r", encoding="utf-8") as handle:
        return json.load(handle)


def save_journal(data: Dict[str, List[Dict[str, Sequence[str] | str]]]) -> None:
    """Write ``data`` to ``JOURNAL_FILE``, creating ``DATA_PATH`` if needed.

    The JSON is written to a sibling temporary file that is then renamed over
    the journal.  If serialisation or writing fails, the previous journal is
    left untouched instead of being truncated, and the error propagates.
    """

    DATA_PATH.mkdir(parents=True, exist_ok=True)
    staging = JOURNAL_FILE.with_name(JOURNAL_FILE.name + ".tmp")
    try:
        with staging.open("w", encoding="utf-8") as handle:
            json.dump(data, handle, ensure_ascii=False, indent=2)
        staging.replace(JOURNAL_FILE)
    finally:
        # Only still present when the write or the rename failed.
        if staging.exists():
            staging.unlink()


def add_entry(entry_text: str, timestamp: datetime | None = None) -> JournalEntry:
    """Classify ``entry_text``, append it to the journal, and return it."""

    journal = load_journal()
    entry = JournalEntry.from_text(entry_text, timestamp=timestamp)
    journal.setdefault("entries", []).append(entry.to_dict())
    save_journal(journal)
    return entry


def parse_arguments(argv: Sequence[str] | None = None) -> argparse.Namespace:
    """Parse command line options.

    ``argv`` defaults to ``None``, in which case argparse reads
    ``sys.argv[1:]``.
    """

    parser = argparse.ArgumentParser(description="Log or review structured journal entries")
    parser.add_argument(
        "--entry",
        help="The journal entry text. If omitted, the script will read from standard input.",
    )
    parser.add_argument(
        "--timestamp",
        help="Optional ISO timestamp for the entry (defaults to the current time).",
    )
    parser.add_argument(
        "--history",
        action="store_true",
        help="Display previously saved entries instead of adding a new one.",
    )
    parser.add_argument(
        "--limit",
        type=int,
        help="When showing history or categories, only use the most recent N entries.",
    )
    parser.add_argument(
        "--categories",
        nargs="+",
        choices=list(CATEGORY_NAMES),
        help=(
            "Display the collected sentences for one or more categories instead of adding "
            "a new entry. Choices: feelings, events, health, situations, people."
        ),
    )
    parser.add_argument(
        "--unique",
        action="store_true",
        help="When used with --categories, collapse duplicate sentences across entries.",
    )
    return parser.parse_args(argv)


def _history_sort_key(entry: Dict[str, Sequence[str] | str]) -> datetime:
    """Return a naive datetime used to order entries chronologically.

    Entries whose timestamp is missing or unparsable sort as ``datetime.min``.
    Offset-aware timestamps are converted to naive local time, because Python
    refuses to compare aware and naive datetimes and one such entry would
    otherwise make ``sorted`` raise ``TypeError``.
    """

    timestamp = entry.get("timestamp")
    if not isinstance(timestamp, str):
        return datetime.min
    try:
        parsed = datetime.fromisoformat(timestamp)
    except ValueError:
        return datetime.min
    if parsed.tzinfo is not None:
        try:
            parsed = parsed.astimezone().replace(tzinfo=None)
        except (OverflowError, OSError, ValueError):
            # Conversion can fail for dates outside the platform's range;
            # fall back to the wall-clock value as written.
            parsed = parsed.replace(tzinfo=None)
    return parsed


def _require_positive_limit(limit: int | None) -> None:
    """Exit with an error message when ``limit`` is given but not positive."""

    if limit is not None and limit <= 0:
        raise SystemExit("Limit must be a positive integer when provided.")


def recent_entries(
    entries: Sequence[Dict[str, Sequence[str] | str]],
    limit: int | None = None,
) -> List[Dict[str, Sequence[str] | str]]:
    """Return ``entries`` newest first, cut to ``limit`` items when given.

    Raises ``SystemExit`` when ``limit`` is zero or negative.
    """

    _require_positive_limit(limit)
    ordered = sorted(entries, key=_history_sort_key, reverse=True)
    return ordered if limit is None else ordered[:limit]


def show_history(limit: int | None = None) -> None:
    """Print saved entries, newest first, with their detected categories.

    Raises ``SystemExit`` when ``limit`` is zero or negative.
    """

    _require_positive_limit(limit)

    entries = load_journal().get("entries", [])
    if not entries:
        print("No journal entries saved yet. Add one with --entry to get started.")
        return

    selected = recent_entries(entries, limit)
    print(f"Showing {len(selected)} of {len(entries)} saved journal entries:\n")

    divider = "-" * 60
    for entry in selected:
        timestamp = entry.get("timestamp", "(no timestamp)")
        # Tolerate a missing or null raw_entry in hand-edited journals.
        raw_entry = str(entry.get("raw_entry") or "").strip()
        print(divider)
        print(f"Timestamp: {timestamp}")
        if raw_entry:
            print(f"Entry: {raw_entry}")
        for section in CATEGORY_NAMES:
            values = entry.get(section) or []
            if values:
                readable = ", ".join(str(value) for value in values)
                print(f"{section.title()}: {readable}")
        print()
    print(divider)


def show_categories(
    categories: Sequence[str],
    *,
    unique: bool = False,
    limit: int | None = None,
) -> None:
    """Print the stored values of each requested category, newest entry first.

    With ``unique`` set, a value is shown only the first time it is seen
    within a category.  Raises ``SystemExit`` when ``limit`` is zero or
    negative.
    """

    _require_positive_limit(limit)

    entries = load_journal().get("entries", [])
    if not entries:
        print("No journal entries saved yet. Add one with --entry to get started.")
        return

    selected = recent_entries(entries, limit)
    divider = "-" * 60

    for category in categories:
        print(divider)
        print(f"Category: {category.title()}")

        collected: list[tuple[str, list[str]]] = []
        seen: set[str] = set()  # reset per category

        for entry in selected:
            values = list(entry.get(category, []) or [])
            if unique:
                fresh: list[str] = []
                for value in values:
                    if value not in seen:
                        seen.add(value)
                        fresh.append(value)
                values = fresh
            if not values:
                continue
            timestamp = entry.get("timestamp", "(no timestamp)")
            collected.append((str(timestamp), values))

        if not collected:
            print(f"No {category} found in the selected entries.\n")
            continue

        for timestamp, values in collected:
            print(f"- {timestamp}:")
            for value in values:
                print(f" • {value}")
            print()

    print(divider)


def main(argv: Sequence[str] | None = None) -> None:
    """Command line entry point.

    ``argv`` defaults to ``None`` (use ``sys.argv``).  Invalid option
    combinations and invalid input end the program through ``SystemExit``.
    """

    args = parse_arguments(argv)
    # ``--entry ""`` is an explicit (empty) entry, not an omitted one.
    has_entry = args.entry is not None

    if args.history and args.categories:
        raise SystemExit("--history cannot be combined with --categories.")

    if args.categories:
        if has_entry or args.timestamp:
            raise SystemExit("--categories cannot be combined with --entry or --timestamp.")
        show_categories(args.categories, unique=args.unique, limit=args.limit)
        return

    if args.unique:
        raise SystemExit("--unique can only be used together with --categories.")

    if args.history:
        if has_entry or args.timestamp:
            raise SystemExit("--history cannot be combined with --entry or --timestamp.")
        show_history(limit=args.limit)
        return

    if args.limit is not None:
        raise SystemExit("--limit can only be used together with --history or --categories.")

    # Validate the timestamp before prompting, so a typo in --timestamp does
    # not throw away an entry the user has just typed.
    timestamp: datetime | None = None
    if args.timestamp:
        try:
            timestamp = datetime.fromisoformat(args.timestamp)
        except ValueError as exc:
            raise SystemExit(f"Invalid timestamp: {args.timestamp}") from exc

    if has_entry:
        entry_text = args.entry
    else:
        print("Enter your journal entry. Finish with Ctrl-D (or Ctrl-Z on Windows) and press Enter:")
        try:
            entry_text = sys.stdin.read()
        except KeyboardInterrupt:
            raise SystemExit("Journal entry cancelled.")

    entry_text = entry_text.strip()
    if not entry_text:
        raise SystemExit("No journal entry provided.")

    entry = add_entry(entry_text, timestamp=timestamp)

    print("Journal entry saved! Here's the structured summary:\n")
    print(json.dumps(entry.to_dict(), ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()