From 0eea19cf9332a9ead252098f2c0046315f0e6366 Mon Sep 17 00:00:00 2001
From: Diana Velychko <68444337+VelDia@users.noreply.github.com>
Date: Sun, 19 Oct 2025 13:54:52 -0400
Subject: [PATCH] Add CLI option to inspect journal categories

---
 README.md          |  56 ++++++
 digital_journal.py | 459 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 515 insertions(+)
 create mode 100644 digital_journal.py

diff --git a/README.md b/README.md
index af2ec58..91acce8 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,57 @@
 # StoryTell
+
+## Digital Journal
+
+This repository now includes a small command line helper that lets you record a
+single, freeform entry about your day and automatically separates the text into
+feelings, situations, events, health updates, and the people you mention.
+
+The script stores all entries in `data/journal_entries.json` so the information
+remains structured and easy to review later.
+
+### Usage
+
+```
+python digital_journal.py --entry "I met Alex for coffee and felt really happy."
+```
+
+If you omit `--entry`, the script prompts you to type (or paste) the entry and
+reads it until end of input (Ctrl-D, or Ctrl-Z then Enter on Windows). You can
+also pass an ISO 8601 `--timestamp` to override the automatically generated time:
+
+```
+python digital_journal.py --entry "Visited the doctor for a checkup." --timestamp 2024-05-22T09:30
+```
+
+After saving, the script prints the structured summary of what it detected.
+
+### Reviewing past entries
+
+To revisit what you've logged so far, use the `--history` flag. The script
+prints the most recent entries first and echoes both the original text and the
+detected categories:
+
+```
+python digital_journal.py --history
+```
+
+You can limit the output to the latest _N_ entries:
+
+```
+python digital_journal.py --history --limit 5
+```
+
+### Viewing categories directly
+
+If you want to focus on specific categories across your saved entries, use
+`--categories` followed by one or more category names. Combine it with
+`--limit` to restrict the scan to the most recent entries and `--unique` to
+collapse duplicates (sentences, or names for `people`) across entries:
+
+```
+python digital_journal.py --categories feelings health --limit 10
+python digital_journal.py --categories people --unique
+```
+
+The available category names are `feelings`, `events`, `health`, `situations`,
+and `people`.
diff --git a/digital_journal.py b/digital_journal.py
new file mode 100644
index 0000000..aa1c0c3
--- /dev/null
+++ b/digital_journal.py
@@ -0,0 +1,459 @@
+"""Digital journal entry processor.
+
+This module provides a small command line interface that accepts a freeform
+journal entry and automatically separates it into themed sections.  The goal is
+to let someone write about their day once and have the script extract useful
+structured information that can be reviewed later.
+
+The script keeps a persistent JSON file (``data/journal_entries.json``) where
+each entry stores the original text alongside the detected people, events,
+feelings, situations, and health updates.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import sys
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Iterable, List, Sequence, Set
+
+
+DATA_PATH = Path("data")  # relative, so it resolves against the current working directory
+JOURNAL_FILE = DATA_PATH / "journal_entries.json"  # read by load_journal, written by save_journal
+
+
+FEELING_KEYWORDS = {  # lowercase: detect_keywords lower-cases the sentence before matching
+    "happy",
+    "joy",
+    "joyful",
+    "glad",
+    "excited",
+    "content",
+    "peaceful",
+    "sad",
+    "upset",
+    "angry",
+    "frustrated",
+    "anxious",
+    "nervous",
+    "worried",
+    "calm",
+    "relaxed",
+    "tired",
+    "exhausted",
+    "stressed",
+    "grateful",
+    "thankful",
+    "lonely",
+    "confident",
+    "proud",
+    "hopeful",
+}
+
+
+EVENT_KEYWORDS = {  # lowercase whole words; a matching sentence is stored under "events"
+    "meeting",
+    "birthday",
+    "anniversary",
+    "party",
+    "presentation",
+    "conference",
+    "interview",
+    "deadline",
+    "vacation",
+    "trip",
+    "wedding",
+    "graduation",
+    "celebration",
+    "appointment",
+    "game",
+    "practice",
+    "ceremony",
+}
+
+
+HEALTH_KEYWORDS = {  # lowercase whole words; a matching sentence is stored under "health"
+    "exercise",
+    "workout",
+    "gym",
+    "run",
+    "running",
+    "walk",
+    "walking",
+    "yoga",
+    "meditation",
+    "doctor",
+    "dentist",
+    "medicine",
+    "medication",
+    "pill",
+    "headache",
+    "stomach",
+    "flu",
+    "cold",
+    "fever",
+    "pain",
+    "ache",
+    "injury",
+    "therapy",
+    "sleep",
+    "rest",
+    "hydrate",
+    "hydrated",
+    "nutrition",
+    "diet",
+}
+
+
+SITUATION_KEYWORDS = {  # only applied to sentences not already filed as feeling/event/health
+    "work",
+    "office",
+    "project",
+    "school",
+    "class",
+    "lesson",
+    "family",
+    "friends",
+    "home",
+    "travel",
+    "commute",
+    "study",
+    "exam",
+    "shopping",
+    "errand",
+    "cooking",
+    "cleaning",
+    "house",
+    "weather",
+    "rain",
+    "sunny",
+    "storm",
+}
+
+
+STOP_WORDS = {  # capitalized non-names that extract_people discards (case-sensitive lookup)
+    "I",  # NOTE(review): NAME_PATTERN needs 2+ letters per word, so "I" and "A" never match
+    "We",
+    "My",
+    "The",
+    "A",
+    "An",
+    "It",
+    "He",
+    "She",
+    "They",
+    "Today",
+    "Tonight",
+    "Morning",
+    "Evening",
+    "Afternoon",
+    "Later",
+    "Yesterday",
+    "Tomorrow",
+    "Monday",
+    "Tuesday",
+    "Wednesday",
+    "Thursday",
+    "Friday",
+    "Saturday",
+    "Sunday",
+}
+
+
+NAME_PATTERN = re.compile(r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)*)\b")  # run of Capitalized ASCII words
+
+
+def sentence_split(text: str) -> List[str]:
+    """Break ``text`` after each ``.``, ``!`` or ``?`` that is followed by whitespace."""
+    pieces = (piece.strip() for piece in re.split(r"(?<=[.!?])\s+", text.strip()))
+    # Blank fragments (e.g. from empty input) are dropped.
+    return [piece for piece in pieces if piece]
+
+
+def detect_keywords(sentence: str, keywords: Iterable[str]) -> bool:
+    """Report whether ``sentence`` contains at least one keyword as a whole word."""
+    lowered = sentence.lower()  # only the sentence is lower-cased, not the keywords
+    patterns = (rf"\b{re.escape(keyword)}\b" for keyword in keywords)
+    return any(re.search(pattern, lowered) is not None for pattern in patterns)
+
+
+def extract_people(text: str) -> List[str]:
+    """Return the sorted, unique capitalized names found in ``text`` (heuristic)."""
+    people = set()
+    for match in NAME_PATTERN.finditer(text):
+        # A stop word never belongs to a name and also ends the one before it,
+        # so the run "On Friday Sam" yields "On" and "Sam", not "On Friday Sam".
+        marked = " ".join("|" if w in STOP_WORDS else w for w in match.group(1).split())
+        people.update(name.strip() for name in marked.split("|") if name.strip())
+    # Sort to keep ordering predictable for tests and ease of reading.
+    return sorted(people)
+
+
+@dataclass
+class JournalEntry:
+    """A journal entry: the raw text plus the sentences and names detected in it."""
+
+    raw_entry: str
+    timestamp: str
+    feelings: List[str] = field(default_factory=list)
+    events: List[str] = field(default_factory=list)
+    health: List[str] = field(default_factory=list)
+    situations: List[str] = field(default_factory=list)
+    people: List[str] = field(default_factory=list)
+
+    @classmethod
+    def from_text(cls, entry_text: str, timestamp: datetime | None = None) -> "JournalEntry":
+        """Build an entry by sorting the sentences of ``entry_text`` into categories.
+
+        When ``timestamp`` is omitted ``datetime.now()`` is used; it is stored
+        as an ISO string with second precision.
+        """
+        moment = timestamp or datetime.now()
+        sentences = sentence_split(entry_text)
+
+        def matching(keywords: Iterable[str]) -> List[str]:
+            return [s for s in sentences if detect_keywords(s, keywords)]
+
+        feelings = matching(FEELING_KEYWORDS)
+        events = matching(EVENT_KEYWORDS)
+        health = matching(HEALTH_KEYWORDS)
+
+        # Situations are the context-keyword sentences that none of the
+        # categories above has already claimed.
+        claimed = {*feelings, *events, *health}
+        situations = [s for s in matching(SITUATION_KEYWORDS) if s not in claimed]
+
+        people = extract_people(entry_text)
+
+        return cls(
+            raw_entry=entry_text.strip(),
+            timestamp=moment.isoformat(timespec="seconds"),
+            feelings=feelings,
+            events=events,
+            health=health,
+            situations=situations,
+            people=people,
+        )
+
+    def to_dict(self) -> Dict[str, Sequence[str] | str]:
+        """Return the entry as a plain dict, ``timestamp`` first."""
+        keys = ("timestamp", "raw_entry", "feelings", "events", "health", "situations", "people")
+        return {key: getattr(self, key) for key in keys}
+
+
+def load_journal() -> Dict[str, List[Dict[str, Sequence[str] | str]]]:
+    """Return the parsed ``JOURNAL_FILE``; a missing file yields an empty journal."""
+    if JOURNAL_FILE.exists():
+        return json.loads(JOURNAL_FILE.read_text(encoding="utf-8"))
+    return {"entries": []}
+
+
+def save_journal(data: Dict[str, List[Dict[str, Sequence[str] | str]]]) -> None:
+    """Write ``data`` to ``JOURNAL_FILE``; serialize first so a failure cannot truncate it."""
+    DATA_PATH.mkdir(parents=True, exist_ok=True)
+    JOURNAL_FILE.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
+
+
+def add_entry(entry_text: str, timestamp: datetime | None = None) -> JournalEntry:
+    stored = load_journal()  # the whole journal is read, extended, then rewritten
+    created = JournalEntry.from_text(entry_text, timestamp=timestamp)
+    stored.setdefault("entries", []).append(created.to_dict())
+    save_journal(stored)
+    return created  # the new entry, so the caller can report what was detected
+
+
+def parse_arguments(argv: Sequence[str] | None = None) -> argparse.Namespace:
+    """Parse the CLI flags from ``argv``; ``None`` lets argparse read ``sys.argv[1:]``."""
+    parser = argparse.ArgumentParser(description="Log or review structured journal entries")
+    parser.add_argument(
+        "--entry",
+        help="The journal entry text. If omitted, the script will read from standard input.",
+    )
+    parser.add_argument(
+        "--timestamp",
+        help="Optional ISO timestamp for the entry (defaults to the current time).",
+    )
+    parser.add_argument(
+        "--history",
+        action="store_true",
+        help="Display previously saved entries instead of adding a new one.",
+    )
+    # The sign of --limit is checked by show_history/show_categories, not here.
+    parser.add_argument(
+        "--limit",
+        type=int,
+        help="When showing history or categories, only use the most recent N entries.",
+    )
+    parser.add_argument(
+        "--categories",
+        nargs="+",
+        choices=["feelings", "events", "health", "situations", "people"],
+        help="Display the collected sentences for one or more categories instead of adding "
+        "a new entry. Choices: feelings, events, health, situations, people.",
+    )
+    parser.add_argument(
+        "--unique",
+        action="store_true",
+        help="When used with --categories, collapse duplicate sentences across entries.",
+    )
+    return parser.parse_args(argv)
+
+
+def _history_sort_key(entry: Dict[str, Sequence[str] | str]) -> datetime:
+    """Return a naive datetime for sorting; missing/invalid timestamps sort as oldest."""
+    try:
+        ts = datetime.fromisoformat(entry.get("timestamp"))
+        # Aware and naive values cannot be compared: reduce aware ones to naive local time.
+        return ts.astimezone().replace(tzinfo=None) if ts.tzinfo is not None else ts
+    except (TypeError, ValueError, OverflowError, OSError):
+        return datetime.min
+
+
+def show_history(limit: int | None = None) -> None:
+    """Print the saved entries, newest first; ``limit`` keeps only the latest N.
+
+    Raises ``SystemExit`` when ``limit`` is given but is not positive.
+    """
+    if limit is not None and limit <= 0:
+        raise SystemExit("Limit must be a positive integer when provided.")
+
+    stored = load_journal().get("entries", [])
+    if not stored:
+        print("No journal entries saved yet. Add one with --entry to get started.")
+        return
+
+    newest_first = sorted(stored, key=_history_sort_key, reverse=True)
+    selected = newest_first if limit is None else newest_first[:limit]
+    print(f"Showing {len(selected)} of {len(stored)} saved journal entries:\n")
+
+    rule = "-" * 60
+
+    for record in selected:
+        stamp = record.get("timestamp", "(no timestamp)")
+        text = record.get("raw_entry", "").strip()
+        print(rule)
+        print(f"Timestamp: {stamp}")
+        if text:
+            print(f"Entry: {text}")
+        # Categories that are missing or empty produce no line at all.
+        for label in ("feelings", "events", "health", "situations", "people"):
+            items = record.get(label) or []
+            if not items:
+                continue
+            joined = ", ".join(str(item) for item in items)
+            print(f"{label.title()}: {joined}")
+        print()
+    print(rule)
+
+
+def show_categories(
+    categories: Sequence[str],
+    *,
+    unique: bool = False,
+    limit: int | None = None,
+) -> None:
+    """Print each requested category's items, grouped by entry, newest first.
+
+    ``limit`` caps the entries scanned; ``unique`` hides items a category has already shown.
+    """
+    if limit is not None and limit <= 0:
+        raise SystemExit("Limit must be a positive integer when provided.")
+
+    stored = load_journal().get("entries", [])
+    if not stored:
+        print("No journal entries saved yet. Add one with --entry to get started.")
+        return
+
+    newest_first = sorted(stored, key=_history_sort_key, reverse=True)
+    selected = newest_first if limit is None else newest_first[:limit]
+
+    rule = "-" * 60
+    for category in categories:
+        print(rule)
+        print(f"Category: {category.title()}")
+
+        already_shown: set[str] = set()  # starts empty for every category
+        groups: list[tuple[str, list[str]]] = []
+        for record in selected:
+            items = list(record.get(category, []) or [])
+            if unique:
+                fresh: list[str] = []
+                for item in items:
+                    if item in already_shown:
+                        continue
+                    already_shown.add(item)
+                    fresh.append(item)
+                items = fresh
+            if items:
+                groups.append((str(record.get("timestamp", "(no timestamp)")), items))
+
+        if not groups:
+            print(f"No {category} found in the selected entries.\n")
+            continue
+
+        for stamp, items in groups:
+            print(f"- {stamp}:")
+            for item in items:
+                print(f"  • {item}")
+        print()
+
+    print(rule)
+
+
+def main() -> None:
+    """Run the CLI; invalid flag combinations or input exit via ``SystemExit``."""
+    args = parse_arguments()
+    # ``is not None`` so that an explicit empty value (``--entry ""``) still counts as given.
+    adding = args.entry is not None or args.timestamp is not None
+
+    if args.history and args.categories:
+        raise SystemExit("--history cannot be combined with --categories.")
+
+    if args.categories:
+        if adding:
+            raise SystemExit("--categories cannot be combined with --entry or --timestamp.")
+        show_categories(args.categories, unique=args.unique, limit=args.limit)
+        return
+
+    if args.unique:
+        raise SystemExit("--unique can only be used together with --categories.")
+
+    if args.history:
+        if adding:
+            raise SystemExit("--history cannot be combined with --entry or --timestamp.")
+        show_history(limit=args.limit)
+        return
+
+    if args.limit is not None:
+        raise SystemExit("--limit can only be used together with --history or --categories.")
+
+    # Validate the timestamp before prompting so a typo does not cost a typed entry.
+    try:
+        timestamp = None if args.timestamp is None else datetime.fromisoformat(args.timestamp)
+    except ValueError as exc:
+        raise SystemExit(f"Invalid timestamp: {args.timestamp}") from exc
+
+    if args.entry is not None:
+        entry_text = args.entry
+    else:
+        print("Enter your journal entry. Finish with Ctrl-D (or Ctrl-Z on Windows) and press Enter:")
+        try:
+            entry_text = sys.stdin.read()
+        except KeyboardInterrupt:
+            raise SystemExit("Journal entry cancelled.")
+
+    entry_text = entry_text.strip()
+    if not entry_text:
+        raise SystemExit("No journal entry provided.")
+
+    entry = add_entry(entry_text, timestamp=timestamp)
+    print("Journal entry saved! Here's the structured summary:\n")
+    print(json.dumps(entry.to_dict(), ensure_ascii=False, indent=2))
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+    main()
+