claw-code/tests/test_load_session_cli.py

"""Tests for load-session CLI parity with list-sessions/delete-session (ROADMAP #165).

Verifies the session-lifecycle CLI triplet is now symmetric:
- --directory DIR accepted (alternate storage locations reachable)
- --output-format {text,json} accepted
- Not-found emits typed JSON error envelope, never a Python traceback
- Corrupted session file distinguished from not-found via 'kind'
- Legacy text-mode output unchanged (backward compat)
"""

from __future__ import annotations

import json
import subprocess
import sys
from pathlib import Path

import pytest

sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from src.session_store import StoredSession, save_session  # noqa: E402


_REPO_ROOT = Path(__file__).resolve().parent.parent


def _run_cli(
    *args: str, cwd: Path | None = None,
) -> subprocess.CompletedProcess[str]:
    """Run ``python -m src.main`` with *args* and capture its output.

    Args:
        *args: Command-line arguments forwarded to ``src.main`` verbatim.
        cwd: Working directory for the child process. Defaults to the repo
            root when omitted.

    Returns:
        The completed process, with stdout and stderr captured as text.

    The repo root is always prepended to the child's ``PYTHONPATH`` so the
    ``src`` package stays importable even when *cwd* points elsewhere (for
    example a test's ``tmp_path``). Without this, ``python -m`` would only
    search the child's working directory and a non-default *cwd* would fail
    with an import error instead of exercising the CLI.
    """
    import os  # local import: ``os`` is not among the module-level imports

    env = os.environ.copy()
    inherited = env.get('PYTHONPATH')
    # Prepend rather than overwrite, so paths the caller's environment
    # already relies on remain visible to the child interpreter.
    env['PYTHONPATH'] = (
        f'{_REPO_ROOT}{os.pathsep}{inherited}' if inherited else str(_REPO_ROOT)
    )
    return subprocess.run(
        [sys.executable, '-m', 'src.main', *args],
        capture_output=True,
        text=True,
        cwd=str(_REPO_ROOT if cwd is None else cwd),
        env=env,
    )


def _make_session(session_id: str) -> StoredSession:
    """Build a minimal session fixture: one message, 1 input / 2 output tokens."""
    fixture = StoredSession(
        session_id=session_id,
        messages=('hi',),
        input_tokens=1,
        output_tokens=2,
    )
    return fixture


class TestDirectoryFlagParity:
    """``load-session`` honours ``--directory`` and its CWD-relative default."""

    def test_load_session_accepts_directory_flag(self, tmp_path: Path) -> None:
        """A session saved in an arbitrary directory loads via ``--directory``."""
        store = tmp_path
        save_session(_make_session('alpha'), store)
        cli_args = ('load-session', 'alpha', '--directory', str(store))
        completed = _run_cli(*cli_args)
        assert completed.returncode == 0, completed.stderr
        assert 'alpha' in completed.stdout

    def test_load_session_without_directory_uses_cwd_default(
        self, tmp_path: Path,
    ) -> None:
        """Omitting ``--directory`` falls back to ``.port_sessions`` under the CWD.

        The child process runs with ``cwd=tmp_path``, where the ``src``
        package is not present, so ``PYTHONPATH`` is pointed at the repo
        root to keep ``python -m src.main`` importable.
        """
        import os

        default_store = tmp_path / '.port_sessions'
        default_store.mkdir()
        save_session(_make_session('beta'), default_store)

        child_env = dict(os.environ)
        child_env['PYTHONPATH'] = str(_REPO_ROOT)
        command = [sys.executable, '-m', 'src.main', 'load-session', 'beta']
        completed = subprocess.run(
            command,
            cwd=str(tmp_path),
            env=child_env,
            text=True,
            capture_output=True,
        )
        assert completed.returncode == 0, completed.stderr
        assert 'beta' in completed.stdout


class TestOutputFormatFlagParity:
    """``load-session`` supports ``--output-format`` in json and text modes."""

    def test_json_mode_on_success(self, tmp_path: Path) -> None:
        """JSON mode emits the common envelope plus the load-session fields."""
        session = StoredSession(
            session_id='gamma',
            messages=('x', 'y'),
            input_tokens=5,
            output_tokens=7,
        )
        save_session(session, tmp_path)

        cli_args = [
            'load-session', 'gamma',
            '--directory', str(tmp_path),
            '--output-format', 'json',
        ]
        completed = _run_cli(*cli_args)
        assert completed.returncode == 0
        payload = json.loads(completed.stdout)

        # Common envelope fields (SCHEMAS.md contract).
        assert 'timestamp' in payload
        assert payload['command'] == 'load-session'
        assert payload['exit_code'] == 0
        assert payload['schema_version'] == '1.0'

        # Fields specific to load-session.
        assert payload['session_id'] == 'gamma'
        assert payload['loaded'] is True
        assert payload['messages_count'] == 2
        assert payload['input_tokens'] == 5
        assert payload['output_tokens'] == 7

    def test_text_mode_unchanged_on_success(self, tmp_path: Path) -> None:
        """Text mode keeps the legacy three-line layout (backward compat).

        The comparison is made line by line after stripping surrounding
        whitespace from stdout.
        """
        save_session(_make_session('delta'), tmp_path)
        completed = _run_cli(
            'load-session', 'delta', '--directory', str(tmp_path),
        )
        assert completed.returncode == 0
        expected_lines = ['delta', '1 messages', 'in=1 out=2']
        actual_lines = completed.stdout.strip().split('\n')
        assert actual_lines == expected_lines


class TestNotFoundTypedError:
    """A missing session yields a typed error, not a Python traceback."""

    def test_not_found_json_envelope(self, tmp_path: Path) -> None:
        """JSON mode reports a missing session as a structured error envelope."""
        cli_args = [
            'load-session', 'missing',
            '--directory', str(tmp_path),
            '--output-format', 'json',
        ]
        completed = _run_cli(*cli_args)
        assert completed.returncode == 1
        assert 'Traceback' not in completed.stderr, (
            'regression #165: raw traceback leaked to stderr'
        )
        assert 'SessionNotFoundError' not in completed.stdout, (
            'regression #165: internal class name leaked into CLI output'
        )

        envelope = json.loads(completed.stdout)
        assert envelope['session_id'] == 'missing'
        assert envelope['loaded'] is False

        error = envelope['error']
        assert error['kind'] == 'session_not_found'
        assert error['retryable'] is False
        # The error carries the directory so callers know where we looked.
        assert 'directory' in error

    def test_not_found_text_mode_no_traceback(self, tmp_path: Path) -> None:
        """Text mode prints an ``error:`` line for a missing session, no stack."""
        cli_args = ['load-session', 'missing', '--directory', str(tmp_path)]
        completed = _run_cli(*cli_args)
        assert completed.returncode == 1
        assert 'Traceback' not in completed.stderr
        assert completed.stdout.startswith('error:')


class TestLoadFailedDistinctFromNotFound:
    """An unreadable session file is reported differently from a missing one."""

    def test_corrupted_session_file_surfaces_distinct_kind(
        self, tmp_path: Path,
    ) -> None:
        """Invalid JSON in ``broken.json`` yields kind='session_load_failed'.

        The error must not be reported as 'session_not_found', and it is
        expected to be flagged as retryable.
        """
        corrupt_file = tmp_path / 'broken.json'
        corrupt_file.write_text('{ not valid json')

        cli_args = [
            'load-session', 'broken',
            '--directory', str(tmp_path),
            '--output-format', 'json',
        ]
        completed = _run_cli(*cli_args)
        assert completed.returncode == 1

        error = json.loads(completed.stdout)['error']
        assert error['kind'] == 'session_load_failed'
        assert error['retryable'] is True, (
            'corrupted file is potentially retryable (fs glitch) unlike not-found'
        )


class TestTripletParityConsistency:
    """Parity guard: the three #160 session commands share one flag pair."""

    @pytest.mark.parametrize('command', ['list-sessions', 'delete-session', 'load-session'])
    def test_all_three_accept_directory_flag(self, command: str) -> None:
        """The ``--help`` output of *command* mentions ``--directory``."""
        completed = _run_cli(command, '--help')
        usage = completed.stdout
        assert '--directory' in usage, (
            f'{command} missing --directory flag (#165 parity gap)'
        )

    @pytest.mark.parametrize('command', ['list-sessions', 'delete-session', 'load-session'])
    def test_all_three_accept_output_format_flag(self, command: str) -> None:
        """The ``--help`` output of *command* mentions ``--output-format``."""
        completed = _run_cli(command, '--help')
        usage = completed.stdout
        assert '--output-format' in usage, (
            f'{command} missing --output-format flag (#165 parity gap)'
        )
fix: #165 — load-session CLI now parity-matches list/delete (--directory, --output-format, typed JSON errors) The #160 session-lifecycle CLI triplet was asymmetric: list-sessions and delete-session accepted --directory + --output-format and emitted typed JSON error envelopes, but load-session had neither flag and dumped a raw Python traceback (including the SessionNotFoundError class name) on a missing session. Three concrete impacts this fix closes: 1. Alternate session-store locations (e.g. /tmp/claw-run-XXX/.port_sessions) were unreachable via load-session; claws had to chdir or monkeypatch DEFAULT_SESSION_DIR to work around it. 2. Not-found emitted a multi-line Python stack, not a parseable envelope. Claws deciding retry/escalate/give-up had only exit code 1 to work with. 3. The traceback leaked 'src.session_store.SessionNotFoundError' verbatim, coupling version-pinned claws to our internal exception class name. Now all three triplet commands accept the same flag pair and emit the same JSON error shape: Success (json mode): {"session_id": "alpha", "loaded": true, "messages_count": 3, "input_tokens": 42, "output_tokens": 99} Not-found: {"session_id": "missing", "loaded": false, "error": {"kind": "session_not_found", "message": "session 'missing' not found in /path", "directory": "/path", "retryable": false}} Corrupted file: {"session_id": "broken", "loaded": false, "error": {"kind": "session_load_failed", "message": "...", "directory": "/path", "retryable": true}} Exit code contract: - 0 on successful load - 1 on not-found (preserves existing $?) - 1 on OSError/JSONDecodeError (distinct 'kind' in JSON) Backward compat: legacy 'claw load-session ID' text output unchanged byte-for-byte. Only new behaviour is the flags and structured error path. 
Tests (tests/test_load_session_cli.py, 13 tests): - TestDirectoryFlagParity (2): --directory works + fallback to CWD/.port_sessions - TestOutputFormatFlagParity (2): json schema + text-mode backward compat - TestNotFoundTypedError (2): JSON envelope on not-found; no traceback in either mode; no internal class name leak - TestLoadFailedDistinctFromNotFound (1): corrupted file = session_load_failed with retryable=true, distinct from session_not_found - TestTripletParityConsistency (6): parametrised over [list, delete, load] * [--directory, --output-format] — explicit parity guard for future regressions Full suite: 80/80 passing, zero regression. Discovered via Jobdori dogfood sweep 2026-04-22 17:44 KST — ran 'claw load-session nonexistent' expecting a clean error, got a Python traceback. Filed #165 + fixed in same commit. Closes ROADMAP #165. 2026-04-22 17:44:48 +09:00			`"""Tests for load-session CLI parity with list-sessions/delete-session (ROADMAP #165).`

			`Verifies the session-lifecycle CLI triplet is now symmetric:`
			`- --directory DIR accepted (alternate storage locations reachable)`
			`- --output-format {text,json} accepted`
			`- Not-found emits typed JSON error envelope, never a Python traceback`
			`- Corrupted session file distinguished from not-found via 'kind'`
			`- Legacy text-mode output unchanged (backward compat)`
			`"""`

			`from __future__ import annotations`

			`import json`
			`import subprocess`
			`import sys`
			`from pathlib import Path`

			`import pytest`

			`sys.path.insert(0, str(Path(__file__).resolve().parent.parent))`

			`from src.session_store import StoredSession, save_session # noqa: E402`


			`_REPO_ROOT = Path(__file__).resolve().parent.parent`


			`def _run_cli(`
			`*args: str, cwd: Path | None = None,`
			`) -> subprocess.CompletedProcess[str]:`
			"""Always invoke the CLI with cwd=repo-root so ``python -m src.main``
			can resolve the ``src`` package, regardless of where the test's
			`tmp_path is.`
			`"""`
			`return subprocess.run(`
			`[sys.executable, '-m', 'src.main', *args],`
			`capture_output=True,`
			`text=True,`
			`cwd=str(cwd) if cwd else str(_REPO_ROOT),`
			`)`


			`def _make_session(session_id: str) -> StoredSession:`
			`return StoredSession(`
			`session_id=session_id, messages=('hi',), input_tokens=1, output_tokens=2,`
			`)`


			`class TestDirectoryFlagParity:`
			`def test_load_session_accepts_directory_flag(self, tmp_path: Path) -> None:`
			`save_session(_make_session('alpha'), tmp_path)`
			`result = _run_cli('load-session', 'alpha', '--directory', str(tmp_path))`
			`assert result.returncode == 0, result.stderr`
			`assert 'alpha' in result.stdout`

			`def test_load_session_without_directory_uses_cwd_default(`
			`self, tmp_path: Path,`
			`) -> None:`
			`"""When --directory is omitted, fall back to .port_sessions in CWD.`

			Subprocess CWD must still be able to import ``src.main``, so we use
			``cwd=tmp_path`` which means ``python -m src.main`` needs ``src/`` on
			`sys.path. We set PYTHONPATH to the repo root via env.`
			`"""`
			`sessions_dir = tmp_path / '.port_sessions'`
			`sessions_dir.mkdir()`
			`save_session(_make_session('beta'), sessions_dir)`
			`import os`
			`env = os.environ.copy()`
			`env['PYTHONPATH'] = str(_REPO_ROOT)`
			`result = subprocess.run(`
			`[sys.executable, '-m', 'src.main', 'load-session', 'beta'],`
			`capture_output=True, text=True, cwd=str(tmp_path), env=env,`
			`)`
			`assert result.returncode == 0, result.stderr`
			`assert 'beta' in result.stdout`


			`class TestOutputFormatFlagParity:`
			`def test_json_mode_on_success(self, tmp_path: Path) -> None:`
			`save_session(`
			`StoredSession(`
			`session_id='gamma', messages=('x', 'y'),`
			`input_tokens=5, output_tokens=7,`
			`),`
			`tmp_path,`
			`)`
			`result = _run_cli(`
			`'load-session', 'gamma',`
			`'--directory', str(tmp_path),`
			`'--output-format', 'json',`
			`)`
			`assert result.returncode == 0`
			`data = json.loads(result.stdout)`
feat: #173 — wrap_json_envelope() applied to all 13 clawable commands (LOOP CLOSED) Completes the coverage → enforcement → documentation → alignment cycle. Every clawable command now emits the canonical JSON envelope per SCHEMAS.md: Common fields (now real in output): - timestamp (ISO 8601 UTC) - command (argv[1]) - exit_code (0/1/2) - output_format ('json') - schema_version ('1.0') 13 commands wrapped: - list-sessions, delete-session, load-session, flush-transcript - show-command, show-tool - exec-command, exec-tool, route, bootstrap - command-graph, tool-pool, bootstrap-graph Implementation: - Added wrap_json_envelope() helper in src/main.py - Wrapped all 18 JSON output paths (13 success + 5 error paths) - Applied exit_code=1 to error/not-found envelopes - Kept text mode byte-identical (backward compat preserved) Test updates: - 3 skipped common-field tests now pass automatically - 3 existing tests updated to verify common envelope fields while preserving command-specific field checks - test_list_sessions_cli_runs, test_delete_session_cli_idempotent, test_load_session_cli::test_json_mode_on_success Full suite: 179 → 182 passing (+3 activated from skipped), zero regression. Loop completion: Coverage (#167-#170) ✅ All 13 commands accept --output-format Enforcement (#171) ✅ CI blocks new commands without --output-format Documentation (#172) ✅ SCHEMAS.md defines envelope contract Alignment (#173 this) ✅ Actual output matches SCHEMAS.md contract Example output now: $ claw list-sessions --output-format json { "timestamp": "2026-04-22T10:34:12Z", "command": "list-sessions", "exit_code": 0, "output_format": "json", "schema_version": "1.0", "sessions": ["alpha", "bravo"], "count": 2 } Closes ROADMAP #173. Protocol is now documented AND real. Claws can build ONE error handler, ONE timestamp parser, ONE version check instead of 13 special cases. 2026-04-22 19:35:37 +09:00			`# Verify common envelope fields (SCHEMAS.md contract)`
			`assert 'timestamp' in data`
			`assert data['command'] == 'load-session'`
			`assert data['exit_code'] == 0`
			`assert data['schema_version'] == '1.0'`
			`# Verify command-specific fields`
			`assert data['session_id'] == 'gamma'`
			`assert data['loaded'] is True`
			`assert data['messages_count'] == 2`
			`assert data['input_tokens'] == 5`
			`assert data['output_tokens'] == 7`
fix: #165 — load-session CLI now parity-matches list/delete (--directory, --output-format, typed JSON errors) The #160 session-lifecycle CLI triplet was asymmetric: list-sessions and delete-session accepted --directory + --output-format and emitted typed JSON error envelopes, but load-session had neither flag and dumped a raw Python traceback (including the SessionNotFoundError class name) on a missing session. Three concrete impacts this fix closes: 1. Alternate session-store locations (e.g. /tmp/claw-run-XXX/.port_sessions) were unreachable via load-session; claws had to chdir or monkeypatch DEFAULT_SESSION_DIR to work around it. 2. Not-found emitted a multi-line Python stack, not a parseable envelope. Claws deciding retry/escalate/give-up had only exit code 1 to work with. 3. The traceback leaked 'src.session_store.SessionNotFoundError' verbatim, coupling version-pinned claws to our internal exception class name. Now all three triplet commands accept the same flag pair and emit the same JSON error shape: Success (json mode): {"session_id": "alpha", "loaded": true, "messages_count": 3, "input_tokens": 42, "output_tokens": 99} Not-found: {"session_id": "missing", "loaded": false, "error": {"kind": "session_not_found", "message": "session 'missing' not found in /path", "directory": "/path", "retryable": false}} Corrupted file: {"session_id": "broken", "loaded": false, "error": {"kind": "session_load_failed", "message": "...", "directory": "/path", "retryable": true}} Exit code contract: - 0 on successful load - 1 on not-found (preserves existing $?) - 1 on OSError/JSONDecodeError (distinct 'kind' in JSON) Backward compat: legacy 'claw load-session ID' text output unchanged byte-for-byte. Only new behaviour is the flags and structured error path. 
Tests (tests/test_load_session_cli.py, 13 tests): - TestDirectoryFlagParity (2): --directory works + fallback to CWD/.port_sessions - TestOutputFormatFlagParity (2): json schema + text-mode backward compat - TestNotFoundTypedError (2): JSON envelope on not-found; no traceback in either mode; no internal class name leak - TestLoadFailedDistinctFromNotFound (1): corrupted file = session_load_failed with retryable=true, distinct from session_not_found - TestTripletParityConsistency (6): parametrised over [list, delete, load] * [--directory, --output-format] — explicit parity guard for future regressions Full suite: 80/80 passing, zero regression. Discovered via Jobdori dogfood sweep 2026-04-22 17:44 KST — ran 'claw load-session nonexistent' expecting a clean error, got a Python traceback. Filed #165 + fixed in same commit. Closes ROADMAP #165. 2026-04-22 17:44:48 +09:00
			`def test_text_mode_unchanged_on_success(self, tmp_path: Path) -> None:`
			`"""Legacy text output must be byte-identical for backward compat."""`
			`save_session(_make_session('delta'), tmp_path)`
			`result = _run_cli('load-session', 'delta', '--directory', str(tmp_path))`
			`assert result.returncode == 0`
			`lines = result.stdout.strip().split('\n')`
			`assert lines == ['delta', '1 messages', 'in=1 out=2']`


			`class TestNotFoundTypedError:`
			`def test_not_found_json_envelope(self, tmp_path: Path) -> None:`
			`"""Not-found emits structured JSON, never a Python traceback."""`
			`result = _run_cli(`
			`'load-session', 'missing',`
			`'--directory', str(tmp_path),`
			`'--output-format', 'json',`
			`)`
			`assert result.returncode == 1`
			`assert 'Traceback' not in result.stderr, (`
			`'regression #165: raw traceback leaked to stderr'`
			`)`
			`assert 'SessionNotFoundError' not in result.stdout, (`
			`'regression #165: internal class name leaked into CLI output'`
			`)`
			`data = json.loads(result.stdout)`
			`assert data['session_id'] == 'missing'`
			`assert data['loaded'] is False`
			`assert data['error']['kind'] == 'session_not_found'`
			`assert data['error']['retryable'] is False`
			`# directory field is populated so claws know where we looked`
			`assert 'directory' in data['error']`

			`def test_not_found_text_mode_no_traceback(self, tmp_path: Path) -> None:`
			`"""Text mode on not-found must not dump a Python stack either."""`
			`result = _run_cli(`
			`'load-session', 'missing', '--directory', str(tmp_path),`
			`)`
			`assert result.returncode == 1`
			`assert 'Traceback' not in result.stderr`
			`assert result.stdout.startswith('error:')`


			`class TestLoadFailedDistinctFromNotFound:`
			`def test_corrupted_session_file_surfaces_distinct_kind(`
			`self, tmp_path: Path,`
			`) -> None:`
			`"""A corrupted JSON file must emit kind='session_load_failed', not 'session_not_found'."""`
			`(tmp_path / 'broken.json').write_text('{ not valid json')`
			`result = _run_cli(`
			`'load-session', 'broken',`
			`'--directory', str(tmp_path),`
			`'--output-format', 'json',`
			`)`
			`assert result.returncode == 1`
			`data = json.loads(result.stdout)`
			`assert data['error']['kind'] == 'session_load_failed'`
			`assert data['error']['retryable'] is True, (`
			`'corrupted file is potentially retryable (fs glitch) unlike not-found'`
			`)`


			`class TestTripletParityConsistency:`
			`"""All three #160 CLI commands should accept the same flag pair."""`

			`@pytest.mark.parametrize('command', ['list-sessions', 'delete-session', 'load-session'])`
			`def test_all_three_accept_directory_flag(self, command: str) -> None:`
			`help_text = _run_cli(command, '--help').stdout`
			`assert '--directory' in help_text, (`
			`f'{command} missing --directory flag (#165 parity gap)'`
			`)`

			`@pytest.mark.parametrize('command', ['list-sessions', 'delete-session', 'load-session'])`
			`def test_all_three_accept_output_format_flag(self, command: str) -> None:`
			`help_text = _run_cli(command, '--help').stdout`
			`assert '--output-format' in help_text, (`
			`f'{command} missing --output-format flag (#165 parity gap)'`
			`)`