fix: #165 — load-session CLI now parity-matches list/delete (--directory, --output-format, typed JSON errors)
The #160 session-lifecycle CLI triplet was asymmetric: list-sessions and
delete-session accepted --directory + --output-format and emitted typed
JSON error envelopes, but load-session had neither flag and dumped a raw
Python traceback (including the SessionNotFoundError class name) on a
missing session.
Three concrete impacts this fix closes:
1. Alternate session-store locations (e.g. /tmp/claw-run-XXX/.port_sessions)
were unreachable via load-session; claws had to chdir or monkeypatch
DEFAULT_SESSION_DIR to work around it.
2. Not-found emitted a multi-line Python stack, not a parseable envelope.
Claws deciding retry/escalate/give-up had only exit code 1 to work with.
3. The traceback leaked 'src.session_store.SessionNotFoundError' verbatim,
coupling version-pinned claws to our internal exception class name.
Now all three triplet commands accept the same flag pair and emit the
same JSON error shape:
Success (json mode):
{"session_id": "alpha", "loaded": true, "messages_count": 3,
"input_tokens": 42, "output_tokens": 99}
Not-found:
{"session_id": "missing", "loaded": false,
"error": {"kind": "session_not_found",
"message": "session 'missing' not found in /path",
"directory": "/path", "retryable": false}}
Corrupted file:
{"session_id": "broken", "loaded": false,
"error": {"kind": "session_load_failed",
"message": "...", "directory": "/path",
"retryable": true}}
Exit code contract:
- 0 on successful load
- 1 on not-found (preserves existing $?)
- 1 on OSError/JSONDecodeError (distinct 'kind' in JSON)
Backward compat: legacy 'claw load-session ID' text output unchanged
byte-for-byte. Only new behaviour is the flags and structured error path.
Tests (tests/test_load_session_cli.py, 13 tests):
- TestDirectoryFlagParity (2): --directory works + fallback to CWD/.port_sessions
- TestOutputFormatFlagParity (2): json schema + text-mode backward compat
- TestNotFoundTypedError (2): JSON envelope on not-found; no traceback in
either mode; no internal class name leak
- TestLoadFailedDistinctFromNotFound (1): corrupted file = session_load_failed
with retryable=true, distinct from session_not_found
- TestTripletParityConsistency (6): parametrised over [list, delete, load] *
[--directory, --output-format] — explicit parity guard for future regressions
Full suite: 80/80 passing, zero regression.
Discovered via Jobdori dogfood sweep 2026-04-22 17:44 KST — ran
'claw load-session nonexistent' expecting a clean error, got a Python
traceback. Filed #165 + fixed in same commit.
Closes ROADMAP #165.
2026-04-22 17:44:48 +09:00
|
|
|
"""Tests for load-session CLI parity with list-sessions/delete-session (ROADMAP #165).
|
|
|
|
|
|
|
|
|
|
Verifies the session-lifecycle CLI triplet is now symmetric:
|
|
|
|
|
- --directory DIR accepted (alternate storage locations reachable)
|
|
|
|
|
- --output-format {text,json} accepted
|
|
|
|
|
- Not-found emits typed JSON error envelope, never a Python traceback
|
|
|
|
|
- Corrupted session file distinguished from not-found via 'kind'
|
|
|
|
|
- Legacy text-mode output unchanged (backward compat)
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
import subprocess
|
|
|
|
|
import sys
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
|
|
|
|
|
|
|
|
from src.session_store import StoredSession, save_session # noqa: E402
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _run_cli(
    *args: str, cwd: Path | None = None,
) -> subprocess.CompletedProcess[str]:
    """Run ``python -m src.main <args>`` in a child process and capture it.

    Args:
        *args: Arguments placed after ``src.main`` (sub-command first).
        cwd: Working directory for the child process. When omitted, the
            repository root is used.

    Returns:
        The completed process with ``stdout`` and ``stderr`` captured as text.

    The repository root is additionally prepended to ``PYTHONPATH`` so the
    ``src`` package stays importable even when ``cwd`` points somewhere else
    (for example a test's ``tmp_path``). Without this, overriding ``cwd``
    would make ``python -m src.main`` fail at import time.
    """
    # Function-scope import: the module's import block does not provide ``os``.
    import os

    repo_root = str(_REPO_ROOT)
    child_env = os.environ.copy()
    inherited = child_env.get('PYTHONPATH')
    # Prepend rather than overwrite so dependencies supplied through an
    # existing PYTHONPATH remain visible to the child interpreter.
    child_env['PYTHONPATH'] = (
        f'{repo_root}{os.pathsep}{inherited}' if inherited else repo_root
    )
    return subprocess.run(
        [sys.executable, '-m', 'src.main', *args],
        capture_output=True,
        text=True,
        cwd=repo_root if cwd is None else str(cwd),
        env=child_env,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _make_session(session_id: str) -> StoredSession:
    """Build a minimal session fixture: one ``'hi'`` message, 1 input / 2 output tokens."""
    fixture = StoredSession(
        session_id=session_id,
        messages=('hi',),
        input_tokens=1,
        output_tokens=2,
    )
    return fixture
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestDirectoryFlagParity:
    """``load-session`` must resolve its session store like the sibling commands."""

    def test_load_session_accepts_directory_flag(self, tmp_path: Path) -> None:
        """A session saved in an explicit directory loads via ``--directory``."""
        save_session(_make_session('alpha'), tmp_path)
        result = _run_cli('load-session', 'alpha', '--directory', str(tmp_path))
        assert result.returncode == 0, result.stderr
        assert 'alpha' in result.stdout

    def test_load_session_without_directory_uses_cwd_default(
        self, tmp_path: Path,
    ) -> None:
        """When --directory is omitted, fall back to .port_sessions in CWD.

        The child process runs with ``cwd=tmp_path`` so the default store
        resolves under the temp directory. Because that directory does not
        contain the ``src`` package, the repository root is put on
        ``PYTHONPATH`` so ``python -m src.main`` can still be imported.
        """
        # Function-scope import: the module's import block does not provide ``os``.
        import os

        sessions_dir = tmp_path / '.port_sessions'
        sessions_dir.mkdir()
        save_session(_make_session('beta'), sessions_dir)

        env = os.environ.copy()
        repo_root = str(_REPO_ROOT)
        inherited = env.get('PYTHONPATH')
        # Prepend instead of overwriting: an inherited PYTHONPATH may be what
        # makes the project's dependencies importable in this environment.
        env['PYTHONPATH'] = (
            f'{repo_root}{os.pathsep}{inherited}' if inherited else repo_root
        )
        result = subprocess.run(
            [sys.executable, '-m', 'src.main', 'load-session', 'beta'],
            capture_output=True, text=True, cwd=str(tmp_path), env=env,
        )
        assert result.returncode == 0, result.stderr
        assert 'beta' in result.stdout
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestOutputFormatFlagParity:
    """``load-session`` must honour ``--output-format`` like the sibling commands."""

    def test_json_mode_on_success(self, tmp_path: Path) -> None:
        """JSON mode emits the common envelope plus the load-session payload."""
        save_session(
            StoredSession(
                session_id='gamma', messages=('x', 'y'),
                input_tokens=5, output_tokens=7,
            ),
            tmp_path,
        )
        result = _run_cli(
            'load-session', 'gamma',
            '--directory', str(tmp_path),
            '--output-format', 'json',
        )
        # Surface the child's stderr on failure, as the sibling tests do.
        assert result.returncode == 0, result.stderr
        data = json.loads(result.stdout)
        # Verify common envelope fields (SCHEMAS.md contract)
        assert 'timestamp' in data
        assert data['command'] == 'load-session'
        assert data['exit_code'] == 0
        # The fifth common field; previously left unchecked.
        assert data['output_format'] == 'json'
        assert data['schema_version'] == '1.0'
        # Verify command-specific fields
        assert data['session_id'] == 'gamma'
        assert data['loaded'] is True
        assert data['messages_count'] == 2
        assert data['input_tokens'] == 5
        assert data['output_tokens'] == 7

    def test_text_mode_unchanged_on_success(self, tmp_path: Path) -> None:
        """Legacy text output keeps its three-line shape for backward compat.

        The comparison is made after trimming surrounding whitespace, so it
        pins the line contents rather than a trailing newline.
        """
        save_session(_make_session('delta'), tmp_path)
        result = _run_cli('load-session', 'delta', '--directory', str(tmp_path))
        assert result.returncode == 0, result.stderr
        lines = result.stdout.strip().split('\n')
        assert lines == ['delta', '1 messages', 'in=1 out=2']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestNotFoundTypedError:
    """A missing session must yield a typed error, never a Python traceback."""

    def test_not_found_json_envelope(self, tmp_path: Path) -> None:
        """Not-found emits structured JSON, never a Python traceback."""
        result = _run_cli(
            'load-session', 'missing',
            '--directory', str(tmp_path),
            '--output-format', 'json',
        )
        assert result.returncode == 1
        assert 'Traceback' not in result.stderr, (
            'regression #165: raw traceback leaked to stderr'
        )
        # Inspect both streams: an unhandled exception reports its class name
        # on stderr, so checking stdout alone would miss the leak.
        assert 'SessionNotFoundError' not in result.stdout + result.stderr, (
            'regression #165: internal class name leaked into CLI output'
        )
        data = json.loads(result.stdout)
        assert data['session_id'] == 'missing'
        assert data['loaded'] is False
        assert data['error']['kind'] == 'session_not_found'
        assert data['error']['retryable'] is False
        # directory field is populated so claws know where we looked
        assert 'directory' in data['error']

    def test_not_found_text_mode_no_traceback(self, tmp_path: Path) -> None:
        """Text mode on not-found must not dump a Python stack either."""
        result = _run_cli(
            'load-session', 'missing', '--directory', str(tmp_path),
        )
        assert result.returncode == 1
        assert 'Traceback' not in result.stderr
        assert result.stdout.startswith('error:')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestLoadFailedDistinctFromNotFound:
    """An unreadable session file reports its own error kind."""

    def test_corrupted_session_file_surfaces_distinct_kind(
        self, tmp_path: Path,
    ) -> None:
        """Malformed JSON on disk yields kind='session_load_failed', not 'session_not_found'."""
        corrupted_file = tmp_path / 'broken.json'
        corrupted_file.write_text('{ not valid json')

        cli_args = (
            'load-session', 'broken',
            '--directory', str(tmp_path),
            '--output-format', 'json',
        )
        completed = _run_cli(*cli_args)
        assert completed.returncode == 1

        error = json.loads(completed.stdout)['error']
        assert error['kind'] == 'session_load_failed'
        assert error['retryable'] is True, (
            'corrupted file is potentially retryable (fs glitch) unlike not-found'
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestTripletParityConsistency:
    """Every #160 session command must advertise the same pair of flags."""

    # The session-lifecycle triplet, shared by both parametrised checks.
    _COMMANDS = ['list-sessions', 'delete-session', 'load-session']

    @pytest.mark.parametrize('command', _COMMANDS)
    def test_all_three_accept_directory_flag(self, command: str) -> None:
        """``<command> --help`` must mention ``--directory``."""
        completed = _run_cli(command, '--help')
        usage = completed.stdout
        assert '--directory' in usage, (
            f'{command} missing --directory flag (#165 parity gap)'
        )

    @pytest.mark.parametrize('command', _COMMANDS)
    def test_all_three_accept_output_format_flag(self, command: str) -> None:
        """``<command> --help`` must mention ``--output-format``."""
        completed = _run_cli(command, '--help')
        usage = completed.stdout
        assert '--output-format' in usage, (
            f'{command} missing --output-format flag (#165 parity gap)'
        )
|