Files
claw-code/tests/test_run_turn_loop_permissions.py

96 lines
3.7 KiB
Python
Raw Normal View History

"""Tests for run_turn_loop permission denials parity (ROADMAP #159).
Verifies that multi-turn sessions have the same security posture as
single-turn bootstrap_session: denied_tools are inferred from matches
and threaded through every turn, not hardcoded empty.
"""
from __future__ import annotations
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from src.runtime import PortRuntime # noqa: E402
class TestPermissionDenialsInTurnLoop:
"""#159: permission denials must be non-empty in run_turn_loop,
matching what bootstrap_session produces for the same prompt.
"""
def test_turn_loop_surfaces_permission_denials_like_bootstrap(self) -> None:
"""Symmetry check: turn_loop and bootstrap_session infer the same denials."""
runtime = PortRuntime()
prompt = 'run bash ls'
# Single-turn via bootstrap
bootstrap_result = runtime.bootstrap_session(prompt)
bootstrap_denials = bootstrap_result.turn_result.permission_denials
# Multi-turn via run_turn_loop (single turn, no continuation)
loop_results = runtime.run_turn_loop(prompt, max_turns=1)
loop_denials = loop_results[0].permission_denials
# Both should infer denials for bash-family tools
assert len(bootstrap_denials) > 0, (
'bootstrap_session should deny bash-family tools'
)
assert len(loop_denials) > 0, (
f'#159 regression: run_turn_loop returned empty denials; '
f'expected {len(bootstrap_denials)} like bootstrap_session'
)
# The denial kinds should match (both deny the same tools)
bootstrap_denied_names = {d.tool_name for d in bootstrap_denials}
loop_denied_names = {d.tool_name for d in loop_denials}
assert bootstrap_denied_names == loop_denied_names, (
f'asymmetric denials: bootstrap denied {bootstrap_denied_names}, '
f'loop denied {loop_denied_names}'
)
def test_turn_loop_with_continuation_preserves_denials(self) -> None:
"""Denials are inferred once at loop start, then passed to every turn."""
runtime = PortRuntime()
from unittest.mock import patch
with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
from src.models import UsageSummary
from src.query_engine import TurnResult
engine = mock_factory.return_value
submitted_denials: list[tuple] = []
def _capture(prompt, commands, tools, denials):
submitted_denials.append(denials)
return TurnResult(
prompt=prompt,
output='ok',
matched_commands=(),
matched_tools=(),
permission_denials=denials, # echo back the denials
usage=UsageSummary(),
stop_reason='completed',
)
engine.submit_message.side_effect = _capture
loop_results = runtime.run_turn_loop(
'run bash rm', max_turns=2, continuation_prompt='continue'
)
# Both turn 0 and turn 1 should have received the same denials
assert len(submitted_denials) == 2
assert submitted_denials[0] == submitted_denials[1], (
'denials should be consistent across all turns'
)
# And they should be non-empty (bash is destructive)
assert len(submitted_denials[0]) > 0, (
'turn-loop denials were empty — #159 regression'
)
# Turn results should reflect the denials that were passed
for result in loop_results:
assert len(result.permission_denials) > 0