"""Tests for pseudonymization: AES-256-GCM encryptor + PseudonymMapper.""" from __future__ import annotations import base64 import re from unittest.mock import MagicMock import pytest from layers.regex_layer import DetectedEntity from pseudonymize import AESEncryptor, PseudonymMapper # Stable 32-byte dev key (base64-encoded) _KEY_B64 = base64.b64encode(b"veylant-dev-key-32bytes-padding-").decode() # Token pattern _TOKEN_RE = re.compile(r"\[PII:[A-Z_]+:[0-9a-f]{8}\]") # --------------------------------------------------------------------------- # AESEncryptor tests # --------------------------------------------------------------------------- class TestAESEncryptor: @pytest.fixture def enc(self) -> AESEncryptor: return AESEncryptor(_KEY_B64) def test_encrypt_returns_bytes(self, enc): result = enc.encrypt("secret") assert isinstance(result, bytes) def test_decrypt_round_trip(self, enc): plaintext = "jean.dupont@example.com" assert enc.decrypt(enc.encrypt(plaintext)) == plaintext def test_different_encryptions_of_same_value(self, enc): """AES-GCM uses random nonce — same input gives different ciphertext.""" a = enc.encrypt("secret") b = enc.encrypt("secret") assert a != b # But both decrypt correctly assert enc.decrypt(a) == enc.decrypt(b) == "secret" def test_empty_string_roundtrip(self, enc): assert enc.decrypt(enc.encrypt("")) == "" def test_unicode_roundtrip(self, enc): value = "Óscar García-López" assert enc.decrypt(enc.encrypt(value)) == value def test_wrong_key_raises(self, enc): encrypted = enc.encrypt("secret") wrong_enc = AESEncryptor(base64.b64encode(b"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx").decode()) with pytest.raises(Exception): wrong_enc.decrypt(encrypted) def test_invalid_key_length_raises(self): bad_key = base64.b64encode(b"short").decode() with pytest.raises(ValueError): AESEncryptor(bad_key) # --------------------------------------------------------------------------- # PseudonymMapper tests # 
# ---------------------------------------------------------------------------


@pytest.fixture
def redis_mock() -> MagicMock:
    """Return a ``MagicMock`` used in place of a Redis client.

    ``set`` is configured to return ``True`` so every write looks successful.
    """
    mock = MagicMock()
    mock.set.return_value = True
    return mock


@pytest.fixture
def mapper(redis_mock: MagicMock) -> PseudonymMapper:
    """Return a ``PseudonymMapper`` using the mocked Redis, the dev key, and a 3600 s TTL."""
    enc = AESEncryptor(_KEY_B64)
    return PseudonymMapper(redis_mock, enc, ttl_seconds=3600)


class TestPseudonymMapper:
    """Checks for ``PseudonymMapper.anonymize`` / ``depseudonymize``.

    ``anonymize(text, entities, tenant, request)`` is expected to return an
    ``(anonymized_text, mapping)`` pair where ``mapping`` maps each token to
    the original value it replaced.
    """

    def test_anonymize_replaces_entity(self, mapper):
        """The raw e-mail disappears and a PII token appears in its place."""
        text = "Email: alice@example.com"
        entities = [DetectedEntity("EMAIL", "alice@example.com", 7, 24)]
        anon, _ = mapper.anonymize(text, entities, "tenant1", "req1")
        assert "alice@example.com" not in anon
        assert _TOKEN_RE.search(anon) is not None

    def test_anonymize_returns_mapping(self, mapper):
        """One entity produces exactly one mapping entry holding the original value."""
        text = "alice@example.com"
        entities = [DetectedEntity("EMAIL", "alice@example.com", 0, 17)]
        _, mapping = mapper.anonymize(text, entities, "tenant1", "req1")
        assert len(mapping) == 1
        assert "alice@example.com" in mapping.values()

    def test_token_format(self, mapper):
        """Mapping keys look like ``[PII:EMAIL:<8 hex digits>]``."""
        text = "alice@example.com"
        entities = [DetectedEntity("EMAIL", "alice@example.com", 0, 17)]
        _, mapping = mapper.anonymize(text, entities, "t", "r")
        # Single entity, so the first (only) key is the token under test.
        token = next(iter(mapping))
        assert re.match(r"\[PII:EMAIL:[0-9a-f]{8}\]", token)

    def test_depseudonymize_restores_original(self, mapper):
        """Anonymize followed by depseudonymize reproduces the input exactly."""
        text = "Email: alice@example.com"
        entities = [DetectedEntity("EMAIL", "alice@example.com", 7, 24)]
        anon, mapping = mapper.anonymize(text, entities, "t", "r")
        restored = mapper.depseudonymize(anon, mapping)
        assert restored == text

    def test_multiple_entities_anonymized(self, mapper):
        """Every entity is replaced and each gets its own mapping entry."""
        text = "alice@example.com et bob@example.com"
        entities = [
            DetectedEntity("EMAIL", "alice@example.com", 0, 17),
            DetectedEntity("EMAIL", "bob@example.com", 21, 36),
        ]
        anon, mapping = mapper.anonymize(text, entities, "t", "r")
        assert "alice@example.com" not in anon
        assert "bob@example.com" not in anon
        assert len(mapping) == 2

    def test_depseudonymize_multiple(self, mapper):
        """A text with two entities round-trips back to the exact original."""
        text = "alice@example.com et bob@example.com"
        entities = [
            DetectedEntity("EMAIL", "alice@example.com", 0, 17),
            DetectedEntity("EMAIL", "bob@example.com", 21, 36),
        ]
        anon, mapping = mapper.anonymize(text, entities, "t", "r")
        restored = mapper.depseudonymize(anon, mapping)
        # Exact equality also guards the text between the entities, which a
        # pair of substring checks would not.
        assert restored == text

    def test_redis_set_called_per_entity(self, mapper, redis_mock):
        """Redis ``set`` is invoked once for each anonymized entity."""
        text = "alice@example.com et 06 12 34 56 78"
        entities = [
            DetectedEntity("EMAIL", "alice@example.com", 0, 17),
            DetectedEntity("PHONE_FR", "06 12 34 56 78", 21, 35),
        ]
        mapper.anonymize(text, entities, "tenant1", "req1")
        assert redis_mock.set.call_count == 2

    def test_redis_ttl_passed(self, mapper, redis_mock):
        """The mapper's TTL is forwarded to Redis ``set`` as the ``ex`` keyword."""
        text = "alice@example.com"
        entities = [DetectedEntity("EMAIL", "alice@example.com", 0, 17)]
        mapper.anonymize(text, entities, "t", "r")
        # call_args is an (args, kwargs) pair; index 1 selects the kwargs dict.
        call_kwargs = redis_mock.set.call_args[1]
        assert call_kwargs.get("ex") == 3600

    def test_empty_entities_no_change(self, mapper):
        """With no entities the text is returned untouched and the mapping is empty."""
        text = "Texte sans PII"
        anon, mapping = mapper.anonymize(text, [], "t", "r")
        assert anon == text
        assert mapping == {}

    def test_depseudonymize_unknown_token_left_as_is(self, mapper):
        """A token absent from the mapping is left in the text verbatim."""
        text = "[PII:EMAIL:deadbeef]"
        result = mapper.depseudonymize(text, {})
        assert result == "[PII:EMAIL:deadbeef]"

    def test_overlapping_entities_correct_offsets(self, mapper):
        """Replacing several entities must not corrupt the offsets of the others.

        The two spans here are adjacent (separated by one space), not truly
        overlapping; the point is that substituting one span with a token of a
        different length must not shift the other span's boundaries.
        """
        text = "a@b.com c@d.com"
        entities = [
            DetectedEntity("EMAIL", "a@b.com", 0, 7),
            DetectedEntity("EMAIL", "c@d.com", 8, 15),
        ]
        anon, mapping = mapper.anonymize(text, entities, "t", "r")
        restored = mapper.depseudonymize(anon, mapping)
        # An exact round trip is the only outcome consistent with correct offsets.
        assert restored == text