DataDog · NouemanKHAL · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025
@@ -2,13 +2,15 @@
 # All rights reserved
 # Licensed under Simplified BSD License (see LICENSE)
 
+from collections.abc import Iterator
+
 try:
     import tagger
 except ImportError:
     from datadog_checks.base.stubs import tagger  # noqa: F401
 
 
-GENERIC_TAGS = {
+GENERIC_TAGS: set[str] = {
     'cluster_name',
     'clustername',
     'cluster',
@@ -21,3 +23,112 @@
     'service',
     'version',
 }
+
+
+class TagsSet:
+    """
+    A data structure to manage a collection of tags supporting both formats:
+      - key:value pairs
+      - standalone values (stored with empty string key)
+
+    Tags are kept as a mapping of key -> set of values, so adding the same
+    tag twice has no additional effect.
+
+    Supports:
+      - add_tag(key, value): add a key-value pair tag
+      - add_standalone_tag(value): add a standalone value tag
+      - add_unique_tag(key, value): add a tag ensuring the key has only this value
+      - get_tag(key): return a set of all values for the given key
+      - get_standalone_tags(): return a set of all standalone values
+      - get_tags(sort=True): return a list of formatted tag strings
+      - iteration: iterate over tags yielding (key, value) tuples
+      - remove_tag(key, value=None): remove all tags under a given key, or a specific key:value tag
+      - clear(): remove all tags
+    """
+
+    def __init__(self) -> None:
+        # Maps tag key -> set of values; standalone tags live under the '' key.
+        self._data: dict[str, set[str]] = {}
+
+    def add_tag(self, key: str, value: str) -> None:
+        """Add a tag with explicit key and value.
+
+        For standalone value tags, use add_standalone_tag() instead.
+
+        Raises:
+            ValueError: If key is empty
+        """
+        if not key:
+            raise ValueError("Tag key cannot be empty. Use add_standalone_tag() for standalone values.")
+
+        self._data.setdefault(key, set()).add(value)
+
+    def add_standalone_tag(self, value: str) -> None:
+        """Add a standalone value tag (no key).
+
+        Standalone tags are stored internally with an empty key.
+        """
+        self._data.setdefault('', set()).add(value)
+
+    def add_unique_tag(self, key: str, value: str) -> None:
+        """Add a tag under given key, ensuring the key has only this value.
+
+        Any values previously stored under the key are discarded.
+
+        Raises:
+            ValueError: If key is empty
+        """
+        if not key:
+            raise ValueError("Tag key cannot be empty. Use add_standalone_tag() for standalone values.")
+        self._data[key] = {value}
+
+    def get_tag(self, key: str) -> set[str]:
+        """Return all values for the given key. Returns an empty set if key doesn't exist.
+
+        The returned set is a copy: mutating it does not affect this TagsSet.
+        """
+        # Copy so callers cannot alter (or empty) the internal set behind our back.
+        return set(self._data.get(key, ()))
+
+    def get_standalone_tags(self) -> set[str]:
+        """Return all standalone values (the tags stored under the empty key)."""
+        return self.get_tag('')
+
+    def _get_tag_tuples(self, sort: bool = True) -> list[tuple[str, str]]:
+        """Return all tags as a list of (key, value) tuples, sorted if requested."""
+        pairs = [(key, val) for key, values in self._data.items() for val in values]
+        return sorted(pairs) if sort else pairs
+
+    def get_tags(self, sort: bool = True) -> list[str]:
+        """Return all tags as a list of formatted strings, sorted if requested.
+
+        Returns tags in their original format:
+        - 'key:value' for key-value pairs
+        - 'value' for standalone values
+        """
+        # Sorting is done on the formatted strings, not on the (key, value) tuples.
+        tags = [f"{key}:{value}" if key else value for key, value in self._get_tag_tuples(sort=False)]
+        return sorted(tags) if sort else tags
+
+    def remove_tag(self, key: str, value: str | None = None) -> None:
+        """Remove tag(s) under the given key.
+
+        If value is None, remove all tags under the given key.
+        If value is provided, remove only the specific key:value tag.
+        Removing a key or value that is not present is a no-op.
+        """
+        if value is None:
+            self._data.pop(key, None)
+            return
+
+        values = self._data.get(key)
+        if values is None:
+            return
+        values.discard(value)
+        # Drop the key entirely once its last value is gone.
+        if not values:
+            del self._data[key]
+
+    def clear(self) -> None:
+        """Remove all tags."""
+        self._data.clear()
+
+    def __iter__(self) -> Iterator[tuple[str, str]]:
+        """Allow iteration over tags: for tag in ts or list(ts).
+
+        Yields (key, value) tuples in sorted order; standalone tags use '' as key.
+        """
+        return iter(self._get_tag_tuples())
@@ -0,0 +1,185 @@
+# (C) Datadog, Inc. 2025-present
+# All rights reserved
+# Licensed under Simplified BSD License (see LICENSE)
+
+import pytest
+
+from datadog_checks.base.utils.tagging import TagsSet
+
+
+class TestTagsSet:
+    """Small, single-purpose checks of the TagsSet container."""
+
+    def test_init_empty(self):
+        """A new TagsSet renders no tags."""
+        assert TagsSet().get_tags() == []
+
+    def test_add_single_tag(self):
+        """One key/value pair is rendered as 'key:value'."""
+        ts = TagsSet()
+        ts.add_tag('env', 'prod')
+        rendered = ts.get_tags()
+        assert rendered == ['env:prod']
+
+    def test_add_multiple_values_same_key(self):
+        """A key may hold several values at once."""
+        ts = TagsSet()
+        for value in ('prod', 'dev'):
+            ts.add_tag('env', value)
+        rendered = ts.get_tags()
+        assert rendered == ['env:dev', 'env:prod']
+
+    def test_add_different_keys(self):
+        """Tags under distinct keys are all rendered."""
+        ts = TagsSet()
+        for key, value in (('env', 'prod'), ('app', 'web')):
+            ts.add_tag(key, value)
+        rendered = ts.get_tags()
+        assert rendered == ['app:web', 'env:prod']
+
+    def test_add_unique_tag_replaces(self):
+        """add_unique_tag drops the values previously held by the key."""
+        ts = TagsSet()
+        for value in ('prod', 'dev'):
+            ts.add_tag('env', value)
+        ts.add_unique_tag('env', 'test')
+        rendered = ts.get_tags()
+        assert rendered == ['env:test']
+
+    def test_add_unique_tag_new_key(self):
+        """add_unique_tag also works for a key not seen before."""
+        ts = TagsSet()
+        ts.add_unique_tag('env', 'prod')
+        rendered = ts.get_tags()
+        assert rendered == ['env:prod']
+
+    def test_get_tag_empty(self):
+        """get_tag on an empty TagsSet gives an empty set."""
+        assert TagsSet().get_tag('env') == set()
+
+    def test_get_tag_single_value(self):
+        """get_tag returns the only value stored under the key."""
+        ts = TagsSet()
+        ts.add_tag('env', 'prod')
+        values = ts.get_tag('env')
+        assert values == {'prod'}
+
+    def test_get_tag_multiple_values(self):
+        """get_tag returns every value stored under the key."""
+        ts = TagsSet()
+        for value in ('prod', 'dev'):
+            ts.add_tag('env', value)
+        values = ts.get_tag('env')
+        assert values == {'prod', 'dev'}
+
+    def test_get_tag_nonexistent_key(self):
+        """get_tag for an unknown key gives an empty set."""
+        ts = TagsSet()
+        ts.add_tag('env', 'prod')
+        values = ts.get_tag('app')
+        assert values == set()
+
+    def test_get_tags_unsorted(self):
+        """With sort=False only the content is checked, not the order."""
+        ts = TagsSet()
+        for key, value in (('b', '1'), ('a', '2')):
+            ts.add_tag(key, value)
+        unordered = ts.get_tags(sort=False)
+        assert set(unordered) == {'a:2', 'b:1'}
+
+    def test_get_tags_sorted(self):
+        """With sort=True the rendered tags come back ordered."""
+        ts = TagsSet()
+        for key, value in (('b', '1'), ('a', '2')):
+            ts.add_tag(key, value)
+        ordered = ts.get_tags(sort=True)
+        assert ordered == ['a:2', 'b:1']
+
+    def test_remove_tag_all_values(self):
+        """remove_tag without a value drops the whole key."""
+        ts = TagsSet()
+        for value in ('prod', 'dev'):
+            ts.add_tag('env', value)
+        ts.remove_tag('env')
+        assert ts.get_tags() == []
+
+    def test_remove_tag_specific_value(self):
+        """remove_tag with a value drops only that key:value tag."""
+        ts = TagsSet()
+        for value in ('prod', 'dev'):
+            ts.add_tag('env', value)
+        ts.remove_tag('env', 'dev')
+        assert ts.get_tags() == ['env:prod']
+
+    def test_remove_tag_nonexistent_key(self):
+        """Removing an unknown key is silently accepted."""
+        ts = TagsSet()
+        # Must not raise
+        ts.remove_tag('env')
+        assert ts.get_tags() == []
+
+    def test_remove_tag_nonexistent_value(self):
+        """Removing an unknown value leaves the existing tags untouched."""
+        ts = TagsSet()
+        ts.add_tag('env', 'prod')
+        # Must not raise
+        ts.remove_tag('env', 'dev')
+        assert ts.get_tags() == ['env:prod']
+
+    def test_clear_with_tags(self):
+        """clear empties a populated TagsSet."""
+        ts = TagsSet()
+        for key, value in (('env', 'prod'), ('app', 'web')):
+            ts.add_tag(key, value)
+        ts.clear()
+        assert ts.get_tags() == []
+
+    def test_iterator_empty(self):
+        """Iterating an empty TagsSet yields nothing."""
+        assert list(TagsSet()) == []
+
+    def test_iterator_multiple_tags(self):
+        """Iteration produces (key, value) tuples."""
+        ts = TagsSet()
+        for key, value in (('env', 'prod'), ('app', 'web')):
+            ts.add_tag(key, value)
+        pairs = list(ts)
+        assert pairs == [('app', 'web'), ('env', 'prod')]
+
+    def test_empty_key_raises_error(self):
+        """add_tag rejects an empty key with ValueError."""
+        ts = TagsSet()
+        with pytest.raises(ValueError, match="Tag key cannot be empty"):
+            ts.add_tag('', 'value')
+
+    def test_empty_key_add_unique_raises_error(self):
+        """add_unique_tag rejects an empty key with ValueError."""
+        ts = TagsSet()
+        with pytest.raises(ValueError, match="Tag key cannot be empty"):
+            ts.add_unique_tag('', 'value')
+
+    def test_special_chars_colon(self):
+        """A value containing colons is rendered unchanged."""
+        ts = TagsSet()
+        ts.add_tag('url', 'http://example.com')
+        rendered = ts.get_tags()
+        assert rendered == ['url:http://example.com']
+
+    def test_sorting_by_key_then_value(self):
+        """Rendered tags are ordered by key, then by value."""
+        ts = TagsSet()
+        for key, value in (('a', '2'), ('a', '1'), ('b', '1')):
+            ts.add_tag(key, value)
+        rendered = ts.get_tags()
+        assert rendered == ['a:1', 'a:2', 'b:1']
+
+    # Tests for both tag formats (key:value and standalone value)
+    def test_add_standalone_tag(self):
+        """Standalone values are rendered without a key prefix."""
+        ts = TagsSet()
+        for value in ('production', 'critical'):
+            ts.add_standalone_tag(value)
+        rendered = sorted(ts.get_tags())
+        assert rendered == ['critical', 'production']
+
+    def test_standalone_tags_use_empty_key(self):
+        """Standalone values are kept apart from key:value tags."""
+        ts = TagsSet()
+        for value in ('production', 'staging'):
+            ts.add_standalone_tag(value)
+        # They are reachable through the standalone accessor
+        assert ts.get_standalone_tags() == {'production', 'staging'}
+
+        for value in ('prod', 'staging'):
+            ts.add_tag('env', value)
+        # And they still render as bare values next to the keyed tags
+        rendered = sorted(ts.get_tags())
+        assert rendered == ['env:prod', 'env:staging', 'production', 'staging']