# Source code for next.static.collector

"""Collector, dedup strategies, JS context policies, and placeholder slots.

Rendering flows are stateful. Every HTTP request spins up a fresh
collector that rides along in the template context, absorbs every
`{% use_style %}`, `{% #use_script %}`, co-located `template.css`, and
`styles` or `scripts` list entry, then hands the accumulated set back to
the static manager when the template finishes.

The collector does not hardcode deduplication or merge semantics.
Strategy objects plug in at construction time, so users can swap
URL-based dedup for content-hash dedup or replace the default
first-wins JS-context merge with a deep-merge policy without touching
the collector source.

The collector is also fully type-agnostic. Each asset routes to a
slot named in `KindRegistry`, and the buckets live in a slot-keyed
dictionary on the collector. There is no built-in knowledge of `css`,
`js`, or any other specific kind here.
"""

from __future__ import annotations

import hashlib
import logging
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable

from .assets import StaticAsset, default_kinds
from .serializers import JsContextSerializer, resolve_serializer


if TYPE_CHECKING:
    from collections.abc import Hashable, Iterator
    from pathlib import Path


# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)


# Literal closing tag of the HTML `<head>` element.
# NOTE(review): not referenced in the visible part of this module; presumably
# an injection anchor used by callers. Confirm before relying on it.
HEAD_CLOSE: str = "</head>"


def _inline_dedup_key(asset: StaticAsset) -> tuple[str, str, str]:
    """Build the dedup key for an inline asset from its kind and body.

    The key is the tuple `("inline", kind, body)`. A missing or empty
    `inline` body is represented by the empty string.
    """
    body = asset.inline if asset.inline else ""
    return "inline", asset.kind, body



@runtime_checkable
class DedupStrategy(Protocol):
    """Key-based dedup strategy consumed by the static collector.

    Implementations return a hashable value that uniquely identifies an
    asset for deduplication. The collector ignores any asset whose key
    was already recorded.

    The protocol is runtime-checkable, so `isinstance` only verifies
    that a `key` attribute exists, not its signature.
    """

    def key(self, asset: StaticAsset) -> Hashable:
        """Return a hashable key identifying the asset for dedup."""
        raise NotImplementedError





class UrlDedup:
    """Dedupe inline assets by rendered body and URL-form assets by URL.

    This is the process-wide default. It mirrors the behavior of the
    original hand-rolled dedup built into the earlier collector.
    """

    def key(self, asset: StaticAsset) -> Hashable:
        """Return an inline-form key or a URL-form key based on the asset.

        An asset whose `inline` is not `None` is keyed by kind and body.
        Every other asset is keyed by `("url", kind, url)`, so the same
        URL registered under two kinds is not treated as a duplicate.
        """
        if asset.inline is not None:
            return _inline_dedup_key(asset)
        return ("url", asset.kind, asset.url)





class HashContentDedup:
    """Dedupe URL-form assets by sha256 of their disk content.

    This is useful in production builds where identical CSS may be
    emitted under different hashed filenames by a manifest storage. The
    strategy falls back to URL-based dedup when the `source_path` is
    missing.

    Digests are cached per path on the strategy instance, so each file
    is read at most once per instance and later changes to the file on
    disk are not observed by the same instance.
    """

    def __init__(self) -> None:
        """Initialise an empty per-path sha256 cache."""
        self._cache: dict[Path, str] = {}

    def key(self, asset: StaticAsset) -> Hashable:
        """Hash the asset disk contents when available, otherwise fall back.

        Inline assets are keyed by kind and body. Assets without a
        `source_path` are keyed by `("url", kind, url)`. Every other
        asset is keyed by `("hash", kind, sha256-hexdigest)`. Errors
        raised while reading the file propagate to the caller.
        """
        if asset.inline is not None:  # pragma: no cover
            return _inline_dedup_key(asset)
        if asset.source_path is None:
            return ("url", asset.kind, asset.url)
        cached = self._cache.get(asset.source_path)
        if cached is None:
            # First sighting of this path: read the file once and memoise.
            cached = hashlib.sha256(asset.source_path.read_bytes()).hexdigest()
            self._cache[asset.source_path] = cached
        return ("hash", asset.kind, cached)





class IdentityDedup:
    """Disable deduplication so every registration yields a unique key."""

    def __init__(self) -> None:
        """Initialise the monotonically increasing counter."""
        self._counter = 0

    def key(self, asset: StaticAsset) -> Hashable:  # noqa: ARG002
        """Return a unique incrementing key so dedup never triggers.

        The asset itself is ignored. Keys are `("unique", n)` with `n`
        starting at 1 and increasing by one on every call.
        """
        self._counter += 1
        return ("unique", self._counter)





@runtime_checkable
class JsContextPolicy(Protocol):
    """Merge strategy for the collector JS context.

    The protocol is runtime-checkable, so `isinstance` only verifies
    that a `merge` attribute exists, not its signature.
    """

    def merge(
        self,
        existing: dict[str, Any],
        key: str,
        value: Any,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Merge a new entry into the existing mapping and return it."""
        raise NotImplementedError





class FirstWinsPolicy:
    """Keep the first registration and silently ignore subsequent writes.

    This is the default policy. Page-level context wins over
    component-level context when both register the same key.
    """

    def merge(
        self,
        existing: dict[str, Any],
        key: str,
        value: Any,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Write the value only when the key is absent from existing.

        The mapping is updated in place and the same object is returned.
        """
        if key not in existing:
            existing[key] = value
        return existing





class LastWinsPolicy:
    """Overwrite the previous value with the latest registration."""

    def merge(
        self,
        existing: dict[str, Any],
        key: str,
        value: Any,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Assign the value under the key, overwriting any existing entry.

        The mapping is updated in place and the same object is returned.
        """
        existing[key] = value
        return existing





class RaiseOnConflictPolicy:
    """Raise `KeyError` when the same key is registered twice."""

    def merge(
        self,
        existing: dict[str, Any],
        key: str,
        value: Any,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Assign the value or raise when the key already exists.

        The mapping is updated in place and the same object is returned.
        On a conflict the mapping is left untouched and `KeyError` is
        raised.
        """
        if key in existing:
            msg = f"Duplicate JS context key: {key!r}"
            raise KeyError(msg)
        existing[key] = value
        return existing





class DeepMergePolicy:
    """Recursively merge dict values and override scalars with the latest value."""

    def merge(
        self,
        existing: dict[str, Any],
        key: str,
        value: Any,  # noqa: ANN401
    ) -> dict[str, Any]:
        """Recursively merge dict values or assign the new one otherwise.

        Merging only happens when both the stored value and the new
        value are dicts. Any other combination, including lists,
        replaces the stored value. The mapping is updated in place and
        the same object is returned.
        """
        current = existing.get(key)
        if isinstance(current, dict) and isinstance(value, dict):
            existing[key] = self._deep_merge(current, value)
        else:
            existing[key] = value
        return existing

    @classmethod
    def _deep_merge(
        cls,
        a: dict[str, Any],
        b: dict[str, Any],
    ) -> dict[str, Any]:
        """Return a new dict holding `a` overlaid with `b`.

        Keys whose values are dicts on both sides are merged
        recursively. For every other key the value from `b` wins.
        Neither input dict is modified. Values that are not merged are
        shared by reference with the inputs.
        """
        out = dict(a)
        for k, v in b.items():
            cur = out.get(k)
            if isinstance(cur, dict) and isinstance(v, dict):
                out[k] = cls._deep_merge(cur, v)
            else:
                out[k] = v
        return out




[docs]
@dataclass(frozen=True, slots=True)
class PlaceholderSlot:
    """Binding between a `{% collect_* %}` placeholder name and its token.

    The `name` field identifies the slot. Assets routed to this slot by
    `KindRegistry.slot(asset.kind)` accumulate in the collector under
    this name. The `token` field is the HTML comment marker emitted by
    the matching template tag at render time and replaced by the static
    manager during injection.
    """

    name: str
    token: str




class PlaceholderRegistry:
    """Mutable registry of placeholder slots.

    The registry ships empty. Framework bootstrap registers built-in
    slots such as `styles` and `scripts`, and user code registers
    additional slots with the same `register` call when introducing new
    asset destinations.
    """

    def __init__(self) -> None:
        """Initialise an empty slot registry."""
        self._slots: dict[str, PlaceholderSlot] = {}

    def register(self, name: str, *, token: str) -> None:
        """Register the slot under its name with the given placeholder token.

        A repeated call with the same token is idempotent. A repeated
        call with a different token raises `ValueError` so silent
        overrides cannot mask bugs. An empty `name` or `token` also
        raises `ValueError`.
        """
        if not name:
            msg = "Slot name must be a non-empty string"
            raise ValueError(msg)
        if not token:
            msg = "Slot token must be a non-empty string"
            raise ValueError(msg)
        existing = self._slots.get(name)
        if existing is not None:
            if existing.token == token:
                return
            msg = (
                f"Slot {name!r} is already registered with token "
                f"{existing.token!r}. Cannot re-register with token {token!r}."
            )
            raise ValueError(msg)
        self._slots[name] = PlaceholderSlot(name=name, token=token)

    def get(self, name: str) -> PlaceholderSlot | None:
        """Return the slot registered under the given name or None."""
        return self._slots.get(name)

    def __iter__(self) -> Iterator[PlaceholderSlot]:
        """Iterate over registered slots in registration order."""
        return iter(self._slots.values())

    def __len__(self) -> int:
        """Return the number of registered slots."""
        return len(self._slots)




# Module-level registry instance, created with no slots registered.
default_placeholders: PlaceholderRegistry = PlaceholderRegistry()



class StaticCollector:
    """Accumulate static asset references during a single page render.

    The optional `dedup` argument plugs in a custom dedup strategy. The
    default is `UrlDedup`. The optional `js_context_policy` argument
    plugs in a custom merge strategy for the JS context. The default is
    `FirstWinsPolicy`, which ensures page-level context wins over
    component-level context.

    Assets are added through the `add` method and later consumed by the
    static manager during injection. The collector has no knowledge of
    backends or rendering. It coordinates insertion order,
    deduplication, and JS context merging.

    Buckets are keyed by slot name as resolved through `KindRegistry`.
    The collector does not hardcode any specific slot, so adding new
    asset kinds to the registry transparently produces new buckets.
    """

    def __init__(
        self,
        *,
        dedup: DedupStrategy | None = None,
        js_context_policy: JsContextPolicy | None = None,
        js_serializer: JsContextSerializer | None = None,
    ) -> None:
        """Wire up dedup, JS-context policy, and JS serializer.

        A `js_serializer` of `None` defers resolving the default
        serializer until the first JS-context value is validated.
        """
        self._dedup = dedup if dedup is not None else UrlDedup()
        self._js_policy = (
            js_context_policy if js_context_policy is not None else FirstWinsPolicy()
        )
        self._js_serializer = js_serializer
        self._seen_keys: set[Hashable] = set()
        self._buckets: dict[str, list[StaticAsset]] = {}
        # Per slot: number of prepended assets, i.e. the next prepend index.
        self._prepend_idx: dict[str, int] = {}
        self._js_context: dict[str, Any] = {}
        self._js_context_serializers: dict[str, JsContextSerializer] = {}

    def add(self, asset: StaticAsset, *, prepend: bool = False) -> None:
        """Add the asset unless its dedup key was already recorded.

        Inline assets always append because their dedup key derives
        from the body. URL-form assets with `prepend=True` are inserted
        before existing append entries while keeping registration
        order among prepended items.

        The asset routes to the bucket named by
        `KindRegistry.slot(asset.kind)`. Unregistered kinds raise
        `KeyError` so misconfiguration surfaces immediately.
        """
        key = self._dedup.key(asset)
        if key in self._seen_keys:
            return
        # Resolve the slot before recording the key. If the lookup fails the
        # asset must not be remembered as seen, otherwise a later retry of
        # the same asset would be dropped silently instead of failing again.
        slot = default_kinds.slot(asset.kind)
        self._seen_keys.add(key)
        bucket = self._buckets.setdefault(slot, [])
        is_inline = asset.inline is not None
        use_prepend = prepend and not is_inline
        if use_prepend:
            idx = self._prepend_idx.get(slot, 0)
            bucket.insert(idx, asset)
            self._prepend_idx[slot] = idx + 1
        else:
            bucket.append(asset)

    def assets_in_slot(self, name: str) -> list[StaticAsset]:
        """Return collected assets for the named slot in insertion order.

        Returns an empty list when nothing was registered for the slot.
        Callers must not mutate the returned list.
        """
        return self._buckets.get(name, [])

    def _get_js_serializer(self) -> JsContextSerializer:
        """Return the collector serializer, resolving the default on first use."""
        if self._js_serializer is None:
            self._js_serializer = resolve_serializer()
        return self._js_serializer

    def add_js_context(
        self,
        key: str,
        value: Any,  # noqa: ANN401
        *,
        serializer: JsContextSerializer | None = None,
    ) -> None:
        """Merge the value under the key through the JS-context policy.

        Validates that `value` is serialisable by the active serializer
        before merging. Surfacing the failure here, at the registration
        site, gives a much better traceback than catching it at final
        page inject time. When `serializer` is supplied, the override
        validates this value and is recorded for the inject phase so
        the same key uses the same serializer end to end. The override
        does not leak into other keys.

        The override is recorded only when this registration took
        effect, meaning the policy stored this value or produced a new
        stored value from it. When the policy discards the value, as
        first-wins does for a repeated key, the retained value keeps
        the serializer it was registered with.

        Raises `TypeError` when the value is not serialisable. Any
        exception raised by the merge policy propagates unchanged.
        """
        active = serializer if serializer is not None else self._get_js_serializer()
        try:
            active.dumps(value)
        except (TypeError, ValueError) as e:
            msg = f"JS context value for key {key!r} is not serialisable: {e}"
            raise TypeError(msg) from e
        # Snapshot the stored entry before merging: policies may mutate the
        # mapping in place, so this is the only point where it can be read.
        had_key = key in self._js_context
        previous = self._js_context.get(key)
        self._js_context = self._js_policy.merge(self._js_context, key, value)
        if serializer is None or key not in self._js_context:
            return
        stored = self._js_context[key]
        took_effect = not had_key or stored is value or stored is not previous
        if took_effect:
            self._js_context_serializers[key] = serializer

    def js_context(self) -> dict[str, Any]:
        """Return the accumulated JS context.

        Callers must not mutate the returned mapping.
        """
        return self._js_context

    def js_context_serializers(self) -> dict[str, JsContextSerializer]:
        """Return the per-key serializer overrides recorded so far.

        The returned mapping is empty when every key uses the global
        serializer. Callers must not mutate it.
        """
        return self._js_context_serializers