#37125: Use __new__ to sanitize TaskResult instead of __post_init__ to halve memory
usage
-------------------------------------+-------------------------------------
Reporter: Johannes Maron | Owner: (none)
Type: | Status: new
Cleanup/optimization |
Component: Tasks | Version: dev
Severity: Normal | Resolution:
Keywords: | Triage Stage:
| Unreviewed
Has patch: 0 | Needs documentation: 0
Needs tests: 0 | Patch needs improvement: 0
Easy pickings: 1 | UI/UX: 0
-------------------------------------+-------------------------------------
Comment (by Johannes Maron):
Sure, where are my manners? Here you go. It's slop, but I tweaked it to
minimize side effects:
{{{
import timeit
import tracemalloc
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any
from django.utils.json import normalize_json
from django.tasks.base import Task, TaskResult, TaskResultStatus
# --- Real Task instance ---
def my_func():
    """Do nothing; a placeholder callable for the benchmark's Task."""
    return None
# Allocate a Task via Task.__new__ (the normal constructor call is skipped)
# and fill in each attribute through object.__setattr__, which bypasses any
# __setattr__ defined on Task itself.
real_task = Task.__new__(Task)
for _attr, _value in (
    ("func", my_func),
    ("priority", 0),
    ("backend", "default"),
    ("queue_name", "default"),
    ("run_after", None),
    ("takes_context", False),
):
    object.__setattr__(real_task, _attr, _value)
# Keep the module namespace free of the loop's temporaries.
del _attr, _value
# --- Shared kwargs ---
# A single timestamp is reused for every datetime field so that each
# benchmarked instance is built from identical inputs.
now = datetime.now()
common_kwargs = {
    "task": real_task,
    "id": "abc123",
    "status": TaskResultStatus.SUCCESSFUL,
    "enqueued_at": now,
    "started_at": now,
    "finished_at": now,
    "last_attempted_at": now,
    "args": [],
    "kwargs": {},
    "backend": "default",
    "errors": [],
    "worker_ids": ["worker-1"],
}
# --- Variant 1: __post_init__ (same as original TaskResult) ---
@dataclass(frozen=True, slots=True, kw_only=True)
class TaskResultPostInit:
task: Any
id: str
status: Any
enqueued_at: datetime | None
started_at: datetime | None
finished_at: datetime | None
last_attempted_at: datetime | None
args: list[Any]
kwargs: dict[str, Any]
backend: str
errors: list
worker_ids: list[str]
_return_value: Any | None = field(init=False, default=None)
def __post_init__(self):
object.__setattr__(self, "args", normalize_json(self.args))
object.__setattr__(self, "kwargs", normalize_json(self.kwargs))
# --- Variant 2: __new__, no __post_init__ ---
@dataclass(frozen=True, slots=True, kw_only=True)
class TaskResultNew:
task: Any
id: str
status: Any
enqueued_at: datetime | None
started_at: datetime | None
finished_at: datetime | None
last_attempted_at: datetime | None
args: list[Any]
kwargs: dict[str, Any]
backend: str
errors: list
worker_ids: list[str]
_return_value: Any | None = field(init=False, default=None)
def __new__(cls, *args, **kwargs):
kwargs["args"] = normalize_json(kwargs["args"])
kwargs["kwargs"] = normalize_json(kwargs["kwargs"])
return super().__new__(cls)
# --- Benchmark ---
N = 1_000_000
def run_bench(label, fn, n=None):
    """Print the run time and peak traced memory for repeated calls of ``fn``.

    ``fn`` is first timed over ``n`` calls with ``timeit``; then ``n`` results
    are built and held in a list while ``tracemalloc`` records the peak
    allocation.

    Args:
        label: Heading printed above the measurements.
        fn: Zero-argument callable that builds one instance.
        n: Number of timed calls and of instances kept alive for the memory
            measurement. Defaults to the module-level ``N``.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    if n is None:
        n = N
    if n <= 0:
        # Also protects the per-call average below from dividing by zero.
        raise ValueError("n must be a positive integer")
    t = timeit.timeit(fn, number=n)
    tracemalloc.start()
    try:
        instances = [fn() for _ in range(n)]
        _, peak = tracemalloc.get_traced_memory()
    finally:
        # Stop tracing even when fn raises, so a failed run does not leave
        # tracemalloc enabled for whatever runs next.
        tracemalloc.stop()
    del instances
    print(label)
    print(f" Time: {t:.3f}s ({t/n*1e6:.2f} µs/call)")
    print(f" Peak mem: {peak / 1024 / 1024:.2f} MB (for {n:,} instances)")
    print()
# Both variants are constructed from the same keyword arguments, so the two
# runs differ only in which class is instantiated.
run_bench(
    "TaskResult with __post_init__",
    lambda: TaskResultPostInit(**common_kwargs),
)
run_bench(
    # Labelled after what TaskResultNew does: it sanitizes in __new__ and
    # defines no classmethod factory.
    "TaskResult with __new__",
    lambda: TaskResultNew(**common_kwargs),
)
}}}
--
Ticket URL: <https://code.djangoproject.com/ticket/37125#comment:2>