Giao diện
dataclasses & attrs Standard Library
Viết data classes ngắn gọn, type-safe - không cần boilerplate
Learning Outcomes
Sau khi hoàn thành trang này, bạn sẽ:
- 🎯 Sử dụng @dataclass decorator để tạo data classes nhanh chóng
- 🎯 Hiểu field() và các options như default_factory, compare, hash
- 🎯 Implement __post_init__ cho validation và computed fields
- 🎯 So sánh dataclass vs attrs vs Pydantic để chọn đúng tool
- 🎯 Tránh các Production Pitfalls phổ biến
@dataclass Cơ bản
Vấn đề: Boilerplate Code
python
# ❌ Cách cũ: Quá nhiều boilerplate
class User:
def __init__(self, name: str, email: str, age: int = 0):
self.name = name
self.email = email
self.age = age
def __repr__(self):
return f"User(name={self.name!r}, email={self.email!r}, age={self.age!r})"
def __eq__(self, other):
if not isinstance(other, User):
return NotImplemented
return (self.name, self.email, self.age) == (other.name, other.email, other.age)
def __hash__(self):
return hash((self.name, self.email, self.age))
Giải pháp: @dataclass
python
from dataclasses import dataclass
# ✅ Với dataclass: Gọn gàng, tự động generate methods
@dataclass
class User:
name: str
email: str
age: int = 0
# Tự động có:
# - __init__(self, name, email, age=0)
# - __repr__(self) → "User(name='...', email='...', age=0)"
# - __eq__(self, other) → So sánh theo giá trị
user = User("HPN", "hpn@example.com", 30)
print(user) # User(name='HPN', email='hpn@example.com', age=30)
user1 = User("HPN", "hpn@example.com")
user2 = User("HPN", "hpn@example.com")
user1 == user2 # True (value equality)
@dataclass Parameters
python
from dataclasses import dataclass
@dataclass(
init=True, # Generate __init__
repr=True, # Generate __repr__
eq=True, # Generate __eq__
order=False, # Generate __lt__, __le__, __gt__, __ge__
unsafe_hash=False, # Generate __hash__ (cẩn thận với mutable!)
frozen=False, # Immutable (như tuple)
match_args=True, # Pattern matching support (3.10+)
kw_only=False, # All fields keyword-only (3.10+)
slots=False, # Use __slots__ for memory (3.10+)
)
class Config:
host: str
port: int = 8080
frozen=True - Immutable Dataclass
python
from dataclasses import dataclass
@dataclass(frozen=True)
class Point:
x: float
y: float
p = Point(1.0, 2.0)
# p.x = 3.0 # FrozenInstanceError!
# ✅ Có thể dùng làm dict key (hashable)
points = {Point(0, 0): "origin", Point(1, 1): "diagonal"}
order=True - Comparison Methods
python
from dataclasses import dataclass
@dataclass(order=True)
class Version:
major: int
minor: int
patch: int = 0
v1 = Version(1, 0, 0)
v2 = Version(2, 0, 0)
v3 = Version(1, 5, 0)
v1 < v2 # True
v1 < v3 # True (so sánh tuple: (1,0,0) < (1,5,0))
sorted([v2, v1, v3]) # [Version(1,0,0), Version(1,5,0), Version(2,0,0)]
slots=True - Memory Optimization (Python 3.10+)
python
from dataclasses import dataclass
import sys
@dataclass
class UserNoSlots:
name: str
age: int
@dataclass(slots=True)
class UserWithSlots:
name: str
age: int
# Memory comparison
no_slots = UserNoSlots("HPN", 30)
with_slots = UserWithSlots("HPN", 30)
sys.getsizeof(no_slots) # ~48 bytes
sys.getsizeof(no_slots.__dict__) # ~104 bytes
# Total: ~152 bytes
sys.getsizeof(with_slots) # ~56 bytes
# Tiết kiệm ~63% memory!
kw_only=True - Keyword-Only Arguments (Python 3.10+)
python
from dataclasses import dataclass
@dataclass(kw_only=True)
class Config:
host: str
port: int
debug: bool = False
# Phải dùng keyword arguments
config = Config(host="localhost", port=8080)
# config = Config("localhost", 8080) # TypeError!
field() - Fine-grained Control
python
from dataclasses import dataclass, field
from typing import List
@dataclass
class User:
name: str
email: str
# ❌ WRONG: Mutable default value
# tags: List[str] = [] # ValueError: dataclass không cho phép mutable default (list/dict/set)
# ✅ CORRECT: Use default_factory
tags: List[str] = field(default_factory=list)
# Field không xuất hiện trong __init__
id: int = field(init=False, default=0)
# Field không xuất hiện trong __repr__
_cache: dict = field(default_factory=dict, repr=False)
# Field không dùng trong comparison
created_at: str = field(default="", compare=False)
field() Parameters
python
from dataclasses import dataclass, field
@dataclass
class Example:
# default: Giá trị mặc định (immutable only!)
name: str = field(default="Unknown")
# default_factory: Factory function cho mutable defaults
items: list = field(default_factory=list)
# init: Có xuất hiện trong __init__ không
computed: int = field(init=False, default=0)
# repr: Có xuất hiện trong __repr__ không
internal: str = field(default="", repr=False)
# compare: Có dùng trong __eq__ và ordering không
metadata: dict = field(default_factory=dict, compare=False)
# hash: Có dùng trong __hash__ không (None = theo compare)
id: int = field(default=0, hash=True)
# kw_only: Field này phải là keyword argument (3.10+)
debug: bool = field(default=False, kw_only=True)
Pattern: Computed Fields
python
from dataclasses import dataclass, field
@dataclass
class Rectangle:
width: float
height: float
# Computed field - không trong __init__
area: float = field(init=False)
perimeter: float = field(init=False)
def __post_init__(self):
self.area = self.width * self.height
self.perimeter = 2 * (self.width + self.height)
rect = Rectangle(10.0, 5.0)
print(rect.area) # 50.0
print(rect.perimeter) # 30.0
__post_init__ - Post-Initialization
__post_init__ được gọi sau __init__, dùng cho validation và computed fields.
Basic Usage
python
from dataclasses import dataclass
@dataclass
class User:
name: str
email: str
age: int = 0
def __post_init__(self):
# Validation
if self.age < 0:
raise ValueError("Age cannot be negative")
# Normalization
self.email = self.email.lower().strip()
self.name = self.name.strip()
user = User(" HPN ", " HPN@Example.COM ", 30)
print(user.name) # "HPN"
print(user.email) # "hpn@example.com"
# User("Test", "test@test.com", -1) # ValueError!
InitVar - Init-Only Variables
python
from dataclasses import dataclass, field, InitVar
@dataclass
class User:
name: str
email: str
# InitVar: Chỉ dùng trong __init__, không lưu làm attribute
password: InitVar[str]
# Computed từ password
password_hash: str = field(init=False)
def __post_init__(self, password: str):
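# password chỉ available trong __post_init__
# ⚠️ Chỉ để minh họa: SHA-256 không salt KHÔNG an toàn cho password thật;
# production nên dùng hashlib.scrypt / hashlib.pbkdf2_hmac với salt ngẫu nhiên.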
import hashlib
self.password_hash = hashlib.sha256(password.encode()).hexdigest()
user = User("HPN", "hpn@test.com", "secret123")
print(user.password_hash) # Chuỗi hex 64 ký tự (SHA-256 digest của "secret123")
# user.password # AttributeError! Không có attribute này
Pattern: Dependency Injection
python
from dataclasses import dataclass, field, InitVar
from typing import Optional
@dataclass
class Service:
name: str
# Optional dependency injection
logger: InitVar[Optional["Logger"]] = None
_logger: "Logger" = field(init=False, repr=False)
def __post_init__(self, logger: Optional["Logger"]):
# Use injected logger or create default
self._logger = logger or DefaultLogger()
Inheritance với Dataclass
Basic Inheritance
python
from dataclasses import dataclass
@dataclass
class Person:
name: str
age: int
@dataclass
class Employee(Person):
employee_id: str
department: str = "Engineering"
emp = Employee("HPN", 30, "E001")
print(emp) # Employee(name='HPN', age=30, employee_id='E001', department='Engineering')
Field Order với Defaults
python
from dataclasses import dataclass, field
# ❌ PROBLEM: Parent có default, child không có
@dataclass
class Base:
name: str = "default"
# @dataclass
# class Child(Base):
# id: int # TypeError: non-default argument follows default argument
# ✅ SOLUTION 1: Child cũng có default
@dataclass
class Child(Base):
id: int = 0
# ✅ SOLUTION 2: Dùng field(kw_only=True) (Python 3.10+)
@dataclass
class Base:
name: str = "default"
@dataclass
class Child(Base):
id: int = field(kw_only=True)
child = Child(id=1) # name="default", id=1
Override __post_init__
python
from dataclasses import dataclass
@dataclass
class Base:
name: str
def __post_init__(self):
self.name = self.name.upper()
@dataclass
class Child(Base):
age: int
def __post_init__(self):
super().__post_init__() # Gọi parent's __post_init__
if self.age < 0:
raise ValueError("Age must be positive")
child = Child("hpn", 30)
print(child.name) # "HPN" (từ parent's __post_init__)
dataclass vs namedtuple
python
from dataclasses import dataclass
from typing import NamedTuple
# === NAMEDTUPLE ===
class PointNT(NamedTuple):
x: float
y: float
# === DATACLASS ===
@dataclass
class PointDC:
x: float
y: float
So sánh Chi tiết
| Feature | namedtuple | dataclass |
|---|---|---|
| Immutable | ✅ Always | ❌ Default (frozen=True để immutable) |
| Hashable | ✅ Nếu mọi phần tử đều hashable | ❌ Default (frozen=True để hashable) |
| Memory | Nhỏ hơn | Lớn hơn (slots=True để tối ưu) |
| Tuple unpacking | ✅ x, y = point | ❌ Không |
| Index access | ✅ point[0] | ❌ Không |
| Default values | ✅ Yes | ✅ Yes |
| Mutable fields | ❌ No | ✅ Yes |
| __post_init__ | ❌ No | ✅ Yes |
| Inheritance | ⚠️ Limited | ✅ Full |
Khi nào dùng gì?
python
# ✅ NAMEDTUPLE: Immutable, lightweight, dict keys
class Coordinate(NamedTuple):
lat: float
lon: float
locations = {Coordinate(10.0, 106.0): "HCMC"} # Hashable!
lat, lon = Coordinate(10.0, 106.0) # Unpacking!
# ✅ DATACLASS: Mutable, complex logic, validation
@dataclass
class User:
name: str
email: str
def __post_init__(self):
self.email = self.email.lower()
# ✅ FROZEN DATACLASS: Best of both (nhưng không unpack được)
@dataclass(frozen=True)
class Config:
host: str
port: int = 8080
attrs Library
attrs là thư viện third-party mạnh hơn dataclass, có trước dataclass (Python 3.7).
Installation
bash
pip install attrs
Basic Usage
python
import attrs
@attrs.define
class User:
name: str
email: str
age: int = 0
# Tương đương @dataclass(slots=True, eq=True, ...)
user = User("HPN", "hpn@test.com", 30)
attrs Validators
python
import attrs
from attrs import validators
@attrs.define
class User:
name: str = attrs.field(validator=validators.instance_of(str))
email: str = attrs.field(validator=[
validators.instance_of(str),
validators.matches_re(r'^[\w\.-]+@[\w\.-]+\.\w+$')
])
age: int = attrs.field(validator=[
validators.instance_of(int),
validators.ge(0), # >= 0
validators.le(150) # <= 150
])
# Validation tự động khi tạo instance
user = User("HPN", "hpn@test.com", 30) # OK
# User("HPN", "invalid-email", 30) # ValueError!
# User("HPN", "hpn@test.com", -1) # ValueError!
attrs Converters
python
import attrs
@attrs.define
class Config:
host: str = attrs.field(converter=str.lower)
port: int = attrs.field(converter=int)
debug: bool = attrs.field(converter=bool, default=False)
config = Config("LOCALHOST", "8080", 1)
print(config.host) # "localhost"
print(config.port) # 8080 (int)
print(config.debug) # True
attrs vs dataclass
| Feature | dataclass | attrs |
|---|---|---|
| Built-in | ✅ Python 3.7+ | ❌ pip install |
| Validators | ❌ Manual | ✅ Built-in |
| Converters | ❌ Manual | ✅ Built-in |
| slots | ✅ 3.10+ | ✅ Default |
| Performance | Good | Better |
| Ecosystem | Standard | Rich (cattrs, etc.) |
Pydantic Comparison
Pydantic là thư viện validation mạnh nhất, đặc biệt cho API/JSON data.
Installation
bash
pip install pydantic
# EmailStr cần thêm package email-validator: pip install "pydantic[email]"
Basic Usage
python
from pydantic import BaseModel, EmailStr, Field
from typing import Optional
class User(BaseModel):
name: str = Field(..., min_length=1, max_length=100)
email: EmailStr
age: int = Field(default=0, ge=0, le=150)
bio: Optional[str] = None
# Validation tự động
user = User(name="HPN", email="hpn@test.com", age=30)
# Type coercion
user = User(name="HPN", email="hpn@test.com", age="30") # age: int = 30
# Validation error
# User(name="", email="invalid", age=-1) # ValidationError!
Pydantic JSON Serialization
python
from pydantic import BaseModel
from datetime import datetime
class Event(BaseModel):
name: str
timestamp: datetime
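# ⚠️ `class Config` + `json_encoders` là cú pháp Pydantic v1 (deprecated trong v2).
# Pydantic v2 tự serialize datetime thành ISO 8601 trong model_dump_json();
# nếu cần format tùy chỉnh, dùng @field_serializer thay cho json_encoders.
class Config:
json_encoders = {
datetime: lambda v: v.isoformat()
}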
event = Event(name="Deploy", timestamp=datetime.now())
# Serialize to dict/JSON
event.model_dump() # {'name': 'Deploy', 'timestamp': datetime(...)}
event.model_dump_json() # '{"name": "Deploy", "timestamp": "2024-..."}'
# Parse from dict/JSON
Event.model_validate({"name": "Test", "timestamp": "2024-01-01T00:00:00"})
Event.model_validate_json('{"name": "Test", "timestamp": "2024-01-01T00:00:00"}')
So sánh Tổng hợp
| Feature | dataclass | attrs | Pydantic |
|---|---|---|---|
| Built-in | ✅ | ❌ | ❌ |
| Validation | ❌ Manual | ✅ Basic | ✅ Advanced |
| Type coercion | ❌ | ❌ | ✅ |
| JSON support | ❌ | ⚠️ cattrs | ✅ Built-in |
| Performance | Fast | Faster | Slower* |
| Use case | Simple data | Complex data | API/Config |
*Pydantic v2 đã cải thiện performance đáng kể
Khi nào dùng gì?
python
# ✅ DATACLASS: Internal data structures, simple DTOs
@dataclass
class Point:
x: float
y: float
# ✅ ATTRS: Complex validation, performance-critical
@attrs.define
class User:
name: str = attrs.field(validator=validators.instance_of(str))
age: int = attrs.field(validator=validators.ge(0))
# ✅ PYDANTIC: API models, config parsing, JSON handling
class APIResponse(BaseModel):
status: str
data: dict
timestamp: datetime
Advanced Patterns
Pattern 1: Factory Methods
python
from dataclasses import dataclass
from typing import Self # Python 3.11+
@dataclass
class User:
name: str
email: str
role: str = "user"
@classmethod
def admin(cls, name: str, email: str) -> Self:
"""Factory method cho admin user."""
return cls(name=name, email=email, role="admin")
@classmethod
def from_dict(cls, data: dict) -> Self:
"""Factory method từ dictionary."""
return cls(**data)
admin = User.admin("HPN", "hpn@test.com")
user = User.from_dict({"name": "Test", "email": "test@test.com"})
Pattern 2: Immutable with Replace
python
from dataclasses import dataclass, replace
@dataclass(frozen=True)
class Config:
host: str
port: int
debug: bool = False
config = Config("localhost", 8080)
# Tạo copy với một số fields thay đổi
dev_config = replace(config, debug=True)
prod_config = replace(config, host="prod.example.com", port=443)
print(dev_config) # Config(host='localhost', port=8080, debug=True)
print(prod_config) # Config(host='prod.example.com', port=443, debug=False)
Pattern 3: Serialization
python
from dataclasses import dataclass, asdict, astuple
import json
@dataclass
class User:
name: str
email: str
age: int = 0
user = User("HPN", "hpn@test.com", 30)
# Convert to dict
user_dict = asdict(user)
# {'name': 'HPN', 'email': 'hpn@test.com', 'age': 30}
# Convert to tuple
user_tuple = astuple(user)
# ('HPN', 'hpn@test.com', 30)
# JSON serialization
json_str = json.dumps(asdict(user))
# '{"name": "HPN", "email": "hpn@test.com", "age": 30}'
# JSON deserialization
data = json.loads(json_str)
user_restored = User(**data)
Pattern 4: Nested Dataclasses
python
from dataclasses import dataclass, field, asdict
from typing import List
@dataclass
class Address:
street: str
city: str
country: str = "Vietnam"
@dataclass
class User:
name: str
email: str
addresses: List[Address] = field(default_factory=list)
user = User(
name="HPN",
email="hpn@test.com",
addresses=[
Address("123 Main St", "HCMC"),
Address("456 Side St", "Hanoi")
]
)
# asdict handles nested dataclasses
user_dict = asdict(user)
# {
# 'name': 'HPN',
# 'email': 'hpn@test.com',
# 'addresses': [
# {'street': '123 Main St', 'city': 'HCMC', 'country': 'Vietnam'},
# {'street': '456 Side St', 'city': 'Hanoi', 'country': 'Vietnam'}
# ]
# }
Production Pitfalls
Pitfall 1: Mutable Default Values
python
from dataclasses import dataclass, field
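# ❌ BUG: Mutable default value
# Với class thường, default `[]` sẽ bị chia sẻ giữa mọi instance.
# @dataclass phát hiện list/dict/set và báo lỗi ngay khi định nghĩa class:
# @dataclass
# class User:
# name: str
# tags: list = [] # ValueError: mutable default <class 'list'> for field tags is not allowed: use default_factory
# ⚠️ Các mutable object khác (ví dụ instance của class tự viết) KHÔNG bị phát hiện
# và vẫn bị chia sẻ giữa các instances → luôn dùng default_factory.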
# ✅ FIX: Use default_factory
@dataclass
class User:
name: str
tags: list = field(default_factory=list)
Pitfall 2: Hashability với Mutable Fields
python
from dataclasses import dataclass, field
# ❌ BUG: Mutable dataclass không hashable
@dataclass
class User:
name: str
tags: list = None
user = User("HPN", [])
# {user} # TypeError: unhashable type: 'User'
# ✅ FIX 1: frozen=True (nhưng không thể modify)
@dataclass(frozen=True)
class User:
name: str
# ✅ FIX 2: unsafe_hash=True (cẩn thận!)
@dataclass(unsafe_hash=True)
class User:
name: str
tags: list = field(default_factory=list, hash=False)
Pitfall 3: __post_init__ với Inheritance
python
from dataclasses import dataclass
@dataclass
class Base:
name: str
def __post_init__(self):
print("Base __post_init__")
@dataclass
class Child(Base):
age: int
def __post_init__(self):
# ❌ BUG: Quên gọi super()
print("Child __post_init__")
child = Child("HPN", 30)
# Chỉ in "Child __post_init__"
# Base's __post_init__ không được gọi!
# ✅ FIX: Gọi super().__post_init__()
@dataclass
class Child(Base):
age: int
def __post_init__(self):
super().__post_init__()
print("Child __post_init__")
Pitfall 4: slots với Inheritance
python
from dataclasses import dataclass
# ❌ BUG: Parent không có slots, child có slots
@dataclass
class Base:
name: str
@dataclass(slots=True)
class Child(Base):
age: int
child = Child("HPN", 30)
child.dynamic = "value" # Vẫn hoạt động! (từ Base's __dict__)
# ✅ FIX: Cả parent và child đều có slots
@dataclass(slots=True)
class Base:
name: str
@dataclass(slots=True)
class Child(Base):
age: int
Pitfall 5: asdict với Non-Serializable Fields
python
from dataclasses import dataclass, asdict, field
from datetime import datetime
import json
@dataclass
class Event:
name: str
timestamp: datetime
event = Event("Deploy", datetime.now())
# ❌ BUG: datetime không JSON serializable
# json.dumps(asdict(event)) # TypeError!
# ✅ FIX: Custom dict factory
def custom_asdict(obj):
def convert(o):
if isinstance(o, datetime):
return o.isoformat()
return o
return {k: convert(v) for k, v in asdict(obj).items()}
json.dumps(custom_asdict(event)) # OK!
Quick Reference
python
from dataclasses import dataclass, field, asdict, astuple, replace
# === BASIC ===
@dataclass
class User:
name: str
age: int = 0
# === IMMUTABLE ===
@dataclass(frozen=True)
class Config:
host: str
port: int
# === MEMORY OPTIMIZED (3.10+) ===
@dataclass(slots=True)
class Point:
x: float
y: float
# === FIELD OPTIONS ===
@dataclass
class Example:
items: list = field(default_factory=list) # Mutable default
computed: int = field(init=False) # Not in __init__
internal: str = field(default="", repr=False) # Not in __repr__ (cần default vì đứng sau field có default)
metadata: dict = field(default_factory=dict, compare=False) # Not in __eq__
# === POST INIT ===
@dataclass
class Validated:
value: int
def __post_init__(self):
if self.value < 0:
raise ValueError("Must be positive")
# === UTILITIES ===
asdict(obj) # Convert to dict
astuple(obj) # Convert to tuple
replace(obj, **kw) # Copy with changes
Cross-links
- Prerequisites: Classes & Objects, Type Hinting
- Related: Magic Methods - `__slots__`, `__eq__`, `__hash__`
- See Also: collections - namedtuple comparison
- See Also: functools - Functional programming patterns