Giao diện
Property-Based Testing Nâng cao
Property-Based Testing = Để máy tìm bugs cho bạn = Test thông minh hơn, không phải nhiều hơn
Learning Outcomes
Sau khi hoàn thành trang này, bạn sẽ:
- ✅ Hiểu khái niệm property-based testing vs example-based testing
- ✅ Sử dụng Hypothesis library để viết property tests
- ✅ Tạo custom strategies và composite strategies
- ✅ Áp dụng stateful testing cho complex systems
- ✅ Tìm edge cases tự động mà manual testing bỏ sót
Property-Based Testing là gì?
Example-based testing (traditional): Bạn nghĩ ra các test cases cụ thể.
Property-based testing: Bạn định nghĩa properties (tính chất) mà code phải thỏa mãn, framework tự động generate hàng trăm test cases.
python
# Example-based: Bạn nghĩ ra cases
def test_sort_examples():
    """Check sort() against a few hand-picked inputs (example-based style)."""
    # (input, expected output) pairs, checked in the order listed
    cases = [
        ([3, 1, 2], [1, 2, 3]),
        ([], []),
        ([1], [1]),
    ]
    for unsorted, expected in cases:
        assert sort(unsorted) == expected
# Bạn có nghĩ đến [1, 1, 1]? [-1, 0, 1]? [10**100]?
# Property-based: Định nghĩa tính chất
from hypothesis import given
from hypothesis import strategies as st
@given(st.lists(st.integers()))
def test_sort_properties(xs):
result = sort(xs)
# Property 1: Output có cùng length
assert len(result) == len(xs)
# Property 2: Output được sắp xếp
assert all(result[i] <= result[i+1] for i in range(len(result)-1))
# Property 3: Output chứa cùng elements
assert sorted(result) == sorted(xs)
Tại sao Property-Based Testing?
| Aspect | Example-Based | Property-Based |
|---|---|---|
| Coverage | Limited by imagination | Explores edge cases automatically |
| Maintenance | Many test cases to maintain | Few properties, many generated cases |
| Bug finding | Finds expected bugs | Finds unexpected bugs |
| Documentation | Shows specific examples | Documents invariants |
Hypothesis Library
Hypothesis là property-based testing library phổ biến nhất cho Python:
bash
pip install hypothesis
Basic Usage
python
from hypothesis import given, settings, example
from hypothesis import strategies as st
@given(st.integers(), st.integers())
def test_addition_commutative(a, b):
"""Addition is commutative: a + b == b + a"""
assert a + b == b + a
@given(st.integers(), st.integers(), st.integers())
def test_addition_associative(a, b, c):
"""Addition is associative: (a + b) + c == a + (b + c)"""
assert (a + b) + c == a + (b + c)
@given(st.integers())
def test_addition_identity(a):
"""Zero is identity element: a + 0 == a"""
assert a + 0 == a
Explicit Examples với @example
python
from hypothesis import given, example
from hypothesis import strategies as st
@given(st.text())
@example("") # Always test empty string
@example("hello") # Always test simple case
@example("a" * 10000) # Always test long string
def test_string_reverse(s):
"""Reversing twice returns original."""
assert s[::-1][::-1] == s
Settings Configuration
python
from hypothesis import given, settings, Verbosity
from hypothesis import strategies as st
@given(st.integers())
@settings(
    verbosity=Verbosity.verbose,  # show all generated examples
    deadline=None,  # no time limit per example
    max_examples=500,  # run 500 examples (default: 100)
)
def test_with_settings(n):
    """Multiplying any generated integer by zero gives zero."""
    product = n * 0
    assert product == 0
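# NOTE: this is not a settings profile. Hypothesis only accepts @settings on test functions or
# on RuleBasedStateMachine subclasses; on a plain class like the one below it raises
# InvalidArgument. Move the decorator onto each test method, or register a profile with
# settings.register_profile() and activate it with settings.load_profile().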
@settings(max_examples=1000)
class TestIntensive:
@given(st.integers())
def test_intensive(self, n):
pass
Strategies (Generators)
Strategies định nghĩa cách generate test data:
Built-in Strategies
python
from hypothesis import strategies as st
# Primitives
st.integers() # Any integer
st.integers(min_value=0) # Non-negative
st.integers(min_value=1, max_value=100) # Range
st.floats() # Any float (including inf, nan)
st.floats(allow_nan=False, allow_infinity=False) # Finite only
st.floats(min_value=0.0, max_value=1.0) # Range
st.booleans() # True or False
st.text() # Any unicode string
st.text(min_size=1, max_size=100) # Length constraints
st.text(alphabet="abc123") # Limited alphabet
st.binary() # Bytes
st.binary(min_size=1, max_size=1024)
# None
st.none() # Always None
# Collections
st.lists(st.integers()) # List of integers
st.lists(st.integers(), min_size=1, max_size=10)
st.lists(st.integers(), unique=True) # No duplicates
st.sets(st.integers()) # Set of integers
st.frozensets(st.integers())
st.dictionaries(
keys=st.text(min_size=1),
values=st.integers()
)
st.tuples(st.integers(), st.text(), st.booleans()) # Fixed structure
Combining Strategies
python
from hypothesis import strategies as st
# One of multiple strategies
st.one_of(st.integers(), st.text(), st.none())
# Optional (value or None)
st.integers() | st.none() # Same as one_of
# Sampled from list
st.sampled_from(["red", "green", "blue"])
st.sampled_from(MyEnum) # From enum
# Just a constant
st.just(42)
st.just(None)
# Build from other strategies
st.builds(
MyClass,
name=st.text(min_size=1),
age=st.integers(min_value=0, max_value=150)
)
Filtering Strategies
python
from hypothesis import strategies as st, assume
from hypothesis import given
# Filter strategy: keep only the draws accepted by the predicate
def _is_even(x):
    """Return True when x is divisible by two."""
    return x % 2 == 0


even_integers = st.integers().filter(_is_even)


@given(even_integers)
def test_even(n):
    """Every value produced by even_integers leaves no remainder modulo 2."""
    remainder = n % 2
    assert remainder == 0
# assume() inside test (less efficient)
@given(st.integers())
def test_with_assume(n):
assume(n > 0) # Skip if n <= 0
assert n > 0
Mapping Strategies
python
from hypothesis import strategies as st
# Derive perfect squares by mapping over the positive integers
positive_integers = st.integers(min_value=1)
squares = positive_integers.map(lambda value: value ** 2)


@given(squares)
def test_squares(n):
    """The integer square root of a generated value squares back to that value."""
    import math

    root = math.isqrt(n)
    assert root ** 2 == n
# Chain transformations
emails = st.text(
alphabet="abcdefghijklmnopqrstuvwxyz",
min_size=1,
max_size=10
).map(lambda s: f"{s}@example.com")
@given(emails)
def test_email_format(email):
assert "@" in email
assert email.endswith("@example.com")
Composite Strategies
Composite strategies cho phép tạo complex data structures:
Basic Composite
python
from hypothesis import strategies as st
from hypothesis import given
from dataclasses import dataclass
@dataclass
class User:
    """Plain user record; instances are built by the users() composite strategy."""

    id: int  # users() draws this with min_value=1
    name: str  # users() draws 1-50 characters of arbitrary text
    email: str  # users() formats this as "<local part>@example.com"
    is_active: bool  # users() draws this from st.booleans()
# Composite strategy
@st.composite
def users(draw):
    """Build a User whose fields are each drawn from their own strategy.

    The e-mail address is generated independently of the name: a random
    lowercase-alphanumeric local part (1-20 characters) followed by
    "@example.com".
    """
    local_part_strategy = st.text(
        alphabet="abcdefghijklmnopqrstuvwxyz0123456789",
        min_size=1,
        max_size=20,
    )
    # Draws happen in this order: id, name, e-mail local part, active flag
    drawn_id = draw(st.integers(min_value=1))
    drawn_name = draw(st.text(min_size=1, max_size=50))
    local_part = draw(local_part_strategy)
    active = draw(st.booleans())
    return User(
        id=drawn_id,
        name=drawn_name,
        email=f"{local_part}@example.com",
        is_active=active,
    )
@given(users())
def test_user_email(user):
assert "@" in user.email
assert user.id > 0
Dependent Data Generation
python
from hypothesis import strategies as st
from hypothesis import given
@st.composite
def sorted_pair(draw):
    """Produce a tuple (low, high) of integers with low <= high."""
    low = draw(st.integers())
    # The second draw is bounded below by the first, so the ordering holds by construction
    high = draw(st.integers(min_value=low))
    return low, high
@given(sorted_pair())
def test_sorted_pair(pair):
a, b = pair
assert a <= b
@st.composite
def non_empty_list_with_element(draw):
    """Produce (items, member) where member is sampled from the list items."""
    # min_size=1 ensures sampled_from always has something to choose from
    items = draw(st.lists(st.integers(), min_size=1))
    member = draw(st.sampled_from(items))
    return items, member
@given(non_empty_list_with_element())
def test_element_in_list(data):
xs, element = data
assert element in xs
Complex Domain Objects
python
from hypothesis import strategies as st
from hypothesis import given
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import List
@dataclass
class Order:
    """Order record; instances are produced by the orders() composite strategy."""

    id: str  # orders() draws 8 characters from A-Z and 0-9
    customer_id: int  # orders() draws a value in 1..10000
    items: List[dict]  # orders() fills each dict with "product_id", "price", "quantity"
    total: float  # orders() sets this to the sum of price * quantity, rounded to 2 places
    created_at: datetime  # orders() draws a datetime between 2020-01-01 and 2025-12-31
    status: str  # orders() samples from pending / processing / shipped / delivered
@st.composite
def orders(draw):
    """Build an Order whose total agrees with its generated line items."""

    def draw_line_item():
        """Draw one item dict; price and quantity are drawn before product_id."""
        unit_price = draw(st.floats(min_value=0.01, max_value=1000.0))
        count = draw(st.integers(min_value=1, max_value=100))
        return {
            "product_id": draw(st.integers(min_value=1)),
            "price": round(unit_price, 2),
            "quantity": count,
        }

    order_id = draw(
        st.text(
            alphabet="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
            min_size=8,
            max_size=8,
        )
    )
    customer_id = draw(st.integers(min_value=1, max_value=10000))
    item_count = draw(st.integers(min_value=1, max_value=10))

    # Accumulate the total from the rounded prices so it matches what the items report
    line_items = []
    running_total = 0.0
    for _ in range(item_count):
        line = draw_line_item()
        line_items.append(line)
        running_total += line["price"] * line["quantity"]

    created_at = draw(
        st.datetimes(
            min_value=datetime(2020, 1, 1),
            max_value=datetime(2025, 12, 31),
        )
    )
    status = draw(
        st.sampled_from(["pending", "processing", "shipped", "delivered"])
    )

    return Order(
        id=order_id,
        customer_id=customer_id,
        items=line_items,
        total=round(running_total, 2),
        created_at=created_at,
        status=status,
    )
@given(orders())
def test_order_total_matches_items(order):
"""Order total should match sum of item prices."""
calculated_total = sum(
item["price"] * item["quantity"]
for item in order.items
)
assert abs(order.total - calculated_total) < 0.01 # Float tolerance
Common Property Patterns
1. Round-Trip (Encode/Decode)
python
from hypothesis import given
from hypothesis import strategies as st
import json
# Leaf values used by the JSON round-trip test
_json_scalars = st.one_of(st.integers(), st.text(), st.booleans(), st.none())


@given(st.dictionaries(keys=st.text(min_size=1), values=_json_scalars))
def test_json_roundtrip(data):
    """Serialising a flat dict to JSON and parsing it back gives an equal dict."""
    assert json.loads(json.dumps(data)) == data
# Custom serialization
@given(users()) # From earlier example
def test_user_serialization_roundtrip(user):
"""User serialize then deserialize returns equivalent."""
serialized = user.to_dict()
deserialized = User.from_dict(serialized)
assert deserialized == user
2. Invariants
python
from hypothesis import given
from hypothesis import strategies as st
@given(st.lists(st.integers()))
def test_sort_preserves_length(xs):
"""Sorting preserves list length."""
assert len(sorted(xs)) == len(xs)
@given(st.lists(st.integers()))
def test_sort_preserves_elements(xs):
    """Sorting keeps the same multiset of elements (compared via Counter)."""
    from collections import Counter

    after = Counter(sorted(xs))
    before = Counter(xs)
    assert after == before
@given(st.lists(st.integers()))
def test_sort_is_sorted(xs):
"""Sorted list is actually sorted."""
result = sorted(xs)
for i in range(len(result) - 1):
assert result[i] <= result[i + 1]
3. Idempotence
python
from hypothesis import given
from hypothesis import strategies as st
@given(st.text())
def test_strip_idempotent(s):
"""Stripping twice equals stripping once."""
assert s.strip().strip() == s.strip()
@given(st.lists(st.integers()))
def test_sort_idempotent(xs):
"""Sorting twice equals sorting once."""
assert sorted(sorted(xs)) == sorted(xs)
@given(st.sets(st.integers()))
def test_set_union_idempotent(s):
"""Union with self equals self."""
assert s | s == s
4. Commutativity
python
from hypothesis import given
from hypothesis import strategies as st
@given(st.integers(), st.integers())
def test_addition_commutative(a, b):
assert a + b == b + a
@given(st.integers(), st.integers())
def test_multiplication_commutative(a, b):
assert a * b == b * a
@given(st.sets(st.integers()), st.sets(st.integers()))
def test_set_union_commutative(a, b):
assert a | b == b | a
5. Inverse Operations
python
from hypothesis import given, assume
from hypothesis import strategies as st
@given(st.integers(), st.integers())
def test_add_subtract_inverse(a, b):
"""Subtraction is inverse of addition."""
assert (a + b) - b == a
@given(st.integers(min_value=1), st.integers(min_value=1))
def test_multiply_divide_inverse(a, b):
    """Floor division by b undoes multiplication by b for positive integers.

    Both strategies use min_value=1, so b is never zero.
    """
    # No assume() guard here: Python ints have arbitrary precision, so there is
    # no overflow to avoid, and filtering on the asserted condition itself would
    # discard every counterexample and make the test unable to fail.
    assert (a * b) // b == a
Stateful Testing
Stateful testing kiểm tra sequences of operations:
Rule-Based State Machines
python
from hypothesis import strategies as st
from hypothesis.stateful import RuleBasedStateMachine, rule, invariant, precondition
class SetMachine(RuleBasedStateMachine):
    """Apply identical operations to MyCustomSet and a built-in set, then compare them."""

    def __init__(self):
        super().__init__()
        # The built-in set is the trusted model; MyCustomSet is the code under test
        self.model = set()
        self.actual = MyCustomSet()

    @rule(value=st.integers())
    def add(self, value):
        """Insert value into the model first, then into the implementation under test."""
        for target_set in (self.model, self.actual):
            target_set.add(value)

    @rule(value=st.integers())
    def remove(self, value):
        """Remove value from both sets; do nothing when the model does not hold it."""
        if value not in self.model:
            return
        self.model.remove(value)
        self.actual.remove(value)

    @rule(value=st.integers())
    def contains(self, value):
        """Membership answers of the two sets must agree."""
        expected = value in self.model
        observed = value in self.actual
        assert expected == observed

    @invariant()
    def size_matches(self):
        """Both sets report the same length after every step."""
        model_size = len(self.model)
        actual_size = len(self.actual)
        assert model_size == actual_size

    @invariant()
    def contents_match(self):
        """Iterating the implementation yields exactly the model's elements."""
        observed_elements = set(self.actual)
        assert observed_elements == self.model
# Run the state machine
TestSetMachine = SetMachine.TestCase
Database State Machine
python
from hypothesis import strategies as st
from hypothesis.stateful import Bundle, RuleBasedStateMachine, consumes, invariant, rule
class DatabaseMachine(RuleBasedStateMachine):
    """Stateful test comparing a database against a dict of expected users.

    Ids returned by create_user are stored in the ``users`` bundle so that
    later rules operate on users that actually exist. ``expected_users``
    mirrors what the database should contain after each step.
    """

    users = Bundle("users")

    def __init__(self):
        super().__init__()
        self.db = TestDatabase()
        # user_id -> {"name": ..., "email": ...}
        self.expected_users = {}

    @rule(target=users, name=st.text(min_size=1), email=st.emails())
    def create_user(self, name, email):
        """Create a user, record the expected data, and add the id to the bundle."""
        user_id = self.db.create_user(name=name, email=email)
        self.expected_users[user_id] = {"name": name, "email": email}
        return user_id

    @rule(user_id=users, new_name=st.text(min_size=1))
    def update_user_name(self, user_id, new_name):
        """Rename an existing user in the database and in the expected data."""
        self.db.update_user(user_id, name=new_name)
        self.expected_users[user_id]["name"] = new_name

    @rule(user_id=users)
    def get_user(self, user_id):
        """Fetch a user and check name and email against the expected data."""
        user = self.db.get_user(user_id)
        expected = self.expected_users[user_id]
        assert user["name"] == expected["name"]
        assert user["email"] == expected["email"]

    # consumes() removes the drawn id from the bundle. Without it a deleted id
    # could be handed to update_user_name / get_user / delete_user again, and the
    # lookup in expected_users would raise KeyError instead of testing the database.
    @rule(user_id=consumes(users))
    def delete_user(self, user_id):
        """Delete a user and retire its id so no later rule can draw it."""
        self.db.delete_user(user_id)
        del self.expected_users[user_id]

    @invariant()
    def user_count_matches(self):
        """The database holds exactly as many users as the expected data."""
        assert self.db.count_users() == len(self.expected_users)
TestDatabaseMachine = DatabaseMachine.TestCase
Finding Edge Cases Automatically
Shrinking
Khi Hypothesis tìm thấy failing case, nó shrinks để tìm minimal example:
python
from hypothesis import given, settings
from hypothesis import strategies as st
def buggy_function(xs):
    """Return sum(xs), deliberately raising ValueError for more than five elements.

    The size limit is the planted bug that the shrinking example is meant to find.
    """
    element_count = len(xs)
    if element_count <= 5:
        return sum(xs)
    raise ValueError("Too many elements")
@given(st.lists(st.integers()))
def test_buggy_function(xs):
result = buggy_function(xs)
assert isinstance(result, int)
# Hypothesis finds: [0, 0, 0, 0, 0, 0]
# Not: [847293, -12938, 0, 999, -1, 42, 7]
# Shrinking finds minimal failing case!
Example Database
Hypothesis lưu failing examples để reproduce:
python
# .hypothesis/examples/ directory stores failing cases
# Force specific example
from hypothesis import given, example
from hypothesis import strategies as st
@given(st.integers())
@example(0) # Always test zero
@example(-1) # Always test negative
@example(2**31 - 1) # Always test max int32
def test_with_explicit_examples(n):
pass
Targeting
Guide Hypothesis toward interesting values:
python
from hypothesis import given, target
from hypothesis import strategies as st
@given(st.lists(st.integers(), min_size=1))
def test_with_targeting(xs):
"""Target larger lists to find edge cases."""
target(float(len(xs))) # Hypothesis will try to maximize this
result = process_list(xs)
assert result is not None
Production Pitfalls ⚠️
1. Flaky Tests từ Non-Determinism
python
# ❌ BAD: Test depends on current time
from hypothesis import given
from hypothesis import strategies as st
from datetime import datetime
@given(st.integers())
def test_flaky(n):
# Fails randomly based on current second
assert datetime.now().second != 30
# ✅ GOOD: Mock time or avoid time dependency
from unittest.mock import patch
@given(st.integers())
def test_deterministic(n):
with patch("datetime.datetime") as mock_dt:
mock_dt.now.return_value = datetime(2024, 1, 1, 12, 0, 0)
# Test logic here
2. Slow Tests từ Complex Strategies
python
# ❌ BAD: Expensive strategy
@given(st.lists(st.lists(st.lists(st.integers()))))
def test_slow(nested):
pass # Very slow!
# ✅ GOOD: Limit size
@given(st.lists(
st.lists(
st.integers(),
max_size=10
),
max_size=10
))
def test_faster(nested):
pass
3. Assume Overuse
python
# ❌ BAD: Too many assumes = slow tests
@given(st.integers(), st.integers())
def test_with_many_assumes(a, b):
assume(a > 0)
assume(b > 0)
assume(a != b)
assume(a + b < 100)
# Most generated values are rejected!
# ✅ GOOD: Use appropriate strategies
@given(
st.integers(min_value=1, max_value=49),
st.integers(min_value=1, max_value=49)
)
def test_with_strategies(a, b):
assume(a != b) # Only one assume needed
assert a + b < 100
4. Missing Invariants
python
# ❌ BAD: Only testing happy path
@given(st.lists(st.integers()))
def test_incomplete(xs):
result = my_sort(xs)
assert len(result) == len(xs) # Missing: is it actually sorted?
# ✅ GOOD: Test all invariants
@given(st.lists(st.integers()))
def test_complete(xs):
result = my_sort(xs)
# Invariant 1: Same length
assert len(result) == len(xs)
# Invariant 2: Same elements
assert sorted(result) == sorted(xs)
# Invariant 3: Actually sorted
assert all(result[i] <= result[i+1] for i in range(len(result)-1))
Bảng Tóm tắt
python
# === BASIC USAGE ===
from hypothesis import given, example, assume, settings
from hypothesis import strategies as st
@given(st.integers())
def test_property(n):
assert property_holds(n)
# === COMMON STRATEGIES ===
st.integers()
st.floats()
st.text()
st.booleans()
st.none()
st.lists(st.integers())
st.dictionaries(keys=st.text(), values=st.integers())
st.one_of(st.integers(), st.text())
st.sampled_from(["a", "b", "c"])
# === COMPOSITE STRATEGIES ===
@st.composite
def my_strategy(draw):
x = draw(st.integers())
y = draw(st.integers(min_value=x))
return (x, y)
# === COMMON PROPERTIES ===
# Round-trip: decode(encode(x)) == x
# Invariant: len(sort(xs)) == len(xs)
# Idempotent: f(f(x)) == f(x)
# Commutative: f(a, b) == f(b, a)
# Inverse: f_inv(f(x)) == x
# === STATEFUL TESTING ===
from hypothesis.stateful import RuleBasedStateMachine, rule, invariant
class MyMachine(RuleBasedStateMachine):
@rule(x=st.integers())
def do_something(self, x):
pass
@invariant()
def check_invariant(self):
assert condition
Cross-links
- Prerequisites: Pytest Fundamentals, Fixtures & Mocking
- Next: Test Architecture - Unit vs Integration vs E2E
- Related: Type Hinting - Types help generate better test data
- Related: Protocols & ABCs - Protocol-based testing