Giao diện
functools & operator Standard Library
Functional programming tools - viết code ngắn gọn, hiệu quả hơn
Learning Outcomes
Sau khi hoàn thành trang này, bạn sẽ:
- 🎯 Sử dụng lru_cache và cache để memoization
- 🎯 Apply partial cho function currying
- 🎯 Hiểu khi nào dùng reduce vs comprehensions
- 🎯 Implement singledispatch cho function overloading
- 🎯 Tránh các Production Pitfalls với caching
Caching với lru_cache
Basic Usage
python
from functools import lru_cache
@lru_cache(maxsize=128)
def fibonacci(n: int) -> int:
"""Fibonacci với memoization - O(n) thay vì O(2^n)."""
if n < 2:
return n
return fibonacci(n - 1) + fibonacci(n - 2)
# Không có cache: fibonacci(35) mất ~5 giây
# Với cache: fibonacci(35) mất ~0.00001 giây
# Xem cache stats
print(fibonacci.cache_info())
# CacheInfo(hits=33, misses=36, maxsize=128, currsize=36)
# Clear cache
fibonacci.cache_clear()
cache vs lru_cache
python
from functools import cache, lru_cache
# cache (Python 3.9+): Unlimited cache, simpler
@cache
def expensive_computation(x: int) -> int:
return x ** 2
# lru_cache: Limited size, LRU eviction
@lru_cache(maxsize=128)
def limited_cache(x: int) -> int:
return x ** 2
# lru_cache(maxsize=None) = cache
@lru_cache(maxsize=None)
def unlimited_cache(x: int) -> int:
return x ** 2
Cache Parameters
python
from functools import lru_cache
@lru_cache(maxsize=256, typed=True)
def process(value):
"""
maxsize: Số entries tối đa (None = unlimited)
typed: True = cache riêng cho int(1) và float(1.0)
"""
return expensive_operation(value)
# typed=True example
@lru_cache(maxsize=128, typed=True)
def typed_func(x):
return x * 2
typed_func(1) # Cache miss
typed_func(1.0) # Cache miss (different type!)
typed_func(1) # Cache hit
Caching với Hashable Arguments
python
from functools import lru_cache
# ❌ ERROR: List không hashable
@lru_cache
def process_list(items: list) -> int:
return sum(items)
process_list([1, 2, 3]) # TypeError: unhashable type: 'list'
# ✅ SỬA: Convert to tuple
@lru_cache
def process_items(items: tuple) -> int:
return sum(items)
process_items(tuple([1, 2, 3])) # OK
# ✅ BETTER: Wrapper function
def process_list(items: list) -> int:
return _process_items(tuple(items))
@lru_cache
def _process_items(items: tuple) -> int:
return sum(items)
Cache với Methods
python
from functools import lru_cache, cached_property
class DataProcessor:
def __init__(self, data: list):
self.data = data
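# ❌ PROBLEM: lru_cache trên method dùng self làm một phần của cache key
# Cache là một cache chung cho mọi instance và giữ strong reference tới self → instance không được garbage-collect (memory leak); ngoài ra sẽ TypeError nếu class define __eq__ mà không có __hash__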
# ✅ SỬA 1: cached_property (Python 3.8+)
@cached_property
def processed_data(self) -> list:
"""Computed once, cached on instance."""
return [x * 2 for x in self.data]
# ✅ SỬA 2: Cache static/class method
@staticmethod
@lru_cache(maxsize=128)
def expensive_calc(value: int) -> int:
return value ** 2
# cached_property usage
processor = DataProcessor([1, 2, 3])
result1 = processor.processed_data # Computed
result2 = processor.processed_data # Cached (same object)
partial - Function Currying
Basic Usage
python
from functools import partial
def power(base: int, exponent: int) -> int:
return base ** exponent
# Tạo specialized functions
square = partial(power, exponent=2)
cube = partial(power, exponent=3)
square(5) # 25
cube(5) # 125
# Equivalent to:
def square(base: int) -> int:
return power(base, exponent=2)
partial Patterns
python
from functools import partial
import json
# Pattern 1: Configure functions
json_dumps = partial(json.dumps, indent=2, ensure_ascii=False)
config = {"name": "Việt Nam", "code": "VN"}
print(json_dumps(config))
# Pattern 2: Callback với arguments
def log_message(level: str, message: str) -> None:
print(f"[{level}] {message}")
log_info = partial(log_message, "INFO")
log_error = partial(log_message, "ERROR")
log_info("Application started")
log_error("Connection failed")
# Pattern 3: Event handlers
def handle_click(button_id: str, event: dict) -> None:
print(f"Button {button_id} clicked: {event}")
# Bind button_id, event sẽ được pass khi gọi
button1_handler = partial(handle_click, "btn_submit")
button1_handler({"x": 100, "y": 200})
partial vs lambda
python
from functools import partial
# Cả hai đều tạo function mới với bound arguments
add = lambda x, y: x + y
# Lambda
add_5_lambda = lambda x: add(x, 5)
# partial
add_5_partial = partial(add, y=5)
# Khác biệt:
# 1. partial KHÔNG tự có __name__, __doc__ của original function (dùng functools.update_wrapper nếu cần copy metadata)
# 2. partial có .func, .args, .keywords attributes
# 3. partial nhanh hơn một chút (no closure lookup)
print(add_5_partial.func) # <function add>
print(add_5_partial.keywords) # {'y': 5}
partialmethod cho Methods
python
from functools import partialmethod
class Cell:
def __init__(self):
self._alive = False
def set_state(self, state: bool) -> None:
self._alive = state
# Tạo specialized methods
set_alive = partialmethod(set_state, True)
set_dead = partialmethod(set_state, False)
cell = Cell()
cell.set_alive()
print(cell._alive) # True
reduce - Fold Operations
Basic Usage
python
from functools import reduce
from operator import add, mul
# reduce(function, iterable, initializer)
numbers = [1, 2, 3, 4, 5]
# Sum
total = reduce(add, numbers) # 15
# Equivalent: ((((1+2)+3)+4)+5)
# Product
product = reduce(mul, numbers) # 120
# With initializer
total = reduce(add, numbers, 10) # 25 (10 + 1 + 2 + 3 + 4 + 5)
# Empty iterable với initializer
reduce(add, [], 0) # 0 (returns initializer)
reduce vs Built-ins
python
from functools import reduce
from operator import add, mul
numbers = [1, 2, 3, 4, 5]
# ❌ AVOID: reduce khi có built-in
total = reduce(add, numbers) # Dùng sum(numbers)
product = reduce(mul, numbers) # Dùng math.prod(numbers)
maximum = reduce(max, numbers) # Dùng max(numbers)
joined = reduce(lambda a, b: a + b, ["a", "b", "c"]) # Dùng "".join()
# ✅ USE reduce khi: Custom accumulation logic
from typing import TypeVar
T = TypeVar('T')
def compose(*functions):
    """Compose callables right-to-left: compose(f, g, h)(x) == f(g(h(x))).

    Args:
        *functions: Single-argument callables; the last one is applied first.

    Returns:
        A single-argument callable. With no functions given, the identity
        function is returned rather than raising, because reduce() over an
        empty sequence without an initializer raises TypeError.
    """
    def _chain(outer, inner):
        # Build a callable that applies `inner` first, then `outer`.
        return lambda x: outer(inner(x))

    # Seeding the fold with identity keeps compose() well-defined when empty
    # and does not change the result for one or more functions.
    return reduce(_chain, functions, lambda x: x)
# Pipeline
pipeline = compose(str.upper, str.strip, lambda s: s.replace("-", " "))
pipeline(" hello-world ") # "HELLO WORLD"
reduce Patterns
python
from functools import reduce
# Pattern 1: Flatten nested lists
nested = [[1, 2], [3, 4], [5, 6]]
flat = reduce(lambda acc, lst: acc + lst, nested, [])
# [1, 2, 3, 4, 5, 6]
# ⚠️ Better: [item for sublist in nested for item in sublist]
# Pattern 2: Dict merging
dicts = [{"a": 1}, {"b": 2}, {"c": 3}]
merged = reduce(lambda acc, d: {**acc, **d}, dicts, {})
# {"a": 1, "b": 2, "c": 3}
# ⚠️ Python 3.9+: dict1 | dict2 | dict3
# Pattern 3: Running maximum
from itertools import accumulate
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
running_max = list(accumulate(numbers, max))
# [3, 3, 4, 4, 5, 9, 9, 9]
singledispatch - Function Overloading
Basic Usage
python
from functools import singledispatch
@singledispatch
def process(data):
"""Default implementation."""
raise NotImplementedError(f"Cannot process {type(data)}")
@process.register(str)
def _(data: str) -> str:
return data.upper()
@process.register(int)
def _(data: int) -> int:
return data * 2
@process.register(list)
def _(data: list) -> list:
return [process(item) for item in data]
# Usage
process("hello") # "HELLO"
process(5) # 10
process([1, "a", 2]) # [2, "A", 4]
process(3.14) # NotImplementedError
Type Annotations Registration
python
from functools import singledispatch
from typing import Union
@singledispatch
def serialize(obj) -> str:
return str(obj)
# Python 3.7+: Register via type annotation
@serialize.register
def _(obj: dict) -> str:
import json
return json.dumps(obj)
@serialize.register
def _(obj: list) -> str:
return f"[{', '.join(serialize(item) for item in obj)}]"
@serialize.register
def _(obj: bool) -> str:
return "true" if obj else "false"
@serialize.register
def _(obj: type(None)) -> str:
return "null"
singledispatchmethod cho Classes
python
from functools import singledispatchmethod
class Formatter:
@singledispatchmethod
def format(self, data) -> str:
return str(data)
@format.register(int)
def _(self, data: int) -> str:
return f"{data:,}" # Thousands separator
@format.register(float)
def _(self, data: float) -> str:
return f"{data:.2f}"
@format.register(list)
def _(self, data: list) -> str:
return " | ".join(self.format(item) for item in data)
formatter = Formatter()
formatter.format(1000000) # "1,000,000"
formatter.format(3.14159) # "3.14"
formatter.format([1000, 2.5]) # "1,000 | 2.50"
operator Module
Operator Functions
python
from operator import add, sub, mul, truediv, floordiv, mod, pow
from operator import eq, ne, lt, le, gt, ge
from operator import and_, or_, not_, xor
# Arithmetic
add(5, 3) # 8
mul(5, 3) # 15
truediv(7, 2) # 3.5
floordiv(7, 2) # 3
# Comparison
lt(3, 5) # True
ge(5, 5) # True
# Logical (trailing underscore vì and/or là keywords)
and_(True, False) # False
or_(True, False) # True
itemgetter và attrgetter
python
from operator import itemgetter, attrgetter
# itemgetter - Access by index/key
data = [
{"name": "Alice", "age": 30, "score": 85},
{"name": "Bob", "age": 25, "score": 92},
{"name": "Charlie", "age": 35, "score": 78},
]
# Sort by single key
sorted(data, key=itemgetter("age"))
# [{'name': 'Bob', ...}, {'name': 'Alice', ...}, {'name': 'Charlie', ...}]
# Sort by multiple keys
sorted(data, key=itemgetter("score", "age"))
# Get multiple values
get_name_score = itemgetter("name", "score")
get_name_score(data[0]) # ('Alice', 85)
# attrgetter - Access attributes
from dataclasses import dataclass
@dataclass
class Person:
name: str
age: int
people = [Person("Alice", 30), Person("Bob", 25)]
sorted(people, key=attrgetter("age"))
# Nested attribute access (dotted path) - kết quả ở đây là bound method, phải gọi get_name_length(p)() mới ra độ dài
get_name_length = attrgetter("name.__len__")
methodcaller
python
from operator import methodcaller
# Call method on objects
strings = [" hello ", " world ", " python "]
# Without methodcaller
stripped = [s.strip() for s in strings]
# With methodcaller
strip = methodcaller("strip")
stripped = list(map(strip, strings))
# With arguments
replace_spaces = methodcaller("replace", " ", "_")
replace_spaces("hello world") # "hello_world"
# Useful với sorted
words = ["banana", "Apple", "cherry"]
sorted(words, key=methodcaller("lower"))
# ['Apple', 'banana', 'cherry']
wraps - Decorator Metadata
Problem Without wraps
python
def my_decorator(func):
def wrapper(*args, **kwargs):
"""Wrapper docstring."""
return func(*args, **kwargs)
return wrapper
@my_decorator
def greet(name: str) -> str:
"""Greet a person by name."""
return f"Hello, {name}!"
# ❌ Metadata bị mất
print(greet.__name__) # "wrapper" (không phải "greet")
print(greet.__doc__) # "Wrapper docstring." (không phải original)
Solution với wraps
python
from functools import wraps
def my_decorator(func):
@wraps(func) # Copy metadata từ func sang wrapper
def wrapper(*args, **kwargs):
"""Wrapper docstring."""
return func(*args, **kwargs)
return wrapper
@my_decorator
def greet(name: str) -> str:
"""Greet a person by name."""
return f"Hello, {name}!"
# ✅ Metadata được preserve
print(greet.__name__) # "greet"
print(greet.__doc__) # "Greet a person by name."
# Access original function
print(greet.__wrapped__) # <function greet at ...>
wraps với Class Decorators
python
from functools import wraps
def class_decorator(cls):
@wraps(cls, updated=[]) # updated=[] vì class không có __dict__ update
class Wrapped(cls):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._wrapped = True
return Wrapped
@class_decorator
class MyClass:
"""My class docstring."""
pass
print(MyClass.__name__) # "MyClass"
print(MyClass.__doc__) # "My class docstring."
total_ordering - Comparison Methods
python
from functools import total_ordering
@total_ordering
class Version:
    """Version triple compared by (major, minor, patch).

    Only __eq__ and one ordering method (__lt__) are written by hand;
    total_ordering derives __le__, __gt__ and __ge__ from them.
    """

    def __init__(self, major: int, minor: int, patch: int):
        self.major = major
        self.minor = minor
        self.patch = patch

    def _key(self) -> tuple:
        """Return the tuple shared by equality, ordering and hashing."""
        return (self.major, self.minor, self.patch)

    def __eq__(self, other: object) -> bool:
        # NotImplemented lets Python try the reflected operation and finally
        # fall back to identity comparison for foreign types.
        if not isinstance(other, Version):
            return NotImplemented
        return self._key() == other._key()

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, Version):
            return NotImplemented
        return self._key() < other._key()

    def __hash__(self) -> int:
        # Defining __eq__ sets __hash__ to None, which would make instances
        # unusable in sets and as dict keys. Hash the same tuple that __eq__
        # compares so equal versions hash alike. Do not mutate the fields
        # while an instance is stored in a set or used as a dict key.
        return hash(self._key())
# total_ordering tự động tạo __le__, __gt__, __ge__
v1 = Version(1, 0, 0)
v2 = Version(2, 0, 0)
v1 < v2 # True (defined)
v1 <= v2 # True (auto-generated)
v1 > v2 # False (auto-generated)
v1 >= v2 # False (auto-generated)
Production Pitfalls
Pitfall 1: Cache Memory Leak
python
from functools import lru_cache
# ❌ DANGER: Unbounded cache với user input
@lru_cache(maxsize=None)
def get_user_data(user_id: str) -> dict:
return fetch_from_db(user_id)
# Nếu có millions of unique user_ids → Memory explosion!
# ✅ FIX: Set reasonable maxsize
@lru_cache(maxsize=1000)
def get_user_data(user_id: str) -> dict:
return fetch_from_db(user_id)
# ✅ BETTER: Use TTL cache (cachetools library)
from cachetools import TTLCache
from cachetools.func import ttl_cache
@ttl_cache(maxsize=1000, ttl=300) # 5 minutes TTL
def get_user_data(user_id: str) -> dict:
return fetch_from_db(user_id)
Pitfall 2: Cache với Mutable Return Values
python
from functools import lru_cache
# ❌ DANGER: Returning mutable object
@lru_cache
def get_default_config() -> dict:
return {"debug": False, "timeout": 30}
config = get_default_config()
config["debug"] = True # Modifies cached value!
get_default_config() # Returns {"debug": True, ...} !!!
# ✅ FIX 1: Return immutable
from types import MappingProxyType
@lru_cache
def get_default_config() -> MappingProxyType:
return MappingProxyType({"debug": False, "timeout": 30})
# ✅ FIX 2: Return copy
@lru_cache
def _get_default_config() -> dict:
return {"debug": False, "timeout": 30}
def get_default_config() -> dict:
return _get_default_config().copy()
Pitfall 3: singledispatch với Subclasses
python
from functools import singledispatch
@singledispatch
def process(data):
return f"Unknown: {data}"
@process.register(dict)
def _(data: dict):
return f"Dict: {data}"
# ❌ PROBLEM: OrderedDict là subclass của dict
from collections import OrderedDict
process({"a": 1}) # "Dict: {'a': 1}"
process(OrderedDict(a=1)) # "Dict: OrderedDict([('a', 1)])"
# Có thể không phải behavior mong muốn!
# ✅ FIX: Register specific subclass
@process.register(OrderedDict)
def _(data: OrderedDict):
return f"OrderedDict: {list(data.items())}"
Pitfall 4: partial và Default Arguments
python
from functools import partial
def greet(name: str, greeting: str = "Hello") -> str:
return f"{greeting}, {name}!"
# ❌ CONFUSING: partial overrides defaults
greet_bob = partial(greet, "Bob")
greet_bob() # "Hello, Bob!"
greet_bob(greeting="Hi") # "Hi, Bob!"
# Nhưng:
greet_formal = partial(greet, greeting="Good morning")
greet_formal("Alice") # "Good morning, Alice!"
# ⚠️ Mixing positional và keyword có thể confusing
greet_weird = partial(greet, "Bob", "Hi")
greet_weird() # "Hi, Bob!"
# greet_weird(greeting="Hey") # TypeError: multiple values for 'greeting'
Quick Reference
| Function | Purpose | Example |
|---|---|---|
| @lru_cache | Memoization với LRU eviction | @lru_cache(maxsize=128) |
| @cache | Unlimited memoization (3.9+) | @cache |
| @cached_property | Lazy computed property | @cached_property |
| partial | Bind arguments to function | partial(func, arg1, kwarg=val) |
| partialmethod | partial cho methods | partialmethod(method, arg) |
| reduce | Fold/accumulate operation | reduce(add, [1,2,3]) |
| @singledispatch | Function overloading | @singledispatch |
| @singledispatchmethod | Method overloading | @singledispatchmethod |
| @wraps | Preserve decorator metadata | @wraps(func) |
| @total_ordering | Auto-generate comparisons | @total_ordering |
Cross-links
- Prerequisites: Functions & Closures, Decorators
- Related: Memory Model - Understanding caching impact
- See Also: collections & itertools - Data processing patterns