Skip to content

functools & operator Standard Library

Functional programming tools - viết code ngắn gọn, hiệu quả hơn

Learning Outcomes

Sau khi hoàn thành trang này, bạn sẽ:

  • 🎯 Sử dụng lru_cache/cache để memoization
  • 🎯 Apply partial cho function currying
  • 🎯 Hiểu khi nào dùng reduce vs comprehensions
  • 🎯 Implement singledispatch cho function overloading
  • 🎯 Tránh các Production Pitfalls với caching

Caching với lru_cache

Basic Usage

python
from functools import lru_cache

@lru_cache(maxsize=128)
def fibonacci(n: int) -> int:
    """Return the n-th Fibonacci number.

    Memoization means each n is computed only once, so the recursion makes
    O(n) calls instead of O(2^n).
    """
    # Values below 2 are their own Fibonacci number (base cases).
    return n if n < 2 else fibonacci(n - 1) + fibonacci(n - 2)

# Không có cache: fibonacci(35) mất ~5 giây
# Với cache: fibonacci(35) mất ~0.00001 giây

# Populate the cache first; the statistics below are those of one
# fibonacci(35) call on an empty cache.
fibonacci(35)

# Inspect cache statistics
print(fibonacci.cache_info())
# CacheInfo(hits=33, misses=36, maxsize=128, currsize=36)

# Clear the cache
fibonacci.cache_clear()

cache vs lru_cache

python
from functools import cache, lru_cache

# cache (Python 3.9+): Unlimited cache, simpler
@cache
def expensive_computation(x: int) -> int:
    """Return x squared; results are memoized with no size limit."""
    return pow(x, 2)

# lru_cache: Limited size, LRU eviction
@lru_cache(maxsize=128)
def limited_cache(x: int) -> int:
    """Return x squared; at most 128 results are kept, evicted in LRU order."""
    return pow(x, 2)

# lru_cache(maxsize=None) = cache
@lru_cache(maxsize=None)
def unlimited_cache(x: int) -> int:
    """Return x squared; maxsize=None means the cache never evicts."""
    return pow(x, 2)

Cache Parameters

python
from functools import lru_cache

@lru_cache(maxsize=256, typed=True)
def process(value):
    """Run expensive_operation on value and memoize the result.

    Decorator parameters:
        maxsize: maximum number of cached entries (None = unlimited).
        typed: when True, int(1) and float(1.0) get separate cache entries.
    """
    outcome = expensive_operation(value)
    return outcome

# typed=True example
@lru_cache(maxsize=128, typed=True)
def typed_func(x):
    """Return x doubled; typed=True caches equal values of different types separately."""
    doubled = x * 2
    return doubled

typed_func(1)    # Cache miss
typed_func(1.0)  # Cache miss (different type!)
typed_func(1)    # Cache hit

Caching với Hashable Arguments

python
from functools import lru_cache

# ❌ ERROR: List không hashable
@lru_cache
def process_list(items: list) -> int:
    """Return the sum of items.

    Arguments are used as cache keys and must be hashable, so calling this
    with an actual list raises TypeError.
    """
    total = sum(items)
    return total

process_list([1, 2, 3])  # TypeError: unhashable type: 'list'

# ✅ SỬA: Convert to tuple
@lru_cache
def process_items(items: tuple) -> int:
    """Return the sum of a tuple of numbers; tuples are hashable, so caching works."""
    total = sum(items)
    return total

process_items(tuple([1, 2, 3]))  # OK

# ✅ BETTER: Wrapper function
@lru_cache
def _process_items(items: tuple) -> int:
    """Return the sum of a tuple of numbers, cached per distinct tuple."""
    total = sum(items)
    return total


def process_list(items: list) -> int:
    """Return the sum of a list by passing a hashable tuple to the cached helper."""
    frozen = tuple(items)
    return _process_items(frozen)

Cache với Methods

python
from functools import lru_cache, cached_property

class DataProcessor:
    """Shows safe ways to cache computed values on a class."""

    def __init__(self, data: list):
        self.data = data

    # ❌ PROBLEM: lru_cache on an instance method uses self as part of the key.
    # The cache lives on the function and is shared by ALL instances, and it
    # keeps a strong reference to every self it has seen, so instances cannot
    # be garbage collected while their entries are cached (a memory leak).
    # Plain instances ARE hashable (by identity); only a class that defines
    # __eq__ without __hash__ is unhashable, and then the call raises TypeError.

    # ✅ FIX 1: cached_property (Python 3.8+)
    @cached_property
    def processed_data(self) -> list:
        """Computed once, then stored on the instance."""
        return [x * 2 for x in self.data]

    # ✅ FIX 2: cache a static/class method (no self in the cache key)
    @staticmethod
    @lru_cache(maxsize=128)
    def expensive_calc(value: int) -> int:
        """Return value squared, memoized independently of any instance."""
        return value ** 2

# cached_property usage
processor = DataProcessor([1, 2, 3])
result1 = processor.processed_data  # Computed
result2 = processor.processed_data  # Cached (same object)

partial - Function Currying

Basic Usage

python
from functools import partial

def power(base: int, exponent: int) -> int:
    """Return base raised to the power exponent."""
    return pow(base, exponent)

# Tạo specialized functions
square = partial(power, exponent=2)
cube = partial(power, exponent=3)

square(5)  # 25
cube(5)    # 125

# Equivalent to:
def square(base: int) -> int:
    """Return base squared by delegating to power with exponent fixed at 2."""
    squared = power(base, exponent=2)
    return squared

partial Patterns

python
from functools import partial
import json

# Pattern 1: Configure functions
json_dumps = partial(json.dumps, indent=2, ensure_ascii=False)
config = {"name": "Việt Nam", "code": "VN"}
print(json_dumps(config))

# Pattern 2: Callback với arguments
def log_message(level: str, message: str) -> None:
    """Print message prefixed with its level in square brackets."""
    line = f"[{level}] {message}"
    print(line)

log_info = partial(log_message, "INFO")
log_error = partial(log_message, "ERROR")

log_info("Application started")
log_error("Connection failed")

# Pattern 3: Event handlers
def handle_click(button_id: str, event: dict) -> None:
    """Print which button was clicked together with the event payload."""
    report = f"Button {button_id} clicked: {event}"
    print(report)

# Bind button_id, event sẽ được pass khi gọi
button1_handler = partial(handle_click, "btn_submit")
button1_handler({"x": 100, "y": 200})

partial vs lambda

python
from functools import partial

def add(x, y):
    """Return x + y.

    Written as a named def rather than a lambda bound to a name, so that its
    repr and __name__ read "add".
    """
    return x + y


# Both approaches below create a new callable with y bound to 5.

# Lambda
add_5_lambda = lambda x: add(x, 5)

# partial
add_5_partial = partial(add, y=5)

# Differences:
# 1. A partial object does NOT get __name__/__doc__ from the original
#    function automatically; reach the original through .func.
# 2. partial exposes .func, .args and .keywords for introspection.
# 3. partial is typically slightly faster (implemented in C in CPython).

print(add_5_partial.func)      # <function add at 0x...>
print(add_5_partial.keywords)  # {'y': 5}

partialmethod cho Methods

python
from functools import partialmethod

class Cell:
    """A cell whose alive flag is flipped through pre-bound setter methods."""

    def __init__(self):
        # Every cell starts out dead.
        self._alive = False

    def set_state(self, state: bool) -> None:
        """Store state as the cell's alive flag."""
        self._alive = state

    # partialmethod fixes the state argument; self is still supplied per call.
    set_dead = partialmethod(set_state, False)
    set_alive = partialmethod(set_state, True)

cell = Cell()
cell.set_alive()
print(cell._alive)  # True

reduce - Fold Operations

Basic Usage

python
from functools import reduce
from operator import add, mul

# reduce(function, iterable, initializer)
numbers = [1, 2, 3, 4, 5]

# Sum
total = reduce(add, numbers)  # 15
# Equivalent: ((((1+2)+3)+4)+5)

# Product
product = reduce(mul, numbers)  # 120

# With initializer
total = reduce(add, numbers, 10)  # 25 (10 + 1 + 2 + 3 + 4 + 5)

# Empty iterable với initializer
reduce(add, [], 0)  # 0 (returns initializer)

reduce vs Built-ins

python
from functools import reduce
from operator import add, mul

numbers = [1, 2, 3, 4, 5]

# ❌ AVOID: reduce khi có built-in
total = reduce(add, numbers)  # Dùng sum(numbers)
product = reduce(mul, numbers)  # Dùng math.prod(numbers)
maximum = reduce(max, numbers)  # Dùng max(numbers)
joined = reduce(lambda a, b: a + b, ["a", "b", "c"])  # Dùng "".join()

# ✅ USE reduce khi: Custom accumulation logic
from typing import TypeVar
T = TypeVar('T')

def compose(*functions):
    """Compose functions right to left: compose(f, g, h)(x) == f(g(h(x))).

    Args:
        *functions: single-argument callables; the last one is applied first.

    Returns:
        A callable applying the whole chain. With no functions it returns the
        identity function (reduce alone would raise TypeError on an empty
        sequence); with exactly one function it returns that function as-is.
    """
    if not functions:
        return lambda x: x

    def _chain(outer, inner):
        # Feed inner's result into outer.
        return lambda x: outer(inner(x))

    return reduce(_chain, functions)

# Pipeline
pipeline = compose(str.upper, str.strip, lambda s: s.replace("-", " "))
pipeline("  hello-world  ")  # "HELLO WORLD"

reduce Patterns

python
from functools import reduce

# Pattern 1: Flatten nested lists
nested = [[1, 2], [3, 4], [5, 6]]
flat = reduce(lambda acc, lst: acc + lst, nested, [])
# [1, 2, 3, 4, 5, 6]
# ⚠️ Better: [item for sublist in nested for item in sublist]

# Pattern 2: Dict merging
dicts = [{"a": 1}, {"b": 2}, {"c": 3}]
merged = reduce(lambda acc, d: {**acc, **d}, dicts, {})
# {"a": 1, "b": 2, "c": 3}
# ⚠️ Python 3.9+: dict1 | dict2 | dict3

# Pattern 3: Running maximum
from itertools import accumulate
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
running_max = list(accumulate(numbers, max))
# [3, 3, 4, 4, 5, 9, 9, 9]

singledispatch - Function Overloading

Basic Usage

python
from functools import singledispatch

@singledispatch
def process(data):
    """Fallback for any type that has no registered handler."""
    raise NotImplementedError(f"Cannot process {type(data)}")


@process.register(str)
def _process_str(data: str) -> str:
    """Upper-case a string."""
    return data.upper()


@process.register(int)
def _process_int(data: int) -> int:
    """Double an integer."""
    return data * 2


@process.register(list)
def _process_list(data: list) -> list:
    """Process every element with the handler for that element's own type."""
    return list(map(process, data))

# Usage
process("hello")     # "HELLO"
process(5)           # 10
process([1, "a", 2]) # [2, "A", 4]
process(3.14)        # NotImplementedError

Type Annotations Registration

python
from functools import singledispatch
from typing import Union

@singledispatch
def serialize(obj) -> str:
    """Fallback: render any unregistered type with str()."""
    return str(obj)


# Python 3.7+: the dispatch type is taken from the first parameter's annotation
@serialize.register
def _serialize_dict(obj: dict) -> str:
    """Render a dict as JSON text."""
    import json

    return json.dumps(obj)


@serialize.register
def _serialize_list(obj: list) -> str:
    """Render each item, comma-separated, inside square brackets."""
    rendered = [serialize(item) for item in obj]
    return f"[{', '.join(rendered)}]"


@serialize.register
def _serialize_bool(obj: bool) -> str:
    """Render a bool as lowercase true/false."""
    if obj:
        return "true"
    return "false"


@serialize.register
def _serialize_none(obj: type(None)) -> str:
    """Render None as null."""
    return "null"

singledispatchmethod cho Classes

python
from functools import singledispatchmethod

class Formatter:
    """Formats values for display, dispatching on the argument's type."""

    @singledispatchmethod
    def format(self, data) -> str:
        """Fallback: plain str() for types without a registered handler."""
        return str(data)

    @format.register(int)
    def _format_int(self, data: int) -> str:
        """Render an int with a thousands separator."""
        return f"{data:,}"

    @format.register(float)
    def _format_float(self, data: float) -> str:
        """Render a float with two decimal places."""
        return f"{data:.2f}"

    @format.register(list)
    def _format_list(self, data: list) -> str:
        """Format each item by its own type and join the pieces with ' | '."""
        rendered = [self.format(item) for item in data]
        return " | ".join(rendered)

formatter = Formatter()
formatter.format(1000000)        # "1,000,000"
formatter.format(3.14159)        # "3.14"
formatter.format([1000, 2.5])    # "1,000 | 2.50"

operator Module

Operator Functions

python
from operator import add, sub, mul, truediv, floordiv, mod, pow
from operator import eq, ne, lt, le, gt, ge
from operator import and_, or_, not_, xor

# Arithmetic
add(5, 3)       # 8
mul(5, 3)       # 15
truediv(7, 2)   # 3.5
floordiv(7, 2)  # 3

# Comparison
lt(3, 5)        # True
ge(5, 5)        # True

# Bitwise &, | (trailing underscore vì and/or là keywords) — với bool cho kết quả giống logical and/or, nhưng không short-circuit
and_(True, False)  # False
or_(True, False)   # True

itemgetter và attrgetter

python
from operator import itemgetter, attrgetter

# itemgetter - Access by index/key
data = [
    {"name": "Alice", "age": 30, "score": 85},
    {"name": "Bob", "age": 25, "score": 92},
    {"name": "Charlie", "age": 35, "score": 78},
]

# Sort by single key
sorted(data, key=itemgetter("age"))
# [{'name': 'Bob', ...}, {'name': 'Alice', ...}, {'name': 'Charlie', ...}]

# Sort by multiple keys
sorted(data, key=itemgetter("score", "age"))

# Get multiple values
get_name_score = itemgetter("name", "score")
get_name_score(data[0])  # ('Alice', 85)

# attrgetter - Access attributes
from dataclasses import dataclass

@dataclass
class Person:
    # Plain data record holding a name and an age.
    name: str
    age: int
    
people = [Person("Alice", 30), Person("Bob", 25)]
sorted(people, key=attrgetter("age"))

# Nested (dotted) attribute access — trả về bound method name.__len__, phải gọi get_name_length(person)() để lấy length
get_name_length = attrgetter("name.__len__")

methodcaller

python
from operator import methodcaller

# Call method on objects
strings = ["  hello  ", "  world  ", "  python  "]

# Without methodcaller
stripped = [s.strip() for s in strings]

# With methodcaller
strip = methodcaller("strip")
stripped = list(map(strip, strings))

# With arguments
replace_spaces = methodcaller("replace", " ", "_")
replace_spaces("hello world")  # "hello_world"

# Useful với sorted
words = ["banana", "Apple", "cherry"]
sorted(words, key=methodcaller("lower"))
# ['Apple', 'banana', 'cherry']

wraps - Decorator Metadata

Problem Without wraps

python
def my_decorator(func):
    """Wrap func in a pass-through wrapper WITHOUT copying func's metadata."""

    def wrapper(*args, **kwargs):
        """Wrapper docstring."""
        result = func(*args, **kwargs)
        return result

    return wrapper


@my_decorator
def greet(name: str) -> str:
    """Greet a person by name."""
    greeting = f"Hello, {name}!"
    return greeting

# ❌ Metadata bị mất
print(greet.__name__)  # "wrapper" (không phải "greet")
print(greet.__doc__)   # "Wrapper docstring." (không phải original)

Solution với wraps

python
from functools import wraps

def my_decorator(func):
    """Wrap func in a pass-through wrapper that keeps func's metadata."""

    @wraps(func)  # copies __name__, __doc__, etc. from func onto wrapper
    def wrapper(*args, **kwargs):
        """Wrapper docstring."""
        result = func(*args, **kwargs)
        return result

    return wrapper


@my_decorator
def greet(name: str) -> str:
    """Greet a person by name."""
    greeting = f"Hello, {name}!"
    return greeting

# ✅ Metadata được preserve
print(greet.__name__)  # "greet"
print(greet.__doc__)   # "Greet a person by name."

# Access original function
print(greet.__wrapped__)  # <function greet at ...>

wraps với Class Decorators

python
from functools import wraps

def class_decorator(cls):
    """Return a subclass of cls whose instances carry _wrapped = True.

    The subclass takes over cls's __name__, __doc__ and related metadata.
    """

    class Wrapped(cls):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self._wrapped = True

    # updated=[] skips the __dict__ merge step, which cannot work on a class:
    # a class __dict__ is a read-only mappingproxy with no update() method.
    return wraps(cls, updated=[])(Wrapped)


@class_decorator
class MyClass:
    """My class docstring."""

print(MyClass.__name__)  # "MyClass"
print(MyClass.__doc__)   # "My class docstring."

total_ordering - Comparison Methods

python
from functools import total_ordering

@total_ordering
class Version:
    """A major.minor.patch version.

    Only __eq__ and one ordering method (__lt__) are written by hand;
    total_ordering derives the remaining comparison methods.
    """

    def __init__(self, major: int, minor: int, patch: int):
        self.major, self.minor, self.patch = major, minor, patch

    def __eq__(self, other: "Version") -> bool:
        """Compare component-wise; defer to the other operand for foreign types."""
        if isinstance(other, Version):
            mine = (self.major, self.minor, self.patch)
            theirs = (other.major, other.minor, other.patch)
            return mine == theirs
        return NotImplemented

    def __lt__(self, other: "Version") -> bool:
        """Order lexicographically by (major, minor, patch)."""
        if isinstance(other, Version):
            mine = (self.major, self.minor, self.patch)
            theirs = (other.major, other.minor, other.patch)
            return mine < theirs
        return NotImplemented

# total_ordering tự động tạo __le__, __gt__, __ge__
v1 = Version(1, 0, 0)
v2 = Version(2, 0, 0)

v1 < v2   # True (defined)
v1 <= v2  # True (auto-generated)
v1 > v2   # False (auto-generated)
v1 >= v2  # False (auto-generated)

Production Pitfalls

Pitfall 1: Cache Memory Leak

python
from functools import lru_cache

# ❌ DANGER: Unbounded cache với user input
@lru_cache(maxsize=None)
def get_user_data(user_id: str) -> dict:
    """Fetch a user's record; every distinct user_id stays cached indefinitely."""
    record = fetch_from_db(user_id)
    return record

# Nếu có millions of unique user_ids → Memory explosion!

# ✅ FIX: Set reasonable maxsize
@lru_cache(maxsize=1000)
def get_user_data(user_id: str) -> dict:
    """Fetch a user's record, keeping at most 1000 recently used entries cached."""
    record = fetch_from_db(user_id)
    return record

# ✅ BETTER: Use TTL cache (cachetools library)
from cachetools import TTLCache
from cachetools.func import ttl_cache

@ttl_cache(maxsize=1000, ttl=300)  # ttl=300 seconds, i.e. a 5-minute TTL
def get_user_data(user_id: str) -> dict:
    """Fetch a user's record through a cache bounded by size and by entry age."""
    record = fetch_from_db(user_id)
    return record

Pitfall 2: Cache với Mutable Return Values

python
from functools import lru_cache

# ❌ DANGER: Returning mutable object
@lru_cache
def get_default_config() -> dict:
    """Return the default config dict; every caller gets the SAME cached object."""
    defaults = {"debug": False, "timeout": 30}
    return defaults

config = get_default_config()
config["debug"] = True  # Modifies cached value!

get_default_config()  # Returns {"debug": True, ...} !!!

# ✅ FIX 1: Return immutable
from types import MappingProxyType

@lru_cache
def get_default_config() -> MappingProxyType:
    """Return the cached defaults behind a read-only mapping view."""
    defaults = {"debug": False, "timeout": 30}
    return MappingProxyType(defaults)

# ✅ FIX 2: Return copy
@lru_cache
def _get_default_config() -> dict:
    """Build the defaults once; the cached dict itself is never handed to callers."""
    defaults = {"debug": False, "timeout": 30}
    return defaults


def get_default_config() -> dict:
    """Return a fresh shallow copy so callers cannot mutate the cached dict."""
    return dict(_get_default_config())

Pitfall 3: singledispatch với Subclasses

python
from functools import singledispatch

@singledispatch
def process(data):
    """Fallback for types that have no registered handler."""
    return f"Unknown: {data}"


@process.register(dict)
def _process_dict(data: dict):
    """Handle dict, and any dict subclass that has no handler of its own."""
    return f"Dict: {data}"

# ❌ PROBLEM: OrderedDict is a subclass of dict
from collections import OrderedDict

process({"a": 1})           # "Dict: {'a': 1}"
process(OrderedDict(a=1))   # "Dict: OrderedDict([('a', 1)])" (Python 3.12+: "Dict: OrderedDict({'a': 1})")
# This may not be the behavior you want!

# ✅ FIX: Register specific subclass
@process.register(OrderedDict)
def _process_ordered_dict(data: OrderedDict):
    """Handle OrderedDict separately, rendering its items as a list of pairs."""
    pairs = list(data.items())
    return f"OrderedDict: {pairs}"

Pitfall 4: partial và Default Arguments

python
from functools import partial

def greet(name: str, greeting: str = "Hello") -> str:
    """Return "<greeting>, <name>!"; greeting defaults to "Hello"."""
    message = f"{greeting}, {name}!"
    return message

# ❌ CONFUSING: partial overrides defaults
greet_bob = partial(greet, "Bob")
greet_bob()  # "Hello, Bob!"
greet_bob(greeting="Hi")  # "Hi, Bob!"

# Nhưng:
greet_formal = partial(greet, greeting="Good morning")
greet_formal("Alice")  # "Good morning, Alice!"

# ⚠️ Mixing positional và keyword có thể confusing
greet_weird = partial(greet, "Bob", "Hi")
greet_weird()  # "Hi, Bob!"
# greet_weird(greeting="Hey")  # TypeError: multiple values for 'greeting'

Quick Reference

| Function | Purpose | Example |
|---|---|---|
| @lru_cache | Memoization với LRU eviction | @lru_cache(maxsize=128) |
| @cache | Unlimited memoization (3.9+) | @cache |
| @cached_property | Lazy computed property | @cached_property |
| partial | Bind arguments to function | partial(func, arg1, kwarg=val) |
| partialmethod | partial cho methods | partialmethod(method, arg) |
| reduce | Fold/accumulate operation | reduce(add, [1,2,3]) |
| @singledispatch | Function overloading | @singledispatch |
| @singledispatchmethod | Method overloading | @singledispatchmethod |
| @wraps | Preserve decorator metadata | @wraps(func) |
| @total_ordering | Auto-generate comparisons | @total_ordering |