Giao diện
Memory Optimization Advanced
RAM không miễn phí - Tối ưu memory = Tiết kiệm tiền + Tăng throughput
Learning Outcomes
Sau khi hoàn thành trang này, bạn sẽ:
- 🎯 Sử dụng slots để giảm memory footprint của objects
- 🎯 Chọn đúng giữa generators vs lists cho memory efficiency
- 🎯 Dùng memory_profiler để tìm memory hogs
- 🎯 Phát hiện và fix memory leaks
- 🎯 Áp dụng memory optimization patterns trong production
Tại Sao Memory Matters?
┌─────────────────────────────────────────────────────────┐
│ Memory Impact Analysis │
├─────────────────────────────────────────────────────────┤
│ 1. Cost → Cloud RAM = $$$ │
│ 2. Throughput → Less memory = More concurrent │
│ 3. GC Pressure → Less objects = Less GC pauses │
│ 4. Cache → Smaller data = Better cache hits │
│ 5. Swap → Exceed RAM = Disk swap = SLOW │
└─────────────────────────────────────────────────────────┘
Memory Hierarchy
┌─────────────────────────────────────────────────────────┐
│ L1 Cache │ ~1ns │ 64KB │ Fastest │
├──────────────┼──────────┼────────────┼─────────────────┤
│ L2 Cache │ ~4ns │ 256KB │ │
├──────────────┼──────────┼────────────┼─────────────────┤
│ L3 Cache │ ~12ns │ 8MB │ │
├──────────────┼──────────┼────────────┼─────────────────┤
│ RAM │ ~100ns │ 16-64GB │ │
├──────────────┼──────────┼────────────┼─────────────────┤
│ SSD │ ~100µs │ TB │ │
├──────────────┼──────────┼────────────┼─────────────────┤
│ HDD │ ~10ms │ TB │ Slowest │
└─────────────────────────────────────────────────────────┘
__slots__ - Memory Savings
Problem: dict Overhead
Mỗi Python object có __dict__ để store attributes. Dict có overhead lớn.
python
import sys
class PointWithDict:
    """2-D point storing attributes in a regular per-instance ``__dict__``."""

    def __init__(self, x: float, y: float):
        self.x = x
        self.y = y

point = PointWithDict(1.0, 2.0)
# The object header is small, but the attribute dict adds large overhead.
print(sys.getsizeof(point))           # 48 bytes (object)  — CPython, varies by build
print(sys.getsizeof(point.__dict__))  # 104 bytes (dict)
# Total: ~152 bytes per point!
Solution: __slots__
python
import sys
class PointWithSlots:
    """2-D point using ``__slots__``: attributes live in fixed slots, no ``__dict__``."""

    __slots__ = ('x', 'y')

    def __init__(self, x: float, y: float):
        self.x = x
        self.y = y

point = PointWithSlots(1.0, 2.0)
print(sys.getsizeof(point))  # 48 bytes total! (no separate attribute dict)
# hasattr(point, '__dict__') # False - no dict!
Memory Comparison
python
import sys
from pympler import asizeof # pip install pympler
# Baseline: attributes live in a per-instance __dict__ (one extra dict per object).
class UserWithDict:
def __init__(self, id: int, name: str, email: str):
self.id = id
self.name = name
self.email = email
# Slotted variant: fixed attribute set, no per-instance __dict__.
class UserWithSlots:
__slots__ = ('id', 'name', 'email')
def __init__(self, id: int, name: str, email: str):
self.id = id
self.name = name
self.email = email
# Create 1 million users of each kind so the per-instance overhead adds up
users_dict = [UserWithDict(i, f"User{i}", f"user{i}@example.com")
for i in range(1_000_000)]
users_slots = [UserWithSlots(i, f"User{i}", f"user{i}@example.com")
for i in range(1_000_000)]
# Memory usage - pympler's asizeof reports DEEP size (sys.getsizeof is shallow)
print(f"With __dict__: {asizeof.asizeof(users_dict) / 1024 / 1024:.1f} MB")
print(f"With __slots__: {asizeof.asizeof(users_slots) / 1024 / 1024:.1f} MB")
# Typical results:
# With __dict__: ~400 MB
# With __slots__: ~150 MB (62% reduction!)
__slots__ với Inheritance
python
class Base:
    """Slotted base class; each level of the hierarchy declares only its own slots."""

    __slots__ = ('x',)


class Derived(Base):
    __slots__ = ('y',)  # only add the NEW slots at this level

    def __init__(self, x, y):
        self.x = x
        self.y = y


# ⚠️ PITFALL: if any ancestor lacks __slots__, instances still get a __dict__
class Parent:
    pass  # has __dict__


class Child(Parent):
    __slots__ = ('x',)  # a __dict__ is still inherited from Parent!


child = Child()
child.x = 1
child.y = 2 # Works! __dict__ từ Parent
__slots__ Limitations
python
class SlottedClass:
__slots__ = ('x', 'y')
obj = SlottedClass()
# NOTE: each ❌ line below raises when executed - try them one at a time.
# ❌ Cannot add attributes outside the declared slots
obj.z = 3 # AttributeError!
# ❌ No __dict__ attribute exists on slotted instances
obj.__dict__ # AttributeError!
# ❌ Cannot create weak references to slotted instances by default
import weakref
weakref.ref(obj) # TypeError!
# ✅ FIX: include '__weakref__' in the slots tuple to re-enable weak refs
class SlottedWithWeakref:
__slots__ = ('x', 'y', '__weakref__')
When to Use __slots__
python
# ✅ USE __slots__ when:
# - Creating millions of instances
# - Memory is constrained
# - Attributes are fixed and known
# ❌ AVOID __slots__ when:
# - Need dynamic attributes
# - Using multiple inheritance with non-slotted classes
# - Need __dict__ for serialization
# - Class is rarely instantiated
Generators vs Lists
Memory Comparison
python
import sys

# A list materializes every value up front.
numbers_list = [i ** 2 for i in range(1_000_000)]
print(f"List size: {sys.getsizeof(numbers_list) / 1024 / 1024:.1f} MB")
# ~8 MB (shallow size of the list object; the int objects are extra)

# A generator produces its values lazily, one at a time.
numbers_gen = (i ** 2 for i in range(1_000_000))
print(f"Generator size: {sys.getsizeof(numbers_gen)} bytes")
# ~200 bytes!
Generator Patterns
python
# Pattern 1: File processing
def read_large_file(path: str):
    """Stream the file at *path*, yielding one stripped line at a time.

    Keeps memory constant regardless of file size: only one line is
    held at a time, and the file is closed when iteration finishes.
    """
    with open(path, 'r') as f:
        for line in f:
            yield line.strip()
# ❌ BAD: readlines() materializes every line of the file at once
lines = open('huge.txt').readlines() # Memory explosion!
# ✅ GOOD: Stream lines lazily
# NOTE(review): illustrative only - requires an actual 'huge.txt' and a
# process() callback defined elsewhere.
for line in read_large_file('huge.txt'):
process(line)
# Pattern 2: Data transformation pipeline
def pipeline(data):
    """Lazily transform *data*: double each item, add 1, keep results > 10.

    Each stage is a generator, so no intermediate list is ever built;
    values flow through one at a time when the result is consumed.
    """
    doubled = (item * 2 for item in data)
    incremented = (item + 1 for item in doubled)
    return (item for item in incremented if item > 10)
# Pattern 3: Infinite sequences
def fibonacci():
    """Yield the Fibonacci sequence (0, 1, 1, 2, 3, ...) forever.

    Constant memory: only the two most recent values are ever stored.
    """
    current, nxt = 0, 1
    while True:
        yield current
        current, nxt = nxt, current + nxt

# Take a finite prefix of the infinite stream with islice
from itertools import islice
first_100 = list(islice(fibonacci(), 100))
itertools for Memory Efficiency
python
from itertools import chain, islice, groupby, filterfalse
# chain - iterate several iterables back-to-back without building a new list
combined = chain(range(1000), range(1000, 2000))
# NOT: list(range(1000)) + list(range(1000, 2000))
# islice - take a slice lazily, without materializing the source
# NOTE(review): huge_generator is a placeholder - define it before running.
first_10 = islice(huge_generator, 10)
# NOT: list(huge_generator)[:10]
# filterfalse - keeps items where the predicate is FALSY (x % 2 == 0 -> evens)
evens = filterfalse(lambda x: x % 2, range(1000000))
# NOT: [x for x in range(1000000) if x % 2 == 0]
When to Use Generators
python
# ✅ USE generators when:
# - Processing large datasets
# - Data is consumed once
# - Memory is constrained
# - Building pipelines
# ❌ USE lists when:
# - Need random access (data[500])
# - Need to iterate multiple times
# - Need len()
# - Data is small
memory_profiler
Installation
bash
pip install memory_profiler
Line-by-Line Memory Profiling
python
# script.py
# @profile marks the function for line-by-line memory reporting when run
# via `python -m memory_profiler script.py` (third-party package).
from memory_profiler import profile
@profile
def memory_hungry_function():
# Line 1: allocate a large list (~1M ints)
data = [i ** 2 for i in range(1_000_000)]
# Line 2: duplicate it - resident memory roughly doubles here
data_copy = data.copy()
# Line 3: drop the original so its memory can be reclaimed
del data
# Line 4: reduce the copy to a single number
result = sum(data_copy)
return result
if __name__ == "__main__":
memory_hungry_function()
bash
python -m memory_profiler script.py
Output Explained
Line # Mem usage Increment Occurrences Line Contents
=============================================================
4 50.0 MiB 50.0 MiB 1 @profile
5 def memory_hungry_function():
6 88.5 MiB 38.5 MiB 1 data = [i ** 2 for i in range(1_000_000)]
7 127.0 MiB 38.5 MiB 1 data_copy = data.copy()
8 88.5 MiB -38.5 MiB 1 del data
9 88.5 MiB 0.0 MiB 1 result = sum(data_copy)
10 88.5 MiB 0.0 MiB 1 return result
| Column | Meaning |
|---|---|
Mem usage | Total memory at this line |
Increment | Memory change from previous line |
Occurrences | Times line was executed |
mprof - Memory Over Time
bash
# Record memory usage over time
mprof run script.py
# Plot results
mprof plot
# Clean up
mprof clean
Programmatic Usage
python
from memory_profiler import memory_usage
def my_function():
# Builds a ~1M element list, then collapses it to one int.
data = [i for i in range(1_000_000)]
return sum(data)
# Measure memory usage: memory_usage samples the process RSS while the
# callable runs; the (func,) tuple form means "call with no arguments".
mem_usage = memory_usage((my_function,))
print(f"Peak memory: {max(mem_usage):.1f} MiB")
# With interval: sample every 0.1 s instead of the default period
mem_usage = memory_usage((my_function,), interval=0.1)
Memory Leak Detection
Common Leak Patterns
Pattern 1: Growing Collections
python
# ❌ LEAK: module-level cache grows without bound (one entry per distinct key)
# NOTE(review): expensive_fetch is a placeholder for the real data source.
cache = {}
def get_data(key):
if key not in cache:
cache[key] = expensive_fetch(key)
return cache[key]
# ✅ FIX: bounded LRU cache - least-recently-used entries are evicted
# once maxsize is reached, so memory stays capped
from functools import lru_cache
@lru_cache(maxsize=1000)
def get_data(key):
return expensive_fetch(key)
Pattern 2: Circular References
python
# ❌ LEAK: circular reference (parent <-> child) combined with __del__
class Node:
def __init__(self):
self.children = []
self.parent = None
def add_child(self, child):
self.children.append(child)
child.parent = self # Circular!
# NOTE(review): since Python 3.4 (PEP 442) the cyclic GC can collect such
# cycles, but __del__ timing is nondeterministic and may be delayed.
def __del__(self):
print("Deleted") # May never be called!
# ✅ FIX: hold the parent via a weak reference so cycles don't keep it alive
import weakref
class Node:
def __init__(self):
self.children = []
self._parent = None # holds a weakref.ref to the parent, or None
@property
def parent(self):
# Dereference the weakref; yields None if the parent was collected
return self._parent() if self._parent else None
@parent.setter
def parent(self, node):
self._parent = weakref.ref(node) if node else None
Pattern 3: Event Handlers
python
# ❌ LEAK: Handler keeps object alive
class Button:
handlers = [] # Class-level list! Shared by every Button, lives forever
def on_click(self, handler):
self.handlers.append(handler)
class Window:
def __init__(self, button):
button.on_click(self.handle_click) # bound method strongly refs self -> Window never GC'd!
def handle_click(self):
pass
# ✅ FIX: per-instance handler list plus an explicit unsubscribe hook
class Button:
def __init__(self):
self.handlers = [] # Instance-level
def on_click(self, handler):
self.handlers.append(handler)
def remove_handler(self, handler):
self.handlers.remove(handler)
tracemalloc - Built-in Leak Detection
python
import tracemalloc
# Start tracing allocations - must run BEFORE the code you want to measure
tracemalloc.start()
# Your code here
data = [i ** 2 for i in range(100000)]
# Take a snapshot of all currently live traced allocations
snapshot = tracemalloc.take_snapshot()
# Aggregate allocations by source line to find the top consumers
top_stats = snapshot.statistics('lineno')
print("Top 10 memory consumers:")
for stat in top_stats[:10]:
print(stat)
# Compare snapshots to find leaks: diff two points in time; steadily
# growing entries between snapshots are leak suspects
snapshot1 = tracemalloc.take_snapshot()
# ... more code ...
snapshot2 = tracemalloc.take_snapshot()
top_stats = snapshot2.compare_to(snapshot1, 'lineno')
print("\nMemory changes:")
for stat in top_stats[:10]:
print(stat)
objgraph - Object Graph Visualization
bash
pip install objgraph
python
import objgraph
# Show most common types (live object counts by class, highest first)
objgraph.show_most_common_types(limit=10)
# Show growth between calls - the second call prints only the delta
objgraph.show_growth()
# ... code that might leak ...
objgraph.show_growth()
# Find what's keeping an object alive (renders an image; needs graphviz)
# NOTE(review): my_object is a placeholder - bind it to the object under
# investigation before running these lines.
objgraph.show_backrefs(my_object, filename='refs.png')
# Find the reference chain from a module-level root down to the object
objgraph.show_chain(
objgraph.find_backref_chain(
my_object,
objgraph.is_proper_module
),
filename='chain.png'
)
)
Memory Optimization Patterns
Pattern 1: Lazy Loading
python
class LazyLoader:
    """Defer reading a file until its contents are first requested."""

    def __init__(self, path: str):
        self.path = path
        self._data = None  # cache; filled on first access

    @property
    def data(self):
        """File contents, read lazily on first access and cached after."""
        if self._data is None:
            self._data = self._load()
        return self._data

    def _load(self):
        # One-shot read; runs at most once per instance.
        with open(self.path) as f:
            return f.read()

# Nothing is read yet - construction is cheap even for a huge file.
loader = LazyLoader('huge_file.txt')
# ... later ...
print(loader.data) # Now it loads
Pattern 2: Object Pooling
python
from typing import TypeVar, Generic
from collections import deque

T = TypeVar('T')


class ObjectPool(Generic[T]):
    """Recycle objects instead of allocating a new one per use.

    ``factory`` is a zero-argument callable producing a fresh object;
    ``max_size`` caps the pool so the pool itself cannot leak memory.
    """

    def __init__(self, factory, max_size: int = 100):
        self.factory = factory
        self.max_size = max_size
        self.pool: deque[T] = deque()

    def acquire(self) -> T:
        """Hand out a pooled object, or build a new one if the pool is empty."""
        return self.pool.pop() if self.pool else self.factory()

    def release(self, obj: T) -> None:
        """Return *obj* to the pool; silently dropped once the pool is full."""
        if len(self.pool) < self.max_size:
            self.pool.append(obj)


# Usage
buffer_pool = ObjectPool(lambda: bytearray(4096))
buffer = buffer_pool.acquire()
# ... use buffer ...
buffer_pool.release(buffer) # Return to pool
Pattern 3: Memory-Mapped Files
python
import mmap
def process_large_file(path: str):
"""Search and read a file through a memory map, without loading it all.

The OS pages data in on demand, so memory stays bounded.
"""
with open(path, 'r+b') as f:
# Memory-map the file (length 0 = map the entire file)
mm = mmap.mmap(f.fileno(), 0)
# Substring search runs over the mapped pages, not a loaded copy
if mm.find(b'pattern') != -1:
print("Found!")
# Random access: jump to an offset and read a window
# NOTE(review): assumes the file is at least ~1000 bytes - seek past
# EOF raises ValueError, and mapping an empty file also fails.
mm.seek(1000)
chunk = mm.read(100)
mm.close()
Pattern 4: Streaming Processing
python
import json
from typing import Iterator, Any
def stream_json_array(path: str) -> Iterator[Any]:
"""Stream JSON array items one at a time without loading the whole file."""
import ijson # pip install ijson (third-party incremental JSON parser)
with open(path, 'rb') as f:
# 'item' selects each element of the top-level JSON array
for item in ijson.items(f, 'item'):
yield item
# Process 10GB JSON file with constant memory
# NOTE(review): requires 'huge_data.json' and a process() callback.
for record in stream_json_array('huge_data.json'):
process(record)
Pattern 5: Compact Data Structures
python
import array
import struct

# array - compact homogeneous numeric storage
# A list of ints costs ~28 bytes per int object, plus a pointer each.
numbers_list = [1, 2, 3, 4, 5]
# array.array('i') stores raw 4-byte C ints with no per-item object overhead.
numbers_array = array.array('i', [1, 2, 3, 4, 5])

# struct - pack heterogeneous values into raw bytes
# The tuple of boxed objects costs ~100+ bytes in total.
data_tuple = (42, 3.14, True)
# Packed layout 'if?': 4 (int) + 4 (float32) + 1 (bool) = 9 bytes.
# NOTE: 'f' is a 32-bit float, so 3.14 is stored with reduced precision.
packed = struct.pack('if?', 42, 3.14, True)
unpacked = struct.unpack('if?', packed)
Production Pitfalls
Pitfall 1: String Interning Assumptions
python
# ❌ PROBLEM: Assuming strings are interned (identity used for comparison)
a = "hello"
b = "hello"
print(a is b) # True here - CPython interns short identifier-like literals
a = "hello world"
b = "hello world"
print(a is b) # Implementation detail - may be False!
# ✅ FIX: Use == for string comparison (compares values, not identity)
print(a == b) # Always correct
Pitfall 2: Large Default Arguments
python
# ❌ PROBLEM: the default dict is created once, at def time
def process(data, cache={}):  # every call shares this ONE dict!
    if data in cache:
        return cache[data]
    result = expensive_compute(data)
    cache[data] = result  # the shared cache grows forever!
    return result

# ✅ FIX: default to None and create a fresh dict inside the call
def process(data, cache=None):
    if cache is None:
        cache = {}
# ...
Pitfall 3: Holding References in Exceptions
python
# ❌ PROBLEM: Exception tracebacks reference the frame and all its locals
# NOTE(review): load_huge_data/transform are placeholders for real helpers.
def process_data():
huge_data = load_huge_data() # 1GB
try:
result = transform(huge_data)
except Exception as e:
# e.__traceback__ keeps this frame (and huge_data) alive while e lives
raise
# ✅ FIX: drop the big local before re-raising so it can be reclaimed
def process_data():
huge_data = load_huge_data()
try:
result = transform(huge_data)
except Exception as e:
del huge_data # Explicit cleanup
raise
Pitfall 4: Pandas Memory Explosion
python
import pandas as pd
# ❌ PROBLEM: read_csv infers wide default dtypes
df = pd.read_csv('data.csv')
# int64 for all integers (8 bytes each)
# object for strings (pointer + full Python string object per cell)
# ✅ FIX: declare narrower dtypes up front
df = pd.read_csv('data.csv', dtype={
'id': 'int32', # 4 bytes instead of 8
'count': 'int16', # 2 bytes
'category': 'category', # Categorical: small int codes + one lookup table
'flag': 'bool', # 1 byte
})
# ✅ FIX: chunked reads - only one chunk is resident at a time
for chunk in pd.read_csv('huge.csv', chunksize=10000):
process(chunk)
Quick Reference
python
# === __slots__ === (fixed attribute set, no per-instance __dict__)
class Optimized:
__slots__ = ('x', 'y', '__weakref__')
# === Generators === (lazy evaluation, constant memory)
gen = (x ** 2 for x in range(1000000))
from itertools import chain, islice
# === memory_profiler === (third-party, line-by-line memory report)
from memory_profiler import profile
@profile
def func(): ...
# python -m memory_profiler script.py
# === tracemalloc === (stdlib allocation tracing)
import tracemalloc
tracemalloc.start()
snapshot = tracemalloc.take_snapshot()
top_stats = snapshot.statistics('lineno')
# === objgraph === (third-party object-graph inspection)
import objgraph
objgraph.show_most_common_types()
objgraph.show_growth()
# === Size checking === (obj is a placeholder for any object)
import sys
sys.getsizeof(obj) # Shallow size (does not follow references)
from pympler import asizeof
asizeof.asizeof(obj) # Deep size (follows contained objects)
Cross-links
- Prerequisites: Memory Model - Reference counting, GC
- Previous: Profiling - CPU profiling
- Next: C Extensions & Cython - Native performance
- Related: Generators - Lazy evaluation