Skip to content

Memory Optimization Advanced

RAM không miễn phí - Tối ưu memory = Tiết kiệm tiền + Tăng throughput

Learning Outcomes

Sau khi hoàn thành trang này, bạn sẽ:

  • 🎯 Sử dụng __slots__ để giảm memory footprint của objects
  • 🎯 Chọn đúng giữa generators vs lists cho memory efficiency
  • 🎯 Dùng memory_profiler để tìm memory hogs
  • 🎯 Phát hiện và fix memory leaks
  • 🎯 Áp dụng memory optimization patterns trong production

Tại Sao Memory Matters?

┌─────────────────────────────────────────────────────────┐
│                Memory Impact Analysis                   │
├─────────────────────────────────────────────────────────┤
│  1. Cost         →  Cloud RAM = $$$                    │
│  2. Throughput   →  Less memory = More concurrent      │
│  3. GC Pressure  →  Less objects = Less GC pauses      │
│  4. Cache        →  Smaller data = Better cache hits   │
│  5. Swap         →  Exceed RAM = Disk swap = SLOW      │
└─────────────────────────────────────────────────────────┘

Memory Hierarchy

┌─────────────────────────────────────────────────────────┐
│  L1 Cache    │  ~1ns    │  64KB      │  Fastest        │
├──────────────┼──────────┼────────────┼─────────────────┤
│  L2 Cache    │  ~4ns    │  256KB     │                 │
├──────────────┼──────────┼────────────┼─────────────────┤
│  L3 Cache    │  ~12ns   │  8MB       │                 │
├──────────────┼──────────┼────────────┼─────────────────┤
│  RAM         │  ~100ns  │  16-64GB   │                 │
├──────────────┼──────────┼────────────┼─────────────────┤
│  SSD         │  ~100µs  │  TB        │                 │
├──────────────┼──────────┼────────────┼─────────────────┤
│  HDD         │  ~10ms   │  TB        │  Slowest        │
└─────────────────────────────────────────────────────────┘

__slots__ - Memory Savings

Problem: dict Overhead

Mỗi Python object có __dict__ để store attributes. Dict có overhead lớn.

python
import sys

class PointWithDict:
    def __init__(self, x: float, y: float):
        self.x = x
        self.y = y

point = PointWithDict(1.0, 2.0)
print(sys.getsizeof(point))           # 48 bytes (object)
print(sys.getsizeof(point.__dict__))  # 104 bytes (dict)
# Total: ~152 bytes per point!

Solution: __slots__

python
import sys

class PointWithSlots:
    __slots__ = ('x', 'y')
    
    def __init__(self, x: float, y: float):
        self.x = x
        self.y = y

point = PointWithSlots(1.0, 2.0)
print(sys.getsizeof(point))  # 48 bytes total!
# hasattr(point, '__dict__')  # False - no dict!

Memory Comparison

python
import sys
from pympler import asizeof  # pip install pympler

class UserWithDict:
    def __init__(self, id: int, name: str, email: str):
        self.id = id
        self.name = name
        self.email = email

class UserWithSlots:
    __slots__ = ('id', 'name', 'email')
    
    def __init__(self, id: int, name: str, email: str):
        self.id = id
        self.name = name
        self.email = email

# Create 1 million users
users_dict = [UserWithDict(i, f"User{i}", f"user{i}@example.com") 
              for i in range(1_000_000)]
users_slots = [UserWithSlots(i, f"User{i}", f"user{i}@example.com") 
               for i in range(1_000_000)]

# Memory usage
print(f"With __dict__: {asizeof.asizeof(users_dict) / 1024 / 1024:.1f} MB")
print(f"With __slots__: {asizeof.asizeof(users_slots) / 1024 / 1024:.1f} MB")

# Typical results:
# With __dict__: ~400 MB
# With __slots__: ~150 MB (62% reduction!)

__slots__ với Inheritance

python
class Base:
    __slots__ = ('x',)

class Derived(Base):
    __slots__ = ('y',)  # Chỉ thêm slots mới
    
    def __init__(self, x, y):
        self.x = x
        self.y = y

# ⚠️ PITFALL: Nếu parent không có __slots__
class Parent:
    pass  # Có __dict__

class Child(Parent):
    __slots__ = ('x',)  # Vẫn có __dict__ từ Parent!

child = Child()
child.x = 1
child.y = 2  # Works! __dict__ từ Parent

__slots__ Limitations

python
class SlottedClass:
    __slots__ = ('x', 'y')

obj = SlottedClass()

# ❌ Cannot add new attributes
obj.z = 3  # AttributeError!

# ❌ Cannot use __dict__
obj.__dict__  # AttributeError!

# ❌ Cannot use weakref by default
import weakref
weakref.ref(obj)  # TypeError!

# ✅ FIX: Add __weakref__ to slots
class SlottedWithWeakref:
    __slots__ = ('x', 'y', '__weakref__')

When to Use __slots__

python
# ✅ USE __slots__ when:
# - Creating millions of instances
# - Memory is constrained
# - Attributes are fixed and known

# ❌ AVOID __slots__ when:
# - Need dynamic attributes
# - Using multiple inheritance with non-slotted classes
# - Need __dict__ for serialization
# - Class is rarely instantiated

Generators vs Lists

Memory Comparison

python
import sys

# List - stores all values in memory
numbers_list = [i ** 2 for i in range(1_000_000)]
print(f"List size: {sys.getsizeof(numbers_list) / 1024 / 1024:.1f} MB")
# ~8 MB

# Generator - computes on demand
numbers_gen = (i ** 2 for i in range(1_000_000))
print(f"Generator size: {sys.getsizeof(numbers_gen)} bytes")
# ~200 bytes!

Generator Patterns

python
# Pattern 1: File processing
def read_large_file(path: str):
    """Stream the file at *path*, yielding each line stripped of surrounding whitespace.

    Only one line is held in memory at a time, so arbitrarily large files
    can be processed with constant memory.
    """
    with open(path, 'r') as handle:
        yield from (raw.strip() for raw in handle)

# ❌ BAD: Load entire file
lines = open('huge.txt').readlines()  # Memory explosion!

# ✅ GOOD: Stream lines
for line in read_large_file('huge.txt'):
    process(line)


# Pattern 2: Data transformation pipeline
def pipeline(data):
    """Lazily transform *data*: double each value, add one, keep results > 10.

    Every stage is a generator expression, so no intermediate list is ever
    materialized — items flow through one at a time.
    """
    doubled = (item * 2 for item in data)
    incremented = (item + 1 for item in doubled)
    return (item for item in incremented if item > 10)

# Pattern 3: Infinite sequences
def fibonacci():
    """Yield the Fibonacci sequence forever: 0, 1, 1, 2, 3, 5, ...

    Being a generator, it keeps only the last two values in memory; take a
    finite prefix with itertools.islice.
    """
    current, nxt = 0, 1
    while True:
        yield current
        current, nxt = nxt, current + nxt

# Take first 100 Fibonacci numbers
from itertools import islice
first_100 = list(islice(fibonacci(), 100))

itertools for Memory Efficiency

python
from itertools import chain, islice, groupby, filterfalse

# chain - Combine iterables without creating new list
combined = chain(range(1000), range(1000, 2000))
# NOT: list(range(1000)) + list(range(1000, 2000))

# islice - Slice without creating list
first_10 = islice(huge_generator, 10)
# NOT: list(huge_generator)[:10]

# filterfalse - Filter without list
evens = filterfalse(lambda x: x % 2, range(1000000))
# NOT: [x for x in range(1000000) if x % 2 == 0]

When to Use Generators

python
# ✅ USE generators when:
# - Processing large datasets
# - Data is consumed once
# - Memory is constrained
# - Building pipelines

# ❌ USE lists when:
# - Need random access (data[500])
# - Need to iterate multiple times
# - Need len()
# - Data is small

memory_profiler

Installation

bash
pip install memory_profiler

Line-by-Line Memory Profiling

python
# script.py
from memory_profiler import profile

@profile
def memory_hungry_function():
    """Demo workload for line-by-line memory profiling.

    Allocates a 1M-element list, duplicates it, frees the original, and
    sums the copy — so the profiler report shows one allocation, one
    equally-sized increment for the copy, and one matching release.
    """
    # Line 1: Allocate large list
    data = [i ** 2 for i in range(1_000_000)]
    
    # Line 2: Create copy
    data_copy = data.copy()
    
    # Line 3: Delete original
    del data
    
    # Line 4: Process
    result = sum(data_copy)
    
    return result

if __name__ == "__main__":
    memory_hungry_function()
bash
python -m memory_profiler script.py

Output Explained

Line #    Mem usage    Increment  Occurrences   Line Contents
=============================================================
     4     50.0 MiB     50.0 MiB           1   @profile
     5                                         def memory_hungry_function():
     6     88.5 MiB     38.5 MiB           1       data = [i ** 2 for i in range(1_000_000)]
     7    127.0 MiB     38.5 MiB           1       data_copy = data.copy()
     8     88.5 MiB    -38.5 MiB           1       del data
     9     88.5 MiB      0.0 MiB           1       result = sum(data_copy)
    10     88.5 MiB      0.0 MiB           1       return result
Column        Meaning
────────────  ─────────────────────────────────────
Mem usage     Total memory reported at this line
Increment     Memory change from the previous line
Occurrences   Number of times the line was executed

mprof - Memory Over Time

bash
# Record memory usage over time
mprof run script.py

# Plot results
mprof plot

# Clean up
mprof clean

Programmatic Usage

python
from memory_profiler import memory_usage

def my_function():
    """Allocate a million-element list and return its sum (memory-measurement target)."""
    values = list(range(1_000_000))
    return sum(values)

# Measure memory usage
mem_usage = memory_usage((my_function,))
print(f"Peak memory: {max(mem_usage):.1f} MiB")

# With interval
mem_usage = memory_usage((my_function,), interval=0.1)

Memory Leak Detection

Common Leak Patterns

Pattern 1: Growing Collections

python
# ❌ LEAK: Cache grows forever
cache = {}

def get_data(key):
    if key not in cache:
        cache[key] = expensive_fetch(key)
    return cache[key]

# ✅ FIX: Bounded cache
from functools import lru_cache

@lru_cache(maxsize=1000)
def get_data(key):
    """Fetch and memoize the value for *key*.

    maxsize=1000 bounds the cache with LRU eviction, so — unlike an
    unbounded module-level dict — memory cannot grow forever.
    """
    return expensive_fetch(key)

Pattern 2: Circular References

python
# ❌ LEAK: Circular reference với __del__
class Node:
    def __init__(self):
        self.children = []
        self.parent = None
    
    def add_child(self, child):
        self.children.append(child)
        child.parent = self  # Circular!
    
    def __del__(self):
        print("Deleted")  # May never be called!

# ✅ FIX: Use weakref
import weakref

class Node:
    """Tree node whose parent link is held through a weak reference.

    Because the child does not own a strong reference to its parent, a
    parent/child pair forms no reference cycle and both can be collected
    by plain refcounting.
    """

    def __init__(self):
        self.children = []
        # Either None or a weakref.ref to the parent node.
        self._parent = None

    @property
    def parent(self):
        """The parent node, or None if unset or already collected."""
        ref = self._parent
        return ref() if ref is not None else None

    @parent.setter
    def parent(self, node):
        if node is None:
            self._parent = None
        else:
            self._parent = weakref.ref(node)

Pattern 3: Event Handlers

python
# ❌ LEAK: Handler keeps object alive
class Button:
    handlers = []  # Class-level list!
    
    def on_click(self, handler):
        self.handlers.append(handler)

class Window:
    def __init__(self, button):
        button.on_click(self.handle_click)  # Window never GC'd!
    
    def handle_click(self):
        pass

# ✅ FIX: Instance-level handlers + cleanup
class Button:
    """Click source with per-instance handler registration and explicit cleanup.

    Handlers live on the instance (not the class), so they are released
    together with the button, and remove_handler lets callers break the
    reference before that.
    """

    def __init__(self):
        # Instance-level list: registrations die with this button.
        self.handlers = []

    def on_click(self, handler):
        """Register *handler* to be invoked on click."""
        self.handlers.append(handler)

    def remove_handler(self, handler):
        """Unregister *handler*; raises ValueError if it was never registered."""
        self.handlers.remove(handler)

tracemalloc - Built-in Leak Detection

python
import tracemalloc

# Start tracing
tracemalloc.start()

# Your code here
data = [i ** 2 for i in range(100000)]

# Take snapshot
snapshot = tracemalloc.take_snapshot()

# Top memory consumers
top_stats = snapshot.statistics('lineno')
print("Top 10 memory consumers:")
for stat in top_stats[:10]:
    print(stat)

# Compare snapshots to find leaks
snapshot1 = tracemalloc.take_snapshot()
# ... more code ...
snapshot2 = tracemalloc.take_snapshot()

top_stats = snapshot2.compare_to(snapshot1, 'lineno')
print("\nMemory changes:")
for stat in top_stats[:10]:
    print(stat)

objgraph - Object Graph Visualization

bash
pip install objgraph
python
import objgraph

# Show most common types
objgraph.show_most_common_types(limit=10)

# Show growth between calls
objgraph.show_growth()
# ... code that might leak ...
objgraph.show_growth()

# Find what's keeping object alive
objgraph.show_backrefs(my_object, filename='refs.png')

# Find reference chains
objgraph.show_chain(
    objgraph.find_backref_chain(
        my_object,
        objgraph.is_proper_module
    ),
    filename='chain.png'
)

Memory Optimization Patterns

Pattern 1: Lazy Loading

python
class LazyLoader:
    """Defer reading a file until its contents are first requested.

    The file at *path* is read at most once; the text is cached on the
    instance and every later access returns the cached value.
    """

    def __init__(self, path: str):
        self.path = path
        # Cache slot: None means "not loaded yet".
        self._data = None

    @property
    def data(self):
        """File contents, loaded lazily on first access."""
        if self._data is None:
            with open(self.path) as handle:
                self._data = handle.read()
        return self._data

# Data not loaded until accessed
loader = LazyLoader('huge_file.txt')
# ... later ...
print(loader.data)  # Now it loads

Pattern 2: Object Pooling

python
from typing import TypeVar, Generic
from collections import deque

T = TypeVar('T')

class ObjectPool(Generic[T]):
    """Bounded pool that recycles objects instead of allocating fresh ones.

    acquire() hands out a pooled object when one is available, otherwise
    builds a new one via *factory*; release() returns an object for reuse
    unless the pool already holds *max_size* objects (then it is dropped).
    """

    def __init__(self, factory, max_size: int = 100):
        self.factory = factory
        self.max_size = max_size
        self.pool: deque[T] = deque()

    def acquire(self) -> T:
        """Return a recycled object if one exists, else a freshly built one."""
        try:
            return self.pool.pop()
        except IndexError:
            return self.factory()

    def release(self, obj: T) -> None:
        """Hand *obj* back for reuse; silently dropped when the pool is full."""
        if len(self.pool) < self.max_size:
            self.pool.append(obj)

# Usage
buffer_pool = ObjectPool(lambda: bytearray(4096))

buffer = buffer_pool.acquire()
# ... use buffer ...
buffer_pool.release(buffer)  # Return to pool

Pattern 3: Memory-Mapped Files

python
import mmap

def process_large_file(path: str):
    """Scan the file at *path* through a memory map, without reading it into RAM.

    Demonstrates substring search and random access over the mapped bytes.
    The original version opened the file 'r+b' (needing write permission for a
    read-only scan) and called mm.close() unconditionally, leaking the map if
    any earlier line raised; both map and file are now closed via context
    managers, and the map is read-only.

    Requires the file to be at least 1000 bytes (for the seek demo) and
    non-empty (mmap cannot map an empty file).
    """
    with open(path, 'rb') as f:
        # length=0 maps the whole file; ACCESS_READ avoids needing write
        # permission and guarantees the file cannot be modified by accident.
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
            # Search the mapped bytes like a bytes object.
            if mm.find(b'pattern') != -1:
                print("Found!")

            # Random access: jump to offset 1000 and read 100 bytes.
            mm.seek(1000)
            chunk = mm.read(100)  # kept to demonstrate partial reads

Pattern 4: Streaming Processing

python
import json
from typing import Iterator, Any

def stream_json_array(path: str) -> Iterator[Any]:
    """Lazily yield the elements of the top-level JSON array in *path*.

    Uses ijson's incremental parser, so memory use stays constant no
    matter how large the file is.
    """
    import ijson  # pip install ijson — third-party streaming JSON parser

    with open(path, 'rb') as source:
        yield from ijson.items(source, 'item')

# Process 10GB JSON file with constant memory
for record in stream_json_array('huge_data.json'):
    process(record)

Pattern 5: Compact Data Structures

python
import array
import struct

# array - Compact numeric arrays
# List of ints: ~28 bytes per int object (plus an 8-byte pointer in the list)
numbers_list = [1, 2, 3, 4, 5]

# array of ints: ~4 bytes per int (unboxed C values, no per-item object)
numbers_array = array.array('i', [1, 2, 3, 4, 5])

# struct - Pack data into bytes
# Tuple: (int, float, bool) = ~100+ bytes
data_tuple = (42, 3.14, True)

# Packed: 9 bytes (4 + 4 + 1 for native 'i', 'f', '?'; see struct.calcsize('if?'))
packed = struct.pack('if?', 42, 3.14, True)
# NOTE: 'f' is a 32-bit float, so unpacking returns 3.14 with reduced precision.
unpacked = struct.unpack('if?', packed)

Production Pitfalls

Pitfall 1: String Interning Assumptions

python
# ❌ PROBLEM: Assuming strings are interned
a = "hello"
b = "hello"
print(a is b)  # True (interned)

a = "hello world"
b = "hello world"
print(a is b)  # May be False!

# ✅ FIX: Use == for string comparison
print(a == b)  # Always correct

Pitfall 2: Large Default Arguments

python
# ❌ PROBLEM: Default argument created once
def process(data, cache={}):  # Same dict for all calls!
    if data in cache:
        return cache[data]
    result = expensive_compute(data)
    cache[data] = result  # Cache grows forever!
    return result

# ✅ FIX: Use None as default
def process(data, cache=None):
    if cache is None:
        cache = {}
    # ...

Pitfall 3: Holding References in Exceptions

python
# ❌ PROBLEM: Exception holds reference to locals
def process_data():
    huge_data = load_huge_data()  # 1GB
    try:
        result = transform(huge_data)
    except Exception as e:
        # e.__traceback__ holds reference to huge_data!
        raise

# ✅ FIX: Clear traceback or use raise from
def process_data():
    huge_data = load_huge_data()
    try:
        result = transform(huge_data)
    except Exception as e:
        del huge_data  # Explicit cleanup
        raise

Pitfall 4: Pandas Memory Explosion

python
import pandas as pd

# ❌ PROBLEM: Default dtypes waste memory
df = pd.read_csv('data.csv')
# int64 for all integers (8 bytes each)
# object for strings (pointer + string)

# ✅ FIX: Specify dtypes
df = pd.read_csv('data.csv', dtype={
    'id': 'int32',           # 4 bytes instead of 8
    'count': 'int16',        # 2 bytes
    'category': 'category',  # Categorical encoding
    'flag': 'bool',          # 1 byte
})

# ✅ FIX: Use chunking for large files
for chunk in pd.read_csv('huge.csv', chunksize=10000):
    process(chunk)

Quick Reference

python
# === __slots__ ===
class Optimized:
    __slots__ = ('x', 'y', '__weakref__')

# === Generators ===
gen = (x ** 2 for x in range(1000000))
from itertools import chain, islice

# === memory_profiler ===
from memory_profiler import profile
@profile
def func(): ...
# python -m memory_profiler script.py

# === tracemalloc ===
import tracemalloc
tracemalloc.start()
snapshot = tracemalloc.take_snapshot()
top_stats = snapshot.statistics('lineno')

# === objgraph ===
import objgraph
objgraph.show_most_common_types()
objgraph.show_growth()

# === Size checking ===
import sys
sys.getsizeof(obj)  # Shallow size

from pympler import asizeof
asizeof.asizeof(obj)  # Deep size