Skip to content

Secrets Management Security

Quản lý credentials an toàn - vì một commit sai có thể phá hủy cả hệ thống

Learning Outcomes

Sau khi hoàn thành trang này, bạn sẽ:

  • 🎯 Hiểu tại sao hardcoded secrets là thảm họa bảo mật
  • 🎯 Master environment variables cho configuration
  • 🎯 Sử dụng secrets module cho cryptographic operations
  • 🎯 Implement keyring cho secure local storage
  • 🎯 Tránh các Production Pitfalls về secrets management

Tại sao Secrets Management quan trọng?

python
# SECURITY: Never hardcode credentials in production code
# Use environment variables or secure credential management systems
# ❌ DISASTER: Hardcoded credentials (counter-example — do not copy)
DATABASE_URL = "postgresql://admin:SuperSecret123@prod-db.example.com/app"
AWS_SECRET_KEY = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
# NOTE(review): unlike the two lines above, this value is read from the
# environment rather than hardcoded — it looks like a sanitized placeholder;
# confirm what the original example was meant to show.
API_KEY = os.getenv("API_KEY")

# Consequences:
# 1. Commit to Git → Exposed forever (even after delete)
# 2. Attacker has full access to production
# 3. Compliance violations (PCI-DSS, HIPAA, GDPR)
# 4. Reputation damage, financial loss

Real-World Horror Stories

┌─────────────────────────────────────────────────────────────┐
│  2019: Capital One breach - 100M+ records exposed           │
│  Cause: Misconfigured AWS credentials                       │
│                                                             │
│  2021: Twitch source code leak - included secrets           │
│  Cause: Server misconfiguration exposed Git repos           │
│                                                             │
│  Daily: GitHub scans find 1000s of exposed API keys         │
│  Bots automatically exploit within minutes of commit        │
└─────────────────────────────────────────────────────────────┘

Environment Variables

Basic Usage

python
import os
from typing import Optional

# ✅ GOOD: Read from environment
DATABASE_URL = os.environ.get("DATABASE_URL")
API_KEY = os.environ.get("API_KEY")

# With default value (for non-sensitive config)
DEBUG = os.environ.get("DEBUG", "false").lower() == "true"
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")

# Required environment variable
def get_required_env(key: str) -> str:
    """Return the value of environment variable ``key``, failing loudly if absent.

    Args:
        key: Name of the environment variable to look up.

    Returns:
        The variable's value exactly as stored (an empty string is returned as-is).

    Raises:
        ValueError: If ``key`` is not present in the process environment.
    """
    try:
        return os.environ[key]
    except KeyError:
        # Surface a ValueError with a descriptive message instead of the bare KeyError.
        raise ValueError(f"Required environment variable {key} is not set") from None

DATABASE_URL = get_required_env("DATABASE_URL")

python-dotenv for Development

python
# .env file (NEVER commit this!)
# DATABASE_URL=postgresql://user:pass@localhost/dev
# API_KEY=dev-key-12345
# DEBUG=true

from dotenv import load_dotenv
import os

# Load .env file (only in development)
load_dotenv()  # Loads from .env in current directory

# Or specify path
load_dotenv("/path/to/.env")

# Override existing env vars
load_dotenv(override=True)

# Now use os.environ as normal
DATABASE_URL = os.environ.get("DATABASE_URL")
python
from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import Field, SecretStr
from typing import Optional

class Settings(BaseSettings):
    """Typed application settings.

    Configured below to read a UTF-8 ``.env`` file, match names
    case-insensitively, and ignore unknown entries. Fields without a default
    (``database_url``, ``api_key``) must be supplied.
    """
    
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,  # env var DATABASE_URL populates field database_url
        extra="ignore"  # Ignore extra env vars
    )
    
    # Database
    database_url: SecretStr  # SecretStr hides value in logs
    database_pool_size: int = Field(default=5, ge=1, le=100)  # constrained to 1..100
    
    # API Keys
    api_key: SecretStr
    api_secret: Optional[SecretStr] = None
    
    # Application
    debug: bool = False
    log_level: str = "INFO"
    allowed_hosts: list[str] = ["localhost"]
    
    # AWS credentials. The fields simply share the "aws_" name prefix;
    # no env_prefix is configured in model_config above.
    aws_access_key_id: Optional[SecretStr] = None
    aws_secret_access_key: Optional[SecretStr] = None
    aws_region: str = "us-east-1"

# Usage
settings = Settings()

# Access secret value (explicit opt-in to the plaintext)
db_url = settings.database_url.get_secret_value()

# SecretStr prevents accidental logging
print(settings.api_key)  # **********   (repr() shows SecretStr('**********'))
print(settings.api_key.get_secret_value())  # Actual value

Environment Variable Best Practices

1. Use prefixes for organization

APP_DATABASE_URL, APP_API_KEY, APP_DEBUG

2. Validate at startup

from pydantic_settings import BaseSettings

class Settings(BaseSettings):
    """Settings that are validated as soon as the object is constructed.

    Raises:
        ValueError: If ``database_url`` does not start with ``postgresql://``
            or ``mysql://``.
    """

    database_url: str
    api_key: str

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Validate on instantiation so a bad configuration fails at startup.
        self._validate_database_url()

    def _validate_database_url(self):
        """Reject database URLs whose scheme is not explicitly supported."""
        if not self.database_url.startswith(('postgresql://', 'mysql://')):
            raise ValueError("Invalid database URL scheme")

3. Different .env files per environment

.env.development

.env.staging

.env.production (should not exist - use real env vars)

4. Document required variables

""" Required Environment Variables:

  • DATABASE_URL: PostgreSQL connection string
  • API_KEY: External API key for service X
  • SECRET_KEY: Django/Flask secret key (min 50 chars)

Optional:

  • DEBUG: Enable debug mode (default: false)
  • LOG_LEVEL: Logging level (default: INFO) """

---

## Python secrets Module

Module `secrets` cung cấp cryptographically secure random numbers cho security-sensitive operations.

### Generating Secure Tokens

```python
import secrets

# Generate secure random bytes
random_bytes = secrets.token_bytes(32)  # 32 bytes = 256 bits
print(random_bytes)  # b'\x8f\x12...'

# Generate URL-safe token (base64 encoded)
token = secrets.token_urlsafe(32)  # 32 bytes → ~43 chars
print(token)  # "dGhpcyBpcyBhIHNlY3VyZSB0b2tlbg..."

# Generate hex token
hex_token = secrets.token_hex(32)  # 32 bytes → 64 hex chars
print(hex_token)  # "8f12a3b4c5d6e7f8..."

# Default length (32 bytes) if not specified
default_token = secrets.token_urlsafe()  # Recommended for most uses

Secure Password Generation

python
import secrets
import string

def generate_password(
    length: int = 16,
    include_uppercase: bool = True,
    include_lowercase: bool = True,
    include_digits: bool = True,
    include_special: bool = True,
    exclude_ambiguous: bool = True
) -> str:
    """
    Build a random password from the operating system's secure random source.

    One character from every enabled character set is guaranteed to appear;
    the rest are drawn from the union of the enabled sets, and the result is
    shuffled so the guaranteed characters do not sit in fixed positions.

    Args:
        length: Total number of characters (must be 8 or more).
        include_uppercase: Allow ``A-Z``.
        include_lowercase: Allow ``a-z``.
        include_digits: Allow ``0-9``.
        include_special: Allow ``!@#$%^&*()_+-=``.
        exclude_ambiguous: Drop look-alike characters (``O``, ``I``, ``l``,
            ``0``, ``1``) from the letter and digit sets.

    Returns:
        The generated password.

    Raises:
        ValueError: If ``length`` is below 8 or every character set is disabled.
    """
    if length < 8:
        raise ValueError("Password must be at least 8 characters")

    def _usable(alphabet: str, confusable: str) -> str:
        # Strip the look-alike characters only when the caller asked for it.
        if not exclude_ambiguous:
            return alphabet
        return ''.join(ch for ch in alphabet if ch not in confusable)

    # (enabled?, full alphabet, look-alike characters to drop)
    candidate_sets = (
        (include_uppercase, string.ascii_uppercase, "OI"),
        (include_lowercase, string.ascii_lowercase, "l"),
        (include_digits, string.digits, "01"),
        (include_special, "!@#$%^&*()_+-=", ""),
    )
    pools = [
        _usable(alphabet, confusable)
        for enabled, alphabet, confusable in candidate_sets
        if enabled
    ]

    if not pools:
        raise ValueError("At least one character set must be included")

    combined = ''.join(pools)

    # One guaranteed pick per enabled set, then fill up from the combined pool.
    picks = [secrets.choice(pool) for pool in pools]
    filler_count = length - len(picks)
    picks.extend(secrets.choice(combined) for _ in range(filler_count))

    # Shuffle so the guaranteed characters are not always at the front.
    secrets.SystemRandom().shuffle(picks)

    return ''.join(picks)

# Usage
password = generate_password(length=20)
print(password)  # "Kj8#mNp2$xQr5&wYz9!"

Secure Token Comparison

python
import secrets
import hmac

# ❌ VULNERABLE: Timing attack
def verify_token_bad(user_token: str, stored_token: str) -> bool:
    """Deliberately insecure counter-example: compares tokens with plain ``==``.

    Kept only to illustrate the timing-attack risk; do not use for real
    token checks.
    """
    return user_token == stored_token  # Early exit reveals info!

# ✅ SAFE: Constant-time comparison
def verify_token_good(user_token: str, stored_token: str) -> bool:
    """Compare tokens in constant time to prevent timing attacks.

    Args:
        user_token: Token supplied by the caller (untrusted).
        stored_token: Token the application expects.

    Returns:
        True when both tokens are identical, False otherwise.
    """
    # compare_digest() raises TypeError for str arguments containing
    # non-ASCII characters, so compare the UTF-8 encoded bytes instead.
    return secrets.compare_digest(
        user_token.encode("utf-8"),
        stored_token.encode("utf-8"),
    )

# Also works with bytes
def verify_signature(received: bytes, expected: bytes) -> bool:
    """Report whether two signatures are equal, using a constant-time comparison.

    Args:
        received: Signature that arrived with the request.
        expected: Signature computed locally.

    Returns:
        True if the two byte strings match, False otherwise.
    """
    signatures_match = hmac.compare_digest(received, expected)
    return signatures_match

# Usage
stored_api_key = "sk-live-abc123xyz"
user_provided = request.headers.get("X-API-Key", "")

if secrets.compare_digest(user_provided, stored_api_key):
    # Authenticated
    pass

Secure Random Selection

python
import secrets

# Select random item from sequence
users = ["alice", "bob", "charlie"]
winner = secrets.choice(users)

# Generate random integer in range
otp = secrets.randbelow(1000000)  # 0 to 999999
otp_str = f"{otp:06d}"  # Zero-padded: "042857"

# Secure shuffle (in-place)
items = [1, 2, 3, 4, 5]
secrets.SystemRandom().shuffle(items)

Keyring Library

keyring provides secure credential storage using OS-native backends (macOS Keychain, Windows Credential Locker, Linux Secret Service).

Basic Usage

python
import keyring

# Store a credential
keyring.set_password("myapp", "api_key", "sk-live-secret123")
keyring.set_password("myapp", "database_password", "super-secret-db-pass")

# Retrieve a credential
api_key = keyring.get_password("myapp", "api_key")
db_pass = keyring.get_password("myapp", "database_password")

# Delete a credential
keyring.delete_password("myapp", "api_key")

# Check if credential exists
cred = keyring.get_password("myapp", "nonexistent")
if cred is None:
    print("Credential not found")

Application Integration

python
import keyring
from typing import Optional
from dataclasses import dataclass

@dataclass
class Credentials:
    """Application credentials manager backed by the ``keyring`` library.

    Every operation is scoped to ``service_name``, so one instance acts as a
    namespace for the credentials of a single application.
    """

    # Namespace under which all credentials are stored and looked up.
    service_name: str = "myapp"

    def get(self, key: str) -> Optional[str]:
        """Return the credential stored under ``key``, or None if there is none."""
        return keyring.get_password(self.service_name, key)

    def set(self, key: str, value: str) -> None:
        """Store ``value`` under ``key`` in the keyring."""
        keyring.set_password(self.service_name, key, value)

    def delete(self, key: str) -> None:
        """Delete the credential under ``key``; a missing entry is not an error."""
        try:
            keyring.delete_password(self.service_name, key)
        except keyring.errors.PasswordDeleteError:
            pass  # Already deleted — deletion is deliberately idempotent

    def get_or_prompt(self, key: str, prompt: Optional[str] = None) -> str:
        """Return the stored credential, prompting for it when none is stored.

        Args:
            key: Credential name.
            prompt: Text shown to the user; defaults to ``"Enter {key}: "``.

        Returns:
            The stored or freshly entered credential.
        """
        value = self.get(key)
        if value is None:
            import getpass

            # getpass keeps the typed secret off the terminal.
            value = getpass.getpass(prompt or f"Enter {key}: ")
            # Do not persist an empty answer: a stored "" would be returned on
            # every later call and the user would never be asked again.
            if value:
                self.set(key, value)
        return value

# Usage
creds = Credentials(service_name="my-cli-tool")

# First run: prompts user, stores in keyring
api_key = creds.get_or_prompt("api_key", "Enter your API key: ")

# Subsequent runs: retrieves from keyring
api_key = creds.get_or_prompt("api_key")  # No prompt needed

Backend Selection

python
import keyring
from keyring.backends import SecretService, Windows, macOS

# Check current backend
print(keyring.get_keyring())
# <keyring.backends.macOS.Keyring object>

# List available backends
print(keyring.backend.get_all_keyring())

# Set specific backend (if needed)
# keyring.set_keyring(SecretService.Keyring())

# For CI/CD or headless environments, use encrypted file backend
# pip install keyrings.cryptfile
import os

from keyrings.cryptfile.cryptfile import CryptFileKeyring

kr = CryptFileKeyring()
# Never hardcode the master password — that would reintroduce the exact
# problem this guide is about. Inject it from the CI/CD secret store as an
# environment variable; os.environ[...] raises KeyError if it is missing.
kr.keyring_key = os.environ["KEYRING_MASTER_PASSWORD"]
keyring.set_keyring(kr)

Cloud Secrets Managers

AWS Secrets Manager

python
import boto3
import json
from functools import lru_cache

def get_aws_secret(secret_name: str, region: str = "us-east-1") -> dict:
    """
    Fetch a secret from AWS Secrets Manager and parse it as JSON.

    Args:
        secret_name: Name or ARN of the secret
        region: AWS region used to build the client

    Returns:
        The decoded JSON content of the secret
    """
    secrets_client = boto3.client("secretsmanager", region_name=region)
    result = secrets_client.get_secret_value(SecretId=secret_name)

    # Text secrets arrive under "SecretString"; anything else is treated as binary.
    if "SecretString" not in result:
        import base64

        # NOTE(review): confirm whether the SDK already hands back SecretBinary
        # as raw bytes; if it does, this extra base64 decode is wrong.
        decoded = base64.b64decode(result["SecretBinary"])
        return json.loads(decoded)

    return json.loads(result["SecretString"])

# Cache secrets to avoid repeated API calls
@lru_cache(maxsize=100)
def get_cached_secret(secret_name: str) -> dict:
    """Memoised wrapper around ``get_aws_secret`` using its default region.

    Results are cached per ``secret_name`` (up to 100 entries) with no expiry,
    so a rotated secret is not picked up until the cache is cleared
    (``get_cached_secret.cache_clear()``) or the process restarts. Every
    caller receives the same dict object, so it should not be mutated.
    """
    return get_aws_secret(secret_name)

# Usage
db_creds = get_cached_secret("prod/database/credentials")
db_url = f"postgresql://{db_creds['username']}:{db_creds['password']}@{db_creds['host']}/{db_creds['database']}"

Google Cloud Secret Manager

python
from google.cloud import secretmanager

def get_gcp_secret(
    project_id: str,
    secret_id: str,
    version: str = "latest"
) -> str:
    """
    Fetch one version of a secret from Google Cloud Secret Manager.

    Args:
        project_id: GCP project ID that owns the secret
        secret_id: Secret name
        version: Secret version to read (default: latest)

    Returns:
        The secret payload decoded as UTF-8 text
    """
    sm_client = secretmanager.SecretManagerServiceClient()

    # Fully qualified resource name of the requested secret version.
    version_path = f"projects/{project_id}/secrets/{secret_id}/versions/{version}"
    payload = sm_client.access_secret_version(request={"name": version_path}).payload

    return payload.data.decode("UTF-8")

# Usage
api_key = get_gcp_secret("my-project", "api-key")

HashiCorp Vault

python
import hvac

def get_vault_secret(
    path: str,
    mount_point: str = "secret",
    vault_addr: str = None,
    vault_token: str = None
) -> dict:
    """
    Read a secret from a HashiCorp Vault KV version 2 engine.

    Args:
        path: Secret path (e.g., "myapp/database")
        mount_point: Secrets engine mount point
        vault_addr: Vault server address; falls back to $VAULT_ADDR
        vault_token: Vault authentication token; falls back to $VAULT_TOKEN

    Returns:
        The secret's key/value data as a dictionary

    Raises:
        ValueError: If the client reports that it is not authenticated
    """
    import os

    # Explicit arguments win; otherwise fall back to the environment.
    if not vault_addr:
        vault_addr = os.environ.get("VAULT_ADDR")
    if not vault_token:
        vault_token = os.environ.get("VAULT_TOKEN")

    vault = hvac.Client(url=vault_addr, token=vault_token)
    if not vault.is_authenticated():
        raise ValueError("Vault authentication failed")

    kv_response = vault.secrets.kv.v2.read_secret_version(
        path=path,
        mount_point=mount_point
    )

    # The secret's own key/value pairs sit under the nested "data" key.
    return kv_response["data"]["data"]

# Usage
db_creds = get_vault_secret("myapp/database")

Production Pitfalls

Pitfall 1: Secrets in Git History

python
# ❌ BUG: Committed secret, then "fixed" by removing
# Git history still contains the secret!

# commit 1: Added config.py with API_KEY = "sk-live-secret"
# commit 2: Removed API_KEY from config.py
# Secret is STILL in commit 1!

# ✅ FIX: If you accidentally commit a secret:
# 1. IMMEDIATELY rotate the secret (generate new one)
# 2. Use git-filter-repo to remove from history (complex)
# 3. Force push (breaks collaborators' repos)
# 4. Better: Prevent with pre-commit hooks

# .pre-commit-config.yaml
"""
repos:
  - repo: https://github.com/Yelp/detect-secrets
    rev: v1.4.0
    hooks:
      - id: detect-secrets
        args: ['--baseline', '.secrets.baseline']
"""

import logging
import re

logger = logging.getLogger(__name__)


# BUG: Logging secrets
def connect_database_bad(url: str):
    """Deliberately insecure counter-example: logs the full URL, password included."""
    # e.g. url = "postgresql://user:SECRET_PASSWORD@host/db"
    logger.info(f"Connecting to database: {url}")  # Logs password!


# FIX: Redact sensitive data
def redact_url(url: str) -> str:
    """Redact password from database URL.

    Replaces the password part of ``scheme://user:password@host`` with
    ``***``. Strings without a ``://user:password@`` section are returned
    unchanged.
    """
    return re.sub(r'://([^:]+):([^@]+)@', r'://\1:***@', url)


def connect_database_good(url: str):
    """Log the connection attempt with the password redacted."""
    logger.info("Connecting to database: %s", redact_url(url))
    # Logs: "Connecting to database: postgresql://user:***@host/db"


# BETTER: Use structured logging with filters
class SecretFilter(logging.Filter):
    """Filter to redact secrets from log records."""

    # (regex, replacement) pairs, applied case-insensitively to the message.
    PATTERNS = [
        (r'password["\']?\s*[:=]\s*["\']?([^"\'}\s]+)', 'password=***'),
        (r'api[_-]?key["\']?\s*[:=]\s*["\']?([^"\'}\s]+)', 'api_key=***'),
        (r'secret["\']?\s*[:=]\s*["\']?([^"\'}\s]+)', 'secret=***'),
    ]

    def filter(self, record: logging.LogRecord) -> bool:
        """Redact known secret patterns in ``record.msg``; never drops the record."""
        # Only string messages are rewritten; other message objects pass through.
        if isinstance(record.msg, str):
            for pattern, replacement in self.PATTERNS:
                record.msg = re.sub(pattern, replacement, record.msg, flags=re.I)
        return True


# Apply filter (once — registering it twice would be redundant)
logger.addFilter(SecretFilter())


### Pitfall 3: Secrets in Error Messages

```python
from fastapi import HTTPException

# ❌ BUG: Exposing secrets in errors
def verify_api_key_bad(provided_key: str, stored_key: str):
    """Deliberately insecure counter-example: the 401 detail echoes ``stored_key``.

    Kept only to show what not to do; the error body hands the correct key to
    whoever sent the wrong one.
    """
    if provided_key != stored_key:
        raise HTTPException(
            status_code=401,
            detail=f"Invalid API key. Expected: {stored_key}"  # NEVER!
        )

# ✅ FIX: Generic error messages
def verify_api_key_good(provided_key: str, stored_key: str):
    """Validate an API key without leaking anything about the expected value.

    Args:
        provided_key: Key supplied by the client (untrusted).
        stored_key: Key the application expects.

    Raises:
        HTTPException: 401 with a generic message when the keys differ.
    """
    import secrets

    # Compare UTF-8 bytes: compare_digest() raises TypeError for str values
    # containing non-ASCII characters, which a client-supplied key could
    # trigger, producing an unhandled error instead of a clean 401.
    keys_match = secrets.compare_digest(
        provided_key.encode("utf-8"),
        stored_key.encode("utf-8"),
    )
    if not keys_match:
        raise HTTPException(
            status_code=401,
            detail="Invalid API key"  # No hint about correct value
        )

Pitfall 4: Secrets in Docker Images

dockerfile
# ❌ BUG: Secret in Dockerfile
FROM python:3.12
ENV API_KEY=sk-live-secret123
# Secret is baked into image layers!

# ❌ BUG: Copying .env file
COPY .env /app/.env
# .env is in the image!
dockerfile
# syntax=docker/dockerfile:1.4
# ✅ FIX: Use build-time secrets (Docker BuildKit)
# NOTE: the "# syntax=" parser directive must be the very first line of the
# Dockerfile; placed after any other comment it is treated as a plain comment.
FROM python:3.12

# Secret only available during build, not in final image.
# The VAR=value prefix puts API_KEY into the command's environment;
# "API_KEY=... && command" would only set an unexported shell variable
# that the command cannot see.
RUN --mount=type=secret,id=api_key \
    API_KEY="$(cat /run/secrets/api_key)" \
    some-command-that-needs-api-key

# Build with: docker build --secret id=api_key,src=./api_key.txt .
python
# ✅ FIX: Pass secrets at runtime
# docker run -e API_KEY=sk-live-secret myapp
# Or use Docker secrets / Kubernetes secrets

Pitfall 5: Weak Secret Generation

python
import random

# ❌ BUG: Using random module for secrets
def generate_token_bad() -> str:
    """Deliberately insecure counter-example: 32-character token from ``random``.

    Kept only to contrast with the ``secrets``-based version; do not use for
    anything security-sensitive.
    """
    chars = "abcdefghijklmnopqrstuvwxyz0123456789"
    return ''.join(random.choice(chars) for _ in range(32))
    # random module is NOT cryptographically secure!
    # Predictable with enough samples

# ✅ FIX: Use secrets module
import secrets

def generate_token_good():
    """Return a URL-safe text token built from 32 secure random bytes."""
    token = secrets.token_urlsafe(32)
    return token

.gitignore Template

ini
# Secrets and credentials
.env
.env.*
!.env.example
*.pem
*.key
*.crt
secrets.yaml
secrets.json
credentials.json

# IDE
.idea/
.vscode/
*.swp

# Python
__pycache__/
*.pyc
.pytest_cache/
.mypy_cache/

# Virtual environments
venv/
.venv/
env/

# OS
.DS_Store
Thumbs.db

Quick Reference

python
# === ENVIRONMENT VARIABLES ===
import os
value = os.environ.get("KEY", "default")
value = os.environ["KEY"]  # Raises KeyError if missing

# === PYDANTIC SETTINGS ===
from pydantic_settings import BaseSettings
from pydantic import SecretStr

class Settings(BaseSettings):
    # No default is given, so a value must be supplied; SecretStr wraps it and
    # the plaintext is read via get_secret_value().
    api_key: SecretStr
    
settings = Settings()
actual_value = settings.api_key.get_secret_value()

# === SECRETS MODULE ===
import secrets
token = secrets.token_urlsafe(32)
hex_token = secrets.token_hex(32)
is_valid = secrets.compare_digest(a, b)

# === KEYRING ===
import keyring
keyring.set_password("service", "key", "value")
value = keyring.get_password("service", "key")
keyring.delete_password("service", "key")