第43章 DevOps实践
学习目标
完成本章学习后,你将能够:
- 掌握Docker容器化:Dockerfile编写、镜像构建、容器管理
- 使用Docker Compose:多容器编排、网络配置、数据持久化
- 实现CI/CD流水线:GitHub Actions、GitLab CI、Jenkins
- 使用Kubernetes编排:Deployment、Service、ConfigMap、Ingress
- 实现基础设施即代码:Terraform、Ansible、配置管理
- 进行监控与日志:Prometheus、Grafana、ELK Stack
- 实现自动化部署:蓝绿部署、金丝雀发布、滚动更新
- 构建云原生应用:微服务架构、服务网格、云服务集成
43.1 Docker容器化
43.1.1 Dockerfile编写
python
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Any
from enum import Enum
import os
class BaseImage(Enum):
    """Named Docker base-image references, stored as ``image:tag`` strings."""

    PYTHON_311_SLIM = "python:3.11-slim"
    PYTHON_311_ALPINE = "python:3.11-alpine"
    PYTHON_311 = "python:3.11"
    UBUNTU_22_04 = "ubuntu:22.04"
@dataclass
class DockerInstruction:
    """One Dockerfile line: an instruction keyword plus its argument text."""

    instruction: str
    arguments: str

    def to_string(self) -> str:
        """Return the line rendered as ``"<instruction> <arguments>"``."""
        return " ".join((self.instruction, self.arguments))
@dataclass
class DockerfileBuilder:
    """Fluent builder that collects Dockerfile settings and renders them as text.

    Every mutator returns ``self`` so calls can be chained.  ``build`` emits
    instructions grouped by kind in a fixed order (FROM, MAINTAINER, LABEL,
    WORKDIR, ENV, COPY, RUN, EXPOSE, VOLUME, HEALTHCHECK, USER, ENTRYPOINT,
    CMD).  Because of that grouping, every COPY is written before every RUN
    regardless of the order in which ``copy`` and ``run`` were called.
    """

    base_image: str = "python:3.11-slim"
    workdir: str = "/app"
    maintainer: Optional[str] = None
    labels: Dict[str, str] = field(default_factory=dict)
    env_vars: Dict[str, str] = field(default_factory=dict)
    expose_ports: List[int] = field(default_factory=list)
    copy_files: List[tuple] = field(default_factory=list)
    run_commands: List[str] = field(default_factory=list)
    healthcheck: Optional[Dict] = None
    entrypoint: Optional[str] = None
    cmd: Optional[str] = None
    user: Optional[str] = None
    volumes: List[str] = field(default_factory=list)

    def add_label(self, key: str, value: str) -> "DockerfileBuilder":
        """Set (or overwrite) one LABEL key/value pair."""
        self.labels[key] = value
        return self

    def add_env(self, key: str, value: str) -> "DockerfileBuilder":
        """Set (or overwrite) one ENV variable."""
        self.env_vars[key] = value
        return self

    def expose(self, port: int) -> "DockerfileBuilder":
        """Append a port to the EXPOSE list."""
        self.expose_ports.append(port)
        return self

    def copy(self, src: str, dest: str) -> "DockerfileBuilder":
        """Append a ``COPY src dest`` pair."""
        self.copy_files.append((src, dest))
        return self

    def run(self, command: str) -> "DockerfileBuilder":
        """Append a shell command to be emitted as a RUN instruction."""
        self.run_commands.append(command)
        return self

    def add_volume(self, path: str) -> "DockerfileBuilder":
        """Append a path to be emitted as a VOLUME instruction."""
        self.volumes.append(path)
        return self

    def set_healthcheck(
        self,
        cmd: str,
        interval: str = "30s",
        timeout: str = "5s",
        retries: int = 3
    ) -> "DockerfileBuilder":
        """Store the HEALTHCHECK command together with its timing options."""
        self.healthcheck = {
            "cmd": cmd,
            "interval": interval,
            "timeout": timeout,
            "retries": retries
        }
        return self

    def set_entrypoint(self, entrypoint: str) -> "DockerfileBuilder":
        """Store the ENTRYPOINT; see ``build`` for the accepted spellings."""
        self.entrypoint = entrypoint
        return self

    def set_cmd(self, cmd: str) -> "DockerfileBuilder":
        """Store the CMD text; it is emitted verbatim after ``CMD ``."""
        self.cmd = cmd
        return self

    def set_user(self, user: str) -> "DockerfileBuilder":
        """Store the user name emitted as a USER instruction."""
        self.user = user
        return self

    @staticmethod
    def _quote_env_value(value: str) -> str:
        """Quote an ENV value when it contains whitespace.

        Values without whitespace, and values the caller already quoted, are
        returned unchanged so existing output is preserved.
        """
        text = str(value)
        if text[:1] in ('"', "'") or not any(ch.isspace() for ch in text):
            return text
        escaped = text.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    @staticmethod
    def _exec_form(command: str) -> str:
        """Render *command* as a JSON-array (exec form) instruction argument.

        Accepted inputs:
        * a complete JSON array (``'["python", "main.py"]'``) - used as is;
        * pre-quoted, comma-separated items (``'"python", "main.py"'``) -
          wrapped in brackets, which is what the builder always did;
        * a plain command line (``'python main.py'``) - split shell-style and
          serialised, so the result is valid JSON instead of ``[python main.py]``.
        """
        import json
        import shlex

        stripped = command.strip()
        if stripped.startswith("["):
            return stripped
        if stripped.startswith('"'):
            return f"[{stripped}]"
        return json.dumps(shlex.split(stripped))

    def build(self) -> str:
        """Render the collected settings as Dockerfile text (no trailing newline)."""
        lines = [f"FROM {self.base_image}"]

        if self.maintainer:
            # MAINTAINER is deprecated by Docker in favour of LABEL; it is
            # still emitted so existing callers get the same output.
            lines.append(f"MAINTAINER {self.maintainer}")

        if self.labels:
            rendered = " ".join(f'"{k}"="{v}"' for k, v in self.labels.items())
            lines.append(f"LABEL {rendered}")

        lines.append(f"WORKDIR {self.workdir}")

        lines.extend(
            f"ENV {key}={self._quote_env_value(value)}"
            for key, value in self.env_vars.items()
        )
        lines.extend(f"COPY {src} {dest}" for src, dest in self.copy_files)
        lines.extend(f"RUN {command}" for command in self.run_commands)
        lines.extend(f"EXPOSE {port}" for port in self.expose_ports)
        lines.extend(f"VOLUME {volume}" for volume in self.volumes)

        if self.healthcheck:
            hc = self.healthcheck
            lines.append(
                f"HEALTHCHECK --interval={hc['interval']} --timeout={hc['timeout']} --retries={hc['retries']} "
                f"CMD {hc['cmd']}"
            )

        if self.user:
            lines.append(f"USER {self.user}")
        if self.entrypoint:
            lines.append(f"ENTRYPOINT {self._exec_form(self.entrypoint)}")
        if self.cmd:
            lines.append(f"CMD {self.cmd}")

        return "\n".join(lines)
class DockerfileTemplates:
    """Ready-made Dockerfile texts for common Python application layouts.

    NOTE: the first two templates go through ``DockerfileBuilder``, whose
    ``build`` writes all COPY instructions before all RUN instructions, so the
    call order below documents intent but does not control the final order.
    """

    @staticmethod
    def python_web_app(
        app_name: str = "app",
        port: int = 8000,
        requirements_file: str = "requirements.txt"
    ) -> str:
        """Return a Dockerfile for a Python web app with an HTTP health check.

        Args:
            app_name: Used for the working directory (``/<app_name>``) and the
                ``app`` label.
            port: Port that is exposed and probed by the health check.
            requirements_file: Path of the requirements file in the build
                context; it is copied into the working directory and installed.
        """
        builder = DockerfileBuilder(
            base_image="python:3.11-slim",
            workdir=f"/{app_name}"
        )
        builder.add_label("maintainer", "devops@example.com")
        builder.add_label("app", app_name)
        builder.add_env("PYTHONDONTWRITEBYTECODE", "1")
        builder.add_env("PYTHONUNBUFFERED", "1")
        # curl is installed because the HEALTHCHECK below calls it and the
        # slim base image does not ship it.
        builder.run("apt-get update && apt-get install -y --no-install-recommends gcc curl && rm -rf /var/lib/apt/lists/*")
        builder.copy(requirements_file, ".")
        # ``COPY <file> .`` drops the file into the workdir under its base
        # name, so install from that name rather than a hard-coded one.
        requirements_name = requirements_file.rsplit("/", 1)[-1]
        builder.run(f"pip install --no-cache-dir -r {requirements_name}")
        builder.copy(".", ".")
        builder.expose(port)
        builder.set_healthcheck(
            cmd=f"curl -f http://localhost:{port}/health || exit 1",
            interval="30s",
            timeout="5s",
            retries=3
        )
        builder.set_cmd('["python", "main.py"]')
        return builder.build()

    @staticmethod
    def python_api_service(
        app_name: str = "api",
        port: int = 8000
    ) -> str:
        """Return a Dockerfile for a uvicorn-served API run as a non-root user.

        Args:
            app_name: Used for the working directory (``/<app_name>``).
            port: Port that is exposed and passed to uvicorn.
        """
        workdir = f"/{app_name}"
        builder = DockerfileBuilder(
            base_image="python:3.11-slim",
            workdir=workdir
        )
        builder.add_env("APP_ENV", "production")
        builder.run("groupadd -r appuser && useradd -r -g appuser appuser")
        builder.copy("requirements.txt", ".")
        builder.run("pip install --no-cache-dir -r requirements.txt")
        builder.copy("app", "./app")
        builder.copy("main.py", ".")
        # Hand over the actual working directory; a fixed "/app" does not
        # exist when app_name differs from "app".
        builder.run(f"chown -R appuser:appuser {workdir}")
        builder.set_user("appuser")
        builder.expose(port)
        builder.set_cmd(f'["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "{port}"]')
        return builder.build()

    @staticmethod
    def multi_stage_build(
        app_name: str = "app",
        port: int = 8000
    ) -> str:
        """Return a two-stage Dockerfile: install deps in a builder, copy them into a slim image.

        Args:
            app_name: Used for the final stage's working directory.
            port: Port exposed by the final image.
        """
        builder_stage = [
            "FROM python:3.11 AS builder",
            "WORKDIR /build",
            "COPY requirements.txt .",
            "RUN pip install --no-cache-dir --target=/build/deps -r requirements.txt",
        ]
        final_stage = [
            "FROM python:3.11-slim",
            f"WORKDIR /{app_name}",
            "COPY --from=builder /build/deps /usr/local/lib/python3.11/site-packages",
            "COPY . .",
            f"EXPOSE {port}",
            'CMD ["python", "main.py"]',
        ]
        # The empty string yields the blank line separating the two stages.
        return "\n".join([*builder_stage, "", *final_stage])
class DockerImageManager:
    """Builds ``docker`` CLI command strings for image operations.

    No command is executed; every method only returns the command text.
    Image references are formed as ``<registry>/<image_name>:<tag>``.
    """

    def __init__(self, registry: str = "docker.io"):
        """Remember the registry host prefixed to image names."""
        self.registry = registry

    def _ref(self, image_name: str, tag: str) -> str:
        """Return the fully qualified ``registry/name:tag`` reference."""
        return f"{self.registry}/{image_name}:{tag}"

    def build_image(
        self,
        dockerfile_path: str,
        image_name: str,
        tag: str = "latest",
        build_args: Optional[Dict[str, str]] = None
    ) -> str:
        """Return a ``docker build`` command using the current directory as context.

        Build arguments and the Dockerfile path are shell-quoted when they
        contain characters the shell would interpret (spaces, quotes, ...);
        plain values are emitted unchanged.
        """
        import shlex

        parts = [f"docker build -t {self._ref(image_name, tag)}"]
        for key, value in (build_args or {}).items():
            parts.append("--build-arg " + shlex.quote(f"{key}={value}"))
        parts.append(f"-f {shlex.quote(dockerfile_path)} .")
        return " ".join(parts)

    def tag_image(self, source: str, target: str) -> str:
        """Return a ``docker tag`` command; both references are used verbatim."""
        return f"docker tag {source} {target}"

    def push_image(self, image_name: str, tag: str = "latest") -> str:
        """Return a ``docker push`` command for the registry-qualified image."""
        return f"docker push {self._ref(image_name, tag)}"

    def pull_image(self, image_name: str, tag: str = "latest") -> str:
        """Return a ``docker pull`` command for the registry-qualified image."""
        return f"docker pull {self._ref(image_name, tag)}"

    def remove_image(self, image_name: str, tag: str = "latest") -> str:
        """Return a ``docker rmi`` command for the registry-qualified image."""
        return f"docker rmi {self._ref(image_name, tag)}"

    def list_images(self) -> str:
        """Return the ``docker images`` command."""
        return "docker images"

    def inspect_image(self, image_name: str) -> str:
        """Return a ``docker inspect`` command; *image_name* is used verbatim."""
        return f"docker inspect {image_name}"


# 43.1.2 Docker Compose
python
from typing import Dict, List, Optional, Any
from dataclasses import dataclass, field
import yaml
@dataclass
class DockerService:
    """Settings of one Compose service; empty settings are omitted on export."""

    image: Optional[str] = None
    build: Optional[str] = None
    container_name: Optional[str] = None
    ports: List[str] = field(default_factory=list)
    environment: Dict[str, str] = field(default_factory=dict)
    env_file: List[str] = field(default_factory=list)
    volumes: List[str] = field(default_factory=list)
    networks: List[str] = field(default_factory=list)
    depends_on: List[str] = field(default_factory=list)
    restart: str = "unless-stopped"
    command: Optional[str] = None
    entrypoint: Optional[str] = None
    healthcheck: Optional[Dict] = None
    labels: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Return the service as a Compose mapping.

        Only truthy settings are included, and keys appear in the fixed order
        listed below (attribute names equal the Compose keys).
        """
        key_order = (
            "image",
            "build",
            "container_name",
            "ports",
            "environment",
            "env_file",
            "volumes",
            "networks",
            "depends_on",
            "restart",
            "command",
            "entrypoint",
            "healthcheck",
            "labels",
        )
        config = {}
        for key in key_order:
            value = getattr(self, key)
            if value:
                config[key] = value
        return config
@dataclass
class DockerNetwork:
    """Settings of one Compose network."""

    driver: str = "bridge"
    ipam: Optional[Dict] = None

    def to_dict(self) -> Dict:
        """Return the network mapping; ``ipam`` is included only when set."""
        config = {"driver": self.driver}
        if self.ipam:
            config["ipam"] = self.ipam
        return config
@dataclass
class DockerVolume:
    """Settings of one Compose named volume."""

    driver: str = "local"
    driver_opts: Optional[Dict] = None

    def to_dict(self) -> Dict:
        """Return the volume mapping; ``driver_opts`` is included only when set."""
        config = {"driver": self.driver}
        if self.driver_opts:
            config["driver_opts"] = self.driver_opts
        return config
class DockerComposeBuilder:
    """Fluent builder that assembles a Compose file as a dict or YAML text."""

    def __init__(self, version: str = "3.8"):
        """Start an empty Compose document carrying the given ``version`` value."""
        self.version = version
        self.services: Dict[str, DockerService] = {}
        self.networks: Dict[str, DockerNetwork] = {}
        self.volumes: Dict[str, DockerVolume] = {}
        self.configs: Dict[str, Any] = {}
        self.secrets: Dict[str, Any] = {}

    def add_service(self, name: str, service: "DockerService") -> "DockerComposeBuilder":
        """Register (or replace) the service stored under *name*."""
        self.services[name] = service
        return self

    def add_network(self, name: str, network: "Optional[DockerNetwork]" = None) -> "DockerComposeBuilder":
        """Register a network; a default ``DockerNetwork()`` is used when none is given."""
        self.networks[name] = network or DockerNetwork()
        return self

    def add_volume(self, name: str, volume: "Optional[DockerVolume]" = None) -> "DockerComposeBuilder":
        """Register a named volume; a default ``DockerVolume()`` is used when none is given."""
        self.volumes[name] = volume or DockerVolume()
        return self

    def build(self) -> Dict:
        """Return the Compose document as a dict.

        ``version`` is always present; the remaining sections appear only when
        non-empty, in the order services, networks, volumes, configs, secrets.
        """
        compose = {"version": self.version}

        # Sections whose entries are objects exposing ``to_dict``.
        for section, entries in (
            ("services", self.services),
            ("networks", self.networks),
            ("volumes", self.volumes),
        ):
            if entries:
                compose[section] = {
                    name: entry.to_dict() for name, entry in entries.items()
                }

        # Sections stored as ready-made mappings.
        if self.configs:
            compose["configs"] = self.configs
        if self.secrets:
            compose["secrets"] = self.secrets
        return compose

    def to_yaml(self) -> str:
        """Return ``build()`` serialised as block-style YAML in insertion order."""
        return yaml.dump(self.build(), default_flow_style=False, sort_keys=False)
class DockerComposeTemplates:
    """Ready-made Compose stacks rendered to YAML through ``DockerComposeBuilder``."""

    @staticmethod
    def web_app_stack(
        app_name: str = "webapp",
        db_password: str = "secret"
    ) -> str:
        """Return YAML for a web service backed by PostgreSQL and Redis.

        Args:
            app_name: Prefix for container and volume names; also the
                database name.
            db_password: Password set on the database and embedded in the
                web service's ``DATABASE_URL``.
        """
        builder = DockerComposeBuilder()

        builder.add_service("web", DockerService(
            build=".",
            container_name=f"{app_name}_web",
            ports=["8000:8000"],
            environment={
                "DATABASE_URL": f"postgresql://postgres:{db_password}@db:5432/{app_name}",
                "REDIS_URL": "redis://redis:6379/0"
            },
            volumes=[f"{app_name}_data:/app/data"],
            networks=["frontend", "backend"],
            depends_on=["db", "redis"],
            healthcheck={
                "test": ["CMD", "curl", "-f", "http://localhost:8000/health"],
                "interval": "30s",
                "timeout": "10s",
                "retries": 3
            }
        ))
        builder.add_service("db", DockerService(
            image="postgres:15-alpine",
            container_name=f"{app_name}_db",
            environment={
                "POSTGRES_DB": app_name,
                "POSTGRES_PASSWORD": db_password
            },
            volumes=[f"{app_name}_db:/var/lib/postgresql/data"],
            networks=["backend"]
        ))
        builder.add_service("redis", DockerService(
            image="redis:7-alpine",
            container_name=f"{app_name}_redis",
            volumes=[f"{app_name}_redis:/data"],
            networks=["backend"]
        ))

        for network in ("frontend", "backend"):
            builder.add_network(network)
        for suffix in ("data", "db", "redis"):
            builder.add_volume(f"{app_name}_{suffix}")
        return builder.to_yaml()

    @staticmethod
    def microservices_stack(db_password: str = "secret") -> str:
        """Return YAML for a gateway, three services, their databases and RabbitMQ.

        Args:
            db_password: Superuser password for the two PostgreSQL services.
                The official postgres image refuses to initialise without one.
        """
        builder = DockerComposeBuilder()

        builder.add_service("gateway", DockerService(
            image="nginx:alpine",
            container_name="api_gateway",
            ports=["80:80", "443:443"],
            volumes=["./nginx.conf:/etc/nginx/nginx.conf:ro"],
            networks=["frontend"],
            depends_on=["user_service", "product_service", "order_service"]
        ))

        builder.add_service("user_service", DockerService(
            build="./services/user",
            container_name="user_service",
            environment={"SERVICE_PORT": "8001"},
            networks=["frontend", "backend"],
            depends_on=["user_db"]
        ))
        builder.add_service("product_service", DockerService(
            build="./services/product",
            container_name="product_service",
            environment={"SERVICE_PORT": "8002"},
            networks=["frontend", "backend"],
            depends_on=["product_db"]
        ))
        builder.add_service("order_service", DockerService(
            build="./services/order",
            container_name="order_service",
            environment={"SERVICE_PORT": "8003"},
            networks=["frontend", "backend"],
            depends_on=["order_db", "rabbitmq"]
        ))

        builder.add_service("user_db", DockerService(
            image="postgres:15-alpine",
            container_name="user_db",
            environment={
                "POSTGRES_DB": "users",
                "POSTGRES_PASSWORD": db_password
            },
            volumes=["user_db_data:/var/lib/postgresql/data"],
            networks=["backend"]
        ))
        builder.add_service("product_db", DockerService(
            # The official MongoDB image is published as "mongo".
            image="mongo:6",
            container_name="product_db",
            volumes=["product_db_data:/data/db"],
            networks=["backend"]
        ))
        builder.add_service("order_db", DockerService(
            image="postgres:15-alpine",
            container_name="order_db",
            environment={
                "POSTGRES_DB": "orders",
                "POSTGRES_PASSWORD": db_password
            },
            volumes=["order_db_data:/var/lib/postgresql/data"],
            networks=["backend"]
        ))
        builder.add_service("rabbitmq", DockerService(
            image="rabbitmq:3-management-alpine",
            container_name="rabbitmq",
            ports=["15672:15672"],
            networks=["backend"]
        ))

        for network in ("frontend", "backend"):
            builder.add_network(network)
        # Named volumes referenced by a service must be declared at top level,
        # otherwise Compose rejects the file.
        for volume in ("user_db_data", "product_db_data", "order_db_data"):
            builder.add_volume(volume)
        return builder.to_yaml()


# 43.2 CI/CD流水线
43.2.1 GitHub Actions
python
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any
import yaml
@dataclass
class GitHubActionStep:
    """One step of a GitHub Actions job."""

    name: str
    run: Optional[str] = None
    uses: Optional[str] = None
    with_params: Dict[str, Any] = field(default_factory=dict)
    env: Dict[str, str] = field(default_factory=dict)
    if_condition: Optional[str] = None

    def to_dict(self) -> Dict:
        """Return the step as a workflow mapping.

        ``name`` is always present; ``run``, ``uses``, ``with``, ``env`` and
        ``if`` follow in that order, each only when truthy.
        """
        optional = (
            ("run", self.run),
            ("uses", self.uses),
            ("with", self.with_params),
            ("env", self.env),
            ("if", self.if_condition),
        )
        step = {"name": self.name}
        step.update((key, value) for key, value in optional if value)
        return step
@dataclass
class GitHubActionJob:
    """One job of a GitHub Actions workflow."""

    runs_on: str = "ubuntu-latest"
    steps: List["GitHubActionStep"] = field(default_factory=list)
    needs: List[str] = field(default_factory=list)
    env: Dict[str, str] = field(default_factory=dict)
    strategy: Optional[Dict] = None
    services: Dict[str, Any] = field(default_factory=dict)

    def add_step(self, step: "GitHubActionStep") -> "GitHubActionJob":
        """Append *step* to the job and return ``self`` for chaining."""
        self.steps.append(step)
        return self

    def to_dict(self) -> Dict:
        """Return the job as a workflow mapping.

        ``runs-on`` is always present; ``needs``, ``env``, ``strategy``,
        ``services`` and ``steps`` follow in that order, each only when truthy.
        """
        job = {"runs-on": self.runs_on}
        for key in ("needs", "env", "strategy", "services"):
            value = getattr(self, key)
            if value:
                job[key] = value
        if self.steps:
            job["steps"] = [step.to_dict() for step in self.steps]
        return job
class GitHubActionsBuilder:
    """Fluent builder that assembles a GitHub Actions workflow as a dict or YAML text."""

    def __init__(self, name: str):
        """Start an empty workflow with the given display name."""
        self.name = name
        self.on_events: Dict[str, Any] = {}
        self.jobs: Dict[str, GitHubActionJob] = {}
        self.env: Dict[str, str] = {}

    def _on_branch_event(self, event: str, branches: Optional[List[str]]) -> "GitHubActionsBuilder":
        """Register *event*, filtered to *branches* when a non-empty list is given."""
        # A None value makes the event key appear without any filter.
        self.on_events[event] = {"branches": branches} if branches else None
        return self

    def on_push(self, branches: Optional[List[str]] = None) -> "GitHubActionsBuilder":
        """Trigger on push, optionally limited to *branches*."""
        return self._on_branch_event("push", branches)

    def on_pull_request(self, branches: Optional[List[str]] = None) -> "GitHubActionsBuilder":
        """Trigger on pull requests, optionally limited to target *branches*."""
        return self._on_branch_event("pull_request", branches)

    def on_schedule(self, cron: str) -> "GitHubActionsBuilder":
        """Trigger on a single cron schedule (replaces any earlier schedule)."""
        self.on_events["schedule"] = [{"cron": cron}]
        return self

    def add_job(self, name: str, job: "GitHubActionJob") -> "GitHubActionsBuilder":
        """Register (or replace) the job stored under *name*."""
        self.jobs[name] = job
        return self

    def add_env(self, key: str, value: str) -> "GitHubActionsBuilder":
        """Set (or overwrite) one workflow-level environment variable."""
        self.env[key] = value
        return self

    def build(self) -> Dict:
        """Return the workflow as a dict.

        ``name`` is always present; ``on``, ``env`` and ``jobs`` follow in that
        order, each only when non-empty.
        """
        workflow = {"name": self.name}
        if self.on_events:
            workflow["on"] = self.on_events
        if self.env:
            workflow["env"] = self.env
        if self.jobs:
            workflow["jobs"] = {
                job_name: job.to_dict() for job_name, job in self.jobs.items()
            }
        return workflow

    def to_yaml(self) -> str:
        """Return ``build()`` serialised as block-style YAML in insertion order."""
        return yaml.dump(self.build(), default_flow_style=False, sort_keys=False)
class GitHubActionsTemplates:
    """Ready-made GitHub Actions workflows rendered to YAML."""

    @staticmethod
    def python_ci() -> str:
        """Return a workflow that installs dependencies, runs pytest with coverage and uploads it."""
        builder = GitHubActionsBuilder("Python CI")
        builder.on_push(["main", "develop"])
        builder.on_pull_request(["main"])

        steps = [
            GitHubActionStep(
                name="Checkout code",
                uses="actions/checkout@v4"
            ),
            GitHubActionStep(
                name="Set up Python",
                uses="actions/setup-python@v5",
                with_params={
                    "python-version": "3.11",
                    "cache": "pip"
                }
            ),
            GitHubActionStep(
                name="Install dependencies",
                run="pip install -r requirements.txt\npip install pytest pytest-cov"
            ),
            GitHubActionStep(
                name="Run tests",
                run="pytest --cov=app --cov-report=xml"
            ),
            GitHubActionStep(
                name="Upload coverage",
                uses="codecov/codecov-action@v3",
                with_params={"file": "./coverage.xml"}
            ),
        ]
        test_job = GitHubActionJob(runs_on="ubuntu-latest")
        for step in steps:
            test_job.add_step(step)

        builder.add_job("test", test_job)
        return builder.to_yaml()

    @staticmethod
    def docker_build_push() -> str:
        """Return a workflow that builds the image with Buildx and pushes it to Docker Hub."""
        builder = GitHubActionsBuilder("Docker Build and Push")
        builder.on_push(["main"])

        steps = [
            GitHubActionStep(
                name="Checkout code",
                uses="actions/checkout@v4"
            ),
            GitHubActionStep(
                name="Set up Docker Buildx",
                uses="docker/setup-buildx-action@v3"
            ),
            GitHubActionStep(
                name="Login to Docker Hub",
                uses="docker/login-action@v3",
                with_params={
                    "username": "${{ secrets.DOCKER_USERNAME }}",
                    "password": "${{ secrets.DOCKER_PASSWORD }}"
                }
            ),
            GitHubActionStep(
                name="Build and push",
                uses="docker/build-push-action@v5",
                with_params={
                    "context": ".",
                    "push": True,
                    "tags": "${{ secrets.DOCKER_USERNAME }}/app:${{ github.sha }},${{ secrets.DOCKER_USERNAME }}/app:latest",
                    "cache-from": "type=registry,ref=${{ secrets.DOCKER_USERNAME }}/app:buildcache",
                    "cache-to": "type=registry,ref=${{ secrets.DOCKER_USERNAME }}/app:buildcache,mode=max"
                }
            ),
        ]
        build_job = GitHubActionJob(runs_on="ubuntu-latest")
        for step in steps:
            build_job.add_step(step)

        builder.add_job("build", build_job)
        return builder.to_yaml()

    @staticmethod
    def deploy_to_kubernetes() -> str:
        """Return a workflow that applies the manifests in ``k8s/`` and waits for the rollout."""
        builder = GitHubActionsBuilder("Deploy to Kubernetes")
        builder.on_push(["main"])

        # No ``needs`` here: this workflow defines only the "deploy" job, and
        # ``needs`` may only name jobs of the same workflow.
        deploy_job = GitHubActionJob(runs_on="ubuntu-latest")
        steps = [
            GitHubActionStep(
                name="Checkout code",
                uses="actions/checkout@v4"
            ),
            GitHubActionStep(
                name="Set up kubectl",
                uses="azure/setup-kubectl@v3"
            ),
            GitHubActionStep(
                name="Configure kubectl",
                run="mkdir -p ~/.kube\necho \"${{ secrets.KUBE_CONFIG }}\" | base64 -d > ~/.kube/config"
            ),
            GitHubActionStep(
                name="Deploy to Kubernetes",
                run="kubectl apply -f k8s/"
            ),
            GitHubActionStep(
                name="Wait for rollout",
                run="kubectl rollout status deployment/app -n default --timeout=300s"
            ),
        ]
        for step in steps:
            deploy_job.add_step(step)

        builder.add_job("deploy", deploy_job)
        return builder.to_yaml()


# 43.2.2 GitLab CI
python
class GitLabCIBuilder:
    """Fluent builder that assembles a ``.gitlab-ci.yml`` document as a dict or YAML text."""

    # Top-level keys written by ``build``; a job with one of these names
    # would silently overwrite that section.
    _RESERVED_KEYS = ("stages", "variables", "default")

    def __init__(self):
        """Start an empty pipeline definition."""
        self.stages: List[str] = []
        self.variables: Dict[str, str] = {}
        self.jobs: Dict[str, Dict] = {}
        self.default_config: Dict = {}

    def add_stage(self, stage: str) -> "GitLabCIBuilder":
        """Append *stage* unless it is already listed (first-seen order is kept)."""
        if stage not in self.stages:
            self.stages.append(stage)
        return self

    def add_variable(self, key: str, value: str) -> "GitLabCIBuilder":
        """Set (or overwrite) one pipeline-level variable."""
        self.variables[key] = value
        return self

    def add_job(
        self,
        name: str,
        stage: str,
        image: Optional[str] = None,
        script: Optional[List[str]] = None,
        only: Optional[List[str]] = None,
        except_branches: Optional[List[str]] = None,
        variables: Optional[Dict[str, str]] = None,
        artifacts: Optional[Dict] = None,
        services: Optional[List[str]] = None,
        before_script: Optional[List[str]] = None,
        after_script: Optional[List[str]] = None
    ) -> "GitLabCIBuilder":
        """Register (or replace) a job and make sure its stage is listed.

        Only truthy settings are written to the job mapping;
        ``except_branches`` is stored under the ``except`` key.

        Raises:
            ValueError: If *name* equals one of the top-level keys this
                builder emits (``stages``, ``variables``, ``default``).
        """
        if name in self._RESERVED_KEYS:
            raise ValueError(f"job name {name!r} collides with a top-level key")

        self.add_stage(stage)
        optional = (
            ("image", image),
            ("script", script),
            ("only", only),
            ("except", except_branches),
            ("variables", variables),
            ("artifacts", artifacts),
            ("services", services),
            ("before_script", before_script),
            ("after_script", after_script),
        )
        job = {"stage": stage}
        job.update((key, value) for key, value in optional if value)
        self.jobs[name] = job
        return self

    def set_default(
        self,
        image: Optional[str] = None,
        before_script: Optional[List[str]] = None
    ) -> "GitLabCIBuilder":
        """Set the ``default`` image and/or ``before_script``; falsy arguments leave the setting untouched."""
        if image:
            self.default_config["image"] = image
        if before_script:
            self.default_config["before_script"] = before_script
        return self

    def build(self) -> Dict:
        """Return the pipeline as a dict.

        Non-empty ``stages``, ``variables`` and ``default`` sections come
        first, followed by one top-level key per job.
        """
        config = {}
        for key, section in (
            ("stages", self.stages),
            ("variables", self.variables),
            ("default", self.default_config),
        ):
            if section:
                config[key] = section
        config.update(self.jobs)
        return config

    def to_yaml(self) -> str:
        """Return ``build()`` serialised as block-style YAML in insertion order."""
        return yaml.dump(self.build(), default_flow_style=False, sort_keys=False)
class GitLabCITemplates:
    """Ready-made GitLab CI pipelines rendered to YAML."""

    @staticmethod
    def python_pipeline() -> str:
        """Return a test / lint / build / deploy pipeline for a Python project.

        Stages are listed in the order jobs are added: test, build, deploy.
        """
        builder = GitLabCIBuilder()
        builder.add_variable("PIP_CACHE_DIR", "$CI_PROJECT_DIR/.cache/pip")
        builder.set_default(
            image="python:3.11",
            before_script=["pip install -r requirements.txt"]
        )

        builder.add_job(
            name="test",
            stage="test",
            script=[
                "pip install pytest pytest-cov",
                "pytest --cov=app --cov-report=xml --cov-report=html"
            ],
            artifacts={
                "paths": ["htmlcov/", "coverage.xml"],
                "reports": {"coverage_report": {"coverage_format": "cobertura", "path": "coverage.xml"}}
            }
        )
        builder.add_job(
            name="lint",
            stage="test",
            script=[
                "pip install flake8 black",
                "flake8 app/",
                "black --check app/"
            ]
        )
        # The build job needs the docker CLI and a daemon, so it overrides
        # the default Python image and pip before_script, and logs in to the
        # project registry before pushing.
        builder.add_job(
            name="build",
            stage="build",
            image="docker:24",
            services=["docker:24-dind"],
            before_script=[
                'echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"'
            ],
            script=[
                "docker build -t $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA .",
                "docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA"
            ],
            only=["main"],
            variables={"DOCKER_DRIVER": "overlay2", "DOCKER_TLS_CERTDIR": "/certs"}
        )
        # NOTE(review): this job inherits the default python:3.11 image, which
        # does not ship kubectl - confirm the runner provides it or set an image.
        builder.add_job(
            name="deploy",
            stage="deploy",
            script=[
                "kubectl set image deployment/app app=$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA"
            ],
            only=["main"]
        )
        return builder.to_yaml()


# 43.3 Kubernetes编排
43.3.1 Deployment与Service
python
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any
import yaml
@dataclass
class Container:
    """One container entry of a Kubernetes pod spec."""

    name: str
    image: str
    ports: List[int] = field(default_factory=list)
    env: Dict[str, str] = field(default_factory=dict)
    env_from: List[Dict] = field(default_factory=list)
    resources: Dict = field(default_factory=dict)
    volume_mounts: List[Dict] = field(default_factory=list)
    liveness_probe: Optional[Dict] = None
    readiness_probe: Optional[Dict] = None
    command: List[str] = field(default_factory=list)
    args: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Return the container as a manifest mapping.

        ``name`` and ``image`` are always present; every other setting is
        included only when truthy, under its camelCase manifest key.
        """
        container = {
            "name": self.name,
            "image": self.image
        }
        if self.ports:
            container["ports"] = [{"containerPort": port} for port in self.ports]
        if self.env:
            # Kubernetes only accepts string env values, so numbers and
            # booleans are stringified here.
            container["env"] = [
                {"name": key, "value": str(value)}
                for key, value in self.env.items()
            ]

        passthrough = (
            ("envFrom", self.env_from),
            ("resources", self.resources),
            ("volumeMounts", self.volume_mounts),
            ("livenessProbe", self.liveness_probe),
            ("readinessProbe", self.readiness_probe),
            ("command", self.command),
            ("args", self.args),
        )
        container.update((key, value) for key, value in passthrough if value)
        return container
@dataclass
class Deployment:
    """An ``apps/v1`` Deployment manifest under construction."""

    name: str
    replicas: int = 1
    labels: Dict[str, str] = field(default_factory=dict)
    containers: List["Container"] = field(default_factory=list)
    volumes: List[Dict] = field(default_factory=list)
    image_pull_secrets: List[str] = field(default_factory=list)
    node_selector: Dict[str, str] = field(default_factory=dict)
    tolerations: List[Dict] = field(default_factory=list)
    affinity: Optional[Dict] = None

    def add_container(self, container: "Container") -> "Deployment":
        """Append *container* to the pod template and return ``self``."""
        self.containers.append(container)
        return self

    def add_volume(self, name: str, config: Dict) -> "Deployment":
        """Append a pod volume built from *name* merged with *config*."""
        self.volumes.append({"name": name, **config})
        return self

    def to_dict(self) -> Dict:
        """Return the Deployment manifest as a dict.

        The same label mapping is used for the metadata, the selector and the
        pod template.  When no labels were given, ``{"app": <name>}`` is used,
        because a Deployment with an empty selector is rejected by the API.
        """
        labels = self.labels or {"app": self.name}

        pod_spec = {"containers": [c.to_dict() for c in self.containers]}
        if self.volumes:
            pod_spec["volumes"] = self.volumes
        if self.image_pull_secrets:
            pod_spec["imagePullSecrets"] = [
                {"name": secret} for secret in self.image_pull_secrets
            ]
        if self.node_selector:
            pod_spec["nodeSelector"] = self.node_selector
        if self.tolerations:
            pod_spec["tolerations"] = self.tolerations
        if self.affinity:
            pod_spec["affinity"] = self.affinity

        return {
            "apiVersion": "apps/v1",
            "kind": "Deployment",
            "metadata": {
                "name": self.name,
                "labels": labels
            },
            "spec": {
                "replicas": self.replicas,
                "selector": {"matchLabels": labels},
                "template": {
                    "metadata": {"labels": labels},
                    "spec": pod_spec
                }
            }
        }
@dataclass
class Service:
    """A ``v1`` Service manifest."""

    name: str
    selector: Dict[str, str]
    ports: List[Dict]
    service_type: str = "ClusterIP"
    labels: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Return the Service manifest as a dict (``labels`` is emitted even when empty)."""
        metadata = {
            "name": self.name,
            "labels": self.labels
        }
        spec = {
            "type": self.service_type,
            "selector": self.selector,
            "ports": self.ports
        }
        return {
            "apiVersion": "v1",
            "kind": "Service",
            "metadata": metadata,
            "spec": spec
        }
class KubernetesManifestBuilder:
    """Collects Kubernetes manifests and renders them as a multi-document YAML string."""

    def __init__(self):
        """Start with an empty manifest list."""
        self.manifests: List[Dict] = []

    def add_deployment(self, deployment: "Deployment") -> "KubernetesManifestBuilder":
        """Append the manifest produced by ``deployment.to_dict()``."""
        self.manifests.append(deployment.to_dict())
        return self

    def add_service(self, service: "Service") -> "KubernetesManifestBuilder":
        """Append the manifest produced by ``service.to_dict()``."""
        self.manifests.append(service.to_dict())
        return self

    def add_configmap(
        self,
        name: str,
        data: Dict,
        labels: Optional[Dict] = None
    ) -> "KubernetesManifestBuilder":
        """Append a ConfigMap holding *data*; *labels* are added only when given."""
        metadata = {"name": name}
        if labels:
            metadata["labels"] = labels
        self.manifests.append({
            "apiVersion": "v1",
            "kind": "ConfigMap",
            "metadata": metadata,
            "data": data
        })
        return self

    def add_secret(self, name: str, data: Dict, secret_type: str = "Opaque") -> "KubernetesManifestBuilder":
        """Append a Secret whose values are base64-encoded.

        Values may be ``str`` (encoded as UTF-8 first) or ``bytes`` (encoded
        as is).
        """
        import base64

        encoded_data = {
            key: base64.b64encode(
                value if isinstance(value, bytes) else value.encode()
            ).decode()
            for key, value in data.items()
        }
        self.manifests.append({
            "apiVersion": "v1",
            "kind": "Secret",
            "metadata": {"name": name},
            "type": secret_type,
            "data": encoded_data
        })
        return self

    def add_ingress(
        self,
        name: str,
        rules: List[Dict],
        annotations: Optional[Dict] = None,
        tls: Optional[List[Dict]] = None
    ) -> "KubernetesManifestBuilder":
        """Append a ``networking.k8s.io/v1`` Ingress; *annotations* and *tls* are optional."""
        metadata = {"name": name}
        if annotations:
            metadata["annotations"] = annotations
        spec = {"rules": rules}
        if tls:
            spec["tls"] = tls
        self.manifests.append({
            "apiVersion": "networking.k8s.io/v1",
            "kind": "Ingress",
            "metadata": metadata,
            "spec": spec
        })
        return self

    def to_yaml(self) -> str:
        """Return all manifests as YAML documents separated by ``---`` lines."""
        docs = [
            yaml.dump(manifest, default_flow_style=False, sort_keys=False)
            for manifest in self.manifests
        ]
        return "---\n".join(docs)


# 43.4 知识图谱
43.4.1 DevOps工具链架构
┌─────────────────────────────────────────────────────────────────────┐
│ DevOps工具链全景图 │
├─────────────────────────────────────────────────────────────────────┤
│ ┌─────────────────────────────────────────────────────────────┐ │
│ │ 代码管理 (Code) │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │ Git │ │ GitHub │ │ GitLab │ │Bitbucket │ │ │
│ │ │版本控制 │ │ 代码托管 │ │ CI/CD │ │ 企业托管 │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
│ └─────────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────┐ │
│ │ 构建与测试 (Build & Test) │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │ Docker │ │ Jenkins │ │ GitHub │ │ GitLab │ │ │
│ │ │ 容器构建 │ │ 流水线 │ │ Actions │ │ CI │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
│ └─────────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────┐ │
│ │ 部署与发布 (Deploy) │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │Kubernetes│ │ Helm │ │ ArgoCD │ │ Spinnaker│ │ │
│ │ │ 容器编排 │ │ 包管理 │ │ GitOps │ │ 持续部署 │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
│ └─────────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────┐ │
│ │ 监控与运维 (Monitor) │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │Prometheus│ │ Grafana │ │ ELK │ │ Jaeger │ │ │
│ │ │ 指标监控 │ │ 可视化 │ │ 日志分析 │ │ 链路追踪 │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
│ └─────────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────┐ │
│ │ 基础设施 (Infrastructure) │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │ Terraform│ │ Ansible │ │ AWS │ │ Azure │ │ │
│ │ │ IaC │ │ 配置管理 │ │ 云服务 │ │ 云服务 │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
│ └─────────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────────┘

43.4.2 CI/CD流水线流程
┌─────────────────────────────────────────────────────────────────────┐
│ CI/CD流水线工作流程 │
├─────────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────┐ │
│ │ 代码提交 │ │
│ │ git push │ │
│ └────┬─────┘ │
│ │ │
│ ▼ │
│ ┌──────────────────────────────────────────────────────────┐ │
│ │ 持续集成 (CI) │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │ 代码检查 │ │ 单元测试 │ │ 构建镜像 │ │ 安全扫描 │ │ │
│ │ │ Lint │ │ Unit Test│ │ Docker │ │ SAST │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
│ └──────────────────────────────────────────────────────────┘ │
│ │ │
│ │ 通过所有检查 │
│ ▼ │
│ ┌──────────────────────────────────────────────────────────┐ │
│ │ 制品管理 │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │ 镜像仓库 │ │ 制品存储 │ │ 版本标记 │ │ │
│ │ │ Registry │ │ Artifacts│ │ Tagging │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ │ │
│ └──────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌──────────────────────────────────────────────────────────┐ │
│ │ 持续部署 (CD) │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │ 部署开发 │ │ 部署测试 │ │ 部署生产 │ │ │
│ │ │ Dev │ │ Stage │ │ Prod │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ │ │
│ └──────────────────────────────────────────────────────────┘ │
│ │ │
│ ▼ │
│ ┌──────────────────────────────────────────────────────────┐ │
│ │ 监控反馈 │ │
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
│ │ │ 健康检查 │ │ 性能监控 │ │ 告警通知 │ │ │
│ │ │ Health │ │ Metrics │ │ Alerting │ │ │
│ │ └──────────┘ └──────────┘ └──────────┘ │ │
│ └──────────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────┘

43.5 技术选型指南
43.5.1 CI/CD平台选型
| 平台 | 适用场景 | 特点 | 自托管 | 推荐指数 |
|---|---|---|---|---|
| GitHub Actions | GitHub项目 | 集成度高、生态丰富 | 可选 | ★★★★★ |
| GitLab CI | GitLab项目 | 一体化、功能完整 | 支持 | ★★★★★ |
| Jenkins | 企业级、复杂流程 | 插件丰富、高度可定制 | 支持 | ★★★★☆ |
| CircleCI | 云原生项目 | 快速、并行构建 | 不支持 | ★★★★☆ |
| Drone | 容器化项目 | 轻量级、Docker原生 | 支持 | ★★★☆☆ |
43.5.2 容器编排平台选型
| 平台 | 适用规模 | 复杂度 | 功能完整性 | 推荐指数 |
|---|---|---|---|---|
| Kubernetes | 大规模生产 | 高 | ★★★★★ | ★★★★★ |
| Docker Swarm | 中小规模 | 低 | ★★★☆☆ | ★★★☆☆ |
| Nomad | 混合负载 | 中 | ★★★★☆ | ★★★★☆ |
| ECS | AWS生态 | 中 | ★★★★☆ | ★★★★☆ |
43.5.3 监控方案选型
| 方案 | 指标监控 | 日志管理 | 链路追踪 | 学习曲线 |
|---|---|---|---|---|
| Prometheus + Grafana | ★★★★★ | - | - | 中 |
| ELK Stack | - | ★★★★★ | - | 高 |
| Jaeger | - | - | ★★★★★ | 中 |
| Datadog | ★★★★★ | ★★★★★ | ★★★★★ | 低 |
| SkyWalking | ★★★★☆ | ★★★☆☆ | ★★★★★ | 中 |
43.6 常见问题与解决方案
43.6.1 Docker镜像优化
python
from dataclasses import dataclass
from typing import List, Dict, Optional
class DockerImageOptimizer:
    """Generators for an optimised Dockerfile, optimisation tips and a .dockerignore."""

    @staticmethod
    def generate_optimized_dockerfile(
        base_image: str = "python:3.11-slim",
        app_name: str = "app"
    ) -> str:
        """Return a multi-stage Dockerfile that runs the app as a non-root user.

        Args:
            base_image: Image used for both the builder and the final stage.
                The template uses ``groupadd``/``useradd``, so it assumes an
                image that provides those tools.
            app_name: Used for the final stage's working directory.

        Dependencies are installed into a version-independent directory and
        exposed through ``PYTHONPATH``, so the template does not depend on the
        Python minor version of *base_image*.  The health check uses the
        Python standard library because slim images do not ship curl.
        """
        return f'''# 多阶段构建
FROM {base_image} AS builder
WORKDIR /build
COPY requirements.txt .
RUN pip install --no-cache-dir --target=/build/deps -r requirements.txt
# 最终镜像
FROM {base_image}
WORKDIR /{app_name}
# 安全:非root用户
RUN groupadd -r appuser && useradd -r -g appuser appuser
# 复制依赖
COPY --from=builder /build/deps /opt/deps
# 复制应用代码
COPY --chown=appuser:appuser . .
# 设置环境变量
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/opt/deps
# 切换用户
USER appuser
# 健康检查
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \\
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
CMD ["python", "main.py"]
'''

    @staticmethod
    def get_optimization_tips() -> List[str]:
        """Return the list of image-optimisation tips (a new list on every call)."""
        return [
            "使用多阶段构建减小镜像体积",
            "使用-alpine或-slim基础镜像",
            "合并RUN命令减少层数",
            "使用.dockerignore排除不必要文件",
            "不要在镜像中存储敏感信息",
            "使用非root用户运行应用",
            "利用构建缓存加速构建",
            "定期更新基础镜像版本"
        ]

    @staticmethod
    def create_dockerignore() -> str:
        """Return the contents of a .dockerignore file (the text starts with a blank line)."""
        return '''
__pycache__
*.pyc
*.pyo
*.pyd
.Python
*.so
.env
.venv
venv/
ENV/
.git
.gitignore
.docker
Dockerfile
docker-compose.yml
README.md
.pytest_cache
.coverage
htmlcov/
*.log
*.tmp
node_modules/
'''
class DockerSecurityChecker:
    """Catalogue of container hardening checks with a suggested fix for each."""

    # Each entry carries three string fields: "check" (short name),
    # "description" (what the check ensures) and "fix" (how to satisfy it).
    SECURITY_CHECKS = [
        {
            "check": "非root用户运行",
            "description": "确保容器以非root用户运行",
            "fix": "在Dockerfile中添加: USER appuser",
        },
        {
            "check": "镜像签名验证",
            "description": "启用Docker Content Trust",
            "fix": "设置环境变量: DOCKER_CONTENT_TRUST=1",
        },
        {
            "check": "只读根文件系统",
            "description": "使容器根文件系统只读",
            "fix": "docker run --read-only ...",
        },
        {
            "check": "资源限制",
            "description": "设置CPU和内存限制",
            "fix": "docker run --memory=512m --cpus=1 ...",
        },
        {
            "check": "安全选项",
            "description": "禁用不必要的特权",
            "fix": "docker run --cap-drop=ALL --cap-add=NET_BIND_SERVICE ...",
        },
    ]

    @classmethod
    def get_security_recommendations(cls) -> List[Dict]:
        """Return the hardening checks defined in ``SECURITY_CHECKS``.

        Returns:
            A new list holding a copy of every entry, in the same order.
            Copies are returned so that a caller editing the result cannot
            alter the class-level catalogue shared by all users.
        """
        return [dict(entry) for entry in cls.SECURITY_CHECKS]


# 43.6.2 CI/CD流水线优化
python
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
@dataclass
class PipelineStage:
    """Settings record for a single stage of a CI pipeline.

    Only ``name`` is required; the remaining fields default to "off"/unset.
    """

    # Identifier of the stage.
    name: str
    # Presumably marks the stage as runnable in parallel with others; no code
    # in this snippet reads the flag -- confirm against the consumer.
    parallel: bool = False
    # Presumably lets the pipeline continue when this stage fails; not read
    # by any code in this snippet -- confirm against the consumer.
    allow_failure: bool = False
    # Optional string-to-string cache mapping; None (the default) means no
    # cache settings were supplied for the stage.
    cache: Optional[Dict[str, str]] = None
class CIPipelineOptimizer:
    """Collects CI cache registrations and emits tuning snippets for a pipeline.

    The dictionaries returned by the generator methods use GitHub Actions
    expression syntax (``${{ ... }}``) in their string values.
    """

    def __init__(self):
        """Start with no stages and no registered caches."""
        self.stages: List[PipelineStage] = []
        # Maps a cache name to the filesystem path that should be cached.
        self.caches: Dict[str, str] = {}

    def add_cache(self, key: str, path: str):
        """Register (or overwrite) the path to cache under the name ``key``."""
        self.caches[key] = path

    def optimize_parallel(self) -> Dict:
        """Return a build-matrix ``strategy`` mapping.

        The matrix spans three Python versions and two operating systems,
        with ``fail-fast`` disabled.
        """
        matrix = {
            "python-version": ["3.9", "3.10", "3.11"],
            "os": ["ubuntu-latest", "macos-latest"],
        }
        return {"strategy": {"matrix": matrix, "fail-fast": False}}

    def generate_cache_config(self) -> Dict:
        """Return cache settings: the built-in entries plus registered caches.

        The built-in ``pip`` and ``docker`` entries are always present. Every
        cache registered through ``add_cache`` is merged in afterwards:

        * a name matching a built-in entry replaces only that entry's
          ``path``, keeping its hash-based ``key``;
        * any other name adds an entry whose ``key`` is
          ``${{ runner.os }}-<name>``.

        Returns:
            A new dict mapping cache name to its settings; with no registered
            caches it contains exactly the two built-in entries.
        """
        config = {
            "pip": {
                "key": "${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}",
                "restore-keys": "${{ runner.os }}-pip-",
                "path": "~/.cache/pip",
            },
            "docker": {
                "key": "${{ runner.os }}-docker-${{ hashFiles('Dockerfile') }}",
                "path": "/tmp/.buildx-cache",
            },
        }
        # Without this merge, add_cache() would have no observable effect.
        for name, path in self.caches.items():
            if name in config:
                config[name]["path"] = path
            else:
                config[name] = {
                    "key": "${{ runner.os }}-" + name,
                    "path": path,
                }
        return config

    @staticmethod
    def get_optimization_strategies() -> List[Dict]:
        """Return the optimization strategies as a list of dicts.

        Each dict has the keys ``strategy``, ``description`` and ``example``.
        """
        rows = (
            ("并行执行", "将独立的任务并行执行", "使用matrix策略并行测试多个版本"),
            ("缓存依赖", "缓存pip、npm等依赖", "actions/cache@v3"),
            ("增量构建", "只构建变更的部分", "使用Docker layer缓存"),
            ("条件执行", "只在特定条件下执行", "使用paths过滤触发"),
            ("复用工作流", "使用可复用的工作流", "使用composite actions"),
        )
        return [
            {"strategy": title, "description": summary, "example": sample}
            for title, summary, sample in rows
        ]
class DeploymentStrategy:
    """Static descriptions of three release strategies.

    Each method returns a dict with the keys ``strategy`` (identifier),
    ``steps`` (ordered list of step descriptions), ``advantages`` and
    ``disadvantages`` (lists of short phrases).
    """

    @staticmethod
    def blue_green_deployment() -> Dict:
        """Describe the blue-green strategy."""
        steps = [
            "1. 部署新版本到Green环境",
            "2. 运行冒烟测试验证Green",
            "3. 切换流量到Green环境",
            "4. 监控新版本运行状态",
            "5. 如有问题快速回滚到Blue",
        ]
        pros = ["零停机", "快速回滚"]
        cons = ["需要双倍资源"]
        return dict(
            strategy="blue-green",
            steps=steps,
            advantages=pros,
            disadvantages=cons,
        )

    @staticmethod
    def canary_deployment() -> Dict:
        """Describe the canary strategy."""
        steps = [
            "1. 部署新版本到少量实例",
            "2. 路由小部分流量到新版本",
            "3. 监控关键指标",
            "4. 逐步增加流量比例",
            "5. 全量发布或回滚",
        ]
        pros = ["风险可控", "渐进式发布"]
        cons = ["部署时间较长"]
        return dict(
            strategy="canary",
            steps=steps,
            advantages=pros,
            disadvantages=cons,
        )

    @staticmethod
    def rolling_deployment() -> Dict:
        """Describe the rolling-update strategy."""
        steps = [
            "1. 逐个更新实例",
            "2. 等待新实例就绪",
            "3. 继续更新下一批",
            "4. 直到所有实例更新完成",
        ]
        pros = ["资源效率高", "平滑过渡"]
        cons = ["回滚较慢"]
        return dict(
            strategy="rolling",
            steps=steps,
            advantages=pros,
            disadvantages=cons,
        )


# 43.6.3 Kubernetes配置最佳实践
python
from dataclasses import dataclass, field
from typing import Dict, List, Optional
import yaml
class KubernetesBestPractices:
    """Ready-made manifest fragments and a checklist for Kubernetes workloads.

    Every member is a static method that returns a freshly built structure,
    so callers may modify the result freely.
    """

    @staticmethod
    def resource_limits_template() -> Dict:
        """Return a ``resources`` mapping with requests and limits.

        Requests are 128Mi memory / 100m CPU; limits are 512Mi / 500m.
        """
        requests = {"memory": "128Mi", "cpu": "100m"}
        limits = {"memory": "512Mi", "cpu": "500m"}
        return {"resources": {"requests": requests, "limits": limits}}

    @staticmethod
    def health_probe_template() -> Dict:
        """Return liveness and readiness HTTP probe settings.

        The liveness probe requests ``/health`` and the readiness probe
        requests ``/ready``, both on port 8000.
        """
        return {
            "livenessProbe": {
                "httpGet": {"path": "/health", "port": 8000},
                "initialDelaySeconds": 30,
                "periodSeconds": 10,
                "timeoutSeconds": 5,
                "failureThreshold": 3,
            },
            "readinessProbe": {
                "httpGet": {"path": "/ready", "port": 8000},
                "initialDelaySeconds": 5,
                "periodSeconds": 5,
                "timeoutSeconds": 3,
                "failureThreshold": 3,
            },
        }

    @staticmethod
    def security_context_template() -> Dict:
        """Return a restrictive ``securityContext`` mapping.

        The mapping runs as non-root UID 1000 with a read-only root
        filesystem, forbids privilege escalation and drops all capabilities.
        """
        return {
            "securityContext": {
                "runAsNonRoot": True,
                "runAsUser": 1000,
                "readOnlyRootFilesystem": True,
                "allowPrivilegeEscalation": False,
                "capabilities": {"drop": ["ALL"]},
            }
        }

    @staticmethod
    def pod_disruption_budget_template(
        min_available: int = 1,
        app_name: str = "myapp",
        name: str = "app-pdb"
    ) -> Dict:
        """Return a ``policy/v1`` PodDisruptionBudget manifest.

        Args:
            min_available: Value written to ``spec.minAvailable``.
            app_name: Value of the ``app`` label used in the pod selector.
            name: Value written to ``metadata.name``.

        Returns:
            The manifest as a dict; the defaults reproduce the original
            fixed template (``app-pdb`` selecting ``app: myapp``).
        """
        return {
            "apiVersion": "policy/v1",
            "kind": "PodDisruptionBudget",
            "metadata": {"name": name},
            "spec": {
                "minAvailable": min_available,
                "selector": {"matchLabels": {"app": app_name}},
            },
        }

    @staticmethod
    def horizontal_pod_autoscaler_template(
        min_replicas: int = 2,
        max_replicas: int = 10,
        target_cpu: int = 70,
        app_name: str = "myapp",
        name: str = "app-hpa"
    ) -> Dict:
        """Return an ``autoscaling/v2`` HorizontalPodAutoscaler manifest.

        Args:
            min_replicas: Value written to ``spec.minReplicas``.
            max_replicas: Value written to ``spec.maxReplicas``.
            target_cpu: Target average CPU utilization, written to
                ``averageUtilization`` of the single CPU resource metric.
            app_name: Name of the Deployment referenced by
                ``spec.scaleTargetRef``.
            name: Value written to ``metadata.name``.

        Returns:
            The manifest as a dict; the defaults reproduce the original
            fixed template (``app-hpa`` scaling Deployment ``myapp``).
        """
        cpu_metric = {
            "type": "Resource",
            "resource": {
                "name": "cpu",
                "target": {
                    "type": "Utilization",
                    "averageUtilization": target_cpu,
                },
            },
        }
        return {
            "apiVersion": "autoscaling/v2",
            "kind": "HorizontalPodAutoscaler",
            "metadata": {"name": name},
            "spec": {
                "scaleTargetRef": {
                    "apiVersion": "apps/v1",
                    "kind": "Deployment",
                    "name": app_name,
                },
                "minReplicas": min_replicas,
                "maxReplicas": max_replicas,
                "metrics": [cpu_metric],
            },
        }

    @staticmethod
    def get_best_practices_checklist() -> List[str]:
        """Return the best-practice checklist items (user-facing text)."""
        return [
            "✓ 设置资源请求和限制",
            "✓ 配置健康检查探针",
            "✓ 使用安全上下文",
            "✓ 配置Pod中断预算",
            "✓ 设置水平自动伸缩",
            "✓ 使用ConfigMap管理配置",
            "✓ 使用Secret管理敏感信息",
            "✓ 配置网络策略",
            "✓ 设置Pod反亲和性",
            "✓ 启用日志收集",
        ]


# 43.7 本章小结
本章系统介绍了Python DevOps的核心概念与实践方法:
- Docker容器化:Dockerfile编写、镜像构建、容器管理
- Docker Compose:多容器编排、网络配置、数据持久化
- CI/CD流水线:GitHub Actions、GitLab CI、自动化部署
- Kubernetes编排:Deployment、Service、ConfigMap、Ingress
- 基础设施即代码:配置管理、自动化部署
练习题
- 为一个Python Web应用编写完整的Dockerfile
- 使用Docker Compose编排一个包含Web、数据库、缓存的应用栈
- 编写GitHub Actions工作流,实现自动化测试和部署
- 为一个微服务应用编写Kubernetes部署清单
- 实现一个蓝绿部署脚本,支持零停机部署